from sklearn.linear_model import LogisticRegression as logReg
from sklearn.metrics import accuracy_score


def test_model(x, y, x_test, y_test):
    # Sweep the inverse regularisation strength C and report train/test accuracy
    for c in [0.01, 0.05, 0.25, 0.5, 1]:
        lr = logReg(C=c)
        lr.fit(x, y)
        print("Accuracy for C=%s, train = %s, test = %s"
              % (c,
                 accuracy_score(y, lr.predict(x)),
                 accuracy_score(y_test, lr.predict(x_test))))
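# A minimal usage sketch for test_model on synthetic data (make_classification
# and this split are illustrative assumptions, not part of the original script):
if __name__ == '__main__':
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    X, Y = make_classification(n_samples=200, n_features=10, random_state=0)
    x_tr, x_te, y_tr, y_te = train_test_split(X, Y, test_size=0.3,
                                              random_state=0)
    test_model(x_tr, y_tr, x_te, y_te)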
# Get data, group indexes
train_cases, valid_cases, df_feat, df_labl = get_data(dataset)
groups = get_groups(df_feat, train_cases)
n_groups = get_n_groups(groups, train_cases)

# Test/Train splits
feat_train, feat_valid, labl_train, labl_valid = test_train_split_feat_labl(
    df_feat, df_labl, train_cases, valid_cases, feat_keys, n_labl)

# Instantiate classifier
idf = logReg(
    random_state=0,
    fit_intercept=False,
    class_weight='balanced',
    max_iter=200,
    # penalty='elasticnet',
    solver='lbfgs',
    C=1,
    # l1_ratio=0.0,  # 0.0=l2, 1.0=l1
    verbose=True,
    n_jobs=-1,
)

#####################################################################
### Manual Cross-Validation
#####################################################################
test_score = []
train_score = []
for test_case in train_cases:
    # LeaveOneGroupOut split: hold out one case, train on the rest
    subtrain_cases_bool = np.array(train_cases) != np.array(test_case)
    subtrain_cases = np.array(train_cases)[subtrain_cases_bool].tolist()
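    # The two lines above realise a LeaveOneGroupOut split by hand. For
    # reference, a minimal sketch of the sklearn equivalent over index-based
    # splits (array names assumed to match the ones built above):
    # from sklearn.model_selection import LeaveOneGroupOut
    # for sub_idx, hold_idx in LeaveOneGroupOut().split(
    #         feat_train, labl_train, groups=groups):
    #     idf.fit(feat_train[sub_idx], labl_train[sub_idx])
    #     test_score.append(idf.score(feat_train[hold_idx], labl_train[hold_idx]))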
# Assemble the sub-training split: all samples except the held-out group
feat_train = np.vstack([s[0] for s in sampls[1:]])
labl_train = np.hstack([s[1] for s in sampls[1:]])
# feat_train = np.vstack([s[0] for s in [sampls[0]] + sampls[2:]])
# labl_train = np.hstack([s[1] for s in [sampls[0]] + sampls[2:]])

# Pick C as the second point of a log-spaced grid that starts at the
# smallest C giving a non-trivial l1-penalised model
c = (l1_min_c(feat_train, labl_train, loss='log')
     * np.logspace(0, 4, 5)).tolist()[1]
# if sample_size > 100:  # Fix for PH-Breuer
#     c = 0.02761796

# Learn identifier
idf = logReg(
    random_state=0,
    fit_intercept=False,
    class_weight=None,
    max_iter=max_iter,
    penalty=penalty,
    solver=solver,
    C=c,
    l1_ratio=l1_ratio,  # 0.0=l2, 1.0=l1
    verbose=False,
    n_jobs=-1,
)
idf.fit(feat_train, labl_train)

# Evaluate training error
labl_pred_train = idf.predict(feat_train)
f1_trains.append(f1_score(labl_train, labl_pred_train))
acc_trains.append(accuracy_score(labl_train, labl_pred_train))

# Evaluate generalisation
labl_pred = idf.predict(feat_test)
f1_tests.append(f1_score(labl_test, labl_pred))
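# Once every fold has been evaluated, the per-fold scores can be summarised
# (a sketch using the lists populated above):
print("Train F1: %.3f +/- %.3f" % (np.mean(f1_trains), np.std(f1_trains)))
print("Test  F1: %.3f +/- %.3f" % (np.mean(f1_tests), np.std(f1_tests)))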
dual = False
fit_intercept = False
intercept_scaling = 10
penalty = 'elasticnet'
l1_ratio = 0.5
max_iter = 2000
n_jobs = -1
random_state = 42
solver = 'saga'
tol = 1e-10
verbose = 5
warm_start = False

# Construct identifier from stored parameters and fitted attributes
idf = logReg()
idf.coef_ = coef
idf.intercept_ = intercept
idf.classes_ = classes
idf.class_weight = class_weight
idf.C = C
idf.penalty = penalty
idf.max_iter = max_iter
idf.n_jobs = n_jobs
idf.tol = tol
idf.verbose = verbose

# Test prediction
feat_test = np.arange(5).reshape(1, 5) - 5
print("Decision function gives:\t{0}".format(idf.decision_function(feat_test)))
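# Sanity check for the hand-assembled model: a linear model's decision
# function is X @ coef_.T + intercept_, so this manual product should
# reproduce the value printed above (a sketch, assuming `coef` and
# `intercept` hold the stored arrays):
manual = (feat_test @ idf.coef_.T + idf.intercept_).ravel()
print("Manual dot product gives:\t{0}".format(manual))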
from sklearn.dummy import DummyClassifier
from sklearn.metrics import classification_report, confusion_matrix

# bin0, bin1 = np.bincount(ytrain)
# BIN0, BIN1 = np.bincount(ytest)
# print("Imbalance in train labels: {:3.2f}".format(bin1 / (bin1 + bin0)))
# print("Imbalance in test labels: {:3.2f}".format(BIN1 / (BIN1 + BIN0)))

# Create constant classifier
# dy = DummyClassifier(strategy="constant", random_state=0, constant=0)
# dy.fit(xtrain, ytrain)

# Create random classifier
dy = DummyClassifier(strategy="uniform", random_state=0)
dy.fit(xtrain, ytrain)

# Learn logReg classifier
lr = logReg(random_state=0, fit_intercept=False, class_weight='balanced',
            verbose=False, n_jobs=-1)
lr.fit(xtrain, ytrain)

# Test classifiers with distinct metrics
print("\n")
print("Dummy performance:")
dy_pred = dy.predict(xtest)
print(classification_report(ytest, dy_pred,
                            target_names=['Inactive', 'Active']))

# Sensitivity (recall on positives) and specificity (recall on negatives)
dy_conf = confusion_matrix(ytest, dy_pred)
dy_TN, dy_FP = dy_conf[0]
dy_FN, dy_TP = dy_conf[1]
dy_sen = dy_TP / (dy_TP + dy_FN)
dy_spe = dy_TN / (dy_TN + dy_FP)
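# The same report and sensitivity/specificity breakdown would follow for the
# logReg classifier (a sketch mirroring the dummy evaluation above):
print("logReg performance:")
lr_pred = lr.predict(xtest)
print(classification_report(ytest, lr_pred,
                            target_names=['Inactive', 'Active']))
lr_conf = confusion_matrix(ytest, lr_pred)
lr_TN, lr_FP = lr_conf[0]
lr_FN, lr_TP = lr_conf[1]
lr_sen = lr_TP / (lr_TP + lr_FN)
lr_spe = lr_TN / (lr_TN + lr_FP)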