Example #1
def test_model(x, y, x_test, y_test):
    for c in [0.01, 0.05, 0.25, 0.5, 1]:
        lr = logReg(C=c)
        lr.fit(x, y)
        print("Accuracy for C=%s, train = %s, test = %s" %
              (c, accuracy_score(y, lr.predict(x)),
               accuracy_score(y_test, lr.predict(x_test))))
# Get data, group indexes
train_cases, valid_cases, df_feat, df_labl = get_data(dataset)
groups = get_groups(df_feat, train_cases)
n_groups = get_n_groups(groups, train_cases)

# Test/Train splits
feat_train, feat_valid, labl_train, labl_valid = test_train_split_feat_labl(
    df_feat, df_labl, train_cases, valid_cases, feat_keys, n_labl)

# Instantiate classifier
idf = logReg(
    random_state=None,
    fit_intercept=False,
    class_weight='balanced',
    max_iter=200,
    # penalty='elasticnet',  # elasticnet requires solver='saga'
    solver='lbfgs',
    C=1,
    # l1_ratio=0.0,  # 0.0=l2, 1.0=l1
    verbose=True,
    n_jobs=-1,
)

#####################################################################
### Manual Cross-Validation
#####################################################################
test_score = []
train_score = []
for test_case in train_cases:
    # LeaveOneGroupOut splits
    subtrain_cases_bool = np.array(train_cases) != test_case
    subtrain_cases = np.array(train_cases)[subtrain_cases_bool].tolist()
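
Example #1 re-implements leave-one-group-out cross-validation by hand. scikit-learn ships the same split as LeaveOneGroupOut; below is a minimal sketch of the equivalent run on synthetic stand-in data, assuming logReg aliases sklearn.linear_model.LogisticRegression (the alias is used throughout these examples but its import is not shown).

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression as logReg
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score

# Toy stand-ins for the per-case features, labels and group indexes above
X, y = make_classification(n_samples=120, n_features=10, random_state=0)
groups = np.repeat(np.arange(6), 20)  # six "cases" of 20 samples each

# Each fold holds out exactly one case, like the manual loop above
scores = cross_val_score(
    logReg(class_weight='balanced', max_iter=200),
    X, y, groups=groups, cv=LeaveOneGroupOut(), scoring='accuracy')
print("Held-out accuracy per case:", np.round(scores, 3))
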
Example #3
            feat_train = np.vstack([s[0] for s in sampls[1:]])
            labl_train = np.hstack([s[1] for s in sampls[1:]])
            # feat_train = np.vstack([s[0] for s in [sampls[0]] + sampls[2:]])
            # labl_train = np.hstack([s[1] for s in [sampls[0]] + sampls[2:]])

            # Second point on a log-spaced C grid starting at the smallest C
            # for which an l1-penalised model has non-zero coefficients
            c = (l1_min_c(feat_train, labl_train, loss='log') *
                 np.logspace(0, 4, 5)).tolist()[1]
            # if sample_size > 100:  # Fix for PH-Breuer
            #     c = 0.02761796
            # Learn identifier
            idf = logReg(
                random_state=None,
                fit_intercept=False,
                class_weight=None,
                max_iter=max_iter,
                penalty=penalty,
                solver=solver,
                C=c,
                l1_ratio=l1_ratio,  # 0.0=l2, 1.0=l1
                verbose=False,
                n_jobs=-1,
            )
            idf.fit(feat_train, labl_train)

            # Evaluate training error
            labl_pred_train = idf.predict(feat_train)
            f1_trains.append(f1_score(labl_train, labl_pred_train))
            acc_trains.append(accuracy_score(labl_train, labl_pred_train))

            # Evaluate generalisation
            labl_pred = idf.predict(feat_test)
            f1_tests.append(f1_score(labl_test, labl_pred))
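
Example #3 derives its regularisation strength from l1_min_c, the smallest C at which an l1-penalised logistic model can have any non-zero coefficients, then takes the second point of a log-spaced grid above it. Here is a short sketch of that grid on synthetic data (the dataset and solver choice are assumptions, not from the original):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression as logReg
from sklearn.svm import l1_min_c

X, y = make_classification(n_samples=200, n_features=20, random_state=0)

# Log-spaced C grid starting at the smallest C with non-zero l1 coefficients
cs = l1_min_c(X, y, loss='log') * np.logspace(0, 4, 5)

for c in cs:
    clf = logReg(penalty='l1', solver='saga', C=c, max_iter=5000)
    clf.fit(X, y)
    print("C=%.5f -> %d non-zero coefficients"
          % (c, np.count_nonzero(clf.coef_)))

Larger C means weaker regularisation, so the count of surviving coefficients grows along the grid.
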
Example #4
dual = False
fit_intercept = False
intercept_scaling = 10
penalty = 'elasticnet'
l1_ratio = 0.5
max_iter = 2000
n_jobs = -1
random_state = 42
solver = 'saga'
tol = 1e-10
verbose = 5
warm_start = False
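
# A sketch (assumption, not in the original): the settings above map
# one-to-one onto the constructor, so they could be passed directly instead
# of being attached attribute-by-attribute below. Note penalty='elasticnet'
# is only supported by the 'saga' solver, which is what is configured here.
idf_alt = logReg(dual=dual, fit_intercept=fit_intercept,
                 intercept_scaling=intercept_scaling, penalty=penalty,
                 l1_ratio=l1_ratio, max_iter=max_iter, n_jobs=n_jobs,
                 random_state=random_state, solver=solver, tol=tol,
                 verbose=verbose, warm_start=warm_start)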


# Construct identifier and attach previously fitted attributes by hand
idf = logReg()
idf.coef_ = coef
idf.intercept_ = intercept
idf.classes_ = classes
idf.class_weight = class_weight
idf.C = C
idf.penalty = penalty
idf.max_iter = max_iter
idf.n_jobs = n_jobs
idf.tol = tol
idf.verbose = verbose


# Test prediction
feat_test = np.arange(5).reshape(1, 5) - 5
print("Decision function gives:\t{0}".format(idf.decision_function(feat_test)))
# BIN0, BIN1 = np.bincount(ytest)
# print("Imbalancing in train labels: {:3.2f}".format(bin1/(bin1 + bin0)))
# print("Imbalancing in test  labels: {:3.2f}".format(BIN1/(BIN1 + BIN0)))

# Create constant classifier
# dy = DummyClassifier(strategy="constant", random_state=None, constant=0)
# dy.fit(xtrain, ytrain)

# Create random classifier
dy = DummyClassifier(strategy="uniform", random_state=None)
dy.fit(xtrain, ytrain)

# Learn logReg classifier
lr = logReg(random_state=None,
            fit_intercept=False,
            class_weight='balanced',
            verbose=False,
            n_jobs=-1)
lr.fit(xtrain, ytrain)

# Test classifiers with distinct metrics
print("\n")
print("Dummy performance:")
dy_pred = dy.predict(xtest)
print(
    classification_report(ytest, dy_pred, target_names=['Inactive', 'Active']))
dy_conf = confusion_matrix(ytest, dy_pred)
dy_TN, dy_FP = dy_conf[0]
dy_FN, dy_TP = dy_conf[1]
dy_sen = dy_TP / (dy_TP + dy_FN)
dy_spe = dy_TN / (dy_TN + dy_FP)
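
The sensitivity/specificity bookkeeping above repeats for every classifier under test; a small helper (hypothetical, not part of the original snippet) factors it out and applies it to the fitted logReg model:

def sens_spec(y_true, y_pred):
    """Return (sensitivity, specificity) for binary labels."""
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return tp / (tp + fn), tn / (tn + fp)

lr_sen, lr_spe = sens_spec(ytest, lr.predict(xtest))
print("logReg sensitivity: {:3.2f}, specificity: {:3.2f}".format(lr_sen, lr_spe))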