Ejemplo n.º 1
0
def train_xgboost(train_X,
                  train_y,
                  valid_X,
                  valid_y,
                  test_X,
                  test_y,
                  learning_rate=None,
                  mask=None):
    """Train an XGBoost binary classifier and report metrics on all splits.

    Parameters
    ----------
    train_X, train_y : training features and labels (pandas objects).
    valid_X, valid_y : validation features and labels.
    test_X, test_y : test features and labels.
    learning_rate : float, optional
        Booster learning rate; defaults to 0.01 when None.
    mask : optional
        When truthy, passed to ``mask_columns`` to filter feature columns
        on every split before training/prediction.

    Returns
    -------
    tuple
        ``get_metrics`` results for the train, validation and test splits,
        in that order.
    """
    # BUG FIX: the original unconditionally overwrote the caller-supplied
    # learning_rate with 0.01; honor the argument and only fall back to
    # the old hard-coded value when it is omitted.
    if learning_rate is None:
        learning_rate = 0.01
    n_estimators = 350
    subsample = 0.3
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)
    # NOTE(review): gpu_hist/gpu_predictor require a CUDA-enabled XGBoost
    # build — confirm for the deployment environment.
    xgb_model = xgb.XGBClassifier(objective="binary:logistic",
                                  tree_method='gpu_hist',
                                  predictor='gpu_predictor')
    xgb_model.learning_rate = learning_rate
    xgb_model.n_estimators = n_estimators
    xgb_model.subsample = subsample
    xgb_model.fit(train_X, train_y.values.ravel())
    train_pred = xgb_model.predict(train_X)
    valid_pred = xgb_model.predict(valid_X)
    test_pred = xgb_model.predict(test_X)
    return (get_metrics(train_y, train_pred, verbose=True),
            get_metrics(valid_y, valid_pred, verbose=True),
            get_metrics(test_y, test_pred, verbose=True))
Ejemplo n.º 2
0
def train_neural_net(train_X,
                     train_y,
                     valid_X,
                     valid_y,
                     test_X,
                     test_y,
                     plotting=False,
                     save_model=False):
    """Train a dense feed-forward binary classifier with Keras.

    Architecture: input -> Dense(1000) -> Dense(1500) -> Dense(400) -> Dense(1,
    sigmoid), trained with Adam and binary cross-entropy.

    Parameters
    ----------
    train_X, train_y : training features and labels (pandas objects).
    valid_X, valid_y : validation features and labels.
    test_X, test_y : test features and labels.
    plotting : bool
        When True, plot the training/validation loss history.
    save_model : bool
        When True, save the trained model under ``~/Desktop/keras_models``.

    Returns
    -------
    tuple
        ``get_metrics`` results for the train, validation and test splits.
    """
    # BUG FIX: Keras Model.save does not expand '~' (the original literal
    # path was flagged "WRONG XD" for this reason) — expand it explicitly.
    path = os.path.expanduser('~/Desktop/keras_models')

    def scheduler(epoch, lr):
        # Hold the learning rate flat for the first 20 epochs, then decay
        # exponentially each epoch.
        if epoch < 20:
            return lr
        return lr * math.exp(-0.2)

    learning_rate = 0.0006
    epochs = 22
    batch_size = 8500
    model = Sequential()
    model.add(Input(shape=(len(train_X.columns), )))
    model.add(Dense(1000, activation='relu'))
    model.add(Dense(1500, activation='relu'))
    model.add(Dense(400, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    lr_scheduler = tf.keras.callbacks.LearningRateScheduler(scheduler)

    history = model.fit(train_X,
                        train_y,
                        epochs=epochs,
                        batch_size=batch_size,
                        callbacks=[lr_scheduler],
                        verbose=0,
                        validation_data=(valid_X, valid_y))

    def _predict_labels(features):
        # BUG FIX: Sequential.predict_classes was removed in TF 2.6;
        # threshold the sigmoid output at 0.5 instead.  Returns a list of
        # bools, matching the original ``[1 in lst for lst in preds]``
        # conversion.
        return [bool(p) for p in (model.predict(features) > 0.5).ravel()]

    train_pred = _predict_labels(train_X)
    valid_pred = _predict_labels(valid_X)
    test_pred = _predict_labels(test_X)
    if plotting:
        plot_history_loss(history, "Artificial Neural Network")
    if save_model:
        model.save(os.path.join(path, next_name(path, "model-{0}")))
    return (get_metrics(train_y, train_pred, verbose=True),
            get_metrics(valid_y, valid_pred, verbose=True),
            get_metrics(test_y, test_pred, verbose=True))
Ejemplo n.º 3
0
def train_svm_grid(train_path, test_path):
    """Exhaustive manual grid search over SVC hyper-parameters.

    Trains one SVC per combination of kernel, class weighting, gamma
    strategy and C, evaluating each on the train and test splits.

    Parameters
    ----------
    train_path, test_path : paths accepted by ``prepare_data``.

    Returns
    -------
    list of tuple
        One ``(params_dict, test_metrics, train_metrics)`` entry per
        configuration, in evaluation order.
    """
    train_X, train_y = prepare_data(train_path)
    test_X, test_y = prepare_data(test_path)
    # BUG FIX: the original computed metrics for every configuration and
    # discarded them all; collect them alongside their parameters so the
    # search result is actually usable.
    results = []
    for kernel in ['rbf', 'linear', 'poly', 'sigmoid']:
        for class_weight in [None, 'balanced']:
            for gamma in ['auto', 'scale']:
                for C in [0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3]:
                    classifier = svm.SVC(kernel=kernel,
                                         class_weight=class_weight,
                                         gamma=gamma,
                                         C=C)
                    classifier.fit(train_X, train_y.values.ravel())
                    test_pred = classifier.predict(test_X)
                    train_pred = classifier.predict(train_X)
                    params = {'kernel': kernel,
                              'class_weight': class_weight,
                              'gamma': gamma,
                              'C': C}
                    results.append((params,
                                    get_metrics(test_y, test_pred),
                                    get_metrics(train_y, train_pred)))
    return results
Ejemplo n.º 4
0
def train_cumulative_voting(train_X,
                            train_y,
                            valid_X,
                            valid_y,
                            test_X,
                            test_y,
                            threshold=0.5,
                            mask=None):
    """Evaluate a CumulativeVoting model on the train/valid/test splits.

    The model needs no fitting; it predicts directly from the features.
    When ``mask`` is truthy, feature columns are filtered via
    ``mask_columns`` on every split first.

    Returns a tuple of ``get_metrics`` results for the train, validation
    and test splits, in that order.
    """
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)
    voter = CumulativeVoting(threshold)
    splits = ((train_X, train_y), (valid_X, valid_y), (test_X, test_y))
    # Predict every split first, then score them in the same order.
    predictions = [voter.predict(features) for features, _ in splits]
    return tuple(get_metrics(labels, prediction, verbose=True)
                 for (_, labels), prediction in zip(splits, predictions))
Ejemplo n.º 5
0
def train_svm(kernel,
              train_X,
              train_y,
              valid_X,
              valid_y,
              test_X,
              test_y,
              mask=None):
    """Fit an SVC with the given kernel and score all three splits.

    When ``mask`` is truthy, feature columns are filtered via
    ``mask_columns`` on every split before fitting/prediction.

    Returns a tuple of ``get_metrics`` results for the train, validation
    and test splits, in that order.
    """
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)
    model = svm.SVC(kernel=kernel)
    model.fit(train_X, train_y.values.ravel())
    splits = ((train_X, train_y), (valid_X, valid_y), (test_X, test_y))
    # Predict every split first, then score them in the same order.
    predictions = [model.predict(features) for features, _ in splits]
    return tuple(get_metrics(labels, prediction, verbose=True)
                 for (_, labels), prediction in zip(splits, predictions))
Ejemplo n.º 6
0
def train_majority_voting(train_X,
                          train_y,
                          valid_X,
                          valid_y,
                          test_X,
                          test_y,
                          vote_threshold=0.5,
                          count_threshold=0.5,
                          mask=None):
    """Evaluate a MajorityVoting model on the train/valid/test splits.

    The model needs no fitting; it predicts directly from the features.
    When ``mask`` is truthy, feature columns are filtered via
    ``mask_columns`` on every split first.

    Returns a tuple of ``get_metrics`` results for the train, validation
    and test splits, in that order.
    """
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)
    voter = MajorityVoting(vote_threshold, count_threshold)
    splits = ((train_X, train_y), (valid_X, valid_y), (test_X, test_y))
    # Predict every split first, then score them in the same order.
    predictions = [voter.predict(features) for features, _ in splits]
    return tuple(get_metrics(labels, prediction, verbose=True)
                 for (_, labels), prediction in zip(splits, predictions))
Ejemplo n.º 7
0
def train_xgboost_gridsearch(train_X,
                             train_y,
                             valid_X,
                             valid_y,
                             test_X,
                             test_y,
                             learning_rate=None,
                             mask=None):
    """Grid-search an XGBoost classifier and report metrics on all splits.

    Parameters
    ----------
    train_X, train_y : training features and labels (pandas objects).
    valid_X, valid_y : validation features and labels.
    test_X, test_y : test features and labels.
    learning_rate : float, optional
        Single learning rate to search; defaults to 0.01 when None.
    mask : optional
        When truthy, passed to ``mask_columns`` to filter feature columns
        on every split.

    Returns
    -------
    tuple
        ``get_metrics`` results for the train, validation and test splits,
        in that order.
    """
    if mask:
        train_X = mask_columns(train_X, mask)
        valid_X = mask_columns(valid_X, mask)
        test_X = mask_columns(test_X, mask)

    # BUG FIX: the learning_rate argument was accepted but never used;
    # feed it into the grid, keeping 0.01 as the default.
    param_grid_gb = {
        'learning_rate': [0.01 if learning_rate is None else learning_rate],
        'n_estimators': [350],
        'subsample': [0.3]
    }

    # Classifier instantiation (the old comment said "Regressor", which
    # was wrong — this is an XGBClassifier).
    gb = xgb.XGBClassifier()

    # NOTE(review): 'neg_mean_squared_error' is a regression scorer being
    # applied to a classifier; consider 'accuracy' or 'roc_auc' — left
    # unchanged to preserve behavior.
    mse_grid = GridSearchCV(estimator=gb,
                            param_grid=param_grid_gb,
                            scoring='neg_mean_squared_error',
                            cv=4,
                            verbose=2)
    mse_grid.fit(train_X, train_y.values.ravel())
    train_pred = mse_grid.predict(train_X)
    valid_pred = mse_grid.predict(valid_X)
    test_pred = mse_grid.predict(test_X)
    print("Best parameters:", mse_grid.best_params_)
    return (get_metrics(train_y, train_pred, verbose=True),
            get_metrics(valid_y, valid_pred, verbose=True),
            get_metrics(test_y, test_pred, verbose=True))