Ejemplo n.º 1
0

def getSVC(df, random_split=None):
    """Grid-search an SVC on the fixed train/validation split stored in ``df``.

    Args:
        df: DataFrame with a ``validation`` column; rows where it equals 0
            form the training fold and rows where it equals 1 form the
            validation fold. All other columns are passed to ``to_array``
            to obtain the feature matrix and the targets.
        random_split: Unused; kept so existing callers keep working.

    Returns:
        A ``(clf, scaler)`` tuple: the fitted ``GridSearchCV`` object and the
        ``StandardScaler`` that was fitted on the features.
    """
    X, Y = to_array(df.drop("validation", axis=1))
    # NOTE(review): the scaler is fitted on training and validation rows
    # together, so validation statistics influence the scaling.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # GridSearchCV interprets the split arrays as positional row indices into
    # X, so derive positions from the column instead of reusing index labels
    # (the two only coincide for a default RangeIndex).
    # Assumes to_array keeps the rows in df order -- TODO confirm.
    validation_flag = df["validation"].values
    tr_ind = (validation_flag == 0).nonzero()[0]
    val_ind = (validation_flag == 1).nonzero()[0]
    custom_CV_iterator = [(tr_ind, val_ind)]
    print("Create a Support Vector Classifier")
    print("__Parameter searching...")
    # A single predefined split is used instead of k-fold cross-validation.
    clf = GridSearchCV(SVC(probability=False),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10,
                       verbose=5,
                       cv=custom_CV_iterator)
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))
    return clf, scaler


if __name__ == "__main__":
    # The output file name is the only command-line argument; exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} OUTPUT_FILE".format(sys.argv[0]))
    output_fname = sys.argv[1]
    df, test_df = preprocess()
    model, scaler = getSVC(df)
    # The scaler is passed along with the model to write_ans.
    write_ans(model, test_df, ofname=output_fname, scaler=scaler)
Ejemplo n.º 2
0
Archivo: RF.py Proyecto: stegben/Numeri
    'max_depth': [7, 9, None],
    'max_features': [0.05, 0.1, 0.3]
}]


def getRF(df, random_split=None):
    """Grid-search a random forest on the fixed train/validation split in ``df``.

    Args:
        df: DataFrame with a ``validation`` column; rows where it equals 0
            form the training fold and rows where it equals 1 form the
            validation fold. All other columns are passed to ``to_array``
            to obtain the feature matrix and the targets.
        random_split: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    X, Y = to_array(df.drop("validation", axis=1))
    # GridSearchCV interprets the split arrays as positional row indices into
    # X, so derive positions from the column instead of reusing index labels
    # (the two only coincide for a default RangeIndex).
    # Assumes to_array keeps the rows in df order -- TODO confirm.
    validation_flag = df["validation"].values
    tr_ind = (validation_flag == 0).nonzero()[0]
    val_ind = (validation_flag == 1).nonzero()[0]
    custom_CV_iterator = [(tr_ind, val_ind)]
    print("Create a Random Forest Classifier")
    print("__Parameter searching...")
    # A single predefined split is used instead of k-fold cross-validation.
    # NOTE(review): the forest's n_jobs=2 is nested inside the search's
    # n_jobs=10 -- confirm the machine has enough cores for both.
    clf = GridSearchCV(RandomForestClassifier(n_estimators=10000, n_jobs=2),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10,
                       verbose=5,
                       cv=custom_CV_iterator)
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))
    return clf


if __name__ == "__main__":
    # The output file name is the only command-line argument; exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} OUTPUT_FILE".format(sys.argv[0]))
    output_fname = sys.argv[1]
    df, test_df = preprocess()
    model = getRF(df)
    write_ans(model, test_df, ofname=output_fname)
Ejemplo n.º 3
0
def getSVC(df, random_split=None):
    """Grid-search an SVC on the fixed train/validation split stored in ``df``.

    Args:
        df: DataFrame with a ``validation`` column; rows where it equals 0
            form the training fold and rows where it equals 1 form the
            validation fold. All other columns are passed to ``to_array``
            to obtain the feature matrix and the targets.
        random_split: Unused; kept so existing callers keep working.

    Returns:
        A ``(clf, scaler)`` tuple: the fitted ``GridSearchCV`` object and the
        ``StandardScaler`` that was fitted on the features.
    """
    X, Y = to_array(df.drop("validation", axis=1))
    # NOTE(review): the scaler is fitted on training and validation rows
    # together, so validation statistics influence the scaling.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # GridSearchCV interprets the split arrays as positional row indices into
    # X, so derive positions from the column instead of reusing index labels
    # (the two only coincide for a default RangeIndex).
    # Assumes to_array keeps the rows in df order -- TODO confirm.
    validation_flag = df["validation"].values
    tr_ind = (validation_flag == 0).nonzero()[0]
    val_ind = (validation_flag == 1).nonzero()[0]
    custom_CV_iterator = [(tr_ind, val_ind)]
    print("Create a Support Vector Classifier")
    print("__Parameter searching...")
    # A single predefined split is used instead of k-fold cross-validation.
    clf = GridSearchCV(SVC(probability=False),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10,
                       verbose=5,
                       cv=custom_CV_iterator)
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))
    return clf, scaler


if __name__ == "__main__":
    # The output file name is the only command-line argument; exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} OUTPUT_FILE".format(sys.argv[0]))
    output_fname = sys.argv[1]
    df, test_df = preprocess()
    model, scaler = getSVC(df)
    # The scaler is passed along with the model to write_ans.
    write_ans(model, test_df, ofname=output_fname, scaler=scaler)

Ejemplo n.º 4
0
Archivo: RF.py Proyecto: stegben/Numeri
               ]


def getRF(df, random_split=None):
    """Grid-search a random forest on the fixed train/validation split in ``df``.

    Args:
        df: DataFrame with a ``validation`` column; rows where it equals 0
            form the training fold and rows where it equals 1 form the
            validation fold. All other columns are passed to ``to_array``
            to obtain the feature matrix and the targets.
        random_split: Unused; kept so existing callers keep working.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    X, Y = to_array(df.drop("validation", axis=1))
    # GridSearchCV interprets the split arrays as positional row indices into
    # X, so derive positions from the column instead of reusing index labels
    # (the two only coincide for a default RangeIndex).
    # Assumes to_array keeps the rows in df order -- TODO confirm.
    validation_flag = df["validation"].values
    tr_ind = (validation_flag == 0).nonzero()[0]
    val_ind = (validation_flag == 1).nonzero()[0]
    custom_CV_iterator = [(tr_ind, val_ind)]
    print("Create a Random Forest Classifier")
    print("__Parameter searching...")
    # A single predefined split is used instead of k-fold cross-validation.
    # NOTE(review): the forest's n_jobs=2 is nested inside the search's
    # n_jobs=10 -- confirm the machine has enough cores for both.
    clf = GridSearchCV(RandomForestClassifier(n_estimators=10000, n_jobs=2),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10,
                       verbose=5,
                       cv=custom_CV_iterator)
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))
    return clf


if __name__ == "__main__":
    # The output file name is the only command-line argument; exit with a
    # usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} OUTPUT_FILE".format(sys.argv[0]))
    output_fname = sys.argv[1]
    df, test_df = preprocess()
    model = getRF(df)
    write_ans(model, test_df, ofname=output_fname)