Example #1
0
def get_sparse_onehot_features():
    """Load the data set and one-hot encode its categorical columns.

    The data is read with ``prepare_data("./data", drop_categorical=False)``
    and the categorical column indices come from ``get_cat_columns()``.
    The encoder is fitted on the training matrix only and then applied to
    the holdout matrix; ``handle_unknown="ignore"`` is set on the encoder.

    Returns:
        A 4-tuple ``(X, y, X_holdout, ids)`` where ``X`` and ``X_holdout``
        are the encoded matrices converted to CSR format, and ``y`` and
        ``ids`` are passed through unchanged from ``prepare_data``.
    """
    X, y, X_holdout, ids = prepare_data("./data", drop_categorical=False)
    categorical_columns = get_cat_columns()
    one_hot = OneHotEncoder(
        categorical_features=categorical_columns,
        sparse=True,
        handle_unknown="ignore",
    )

    # Shift every categorical value up by one in both matrices.
    # NOTE(review): presumably this turns a -1 "missing" code into a
    # non-negative value the encoder accepts -- confirm against prepare_data.
    for matrix in (X, X_holdout):
        matrix[:, categorical_columns] = matrix[:, categorical_columns] + 1

    # Fit on the training data only; the holdout set is merely transformed.
    train_encoded = one_hot.fit_transform(X)
    holdout_encoded = one_hot.transform(X_holdout)

    return train_encoded.tocsr(), y, holdout_encoded.tocsr(), ids
Example #2
0
def build_multinomial_nb_features():
    """Generate multinomial naive Bayes base features and write them to disk.

    Loads the data with ``prepare_data("./data", drop_categorical=False)``,
    keeps only the columns listed by ``get_cat_columns()``, shifts them up
    by one, and hands them together with a ``MultinomialNB(alpha=1)``
    classifier to ``build_base_features``. The two arrays returned by that
    call are stacked vertically and dumped to ``./features/NB_oob.npy``.

    Returns:
        None. The only outputs are the progress message on stdout and the
        feature file.
    """
    X, y, X_holdout, _ = prepare_data("./data", drop_categorical=False)

    cat_idx = get_cat_columns()
    # Keep only the categorical columns and shift them up by one.
    # NOTE(review): presumably this makes the codes non-negative for
    # MultinomialNB -- confirm against prepare_data.
    X = X[:, cat_idx] + 1
    X_holdout = X_holdout[:, cat_idx] + 1

    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid syntax on Python 3, unlike the bare print statement.
    print("Getting OOB predictions from mNB")
    clf = MultinomialNB(alpha=1)
    # NOTE(review): the trailing 10 is presumably the number of CV folds --
    # verify against build_base_features.
    X_1, X_2 = build_base_features(clf, X, X_holdout, y, 10)

    # NOTE(review): ndarray.tofile writes raw bytes with no .npy header, so
    # despite the extension this file cannot be read with np.load; a reader
    # needs np.fromfile and must know the dtype and shape. Left unchanged
    # because existing consumers may rely on the raw format.
    np.vstack((X_1, X_2)).tofile('./features/NB_oob.npy')
Example #3
0
    grid_cv = GridSearchCV(SVC(probability=True, random_state=SEED),
                           param_grid=params,
                           scoring='log_loss',
                           n_jobs=-1,
                           cv=5,
                           verbose=1)

    grid_cv.fit(X_train, y_train)

    return grid_cv

# Script entry point: one-hot encode the data, grid-search an SVC on it and
# report the cross-validated score of the best model.
if __name__ == "__main__":
    X, y, X_holdout, ids = prepare_data("./data", drop_categorical=False)

    # prepare the categorical columns
    cat_idx = get_cat_columns()
    encoder = OneHotEncoder(categorical_features=cat_idx, sparse=True, handle_unknown="ignore")
    # NOTE(review): n_rows is not used in the lines visible here -- check
    # whether later code needs it.
    n_rows = X.shape[0]

    # Shift the categorical values up by one in both matrices.
    # NOTE(review): presumably so a -1 "missing" code becomes non-negative
    # for the encoder -- confirm against prepare_data.
    X[:, cat_idx] = X[:, cat_idx] + 1
    X_holdout[:, cat_idx] = X_holdout[:, cat_idx] + 1

    # Fit the encoder on the training data only, then apply it to the holdout.
    X = encoder.fit_transform(X)
    X_holdout = encoder.transform(X_holdout)

    print "Run SVC with {} data points and {} features.".format(X.shape[0], X.shape[1])
    # Record the start time (time() is imported elsewhere in the file).
    t0 = time()
    # find_svm_model returns the fitted grid-search object.
    grid_cv = find_svm_model(X, y)
    best_clf = grid_cv.best_estimator_
    print "CV log-loss: {}".format(grid_cv.best_score_)
    # NOTE(review): GridSearchCV refits the best estimator on the full
    # training data by default, so this extra fit looks redundant -- confirm.
    best_clf.fit(X, y)