Esempio n. 1
0
def get_test_data():
    '''
    Input:  None
    Output: Tuple - (X, ids) produced by prep_for_modeling for the test set

    Loads the non-training data, runs it through transform_data and splits
    off the 'id' column.
    '''
    test_df = transform_data(load_data(train=False))
    # Every entry of columns_to_drop except the last one is dropped here.
    # NOTE(review): presumably the last entry is the column that must be
    # kept for the test set - confirm against the columns_to_drop definition.
    features, test_ids = prep_for_modeling(
        test_df,
        column='id',
        columns_to_drop=columns_to_drop[:-1],
    )
    return features, test_ids
Esempio n. 2
0
def make_model(df, model, model_name):
    '''
    Input:  DataFrame, Model Instance - implementing fit method, Str - of name for pickled model file
    Output: None

    Transforms the data, fits the model on it with 'country_destination'
    as the target column, and pickles the fitted model to the path obtained
    from model_path.format(model_name).
    '''
    df = transform_data(df)
    X, y = prep_for_modeling(df, column='country_destination', columns_to_drop=columns_to_drop)
    model.fit(X, y)
    # pickle.dump writes bytes, so the file must be opened in binary mode;
    # the previous text mode ('w+') raises TypeError on Python 3.
    with open(model_path.format(model_name), 'wb') as model_file:
        pickle.dump(model, model_file)
Esempio n. 3
0
def modeling_exclaimation_point(df):
    '''
    Input:  DataFrame
    Output: NpArray - scores from k-fold tests on current data transformation

    Quick check of engineered features: transforms the data, then scores a
    100-tree random forest with ndcg_cross_val_score.
    '''
    transformed = transform_data(df)
    features, target = prep_for_modeling(
        transformed,
        column='country_destination',
        columns_to_drop=columns_to_drop,
    )
    # n_jobs=-1 is passed through to the classifier unchanged.
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    return ndcg_cross_val_score(forest, features, target)