def get_test_data():
    '''
    Input: None
    Output: Tuple - (X, ids), the pair produced by prep_for_modeling when
            called with column='id' on the transformed test data

    Loads the data with train=False, runs it through transform_data and
    hands the result to prep_for_modeling.
    '''
    raw = load_data(train=False)
    transformed = transform_data(raw)
    # Every entry of columns_to_drop except the last is passed along.
    # NOTE(review): presumably the last entry is a column that only exists
    # in the training data - confirm against the columns_to_drop definition.
    drop_for_test = columns_to_drop[:-1]
    features, ids = prep_for_modeling(
        transformed,
        column='id',
        columns_to_drop=drop_for_test,
    )
    return features, ids
def make_model(df, model, model_name):
    '''
    Input: DataFrame, Model Instance - implementing fit method, Str - of
           name for pickled model file
    Output: None

    Transforms df, splits it with prep_for_modeling using
    'country_destination' as the column argument, fits the model on the
    result and pickles the fitted model to model_path.format(model_name).
    An existing file at that path is overwritten.
    '''
    df = transform_data(df)
    X, y = prep_for_modeling(
        df,
        column='country_destination',
        columns_to_drop=columns_to_drop,
    )
    model.fit(X, y)
    # pickle.dump writes bytes, so the file has to be opened in binary mode;
    # a text-mode handle raises TypeError on Python 3 and can corrupt the
    # payload through newline translation on Windows.
    with open(model_path.format(model_name), 'wb') as model_file:
        pickle.dump(model, model_file)
def modeling_exclaimation_point(df):
    '''
    Input: DataFrame
    Output: whatever ndcg_cross_val_score returns for the current data
            transformation (described by the original author as an NpArray
            of k-fold scores)

    Function to quickly test engineered features: transforms df, splits it
    with prep_for_modeling and scores a fresh random forest on the result.
    '''
    features, target = prep_for_modeling(
        transform_data(df),
        column='country_destination',
        columns_to_drop=columns_to_drop,
    )
    # 100 trees; n_jobs=-1 is forwarded to the classifier unchanged.
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    return ndcg_cross_val_score(forest, features, target)