def submit(test_df, h1n1_clf, seasonal_clf):
    """Predict both vaccine targets for the test set and write the submission CSV.

    Args:
        test_df: Test-set frame. It is passed through ``clean_data``; the
            first resulting column is treated as the row ids and every
            remaining column as a feature.
        h1n1_clf: Estimator whose ``predict`` produces the ``h1n1_vaccine``
            column.
        seasonal_clf: Estimator whose ``predict`` produces the
            ``seasonal_vaccine`` column.

    Returns:
        None. Writes ``Submissions/submission.csv`` (index omitted) and
        prints ``done`` once the file has been written.
    """
    test_df = clean_data(test_df)

    # Features are cast to pandas 'category' dtype before prediction.
    X_test = test_df.iloc[:, 1:].astype('category')

    # The prediction frames built below carry a fresh 0..n-1 index. Resetting
    # the id index keeps concat(axis=1) pairing rows by position even when
    # clean_data leaves a non-default index behind.
    test_ids = test_df.iloc[:, 0].reset_index(drop=True)

    h1n1_preds = h1n1_clf.predict(X_test)
    seasonal_preds = seasonal_clf.predict(X_test)

    result_df = concat(
        [
            test_ids,
            DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
            DataFrame(seasonal_preds, columns=['seasonal_vaccine']),
        ],
        axis=1,
    )

    # Export exactly once (the previous version built and wrote the same
    # frame twice in a row).
    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
def classify(document):
    """Classify a document as satire or non-satire using the pickled model.

    The classifier and vectorizer are re-read from ``model.pkl`` and
    ``vectorizer.pkl`` in the current working directory on every call, so a
    retrained model is picked up as soon as those files are replaced.

    Args:
        document: Raw document; it is passed to ``clean_data`` before being
            vectorized.

    Returns:
        A 3-tuple ``(label, proba, cleandocument)`` where ``label`` is
        ``'non_satire'`` or ``'satire'``, ``proba`` is the largest value
        returned by ``model.predict_proba`` for the document, and
        ``cleandocument`` is the output of ``clean_data``.

    Raises:
        KeyError: If the model predicts a class other than 0 or 1.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; these two files must only ever come from a trusted source.
    # Context managers make sure the handles are closed after loading.
    with open("model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    with open("vectorizer.pkl", "rb") as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    label_dict = {0: 'non_satire', 1: 'satire'}

    cleandocument = clean_data(document)
    X = vectorizer.transform([cleandocument])  # single-document batch
    y = model.predict(X)[0]
    proba = np.max(model.predict_proba(X))
    return label_dict[y], proba, cleandocument
def preprocess_data(df, count):
    """Run the feature-preparation pipeline and keep the first ``count`` rows.

    Steps, in order: drop the ``id`` and ``stroke`` columns when present,
    apply ``clean_data``, one-hot encode ``work_type`` and ``smoking_status``,
    rescale ``avg_glucose_level`` and ``bmi`` with a fresh ``MinMaxScaler``,
    then slice off the leading ``count`` rows.

    Args:
        df: Input frame.
        count: Number of leading rows to keep (used as an ``iloc`` slice end).

    Returns:
        The processed frame truncated to ``count`` rows.
    """
    # Neither column is required to exist, so each is dropped only if found.
    for unwanted in ('id', 'stroke'):
        if unwanted in df.columns:
            df = df.drop(columns=[unwanted])

    cleaned = clean_data(df)
    encoded = one_hot_encode(
        cleaned,
        colnames=['work_type', 'smoking_status'],
    )
    scaled = normalize_columns(
        encoded,
        colnames=['avg_glucose_level', 'bmi'],
        scaler=MinMaxScaler(),
    )
    return scaled.iloc[:count, :]
def submit(test_df, model):
    """Predict both vaccine targets, pickle the model and write the submission CSV.

    Args:
        test_df: Test-set frame. It is passed through ``clean_data``; the
            first resulting column is treated as the row ids and every
            remaining column as a feature.
        model: Model handed to ``make_predictions`` and then pickled to
            ``classifier.pkl``.

    Returns:
        None. Writes ``classifier.pkl`` and ``Submissions/submission.csv``
        (index omitted) and prints progress messages.
    """
    test_df = clean_data(test_df)

    # Features are handed to the model as a string-typed numpy array.
    X_test = np.array(test_df.iloc[:, 1:]).astype(str)

    # The prediction frames built below carry a fresh 0..n-1 index. Resetting
    # the id index keeps concat(axis=1) pairing rows by position even when
    # clean_data leaves a non-default index behind.
    test_ids = test_df.iloc[:, 0].astype(int).reset_index(drop=True)

    h1n1_preds, seasonal_preds = make_predictions(model, X_test)

    result_df = concat(
        [
            test_ids,
            DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
            DataFrame(seasonal_preds, columns=['seasonal_vaccine']),
        ],
        axis=1,
    )

    print('Exporting as pickle...')
    # Use a context manager so the pickle is flushed and the handle closed
    # before moving on.
    with open("classifier.pkl", "wb") as model_file:
        dump(model, model_file)

    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
def submit(test_df, model):
    """Predict both vaccine targets, save the network and write the submission CSV.

    Relies on the module-level name ``cols`` (not defined in this function):
    ``cols[1:36]`` selects the columns that are one-hot encoded.

    Args:
        test_df: Test-set frame. It is passed through ``clean_data`` and
            ``one_hot_encode``; the first resulting column is treated as the
            row ids and every remaining column as a feature.
        model: Model handed to ``make_predictions``; its ``save`` method is
            called with ``'nn_model'``.

    Returns:
        None. Saves the model under ``nn_model``, writes
        ``Submissions/submission.csv`` (index omitted) and prints progress
        messages.
    """
    test_df = clean_data(test_df)

    ohe_cols = cols[1:36]
    test_df = one_hot_encode(test_df, colnames=ohe_cols)

    X_test = np.array(test_df.iloc[:, 1:])

    # The prediction frames built below carry a fresh 0..n-1 index. Resetting
    # the id index keeps concat(axis=1) pairing rows by position even when
    # the preprocessing leaves a non-default index behind.
    test_ids = test_df.iloc[:, 0].reset_index(drop=True)

    h1n1_preds, seasonal_preds = make_predictions(model, X_test)

    result_df = concat(
        [
            test_ids,
            DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
            DataFrame(seasonal_preds, columns=['seasonal_vaccine']),
        ],
        axis=1,
    )

    print('Exporting as pickle...')
    # The model is persisted through its own save() method, not via pickle.
    model.save('nn_model')
    print('neural network pickled')

    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
, cat_features=cat_features , random_state=42, verbose=30 )) ovr.fit(x_train, y_train) cross_validated = np.mean(cross_val_score(ovr, x_train, y_train, cv=5)) print(f'Cross Validation Score: {cross_validated}') return ovr if __name__ == '__main__': df = import_data(train=True) test_df = import_data(features='Datasets/test_set_features.csv', train=False) cols = list(df.columns) set_df_values(df) df = clean_data(df) x_train, x_val, y_train, y_val, train_ids, val_ids = split_dataset(df, test_size=0.1, seed=42) x_train, y_train = x_train.astype(str), y_train.astype(int) x_val, y_val = x_val.astype(str), y_val.astype(int) # model = fit_random_search_model(x_train, y_train) model = fit_model(x_train, y_train) h1n1_preds, seasonal_preds = make_predictions(model, x_train) h1n1_true, seasonal_true = y_train['h1n1_vaccine'].values.tolist(), y_train['seasonal_vaccine'].values.tolist() train_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds) print(f'Training Accuracy: {train_score}') h1n1_preds, seasonal_preds = make_predictions(model, x_val) h1n1_true, seasonal_true = y_val['h1n1_vaccine'].values.tolist(), y_val['seasonal_vaccine'].values.tolist() validation_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds) print(f'Validation Accuracy: {validation_score}')