예제 #1
0
def submit(test_df, h1n1_clf, seasonal_clf):
    """Predict both vaccine targets for the test set and write the submission CSV.

    Args:
        test_df: Raw test frame. After ``clean_data`` its first column is
            kept as the id column and the remaining columns are the features.
        h1n1_clf: Fitted estimator exposing ``predict``; its output becomes
            the ``h1n1_vaccine`` column.
        seasonal_clf: Fitted estimator exposing ``predict``; its output
            becomes the ``seasonal_vaccine`` column.

    Side effects:
        Writes ``Submissions/submission.csv`` (without the index) and prints
        ``done`` once the file has been written.
    """
    test_df = clean_data(test_df)
    X_test = test_df.iloc[:, 1:].astype('category')

    # The prediction frames below get a fresh 0..n-1 index, and concat(axis=1)
    # aligns on index labels. Resetting the id index keeps ids and predictions
    # paired row-by-row even if clean_data dropped or reordered rows.
    test_ids = test_df.iloc[:, 0].reset_index(drop=True)

    h1n1_preds = h1n1_clf.predict(X_test)
    seasonal_preds = seasonal_clf.predict(X_test)

    result_df = concat([test_ids,
                        DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
                        DataFrame(seasonal_preds, columns=['seasonal_vaccine'])],
                       axis=1)
    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
예제 #2
0
def classify(document):
    """Classify a single document as satire or non-satire.

    The classifier and vectorizer are re-read from ``model.pkl`` and
    ``vectorizer.pkl`` on every call, so a retrained model written to those
    paths is picked up without restarting.

    Args:
        document: Raw document; passed to ``clean_data`` before vectorizing.

    Returns:
        A tuple ``(label, proba, cleandocument)`` where ``label`` is
        ``'non_satire'`` or ``'satire'``, ``proba`` is the largest value
        returned by ``model.predict_proba`` for the document, and
        ``cleandocument`` is the cleaned input that was actually classified.

    Raises:
        KeyError: If the model predicts a class other than 0 or 1.
    """
    # NOTE(security): pickle.load can execute arbitrary code; these files
    # must only ever come from a trusted training run.
    # Context managers make sure both file handles are closed after loading.
    with open("model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    with open("vectorizer.pkl", "rb") as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    label_dict = {0: 'non_satire', 1: 'satire'}
    cleandocument = clean_data(document)
    # The vectorizer expects an iterable of documents, hence the 1-item list.
    X = vectorizer.transform([cleandocument])
    y = model.predict(X)[0]
    proba = np.max(model.predict_proba(X))
    return label_dict[y], proba, cleandocument
예제 #3
0
def preprocess_data(df, count):
    """Clean, encode and scale ``df``, then return its first ``count`` rows.

    Args:
        df: Input frame. The ``id`` and ``stroke`` columns are removed when
            they are present.
        count: Number of leading rows to keep in the returned frame.

    Returns:
        The processed frame, truncated to at most ``count`` rows.
    """
    # Remove the identifier / target columns if the caller left them in.
    for unwanted in ('id', 'stroke'):
        if unwanted in df.columns:
            df = df.drop(columns=[unwanted])

    cleaned = clean_data(df)
    encoded = one_hot_encode(cleaned, colnames=['work_type', 'smoking_status'])
    scaled = normalize_columns(
        encoded,
        colnames=['avg_glucose_level', 'bmi'],
        scaler=MinMaxScaler(),
    )

    # Truncate last: normalize_columns is handed the full frame, not the
    # first ``count`` rows.
    return scaled.iloc[:count, :]
예제 #4
0
def submit(test_df, model):
    """Predict both vaccine targets, persist the model and write the submission CSV.

    Args:
        test_df: Raw test frame. After ``clean_data`` its first column is
            kept as the (integer) id column and the remaining columns are
            converted to a string array of features.
        model: Fitted model handed to ``make_predictions``; it is also
            serialized to ``classifier.pkl``.

    Side effects:
        Writes ``classifier.pkl`` and ``Submissions/submission.csv`` and
        prints progress messages.
    """
    test_df = clean_data(test_df)
    X_test = np.array(test_df.iloc[:, 1:]).astype(str)

    # The prediction frames below get a fresh 0..n-1 index, and concat(axis=1)
    # aligns on index labels. Resetting the id index keeps ids and predictions
    # paired row-by-row even if clean_data dropped or reordered rows.
    test_ids = test_df.iloc[:, 0].astype(int).reset_index(drop=True)

    h1n1_preds, seasonal_preds = make_predictions(model, X_test)

    result_df = concat([test_ids,
                        DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
                        DataFrame(seasonal_preds, columns=['seasonal_vaccine'])],
                       axis=1)
    print('Exporting as pickle...')
    # Close the handle deterministically so the pickle is fully flushed.
    with open("classifier.pkl", "wb") as model_file:
        dump(model, model_file)
    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
예제 #5
0
def submit(test_df, model):
    """Predict both vaccine targets, save the network and write the submission CSV.

    Relies on the module-level ``cols`` list (set by the script entry point)
    to choose which columns are one-hot encoded.

    Args:
        test_df: Raw test frame. After cleaning and one-hot encoding, its
            first column is kept as the id column and the rest become the
            feature array.
        model: Fitted model handed to ``make_predictions``; it is also saved
            via ``model.save('nn_model')``.

    Side effects:
        Saves the model under ``nn_model``, writes
        ``Submissions/submission.csv`` and prints progress messages.
    """
    test_df = clean_data(test_df)
    ohe_cols = cols[1:36]
    test_df = one_hot_encode(test_df, colnames=ohe_cols)
    X_test = np.array(test_df.iloc[:, 1:])

    # The prediction frames below get a fresh 0..n-1 index, and concat(axis=1)
    # aligns on index labels. Resetting the id index keeps ids and predictions
    # paired row-by-row even if earlier steps dropped or reordered rows.
    test_ids = test_df.iloc[:, 0].reset_index(drop=True)

    h1n1_preds, seasonal_preds = make_predictions(model, X_test)

    result_df = concat(
        [
            test_ids,
            DataFrame(h1n1_preds, columns=['h1n1_vaccine']),
            DataFrame(seasonal_preds, columns=['seasonal_vaccine']),
        ],
        axis=1,
    )
    # NOTE(review): the messages say "pickle", but the model is persisted
    # with model.save -- confirm whether the wording should be updated.
    print('Exporting as pickle...')
    model.save('nn_model')
    print('neural network pickled')
    result_df.to_csv('Submissions/submission.csv', index=False)
    print('done')
예제 #6
0
                                                           , cat_features=cat_features
                                                           , random_state=42,
                                                           verbose=30
                                                           ))
    ovr.fit(x_train, y_train)
    cross_validated = np.mean(cross_val_score(ovr, x_train, y_train, cv=5))
    print(f'Cross Validation Score: {cross_validated}')
    return ovr


# Script entry point: load the data, fit a model on a train split, and print
# the scores obtained on the training and validation splits.
if __name__ == '__main__':
    # Load the training data and the test-set features.
    df = import_data(train=True)
    test_df = import_data(features='Datasets/test_set_features.csv', train=False)
    # Column names are captured before cleaning; `cols` is a module-level
    # name, so functions elsewhere in the file may read it.
    cols = list(df.columns)
    set_df_values(df)
    df = clean_data(df)
    # Hold out 10% of the rows for validation with a fixed seed.
    x_train, x_val, y_train, y_val, train_ids, val_ids = split_dataset(df, test_size=0.1, seed=42)
    # Features are cast to strings and targets to ints
    # (presumably so features are treated as categorical -- TODO confirm).
    x_train, y_train = x_train.astype(str), y_train.astype(int)
    x_val, y_val = x_val.astype(str), y_val.astype(int)

    # Alternative: fit via random hyper-parameter search instead.
    # model = fit_random_search_model(x_train, y_train)
    model = fit_model(x_train, y_train)
    # Score the model on the data it was trained on.
    h1n1_preds, seasonal_preds = make_predictions(model, x_train)
    h1n1_true, seasonal_true = y_train['h1n1_vaccine'].values.tolist(), y_train['seasonal_vaccine'].values.tolist()
    train_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds)
    print(f'Training Accuracy: {train_score}')

    # Score the model on the held-out validation split.
    h1n1_preds, seasonal_preds = make_predictions(model, x_val)
    h1n1_true, seasonal_true = y_val['h1n1_vaccine'].values.tolist(), y_val['seasonal_vaccine'].values.tolist()
    validation_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds)
    print(f'Validation Accuracy: {validation_score}')