Ejemplo n.º 1
0
    def test_split_dataset(self):
        """Check that a split dataset's two parts add up to the input size.

        Loads the first file returned by ``utils.get_files("parquets",
        "*.parquet")``, passes it to ``model.split_dataset`` and asserts that
        the lengths of the two returned parts sum to the length of the input.

        If no parquet file is found, nothing is checked and the test passes
        without asserting anything.
        """
        found = utils.get_files("parquets", "*.parquet")
        if len(found) == 0:
            # Nothing to load: leave without asserting.
            return

        frame = pd.read_parquet(found[0])

        # NOTE(review): 0.25 and 1 are presumably the validation fraction and
        # a random seed -- confirm against model.split_dataset's signature.
        parts = model.split_dataset(frame, 0.25, 1)
        train_part, validation_part = parts

        assert len(frame) == len(train_part) + len(validation_part)
Ejemplo n.º 2
0
                                                           , random_state=42,
                                                           verbose=30
                                                           ))
    ovr.fit(x_train, y_train)
    cross_validated = np.mean(cross_val_score(ovr, x_train, y_train, cv=5))
    print(f'Cross Validation Score: {cross_validated}')
    return ovr


if __name__ == '__main__':
    # Load the labelled training data and the separate test-set features.
    df = import_data(train=True)
    test_df = import_data(train=False, features='Datasets/test_set_features.csv')

    # Column names are captured before df is modified below.
    cols = [*df.columns]

    # The return value of set_df_values is not used (presumably it updates
    # df in place -- confirm); clean_data's result replaces df.
    set_df_values(df)
    df = clean_data(df)

    (x_train, x_val,
     y_train, y_val,
     train_ids, val_ids) = split_dataset(df, test_size=0.1, seed=42)

    # Features are cast to str and targets to int before fitting.
    x_train = x_train.astype(str)
    y_train = y_train.astype(int)
    x_val = x_val.astype(str)
    y_val = y_val.astype(int)

    # Alternative fitting routine: fit_random_search_model(x_train, y_train)
    model = fit_model(x_train, y_train)

    # Score the fitted model on the data it was trained on.
    h1n1_preds, seasonal_preds = make_predictions(model, x_train)
    h1n1_true = y_train['h1n1_vaccine'].values.tolist()
    seasonal_true = y_train['seasonal_vaccine'].values.tolist()
    train_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds)
    print(f'Training Accuracy: {train_score}')

    # Score the same model on the held-out validation split.
    h1n1_preds, seasonal_preds = make_predictions(model, x_val)
    h1n1_true = y_val['h1n1_vaccine'].values.tolist()
    seasonal_true = y_val['seasonal_vaccine'].values.tolist()
    validation_score = get_scores(h1n1_true, h1n1_preds, seasonal_true, seasonal_preds)
    print(f'Validation Accuracy: {validation_score}')