Esempio n. 1
0
                        validation_data=(val_x, val_y),
                        epochs=NUM_EPOCHS,
                        verbose=0,
                        callbacks=[ExponentialMovingAverage()])

    test_df = std_scaler.transform(test_df)
    raw_val_prob = model.predict(val_x)
    raw_test_prob = model.predict(test_df)
    test_pred = np.expm1(raw_test_prob)

    return results.history, test_pred, val_idx, raw_val_prob, raw_test_prob


# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    # Wall-clock timestamp taken before any work starts (where it is consumed
    # is not visible in this excerpt).
    start_time = time.time()
    # preprocess_csv() is called with its defaults and returns a mapping that
    # is indexed by the string keys listed below.
    preprocess_dict = preprocess_csv()
    # Unpack the four entries in this fixed key order; the order of the names
    # on the left must match the order of the keys on the right.
    (train_ids, test_ids, processed_train_df, processed_test_df) = [
        preprocess_dict[key] for key in
        ['train_ids', 'test_ids', 'processed_train', 'processed_test']
    ]

    # Split the processed training data into NUM_FOLDS folds with shuffling
    # turned off.
    # NOTE(review): SEED is still passed although shuffle=False; whether it
    # has any effect depends on split_to_folds -- confirm.
    folds = split_to_folds(processed_train_df,
                           num_folds=NUM_FOLDS,
                           seed=SEED,
                           shuffle=False)
    # Empty accumulators, presumably filled with per-fold outputs further
    # down (the code that appends to them is not visible here).
    all_fold_results = []
    all_fold_preds = []

    with Pool(NUM_FOLDS) as p:
        combined_results = p.starmap(
            train_fold, ((curr_fold, processed_train_df, processed_test_df)
                test_df_1.astype('float32') *
                np.random.normal(1., scale=(0.1 / 0.9), size=test_df_1.shape),
                test_rolling.astype('float32') * np.random.normal(
                    1., scale=(0.1 / 0.9), size=test_rolling.shape)
            ])[0])
    raw_test_prob = np.mean(np.stack(agg_test_prob, axis=0), axis=0)
    test_pred = np.expm1(np.dot(raw_test_prob, supports))

    return results.history, test_pred, val_idx, raw_val_prob, raw_test_prob


# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    # Wall-clock timestamp taken before any work starts (where it is consumed
    # is not visible in this excerpt).
    start_time = time.time()
    # Preprocess with a rolling_macro configuration; the meaning of each
    # option is defined by preprocess_csv, which is not visible here.
    # NOTE(review): lookback_period=12 together with monthly_resampling=True
    # looks like a 12-month window -- confirm against preprocess_csv.
    preprocess_dict = preprocess_csv(rolling_macro={
        'min_unique': 20,
        'lookback_period': 12,
        'monthly_resampling': True
    })

    # Unpack six entries from the returned mapping in this fixed key order;
    # the names on the left must stay aligned with the keys on the right.
    (train_ids, test_ids, processed_train_df, processed_test_df, train_rolling,
     test_rolling) = [
         preprocess_dict[key] for key in [
             'train_ids', 'test_ids', 'processed_train', 'processed_test',
             'train_rolling', 'test_rolling'
         ]
     ]

    # generate distribution labels
    generate_target_partial = partial(generate_target_dist,
                                      num_bins=NUM_BINS,
                                      low=LOW,