Beispiel #1
0
    raw_test_prob = model.predict(test_df)
    test_pred = np.expm1(raw_test_prob)

    return results.history, test_pred, val_idx, raw_val_prob, raw_test_prob


if __name__ == '__main__':
    # Script entry point: preprocess the CSV data, train every fold in its
    # own worker process, then report and plot the fold-averaged losses.

    # Timestamp taken before any work starts; it is not read back within
    # this part of the script.
    start_time = time.time()

    preprocess_dict = preprocess_csv()
    train_ids, test_ids, processed_train_df, processed_test_df = (
        preprocess_dict['train_ids'],
        preprocess_dict['test_ids'],
        preprocess_dict['processed_train'],
        preprocess_dict['processed_test'],
    )

    folds = split_to_folds(
        processed_train_df,
        num_folds=NUM_FOLDS,
        seed=SEED,
        shuffle=False,
    )
    all_fold_results, all_fold_preds = [], []

    # One worker per fold; starmap unpacks each tuple into train_fold's
    # positional arguments and returns the results in fold order.
    with Pool(NUM_FOLDS) as pool:
        fold_args = [
            (fold, processed_train_df, processed_test_df) for fold in folds
        ]
        combined_results = pool.starmap(train_fold, fold_args)

    # Element 0 of every worker result is indexed by 'loss' / 'val_loss'
    # (presumably the Keras history dict -- confirm against train_fold).
    # axis=0 averages across folds, keeping one value per position.
    losses = np.mean(
        [fold_output[0]['loss'] for fold_output in combined_results],
        axis=0)
    val_losses = np.mean(
        [fold_output[0]['val_loss'] for fold_output in combined_results],
        axis=0)

    print('Loss: {}'.format(losses))
    print('Val loss: {}'.format(val_losses))

    for curve, curve_label in ((losses, 'train_losses'),
                               (val_losses, 'val_losses')):
        plt.plot(curve, label=curve_label)
                        validation_data=(val_x, val_y),
                        epochs=NUM_EPOCHS)

    test_pred = model.predict(test_df)

    return model, results.history, test_pred


if __name__ == '__main__':
    # Script entry point: preprocess the CSV data, train a freshly built
    # dense model on each fold in turn, and print the fold-averaged
    # validation metrics.
    preprocess_dict = preprocess_csv()
    train_ids, test_ids, processed_train_df, processed_test_df = (
        preprocess_dict['train_ids'],
        preprocess_dict['test_ids'],
        preprocess_dict['processed_train'],
        preprocess_dict['processed_test'],
    )

    folds = split_to_folds(processed_train_df, NUM_FOLDS, SEED)
    all_fold_results, all_fold_preds = [], []

    for fold in folds:
        # A new model is built for every fold. The feature count is the
        # training frame's column count minus one (presumably the target
        # column -- confirm against preprocess_csv).
        fold_model = build_dense_model(
            num_features=processed_train_df.shape[1] - 1)
        _, fold_history, fold_pred = train_fold(
            fold_model, fold, processed_train_df, processed_test_df)

        all_fold_results += [fold_history]
        all_fold_preds += [fold_pred]

    # axis=0 averages across folds, keeping one value per position in the
    # per-fold metric sequences.
    mean_val_loss = np.mean(
        [history['val_loss'] for history in all_fold_results], axis=0)
    print('Val loss: {}'.format(mean_val_loss))

    mean_val_rmsle = np.mean(
        [history['val_rmsle'] for history in all_fold_results], axis=0)
    print('Val RMSLE: {}'.format(mean_val_rmsle))