'n_estimators': sp_randint(10, 100),
    'max_depth': sp_randint(10, 90),
    'min_samples_split': sp_randint(100, 1000),
    'random_state': sp_randint(0, 999),
}
# Randomized hyper-parameter search for the stacking random forest.
# NOTE(review): param_grid appears to include 'random_state' as a searched
# parameter, which is unusual — confirm that was intentional.
rf_grid = RandomizedSearchCV(
    estimator=clf,
    param_distributions=param_grid,
    n_iter=100,
    cv=5,
    random_state=42,
)

print('Training random forest on all predictions...')
# Stack the five base-model prediction vectors as columns:
# one row per sample, one column per base model.
base_train_preds = [
    y_train_pred1, y_train_pred2, y_train_pred3, y_train_pred4, y_train_pred5
]
ensemble_train = np.vstack(base_train_preds).T
rf_grid.fit(ensemble_train, y_train_full)

print('Best hyperparameters:', rf_grid.best_params_)
best_rf = rf_grid.best_estimator_
print('Feature importances:', best_rf.feature_importances_)

# Same column layout for the test-time base-model predictions.
base_test_preds = [
    y_test_pred1, y_test_pred2, y_test_pred3, y_test_pred4, y_test_pred5
]
ensemble_test = np.vstack(base_test_preds).T
final_preds = best_rf.predict(ensemble_test)

# Remap class label 0 to -1 before writing the submission.
final_preds[final_preds == 0] = -1
print('Final prediction mean:', final_preds.mean())

create_csv_submission(np.arange(1, 10001), final_preds, 'kaggle_ensemble.csv')
print("Kaggle csv saved!")
# ---- Beispiel #2 (scraped-snippet separator) ----
print('Finished Training')

# Evaluate accuracy of predictions from validation data.
cnn.eval()
accuracy_weighted = 0.0  # sum of per-chunk accuracy * chunk size
n_evaluated = 0          # total number of validation samples scored
step_size = 100  # Calculate in steps, since GPU memory might be too small for whole testset
for i in range(0, len(x_val), step_size):
    chunk = x_val[i:i + step_size]
    val_output = cnn(
        Variable(
            get_tweets_tensor(chunk, word_vectors,
                              vector_length).cuda()))
    y_val_pred = torch.max(val_output.cpu(), 1)[1].data.numpy().squeeze()
    # Weight each chunk's accuracy by its size: the final chunk may be
    # shorter than step_size, so an unweighted mean over chunks would
    # bias the reported accuracy.
    accuracy_weighted += accuracy_score(y_val[i:i + step_size],
                                        y_val_pred) * len(chunk)
    n_evaluated += len(chunk)

print('Validation accuracy:', accuracy_weighted / n_evaluated)

### Step 5: Make predictions and save model ###

cnn.cpu()
torch.save(cnn, './ensemble_models/model5.pth')

test_output = cnn(
    Variable(get_tweets_tensor(x_text_test_pad, word_vectors, vector_length)))
y_pred = torch.max(test_output, 1)[1].data.numpy().squeeze()
# Remap class label 0 to -1 before writing the submission.
y_pred[y_pred == 0] = -1
# 1-based ids, one per prediction (idiomatic form of arange(n + 1)[1:]).
ids = np.arange(1, len(y_pred) + 1)
create_csv_submission(ids, y_pred, 'kaggle_model_5.csv')
# ---- Beispiel #3 (scraped-snippet separator) ----
        if (i+1) % 1000 == 0:
            print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
                  %(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

print('Finished Training')

# Evaluate accuracy of predictions from validation data.
cnn.eval()
accuracy_weighted = 0.0  # sum of per-chunk accuracy * chunk size
n_evaluated = 0          # total number of validation samples scored
step_size = 100  # Calculate in steps, since GPU memory might be too small for whole testset
for i in range(0, len(x_val), step_size):
    chunk = x_val[i:i + step_size]
    val_output = cnn(
        Variable(
            get_tweets_tensor(chunk, word_vectors,
                              vector_length).cuda()))
    y_val_pred = torch.max(val_output.cpu(), 1)[1].data.numpy().squeeze()
    # Weight each chunk's accuracy by its size: the final chunk may be
    # shorter than step_size, so an unweighted mean over chunks would
    # bias the reported accuracy.
    accuracy_weighted += accuracy_score(y_val[i:i + step_size],
                                        y_val_pred) * len(chunk)
    n_evaluated += len(chunk)

print('Validation accuracy:', accuracy_weighted / n_evaluated)

### Step 5: Make predictions and save model ###

cnn.cpu()
torch.save(cnn, './sentiment_models/sentiment_v5.pth')

test_output = cnn(
    Variable(get_tweets_tensor(x_text_test_pad, word_vectors, vector_length)))
y_pred = torch.max(test_output, 1)[1].data.numpy().squeeze()
# Remap class label 0 to -1 before writing the submission.
y_pred[y_pred == 0] = -1
# 1-based ids, one per prediction (idiomatic form of arange(n + 1)[1:]).
ids = np.arange(1, len(y_pred) + 1)
create_csv_submission(ids, y_pred, 'kaggle_sentiment_v5.csv')