# Training setup for an attention-based seq2seq translation model (Spanish-English).
# Builds the vocabulary/pair data, fixes hyperparameters, and constructs the
# encoder and attention decoder.
from attention_decoder import AttentionDecoderRNN
from encoder import EncoderRNN
from language import Language
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import random
# NOTE(review): `helpers` and `etl` are used below but are not imported in this
# chunk — confirm they are imported elsewhere in the file (or add the imports).

# Parse argument for input sentence
# parser = argparse.ArgumentParser()
# parser.add_argument('language')
# parser.add_argument('input')
# args = parser.parse_args()
language = 'spa-eng'  # dataset identifier; CLI parsing above is disabled, so this is hard-coded
helpers.validate_language_params(language)
# input_lang/output_lang hold vocabularies; pairs are the sentence pairs for training
input_lang, output_lang, pairs = etl.prepare_data(language)

# Hyperparameters
attn_model = 'general'          # attention scoring variant passed to the decoder
hidden_size = 500               # RNN hidden state size (shared by encoder and decoder)
n_layers = 2                    # stacked RNN layers
dropout_p = 0.05                # decoder dropout probability
teacher_forcing_ratio = .5      # fraction of steps fed the ground-truth token during training
clip = 5.                       # gradient-clipping threshold
criterion = nn.NLLLoss()        # expects log-probabilities (decoder presumably ends in LogSoftmax — confirm)

# Initialize models
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers)
# NOTE(review): this call is continued beyond the visible chunk (trailing comma);
# remaining arguments (likely dropout_p) are defined outside this view.
decoder = AttentionDecoderRNN(attn_model, hidden_size, output_lang.n_words, n_layers,
# Feature-selection sweep for an XGBoost regressor: train a baseline model,
# then retrain on progressively larger feature subsets chosen by importance
# threshold (SelectFromModel), tracking dev-set MAE.
import numpy as np
from etl import prepare_data, prepare_submission
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# load and split train/dev/test
(X_train, y_train), (X_test, test_id) = prepare_data()
# 80/20 train/dev split; fixed seed for reproducibility
X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size=0.2, random_state=69)

# run without hyperparams for fscore calculations
model = XGBRegressor()
model.fit(X_train, y_train)
y_hat = model.predict(X_dev)
# expm1 inverts a log1p transform — assumes targets were log1p-scaled in
# prepare_data(); MAE is therefore measured in the original target units.
# TODO(review): confirm the transform in etl.prepare_data.
mae = mean_absolute_error(np.expm1(y_dev), np.expm1(y_hat))
print("Mae: {}".format(mae))

# Candidate thresholds: the distinct feature-importance values, ascending.
thresholds = np.sort(model.feature_importances_)
thresholds = np.unique(thresholds)
threshold = 0        # best threshold found so far
best_mae = mae       # baseline (all features) is the score to beat
# Sweep the 50 smallest thresholds; each keeps only features whose importance
# meets the threshold (prefit=True reuses the baseline model's importances).
for thresh in thresholds[:50]:
    selection = SelectFromModel(model, threshold=thresh, prefit=True)
    select_X_train = selection.transform(X_train)
    # train model
    selection_model = XGBRegressor()
    # NOTE(review): loop body continues beyond the visible chunk (evaluation /
    # best_mae update presumably follows).
    selection_model.fit(select_X_train, y_train)