def main(args):
    """Build model, optimizer and dataloader from CLI args, then train (or evaluate).

    Expects ``args`` to provide: model, optim, lr, weight_decay, load_path,
    recover, dataset, batch_size, num_workers, evaluate, plus the LSTM
    hyper-parameters (input_dim, lstm_hidden_dim, time_step).

    Raises:
        ValueError: if ``args.model``, ``args.optim`` or ``args.dataset`` is
            not one of the supported choices.
    """
    # --- model selection ---
    if args.model == 'LSTM':
        model = LSTM(input_dim=args.input_dim,
                     lstm_hidden_dim=args.lstm_hidden_dim,
                     time_step=args.time_step)
    else:
        # Was a bare `raise ValueError`; include the offending value so the
        # failure is actionable.
        raise ValueError('unsupported model: {!r}'.format(args.model))
    model.cuda()

    # --- optimizer selection ---
    if args.optim == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.optim == 'SGD_momentum':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                                    weight_decay=args.weight_decay)
    elif args.optim == 'Adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr,
                                        weight_decay=args.weight_decay)
    elif args.optim == 'RMSprop':
        # NOTE(review): alpha=0.999 is unusually high for RMSprop (torch default
        # is 0.99) — confirm this was intentional.
        optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, alpha=0.999,
                                        eps=1e-8, weight_decay=args.weight_decay)
    elif args.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     betas=(0.9, 0.999), eps=1e-8,
                                     weight_decay=args.weight_decay)
    else:
        raise ValueError('unsupported optimizer: {!r}'.format(args.optim))
    # No LR schedule is configured here; the None is passed through to train().
    lr_scheduler = None

    # --- optional checkpoint restore ---
    if args.load_path:
        if args.recover:
            load_model(model, args.load_path, strict=True)
            print('load model state dict in {}'.format(args.load_path))

    # --- dataset / dataloader ---
    map_file_path = 'divide.csv'
    data_file_path = 'processed_data.txt'
    social_economical_path = '2010-2016.csv'
    if args.dataset == 'NaiveDataset':
        train_set = NaiveDataset(data_file_path, map_file_path)
    elif args.dataset == 'AdvancedDataset':
        train_set = AdvancedDataset(data_file_path, map_file_path,
                                    social_economical_path)
    else:
        raise ValueError('unsupported dataset: {!r}'.format(args.dataset))
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size,
                                  shuffle=True, num_workers=args.num_workers,
                                  pin_memory=True)

    # --- run ---
    if args.evaluate:
        validate(train_dataloader, model)
        return
    # NOTE(review): the same dataloader is passed as both the train and the
    # validation loader — confirm that is intended.
    train(train_dataloader, train_dataloader, model, optimizer, lr_scheduler, args)
def fit(args):
    """Train a sign-language classifier (LSTM or Transformer) with early stopping.

    Uses CNN features when ``args.use_cnn`` is set, otherwise raw keypoints.
    Logs per-epoch metrics to ``<save_path>/<exp_name>.log`` and checkpoints
    the best model (by validation accuracy) to ``<save_path>/<exp_name>.pth``.
    """
    exp_name = get_experiment_name(args)
    # File logging only — one line per epoch, message-only format.
    logging_path = os.path.join(args.save_path, exp_name) + ".log"
    logging.basicConfig(filename=logging_path, level=logging.INFO,
                        format="%(message)s")
    seed_everything(args.seed)
    label_map = load_label_map(args.dataset)

    # Dataset choice: pre-extracted CNN features vs. keypoint sequences.
    # NOTE(review): "FeaturesDatset" (sic) matches the project's class name;
    # do not "fix" the spelling here without renaming the class itself.
    if args.use_cnn:
        train_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_train_features"),
            label_map=label_map,
            mode="train",
        )
        val_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_val_features"),
            label_map=label_map,
            mode="val",
        )
    else:
        train_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_train_keypoints"),
            use_augs=args.use_augs,  # augmentations only on the training split
            label_map=label_map,
            mode="train",
            max_frame_len=169,
        )
        val_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_val_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="val",
            max_frame_len=169,
        )
    train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    val_dataloader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    # 50 classes by default; the "include" dataset has 263.
    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    # Model choice; when CNN features are used, override the input size with
    # the CNN's output dimensionality.
    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate,
                                  weight_decay=0.01)
    # mode="max": the scheduler steps on validation *accuracy*, not loss.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max",
                                                           factor=0.2)
    if args.use_pretrained == "resume_training":
        model, optimizer, scheduler = load_pretrained(args, n_classes, model,
                                                      optimizer, scheduler)
    model_path = os.path.join(args.save_path, exp_name) + ".pth"
    # Early stopping tracks the best (maximum) validation accuracy.
    es = EarlyStopping(patience=15, mode="max")
    for epoch in range(args.epochs):
        print(f"Epoch: {epoch+1}/{args.epochs}")
        train_loss, train_acc = train(train_dataloader, model, optimizer, device)
        val_loss, val_acc = validate(val_dataloader, model, device)
        logging.info(
            "Epoch: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}"
            .format(epoch + 1, train_loss, train_acc, val_loss, val_acc))
        scheduler.step(val_acc)
        # EarlyStopping also checkpoints the model when val_acc improves.
        es(
            model_path=model_path,
            epoch_score=val_acc,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        if es.early_stop:
            print("Early stopping")
            break
    print("### Training Complete ###")
if __name__ == "__main__":
    # Load the ECG dataset and carve a 100-sample validation split out of the
    # training portion (fixed seed for reproducibility).
    train_dataset_dict, test_dataset_dict = load_ECG_dataset(root_dir)
    train_dataset, validation_dataset = split_dataset(train_dataset_dict,
                                                      val_num=100, seed=0)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    # Validation uses a fixed batch size of 16 and no shuffling.
    val_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                 batch_size=16, shuffle=False)
    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}
    model = LSTM(num_classes, input_size, hidden_size, num_layers, device)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    # NOTE(review): the configured learning rate is scaled down by 1e-3 here —
    # confirm `learning_rate` is meant to be in "milli" units.
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate * 1e-3)
    # Decay LR by 10% every 5 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)
    trained_model = train_model(model, dataloaders_dict, criterion, optimizer,
                                exp_lr_scheduler, device, num_epochs,
                                stopping_epoch, savedirpath=out)
# Model and optimizer stop = False# while(not stop):# if(args.model=="LSTM"): model = LSTM(nfeat=lstm_features, nhid=args.hidden, n_nodes=n_nodes, window=args.window, dropout=args.dropout,batch_size = args.batch_size, recur=args.recur).to(device) elif(args.model=="MPNN_LSTM"): model = MPNN_LSTM(nfeat=nfeat, nhid=args.hidden, nout=1, n_nodes=n_nodes, window=args.graph_window, dropout=args.dropout).to(device) elif(args.model=="MPNN"): model = MPNN(nfeat=nfeat, nhid=args.hidden, nout=1, dropout=args.dropout).to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10) #------------------- Train best_val_acc= 1e8 val_among_epochs = [] train_among_epochs = [] stop = False for epoch in range(args.epochs): start = time.time() model.train() train_loss = AverageMeter() # Train for one epoch
# TODO: n_vocab === train set model = LSTM(input_size=flags.embedding_size, hidden_size=flags.hidden_size, output_size=dataset.vocabulary.vocab_size) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model.to(device) # hidden state and cell state (needed for LSTM) state_h, state_c = model.zero_state(flags.batch_size) state_h = state_h.to(device) state_c = state_c.to(device) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=flags.learning_rate) epoch = 1 iterator = 0 for i in range(flags.epochs * flags.max_batch): if iterator >= epoch * flags.max_batch: epoch += 1 state_h, state_c = model.zero_state(flags.batch_size) state_h = state_h.to(device) state_c = state_c.to(device) iterator += 1 inputs, targets = dataset() model.train()
# Hyper-parameter grid for the LSTM retrieval model. Most lists are single
# element, so the product below currently runs exactly one configuration.
RESULTS = []
MARGINS = [0.2]
MAX_EPOCHS = 50
BATCH_SIZE = 32
FILTER_WIDTHS = [3]
POOL_METHOD = "average"
FEATURE_DIMS = [667]
DROPOUT_PS = [0.1]
NUM_HIDDEN_UNITS = [240]
LEARNING_RATES = [1E-3]
MODELS = []
LSTM_HYPERPARAMETERS = itertools.product(MARGINS, NUM_HIDDEN_UNITS,
                                         LEARNING_RATES)
for margin, num_hidden_units, learning_rate in LSTM_HYPERPARAMETERS:
    model = LSTM(EMBEDDINGS, num_hidden_units, POOL_METHOD, CUDA)
    criterion = helpers.MaxMarginLoss(margin)
    # Only optimize trainable parameters (embeddings may be frozen).
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=learning_rate)
    model, mrr = train_utils.train_model(model, optimizer, criterion,
                                         ASK_UBUNTU_DATA, MAX_EPOCHS,
                                         BATCH_SIZE, CUDA,
                                         eval_data=ANDROID_DATA)
    # Checkpoint named after the hyper-parameters and the achieved score.
    # NOTE(review): the variable is called `mrr` but the filename labels it
    # "auc=" — confirm which metric train_model actually returns.
    torch.save(model.state_dict(),
               "./lstm_" + str(margin) + "_" + str(num_hidden_units) + "_" +
               str(learning_rate) + "_" + "auc=" + str(mrr))
    MODELS.append((mrr, margin, num_hidden_units, learning_rate))

##############################################################################
# Train models by adversarial domain adaptation and evaluate
##############################################################################
MAX_EPOCHS = 50
BATCH_SIZE = 32
# Hyper-parameters for the character-level band-name decoder.
hidden_size = 1024
lr = 0.001
# list of uppercase letters to init bandnames
uppers = string.ascii_uppercase

# The recurrent cell variant is chosen via the first CLI argument; anything
# other than 'milstm'/'lstm' falls back to the layer-normalised miLSTM.
rnn_type = str(sys.argv[1])
_decoder_classes = {'milstm': miLSTM, 'lstm': LSTM}
decoder = _decoder_classes.get(rnn_type, LN_miLSTM)(n_characters, hidden_size,
                                                    64, n_characters)

decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()
decoder.cuda()

# Band names from the death-metal dataset, each terminated with an <EOS> tag.
death_metal_bands = pd.read_csv('../data/death-metal/bands.csv')
band_raw = death_metal_bands['name'].tolist()
band_nms = [bnd + '<EOS>' for bnd in band_raw]
print('Found', len(band_nms), 'bands!')
def main():
    # Python 2 script: trains an AskUbuntu question encoder jointly with a
    # domain classifier, evaluating AUC on the Android dev set each epoch.
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args

    # GloVe embeddings (300-d, pruned vocabulary).
    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print 'Embeddings loaded.'

    # Android corpus -> token id sequences.
    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print 'Got Android corpus ids.'

    # AskUbuntu corpus -> token id sequences.
    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print 'Got AskUbuntu corpus ids.'

    padding_id = embedding.vocab_ids['<padding>']

    ubuntu_train_file = 'data/askubuntu/train_random.txt'
    ubuntu_train_data = ubuntu_dataset.read_annotations(ubuntu_train_file)

    # Android dev annotations (positive/negative pairs) -> eval batches.
    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)
    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)

    # Encoder (LSTM or CNN) plus a feed-forward domain classifier.
    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model_encoder = LSTM(embed_size, args.hidden)
    else:
        model_encoder = CNN(embed_size, args.hidden)
    model_classifier = FFN(args.hidden)
    print model_encoder
    print model_classifier

    # Separate optimizers/criteria: max-margin ranking for the encoder,
    # cross-entropy for the domain classifier.
    optimizer_encoder = torch.optim.Adam(model_encoder.parameters(),
                                         lr=args.elr)
    criterion_encoder = nn.MultiMarginLoss(margin=args.margin)
    optimizer_classifier = torch.optim.Adam(model_classifier.parameters(),
                                            lr=args.clr)
    criterion_classifier = nn.CrossEntropyLoss()
    if cuda_available:
        criterion_encoder = criterion_encoder.cuda()
        criterion_classifier = criterion_classifier.cuda()

    # Optionally resume both models from a checkpoint.
    if args.load:
        if os.path.isfile(args.load):
            print 'Loading checkpoint.'
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_auc = checkpoint.get('best_auc', -1)
            model_encoder.load_state_dict(checkpoint['encoder_state_dict'])
            model_classifier.load_state_dict(
                checkpoint['classifier_state_dict'])
            print 'Loaded checkpoint at epoch {}.'.format(checkpoint['epoch'])
        else:
            print 'No checkpoint found here.'

    # Evaluation-only mode: report AUC on Android dev and test, then exit.
    if args.eval:
        test_pos_file = 'data/android/test.pos.txt'
        test_neg_file = 'data/android/test.neg.txt'
        android_test_data = android_dataset.read_annotations(
            test_pos_file, test_neg_file)
        android_test_batches = batch_utils.generate_eval_batches(
            android_ids, android_test_data, padding_id)
        print 'Evaluating on dev set.'
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_dev_batches, padding_id)
        print 'Evaluating on test set.'
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_test_batches, padding_id)
        return

    # Joint training loop: fresh encoder + classifier batches each epoch,
    # checkpointing whenever dev AUC improves.
    for epoch in xrange(args.start_epoch, args.epochs):
        encoder_train_batches = batch_utils.generate_train_batches(
            ubuntu_ids, ubuntu_train_data, args.batch_size, padding_id)
        classifier_train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size,
                len(encoder_train_batches), padding_id)

        # NOTE: project helper name is "train_encoder_classifer" (sic).
        train_utils.train_encoder_classifer(
            args, model_encoder, model_classifier, embedding,
            optimizer_encoder, optimizer_classifier, criterion_encoder,
            criterion_classifier,
            zip(encoder_train_batches, classifier_train_batches), padding_id,
            epoch, args.lmbda)

        auc = train_utils.evaluate_auc(args, model_encoder, embedding,
                                       android_dev_batches, padding_id)
        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_state_dict': model_encoder.state_dict(),
                'classifier_state_dict': model_classifier.state_dict(),
                'best_auc': best_auc,
            }, is_best)
'loss_train: {:.4f}'.format(train_loss / count), 'time: {:.4f}s'.format(time.time() - t)) # Stores DeepSets model into disk torch.save( { 'state_dict': deepsets.state_dict(), 'optimizer': optimizer.state_dict(), }, 'model_deepsets.pth.tar') print("Finished training for DeepSets model") print() # Initializes LSTM model and optimizer lstm = LSTM(n_digits, embedding_dim, hidden_dim).to(device) optimizer = optim.Adam(lstm.parameters(), lr=learning_rate) loss_function = nn.L1Loss() # Trains the LSTM model for epoch in range(epochs): t = time.time() lstm.train() train_loss = 0 count = 0 idx = np.random.permutation(n_train) for i in range(0, n_train, batch_size): ############## Task 5 ##################
def train_with_lstm(self):
    """Train an LSTM regressor on this instance's price series and plot results.

    Splits ``self.data`` with the configured lookback window, trains for
    ``num_epochs`` with Adam/MSE, prints RMSE on train/test, renders a
    seaborn training plot and a plotly result figure, and returns
    ``[train_rmse, test_rmse, training_time]``.
    """
    x_train, y_train, x_test, y_test = split_data(
        self.data, self.input_params['lookback'])
    print('x_train.shape = ', x_train.shape)
    print('y_train.shape = ', y_train.shape)
    print('x_test.shape = ', x_test.shape)
    print('y_test.shape = ', y_test.shape)
    x_train = torch.from_numpy(x_train).type(torch.Tensor)
    x_test = torch.from_numpy(x_test).type(torch.Tensor)
    y_train_lstm = torch.from_numpy(y_train).type(torch.Tensor)
    y_test_lstm = torch.from_numpy(y_test).type(torch.Tensor)
    # NOTE(review): the *_gru tensors are built but never used in this method.
    y_train_gru = torch.from_numpy(y_train).type(torch.Tensor)
    y_test_gru = torch.from_numpy(y_test).type(torch.Tensor)
    model = LSTM(input_dim=self.input_params['input_dim'],
                 hidden_dim=self.input_params['hidden_dim'],
                 output_dim=self.input_params['output_dim'],
                 num_layers=self.input_params['num_layers'])
    criterion = torch.nn.MSELoss(reduction='mean')
    # optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
    # Adam: a first-order optimizer that can replace classic stochastic
    # gradient descent, updating network weights iteratively from the data.
    optimiser = torch.optim.Adam(model.parameters(),
                                 lr=self.input_params['lr'])
    hist = np.zeros(self.input_params['num_epochs'])
    start_time = time.time()
    lstm = []
    # Gradient-descent training loop (full-batch: the whole train set per step).
    for t in range(self.input_params['num_epochs']):
        y_train_pred = model(x_train)
        loss = criterion(y_train_pred, y_train_lstm)
        print("Epoch ", t, "MSE: ", loss.item())
        hist[t] = loss.item()
        # Reset parameter gradients to zero.
        optimiser.zero_grad()
        # Backpropagate to compute gradients.
        loss.backward()
        # Update all parameters.
        optimiser.step()
    training_time = time.time() - start_time
    print("Training time: {}".format(training_time))
    # Map the scaled predictions/targets back to the original price scale.
    predict = pd.DataFrame(
        self.scaler.inverse_transform(y_train_pred.detach().numpy()))
    original = pd.DataFrame(
        self.scaler.inverse_transform(y_train_lstm.detach().numpy()))
    print(predict)
    fig = plt.figure()
    # Adjust subplot layout.
    fig.subplots_adjust(hspace=0.2, wspace=0.2)
    # Stock price subplot: actual data vs. training prediction.
    plt.subplot(1, 2, 1)
    ax = sns.lineplot(x=original.index, y=original[0], label="Data",
                      color='royalblue')
    ax = sns.lineplot(x=predict.index, y=predict[0],
                      label="Training Prediction (LSTM)", color='tomato')
    ax.set_title('Stock price', size=14, fontweight='bold')
    ax.set_xlabel("Days", size=14)
    ax.set_ylabel("Cost (USD)", size=14)
    ax.set_xticklabels('', size=10)
    plt.show()
    # # Training-loss subplot (disabled).
    # plt.subplot(1, 2, 2)
    # print(hist)
    # ax = sns.lineplot(data=hist, color='royalblue')
    # ax.set_xlabel("Epoch", size=14)
    # ax.set_ylabel("Loss", size=14)
    # ax.set_title("Training Loss", size=14, fontweight='bold')
    # fig.set_figheight(6)
    # fig.set_figwidth(16)
    # fig.show()
    # > Prediction on held-out data.
    # make predictions
    y_test_pred = model(x_test)
    # invert predictions
    # scaler.inverse_transform maps standardized values back to raw prices.
    y_train_pred = self.scaler.inverse_transform(
        y_train_pred.detach().numpy())
    y_train = self.scaler.inverse_transform(y_train_lstm.detach().numpy())
    y_test_pred = self.scaler.inverse_transform(
        y_test_pred.detach().numpy())
    y_test = self.scaler.inverse_transform(y_test_lstm.detach().numpy())
    # RMSE on the first output column for train and test splits.
    trainScore = math.sqrt(
        mean_squared_error(y_train[:, 0], y_train_pred[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(
        mean_squared_error(y_test[:, 0], y_test_pred[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    lstm.append(trainScore)
    lstm.append(testScore)
    lstm.append(training_time)
    # > Build full-length arrays aligning train/test predictions with the
    # original series for plotting (np.empty_like: same shape/dtype as input).
    trainPredictPlot = np.empty_like(self.data)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[self.input_params['lookback']:len(y_train_pred) +
                     self.input_params['lookback'], :] = y_train_pred
    # shift test predictions for plotting
    testPredictPlot = np.empty_like(self.data)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(y_train_pred) + self.input_params['lookback'] -
                    1:len(self.data) - 1, :] = y_test_pred
    original = self.scaler.inverse_transform(
        self.data['Close'].values.reshape(-1, 1))
    # Columns: [train predictions, test predictions, actual close].
    predictions = np.append(trainPredictPlot, testPredictPlot, axis=1)
    predictions = np.append(predictions, original, axis=1)
    result = pd.DataFrame(predictions)
    # >> Interactive plotly figure.
    # NOTE(review): go.Scatter wrapped inside another go.Scatter — plotly
    # copies the inner trace's properties, so this works but looks redundant;
    # confirm it was intentional.
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            go.Scatter(x=result.index,
                       y=result[0],
                       mode='lines',
                       name='Train prediction')))
    fig.add_trace(
        go.Scatter(x=result.index,
                   y=result[1],
                   mode='lines',
                   name='Test prediction'))
    fig.add_trace(
        go.Scatter(
            go.Scatter(x=result.index,
                       y=result[2],
                       mode='lines',
                       name='Actual Value')))
    fig.update_layout(xaxis=dict(showline=True,
                                 showgrid=True,
                                 showticklabels=False,
                                 linecolor='white',
                                 linewidth=2),
                      yaxis=dict(
                          title_text='Close (USD)',
                          titlefont=dict(
                              family='Rockwell',
                              size=12,
                              color='white',
                          ),
                          showline=True,
                          showgrid=True,
                          showticklabels=True,
                          linecolor='white',
                          linewidth=2,
                          ticks='outside',
                          tickfont=dict(
                              family='Rockwell',
                              size=12,
                              color='white',
                          ),
                      ),
                      showlegend=True,
                      template='plotly_dark')
    annotations = []
    annotations.append(
        dict(xref='paper',
             yref='paper',
             x=0.0,
             y=1.05,
             xanchor='left',
             yanchor='bottom',
             text='Results (LSTM)',
             font=dict(family='Rockwell', size=26, color='white'),
             showarrow=False))
    fig.update_layout(annotations=annotations)
    fig.show()
    # py.iplot(fig, filename='stock_prediction_lstm')
    return lstm
def main():
    # Python 2 script: adversarial domain adaptation (ADDA). A source encoder
    # trained on AskUbuntu is frozen; a target encoder is trained against a
    # discriminator, with AUC evaluated on the Android dev set.
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args

    # GloVe embeddings (300-d, pruned vocabulary).
    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print 'Embeddings loaded.'

    # Android corpus -> token id sequences.
    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print 'Got Android corpus ids.'

    # AskUbuntu corpus -> token id sequences.
    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print 'Got AskUbuntu corpus ids.'

    padding_id = embedding.vocab_ids['<padding>']

    # Android dev annotations -> eval batches.
    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)
    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)

    assert args.model in ['lstm', 'cnn']

    # A pretrained source-encoder checkpoint is mandatory for ADDA.
    if os.path.isfile(args.load):
        checkpoint = torch.load(args.load)
    else:
        print 'No checkpoint found here.'
        return

    # Source and target encoders share an architecture; the target encoder is
    # initialised fresh and trained, the source encoder is loaded and frozen.
    if args.model == 'lstm':
        encoder_src = LSTM(embed_size, args.hidden)
        encoder_tgt = LSTM(embed_size, args.hidden)
    else:
        encoder_src = CNN(embed_size, args.hidden)
        encoder_tgt = CNN(embed_size, args.hidden)
    encoder_src.load_state_dict(checkpoint['state_dict'])
    encoder_src.eval()
    model_discrim = FFN(args.hidden)
    print encoder_src
    print encoder_tgt
    print model_discrim

    criterion = nn.CrossEntropyLoss()
    if cuda_available:
        criterion = criterion.cuda()

    # GAN-style Adam settings (beta1=0.5) with weight decay, for both the
    # target encoder and the discriminator.
    betas = (0.5, 0.999)
    weight_decay = 1e-4
    optimizer_tgt = torch.optim.Adam(encoder_tgt.parameters(), lr=args.elr,
                                     betas=betas, weight_decay=weight_decay)
    optimizer_discrim = torch.optim.Adam(model_discrim.parameters(),
                                         lr=args.dlr, betas=betas,
                                         weight_decay=weight_decay)

    # Adversarial training loop; checkpoint whenever dev AUC improves.
    for epoch in xrange(args.start_epoch, args.epochs):
        train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size, args.batch_count,
                padding_id)

        train_utils.train_adda(args, encoder_src, encoder_tgt, model_discrim,
                               embedding, optimizer_tgt, optimizer_discrim,
                               criterion, train_batches, padding_id, epoch)

        auc = train_utils.evaluate_auc(args, encoder_tgt, embedding,
                                       android_dev_batches, padding_id)
        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_tgt_state_dict': encoder_tgt.state_dict(),
                'discrim_state_dict': model_discrim.state_dict(),
                'best_auc': best_auc,
            }, is_best)
def main():
    # Python 2 script: trains a question-retrieval encoder on AskUbuntu with a
    # max-margin loss; tracks MRR on the AskUbuntu dev set and, optionally,
    # AUC on the Android dev set for transfer evaluation.
    global args, best_mrr, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args

    corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    dataset = UbuntuDataset(corpus_file)
    corpus = dataset.get_corpus()

    # Embedding source: in-domain AskUbuntu vectors or pruned GloVe.
    if args.embedding == 'askubuntu':
        embedding_file = 'data/askubuntu/vector/vectors_pruned.200.txt.gz'
    else:
        embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embedding = Embedding(args.embed, embedding_iter)
    print 'Embeddings loaded.'

    corpus_ids = embedding.corpus_to_ids(corpus)
    padding_id = embedding.vocab_ids['<padding>']

    train_file = 'data/askubuntu/train_random.txt'
    train_data = dataset.read_annotations(train_file)

    # Dev annotations with all negatives kept (max_neg=-1).
    dev_file = 'data/askubuntu/dev.txt'
    dev_data = dataset.read_annotations(dev_file, max_neg=-1)
    dev_batches = batch_utils.generate_eval_batches(corpus_ids, dev_data,
                                                    padding_id)

    # Encoder choice: LSTM or CNN over word embeddings.
    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model = LSTM(args.embed, args.hidden)
    else:
        model = CNN(args.embed, args.hidden)
    print model
    print 'Parameters: {}'.format(params(model))

    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    criterion = nn.MultiMarginLoss(margin=args.margin)
    if cuda_available:
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint (restores best metrics and epoch).
    if args.load:
        if os.path.isfile(args.load):
            print 'Loading checkpoint.'
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_mrr = checkpoint.get('best_mrr', -1)
            best_auc = checkpoint.get('best_auc', -1)
            model.load_state_dict(checkpoint['state_dict'])
            print 'Loaded checkpoint at epoch {}.'.format(checkpoint['epoch'])
        else:
            print 'No checkpoint found here.'

    # Evaluation-only mode: report metrics on dev and test, then exit.
    if args.eval:
        test_file = 'data/askubuntu/test.txt'
        test_data = dataset.read_annotations(test_file, max_neg=-1)
        test_batches = batch_utils.generate_eval_batches(
            corpus_ids, test_data, padding_id)
        print 'Evaluating on dev set.'
        train_utils.evaluate_metrics(args, model, embedding, dev_batches,
                                     padding_id)
        print 'Evaluating on test set.'
        train_utils.evaluate_metrics(args, model, embedding, test_batches,
                                     padding_id)
        return

    # Optional transfer evaluation on the Android dev set.
    if args.android:
        android_file = 'data/android/corpus.tsv.gz'
        android_dataset = AndroidDataset(android_file)
        android_ids = embedding.corpus_to_ids(android_dataset.get_corpus())
        dev_pos_file = 'data/android/dev.pos.txt'
        dev_neg_file = 'data/android/dev.neg.txt'
        android_data = android_dataset.read_annotations(
            dev_pos_file, dev_neg_file)
        android_batches = batch_utils.generate_eval_batches(
            android_ids, android_data, padding_id)

    # Training loop. "Best" is judged by Android AUC when --android is set,
    # otherwise by AskUbuntu dev MRR.
    for epoch in xrange(args.start_epoch, args.epochs):
        train_batches = batch_utils.generate_train_batches(
            corpus_ids, train_data, args.batch_size, padding_id)

        train_utils.train(args, model, embedding, optimizer, criterion,
                          train_batches, padding_id, epoch)

        # NOTE: `map` shadows the builtin here; kept as-is to preserve code.
        map, mrr, p1, p5 = train_utils.evaluate_metrics(
            args, model, embedding, dev_batches, padding_id)
        auc = -1
        if args.android:
            auc = train_utils.evaluate_auc(args, model, embedding,
                                           android_batches, padding_id)

        is_best = auc > best_auc if args.android else mrr > best_mrr
        best_mrr = max(mrr, best_mrr)
        best_auc = max(auc, best_auc)
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'state_dict': model.state_dict(),
                'best_mrr': best_mrr,
                'best_auc': best_auc,
            }, is_best)