def train_nn(n_epochs=2, log_interval=10, csv_path='./data/df_super.csv',
             checkpoint_path='nn.hdf5'):
    """Train the LSTM on the prepared dataset and checkpoint its weights.

    Parameters
    ----------
    n_epochs : int
        Number of training epochs passed through to ``train_loop``.
    log_interval : int
        Logging cadence passed through to ``train_loop``.
    csv_path : str
        Location of the input CSV. Defaults to the previously hard-coded
        path so existing callers are unaffected.
    checkpoint_path : str
        Destination for the saved weights. NOTE(review): despite the
        ``.hdf5`` default extension this file is a torch pickle
        (``torch.save``), not HDF5 — default kept for backward
        compatibility with existing loaders.

    Returns
    -------
    tuple
        ``(model, history)`` as produced by ``train_loop`` (previously the
        function returned ``None``; returning the results is backward
        compatible for callers that ignore the return value).
    """
    df = pd.read_csv(csv_path)
    # data.data_split presumably yields train/val/test splits — TODO confirm;
    # the test split is unused here.
    train_df, val_df, test_df = data.data_split(data.create_nn_dataset(df))
    model = LSTM(output_size=64)
    model, history = train_loop(model, train_df, val_df,
                                n_epochs=n_epochs, log_interval=log_interval)
    # Persist only the weights; optimizer state is not checkpointed here.
    torch.save({'model_state_dict': model.state_dict()}, checkpoint_path)
    return model, history
# --- Fragment: one optimization step of a word-level LSTM language model.
# This is the interior of a training loop; `dataset`, `model`, `optimizer`,
# `criterion`, `device`, `flags`, `epoch`, `iterator`, `state_h`, `state_c`
# and `predict` are all defined above this view. Indentation below is
# reconstructed from a collapsed snippet — confirm nesting against the
# full file.
inputs, targets = dataset()
model.train()
optimizer.zero_grad()
x = torch.tensor(inputs).to(device)
y = torch.tensor(targets).to(device)
# Carry the LSTM hidden/cell state across batches (stateful training).
logits, (state_h, state_c) = model(x, (state_h, state_c))
# CrossEntropy over sequences expects (batch, classes, seq), hence the
# transpose of (batch, seq, classes) logits.
loss = criterion(logits.transpose(1, 2), y)
loss_value = loss.item()
loss.backward()
# Detach so gradients do not flow back into earlier batches' graphs
# (truncated backprop through time); also frees the old graph.
state_h = state_h.detach()
state_c = state_c.detach()
# Clip AFTER backward and BEFORE step, as required.
torch.nn.utils.clip_grad_norm_(model.parameters(), flags.gradients_norm)
optimizer.step()
if iterator % 100 == 0:
    print('Epoch: {}/{}'.format(epoch, flags.epochs),
          'Iteration: {}'.format(iterator),
          'Loss: {}'.format(loss_value))
# NOTE(review): if `epoch` (or flags.max_batch) can be 0 this modulo raises
# ZeroDivisionError — confirm that epochs are 1-based in the enclosing loop.
if iterator % (epoch * flags.max_batch) == 0:
    predict(device, model, dataset.vocabulary, top_k=5)
    # Per-epoch checkpoint of weights only (no optimizer state).
    torch.save(model.state_dict(),
               'LSTM-word-level/states_testing/epoch-{}.pth'.format(epoch))
# --- Fragment: end of a per-epoch validation loop plus test-phase setup.
# Reconstructed from a collapsed snippet: the enclosing epoch loop starts
# above this view, so the indentation/nesting below is a best guess —
# confirm against the full file.
break  # NOTE(review): exits a loop whose header is above this view
# Early stopping: once past args.early_stop epochs, stop if the rounded
# validation losses of the last 50 epochs have collapsed to a single value.
if( epoch>args.early_stop):
    if(len(set([round(val_e) for val_e in val_among_epochs[-50:]])) == 1):
        print("break")
        #stop = True
        break
        stop = True  # NOTE(review): unreachable after `break` — confirm intended order
#--------- Remember best accuracy and save checkpoint
# NOTE(review): `best_val_acc` actually tracks the lowest validation LOSS,
# not an accuracy — the name is misleading.
if val_loss < best_val_acc:
    best_val_acc = val_loss
    torch.save({
        'state_dict': model.state_dict(),
        'optimizer' : optimizer.state_dict(),
    }, 'model_best.pth.tar')
# Scheduler is stepped with the validation loss (presumably
# ReduceLROnPlateau — TODO confirm).
scheduler.step(val_loss)
print("validation")
#print(best_val_acc)
#---------------- Testing
# Reload the best checkpoint (weights + optimizer) before evaluation.
test_loss = AverageMeter()
#print("Loading checkpoint!")
checkpoint = torch.load('model_best.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
# --- Fragment: logging / LR decay / final save of a char-RNN training
# script. The matching `try:` for the `except KeyboardInterrupt` below is
# above this view; indentation is reconstructed from a collapsed snippet
# and the nesting of the LR-decay loop (per-chunk vs per-epoch) should be
# confirmed against the full file.
loss_avg += loss
if chunk_counter % print_every == 0:
    # Progress line: epoch, mean loss over the window, fraction complete.
    print(
        'Epoch:', epoch, 'Loss:',
        np.round(loss_avg / print_every, decimals=3), '%:',
        np.round(chunk_counter / float(len(band_nms)), decimals=3))
    # Sample the model from a random uppercase seed character.
    rand_char = uppers[np.random.permutation(len(uppers))[0]]
    print(evaluate(rand_char, 100), '\n')
if chunk_counter % plot_every == 0:
    all_losses.append(loss_avg / plot_every)
    loss_avg = 0
chunk_counter += 1
# Manual learning-rate decay: multiply every param group's LR by 0.75.
for param_group in decoder_optimizer.param_groups:
    param_group['lr'] = param_group['lr'] * 0.75
    print('New Learning rate:', param_group['lr'])
# Ctrl-C stops training but still falls through to save the model below.
except KeyboardInterrupt:
    pass
torch.save(decoder.state_dict(),
           '../models/' + rnn_type + '_' + str(hidden_size) + '.pth')
# Persist the loss curve for later plotting.
with open('../plots/' + rnn_type + '_' + str(hidden_size) + '.pkl', 'wb') as f:
    pickle.dump(all_losses, f)
# --- Hyperparameter sweep over LSTM models on the AskUbuntu data,
# evaluated on the Android data. `MARGINS`, `MAX_EPOCHS`, `BATCH_SIZE`,
# `EMBEDDINGS`, `CUDA`, `ASK_UBUNTU_DATA` and `ANDROID_DATA` are defined
# above this view. NOTE(review): FILTER_WIDTHS, FEATURE_DIMS and
# DROPOUT_PS are assigned but unused in this span (likely CNN settings).
FILTER_WIDTHS = [3]
POOL_METHOD = "average"
FEATURE_DIMS = [667]
DROPOUT_PS = [0.1]
NUM_HIDDEN_UNITS = [240]
LEARNING_RATES = [1E-3]
MODELS = []
# Cartesian product of the swept hyperparameters.
LSTM_HYPERPARAMETERS = itertools.product(MARGINS, NUM_HIDDEN_UNITS,
                                         LEARNING_RATES)
for margin, num_hidden_units, learning_rate in LSTM_HYPERPARAMETERS:
    model = LSTM(EMBEDDINGS, num_hidden_units, POOL_METHOD, CUDA)
    criterion = helpers.MaxMarginLoss(margin)
    # Optimize only trainable parameters (frozen embeddings excluded).
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=learning_rate)
    model, mrr = train_utils.train_model(model, optimizer, criterion,
                                         ASK_UBUNTU_DATA, MAX_EPOCHS,
                                         BATCH_SIZE, CUDA,
                                         eval_data=ANDROID_DATA)
    # Checkpoint named after the hyperparameters and the final score.
    # NOTE(review): the filename says "auc=" but the variable is `mrr` —
    # confirm which metric train_model actually returns.
    torch.save(model.state_dict(),
               "./lstm_" + str(margin) + "_" + str(num_hidden_units) + "_" +
               str(learning_rate) + "_" + "auc=" + str(mrr))
    MODELS.append((mrr, margin, num_hidden_units, learning_rate))
##############################################################################
# Train models by adversarial domain adaptation and evaluate
##############################################################################
# Settings for the next (adversarial) section; these rebind names used above.
MAX_EPOCHS = 50
BATCH_SIZE = 32
MARGINS = [0.2]
FILTER_WIDTH = 2
POOL_METHOD = "average"
FEATURE_DIM = 240
def main():
    """Jointly train a question encoder (AskUbuntu) and a domain classifier
    (AskUbuntu vs Android), evaluating AUC on the Android dev set.

    Python 2 script (print statements, xrange). Relies on module-level
    `parser`, `best_auc`, `save`, and the project classes/modules
    (Embedding, AndroidDataset, UbuntuDataset, LSTM, CNN, FFN,
    batch_utils, train_utils). Indentation reconstructed from a collapsed
    snippet — confirm against the full file.
    """
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args
    # Load pruned GloVe embeddings (300-d).
    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print 'Embeddings loaded.'
    # Map both corpora to embedding-id sequences.
    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print 'Got Android corpus ids.'
    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print 'Got AskUbuntu corpus ids.'
    padding_id = embedding.vocab_ids['<padding>']
    ubuntu_train_file = 'data/askubuntu/train_random.txt'
    ubuntu_train_data = ubuntu_dataset.read_annotations(ubuntu_train_file)
    # Android dev annotations come as separate positive/negative files.
    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)
    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)
    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model_encoder = LSTM(embed_size, args.hidden)
    else:
        model_encoder = CNN(embed_size, args.hidden)
    model_classifier = FFN(args.hidden)
    print model_encoder
    print model_classifier
    # Separate optimizers/criteria for encoder (ranking) and classifier
    # (domain discrimination).
    optimizer_encoder = torch.optim.Adam(model_encoder.parameters(),
                                         lr=args.elr)
    criterion_encoder = nn.MultiMarginLoss(margin=args.margin)
    optimizer_classifier = torch.optim.Adam(model_classifier.parameters(),
                                            lr=args.clr)
    criterion_classifier = nn.CrossEntropyLoss()
    if cuda_available:
        criterion_encoder = criterion_encoder.cuda()
        criterion_classifier = criterion_classifier.cuda()
    # Optionally resume from a checkpoint.
    if args.load:
        if os.path.isfile(args.load):
            print 'Loading checkpoint.'
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_auc = checkpoint.get('best_auc', -1)
            model_encoder.load_state_dict(checkpoint['encoder_state_dict'])
            model_classifier.load_state_dict(
                checkpoint['classifier_state_dict'])
            print 'Loaded checkpoint at epoch {}.'.format(checkpoint['epoch'])
        else:
            print 'No checkpoint found here.'
    # Evaluation-only mode: report AUC on Android dev and test, then exit.
    if args.eval:
        test_pos_file = 'data/android/test.pos.txt'
        test_neg_file = 'data/android/test.neg.txt'
        android_test_data = android_dataset.read_annotations(
            test_pos_file, test_neg_file)
        android_test_batches = batch_utils.generate_eval_batches(
            android_ids, android_test_data, padding_id)
        print 'Evaluating on dev set.'
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_dev_batches, padding_id)
        print 'Evaluating on test set.'
        train_utils.evaluate_auc(args, model_encoder, embedding,
                                 android_test_batches, padding_id)
        return
    for epoch in xrange(args.start_epoch, args.epochs):
        encoder_train_batches = batch_utils.generate_train_batches(
            ubuntu_ids, ubuntu_train_data, args.batch_size, padding_id)
        # Classifier batches are sized to match the encoder batch count so
        # the two can be zipped below.
        classifier_train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size,
                len(encoder_train_batches), padding_id)
        # NOTE(review): name `train_encoder_classifer` (sic) must match the
        # train_utils API — do not "fix" the spelling here alone.
        train_utils.train_encoder_classifer(
            args, model_encoder, model_classifier, embedding,
            optimizer_encoder, optimizer_classifier, criterion_encoder,
            criterion_classifier,
            zip(encoder_train_batches, classifier_train_batches),
            padding_id, epoch, args.lmbda)
        auc = train_utils.evaluate_auc(args, model_encoder, embedding,
                                       android_dev_batches, padding_id)
        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        # NOTE(review): 'arch' is recorded as 'lstm' even when args.model
        # is 'cnn' — confirm whether this field matters to loaders.
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_state_dict': model_encoder.state_dict(),
                'classifier_state_dict': model_classifier.state_dict(),
                'best_auc': best_auc,
            }, is_best)
############## Task 5 ################## # your code here # ################## x_batch = X_train[idx[i:min(n_train, i + batch_size)], :] y_batch = y_train[idx[i:min(n_train, i + batch_size)]] x_batch = torch.LongTensor(x_batch).to(device) y_batch = torch.LongTensor(y_batch).to(device) optimizer.zero_grad() output = lstm(x_batch) loss = loss_function(output, y_batch) loss.backward() optimizer.step() train_loss += loss.item() * output.size(0) count += output.size(0) print('Epoch: {:04d}'.format(epoch + 1), 'loss_train: {:.4f}'.format(train_loss / count), 'time: {:.4f}s'.format(time.time() - t)) # Stores LSTM model into disk torch.save( { 'state_dict': lstm.state_dict(), 'optimizer': optimizer.state_dict(), }, 'model_lstm.pth.tar') print("Finished training for LSTM model")
def main():
    """Adversarial domain adaptation (ADDA-style): adapt a target encoder
    to the Android domain against a discriminator, starting from a trained
    source encoder checkpoint; evaluate AUC on the Android dev set.

    Python 2 script (print statements, xrange). Relies on module-level
    `parser`, `best_auc`, `save`, and project classes/modules (Embedding,
    AndroidDataset, UbuntuDataset, LSTM, CNN, FFN, batch_utils,
    train_utils). Indentation reconstructed from a collapsed snippet —
    confirm against the full file.
    """
    global args, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args
    # Load pruned GloVe embeddings (300-d).
    embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embed_size = 300
    embedding = Embedding(embed_size, embedding_iter)
    print 'Embeddings loaded.'
    # Map both corpora to embedding-id sequences.
    android_corpus_file = 'data/android/corpus.tsv.gz'
    android_dataset = AndroidDataset(android_corpus_file)
    android_corpus = android_dataset.get_corpus()
    android_ids = embedding.corpus_to_ids(android_corpus)
    print 'Got Android corpus ids.'
    ubuntu_corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    ubuntu_dataset = UbuntuDataset(ubuntu_corpus_file)
    ubuntu_corpus = ubuntu_dataset.get_corpus()
    ubuntu_ids = embedding.corpus_to_ids(ubuntu_corpus)
    print 'Got AskUbuntu corpus ids.'
    padding_id = embedding.vocab_ids['<padding>']
    dev_pos_file = 'data/android/dev.pos.txt'
    dev_neg_file = 'data/android/dev.neg.txt'
    android_dev_data = android_dataset.read_annotations(
        dev_pos_file, dev_neg_file)
    android_dev_batches = batch_utils.generate_eval_batches(
        android_ids, android_dev_data, padding_id)
    assert args.model in ['lstm', 'cnn']
    # A pretrained source-encoder checkpoint is mandatory for ADDA.
    if os.path.isfile(args.load):
        checkpoint = torch.load(args.load)
    else:
        print 'No checkpoint found here.'
        return
    # Source and target encoders share the architecture; the source is
    # frozen at its checkpointed weights (eval mode), the target trains.
    if args.model == 'lstm':
        encoder_src = LSTM(embed_size, args.hidden)
        encoder_tgt = LSTM(embed_size, args.hidden)
    else:
        encoder_src = CNN(embed_size, args.hidden)
        encoder_tgt = CNN(embed_size, args.hidden)
    encoder_src.load_state_dict(checkpoint['state_dict'])
    encoder_src.eval()
    model_discrim = FFN(args.hidden)
    print encoder_src
    print encoder_tgt
    print model_discrim
    criterion = nn.CrossEntropyLoss()
    if cuda_available:
        criterion = criterion.cuda()
    # GAN-style Adam settings (beta1=0.5) with weight decay for both the
    # target encoder and the discriminator.
    betas = (0.5, 0.999)
    weight_decay = 1e-4
    optimizer_tgt = torch.optim.Adam(encoder_tgt.parameters(), lr=args.elr,
                                     betas=betas, weight_decay=weight_decay)
    optimizer_discrim = torch.optim.Adam(model_discrim.parameters(),
                                         lr=args.dlr, betas=betas,
                                         weight_decay=weight_decay)
    for epoch in xrange(args.start_epoch, args.epochs):
        train_batches = \
            batch_utils.generate_classifier_train_batches(
                ubuntu_ids, android_ids, args.batch_size, args.batch_count,
                padding_id)
        train_utils.train_adda(args, encoder_src, encoder_tgt, model_discrim,
                               embedding, optimizer_tgt, optimizer_discrim,
                               criterion, train_batches, padding_id, epoch)
        # Model selection by AUC of the adapted target encoder on dev.
        auc = train_utils.evaluate_auc(args, encoder_tgt, embedding,
                                       android_dev_batches, padding_id)
        is_best = auc > best_auc
        best_auc = max(auc, best_auc)
        # NOTE(review): 'arch' is recorded as 'lstm' even when args.model
        # is 'cnn' — confirm whether this field matters to loaders.
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'encoder_tgt_state_dict': encoder_tgt.state_dict(),
                'discrim_state_dict': model_discrim.state_dict(),
                'best_auc': best_auc,
            }, is_best)
def main():
    """Train a question-retrieval encoder on AskUbuntu, optionally also
    tracking transfer AUC on the Android dev set, and checkpoint the best
    model by MRR (or AUC when --android is set).

    Python 2 script (print statements, xrange). Relies on module-level
    `parser`, `best_mrr`, `best_auc`, `save`, `params`, and project
    classes/modules (Embedding, UbuntuDataset, AndroidDataset, LSTM, CNN,
    batch_utils, train_utils). Indentation reconstructed from a collapsed
    snippet — confirm against the full file.
    """
    global args, best_mrr, best_auc
    args = parser.parse_args()
    cuda_available = torch.cuda.is_available()
    print args
    corpus_file = 'data/askubuntu/text_tokenized.txt.gz'
    dataset = UbuntuDataset(corpus_file)
    corpus = dataset.get_corpus()
    # Choose between the AskUbuntu-specific vectors and pruned GloVe.
    if args.embedding == 'askubuntu':
        embedding_file = 'data/askubuntu/vector/vectors_pruned.200.txt.gz'
    else:
        embedding_file = 'data/glove/glove.pruned.txt.gz'
    embedding_iter = Embedding.iterator(embedding_file)
    embedding = Embedding(args.embed, embedding_iter)
    print 'Embeddings loaded.'
    corpus_ids = embedding.corpus_to_ids(corpus)
    padding_id = embedding.vocab_ids['<padding>']
    train_file = 'data/askubuntu/train_random.txt'
    train_data = dataset.read_annotations(train_file)
    # max_neg=-1: keep all negative examples for evaluation.
    dev_file = 'data/askubuntu/dev.txt'
    dev_data = dataset.read_annotations(dev_file, max_neg=-1)
    dev_batches = batch_utils.generate_eval_batches(corpus_ids, dev_data,
                                                    padding_id)
    assert args.model in ['lstm', 'cnn']
    if args.model == 'lstm':
        model = LSTM(args.embed, args.hidden)
    else:
        model = CNN(args.embed, args.hidden)
    print model
    print 'Parameters: {}'.format(params(model))
    optimizer = torch.optim.Adam(model.parameters(), args.lr)
    criterion = nn.MultiMarginLoss(margin=args.margin)
    if cuda_available:
        criterion = criterion.cuda()
    # Optionally resume from a checkpoint.
    if args.load:
        if os.path.isfile(args.load):
            print 'Loading checkpoint.'
            checkpoint = torch.load(args.load)
            args.start_epoch = checkpoint['epoch']
            best_mrr = checkpoint.get('best_mrr', -1)
            best_auc = checkpoint.get('best_auc', -1)
            model.load_state_dict(checkpoint['state_dict'])
            print 'Loaded checkpoint at epoch {}.'.format(checkpoint['epoch'])
        else:
            print 'No checkpoint found here.'
    # Evaluation-only mode: report metrics on dev and test, then exit.
    if args.eval:
        test_file = 'data/askubuntu/test.txt'
        test_data = dataset.read_annotations(test_file, max_neg=-1)
        test_batches = batch_utils.generate_eval_batches(
            corpus_ids, test_data, padding_id)
        print 'Evaluating on dev set.'
        train_utils.evaluate_metrics(args, model, embedding, dev_batches,
                                     padding_id)
        print 'Evaluating on test set.'
        train_utils.evaluate_metrics(args, model, embedding, test_batches,
                                     padding_id)
        return
    # Optional transfer evaluation data (Android dev set).
    if args.android:
        android_file = 'data/android/corpus.tsv.gz'
        android_dataset = AndroidDataset(android_file)
        android_ids = embedding.corpus_to_ids(android_dataset.get_corpus())
        dev_pos_file = 'data/android/dev.pos.txt'
        dev_neg_file = 'data/android/dev.neg.txt'
        android_data = android_dataset.read_annotations(
            dev_pos_file, dev_neg_file)
        android_batches = batch_utils.generate_eval_batches(
            android_ids, android_data, padding_id)
    for epoch in xrange(args.start_epoch, args.epochs):
        train_batches = batch_utils.generate_train_batches(
            corpus_ids, train_data, args.batch_size, padding_id)
        train_utils.train(args, model, embedding, optimizer, criterion,
                          train_batches, padding_id, epoch)
        # NOTE(review): `map` shadows the builtin — harmless here but worth
        # renaming in a behavior-changing pass.
        map, mrr, p1, p5 = train_utils.evaluate_metrics(
            args, model, embedding, dev_batches, padding_id)
        auc = -1
        if args.android:
            auc = train_utils.evaluate_auc(args, model, embedding,
                                           android_batches, padding_id)
        # Selection criterion: AUC in transfer mode, MRR otherwise.
        is_best = auc > best_auc if args.android else mrr > best_mrr
        best_mrr = max(mrr, best_mrr)
        best_auc = max(auc, best_auc)
        # NOTE(review): 'arch' is recorded as 'lstm' even when args.model
        # is 'cnn' — confirm whether this field matters to loaders.
        save(
            args, {
                'epoch': epoch + 1,
                'arch': 'lstm',
                'state_dict': model.state_dict(),
                'best_mrr': best_mrr,
                'best_auc': best_auc,
            }, is_best)