import sys

import torch

# Inference path: decode the test set and print one detokenized sentence per
# line, then exit before any training setup (model, test_source, args, vocab,
# device and pad_id are prepared earlier in the script).
for i, source in enumerate(batchify_data(test_source)):
    output = model.decode(source)
    for words in output:
        print(' '.join(words))
sys.exit(0)

# Select the model architecture.
if args.model == 'baseline':
    model = BaselineModel(vocab).to(device)
elif args.model == 'transformer':
    model = TransformerModel(vocab).to(device)
else:
    print('error: invalid model or model not specified (--model)', file=sys.stderr)
    sys.exit(1)

# Xavier-initialize all weight matrices; 1-d parameters (biases, layer-norm
# gains) keep their default initialization.
for p in model.parameters():
    if p.dim() > 1:
        torch.nn.init.xavier_uniform_(p)

criterion = torch.nn.CrossEntropyLoss(ignore_index=pad_id)
lr = 5  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

bos_token = vocab.numberize('<BOS>')
eos_token = vocab.numberize('<EOS>')
cpy_token = vocab.numberize('<CPY>')


def train():
    model.train()  # turn on train mode
    total_loss = 0.
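# The inference loop above assumes a batchify_data helper defined elsewhere in
# the project. A minimal sketch of one plausible implementation, which simply
# yields fixed-size batches of sentences (the real helper may also sort by
# length and pad; names and defaults here are assumptions for illustration):
def batchify_data(sentences, batch_size=32):
    """Yield successive batches of batch_size sentences."""
    for start in range(0, len(sentences), batch_size):
        yield sentences[start:start + batch_size]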
import pickle
import sys
import time

import numpy as np
import pandas as pd
import torch

# VocabEntry, BaselineModel, batch_iter and validation are defined elsewhere
# in the project.


def train(args):
    label_name = ['0', '1']
    device = torch.device("cuda:0" if args['--cuda'] else "cpu")

    # Load the GloVe vocabulary and embedding matrix, appending two random
    # rows for the <unk> and <pad> tokens that GloVe does not provide.
    start_time = time.time()
    print('Initializing Glove vocab and embeddings...', file=sys.stderr)
    glove_word2id = pickle.load(open(args['--vocab'], 'rb'))
    glove_word2id.update({'<unk>': len(glove_word2id)})
    glove_word2id.update({'<pad>': len(glove_word2id)})
    vocab = VocabEntry(glove_word2id)

    embedding_matrix = np.load(open(args['--embeddings'], 'rb'))
    embedding_matrix = np.vstack(
        (embedding_matrix,
         np.random.uniform(embedding_matrix.min(), embedding_matrix.max(),
                           (2, embedding_matrix.shape[1]))))
    glove_embeddings = torch.tensor(embedding_matrix, dtype=torch.float,
                                    device=device)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time), file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    # Load the training and validation data; weight each class inversely to
    # its frequency so the loss is balanced.
    start_time = time.time()
    print('Importing data...', file=sys.stderr)
    df_train = pd.read_csv(args['--train'], index_col=0)
    df_val = pd.read_csv(args['--dev'], index_col=0)
    train_label = dict(df_train.InformationType_label.value_counts())
    label_max = float(max(train_label.values()))
    train_label_weight = torch.tensor(
        [label_max / train_label[i] for i in range(len(train_label))],
        device=device)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time), file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    start_time = time.time()
    print('Set up model...', file=sys.stderr)
    model = BaselineModel(hidden_size=int(args['--hidden-size']),
                          embedding=glove_embeddings,
                          vocab=vocab,
                          n_class=len(label_name),
                          dropout_rate=float(args['--dropout']))
    model = model.to(device)
    print('Use device: %s' % device, file=sys.stderr)
    print('Done! time elapsed %.2f sec' % (time.time() - start_time), file=sys.stderr)
    print('-' * 80, file=sys.stderr)

    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=float(args['--lr']))
    cn_loss = torch.nn.CrossEntropyLoss(weight=train_label_weight.float(),
                                        reduction='mean')
    torch.save(cn_loss, 'loss_func')  # for later testing

    train_batch_size = int(args['--batch-size'])
    clip_grad = float(args['--clip-grad'])
    valid_niter = int(args['--valid-niter'])
    log_every = int(args['--log-every'])
    model_save_path = args['--save-to']

    num_trial = 0
    train_iter = patience = cum_loss = report_loss = 0
    cum_examples = report_examples = epoch = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    print('Begin Maximum Likelihood training...')

    while True:
        epoch += 1
        for sents, targets in batch_iter(df_train, batch_size=train_batch_size,
                                         shuffle=True):  # for each epoch
            train_iter += 1
            optimizer.zero_grad()
            batch_size = len(sents)

            pre_softmax = model(sents)
            loss = cn_loss(
                pre_softmax,
                torch.tensor(targets, dtype=torch.long, device=device))
            loss.backward()

            # Clip gradients before the optimizer step.
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       clip_grad)
            optimizer.step()

            batch_losses_val = loss.item() * batch_size
            report_loss += batch_losses_val
            cum_loss += batch_losses_val
            report_examples += batch_size
            cum_examples += batch_size

            if train_iter % log_every == 0:
                print('epoch %d, iter %d, avg. loss %.2f, '
                      'cum. examples %d, speed %.2f examples/sec, '
                      'time elapsed %.2f sec' %
                      (epoch, train_iter, report_loss / report_examples,
                       cum_examples,
                       report_examples / (time.time() - train_time),
                       time.time() - begin_time),
                      file=sys.stderr)
                train_time = time.time()
                report_loss = report_examples = 0.

            # Perform validation.
            if train_iter % valid_niter == 0:
                print('epoch %d, iter %d, cum. loss %.2f, cum. examples %d' %
                      (epoch, train_iter, cum_loss / cum_examples, cum_examples),
                      file=sys.stderr)
                cum_loss = cum_examples = 0.

                print('begin validation ...', file=sys.stderr)
                validation_loss = validation(
                    model, df_val, cn_loss,
                    device)  # dev batch size can be a bit larger
                print('validation: iter %d, loss %f' %
                      (train_iter, validation_loss), file=sys.stderr)

                is_better = len(hist_valid_scores) == 0 or \
                    validation_loss < min(hist_valid_scores)
                hist_valid_scores.append(validation_loss)

                if is_better:
                    patience = 0
                    print('save currently the best model to [%s]' %
                          model_save_path, file=sys.stderr)
                    model.save(model_save_path)

                    # Also save the optimizer's state.
                    torch.save(optimizer.state_dict(), model_save_path + '.optim')
                elif patience < int(args['--patience']):
                    patience += 1
                    print('hit patience %d' % patience, file=sys.stderr)

                    if patience == int(args['--patience']):
                        num_trial += 1
                        print('hit #%d trial' % num_trial, file=sys.stderr)
                        if num_trial == int(args['--max-num-trial']):
                            print('early stop!', file=sys.stderr)
                            sys.exit(0)

                        # Decay lr, and restore from the previously best checkpoint.
                        lr = optimizer.param_groups[0]['lr'] * float(args['--lr-decay'])
                        print('load previously best model and decay learning rate to %f'
                              % lr, file=sys.stderr)

                        # Load model.
                        params = torch.load(
                            model_save_path,
                            map_location=lambda storage, loc: storage)
                        model.load_state_dict(params['state_dict'])
                        model = model.to(device)

                        print('restore parameters of the optimizers', file=sys.stderr)
                        optimizer.load_state_dict(
                            torch.load(model_save_path + '.optim'))

                        # Set new lr on every parameter group.
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr

                        # Reset patience.
                        patience = 0

                if epoch == int(args['--max-epoch']):
                    print('reached maximum number of epochs!', file=sys.stderr)
                    sys.exit(0)
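# The loop above calls a validation() helper defined elsewhere. A minimal
# sketch of what such a helper could look like, assuming batch_iter can also
# iterate over the dev DataFrame: it returns the mean weighted cross-entropy
# over the dev set with gradients disabled (signature and batch size are
# assumptions for illustration).
def validation(model, df_val, loss_func, device, batch_size=64):
    was_training = model.training
    model.eval()
    cum_loss = cum_examples = 0.
    with torch.no_grad():
        for sents, targets in batch_iter(df_val, batch_size=batch_size):
            pre_softmax = model(sents)
            loss = loss_func(
                pre_softmax,
                torch.tensor(targets, dtype=torch.long, device=device))
            cum_loss += loss.item() * len(sents)
            cum_examples += len(sents)
    if was_training:
        model.train()
    return cum_loss / cum_examples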
import time

import torch
import torch.nn as nn

# BaselineModel, RNN, baseline, method_to_string and print_to_file are defined
# elsewhere in the project; args is the module-level argparse namespace.


def best_cell_and_baseline_hyperparameters(train_dataloader, validate_dataloader,
                                           test_dataloader, embedding):
    configs = []

    # Default configuration for the recurrent model; "//" marks fields that do
    # not apply to this model type.
    RNN_config = {}
    RNN_config["model"] = "LSTM"
    RNN_config["hidden_size"] = 30
    RNN_config["num_layers"] = 3
    RNN_config["dropout"] = 0.9
    RNN_config["bidirectional"] = True
    RNN_config["fc1_width"] = "//"
    RNN_config["fc2_width"] = "//"
    RNN_config["vocab_size"] = -1
    RNN_config["lr"] = 0.0001
    RNN_config["optimizer"] = torch.optim.Adam

    # Default configuration for the baseline fully connected model.
    baseline_config = {}
    baseline_config["model"] = "Baseline"
    baseline_config["hidden_size"] = "//"
    baseline_config["num_layers"] = "//"
    baseline_config["dropout"] = "//"
    baseline_config["bidirectional"] = "//"
    baseline_config["fc1_width"] = 150
    baseline_config["fc2_width"] = 150
    baseline_config["vocab_size"] = -1
    baseline_config["lr"] = 0.0001
    baseline_config["optimizer"] = torch.optim.Adam

    # Candidate values for each hyperparameter.
    hyperparameters = {}
    hyperparameters["vocab_size"] = [50, 1000, 10000]
    hyperparameters["lr"] = [0.0001, 0.001, 0.01, 0.1]
    hyperparameters["dropout"] = [0, 0.2, 0.4, 0.6, 0.8, 1]
    hyperparameters["num_layers"] = [1, 3, 6]
    hyperparameters["hidden_size"] = [30, 100, 150, 200]
    hyperparameters["optimizer"] = [torch.optim.Adam, torch.optim.SGD,
                                    torch.optim.RMSprop]

    # Which model types support each hyperparameter.
    supports = {}
    supports["vocab_size"] = [BaselineModel, RNN.RecurrentModel]
    supports["lr"] = [BaselineModel, RNN.RecurrentModel]
    supports["dropout"] = [RNN.RecurrentModel]
    supports["num_layers"] = [RNN.RecurrentModel]
    supports["hidden_size"] = [RNN.RecurrentModel]
    supports["optimizer"] = [BaselineModel, RNN.RecurrentModel]

    initial_config = {}
    initial_config["clip"] = args.clip
    initial_config["epochs"] = args.epochs
    initial_config["input_width"] = 300
    initial_config["output_width"] = 1

    models = [BaselineModel, RNN.RecurrentModel]
    criterion = nn.BCEWithLogitsLoss()

    for model_type in models:
        for (key, values) in hyperparameters.items():
            # Skip this hyperparameter if the model does not support it.
            if model_type not in supports[key]:
                continue
            for value in values:
                start = time.time()

                # Build the full config first, then apply the value under test
                # before the model is constructed, so that structural
                # hyperparameters (hidden_size, num_layers, dropout) actually
                # take effect.
                config = {}
                config.update(RNN_config if model_type == RNN.RecurrentModel
                              else baseline_config)
                config.update(initial_config)
                config[key] = value

                if model_type == RNN.RecurrentModel:
                    train = RNN.train
                    evaluate = RNN.evaluate
                    model = RNN.RecurrentModel(config["model"],
                                               config["input_width"],
                                               config["hidden_size"],
                                               config["output_width"],
                                               config["num_layers"],
                                               config["bidirectional"],
                                               config["dropout"])
                else:
                    train = baseline.train
                    evaluate = baseline.evaluate
                    model = BaselineModel(config["input_width"],
                                          config["fc1_width"],
                                          config["fc2_width"],
                                          config["output_width"])
                print(config)

                optimizer = config["optimizer"](model.parameters(), lr=config["lr"])
                for epoch in range(args.epochs):
                    print(f'\nEpoch: {epoch}')
                    train(model, train_dataloader, optimizer, criterion,
                          embedding, args.clip)
                    evaluate(model, validate_dataloader, criterion, embedding)

                # Final evaluation on the test set; record metrics and the
                # binary confusion matrix alongside the config.
                accuracy, f1, confusion_matrix = evaluate(model, test_dataloader,
                                                          criterion, embedding)
                config["accuracy"] = accuracy.item()
                config["f1"] = f1.item()
                config["TP"] = confusion_matrix[0, 0].item()
                config["FP"] = confusion_matrix[0, 1].item()
                config["FN"] = confusion_matrix[1, 0].item()
                config["TN"] = confusion_matrix[1, 1].item()
                end = time.time()
                config["time"] = end - start
                config["optimizer"] = method_to_string(config["optimizer"])
                configs.append(config)

    print_to_file("5_final.xls", "RNN baseline hyperparameters", configs)
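# method_to_string and print_to_file are assumed to be defined elsewhere. A
# minimal sketch of both, assuming print_to_file dumps the collected configs
# as one sheet of a spreadsheet via pandas (the real helpers may differ; note
# that writing legacy .xls requires the xlwt engine in older pandas, while
# modern pandas targets .xlsx via openpyxl):
import pandas as pd


def method_to_string(method):
    """Return a readable name for an optimizer class, e.g. 'Adam'."""
    return method.__name__


def print_to_file(filename, sheet_name, configs):
    """Write a list of config dicts to a single spreadsheet sheet."""
    pd.DataFrame(configs).to_excel(filename, sheet_name=sheet_name, index=False)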