def get_model(text_proc, args):
    """Build a Transformer model from ``args``, optionally warm-started from a checkpoint.

    Args:
        text_proc: text-processing object; only its ``vocab`` attribute is read.
        args: namespace with ``image_feat_size``, ``teacher_forcing`` and
            ``start_from`` (path to a state-dict checkpoint, or empty string).

    Returns:
        The constructed ``Transformer``, moved to GPU when CUDA is available.
    """
    sent_vocab = text_proc.vocab
    model = Transformer(dict_size=len(sent_vocab),
                        image_feature_dim=args.image_feat_size,
                        vocab=sent_vocab,
                        tf_ratio=args.teacher_forcing)

    # Warm-start from a checkpoint if one was given.
    if len(args.start_from) > 0:
        print("Initializing weights from {}".format(args.start_from))
        # map_location keeps the load CPU-side so a GPU checkpoint loads on any host.
        model.load_state_dict(torch.load(args.start_from,
                                         map_location=lambda storage, location: storage))

    # Ship the model to GPU, maybe.
    if torch.cuda.is_available():
        model.cuda()

    return model
# Interactive chat loop: keep the last DIALOG_SIZE utterances as context,
# feed them to the model, print the greedy decode and append it to context.
data_loader_tra, data_loader_val, data_loader_tst, vocab, program_number = prepare_data_seq(
    batch_size=config.batch_size)

# Select the model variant; fail loudly on an unknown name instead of
# crashing later with an unbound-variable NameError.
if (config.model == "trs"):
    model = Transformer(vocab, decoder_number=program_number,
                        model_file_path=config.save_path, is_eval=True)
elif (config.model == "experts"):
    model = Transformer_experts(vocab, decoder_number=program_number,
                                model_file_path=config.save_path, is_eval=True)
else:
    raise ValueError("Unknown config.model: {}".format(config.model))

if (config.USE_CUDA):
    model.cuda()
model = model.eval()

print('Start to chat')
# Fixed-length rolling window of dialogue turns, pre-filled with 'None'.
context = deque(DIALOG_SIZE * ['None'], maxlen=DIALOG_SIZE)
while (True):
    msg = input(">>> ")
    if (len(str(msg).strip()) != 0):
        context.append(str(msg).strip())
        batch = make_batch(context, vocab)
        sent_g = model.decoder_greedy(batch, max_dec_step=30)
        print(">>>", sent_g[0])
        # The model's reply becomes part of the context for the next turn.
        context.append(sent_g[0])
def train_model(train_iterator, val_iterator, test_iterator):
    """Train a Transformer classifier with early stopping and TensorBoard logging.

    Args:
        train_iterator / val_iterator / test_iterator: batch iterators exposing
            ``word2index`` (train) and yielding dicts with an ``x_lengths`` key.

    Side effects:
        Writes scalar losses to ``runs/transformer_train`` and
        ``runs/transformer_val``; prints per-epoch and final test metrics.
    """
    # ----- hyperparameters -----
    batch_size = 32
    vocab_size = len(train_iterator.word2index)
    dmodel = 64
    output_size = 2
    padding_idx = train_iterator.word2index['<PAD>']
    n_layers = 4
    ffnn_hidden_size = dmodel * 2
    heads = 8
    pooling = 'max'
    dropout = 0.5
    label_smoothing = 0.1
    learning_rate = 0.001
    epochs = 30

    CUDA = torch.cuda.is_available()

    # Longest sequence in the training set — sets the positional-encoding length.
    max_len = 0
    for batches in train_iterator:
        batch_max = int(max(batches['x_lengths']))  # hoisted: max() computed once
        if batch_max > max_len:
            max_len = batch_max

    model = Transformer(vocab_size, dmodel, output_size, max_len, padding_idx,
                        n_layers, ffnn_hidden_size, heads, pooling, dropout)
    if CUDA:
        model.cuda()

    if label_smoothing:
        loss_fn = LabelSmoothingLoss(output_size, label_smoothing)
    else:
        loss_fn = nn.NLLLoss()
    model.add_loss_fn(loss_fn)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    model.add_optimizer(optimizer)

    device = torch.device('cuda' if CUDA else 'cpu')
    model.add_device(device)

    params = {
        'batch_size': batch_size,
        'dmodel': dmodel,
        'n_layers': n_layers,
        'ffnn_hidden_size': ffnn_hidden_size,
        'heads': heads,
        'pooling': pooling,
        'dropout': dropout,
        'label_smoothing': label_smoothing,
        'learning_rate': learning_rate
    }

    train_writer = SummaryWriter('runs/transformer_train')
    val_writer = SummaryWriter('runs/transformer_val')

    early_stop = EarlyStopping(wait_epochs=3)

    train_losses_list, train_avg_loss_list, train_accuracy_list = [], [], []
    eval_avg_loss_list, eval_accuracy_list, conf_matrix_list = [], [], []

    # BUG FIX: the original placed try/finally *inside* the epoch loop, so the
    # finally block closed both SummaryWriters after the first epoch and every
    # later add_scalar wrote to a closed writer. The try now wraps the whole
    # loop and the writers are closed exactly once.
    try:
        for epoch in range(epochs):
            print('\nStart epoch [{}/{}]'.format(epoch + 1, epochs))

            train_losses, train_avg_loss, train_accuracy = model.train_model(
                train_iterator)
            train_losses_list.append(train_losses)
            train_avg_loss_list.append(train_avg_loss)
            train_accuracy_list.append(train_accuracy)

            _, eval_avg_loss, eval_accuracy, conf_matrix = model.evaluate_model(
                val_iterator)
            eval_avg_loss_list.append(eval_avg_loss)
            eval_accuracy_list.append(eval_accuracy)
            conf_matrix_list.append(conf_matrix)

            print(
                '\nEpoch [{}/{}]: Train accuracy: {:.3f}. Train loss: {:.4f}. Evaluation accuracy: {:.3f}. Evaluation loss: {:.4f}' \
                .format(epoch + 1, epochs, train_accuracy, train_avg_loss,
                        eval_accuracy, eval_avg_loss))

            train_writer.add_scalar('Training loss', train_avg_loss, epoch)
            val_writer.add_scalar('Validation loss', eval_avg_loss, epoch)

            # Stop when validation loss hasn't improved by delta for 3 epochs.
            if early_stop.stop(eval_avg_loss, model, delta=0.003):
                break
    finally:
        train_writer.close()
        val_writer.close()

    # Final held-out evaluation.
    _, test_avg_loss, test_accuracy, test_conf_matrix = model.evaluate_model(
        test_iterator)
    print('Test accuracy: {:.3f}. Test error: {:.3f}'.format(
        test_accuracy, test_avg_loss))