def __init__(self, config, inputs, pretrained_embeddings, tasks):
    # Build one shared encoder, then one task-specific module per task; each
    # component lives in its own variable scope so its parameters are grouped
    # under a readable name prefix.
    with tf.variable_scope('encoder'):
        self.encoder = encoder.Encoder(config, inputs, pretrained_embeddings)
    self.modules = {}
    for task in tasks:
        with tf.variable_scope(task.name):
            self.modules[task.name] = task.get_module(inputs, self.encoder)
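# Hedged sketch (not from the original source): the constructor above only requires
# that each task object expose a `name` attribute and a `get_module(inputs, encoder)`
# method. A minimal, hypothetical task satisfying that contract could look like the
# class below; the `encoder.outputs` attribute and the class count are assumptions
# made purely for illustration.
class ExampleClassificationTask(object):
    def __init__(self, name, n_classes):
        self.name = name
        self.n_classes = n_classes

    def get_module(self, inputs, encoder):
        # Project the shared encoder representation onto per-task class scores.
        return tf.layers.dense(encoder.outputs, self.n_classes, name='logits')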
import os
import timeit

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, RandomSampler
from torch.utils.tensorboard import SummaryWriter

import decoder
import encoder

# `device`, `line2idx`, `train` and `validate_no_teacher_forcing` are assumed to be
# defined elsewhere in this module.


def main(args):
    writer = SummaryWriter()

    # Build the encoder/decoder pair and move everything to the target device.
    encoder_model = encoder.Encoder(args.encoded_img_size)
    decoder_model = decoder.Decoder(attention_dim=args.attention_dim,
                                    embed_dim=args.word_embedding_dim,
                                    decoder_dim=args.decoder_dim,
                                    vocab_size=len(line2idx),
                                    dropout=args.dropout)
    # Unfreeze encoder weights only when they will actually be optimised.
    encoder_model.fine_tune(args.fine_tune_encoder)
    decoder_model = decoder_model.to(device)
    encoder_model = encoder_model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)

    # Optionally warm-start from pre-trained checkpoints.
    if args.encoder_path and os.path.isfile(args.encoder_path):
        encoder_model.load_state_dict(torch.load(args.encoder_path, map_location=device))
    else:
        print('pre-trained encoder model not provided')
    if args.decoder_path and os.path.isfile(args.decoder_path):
        decoder_model.load_state_dict(torch.load(args.decoder_path, map_location=device))
    else:
        print('pre-trained decoder model not provided')

    encoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, encoder_model.parameters()),
        lr=args.encoder_lr) if args.fine_tune_encoder else None
    decoder_optimizer = torch.optim.Adam(
        params=filter(lambda p: p.requires_grad, decoder_model.parameters()),
        lr=args.decoder_lr)

    # Load the pre-built datasets and wrap them in random-order loaders.
    train_dataset = torch.load(args.train_data)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=RandomSampler(train_dataset),
                                  batch_size=1)
    val_dataset = torch.load(args.val_data)
    val_dataloader = DataLoader(val_dataset,
                                sampler=RandomSampler(val_dataset),
                                batch_size=1)
    print('--finished loading train & validation data--')

    start = timeit.default_timer()
    for epoch in range(args.epochs):
        train_temp_loss = []
        for count, item in enumerate(train_dataloader):
            train_loss = train(encoder_model, decoder_model, encoder_optimizer,
                               decoder_optimizer, criterion, line2idx, item, count,
                               args.alpha_c, args.grad_clip, args.gradient_accumulation)
            train_temp_loss.append(train_loss)

        if epoch % args.print_every == 0:
            print('train loss at epoch', epoch, '=', np.mean(train_temp_loss))

        if epoch % args.val_every == 0:
            val_temp_accu = []
            for item in val_dataloader:
                hypotheses, accu = validate_no_teacher_forcing(
                    encoder_model, decoder_model, line2idx, item)
                val_temp_accu.append(accu)
            print('validation accuracy scores =', np.mean(val_temp_accu))
            # Log validation accuracy only in epochs where it was recomputed.
            writer.add_scalar('val_acc', np.mean(val_temp_accu), epoch)

        stop = timeit.default_timer()
        print('epoch', epoch, 'takes', stop - start, 'sec')
        print(' ')
        start = timeit.default_timer()
        writer.add_scalar('train_loss', np.mean(train_temp_loss), epoch)

    # save the models after finishing the epochs
    os.makedirs('result_dir', exist_ok=True)
    encoder_file = f'result_dir/encoder_{args.model_name}_{args.epochs}.pt'
    torch.save(encoder_model.state_dict(), encoder_file)
    decoder_file = f'result_dir/decoder_{args.model_name}_{args.epochs}.pt'
    torch.save(decoder_model.state_dict(), decoder_file)
    print(f'Saved model to {encoder_file} and {decoder_file}')
    writer.close()
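# Hedged sketch (not part of the original file): the `train(...)` helper called in
# main() is not shown here. The function below only illustrates how its `alpha_c`,
# `grad_clip` and `gradient_accumulation` arguments are commonly applied in an
# attention-based encoder/decoder step; the batch layout and the decoder's return
# values (scores, targets, alphas) are assumptions, not the project's actual API.
def train_step_sketch(encoder_model, decoder_model, encoder_optimizer,
                      decoder_optimizer, criterion, batch, step,
                      alpha_c, grad_clip, gradient_accumulation):
    images, captions, caption_lengths = batch                     # assumed batch layout
    features = encoder_model(images.to(device))
    scores, targets, alphas = decoder_model(features, captions.to(device),
                                            caption_lengths)      # assumed outputs
    loss = criterion(scores, targets)
    # Doubly stochastic attention regularisation, weighted by alpha_c.
    loss = loss + alpha_c * ((1.0 - alphas.sum(dim=1)) ** 2).mean()
    # Scale the loss so gradients accumulated over several steps average out.
    (loss / gradient_accumulation).backward()
    if (step + 1) % gradient_accumulation == 0:
        # Clip gradients before stepping to keep training stable.
        torch.nn.utils.clip_grad_norm_(decoder_model.parameters(), grad_clip)
        decoder_optimizer.step()
        decoder_optimizer.zero_grad()
        if encoder_optimizer is not None:
            torch.nn.utils.clip_grad_norm_(encoder_model.parameters(), grad_clip)
            encoder_optimizer.step()
            encoder_optimizer.zero_grad()
    return loss.item()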
import torch

import decoder
import encoder
import seq2seq

# voc_size, hid_size, max_len, sos and eos are assumed to be defined earlier in the file.
bidirectional = True
batches, batch_size = 5, 10

# read data: random token-id sequences stand in for a real dataset
# (ids 0 and 1 are left free, e.g. for padding / special tokens)
data = [
    torch.randint(2, voc_size, size=(batch_size, max_len))
    for _ in range(batches)
]
targets = [
    torch.randint(2, voc_size, size=(batch_size, max_len))
    for _ in range(batches)
]

# set model (distinct names keep the encoder/decoder modules from being shadowed)
encoder_model = encoder.Encoder(voc_size=voc_size,
                                hid_size=hid_size,
                                bidirectional=bidirectional)
decoder_model = decoder.Decoder(voc_size=voc_size,
                                hid_size=hid_size,
                                attn_type="dot-prod",
                                max_len=max_len,
                                sos=sos,
                                eos=eos,
                                bidirectional=bidirectional)
my_model = seq2seq.Seq2seq(encoder_model, decoder_model)

# set loss function and optimizer
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(my_model.parameters(), lr=0.001)

# Train process
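# Hedged sketch (not in the original snippet): one possible continuation of the
# "# Train process" comment above. It assumes my_model(x, y) returns logits of shape
# (batch_size, max_len, voc_size) under teacher forcing; adjust to the actual
# Seq2seq forward signature if it differs.
n_epochs = 10  # example value
for epoch in range(n_epochs):
    epoch_loss = 0.0
    for x, y in zip(data, targets):
        optimizer.zero_grad()
        logits = my_model(x, y)                                   # assumed forward signature
        loss = loss_function(logits.reshape(-1, voc_size), y.reshape(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"epoch {epoch}: mean loss = {epoch_loss / len(data):.4f}")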