def evaluate(input_sentences, output_sentences, input_vocab, output_vocab,
             input_reverse, output_reverse, hy, writer):
    dataset = NMTDataset(input_sentences, output_sentences, input_vocab,
                         output_vocab, input_reverse, output_reverse)
    loader = DataLoader(dataset, batch_size=hy.batch_size, shuffle=True,
                        drop_last=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_vocab_size = len(input_vocab)
    output_vocab_size = len(output_vocab)
    encoder = EncoderRNN(input_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    decoder = DecoderRNN(output_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    accuracies = []
    for epoch in range(1, hy.num_epochs + 1):
        # map_location lets CPU-only machines load checkpoints saved on GPU
        encoder.load_state_dict(
            torch.load("saved_runs/encoder_{}_weights.pt".format(epoch),
                       map_location=device))
        decoder.load_state_dict(
            torch.load("saved_runs/decoder_{}_weights.pt".format(epoch),
                       map_location=device))
        encoder.eval()
        decoder.eval()
        accuracy = compute_model_accuracy(encoder, decoder, loader, device,
                                          epoch, writer)
        accuracies.append(accuracy)
    print("=" * 80)
    print("Final Accuracy = {:.1f}".format(100. * np.max(accuracies)))
    print("=" * 80)
    return accuracies

def __init__(self):
    if hp.use_cuda:
        self.encoder: nn.Module = EncoderGCN(hp.graph_number,
                                             hp.graph_picture_size,
                                             hp.out_f_num, hp.Nz,
                                             bias_need=False).cuda()
        self.decoder: nn.Module = DecoderRNN().cuda()
    else:
        self.encoder: nn.Module = EncoderGCN(hp.graph_number,
                                             hp.graph_picture_size,
                                             hp.out_f_num, hp.Nz,
                                             bias_need=False)
        self.decoder: nn.Module = DecoderRNN()
    self.encoder_optimizer = optim.Adam(self.encoder.parameters(), hp.lr)
    self.decoder_optimizer = optim.Adam(self.decoder.parameters(), hp.lr)
    self.eta_step = hp.eta_min
    # Placeholders for the latent code (z) and the decoder's mixture-density
    # outputs: mixture weights, means, std-devs, correlation, and pen states.
    self.pi: torch.Tensor = torch.Tensor()
    self.z: torch.Tensor = torch.Tensor()
    self.mu_x: torch.Tensor = torch.Tensor()
    self.mu_y: torch.Tensor = torch.Tensor()
    self.sigma_x: torch.Tensor = torch.Tensor()
    self.sigma_y: torch.Tensor = torch.Tensor()
    self.rho_xy: torch.Tensor = torch.Tensor()
    self.q: torch.Tensor = torch.Tensor()

def choose_coders(dc, attention, search_size=8):
    '''Trains search_size candidate coders and returns the best encoder and decoder'''
    encoder = EncoderRNN()
    decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                         max_tokens=dc.max_tokens, attention=attention)
    logging.info('Choosing coders...')
    # Silence logging while the candidates are pretrained in parallel
    logger = logging.getLogger()
    logger.disabled = True
    results_encoder = u.multiples_launch(pretrain_rnn_layer,
                                         [encoder, encoder.encoder_cell, dc],
                                         num_process=search_size)
    results_decoder = u.multiples_launch(pretrain_rnn_layer,
                                         [decoder, decoder.decoder_cell, dc],
                                         num_process=search_size)
    logger.disabled = False
    # Keep the candidate with the highest accuracy
    results_encoder.sort(key=lambda x: x[0], reverse=True)
    results_decoder.sort(key=lambda x: x[0], reverse=True)
    logging.info('Accuracy of the best encoder = {}'.format(results_encoder[0][0]))
    encoder.load(name='{}-{}'.format(encoder.name, results_encoder[0][1]))
    logging.info('Accuracy of the best decoder = {}'.format(results_decoder[0][0]))
    decoder.load(name='{}-{}'.format(decoder.name, results_decoder[0][1]),
                 only_lstm=True)
    return encoder, decoder

def main(args):
    """Output learned representation of RRMs from CNN_LSTM autoencoder."""
    # Load pickled vocab
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load the already preprocessed data
    df_aligned = preprocess(preprocessed=True,
                            RRM_path=args.processed_RRM_path,
                            output_path=args.processed_RRM_path,
                            vocab=vocab)

    # Data loader
    dataset = RRM_Sequence(df_aligned, vocab)
    loader = DataLoader(dataset, 16, shuffle=True, collate_fn=collate_fn)

    encoderCNN = ResNetEncoder(84, 26, 64)   # TODO don't hardcode?
    decoderRNN = DecoderRNN(64, 128, 26, 1)  # TODO don't hardcode?

    # Use CUDA if available
    if torch.cuda.is_available():
        encoderCNN.cuda()
        decoderRNN.cuda()

    # Load pickled models
    with open(args.encoder_path, 'rb') as encoder:
        encoderCNN.load_state_dict(torch.load(encoder))
    with open(args.decoder_path, 'rb') as decoder:
        decoderRNN.load_state_dict(torch.load(decoder))

    # Loop over data
    for batch_idx, (names, rrms_aligned, rrms_unaligned,
                    lengths) in enumerate(loader):
        rrms_aligned = to_var(rrms_aligned)
        rrms_unaligned = to_var(rrms_unaligned)
        features = encoderCNN(rrms_aligned)
        if args.which_representation == 'encoder':
            hiddens = features
        else:
            hiddens = forward(decoderRNN, features, rrms_unaligned, lengths)
        hiddens = hiddens.data.cpu().numpy()
        # hiddens have shape (16, 64) for encoder, (16, 128) for decoder
        if batch_idx == 0:
            df = pd.DataFrame(hiddens)
            df['name'] = names
        else:
            df1 = pd.DataFrame(hiddens)
            df1['name'] = names
            df = pd.concat([df, df1])

    # Write to file
    df.to_csv(args.hidden_path)

def __init__(self, **kwargs):
    dp = DataPreprocessor()
    file_name_formatted = dp.write_to_file()
    dc = DataCleaner(file_name_formatted)
    dc.clean_data_pipeline().trim_rare_words()
    self.data_loader = DataLoader(dc.vocabulary, dc.pairs)
    self.dp = dp
    self.dc = dc
    load_embedding = kwargs.get('pretrained_embedding', False)
    embedding_file = kwargs.get('pretrained_embedding_file', None)
    load_enc_dec = kwargs.get('pretrained_enc_dec', False)
    load_enc_file = kwargs.get('pretrained_enc_file', None)
    load_dec_file = kwargs.get('pretrained_dec_file', None)
    self.model_name = kwargs.get('model_name', 'cb_model')
    attn_model = kwargs.get('attention_type', 'dot')
    self.hidden_size = kwargs.get('hidden_size', 500)
    self.encoder_nr_layers = kwargs.get('enc_nr_layers', 2)
    self.decoder_nr_layers = kwargs.get('dec_nr_layers', 2)
    dropout = kwargs.get('dropout', 0.1)
    self.batch_size = kwargs.get('batch_size', 64)
    self.clip = kwargs.get('clip', 50.0)
    self.teacher_forcing_ratio = kwargs.get('teacher_forcing_ratio', 1.0)
    self.learning_rate = kwargs.get('lr', 0.0001)
    self.decoder_learning_ratio = kwargs.get('decoder_learning_ratio', 5.0)
    self.nr_iteration = kwargs.get('nr_iterations', 4000)
    self.print_every = kwargs.get('print_every', 1)
    self.save_every = 500
    self.embedding = nn.Embedding(self.dc.vocabulary.num_words,
                                  self.hidden_size)
    if load_embedding:
        # the *_file kwargs hold checkpoint paths, so load from disk first
        self.embedding.load_state_dict(torch.load(embedding_file))
    # Initialize encoder & decoder models
    encoder = EncoderRNN(self.hidden_size, self.embedding,
                         self.encoder_nr_layers, dropout)
    decoder = DecoderRNN(attn_model, self.embedding, self.hidden_size,
                         self.dc.vocabulary.num_words,
                         self.decoder_nr_layers, dropout)
    if load_enc_dec:
        encoder.load_state_dict(torch.load(load_enc_file))
        decoder.load_state_dict(torch.load(load_dec_file))
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    self.encoder = encoder
    self.decoder = decoder
    self.encoder_optimizer = optim.Adam(encoder.parameters(),
                                        lr=self.learning_rate)
    self.decoder_optimizer = optim.Adam(
        decoder.parameters(),
        lr=self.learning_rate * self.decoder_learning_ratio)

def get_data():
    dc = DataContainer(os.environ['INPUT'], os.environ['EMB'])
    dc.prepare_data()
    x_a = [sample for batch in dc.x_train for sample in batch] + dc.x_te
    sl_a = [sample for batch in dc.sl_train for sample in batch] + dc.sl_te
    y_parrot_a = [sample for batch in dc.y_parrot_padded_batch
                  for sample in batch] + dc.y_p_p_te
    sos = dc.get_sos_batch_size(len(x_a))
    encoder = EncoderRNN()
    decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                         max_tokens=dc.max_tokens, attention=False)
    optimizer = tf.train.AdamOptimizer()
    x_batch = u.create_batch(x_a, batch_size=dc.batch_size)
    y_parrot_batch = u.create_batch(y_parrot_a, batch_size=dc.batch_size)
    sl_batch = u.create_batch(sl_a, batch_size=dc.batch_size)
    return (dc, x_a, sl_a, y_parrot_a, sos, encoder, decoder, optimizer,
            x_batch, y_parrot_batch, sl_batch)

def parrot_initialization_encoder_decoder(dataset, emb_path, attention):
    '''Trains the encoder-decoder to reproduce the input'''
    dc = DataContainer(dataset, emb_path)
    dc.prepare_data()
    x_batch, y_parrot_batch, sl_batch = u.to_batch(dc.x, dc.y_parrot_padded,
                                                   dc.sl,
                                                   batch_size=dc.batch_size)

    def get_loss(encoder, decoder, epoch, x, y, sl, sos):
        output, cell_state = encoder.forward(x, sl)
        loss = decoder.get_loss(epoch, sos, (cell_state, output), y, sl, x,
                                encoder.outputs)
        return loss

    if os.path.isdir('models/Encoder-Decoder'):
        rep = input('Load previously trained Encoder-Decoder? (y or n): ')
        if rep == 'y' or rep == '':
            encoder = EncoderRNN()
            decoder = DecoderRNN(dc.word2idx, dc.idx2word, dc.idx2emb,
                                 max_tokens=dc.max_tokens, attention=attention)
            encoder.load(name='Encoder-Decoder/Encoder')
            decoder.load(name='Encoder-Decoder/Decoder')
            sos = dc.get_sos_batch_size(len(dc.x))
            see_parrot_results(encoder, decoder, 'final', dc.x,
                               dc.y_parrot_padded, dc.sl, sos, greedy=True)
        else:
            encoder, decoder = choose_coders(dc, attention, search_size=5)
    else:
        encoder, decoder = choose_coders(dc, attention, search_size=5)

    optimizer = tf.train.AdamOptimizer()
    for epoch in range(300):
        for x, y, sl in zip(x_batch, y_parrot_batch, sl_batch):
            sos = dc.get_sos_batch_size(len(x))
            # grad_n_vars = optimizer.compute_gradients(lambda: get_loss(encoder, decoder, epoch, x, y, sl, sos))
            # optimizer.apply_gradients(grad_n_vars)
            optimizer.minimize(lambda: get_loss(encoder, decoder, epoch,
                                                x, y, sl, sos))
        if epoch % 30 == 0:
            # to reduce training time, compute global accuracy only every 30 epochs
            sos = dc.get_sos_batch_size(len(dc.x))
            see_parrot_results(encoder, decoder, epoch, dc.x,
                               dc.y_parrot_padded, dc.sl, sos, greedy=True)
            # see_parrot_results(encoder, decoder, epoch, dc.x, dc.y_parrot_padded, dc.sl, sos)
        encoder.save(name='Encoder-Decoder/Encoder')
        decoder.save(name='Encoder-Decoder/Decoder')
        if decoder.parrot_stopping:
            break
        # x_batch, y_parrot_batch, sl_batch = u.shuffle_data(x_batch, y_parrot_batch, sl_batch)
        # strangely, shuffling the data between epochs makes the training really noisy
    return encoder, decoder, dc

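# Note on the pattern above: passing a zero-argument callable to
# Optimizer.minimize(), as done in the epoch loop, is the TF 1.x
# eager-execution API; under graph execution the loss tensor itself would be
# passed instead (the commented compute_gradients/apply_gradients pair is the
# equivalent two-step form).
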
def __init__(self, hidden_size, cond_embed_size, output_size, target_path,
             criterion, epoch, train_or_not, lr, input_embed_size,
             teacher_forcing_ratio, ratio_kind):
    # initialize variables
    self.hidden_size = hidden_size
    self.cond_embed_size = cond_embed_size
    self.output_size = output_size
    self.target_path = target_path
    self.criterion = criterion
    self.train_or_not = train_or_not
    self.epoch = epoch
    self.learning_rate = lr
    self.teacher_forcing_ratio = teacher_forcing_ratio
    self.input_embed_size = input_embed_size
    self.ratio_kind = ratio_kind
    filename = self.get_bleuname()
    self.weight_name = 'CVAE_' + filename.replace('.csv', '') + '.pt'
    # initialize helper classes
    self.C2D = Char2Dict(cond_embed_size)
    self.DataLoader = Data(target_path)
    self.Encoder = EncoderRNN(input_embed_size, hidden_size,
                              cond_embed_size).to(device)
    self.Decoder = DecoderRNN(input_embed_size, hidden_size,
                              output_size).to(device)
    self.CVAE = CVAE(encoder=self.Encoder, decoder=self.Decoder,
                     hidden_size=self.hidden_size,
                     cond_embed_size=self.cond_embed_size, C2D=self.C2D,
                     Train=self.train_or_not, output_size=self.output_size,
                     teacher_forcing_ratio=self.teacher_forcing_ratio,
                     input_embed_size=self.input_embed_size)
    self.CVAE_optimizer = optim.SGD(self.CVAE.parameters(),
                                    lr=self.learning_rate, momentum=0.9)

def main(args):
    # Make a directory to save models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Preprocess the RRM data
    vocab, df_aligned = preprocess(preprocessed=args.preprocessed,
                                   RRM_path=args.aligned_RRM_path,
                                   output_path=args.processed_RRM_path,
                                   sep=args.sep)
    df_aligned = train_test_split(df_aligned)
    with open(os.path.join(args.model_path, 'vocab.pkl'), 'wb') as f:
        pickle.dump(vocab, f)

    # Prepare the training and validation sets
    train_index = pd.read_csv('../data/train_index.csv', header=None).iloc[:, 0]
    train_loader = RRM_Sequence(df_aligned.loc[train_index, :], vocab)
    train_loader = DataLoader(train_loader, batch_size=args.batch_size,
                              shuffle=True, collate_fn=collate_fn)
    val_index = pd.read_csv('../data/val_index.csv', header=None).iloc[:, 0]
    val_loader = RRM_Sequence(df_aligned.loc[val_index, :], vocab)
    val_loader = DataLoader(val_loader, batch_size=args.batch_size,
                            shuffle=True, collate_fn=collate_fn)

    # Define the models
    encoder = ResNetEncoder(df_aligned.shape[1], len(vocab), args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    # Use CUDA if available
    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    # Define the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Train the models
    total_step = len(train_loader)
    val_loss_history = []
    stop = False  # early-stopping flag, checked at the end of each epoch
    for epoch in range(args.num_epochs):
        for batch_idx, (names, rrms_aligned, rrms_unaligned,
                        lengths) in enumerate(train_loader):
            rrms_aligned = to_var(rrms_aligned)
            rrms_unaligned = to_var(rrms_unaligned)
            targets = pack_padded_sequence(rrms_unaligned, lengths,
                                           batch_first=True)[0]

            # Forward, backward, and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(rrms_aligned)
            outputs = decoder(features, rrms_unaligned, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Print log info
            if (batch_idx + 1) % args.log_step == 0:
                val_loss = validate(val_loader, encoder, decoder, criterion)
                val_loss_history.append(val_loss)
                print('Epoch [%d/%d], Step [%d/%d], Training Loss: %.4f, '
                      'Validation loss: %.4f'
                      % (epoch + 1, args.num_epochs, batch_idx + 1,
                         total_step, loss.item(), val_loss))
                stop = early_stop(val_loss_history)
                if stop:
                    print('=== Early stopping === Validation loss not '
                          'improving significantly ===')
                    torch.save(decoder.state_dict(), os.path.join(
                        args.model_path,
                        'decoder-anneal%s-%dcolumns-%d-%d.pkl'
                        % (args.learning_rate_annealing, df_aligned.shape[1],
                           epoch + 1, batch_idx + 1)))
                    torch.save(encoder.state_dict(), os.path.join(
                        args.model_path,
                        'encoder-anneal%s-%dcolumns-%d-%d.pkl'
                        % (args.learning_rate_annealing, df_aligned.shape[1],
                           epoch + 1, batch_idx + 1)))
                    break

            # Save the models
            if (batch_idx + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(), os.path.join(
                    args.model_path,
                    'decoder-anneal%s-%dcolumns-%d-%d.pkl'
                    % (args.learning_rate_annealing, df_aligned.shape[1],
                       epoch + 1, batch_idx + 1)))
                torch.save(encoder.state_dict(), os.path.join(
                    args.model_path,
                    'encoder-anneal%s-%dcolumns-%d-%d.pkl'
                    % (args.learning_rate_annealing, df_aligned.shape[1],
                       epoch + 1, batch_idx + 1)))

        # Decay the learning rate if specified
        if args.learning_rate_annealing:
            adjust_learning_rate(optimizer, epoch + 1)
        if stop:
            break

    43: 'punch', 44: 'throw', 45: 'pushup', 46: 'pour', 47: 'dribble',
    48: 'ride_bike', 49: 'situp'
}

abnormal_length = 0
device = torch.device(
    "cuda:1" if torch.cuda.is_available() else "cpu")  # use CPU or GPU
three_d_encoder = Res3d().to(device).eval()
embed_encoder = CNN_fc_EmbedEncoder().to(device).eval()
rnn_decoder = DecoderRNN().to(device).eval()
embed_encoder.load_state_dict(
    torch.load('./ckpt/cnn_encoder_epoch201.pth'))
rnn_decoder.load_state_dict(torch.load('./ckpt/rnn_decoder_epoch201.pth'))
print('---state dict loaded----')
video_dir = '../dataset/val_data'

# get videos
with torch.no_grad():
    lib_videos = os.listdir(video_dir)
    lib_videos.sort(key=lambda x: int(x[:-4]))  # sort by numeric filename
    for video_name in lib_videos:
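        # The loop body is truncated in the source; below is a minimal sketch
        # of the per-video inference it plausibly performs. `load_video_tensor`
        # and `action_names` are assumed helper names, not part of the original.
        # frames = load_video_tensor(os.path.join(video_dir, video_name)).to(device)
        # features = three_d_encoder(frames)      # 3D-CNN clip features
        # embedding = embed_encoder(features)     # fully connected embedding
        # logits = rnn_decoder(embedding)         # per-action-class scores
        # prediction = action_names[logits.argmax(dim=1).item()]
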
class FusionNetwork(Sequence2SequenceNetwork):
    def __init__(self, config):
        self.init_writer()
        self.load_configuration(config)
        self.load_vocabulary()
        self.prepare_data()
        self.build_model()
        self.load_pretrained_model()
        self.train_model()
        self.save_model(self.n_epochs)
        self.evaluate_all()
        self.close_writer()

    def prepare_data(self):
        self.modality = self.modality.split('-')
        self.pairs = prepareData(self.langs, self.modality)  # dict: m => pairs
        num_pairs = len(random.choice(list(self.pairs.values())))
        rand_indices = random.sample(list(range(num_pairs)), num_pairs)
        self.n_iters = num_pairs
        for m in self.modality:
            self.pairs[m] = self.pairs[m][:self.batch_size *
                                          (num_pairs // self.batch_size)]
            # Shuffle all modalities the same way
            self.pairs[m] = [p for p, _ in
                             sorted(zip(self.pairs[m], rand_indices))]
            print(random.choice(self.pairs[m]))
        print('\nLoading test data pairs')
        self.test_pairs = prepareData(self.langs, self.modality, train=False)
        self.num_test_pairs = len(random.choice(list(self.test_pairs.values())))
        rand_indices = random.sample(list(range(self.num_test_pairs)),
                                     self.num_test_pairs)
        if self.use_embeddings:
            if self.generate_word_embeddings:
                self.embedding_wts = generateWordEmbeddings(self.vocab,
                                                            self.emb_mode)
            else:
                self.embedding_wts = loadWordEmbeddings(self.emb_mode)

    def build_model(self):
        if self.use_embeddings:
            self.embedding = nn.Embedding.from_pretrained(self.embedding_wts)
        else:
            self.embedding = nn.Embedding(self.vocab.n_words,
                                          self.embedding_dim)
        self.encoder = MultimodalEncoderRNN(self.fusion, self.hidden_size,
                                            self.enc_n_layers, self.dropout,
                                            self.unit, self.modality,
                                            self.embedding,
                                            self.device).to(self.device)
        if self.fusion == 'early' or self.fusion is None:
            parameter_list = self.encoder.parameters()
        else:
            parameter_list = []
            for m in self.encoder.modalities:
                parameter_list += list(self.encoder.encoder[m].parameters())
        # Need to expand hidden layer according to
        # modalities for early fusion
        self.decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                                  self.hidden_size, self.vocab.n_words,
                                  self.unit, self.dec_n_layers, self.dropout,
                                  self.embedding).to(self.device)
        self.encoder_optimizer = optim.Adam(parameter_list, lr=self.lr)
        self.decoder_optimizer = optim.Adam(
            self.decoder.parameters(), lr=self.lr * self.dec_learning_ratio)
        self.epoch = 0  # define here to add resume training feature

    def load_pretrained_model(self):
        if self.load_model_name:
            checkpoint = torch.load(self.load_model_name,
                                    map_location=self.device)
            print('Loaded {}'.format(self.load_model_name))
            self.epoch = checkpoint['epoch']
            self.encoder.load_state_dict(checkpoint['en'])
            self.decoder.load_state_dict(checkpoint['de'])
            self.encoder_optimizer.load_state_dict(checkpoint['en_op'])
            self.decoder_optimizer.load_state_dict(checkpoint['de_op'])
            self.embedding.load_state_dict(checkpoint['embedding'])
            self.vocab.__dict__ = checkpoint['vocab_dict']
            self.evaluate_all()

    def train_model(self):
        best_score = 1e-200
        print_loss_total = 0  # Reset every epoch
        num_pairs = {}
        for m in self.modality:
            num_pairs[m] = len(self.pairs[m])
        saving_skipped = 0
        for epoch in range(self.epoch, self.n_epochs):
            incomplete = False
            for iter in range(0, self.n_iters, self.batch_size):
                training_batch = {}
                input_variable = {}
                lengths = {}
                for m in self.modality:
                    pairs = self.pairs[m][iter:iter + self.batch_size]
                    # Skip incomplete batch
                    if len(pairs) < self.batch_size:
                        incomplete = True
                        continue
                    training_batch[m] = batch2TrainData(self.vocab, pairs, m)
                    # Extract fields from batch
                    input_variable[m], lengths[m], target_variable, \
                        mask, max_target_len, _ = training_batch[m]
                if incomplete:
                    break
                # Run a training iteration with the current batch
                loss = self.train(input_variable, lengths, target_variable,
                                  mask, max_target_len, epoch, iter)
                self.writer.add_scalar('{}loss'.format(self.data_dir), loss,
                                       iter)
                print_loss_total += loss
            print_loss_avg = print_loss_total * self.batch_size / self.n_iters
            print_loss_total = 0
            print('Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, self.n_epochs,
                                                       print_loss_avg))
            # evaluate and save the model
            curr_score = self.evaluate_all()
            self.writer.add_scalar('{}bleu_score'.format(self.data_dir),
                                   curr_score, iter)
            if curr_score > best_score:
                saving_skipped = 0
                best_score = curr_score
                self.save_model(epoch)
            saving_skipped += 1
            if self.use_scheduler and saving_skipped > 3:
                saving_skipped = 0
                new_lr = self.lr * 0.5
                print('Entered the dungeon...')
                if new_lr > self.lr_lower_bound:  # lower bound on lr
                    self.lr = new_lr
                    print('lr decreased to => {}'.format(self.lr))

    def train(self, input_variable, lengths, target_variable, mask,
              max_target_len, epoch, iter):
        self.encoder.train()
        self.decoder.train()
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()
        for m in self.modality:
            input_variable[m] = input_variable[m].to(self.device)
            lengths[m] = lengths[m].to(self.device)
        target_variable = target_variable.to(self.device)
        mask = mask.to(self.device)
        # Initialize variables
        loss = 0
        print_losses = []
        n_totals = 0
        # Forward pass through encoder
        encoder_outputs, encoder_hidden = self.encoder(input_variable,
                                                       lengths)
        # Create initial decoder input (start with SOS tokens for each sentence)
        decoder_input = torch.LongTensor([[self.SOS_TOKEN] * self.batch_size])
        decoder_input = decoder_input.to(self.device)
        # Set initial decoder hidden state to the encoder's final hidden state
        if self.unit == 'gru':
            decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        else:
            decoder_hidden = (encoder_hidden[0][:self.decoder.n_layers],
                              encoder_hidden[1][:self.decoder.n_layers])
        if iter % conf['log_tb_every'] == 1:
            # Visualize latent space
            if self.unit == 'gru':
                vis_hidden = decoder_hidden[-1, :, :]
            else:
                vis_hidden = decoder_hidden[0][-1, :, :]
            self.writer.add_embedding(vis_hidden,
                                      tag='decoder_hidden_{}_{}'.format(
                                          epoch, iter))
        use_teacher_forcing = True if random.random() < \
            self.teacher_forcing_ratio else False
        if use_teacher_forcing:
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # Teacher forcing: next input is current target
                decoder_input = target_variable[t].view(1, -1)
                # Calculate and accumulate loss
                mask_loss, nTotal = self.mask_nll_loss(decoder_output,
                                                       target_variable[t],
                                                       mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
        else:
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # No teacher forcing: next input is decoder's own current output
                _, topi = decoder_output.topk(1)
                decoder_input = torch.LongTensor(
                    [[topi[i][0] for i in range(self.batch_size)]])
                decoder_input = decoder_input.to(self.device)
                # Calculate and accumulate loss
                mask_loss, nTotal = self.mask_nll_loss(decoder_output,
                                                       target_variable[t],
                                                       mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
        loss.backward()
        # Clip gradients: gradients are modified in place
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), self.clip)
        torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), self.clip)
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        return sum(print_losses) / n_totals

    def mask_nll_loss(self, inp, target, mask):
        n_total = mask.sum()
        cross_entropy = -torch.log(
            torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
        loss = cross_entropy.masked_select(mask).sum()
        loss = loss.to(self.device)
        return loss, n_total.item()

    def save_model(self, epoch):
        directory = self.save_dir
        if not os.path.exists(directory):
            os.makedirs(directory)
        torch.save({
            'epoch': epoch,
            'en': self.encoder.state_dict(),
            'de': self.decoder.state_dict(),
            'en_op': self.encoder_optimizer.state_dict(),
            'de_op': self.decoder_optimizer.state_dict(),
            'vocab_dict': self.vocab.__dict__,
            'embedding': self.embedding.state_dict()
        }, '{}{}-{}.pth'.format(directory, self.model_code, epoch))

    def evaluate_all(self):
        self.encoder.eval()
        self.decoder.eval()
        searcher = GreedySearchDecoder(self.encoder, self.decoder, None,
                                       self.device, self.SOS_TOKEN)
        refs = []
        hyp = []
        for id in range(self.num_test_pairs):
            # Sample test pairs of each modality
            output_words, reference = self.evaluate(searcher, self.vocab,
                                                    self.test_pairs, id)
            if output_words:
                final_output = []
                for x in output_words:
                    if x == '<EOS>':
                        break
                    final_output.append(x)
                refs.append(reference.split())
                hyp.append(final_output)
        bleu_scores = calculateBleuScores(refs, hyp)
        print('Bleu score: {bleu_1} | {bleu_2} | {bleu_3} | {bleu_4}'.format(
            **bleu_scores))
        eg_idx = random.choice(range(len(hyp)))
        print(hyp[eg_idx], refs[eg_idx])
        return bleu_scores['bleu_4']

    def evaluate(self, searcher, vocab, test_pairs, id,
                 max_length=conf['MAX_LENGTH']):
        lengths = {}
        input_batch = {}
        with torch.no_grad():
            reference = random.choice(list(test_pairs.values()))[id][1]
            for m in self.modality:
                sentence_or_vector = test_pairs[m][id][0]
                if m == 't':  # `sentence_or_vector` ~> sentence
                    # Format input sentence as a batch (words => indexes)
                    indexes_batch = [indexesFromSentence(vocab,
                                                         sentence_or_vector)]
                    if None in indexes_batch:
                        return None, None
                    for idx, indexes in enumerate(indexes_batch):
                        indexes_batch[idx] = indexes_batch[idx] + \
                            [self.EOS_TOKEN]
                    # Create lengths tensor
                    lengths[m] = torch.tensor(
                        [len(indexes) for indexes in indexes_batch])
                    # Transpose dimensions of batch to match models' expectations
                    input_batch[m] = torch.LongTensor(
                        indexes_batch).transpose(0, 1)
                else:  # `sentence_or_vector` ~> vector
                    input_batch[m], lengths[m] = \
                        inputVarVec([sentence_or_vector], m)
                # Use appropriate device
                input_batch[m] = input_batch[m].to(self.device)
                lengths[m] = lengths[m].to(self.device)
            # Decode sentence with searcher
            tokens, scores = searcher(input_batch, lengths, max_length)
            # indexes -> words
            decoded_words = [vocab.index2word[token.item()]
                             for token in tokens]
        return decoded_words, reference

    def close_writer(self):
        self.writer.close()

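# A quick self-contained check of the masked-NLL pattern used in
# mask_nll_loss above: probabilities are gathered at the gold token indices
# and padding positions are dropped via the boolean mask. The tensors below
# are toy values invented for illustration.
import torch

probs = torch.tensor([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1]])  # decoder output, shape (batch, vocab)
target = torch.tensor([0, 1])            # gold token ids, shape (batch,)
mask = torch.tensor([True, False])       # second position is padding

cross_entropy = -torch.log(torch.gather(probs, 1, target.view(-1, 1)).squeeze(1))
masked_loss = cross_entropy.masked_select(mask).sum()
print(masked_loss.item(), mask.sum().item())  # -log(0.7) ~ 0.357, 1 real token
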
class Sequence2SequenceNetwork(object):
    def __init__(self, config):
        self.init_writer()
        self.load_configuration(config)
        self.load_vocabulary()
        self.prepare_data()
        self.build_model()
        self.load_pretrained_model()
        self.train_model()
        self.save_model(self.n_epochs)
        self.evaluate_all()
        self.close_writer()

    def init_writer(self):
        self.writer = SummaryWriter()

    def load_configuration(self, config):
        # Load configuration
        self.iter_num = 0
        self.lr = config['lr']
        self.gpu = config['gpu']
        self.unit = config['unit']
        self.clip = config['clip']
        self.beta1 = config['beta1']
        self.beta2 = config['beta2']
        self.langs = config['langs']
        self.fusion = config['fusion']
        self.log_tb = config['log_tb']
        self.epsilon = config['epsilon']
        self.attn_model = config['attn']
        self.dropout = config['dropout']
        self.emb_mode = config['emb_mode']
        self.save_dir = config['save_dir']
        self.data_dir = config['data_dir']
        self.n_epochs = config['n_epochs']
        self.SOS_TOKEN = config['SOS_TOKEN']
        self.EOS_TOKEN = config['EOS_TOKEN']
        self.MAX_LENGTH = config['MAX_LENGTH']
        self.latent_dim = config['latent_dim']
        self.batch_size = config['batch_size']
        self.model_code = config['model_code']
        self.vocab_path = config['vocab_path']
        self.hidden_size = config['hidden_size']
        self.use_cuda = torch.cuda.is_available()
        self.log_tb_every = config['log_tb_every']
        self.enc_n_layers = config['enc_n_layers']
        self.dec_n_layers = config['dec_n_layers']
        self.dec_learning_ratio = config['dec_lr']
        self.bidirectional = config['bidirectional']
        self.enc_input_dim = config['enc_input_dim']
        self.embedding_dim = config['embedding_dim']
        self.use_scheduler = config['use_scheduler']
        self.use_embeddings = config['use_embeddings']
        self.lr_lower_bound = config['lr_lower_bound']
        self.teacher_forcing_ratio = config['tf_ratio']
        self.load_model_name = config['load_model_name']
        # no splitting, as it's not the multimodal case
        self.modality = config['modalities']
        if self.modality in ['ss-vv', 'v-s']:
            self.pretrained_modality = config['pretrained_modality']
        self.generate_word_embeddings = config['generate_word_embeddings']
        self.device = torch.device(
            'cuda:{}'.format(self.gpu) if self.use_cuda else 'cpu')

    def load_vocabulary(self):
        try:
            with open(self.vocab_path, 'rb') as f:
                self.vocab = pickle.load(f)
        except FileNotFoundError:
            # build vocab if it doesn't exist
            self.vocab = buildVocab()

    def prepare_data(self):
        # Note: The below workaround is used a lot and doing so is okay
        # because this script would only be run for unimodal cases
        self.pairs = prepareData(self.langs, [self.modality])[self.modality]
        num_pairs = len(self.pairs)
        self.pairs = self.pairs[:self.batch_size *
                                (num_pairs // self.batch_size)]
        random.shuffle(self.pairs)
        self.n_iters = len(self.pairs)
        print('\nLoading test data pairs')
        self.test_pairs = prepareData(self.langs, [self.modality],
                                      train=False)[self.modality]
        random.shuffle(self.test_pairs)
        print(random.choice(self.pairs))
        if self.use_embeddings:
            if self.generate_word_embeddings:
                self.embedding_wts = generateWordEmbeddings(self.vocab,
                                                            self.emb_mode)
            else:
                self.embedding_wts = loadWordEmbeddings(self.emb_mode)

    def build_model(self):
        if self.use_embeddings:
            self.embedding = nn.Embedding.from_pretrained(self.embedding_wts)
        else:
            self.embedding = nn.Embedding(self.vocab.n_words,
                                          self.embedding_dim)
        if self.modality == 't':
            # Need embedding only for t2t mode
            self.encoder = EncoderRNN(self.embedding_dim, self.hidden_size,
                                      self.enc_n_layers, self.dropout,
                                      self.unit, self.modality,
                                      self.embedding,
                                      fusion_or_unimodal=True).to(self.device)
        else:
            # Note: no embedding used here
            self.encoder = EncoderRNN(self.enc_input_dim, self.hidden_size,
                                      self.enc_n_layers, self.dropout,
                                      self.unit, self.modality,
                                      fusion_or_unimodal=True).to(self.device)
        self.decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                                  self.hidden_size, self.vocab.n_words,
                                  self.unit, self.dec_n_layers, self.dropout,
                                  self.embedding).to(self.device)
        self.encoder_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.lr)
        self.decoder_optimizer = optim.Adam(
            self.decoder.parameters(), lr=self.lr * self.dec_learning_ratio)
        self.epoch = 0  # define here to add resume training feature
        self.project_factor = self.encoder.project_factor
        self.latent2hidden = nn.Linear(
            self.latent_dim,
            self.hidden_size * self.project_factor).to(self.device)

    def load_pretrained_model(self):
        if self.load_model_name:
            checkpoint = torch.load(self.load_model_name,
                                    map_location=self.device)
            print('Loaded {}'.format(self.load_model_name))
            self.epoch = checkpoint['epoch']
            self.encoder.load_state_dict(checkpoint['en'])
            self.decoder.load_state_dict(checkpoint['de'])
            self.encoder_optimizer.load_state_dict(checkpoint['en_op'])
            self.decoder_optimizer.load_state_dict(checkpoint['de_op'])
            self.embedding.load_state_dict(checkpoint['embedding'])

    def train_model(self):
        best_score = 1e-200
        print_loss_total = 0  # Reset every epoch
        saving_skipped = 0
        for epoch in range(self.epoch, self.n_epochs):
            incomplete = False
            for iter in range(0, self.n_iters, self.batch_size):
                pairs = self.pairs[iter:iter + self.batch_size]
                # Skip incomplete batch
                if len(pairs) < self.batch_size:
                    incomplete = True
                    continue
                training_batch = batch2TrainData(self.vocab, pairs,
                                                 self.modality)
                # Extract fields from batch
                input_variable, lengths, target_variable, \
                    mask, max_target_len, _ = training_batch
                if incomplete:
                    break
                # Run a training iteration with the current batch
                loss = self.train(input_variable, lengths, target_variable,
                                  mask, max_target_len, iter)
                self.writer.add_scalar('{}loss'.format(self.data_dir), loss,
                                       iter)
                print_loss_total += loss
            print_loss_avg = print_loss_total * self.batch_size / self.n_iters
            print_loss_total = 0
            print('Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, self.n_epochs,
                                                       print_loss_avg))
            # evaluate and save the model
            curr_score = self.evaluate_all()
            self.writer.add_scalar('{}bleu_score'.format(self.data_dir),
                                   curr_score)
            if curr_score > best_score:
                saving_skipped = 0
                best_score = curr_score
                self.save_model(epoch)
            saving_skipped += 1
            if self.use_scheduler and saving_skipped > 3:
                saving_skipped = 0
                new_lr = self.lr * 0.5
                print('Entered the dungeon...')
                if new_lr > self.lr_lower_bound:  # lower bound on lr
                    self.lr = new_lr
                    print('lr decreased to => {}'.format(self.lr))

    def train(self, input_variable, lengths, target_variable, mask,
              max_target_len, iter):
        self.encoder.train()
        self.decoder.train()
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()
        input_variable = input_variable.to(self.device)
        lengths = lengths.to(self.device)
        target_variable = target_variable.to(self.device)
        mask = mask.to(self.device)
        # Initialize variables
        loss = 0
        print_losses = []
        n_totals = 0
        # Forward pass through encoder
        encoder_outputs, encoder_hidden = self.encoder(input_variable,
                                                       lengths)
        # Create initial decoder input (start with SOS tokens for each sentence)
        decoder_input = torch.LongTensor([[self.SOS_TOKEN] * self.batch_size])
        decoder_input = decoder_input.to(self.device)
        # Set initial decoder hidden state to the encoder's final hidden state
        if self.unit == 'gru':
            decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        else:
            decoder_hidden = (encoder_hidden[0][:self.decoder.n_layers],
                              encoder_hidden[1][:self.decoder.n_layers])
        if iter % conf['log_tb_every'] == 0:
            # Visualize latent space
            if self.unit == 'gru':
                vis_hidden = decoder_hidden[-1, :, :]
            else:
                vis_hidden = decoder_hidden[0][-1, :, :]
            self.writer.add_embedding(vis_hidden,
                                      tag='decoder_hidden_{}'.format(iter))
        use_teacher_forcing = True if random.random() < \
            self.teacher_forcing_ratio else False
        if use_teacher_forcing:
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # Teacher forcing: next input is current target
                decoder_input = target_variable[t].view(1, -1)
                # Calculate and accumulate loss
                mask_loss, nTotal = self.mask_nll_loss(decoder_output,
                                                       target_variable[t],
                                                       mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
        else:
            for t in range(max_target_len):
                decoder_output, decoder_hidden = self.decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                # No teacher forcing: next input is decoder's own current output
                _, topi = decoder_output.topk(1)
                decoder_input = torch.LongTensor(
                    [[topi[i][0] for i in range(self.batch_size)]])
                decoder_input = decoder_input.to(self.device)
                # Calculate and accumulate loss
                mask_loss, nTotal = self.mask_nll_loss(decoder_output,
                                                       target_variable[t],
                                                       mask[t])
                loss += mask_loss
                print_losses.append(mask_loss.item() * nTotal)
                n_totals += nTotal
        loss.backward()
        # Clip gradients: gradients are modified in place
        torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), self.clip)
        torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), self.clip)
        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        return sum(print_losses) / n_totals

    def mask_nll_loss(self, inp, target, mask):
        n_total = mask.sum()
        cross_entropy = -torch.log(
            torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
        loss = cross_entropy.masked_select(mask).sum()
        loss = loss.to(self.device)
        return loss, n_total.item()

    def save_model(self, epoch):
        directory = self.save_dir
        if not os.path.exists(directory):
            os.makedirs(directory)
        torch.save(
            {
                'epoch': epoch,
                'en': self.encoder.state_dict(),
                'de': self.decoder.state_dict(),
                'en_op': self.encoder_optimizer.state_dict(),
                'de_op': self.decoder_optimizer.state_dict(),
                'embedding': self.embedding.state_dict()
            }, '{}{}-{}-{}-{}.pth'.format(directory, self.model_code,
                                          self.modality, self.langs, epoch))

    def evaluate_all(self):
        self.encoder.eval()
        self.decoder.eval()
        searcher = GreedySearchDecoder(self.encoder, self.decoder, None,
                                       self.device, self.SOS_TOKEN)
        refs = []
        hyp = []
        for pair in self.test_pairs:
            output_words = self.evaluate(self.encoder, self.decoder, searcher,
                                         self.vocab, pair[0])
            if output_words:
                final_output = []
                for x in output_words:
                    if x == '<EOS>':
                        break
                    final_output.append(x)
                refs.append([pair[1].split()])
                hyp.append(final_output)
        bleu_scores = calculateBleuScores(refs, hyp)
        print('Bleu score: {bleu_1} | {bleu_2} | {bleu_3} | {bleu_4}'.format(
            **bleu_scores))
        eg_idx = random.choice(range(len(hyp)))
        print(hyp[eg_idx], refs[eg_idx])
        return bleu_scores['bleu_4']

    def evaluate(self, encoder, decoder, searcher, vocab, sentence_or_vector,
                 max_length=conf['MAX_LENGTH']):
        with torch.no_grad():
            if self.modality == 't':  # `sentence_or_vector` ~> sentence
                # Format input sentence as a batch (words => indexes)
                indexes_batch = [indexesFromSentence(vocab,
                                                     sentence_or_vector)]
                if None in indexes_batch:
                    return None
                for idx, indexes in enumerate(indexes_batch):
                    indexes_batch[idx] = indexes_batch[idx] + [self.EOS_TOKEN]
                # Create lengths tensor
                lengths = torch.tensor(
                    [len(indexes) for indexes in indexes_batch])
                # Transpose dimensions of batch to match models' expectations
                input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
            else:  # `sentence_or_vector` ~> vector
                input_batch, lengths = inputVarVec([sentence_or_vector],
                                                   self.modality)
            # Use appropriate device
            input_batch = input_batch.to(self.device)
            lengths = lengths.to(self.device)
            # Decode sentence with searcher
            tokens, scores = searcher(input_batch, lengths, max_length)
            # indexes -> words
            decoded_words = [vocab.index2word[token.item()]
                             for token in tokens]
        return decoded_words

    def close_writer(self):
        self.writer.close()

class HRED(nn.Module):
    def __init__(self, num_vocab, emb_size, enc_word_rnn_size,
                 enc_word_num_layers, enc_context_rnn_size,
                 enc_context_num_layers, KG_word_rnn_size, KG_word_num_layers,
                 dec_rnn_size, dec_num_layers, dec_num_softmax, dropout,
                 pre_embedding=None):
        super(HRED, self).__init__()
        self.word_embedding = nn.Embedding(num_vocab, emb_size)
        if pre_embedding is not None:
            self.word_embedding.weight = nn.Parameter(
                self.word_embedding.weight.data.new(pre_embedding))
        self.context_encoder = ContextEncoderRNN(
            word_embedding=self.word_embedding,
            word_rnn_size=enc_word_rnn_size,
            word_num_layers=enc_word_num_layers,
            context_rnn_size=enc_context_rnn_size,
            context_num_layers=enc_context_num_layers,
            dropout=dropout)
        self.KG_encoder = WordEncoderRNN(word_embedding=self.word_embedding,
                                         rnn_size=KG_word_rnn_size,
                                         num_layers=KG_word_num_layers,
                                         dropout=dropout)
        self.decoder = DecoderRNN(word_embedding=self.word_embedding,
                                  src_word_size=enc_word_rnn_size,
                                  src_context_size=enc_context_rnn_size,
                                  KG_word_size=KG_word_rnn_size,
                                  rnn_size=dec_rnn_size,
                                  num_layers=dec_num_layers,
                                  num_softmax=dec_num_softmax,
                                  dropout=dropout)

    def forward(self, src_sents, src_word_len, src_utterance_len, KG_sents,
                KG_word_len, tgt_word_input, combine_knowledge):
        """
        src_sents (LongTensor) : [src_num_sent, src_word_len]
        src_word_len (LongTensor) : [src_num_sent]
        src_utterance_len (LongTensor) : [batch_size]
        KG_sents (LongTensor) : [KG_num_sent, KG_word_len]
        KG_word_len (LongTensor) : [KG_num_sent]
        tgt_word_input (LongTensor) : [tgt_num_sent, tgt_word_len]
        """
        # src_context_outputs : [tgt_num_sents, src_context_rnn_size]
        # src_word_outputs : [tgt_num_sents, max_src_word_len, src_word_rnn_size]
        src_context_outputs, src_word_outputs, src_word_len = \
            self.context_encoder(src_sents, src_word_len, src_utterance_len)
        # KG_word_output : [KG_num_sent, max_KG_word_len, KG_word_rnn_size]
        KG_word_output, _ = self.KG_encoder(KG_sents, KG_word_len)
        # logit : [batch_size, tgt_word_len, num_vocab]
        # coverage : [batch_size, tgt_word_len]
        logit = self.decoder(src_context_outputs, src_word_outputs,
                             src_word_len, KG_word_output, KG_word_len,
                             KG_sents, tgt_word_input, combine_knowledge)
        return logit

    def greedy_generate(self, src_sents, src_word_len, src_utterance_len,
                        KG_sents, KG_word_len, max_tgt_word_len,
                        initial_word_idx, combine_knowledge, temperature,
                        topk, topp):
        """
        src_sents (LongTensor) : [src_num_sent, src_word_len]
        src_word_len (LongTensor) : [src_num_sent]
        src_utterance_len (LongTensor) : [batch_size]
        max_tgt_word_len (int) : the maximum length of the target sequence
        """
        # src_context_output : [tgt_num_sents, src_context_rnn_size]
        # src_word_output : [tgt_num_sents, src_word_len, src_word_rnn_size]
        src_context_output, src_word_output, src_word_len = \
            self.context_encoder(src_sents, src_word_len, src_utterance_len)
        KG_word_output, _ = self.KG_encoder(KG_sents, KG_word_len)
        num_sents = src_context_output.size(0)
        # init_tgt_word_input : [tgt_num_sents]
        init_tgt_word_input = torch.ones(
            num_sents, device=src_sents.device).long() * initial_word_idx
        generated_tgt_word = self.decoder.greedy_generate(
            src_context_output, src_word_output, src_word_len, KG_word_output,
            KG_word_len, KG_sents, init_tgt_word_input, max_tgt_word_len,
            combine_knowledge, temperature, topk, topp)
        return generated_tgt_word

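# Hedged usage sketch for HRED.greedy_generate. Tensor shapes follow the
# docstrings above; every concrete size and the SOS index below are
# assumptions for illustration, not values from the source.
# model = HRED(num_vocab=10000, emb_size=300, enc_word_rnn_size=512,
#              enc_word_num_layers=1, enc_context_rnn_size=512,
#              enc_context_num_layers=1, KG_word_rnn_size=512,
#              KG_word_num_layers=1, dec_rnn_size=512, dec_num_layers=1,
#              dec_num_softmax=5, dropout=0.2)
# generated = model.greedy_generate(src_sents, src_word_len, src_utterance_len,
#                                   KG_sents, KG_word_len, max_tgt_word_len=30,
#                                   initial_word_idx=SOS_IDX,
#                                   combine_knowledge=True, temperature=1.0,
#                                   topk=0, topp=0.9)
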
def __init__(self, src_vocab_size, tgt_vocab_size, src_emb_dim, tgt_emb_dim,
             enc_hidden_size, dec_hidden_size, context_hidden_size,
             batch_size, image_in_size, bidirectional_enc=True,
             bidirectional_context=False, num_enc_layers=1, num_dec_layers=1,
             num_context_layers=1, dropout_enc=0.4, dropout_dec=0.4,
             dropout_context=0.4, max_decode_len=40, non_linearity='tanh',
             enc_type='GRU', dec_type='GRU', context_type='GRU',
             use_attention=True, decode_function='softmax', sos_id=2,
             eos_id=3, tie_embedding=True, activation_bridge='Tanh',
             num_states=None, use_kb=False, kb_size=None,
             celeb_vec_size=None):
    super(HRED, self).__init__()
    self.src_vocab_size = src_vocab_size
    self.tgt_vocab_size = tgt_vocab_size
    self.src_emb_dim = src_emb_dim
    self.tgt_emb_dim = tgt_emb_dim
    self.batch_size = batch_size
    self.bidirectional_enc = bidirectional_enc
    self.bidirectional_context = bidirectional_context
    self.num_enc_layers = num_enc_layers
    self.num_dec_layers = num_dec_layers
    self.num_context_layers = num_context_layers
    self.dropout_enc = dropout_enc  # dropout prob for encoder
    self.dropout_dec = dropout_dec  # dropout prob for decoder
    self.dropout_context = dropout_context  # dropout prob for context
    self.non_linearity = non_linearity  # default nn.tanh(); nn.relu()
    self.enc_type = enc_type
    self.dec_type = dec_type
    self.context_type = context_type
    self.sos_id = sos_id  # start token
    self.eos_id = eos_id  # end token
    self.decode_function = decode_function  # @TODO: softmax or log softmax
    self.max_decode_len = max_decode_len  # max timesteps for decoder
    self.attention_size = dec_hidden_size  # Same as enc/dec hidden size!!
    # self.context_hidden_size = context_hidden_size
    # self.enc_hidden_size = enc_hidden_size
    # All implementations have encoder hidden size halved
    self.num_directions = 2 if bidirectional_enc else 1
    self.enc_hidden_size = enc_hidden_size // self.num_directions
    self.num_directions = 2 if bidirectional_context else 1
    self.context_hidden_size = context_hidden_size // self.num_directions
    self.dec_hidden_size = dec_hidden_size
    self.use_attention = use_attention
    self.image_in_size = image_in_size
    self.image_out_size = self.dec_hidden_size  # Project on same size as enc hidden
    self.use_kb = use_kb
    self.kb_size = kb_size
    self.celeb_vec_size = celeb_vec_size
    # Equating to emb_size = tgt_emb_dim for now.
    # Default to hidden_size = dec_hidden_size for now.
    self.kb_emb_size = self.tgt_emb_dim
    self.kb_hidden_size = self.dec_hidden_size
    self.kb_encoder = KbEncoder(self.kb_size, self.kb_emb_size,
                                self.kb_hidden_size, rnn_type='GRU',
                                num_layers=1, batch_first=True, dropout=0,
                                bidirectional=False)
    # Same for kb and celebs for now.
    self.celeb_encoder = KbEncoder(self.celeb_vec_size, self.kb_emb_size,
                                   self.kb_hidden_size, rnn_type='GRU',
                                   num_layers=1, batch_first=True, dropout=0,
                                   bidirectional=False)
    # Initialize encoder
    self.encoder = EncoderRNN(self.src_vocab_size, self.src_emb_dim,
                              self.enc_hidden_size, self.enc_type,
                              self.num_enc_layers, batch_first=True,
                              dropout=self.dropout_enc,
                              bidirectional=self.bidirectional_enc)
    # self.image_encoder = ImageEncoder(self.image_in_size, self.image_out_size)
    # Initialize bridge layer
    self.activation_bridge = activation_bridge
    self.bridge = BridgeLayer(self.enc_hidden_size, self.dec_hidden_size,
                              self.activation_bridge)
    # Initialize context encoder
    self.context_input_size = enc_hidden_size  # self.image_out_size + enc_hidden_size # image+text
    self.context_encoder = ContextRNN(self.context_input_size,
                                      self.context_hidden_size,
                                      self.context_type,
                                      self.num_context_layers,
                                      batch_first=True,
                                      dropout=self.dropout_context,
                                      bidirectional=self.bidirectional_context)
    # Initialize RNN decoder
    self.decoder = DecoderRNN(self.tgt_vocab_size, self.tgt_emb_dim,
                              self.dec_hidden_size, self.dec_type,
                              self.num_dec_layers, self.max_decode_len,
                              self.dropout_dec, batch_first=True,
                              use_attention=self.use_attention,
                              attn_size=self.attention_size,
                              sos_id=self.sos_id, eos_id=self.eos_id,
                              use_input_feed=True, use_kb=self.use_kb,
                              kb_size=self.kb_hidden_size,
                              celeb_vec_size=self.kb_hidden_size)
    if tie_embedding:
        self.decoder.embedding = self.encoder.embedding
    # Initialize parameters
    self.init_params()

def train(input_sentences, output_sentences, input_vocab, output_vocab,
          input_reverse, output_reverse, hy, writer):
    dataset = NMTDataset(input_sentences, output_sentences, input_vocab,
                         output_vocab, input_reverse, output_reverse)
    loader = DataLoader(dataset, batch_size=hy.batch_size, shuffle=True,
                        drop_last=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_vocab_size = len(input_vocab)
    output_vocab_size = len(output_vocab)
    encoder = EncoderRNN(input_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    decoder = DecoderRNN(output_vocab_size, hy.embedding_size, hy.hidden_size,
                         hy.rnn_layers, hy.bidirectional, device)
    loss_function = nn.CrossEntropyLoss().to(device)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=hy.lr)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=hy.lr)
    n_iterations = 0
    loss_history = []
    training_accuracy = 0.
    encoder.train()
    decoder.train()
    for epoch in range(1, hy.num_epochs + 1):
        for encoder_input, decoder_input, decoder_output in tqdm(
                loader, desc="{}/{}".format(epoch, hy.num_epochs)):
            encoder_input = encoder_input.to(device)
            decoder_input = decoder_input.to(device)
            decoder_output = decoder_output.to(device)
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
            _, encoder_hidden = encoder(encoder_input)
            logits = decoder(decoder_input, encoder_hidden)
            loss = loss_function(
                logits.view(hy.batch_size * decoder_output.shape[1], -1),
                decoder_output.view(-1))
            loss.backward()
            encoder_optimizer.step()
            decoder_optimizer.step()
            writer.add_scalar("TrainingLoss", loss.item(), n_iterations)
            n_iterations = n_iterations + 1
            loss_history.append(loss.item())
        training_accuracy = compute_model_accuracy(encoder, decoder, loader,
                                                   device, epoch, writer)
        torch.save(encoder.state_dict(),
                   "saved_runs/encoder_{}_weights.pt".format(epoch))
        torch.save(decoder.state_dict(),
                   "saved_runs/decoder_{}_weights.pt".format(epoch))
    return loss_history, training_accuracy

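# Hedged end-to-end usage: train() writes per-epoch checkpoints under
# saved_runs/, and evaluate() above reloads each of them to pick the best
# epoch. The preprocessing that builds the sentence lists, vocabularies, and
# the `hy` hyperparameter object is assumed to exist elsewhere in this repo.
# writer = SummaryWriter()
# loss_history, train_acc = train(input_sentences, output_sentences,
#                                 input_vocab, output_vocab,
#                                 input_reverse, output_reverse, hy, writer)
# accuracies = evaluate(input_sentences, output_sentences, input_vocab,
#                       output_vocab, input_reverse, output_reverse, hy,
#                       writer)
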
describe = [pair[2] for pair in trainpairs]
print(
    "ontology name:", trainpairs[0][0],
    "\nontology token:", trainpairs[0][1],
    "\ngene description:", describe[0],
    "\n",
)
encodernum = max(map(len, describe))
print(encodernum)
hidden_size = 256
encoder1 = EncoderRNN(voc.num_words, hidden_size).to(device)
attn_decoder1 = DecoderRNN(hidden_size, voc.num_words).to(device)
CombineEncoder = CombineEncoderRNN(hidden_size, hidden_size).to(device)
trainIters(encoder1, attn_decoder1, CombineEncoder, trainpairs, 20)

encoder_save_path = "model/combineEncoder+" + nowTime + "+.pth"
decoder_save_path = "model/combineDecoder+" + nowTime + "+.pth"
combiner_save_path = "model/combineCombiner+" + nowTime + "+.pth"
torch.save(encoder1, current_dir + '/' + encoder_save_path)
torch.save(attn_decoder1, current_dir + "/" + decoder_save_path)
torch.save(CombineEncoder, current_dir + "/" + combiner_save_path)
model1 = torch.load(current_dir + "/" + encoder_save_path)
model2 = torch.load(current_dir + "/" + decoder_save_path)
model3 = torch.load(current_dir + "/" + combiner_save_path)
# evaluateRandomly(
#     model1.to(device), model2.to(device), model3.to(device), testpairs)

# reset data loader
train_loader_params = {'batch_size': batch_size, 'shuffle': True}
val_loader_params = {'batch_size': batch_size, 'shuffle': False}

# dataloaders
loader_train = Dataset_CRNN(data_path=data_dir)
loader_val = Dataset_CRNN_VAL(data_path=data_dir)
train_data_loader = data.DataLoader(loader_train, **train_loader_params)
val_data_loader = data.DataLoader(loader_val, **val_loader_params)
dict_train = loader_train.idx2word
dict_val = loader_val.idx2word

# models
embed_encoder = CNN_fc_EmbedEncoder().to(device)
rnn_decoder = DecoderRNN(CNN_embed_dim=CNN_embed_dim,
                         h_RNN_layers=RNN_hidden_layers,
                         h_RNN=RNN_hidden_nodes,
                         h_FC_dim=RNN_FC_dim,
                         drop_p=dropout_p,
                         num_classes=category).to(device)

# optimize both networks jointly
crnn_params = list(rnn_decoder.parameters()) + list(embed_encoder.parameters())
optimizer = torch.optim.Adam(crnn_params, lr=learning_rate, weight_decay=1e-4)
criterion = torch.nn.CrossEntropyLoss()

scores = []
g_minibatch_train = 0
g_minibatch_val = 0


def train(log_interval, cnn_encoder, rnn_decoder, device, train_loader,
          optimizer, epoch):
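    # The body of train() is cut off in the source; what follows is a minimal
    # sketch of a standard CRNN epoch loop under that signature. The (X, y)
    # batch layout and the encoder -> decoder call chain are assumptions.
    cnn_encoder.train()
    rnn_decoder.train()
    for batch_idx, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        output = rnn_decoder(cnn_encoder(X))  # (batch, num_classes) logits
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % log_interval == 0:
            print('Epoch {} [{}/{}] loss: {:.4f}'.format(
                epoch, batch_idx + 1, len(train_loader), loss.item()))
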
decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                     opt.embedding_size,
                     dropout_p=opt.dropout_p_encoder,
                     n_layers=opt.n_layers,
                     bidirectional=opt.bidirectional,
                     rnn_cell=opt.rnn_cell,
                     variable_lengths=True)
decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                     dropout_p=opt.dropout_p_decoder,
                     n_layers=opt.n_layers,
                     use_attention=opt.attention,
                     attention_method=opt.attention_method,
                     full_focus=opt.full_focus,
                     bidirectional=opt.bidirectional,
                     rnn_cell=opt.rnn_cell,
                     eos_id=tgt.eos_id, sos_id=tgt.sos_id)
seq2seq = Seq2seq(encoder, decoder)
seq2seq.to(device)

# Small uniform weight initialization, as in classic seq2seq recipes
for param in seq2seq.parameters():
    param.data.uniform_(-0.08, 0.08)

input_vocabulary = input_vocab.itos
output_vocabulary = output_vocab.itos
# random.seed(3)

class CorrelationNetwork(Sequence2SequenceNetwork):
    def __init__(self, config):
        self.init_writer()
        self.load_configuration(config)
        self.load_vocabulary()
        self.prepare_data()
        self.build_model()
        self.load_pretrained_model()
        self.train_model()
        self.save_model(self.n_epochs)
        self.evaluate_all()
        self.close_writer()

    def build_model(self):
        if self.use_embeddings:
            self.embedding = nn.Embedding.from_pretrained(self.embedding_wts)
        else:
            self.embedding = nn.Embedding(self.vocab.n_words,
                                          self.embedding_dim)
        self.encoders = []
        self.encoder_optimizers = []
        # Note: No embeddings used in the encoders
        for m in ['v', 's']:
            encoder = EncoderRNN(self.enc_input_dim[m], self.hidden_size,
                                 self.enc_n_layers, self.dropout, self.unit,
                                 m).to(self.device)
            encoder_optimizer = optim.Adam(encoder.parameters(), lr=self.lr)
            if self.modality == 'ss-vv':
                checkpoint = torch.load(self.pretrained_modality[m],
                                        map_location=self.device)
                encoder.load_state_dict(checkpoint['en'])
                encoder_optimizer.load_state_dict(checkpoint['en_op'])
            self.encoders.append(encoder)
            self.encoder_optimizers.append(encoder_optimizer)
        self.decoder = DecoderRNN(self.attn_model, self.embedding_dim,
                                  self.hidden_size, self.vocab.n_words,
                                  self.unit, self.dec_n_layers, self.dropout,
                                  self.embedding).to(self.device)
        text_checkpoint = torch.load(self.pretrained_modality['t'],
                                     map_location=self.device)
        self.decoder.load_state_dict(text_checkpoint['de'])
        self.project_factor = self.encoders[0].project_factor
        self.latent2hidden = nn.Linear(
            self.latent_dim,
            self.hidden_size * self.project_factor).to(self.device)
        self.epoch = 0

    def train_model(self):
        best_score = 1e-200
        plot_losses = []
        print_loss_total = 0  # Reset every epoch
        saving_skipped = 0
        for epoch in range(self.epoch, self.n_epochs):
            random.shuffle(self.pairs)
            for iter in range(0, self.n_iters, self.batch_size):
                training_batch = batch2TrainData(
                    self.vocab, self.pairs[iter:iter + self.batch_size],
                    self.modality)
                # Extract fields from batch
                vid_vec, lengths, speech_vec, tar_lengths = training_batch
                # Run a training iteration with the current batch
                loss = self.train(vid_vec, lengths, speech_vec, tar_lengths,
                                  iter)
                self.writer.add_scalar('{}loss'.format(self.data_dir), loss,
                                       iter)
                print_loss_total += loss
            print_loss_avg = print_loss_total * self.batch_size / self.n_iters
            print_loss_total = 0
            print('Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, self.n_epochs,
                                                       print_loss_avg))
            # evaluate and save the model
            curr_score = self.evaluate_all()
            self.writer.add_scalar('{}bleu_score'.format(self.data_dir),
                                   curr_score)
            if curr_score > best_score:
                saving_skipped = 0
                best_score = curr_score
                self.save_model(epoch)
            saving_skipped += 1
            if self.use_scheduler and saving_skipped > 3:
                saving_skipped = 0
                new_lr = self.lr * 0.5
                print('Entered the dungeon...')
                if new_lr > self.lr_lower_bound:  # lower bound on lr
                    self.lr = new_lr
                    print('lr decreased to => {}'.format(self.lr))

    def train(self, input_variable, lengths, target_variable, tar_lengths,
              iter):
        for i, _ in enumerate(self.encoders):
            self.encoders[i].train()
            self.encoders[i].zero_grad()
        input_variable = input_variable.to(self.device)
        lengths = lengths.to(self.device)
        target_variable = target_variable.to(self.device)
        tar_lengths = tar_lengths.to(self.device)
        # Initialize variables
        loss = 0
        print_losses = []
        n_totals = 0
        # Forward pass through both encoders
        enc_out_1, enc_hidden_1 = self.encoders[0](input_variable, lengths)
        enc_out_2, enc_hidden_2 = self.encoders[1](target_variable,
                                                   tar_lengths)
        if self.unit == 'gru':
            latent_1 = enc_hidden_1
            latent_2 = enc_hidden_2
        else:  # lstm
            (latent_1, cs_1) = enc_hidden_1
            (latent_2, cs_2) = enc_hidden_2
        loss = self.mean_square_error(latent_1, latent_2)
        loss.backward()
        # Clip gradients: gradients are modified in place
        for i, _ in enumerate(self.encoders):
            torch.nn.utils.clip_grad_norm_(self.encoders[i].parameters(),
                                           self.clip)
            self.encoder_optimizers[i].step()
        return loss.item()

    def mean_square_error(self, inp, target):
        criterion = nn.MSELoss()
        return criterion(inp, target)

    def save_model(self, epoch):
        directory = '{}'.format(self.save_dir)
        if not os.path.exists(directory):
            os.makedirs(directory)
        torch.save(
            {
                'epoch': epoch,
                'en_1': self.encoders[0].state_dict(),
                'en_2': self.encoders[1].state_dict(),
                'en_op1': self.encoder_optimizers[0].state_dict(),
                'en_op2': self.encoder_optimizers[1].state_dict(),
                'de': self.decoder.state_dict()
            }, '{}{}-{}-{}.pth'.format(directory, self.modality, self.langs,
                                       epoch))

    def evaluate_all(self):
        for i, _ in enumerate(self.encoders):
            self.encoders[i].eval()
        self.decoder.eval()
        searcher = GreedySearchDecoder(self.encoders[0], self.decoder,
                                       self.latent2hidden, self.device,
                                       self.SOS_TOKEN)
        refs = []
        hyp = []
        for pair in self.test_pairs:
            output_words = self.evaluate(searcher, self.vocab, pair[0])
            if output_words:
                final_output = []
                for x in output_words:
                    if x == '<EOS>':
                        break
                    final_output.append(x)
                refs.append([pair[2].split()])
                hyp.append(final_output)
        bleu_scores = calculateBleuScores(refs, hyp)
        print('Bleu score: {bleu_1} | {bleu_2} | {bleu_3} | {bleu_4}'.format(
            **bleu_scores))
        eg_idx = random.choice(range(len(hyp)))
        print(hyp[eg_idx], refs[eg_idx])
        return bleu_scores['bleu_4']

    def evaluate(self, searcher, vocab, sentence_or_vector,
                 max_length=conf['MAX_LENGTH']):
        with torch.no_grad():
            input_batch, lengths = inputVarVec([sentence_or_vector],
                                               self.modality)
            # Use appropriate device
            input_batch = input_batch.to(self.device)
            lengths = lengths.to(self.device)
            # Decode sentence with searcher
            tokens, scores = searcher(input_batch, lengths, max_length)
            # indexes -> words
            decoded_words = [vocab.index2word[token.item()]
                             for token in tokens]
        return decoded_words