def load_model(saved_vae, stored_info, device, cache_path=str(Path('../tmp')), seed=None):
    stored_info = stored_info.split(os.sep)[-1]
    cache_file = os.path.join(cache_path, stored_info)
    start_load = time.time()
    print(f"Fetching cached info at {cache_file}")
    with open(cache_file, "rb") as f:
        dataset, z_size, condition_size, condition_on, decoder_hidden_size, \
            encoder_hidden_size, n_encoder_layers = pickle.load(f)
    end_load = time.time()
    print(f"Cache {cache_file} loaded (load time: {end_load - start_load:.2f}s)")
    if os.path.exists(saved_vae):
        print(f"Found saved model {saved_vae}")
        start_load_model = time.time()
        e = model.EncoderRNN(dataset.input_side.n_words, encoder_hidden_size, z_size,
                             n_encoder_layers, bidirectional=True)
        d = model.DecoderRNN(z_size, dataset.trn_split.n_conditions, condition_size,
                             decoder_hidden_size, dataset.input_side.n_words, 1, word_dropout=0)
        vae = model.VAE(e, d).to(device)
        vae.load_state_dict(torch.load(saved_vae, map_location=lambda storage, loc: storage))
        vae.eval()
        print(f"Trained for {vae.steps_seen} steps (load time: {time.time() - start_load_model:.2f}s)")
    print("Setting new random seed")
    if seed is None:
        # TODO: torch.manual_seed(1999) in model.py is affecting this
        new_seed = int(time.time())
        new_seed = abs(new_seed) % 4294967295  # must be between 0 and 4294967295
    else:
        new_seed = seed
    torch.manual_seed(new_seed)
    random_state = np.random.RandomState(new_seed)
    # random_state.shuffle(dataset.trn_pairs)
    return vae, dataset, z_size, random_state
def __init__(self, input_size, embedding_size, hidden_size, vocab_size, num_layer):
    super(Model, self).__init__()
    self.encoder = model.EncoderCNN(input_size, embedding_size)
    self.decoder = model.DecoderRNN(embedding_size, hidden_size, vocab_size, num_layer)
    self.criterion = nn.CrossEntropyLoss()
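# A minimal instantiation sketch for the wrapper above, assuming the enclosing
# class is named Model (as the super() call suggests); every size below is an
# illustrative placeholder, not a value from the original project.
net = Model(input_size=2048, embedding_size=256,
            hidden_size=512, vocab_size=10000, num_layer=1)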
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    with open(args.vocab_path, "rb") as f1, \
            open(args.batched_file_path, "rb") as f2:
        vocab = pickle.load(f1)
        batched_val_set = pickle.load(f2)
    coco_caps = COCO(args.caption_path)
    batched_val_loader = get_loader(args.image_dir, args.caption_path, batched_val_set,
                                    vocab, transform, shuffle=True, num_workers=3)
    encoder = model.EncoderCNN()
    decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1)
    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    checkpoint = torch.load(args.load_checkpoint)
    decoder.load_state_dict(checkpoint["state_dict"])
    checkpoint = None
    torch.cuda.empty_cache()
    for i, (images, captions, lengths, ids) in enumerate(batched_val_loader):
        if i == args.num_runs:
            break
        print("\nactual captions for batch " + str(i) + " are: ")
        annIds = coco_caps.getAnnIds(imgIds=ids)
        anns = coco_caps.loadAnns(annIds)
        for ann in anns:
            print(ann["caption"])
        images = to_var(images, volatile=True)
        captions = to_var(captions, volatile=True)
        features = encoder(images)
        results = decoder.sample(features, args.beam_size)
        print("predicted captions are: ")
        for result in results:
            candidate = [vocab(i) for i in result[1][:-1]]
            references = [nltk.tokenize.word_tokenize(ann["caption"].lower()) for ann in anns]
            score = bleu_score.sentence_bleu(references, candidate)
            print("probability: %5.4f, BLEU score: %5.4f, caption: %s"
                  % (result[0], score, caption_id_to_string(result[1], vocab)))
def define_simple_decoder(hidden_size, input_vocab_len, output_vocab_len, max_length):
    """
    Provides a simple decoder instance

    NOTE: Not all the function arguments are needed - you need to figure out
    which arguments to use

    :return: a simple decoder instance
    """
    # Write your implementation here
    decoder = model.DecoderRNN(hidden_size, output_vocab_len)
    # End of implementation
    return decoder
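# A hedged usage sketch for define_simple_decoder(); the hidden size, vocabulary
# lengths, and max length below are illustrative placeholders, not values taken
# from the original assignment.
decoder = define_simple_decoder(hidden_size=256, input_vocab_len=5000,
                                output_vocab_len=5000, max_length=20)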
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    super(NMT, self).__init__()
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab
    src_vocab_size = len(self.vocab.src.word2id)
    tgt_vocab_size = len(self.vocab.tgt.word2id)
    self.encoder = model.EncoderRNN(vocab_size=src_vocab_size,
                                    embed_size=self.embed_size,
                                    hidden_size=self.hidden_size)
    self.decoder = model.DecoderRNN(embed_size=self.embed_size,
                                    hidden_size=self.hidden_size,
                                    output_size=tgt_vocab_size)
    self.encoder = self.encoder.cuda()
    self.decoder = self.decoder.cuda()
    self.criterion = torch.nn.CrossEntropyLoss().cuda()
imgh = args.imh
imgw = args.imw
embed_dim = args.embed_size
hidden_dim = args.nhid
attention_dim = args.attention_dim
transform = transforms.Compose([transforms.Resize((imgh, imgw)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
fine_tune_encoder = False
encoder = model.EncoderCNN().to(device)
encoder.fine_tune(fine_tune_encoder)
decoder = model.DecoderRNN(ntokens, embed_dim, hidden_dim, idx2word, word2idx).to(device)
loss_fn = nn.CrossEntropyLoss().to(device)
decoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                 lr=decoder_lr)
encoder_optimizer = t.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
                                 lr=encoder_lr) if fine_tune_encoder else None

# def prepare_sequence(seq, to_ix):
#     idxs = [to_ix[w] for w in seq]
#     return t.tensor(idxs, dtype=t.long, device=device)

def batchify(data, bs):
    shuffle(data)
                                           shuffle=True,
                                           collate_fn=dataload.collate_fn, **kwargs)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,
                                         shuffle=True,
                                         collate_fn=dataload.collate_fn, **kwargs)

## Load the proper neural network model.
if args.model == 'Pretrained':
    model.encoder = model.EncoderCNN(args.embed_dim)
    model.decoder = model.DecoderRNN(embed_size=args.embed_dim,
                                     hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size,
                                     num_layers=1,
                                     max_seq_length=10)
else:
    raise Exception('Unknown model {}'.format(args.model))

## The loss function - cross-entropy.
criterion = functional.cross_entropy

## Activate CUDA if specified and available.
if args.cuda:
    model.encoder.cuda()
    model.decoder.cuda()
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    useCuda = not args.disable_cuda
    with open(args.vocab_path, 'rb') as f1, \
            open(args.batched_train_path, 'rb') as f2, \
            open(args.batched_val_path, 'rb') as f3:
        vocab = pickle.load(f1)
        batched_train_set = pickle.load(f2)
        batched_val_set = pickle.load(f3)
    batched_train_loader = get_loader(args.train_image_dir, args.train_caption_path,
                                      batched_train_set, vocab, transform,
                                      shuffle=True, num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir, args.val_caption_path,
                                    batched_val_set, vocab, transform,
                                    shuffle=True, num_workers=1)
    random_val_loader = get_loader(args.val_image_dir, args.val_caption_path,
                                   batched_val_set, vocab, transform,
                                   shuffle=True, num_workers=1)
    encoder_cnn = model.EncoderCNN(args.is_normalized, useCuda=useCuda)
    decoder_rnn = model.DecoderRNN(args.embedding_dim, args.hidden_size, len(vocab),
                                   args.batch_size, dropout=args.dropout, useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    # loss_function = nn.CrossEntropyLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.encoder_lr)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=1)
    output_train_file = open(args.output_train_name, 'w')
    output_val_file = open(args.output_val_name, 'w')
    start_epoch = 0
    save_name = file_namer.make_checkpoint_name(args.batch_size, args.min_occurrences,
        args.num_epochs, args.dropout, args.decoder_lr, args.encoder_lr, args.embedding_dim,
        args.hidden_size, args.grad_clip, args.is_normalized) \
        if args.load_checkpoint == "" else args.load_checkpoint
    checkpoint_name = file_namer.get_checkpoint(save_name)
    if checkpoint_name is not None:
        print("loading from checkpoint " + checkpoint_name)
        checkpoint = torch.load(checkpoint_name) if useCuda else torch.load(
            checkpoint_name, map_location=lambda storage, loc: storage)
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.load_checkpoint = checkpoint_name
        checkpoint = None
        torch.cuda.empty_cache()
    else:
        print("No existing checkpoints, starting from scratch")
        args.load_checkpoint = "No checkpoint found"
    full_return_index = mp.Value('i', 0)
    full_return_value = mp.Value('d', 0.0)
    full_val_processes = None
    for epoch in range(start_epoch, args.num_epochs):
        val_processes = None
        return_index = mp.Value('i', 0)
        return_value = mp.Value('d', 0.0)
        train_progress_bar = tqdm(iterable=batched_train_loader,
                                  desc='Epoch [%i/%i] (Train)' % (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, _) in enumerate(train_progress_bar):
            train_sum_loss += trainer.train(encoder_cnn, decoder_rnn, loss_function, optimizer,
                                            images, captions, args.grad_clip, useCuda)
            train_progress_bar.set_postfix(loss=train_sum_loss / ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write("%d, %5.4f\n" % (epoch * len(batched_train_loader) + i,
                    train_sum_loss / 100 if i > 0 else train_sum_loss))
            if i % 1000 == 0:
                if val_processes is not None:
                    val_processes.join()
                    output_val_file.write("%d, %5.4f\n" % (return_index.value, return_value.value))
                val_processes = mp.Process(
                    target=validate,
                    args=(random_val_loader, encoder_cnn, decoder_rnn, loss_function, useCuda,
                          epoch * len(batched_train_loader) + i, return_index, return_value))
                val_processes.start()
                train_sum_loss = 0
        if full_val_processes is not None:
            full_val_processes.join()
            # scheduler.step(full_return_value.value)
            output_val_file.write("End of Epoch\n%d, %5.4f\n"
                                  % (full_return_index.value, full_return_value.value))
        full_val_processes = mp.Process(
            target=validate_full,
            args=(batched_val_loader, encoder_cnn, decoder_rnn, loss_function, useCuda,
                  epoch, args.num_epochs, len(batched_train_loader),
                  full_return_index, full_return_value))
        full_val_processes.start()
        torch.save({'epoch': epoch + 1,
                    'state_dict': decoder_rnn.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   file_namer.make_checkpoint_name(args.batch_size, args.min_occurrences,
                       epoch + 1, args.dropout, args.decoder_lr, args.encoder_lr,
                       args.embedding_dim, args.hidden_size, args.grad_clip, args.is_normalized))
    if full_val_processes is not None:
        full_val_processes.join()
        output_val_file.write("End of Epoch\n%d, %5.4f\n"
                              % (full_return_index.value, full_return_value.value))
        full_val_processes = None
    output_train_file.close()
    output_val_file.close()
    if args.plot:
        args.train_files.append(args.output_train_name)
        args.val_files.append(args.output_val_name)
        plot(args)
        args.png_files = [args.plot_name]
    if args.send_email:
        args.txt_files = [args.output_train_name, args.output_val_name]
        f = open('arguments.txt', 'w')
        for arg in sorted(vars(args)):
            # arguments we don't want sent in the email
            ignore_args = ['user', 'password', 'to', 'plot_name', 'train_image_dir',
                           'val_image_dir', 'send_email', 'plot', 'plot_name',
                           'train_caption_path', 'val_caption_path', 'png_files', 'txt_files',
                           'disable_cuda', 'body', 'output_train_name', 'output_val_name',
                           'show', 'subject', 'max_batched_set_size']
            if not arg in ignore_args:
                f.write("%s: %s\n" % (arg, getattr(args, arg)))
        f.close()
        if not args.body:
            args.body = 'arguments.txt'
        else:
            args.txt_files.append('arguments.txt')
        send_email(args)
with open('vocabSet.pkl', 'rb') as f:
    vocabularySet = pickle.load(f)
print("Loaded Vocabulary Set")
with open('vocabSet2.pkl', 'rb') as f:
    vocabularySet2 = pickle.load(f)
print("Loaded Reverse Vocabulary Set")
modelsPath = "LSTM4Models/"
imagesPath = "../data/val2014/"
captionsPath = "../data/annotations/captions_val.json"
cnnEn = model.EncoderCNN(wordEmbeddings).eval()
lstmDe = model.DecoderRNN(wordEmbeddings, lstmHiddenStates, len(vocabularySet), lstmLayers)
cnnEn = cnnEn.to(device)
lstmDe = lstmDe.to(device)
valData = COCO(captionsPath)

# Exploiting Pycocotools to get insights about data
print("Total Annotations: " + str(len(valData.anns.keys())))
print("Total Images: " + str(len(valData.imgs.keys())))

# Visualise
print(valData.imgToAnns[393212])
for (i, key) in enumerate(valData.imgToAnns.keys()):
    origCaptionSet = []
    for rec in valData.imgToAnns[key]:
def __init__(self, embed_size, hidden_size, vocab, dropout_rate, num_layers, bidirectional,
             attention_type, self_attention, tau, gamma1, gamma2, cost_fcn, uniform_init,
             embedding_file=None):
    super(NMT, self).__init__()
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab
    self.bidirectional = bidirectional
    self.tau = tau
    self.gamma1 = gamma1
    self.gamma2 = gamma2
    self.cost_fcn = cost_fcn
    src_vocab_size = len(self.vocab.src.word2id)
    tgt_vocab_size = len(self.vocab.tgt.word2id)
    if embedding_file is not None:
        Glove = {}
        f = open(embedding_file)
        print("Loading the vectors.")
        i = 0
        for line in f:
            if i != 0:
                word, vec = line.split(' ', 1)
                Glove[word] = np.fromstring(vec, sep=' ')
            i += 1
        f.close()
        print("Done.")
        X_train = np.zeros((len(self.vocab.src.id2word), self.embed_size))
        for i in range(len(self.vocab.src.id2word)):
            if self.vocab.src.id2word[i] in Glove:
                X_train[i] = Glove[self.vocab.src.id2word[i]]
        embeddings = np.asarray(X_train)
    else:
        embeddings = None
    self.encoder = model.EncoderRNN(vocab_size=src_vocab_size, embed_size=self.embed_size,
                                    hidden_size=hidden_size, dropout_rate=dropout_rate,
                                    num_layers=num_layers, bidirectional=bidirectional,
                                    embeddings=embeddings)
    self.decoder = model.DecoderRNN(embed_size=self.embed_size, hidden_size=self.hidden_size,
                                    output_size=tgt_vocab_size, dropout_rate=dropout_rate,
                                    num_layers=num_layers, attention_type=attention_type,
                                    self_attention=self_attention, bidirectional=bidirectional)
    self.encoder = self.encoder.cuda()
    self.decoder = self.decoder.cuda()
    # Initialize all parameter weights uniformly
    for param in list(self.encoder.parameters()) + list(self.decoder.parameters()):
        torch.nn.init.uniform(param, a=-uniform_init, b=uniform_init)
    self.criterion = torch.nn.CrossEntropyLoss(reduce=0).cuda()
num_epochs = 100
learning_rate = 1e-4
log_interval = 10  # The interval at which the model will be saved
root_dir = '../Data/'
# ---------------------------------------------------------------
# Dataset loader
train_loader = Dataset_CRNN(root_dir=root_dir)

# Define the cnn model
cnnEnc = m.initialize_model(model_name, cnn_encoding_length, feature_extract,
                            use_pretrained)  # To use pretrained model
# cnnEnc = MyModel()  # To use your own model

# Define RNN decoder
rnnDec = m.DecoderRNN(CNN_embed_dim=cnn_encoding_length, h_RNN_layers=3, h_RNN=256,
                      h_FC_dim=128, drop_p=0.3, num_classes=num_classes)

# Params to update
crnn_params = list(cnnEnc.parameters()) + list(rnnDec.parameters())

# Specify the loss to use
loss_criterion = F.BCELoss()

# Define the optimizer
optimizer = torch.optim.Adam(crnn_params, lr=learning_rate)

# Specify the device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the models to device
cnnEnc = cnnEnc.to(device)
print(relations)
relation_count = len(relations)  # args.relation_tag_size  # data['relation_tag_size']
noisy_count = args.noisy_tag_size  # data['noisy_tag_size']
learning_rate = args.lr  # data['lr']
l2 = args.l2  # data['l2']
print("relation count: ", relation_count)
print("Reading vector file......")
vec_model = KeyedVectors.load_word2vec_format(args.datapath + 'vector2.txt', binary=False)
# vec_model = KeyedVectors.load_word2vec_format('/home/xiaoya/data/GoogleNews-vectors-negative300.bin.gz', binary=True)

# load models
encoder = model.EncoderRNN(args, wv).to(device)
decoder = model.DecoderRNN(args, wv).to(device)
RE_model = model.RE_RNN(args, wv, relation_count).to(device)
criterion = nn.NLLLoss()  # CrossEntropyLoss()
# criterion_RE = nn.BCELoss()
# attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    RE_model = RE_model.cuda()
    criterion = criterion.cuda()
    # criterion_RE = criterion_RE.cuda()
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate, weight_decay=l2)  # SGD
with open("../ImageCaptioner/data/vocab/vocab_occurrence_5.pkl", 'rb') as f1,\ open("../ImageCaptioner/data/batched_data/val_batch_1.pkl", "rb") as f2: vocab = pickle.load(f1) batched_val_set = pickle.load(f2) coco_caps = COCO("../ImageCaptioner/data/annotations/captions_val2014.json") batched_val_loader = get_loader( "../ImageCaptioner/data/val2014", "../ImageCaptioner/data/annotations/captions_val2014.json", batched_val_set, vocab, transform, shuffle=True, num_workers=3) encoder = model.EncoderCNN() decoder = model.DecoderRNN(512, 196, 512, 512, len(vocab), 1) if torch.cuda.is_available(): encoder = encoder.cuda() decoder = decoder.cuda() checkpoint = torch.load( "noNorm/model_batch_100_dims_512x512_lr_0.0001/checkpoint_25.pt") decoder.load_state_dict(checkpoint['state_dict']) checkpoint = None torch.cuda.empty_cache() for i, (images, captions, lengths, ids) in enumerate(batched_val_loader): if i == 1: break print("actual captions are: ") annIds = coco_caps.getAnnIds(imgIds=ids)
def main(args):
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    useCuda = not args.disable_cuda
    with open(args.vocab_path, 'rb') as vocab_path, \
            open(args.batched_train_path, 'rb') as batched_train_path, \
            open(args.batched_val_path, 'rb') as batched_val_path:
        vocab = pickle.load(vocab_path)
        batched_train_set = pickle.load(batched_train_path)
        batched_val_set = pickle.load(batched_val_path)
    batched_train_loader = get_loader(args.train_image_dir, args.train_caption_path,
                                      batched_train_set, vocab, transform,
                                      shuffle=True, num_workers=3)
    batched_val_loader = get_loader(args.val_image_dir, args.val_caption_path,
                                    batched_val_set, vocab, transform,
                                    shuffle=True, num_workers=1)
    batched_val_loader_full = get_loader(args.val_image_dir, args.val_caption_path,
                                         batched_val_set, vocab, transform,
                                         shuffle=True, num_workers=1)
    encoder_cnn = model.EncoderCNN()
    decoder_rnn = model.DecoderRNN(512, 196, args.embedding_dim, args.hidden_dim, len(vocab),
                                   args.num_layers, args.dropout, useCuda=useCuda)
    if torch.cuda.is_available() and useCuda:
        encoder_cnn.cuda()
        decoder_rnn.cuda()
    loss_function = nn.NLLLoss()
    params = list(decoder_rnn.parameters())
    optimizer = optim.Adam(params, lr=args.lr)
    output_train_file = open(args.output_dir + "/train_" + str(args.num_epochs) + ".txt", 'w')
    output_val_file = open(args.output_dir + "/val_" + str(args.num_epochs) + ".txt", 'w')
    start_epoch = 0
    if args.load_checkpoint is not None:
        checkpoint = torch.load(args.load_checkpoint) if useCuda else torch.load(
            args.load_checkpoint, map_location=lambda storage, loc: storage)
        print("loading from checkpoint " + str(args.load_checkpoint))
        start_epoch = checkpoint['epoch']
        decoder_rnn.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        checkpoint = None
        torch.cuda.empty_cache()
    for epoch in range(start_epoch, args.num_epochs):
        progress_bar = tqdm(iterable=batched_train_loader,
                            desc='Epoch [%i/%i] (Train)' % (epoch, args.num_epochs))
        train_sum_loss = 0
        for i, (images, captions, lengths, ids) in enumerate(progress_bar, 1):
            loss = train(images, captions, encoder_cnn, decoder_rnn, loss_function,
                         optimizer, args.grad_clip, useCuda)
            train_sum_loss += loss.data.select(0, 0)
            progress_bar.set_postfix(loss=train_sum_loss / ((i % 100) + 1))
            if i % 100 == 0:
                output_train_file.write("%d, %5.4f\n" % (epoch * len(batched_train_loader) + i,
                                                         train_sum_loss / 100))
                train_sum_loss = 0
            if i % 1000 == 0:
                temp_loss = validate(batched_val_loader, encoder_cnn, decoder_rnn,
                                     loss_function, useCuda)
                output_val_file.write("%d, %5.4f\n" % (epoch * len(batched_train_loader) + i,
                                                       temp_loss))
        # end of batch
        output_train_file.write("%d, %5.4f\n" % ((epoch + 1) * len(batched_train_loader),
                                                 train_sum_loss / len(batched_train_loader) / 100))
        val_sum_loss = 0
        val_progress_bar = tqdm(iterable=batched_val_loader_full,
                                desc='Epoch [%i/%i] (Val)' % (epoch, args.num_epochs))
        for i, (images, captions, lengths, ids) in enumerate(val_progress_bar, 1):
            loss = evaluate(images, captions, encoder_cnn, decoder_rnn, loss_function,
                            optimizer, useCuda)
            val_sum_loss += loss.data.select(0, 0)
            val_progress_bar.set_postfix(loss=val_sum_loss / i)
        output_val_file.write("%d, %5.4f\n" % ((epoch + 1) * len(batched_train_loader),
                                               val_sum_loss / len(batched_val_loader_full)))
        torch.save({'epoch': epoch + 1,
                    'state_dict': decoder_rnn.state_dict(),
                    'optimizer': optimizer.state_dict()},
                   args.output_dir + "/checkpoint_" + str(epoch + 1) + ".pt")
    output_train_file.close()
    output_val_file.close()
    decoder = decoder.cuda()
    for dialog in validation_data:
        sample(my_lang, dialog, encoder, context, decoder)
        time.sleep(3)
    sys.exit(0)

learning_rate = args.lr
criterion = nn.NLLLoss()
if not args.restore:
    encoder = model.EncoderRNN(len(my_lang.word2index), args.encoder_hidden,
                               args.encoder_layer, args.dropout)
    context = model.ContextRNN(args.encoder_hidden * args.encoder_layer, args.context_hidden,
                               args.context_layer, args.dropout)
    decoder = model.DecoderRNN(args.context_hidden * args.context_layer, args.decoder_hidden,
                               len(my_lang.word2index), args.decoder_layer, args.dropout)
else:
    print("Load last model in %s" % (args.save))
    number = torch.load(os.path.join(args.save, 'checkpoint.pt'))
    encoder = torch.load(os.path.join(args.save, 'encoder' + str(number) + '.pt'))
    context = torch.load(os.path.join(args.save, 'context' + str(number) + '.pt'))
    decoder = torch.load(os.path.join(args.save, 'decoder' + str(number) + '.pt'))
if torch.cuda.is_available():
    encoder = encoder.cuda()
    context = context.cuda()
    decoder = decoder.cuda()
if torch.cuda.is_available():
        decoder_input = Variable(torch.LongTensor([[ni]]))
        decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    return decoded_words

def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair = random.choice(pairs)
        print('>', pair[0])
        print('=', pair[1])
        output_words = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')

hidden_dim = 256
embedding_dim = 100
encoder_1 = model.EncoderRNN(input_lang.n_words, embedding_dim, hidden_dim)
decoder_1 = model.DecoderRNN(output_lang.n_words, embedding_dim, hidden_dim)
# attn_decoder1 = AttnDecoderRNN(hidden_size, output_lang.n_words, 1, dropout_p=0.1)
if use_cuda:
    encoder_1 = encoder_1.cuda()
    decoder_1 = decoder_1.cuda()
    # attn_decoder1 = attn_decoder1.cuda()
trainIters(encoder_1, decoder_1, 75000)
def main(config):
    if config.dataset == 'real':
        # initialize the dictionary
        lang_real = prepare.Lang_real('txt')
        lines = open('data/opensubtitles/vocab4000').read().strip().split('\n')
        for sen in lines:
            lang_real.addSentence(sen)
        lang_txt = lang_real
        train_data = prepare.get_dataset('data/opensubtitles/train.txt', batch_size=16,
                                         lang_txt=lang_real, task='real')
        shuffle(train_data)
        dev_data = prepare.get_dataset('data/opensubtitles/dev.txt', batch_size=16,
                                       lang_txt=lang_real, task='real')
        test_data = prepare.get_dataset('data/opensubtitles/test.txt', batch_size=16,
                                        lang_txt=lang_real, task='real')
    elif config.dataset == 'counting':
        lang_counting = prepare.Lang_counting('txt')
        lang_txt = lang_counting
        train_data = prepare.get_dataset('data/counting/train_counting.txt', batch_size=16,
                                         lang_txt=lang_counting, task='counting')
        shuffle(train_data)
        dev_data = prepare.get_dataset('data/counting/dev_counting.txt', batch_size=16,
                                       lang_txt=lang_counting, task='counting')
        test_data = prepare.get_dataset_test_counting('data/counting/test_counting.txt',
                                                      batch_size=16)
    feature = config.feature
    encoder = model.EncoderRNN(feature, feature, lang_txt.n_words)
    decoder = model.DecoderRNN(feature, feature, lang_txt.n_words)
    evaluater = model.EvaluateR(feature)
    decoder_prev = model.DecoderRNN(feature, feature, lang_txt.n_words)
    encoder_prev = model.EncoderRNN(feature, feature, lang_txt.n_words)
    dis_encoder = model.disEncoderRNN(feature, feature, lang_txt.n_words)
    dis_decoder = model.disDecoderRNN(feature, feature, lang_txt.n_words)
    eva_encoder = model.disEncoderRNN(feature, feature, lang_txt.n_words)
    eva_decoder = model.disDecoderRNN(feature, feature, lang_txt.n_words)
    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        evaluater = evaluater.cuda()
        decoder_prev = decoder_prev.cuda()
        encoder_prev = encoder_prev.cuda()
        dis_encoder = dis_encoder.cuda(0)
        dis_decoder = dis_decoder.cuda(0)
        eva_encoder = eva_encoder.cuda(0)
        eva_decoder = eva_decoder.cuda(0)
    print_every = config.print_every
    dev_every = config.dev_every
    use_ppo = config.use_ppo
    ppo_a1 = config.ppo_a1
    ppo_a2 = config.ppo_a2
    ppo_b1 = config.ppo_b1
    ppo_b2 = config.ppo_b2
    if config.type == 'reinforce':
        lr = config.lr
        test1 = train.seq2seq(lang_txt, dev_data, test_data, encoder, decoder, evaluater,
                              encoder_prev, decoder_prev, task=config.dataset,
                              god_rs_dev=[], god_loss_dev=[], god_loss=[], god_rs_test=[])
        losses, rewards = test1.trainIters(train_data, 1, 1, use_ppo=use_ppo, actor_fixed=False,
                                           min_rein_step=0, max_rein_step=5,
                                           ppo_b1=ppo_b1, ppo_b2=ppo_b2,
                                           ppo_a1=ppo_a1, ppo_a2=ppo_a2, ppo_a3=1e10,
                                           rate=1, lr=lr, dev_every=dev_every,
                                           print_every=print_every, plot_every=5000000000,
                                           name='_z', file_name='MIXER')
    elif config.type == 'gan':
        test_gan = train.ganSeq2seq(lang_txt, dev_data, test_data, encoder, decoder,
                                    dis_encoder, dis_decoder, eva_encoder, eva_decoder,
                                    encoder_prev, decoder_prev,
                                    god_rs_dev=[], god_loss_dev=[], god_loss=[],
                                    god_rs_test=[], task=config.dataset)
        loss_g, loss_d = test_gan.trainIters(train_data, 0, 0, 1, use_ppo=config.use_ppo,
                                             g_lr=config.g_lr, d_lr=config.d_lr,
                                             search_n=1, width=1,
                                             ppo_b1=ppo_b1, ppo_b2=ppo_b2,
                                             ppo_a1=ppo_a1, ppo_a2=ppo_a2,
                                             ppo_a3=10000000000,
                                             print_every=print_every,
                                             plot_every=50000000000,
                                             dev_every=dev_every)
    for i in range(n):
        output_words = evaluate(encoder1, encoder2, decoder,
                                d['image'][i], d['post'][i], d['tags'][i])
        output_sentence = ' '.join(output_words)
        print('ground truth:', d['comment'][i])
        print('generated:', output_sentence)
    break

def evaluateScore(encoder, decoder, weights):
    val_loader = DataLoader(dataset=val_dataset, batch_size=1, shuffle=False)
    total_score = 0
    for d in val_loader:
        # for i in range(n):
        output_words = evaluate(encoder, decoder, d['image'][0], d['post'][0], d['tags'][0])
        score = sentence_bleu([d['comment'][0].split(' ')], output_words, weights=weights)
        total_score += score
    return float(total_score) / val_data_size

encoder1 = model.EncoderRNN(300, post_hidden_size).to(device)
encoder2 = model.Encoder(input_size, final_hidden_size).to(device)
decoder = model.DecoderRNN(final_hidden_size, vocab.n_words).to(device)
trainIters(encoder1, encoder2, decoder, learning_rate=0.0001)
evaluateRandomly(encoder1, encoder2, decoder, 'val', 10)
        bleu_total += bleu
    print_loss_total /= test_len
    bleu_total /= test_len
    print(f'Test loss: {print_loss_total}, bleu: {bleu_total}')
    with open(f'{latent_hidden_size}/train_loss', 'a') as f:
        f.write(f'{str(train_loss_total/tot_cnt)}\n')
    with open(f'{latent_hidden_size}/train_KL_loss', 'a') as f:
        f.write(f'{str(train_KL_total/tot_cnt)}\n')
    with open(f'{latent_hidden_size}/test_bleu', 'a') as f:
        f.write(f'{str(bleu_total)}\n')
    test_bleu_list.append(bleu_total)
    train_loss_list.append(train_loss_total/tot_cnt)
    train_KL_list.append(train_KL_total/tot_cnt)
    train_loss_total = 0
    train_KL_total = 0
    tot_cnt = 0
    if bleu_total > highest_score:
        highest_score = bleu_total
        torch.save(encoder, f'/home/karljackab/DL/lab5/{latent_hidden_size}/encoder_{str(bleu_total)}.pkl')
        torch.save(decoder, f'/home/karljackab/DL/lab5/{latent_hidden_size}/decoder_{str(bleu_total)}.pkl')
        torch.save(enc_last, f'/home/karljackab/DL/lab5/{latent_hidden_size}/enc_last_{str(bleu_total)}.pkl')
        print('save model')

enc_last = model.EncodeLast(hidden_size+4, latent_hidden_size, device).to(device)
encoder = model.EncoderRNN(vocab_size, hidden_size+4, device).to(device)
decoder = model.DecoderRNN(hidden_size+4, vocab_size, device).to(device)
trainIters(encoder, decoder, enc_last, 300, print_every=2000)
        print_rec_total = print_rec_total / print_every
        print_kl_total = print_kl_total / print_every
        print('average kl = %.4f' % print_kl_total)
        print('average reconstruction = %.4f' % print_rec_total)
    if iter % plot_every == 0:
        plot_loss_avg = plot_loss_total / plot_every
        plot_losses.append(plot_loss_avg)
        plot_loss_total = 0
        print_kl_total = 0
        print_rec_total = 0

encoder = model.EncoderRNN(vocabulary.n_words, latent_space, embeddings).to(device)
decoder = model.DecoderRNN(embedding_space, embeddings, vocabulary.n_words).to(device)
linear = model.RGB_to_Hidden(latent_space, embedding_space).to(device)
trainIters(encoder, decoder, linear, epochs, plot_every=500, print_every=500,
           learning_rate=learning_r)
if SAVE:
    dirpath = os.getcwd()
    encoder_path = dirpath + '/enc'
    decoder_path = dirpath + '/dec'
    torch.save(encoder, encoder_path)
                                            shuffle=True,
                                            collate_fn=dataload.collate_fn, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size,
                                          shuffle=True,
                                          collate_fn=dataload.val_collate, **kwargs)

## Load the proper neural network model.
if args.model == 'Pretrained':
    # Problem 2 (no hidden layer, input -> output)
    model.encoder = model.EncoderCNN(10)
    model.decoder = model.DecoderRNN(encoder_dim=2048, decoder_dim=512, attention_dim=512,
                                     embed_size=512, hidden_size=args.hidden_dim,
                                     vocab_size=vocab_size, num_layers=1, max_seq_length=15)
# elif args.model == 'resnet_common':
#     # Problem 5 (multiple hidden layers, input -> hidden layers -> output)
#     print("sruthi check 1")
#     model = models.resnetcommon.ResnetCommon(im_size, args.hidden_dim, args.kernel_size, n_classes)
else:
    raise Exception('Unknown model {}'.format(args.model))

## Define the loss function as cross-entropy.
## This is the softmax loss function (i.e., multiclass classification).
criterion = functional.cross_entropy
def main(args):
    # random seed setup
    manualSeed = random.randint(1, 100)
    # print("Random Seed: ", manualSeed)
    random.seed(manualSeed)
    torch.manual_seed(manualSeed)
    torch.cuda.manual_seed_all(manualSeed)

    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    audio_len, comment_len, mfcc_dim = caculate_max_len(args.audio_dir, args.text_path, vocab)
    # mfcc_features = audio_preprocess(args.audio_dir, N, AUDIO_LEN, MFCC_DIM).astype(np.float32)

    # Build data loader
    data_loader = data_get(args.audio_dir, audio_len, args.text_path, comment_len, vocab)

    # Build the models
    encoder = model.EncoderRNN(mfcc_dim, args.embed_size, args.hidden_size).to(device)
    decoder = model.DecoderRNN(args.embed_size + Z_DIM, args.hidden_size, len(vocab),
                               args.num_layers).to(device)
    # decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers).to(device)

    # Loss and optimizer
    criterion_BCEWithLogitsLoss = nn.BCEWithLogitsLoss()
    criterion_CrossEntropyLoss = nn.CrossEntropyLoss()
    # criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # GAN discriminator  # in_dim=len(vocab)
    netD = model.LSTMDiscriminator(in_dim=1, hidden_dim=256).to(device)
    # setup optimizer
    optimizerD = torch.optim.Adam(netD.parameters(), lr=args.learning_rate)

    # Train the models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, ((audio, audio_len), (comment, comment_len)) in enumerate(data_loader):
            audio = audio.to(device)
            audio = audio.unsqueeze(0)
            comment = comment.to(device)
            comment = comment.unsqueeze(0)
            targets = pack_padded_sequence(comment, [comment_len], batch_first=True)[0]
            batch_size = comment.shape[0]
            seq_len = targets.shape[0]

            # discriminator: 1 -- real comment
            label0 = torch.full((batch_size, seq_len, 1), 0, device=device)
            label1 = torch.full((batch_size, seq_len, 1), 1, device=device)
            # real sample
            logits_real = netD(comment, [comment_len])  # batch*seq
            errD_real = criterion_BCEWithLogitsLoss(logits_real, label1)

            # discriminator: 2 -- generated comment
            audio_features = encoder(audio, [audio_len])
            if Z_DIM > 0:
                z = Variable(torch.randn(audio_features.shape[0], Z_DIM)).cuda()
                audio_features = torch.cat([z, audio_features], 1)
            outputs = decoder(audio_features, comment, [comment_len])
            # generated comment discrimination
            max_v, max_index = outputs.detach().max(1)
            logits_fake = netD(max_index.unsqueeze(0), [comment_len])  # batch*seq*1
            errD_fake = criterion_BCEWithLogitsLoss(logits_fake, label0)
            errD = errD_fake + errD_real
            optimizerD.zero_grad()
            errD.backward()
            optimizerD.step()

            # 2. generator
            audio_features = encoder(audio, [audio_len])
            if Z_DIM > 0:
                z = Variable(torch.randn(audio_features.shape[0], Z_DIM)).cuda()
                audio_features = torch.cat([z, audio_features], 1)
            outputs = decoder(audio_features, comment, [comment_len])
            max_v, max_index = outputs.max(1)
            logits_fake = netD(max_index.unsqueeze(0), [comment_len])  # batch*seq*vobsize
            errG = criterion_BCEWithLogitsLoss(logits_fake, label1)
            loss = criterion_CrossEntropyLoss(outputs, targets) + errG
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss_D: {:.4f}, Loss_G: {:.4f}, Perplexity: {:5.4f}'
                      .format(epoch, args.num_epochs, i, total_step,
                              errD.item(), loss.item(), np.exp(loss.item())))

            # Save the model checkpoints
            if (epoch + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-{}-{}.ckpt'.format(epoch + 1, i + 1)))