Example #1
def get_algorithm(self, words, model_dir=None):
    if model_dir is None:
        model_dir = self.model_dir

    if self.algorithm == 'uniskip':
        return UniSkip(model_dir, words)
    else:
        return BiSkip(model_dir, words)
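Taken on its own, get_algorithm is a small factory method: it falls back to the instance's default model_dir, then dispatches on self.algorithm. Below is a minimal sketch of a host class; only the algorithm and model_dir attributes are implied by the snippet, the hypothetical SkipThoughts wrapper and everything else is illustrative.

from skipthoughts import UniSkip, BiSkip

class SkipThoughts(object):
    # Hypothetical host class for the get_algorithm method above.
    def __init__(self, algorithm='uniskip', model_dir='./data/skip-thoughts'):
        self.algorithm = algorithm  # 'uniskip' selects UniSkip; anything else, BiSkip
        self.model_dir = model_dir  # directory holding the pretrained weights

    def get_algorithm(self, words, model_dir=None):
        if model_dir is None:
            model_dir = self.model_dir
        if self.algorithm == 'uniskip':
            return UniSkip(model_dir, words)
        return BiSkip(model_dir, words)

# encoder = SkipThoughts().get_algorithm(['a', 'small', 'bird'])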
Example #2
def get_text_enc(config, vocab):
    skipthoughts_dir, text_enc = config['skipthoughts_dir'], config['txt_enc']
    if text_enc == 'BayesianUniSkip':
        return BayesianUniSkip(skipthoughts_dir, vocab)
    if text_enc == 'UniSkip':
        return UniSkip(skipthoughts_dir, vocab)
    if text_enc == 'BiSkip':
        return BiSkip(skipthoughts_dir, vocab)
    if text_enc == 'DropUniSkip':
        return DropUniSkip(skipthoughts_dir, vocab)
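Note that the if chain above silently returns None for an unrecognized txt_enc. As a sketch only (the four class names come from the snippet; the table-based rewrite does not come from the original source), the same dispatch can be written with a lookup table and an explicit error:

from skipthoughts import BayesianUniSkip, UniSkip, BiSkip, DropUniSkip

TEXT_ENCODERS = {
    'BayesianUniSkip': BayesianUniSkip,
    'UniSkip': UniSkip,
    'BiSkip': BiSkip,
    'DropUniSkip': DropUniSkip,
}

def get_text_enc(config, vocab):
    skipthoughts_dir, text_enc = config['skipthoughts_dir'], config['txt_enc']
    if text_enc not in TEXT_ENCODERS:
        raise ValueError('Unknown txt_enc: %s' % text_enc)
    return TEXT_ENCODERS[text_enc](skipthoughts_dir, vocab)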
Example #3
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load vocab_list for uniskip
    vocab_list = pd.read_csv("./data/vocab_list.csv", header=None)
    vocab_list = vocab_list.values.tolist()[0]

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.img_embeddings_dir,
                             args.data_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # Build the models
    attention = T_Att()
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers, args.dropout)
    uniskip = UniSkip('./data/skip-thoughts', vocab_list)
    decoder.eval()  # inference only: disable dropout

    if torch.cuda.is_available():
        attention.cuda()
        decoder.cuda()
        uniskip.cuda()

    attention.load_state_dict(torch.load(args.attention_path))
    decoder.load_state_dict(torch.load(args.decoder_path))

    for i, (images, captions, cap_lengths, qa, qa_lengths,
            vocab_words) in enumerate(data_loader):

        # Set mini-batch dataset (volatile: inference, no gradients needed)
        img_embeddings = to_var(images.data, volatile=True)
        captions = to_var(captions)

        # Encode the captions with the pretrained skip-thoughts model
        cap_embeddings = uniskip(captions, cap_lengths)
        cap_embeddings = cap_embeddings.data
        img_embeddings = img_embeddings.data
        ctx_vec = attention(img_embeddings, cap_embeddings)
        outputs = decoder.sample(ctx_vec)
        output_ids = outputs.cpu().data.numpy()
        qa = qa.numpy()[0]  # ground-truth ids for the first item in the batch

        # Decode the predicted ids back into words
        sample = []
        for word_id in output_ids:
            sample.append(vocab.idx2word[word_id])
        sample = ' '.join(sample)

        # Decode the ground-truth question/answer ids
        actual = []
        for word_id in qa:
            word = vocab.idx2word[word_id]
            actual.append(word)
        actual = ' '.join(actual)
        print("actual_qa : " + actual + " | predicted_qa : " + sample)
Example #4
	# Adds the words to the vocabulary.
	for word in words:
		vocab.add_word(word)
	return vocab

vocab = build_vocab()
with open('birdsdataset/birds_vocab.pkl', 'wb') as f:
	pickle.dump(vocab, f)
print('Saved vocabulary to birdsdataset/birds_vocab.pkl')

#with open('birdsdataset/birds_vocab.pkl', 'rb') as f:
#	vocab = pickle.load(f)

all_words_in_vocab = vocab.word2idx.keys()
uniskip = UniSkip(dir_st, all_words_in_vocab)

def get_ids(tokens, vocab):
	ids = []
	# appending start and eos at the beginning and the end respectively for every sequence
	ids.append(vocab('<start>'))
	for word in tokens:
		ids.append(vocab(word))
	ids.append(vocab('<end>'))
	return ids
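
# Illustrative call (hypothetical tokens; assumes the Vocabulary wrapper sketched
# further below, which resolves unknown words to '<unk>'):
#   get_ids(['a', 'small', 'bird'], vocab)
#   -> [vocab('<start>'), vocab('a'), vocab('small'), vocab('bird'), vocab('<end>')]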

for _class in sorted(os.listdir(embedding_path)):
	split = ''
	if _class in train_classes:
		split = train
	elif _class in val_classes:
		split = val
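These snippets all lean on a small Vocabulary wrapper: add_word for building, word2idx/idx2word for lookups, and a __call__ that resolves unknown words to <unk>. The class definition is not shown in the examples; the following is a minimal sketch consistent with that usage:

class Vocabulary(object):
    """Simple word <-> id wrapper, inferred from how the snippets use it."""

    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        if word not in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        if word not in self.word2idx:
            return self.word2idx['<unk>']  # unknown tokens fall back to <unk>
        return self.word2idx[word]

    def __len__(self):
        return len(self.word2idx)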
Example #5
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    
    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])
    
    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    
    # Load vocab_list for uniskip
    vocab_list = pd.read_csv("./data/vocab_list.csv", header=None)
    vocab_list = vocab_list.values.tolist()[0]
    
    # Build data loader
    data_loader = get_loader(args.image_dir, args.img_embeddings_dir, args.data_path, vocab, 
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers) 

    # Build the models
    attention = T_Att()
    decoder = DecoderRNN(args.embed_size, args.hidden_size,
                         len(vocab), args.num_layers, args.dropout)
    uniskip = UniSkip('./data/skip-thoughts', vocab_list)
    if torch.cuda.is_available():
        attention.cuda()
        decoder.cuda()
        uniskip.cuda()

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(attention.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)
    
    # Train the Models
    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        for i, (images, captions, cap_lengths, qa, qa_lengths, vocab_words) in enumerate(tqdm(data_loader)):
            
            # Re-initialize the decoder hidden state
            decoder.hidden = decoder.init_hidden()

            # Set mini-batch dataset
            img_embeddings = to_var(images.data, volatile=True)
            captions = to_var(captions)
            qa = to_var(qa)
            targets = pack_padded_sequence(qa, qa_lengths, batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            attention.zero_grad()
            cap_embeddings = uniskip(captions, cap_lengths)
            cap_embeddings = cap_embeddings.data
            img_embeddings = img_embeddings.data
            ctx_vec = attention(img_embeddings, cap_embeddings)
            outputs = decoder(ctx_vec, qa, qa_lengths)
            predicted = outputs.max(1)[1]  # greedy argmax ids (only used for debugging)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            #pred_ids = []
            #print(predicted.size())
            # pred_ids.append(predicted)
            # Print log info
            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step, 
                        loss.data[0], np.exp(loss.data[0]))) 

            # Save the models
            if (i + 1) % args.save_step == 0:
                torch.save(decoder.state_dict(), 
                           os.path.join(args.model_path, 
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(attention.state_dict(), 
                           os.path.join(args.model_path, 
                                        'attention-%d-%d.pkl' %(epoch+1, i+1)))
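
The files written by torch.save above are plain state_dict checkpoints, which is exactly what Example #3 reloads via load_state_dict. A minimal reload sketch (file names are illustrative; map_location lets GPU-saved weights load on a CPU-only machine):

import torch

attention = T_Att()
decoder = DecoderRNN(args.embed_size, args.hidden_size,
                     len(vocab), args.num_layers, args.dropout)

attention.load_state_dict(torch.load(
    'models/attention-1-1000.pkl',
    map_location=lambda storage, loc: storage))
decoder.load_state_dict(torch.load(
    'models/decoder-1-1000.pkl',
    map_location=lambda storage, loc: storage))
decoder.eval()  # disable dropout for generation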