예제 #1
0
    # NOTE(review): interior of a token-index -> word decoding helper; the
    # enclosing `def` line is outside this view, so the exact signature is
    # assumed to be (index,) — confirm against the full file.
    index_np = index.numpy()  # tensor -> numpy for plain integer indexing
    sentences = []
    # One word-list per batch row; every token id is mapped through the
    # ind2word vocabulary (presumably a module-level dict/list — verify).
    for batch in range(index_np.shape[0]):
        sentence = []
        for i in range(index_np.shape[1]):
            sentence.append(ind2word[index_np[batch, i]])
        sentences.append(sentence)
    return sentences


if __name__ == '__main__':
    # Inference driver: restore trained weights, caption a couple of
    # validation images, and show predicted vs. ground-truth captions.
    model = ShowTellNet()
    model.load_state_dict(torch.load("./model_param/epoch27.pth"))

    test_loader = DataLoader(VisDialDataset(None, 'val'),
                             collate_fn=visdial_collate_fn,
                             batch_size=2,
                             shuffle=True,
                             num_workers=4)

    for cnt, batched in enumerate(test_loader):
        img = batched['features']      # image features fed to the model
        cap = batched['captions']      # ground-truth caption token indices
        cap_pred = model.predict(img)  # predicted caption token indices

        # Decode both index tensors back to word lists for display.
        sentence_origin = index2word(cap)
        sentence_pred = index2word(cap_pred)
        print("pred: ", sentence_pred)
        # BUGFIX: sentence_origin was computed but never shown (its print was
        # commented out), leaving the ground-truth decode as dead code.
        print("origin: ", sentence_origin)
예제 #2
0
import argparse
import json
import math
import os

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from dataloader import VisDialDataset
from encoders import Encoder
from decoders import Decoder
from utils import process_ranks, compute_ranks_gt, compute_ranks_nogt

# Command-line setup for the evaluation script.
# NOTE(review): `argparse` must be imported at the top of this file; no
# `import argparse` was visible in the original import block.
parser = argparse.ArgumentParser()
VisDialDataset.add_cmdline_args(parser)  # dataset registers its own flags

# BUGFIX: the original discarded the group returned by add_argument_group and
# kept calling parser.add_argument directly, so the 'Evaluation related
# arguments' heading never actually grouped these options in --help output.
# Adding them through the group object fixes the help text; parsing of the
# options themselves is unchanged.
eval_group = parser.add_argument_group('Evaluation related arguments')
eval_group.add_argument('-load_path',
                        default='checkpoints/model.pth',
                        help='Checkpoint to load path from')
eval_group.add_argument('-split',
                        default='val',
                        choices=['val', 'test'],
                        help='Split to evaluate on')
eval_group.add_argument('-use_gt',
                        action='store_true',
                        help='Whether to use ground truth for retrieving ranks')
eval_group.add_argument('-batch_size', default=12, type=int, help='Batch size')
eval_group.add_argument('-gpuid', default=0, type=int, help='GPU id to use')
parser.add_argument(
예제 #3
0
    def forward(self, cap_index, pred):
        """Position-weighted negative log-likelihood over a caption batch.

        For each batch row the target at step ``i`` is the *next* token
        ``cap_index[b, i + 1]``; its predicted probability is weighted by
        ``(1 - i / T) ** 2`` so early caption positions count more.

        Args:
            cap_index: (B, T) tensor of gold caption token indices.
            pred: (B, T, V) tensor of per-step token probabilities.

        Returns:
            Scalar tensor with the summed weighted loss (0 if the loop
            never runs, e.g. for an empty batch or single-token captions).
        """
        n_batch = cap_index.shape[0]
        n_steps = cap_index.shape[1]
        total = 0
        for b in range(n_batch):
            # Stop at n_steps - 1: the final position has no "next" token.
            for step in range(n_steps - 1):
                target = cap_index[b, step + 1]
                weight = np.square(1 - step / n_steps)
                total = total - weight * torch.log(pred[b, step, target])
        return total


if __name__=="__main__":
    train_loader = DataLoader(VisDialDataset(None, 'train'), collate_fn=visdial_collate_fn,
                              batch_size=4, shuffle=True, num_workers=4)
    net = ShowTellNet()
    loss_fn = LossNet()
    net.load_state_dict(torch.load("model_param/epoch18.pth"))
    # train
    for cnt, batched in enumerate(train_loader):
        cap = batched['captions']
        img = batched['features']
        #one_hot_cap = one_hot_encoder(cap)
        seq_prob, (h_t, c_t)=net(img, cap)
        loss = loss_fn(cap, seq_prob)
        print(loss)
        break

    # predict
import gc
import math
import os

import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from dataloader import VisDialDataset
from encoders import EncoderArgs, Encoder
from decoders import Decoder

# Command-line setup for the training script.
# NOTE(review): `argparse` is used here but no `import argparse` appears in
# the import block directly above — confirm it is imported elsewhere.
parser = argparse.ArgumentParser()
VisDialDataset.add_cmdline_args(parser)  # dataset registers its own flags

# NOTE(review): the group objects returned by add_argument_group are
# discarded, so these headings do not actually group the options in --help.
parser.add_argument_group('Encoder Decoder choice arguments')
parser.add_argument('-encoder',
                    default='lf-ques-im-hist',
                    choices=['lf-ques-im-hist', 'hr-ques-im-hist'],
                    help='Encoder to use for training')
parser.add_argument('-decoder',
                    default='disc',
                    choices=['disc'],
                    help='Decoder to use for training')

parser.add_argument_group('Optimization related arguments')
parser.add_argument('-num_epochs', default=20, type=int, help='Epochs')
parser.add_argument('-batch_size', default=12, type=int, help='Batch size')
parser.add_argument('-lr', default=1e-3, type=float, help='Learning rate')
# Setup
#---------------------------------------------------------------------------------------

# Read the command line options
# NOTE(review): `options` and `random` are not imported in the visible part of
# this file — confirm they are imported further up.
params = options.readCommandLine()

# Seed rng for reproducibility
random.seed(params['randomSeed'])
torch.manual_seed(params['randomSeed'])
if params['useGPU']:
    torch.cuda.manual_seed_all(params['randomSeed'])

# Setup dataloader
splits = ['train', 'val', 'test']

dataset = VisDialDataset(params, splits)

# Params to transfer from dataset
# Copy dataset-derived sizes into `params` so downstream model construction
# sees them without touching the dataset object again.
transfer = ['vocabSize', 'numOptions', 'numRounds']
for key in transfer:
    if hasattr(dataset, key):
        params[key] = getattr(dataset, key)

# Create save path and checkpoints folder
# NOTE(review): hard-coded absolute path is machine-specific; consider making
# it configurable.  Also, `os.mkdir` raises FileExistsError when `savePath`
# already exists — possibly intentional (protects old checkpoints); confirm.
os.makedirs('/hhd/lvxinyu/aqm_plus/checkpoints/', exist_ok=True)
os.mkdir(params['savePath'])

# Loading Modules
parameters = []
aBot = None
qBot = None
예제 #6
0
# Generate sentence from tokens
# Vocabulary lookup tables cached as pickles: word2ind maps word -> index and
# ind2word the reverse (the 32525 suffix is presumably the vocab size).
# NOTE(review): `pkl`, `random` and `params` are defined outside this view —
# confirm the imports at the top of the file.
with open('data/cache/word2ind_32525.pkl', 'rb') as f:
    word2ind = pkl.load(f)
with open('data/cache/ind2word_32525.pkl', 'rb') as f:
    ind2word = pkl.load(f)

# Seed rng for reproducibility
random.seed(params.seed)
torch.manual_seed(params.seed)
if params.cuda:
    torch.cuda.manual_seed_all(params.seed)

# Setup dataloader
splits = ['train', 'val', 'test']
train_loader = DataLoader(VisDialDataset(params, 'train'), collate_fn=visdial_collate_fn,
                          batch_size=params.batch_size, shuffle=True, num_workers=params.n_works)
val_loader = DataLoader(VisDialDataset(params, 'val'), collate_fn=visdial_collate_fn,
                        batch_size=params.batch_size, shuffle=True, num_workers=params.n_works)


# ---------------------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------------------


def batch_data(entry):
    # Unpack the tensors of one collated batch, casting index tensors to long
    # so they can be used for embedding lookups / loss targets.
    # NOTE(review): this definition is cut off in this view (no return seen) —
    # consult the full file for the remainder.
    features = entry['features']
    captions = entry['captions'].long()
    questions = entry['questions'].long()
    answers = entry['answers'].long()
예제 #7
0
# NOTE(review): `options` and `random` are defined outside this view —
# confirm the imports at the top of the file.
params = options.readCommandLine()

# Fixed seed so evaluation runs are reproducible.
manualSeed = 1234
random.seed(manualSeed)
torch.manual_seed(manualSeed)
if params['useGPU']:
    torch.cuda.manual_seed_all(manualSeed)

# setup dataloader
# The dataloader gets its own copy of params with image and history inputs
# forced on, so evaluation always sees the full dialog context.
dlparams = params.copy()
dlparams['useIm'] = True
dlparams['useHistory'] = True
dlparams['numRounds'] = 10
splits = ['val', 'test']

dataset = VisDialDataset(dlparams, splits)

# Transferring dataset parameters
transfer = ['vocabSize', 'numOptions', 'numRounds']
for key in transfer:
    if hasattr(dataset, key):
        params[key] = getattr(dataset, key)

# Fallback in case the dataset did not expose numRounds.
if 'numRounds' not in params:
    params['numRounds'] = 10

# Always load checkpoint parameters with continue flag
params['continue'] = True

excludeParams = ['batchSize', 'visdomEnv', 'startFrom', 'qstartFrom', 'trainMode', \
                 'evalModeList', 'inputImg', 'inputQues', 'inputJson', 'evalTitle', 'beamSize', \