# --- tail of an index->word decoding helper (its `def` header is outside this
# fragment; the bare `return` below belongs to it). Converts a (batch, seq_len)
# tensor of vocabulary indices into nested lists of word strings via the
# `ind2word` lookup table defined elsewhere in the file.
index_np = index.numpy()
sentences = []
for batch in range(index_np.shape[0]):
    sentence = []
    for i in range(index_np.shape[1]):
        # NOTE(review): assumes every index in the tensor is a valid key of
        # ind2word — out-of-vocab indices would raise KeyError; confirm upstream.
        sentence.append(ind2word[index_np[batch, i]])
    sentences.append(sentence)
return sentences


if __name__ == '__main__':
    # Qualitative evaluation: load a trained ShowTell model and print decoded
    # captions for batches from the validation split.
    model = ShowTellNet()
    model.load_state_dict(torch.load("./model_param/epoch27.pth"))
    test_loader = DataLoader(VisDialDataset(None, 'val'), collate_fn=visdial_collate_fn,
                             batch_size=2, shuffle=True, num_workers=4)
    for cnt, batched in enumerate(test_loader):
        img = batched['features']
        cap = batched['captions']
        cap_pred = model.predict(img)
        # print("pred: ", cap_pred)
        # print("origin: ", cap)
        # Decode both ground-truth and predicted index tensors to words.
        sentence_origin = index2word(cap)
        sentence_pred = index2word(cap_pred)
        # NOTE(review): sentence_origin is computed but never printed/used here.
        print("pred: ", sentence_pred)
# Evaluation-script preamble: imports plus command-line argument definitions.
import json
import math
import os

from tqdm import tqdm
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataloader import VisDialDataset
from encoders import Encoder
from decoders import Decoder
from utils import process_ranks, compute_ranks_gt, compute_ranks_nogt

# NOTE(review): `argparse` is used below but does not appear in the imports
# above — add `import argparse` (or it must be imported elsewhere in the file).
parser = argparse.ArgumentParser()
# Let the dataset class register its own CLI flags on the same parser.
VisDialDataset.add_cmdline_args(parser)

parser.add_argument_group('Evaluation related arguments')
parser.add_argument('-load_path', default='checkpoints/model.pth', help='Checkpoint to load path from')
parser.add_argument('-split', default='val', choices=['val', 'test'], help='Split to evaluate on')
parser.add_argument('-use_gt', action='store_true', help='Whether to use ground truth for retrieving ranks')
parser.add_argument('-batch_size', default=12, type=int, help='Batch size')
parser.add_argument('-gpuid', default=0, type=int, help='GPU id to use')
# (fragment ends mid-statement — the next add_argument call continues beyond
# this chunk)
parser.add_argument(
def forward(self, cap_index, pred):
    """Position-weighted negative log-likelihood for caption prediction.

    For every position i the log-probability of the *next* ground-truth token
    (``cap_index[batch, i+1]``) under ``pred[batch, i, :]`` is accumulated,
    scaled by ``(1 - i/num) ** 2`` so earlier tokens contribute more to the
    loss than later ones.

    Args:
        cap_index: integer tensor of shape (batch, seq_len) with ground-truth
            token indices.
        pred: float tensor of shape (batch, seq_len, vocab) holding per-step
            token *probabilities* (log is taken here, so these must not be
            raw logits).

    Returns:
        A scalar tensor: the weighted NLL summed over the whole batch.
    """
    loss_sum = 0
    for batch in range(cap_index.shape[0]):
        num = cap_index.shape[1]
        # The last position has no "next token" to score, so iterate one
        # short of the sequence length (replaces the original in-loop
        # `if i+1 == shape[1]: break`).
        for i in range(num - 1):
            ind = cap_index[batch, i + 1]
            # (1 - i/num)**2 is numerically identical to np.square(1 - i/num)
            # but drops the numpy dependency (np was never imported in this
            # fragment, so the original would raise NameError at runtime).
            loss_sum = loss_sum - (1 - i / num) ** 2 * torch.log(pred[batch, i, ind])
    return loss_sum


if __name__ == "__main__":
    # Smoke-test: run the loss on a single training batch with a pre-trained
    # ShowTell network, print it, and stop.
    train_loader = DataLoader(VisDialDataset(None, 'train'), collate_fn=visdial_collate_fn,
                              batch_size=4, shuffle=True, num_workers=4)
    net = ShowTellNet()
    loss_fn = LossNet()
    net.load_state_dict(torch.load("model_param/epoch18.pth"))
    # train
    for cnt, batched in enumerate(train_loader):
        cap = batched['captions']
        img = batched['features']
        # one_hot_cap = one_hot_encoder(cap)
        seq_prob, (h_t, c_t) = net(img, cap)
        loss = loss_fn(cap, seq_prob)
        print(loss)
        break
    # predict
# Training-script preamble: imports plus encoder/decoder and optimization
# command-line arguments.
import gc
import math
import os

import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from dataloader import VisDialDataset
from encoders import EncoderArgs, Encoder
from decoders import Decoder

# NOTE(review): `argparse` is used below but is missing from the imports
# above — add `import argparse` (or it must be imported elsewhere in the file).
parser = argparse.ArgumentParser()
# Let the dataset class register its own CLI flags on the same parser.
VisDialDataset.add_cmdline_args(parser)

parser.add_argument_group('Encoder Decoder choice arguments')
parser.add_argument('-encoder', default='lf-ques-im-hist', choices=['lf-ques-im-hist', 'hr-ques-im-hist'], help='Encoder to use for training')
parser.add_argument('-decoder', default='disc', choices=['disc'], help='Decoder to use for training')

parser.add_argument_group('Optimization related arguments')
parser.add_argument('-num_epochs', default=20, type=int, help='Epochs')
parser.add_argument('-batch_size', default=12, type=int, help='Batch size')
parser.add_argument('-lr', default=1e-3, type=float, help='Learning rate')
# Setup #--------------------------------------------------------------------------------------- # Read the command line options params = options.readCommandLine() # Seed rng for reproducibility random.seed(params['randomSeed']) torch.manual_seed(params['randomSeed']) if params['useGPU']: torch.cuda.manual_seed_all(params['randomSeed']) # Setup dataloader splits = ['train', 'val', 'test'] dataset = VisDialDataset(params, splits) # Params to transfer from dataset transfer = ['vocabSize', 'numOptions', 'numRounds'] for key in transfer: if hasattr(dataset, key): params[key] = getattr(dataset, key) # Create save path and checkpoints folder os.makedirs('/hhd/lvxinyu/aqm_plus/checkpoints/', exist_ok=True) os.mkdir(params['savePath']) # Loading Modules parameters = [] aBot = None qBot = None
# Generate sentence from tokens with open('data/cache/word2ind_32525.pkl', 'rb') as f: word2ind = pkl.load(f) with open('data/cache/ind2word_32525.pkl', 'rb') as f: ind2word = pkl.load(f) # Seed rng for reproducibility random.seed(params.seed) torch.manual_seed(params.seed) if params.cuda: torch.cuda.manual_seed_all(params.seed) # Setup dataloader splits = ['train', 'val', 'test'] train_loader = DataLoader(VisDialDataset(params, 'train'), collate_fn=visdial_collate_fn, batch_size=params.batch_size, shuffle=True, num_workers=params.n_works) val_loader = DataLoader(VisDialDataset(params, 'val'), collate_fn=visdial_collate_fn, batch_size=params.batch_size, shuffle=True, num_workers=params.n_works) # --------------------------------------------------------------------------------------- # Training # --------------------------------------------------------------------------------------- def batch_data(entry): features = entry['features'] captions = entry['captions'].long() questions = entry['questions'].long() answers = entry['answers'].long()
# Evaluation setup: read CLI options, seed RNGs, and build the val/test dataset.
params = options.readCommandLine()

# Fixed seed for reproducible evaluation runs.
manualSeed = 1234
random.seed(manualSeed)
torch.manual_seed(manualSeed)
if params['useGPU']:
    torch.cuda.manual_seed_all(manualSeed)

# setup dataloader
# Evaluation always needs image features and dialog history, regardless of
# what the checkpoint was trained with.
dlparams = params.copy()
dlparams['useIm'] = True
dlparams['useHistory'] = True
dlparams['numRounds'] = 10
splits = ['val', 'test']
dataset = VisDialDataset(dlparams, splits)

# Transferring dataset parameters
transfer = ['vocabSize', 'numOptions', 'numRounds']
for key in transfer:
    if hasattr(dataset, key):
        params[key] = getattr(dataset, key)

# Fallback when the dataset did not expose numRounds.
if 'numRounds' not in params:
    params['numRounds'] = 10

# Always load checkpoint parameters with continue flag
params['continue'] = True

# Checkpoint params that must NOT override the current run's settings.
# (List continues beyond this fragment.)
excludeParams = ['batchSize', 'visdomEnv', 'startFrom', 'qstartFrom', 'trainMode', \
                 'evalModeList', 'inputImg', 'inputQues', 'inputJson', 'evalTitle', 'beamSize', \