# flickr doesnt need to be split at the root node def iterate_data(h5_file): for x in h5_file.root: yield x f_nodes = [node for node in iterate_data(data_file)] # split the database into train test and validation sets. default settings uses the json file # with the karpathy split train, test, val = split_data(f_nodes, args.split_loc) ############################### Neural network setup ################################################# # network modules img_net = img_encoder(image_config) cap_net = text_gru_encoder(token_config) # Adam optimiser. I found SGD to work terribly and could not find appropriate parameter settings for it. optimizer = torch.optim.Adam( list(img_net.parameters()) + list(cap_net.parameters()), 1) #plateau_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', factor = 0.9, patience = 100, # threshold = 0.0001, min_lr = 1e-8, cooldown = 100) #step_scheduler = lr_scheduler.StepLR(optimizer, 1000, gamma=0.1, last_epoch=-1) def create_cyclic_scheduler(max_lr, min_lr, stepsize): lr_lambda = lambda iteration: (max_lr - min_lr) * (0.5 * (np.cos(np.pi * ( 1 + (3 - 1) / stepsize * iteration)) + 1)) + min_lr cyclic_scheduler = lr_scheduler.LambdaLR(optimizer,
yield y f_nodes = [node for node in iterate_data(data_file)] # split the database into train test and validation sets. default settings uses the json file # with the karpathy split train, val = split_data_coco(f_nodes) # set aside 5000 images as test set test = train[-5000:] train = train[:-5000] ##################################################### # network modules img_net = img_encoder(image_config) cap_net = text_gru_encoder(char_config) # list all the trained model parameters models = os.listdir(args.results_loc) caption_models = [x for x in models if 'caption' in x] img_models = [x for x in models if 'image' in x] # create a trainer with just the evaluator for the purpose of testing a pretrained model trainer = flickr_trainer(img_net, cap_net, args.visual, args.cap) trainer.set_raw_text_batcher() # optionally use cuda if cuda: trainer.set_cuda() trainer.set_evaluator([1, 5, 10]) for img, cap in zip(img_models, caption_models):
'gru': { 'input_size': 20, 'hidden_size': 1024, 'num_layers': 1, 'batch_first': True, 'bidirectional': True, 'dropout': 0 }, 'att': { 'in_size': 2048, 'hidden_size': 128, 'heads': 1 } } # create encoder encoder = text_gru_encoder(text_config) for p in encoder.parameters(): p.requires_grad = False encoder.cuda() # load pretrained model encoder_state = torch.load(PATH_TO_ENC) encoder.load_state_dict(encoder_state) # Set params for SentEval params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5} params_senteval['classifier'] = { 'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128, 'tenacity': 3, 'epoch_size': 10