# NOTE(review): flat driver-script fragment from a whitespace-collapsed source;
# `model_path`, `args`, `device`, `save_dir`, `run`, `load`, `get_models`,
# `get_word_map`, `data_normalization`, `load_lm_model` and `Corpus` are all
# defined outside this view.  Indentation below is reconstructed — confirm
# against the original file.

# Load the trained encoder/decoder captioning models and the vocabulary maps.
encoder, decoder = get_models(model_path)
word_map, rev_word_map = get_word_map(run_local=args.run_local)

# Pre-processing pipeline: resize to the encoder's expected 336x336 input,
# convert to tensor, then apply the dataset normalisation.
transform = transforms.Compose([
    transforms.Resize((336, 336)),
    transforms.ToTensor(),
    data_normalization
])

# NOTE(review): the inner os.path.join is redundant — expanduser('~') already
# returns the full home path.
desktop_path = os.path.join(os.path.join(os.path.expanduser('~')), 'Desktop')

# Language model and its training corpus — presumably consumed further down
# in the script; TODO confirm they are actually used.
lm_model = load_lm_model()
corpus = Corpus('../../../language_model/word_language_model/data_dir')

# image_name = 'Fork.png'
############
# Caption every RGB image in the 'custom' dataset, one image per batch.
dataloader = load('custom', args.run_local, 1, 1)
for i, data in tqdm(enumerate(dataloader)):
    image = data[0].to(device)
    # Skip non-RGB inputs (e.g. grayscale or RGBA) — downstream expects 3 channels.
    if image.shape[1] != 3:
        continue
    # data[1][0] is presumably the image title/caption — verify against `load`.
    run(encoder, decoder, word_map, rev_word_map, save_dir, image, data[1][0])
#########
#
# Dead alternative path kept from the original: caption images dropped on the
# user's Desktop instead of a dataloader.
# for image_name in os.listdir(os.path.join(desktop_path, 'custom_images')):
#     image_path = 'custom_images/{}'.format(image_name)
#     img = Image.open(os.path.join(desktop_path, image_path))
#
#     try:
#         image = transform(img).unsqueeze(0)
# NOTE(review): fragment of a whitespace-collapsed driver script; `device`,
# `args`, `encoder`, `decoder`, `word_map`, `rev_word_map`, `representations`,
# `caption_image_beam_search`, `hp_metric_dic` and
# `generated_sentences_likelihood` are defined outside this view.

image = image.unsqueeze(0)  # (1, 3, 256, 256)
# subsec: move to device
image = image.to(device)
# subsec: run beam search
seq_, top_seq_total_scors_, seq_sum_, words = caption_image_beam_search(
    encoder, decoder, image, word_map, rev_word_map, args, representations)
# FIX: `not None == words` replaced with the idiomatic identity test
# `words is not None` (same behavior for caption strings, clearer intent).
if words is not None:
    hp_metric_dic['annotations'].append({u'image_id': None, u'caption': words})
if seq_sum_ is not None:
    generated_sentences_likelihood.append((None, seq_sum_))

if args.data == 'cartoon':
    # Caption every RGB image of the cartoon dataset, one image per batch.
    dataloader = load('cartoon', args.run_local, 1, 1)
    for bi, data in tqdm(enumerate(dataloader)):
        # subsec: move to device
        image = data[0].to(device)
        # Skip non-RGB inputs — downstream expects 3 channels.
        if image.shape[1] != 3:
            continue
        # FIX: dropped the redundant second `image = image.to(device)` —
        # the tensor is already on `device` from the assignment above.
        # subsec: run beam search
        seq_, top_seq_total_scors_, seq_sum_, words = caption_image_beam_search(
            encoder, decoder, image, word_map, rev_word_map, args,
            representations)
        if words is not None:
            hp_metric_dic['annotations'].append({u'image_id': bi,
                                                 u'caption': words})
        if seq_sum_ is not None:
            generated_sentences_likelihood.append((bi, seq_sum_))
# NOTE(review): the statements before `if __name__` appear to be the tail of a
# `run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title,
# image_id)` function whose `def` line is outside this view; the collapsed
# source carries no indentation, so they are reproduced at top level —
# confirm placement against the original file.

# Decode the image with beam search, then render per-word attention maps for
# the best sequence.
seq, alphas, top_seq_total_scors, seq_sum, logits_list = beam_search_decode(
    encoder, image, args.beam_size, word_map, decoder)
alphas = torch.FloatTensor(alphas)
visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir,
              image_id, args.smooth)
# FIX: the original opened the append-mode log without ever closing it,
# leaking one file handle per image; a context manager closes it reliably.
with open(os.path.join(save_dir, 'seq_sum.txt'), 'a+') as f:
    f.write('seq_sum: {} for image id: {} with caption: {}\n'.format(
        seq_sum, image_id, image_title))
print('seq_sum: {}'.format(seq_sum))


if __name__ == '__main__':
    save_dir_name = '{}_{}'.format(args.beam_size, args.save_dir_name)
    model_path, save_dir = get_model_path_and_save_path(args, save_dir_name)

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    # Caption the whole flicker dataset, one image per batch.
    dataloader = load('flicker', args.run_local, 1, 1)
    for ind, image_data in enumerate(dataloader):
        image = image_data[0].unsqueeze(0)
        # Each image carries several reference captions; keep the shortest.
        image_title = min(image_data[1], key=len)
        image_id = dataloader.ids[ind]
        run(encoder, decoder, word_map, rev_word_map, save_dir, image,
            image_title, image_id)
# NOTE(review): the statements before `if __name__` appear to be the tail of a
# `run(encoder, decoder, word_map, rev_word_map, save_dir, image, image_title,
# image_id)` function whose `def` line (and the assignment producing `seq`,
# `alphas`, `top_seq_total_scors`, `seq_sum`) is outside this view; the
# collapsed source carries no indentation, so they are reproduced at top
# level — confirm placement against the original file.

alphas = torch.FloatTensor(alphas)
visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir,
              image_title, args.smooth)
# FIX: the original opened the append-mode log without ever closing it,
# leaking one file handle per image; a context manager closes it reliably.
with open(os.path.join(save_dir, 'seq_sum.txt'), 'a+') as f:
    f.write('seq_sum: {} for image id: {} with caption: {}\n'.format(
        seq_sum, image_id, image_title))
print('seq_sum: {}'.format(seq_sum))


if __name__ == '__main__':
    save_dir_name = '{}_{}'.format(args.beam_size, args.save_dir_name)
    model_path, save_dir = get_model_path_and_save_path(args, save_dir_name)

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    # FIX: the punctuation-stripping translation table is loop-invariant —
    # build it once instead of once per image.
    translator = str.maketrans('', '', string.punctuation)

    # Caption the whole sbu dataset, one image per batch.
    dataloader = load('sbu', args.run_local, 1, 1)
    for ind, image_data in enumerate(dataloader):
        image = image_data[0]
        # Strip all punctuation from the reference caption.
        image_title = image_data[1][0].translate(translator)
        image_id = dataloader.dataset.photos[ind]
        # image_title = image_data[1][0].strip(string.punctuation)
        run(encoder, decoder, word_map, rev_word_map, save_dir, image,
            image_title, image_id)
# NOTE(review): `decoder, image, word_map, top_k, top_p)` below is the tail of
# a call expression whose opening (and the `def run(...)` header that appears
# to enclose this fragment) lies outside this view; the tokens are kept
# byte-identical rather than guessed at.
decoder, image, word_map, top_k, top_p)
alphas = torch.FloatTensor(alphas)
# Render per-word attention maps for the decoded sequence.
visualize_att(image, seq, alphas, rev_word_map, top_seq_total_scors, save_dir,
              image_title, args.smooth)


if __name__ == '__main__':
    # Sampling hyper-parameters: top_k > 0 selects top-k sampling, otherwise
    # top_p (nucleus) sampling is used for the save-dir name below.
    top_k = 5  # NOTICE: int
    top_p = 0  # NOTICE: double
    model_path, save_dir = get_model_path_and_save_path(
        args,
        'top_k_{}'.format(top_k) if top_k > 0 else 'top_p_{}'.format(top_p))

    # Load model
    encoder, decoder = get_models(model_path)

    # Create rev word map
    word_map, rev_word_map = get_word_map()

    # NOTE(review): run_local is hardcoded to True here, unlike the other
    # drivers which pass args.run_local — confirm this is intentional.
    dataloader = load('custom', True, 1, 1)
    for ind, image_data in enumerate(dataloader):
        image = image_data[0]
        image_title = image_data[1][0]
        run(encoder, decoder, word_map, rev_word_map, save_dir, top_k, top_p,
            image, image_title)