def nn_metrics():
    """Score the nearest-neighbor baseline captions against birds_fg ground truth."""
    image_root = eval_generation.determine_image_pattern('birds_fg', '')
    vocab_folder = eval_generation.determine_vocab_folder('birds_fg', '')
    vocab_file = '%s/%s.txt' % (vocab_folder, 'CUB_vocab_noUNK')
    vocab = open_txt(vocab_file)
    # Ground-truth annotations for the test split.
    anno_path_train = eval_generation.determine_anno_path('birds_fg', 'test')
    sg = eval_generation.build_sequence_generator(
        anno_path_train, 100, image_root, vocab=vocab, max_words=50)
    caption_experiment = eval_generation.CaptionExperiment(sg=sg)
    caption_experiment.score_generation(
        json_filename='generated_sentences/nearest_neighbor_baseline.json')
def extract_train_val(args): #Initialize captions experiment = {'type': 'generation'} experiment['prev_word_restriction'] = args.prev_word strategy_name = 'gt' dataset_subdir = '%s_%s' % (args.dataset_name, args.split_name) dataset_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir, args.model_name[0]) feature_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir, args.model_name[0]) cache_dir = '%s/%s' % (dataset_cache_dir, strategy_name) captioner, sg, dataset = eval_generation.build_captioner( [args.model_name], args.image_net, args.LM_net, args.dataset_name, args.split_name, args.vocab, args.precomputed_h5, args.gpu, experiment['prev_word_restriction']) save_activation = 'lstm2' experimenter = eval_generation.CaptionExperiment(captioner, dataset, feature_cache_dir, cache_dir, sg) experimenter.descriptor_filename = experimenter.images num_descriptors = len(experimenter.descriptor_filename) experimenter.compute_descriptors(0) descriptor_files = experimenter.descriptor_filename #add class condition descriptor_labels = [ df.split('/')[-2].split('.')[0] for df in descriptor_files ] # size_input_feature = 200 # concat_descriptors = np.zeros((num_descriptors, size_input_feature)) # for i in range(num_descriptors): # binary_vec = np.zeros((200,)) # binary_vec[int(descriptor_labels[i])-1] = 1 # concat_descriptors[i,] = binary_vec # # experimenter.descriptors = concat_descriptors descriptor_dict = {} for name, des in zip(experimenter.descriptor_filename, experimenter.descriptors): descriptor_dict[name] = des #generate sentences cont = np.ones((20, 1000)) cont[0, :] = 0 input_sent = np.zeros((20, 1000)) save_activation_mat = np.zeros((20, len(experimenter.captions), 1000)) net = captioner.lstm_nets[0] max_batch_size = 1000 im_list = [] for i in range(0, len(experimenter.captions), max_batch_size): print i, len(experimenter.captions) batch_size = min(max_batch_size, len(experimenter.captions) - i) image_features = np.zeros((batch_size, args.size_input_feature)) cont_in = 
cont[:, :batch_size] sent_in = input_sent[:, :batch_size] for idx, caption in enumerate(experimenter.captions[i:i + batch_size]): c = caption['caption'] im = caption['source_image'] sent_in[0:min(20, len(c)), idx] = c[:min(20, len(c))] image_features[idx, :] = descriptor_dict[im] im_list.append(im) net.blobs['cont_sentence'].reshape(20, batch_size) net.blobs['input_sentence'].reshape(20, batch_size) net.blobs['cont_sentence'].data[...] = cont_in net.blobs['input_sentence'].data[...] = sent_in if image_input: net.blobs['image_features'].reshape(batch_size, args.size_input_feature) net.blobs['image_features'].data[...] = image_features net.forward() save_activation_mat[:, i:i + batch_size, :] = copy.deepcopy( net.blobs[save_activation].data) average_weights = np.zeros( (save_activation_mat.shape[1], save_activation_mat.shape[2])) for ix, caption in enumerate(experimenter.captions): len_cap = min(20, len(caption['caption'])) average_weights[ix, :] = np.mean(save_activation_mat[:len_cap, ix, :], axis=0) # mat_file = '%s_%s_gt_0930.mat' %(args.model_name.split('/')[-1], args.split_name) # sio.savemat(mat_file, {'files': im_list, 'average_weights': average_weights}) # print "Saved mat file to %s." %mat_file class_weights = np.zeros((200, 1000)) class_count = np.zeros((200, )) for i, f in enumerate(im_list): c = int(f.split('/')[-2].split('.')[0]) - 1 class_weights[c, :] += average_weights[i, :] class_count[c] += 1 for i in range(200): class_weights[i, :] /= class_count[i] save_name = 'data/%s_%s_gt_0930.p' % (args.model_name.split('/')[-1], args.split_name) pkl.dump(class_weights, open(save_name, 'w')) print "Wrote file to: %s" % save_name
def compute_metrics(results):
    """Score the sentences in 'tmp_json_out.json' with the caption metrics.

    NOTE(review): ``sg`` is a free variable here (not a parameter) and the
    ``results`` argument is unused -- presumably the caller writes ``results``
    to 'tmp_json_out.json' before calling this; confirm.
    """
    caption_experiment = eval_generation.CaptionExperiment(sg=sg)
    caption_experiment.score_generation(json_filename='tmp_json_out.json')
def eval_class_caffe_model(args):
    """Generate and score captions from class-only descriptors (no image CNN).

    Builds one descriptor per image -- either a 200-d one-hot class vector or
    a 1000-d learned class embedding looked up from ``args.lookup_mat`` --
    feeds it to the language model, and decodes with beam size 1.  Class
    labels are taken from the CUB path, or from predicted labels when
    ``args.pred`` is set.
    """
    args.model_name = args.model_name.split(',')
    # BUG FIX: `experiment` was assigned the same dict literal twice; once is enough.
    experiment = {'type': 'generation'}
    experiment['prev_word_restriction'] = args.prev_word
    pred = args.pred

    # Set everything up.
    captioner, sg, dataset = eval_generation.build_captioner(
        args.model_name, None, args.LM_net, args.dataset_name, args.split_name,
        args.vocab, None, args.gpu, experiment['prev_word_restriction'])
    beam_size = 1
    strategy = {'type': 'beam', 'beam_size': beam_size}
    strategy_name = 'beam%d' % strategy['beam_size']
    dataset_subdir = '%s_%s' % (args.dataset_name, args.split_name)
    dataset_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir,
                                      args.model_name[0])
    feature_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir,
                                      args.model_name[0])
    if pred:
        dataset_cache_dir = '%s/%s/%s_pred' % (cache_home, dataset_subdir,
                                               args.model_name[0])
        feature_cache_dir = '%s/%s/%s_pred' % (cache_home, dataset_subdir,
                                               args.model_name[0])
    cache_dir = '%s/%s' % (dataset_cache_dir, strategy_name)
    experimenter = eval_generation.CaptionExperiment(
        captioner, dataset, feature_cache_dir, cache_dir, sg)
    experimenter.descriptor_filename = experimenter.images
    num_descriptors = len(experimenter.descriptor_filename)
    descriptor_files = experimenter.descriptor_filename

    # Class labels from the CUB directory name: ".../<id>.<name>/img.jpg".
    descriptor_labels = [
        df.split('/')[-2].split('.')[0] for df in descriptor_files
    ]
    if pred:
        # NOTE(review): hard-coded absolute path -- only valid on the original
        # author's machine; consider promoting to an argument.
        # BUG FIX: close the pickle file handle instead of leaking it.
        with open('/yy2/lisaanne/fine_grained/bilinear_features/finegrained/'
                  'bilinear_preds.p', 'r') as handle:
            label_dict = pkl.load(handle)
        descriptor_labels = [
            label_dict['/'.join(df.split('/')[-2:])] + 1
            for df in experimenter.images
        ]

    size_input_feature = args.size_input_features
    concat_descriptors = np.zeros((num_descriptors, size_input_feature))
    if size_input_feature == 200:
        # One-hot class vector; labels are 1-based.
        for i in range(num_descriptors):
            binary_vec = np.zeros((200,))
            binary_vec[int(descriptor_labels[i]) - 1] = 1
            concat_descriptors[i, ] = binary_vec
    if size_input_feature == 1000:
        # Learned class embedding rows indexed by 0-based class id.
        with open(args.lookup_mat, 'r') as handle:
            lookup_mat = pkl.load(handle)
        for i in range(num_descriptors):
            lookup_index = int(descriptor_labels[i]) - 1
            concat_descriptors[i, :] = lookup_mat[lookup_index, :]
    experimenter.descriptors = concat_descriptors

    # Generate descriptions.  Batching only when decoding is batchable.
    max_batch_size = 1000
    num_images = len(experimenter.images)
    do_batches = (strategy['type'] == 'beam' and strategy['beam_size'] == 1) or \
        (strategy['type'] == 'sample' and
         ('temp' not in strategy or strategy['temp'] in (1, float('inf'))) and
         ('num' not in strategy or strategy['num'] == 1))
    batch_size = min(max_batch_size, num_images) if do_batches else 1
    all_captions = [None] * num_images
    image_index = 0
    all_captions, image_index = experimenter.generate_captions(
        strategy, do_batches, batch_size, image_index=image_index)
    experimenter.save_and_score_generation(all_captions)
def eval_cc_caffe_model(args): experiment = {'type': 'generation'} args.model_name = args.model_name.split(',') experiment = {'type': 'generation'} experiment['prev_word_restriction'] = args.prev_word pred = args.pred #set everything up captioner, sg, dataset = eval_generation.build_captioner( args.model_name, args.image_net, args.LM_net, args.dataset_name, args.split_name, args.vocab, args.precomputed_h5, args.gpu, experiment['prev_word_restriction']) beam_size = 1 strategy = {'type': 'beam', 'beam_size': beam_size} strategy_name = 'beam%d' % strategy['beam_size'] dataset_subdir = '%s_%s' % (args.dataset_name, args.split_name) dataset_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir, args.model_name[0]) feature_cache_dir = '%s/%s/%s' % (cache_home, dataset_subdir, args.model_name[0]) if pred: dataset_cache_dir = '%s/%s/%s_pred' % (cache_home, dataset_subdir, args.model_name[0]) feature_cache_dir = '%s/%s/%s_pred' % (cache_home, dataset_subdir, args.model_name[0]) cache_dir = '%s/%s' % (dataset_cache_dir, strategy_name) experimenter = eval_generation.CaptionExperiment(captioner, dataset, feature_cache_dir, cache_dir, sg) captioner.set_image_batch_size(min(100, len(dataset.keys()))) #compute descriptors print 'Computing image descriptors' descriptor_labels = [ df.split('/')[-2].split('.')[0] for df in experimenter.images ] if pred: label_dict = pkl.load(open('data/bilinear_preds.p', 'r')) descriptor_labels = [ label_dict['/'.join(df.split('/')[-2:])] + 1 for df in experimenter.images ] experimenter.compute_descriptors(des_file_idx=0, file_load=False) num_descriptors = experimenter.descriptors.shape[0] descriptor_files = experimenter.descriptor_filename size_input_feature = args.size_input_features concat_descriptors = np.zeros((num_descriptors, size_input_feature)) num_descriptors = len(descriptor_labels) for i in range(num_descriptors): concat_descriptors[i, :1000] = experimenter.descriptors[i, :] if size_input_feature == 1001: for i in range(num_descriptors): 
concat_descriptors[i, -1] = float(descriptor_labels[i]) if size_input_feature == 1200: for i in range(num_descriptors): binary_vec = np.zeros((200, )) binary_vec[int(descriptor_labels[i]) - 1] = 1 concat_descriptors[i, -200:] = binary_vec * args.label_scale if size_input_feature == 2000: lookup_mat = pkl.load(open(args.lookup_mat, 'r')) for i in range(num_descriptors): lookup_index = int(descriptor_labels[i]) - 1 concat_descriptors[i, -1000:] = lookup_mat[lookup_index, :] experimenter.descriptors = concat_descriptors #generate descriptions max_batch_size = 1000 num_images = len(experimenter.images) do_batches = (strategy['type'] == 'beam' and strategy['beam_size'] == 1) or \ (strategy['type'] == 'sample' and ('temp' not in strategy or strategy['temp'] in (1, float('inf'))) and ('num' not in strategy or strategy['num'] == 1)) batch_size = min(max_batch_size, num_images) if do_batches else 1 all_captions = [None] * num_images image_index = 0 all_captions, image_index = experimenter.generate_captions( strategy, do_batches, batch_size, image_index=image_index) experimenter.save_and_score_generation(all_captions) check_equiv = 37 print descriptor_files[check_equiv] captions, caption_probs = experimenter.captioner.sample_captions( [concat_descriptors[check_equiv]], temp=float('inf'), min_length=2) print experimenter.captioner.sentence(captions[0]) print experimenter.captioner.sentence(all_captions[check_equiv])