def __init__(self, refexp_dataset_path, coco_data_path):
    """Constructor for GoogleRefexpEvalGeneration class for evaluation.

    Args:
      refexp_dataset_path: path for the Google Refexp dataset file.
      coco_data_path: path for the original coco dataset file
        (e.g. 'instances_train2014.json').
    """
    # handle refexp dataset file
    assert refexp_dataset_path, "Refexp dataset file missing!"
    self.refexp_dataset_path = refexp_dataset_path
    # Single-argument print(...) behaves identically under Python 2 and
    # also runs under Python 3 (the bare `print` statement does not).
    print('Loading Google Refexp dataset file for the generation task.')
    self.refexp_dataset = Refexp(refexp_dataset_path, coco_data_path)  # Need to check - change
    # Cache all ground-truth annotation ids as a frozenset for O(1)
    # membership tests during evaluation.
    self.gt_ann_ids_set = frozenset(
        self.refexp_dataset.getAnnIds())  # Need to check - change
def preprocess_refexp():
    """Build word<->index vocabularies and dataset statistics for the Google
    Refexp training captions and pickle them to disk.

    Produces three pickle files: the word-to-index map, the index-to-word
    map, and a stats dict currently holding only 'max_cap_len'.
    Indices 0 and 1 are reserved for the stop and start tokens.
    """
    # get annotations
    coco_filename = '../external/coco/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
    refexp = Refexp(refexp_filename, coco_filename)

    # ignore specific captions that have weird symbols in them
    # (frozenset: O(1) membership instead of scanning a list per id)
    bad_ref_ids = frozenset([146, 400, 923, 21409, 35384, 38589, 46630,
                             47673, 65639, 70715, 82742])
    ref_ids = [ref_id for ref_id in refexp.dataset['refexps']
               if ref_id not in bad_ref_ids]
    captions = [refexp.dataset['refexps'][ref_id]['raw'].encode('ascii')
                for ref_id in ref_ids]
    caption_seqs = [text_to_word_sequence(c) for c in captions]
    max_cap_len = max(len(seq) for seq in caption_seqs)
    database_stats = {'max_cap_len': max_cap_len}

    # create dictionaries for dataset
    unique = unique_words(caption_seqs)
    word_to_idx = {STOP_TOKEN: STOP_TOKEN_IDX, START_TOKEN: START_TOKEN_IDX}
    idx_to_word = {STOP_TOKEN_IDX: STOP_TOKEN, START_TOKEN_IDX: START_TOKEN}
    for i, word in enumerate(unique):
        # Start indices at 2 since 0 and 1 are reserved for start and stop tokens
        word_to_idx[word] = i + 2
        idx_to_word[i + 2] = word

    # Basic sanity checks
    assert idx_to_word[word_to_idx['the']] == 'the'
    assert word_to_idx[STOP_TOKEN] == STOP_TOKEN_IDX
    assert idx_to_word[STOP_TOKEN_IDX] == STOP_TOKEN
    assert word_to_idx[START_TOKEN] == START_TOKEN_IDX
    assert idx_to_word[START_TOKEN_IDX] == START_TOKEN
    assert word_to_idx[idx_to_word[2]] == 2

    # Save the data. Pickle streams are binary: text mode ('w+') corrupts
    # them on platforms that translate newlines, so open in 'wb'.
    # NOTE(review): these are absolute root-level paths — confirm intended.
    with open('/refexp_word_to_idx', 'wb') as handle:
        pickle.dump(word_to_idx, handle)
    with open('/refexp_idx_to_word', 'wb') as handle:
        pickle.dump(idx_to_word, handle)
    with open('/refexp_stats', 'wb') as handle:
        pickle.dump(database_stats, handle)
    print('Finished processing refexp dataset')
def __init__(self, refexp_dataset_path, coco_data_path):
    """Constructor for GoogleRefexpEvalComprehension class for evaluation.

    Args:
      refexp_dataset_path: path for the Google Refexp dataset file
      coco_data_path: path for the original coco dataset file
        (e.g. 'instances_train2014.json')
    """
    # handle refexp dataset file
    assert refexp_dataset_path, "Refexp dataset file missing!"
    self.refexp_dataset_path = refexp_dataset_path
    # Single-argument print(...) behaves identically under Python 2 and
    # also runs under Python 3 (the bare `print` statement does not).
    print('Loading Google Refexp dataset file for the comprehension task.')
    self.refexp_dataset = Refexp(refexp_dataset_path, coco_data_path)  # Need to check - change
    # Frozen id sets give O(1) membership checks when validating predictions.
    self.gt_ann_ids_set = frozenset(
        self.refexp_dataset.getAnnIds())  # Need to check - change
    self.gt_refexp_ids_set = frozenset(
        self.refexp_dataset.getRefexpIds())  # Need to check - change

    # reset evaluation state
    self.reset_eval_state()
# Build three feature extractors that share one set of ImageNet VGG19
# weights: the fc2 (second fully-connected) layer output, the flatten
# layer output, and the final 'predictions' softmax output.
# NOTE(review): `input=`/`output=` keyword args are the old Keras 1.x
# Model API — confirm against the installed Keras version.
base_model = VGG19(weights='imagenet')
model = Model(input=base_model.input, output=base_model.get_layer('fc2').output)
model2 = Model(input=base_model.input, output=base_model.get_layer('flatten').output)
model3 = Model(input=base_model.input, output=base_model.get_layer('predictions').output)

# Dataset locations — relative paths, presumably resolved from the
# script's own directory; verify working directory before running.
refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
coco_filename = '../external/coco/annotations/instances_train2014.json'
datasetDir = '../external/coco/'
datasetType = 'images/train2014/COCO_train2014_'

# Create Refexp instance.
refexp = Refexp(refexp_filename, coco_filename)


# for x in captions['annotations']
# NOTE(review): `bbox=[]` is a mutable default argument; safe only if the
# function never mutates it — confirm in the full definition.
def predict_image(file_id, bbox=[]):
    # img = refexp.loadImgs(file_id)[0]
    # img =
    # print img
    # print file_id
    # print img
    # Zero-pad the numeric file id to the 12-digit suffix used in COCO
    # image filenames (COCO_train2014_############.jpg).
    number = str(('_0000000000000' + str(file_id))[-12:])
    # img_path = '/Users/reuben/Downloads/train2014/COCO_train2014_'+number+'.jpg'
    # img_path = 'cat.jpg'
    # NOTE(review): this chunk is truncated here — the image.load_img call
    # continues beyond the visible source.
    x = image.load_img(datasetDir + datasetType + number + '.jpg',
def preprocess_refexp_images(stream_num,
                             stream_size,
                             word_to_idx,
                             max_cap_len,
                             coco_dir,
                             category_names=[],
                             out_file='../keras_vgg_19/savedoc',
                             NO_PADDING=False):
    """Build one stream of (image id + bbox, partial caption) -> next-word
    training examples from the Google Refexp dataset and pickle it.

    Args:
      stream_num: 1-based index of the stream slice to emit.
      stream_size: number of (partial caption, image) examples per stream.
      word_to_idx: word -> integer index vocabulary.
      max_cap_len: captions longer than this (after adding start/stop
        tokens) are dropped.
      coco_dir: root of the coco checkout (annotations are read from it).
      category_names: optional coco category names to restrict the images.
      out_file: path the (X, y) pair is pickled to.
      NO_PADDING: unused here; kept for interface compatibility.
    """
    coco_filename = coco_dir + '/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
    refexp = Refexp(refexp_filename, coco_filename)

    # choose categories/images
    catIds = refexp.getCatIds(catNms=category_names)
    imgIds = list(set(refexp.getImgIds(catIds=catIds)))
    annIds = refexp.getAnnIds(imgIds=imgIds)
    anns = refexp.loadAnns(ids=annIds)

    # Captions with weird symbols are skipped; frozenset gives O(1)
    # membership instead of scanning a list per refexp id.
    bad_ref_ids = frozenset([146, 400, 923, 21409, 35384, 38589, 46630,
                             47673, 65639, 70715, 82742])
    refIds = []
    bboxes = []
    refImgIds = []
    # get all refexp ids and bboxes and imageids in these annotations
    for ann in anns:
        for ref_id in ann['refexp_ids']:
            if ref_id not in bad_ref_ids:
                refIds.append(ref_id)
                bboxes.append(ann['bbox'])
                refImgIds.append(ann['image_id'])

    # get caption sequences, with added start and stop tokens
    captions = [refexp.dataset['refexps'][ref_id]['raw'].encode('ascii')
                for ref_id in refIds]
    caption_seqs = [[START_TOKEN] + text_to_word_sequence(c) + [STOP_TOKEN]
                    for c in captions]
    caption_lengths = [len(seq) for seq in caption_seqs]

    # filter out the long captions; all three lists are filtered with the
    # same mask so they stay aligned
    keep = [l <= max_cap_len for l in caption_lengths]
    refImgIds = [x for x, k in zip(refImgIds, keep) if k]
    bboxes = [x for x, k in zip(bboxes, keep) if k]
    caption_seqs = [x for x, k in zip(caption_seqs, keep) if k]
    caption_lengths = [l for l in caption_lengths
                       if l <= max_cap_len]  # do not move this before the other filter steps!

    # repeat image id and bounding box for each partial caption: a caption
    # of length l yields l - 1 (partial caption, next word) pairs
    image_ids = [img_id
                 for img_id, l in zip(refImgIds, caption_lengths)
                 for _ in range(l - 1)]
    cap_bbox = [bbox
                for bbox, l in zip(bboxes, caption_lengths)
                for _ in range(l - 1)]

    partial_caps, next_words = partial_captions_and_next_words(
        caption_seqs, word_to_idx, max_cap_len)
    print(len(image_ids), len(partial_caps), len(cap_bbox))
    assert len(image_ids) == len(partial_caps)
    assert len(image_ids) == len(cap_bbox)

    # Select the requested stream slice. The end index is clamped to the
    # number of available examples so an over-range request yields a short
    # (possibly empty) final stream instead of an indexing error.
    start = (stream_num - 1) * stream_size
    end = min(stream_num * stream_size, len(partial_caps))

    # list(zip(...)) so the pairing is a concrete, sliceable list (zip
    # returns a lazy iterator on Python 3).
    ids_and_bboxes = list(zip(image_ids, cap_bbox))
    X = [0, 0]
    X[0] = ids_and_bboxes[start:end]
    # Slice before converting: indexing a list with a range object fails.
    X[1] = np.asarray(partial_caps[start:end])
    y = np.asarray(next_words[start:end])

    out = X, y
    # Binary mode: pickle streams are binary data.
    with open(out_file, 'wb') as handle:
        pickle.dump(out, handle)