Example #1
def __init__(self, refexp_dataset_path, coco_data_path):
    """Constructor for GoogleRefexpEvalGeneration class for evaluation.

    Args:
      refexp_dataset_path: path for the Google Refexp dataset file
      coco_data_path: path for the original coco dataset file
        (e.g. 'instances_train2014.json')
    """
    # handle refexp dataset file
    assert refexp_dataset_path, "Refexp dataset file missing!"
    self.refexp_dataset_path = refexp_dataset_path
    print('Loading Google Refexp dataset file for the generation task.')
    self.refexp_dataset = Refexp(refexp_dataset_path,
                                 coco_data_path)  # Need to check - change
    self.gt_ann_ids_set = frozenset(
        self.refexp_dataset.getAnnIds())  # Need to check - change
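
A minimal usage sketch for this constructor, assuming it belongs to the GoogleRefexpEvalGeneration class named in its docstring; the file paths mirror the ones hard-coded in the later examples.

# hypothetical usage; class name taken from the docstring above
evaluator = GoogleRefexpEvalGeneration(
    '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json',
    '../external/coco/annotations/instances_train2014.json')
print(len(evaluator.gt_ann_ids_set))  # count of ground-truth annotation ids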
Example #2
# Imports assumed by this snippet: `Refexp` comes from the Google Refexp
# toolbox; `unique_words` and the START/STOP token constants are
# project-level helpers defined elsewhere in the repository.
import pickle

from keras.preprocessing.text import text_to_word_sequence


def preprocess_refexp():
    # get annotations
    coco_filename = '../external/coco/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'

    refexp = Refexp(refexp_filename, coco_filename)
    # skip the refexp ids whose captions contain non-ASCII symbols
    ref_ids = [
        ref_id for ref_id in refexp.dataset['refexps'] if ref_id not in [
            146, 400, 923, 21409, 35384, 38589, 46630, 47673, 65639, 70715,
            82742
        ]
    ]
    # round-trip through ASCII so a stray non-ASCII caption still raises,
    # while text_to_word_sequence receives str rather than bytes
    captions = [
        refexp.dataset['refexps'][ref_id]['raw'].encode('ascii').decode('ascii')
        for ref_id in ref_ids
    ]
    caption_seqs = [text_to_word_sequence(c) for c in captions]
    max_cap_len = max([len(seq) for seq in caption_seqs])

    database_stats = {'max_cap_len': max_cap_len}

    # create dictionaries for dataset
    unique = unique_words(caption_seqs)
    word_to_idx = {STOP_TOKEN: STOP_TOKEN_IDX, START_TOKEN: START_TOKEN_IDX}
    idx_to_word = {STOP_TOKEN_IDX: STOP_TOKEN, START_TOKEN_IDX: START_TOKEN}

    for i, word in enumerate(unique):
        # Start indices at 2 since 0 and 1 are reserved for start and stop tokens
        word_to_idx[word] = i + 2
        idx_to_word[i + 2] = word

    # Basic sanity checks
    assert (idx_to_word[word_to_idx['the']] == 'the')
    assert (word_to_idx[STOP_TOKEN] == STOP_TOKEN_IDX)
    assert (idx_to_word[STOP_TOKEN_IDX] == STOP_TOKEN)
    assert (word_to_idx[START_TOKEN] == START_TOKEN_IDX)
    assert (idx_to_word[START_TOKEN_IDX] == START_TOKEN)
    assert (word_to_idx[idx_to_word[2]] == 2)

    # Save the data (binary mode is required by pickle)
    with open('/refexp_word_to_idx', 'wb') as handle:
        pickle.dump(word_to_idx, handle)
    with open('/refexp_idx_to_word', 'wb') as handle:
        pickle.dump(idx_to_word, handle)
    with open('/refexp_stats', 'wb') as handle:
        pickle.dump(database_stats, handle)

    print('Finished processing refexp dataset')
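
A minimal sketch of loading the pickled vocabulary back, assuming preprocess_refexp() has run and the hard-coded paths above are readable:

import pickle

with open('/refexp_word_to_idx', 'rb') as handle:
    word_to_idx = pickle.load(handle)
with open('/refexp_idx_to_word', 'rb') as handle:
    idx_to_word = pickle.load(handle)
assert idx_to_word[word_to_idx['the']] == 'the'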
Example #3
def __init__(self, refexp_dataset_path, coco_data_path):
    """Constructor for GoogleRefexpEvalComprehension class for evaluation.

    Args:
      refexp_dataset_path: path for the Google Refexp dataset file
      coco_data_path: path for the original coco dataset file
        (e.g. 'instances_train2014.json')
    """
    # handle refexp dataset file
    assert refexp_dataset_path, "Refexp dataset file missing!"
    self.refexp_dataset_path = refexp_dataset_path
    print('Loading Google Refexp dataset file for the comprehension task.')
    self.refexp_dataset = Refexp(refexp_dataset_path,
                                 coco_data_path)  # Need to check - change
    self.gt_ann_ids_set = frozenset(
        self.refexp_dataset.getAnnIds())  # Need to check - change
    self.gt_refexp_ids_set = frozenset(
        self.refexp_dataset.getRefexpIds())  # Need to check - change

    # reset evaluation state
    self.reset_eval_state()
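
The ground-truth ids are kept in frozensets so that validating a predicted id is an O(1) membership test. A minimal sketch of that pattern, where evaluator and pred_ann_id are hypothetical names:

# hypothetical validation step using the frozensets built above
if pred_ann_id not in evaluator.gt_ann_ids_set:
    print('Unknown annotation id: %d' % pred_ann_id)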
Example #4
# Keras imports assumed by this snippet
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.preprocessing import image

# Three feature extractors sharing VGG19 weights: fc2 features, flattened
# conv features, and the final class predictions.
base_model = VGG19(weights='imagenet')
model = Model(inputs=base_model.input,
              outputs=base_model.get_layer('fc2').output)
model2 = Model(inputs=base_model.input,
               outputs=base_model.get_layer('flatten').output)
model3 = Model(inputs=base_model.input,
               outputs=base_model.get_layer('predictions').output)

refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
coco_filename = '../external/coco/annotations/instances_train2014.json'
datasetDir = '../external/coco/'
datasetType = 'images/train2014/COCO_train2014_'

# Create Refexp instance.
refexp = Refexp(refexp_filename, coco_filename)



def predict_image(file_id, bbox=[]):
    # COCO image files embed the image id zero-padded to 12 digits
    number = str(file_id).zfill(12)
    # the original snippet is truncated after this call; target_size is an
    # assumption (VGG19 takes 224x224 inputs)
    x = image.load_img(datasetDir + datasetType + number + '.jpg',
                       target_size=(224, 224))
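
The snippet is cut off mid-function; inside predict_image, the conventional Keras continuation would follow the load_img call. The lines below sketch it under that assumption (not the author's verbatim code): convert the PIL image to an array, add a batch dimension, apply VGG preprocessing, and run one of the models built above.

from keras.applications.vgg19 import preprocess_input
from keras.preprocessing.image import img_to_array
import numpy as np

arr = img_to_array(x)              # PIL image -> (224, 224, 3) float array
arr = np.expand_dims(arr, axis=0)  # add the batch dimension
arr = preprocess_input(arr)        # VGG-style channel preprocessing
fc2_features = model.predict(arr)  # (1, 4096) fc2 activations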
Example #5
# Imports assumed by this snippet: `Refexp`, the START/STOP token constants
# and `partial_captions_and_next_words` are project-level names, as above.
import pickle

import numpy as np
from keras.preprocessing.text import text_to_word_sequence


def preprocess_refexp_images(stream_num,
                             stream_size,
                             word_to_idx,
                             max_cap_len,
                             coco_dir,
                             category_names=[],
                             out_file='../keras_vgg_19/savedoc',
                             NO_PADDING=False):

    coco_filename = coco_dir + '/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'

    refexp = Refexp(refexp_filename, coco_filename)

    # choose categories/images
    catIds = refexp.getCatIds(catNms=category_names)
    imgIds = list(set(refexp.getImgIds(catIds=catIds)))
    annIds = refexp.getAnnIds(imgIds=imgIds)
    anns = refexp.loadAnns(ids=annIds)

    refIds = []
    bboxes = []
    refImgIds = []
    # collect the refexp ids, bboxes and image ids from these annotations,
    # skipping the captions with non-ASCII symbols (same ids as above)
    for ann in anns:
        for ref_id in ann['refexp_ids']:
            if ref_id not in [
                    146, 400, 923, 21409, 35384, 38589, 46630, 47673, 65639,
                    70715, 82742
            ]:
                refIds.append(ref_id)
                bboxes.append(ann['bbox'])
                refImgIds.append(ann['image_id'])

    # get captions as str (round-trip through ASCII, as above), then add
    # start and stop tokens
    captions = [
        refexp.dataset['refexps'][ref_id]['raw'].encode('ascii').decode('ascii')
        for ref_id in refIds
    ]
    caption_seqs = [[START_TOKEN] + text_to_word_sequence(c) + [STOP_TOKEN]
                    for c in captions]
    caption_lengths = [len(seq) for seq in caption_seqs]

    # filter out the long captions
    refImgIds = [
        img_id for i, img_id in enumerate(refImgIds)
        if caption_lengths[i] <= max_cap_len
    ]
    bboxes = [
        bbox for i, bbox in enumerate(bboxes)
        if caption_lengths[i] <= max_cap_len
    ]
    caption_seqs = [
        seq for i, seq in enumerate(caption_seqs)
        if caption_lengths[i] <= max_cap_len
    ]
    # do not move this before the other filter steps!
    caption_lengths = [l for l in caption_lengths if l <= max_cap_len]
    total_num_partial_captions = sum(caption_lengths)

    # repeat image id and bounding box once per partial caption: a caption
    # of length l yields l - 1 (prefix, next-word) training pairs
    repeated_ids = [[img_id] * (l - 1)
                    for img_id, l in zip(refImgIds, caption_lengths)]
    image_ids = [img_id for rep_id in repeated_ids for img_id in rep_id]
    repeated_bboxes = [[bbox] * (l - 1)
                       for bbox, l in zip(bboxes, caption_lengths)]
    cap_bbox = [bbox for rep_bbox in repeated_bboxes for bbox in rep_bbox]

    partial_caps, next_words = partial_captions_and_next_words(
        caption_seqs, word_to_idx, max_cap_len)

    print(len(image_ids), len(partial_caps), len(cap_bbox))
    assert (len(image_ids) == len(partial_caps))
    assert (len(image_ids) == len(cap_bbox))

    # select this stream's slice, clamping so the last stream does not
    # index past the end of the data (stream_num is 1-indexed)
    start = (stream_num - 1) * stream_size
    end = min(stream_num * stream_size, len(partial_caps))
    item_range = range(start, end)
    X = [0, 0]
    X[0] = list(zip(image_ids, cap_bbox))[start:end]
    X[1] = np.asarray(partial_caps)[item_range]
    y = np.asarray(next_words)[item_range]
    out = X, y

    # binary mode is required by pickle
    with open(out_file, 'wb') as handle:
        pickle.dump(out, handle)
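
A minimal sketch of driving this function stream by stream, assuming the vocabulary pickles written by preprocess_refexp() above; stream_num is 1-indexed, and the 'cat' category is a hypothetical choice.

import pickle

with open('/refexp_word_to_idx', 'rb') as handle:
    word_to_idx = pickle.load(handle)
with open('/refexp_stats', 'rb') as handle:
    stats = pickle.load(handle)

# process the first stream of 10000 (partial caption, image) examples
preprocess_refexp_images(stream_num=1,
                         stream_size=10000,
                         word_to_idx=word_to_idx,
                         max_cap_len=stats['max_cap_len'],
                         coco_dir='../external/coco',
                         category_names=['cat'],
                         out_file='../keras_vgg_19/savedoc')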