コード例 #1
0
def process():
    """Build and save the QRD "irrelevant" question data.

    Steps, in order:

    1. Create a blacklist with ``make_blacklist`` and save it to
       ``data/kptest_blacklist.json``.
    2. Load the QRPE and VTFP records (both loaders receive the blacklist)
       and concatenate them.
    3. Encode each record's ``'question'`` with
       ``SentenceEncoder.encode_sentence`` and pack the token lists with
       ``put_to_array``.
    4. Save ``data/QRD_irrelevant_meta.json`` (``'images'``, ``'image_ids'``)
       and ``data/QRD_irrelevant_data.data`` (``'image_ids'`` as int32,
       ``'quest'``, ``'quest_len'``).
    """
    # load data
    blacklist = make_blacklist()
    save_json('data/kptest_blacklist.json', blacklist)
    qrpe = load_qrpe_data(blacklist)
    vtfp = load_vtfp_data(blacklist)
    # A leftover interactive ``pdb.set_trace()`` breakpoint used to sit here;
    # it stopped every unattended run, so it has been removed.
    meta = qrpe + vtfp

    # process data
    images, image_ids, questions = [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        image_id = item['image_id']
        image = item['image']
        tokens = encoder.encode_sentence(item['question'])
        images.append(image)
        image_ids.append(image_id)
        questions.append(tokens)

    # put to array (imported lazily, as in the rest of this script)
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)

    # The JSON meta keeps the plain Python lists; the HDF5 file gets int32 ids.
    save_json('data/QRD_irrelevant_meta.json', {
        'images': images,
        'image_ids': image_ids
    })
    image_ids = np.array(image_ids, dtype=np.int32)
    save_hdf5('data/QRD_irrelevant_data.data', {
        'image_ids': image_ids,
        'quest': arr,
        'quest_len': arr_len
    })
コード例 #2
0
def extract_w2v():
    """Encode every answer-vocabulary token and save the stacked encodings.

    Reads ``data/vqa_trainval_answer_word_counts.txt``, keeps the first
    space-separated field of each line as the answer token, encodes each
    token with ``SentenceEncoder.encode`` and stores the concatenation of
    all encodings under ``'ans_w2v'`` in
    ``data/vqa_trainval_answer_vocab_w2v.data``.

    No L2 normalisation is applied to the saved encodings, even though the
    second progress message still mentions it.
    """
    trainset = 'trainval'
    top_ans_file = 'data/vqa_%s_answer_word_counts.txt' % trainset
    with open(top_ans_file, 'r') as fs:
        # Only the token (first field) of each "token count" line is needed.
        answer_vocab = [line.split(' ')[0].strip() for line in fs]

    # extract w2v
    encoder = SentenceEncoder()
    print('Extracting answer codings')
    encoding = [encoder.encode(ans) for ans in answer_vocab]

    ans_enc = np.concatenate(encoding)
    print('Normalise and compute distance')
    from util import save_hdf5
    save_hdf5('data/vqa_trainval_answer_vocab_w2v.data', {'ans_w2v': ans_enc})
コード例 #3
0
def _encode_w2v(images, encoder, subset):
    """Encode questions and answer candidates of a subset and save them.

    Args:
        images: Items exposing ``question_id``, ``question`` and ``choices``.
        encoder: Object whose ``encode`` is applied to each question; it is
            also handed to ``_encode_answer_candidates``.
        subset: Subset name, used in log messages and output file names.

    Writes ``data3/vqa_mc_w2v_coding_<subset>.data`` (float32 question and
    candidate codings, int32 labels and question ids) and
    ``data3/vqa_mc_cands_<subset>.meta`` (question id and candidate list
    per item).
    """
    question_codes = []
    candidate_codes = []
    targets = []
    question_ids = []
    candidate_records = []

    for index, item in enumerate(images):
        if index % 1000 == 0:
            tf.logging.info("%s: processed %d of %d items." %
                            (subset.upper(), index, len(images)))

        qid = item.question_id
        question_code = encoder.encode(item.question)
        cand_code, target = _encode_answer_candidates(item, encoder)

        question_codes.append(question_code)
        candidate_codes.append(cand_code)
        targets.append(target)
        question_ids.append(qid)
        candidate_records.append({'quest_id': qid, 'cands': item.choices})

    # Pack everything into typed arrays before writing.
    packed = {
        'quest_w2v': np.concatenate(question_codes, axis=0).astype(np.float32),
        'cands_w2v': np.concatenate(candidate_codes,
                                    axis=0).astype(np.float32),
        'labels': np.array(targets, dtype=np.int32),
        'quest_ids': np.array(question_ids, dtype=np.int32)
    }
    save_hdf5('data3/vqa_mc_w2v_coding_%s.data' % subset, packed)
    save_json('data3/vqa_mc_cands_%s.meta' % subset, candidate_records)
コード例 #4
0
def _process_dataset(subset, images, encoder):
    """Encode one subset of VQA v2 items and write its data and meta files.

    Args:
        subset: Subset name, interpolated into the output file names.
        images: Sequence of items exposing ``filename``, ``question_id`` and
            ``counter_example`` attributes.
        encoder: Object whose ``encode(item)`` returns a
            ``(question, label, answer)`` triple, or ``None`` for an item
            that must be skipped.

    Items for which the encoder returns ``None`` are left out of *every*
    output, so the question array, labels, counter examples, question ids
    and image names stay row-aligned. (Previously the ids, names and
    counter examples of skipped items were still recorded, which misaligned
    them with the encoded questions.)

    Outputs:
        * ``<output_dir>/v2_vqa_std_mscoco_kp<subset>.data`` -- questions,
          lengths, labels, counter examples and question ids.
        * ``data/v2_answer_std_mscoco_kp<subset>.data`` -- answer sequences.
        * ``<output_dir>/v2_vqa_std_mscoco_kp<subset>.meta`` -- JSON with
          question ids and image file names.
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'v2_vqa_std_mscoco_kp%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'v2_vqa_std_mscoco_kp%s.data' % subset)

    num_images = len(images)
    quests = []
    labels = []
    answers = []
    couter_examples = []
    image_names = []
    quest_ids = []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is not None:
            quest, label, ans = res
            # remove start and end word
            quests.append(quest[1:-1])
            answers.append(ans[1:-1])
            labels.append(label)
            # Bookkeeping is recorded only for kept items so that every
            # output has one row per encoded question.
            image_names.append(image.filename)
            quest_ids.append(image.question_id)
            counter_example_id = image.counter_example
            # -1 marks "no counter example" in the int32 output array.
            couter_examples.append(
                -1 if counter_example_id is None else counter_example_id)

        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    # merge questions to a matrix
    quest_arr, quest_len = _list_tokens_to_array(quests)
    ans_arr, ans_len = _list_tokens_to_array(answers)
    labels = np.array(labels, dtype=np.int32)
    couter_examples = np.array(couter_examples, dtype=np.int32)
    # save data file
    save_hdf5(
        data_filename, {
            'quest_arr': quest_arr,
            'quest_len': quest_len,
            'answer': labels,
            'counter_example': couter_examples,
            'quest_ids': np.array(quest_ids, dtype=np.int32)
        })
    ans_filename = 'data/v2_answer_std_mscoco_kp%s.data' % subset
    save_hdf5(ans_filename, {'ans_arr': ans_arr, 'ans_len': ans_len})
    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names}
    with open(meta_filename, 'w') as meta_fs:
        json.dump(d, meta_fs)

    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quest_ids), meta_filename))
    sys.stdout.flush()
0
def process_test():
    """Build and save the test split of the QRD "irrelevant" question data.

    Loads the records returned by ``load_bsir_dataset``, encodes each
    record's ``'question'`` with ``SentenceEncoder.encode_sentence``, packs
    the token lists with ``put_to_array`` and writes:

    * ``data/QRD_irrelevant_meta_test.json`` -- ``'images'`` and
      ``'image_ids'`` as plain lists,
    * ``data/QRD_irrelevant_data_test.data`` -- int32 ``'image_ids'``,
      ``'quest'``, ``'quest_len'`` and float32 ``'labels'``.
    """
    from util import save_hdf5, save_json
    # load data
    meta = load_bsir_dataset()
    # process data
    labels, images, image_ids, questions = [], [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        image_id = item['image_id']
        image = item['image']
        tokens = encoder.encode_sentence(item['question'])
        images.append(image)
        image_ids.append(image_id)
        questions.append(tokens)
        labels.append(item['label'])
    # put to array
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)

    save_json('data/QRD_irrelevant_meta_test.json', {
        'images': images,
        'image_ids': image_ids
    })
    image_ids = np.array(image_ids, dtype=np.int32)
    labels = np.array(labels, dtype=np.float32)
    # A leftover interactive ``pdb.set_trace()`` breakpoint used to sit here;
    # it blocked unattended runs just before the final write.
    save_hdf5(
        'data/QRD_irrelevant_data_test.data', {
            'image_ids': image_ids,
            'quest': arr,
            'quest_len': arr_len,
            'labels': labels
        })
コード例 #6
0
def extract_w2v():
    """Encode the top-2000 answers, L2-normalise them and save similarities.

    Reads one answer per line from
    ``../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt``, encodes each
    with ``SentenceEncoder.encode``, concatenates the encodings, divides
    every row by its L2 norm (plus ``1e-8`` to avoid division by zero) and
    saves to ``data/top2000_answer_feat.data``:

    * ``'ans_w2v'`` -- the normalised encodings,
    * ``'sim'`` -- their pairwise dot products.
    """
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    with open(top_ans_file, 'r') as fs:
        answer_vocab = [line.strip() for line in fs]

    # extract w2v
    encoder = SentenceEncoder()
    print('Extracting answer codings')
    encoding = [encoder.encode(ans) for ans in answer_vocab]

    ans_enc = np.concatenate(encoding)
    # l2 norm
    print('Normalise and compute distance')
    _norm = np.sqrt(np.square(ans_enc).sum(axis=1)) + 1e-8
    ans_enc /= _norm[:, np.newaxis]
    # Rows are unit length now, so the dot product is the cosine similarity.
    sim = np.dot(ans_enc, ans_enc.transpose())
    from util import save_hdf5
    save_hdf5('data/top2000_answer_feat.data', {'ans_w2v': ans_enc,
                                                'sim': sim})
コード例 #7
0
def pool_resnet_features(split='train'):
    """Pool the ResNet feature file of every unique image in a split.

    Args:
        split: One of ``'test'``, ``'train'``, ``'val'`` or
            ``'vg_aug_train'``; anything else fails the assertion.

    The image ids and file names come from the split's
    ``v7w_std_mscoco_<split>.meta`` JSON. For each unique image id the
    matching ``<name>.npz`` file is passed to ``_load_and_pool_feature``;
    the results are concatenated, cast to float32 and saved together with
    the unique ids to ``data2/v7w_res152_<split>.h5``.
    """
    assert (split in ['test', 'train', 'val', 'vg_aug_train'])

    meta_path = os.path.join('/usr/data/fl302/code/inverse_vqa/data2',
                             'v7w_std_mscoco_%s.meta' % split)
    meta = load_json(meta_path)
    # For a repeated image id the last file name wins.
    name_of = dict(zip(meta['image_ids'], meta['images']))

    feat_dir = '/usr/data/fl302/data/visual_genome/ResNet152/resnet_res5c'
    unique_ids = np.unique(meta['image_ids'])

    tick = time()
    pooled = []
    for count, image_id in enumerate(unique_ids):
        if count % 100 == 0:
            print('processed %d images (%0.2f sec/batch)' %
                  (count, time() - tick))
            tick = time()

        feat_path = os.path.join(feat_dir, name_of[image_id] + '.npz')
        pooled.append(_load_and_pool_feature(feat_path))

    pooled = np.concatenate(pooled, axis=0).astype(np.float32)
    save_hdf5('data2/v7w_res152_%s.h5' % split, {
        'image_ids': unique_ids,
        'features': pooled
    })
def dump_dict_and_mapping(quest_vocab, mapping):
    """Write the merged vocabulary as text and save the index mapping.

    Args:
        quest_vocab: Mapping from word to integer index. The indices are
            read back as ``0 .. n-1``; a missing index raises ``KeyError``.
        mapping: Object stored under ``'mapping'`` in
            ``data/answer_index_to_merged_index.mapping``.

    Each line of ``data/vqa_trainval_merged_word_counts`` is
    ``"<word> <index>"``, in increasing index order.
    """
    # ``items()`` exists on both Python 2 and 3; ``iteritems()`` is Python 2
    # only and raises AttributeError on Python 3.
    reverse_qvoc = {v: k for (k, v) in quest_vocab.items()}
    with open('data/vqa_trainval_merged_word_counts', 'w') as fs:
        for i in range(len(reverse_qvoc)):
            fs.write('%s %d\n' % (reverse_qvoc[i], i))
    from util import save_hdf5
    save_hdf5('data/answer_index_to_merged_index.mapping',
              {'mapping': mapping})
コード例 #9
0
def convert():
    """Export the model's answer embedding for the top-2000 answers.

    Builds the ``'VAQ-Var'`` model in ``'beam'`` mode inside a fresh graph,
    restores it from a fixed checkpoint, encodes the top-2000 answers to
    token sequences, checks that every sequence decodes back to the
    tokenised answer, runs the model's ``_answer_embed`` tensor on the
    packed sequences and saves the result under ``'answer_embedding'`` in
    ``data/v1_ivaq_var_restval_top2000_lstm_embedding.h5``.

    Raises:
        AssertionError: If an answer does not survive the
            encode/decode round trip.
    """
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val',
                                       load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)

    # Sanity check: decoding each sequence must reproduce the tokenised
    # answer before the embeddings are trusted.
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    # A leftover interactive ``pdb.set_trace()`` breakpoint used to sit here;
    # it blocked unattended runs before the embedding was computed.
    embedding = sess.run(tf_embedding,
                         feed_dict={
                             tf_answer_feed: ans_arr.astype(np.int32),
                             tf_answer_len_feed: ans_arr_len.astype(np.int32)
                         })
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
コード例 #10
0
def _process_dataset(subset, images, encoder):
    """Encode one subset of VQA items and write its data and meta files.

    Args:
        subset: Subset name, interpolated into the output file names.
        images: Sequence of items exposing ``filename`` and ``question_id``.
        encoder: Object whose ``encode(item)`` returns a
            ``(question, label)`` pair, or ``None`` for an item that must
            be skipped.

    Skipped items are left out of every output, so the question matrix,
    lengths, labels, question ids and image names stay row-aligned.
    (Previously the matrix was sized by the number of input items, so a
    single skipped item raised ``IndexError`` while filling it.)

    Outputs:
        * ``<output_dir>/vqa_std_mscoco_<subset>.data`` -- zero-padded int32
          question matrix, question lengths and labels.
        * ``<output_dir>/vqa_std_mscoco_<subset>.meta`` -- JSON with
          question ids and image file names.

    Raises:
        ValueError: If no item could be encoded (``max`` of an empty list).
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'vqa_std_mscoco_%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'vqa_std_mscoco_%s.data' % subset)

    num_images = len(images)
    quests = []
    labels = []
    image_names = []
    quest_ids = []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is not None:
            quest, label = res
            # remove start and end word
            quests.append(quest[1:-1])
            labels.append(label)
            # Recorded only for kept items to keep meta and data aligned.
            image_names.append(image.filename)
            quest_ids.append(image.question_id)

        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    # merge questions to a matrix with one row per *encoded* question
    quest_len = [len(q) for q in quests]
    max_len = max(quest_len)
    quest_arr = np.zeros([len(quests), max_len], dtype=np.int32)
    for row, quest, length in zip(quest_arr, quests, quest_len):
        row[:length] = quest
    quest_len = np.array(quest_len, dtype=np.int32)
    answer_arr = np.array(labels, dtype=np.int32)
    # save data file
    save_hdf5(data_filename, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'answer': answer_arr
    })
    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names}
    with open(meta_filename, 'w') as meta_fs:
        json.dump(d, meta_fs)

    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quests), meta_filename))
    sys.stdout.flush()
コード例 #11
0
def vaq_condition(checkpoint_path=None):
    """Score the ``'dev'`` subset with the model in ``'condition'`` mode.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the latest
            checkpoint state under
            ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used.

    For every test batch the model's ``losses`` are fetched, the last
    column is dropped and the rest is averaged per row. The per-batch score
    rows and the question ids are concatenated and saved under ``'nll'``
    and ``'quest_ids'`` in
    ``data/<model_type lower-cased>_vaq_cond_score1000-2000_dev.hdf5``.
    """
    subset = 'dev'
    model_config = ModelConfig()

    # Model constructor for the configured model type.
    create_model = get_model_creation_fn(FLAGS.model_type)

    # Data reader, one example per batch.
    reader = Reader(batch_size=1, subset=subset, output_attr=True,
                    output_im=False, output_qa=True, output_capt=False)

    if checkpoint_path is None:
        ckpt_state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = ckpt_state.model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        # Build the model, then restore its weights.
        model = create_model(model_config, 'condition')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    loss_tensor = model.losses
    total_batches = reader.num_batches

    save_file = 'data/%s_vaq_cond_score1000-2000_%s.hdf5' % (
        FLAGS.model_type.lower(), subset)
    print('Save File: %s' % save_file)
    print('Running conditioning...')

    batch_scores = []
    batch_quest_ids = []
    for step in range(total_batches):
        update_progress(step / float(total_batches))

        outputs = reader.get_test_batch()
        # The batch has six fields; only the question id is used here. The
        # last two fields are not fed to the model.
        _, _, _, _, quest_id, _ = outputs

        losses = sess.run(loss_tensor,
                          feed_dict=model.fill_feed_dict(outputs[:-2]))
        mean_loss = losses[:, :-1].mean(axis=1)
        batch_scores.append(mean_loss[np.newaxis, :])
        batch_quest_ids.append(quest_id)

    nlls = np.concatenate(batch_scores, axis=0)
    quest_ids = np.concatenate(batch_quest_ids, axis=0)
    print('\nSaving result files: %s...' % save_file)
    save_hdf5(save_file, {'nll': nlls, 'quest_ids': quest_ids})
def load_data():
    """Merge the train and val feature files into one HDF5 file.

    Loads ``data/imagenet_<subset>_features.h5`` for ``'train'`` then
    ``'val'`` via ``load_feature_file_vqabaseline``, concatenates the
    ``'image_ids'`` (cast to int32) and ``'features'`` (cast to float32)
    and saves both to
    ``/usr/data/fl302/code/compute_nn/res152_trainval.h5``.
    """
    id_parts = []
    feature_parts = []
    for name in ('train', 'val'):
        print('Loading subset: %s' % name)
        loaded = load_feature_file_vqabaseline(
            'data/imagenet_%s_features.h5' % name)
        id_parts.append(loaded['image_ids'])
        feature_parts.append(loaded['features'])

    merged = {
        'image_ids': np.concatenate(id_parts).astype(np.int32),
        'features': np.concatenate(feature_parts).astype(np.float32)
    }
    print('Saving...')
    save_hdf5('/usr/data/fl302/code/compute_nn/res152_trainval.h5', merged)
    print('Done')
コード例 #13
0
def make_top_answer_data_layer():
    """Convert the top answers to token-id sequences and save them.

    Each answer from ``load_top_answer_list`` is lower-cased, tokenised
    with NLTK's ``word_tokenize`` and mapped to ids through the vocabulary
    loaded from ``data/vqa_trainval_answer_word_counts.txt``. The sequences
    are packed with ``_list_tokens_to_array`` and saved to
    ``data/top_answer2000_sequences.h5`` under ``'answer_seq'`` and
    ``'answer_seq_len'``.
    """
    from build_v2_ivqa_data import _load_vocab, _list_tokens_to_array
    from nltk.tokenize import word_tokenize
    from util import save_hdf5

    vocab = _load_vocab('data/vqa_trainval_answer_word_counts.txt')
    top_answers = load_top_answer_list()

    sequences = [
        [vocab.word_to_id(token)
         for token in word_tokenize(str(answer).lower())]
        for answer in top_answers
    ]
    seq_arr, seq_len = _list_tokens_to_array(sequences)
    save_hdf5('data/top_answer2000_sequences.h5', {
        'answer_seq': seq_arr,
        'answer_seq_len': seq_len
    })
コード例 #14
0
def _split_attributes(subset, atts, image_ids):
    """Save the attribute rows belonging to the images of one subset.

    Args:
        subset: Subset name; selects the image ids through
            ``get_subset_image_id`` and names the output file.
        atts: Attribute array indexed by row; row ``i`` belongs to
            ``image_ids[i]``.
        image_ids: Image id of every row of ``atts``.

    Subset images without a matching row are counted as "missed" in the
    printed summary and then fall back to row 0 of ``atts``. The result is
    written to ``data/attribute_std_mscoco_<subset>.data``.
    """
    row_of = {image_id: row for row, image_id in enumerate(image_ids)}
    subset_image_ids = get_subset_image_id(subset)

    # -1 marks subset images that have no attribute row.
    index = np.array([row_of.get(image_id, -1)
                      for image_id in subset_image_ids], dtype=np.int32)
    num_match = (index >= 0).sum()
    print('Find %d matched attributes for subset %s, missed %d\n' %
          (num_match, subset, index.size - num_match))

    # Unmatched images reuse row 0 so that the slice below stays valid.
    index[index < 0] = 0
    scores = atts[index, :]
    data_file = 'data/attribute_std_mscoco_%s.data' % subset
    save_hdf5(data_file, {
        'att_arr': scores.astype(np.float32),
        'image_ids': np.array(subset_image_ids, dtype=np.int32)
    })
コード例 #15
0
def encode_answers(quest_ids, subset):
    """Save the ground-truth answer sequences of the given questions.

    Args:
        quest_ids: Question ids to look up.
        subset: Used only to name the output file
            ``data/answer_std_mscoco_<subset>.data``.

    For every question id the sequences returned by
    ``get_gt_answer_and_sequence_coding`` are collected, zero-padded to the
    longest one in an int32 matrix and saved under ``'ans_arr'``, with
    their lengths under ``'ans_len'``.

    NOTE(review): the question manager is always created with
    ``subset='trainval'`` regardless of ``subset`` -- confirm intended.
    """
    manager = MultiChoiceQuestionManger(subset='trainval', load_ans=True,
                                        answer_coding='sequence')
    sequences = []
    for quest_id in quest_ids:
        _, coded = manager.get_gt_answer_and_sequence_coding(quest_id)
        # ``coded`` is itself a list of sequences; add them one by one.
        sequences.extend(coded)

    # merge the sequences into a zero-padded matrix
    lengths = [len(seq) for seq in sequences]
    width = max(lengths)
    ans_arr = np.zeros([len(sequences), width], dtype=np.int32)
    for row, seq, length in zip(ans_arr, sequences, lengths):
        row[:length] = seq

    out_file = 'data/answer_std_mscoco_%s.data' % subset
    save_hdf5(out_file, {
        'ans_arr': ans_arr,
        'ans_len': np.array(lengths, dtype=np.int32)
    })
コード例 #16
0
def convert_val_attributes():
    """Convert the ``Val`` detections ``.mat`` file to the HDF5 layout.

    Loads ``Inception_v3_1000_Val_Dets.mat`` from the fixed data root with
    ``loadmat`` and saves its ``'labels'`` as float32 ``'att_arr'`` and its
    flattened ``'image_id'`` as int32 ``'image_ids'`` to
    ``data/capt1k_std_mscoco_val.data``.
    """
    data_root = '/import/vision-ephemeral/fl302/code/slim'
    for subset in ['Val']:
        mat_name = 'Inception_v3_1000_%s_Dets.mat' % subset
        print('Loading %s...' % mat_name)
        mat = loadmat(os.path.join(data_root, mat_name))

        scores = mat['labels'].astype(np.float32)
        ids = mat['image_id'].flatten()

        out_file = 'data/capt1k_std_mscoco_%s.data' % subset.lower()
        save_hdf5(out_file, {
            'att_arr': scores.astype(np.float32),
            'image_ids': np.array(ids, dtype=np.int32)
        })
コード例 #17
0
def find_boxes_of_questions(regions, v7w_qa2box, subset):
    """Look up a box for every question of a subset and save the result.

    Args:
        regions: Mapping from region id to box.
        v7w_qa2box: Mapping from question id to box; consulted first.
        subset: Subset name, used for the input meta file, the log message
            and the output file.

    Lookup order per question id: ``v7w_qa2box`` first, then the QA-to-region
    JSON mapping (keyed by ``str(quest_id)``) followed by ``regions``. When
    neither yields a box, ``[0., 0., 1., 1.]`` is stored and the question is
    flagged as having no box. Question ids, flags and boxes are saved to
    ``.../data2/v7w_qa_boxes_<subset>.data``.
    """
    meta_path = os.path.join('/usr/data/fl302/code/inverse_vqa/data2',
                             'v7w_std_mscoco_%s.meta' % subset)
    quest_ids = load_json(meta_path)['quest_id']

    sv_file = '/usr/data/fl302/code/inverse_vqa/data2/v7w_qa_boxes_%s.data' % subset
    qa2reg = load_json(
        '/usr/data/fl302/data/visual_genome/qa_to_region_mapping.json')

    flags = []
    boxes = []
    for quest_id in quest_ids:
        # Start from the "no annotation" fallback and upgrade it if a real
        # box is found.
        found = False
        box = [0., 0., 1., 1.]
        if quest_id in v7w_qa2box:
            # v7w annotation takes precedence
            box = v7w_qa2box[quest_id]
            found = True
        else:
            # fall back to the visual genome annotation
            q_key = str(quest_id)
            if q_key in qa2reg:
                region_id = qa2reg[q_key]
                if region_id in regions:
                    box = regions[region_id]
                    found = True
        boxes.append(box)
        flags.append(found)

    quest_boxes = np.array(boxes)
    has_boxes = np.array(flags)
    quest_ids = np.array(quest_ids)
    tf.logging.info('Subset %s, %d/%d QAs have region annotation' %
                    (subset, has_boxes.sum(), has_boxes.size))
    save_hdf5(sv_file, {
        'quest_ids': quest_ids,
        'has_boxes': has_boxes,
        'quest_boxes': quest_boxes
    })
コード例 #18
0
 def _load_answer_type(self, quest_ids):
     """Load the answer-type codes from the cache file, or build and cache them.

     The cache file is
     ``data/<version suffix>answer_type_std_mscoco_<subset>.data``. If it
     exists, its ``'answer_type'`` entry is used as is. Otherwise the code
     of every id in ``quest_ids`` is obtained from a
     ``MultiChoiceQuestionManger`` and written to the cache together with
     the question ids. The codes are stored in ``self._answer_type``.

     NOTE(review): the manager is always created with ``subset='val'``,
     independent of ``self._subset`` -- confirm intended.
     """
     cache_file = 'data/%sanswer_type_std_mscoco_%s.data' % (
         self._version_suffix, self._subset)
     if os.path.exists(cache_file):
         type_codes = load_hdf5(cache_file)['answer_type']
     else:
         manager = MultiChoiceQuestionManger(subset='val')
         type_codes = np.array(
             [manager.get_answer_type_coding(qid) for qid in quest_ids],
             dtype=np.int32)
         save_hdf5(cache_file, {
             'answer_type': type_codes,
             'quest_ids': np.array(quest_ids, dtype=np.int32)
         })
     self._answer_type = type_codes
コード例 #19
0
def process(subset):
    """Build multi-label answer targets for the images of one subset.

    Args:
        subset: Subset name passed to the loaders and used in the output
            file name.

    For every image a 2001-wide float32 row is created in which the
    columns of all labels attached to that image are set to 1. Columns 0, 1
    and 2000 are then cleared for all images. Features, label rows and
    image ids are saved to
    ``data/vqa_std_mscoco_multilabel_<subset>.data``.
    """
    print('Processing subset %s' % subset)
    feats, image_ids = load_image_data(subset)
    image_id2qa_index, labels = load_qa_data(subset)

    targets = np.zeros((feats.shape[0], 2001), dtype=np.float32)
    for image_id, row in zip(image_ids, targets):
        # ``row`` is a view into ``targets``, so this writes in place.
        qa_rows = image_id2qa_index[image_id]
        row[np.unique(labels[qa_rows])] = 1.0

    # disable meaningless entries
    ignored_columns = [0, 1, 2000]
    targets[:, ignored_columns] = 0.

    out_file = 'data/vqa_std_mscoco_multilabel_%s.data' % subset
    save_hdf5(out_file, {
        'features': feats,
        'labels': targets,
        'image_ids': image_ids
    })
コード例 #20
0
def process_dataset(mc, subset):
    """Save paired answer and image encodings for every question of a subset.

    Args:
        mc: Object providing ``get_gt_answer_and_word2vec(quest_id)`` and
            ``get_image_id(quest_id)``.
        subset: Subset name, used to load the question ids, to create the
            ``FeatureEncoder`` and to name the output file.

    Writes int32 question ids and float32 answer and image encodings to
    ``data/image_answer_coding_<subset>.h5``.
    """
    print('Processing %s' % subset)
    quest_ids = load_dataset(subset)
    feature_source = FeatureEncoder(subset)

    answer_rows = []
    image_rows = []
    for qid in quest_ids:
        _, answer_vec = mc.get_gt_answer_and_word2vec(qid)
        answer_rows.append(answer_vec)
        feature = feature_source.get_feature(mc.get_image_id(qid))
        # Add a leading axis so the features can be concatenated row-wise.
        image_rows.append(feature[np.newaxis, :])

    payload = {
        'quest_ids': np.array(quest_ids, dtype=np.int32),
        'answer_enc': np.concatenate(answer_rows, axis=0).astype(np.float32),
        'image_enc': np.concatenate(image_rows, axis=0).astype(np.float32)
    }
    save_hdf5('data/image_answer_coding_%s.h5' % subset, payload)
コード例 #21
0
def split_subset(subset, inputs):
    """Restrict VQA and attribute data to the images of one split and save it.

    Args:
        subset: Split name; selects the image ids via ``get_image_id`` and
            names the output files.
        inputs: 7-tuple ``(images, quest_id, quest_arr, quest_len, answer,
            attr_image_ids, attr_arr)``.

    VQA entries are kept when the image id parsed from their file name is
    in the split; attribute rows are kept when their image id is in the
    split. ``encode_answers`` is called for the kept question ids before
    the meta, VQA data and attribute data files are written.
    """
    print('Processing split %s' % subset)
    (images, quest_id, quest_arr, quest_len, answer,
     attr_image_ids, attr_arr) = inputs
    vqa_image_ids = [find_image_id_from_fname(fname) for fname in images]

    # Only membership is needed, so the split's image ids go into a set.
    split_ids = set(get_image_id(subset))

    # split vqa data
    vqa_keep = np.array([im_id in split_ids for im_id in vqa_image_ids])
    images = [im for im, keep in zip(images, vqa_keep) if keep]
    quest_id = [q_id for q_id, keep in zip(quest_id, vqa_keep) if keep]
    quest_arr = quest_arr[vqa_keep]
    quest_len = quest_len[vqa_keep]
    answer = answer[vqa_keep]

    # split attribute data
    attr_keep = np.array([im_id in split_ids for im_id in attr_image_ids])
    attr_image_ids = attr_image_ids[attr_keep]
    attr_arr = attr_arr[attr_keep]

    # process answers
    encode_answers(quest_id, subset)

    # save to files
    save_json('data/vqa_std_mscoco_kp%s.meta' % subset,
              {'images': images, 'quest_id': quest_id})
    save_hdf5('data/vqa_std_mscoco_kp%s.data' % subset, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'answer': answer
    })
    save_hdf5('data/attribute_std_mscoco_kp%s.data' % subset, {
        'image_ids': attr_image_ids,
        'att_arr': attr_arr
    })
コード例 #22
0
def load_res152_feature():
    """Merge the val, test and restval feature files into the val data file.

    Loads ``mscoco_res152_<subset>.h5`` for ``'val'``, ``'test'`` and
    ``'restval'`` (in that order), concatenates their features and
    flattened image ids, passes the ids to ``vertify_image_ids`` and saves
    float32 features (``'att_arr'``) and int32 ids (``'image_ids'``) to
    ``data/res152_std_mscoco_val.data``.
    """
    fdir = '/import/vision-ephemeral/fl302/code/text-to-image/'
    feature_parts = []
    id_parts = []
    for name in ['val', 'test', 'restval']:
        loaded = load_hdf5(os.path.join(fdir, 'mscoco_res152_%s.h5' % name))
        id_parts.append(loaded['image_ids'].flatten())
        feature_parts.append(loaded['features'])
    all_feats = np.concatenate(feature_parts)
    all_ids = np.concatenate(id_parts)

    # check the merged ids before writing anything
    vertify_image_ids(all_ids)

    # save
    out_file = 'data/res152_std_mscoco_%s.data' % 'val'
    save_hdf5(out_file, {
        'att_arr': all_feats.astype(np.float32),
        'image_ids': np.array(all_ids, dtype=np.int32)
    })
コード例 #23
0
def _process_dataset(subset, images, encoder):
    """Encode all captions of a subset and write them to one data file.

    Args:
        subset: Subset name, interpolated into the output file name.
        images: Sequence of items exposing ``image_id`` and ``captions``.
        encoder: Object with ``encode(caption)`` and ``vocab_size``.

    Every caption becomes one row of an int32 matrix padded with
    ``encoder.vocab_size + 1``; the image id is repeated once per caption.
    The matrix, caption lengths and image ids are saved to
    ``<output_dir>/caption_std_mscoco_<subset>.data``.
    """
    data_filename = os.path.join(FLAGS.output_dir,
                                 'caption_std_mscoco_%s.data' % subset)

    num_images = len(images)
    encoded = []
    owner_ids = []
    for idx, info in enumerate(images):
        image_id = info.image_id
        for caption in info.captions:
            encoded.append(encoder.encode(caption))
            owner_ids.append(image_id)

        if idx % 1000 == 0:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), idx, num_images))
            sys.stdout.flush()

    # merge captions to a matrix, padded with an id outside the vocabulary
    lengths = [len(tokens) for tokens in encoded]
    width = max(lengths)
    pad_id = encoder.vocab_size + 1
    capt_arr = np.ones([len(encoded), width], dtype=np.int32) * pad_id
    for row, tokens, length in zip(capt_arr, encoded, lengths):
        row[:length] = tokens

    # save data file
    save_hdf5(data_filename, {
        'capt_arr': capt_arr,
        'capt_len': np.array(lengths, dtype=np.int32),
        'image_ids': np.array(owner_ids, dtype=np.int32)
    })

    print("%s: Wrote %d caption files to %s" %
          (datetime.now(), num_images, data_filename))
    sys.stdout.flush()
コード例 #24
0
def load_question_candidates(subset):
    """Attach answer data to the extended question candidates and save them.

    Args:
        subset: Subset whose question-token file and answer data are loaded.

    For each row of ``ext_quest_ids`` the first id is looked up in the
    answer data, and the matching top answer, answer sequence and answer
    length are collected. The original question arrays plus the collected
    answer arrays (all int32) are written to
    ``data4/<suffix>ivqa_<FLAGS.subset>_question_answers.data``.

    NOTE(review): the input file is selected by the ``subset`` argument but
    the output file name uses ``FLAGS.subset`` -- confirm they are meant to
    be the same.
    """
    suffix = 'var_' if FLAGS.use_var else ''
    tokens = load_hdf5('data4/%sivqa_%s_question_tokens.data' %
                       (suffix, subset))
    seed_quest_ids = tokens['ext_quest_ids']
    quest_id2index, _, top_answers, answer_arr, answer_arr_len = \
        load_answer_data(subset)

    picked_top = []
    picked_arr = []
    picked_len = []

    # process answer data
    total = seed_quest_ids.shape[0]
    for row, qids in enumerate(seed_quest_ids):
        if row % 1000 == 0:
            print('Processed %d/%d' % (row, total))
        # The first id of each row selects the answer entry.
        src = quest_id2index[qids[0]]
        picked_top.append(top_answers[src])
        picked_arr.append(answer_arr[src])
        picked_len.append(answer_arr_len[src])

    # concat data
    out_file = 'data4/%sivqa_%s_question_answers.data' % (suffix,
                                                          FLAGS.subset)
    save_hdf5(out_file, {
        'ext_quest_arr': tokens['ext_quest_arr'],
        'ext_quest_len': tokens['ext_quest_len'],
        'ext_quest_ids': seed_quest_ids,
        'ext_top_answer': np.array(picked_top).astype(np.int32),
        'ext_answer_arr': np.concatenate(picked_arr).astype(np.int32),
        'ext_answer_arr_len': np.array(picked_len).astype(np.int32)
    })
コード例 #25
0
def _process_dataset(subset, images, encoder):
    """Encode one subset of Visual7W items and write its data and meta files.

    Args:
        subset: Subset name, interpolated into the output file names.
        images: Sequence of items exposing ``image_id``, ``filename`` and
            ``question_id``.
        encoder: Object whose ``encode(item)`` returns a
            ``(question, answer, label)`` triple, or ``None`` for an item
            that must be skipped.

    Skipped items are left out of every output, so the question and answer
    matrices, labels and the meta lists stay row-aligned. (Previously the
    matrices were sized by the number of input items, so a single skipped
    item raised ``IndexError`` while filling them.)

    Outputs:
        * ``<output_dir>/v7w_std_mscoco_<subset>.data`` -- zero-padded int32
          question and answer matrices, their lengths and the labels.
        * ``<output_dir>/v7w_std_mscoco_<subset>.meta`` -- JSON with
          question ids, image file names and image ids.

    Raises:
        ValueError: If no item could be encoded (``max`` of an empty list).
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'v7w_std_mscoco_%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'v7w_std_mscoco_%s.data' % subset)

    num_images = len(images)
    quests = []
    labels = []
    answers = []
    image_names = []
    quest_ids = []
    image_ids = []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is not None:
            quest, ans, label = res
            # remove start and end word
            quests.append(quest[1:-1])
            answers.append(ans[1:-1])
            labels.append(label)
            # Recorded only for kept items to keep meta and data aligned.
            image_ids.append(image.image_id)
            image_names.append(image.filename)
            quest_ids.append(image.question_id)

        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    def put_to_array(datums):
        """Zero-pad the token lists into an int32 matrix; also return lengths."""
        _len = [len(q) for q in datums]
        max_len = max(_len)
        # One row per sequence actually present, not per input item.
        _arr = np.zeros([len(datums), max_len], dtype=np.int32)
        for row, datum, length in zip(_arr, datums, _len):
            row[:length] = datum
        return _arr, np.array(_len, dtype=np.int32)

    # merge questions and answers to matrices
    quest_arr, quest_len = put_to_array(quests)
    ans_arr, ans_len = put_to_array(answers)
    labels = np.array(labels, dtype=np.int32)
    # A label equal to FLAGS.num_top_answers is counted as out of vocabulary.
    num_oov = np.sum(labels == FLAGS.num_top_answers)
    tf.logging.info('In vocabulary percentage: %0.2f' %
                    (100. - 100. * num_oov / labels.size))
    # save data file
    save_hdf5(
        data_filename, {
            'quest_arr': quest_arr,
            'quest_len': quest_len,
            'ans_arr': ans_arr,
            'ans_len': ans_len,
            'answer': labels
        })
    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names, 'image_ids': image_ids}
    with open(meta_filename, 'w') as meta_fs:
        json.dump(d, meta_fs)

    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quests), meta_filename))
    sys.stdout.flush()
コード例 #26
0
def test(checkpoint_path=None):
    """Run the model on ``FLAGS.testset`` and save per-question scores.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the latest
            checkpoint state under
            ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)`` is
            used.

    For every batch the probability of the ground-truth label
    (``outputs[3]``) is recorded first; the last column of the output is
    then zeroed before the arg-max label and its score are taken. Results
    are saved to ``data4/<suffix>v2qa_<testset>_qa_scores.data``.

    ``model_fn`` is not assigned in this function (the local assignment is
    commented out in the original), so it must be available at module level.
    """
    batch_size = 100
    config = ModelConfig()
    suffix = 'var_' if FLAGS.use_var else ''

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=FLAGS.testset,
                              feat_type=config.feat_type,
                              version=FLAGS.version,
                              var_suffix=suffix)
    if checkpoint_path is None:
        ckpt_state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt_state.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob_tensor = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    pred_labels = []
    pred_scores = []
    truth_scores = []
    question_ids = []

    print('Running inference on split %s...' % FLAGS.testset)
    total_batches = reader.num_batches
    for step in range(total_batches):
        if step % 10 == 0:
            update_progress(step / float(total_batches))
        outputs = reader.get_test_batch()
        # The last two batch fields are not fed to the model.
        probs = sess.run(prob_tensor,
                         feed_dict=model.fill_feed_dict(outputs[:-2]))

        # Score of the ground-truth label, read before any column is masked.
        gt_labels = outputs[3]
        rows = np.arange(gt_labels.size)
        truth_scores.append(probs[rows, gt_labels])

        # Exclude the last class from the prediction.
        probs[:, -1] = 0
        pred_labels.append(np.argmax(probs, axis=1))
        pred_scores.append(np.max(probs, axis=1))
        question_ids.append(outputs[-2])

    # save results
    results = {
        'ext_quest_ids': np.concatenate(question_ids),
        'ext_cand_scores': np.concatenate(truth_scores),
        'ext_cand_pred_labels': np.concatenate(pred_labels),
        'ext_cand_pred_scores': np.concatenate(pred_scores)
    }
    tf.logging.info('Saving results')
    from util import save_hdf5
    save_hdf5('data4/%sv2qa_%s_qa_scores.data' % (suffix, FLAGS.testset),
              results)
コード例 #27
0
def test(checkpoint_path=None):
    """Run inference over ``TEST_SET`` and write the predictions to disk.

    Two files are written: an HDF5 file with the question ids and the
    per-answer scores, and a JSON file (``FLAGS.result_format``) holding one
    ``{'answer', 'question_id'}`` record per evaluated sample.

    Args:
        checkpoint_path: Checkpoint to restore. When ``None`` the checkpoint
            recorded in ``FLAGS.checkpoint_dir`` (formatted with
            ``FLAGS.version`` and ``FLAGS.model_type``) is used.

    Returns:
        A ``(res_file, quest_ids)`` tuple: the path of the JSON result file
        and the concatenated question ids of all batches.
    """
    batch_size = 100
    config = ModelConfig()

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type, version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % (FLAGS.version,
                                                                     FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    # NOTE: `model_fn` is not assigned in this function; it is resolved from
    # the enclosing module scope.
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    ans_ids = []
    quest_ids = []
    ans_preds = []

    sess = tf.Session(graph=tf.get_default_graph())
    try:
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        # Create the vocabulary.
        top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
        to_sentence = SentenceGenerator(trainset='trainval',
                                        top_ans_file=top_ans_file)

        print('Running inference on split %s...' % TEST_SET)
        for i in range(reader.num_batches):
            if i % 10 == 0:
                update_progress(i / float(reader.num_batches))
            outputs = reader.get_test_batch()
            # The last two entries of the batch are not model inputs;
            # outputs[-2] carries the question ids.
            generated_ans = sess.run(
                prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
            # NOTE: the list keeps a reference to this very array, so the
            # in-place zeroing below is also visible in the saved `ans_preds`.
            ans_preds.append(generated_ans)
            # Zero the score of the last class so that (assuming non-negative
            # scores) it is not picked as the top answer.
            generated_ans[:, -1] = 0
            top_ans = np.argmax(generated_ans, axis=1)

            ans_ids.append(top_ans)
            quest_ids.append(outputs[-2])
    finally:
        # Release the session's resources even if inference fails.
        sess.close()

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    ans_preds = np.concatenate(ans_preds)
    # Cast ids to plain ints: NumPy integer scalars are not JSON serialisable
    # on Python 3.
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': int(qid)}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    data_file = 'data5/%s_%s_scores_flt.data' % (TEST_SET, FLAGS.model_type)
    from util import save_hdf5
    save_hdf5(data_file, {'quest_ids': quest_ids, 'ans_preds': ans_preds})
    # Use a context manager so the result file is flushed and closed.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
コード例 #28
0
                aug_quest_id = question_id * 1000 + _pid
                res_i = {
                    'image_id': int(image_id),
                    'question_id': aug_quest_id,
                    'question': sentence
                }
                results.append(res_i)
            extend_questions += cur_pathes

    save_json(res_file, results)
    ext_quest_arr, ext_quest_len = put_to_array(extend_questions)
    ext_quest_ids = np.array(extended_question_ids, dtype=np.int32)
    save_hdf5(
        'data_rl/%sivqa_%s_question_tokens.data' %
        (_model_suffix, FLAGS.subset), {
            'ext_quest_arr': ext_quest_arr,
            'ext_quest_len': ext_quest_len,
            'ext_quest_ids': ext_quest_ids
        })
    return res_file


def main(_):
    """Script entry point; the positional argument is not used in the visible code.

    NOTE(review): this definition looks truncated -- ``ModelWatcher`` is
    imported but not used and ``test_model`` is defined but never called in
    the lines shown here. Confirm against the full source before relying on
    this function.
    """
    from watch_model import ModelWatcher
    subset = FLAGS.subset

    def test_model(model_path):
        # Decode with the given checkpoint inside a fresh default graph and
        # pass the resulting file to `evaluate_oracle`.
        with tf.Graph().as_default():
            res_file = ivqa_decoding_beam_search(checkpoint_path=model_path,
                                                 subset=subset)
            cider = evaluate_oracle(res_file)
コード例 #29
0
 def backup_statistics(self):
     """Save the current loss and visit-count statistics to the cache file."""
     target = self._cache_file
     snapshot = {'loss': self._loss, 'num_visit': self._num_visit}
     save_hdf5(target, snapshot)
コード例 #30
0
def process(K=1., use_global_thresh=False):
    """Filter extended questions by the score of their predicted answer.

    Loads the QA scores and the question tokens for ``FLAGS.testset`` from
    ``data4/``, keeps a subset of the candidates and writes the surviving
    questions, lengths, ids and top answers to
    ``data4/<suffix>vqg_<testset>_question_answers_fltmax.data``.

    Args:
        K: Number of highest-scoring candidates kept for every distinct
            question id (first element of each ``ext_quest_ids`` row). The
            value is truncated to an int. Ignored when ``use_global_thresh``
            is true.
        use_global_thresh: When true, keep every candidate whose score is
            strictly above a fixed threshold of 0.3 instead of selecting the
            per-question top ``K``.
    """
    _model_suffix = 'var_' if FLAGS.use_var else ''
    # load VQA scores
    d = load_hdf5('data4/%svqg_%s_qa_scores.data' % (_model_suffix, FLAGS.testset))
    quest_ids = d['ext_quest_ids']
    ext_top_answer = d['ext_cand_pred_labels']
    cand_scores = d['ext_cand_pred_scores']

    # load QAs
    d = load_hdf5('data4/%svqg_%s_question_tokens.data' % (_model_suffix, FLAGS.testset))
    ext_quest_arr = d['ext_quest_arr']
    ext_quest_len = d['ext_quest_len']
    seed_quest_ids = d['ext_quest_ids']

    # Both files must describe the same candidates in the same order.
    assert (np.all(np.equal(quest_ids, seed_quest_ids)))

    num_all = quest_ids.shape[0]
    print(quest_ids.shape[0])

    if use_global_thresh:
        # The threshold is fixed; the former sort-based estimate was always
        # overwritten by this constant and could index out of bounds.
        thresh = 0.3
        print('Global thresh: %0.2f' % thresh)
        keep = cand_scores > thresh
    else:
        # Slice bounds must be integers, while the default K is a float.
        top_k = int(K)

        # build index: question id -> row positions of its candidates
        quest_id2index = {}
        for row, quest_id_tuple in enumerate(quest_ids):
            # Each row is expected to hold exactly two values; only the
            # first one is used as the grouping key.
            quest_id, _ = quest_id_tuple.tolist()
            quest_id2index.setdefault(quest_id, []).append(row)

        # parse: pick the top_k best-scoring rows of every group
        num = len(quest_id2index)
        slice_index = []
        for i, rows in enumerate(quest_id2index.values()):
            if i % 1000 == 0:
                print('Processed %d/%d' % (i, num))
            rows = np.asarray(rows)
            best = (-cand_scores[rows]).argsort()[:top_k]
            slice_index.extend(rows[best].tolist())
        # An explicit integer dtype keeps an empty selection a valid index.
        keep = np.asarray(slice_index, dtype=np.int64)

    # get data
    quest_ids = quest_ids[keep]
    ext_quest_arr = ext_quest_arr[keep]
    ext_quest_len = ext_quest_len[keep]
    ext_top_answer = ext_top_answer[keep]

    # save
    save_hdf5('data4/%svqg_%s_question_answers_fltmax.data' % (_model_suffix, FLAGS.testset),
              {'ext_quest_arr': ext_quest_arr,
               'ext_quest_len': ext_quest_len,
               'ext_quest_ids': quest_ids,
               'ext_top_answer': ext_top_answer})
    print('%d/%d' % (ext_top_answer.size, num_all))