# Serialize one example into the current TFRecord writer: zero-pad the visual
# features and the question token sequence, then pack them together with the
# encoded image and box metadata into a tf.train.Example.
# NOTE(review): this fragment relies on names bound outside the visible code
# (vfeat_entry, entry, box, num_box, max_box_num, max_q_len, id, image_id,
# image_str, IMAGE_HEIGHT, IMAGE_WIDTH, tf_record_writer) -- confirm against
# the enclosing function.
# NOTE(review): `.value` looks like the h5py Dataset accessor, which newer
# h5py releases removed in favour of `dataset[()]` -- confirm the library.

# Visual features: append (max_box_num - num_box) all-zero rows along axis 0.
V_ft = vfeat_entry['vfeat'].value
pad_V_ft = np.zeros(
    [max_box_num - num_box, V_ft.shape[1]],
    dtype=V_ft.dtype,
)
V_ft = np.concatenate([V_ft, pad_V_ft], axis=0)

# Question tokens: record the unpadded length first, then append
# (max_q_len - length) zeros.
q_intseq = entry['question_intseq'].value
q_intseq_len = np.array(len(q_intseq), dtype=np.int32)
q_intseq_pad = np.zeros(
    [max_q_len - q_intseq_len],
    dtype=q_intseq.dtype,
)
q_intseq = np.concatenate([q_intseq, q_intseq_pad], axis=0)

answer_id = np.array(entry['answer_id'].value, dtype=np.int32)

# Arrays are stored flattened next to their shape so that a reader can
# restore the original layout.
example_feature = {
    'qid': tf_util.int64_feature(int(id)),
    'image_id': tf_util.bytes_feature(str(image_id)),
    'image/encoded': tf_util.bytes_feature(image_str),
    'image/format': tf_util.bytes_feature('png'),
    'image/height': tf_util.int64_feature(IMAGE_HEIGHT),
    'image/width': tf_util.int64_feature(IMAGE_WIDTH),
    'box/list': tf_util.float_feature(list(box.reshape([-1]))),
    'box/shape': tf_util.int64_feature(list(box.shape)),
    'num_box': tf_util.int64_feature(num_box),
    'V_ft/list': tf_util.float_feature(list(V_ft.reshape([-1]))),
    'V_ft/shape': tf_util.int64_feature(list(V_ft.shape)),
    'q_intseq/list': tf_util.int64_feature(list(q_intseq)),
    'q_intseq/len': tf_util.int64_feature(q_intseq_len),
    'answer_id': tf_util.int64_feature(answer_id),
}
tf_example = tf.train.Example(
    features=tf.train.Features(feature=example_feature))
tf_record_writer.write(tf_example.SerializeToString())
# Serialize one example (box and visual features plus question and answer,
# no raw image) into the current TFRecord writer, then close the two input
# handles.
# NOTE(review): this fragment relies on names bound outside the visible code
# (vfeat_entry, entry, box, num_box, max_box_num, max_q_len, id, image_id,
# tf_record_writer, data, vfeat) -- confirm against the enclosing function.
# NOTE(review): `.value` looks like the h5py Dataset accessor, which newer
# h5py releases removed in favour of `dataset[()]` -- confirm the library.

# Visual features: append (max_box_num - num_box) all-zero rows along axis 0.
V_ft = vfeat_entry['vfeat'].value
pad_V_ft = np.zeros(
    [max_box_num - num_box, V_ft.shape[1]],
    dtype=V_ft.dtype,
)
V_ft = np.concatenate([V_ft, pad_V_ft], axis=0)

# Question tokens: record the unpadded length first, then append
# (max_q_len - length) zeros.
q_intseq = entry['question_intseq'].value
q_intseq_len = np.array(len(q_intseq), dtype=np.int32)
q_intseq_pad = np.zeros(
    [max_q_len - q_intseq_len],
    dtype=q_intseq.dtype,
)
q_intseq = np.concatenate([q_intseq, q_intseq_pad], axis=0)

answer_id = np.array(entry['answer_id'].value, dtype=np.int32)

# Arrays are stored flattened next to their shape so that a reader can
# restore the original layout.
example_feature = {
    'qid': tf_util.int64_feature(int(id)),
    'image_id': tf_util.bytes_feature(str(image_id)),
    'box/list': tf_util.float_feature(list(box.reshape([-1]))),
    'box/shape': tf_util.int64_feature(list(box.shape)),
    'num_box': tf_util.int64_feature(num_box),
    'V_ft/list': tf_util.float_feature(list(V_ft.reshape([-1]))),
    'V_ft/shape': tf_util.int64_feature(list(V_ft.shape)),
    'q_intseq/list': tf_util.int64_feature(list(q_intseq)),
    'q_intseq/len': tf_util.int64_feature(q_intseq_len),
    'answer_id': tf_util.int64_feature(answer_id),
}
tf_example = tf.train.Example(
    features=tf.train.Features(feature=example_feature))
tf_record_writer.write(tf_example.SerializeToString())

# NOTE(review): presumably these two calls run once, after the loop that
# writes the examples -- the original nesting is not recoverable from the
# collapsed source, so confirm the indentation.
data.close()
vfeat.close()
tqdm(entries, desc='write tfrecord {}'.format(split))): if i % config.num_record_per_shard == 0: shard_id = int(i / config.num_record_per_shard) shard_name = '{}-{:05d}-of-{:05d}'.format(split, shard_id, num_shards) shard_path = os.path.join(config.save_dir, split, shard_name) if os.path.exists(shard_path): raise ValueError('Existing shard path: {}'.format(shard_path)) tf_record_writer = tf.python_io.TFRecordWriter(shard_path) max_q_len = max(max_q_len, len(entry['q_token'])) max_num_answer = max(max_num_answer, len(entry['answer']['labels'])) tf_example = tf.train.Example(features=tf.train.Features( feature={ 'qid': tf_util.int64_feature(int(entry['question_id'])), 'image_id': tf_util.bytes_feature(str(entry['image_id'])), 'image_idx': tf_util.int64_feature(int(entry['image'])), 'q_intseq/list': tf_util.int64_feature(entry['q_token']), 'q_intseq/len': tf_util.int64_feature(len(entry['q_token'])), 'answers/ids': tf_util.int64_feature(entry['answer'] ['labels']), 'answers/scores': tf_util.float_feature(entry['answer'] ['scores']), })) tf_record_writer.write(tf_example.SerializeToString()) # Construct image_info used_image_paths = set() image_id2idx = {} image_path2idx = {} image_num2path = {}