def generator(self, data_dir, tmp_dir, train):
  """Generate examples."""
  data_file = TRAIN_DATASETS if train else TEST_DATASETS
  # Generate vocab.
  raw_gen = RawDataGenerator()
  vocab_encoder = generator_utils.get_or_generate_vocab_inner(
      data_dir, self.vocab_file, self.targeted_vocab_size,
      raw_gen.generator(data_file, for_vocab=True))
  label_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=LABEL_FILE)

  # Generate examples.
  for label, entities, sentence in raw_gen.generator(data_file):
    entities = [vocab_encoder.encode(e) for e in entities]
    sentence = vocab_encoder.encode(sentence)
    entities_pos = raw_gen.find_start_position(entities, sentence)
    yield {
        "inputs": sentence,
        "targets": [label_encoder.encode(label)],
        "lexical": raw_gen.lexical_feature(entities_pos, sentence),
        "position1": raw_gen.position_feature(entities_pos[0], sentence),
        "position2": raw_gen.position_feature(entities_pos[1], sentence),
    }
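
# A minimal sketch (not part of the original snippet) of the ClassLabelEncoder
# round trip used above. The encoder can be built from an in-memory list
# (class_labels=...) or from a file with one label per line
# (class_labels_fname=...); encode() maps a label string to its index in that
# list, and decode() maps the index back. The demo labels are hypothetical.
from tensor2tensor.data_generators import text_encoder

demo_encoder = text_encoder.ClassLabelEncoder(class_labels=["neg", "pos"])
assert demo_encoder.encode("pos") == 1
assert demo_encoder.decode(1) == "pos"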
def feature_encoders(self, data_dir):
  encoder = self.get_or_create_vocab(data_dir, None, force_get=True)
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(self.class_labels(data_dir)),
  }
def feature_encoders(self, data_dir):
  vocab_filename = os.path.join(data_dir, self.vocab_file)
  encoder = text_encoder.SubwordTextEncoder(vocab_filename)
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(["neg", "pos"]),
  }
def feature_encoders(self, data_dir):
  vocab_filename = os.path.join(data_dir, self.vocab_file)
  encoder = text_encoder.SubwordTextEncoder(vocab_filename)
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(self._LABELS),
  }
def feature_encoders(self, data_dir):
  encoder = text_encoder.ByteTextEncoder()
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(self.class_labels(data_dir)),
  }
def feature_encoders(self, data_dir):
  encoder = text_encoder.TokenTextEncoder(vocab_filename=None,
                                          vocab_list=self.vocab)
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(self.class_labels(data_dir)),
  }
def generate_vocab(self):
  # Generate vocab.
  token_generator = self.raw_gen.generator([TRAIN_FILE, TEST_FILE],
                                           for_vocab=True)
  self.vocab_encoder = generator_utils.get_or_generate_vocab_inner(
      OUTPUT_DIR, VOCAB_FILE, VOCAB_SIZE, token_generator)
  tf.logging.info("vocab_size: %d", self.vocab_encoder.vocab_size)
  self.label_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=LABEL_FILE)
def vqa_v2_generator(data_dir, tmp_dir, datasets, vocab_filename,
                     label_filename, eos_list=None):
  """vqa v2 generator."""
  eos_list = eos_list if eos_list else []
  _get_vqa_v2_dataset(tmp_dir)
  vocab_path = os.path.join(data_dir, vocab_filename)
  if not tf.gfile.Exists(vocab_path):
    vocab_tmp_path = os.path.join(tmp_dir, vocab_filename)
    tf.gfile.Copy(vocab_tmp_path, vocab_path)
    # Prepend the reserved tokens (pad=0, EOS=1) and append the OOV token so
    # ids in the vocab file line up with tensor2tensor's conventions.
    with tf.gfile.GFile(vocab_path, mode="r") as f:
      vocab_data = "<pad>\n<EOS>\n" + f.read() + "UNK\n"
    with tf.gfile.GFile(vocab_path, mode="w") as f:
      f.write(vocab_data)
  label_path = os.path.join(data_dir, label_filename)
  if not tf.gfile.Exists(label_path):
    label_tmp_path = os.path.join(tmp_dir, label_filename)
    tf.gfile.Copy(label_tmp_path, label_path)

  vocab_encoder = text_encoder.TokenTextEncoder(vocab_path, replace_oov="UNK")
  label_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=label_path)

  prefix_annotation = []
  for prefix, annotation_file in datasets:
    annotation_path = os.path.join(tmp_dir, annotation_file)
    with tf.gfile.Open(annotation_path) as f:
      annotation_json = json.loads(f.read())
    prefix_annotation += [(prefix, anno) for anno in annotation_json]
  random.shuffle(prefix_annotation)
  annotation_count = len(prefix_annotation)
  tf.logging.info("Processing %d annotations for vqa v2" % (annotation_count))

  for prefix, anno in prefix_annotation:
    image_id = anno["image_id"]
    question = vocab_encoder.encode(anno["question"]) + eos_list
    answer = [label_encoder.encode(ans) for ans in anno["answer"]]
    answer = answer if answer else [0]  # 0 indicates padding
    image_filename = ("COCO_" + prefix + "_" + str(image_id).zfill(12) +
                      ".jpg")
    image_filepath = os.path.join(tmp_dir, prefix, image_filename)
    # Read the JPEG bytes in binary mode.
    with tf.gfile.Open(image_filepath, "rb") as f:
      encoded_image_data = f.read()
    yield {
        "image/encoded": [encoded_image_data],
        "image/format": ["jpeg"],
        "image/image_id": [image_id],
        "image/question_id": [anno["question_id"]],
        "image/question": question,
        "image/answer": answer,
    }
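
# A small sketch (hypothetical vocab, not from the original code) of why the
# vocab file above is rewritten with "<pad>\n<EOS>\n" prepended and "UNK\n"
# appended: ids 0 and 1 then match tensor2tensor's reserved pad/EOS tokens,
# and replace_oov="UNK" maps unseen words onto the appended entry instead of
# raising a KeyError. The same id layout can be reproduced in memory:
from tensor2tensor.data_generators import text_encoder

demo_vocab = ["<pad>", "<EOS>", "what", "color", "UNK"]
demo_encoder = text_encoder.TokenTextEncoder(
    None, vocab_list=demo_vocab, replace_oov="UNK")
assert demo_encoder.encode("what color") == [2, 3]
assert demo_encoder.encode("what flavor") == [2, 4]  # "flavor" -> UNK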
def write_results(predictions):
  label_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=LABEL_FILE)
  # The test split has 2717 examples whose ids start at 8001.
  start_no = 8001
  with open(FLAGS.results_file, "w") as f:
    for idx, pred_id in enumerate(predictions):
      if idx < 2717:
        rel = label_encoder.decode(pred_id)
        f.write("%d\t%s\n" % (start_no + idx, rel))
def feature_encoders(self, data_dir):
  input_encoder = text_encoder.ImageEncoder(channels=self.num_channels)
  vocab_file = os.path.join(data_dir, self.vocab_filename)
  question_encoder = text_encoder.TokenTextEncoder(
      vocab_file, replace_oov="UNK")
  label_file = os.path.join(data_dir, self.label_filename)
  target_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=label_file)
  return {
      "inputs": input_encoder,
      "question": question_encoder,
      "targets": target_encoder,
  }
def feature_encoders(self, data_dir):
  """Used at inference time to convert input and output ids to tokens.

  The returned encoders are stored in self._encoders.
  """
  vocab_filename = os.path.join(data_dir, self.vocab_file)
  encoder = text_encoder.SubwordTextEncoder(vocab_filename)
  return {
      "inputs": encoder,
      "targets": text_encoder.ClassLabelEncoder(class_labels_fname=LABEL_FILE),
  }
def feature_encoders(self, data_dir):
  del data_dir
  return {
      "inputs": text_encoder.ImageEncoder(),
      "targets": text_encoder.ClassLabelEncoder(self.class_labels),
  }
def vqa_v2_generator(self, data_dir, tmp_dir, datasets):
  """VQA v2 generator using image features."""
  _get_vqa_v2_annotations(tmp_dir, self._VQA_V2_ANNOTATION_URL)
  _get_vqa_v2_image_feature_dataset(tmp_dir, self._VQA_V2_FEATURE_URL)

  vocab_path = os.path.join(data_dir, self.vocab_filename)
  if not tf.gfile.Exists(vocab_path):
    vocab_tmp_path = os.path.join(tmp_dir, self.vocab_filename)
    tf.gfile.Copy(vocab_tmp_path, vocab_path)
    # Prepend the reserved tokens (pad=0, EOS=1) and append the OOV token.
    with tf.gfile.GFile(vocab_path, mode="r") as f:
      vocab_data = "<pad>\n<EOS>\n" + f.read() + "UNK\n"
    with tf.gfile.GFile(vocab_path, mode="w") as f:
      f.write(vocab_data)
  label_path = os.path.join(data_dir, self.label_filename)
  if not tf.gfile.Exists(label_path):
    label_tmp_path = os.path.join(tmp_dir, self.label_filename)
    tf.gfile.Copy(label_tmp_path, label_path)

  vocab_encoder = text_encoder.TokenTextEncoder(vocab_path, replace_oov="UNK")
  label_encoder = text_encoder.ClassLabelEncoder(
      class_labels_fname=label_path)

  # Merge annotations.
  annotation_json = []
  for _, annotation_file in datasets:
    annotation_path = os.path.join(tmp_dir, annotation_file)
    with tf.gfile.Open(annotation_path) as f:
      annotation_json += json.loads(f.read())
  annotation_count = len(annotation_json)
  tf.logging.info("Processing %d annotations for vqa v2" % (annotation_count))

  # Group annotations by image id so each feature row is scanned only once.
  imageid2annotation = {}
  for anno in annotation_json:
    if anno["image_id"] not in imageid2annotation:
      imageid2annotation[anno["image_id"]] = [anno]
    else:
      imageid2annotation[anno["image_id"]].append(anno)

  csv.field_size_limit(sys.maxsize)
  for feature_file, _ in datasets:
    feature_file_path = os.path.join(tmp_dir, feature_file)
    with open(feature_file_path, "r+b") as tsv_file:
      csv_reader = csv.DictReader(
          tsv_file, delimiter="\t",
          fieldnames=self.feature_file_field_names)
      for item in csv_reader:
        item["num_boxes"] = int(item["num_boxes"])
        image_id = int(item["image_id"])
        image_w = float(item["image_w"])
        image_h = float(item["image_h"])
        # base64.decodestring is the Python 2 spelling; on Python 3 it is
        # base64.decodebytes.
        bboxes = np.frombuffer(base64.decodestring(item["boxes"]),
                               dtype=np.float32).reshape(
                                   (item["num_boxes"], -1))

        # Normalize box coordinates and sizes by the image dimensions.
        box_width = bboxes[:, 2] - bboxes[:, 0]
        box_height = bboxes[:, 3] - bboxes[:, 1]
        scaled_width = box_width / image_w
        scaled_height = box_height / image_h
        scaled_x = bboxes[:, 0] / image_w
        scaled_y = bboxes[:, 1] / image_h

        box_width = box_width[..., np.newaxis]
        box_height = box_height[..., np.newaxis]
        scaled_width = scaled_width[..., np.newaxis]
        scaled_height = scaled_height[..., np.newaxis]
        scaled_x = scaled_x[..., np.newaxis]
        scaled_y = scaled_y[..., np.newaxis]

        # Each box becomes (x1, y1, x2, y2, w, h), all scaled to [0, 1].
        spatial_features = np.concatenate(
            (scaled_x, scaled_y,
             scaled_x + scaled_width, scaled_y + scaled_height,
             scaled_width, scaled_height),
            axis=1)

        if image_id in imageid2annotation:
          for anno in imageid2annotation[image_id]:
            question = vocab_encoder.encode(anno["question"])
            answer = [label_encoder.encode(ans) for ans in anno["answer"]]
            answer = answer if answer else [0]  # 0 indicates padding
            yield {
                "image/feature":
                    np.frombuffer(base64.decodestring(item["features"]),
                                  dtype=np.float32).tolist(),
                "image/spatial_feature": spatial_features.flatten().tolist(),
                "image/height": [image_h],
                "image/width": [image_w],
                "image/bboxes": bboxes.flatten().tolist(),
                "image/image_id": [image_id],
                "image/question_id": [anno["question_id"]],
                "image/question": question,
                "image/answer": answer,
            }
          del imageid2annotation[image_id]

  # Assert all annotations are included.
  assert not imageid2annotation
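
# A tiny numeric check (illustrative only, made-up box values) of the spatial
# feature layout built above: each box is emitted as (x1, y1, x2, y2, w, h),
# all normalized by the image size, so a 100x50 box at (10, 20) in a 200x100
# image becomes (0.05, 0.2, 0.55, 0.7, 0.5, 0.5).
import numpy as np

bboxes = np.array([[10., 20., 110., 70.]], dtype=np.float32)  # x1, y1, x2, y2
image_w, image_h = 200., 100.
w = (bboxes[:, 2] - bboxes[:, 0]) / image_w
h = (bboxes[:, 3] - bboxes[:, 1]) / image_h
x = bboxes[:, 0] / image_w
y = bboxes[:, 1] / image_h
spatial = np.stack([x, y, x + w, y + h, w, h], axis=1)
assert np.allclose(spatial, [[0.05, 0.2, 0.55, 0.7, 0.5, 0.5]])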