def _inner_iter(self, fpath):
    """Yield one ``ExampleIdVisitor.ExampleIdItem`` per example id in a file.

    Every record read from ``fpath`` is parsed as a ``dj_pb.LiteExampleIds``
    message. Its features are converted to a dict of per-field value lists,
    and one item is produced for each entry of the ``'example_id'`` list.

    Args:
        fpath: path handed to ``make_tf_record_iter``.

    Yields:
        ``ExampleIdVisitor.ExampleIdItem`` built from the entry's position
        plus the message's ``begin_index`` and a dict holding the value at
        that position for each field of ``SYNC_ALLOWED_OPTIONAL_FIELDS``
        that is present with a non-empty value list.
    """
    with make_tf_record_iter(fpath) as record_iter:
        for raw_record in record_iter:
            lite_example_ids = dj_pb.LiteExampleIds()
            lite_example_ids.ParseFromString(raw_record)
            columns = convert_tf_example_to_dict(
                tf.train.Example(features=lite_example_ids.features)
            )
            for offset in range(len(columns['example_id'])):
                # Fields that are absent or have an empty value list are
                # left out of the row entirely.
                row = {
                    field: columns[field][offset]
                    for field in SYNC_ALLOWED_OPTIONAL_FIELDS
                    if field in columns and len(columns[field]) > 0
                }
                yield ExampleIdVisitor.ExampleIdItem(
                    offset + lite_example_ids.begin_index, row
                )
def __init__(self, record_str):
    """Build the item from a serialized record.

    Stores ``record_str``, parses it via ``self._parse_example()``, converts
    the parsed example to a dict and copies into ``self._features`` only the
    entries whose key is in ``self._allowed_fields``.

    Args:
        record_str: the serialized record; kept on ``self._record_str``.
    """
    super().__init__()
    self._record_str = record_str
    self._parse_example_error = False
    parsed = self._parse_example()
    field_values = common.convert_tf_example_to_dict(parsed)
    permitted = {
        name: value
        for name, value in field_values.items()
        if name in self._allowed_fields
    }
    self._features.update(permitted)
    # The csv view starts unset.
    self._csv_record = None
    self._gc_example(parsed)
def csv_record(self):
    """Return the record as a dict, building and caching it on first use.

    If ``self._csv_record`` is already set it is returned unchanged.
    Otherwise it is initialised to an empty dict, ``self._parse_example(False)``
    is called, and, unless ``self._parse_example_error`` is set, the dict
    conversion of ``self._example`` replaces it. A conversion failure is
    logged and the empty dict is kept.

    Returns:
        The cached dict (possibly empty).
    """
    if self._csv_record is not None:
        return self._csv_record

    self._csv_record = {}
    self._parse_example(False)
    if self._parse_example_error:
        return self._csv_record

    try:
        self._csv_record = common.convert_tf_example_to_dict(self._example)
    except Exception as err:  # pylint: disable=broad-except
        logging.error(
            "Failed convert tf example to csv record, "
            "reason %s",
            err,
        )
    return self._csv_record
def check_tfrecord(self, raw_data):
    """Validate a sampled fraction of serialized ``tf.Example`` records.

    A record is inspected only when a fresh ``random.random()`` draw is
    below ``self._sample_ratio``; records that are not sampled are accepted
    without parsing.

    Args:
        raw_data: serialized record passed to ``ParseFromString``.

    Returns:
        ``False`` when a sampled record fails ``self._check`` or when any
        exception is raised while parsing/checking it (the failure is
        logged); ``True`` otherwise.
    """
    sampled = random.random() < self._sample_ratio
    if not sampled:
        return True

    try:
        example = tf.train.Example()
        example.ParseFromString(raw_data)
        if not self._check(convert_tf_example_to_dict(example)):
            return False
    except Exception as err:  # pylint: disable=broad-except
        logging.error(
            "Failed parse tf.Example from record %s, reason %s",
            raw_data, err)
        return False
    return True
def __init__(self, record_str, cache_type=None, index=None):
    """Build the item from a serialized record, optionally cache-backed.

    Parses ``record_str`` via ``self._parse_example``, converts the parsed
    example to a dict, and copies into ``self._features`` only the entries
    whose key appears in ``common.ALLOWED_FIELDS``. The raw record is then
    passed to ``self._set_tf_record``.

    Args:
        record_str: the serialized record.
        cache_type: stored on ``self._cache_type``; when truthy, ``index``
            must not be ``None`` (enforced with an ``assert``).
        index: stored on ``self._index``.
    """
    super().__init__()
    self._cache_type = cache_type
    self._index = index
    if self._cache_type:
        assert self._index is not None, \
            "store space is disk, index cann't be None"
    self._parse_example_error = False
    parsed = self._parse_example(record_str)
    field_values = common.convert_tf_example_to_dict(parsed)
    permitted = {
        name: value
        for name, value in field_values.items()
        if name in common.ALLOWED_FIELDS
    }
    self._features.update(permitted)
    self._set_tf_record(record_str)
    # The csv view starts unset.
    self._csv_record = None
    self._gc_example(parsed)
def __init__(self, record_str, cache_type=None, index=None):
    """Build the item from a serialized record, unwrapping 1-element values.

    Works like a plain parse-and-filter constructor, except that every
    converted value of length one is replaced by its single element before
    the entries allowed by ``common.ALLOWED_FIELDS`` are copied into
    ``self._features``.

    Args:
        record_str: the serialized record.
        cache_type: stored on ``self._cache_type``; when truthy, ``index``
            must not be ``None`` (enforced with an ``assert``).
        index: stored on ``self._index``.
    """
    super().__init__()
    self._cache_type = cache_type
    self._index = index
    if self._cache_type:
        assert self._index is not None, \
            "store space is disk, index cann't be None"
    self._parse_example_error = False
    parsed = self._parse_example(record_str)
    field_values = common.convert_tf_example_to_dict(parsed)
    # Data-block features should be scalars rather than lists, so collapse
    # every single-element value; this is done for all keys before filtering.
    flattened = {
        name: (values[0] if len(values) == 1 else values)
        for name, values in field_values.items()
    }
    permitted = {
        name: value
        for name, value in flattened.items()
        if name in common.ALLOWED_FIELDS
    }
    self._features.update(permitted)
    self._set_tf_record(record_str)
    # The csv view starts unset.
    self._csv_record = None
    self._gc_example(parsed)