Example #1
0
        def _inner_iter(self, fpath):
            """Yield one ``ExampleIdItem`` per example id stored in *fpath*.

            Each record read from the file is parsed as a
            ``LiteExampleIds`` message and its features are converted to a
            dict keyed by field name.  The number of entries under
            ``'example_id'`` decides how many items the record produces;
            the item at position ``i`` is numbered ``begin_index + i`` and
            carries the ``i``-th value of every optional sync field that is
            present and non-empty.
            """
            with make_tf_record_iter(fpath) as record_iter:
                for record in record_iter:
                    lite_example_ids = dj_pb.LiteExampleIds()
                    lite_example_ids.ParseFromString(record)
                    columns = convert_tf_example_to_dict(
                        tf.train.Example(features=lite_example_ids.features)
                    )

                    for offset in range(len(columns['example_id'])):
                        fields = {}
                        for name in SYNC_ALLOWED_OPTIONAL_FIELDS:
                            if name in columns:
                                values = columns[name]
                                # An empty column contributes nothing to
                                # the per-example row.
                                if len(values) > 0:
                                    fields[name] = values[offset]
                        yield ExampleIdVisitor.ExampleIdItem(
                            offset + lite_example_ids.begin_index,
                            fields
                        )
Example #2
0
 def __init__(self, record_str):
     """Initialise the item from a serialized record.

     The record string is stored, parsed through ``_parse_example`` and
     converted to a dict; only the entries whose key is in
     ``self._allowed_fields`` are copied into ``self._features``.  The
     parsed example is then handed to ``_gc_example``.
     """
     super().__init__()
     self._record_str = record_str
     self._parse_example_error = False
     example = self._parse_example()
     parsed = common.convert_tf_example_to_dict(example)
     # Keep only the whitelisted fields as features of this item.
     allowed = {
         name: value
         for name, value in parsed.items()
         if name in self._allowed_fields
     }
     self._features.update(allowed)
     # The csv view is not built here; it starts out unset.
     self._csv_record = None
     self._gc_example(example)
Example #3
0
 def csv_record(self):
     """Return the dict form of this record, building it on first use.

     The result is cached in ``self._csv_record``.  If parsing is flagged
     as failed, or the conversion raises, the cached value stays an empty
     dict and the conversion error is logged rather than propagated.
     """
     if self._csv_record is not None:
         return self._csv_record

     # Start from an empty dict so that every failure path below still
     # leaves a usable (and cached) value behind.
     self._csv_record = {}
     self._parse_example(False)
     if self._parse_example_error:
         return self._csv_record

     try:
         self._csv_record = common.convert_tf_example_to_dict(
             self._example)
     except Exception as err:  # pylint: disable=broad-except
         logging.error(
             "Failed convert tf example to csv record, reason %s", err)
     return self._csv_record
 def check_tfrecord(self, raw_data):
     """Validate a sampled fraction of serialized ``tf.Example`` records.

     A record is inspected only when a fresh ``random.random()`` draw is
     below ``self._sample_ratio``; records that are not sampled are
     accepted as-is.

     Returns:
         False when a sampled record cannot be parsed/converted or when
         ``self._check`` rejects it, True otherwise.
     """
     sampled = random.random() < self._sample_ratio
     if not sampled:
         return True

     try:
         example = tf.train.Example()
         example.ParseFromString(raw_data)
         example_dict = convert_tf_example_to_dict(example)
         passed = self._check(example_dict)
         if not passed:
             return False
     except Exception as err:  # pylint: disable=broad-except
         logging.error(
             "Failed parse tf.Example from record %s, reason %s",
             raw_data, err)
         return False
     return True
Example #5
0
 def __init__(self, record_str, cache_type=None, index=None):
     """Initialise the item from a serialized record.

     Args:
         record_str: serialized record, passed to ``_parse_example`` and
             ``_set_tf_record``.
         cache_type: stored as ``_cache_type``; when truthy, ``index``
             must be provided.
         index: stored as ``_index``; must not be None when
             ``cache_type`` is truthy.
     """
     super().__init__()
     self._cache_type = cache_type
     self._index = index
     if self._cache_type:
         assert self._index is not None, \
             "store space is disk, index cann't be None"
     self._parse_example_error = False
     example = self._parse_example(record_str)
     parsed = common.convert_tf_example_to_dict(example)
     # Only fields named in common.ALLOWED_FIELDS become features.
     allowed = {
         name: value
         for name, value in parsed.items()
         if name in common.ALLOWED_FIELDS.keys()
     }
     self._features.update(allowed)
     self._set_tf_record(record_str)
     # The csv view is not built here; it starts out unset.
     self._csv_record = None
     self._gc_example(example)
Example #6
0
 def __init__(self, record_str, cache_type=None, index=None):
     """Initialise the item from a serialized record.

     Unlike a plain copy of the converted dict, every value of length one
     is replaced by its single element before the allowed fields are
     stored as features.

     Args:
         record_str: serialized record, passed to ``_parse_example`` and
             ``_set_tf_record``.
         cache_type: stored as ``_cache_type``; when truthy, ``index``
             must be provided.
         index: stored as ``_index``; must not be None when
             ``cache_type`` is truthy.
     """
     super().__init__()
     self._cache_type = cache_type
     self._index = index
     if self._cache_type:
         assert self._index is not None, \
             "store space is disk, index cann't be None"
     self._parse_example_error = False
     example = self._parse_example(record_str)
     parsed = common.convert_tf_example_to_dict(example)
     # should not be list for data block: single-element values are
     # unwrapped, anything else is kept unchanged.
     unwrapped = {
         name: (values[0] if len(values) == 1 else values)
         for name, values in parsed.items()
     }
     allowed = {
         name: value
         for name, value in unwrapped.items()
         if name in common.ALLOWED_FIELDS.keys()
     }
     self._features.update(allowed)
     self._set_tf_record(record_str)
     # The csv view is not built here; it starts out unset.
     self._csv_record = None
     self._gc_example(example)