Example #1
    def testReadTruncatedFile_preservesReadOffset(self):
        """Verify that tf_record_iterator throws an exception on bad TFRecords.

    When a truncated record is completed, the iterator should return that new
    record on the next attempt at iteration, preserving the read offset. This
    behavior is required by TensorBoard.
    """
        # Write out a record and read it back it to get the raw bytes.
        fn = os.path.join(self.get_temp_dir(), "temp_file")
        with tf_record.TFRecordWriter(fn) as writer:
            writer.write(b"truncated")
        with open(fn, "rb") as f:
            record_bytes = f.read()
        # Start the file with a good record.
        fn_truncated = os.path.join(self.get_temp_dir(), "truncated_file")
        with tf_record.TFRecordWriter(fn_truncated) as writer:
            writer.write(b"good")
        with open(fn_truncated, "ab", buffering=0) as f:
            # Cause truncation by omitting the last byte from the record.
            f.write(record_bytes[:-1])
            iterator = tf_record.tf_record_iterator(fn_truncated)
            # Good record appears first.
            self.assertEqual(b"good", next(iterator))
            # Truncated record repeatedly causes DataLossError upon iteration.
            with self.assertRaises(errors_impl.DataLossError):
                next(iterator)
            with self.assertRaises(errors_impl.DataLossError):
                next(iterator)
            # Retrying after completing the record successfully returns the rest of
            # the file contents, preserving the prior read offset.
            f.write(record_bytes[-1:])
            self.assertEqual(b"truncated", next(iterator))
            with self.assertRaises(StopIteration):
                next(iterator)
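As a reference for the behavior exercised above, a minimal write-then-read round trip with the same internal module looks roughly like this (a sketch, assuming the tensorflow.python.lib.io.tf_record import used throughout these examples):

import os
import tempfile

from tensorflow.python.lib.io import tf_record

# Write two records, then read them back in write order.
path = os.path.join(tempfile.mkdtemp(), "roundtrip.tfrecord")
with tf_record.TFRecordWriter(path) as writer:
    writer.write(b"first")
    writer.write(b"second")

# tf_record_iterator yields the raw record payloads as bytes.
assert list(tf_record.tf_record_iterator(path)) == [b"first", b"second"]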
Example #2
    def testInputFn(self):
        with tempfile.NamedTemporaryFile() as records_file:
            with tf_record.TFRecordWriter(records_file.name) as records_writer:
                example = tf.train.Example()
                height = 5
                width = 3
                example.features.feature['height'].int64_list.value.append(
                    height)
                example.features.feature['width'].int64_list.value.append(
                    width)
                example.features.feature['patch'].float_list.value.extend(
                    range(height * width))
                label = 1
                example.features.feature['label'].int64_list.value.append(
                    label)
                for _ in range(3):
                    records_writer.write(example.SerializeToString())

            flags.FLAGS.input_patches = records_file.name
            batch_tensors = glyph_patches.input_fn()

            with self.test_session() as sess:
                batch = sess.run(batch_tensors)

                self.assertAllEqual(
                    batch[0]['patch'],
                    np.arange(height * width).reshape(
                        (1, height, width)).repeat(3, axis=0))
                self.assertAllEqual(batch[1], [label, label, label])
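For comparison, the same Example proto can be assembled in a single expression with the tf.train.Features/Feature constructors rather than by mutating example.features.feature in place; a rough, equivalent sketch (not taken from the original test):

import tensorflow as tf

height, width, label = 5, 3, 1
example = tf.train.Example(features=tf.train.Features(feature={
    'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
    'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
    'patch': tf.train.Feature(float_list=tf.train.FloatList(
        value=[float(v) for v in range(height * width)])),
    'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
}))
serialized = example.SerializeToString()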
Example #3
    def testReadGrowingFile_preservesReadOffset(self):
        """Verify that tf_record_iterator preserves read offset even after EOF.

    When a file is iterated to EOF, the iterator should raise StopIteration but
    not actually close the reader. Then if later new data is appended, the
    iterator should start returning that new data on the next call to next(),
    preserving the read offset. This behavior is required by TensorBoard.
    """
        # Start the file with a good record.
        fn = os.path.join(self.get_temp_dir(), "file.tfrecord")
        with tf_record.TFRecordWriter(fn) as writer:
            writer.write(b"one")
            writer.write(b"two")
            writer.flush()
            iterator = tf_record.tf_record_iterator(fn)
            self.assertEqual(b"one", next(iterator))
            self.assertEqual(b"two", next(iterator))
            # Iterating at EOF results in StopIteration repeatedly.
            with self.assertRaises(StopIteration):
                next(iterator)
            with self.assertRaises(StopIteration):
                next(iterator)
            # Retrying after adding a new record successfully returns the new record,
            # preserving the prior read offset.
            writer.write(b"three")
            writer.flush()
            self.assertEqual(b"three", next(iterator))
            with self.assertRaises(StopIteration):
                next(iterator)
Example #4
 def setUp(self, compression_type=TFRecordCompressionType.NONE):
     super(TFRecordWriterCloseAndFlushTests, self).setUp()
     self._fn = os.path.join(self.get_temp_dir(),
                             "tf_record_writer_test.txt")
     self._options = tf_record.TFRecordOptions(compression_type)
     self._writer = tf_record.TFRecordWriter(self._fn, self._options)
     self._num_records = 20
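The compression_type argument above is passed straight to TFRecordOptions; a small sketch (again assuming the internal tf_record module) of constructing writers for the three standard compression settings:

import os
import tempfile

from tensorflow.python.lib.io import tf_record

tmp_dir = tempfile.mkdtemp()
compression_types = {
    "none": tf_record.TFRecordCompressionType.NONE,
    "zlib": tf_record.TFRecordCompressionType.ZLIB,
    "gzip": tf_record.TFRecordCompressionType.GZIP,
}
for name, compression_type in compression_types.items():
    options = tf_record.TFRecordOptions(compression_type)
    path = os.path.join(tmp_dir, "records_%s.tfrecord" % name)
    # The writer applies the compression configured in options.
    with tf_record.TFRecordWriter(path, options) as writer:
        writer.write(b"payload")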
Example #5
def create_tfrecord_files(output_dir, num_files=3, num_records_per_file=10):
    """Creates TFRecords files.

  The method must be called within an active session.

  Args:
    output_dir: The directory where the files are stored.
    num_files: The number of files to create.
    num_records_per_file: The number of records per file.

  Returns:
    A list of the paths to the TFRecord files.
  """
    tfrecord_paths = []
    for i in range(num_files):
        path = os.path.join(output_dir,
                            'flowers.tfrecord-%d-of-%s' % (i, num_files))
        tfrecord_paths.append(path)

        writer = tf_record.TFRecordWriter(path)
        for _ in range(num_records_per_file):
            _, example = generate_image(image_shape=(10, 10, 3))
            writer.write(example)
        writer.close()

    return tfrecord_paths
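A quick sanity check on the generated shards is to count the records back with tf_record_iterator; a minimal sketch, assuming an active session (as the docstring requires) and a writable output_dir:

from tensorflow.python.lib.io import tf_record

tfrecord_paths = create_tfrecord_files(output_dir, num_files=3,
                                       num_records_per_file=10)
for path in tfrecord_paths:
    # Each shard should contain exactly num_records_per_file records.
    assert sum(1 for _ in tf_record.tf_record_iterator(path)) == 10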
Example #6
def save_rows_to_tf_record_file(rows,
                                make_sequence_example_fn,
                                sessions_df_length,
                                export_filename,
                                content_article_embeddings=None,
                                num_of_articles_in_sub_group=None):
    tf_record_options = tf_record.TFRecordOptions(
        tf_record.TFRecordCompressionType.GZIP)

    tf_writer = tf_record.TFRecordWriter(export_filename,
                                         options=tf_record_options)
    try:
        counter = 1
        for row in rows:
            start = time.time()
            print(f"{counter}/{sessions_df_length}")

            seq_example = make_sequence_example_fn(
                row, num_of_articles_in_sub_group, content_article_embeddings)

            end = time.time()
            print(end - start)
            counter += 1
            tf_writer.write(seq_example.SerializeToString())
    finally:
        tf_writer.close()
        sys.stdout.flush()
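Because the file above is written with GZIP compression, it must be read back with matching options; a minimal sketch (assuming the same tf_record module, an export_filename produced by the function above, and TensorFlow's tf.train.SequenceExample proto):

import tensorflow as tf
from tensorflow.python.lib.io import tf_record

read_options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.GZIP)
for serialized in tf_record.tf_record_iterator(export_filename,
                                               options=read_options):
    # Each record was written as a serialized SequenceExample.
    seq_example = tf.train.SequenceExample.FromString(serialized)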
Example #7
 def _WriteRecordsToFile(self, records, name="tf_record"):
     fn = os.path.join(self.get_temp_dir(), name)
     writer = tf_record.TFRecordWriter(fn, options=None)
     for r in records:
         writer.write(r)
     writer.close()
     del writer
     return fn
Example #8
  def generateTestData(self, prefix, n, m):
    for i in range(n):
      f = os.path.join(self.get_temp_dir(), prefix + "." + str(i))
      w = tf_record.TFRecordWriter(f)

      for j in range(m):
        w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8"))

      w.close()
Example #9
 def _CreateFiles(self):
     filenames = []
     for i in range(self._num_files):
         fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i)
         filenames.append(fn)
         writer = tf_record.TFRecordWriter(fn)
         for j in range(self._num_records):
             writer.write(self._Record(i, j))
     return filenames
Example #10
 def _make_test_tfrecord(self):
   f = tempfile.NamedTemporaryFile(dir=self.get_temp_dir(), delete=False)
   w = tf_record.TFRecordWriter(f.name)
   for i in range(100):
     ex = example_pb2.Example()
     ex.features.feature["var_len_int"].int64_list.value.extend(range((i % 3)))
     ex.features.feature["fixed_len_float"].float_list.value.extend(
         [float(i), 2 * float(i)])
     w.write(ex.SerializeToString())
   return f.name
Example #11
  def generateTestData(self, prefix, n, m,
      compression_type=tf_record.TFRecordCompressionType.NONE):
    options = tf_record.TFRecordOptions(compression_type)
    for i in range(n):
      f = os.path.join(self.get_temp_dir(), prefix + "." + str(i))
      w = tf_record.TFRecordWriter(f, options=options)

      for j in range(m):
        w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8"))

      w.close()
Example #12
def _make_tfexample_series(num_features, num_samples, test_tmpdir):
  _, data_file = tempfile.mkstemp(dir=test_tmpdir)
  with tf_record.TFRecordWriter(data_file) as writer:
    for i in range(num_samples):
      example = example_pb2.Example()
      times = example.features.feature[TrainEvalFeatures.TIMES]
      times.int64_list.value.append(i)
      values = example.features.feature[TrainEvalFeatures.VALUES]
      values.float_list.value.extend(
          [float(i) * 2. + feature_number
           for feature_number in range(num_features)])
      writer.write(example.SerializeToString())
  return data_file
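To inspect the file contents, each serialized record can be parsed back into an Example proto in plain Python; a small sketch assuming the example_pb2 and TrainEvalFeatures imports used in the snippet above and a data_file returned by _make_tfexample_series:

from tensorflow.python.lib.io import tf_record

for i, serialized in enumerate(tf_record.tf_record_iterator(data_file)):
    example = example_pb2.Example.FromString(serialized)
    # The TIMES feature holds the sample index written above.
    assert example.features.feature[TrainEvalFeatures.TIMES].int64_list.value[0] == i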
Example #13
 def _WriteCompressedRecordsToFile(
         self,
         records,
         name="tfrecord.z",
         compression_type=tf_record.TFRecordCompressionType.ZLIB):
     fn = os.path.join(self.get_temp_dir(), name)
     options = tf_record.TFRecordOptions(compression_type=compression_type)
     writer = tf_record.TFRecordWriter(fn, options=options)
     for r in records:
         writer.write(r)
     writer.close()
     del writer
     return fn
Example #14
 def testKmeans(self):
     num_features = FLAGS.patch_height * FLAGS.patch_width
     dummy_data = np.random.random((500, num_features))
     with tempfile.NamedTemporaryFile(mode='r') as patches_file:
         with tf_record.TFRecordWriter(patches_file.name) as patches_writer:
             for patch in dummy_data:
                 example = example_pb2.Example()
                 example.features.feature[
                     'features'].float_list.value.extend(patch)
                 patches_writer.write(example.SerializeToString())
         clusters = staffline_patches_kmeans_pipeline.train_kmeans(
             patches_file.name, NUM_CLUSTERS, BATCH_SIZE, TRAIN_STEPS)
         self.assertEqual(clusters.shape, (NUM_CLUSTERS, num_features))
Example #15
 def testBadFile(self):
     """Verify that tf_record_iterator throws an exception on bad TFRecords."""
     fn = os.path.join(self.get_temp_dir(), "bad_file")
     with tf_record.TFRecordWriter(fn) as writer:
         writer.write(b"123")
     fn_truncated = os.path.join(self.get_temp_dir(), "bad_file_truncated")
     with open(fn, "rb") as f:
         with open(fn_truncated, "wb") as f2:
             # DataLossError requires that we've written the header, so this must
             # be at least 12 bytes.
             f2.write(f.read(14))
     with self.assertRaises(errors_impl.DataLossError):
         for _ in tf_record.tf_record_iterator(fn_truncated):
             pass
Example #16
    def test_read_batched_sequence_example_dataset(self, sloppy_ordering):
        # Save protos in a TFRecord file in a temp folder.
        serialized_sequence_examples = [
            SEQ_EXAMPLE_PROTO_1.SerializeToString(),
            SEQ_EXAMPLE_PROTO_2.SerializeToString()
        ] * 100
        data_dir = test.get_temp_dir()
        data_file = os.path.join(data_dir, "test_sequence_example.tfrecord")
        if file_io.file_exists(data_file):
            file_io.delete_file(data_file)

        with tf_record.TFRecordWriter(data_file) as writer:
            for s in serialized_sequence_examples:
                writer.write(s)

        batched_dataset = data_lib.read_batched_sequence_example_dataset(
            file_pattern=data_file,
            batch_size=2,
            list_size=2,
            context_feature_spec=CONTEXT_FEATURE_SPEC,
            example_feature_spec=EXAMPLE_FEATURE_SPEC,
            reader=readers.TFRecordDataset,
            shuffle=False,
            sloppy_ordering=sloppy_ordering)

        features = batched_dataset.make_one_shot_iterator().get_next()
        self.assertAllEqual(sorted(features),
                            ["query_length", "unigrams", "utility"])
        # Check static shapes for dense tensors.
        self.assertAllEqual([2, 1],
                            features["query_length"].get_shape().as_list())
        self.assertAllEqual([2, 2, 1],
                            features["utility"].get_shape().as_list())

        with session.Session() as sess:
            sess.run(variables.local_variables_initializer())
            queue_runner.start_queue_runners()
            feature_map = sess.run(features)
            # Test dense_shape, indices and values for a SparseTensor.
            self.assertAllEqual(feature_map["unigrams"].dense_shape, [2, 2, 3])
            self.assertAllEqual(
                feature_map["unigrams"].indices,
                [[0, 0, 0], [0, 1, 0], [0, 1, 1], [0, 1, 2], [1, 0, 0]])
            self.assertAllEqual(
                feature_map["unigrams"].values,
                [b"tensorflow", b"learning", b"to", b"rank", b"gbdt"])
            # Check values directly for dense tensors.
            self.assertAllEqual(feature_map["query_length"], [[3], [2]])
            self.assertAllEqual(feature_map["utility"],
                                [[[0.], [1.0]], [[0.], [0.]]])
Example #17
def save_rows_to_tf_record_file(df_rows, make_sequence_example_fn,
                                export_filename):
    tf_record_options = tf_record.TFRecordOptions(
        tf_record.TFRecordCompressionType.GZIP)

    tf_writer = tf_record.TFRecordWriter(export_filename,
                                         options=tf_record_options)
    try:
        for index, row in df_rows.iterrows():
            seq_example = make_sequence_example_fn(row)
            tf_writer.write(seq_example.SerializeToString())
    finally:
        tf_writer.close()
        sys.stdout.flush()
Example #18
    def _CreateFiles(self):
        filenames = []
        for i in range(self._num_files):
            fn = os.path.join(self.get_temp_dir(), "tf_record.%d.txt" % i)
            filenames.append(fn)
            options = tf_record.TFRecordOptions(
                compression_type=TFRecordCompressionType.ZLIB)
            writer = tf_record.TFRecordWriter(fn, options=options)
            for j in range(self._num_records):
                writer.write(self._Record(i, j))
            writer.close()
            del writer

        return filenames
Example #19
    def do_POST(self):
        post_vars = cgi.parse_qs(
            self.rfile.read(int(self.headers.getheader('content-length'))))
        labels = [
            post_vars['cluster%d' % i][0]
            for i in moves.xrange(self.clusters.shape[0])
        ]
        examples = create_examples(self.clusters, labels)

        with tf_record.TFRecordWriter(self.output_path) as writer:
            for example in examples:
                writer.write(example.SerializeToString())
        self.send_response(http_client.OK)
        self.end_headers()
        self.wfile.write('Success')  # printed in the labeler alert
Example #20
    def testAsFunctionFromReader(self):
        with ops.device("CPU"):
            file_path = os.path.join(
                self.get_temp_dir(),
                "{}.tfrecord.gz".format("tf_record_asset"))
            with tf_record.TFRecordWriter(file_path, "GZIP") as f:
                for v in ["a", "aa", "aaa"]:
                    f.write(str(v))
            original_dataset = readers.TFRecordDataset([file_path],
                                                       compression_type="GZIP")
            fn = original_dataset._trace_variant_creation()
            variant = fn()

            revived_dataset = dataset_ops._VariantDataset(
                variant, original_dataset.element_spec)
            self.assertDatasetProduces(revived_dataset, ["a", "aa", "aaa"])
Example #21
def main(_):
    tf.logging.info('Building the pipeline...')
    records_dir = tempfile.mkdtemp(prefix='staffline_kmeans')
    try:
        patch_file_prefix = os.path.join(records_dir, 'patches')
        with pipeline_flags.create_pipeline() as pipeline:
            filenames = file_io.get_matching_files(FLAGS.music_pattern)
            assert filenames, 'Must have matched some filenames'
            if 0 < FLAGS.num_pages < len(filenames):
                filenames = random.sample(filenames, FLAGS.num_pages)
            filenames = pipeline | beam.transforms.Create(filenames)
            patches = filenames | beam.ParDo(
                staffline_patches_dofn.StafflinePatchesDoFn(
                    patch_height=FLAGS.patch_height,
                    patch_width=FLAGS.patch_width,
                    num_stafflines=FLAGS.num_stafflines,
                    timeout_ms=FLAGS.timeout_ms,
                    max_patches_per_page=FLAGS.max_patches_per_page))
            if FLAGS.num_outputs:
                patches |= combiners.Sample.FixedSizeGlobally(
                    FLAGS.num_outputs)
            patches |= beam.io.WriteToTFRecord(
                patch_file_prefix, beam.coders.ProtoCoder(tf.train.Example))
            tf.logging.info('Running the pipeline...')
        tf.logging.info('Running k-means...')
        patch_files = file_io.get_matching_files(patch_file_prefix + '*')
        clusters = train_kmeans(patch_files, FLAGS.kmeans_num_clusters,
                                FLAGS.kmeans_batch_size,
                                FLAGS.kmeans_num_steps)
        tf.logging.info('Writing the centroids...')
        with tf_record.TFRecordWriter(FLAGS.output_path) as writer:
            for cluster in clusters:
                example = tf.train.Example()
                example.features.feature['features'].float_list.value.extend(
                    cluster)
                example.features.feature['height'].int64_list.value.append(
                    FLAGS.patch_height)
                example.features.feature['width'].int64_list.value.append(
                    FLAGS.patch_width)
                writer.write(example.SerializeToString())
        tf.logging.info('Done!')
    finally:
        shutil.rmtree(records_dir)
Example #22
 def _WriteRecordsToFile(self, records, name="tfrecord", options=None):
     fn = os.path.join(self.get_temp_dir(), name)
     with tf_record.TFRecordWriter(fn, options=options) as writer:
         for r in records:
             writer.write(r)
     return fn
Example #23
 def write_records_to_file(filename, records):
     writer = tf_record.TFRecordWriter(filename)
     for record in records:
         writer.write(record)
     writer.close()
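As a closing usage note, files produced by any of the writers above can also be consumed through tf.data; a minimal sketch assuming TF 2.x eager execution and that write_records_to_file is available as a module-level helper:

import tensorflow as tf

write_records_to_file("/tmp/demo.tfrecord", [b"a", b"b", b"c"])

# TFRecordDataset yields one scalar string tensor per record.
dataset = tf.data.TFRecordDataset("/tmp/demo.tfrecord")
assert [t.numpy() for t in dataset] == [b"a", b"b", b"c"]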