コード例 #1
0
ファイル: beam_utils_test.py プロジェクト: galv/lingvo-copy
    def testReaders(self):
        """Checks GetReader accepts 'tfrecord' and rejects an unknown format."""
        # A supported record format must construct without raising.
        tfrecord_path = test_helper.test_src_dir_path(
            'tasks/mt/testdata/wmt14_ende_wpm_32k_test.tfrecord')
        example_coder = beam.coders.ProtoCoder(tf.train.Example)
        beam_utils.GetReader(
            'tfrecord', tfrecord_path, value_coder=example_coder)

        # An unrecognized record format is expected to raise ValueError.
        with self.assertRaises(ValueError):
            beam_utils.GetReader(
                'unknown',
                '/tmp/foo',
                value_coder=beam.coders.ProtoCoder(tf.train.Example))
コード例 #2
0
def main(argv):
    """Runs a Beam pipeline that converts Frame records to tf.Example records.

    Reads TFRecords matching `FLAGS.input_file_pattern` using a
    `dataset_pb2.Frame` proto coder, passes each record through
    `waymo_proto_to_tfe.WaymoOpenDatasetConverter`, and writes the converter's
    output as TFRecords under `FLAGS.output_filebase` using a
    `tf.train.Example` proto coder.

    Args:
      argv: Command-line arguments; everything after the first element is
        forwarded to Beam's `PipelineOptions`.

    Raises:
      ValueError: If `FLAGS.input_file_pattern` or `FLAGS.output_filebase` is
        unset or empty.
    """
    beam_utils.BeamInit()

    # Validate with explicit raises rather than `assert`: assert statements are
    # removed under `python -O`, which would let an empty flag silently reach
    # the pipeline.
    if not FLAGS.input_file_pattern:
        raise ValueError('FLAGS.input_file_pattern must be set.')
    if not FLAGS.output_filebase:
        raise ValueError('FLAGS.output_filebase must be set.')

    # Construct pipeline options from argv (skipping the program name).
    options = beam.options.pipeline_options.PipelineOptions(argv[1:])

    reader = beam_utils.GetReader(
        'tfrecord',
        FLAGS.input_file_pattern,
        value_coder=beam.coders.ProtoCoder(dataset_pb2.Frame))

    writer = beam_utils.GetWriter(
        'tfrecord',
        file_pattern=FLAGS.output_filebase,
        value_coder=beam.coders.ProtoCoder(tf.train.Example))

    # The converter is handed the emitter function for the 'tfrecord' format.
    emitter_fn = beam_utils.GetEmitterFn('tfrecord')
    with beam_utils.GetPipelineRoot(options=options) as root:
        _ = (root
             | 'Read' >> reader
             | 'ConvertToTFExample' >> beam.ParDo(
                 waymo_proto_to_tfe.WaymoOpenDatasetConverter(emitter_fn))
             | 'Write' >> writer)
コード例 #3
0
ファイル: count_records.py プロジェクト: galv/lingvo-copy
def main(argv):
    """Counts the records matching a file pattern and writes the total as text.

    Records are read as raw bytes in the format named by `FLAGS.record_format`
    from `FLAGS.input_file_pattern`; the global count is written to
    `FLAGS.output_count_file`.

    Args:
      argv: Command-line arguments; everything after the first element is
        forwarded to Beam's `PipelineOptions`.
    """
    beam_utils.BeamInit()

    # Everything past the program name is treated as Beam pipeline options.
    pipeline_args = argv[1:]
    options = beam.options.pipeline_options.PipelineOptions(pipeline_args)

    # Records are only counted, never decoded, so a bytes coder is used.
    reader = beam_utils.GetReader(
        FLAGS.record_format,
        FLAGS.input_file_pattern,
        value_coder=beam.coders.BytesCoder())

    with beam_utils.GetPipelineRoot(options=options) as root:
        # Read each record.
        records = root | 'Read' >> reader
        # Emit a 1 for each record.
        ones = records | 'EmitOne' >> beam.Map(lambda _: 1)
        # Sum the ones into a single global count.
        total = ones | 'Count' >> beam.CombineGlobally(sum)
        _ = total | 'WriteToText' >> beam.io.WriteToText(
            FLAGS.output_count_file)