def testReaders(self):
  """Checks GetReader dispatch: a known format works, an unknown one raises."""
  example_coder = beam.coders.ProtoCoder(tf.train.Example)
  pattern = test_helper.test_src_dir_path(
      'tasks/mt/testdata/wmt14_ende_wpm_32k_test.tfrecord')
  # A supported format name should construct a reader without error.
  _ = beam_utils.GetReader('tfrecord', pattern, value_coder=example_coder)
  # An unsupported format name must be rejected with ValueError.
  with self.assertRaises(ValueError):
    _ = beam_utils.GetReader('unknown', '/tmp/foo', value_coder=example_coder)
def main(argv):
  """Runs a Beam pipeline converting Waymo Open Dataset Frames to tf.Examples.

  Reads `dataset_pb2.Frame` protos from the TFRecord files matching
  --input_file_pattern, converts each via WaymoOpenDatasetConverter, and
  writes the resulting tf.train.Examples to --output_filebase.

  Args:
    argv: Command-line arguments; argv[1:] is forwarded to Beam as pipeline
      options.

  Raises:
    ValueError: If --input_file_pattern or --output_filebase is not set.
  """
  beam_utils.BeamInit()
  # Validate flags with explicit raises rather than `assert`: asserts are
  # stripped when Python runs with -O, which would silently skip this check.
  if not FLAGS.input_file_pattern:
    raise ValueError('Must provide --input_file_pattern.')
  if not FLAGS.output_filebase:
    raise ValueError('Must provide --output_filebase.')

  # Construct pipeline options from argv.
  options = beam.options.pipeline_options.PipelineOptions(argv[1:])

  reader = beam_utils.GetReader(
      'tfrecord',
      FLAGS.input_file_pattern,
      value_coder=beam.coders.ProtoCoder(dataset_pb2.Frame))
  writer = beam_utils.GetWriter(
      'tfrecord',
      file_pattern=FLAGS.output_filebase,
      value_coder=beam.coders.ProtoCoder(tf.train.Example))
  emitter_fn = beam_utils.GetEmitterFn('tfrecord')
  with beam_utils.GetPipelineRoot(options=options) as root:
    _ = (root
         | 'Read' >> reader
         | 'ConvertToTFExample' >> beam.ParDo(
             waymo_proto_to_tfe.WaymoOpenDatasetConverter(emitter_fn))
         | 'Write' >> writer)
def main(argv):
  """Counts records matching the input pattern and writes the total to a file."""
  beam_utils.BeamInit()
  # Everything after the program name is interpreted as Beam pipeline options.
  options = beam.options.pipeline_options.PipelineOptions(argv[1:])
  reader = beam_utils.GetReader(
      FLAGS.record_format,
      FLAGS.input_file_pattern,
      value_coder=beam.coders.BytesCoder())
  with beam_utils.GetPipelineRoot(options=options) as root:
    total = (
        root
        | 'Read' >> reader  # Read each record.
        | 'EmitOne' >> beam.Map(lambda _: 1)  # Emit a 1 for each record.
        | 'Count' >> beam.CombineGlobally(sum))  # Sum counts.
    _ = total | 'WriteToText' >> beam.io.WriteToText(FLAGS.output_count_file)