Example #1
# Imports assumed by this snippet; the exact lingvo and Waymo module paths
# may differ between releases.
import apache_beam as beam
import tensorflow as tf
from absl import flags
from lingvo.tools import beam_utils
from lingvo.tasks.car.waymo.tools import waymo_proto_to_tfe
from waymo_open_dataset import dataset_pb2

FLAGS = flags.FLAGS


def main(argv):
    beam_utils.BeamInit()

    assert FLAGS.input_file_pattern
    assert FLAGS.output_filebase

    # Construct pipeline options from argv.
    options = beam.options.pipeline_options.PipelineOptions(argv[1:])

    reader = beam_utils.GetReader('tfrecord',
                                  FLAGS.input_file_pattern,
                                  value_coder=beam.coders.ProtoCoder(
                                      dataset_pb2.Frame))

    writer = beam_utils.GetWriter('tfrecord',
                                  file_pattern=FLAGS.output_filebase,
                                  value_coder=beam.coders.ProtoCoder(
                                      tf.train.Example))

    emitter_fn = beam_utils.GetEmitterFn('tfrecord')
    with beam_utils.GetPipelineRoot(options=options) as root:
        _ = (root
             | 'Read' >> reader  # Read Waymo Frame protos.
             | 'ConvertToTFExample' >> beam.ParDo(
                 waymo_proto_to_tfe.WaymoOpenDatasetConverter(emitter_fn))
             | 'Write' >> writer)  # Write the converted tf.train.Examples.
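
A minimal sketch of how this converter could be wired into a runnable script, assuming absl flags with the names used above (the real lingvo tool defines its own flags and entry point):

# Hypothetical wiring; flag names mirror the snippet above.
from absl import app

flags.DEFINE_string('input_file_pattern', None,
                    'Glob of input TFRecords of Waymo Frame protos.')
flags.DEFINE_string('output_filebase', None,
                    'Base path for the output TFRecord shards.')

if __name__ == '__main__':
    app.run(main)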
Example #2
# Assumes the same imports as Example #1 (apache_beam as beam, FLAGS, and
# lingvo's beam_utils).
def main(argv):
    beam_utils.BeamInit()

    # Construct pipeline options from argv.
    options = beam.options.pipeline_options.PipelineOptions(argv[1:])

    reader = beam_utils.GetReader(FLAGS.record_format,
                                  FLAGS.input_file_pattern,
                                  value_coder=beam.coders.BytesCoder())

    with beam_utils.GetPipelineRoot(options=options) as root:
        _ = (
            root
            | 'Read' >> reader  # Read each record.
            | 'EmitOne' >> beam.Map(lambda _: 1)  # Emit a 1 for each record.
            | 'Count' >> beam.CombineGlobally(sum)  # Sum counts.
            | 'WriteToText' >> beam.io.WriteToText(FLAGS.output_count_file))
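
As an aside, Beam ships a built-in counting combiner: the EmitOne/Count pair above is equivalent to beam.combiners.Count.Globally(). A small sketch with placeholder paths:

import apache_beam as beam

with beam.Pipeline() as root:
    _ = (
        root
        | 'Read' >> beam.io.ReadFromTFRecord('input-*')  # placeholder pattern
        | 'Count' >> beam.combiners.Count.Globally()  # one count per pipeline
        | 'Write' >> beam.io.WriteToText('/tmp/count'))  # placeholder path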
Example #3
# Assumes the same imports as above, plus tf (tensorflow) and a _ProcessShard
# DoFn defined elsewhere in the source file.
def main(_):
    beam_utils.BeamInit()

    if not FLAGS.output_file_pattern:
        raise ValueError('Must provide an output_file_pattern')

    reader = beam.io.ReadFromTFRecord(FLAGS.input_file_pattern,
                                      coder=beam.coders.ProtoCoder(
                                          tf.train.Example))

    model_name = FLAGS.model_name
    split = FLAGS.split
    run_preprocessors = FLAGS.run_preprocessors

    with beam_utils.GetPipelineRoot() as root:
        _ = (root
             | 'Read' >> reader
             | 'ToTFExample' >> beam.ParDo(
                 _ProcessShard(model_name, split, run_preprocessors))
             | 'Reshuffle' >> beam.Reshuffle()  # Break fusion; even out shards.
             | 'Write' >> beam.io.WriteToTFRecord(FLAGS.output_file_pattern,
                                                  coder=beam.coders.ProtoCoder(
                                                      tf.train.Example)))
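
_ProcessShard itself is not shown in this example. Purely as a hypothetical sketch, a DoFn with the constructor signature used above might look like this; the real preprocessing logic is not in the source:

import apache_beam as beam


class _ProcessShard(beam.DoFn):
    """Hypothetical sketch; the real DoFn's body is not shown above."""

    def __init__(self, model_name, split, run_preprocessors):
        self._model_name = model_name
        self._split = split
        self._run_preprocessors = run_preprocessors

    def process(self, example):
        # A real implementation would run the model's input preprocessors
        # for the given split here, possibly emitting multiple examples.
        yield example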