コード例 #1
0
def run(pipeline_args, known_args):
    """
    Build and run the GeoTIFF-to-BigQuery pipeline. Invoked by the Beam runner.

    The pipeline reads from a ``GeotiffSource`` at ``known_args.gcs_url``,
    maps every element through ``elev_to_centimeters`` (defined outside this
    function), formats each element with ``geobeam.fn.format_record`` and
    writes the result to BigQuery using file loads. The destination table is
    created if needed and truncated before writing.

    Args:
        pipeline_args: list of extra pipeline option strings; they are
            appended after ``--experiments use_beam_bq_sink``.
        known_args: parsed-arguments object providing ``gcs_url``,
            ``band_number``, ``centroid_only``, ``merge_blocks``,
            ``band_column``, ``dataset``, ``table`` and ``schema``.
    """
    import apache_beam as beam
    from apache_beam.io.gcp.internal.clients import bigquery as beam_bigquery
    from apache_beam.options.pipeline_options import PipelineOptions

    from geobeam.io import GeotiffSource
    from geobeam.fn import format_record

    # The experiment flag always comes first, followed by the caller's options.
    option_flags = ['--experiments', 'use_beam_bq_sink'] + pipeline_args
    beam_options = PipelineOptions(option_flags)

    # Leaving the ``with`` block is what actually runs the pipeline.
    with beam.Pipeline(options=beam_options) as pipeline:
        raster = pipeline | beam.io.Read(
            GeotiffSource(
                known_args.gcs_url,
                band_number=known_args.band_number,
                centroid_only=known_args.centroid_only,
                merge_blocks=known_args.merge_blocks,
            )
        )

        scaled = raster | 'ElevToCentimeters' >> beam.Map(elev_to_centimeters)

        # band_column and the literal 'int' are forwarded to format_record
        # as extra positional arguments.
        records = scaled | 'FormatRecords' >> beam.Map(
            format_record, known_args.band_column, 'int')

        destination = beam_bigquery.TableReference(
            datasetId=known_args.dataset,
            tableId=known_args.table,
        )
        _ = records | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            destination,
            schema=known_args.schema,
            method=beam.io.WriteToBigQuery.Method.FILE_LOADS,
            write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        )
コード例 #2
0
def run(pipeline_args, known_args):
    """
    Build and run the GeoTIFF-to-BigQuery pipeline. Invoked by the Beam runner.

    The pipeline reads from a ``GeotiffSource`` at ``known_args.gcs_url``,
    maps every element through ``geobeam.fn.make_valid``, keeps only the
    elements accepted by ``geobeam.fn.filter_invalid``, formats each element
    with ``geobeam.fn.format_record`` and writes the result to BigQuery using
    file loads. The destination table is created if needed and truncated
    before writing.

    Args:
        pipeline_args: list of extra pipeline option strings; they are
            appended after ``--experiments use_beam_bq_sink``.
        known_args: parsed-arguments object providing ``gcs_url``,
            ``band_number``, ``merge_blocks``, ``band_column``,
            ``band_type``, ``dataset`` and ``table``.
    """

    import apache_beam as beam
    from apache_beam.io.gcp.internal.clients import bigquery as beam_bigquery
    from apache_beam.options.pipeline_options import PipelineOptions

    import geobeam.fn as geobeam_fn
    from geobeam.io import GeotiffSource

    # The experiment flag always comes first, followed by the caller's options.
    option_flags = ['--experiments', 'use_beam_bq_sink'] + pipeline_args
    beam_options = PipelineOptions(option_flags)

    # Leaving the ``with`` block is what actually runs the pipeline.
    with beam.Pipeline(options=beam_options) as pipeline:
        raster = pipeline | beam.io.Read(
            GeotiffSource(
                known_args.gcs_url,
                band_number=known_args.band_number,
                merge_blocks=known_args.merge_blocks,
            )
        )

        repaired = raster | 'MakeValid' >> beam.Map(geobeam_fn.make_valid)
        usable = repaired | 'FilterInvalid' >> beam.Filter(
            geobeam_fn.filter_invalid)

        # band_column and band_type are forwarded to format_record as extra
        # positional arguments.
        records = usable | 'FormatRecords' >> beam.Map(
            geobeam_fn.format_record,
            known_args.band_column,
            known_args.band_type,
        )

        destination = beam_bigquery.TableReference(
            datasetId=known_args.dataset,
            tableId=known_args.table,
        )
        # NOTE(review): no schema is passed here although the create
        # disposition is CREATE_IF_NEEDED; presumably the table is expected
        # to exist already - confirm against the Beam BigQuery docs.
        _ = records | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            destination,
            method=beam.io.WriteToBigQuery.Method.FILE_LOADS,
            write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        )