def run(pipeline_args, known_args):
    """
    Build and execute the GeoTIFF-to-BigQuery pipeline.

    Invoked by the Beam runner.

    Args:
        pipeline_args: list of additional pipeline option strings; they are
            appended after the ``--experiments use_beam_bq_sink`` flags.
        known_args: parsed arguments object. This function reads its
            ``gcs_url``, ``band_number``, ``centroid_only``,
            ``merge_blocks``, ``band_column``, ``dataset``, ``table`` and
            ``schema`` attributes.
    """
    import apache_beam as beam
    from apache_beam.io.gcp.internal.clients import bigquery as beam_bigquery
    from apache_beam.options.pipeline_options import PipelineOptions

    from geobeam.io import GeotiffSource
    from geobeam.fn import format_record

    bq_sink_flags = ['--experiments', 'use_beam_bq_sink']
    options = PipelineOptions(bq_sink_flags + pipeline_args)

    with beam.Pipeline(options=options) as pipeline:
        # Source: the raster at the given GCS URL.
        raster_source = GeotiffSource(
            known_args.gcs_url,
            band_number=known_args.band_number,
            centroid_only=known_args.centroid_only,
            merge_blocks=known_args.merge_blocks,
        )
        pixels = pipeline | beam.io.Read(raster_source)

        # elev_to_centimeters is defined elsewhere in this module.
        scaled = pixels | 'ElevToCentimeters' >> beam.Map(elev_to_centimeters)

        # The band value is written to the band column as an 'int'.
        records = scaled | 'FormatRecords' >> beam.Map(
            format_record, known_args.band_column, 'int')

        # Sink: file-load into the destination table with the
        # WRITE_TRUNCATE / CREATE_IF_NEEDED dispositions.
        destination = beam_bigquery.TableReference(
            datasetId=known_args.dataset,
            tableId=known_args.table,
        )
        records | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            destination,
            schema=known_args.schema,
            method=beam.io.WriteToBigQuery.Method.FILE_LOADS,
            write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        )
def run(pipeline_args, known_args):
    """
    Build and execute the GeoTIFF-to-BigQuery pipeline.

    Invoked by the Beam runner.

    Args:
        pipeline_args: list of additional pipeline option strings; they are
            appended after the ``--experiments use_beam_bq_sink`` flags.
        known_args: parsed arguments object. This function reads its
            ``gcs_url``, ``band_number``, ``merge_blocks``, ``band_column``,
            ``band_type``, ``dataset`` and ``table`` attributes.
    """
    import apache_beam as beam
    from apache_beam.io.gcp.internal.clients import bigquery as beam_bigquery
    from apache_beam.options.pipeline_options import PipelineOptions

    import geobeam.fn
    from geobeam.io import GeotiffSource

    bq_sink_flags = ['--experiments', 'use_beam_bq_sink']
    options = PipelineOptions(bq_sink_flags + pipeline_args)

    with beam.Pipeline(options=options) as pipeline:
        # Source: the raster at the given GCS URL.
        raster_source = GeotiffSource(
            known_args.gcs_url,
            band_number=known_args.band_number,
            merge_blocks=known_args.merge_blocks,
        )
        shapes = pipeline | beam.io.Read(raster_source)

        # Pass every element through make_valid, then keep only those that
        # filter_invalid accepts.
        repaired = shapes | 'MakeValid' >> beam.Map(geobeam.fn.make_valid)
        usable = repaired | 'FilterInvalid' >> beam.Filter(
            geobeam.fn.filter_invalid)

        # The band column name and type both come from the parsed arguments.
        records = usable | 'FormatRecords' >> beam.Map(
            geobeam.fn.format_record,
            known_args.band_column,
            known_args.band_type,
        )

        # Sink: file-load into the destination table with the
        # WRITE_TRUNCATE / CREATE_IF_NEEDED dispositions. No schema is
        # passed to the sink here.
        destination = beam_bigquery.TableReference(
            datasetId=known_args.dataset,
            tableId=known_args.table,
        )
        records | 'WriteToBigQuery' >> beam.io.WriteToBigQuery(
            destination,
            method=beam.io.WriteToBigQuery.Method.FILE_LOADS,
            write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
            create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        )