import apache_beam as beam
from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.options.pipeline_options import StandardOptions


def run():
  options = PipelineOptions()
  input_ = 'gs://tempgcpbucket1/entries.csv'
  output_ = 'gs://tempgcpbucket1/counts/'
  # options.input = input_
  # options.output = output_

  # Use a named view for the Google Cloud settings instead of rebinding
  # `options`, so the full options object stays available below.
  google_cloud_options = options.view_as(GoogleCloudOptions)
  google_cloud_options.project = 'rk-playground'
  google_cloud_options.job_name = 'entriesjob'
  google_cloud_options.staging_location = 'gs://tempgcpbucket1/binaries'
  google_cloud_options.temp_location = 'gs://tempgcpbucket1/tmp'

  # We use the save_main_session option because one or more DoFns in this
  # workflow rely on global context (e.g., a module imported at module level).
  options.view_as(SetupOptions).save_main_session = True
  options.view_as(StandardOptions).runner = 'DataflowRunner'

  p = beam.Pipeline(options=options)
  (p  # pylint: disable=expression-not-assigned
   | 'ReadInputText' >> beam.io.ReadFromText(input_)
   | 'UserAggregate' >> UserAggregate()
   | 'FormatUserScoreSums' >> beam.Map(format_user_score_sums)
   | 'WriteUserScoreSums' >> beam.io.WriteToText(output_))
  p.run()
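# The pipeline above assumes `UserAggregate` and `format_user_score_sums` are
# defined elsewhere. Below is a minimal sketch of what they could look like,
# assuming each CSV row is a `user,score` pair; the row layout and the bodies
# of both definitions are assumptions, not part of the original snippet.

class UserAggregate(beam.PTransform):
  """Hypothetical composite transform: sum scores per user."""
  def expand(self, pcoll):
    return (pcoll
            | 'SplitRow' >> beam.Map(lambda line: line.split(','))
            | 'ToKV' >> beam.Map(lambda fields: (fields[0], int(fields[1])))
            | 'SumPerUser' >> beam.CombinePerKey(sum))


def format_user_score_sums(user_score):
  """Hypothetical formatter for the (user, total_score) pairs."""
  user, total = user_score
  return 'user: %s, total_score: %d' % (user, total)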
def test_model_bigqueryio(self):
  # We cannot test BigQueryIO functionality in unit tests, therefore we limit
  # ourselves to making sure the pipeline containing BigQuery sources and
  # sinks can be built.
  #
  # To run locally, set `run_locally` to `True`. You will also have to set
  # `project`, `dataset` and `table` to the BigQuery table the test will
  # write to.
  run_locally = False

  if run_locally:
    project = 'my-project'
    dataset = 'samples'  # this must already exist
    table = 'model_bigqueryio'  # this will be created if needed

    options = PipelineOptions().view_as(GoogleCloudOptions)
    options.project = project
    with beam.Pipeline(options=options) as p:
      snippets.model_bigqueryio(p, project, dataset, table)
  else:
    p = TestPipeline()
    snippets.model_bigqueryio(p)
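# `snippets.model_bigqueryio` itself is not shown in this section. For
# context, here is a minimal sketch of the shape such a snippet might take;
# the query, schema, and public source table below are illustrative
# assumptions, not the real implementation in apache_beam.examples.snippets.

def model_bigqueryio_sketch(p, project=None, dataset=None, table=None):
  rows = p | 'ReadFromBQ' >> beam.io.ReadFromBigQuery(
      query='SELECT year, mean_temp '
            'FROM `clouddataflow-readonly.samples.weather_stations`',
      use_standard_sql=True)
  if project and dataset and table:
    rows | 'WriteToBQ' >> beam.io.WriteToBigQuery(
        table,
        dataset=dataset,
        project=project,
        schema='year:INTEGER,mean_temp:FLOAT',
        create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
        write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE)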