import apache_beam as beam
from apache_beam.options.pipeline_options import (
    GoogleCloudOptions,
    PipelineOptions,
    SetupOptions,
    StandardOptions,
)


def run():
    options = PipelineOptions()

    input_ = 'gs://tempgcpbucket1/entries.csv'
    output_ = 'gs://tempgcpbucket1/counts/'
    # options.input = input_
    # options.output = output_

    # Google Cloud-specific settings live on a GoogleCloudOptions view of the
    # same underlying options object.
    gcloud_options = options.view_as(GoogleCloudOptions)
    gcloud_options.project = 'rk-playground'
    gcloud_options.job_name = 'entriesjob'
    gcloud_options.staging_location = 'gs://tempgcpbucket1/binaries'
    gcloud_options.temp_location = 'gs://tempgcpbucket1/tmp'

    # We use the save_main_session option because one or more DoFns in this
    # workflow rely on global context (e.g., a module imported at module level).
    options.view_as(SetupOptions).save_main_session = True

    options.view_as(StandardOptions).runner = 'DataflowRunner'

    p = beam.Pipeline(options=options)

    (p  # pylint: disable=expression-not-assigned
        | 'ReadInputText' >> beam.io.ReadFromText(input_)
        | 'UserAggregate' >> UserAggregate()
        | 'FormatUserScoreSums' >> beam.Map(format_user_score_sums)
        | 'WriteUserScoreSums' >> beam.io.WriteToText(output_))

    p.run()
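

# The transforms referenced above (UserAggregate, format_user_score_sums) are
# not defined in this snippet. The sketch below is an assumption of what they
# might look like for a CSV whose lines read "user,team,score,...": it is
# modeled on Beam's user_score example, not taken from the original code.
class UserAggregate(beam.PTransform):
    """Parses each CSV line and sums scores per user."""

    def expand(self, pcoll):
        return (
            pcoll
            | 'ParseLine' >> beam.Map(lambda line: line.split(','))
            # Assumes field 0 is the user and field 2 is the numeric score.
            | 'ToKV' >> beam.Map(lambda fields: (fields[0], int(fields[2])))
            | 'SumPerUser' >> beam.CombinePerKey(sum))


def format_user_score_sums(user_score):
    """Formats a (user, total_score) pair as a single output line."""
    user, total = user_score
    return '%s: %d' % (user, total)
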
def main():
    # Create options; TemplateOptions is a custom PipelineOptions subclass
    # that declares the template parameters read below.
    options = PipelineOptions()
    options.view_as(SetupOptions).setup_file = './setup.py'
    options.view_as(GoogleCloudOptions).job_name = 'gcs2gdrive'
    template_options = options.view_as(TemplateOptions)

    p = beam.Pipeline(options=options)

    (p | "Read" >> beam.io.ReadFromText(options.input_csv)
     | "Write" >> beam.ParDo(
         CopyFile(options.gdrive_directory_id, options.service_account_file)))

    p.run()
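

# TemplateOptions and CopyFile are not defined in this snippet. Below is a
# minimal sketch of what TemplateOptions might look like, assuming the three
# parameters are supplied at template execution time as ValueProviders; the
# argument names simply mirror the attributes used in main() above.
class TemplateOptions(PipelineOptions):

    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument(
            '--input_csv',
            help='GCS path of the CSV file to read, e.g. gs://bucket/file.csv')
        parser.add_value_provider_argument(
            '--gdrive_directory_id',
            help='ID of the Google Drive folder to copy files into')
        parser.add_value_provider_argument(
            '--service_account_file',
            help='Path to the service account key used for the Drive API')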