def test_run_example_with_setup_file(self):
    """Run the juliaset example on an integration-test pipeline with a setup file.

    The coordinates are written under the pipeline's ``output`` option, in a
    ``juliaset-<uuid4>`` sub-directory. The ``setup_file`` option points at
    the ``setup.py`` located one directory above this test module, and the
    success matcher requires the pipeline to reach ``PipelineState.DONE``.
    """
    test_pipeline = TestPipeline(is_integration_test=True)

    # A fresh UUID per invocation gives every run its own output directory
    # (presumably so that concurrent or repeated runs do not collide).
    run_directory = 'juliaset-' + str(uuid.uuid4())
    output_path = FileSystems.join(
        test_pipeline.get_option('output'), run_directory, 'coordinates.txt')

    # setup.py lives in the parent of the directory holding this file;
    # normpath collapses the '..' component of the joined path.
    this_directory = os.path.dirname(__file__)
    setup_path = os.path.normpath(
        os.path.join(this_directory, '..', 'setup.py'))

    pipeline_args = test_pipeline.get_full_options_as_args(
        coordinate_output=output_path,
        grid_size=self.GRID_SIZE,
        setup_file=setup_path,
        on_success_matcher=all_of(PipelineStateMatcher(PipelineState.DONE)),
    )
    juliaset.run(pipeline_args)
def run_example(self, grid_size, image_file_name=None):
    """Invoke ``juliaset.run`` with command-line style arguments.

    Args:
      grid_size: Value formatted into the ``--grid_size`` flag.
      image_file_name: Optional value for the ``--image_output`` flag. The
        flag is left out entirely when this is ``None``.

    The ``--coordinate_output`` flag is taken from
    ``self.test_files['output_coord_file_name']``.
    """
    coordinate_flag = (
        '--coordinate_output=%s' % self.test_files['output_coord_file_name'])
    grid_flag = '--grid_size=%s' % grid_size

    # Only an explicit None suppresses the image flag; other falsy values
    # (for example an empty string) are still forwarded.
    if image_file_name is None:
        image_flags = []
    else:
        image_flags = ['--image_output=%s' % image_file_name]

    juliaset.run([coordinate_flag, grid_flag] + image_flags)
In Python Dataflow, using the --setup_file option when submitting a job triggers the creation of a source distribution (as if running python setup.py sdist) and the staging of the resulting tarball in the staging area. The workers, upon startup, will install the tarball. Below is a complete command line for running the juliaset workflow remotely as an example: python juliaset_main.py \ --job_name juliaset-$USER \ --project YOUR-PROJECT \ --region GCE-REGION \ --runner DataflowRunner \ --setup_file ./setup.py \ --staging_location gs://YOUR-BUCKET/juliaset/staging \ --temp_location gs://YOUR-BUCKET/juliaset/temp \ --coordinate_output gs://YOUR-BUCKET/juliaset/out \ --grid_size 20 """ # pytype: skip-file import logging from apache_beam.examples.complete.juliaset.juliaset import juliaset if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) juliaset.run()
evolve beyond just one module and you will have to make sure the additional modules are present in the worker. In Python Dataflow, using the --setup_file option when submitting a job triggers the creation of a source distribution (as if running python setup.py sdist) and the staging of the resulting tarball in the staging area. The workers, upon startup, will install the tarball. Below is a complete command line for running the juliaset workflow remotely as an example: python juliaset_main.py \ --job_name juliaset-$USER \ --project YOUR-PROJECT \ --region GCE-REGION \ --runner DataflowRunner \ --setup_file ./setup.py \ --staging_location gs://YOUR-BUCKET/juliaset/staging \ --temp_location gs://YOUR-BUCKET/juliaset/temp \ --coordinate_output gs://YOUR-BUCKET/juliaset/out \ --grid_size 20 """ import logging from apache_beam.examples.complete.juliaset.juliaset import juliaset if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) juliaset.run()