return transform(*args, **kwargs) def main(options): root = '/tmp/rillbeam/multiexternal' if os.path.exists(root): shutil.rmtree(root) os.makedirs(root) pipe = beam.Pipeline(options=options) (pipe | 'Gen' >> beam.Create([ (root, 'f1.ext'), (root, 'f2.ext'), (root, 'f3.ext'), ]) | 'Join' >> EnvTransform(Join) | 'Touch' >> EnvTransform(Touch) | 'Log' >> Log()) # proto, ctx = pipe.to_runner_api(return_context=True) result = pipe.run() result.wait_until_finish() if __name__ == '__main__': from rillbeam.helpers import get_options pipeline_args, _ = get_options(__name__) main(pipeline_args)
Basic graph to showcase that logging happens eagerly. """ import apache_beam as beam from rillbeam.transforms import SleepFn, Log def main(options, runner=None): from rillbeam.helpers import write_pipeline_text, write_pipeline_svg pipe = beam.Pipeline(options=options, runner=runner) (pipe | 'Init' >> beam.Create(range(10)) | 'Sleep' >> beam.ParDo(SleepFn(), duration=1.0) | 'Log' >> Log()) # write_pipeline_svg(pipe, __name__) result = pipe.run() result.wait_until_finish() if __name__ == '__main__': from rillbeam.helpers import get_options options, args = get_options(__name__) runner = None if args.defaults == 'rill': import rill.runner runner = rill.runner.RillRunner() main(options, runner=runner)
| 'GroupByKey' >> beam.GroupByKey() | 'Format' >> beam.Map(format_result) | 'ToBytes' >> beam.Map(lambda x: bytes(x)) | 'PubSubOutflow' >> beam.io.WriteToPubSub(OUTPUT_TOPIC)) print cprint('Starting pipeline...', 'yellow', attrs=['bold']) result = pipe.run() # type: PipelineResult time.sleep(10) while result.state != PipelineState.RUNNING: time.sleep(10) try: pubsub_interface(SUBSCRIPTION_PATH, INPUT_TOPIC) finally: print cprint('Shutting down pipeline...', 'yellow', attrs=['bold']) result.cancel() print if __name__ == '__main__': from rillbeam.helpers import get_options pipeline_args, _ = get_options( __name__, None, 'streaming', ) logging.getLogger().setLevel(logging.INFO) main(pipeline_args)
attrs=['bold']) print cprint('Generating farm jobs:', 'yellow') for k, v in farm_kw: print ' {}={}'.format(k, colored(repr(v), 'white', attrs=['bold'])) print publisher = pubsub_v1.PublisherClient() for payload in rillbeam.data.farm.gen_farm_messages(**dict(farm_kw)): publisher.publish(INPUT_TOPIC, data=bytes(json.dumps(payload)), tags=__name__) try: result.wait_until_finish() except KeyboardInterrupt: print cprint('Shutting down...', 'yellow') result.cancel() if __name__ == '__main__': from rillbeam.helpers import get_options logging.getLogger().setLevel(logging.INFO) pipeline_args, known_args = get_options(__name__, None, 'streaming') main(pipeline_args, known_args)
# | 'KafkaWrite' >> WriteToKafka( # producer_config={ # 'bootstrap.servers': 'localhost:9092', # }, # topic=TOPIC, # # key_serializer='org.apache.kafka.common.serialization.ByteArraySerializer', # # value_serializer='org.apache.kafka.common.serialization.ByteArraySerializer', # expansion_service='localhost:8097', # ) ) if __name__ == '__main__': from rillbeam.helpers import get_parser, get_options parser = get_parser() parser.add_argument('--send', action='store_true') logging.getLogger().setLevel(logging.INFO) pipeline_args, known_args = get_options( __name__, parser, 'streaming', ) if known_args.send: send(pipeline_args) else: main(pipeline_args, known_args)
""" Basic graph to test docker volume mounting. """ import apache_beam as beam DIR_NAME = '/tmp/beam_test' def touch(f): import os with open(f, 'a'): os.utime(f, None) def main(options): with beam.Pipeline(options=options) as pipe: (pipe | 'Init' >> beam.Create(['{}/f1.ext'.format(DIR_NAME)]) | 'Touch' >> beam.Map(touch)) if __name__ == '__main__': from rillbeam.helpers import get_options, REGISTRY_URL pipeline_args, _ = get_options( __name__, environment_config='-v {}:{} {}/beam/python:latest'.format( DIR_NAME, DIR_NAME, REGISTRY_URL), ) main(pipeline_args)