import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield the whitespace-stripped line unless it starts with ``#``.

    This is a generator: for each input line it emits either exactly one
    value (the stripped line) or nothing at all.
    """
    stripped = line.strip()
    if stripped.startswith('#'):
        return
    yield stripped


def get_graph(*, _limit=(), _print=()):
    """Build the graph that reads ``datasets/passwd.txt`` and writes ``usernames.txt``.

    :param _limit: extra nodes spliced in right after comment filtering.
    :param _print: extra nodes spliced in right before the file writer.
    :return: a ``bonobo.Graph`` made of a single chain of nodes.
    """
    nodes = (
        bonobo.FileReader('datasets/passwd.txt'),
        skip_comments,
        *_limit,
        # Keep only the first colon-separated field of each line.
        lambda s: s.split(':')[0],
        *_print,
        bonobo.FileWriter('usernames.txt', fs='fs.output'),
    )
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # The parsed options are turned into get_graph() keyword arguments.
        graph_options = examples.get_graph_options(options)
        graph = get_graph(**graph_options)
        bonobo.run(graph, services=get_services())
return category, sms, sms_clean


def get_graph(*, _limit=(), _print=()):
    """Build a graph with one chain that reads ``spam.pkl`` and runs ``cleanse_sms``.

    :param _limit: extra nodes inserted between the reader and ``cleanse_sms``.
    :param _print: extra nodes appended after ``cleanse_sms``.
    :return: the populated ``bonobo.Graph``.
    """
    graph = bonobo.Graph()
    graph.add_chain(
        # spam.pkl is within the gzipped tarball
        bonobo.PickleReader('spam.pkl'),
        *_limit,
        cleanse_sms,
        *_print,
    )
    return graph


def get_services():
    """Return the shared example services with the ``'fs'`` entry replaced.

    The base mapping comes from the sibling ``_services`` module; ``'fs'`` is
    set to a ``TarFS`` built on the ``datasets/spam.tgz`` examples path.
    """
    # Local import: inside this function the name refers to the helper from
    # ``_services``, not to this function.
    from ._services import get_services
    return {
        **get_services(),
        # Listed after the unpacking, so it wins over any 'fs' key in the base mapping.
        'fs': TarFS(bonobo.get_examples_path('datasets/spam.tgz'))
    }


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # The parsed options are converted to get_graph() keyword arguments.
        bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services())
list(filter(None, map(_getlink, json.loads(row.get('links'))))),
        'country': pycountry.countries.get(
            alpha_2=row.get('country_code', '').upper()
        ).name,
    }
    return result


def get_graph(graph=None, *, _limit=(), _print=()):
    """Add the fablabs chain to ``graph`` and return the graph.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created when the
        argument is falsy.
    :param _limit: extra nodes inserted right after the API reader.
    :param _print: extra nodes inserted right before the JSON writer.
    :return: the graph the chain was added to.
    """
    # NOTE(review): ``or`` falls back on any falsy value, not only ``None`` --
    # confirm that an empty Graph passed by a caller is not expected to be reused.
    graph = graph or bonobo.Graph()
    graph.add_chain(
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path='fablabs.json'),
    )
    return graph


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # The parsed options are converted to get_graph() keyword arguments.
        bonobo.run(
            get_graph(**examples.get_graph_options(options)),
            services=get_services()
        )
# Maps each selectable target name to its graph factory callable.
graph_factories = {
    'coffeeshops': get_coffeeshops_graph,
    'fablabs': get_fablabs_graph
}

if __name__ == '__main__':
    parser = examples.get_argument_parser()
    # One or more target names, restricted to the registered factories.
    parser.add_argument('--target', '-t', choices=graph_factories.keys(), nargs='+')
    parser.add_argument('--sync', action='store_true', default=False)

    with bonobo.parse_args(parser) as options:
        graph_options = examples.get_graph_options(options)
        # Without an explicit target, every registered graph is used, in sorted name order.
        graph_names = list(options['target'] if options['target'] else sorted(
            graph_factories.keys()))

        # Create a graph with all requested subgraphs
        graph = bonobo.Graph()
        for name in graph_names:
            # Each factory receives the graph built so far; its return value
            # becomes the graph passed to the next factory.
            graph = graph_factories[name](graph, **graph_options)

        bonobo.run(graph, services=get_services())

        if options['sync']:
            # TODO: when parallel option for node will be implemented, need to be rewritten to use a graph.
            import boto3

            s3 = boto3.client('s3')
from bonobo.examples.datasets.services import get_services, get_datasets_dir, get_minor_version

# Maps each selectable target name to its graph factory callable.
graph_factories = {
    'coffeeshops': get_coffeeshops_graph,
    'fablabs': get_fablabs_graph,
}

if __name__ == '__main__':
    parser = examples.get_argument_parser()
    # One or more target names, restricted to the registered factories.
    parser.add_argument(
        '--target', '-t', choices=graph_factories.keys(), nargs='+'
    )
    parser.add_argument('--sync', action='store_true', default=False)

    with bonobo.parse_args(parser) as options:
        graph_options = examples.get_graph_options(options)
        # Without an explicit target, every registered graph is used, in sorted name order.
        graph_names = list(
            options['target']
            if options['target'] else sorted(graph_factories.keys())
        )

        # Create a graph with all requested subgraphs
        graph = bonobo.Graph()
        for name in graph_names:
            # Each factory receives the graph built so far; its return value
            # becomes the graph passed to the next factory.
            graph = graph_factories[name](graph, **graph_options)

        bonobo.run(graph, services=get_services())

        if options['sync']:
            # TODO: when parallel option for node will be implemented, need to be rewritten to use a graph.
            import boto3
def get_graph(*, _limit=(), _print=()):
    """Return a graph whose single chain reads ``spam.pkl`` and applies ``cleanse_sms``.

    :param _limit: extra nodes placed between the reader and ``cleanse_sms``.
    :param _print: extra nodes placed after ``cleanse_sms``.
    :return: the populated ``bonobo.Graph``.
    """
    graph = bonobo.Graph()
    nodes = (
        # spam.pkl is within the gzipped tarball
        bonobo.PickleReader('spam.pkl'),
        *_limit,
        cleanse_sms,
        *_print,
    )
    graph.add_chain(*nodes)
    return graph


def get_services():
    """Return the shared example services with ``'fs'`` pointing at the spam tarball.

    The base mapping comes from the sibling ``_services`` module; its ``'fs'``
    entry (if any) is replaced by a ``TarFS`` built on the
    ``datasets/spam.tgz`` examples path.
    """
    # Aliased so the imported helper is not confused with this function.
    from ._services import get_services as get_base_services

    services = {**get_base_services()}
    services['fs'] = TarFS(bonobo.get_examples_path('datasets/spam.tgz'))
    return services


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # The parsed options are turned into get_graph() keyword arguments.
        graph_options = examples.get_graph_options(options)
        graph = get_graph(**graph_options)
        bonobo.run(graph, services=get_services())