list(filter(None, map(_getlink, json.loads(row.get('links'))))), 'country': pycountry.countries.get( alpha_2=row.get('country_code', '').upper() ).name, } return result def get_graph(graph=None, *, _limit=(), _print=()): graph = graph or bonobo.Graph() graph.add_chain( OpenDataSoftAPI(dataset=API_DATASET), *_limit, normalize, bonobo.UnpackItems(0), *_print, bonobo.JsonWriter(path='fablabs.json'), ) return graph if __name__ == '__main__': parser = examples.get_argument_parser() with bonobo.parse_args(parser) as options: bonobo.run( get_graph(**examples.get_graph_options(options)), services=get_services() )
'-t', choices=graph_factories.keys(), nargs='+') parser.add_argument('--sync', action='store_true', default=False) with bonobo.parse_args(parser) as options: graph_options = examples.get_graph_options(options) graph_names = list(options['target'] if options['target'] else sorted( graph_factories.keys())) # Create a graph with all requested subgraphs graph = bonobo.Graph() for name in graph_names: graph = graph_factories[name](graph, **graph_options) bonobo.run(graph, services=get_services()) if options['sync']: # TODO: when the parallel option for nodes is implemented, this needs to be rewritten to use a graph. import boto3 s3 = boto3.client('s3') local_dir = get_datasets_dir() for root, dirs, files in os.walk(local_dir): for filename in files: local_path = os.path.join(root, filename) relative_path = os.path.relpath(local_path, local_dir) s3_path = os.path.join(get_minor_version(), relative_path) try:
result = { **row, 'links': list(filter(None, map(_getlink, json.loads(row.get('links'))))), 'country': pycountry.countries.get( alpha_2=row.get('country_code', '').upper()).name, } return result def get_graph(graph=None, *, _limit=(), _print=()): graph = graph or bonobo.Graph() graph.add_chain( OpenDataSoftAPI(dataset=API_DATASET), *_limit, normalize, bonobo.UnpackItems(0), *_print, bonobo.JsonWriter(path='fablabs.json'), ) return graph if __name__ == '__main__': parser = examples.get_argument_parser() with bonobo.parse_args(parser) as options: bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services())
) parser.add_argument('--sync', action='store_true', default=False) with bonobo.parse_args(parser) as options: graph_options = examples.get_graph_options(options) graph_names = list( options['target'] if options['target'] else sorted(graph_factories.keys()) ) # Create a graph with all requested subgraphs graph = bonobo.Graph() for name in graph_names: graph = graph_factories[name](graph, **graph_options) bonobo.run(graph, services=get_services()) if options['sync']: # TODO: when the parallel option for nodes is implemented, this needs to be rewritten to use a graph. import boto3 s3 = boto3.client('s3') local_dir = get_datasets_dir() for root, dirs, files in os.walk(local_dir): for filename in files: local_path = os.path.join(root, filename) relative_path = os.path.relpath(local_path, local_dir) s3_path = os.path.join( get_minor_version(), relative_path )