def get_graph(**options):
    """
    Assemble the graph that processes AWS accounts.

    The chain named ``"main"`` runs ``extract_accounts``, ``transform``, a
    JSON writer (``aws_accounts_ex.json``) and ``valid_aws_account``. Three
    further chains are attached afterwards: a JSON writer
    (``aws_accounts.json``) whose input is the ``"main"`` name, and a CSV
    writer plus a ``bonobo_sqlalchemy.InsertOrUpdate`` node that both take
    ``valid_aws_account`` as input.

    :param options: keyword options; ``table_suffix`` is required and is
        appended to the ``aws_accounts`` table name.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    # Main pipeline, registered under the name "main".
    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    graph.add_chain(*main_nodes, _name="main")

    # JSON output attached through the chain name.
    json_sink = bonobo.JsonWriter('aws_accounts.json')
    graph.add_chain(json_sink, _input="main")

    # CSV output, fed by valid_aws_account.
    csv_nodes = (
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('aws_accounts.csv'),
    )
    graph.add_chain(*csv_nodes, _input=valid_aws_account)

    # Database output, fed by valid_aws_account as well.
    unpack = bonobo.UnpackItems(0)
    accounts_table = 'aws_accounts' + options['table_suffix']
    db_sink = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=accounts_table,
        discriminant=('account_id', ),
        engine='db',
    )
    graph.add_chain(unpack, db_sink, _input=valid_aws_account)

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the graph that extracts the list of one-euro cafes in Paris.

    The producer chain reads the ``liste-des-cafes-a-un-euro`` dataset from
    ``opendata.paris.fr``, renames the name, address and zipcode fields,
    adds the city and country, and reorders the fields. Its output is then
    sent to a CSV writer, a JSON writer and a line-delimited JSON writer.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created only
        when this is ``None``.
    :param _limit: nodes handed to ``PartialGraph`` right after the reader.
    :param _print: nodes handed to ``PartialGraph`` at the end of the
        producer chain.
    :return: bonobo.Graph
    """
    # Explicit ``None`` check: ``graph or bonobo.Graph()`` would replace a
    # caller-provided graph that has no nodes yet, because bonobo.Graph
    # defines ``__len__`` and an empty graph is therefore falsy.
    if graph is None:
        graph = bonobo.Graph()

    producer = (
        graph.get_cursor()
        >> ODSReader(dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
        >> PartialGraph(*_limit)
        >> bonobo.UnpackItems(0)
        >> bonobo.Rename(name="nom_du_cafe", address="adresse", zipcode="arrondissement")
        >> bonobo.Format(city="Paris", country="France")
        >> bonobo.OrderFields(["name", "address", "zipcode", "city", "country", "geometry", "geoloc"])
        >> PartialGraph(*_print)
    )

    # Comma separated values.
    graph.get_cursor(producer.output) >> bonobo.CsvWriter(
        "coffeeshops.csv",
        fields=["name", "address", "zipcode", "city"],
        delimiter=",",
    )

    # Standard JSON
    graph.get_cursor(producer.output) >> bonobo.JsonWriter(path="coffeeshops.json")

    # Line-delimited JSON
    graph.get_cursor(producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the graph that dumps the ``API_DATASET`` records to ``fablabs.json``.

    A single chain is added: ``OpenDataSoftAPI`` reader, the ``_limit``
    nodes, ``normalize``, ``bonobo.UnpackItems(0)``, the ``_print`` nodes and
    finally a JSON writer.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created only
        when this is ``None``.
    :param _limit: nodes inserted right after the reader.
    :param _print: nodes inserted right before the JSON writer.
    :return: bonobo.Graph
    """
    # Explicit ``None`` check: ``graph or bonobo.Graph()`` would replace a
    # caller-provided graph that has no nodes yet, because bonobo.Graph
    # defines ``__len__`` and an empty graph is therefore falsy.
    if graph is None:
        graph = bonobo.Graph()

    graph.add_chain(
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path='fablabs.json'),
    )
    return graph
def get_graph(*, _limit=None, _print=False):
    """
    Build the theaters graph: one JSON reader fanned out to two writers.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node is appended
        to the trunk right after the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter`` branch is attached
        to the trunk output.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    # Trunk: the reader, optionally followed by a limiter.
    trunk_nodes = [bonobo.JsonReader('datasets/theaters.json')]
    if _limit:
        trunk_nodes.append(bonobo.Limit(_limit))
    trunk = graph.add_chain(*trunk_nodes)

    if _print:
        graph.add_chain(bonobo.PrettyPrinter(), _input=trunk.output)

    # Each writer gets its own branch off the trunk output.
    writers = (
        bonobo.JsonWriter('theaters.json', fs='fs.output'),
        bonobo.LdjsonWriter('theaters.ldjson', fs='fs.output'),
    )
    for writer in writers:
        graph.add_chain(writer, _input=trunk.output)

    return graph
def get_graph(*, _limit=None, _print=False):
    """
    Build the theaters graph reading ``theaters.json`` through ``fs.static``.

    The trunk output is branched to a JSON writer and a line-delimited JSON
    writer, both using the ``fs.output`` service name.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node follows the
        reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter`` branch is added.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    reader = bonobo.JsonReader("theaters.json", fs="fs.static")
    if _limit:
        limiters = (bonobo.Limit(_limit), )
    else:
        limiters = ()
    trunk = graph.add_chain(reader, *limiters)

    if _print:
        printer = bonobo.PrettyPrinter()
        graph.add_chain(printer, _input=trunk.output)

    json_writer = bonobo.JsonWriter("theaters.output.json", fs="fs.output")
    graph.add_chain(json_writer, _input=trunk.output)

    ldjson_writer = bonobo.LdjsonWriter("theaters.output.ldjson", fs="fs.output")
    graph.add_chain(ldjson_writer, _input=trunk.output)

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the graph that extracts the list of one-euro cafes in Paris.

    The producer chain reads the ``liste-des-cafes-a-un-euro`` dataset from
    ``opendata.paris.fr``, renames the name, address and zipcode fields,
    adds the city and country, and reorders the fields. Its output is then
    sent to a CSV writer, a JSON writer and a line-delimited JSON writer.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created only
        when this is ``None``.
    :param _limit: nodes inserted right after the reader.
    :param _print: nodes inserted at the end of the producer chain.
    :return: bonobo.Graph
    """
    # Explicit ``None`` check: ``graph or bonobo.Graph()`` would replace a
    # caller-provided graph that has no nodes yet, because bonobo.Graph
    # defines ``__len__`` and an empty graph is therefore falsy.
    if graph is None:
        graph = bonobo.Graph()

    producer = graph.add_chain(
        ODSReader(dataset='liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
        *_limit,
        bonobo.UnpackItems(0),
        bonobo.Rename(name='nom_du_cafe', address='adresse', zipcode='arrondissement'),
        bonobo.Format(city='Paris', country='France'),
        bonobo.OrderFields(['name', 'address', 'zipcode', 'city', 'country', 'geometry', 'geoloc']),
        *_print,
    )

    # Comma separated values.
    graph.add_chain(
        bonobo.CsvWriter('coffeeshops.csv', fields=['name', 'address', 'zipcode', 'city'], delimiter=','),
        _input=producer.output,
    )

    # Standard JSON
    graph.add_chain(
        bonobo.JsonWriter(path='coffeeshops.json'),
        _input=producer.output,
    )

    # Line-delimited JSON
    graph.add_chain(
        bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
        _input=producer.output,
    )

    return graph
import bonobo


def split_one(line):
    """Split *line* at its first ``', '`` and return the resulting list."""
    return line.split(sep=', ', maxsplit=1)


# Read coffeeshops.txt, split each line once, and write the result as JSON.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json'),
)

if __name__ == '__main__':
    bonobo.run(
        graph,
        services={
            'fs': bonobo.open_examples_fs('datasets'),
        },
    )
def get_graph(**options):
    """
    Assemble the graph that processes AWS billing reports.

    A chain named ``"main"`` (created with ``_input=None``) holds the CSV and
    JSON writers, the transformation callables and the
    ``bonobo_sqlalchemy.InsertOrUpdate`` node for the
    ``fact_itsm_aws_historical_cost`` table. One reader chain per month is
    then added with ``_output="main"``, walking backwards from the month of
    ``options['now']``. A last chain attaches a second ``InsertOrUpdate`` node
    with ``parse_dates`` as its input.

    Options read: ``table_suffix``, ``now``, ``months``, ``limit``,
    ``aws_account_id`` and ``table``.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    main_nodes = [
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' + options['table_suffix'],
            discriminant=('productname', 'date_sk', 'account_name_sk'),
            engine='database',
        ),
    ]
    graph.add_chain(*main_nodes, _name="main", _input=None)

    # Move the reference date to the beginning of its month.
    month = options['now'] + relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        # Step one month back on every iteration.
        month = month + relativedelta(months=-1)
        stamp = month.strftime("%Y-%m")
        print("# {:d} Processing {}".format(index, stamp))

        # A new Limit node is created for each reader chain.
        limit_nodes = (bonobo.Limit(options['limit']), ) if options['limit'] else ()

        reader = AwsBillingReader(
            '%s-aws-cost-allocation-%s.csv' % (options['aws_account_id'], stamp),
            fs='s3',
            skip=1,
        )
        graph.add_chain(reader, *limit_nodes, _output="main")

    raw_sink = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid', 'recordid'),
        engine='database',
    )
    graph.add_chain(raw_sink, _input=parse_dates)

    return graph
(row.get('postal_code', None), row.get('city', None)))), row.get('county', None), row.get('country'), ))) print(' - {}address{}: {address}'.format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address))) print(' - {}links{}: {links}'.format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links']))) print(' - {}geometry{}: {geometry}'.format(Fore.BLUE, Style.RESET_ALL, **row)) print(' - {}source{}: {source}'.format(Fore.BLUE, Style.RESET_ALL, source='datanova/' + API_DATASET)) graph = bonobo.Graph( OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), normalize, bonobo.Filter(filter=lambda row: row.get('country') == 'France'), bonobo.JsonWriter(path='fablabs.txt', ioformat='arg0'), bonobo.Tee(display), ) if __name__ == '__main__': bonobo.run(graph, services=get_default_services(__file__))
import bonobo


def split_one(line):
    """
    Split *line* at its first ``', '`` and label the parts.

    The parts are paired in order with the keys ``name`` and ``address``; a
    line without the separator yields only the ``name`` key.
    """
    parts = line.split(', ', 1)
    return {key: value for key, value in zip(("name", "address"), parts)}


# Read coffeeshops.txt, turn each line into a dict, and write it as JSON
# through the 'fs.output' service.
graph = bonobo.Graph(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json', fs='fs.output'),
)


def get_services():
    """Return the services mapping passed to ``bonobo.run`` for this graph."""
    services = {}
    services['fs'] = bonobo.open_examples_fs('datasets')
    services['fs.output'] = bonobo.open_fs()
    return services


if __name__ == '__main__':
    bonobo.run(graph, services=get_services())
import bonobo

# Read Google_facebook_statuses.csv and write its rows to output.json.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.CsvReader('Google_facebook_statuses.csv'),
    bonobo.JsonWriter('output.json'),
)

if __name__ == '__main__':
    bonobo.run(graph)
import bonobo


def split_one(line):
    """Return the list obtained by splitting *line* at its first ``', '``."""
    separator = ', '
    return line.split(separator, 1)


# Read coffeeshops.txt, split each line once, and write the result as JSON
# through the 'fs.output' service, with ioformat set to 'arg0'.
graph = bonobo.Graph(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter(
        'coffeeshops.json',
        fs='fs.output',
        ioformat='arg0',
    ),
)


def get_services():
    """Return the services mapping passed to ``bonobo.run`` for this graph."""
    examples_fs = bonobo.open_examples_fs('datasets')
    output_fs = bonobo.open_fs()
    return {
        'fs': examples_fs,
        'fs.output': output_fs,
    }


if __name__ == '__main__':
    bonobo.run(graph, services=get_services())
(row.get('postal_code', None), row.get('city', None)))), row.get('county', None), row.get('country'), ))) print(' - {}address{}: {address}'.format(Fore.BLUE, Style.RESET_ALL, address=', '.join(address))) print(' - {}links{}: {links}'.format(Fore.BLUE, Style.RESET_ALL, links=', '.join(row['links']))) print(' - {}geometry{}: {geometry}'.format(Fore.BLUE, Style.RESET_ALL, **row)) print(' - {}source{}: {source}'.format(Fore.BLUE, Style.RESET_ALL, source='datanova/' + API_DATASET)) graph = bonobo.Graph( OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'), normalize, filter_france, bonobo.Tee(display), bonobo.JsonWriter(path='fablabs.txt'), ) if __name__ == '__main__': bonobo.run(graph, services=get_default_services(__file__))