def get_bu_graph(**options):
    """
    Build the business-unit export graph.

    The trunk chain runs from ``get_business_units`` to
    ``centerstone_BussUnit_remap``. Two more chains take that last node as
    their input and each end in a tab-delimited ``bonobo.CsvWriter`` that
    uses ``fs="brickftp"``.

    :param options: keyword options; ``options['suffix']`` is appended to
        both output paths (``KeyError`` if it is absent).
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    # Writer settings shared by both exports.
    writer_settings = dict(lineterminator="\n", delimiter="\t", fs="brickftp")

    graph = bonobo.Graph()
    graph.add_chain(
        get_business_units,
        join_cost_centers,
        centerstone_BU_SupOrg_Merge_remap,
        centerstone_BussUnit_remap,
    )

    # Branch 1: product lines (rows are also pretty-printed before writing).
    product_line_writer = bonobo.CsvWriter(
        '/etl/centerstone/downloads/ProductLineLevel1.txt' + suffix,
        **writer_settings)
    graph.add_chain(
        productLineLevel1_remap,
        unique_product_line,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        product_line_writer,
        _input=centerstone_BussUnit_remap,
    )

    # Branch 2: teams.
    team_writer = bonobo.CsvWriter(
        '/etl/centerstone/downloads/TeamLevel3.txt' + suffix,
        **writer_settings)
    graph.add_chain(
        teamLevel3_remap,
        bonobo.UnpackItems(0),
        team_writer,
        _input=centerstone_BussUnit_remap,
    )

    return graph
def get_graph(**options):
    """
    Assemble the AWS accounts graph.

    The chain named "main" runs ``extract_accounts`` and ``transform``,
    writes ``aws_accounts_ex.json`` and ends at ``valid_aws_account``.
    Three further chains are attached: a second JSON writer whose input is
    the "main" name, and a CSV writer and a
    ``bonobo_sqlalchemy.InsertOrUpdate`` node that both take
    ``valid_aws_account`` as input.

    :param options: keyword options; ``options['table_suffix']`` is
        appended to the ``aws_accounts`` table name.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    graph.add_chain(*main_nodes, _name="main")

    graph.add_chain(bonobo.JsonWriter('aws_accounts.json'), _input="main")

    graph.add_chain(
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('aws_accounts.csv'),
        _input=valid_aws_account,
    )

    db_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='aws_accounts' + options['table_suffix'],
        discriminant=('account_id', ),
        engine='db',
    )
    graph.add_chain(
        bonobo.UnpackItems(0),
        db_writer,
        _input=valid_aws_account,
    )

    return graph
def get_workday_employee_graph(**options):
    """
    Assemble the Workday employee export graph.

    The first chain runs ``get_workday_users`` and
    ``workday_centerstone_employee_remap`` and writes ``workday-users.csv``
    twice, once with ``fs="brickftp"`` and once with ``fs="centerstone"``.
    Two more chains take ``workday_centerstone_employee_remap`` as input,
    start with ``split_active_employee`` / ``split_termed_employee`` and
    write their files through ``HeaderlessCsvWriter`` to the same two
    filesystems.

    :param options: keyword options; ``options['suffix']`` is appended to
        every output path.
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    # All writers emit tab-delimited text with "\n" line endings.
    tsv = dict(lineterminator="\n", delimiter="\t")

    graph = bonobo.Graph()
    graph.add_chain(
        get_workday_users,
        workday_centerstone_employee_remap,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/workday-users.csv' + suffix,
            fs="brickftp", **tsv),
        bonobo.CsvWriter(
            'workday-users.csv' + suffix, fs="centerstone", **tsv),
    )

    # (first node of the branch, brickftp path, centerstone path)
    branches = (
        (split_active_employee,
         '/etl/centerstone/downloads/Mozilla_Active_Users.txt',
         'Mozilla_Active_Users.txt'),
        (split_termed_employee,
         '/etl/centerstone/downloads/Mozilla_Termed_Users.txt',
         'Mozilla_Termed_Users.txt'),
    )
    for splitter, brickftp_path, centerstone_path in branches:
        graph.add_chain(
            splitter,
            bonobo.UnpackItems(0),
            HeaderlessCsvWriter(
                brickftp_path + suffix, fs="brickftp", **tsv),
            HeaderlessCsvWriter(
                centerstone_path + suffix, fs="centerstone", **tsv),
            _input=workday_centerstone_employee_remap,
        )

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the graph for the "liste-des-cafes-a-un-euro" dataset.

    Rows are read from opendata.paris.fr, the name, address and zipcode
    fields are renamed, constant city/country values are added, and the
    fields are reordered. The result is then written to CSV, JSON and
    line-delimited JSON files.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created when
        the argument is falsy.
    :param _limit: nodes wrapped in a ``PartialGraph`` right after the reader.
    :param _print: nodes wrapped in a ``PartialGraph`` at the end of the
        producer chain.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    # Grow the producer chain one step at a time.
    producer = graph.get_cursor()
    producer = producer >> ODSReader(
        dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
    producer = producer >> PartialGraph(*_limit)
    producer = producer >> bonobo.UnpackItems(0)
    producer = producer >> bonobo.Rename(
        name="nom_du_cafe", address="adresse", zipcode="arrondissement")
    producer = producer >> bonobo.Format(city="Paris", country="France")
    producer = producer >> bonobo.OrderFields([
        "name",
        "address",
        "zipcode",
        "city",
        "country",
        "geometry",
        "geoloc",
    ])
    producer = producer >> PartialGraph(*_print)

    # Comma separated values.
    csv_writer = bonobo.CsvWriter(
        "coffeeshops.csv",
        fields=["name", "address", "zipcode", "city"],
        delimiter=",")
    graph.get_cursor(producer.output) >> csv_writer

    # Standard JSON
    json_writer = bonobo.JsonWriter(path="coffeeshops.json")
    graph.get_cursor(producer.output) >> json_writer

    # Line-delimited JSON
    ldjson_writer = bonobo.LdjsonWriter(path="coffeeshops.ldjson")
    graph.get_cursor(producer.output) >> ldjson_writer

    return graph
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The "main" chain runs ``extract_accounts``, ``transform`` and
    ``valid_aws_account``, unpacks the rows and ends at ``bonobo.noop``.
    That node is the shared input of one
    ``bonobo_sqlalchemy.InsertOrUpdate`` chain per distinct engine.

    :param options: keyword options; reads ``options['engine']`` (iterable
        of engine names, duplicates ignored), ``options['table_name']`` and
        ``options['table_suffix']``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop

    graph.add_chain(
        extract_accounts,
        transform,
        valid_aws_account,
        bonobo.UnpackItems(0),
        split_dbs,
        _name="main",
    )

    # dict.fromkeys() drops duplicate engines while keeping the caller's
    # order, so the writer chains are added in a reproducible order
    # (iterating a set of strings depends on hash randomisation).
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('linked_account_number', ),
                engine=engine),
            _input=split_dbs,
        )

    return graph
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The first chain reads ``/etl/metrics-insights/workday-users.csv`` with
    ``fs='brickftp'``, passes the rows through ``employee_active`` and
    ``find_badge_id``, unpacks them and ends at ``bonobo.noop``. That node
    is the shared input of one ``bonobo_sqlalchemy.InsertOrUpdate`` chain
    per distinct engine.

    :param options: keyword options; reads ``options['engine']`` (iterable
        of engine names, duplicates ignored), ``options['table_name']`` and
        ``options['table_suffix']``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop

    graph.add_chain(
        bonobo.CsvReader('/etl/metrics-insights/workday-users.csv',
                         fs='brickftp'),
        employee_active,
        find_badge_id,
        bonobo.UnpackItems(0),
        split_dbs,
    )

    # dict.fromkeys() drops duplicate engines while keeping the caller's
    # order, so the writer chains are added in a reproducible order
    # (iterating a set of strings depends on hash randomisation).
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('badgeid', ),
                buffer_size=10,
                engine=engine),
            _input=split_dbs,
        )

    return graph
def get_costcenter_graph(**options):
    """
    Build the cost-center export graph.

    A single chain named "main" runs ``get_cost_centers``,
    ``cache_cost_centers`` and ``centerstone_CostCenter_remap``, unpacks
    the rows, writes ``CostCenterLevel2.txt`` with ``fs="brickftp"`` and
    again with ``fs="centerstone"``, and ends with ``bonobo.count``.

    :param options: keyword options; ``options['suffix']`` is appended to
        both output paths.
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    tsv = dict(lineterminator="\n", delimiter="\t")

    # Note carried over from the original code, which placed it just
    # before the writers: "Can't skip the header, but must".
    brickftp_writer = bonobo.CsvWriter(
        '/etl/centerstone/downloads/CostCenterLevel2.txt' + suffix,
        fs="brickftp", **tsv)
    centerstone_writer = bonobo.CsvWriter(
        'CostCenterLevel2.txt' + suffix, fs="centerstone", **tsv)

    graph = bonobo.Graph()
    graph.add_chain(
        get_cost_centers,
        cache_cost_centers,
        centerstone_CostCenter_remap,
        bonobo.UnpackItems(0),
        brickftp_writer,
        centerstone_writer,
        bonobo.count,
        _name="main",
    )
    return graph
def get_sched_graph(**options):
    """
    Build the graph made of one chain: ``get_sched``, unpack,
    ``modified_events``, ``sync_event``, unpack, pretty-print, count.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = (
        get_sched,
        bonobo.UnpackItems(0),
        modified_events,
        sync_event,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        bonobo.count,
    )
    return bonobo.Graph(*pipeline)
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Add the chain that reads ``API_DATASET`` through ``OpenDataSoftAPI``,
    applies ``normalize``, unpacks the rows and writes ``fablabs.json``.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created when
        the argument is falsy.
    :param _limit: extra nodes inserted right after the reader.
    :param _print: extra nodes inserted right before the JSON writer.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    nodes = [OpenDataSoftAPI(dataset=API_DATASET)]
    nodes.extend(_limit)
    nodes.append(normalize)
    nodes.append(bonobo.UnpackItems(0))
    nodes.extend(_print)
    nodes.append(bonobo.JsonWriter(path='fablabs.json'))

    graph.add_chain(*nodes)
    return graph
def get_sched_graph(**options):
    """
    Build the graph made of one chain: ``get_sched``, unpack,
    ``cache_sched``, count.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    pipeline = (
        get_sched,
        bonobo.UnpackItems(0),
        cache_sched,
        bonobo.count,
    )
    return bonobo.Graph(*pipeline)
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the graph for the "liste-des-cafes-a-un-euro" dataset.

    Rows are read from opendata.paris.fr, the name, address and zipcode
    fields are renamed, constant city/country values are added, and the
    fields are reordered. The producer's output then feeds three writers:
    CSV, JSON and line-delimited JSON.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created when
        the argument is falsy.
    :param _limit: extra nodes inserted right after the reader.
    :param _print: extra nodes appended to the end of the producer chain.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    field_order = [
        'name',
        'address',
        'zipcode',
        'city',
        'country',
        'geometry',
        'geoloc',
    ]
    producer = graph.add_chain(
        ODSReader(dataset='liste-des-cafes-a-un-euro',
                  netloc='opendata.paris.fr'),
        *_limit,
        bonobo.UnpackItems(0),
        bonobo.Rename(name='nom_du_cafe',
                      address='adresse',
                      zipcode='arrondissement'),
        bonobo.Format(city='Paris', country='France'),
        bonobo.OrderFields(field_order),
        *_print,
    )

    # Comma separated values.
    csv_writer = bonobo.CsvWriter(
        'coffeeshops.csv',
        fields=['name', 'address', 'zipcode', 'city'],
        delimiter=',')
    graph.add_chain(csv_writer, _input=producer.output)

    # Standard JSON
    json_writer = bonobo.JsonWriter(path='coffeeshops.json')
    graph.add_chain(json_writer, _input=producer.output)

    # Line-delimited JSON
    ldjson_writer = bonobo.LdjsonWriter(path='coffeeshops.ldjson')
    graph.add_chain(ldjson_writer, _input=producer.output)

    return graph
def get_graph(job, graph=None, *, _limit=(), _print=()):
    """
    Builds the execution graph.

    One chain reads ``job.input_file`` as CSV (first line skipped, fixed
    field names), runs ``search``, unpacks the rows and writes them to
    ``job.output_file``.

    :param job: object providing ``input_file`` and ``output_file``.
    :param graph: graph to extend; a new ``bonobo.Graph`` is created when
        the argument is falsy.
    :param _limit: extra nodes inserted right after the reader.
    :param _print: extra nodes appended after the CSV writer.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    input_fields = [
        'integration_id',
        'site_name',
        'address',
        'borough',
        'status',
    ]
    reader = bonobo.CsvReader(
        job.input_file, fs=FS_IN_SERVICE_ID, fields=input_fields, skip=1)
    writer = bonobo.CsvWriter(job.output_file, fs=FS_OUT_SERVICE_ID)

    graph.add_chain(
        reader,
        *_limit,
        search,
        bonobo.UnpackItems(0),
        writer,
        *_print,
    )
    return graph
def get_graph(**options):
    """
    Build the graph with a single chain named "main": ``get_cards``,
    ``wishlist_map``, unpack, then write ``Deckbox-wishlist.csv``.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    nodes = (
        get_cards,
        wishlist_map,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('Deckbox-wishlist.csv'),
    )

    graph = bonobo.Graph()
    graph.add_chain(*nodes, _name='main')
    return graph
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The "main" chain fetches order XML files under ``/etl/ivm`` whose name
    matches the date in ``options['now']``, parses
    ``Transactionlog_Tranenddatetime``, runs ``truncate_description``,
    unpacks the rows, renames the fields and ends at ``bonobo.noop``. That
    node is the shared input of one ``bonobo_sqlalchemy.InsertOrUpdate``
    chain per distinct engine.

    :param options: keyword options; reads ``options['now']``,
        ``options['engine']`` (iterable of engine names, duplicates
        ignored), ``options['table_name']`` and ``options['table_suffix']``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop

    graph.add_chain(
        GetOrderXML(prefix="/etl/ivm",
                    glob=[
                        'Mozilla_Corporation{timestamp:%Y_%m_%d}*.xml'.format(
                            timestamp=options['now'])
                    ]),
        ParseDates(['Transactionlog_Tranenddatetime']),
        truncate_description,
        bonobo.UnpackItems(0),
        bonobo.Rename(transaction_date='Transactionlog_Tranenddatetime',
                      item_number='Transactionlog_Itemnumber',
                      transaction_id='Transactionlog_Tlid',
                      item_description='Transactionlog_Itemdesc'),
        bonobo.Rename(
            user_id='Transactionlog_User',
            quantity='Transactionlog_Qty',
            transaction_code='Transactionlog_Transcode',
            description='Vendingmachines_Descr',
        ),
        split_dbs,
        _name="main")

    # insert into ivm (description, transaction_id, item_number,
    # item_description, user_id, quantity, transaction_date,
    # transaction_code) values

    # dict.fromkeys() drops duplicate engines while keeping the caller's
    # order, so the writer chains are added in a reproducible order
    # (iterating a set of strings depends on hash randomisation).
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('transaction_id', ),
                engine=engine),
            _input=split_dbs,
        )

    return graph
def get_graph(**options):
    """
    Build the graph that selects ``ivm`` rows for one date and pipes them
    through the ticket-creation nodes.

    The date is ``options['now']`` moved two days back with the time
    portion zeroed; it is printed when the graph is built and formatted
    into the SELECT statement. The query joins ``ivm`` with the distinct
    (badgeid, email, employee_id) rows of ``f_employee``.

    :param options: keyword options; reads ``options['now']`` and
        ``options['engine']``.
    :return: bonobo.Graph
    """
    # Null out time portion, go back 2 days in the past
    cutoff = options['now'] + relativedelta(
        days=-2, hour=0, minute=0, second=0, microsecond=0)
    print("# Processing for %s" % cutoff.date())

    # Adjacent literals concatenate to exactly the original statement text.
    statement = (
        " select a.badgeid AS badgeid, b.user_id AS user_id, "
        "a.employee_id AS employee_id, a.email AS email, "
        "b.item_description AS item_description, "
        "b.item_number AS item_number , "
        "b.transaction_date AS transaction_date, "
        "b.transaction_id AS transaction_id, "
        "b.description AS description, '' AS drawer_id, "
        "b.quantity AS quantity "
        "from ivm b , "
        "(select badgeid,email, employee_id from f_employee "
        "group by badgeid,email ,employee_id) a "
        "where b.user_id = a.badgeid "
        "and b.transaction_date = '{now}'; "
    )
    query = bonobo_sqlalchemy.Select(
        statement.format(now=cutoff), engine=options['engine'])

    graph = bonobo.Graph()
    graph.add_chain(
        query,
        trim_employee_id,
        invalid_badge_id,
        invalid_email,
        format_payload,
        create_ticket,
        bonobo.UnpackItems(0),
    )
    return graph
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    The "main" chain reads the pipe-delimited ``options['input_file']``
    with ``fs='brickftp'``, runs ``timestamp``, ``card_id`` and
    ``map_fields``, unpacks the rows and ends at ``bonobo.noop``. That node
    is the shared input of one ``bonobo_sqlalchemy.InsertOrUpdate`` chain
    per distinct engine.

    :param options: keyword options; reads ``options['input_file']``,
        ``options['engine']`` (iterable of engine names, duplicates
        ignored), ``options['table_name']`` and ``options['table_suffix']``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop

    graph.add_chain(
        bonobo.CsvReader(options['input_file'],
                         delimiter='|',
                         fields=('Admitted', 'blank1', 'Timestamp', 'blank2',
                                 'Name', 'card_id', 'Location'),
                         fs='brickftp'),
        timestamp,
        card_id,
        map_fields,
        bonobo.UnpackItems(0),
        split_dbs,
        _name="main")

    # dict.fromkeys() drops duplicate engines while keeping the caller's
    # order, so the writer chains are added in a reproducible order
    # (iterating a set of strings depends on hash randomisation).
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=(
                    'activitydate',
                    'badgeid',
                    'username',
                    'location',
                ),
                engine=engine),
            _input=split_dbs,
        )

    return graph
def get_graph(**options):
    """
    Build the card-inventory graph.

    The chain named "main" starts with the ``DeckedBuilder.csv`` writer,
    continues with ``metadata`` and ends at ``bonobo.noop``. Nine CSV
    reader chains, each tagging rows with a ``Language`` value, send their
    output into "main". Depending on the module-level flags ``ECHO_MTG``,
    ``MTG_STUDIO`` and ``DECKBOX``, export chains are attached to the
    ``bonobo.noop`` node.

    :param options: accepted for interface compatibility; not read here.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    fan_out = bonobo.noop

    # The trunk has no input of its own; the reader chains below feed it.
    # (bonobo.Limit(10) and bonobo.UnpackItems(0) are disabled here.)
    graph.add_chain(
        bonobo.CsvWriter('DeckedBuilder.csv'),
        metadata,
        fan_out,
        _input=None,
        _name='main',
    )

    # (input file, value formatted into the Language field)
    inventory_sources = (
        ('main-en.csv', 'English'),
        ('main-de.csv', 'German'),
        ('main-ru.csv', 'Russian'),
        ('main-it.csv', 'Italian'),
        ('main-jp.csv', 'Japanese'),
        ('main-fr.csv', 'French'),
        ('main-kr.csv', 'Korean'),
        ('main-cs.csv', 'Chinese'),
        ('Deckbox-extras.csv', 'English'),
    )
    for csv_path, language in inventory_sources:
        graph.add_chain(
            bonobo.CsvReader(csv_path),
            bonobo.Format(Language=language),
            _output='main',
        )

    if ECHO_MTG:
        # Reg Qty,Foil Qty,Name,Set,Acquired,Language
        echomtg_fields = {'Acquired For': '0.004', 'Language': 'en'}
        # echomtg specific fiddling
        graph.add_chain(
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Name='Card'),
            bonobo.Format(**echomtg_fields),
            bonobo.CsvWriter('EchoMTG.csv'),
            _input=fan_out,
        )

    # MTG Studio
    if MTG_STUDIO:
        # Disabled alternatives kept from the original:
        #   bonobo.Format(Edition='{Set}'), bonobo.Rename(Name='Card'),
        #   bonobo.Rename(Qty='Reg Qty'), bonobo.Rename(Foil='Foil Qty')
        graph.add_chain(
            mtg_studio,
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Edition='Set'),
            bonobo.CsvWriter('MTG-Studio.csv'),
            _input=fan_out,
        )

    # A trade-list chain is disabled at this point in the original code:
    # tradeable -> UnpackItems(0) -> CsvWriter("DeckedBuilder-tradelist.csv")
    # -> OrderFields(['Card', 'Set', 'Foil', 'Quantity'])
    # -> CsvWriter("CardKingdom-buylist.csv")
    # -> OrderFields(['Quantity', 'Card', 'Set'])
    # -> CsvWriter("mtgprice-buylist.csv", delimiter="\t"), _input=split.

    # NOTE(review): the flattened source does not show whether this `if`
    # was live or commented out; it is assumed live, like the flags above.
    if DECKBOX:
        deckbox_writer = bonobo.CsvWriter('Deckbox-inventory.csv')
        graph.add_chain(
            deckbox,
            bonobo.UnpackItems(0),
            deckbox_writer,
            _input=fan_out,
        )
        # The specials file is sent straight into the same writer.
        graph.add_chain(
            bonobo.CsvReader('Deckbox-specials.csv'),
            _output=deckbox_writer,
        )

    return graph
def get_graph(**options):
    """
    Build the AWS billing graph.

    The chain named "main" has no input of its own. It writes
    ``billing.csv`` and ``billing.json``, runs the clean-up, lookup and
    summary nodes, and ends in a ``bonobo_sqlalchemy.InsertOrUpdate`` on
    the ``fact_itsm_aws_historical_cost`` table. One reader chain per month
    sends its output into "main"; the months are the ``options['months']``
    months before the month of ``options['now']``. A second
    ``InsertOrUpdate`` chain takes ``parse_dates`` as its input.

    :param options: keyword options; reads ``now``, ``months``, ``limit``,
        ``aws_account_id``, ``table`` and ``table_suffix``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    summary_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='fact_itsm_aws_historical_cost' + options['table_suffix'],
        discriminant=(
            'productname',
            'date_sk',
            'account_name_sk',
        ),
        engine='database')
    graph.add_chain(
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        summary_writer,
        _name="main",
        _input=None,
    )

    # Go to beginning of month
    when = options['now'] + relativedelta(
        day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        when += relativedelta(months=-1)
        tstamp = when.strftime("%Y-%m")
        print("# %d Processing %s" % (index, tstamp))

        # A fresh Limit node per reader chain, only when a limit is set.
        limit_nodes = (
            (bonobo.Limit(options['limit']), ) if options['limit'] else ())

        billing_file = '%s-aws-cost-allocation-%s.csv' % (
            options['aws_account_id'], tstamp)
        graph.add_chain(
            AwsBillingReader(billing_file, fs='s3', skip=1),
            *limit_nodes,
            _output="main",
        )

    # NOTE(review): assumed to sit after the month loop; the flattened
    # source does not show its indentation. Confirm.
    graph.add_chain(
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table'] + options['table_suffix'],
            discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                          'recordid'),
            engine='database'),
        _input=parse_dates,
    )

    return graph