def test_bonodoo_reader_fields(self):
    """Run an OdooReader -> CsvWriter graph against a mocked XML-RPC server
    and check that the requested 'id' field of each record lands in the CSV.
    """
    folder = tempfile.TemporaryDirectory()
    filename = 'test_file.csv'
    value_1 = {'id': 2}
    value_2 = {'id': 3}
    read = OdooReader(
        model='res.users',
        domain=[],
        fields=['id'],
    )
    # Patch the XML-RPC client so no real Odoo server is contacted; the
    # mocked execute_kw returns our two canned records.
    with patch('xmlrpc.client.ServerProxy') as mk:
        mock_server = mk.return_value
        mock_server.login.return_value = 1
        mock_server.execute_kw.return_value = [value_1, value_2]
        graph = Graph()
        graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
        bonobo.run(graph, services={
            'fs.data': bonobo.open_fs(folder.name),
            'odoo.server': self.server,
        })
    mk.assert_called()
    with open(os.path.join(folder.name, filename), 'r') as f:
        lines = f.readlines()
    # Expect one CSV header line plus one line per record.
    self.assertEqual(len(lines), 3)
    self.assertEqual(ast.literal_eval(lines[1]), value_1.get('id'))
    self.assertEqual(ast.literal_eval(lines[2]), value_2.get('id'))
    folder.cleanup()
def run_etl(input_file_path, config_file, output_cube_path=None, cube_config=None):
    """Run ETL process for the passed excel file.

    :param input_file_path: excel file path
    :param config_file: config file path

        example of config (for each table, its associated columns)::

            Facts: [Price, Quantity]
            Accounts: ['Source Account', 'Destination Account']
            Client: ['Client Activity', 'Client Role']

    :param output_cube_path: cube folder path (defaults to
        ``~/olapy-data/cubes/<input file stem>``)
    :param cube_config: if you want to call run_etl as a function, you can
        pass a dict config directly as a param, e.g.::

            @click.command()
            @click.pass_context
            def myETL(ctx):
                config = {
                    'Facts': ['Amount', 'Count'],
                    'Geography': ['Continent', 'Country', 'City'],
                    'Product': ['Company', 'Article', 'Licence'],
                    'Date': ['Year', 'Quarter', 'Month', 'Day'],
                }
                ctx.invoke(run_etl, input_file_path='sales.xlsx',
                           cube_config=config, output_cube_path='cube2')

    :raises Exception: if neither a config nor an input file is specified.
    """
    parser = bonobo.get_argument_parser()
    parser.add_argument("-in", "--input_file_path", help="Input file")
    parser.add_argument("-cf", "--config_file", help="Configuration file path")
    parser.add_argument("-out", "--output_cube_path", help="Cube export path")
    with bonobo.parse_args(parser) as options:
        # Explicit config dict wins over a config file.
        if cube_config:
            options["cube_config"] = cube_config
        elif config_file:
            # Distinct handle name so the `config_file` parameter is not
            # shadowed, and safe_load so a malicious YAML file cannot
            # execute arbitrary code (yaml.load without a Loader is unsafe).
            with open(config_file) as config_stream:
                options["cube_config"] = yaml.safe_load(config_stream)
        else:
            raise Exception("Config file is not specified")

        if input_file_path:
            options["input_file_path"] = input_file_path
        else:
            raise Exception("Excel file is not specified")

        if output_cube_path:
            options["output_cube_path"] = output_cube_path
        else:
            options["output_cube_path"] = os.path.join(
                expanduser("~"), "olapy-data", "cubes", Path(input_file_path).stem
            )

        bonobo.run(get_graph(**options), services=get_services(**options))
def execute_pipeline(self):
    """Parse CLI arguments, then build and run this pipeline's graph."""
    self.bonobo_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(self.bonobo_parser) as options:
        graph = self.build_graph(**options)
        services = self.get_services(**options)
        bonobo.run(graph, services=services)
def main(setting, ckan_portal, dataset_id, ressource, namespace, filename):
    """Parse bonobo CLI options, then run the CKAN dataset graph."""
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(ckan_portal, dataset_id, ressource, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
def run(self) -> bool:
    """Execute every enabled, valid report graph.

    :return: False when no reports are available; True once all report
        collections have been processed. (Previously the success path fell
        off the end and returned None despite the ``-> bool`` annotation.)
    """
    # TODO: Change the continue to a report error insertion on DB
    if self._reports is None:
        print(__name__, "::run() no reports available.")
        return False
    for dep_report in self._reports.values():
        if dep_report is None:
            continue
        for report in dep_report:
            if not report.enabled:
                print(__name__, '::run() report is disabled, ', report.name)
                continue
            graph: Bonobo.graph = report.graph
            if graph is None:
                print(__name__, "::run() DAG could not be constructed, ", report.name)
                continue
            if self._valid(report):
                print(__name__, ' Running report ', report.name)
                bonobo.run(graph)
    return True
def main():
    """Assemble and execute the CSV extract/transform/load pipeline."""
    pipeline = bonobo.Graph(
        extract_data_from_csv,
        transform_data,
        load_data_to_dw,
    )
    bonobo.run(pipeline)
def __call__(self, *args, **kwargs): """ execute command """ # This is not that useful, but does show how to create more complicated graphs # graph.add_chain( # bonobo.PrettyPrinter(), # _input=read_recs # ) services = self.get_services(args[0]) engine = services['sqlalchemy.engine'] # We do not want to do this in production - it creates the tables ... Weather.metadata.create_all(engine) # Make a session session = sessionmaker(bind=engine)() # Add it to injectable services services['session'] = session bonobo.run(self.graph, services=services) session.commit()
def main():
    """Assemble and execute the XLSX extract/transform/load pipeline."""
    pipeline = bonobo.Graph(
        extract_data_from_xlxs,
        transform_data,
        load_into_new_xlsx_file,
    )
    bonobo.run(pipeline)
def run_graph(self, graph, *, services):
    """Execute *graph*, choosing the executor based on ``self.parallel``."""
    if not self.parallel:
        print('Running with SERIAL custom executor')
        executor = pipeline.execution.GraphExecutor(graph, services)
        executor.run()
        return
    print('Running with PARALLEL bonobo executor')
    bonobo.run(graph, services=services)
def main():
    """Parse command-line options, then build and execute the pipeline graph."""
    # Dormant file-logging setup, kept for reference:
    # logfilename = "wh.log"
    # logger = logging.getLogger()
    # ch = logging.FileHandler(logfilename)
    # formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    # ch.setFormatter(formatter)
    # logger.addHandler(ch)
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
def test_postgres(postgres):
    """End-to-end test against a dockerized postgres: create role/db/tables,
    then run four bonobo graphs (insert, select, table-to-table copy, select)
    and check the data survives each round trip.
    """
    #bonobo.settings.QUIET.set(True)
    db_name = 'my_db'
    # The postgres fixture exposes docker inspect data; pull the mapped port.
    port = postgres['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']
    wait_for_postgres(port)
    root_engine = create_root_engine(port)
    _execute_sql(root_engine, "CREATE ROLE my_user WITH LOGIN PASSWORD '';")
    _execute_sql(
        root_engine,
        'CREATE DATABASE {name} WITH OWNER=my_user TEMPLATE=template0 ENCODING="utf-8"'
        .format(name=db_name))
    engine = create_engine('my_user', db_name, port)
    metadata.create_all(engine)
    services = {'sqlalchemy.engine': engine}

    # 1) Insert the extracted rows into TABLE_1.
    graph = bonobo.Graph()
    graph.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate(TABLE_1))
    assert bonobo.run(graph, services=services)

    # 2) Read TABLE_1 back into a buffer and verify the ten rows.
    buf = Bufferize()
    graph = bonobo.Graph()
    graph.add_chain(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        buf,
    )
    assert bonobo.run(graph, services=services)
    assert buf.buffer == [((0, 'value for 0'), {}), ((1, 'value for 1'), {}),
                          ((2, 'value for 2'), {}), ((3, 'value for 3'), {}),
                          ((4, 'value for 4'), {}), ((5, 'value for 5'), {}),
                          ((6, 'value for 6'), {}), ((7, 'value for 7'), {}),
                          ((8, 'value for 8'), {}), ((9, 'value for 9'), {})]

    # 3) Copy TABLE_1 into TABLE_2 via select -> insert.
    graph = bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        bonobo_sqlalchemy.InsertOrUpdate(TABLE_2),
    )
    assert bonobo.run(graph, services=services)

    # 4) Read TABLE_2 back and verify the copy is identical.
    buf = Bufferize()
    graph = bonobo.Graph()
    graph.add_chain(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_2),
        buf,
    )
    assert bonobo.run(graph, services=services)
    assert buf.buffer == [((0, 'value for 0'), {}), ((1, 'value for 1'), {}),
                          ((2, 'value for 2'), {}), ((3, 'value for 3'), {}),
                          ((4, 'value for 4'), {}), ((5, 'value for 5'), {}),
                          ((6, 'value for 6'), {}), ((7, 'value for 7'), {}),
                          ((8, 'value for 8'), {}), ((9, 'value for 9'), {})]
def update_message_count(start_date, end_date, base_services):
    """Fetch message counts for every incomplete date in the range.

    Dates already marked complete in the status DB are skipped; a date is
    marked complete only once it lies in the past.
    """
    status_db = db.Status(base_services["database"])
    services = get_message_count_services(base_services)
    for date in date_range(start_date, end_date):
        if status_db.is_message_count_complete(date):
            logging.info(f"Date {date.isoformat()} is complete. Skipping.")
            continue
        logging.info(f"Fetching raw messages for {date.isoformat()}")
        bonobo.run(get_message_count_graph(date, False), services=services)
        if date < datetime.date.today():
            status_db.set_message_count_complete(date)
def fetch_user_data(self):
    """Run the user-data extraction graph, wrapping the result in the XML
    document header/footer. Returns the accumulated result string, or the
    literal 'FAIL DATA' if anything goes wrong.
    """
    try:
        self.build_xml_document_header()
        conn = self.assemble_connection_string()
        bonobo.run(self.get_graph(), services=self.get_services(conn))
        self.build_xml_document_footer()
    except Exception as exc:
        logging.warning("Exception caught: fetch_user_data")
        logging.warning(exc)
        logging.warning(self.__data_source_parms)
        return 'FAIL DATA'
    return self.__result_string
def execute(file, quiet=False):
    """Execute a Python source file and run the single bonobo Graph it defines.

    :param file: an open file object containing Python source
    :param quiet: reserved flag (console plugin selection, see todos below)
    :return: the result of running the discovered graph
    :raises AssertionError: if the file defines zero or several graphs
    """
    with file:
        code = compile(file.read(), file.name, 'exec')

    # TODO: A few special variables should be set before running the file:
    #
    # See:
    # - https://docs.python.org/3/reference/import.html#import-mod-attrs
    # - https://docs.python.org/3/library/runpy.html#runpy.run_module
    context = {
        '__name__': '__bonobo__',
        '__file__': file.name,
    }

    # The previous ``try: exec(...) except Exception as exc: raise`` was a
    # no-op wrapper (re-raise only); let exceptions propagate naturally.
    exec(code, context)

    graphs = {k: v for k, v in context.items() if isinstance(v, Graph)}
    assert len(graphs) == 1, (
        'Having zero or more than one graph definition in one file is unsupported for now, '
        'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.'
    ).format(len(graphs))

    name, graph = next(iter(graphs.items()))

    # todo if console and not quiet, then add the console plugin
    # todo when better console plugin, add it if console and just disable display
    return run(graph)
def handle(self, *args, **options):
    """Run one or several bonobo graphs, temporarily redirecting this
    command's stdout/stderr through the console output plugin's streams.
    """
    # Swap the command's output wrappers so bonobo's console plugin and
    # Django's OutputWrapper cooperate; restored at the end.
    _stdout_backup, _stderr_backup = self.stdout, self.stderr
    self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n')
    self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n')
    # Prefix stderr lines with a red '!' marker.
    self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x
    with bonobo.parse_args(options) as options:
        services = self.get_services()
        graph_coll = self.get_graph(*args, **options)
        # get_graph may yield several graphs; normalize to an iterable.
        if not isinstance(graph_coll, GeneratorType):
            graph_coll = (graph_coll, )
        for i, graph in enumerate(graph_coll):
            assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.'
            print(term.lightwhite('{}. {}'.format(i + 1, graph.name)))
            result = bonobo.run(graph, services=services)
            print(term.lightblack(' ... return value: ' + str(result)))
            print()
    self.stdout, self.stderr = _stdout_backup, _stderr_backup
def run(self, *args, **options):
    """Build and execute one or several graphs, printing per-node statistics,
    and return the list of execution results.
    """
    results = []
    with bonobo.parse_args(options) as options:
        services = self.get_services()
        strategy = self.get_strategy()
        graph_coll = self.get_graph(*args, **options)
        # get_graph may yield several graphs; normalize to an iterable.
        if not isinstance(graph_coll, GeneratorType):
            graph_coll = (graph_coll, )
        for i, graph in enumerate(graph_coll):
            if not isinstance(graph, bonobo.Graph):
                raise ValueError(
                    "Expected a Graph instance, got {!r}.".format(graph))
            # Fall back to the repr (without angle brackets) for unnamed graphs.
            print(
                term.lightwhite("{}. {}".format(
                    i + 1, graph.name or repr(graph).strip("<>"))))
            result = bonobo.run(graph, services=services, strategy=strategy)
            results.append(result)
            for node in result.nodes:
                print(node.get_statistics_as_string(), node.get_flags_as_string())
            print(term.lightblack(" ... return value: " + str(result)))
    return results
def update_raw_threads(start_date, end_date, backdate_nworking_days, base_services):
    """Fetch raw threads for every incomplete date in the range.

    A date is marked complete only once it is more than
    ``backdate_nworking_days`` working days in the past.
    """
    status_db = db.Status(base_services["database"])
    services = get_raw_threads_services(base_services)
    ndays_ago = nworking_days_before(datetime.date.today(), backdate_nworking_days)
    for date in date_range(start_date, end_date):
        if status_db.is_raw_threads_complete(date):
            logging.info("Date %s is complete. Skipping.", date.isoformat())
            continue
        logging.info("Fetching raw threads for %s", date.isoformat())
        bonobo.run(get_raw_threads_graph(date), services=services)
        if date < ndays_ago:
            status_db.set_raw_threads_complete(date)
def test_run_graph_noop():
    """A single-noop graph runs and yields a GraphExecutionContext."""
    g = bonobo.Graph(bonobo.noop)
    assert len(g) == 1
    # Force non-interactive mode so run() does not try to attach a console.
    with patch('bonobo._api._is_interactive_console', side_effect=lambda: False):
        ctx = bonobo.run(g)
    assert isinstance(ctx, GraphExecutionContext)
def execute(filename, module, install=False, quiet=False, verbose=False, env=None):
    """Read the graph, plugins and services for the target, then run it."""
    graph, plugins, services = read(filename, module, install, quiet, verbose, env)
    result = bonobo.run(graph, plugins=plugins, services=services)
    return result
def load_activities(response):
    """Fetch Strava activities and their power streams, then dump the
    collected {iso-date: watts-list} dicts to ``<athlete_id>.json``.

    :param response: OAuth token response dict with 'access_token' and
        'athlete' -> 'id' keys.
    """
    client = stravaio.StravaIO(response["access_token"])

    def extract():
        """Fetch activities summary from Strava"""
        activities = None
        while activities is None:
            time.sleep(1)
            try:
                activities = client.get_logged_in_athlete_activities(after='20180101')
            # Was a bare ``except:`` — that also swallowed KeyboardInterrupt
            # and SystemExit, making this retry loop impossible to interrupt.
            except Exception:
                activities = None
        logger.debug('load_activities: extract: fetching activities')
        for a in activities:
            yield a

    def get_streams(a):
        """Returns dict of activitiy and streams dataframe"""
        if a.device_watts:  # check if the activity has the power data
            logger.debug(f'load_activities: Fetching stream for {maya.parse(a.start_date).iso8601()}:, {a.name}, {a.start_latlng}, {a.trainer}, {a.type}')
            s = client.get_activity_streams(a.id, response['athlete']['id'])
            if isinstance(s, pd.DataFrame):
                # check whether the stream was loaded from the local copy
                logger.debug(f'load_activities ...found locally')
                _s = s
            else:
                # Streams were loaded from the API, will be stored locally first
                logger.debug(f'load_activities ...fetched remotely, storing locally')
                s.store_locally()
                _s = pd.DataFrame(s.to_dict())
            yield {maya.parse(a.start_date).iso8601(): list(_s['watts'])}

    # Accumulator the load node appends into; dumped to JSON afterwards.
    d = []

    def load(s):
        logger.debug('load_activities: Appending date and power data to the dict')
        d.append(s)

    g = bonobo.Graph()
    g.add_chain(extract, get_streams, load)
    bonobo.run(g)

    f_name = f"{response['athlete']['id']}.json"
    with open(os.path.join(dir_testdata(), f_name), 'w') as f:
        logger.debug(f'load_activities: Save data to json {f_name}')
        json.dump(d, f)
def test_bonodoo_function_single(self):
    """Run an OdooModelFunction -> CsvWriter graph with a mocked XML-RPC
    server and check that the single returned record lands in the CSV.
    """
    folder = tempfile.TemporaryDirectory()
    filename = 'test_file.csv'
    read = OdooModelFunction(model='res.users', function='test_function')
    value_1 = {'id': 2}
    # Patch the XML-RPC client so no real Odoo server is contacted.
    with patch('xmlrpc.client.ServerProxy') as mk:
        mock_server = mk.return_value
        mock_server.login.return_value = 1
        mock_server.execute_kw.return_value = value_1
        graph = Graph()
        graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
        bonobo.run(graph, services={
            'fs.data': bonobo.open_fs(folder.name),
            'odoo.server': self.server,
        })
    mk.assert_called()
    with open(os.path.join(folder.name, filename), 'r') as f:
        lines = f.readlines()
    # A single record, written as one line.
    self.assertEqual(len(lines), 1)
    self.assertEqual(ast.literal_eval(lines[0]), value_1)
    folder.cleanup()
def run(get_graph, get_services, *, parser=None):
    """Parse CLI options, run the graph under a timer, and report timing,
    return value and exit status. Returns the run's xstatus."""
    if parser is None:
        parser = get_argument_parser()
    with bonobo.parse_args(parser) as options:
        with Timer() as timer:
            rendered = ("{}={}".format(k, v) for k, v in sorted(options.items()))
            print("Options:", " ".join(rendered))
            retval = bonobo.run(
                get_graph(**get_graph_options(options)),
                services=get_services(),
                strategy=options["strategy"])
        print("Execution time:", timer)
        print("Return value:", retval)
        print("XStatus:", retval.xstatus)
    return retval.xstatus
def execute(input, output, reader=None, reader_options=None, writer=None, writer_options=None, options=None):
    """Build a reader -> writer graph for the given paths and run it."""
    source = resolve_factory(reader, input, READER)(input)
    sink = resolve_factory(writer, output, WRITER)(output)
    graph = bonobo.Graph()
    graph.add_chain(source, sink)
    return bonobo.run(graph, services={'fs': bonobo.open_fs()})
def handle(self, *args, **options):
    """Run the command's graph with redirected output streams and return a
    printable summary of the run's result."""
    # Route this command's output through the console plugin's streams;
    # restored once the run completes.
    saved_stdout, saved_stderr = self.stdout, self.stderr
    self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + '\n')
    self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + '\n')
    self.stderr.style_func = lambda msg: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + msg
    with bonobo.parse_args(options) as options:
        result = bonobo.run(
            self.get_graph(*args, **options),
            services=self.get_services(),
        )
    self.stdout, self.stderr = saved_stdout, saved_stderr
    return '\nReturn Value: ' + str(result)
def handle(
    self,
    input_filename,
    output_filename,
    reader=None,
    reader_option=None,
    writer=None,
    writer_option=None,
    option=None,
    limit=None,
    transformation=None,
):
    """Convert ``input_filename`` into ``output_filename`` through an
    optional limit and chain of transformations, picking reader/writer
    factories from the registry by filename (or explicit format).
    """
    reader_factory = default_registry.get_reader_factory_for(
        input_filename, format=reader)
    reader_kwargs = _resolve_options((option or []) + (reader_option or []))
    # '-' means pretty-print to stdout instead of writing a file.
    if output_filename == '-':
        writer_factory = bonobo.PrettyPrinter
        writer_args = ()
    else:
        writer_factory = default_registry.get_writer_factory_for(
            output_filename, format=writer)
        writer_args = (output_filename, )
    writer_kwargs = _resolve_options((option or []) + (writer_option or []))
    # Middle of the chain: optional Limit node, then user transformations.
    transformations = ()
    if limit:
        transformations += (bonobo.Limit(limit), )
    transformations += _resolve_transformations(transformation)
    graph = bonobo.Graph()
    graph.add_chain(
        reader_factory(input_filename, **reader_kwargs),
        *transformations,
        writer_factory(*writer_args, **writer_kwargs),
    )
    return bonobo.run(graph, services={
        'fs': bonobo.open_fs(),
    })
def run(self, *args, **options):
    """Build and execute one or several graphs, returning the list of
    execution results."""
    results = []
    with bonobo.parse_args(options) as options:
        services = self.get_services()
        strategy = self.get_strategy()
        graph_coll = self.get_graph(*args, **options)
        # get_graph may yield several graphs; normalize to an iterable.
        if not isinstance(graph_coll, GeneratorType):
            graph_coll = (graph_coll, )
        for i, graph in enumerate(graph_coll):
            assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.'
            print(term.lightwhite('{}. {}'.format(i + 1, graph.name)))
            result = bonobo.run(graph, services=services, strategy=strategy)
            results.append(result)
            print(term.lightblack(' ... return value: ' + str(result)))
            print()
    return results
def main():
    """Run the full pipeline: users, channels, message counts, raw threads,
    message enrichment, and finally org-mode conversion."""
    configuration = read_configuration()
    parser = make_parser()
    arguments = parser.parse_args()
    logging.info("Running with configuration %s and arguments %s.", configuration,
                 arguments)
    base_services = get_services(configuration)
    logging.info("Getting users")
    with log_timed("users graph"):
        bonobo.run(get_users_graph(), services=get_users_services(base_services))
    logging.info("Getting channels")
    with log_timed("channels graph"):
        bonobo.run(get_channels_graph(),
                   services=get_channels_services(base_services))
    logging.info("Getting message count")
    with log_timed("message count graph"):
        # Quick mode only refreshes today's count; otherwise backfill the
        # configured date range.
        if arguments.quick:
            update_message_count_quick(datetime.date.today(), base_services)
        else:
            update_message_count(configuration.start_date, configuration.end_date,
                                 base_services)
    logging.info("Getting raw threads.")
    with log_timed("raw threads graph"):
        # Quick mode starts one working day back; otherwise use the
        # configured range and lookback window.
        if arguments.quick:
            update_raw_threads_quick(
                nworking_days_before(datetime.date.today(), 1),
                configuration.end_date, base_services)
        else:
            update_raw_threads(configuration.start_date, configuration.end_date,
                               configuration.threads_lookback_working_days,
                               base_services)
    logging.info("Enriching messages with user and channel information")
    bonobo.run(get_enriched_messages_graph(configuration.start_date,
                                           configuration.end_date),
               services=get_enriched_messages_services(base_services))
    logging.info("Converting to org-mode")
    bonobo.run(get_convert_to_org_graph(),
               services=get_convert_to_org_services(base_services))
def handle(
    self,
    input_filename,
    output_filename,
    reader=None,
    reader_option=None,
    writer=None,
    writer_option=None,
    option=None,
    limit=None,
    transformation=None,
):
    """Convert ``input_filename`` into ``output_filename`` through an
    optional limit and chain of transformations; '-' as the output writes a
    pretty-printed dump to stdout instead."""
    shared = option or []
    reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
    reader_kwargs = _resolve_options(shared + (reader_option or []))

    if output_filename == '-':
        writer_factory = bonobo.PrettyPrinter
        writer_args = ()
    else:
        writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
        writer_args = (output_filename, )
    writer_kwargs = _resolve_options(shared + (writer_option or []))

    # Middle of the chain: optional Limit node, then user transformations.
    middle = ()
    if limit:
        middle = middle + (bonobo.Limit(limit), )
    middle = middle + _resolve_transformations(transformation)

    graph = bonobo.Graph()
    graph.add_chain(
        reader_factory(input_filename, **reader_kwargs),
        *middle,
        writer_factory(*writer_args, **writer_kwargs),
    )
    return bonobo.run(graph, services={'fs': bonobo.open_fs()})
def run(self, *args, **options):
    """Build and execute one or several graphs, returning the list of
    execution results."""
    results = []
    with bonobo.parse_args(options) as options:
        services = self.get_services()
        strategy = self.get_strategy()
        graphs = self.get_graph(*args, **options)
        # get_graph may yield several graphs; normalize to an iterable.
        if not isinstance(graphs, GeneratorType):
            graphs = (graphs,)
        for index, graph in enumerate(graphs, start=1):
            if not isinstance(graph, bonobo.Graph):
                raise ValueError('Expected a Graph instance, got {!r}.'.format(graph))
            print(term.lightwhite('{}. {}'.format(index, graph.name)))
            outcome = bonobo.run(graph, services=services, strategy=strategy)
            results.append(outcome)
            print(term.lightblack(' ... return value: ' + str(outcome)))
            print()
    return results
import bonobo


def split_one(line):
    """Split a 'name, address' line into a dict with those two keys."""
    fields = line.split(', ', 1)
    return dict(zip(("name", "address"), fields))


# Read the raw file, split each line, write the records out as JSON.
graph = bonobo.Graph(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json', fs='fs.output'),
)


def get_services():
    """Filesystems: the examples dataset for input, the cwd for output."""
    return {
        'fs': bonobo.open_examples_fs('datasets'),
        'fs.output': bonobo.open_fs(),
    }


if __name__ == '__main__':
    bonobo.run(graph, services=get_services())
import bonobo


def extract():
    """Yield three sample strings."""
    for word in ('foo', 'bar', 'baz'):
        yield word


def transform(x):
    """Uppercase the incoming value."""
    return x.upper()


def load(x):
    """Print the final value."""
    print(x)


graph = bonobo.Graph(extract, transform, load)
graph.__doc__ = 'hello'

if __name__ == '__main__':
    bonobo.run(graph)
def load(result):
    # Persist each incoming result as a new row in the "multiples of five"
    # store (could equally be a line appended to a text file via open(...,
    # 'a').write, or an insert into any database of choice).
    cinco.insert_multiplo(result)
    print('Fin!')


def get_graph(**options):
    """Assemble the extract -> transform -> load chain."""
    g = bonobo.Graph()
    g.add_chain(extract, transform, load)
    return g


def get_services(**options):
    """No injected services are needed."""
    return {}


if __name__ == "__main__":
    cinco.create_schema()
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(**options), services=get_services(**options))
bonobo.CsvReader('datasets/coffeeshops.txt'), *((bonobo.Limit(_limit), ) if _limit else ()), *((bonobo.PrettyPrinter(), ) if _print else ()), bonobo.CsvWriter('coffeeshops.csv', fs='fs.output') ) if __name__ == '__main__': parser = bonobo.get_argument_parser() parser.add_argument( '--limit', '-l', type=int, default=None, help='If set, limits the number of processed lines.' ) parser.add_argument( '--print', '-p', action='store_true', default=False, help='If set, pretty prints before writing to output file.' ) with bonobo.parse_args(parser) as options: bonobo.run( get_graph(_limit=options['limit'], _print=options['print']), services=get_services() )
import bonobo
import datetime
import time


def extract():
    """Placeholder, change, rename, remove... """
    # Emit one timestamp per second for a minute (no sleep before the first).
    for tick in range(60):
        if tick:
            time.sleep(1)
        yield datetime.datetime.now()


def get_graph():
    """Build the one-chain graph: timestamp source -> print sink."""
    g = bonobo.Graph()
    g.add_chain(extract, print)
    return g


if __name__ == '__main__':
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser):
        bonobo.run(get_graph())
def write_to_mongodb(dr):
    """Insert one transaction document and yield its new ObjectId."""
    yield db.transactions.insert_one(dr).inserted_id


def get_graph(**options):
    """Assemble the bank-statement pipeline, bank names first."""
    graph = bonobo.Graph(
        ["Swedbank", "AmericanExpress"],
        get_account_statementfiles,
        # bonobo.Limit(10),
        parse_sf,
        flatten_statements,
        apply_categories,
        # bonobo.PrettyPrinter(),
        write_to_mongodb,
    )
    return graph


def get_services(**options):
    """No injected services are needed."""
    return {}


if __name__ == "__main__":
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(**options), services=get_services(**options))
    cleanup_swedbanktransactions()
import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield the stripped line, unless it is a '#' comment (then yield nothing)."""
    stripped = line.strip()
    if not stripped.startswith('#'):
        yield stripped


def get_graph(*, _limit=(), _print=()):
    """passwd.txt -> drop comments -> first field (username) -> usernames.txt."""
    return bonobo.Graph(
        bonobo.FileReader('datasets/passwd.txt'),
        skip_comments,
        *_limit,
        lambda s: s.split(':')[0],
        *_print,
        bonobo.FileWriter('usernames.txt', fs='fs.output'),
    )


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services())
list(filter(None, map(_getlink, json.loads(row.get('links'))))), 'country': pycountry.countries.get( alpha_2=row.get('country_code', '').upper() ).name, } return result def get_graph(graph=None, *, _limit=(), _print=()): graph = graph or bonobo.Graph() graph.add_chain( OpenDataSoftAPI(dataset=API_DATASET), *_limit, normalize, bonobo.UnpackItems(0), *_print, bonobo.JsonWriter(path='fablabs.json'), ) return graph if __name__ == '__main__': parser = examples.get_argument_parser() with bonobo.parse_args(parser) as options: bonobo.run( get_graph(**examples.get_graph_options(options)), services=get_services() )