def execute_pipeline(self):
    """Parse the command line with Bonobo's parser and run the pipeline.

    The parser is kept on ``self.bonobo_parser``; the parsed options are
    forwarded as keyword arguments to ``self.build_graph`` and
    ``self.get_services``.
    """
    self.bonobo_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(self.bonobo_parser) as options:
        # Build the graph first, then the services, as the original call did.
        graph = self.build_graph(**options)
        services = self.get_services(**options)
        bonobo.run(graph, services=services)
def run_etl(input_file_path, config_file, output_cube_path=None, cube_config=None):
    """
    Run the ETL process for the passed excel file.

    :param input_file_path: excel file path
    :param config_file: config file path

    example of config::

        # in the config file you specify for each table, columns associate with it.
        Facts: [Price, Quantity]
        Accounts: ['Source Account', 'Destination Account']
        Client: ['Client Activity', 'Client Role']

    :param output_cube_path: cube folder path; when omitted the cube is written
        under ``~/olapy-data/cubes/<input file stem>``
    :param cube_config: if you want to call run_etl as function, you can pass dict config directly as param,
        there an example::

            @click.command()
            @click.pass_context
            def myETL(ctx):
                # demo run_etl as function with config as dict
                config = {
                    'Facts': ['Amount', 'Count'],
                    'Geography': ['Continent', 'Country', 'City'],
                    'Product': ['Company', 'Article', 'Licence'],
                    'Date': ['Year', 'Quarter', 'Month', 'Day']
                }
                ctx.invoke(run_etl, input_file_path='sales.xlsx', cube_config=config, output_cube_path='cube2')

    :raises Exception: if neither ``cube_config`` nor ``config_file`` is given,
        or if ``input_file_path`` is empty.
    """
    parser = bonobo.get_argument_parser()
    parser.add_argument("-in", "--input_file_path", help="Input file")
    parser.add_argument("-cf", "--config_file", help="Configuration file path")
    parser.add_argument("-out", "--output_cube_path", help="Cube export path")

    with bonobo.parse_args(parser) as options:
        # An explicit dict config takes precedence over the config file.
        if cube_config:
            options["cube_config"] = cube_config
        elif config_file:
            # Use a distinct name for the handle so the path parameter is not shadowed.
            with open(config_file) as config_stream:
                # safe_load: yaml.load() without a Loader is rejected by PyYAML >= 6
                # and can build arbitrary Python objects on older versions.
                options["cube_config"] = yaml.safe_load(config_stream)
        else:
            raise Exception("Config file is not specified")

        if input_file_path:
            options["input_file_path"] = input_file_path
        else:
            raise Exception("Excel file is not specified")

        if output_cube_path:
            options["output_cube_path"] = output_cube_path
        else:
            # Default location derived from the input file name (without extension).
            options["output_cube_path"] = os.path.join(
                expanduser("~"), "olapy-data", "cubes", Path(input_file_path).stem
            )

        bonobo.run(get_graph(**options), services=get_services(**options))
def main(setting, ckan_portal, dataset_id, ressource, namespace, filename):
    """Parse Bonobo's CLI options and run the graph for the given resource.

    The positional arguments are handed to ``get_graph``; ``setting`` is
    handed to ``get_services``. Parsed CLI options are forwarded to both as
    keyword arguments.
    """
    cli_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(cli_parser) as options:
        graph = get_graph(
            ckan_portal, dataset_id, ressource, namespace, filename, **options
        )
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
def get_argument_parser(parser=None):
    """Extend the CLI parser provided by bonobo and return it."""
    extended = bonobo.get_argument_parser(parser=parser)

    # (option flags, help text) for each string-valued, optional file path.
    file_options = (
        (("--input_file", "-i"), "Path of the input file."),
        (("--output_file", "-o"), "Path of the output file."),
    )
    for flags, help_text in file_options:
        extended.add_argument(*flags, type=str, default=None, help=help_text)

    return extended
def main():
    """Execute the pipeline graph."""
    # NOTE: a file-logging setup (FileHandler on "wh.log" with an
    # '%(asctime)s - %(levelname)s - %(message)s' formatter) was sketched here
    # but is currently disabled.
    cli_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(cli_parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
def get_argument_parser(parser=None):
    """Extend the CLI parser provided by bonobo and return it."""
    extended = bonobo.get_argument_parser(parser=parser)

    # Every option below is an optional string defaulting to None, so they are
    # declared as (flags, help text) pairs and registered in one loop.
    string_options = (
        (("--input_file", "-i"), "Path of the input file."),
        (("--output_file", "-o"), "Path of the output file."),
        # these parameters are added for accessing different S3 services
        (("--bucket", "-b"), "Bucket name in S3 service."),
        (("--key", "-k"), "Key to access S3 service."),
        (("--secret_key", "-sk"), "Secret key to access the S3 service."),
        (("--endpoint_url", "-ep"), "Endpoint URL for S3 service."),
    )
    for flags, help_text in string_options:
        extended.add_argument(*flags, type=str, default=None, help=help_text)

    return extended
def get_argument_parser(parser=None):
    """Return bonobo's argument parser with ``--limit`` and ``--print`` added."""
    cli_parser = bonobo.get_argument_parser(parser=parser)

    # -l/--limit: optional integer cap on the number of processed lines.
    cli_parser.add_argument(
        '--limit',
        '-l',
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )

    # -p/--print: boolean flag, off unless given on the command line.
    cli_parser.add_argument(
        '--print',
        '-p',
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )

    return cli_parser
def get_argument_parser(parser=None):
    """Add the ``--limit`` and ``--print`` options to bonobo's argument parser.

    :param parser: optional existing parser, forwarded to
        ``bonobo.get_argument_parser``.
    :return: the parser returned by bonobo, with both options registered.
    """
    limit_settings = dict(
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )
    print_settings = dict(
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )

    extended = bonobo.get_argument_parser(parser=parser)
    extended.add_argument('--limit', '-l', **limit_settings)
    extended.add_argument('--print', '-p', **print_settings)
    return extended
def parse_args(parser=None):
    """Parse CLI options, optionally dropping/creating the database, and yield them.

    Adds ``--drop``/``-D``, ``--create``/``-C`` and ``--echo`` to the parser
    (bonobo's default parser when none is given), then yields the parsed
    options from within ``bonobo.parse_args``.

    NOTE(review): this is a generator; it is presumably wrapped with
    ``contextlib.contextmanager`` outside of this block -- confirm.
    """
    if not parser:
        parser = bonobo.get_argument_parser()
    for flags in (('--drop', '-D'), ('--create', '-C'), ('--echo',)):
        parser.add_argument(*flags, action='store_true')

    with bonobo.parse_args(parser) as options:
        # Project modules are imported only once the options are parsed.
        import models
        import settings
        import services

        if options['echo']:
            sql_logger = logging.getLogger('sqlalchemy.engine')
            sql_logger.setLevel(logging.INFO)

        if options['drop'] or options['create']:
            # Both operations go through a superuser engine.
            root_engine = services.create_engine(superuser=True)

            if options['drop']:
                # drop database/role with super user privileges
                drop_database = "DROP DATABASE {}".format(settings.DATABASE_NAME)
                _execute_sql(root_engine, drop_database)
                drop_role = "DROP ROLE {}".format(settings.DATABASE_USERNAME)
                _execute_sql(root_engine, drop_role)

            if options['create']:
                # create database/role with super user privileges
                create_role = 'CREATE ROLE {} WITH LOGIN PASSWORD \'{}\';'.format(
                    settings.DATABASE_USERNAME, settings.DATABASE_PASSWORD
                )
                _execute_sql(root_engine, create_role)
                create_database = (
                    'CREATE DATABASE {} WITH OWNER={} TEMPLATE=template0 ENCODING="utf-8";'
                    .format(settings.DATABASE_NAME, settings.DATABASE_USERNAME)
                )
                _execute_sql(root_engine, create_database)

                # create tables in userland
                user_engine = services.create_engine()
                models.metadata.create_all(user_engine)

        yield options
def get_argument_parser(parser=None):
    """Return bonobo's parser extended with limit, print and strategy options.

    ``--strategy`` only accepts the keys of ``STRATEGIES`` and defaults to
    ``DEFAULT_STRATEGY``.
    """
    cli = bonobo.get_argument_parser(parser=parser)

    cli.add_argument(
        "--limit",
        "-l",
        type=int,
        default=None,
        help="If set, limits the number of processed lines.",
    )
    cli.add_argument(
        "--print",
        "-p",
        action="store_true",
        default=False,
        help="If set, pretty prints before writing to output file.",
    )
    cli.add_argument(
        "--strategy",
        "-s",
        type=str,
        choices=STRATEGIES.keys(),
        default=DEFAULT_STRATEGY,
    )

    return cli
import bonobo
from bonobo.examples.files._services import get_services


def get_graph(*, _limit=None, _print=False):
    """Build the graph reading 'datasets/coffeeshops.txt' and writing 'coffeeshops.csv'.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node follows the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node precedes the writer.
    """
    nodes = [bonobo.CsvReader('datasets/coffeeshops.txt')]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter('coffeeshops.csv', fs='fs.output'))
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    parser = bonobo.get_argument_parser()

    parser.add_argument(
        '--limit',
        '-l',
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )
    parser.add_argument(
        '--print',
        '-p',
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )
def create_parser(self, prog_name, subcommand):
    """Create the parent class' parser and pass it through bonobo.get_argument_parser."""
    base_parser = super().create_parser(prog_name, subcommand)
    return bonobo.get_argument_parser(base_parser)
def parse_args(mixed=None):
    """
    Context manager to extract and apply environment related options from the provided argparser result.

    A dictionary with the remaining (unknown) options is yielded, so they can be used by the caller. Every
    environment variable modified here is put back in its previous state once the context exits.

    NOTE(review): this function is a generator; it is presumably wrapped with ``contextlib.contextmanager``
    outside of this block -- confirm.

    :api: bonobo.patch_environ

    :param mixed: ArgumentParser instance, Namespace, or dict. When None, a parser is obtained from
        ``bonobo.get_argument_parser()``.
    :return: yields a dict copy of the options, without the ``default_env``, ``default_env_file``,
        ``env_file`` and ``env`` keys.
    """
    global _parser

    if mixed is None:
        if _parser is not None:
            warnings.warn(
                'You are calling bonobo.parse_args() without a parser argument, but it looks like you created a parser before. You probably want to pass your parser to this call, or if creating a new parser here is really what you want to do, please create a new one explicitely to silence this warning.'
            )
        # use the api from bonobo namespace, in case a command patched it.
        import bonobo
        mixed = bonobo.get_argument_parser()

    if isinstance(mixed, argparse.ArgumentParser):
        options = mixed.parse_args()
    else:
        options = mixed

    if not isinstance(options, dict):
        options = options.__dict__

    # make a copy so we don't pollute our parent variables.
    options = dict(options)

    # storage for values before patch.
    _backup = {}

    def _patch(name, value):
        # Record the pre-patch value only the first time a name is touched, so
        # the state restored at exit is the one that existed before entering.
        if name not in _backup:
            _backup[name] = os.environ.get(name, None)
        os.environ[name] = value

    # Priority order: --env > --env-file > system > --default-env > --default-env-file
    #
    # * The code below is reading default-env before default-env-file as if the first sets something,
    #   default-env-file won't override it.
    # * Then, env-file is read from before env, as the behaviour will be the opposite (env will override a var
    #   even if env-file sets something.)
    try:
        # Set default environment
        for definition in options.pop('default_env', []) or []:
            name, value = parse_var(definition)
            if name not in os.environ:
                _patch(name, value)

        # Read and set default environment from file(s)
        for filename in options.pop('default_env_file', []) or []:
            for name, value in load_env_from_file(filename):
                if name not in os.environ:
                    _patch(name, value)

        # Read and set environment from file(s)
        for filename in options.pop('env_file', []) or []:
            for name, value in load_env_from_file(filename):
                _patch(name, value)

        # Set environment
        for definition in options.pop('env', []) or []:
            name, value = parse_var(definition)
            _patch(name, value)

        yield options
    finally:
        for name, value in _backup.items():
            if value is None:
                # The variable did not exist before the patch. pop() rather
                # than del: if the body of the context already removed it,
                # del would raise KeyError and mask any in-flight exception.
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
def get_argument_parser(argparser):
    """Augment the given ArgumentParser for use with the Bonobo ETL framework.

    :param argparser: parser forwarded to ``bonobo.get_argument_parser``.
    :return: whatever ``bonobo.get_argument_parser`` returns for that parser.
    """
    augmented = bonobo.get_argument_parser(parser=argparser)
    return augmented
def main(setting, query, namespace, filename):
    """Parse bonobo's CLI options and run the graph built for ``query``.

    ``query``, ``namespace`` and ``filename`` go to ``get_graph``; ``setting``
    goes to ``get_services``. Parsed options are forwarded to both as keyword
    arguments.
    """
    cli_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(cli_parser) as options:
        graph = get_graph(query, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
def create_parser(self, prog_name, subcommand):
    """Return the parent's parser after handing it to bonobo.get_argument_parser.

    :param prog_name: forwarded unchanged to the parent ``create_parser``.
    :param subcommand: forwarded unchanged to the parent ``create_parser``.
    """
    parent_parser = super().create_parser(prog_name, subcommand)
    return bonobo.get_argument_parser(parent_parser)
def load(result):
    """Store one result of the pipeline.

    Each result reaching this point should be persisted, either appended as a
    new line to a text file (using open with 'a' and write) or inserted into a
    database of your choice. The goal is to end up with the multiplication
    table of five stored in a file or a database.
    """
    cinco.insert_multiplo(result)
    print('Fin!')


def get_graph(**options):
    """Return a graph chaining extract -> transform -> load."""
    pipeline = bonobo.Graph()
    pipeline.add_chain(extract, transform, load)
    return pipeline


def get_services(**options):
    """Return the services for the graph: none, an empty dict."""
    return dict()


if __name__ == "__main__":
    # The schema is created before the pipeline runs.
    cinco.create_schema()

    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(
            get_graph(**options),
            services=get_services(**options),
        )
def main():
    """Execute the pipeline graph.

    Options parsed from the command line are forwarded as keyword arguments
    to both ``get_graph`` and ``get_services``.
    """
    argument_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(argument_parser) as options:
        pipeline = get_graph(**options)
        pipeline_services = get_services(**options)
        bonobo.run(pipeline, services=pipeline_services)