Example #1
0
    def run(self, *args, **options):
        """Run every graph produced by ``get_graph`` and collect the results.

        ``get_graph`` may return a single object or a generator; a lone
        object is wrapped in a one-element tuple so both cases share one loop.
        For each graph a numbered title, the statistics and flags of every
        node of the result, and the return value are printed.

        :param args: positional arguments forwarded to ``get_graph``.
        :param options: handed to ``bonobo.parse_args``; the value it yields
            is forwarded to ``get_graph`` as keyword arguments.
        :return: list holding the ``bonobo.run`` result of each graph, in order.
        :raises ValueError: if an item is not a ``bonobo.Graph`` instance.
        """
        collected = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            graphs = self.get_graph(*args, **options)

            if not isinstance(graphs, GeneratorType):
                graphs = (graphs, )

            for number, graph in enumerate(graphs, start=1):
                if not isinstance(graph, bonobo.Graph):
                    message = "Expected a Graph instance, got {!r}.".format(graph)
                    raise ValueError(message)

                # Unnamed graphs are titled with their repr, minus the angle brackets.
                title = graph.name or repr(graph).strip("<>")
                print(term.lightwhite("{}. {}".format(number, title)))

                outcome = bonobo.run(graph, services=services, strategy=strategy)
                collected.append(outcome)

                for node in outcome.nodes:
                    print(node.get_statistics_as_string(), node.get_flags_as_string())
                print(term.lightblack(" ... return value: " + str(outcome)))

        return collected
Example #2
0
def main(setting, ckan_portal, dataset_id, ressource, namespace, filename):
    """Parse bonobo's command-line options, then build and run the graph.

    :param setting: forwarded to ``get_services`` as its first argument.
    :param ckan_portal: forwarded to ``get_graph``.
    :param dataset_id: forwarded to ``get_graph``.
    :param ressource: forwarded to ``get_graph``.
    :param namespace: forwarded to ``get_graph``.
    :param filename: forwarded to ``get_graph``.

    The parsed options are passed as extra keyword arguments to both
    ``get_graph`` and ``get_services``.
    """
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(ckan_portal, dataset_id, ressource, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
Example #3
0
    def handle(self, *args, **options):
        """Run each graph returned by ``get_graph`` with console-friendly output.

        ``self.stdout`` and ``self.stderr`` are temporarily replaced by
        ``OutputWrapper`` objects around ``ConsoleOutputPlugin``'s streams,
        using ``CLEAR_EOL + '\\n'`` as line ending.  The original streams are
        always restored, even when building or running a graph raises.

        :param args: positional arguments forwarded to ``get_graph``.
        :param options: handed to ``bonobo.parse_args``; the value it yields
            is forwarded to ``get_graph`` as keyword arguments.
        :raises ValueError: if ``get_graph`` produces something that is not a
            ``bonobo.Graph`` instance.
        """
        _stdout_backup, _stderr_backup = self.stdout, self.stderr

        # Everything after the backup runs under try/finally so the original
        # streams come back even if the wrapper setup or a graph run fails.
        try:
            self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout,
                                        ending=CLEAR_EOL + '\n')
            self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr,
                                        ending=CLEAR_EOL + '\n')
            # Prefix every error line with a colored '!' marker followed by a style reset.
            self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x

            with bonobo.parse_args(options) as options:
                services = self.get_services()
                graph_coll = self.get_graph(*args, **options)

                # A single graph is treated as a one-element collection.
                if not isinstance(graph_coll, GeneratorType):
                    graph_coll = (graph_coll, )

                for i, graph in enumerate(graph_coll):
                    # Explicit raise rather than ``assert`` so the check is
                    # not stripped when Python runs with ``-O``.
                    if not isinstance(graph, bonobo.Graph):
                        raise ValueError('Expected a Graph instance, got {!r}.'.format(graph))
                    print(term.lightwhite('{}. {}'.format(i + 1, graph.name)))
                    result = bonobo.run(graph, services=services)
                    print(term.lightblack(' ... return value: ' + str(result)))
                    print()
        finally:
            self.stdout, self.stderr = _stdout_backup, _stderr_backup
Example #4
0
def run_etl(input_file_path, config_file, output_cube_path=None, cube_config=None):
    """
    Run the ETL process for the given Excel file.

    :param input_file_path: excel file path (required)

    :param config_file: path of the YAML config file; only read when
        ``cube_config`` is not given

    example of config::

        # in the config file you specify, for each table, the columns associated with it.
        Facts: [Price, Quantity]
        Accounts: ['Source Account', 'Destination Account']
        Client: ['Client Activity', 'Client Role']

    :param output_cube_path: cube folder path; when omitted it defaults to
        ``<home>/olapy-data/cubes/<stem of input_file_path>``

    :param cube_config: if you want to call run_etl as a function, you can pass the config
        directly as a dict; it takes precedence over ``config_file``. Here is an example::

        @click.command()
        @click.pass_context
        def myETL(ctx):
            # demo run_etl as function with config as dict
            config = {
                'Facts': ['Amount', 'Count'],
                'Geography': ['Continent', 'Country', 'City'],
                'Product': ['Company', 'Article', 'Licence'],
                'Date': ['Year', 'Quarter', 'Month', 'Day']
            }
            ctx.invoke(run_etl, input_file_path='sales.xlsx', cube_config=config, output_cube_path='cube2')

    :raises Exception: if neither ``cube_config`` nor ``config_file`` is given,
        or if ``input_file_path`` is empty

    """
    parser = bonobo.get_argument_parser()
    parser.add_argument("-in", "--input_file_path", help="Input file")
    parser.add_argument("-cf", "--config_file", help="Configuration file path")
    parser.add_argument("-out", "--output_cube_path", help="Cube export path")
    with bonobo.parse_args(parser) as options:

        if cube_config:
            options["cube_config"] = cube_config
        elif config_file:
            # Use a distinct name for the handle so the ``config_file`` path
            # parameter is not shadowed.
            with open(config_file) as config_stream:
                # safe_load instead of yaml.load: the config is plain
                # mappings/lists, and a bare yaml.load() can build arbitrary
                # Python objects (and requires an explicit Loader on
                # PyYAML >= 6).
                options["cube_config"] = yaml.safe_load(config_stream)
        else:
            raise Exception("Config file is not specified")

        if input_file_path:
            options["input_file_path"] = input_file_path
        else:
            raise Exception("Excel file is not specified")

        if output_cube_path:
            options["output_cube_path"] = output_cube_path
        else:
            # Default export location is derived from the input file name.
            options["output_cube_path"] = os.path.join(
                expanduser("~"), "olapy-data", "cubes", Path(input_file_path).stem
            )

        bonobo.run(get_graph(**options), services=get_services(**options))
Example #5
0
 def execute_pipeline(self):
     """Parse bonobo's command-line options and run the pipeline graph.

     The argument parser is stored on ``self.bonobo_parser``; the parsed
     options are forwarded as keyword arguments to both ``build_graph``
     and ``get_services``.
     """
     self.bonobo_parser = bonobo.get_argument_parser()
     with bonobo.parse_args(self.bonobo_parser) as options:
         pipeline = self.build_graph(**options)
         pipeline_services = self.get_services(**options)
         bonobo.run(pipeline, services=pipeline_services)
def main():
    """Parse bonobo's command-line options and execute the pipeline graph.

    The parsed options are forwarded as keyword arguments to both
    ``get_graph`` and ``get_services``.
    """
    # File logging is currently disabled; kept here for reference:
    # logfilename = "wh.log"
    # logger = logging.getLogger()
    # ch = logging.FileHandler(logfilename)
    # formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    # ch.setFormatter(formatter)
    # logger.addHandler(ch)
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
Example #7
0
def run(get_graph, get_services, *, parser=None):
    """Run the graph built by ``get_graph`` and report on the execution.

    Prints the parsed options (sorted by key), then the execution time
    measured by ``Timer``, the value returned by ``bonobo.run`` and its
    ``xstatus``.

    :param get_graph: callable invoked with ``**get_graph_options(options)``.
    :param get_services: callable invoked without arguments.
    :param parser: argument parser to use; when falsy, the one returned by
        ``get_argument_parser()`` is used instead.
    :return: the ``xstatus`` attribute of the ``bonobo.run`` return value.
    """
    if not parser:
        parser = get_argument_parser()

    with bonobo.parse_args(parser) as options:
        with Timer() as timer:
            rendered = " ".join(
                "{}={}".format(key, value) for key, value in sorted(options.items())
            )
            print("Options:", rendered)

            graph = get_graph(**get_graph_options(options))
            services = get_services()
            retval = bonobo.run(graph, services=services, strategy=options["strategy"])

        print("Execution time:", timer)
        print("Return value:", retval)
        status = retval.xstatus
        print("XStatus:", status)
        return status
Example #8
0
    def handle(self, *args, **options):
        """Run the graph returned by ``get_graph`` with console-friendly output.

        ``self.stdout`` and ``self.stderr`` are temporarily replaced by
        ``OutputWrapper`` objects around ``ConsoleOutputPlugin``'s streams,
        using ``CLEAR_EOL + '\\n'`` as line ending.  The original streams are
        always restored, even when building or running the graph raises.

        :param args: positional arguments forwarded to ``get_graph``.
        :param options: handed to ``bonobo.parse_args``; the value it yields
            is forwarded to ``get_graph`` as keyword arguments.
        :return: the string ``'\\nReturn Value: '`` followed by ``str()`` of
            the ``bonobo.run`` result.
        """
        _stdout_backup, _stderr_backup = self.stdout, self.stderr

        # Everything after the backup runs under try/finally so the original
        # streams come back even if the wrapper setup or the graph run fails.
        try:
            self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout,
                                        ending=CLEAR_EOL + '\n')
            self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr,
                                        ending=CLEAR_EOL + '\n')
            # Prefix every error line with a colored '!' marker followed by a style reset.
            self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x

            with bonobo.parse_args(options) as options:
                result = bonobo.run(
                    self.get_graph(*args, **options),
                    services=self.get_services(),
                )
        finally:
            self.stdout, self.stderr = _stdout_backup, _stderr_backup

        return '\nReturn Value: ' + str(result)
Example #9
0
    def run(self, *args, **options):
        """Run every graph produced by ``get_graph`` and collect the results.

        ``get_graph`` may return a single object or a generator; a lone
        object is wrapped in a one-element tuple so both cases share one loop.
        A numbered title and the return value are printed for each graph.

        :param args: positional arguments forwarded to ``get_graph``.
        :param options: handed to ``bonobo.parse_args``; the value it yields
            is forwarded to ``get_graph`` as keyword arguments.
        :return: list holding the ``bonobo.run`` result of each graph, in order.
        :raises ValueError: if an item is not a ``bonobo.Graph`` instance.
        """
        results = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            graph_coll = self.get_graph(*args, **options)

            if not isinstance(graph_coll, GeneratorType):
                graph_coll = (graph_coll, )

            for i, graph in enumerate(graph_coll):
                # Explicit raise rather than ``assert`` so the check is not
                # stripped when Python runs with ``-O``.
                if not isinstance(graph, bonobo.Graph):
                    raise ValueError('Expected a Graph instance, got {!r}.'.format(graph))
                print(term.lightwhite('{}. {}'.format(i + 1, graph.name)))
                result = bonobo.run(graph, services=services, strategy=strategy)
                results.append(result)
                print(term.lightblack(' ... return value: ' + str(result)))
                print()

        return results
Example #10
0
def parse_args(parser=None):
    """Parse options, optionally drop/create the database, then yield the options.

    Adds the ``--drop``/``-D``, ``--create``/``-C`` and ``--echo`` flags to the
    parser before parsing.  ``--echo`` sets the ``sqlalchemy.engine`` logger to
    INFO.  ``--drop`` issues DROP DATABASE / DROP ROLE, and ``--create`` issues
    CREATE ROLE / CREATE DATABASE and then creates the tables from
    ``models.metadata``.  Drop statements run before create statements.

    :param parser: argument parser to extend; when falsy, the one returned by
        ``bonobo.get_argument_parser()`` is used.
    :return: generator yielding the parsed options once.
    """
    if not parser:
        parser = bonobo.get_argument_parser()

    parser.add_argument('--drop', '-D', action='store_true')
    parser.add_argument('--create', '-C', action='store_true')
    parser.add_argument('--echo', action='store_true')

    with bonobo.parse_args(parser) as options:
        # Imported here, inside the parse_args context, rather than at module level.
        import models
        import settings
        import services

        if options['echo']:
            sql_logger = logging.getLogger('sqlalchemy.engine')
            sql_logger.setLevel(logging.INFO)

        if options['drop'] or options['create']:
            # Both operations go through an engine created with superuser=True.
            root_engine = services.create_engine(superuser=True)

            if options['drop']:
                drop_database = "DROP DATABASE {}".format(settings.DATABASE_NAME)
                _execute_sql(root_engine, drop_database)
                drop_role = "DROP ROLE {}".format(settings.DATABASE_USERNAME)
                _execute_sql(root_engine, drop_role)

            if options['create']:
                create_role = 'CREATE ROLE {} WITH LOGIN PASSWORD \'{}\';'.format(
                    settings.DATABASE_USERNAME, settings.DATABASE_PASSWORD)
                _execute_sql(root_engine, create_role)
                create_database = (
                    'CREATE DATABASE {} WITH OWNER={} TEMPLATE=template0 ENCODING="utf-8";'
                    .format(settings.DATABASE_NAME, settings.DATABASE_USERNAME))
                _execute_sql(root_engine, create_database)

                # Tables are created through a regular (non-superuser) engine.
                user_engine = services.create_engine()
                models.metadata.create_all(user_engine)

        yield options
Example #11
0
    def run(self, *args, **options):
        """Run every graph produced by ``get_graph`` and collect the results.

        ``get_graph`` may return a single object or a generator; a lone
        object is wrapped in a one-element tuple so both cases share one loop.
        A numbered title and the return value are printed for each graph.

        :param args: positional arguments forwarded to ``get_graph``.
        :param options: handed to ``bonobo.parse_args``; the value it yields
            is forwarded to ``get_graph`` as keyword arguments.
        :return: list holding the ``bonobo.run`` result of each graph, in order.
        :raises ValueError: if an item is not a ``bonobo.Graph`` instance.
        """
        collected = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            graphs = self.get_graph(*args, **options)

            if not isinstance(graphs, GeneratorType):
                graphs = (graphs,)

            for number, graph in enumerate(graphs, start=1):
                if not isinstance(graph, bonobo.Graph):
                    message = 'Expected a Graph instance, got {!r}.'.format(graph)
                    raise ValueError(message)

                title = '{}. {}'.format(number, graph.name)
                print(term.lightwhite(title))

                outcome = bonobo.run(graph, services=services, strategy=strategy)
                collected.append(outcome)

                print(term.lightblack(' ... return value: ' + str(outcome)))
                print()

        return collected
Example #12
0
        bonobo.CsvReader('datasets/coffeeshops.txt'),
        *((bonobo.Limit(_limit), ) if _limit else ()),
        *((bonobo.PrettyPrinter(), ) if _print else ()),
        bonobo.CsvWriter('coffeeshops.csv', fs='fs.output')
    )


if __name__ == '__main__':
    # Script entry point: extend bonobo's standard argument parser with the
    # two options that get_graph() understands, then run the graph.
    parser = bonobo.get_argument_parser()

    # --limit / -l: optional integer, no limit by default.
    parser.add_argument(
        '--limit', '-l', type=int, default=None,
        help='If set, limits the number of processed lines.',
    )
    # --print / -p: boolean flag, off by default.
    parser.add_argument(
        '--print', '-p', action='store_true', default=False,
        help='If set, pretty prints before writing to output file.',
    )

    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(_limit=options['limit'], _print=options['print']), services=get_services())
Example #13
0
import bonobo
from bonobo.examples.types.strings import get_graph

if __name__ == "__main__":
    # Script entry point: parse bonobo's standard command-line options and
    # run the example graph while the parsed options are in effect.
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser):
        bonobo.run(get_graph())
Example #14
0
File: etl.py Project: mlipper/bray
def run(job, argparser):
    """Parse the command-line options and run the graph for ``job``.

    :param job: forwarded to both ``get_graph`` and ``get_services``.
    :param argparser: forwarded to ``get_argument_parser`` to obtain the
        parser given to ``bonobo.parse_args``.
    """
    with bonobo.parse_args(get_argument_parser(argparser)) as options:
        graph_options = get_graph_options(options)
        graph = get_graph(job, **graph_options)
        bonobo.run(graph, services=get_services(job))
def main(setting, query, namespace, filename):
    """Parse bonobo's command-line options, then build and run the graph.

    :param setting: forwarded to ``get_services`` as its first argument.
    :param query: forwarded to ``get_graph``.
    :param namespace: forwarded to ``get_graph``.
    :param filename: forwarded to ``get_graph``.

    The parsed options are passed as extra keyword arguments to both
    ``get_graph`` and ``get_services``.
    """
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(query, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
Example #16
0
import bonobo
import datetime
import time


def extract():
    """Placeholder source: yield the current datetime 60 times.

    The first value is produced immediately; each of the remaining 59 is
    preceded by a one-second sleep.  Change, rename or remove as needed.
    """
    yield datetime.datetime.now()
    for _ in range(59):
        time.sleep(1)
        yield datetime.datetime.now()


def get_graph():
    """Return a graph with a single chain feeding ``extract`` into ``print``."""
    pipeline = bonobo.Graph()
    pipeline.add_chain(extract, print)
    return pipeline


if __name__ == '__main__':
    # Script entry point: parse bonobo's standard command-line options and
    # run the graph while the parsed options are in effect.
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser):
        bonobo.run(get_graph())
Example #17
0
import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield ``line`` stripped of surrounding whitespace unless it is a comment.

    A line counts as a comment when, once stripped, it starts with ``#``;
    nothing is yielded for such lines.
    """
    stripped = line.strip()
    if stripped.startswith('#'):
        return
    yield stripped


def get_graph(*, _limit=(), _print=()):
    """Build the graph that writes the first ``:``-separated field of each line.

    The chain reads ``datasets/passwd.txt``, drops comment lines, keeps the
    text before the first ``:`` and writes it to ``usernames.txt`` using the
    ``fs.output`` filesystem service.

    :param _limit: iterable of extra nodes inserted right after comment
        filtering (empty by default).
    :param _print: iterable of extra nodes inserted right before the writer
        (empty by default).
    :return: the assembled ``bonobo.Graph``.
    """
    nodes = [
        bonobo.FileReader('datasets/passwd.txt'),
        skip_comments,
        *_limit,
        lambda s: s.split(':')[0],
        *_print,
        bonobo.FileWriter('usernames.txt', fs='fs.output'),
    ]
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    # Script entry point: parse the examples' command-line options, convert
    # them to get_graph() keyword arguments and run the resulting graph.
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services())
def main():
    """Parse bonobo's command-line options and execute the pipeline graph.

    The parsed options are forwarded as keyword arguments to both
    ``get_graph`` and ``get_services``.
    """
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)