Beispiel #1
0
 def test_bonodoo_reader_fields(self):
     """Check that an OdooReader limited to the ``id`` field ends up in the CSV file.

     The XML-RPC ``ServerProxy`` is patched so ``execute_kw`` returns two
     records; the written file must then hold three lines, the last two
     being the record ids (the first line is presumably the CSV header).
     """
     filename = 'test_file.csv'
     value_1 = {'id': 2}
     value_2 = {'id': 3}
     read = OdooReader(
         model='res.users',
         domain=[],
         fields=['id'],
     )
     # The context manager removes the directory even when an assertion
     # below fails, which the explicit cleanup() call did not guarantee.
     with tempfile.TemporaryDirectory() as folder_name:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = [value_1, value_2]
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder_name),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder_name, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 3)
         self.assertEqual(ast.literal_eval(lines[1]), value_1.get('id'))
         self.assertEqual(ast.literal_eval(lines[2]), value_2.get('id'))
Beispiel #2
0
def run_etl(input_file_path, config_file, output_cube_path=None, cube_config=None):
    """
    Run the ETL process for the passed Excel file.

    :param input_file_path: excel file path

    :param config_file: config file path (YAML); only read when ``cube_config`` is not given

    example of config::

        # in the config file you specify, for each table, the columns associated with it.
        Facts: [Price, Quantity]
        Accounts: ['Source Account', 'Destination Account']
        Client: ['Client Activity', 'Client Role']

    :param output_cube_path: cube folder path; when omitted it defaults to
        ``~/olapy-data/cubes/<stem of input_file_path>``

    :param cube_config: if you want to call run_etl as a function, you can pass the config directly as a dict,
        here is an example::

        @click.command()
        @click.pass_context
        def myETL(ctx):
            # demo run_etl as function with config as dict
            config = {
                'Facts': ['Amount', 'Count'],
                'Geography': ['Continent', 'Country', 'City'],
                'Product': ['Company', 'Article', 'Licence'],
                'Date': ['Year', 'Quarter', 'Month', 'Day']
            }
            ctx.invoke(run_etl, input_file_path='sales.xlsx', cube_config=config, output_cube_path='cube2')

    :raises Exception: when neither ``cube_config`` nor ``config_file`` is given,
        or when ``input_file_path`` is empty

    """
    parser = bonobo.get_argument_parser()
    parser.add_argument("-in", "--input_file_path", help="Input file")
    parser.add_argument("-cf", "--config_file", help="Configuration file path")
    parser.add_argument("-out", "--output_cube_path", help="Cube export path")
    with bonobo.parse_args(parser) as options:

        if cube_config:
            options["cube_config"] = cube_config
        elif config_file:
            # Use a distinct name for the handle so the path parameter is not shadowed.
            with open(config_file) as config_stream:
                # SECURITY: the original used yaml.load() without a Loader, which can
                # build arbitrary Python objects from the file and is rejected by
                # PyYAML >= 6. safe_load() only accepts plain YAML data, which is all
                # the table/column mapping described above needs.
                options["cube_config"] = yaml.safe_load(config_stream)
        else:
            raise Exception("Config file is not specified")

        if input_file_path:
            options["input_file_path"] = input_file_path
        else:
            raise Exception("Excel file is not specified")

        if output_cube_path:
            options["output_cube_path"] = output_cube_path
        else:
            options["output_cube_path"] = os.path.join(
                expanduser("~"), "olapy-data", "cubes", Path(input_file_path).stem
            )

        bonobo.run(get_graph(**options), services=get_services(**options))
 def execute_pipeline(self):
     """Parse the bonobo command-line options and run this object's graph with them.

     The parser is kept on ``self.bonobo_parser``; the parsed options are
     forwarded to both ``build_graph`` and ``get_services``.
     """
     parser = bonobo.get_argument_parser()
     self.bonobo_parser = parser
     with bonobo.parse_args(parser) as options:
         graph = self.build_graph(**options)
         services = self.get_services(**options)
         bonobo.run(graph, services=services)
Beispiel #4
0
def main(setting, ckan_portal, dataset_id, ressource, namespace, filename):
    """Build the graph and services from the given arguments plus CLI options, then run it."""
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(ckan_portal, dataset_id, ressource, namespace,
                          filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
Beispiel #5
0
    def run(self) -> bool:
        """Run the graph of every enabled, valid report held in ``self._reports``.

        ``self._reports`` is read as a mapping whose values are iterables of
        report objects (or ``None``). Reports that are disabled, have no graph,
        or fail ``self._valid`` are skipped.

        Returns:
            ``False`` when ``self._reports`` is ``None``; ``True`` once the loop
            over all reports has completed. The original fell off the end and
            returned ``None`` here, contradicting the ``bool`` annotation.
        """
        # TODO: Change the continue to a report error insertion on DB

        if self._reports is None:
            print(__name__, "::run() no reports available.")
            return False

        for dep_report in self._reports.values():
            if dep_report is None:
                continue

            for report in dep_report:

                if not report.enabled:
                    print(__name__, '::run() report is disabled, ',
                          report.name)
                    continue

                graph = report.graph

                if graph is None:
                    print(__name__,
                          "::run() DAG could not be constructed, ",
                          report.name)
                    continue

                if self._valid(report):
                    print(__name__, ' Running report ', report.name)

                    bonobo.run(graph)

        return True
Beispiel #6
0
def main():
    """Chain the CSV extract, transform and warehouse-load steps into one graph and run it."""
    pipeline = bonobo.Graph(extract_data_from_csv, transform_data, load_data_to_dw)
    bonobo.run(pipeline)
Beispiel #7
0
    def __call__(self, *args, **kwargs):
        """Execute the command: run ``self.graph`` with a SQLAlchemy session injected.

        ``args[0]`` is forwarded to ``self.get_services``. The session is
        committed after the graph has run, rolled back if running or
        committing raises, and always closed afterwards (the original never
        closed it).
        """

        # This is not that useful, but does show how to create more complicated graphs
        # graph.add_chain(
        #     bonobo.PrettyPrinter(),
        #     _input=read_recs
        # )

        services = self.get_services(args[0])

        engine = services['sqlalchemy.engine']

        # We do not want to do this in production - it creates the tables ...
        Weather.metadata.create_all(engine)

        # Make a session
        session = sessionmaker(bind=engine)()

        # Add it to injectable services
        services['session'] = session

        try:
            bonobo.run(self.graph, services=services)
            session.commit()
        except Exception:
            # Do not leave a half-finished transaction open on the connection.
            session.rollback()
            raise
        finally:
            # Release the connection held by the session in every case.
            session.close()
Beispiel #8
0
def main():
    """Chain the xlsx extract, transform and xlsx-load steps into one graph and run it."""
    steps = (extract_data_from_xlxs, transform_data, load_into_new_xlsx_file)
    bonobo.run(bonobo.Graph(*steps))
Beispiel #9
0
 def run_graph(self, graph, *, services):
     """Run ``graph`` with bonobo when ``self.parallel`` is set, else with the custom GraphExecutor."""
     if not self.parallel:
         print('Running with SERIAL custom executor')
         executor = pipeline.execution.GraphExecutor(graph, services)
         executor.run()
         return

     print('Running with PARALLEL bonobo executor')
     bonobo.run(graph, services=services)
def main():
    """Parse the bonobo CLI options and execute the pipeline graph built from them."""
    # File logging is currently disabled; the setup is kept for reference:
    # logfilename = "wh.log"
    # logger = logging.getLogger()
    # ch = logging.FileHandler(logfilename)
    # formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    # ch.setFormatter(formatter)
    # logger.addHandler(ch)
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
Beispiel #11
0
def test_postgres(postgres):
    """Round-trip ten rows through TABLE_1 and TABLE_2 on the Postgres fixture.

    Rows are inserted into TABLE_1, read back, copied from TABLE_1 into
    TABLE_2 and read back again; both reads must return the same ten rows.
    """
    #bonobo.settings.QUIET.set(True)

    db_name = 'my_db'
    port = postgres['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']
    wait_for_postgres(port)
    root_engine = create_root_engine(port)
    _execute_sql(root_engine, "CREATE ROLE my_user WITH LOGIN PASSWORD '';")
    create_db_sql = 'CREATE DATABASE {name} WITH OWNER=my_user TEMPLATE=template0 ENCODING="utf-8"'.format(
        name=db_name)
    _execute_sql(root_engine, create_db_sql)

    engine = create_engine('my_user', db_name, port)
    metadata.create_all(engine)

    services = {'sqlalchemy.engine': engine}
    # Every read-back is expected to yield (args, kwargs) pairs for ids 0..9.
    expected_rows = [((i, 'value for {}'.format(i)), {}) for i in range(10)]

    def read_back(table):
        """Select everything from ``table`` into a fresh buffer and return its content."""
        sink = Bufferize()
        reader = bonobo.Graph()
        reader.add_chain(
            bonobo_sqlalchemy.Select('SELECT * FROM ' + table),
            sink,
        )
        assert bonobo.run(reader, services=services)
        return sink.buffer

    # Fill TABLE_1 from the extract node, then verify its content.
    fill = bonobo.Graph()
    fill.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate(TABLE_1))
    assert bonobo.run(fill, services=services)
    assert read_back(TABLE_1) == expected_rows

    # Copy TABLE_1 into TABLE_2, then verify the copy.
    copy = bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        bonobo_sqlalchemy.InsertOrUpdate(TABLE_2),
    )
    assert bonobo.run(copy, services=services)
    assert read_back(TABLE_2) == expected_rows
def update_message_count(start_date, end_date, base_services):
    """Run the message-count graph for every date in the range not yet marked complete.

    A date is marked complete after its graph has run only when it lies
    strictly before today. Logging uses lazy ``%s`` arguments, consistent
    with ``update_raw_threads``.

    :param start_date: first argument passed to ``date_range``
    :param end_date: second argument passed to ``date_range``
    :param base_services: services mapping; its ``"database"`` entry backs the status store
    """
    database = base_services["database"]
    status_db = db.Status(database)
    services = get_message_count_services(base_services)

    for date in date_range(start_date, end_date):
        if not status_db.is_message_count_complete(date):
            logging.info("Fetching raw messages for %s", date.isoformat())
            graph = get_message_count_graph(date, False)
            bonobo.run(graph, services=services)
            if date < datetime.date.today():
                status_db.set_message_count_complete(date)
        else:
            logging.info("Date %s is complete. Skipping.", date.isoformat())
Beispiel #13
0
    def fetch_user_data(self):
        """Run the graph between the XML header and footer steps and return the result string.

        Any exception is logged as three warnings (a marker, the exception,
        the data source parameters) and the literal ``'FAIL DATA'`` is
        returned instead.
        """
        try:
            self.build_xml_document_header()
            conn_string = self.assemble_connection_string()
            bonobo.run(
                self.get_graph(),
                services=self.get_services(conn_string),
            )
            self.build_xml_document_footer()
        except Exception as error:
            logging.warning("Exception caught: fetch_user_data")
            logging.warning(error)
            logging.warning(self.__data_source_parms)
            return 'FAIL DATA'
        else:
            return self.__result_string
Beispiel #14
0
def execute(file, quiet=False):
    """Execute the Python source in ``file`` and run the single Graph it defines.

    :param file: open file object; it is read, closed, and its ``name`` is
        used as the compiled filename and as ``__file__``
    :param quiet: currently unused (see the todo notes below)
    :return: whatever ``run(graph)`` returns
    :raises AssertionError: when the executed file does not define exactly one Graph
    """
    with file:
        code = compile(file.read(), file.name, 'exec')

    # TODO: A few special variables should be set before running the file:
    #
    # See:
    #  - https://docs.python.org/3/reference/import.html#import-mod-attrs
    #  - https://docs.python.org/3/library/runpy.html#runpy.run_module
    context = {
        '__name__': '__bonobo__',
        '__file__': file.name,
    }

    # Exceptions raised by the executed file propagate unchanged; the former
    # ``try/except: raise`` wrapper added nothing.
    exec(code, context)

    graphs = {k: v for k, v in context.items() if isinstance(v, Graph)}

    if len(graphs) != 1:
        # Raised explicitly rather than via ``assert`` so the check is still
        # performed under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            (
                'Having zero or more than one graph definition in one file is unsupported for now, '
                'but it is something that will be implemented in the future.\n\nExpected: 1, got: {}.'
            ).format(len(graphs))
        )

    graph = next(iter(graphs.values()))

    # todo if console and not quiet, then add the console plugin
    # todo when better console plugin, add it if console and just disable display

    return run(graph)
Beispiel #15
0
    def handle(self, *args, **options):
        """Run every graph returned by ``get_graph`` with console-friendly output streams.

        ``self.stdout`` and ``self.stderr`` are temporarily replaced by
        wrappers around the ConsoleOutputPlugin streams. They are restored in
        a ``finally`` clause so a failing graph no longer leaves the
        replacement streams installed.

        :raises AssertionError: when ``get_graph`` yields something that is not a ``bonobo.Graph``
        """
        _stdout_backup, _stderr_backup = self.stdout, self.stderr

        try:
            self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout,
                                        ending=CLEAR_EOL + '\n')
            self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr,
                                        ending=CLEAR_EOL + '\n')
            self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x

            with bonobo.parse_args(options) as options:
                services = self.get_services()
                graph_coll = self.get_graph(*args, **options)

                # A single graph is treated as a one-element collection.
                if not isinstance(graph_coll, GeneratorType):
                    graph_coll = (graph_coll, )

                for i, graph in enumerate(graph_coll):
                    assert isinstance(graph,
                                      bonobo.Graph), 'Invalid graph provided.'
                    print(term.lightwhite('{}. {}'.format(i + 1, graph.name)))
                    result = bonobo.run(graph, services=services)
                    print(term.lightblack(' ... return value: ' + str(result)))
                    print()
        finally:
            self.stdout, self.stderr = _stdout_backup, _stderr_backup
Beispiel #16
0
    def run(self, *args, **options):
        """Run each graph produced by ``get_graph`` and return the list of run results.

        For every graph a numbered title is printed, followed by one line per
        node of the result (statistics and flags) and the result itself.

        :raises ValueError: when ``get_graph`` yields something that is not a ``bonobo.Graph``
        """
        results = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            graph_coll = self.get_graph(*args, **options)

            # A single graph is treated as a one-element collection.
            if not isinstance(graph_coll, GeneratorType):
                graph_coll = (graph_coll, )

            for position, graph in enumerate(graph_coll, start=1):
                if not isinstance(graph, bonobo.Graph):
                    raise ValueError(
                        "Expected a Graph instance, got {!r}.".format(graph))

                title = "{}. {}".format(
                    position, graph.name or repr(graph).strip("<>"))
                print(term.lightwhite(title))

                result = bonobo.run(graph, services=services, strategy=strategy)
                results.append(result)

                for node in result.nodes:
                    print(node.get_statistics_as_string(),
                          node.get_flags_as_string())
                print(term.lightblack(" ... return value: " + str(result)))

        return results
def update_raw_threads(start_date, end_date, backdate_nworking_days,
                       base_services):
    """Run the raw-threads graph for every date in the range not yet marked complete.

    A date is marked complete after its graph has run only when it lies
    before the day ``backdate_nworking_days`` working days ahead of today.
    """
    database = base_services["database"]
    status_db = db.Status(database)
    services = get_raw_threads_services(base_services)
    ndays_ago = nworking_days_before(datetime.date.today(),
                                     backdate_nworking_days)

    for date in date_range(start_date, end_date):
        if status_db.is_raw_threads_complete(date):
            logging.info("Date %s is complete. Skipping.", date.isoformat())
            continue

        logging.info("Fetching raw threads for %s", date.isoformat())
        bonobo.run(get_raw_threads_graph(date), services=services)
        if date < ndays_ago:
            status_db.set_raw_threads_complete(date)
Beispiel #18
0
def test_run_graph_noop():
    """A graph holding only ``bonobo.noop`` has length 1 and runs to a GraphExecutionContext."""
    noop_graph = bonobo.Graph(bonobo.noop)
    assert len(noop_graph) == 1

    # Make bonobo believe it is not running in an interactive console.
    not_interactive = patch('bonobo._api._is_interactive_console',
                            side_effect=lambda: False)
    with not_interactive:
        outcome = bonobo.run(noop_graph)
    assert isinstance(outcome, GraphExecutionContext)
def test_run_graph_noop():
    """Running a single-node noop graph must yield a GraphExecutionContext."""
    single_node = bonobo.Graph(bonobo.noop)
    assert len(single_node) == 1

    # Pretend the console is non-interactive for the duration of the run.
    with patch('bonobo._api._is_interactive_console', side_effect=lambda: False):
        context = bonobo.run(single_node)

    assert isinstance(context, GraphExecutionContext)
Beispiel #20
0
def execute(filename,
            module,
            install=False,
            quiet=False,
            verbose=False,
            env=None):
    """Load graph, plugins and services via ``read`` and return the result of running them."""
    loaded = read(filename, module, install, quiet, verbose, env)
    graph, plugins, services = loaded
    return bonobo.run(graph, plugins=plugins, services=services)
Beispiel #21
0
def load_activities(response):
    """Fetch the athlete's activities from Strava and dump their power data to JSON.

    A three-step bonobo graph is run: fetch the activity summaries, load
    the streams of every activity that has power data, and collect one
    ``{start date: watts list}`` dict per activity. The collected list is
    written to ``<athlete id>.json`` inside ``dir_testdata()``.

    :param response: mapping providing ``"access_token"`` and ``["athlete"]["id"]``
    """
    client = stravaio.StravaIO(response["access_token"])

    def extract():
        """Fetch activities summary from Strava"""
        activities = None
        while activities is None:
            time.sleep(1)
            try:
                activities = client.get_logged_in_athlete_activities(after='20180101')
            except Exception:
                # Keep retrying as before, but no longer swallow
                # KeyboardInterrupt/SystemExit, and leave a trace of the failure.
                logger.warning('load_activities: extract: fetching activities failed, retrying',
                               exc_info=True)
                activities = None
        logger.debug('load_activities: extract: fetching activities')
        for a in activities:
            yield a

    def get_streams(a):
        """Returns dict of activity and streams dataframe"""
        if (a.device_watts): # check if the activity has the power data
            logger.debug(f'load_activities: Fetching stream for {maya.parse(a.start_date).iso8601()}:, {a.name}, {a.start_latlng}, {a.trainer}, {a.type}')
            s = client.get_activity_streams(a.id, response['athlete']['id'])
            if isinstance(s, pd.DataFrame): # check whether the stream was loaded from the local copy
                logger.debug(f'load_activities     ...found locally')
                _s = s
            else: # Streams were loaded from the API, will be stored locally first
                logger.debug(f'load_activities     ...fetched remotely, storing locally')
                s.store_locally()
                _s = pd.DataFrame(s.to_dict())
            yield {maya.parse(a.start_date).iso8601(): list(_s['watts'])}

    # Filled by the last node of the graph, then serialised below.
    d = []
    def load(s):
        """Collect one activity dict into the result list."""
        logger.debug('load_activities: Appending date and power data to the dict')
        d.append(s)

    g = bonobo.Graph()
    g.add_chain(extract, get_streams, load)
    bonobo.run(g)

    f_name = f"{response['athlete']['id']}.json"
    with open(os.path.join(dir_testdata(), f_name), 'w') as f:
        logger.debug(f'load_activities: Save data to json {f_name}')
        json.dump(d, f)
Beispiel #22
0
 def test_bonodoo_function_single(self):
     """Check that a single dict returned by an OdooModelFunction call is written to the file.

     The XML-RPC ``ServerProxy`` is patched so ``execute_kw`` returns one
     dict; the written file must then hold exactly one line that evaluates
     back to that dict.
     """
     filename = 'test_file.csv'
     read = OdooModelFunction(model='res.users', function='test_function')
     value_1 = {'id': 2}
     # The context manager removes the directory even when an assertion
     # below fails, which the explicit cleanup() call did not guarantee.
     with tempfile.TemporaryDirectory() as folder_name:
         with patch('xmlrpc.client.ServerProxy') as mk:
             mock_server = mk.return_value
             mock_server.login.return_value = 1
             mock_server.execute_kw.return_value = value_1
             graph = Graph()
             graph.add_chain(read, CsvWriter(filename, fs='fs.data'))
             bonobo.run(graph,
                        services={
                            'fs.data': bonobo.open_fs(folder_name),
                            'odoo.server': self.server,
                        })
             mk.assert_called()
         with open(os.path.join(folder_name, filename), 'r') as f:
             lines = f.readlines()
         self.assertEqual(len(lines), 1)
         self.assertEqual(ast.literal_eval(lines[0]), value_1)
Beispiel #23
0
def run(get_graph, get_services, *, parser=None):
    """Run the graph built by ``get_graph`` under a timer and return the run's ``xstatus``.

    The parsed options, the execution time, the return value and its
    ``xstatus`` are printed along the way.

    :param get_graph: callable receiving the graph options as keyword arguments
    :param get_services: callable without arguments returning the services
    :param parser: optional argument parser; a default one is created when falsy
    """
    if not parser:
        parser = get_argument_parser()

    with bonobo.parse_args(parser) as options:
        with Timer() as timer:
            rendered = " ".join(
                "{}={}".format(key, value)
                for key, value in sorted(options.items()))
            print("Options:", rendered)
            graph = get_graph(**get_graph_options(options))
            retval = bonobo.run(graph,
                                services=get_services(),
                                strategy=options["strategy"])
        print("Execution time:", timer)
        print("Return value:", retval)
        print("XStatus:", retval.xstatus)
        return retval.xstatus
Beispiel #24
0
def execute(input,
            output,
            reader=None,
            reader_options=None,
            writer=None,
            writer_options=None,
            options=None):
    """Run a two-node graph that reads ``input`` and writes ``output``.

    The reader and writer factories come from ``resolve_factory``. Note that
    ``reader_options``, ``writer_options`` and ``options`` are accepted but
    not used here.
    """
    source_node = resolve_factory(reader, input, READER)(input)
    sink_node = resolve_factory(writer, output, WRITER)(output)

    conversion = bonobo.Graph()
    conversion.add_chain(source_node, sink_node)

    services = {'fs': bonobo.open_fs()}
    return bonobo.run(conversion, services=services)
Beispiel #25
0
    def handle(self, *args, **options):
        """Run the graph returned by ``get_graph`` with console-friendly output streams.

        ``self.stdout`` and ``self.stderr`` are temporarily replaced by
        wrappers around the ConsoleOutputPlugin streams. They are restored in
        a ``finally`` clause so a failing run no longer leaves the
        replacement streams installed.

        :return: the string ``'\\nReturn Value: '`` followed by ``str()`` of the run result
        """
        _stdout_backup, _stderr_backup = self.stdout, self.stderr

        try:
            self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout,
                                        ending=CLEAR_EOL + '\n')
            self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr,
                                        ending=CLEAR_EOL + '\n')
            self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + '!' + Style.RESET_ALL + ' ' + x

            with bonobo.parse_args(options) as options:
                result = bonobo.run(
                    self.get_graph(*args, **options),
                    services=self.get_services(),
                )
        finally:
            self.stdout, self.stderr = _stdout_backup, _stderr_backup

        return '\nReturn Value: ' + str(result)
Beispiel #26
0
    def handle(
        self,
        input_filename,
        output_filename,
        reader=None,
        reader_option=None,
        writer=None,
        writer_option=None,
        option=None,
        limit=None,
        transformation=None,
    ):
        """Convert ``input_filename`` to ``output_filename`` through a reader/writer graph.

        ``option`` entries are applied to both reader and writer, ahead of
        the reader- or writer-specific ones. An output name of ``'-'`` pretty
        prints instead of writing a file. ``limit`` and ``transformation``
        add nodes between reader and writer.
        """
        shared_options = option or []

        reader_factory = default_registry.get_reader_factory_for(
            input_filename, format=reader)
        reader_kwargs = _resolve_options(shared_options +
                                         (reader_option or []))

        to_console = output_filename == '-'
        if to_console:
            writer_factory, writer_args = bonobo.PrettyPrinter, ()
        else:
            writer_factory = default_registry.get_writer_factory_for(
                output_filename, format=writer)
            writer_args = (output_filename, )
        writer_kwargs = _resolve_options(shared_options +
                                         (writer_option or []))

        # Optional Limit node first, then the user-supplied transformations.
        transformations = (bonobo.Limit(limit), ) if limit else ()
        transformations += _resolve_transformations(transformation)

        graph = bonobo.Graph()
        nodes = (
            reader_factory(input_filename, **reader_kwargs),
            *transformations,
            writer_factory(*writer_args, **writer_kwargs),
        )
        graph.add_chain(*nodes)

        services = {'fs': bonobo.open_fs()}
        return bonobo.run(graph, services=services)
Beispiel #27
0
    def run(self, *args, **options):
        """Run each graph produced by ``get_graph`` and return the list of run results.

        :raises AssertionError: when ``get_graph`` yields something that is not a ``bonobo.Graph``
        """
        results = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            graph_coll = self.get_graph(*args, **options)

            # A single graph is treated as a one-element collection.
            if not isinstance(graph_coll, GeneratorType):
                graph_coll = (graph_coll, )

            for number, graph in enumerate(graph_coll, start=1):
                assert isinstance(graph, bonobo.Graph), 'Invalid graph provided.'
                heading = '{}. {}'.format(number, graph.name)
                print(term.lightwhite(heading))
                outcome = bonobo.run(graph, services=services, strategy=strategy)
                results.append(outcome)
                print(term.lightblack(' ... return value: ' + str(outcome)))
                print()

        return results
Beispiel #28
0
def main():
    """Run the whole pipeline: users, channels, message counts, raw threads, enrichment, org export.

    Configuration comes from ``read_configuration()`` and the command-line
    arguments from ``make_parser()``. When ``arguments.quick`` is set, the
    ``*_quick`` variants of the message-count and raw-thread updates are
    used instead of the full date-range updates.
    """
    configuration = read_configuration()
    parser = make_parser()
    arguments = parser.parse_args()
    logging.info("Running with configuration %s and arguments %s.",
                 configuration, arguments)
    # Every graph below derives its own services from these base services.
    base_services = get_services(configuration)
    logging.info("Getting users")
    with log_timed("users graph"):
        bonobo.run(get_users_graph(),
                   services=get_users_services(base_services))
    logging.info("Getting channels")
    with log_timed("channels graph"):
        bonobo.run(get_channels_graph(),
                   services=get_channels_services(base_services))
    logging.info("Getting message count")
    with log_timed("message count graph"):
        if arguments.quick:
            # Quick mode only looks at today.
            update_message_count_quick(datetime.date.today(), base_services)
        else:
            update_message_count(configuration.start_date,
                                 configuration.end_date, base_services)
    logging.info("Getting raw threads.")
    with log_timed("raw threads graph"):
        if arguments.quick:
            # Quick mode passes the date one working day away from today
            # (as computed by nworking_days_before) together with the
            # configured end date.
            update_raw_threads_quick(
                nworking_days_before(datetime.date.today(), 1),
                configuration.end_date, base_services)
        else:
            update_raw_threads(configuration.start_date,
                               configuration.end_date,
                               configuration.threads_lookback_working_days,
                               base_services)
    # NOTE(review): the two final steps are not wrapped in log_timed, unlike
    # the steps above - confirm whether that is intentional.
    logging.info("Enriching messages with user and channel information")
    bonobo.run(get_enriched_messages_graph(configuration.start_date,
                                           configuration.end_date),
               services=get_enriched_messages_services(base_services))
    logging.info("Converting to org-mode")
    bonobo.run(get_convert_to_org_graph(),
               services=get_convert_to_org_services(base_services))
Beispiel #29
0
    def handle(
            self,
            input_filename,
            output_filename,
            reader=None,
            reader_option=None,
            writer=None,
            writer_option=None,
            option=None,
            limit=None,
            transformation=None,
    ):
        """Build and run a graph that reads ``input_filename`` and writes ``output_filename``.

        Options given through ``option`` go to both ends of the chain,
        followed by the reader- or writer-specific ones. When the output
        name is ``'-'`` a PrettyPrinter replaces the file writer.
        """
        common = option or []

        reader_factory = default_registry.get_reader_factory_for(input_filename, format=reader)
        reader_kwargs = _resolve_options(common + (reader_option or []))

        if output_filename != '-':
            writer_factory = default_registry.get_writer_factory_for(output_filename, format=writer)
            writer_args = (output_filename, )
        else:
            writer_factory = bonobo.PrettyPrinter
            writer_args = ()
        writer_kwargs = _resolve_options(common + (writer_option or []))

        # Nodes placed between reader and writer: optional limit, then transformations.
        middle = ()
        if limit:
            middle = (bonobo.Limit(limit), )
        middle = middle + _resolve_transformations(transformation)

        graph = bonobo.Graph()
        source = reader_factory(input_filename, **reader_kwargs)
        sink = writer_factory(*writer_args, **writer_kwargs)
        graph.add_chain(source, *middle, sink)

        return bonobo.run(graph, services={'fs': bonobo.open_fs()})
Beispiel #30
0
    def run(self, *args, **options):
        """Execute every graph from ``get_graph`` and collect the run results in a list.

        :raises ValueError: when ``get_graph`` yields something that is not a ``bonobo.Graph``
        """
        results = []
        with bonobo.parse_args(options) as options:
            services = self.get_services()
            strategy = self.get_strategy()
            produced = self.get_graph(*args, **options)

            # Wrap a single graph so the loop below handles both cases.
            graph_coll = produced if isinstance(produced, GeneratorType) else (produced,)

            for rank, graph in enumerate(graph_coll, start=1):
                if not isinstance(graph, bonobo.Graph):
                    raise ValueError('Expected a Graph instance, got {!r}.'.format(graph))
                print(term.lightwhite('{}. {}'.format(rank, graph.name)))
                run_result = bonobo.run(graph, services=services, strategy=strategy)
                results.append(run_result)
                print(term.lightblack(' ... return value: ' + str(run_result)))
                print()

        return results
Beispiel #31
0
import bonobo


def split_one(line):
    """Split ``line`` at the first ``', '`` into a dict with ``name`` and ``address`` keys.

    A line without the separator yields a dict holding only ``name``.
    """
    parts = line.split(', ', 1)
    return {key: value for key, value in zip(("name", "address"), parts)}


# Pipeline: read 'coffeeshops.txt' line by line, split each line into
# name/address, and write the result as JSON through the 'fs.output' service.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json', fs='fs.output'),
)


def get_services():
    """Return the filesystem services: examples datasets as ``fs``, default filesystem as ``fs.output``."""
    services = {}
    services['fs'] = bonobo.open_examples_fs('datasets')
    services['fs.output'] = bonobo.open_fs()
    return services


if __name__ == '__main__':
    # Run the module-level graph when executed as a script.
    bonobo.run(
        graph,
        services=get_services(),
    )
Beispiel #32
0
import bonobo


def extract():
    """Yield the three sample strings 'foo', 'bar' and 'baz', in that order."""
    yield from ('foo', 'bar', 'baz')


def transform(x):
    """Return ``x`` converted to upper case."""
    uppercased = x.upper()
    return uppercased


def load(x):
    """Print ``x`` to standard output; this is the final step of the pipeline."""
    print(x)


# extract -> transform -> load, chained in a single graph.
graph = bonobo.Graph()
graph.add_chain(extract, transform, load)

graph.__doc__ = 'hello'

if __name__ == '__main__':
    # Run the module-level graph when executed as a script.
    bonobo.run(graph)
def load(result):
    """Store one result through ``cinco.insert_multiplo`` and print 'Fin!'."""
    # Every result that reaches this point should be stored: either
    # appended as a new line to a text file (open with 'a', then write)
    # or inserted into a database of your choice.
    # The goal is to end up with the 5 times table stored in a file
    # or in a database.
    cinco.insert_multiplo(result)

    print('Fin!')


def get_graph(**options):
    """Return a graph chaining extract, transform and load; ``options`` are accepted but unused."""
    return bonobo.Graph(extract, transform, load)


def get_services(**options):
    """Return an empty services mapping; ``options`` are accepted but unused."""
    services = {}
    return services


if __name__ == "__main__":
    # Create the schema first, before the graph runs and load() inserts rows.
    cinco.create_schema()

    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(
            get_graph(**options),
            services=get_services(**options),
        )
Beispiel #34
0
        bonobo.CsvReader('datasets/coffeeshops.txt'),
        *((bonobo.Limit(_limit), ) if _limit else ()),
        *((bonobo.PrettyPrinter(), ) if _print else ()),
        bonobo.CsvWriter('coffeeshops.csv', fs='fs.output')
    )


if __name__ == '__main__':
    parser = bonobo.get_argument_parser()

    # Optional cap on the number of processed lines.
    parser.add_argument(
        '--limit', '-l',
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )
    # Optional pretty-printing before the output file is written.
    parser.add_argument(
        '--print', '-p',
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )

    with bonobo.parse_args(parser) as options:
        bonobo.run(
            get_graph(_limit=options['limit'], _print=options['print']),
            services=get_services(),
        )
Beispiel #35
0
import bonobo
import datetime
import time


def extract():
    """Yield the current datetime 60 times, sleeping one second before every yield but the first."""
    for tick in range(60):
        if tick != 0:
            time.sleep(1)
        yield datetime.datetime.now()


def get_graph():
    """Return a graph that feeds every value produced by ``extract`` to ``print``."""
    return bonobo.Graph(extract, print)


if __name__ == '__main__':
    # The parsed options are not needed here; parse_args is only entered
    # as a context manager around the run.
    cli_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(cli_parser):
        bonobo.run(get_graph())
Beispiel #36
0

def write_to_mongodb(dr):
    """Insert ``dr`` into ``db.transactions`` and yield the id of the inserted document."""
    insert_result = db.transactions.insert_one(dr)
    yield insert_result.inserted_id


def get_graph(**options):
    """Return the statement-import graph; ``options`` are accepted but unused.

    The chain starts from the two account names, then fetches, parses,
    flattens and categorises the statements before writing them to MongoDB.
    """
    graph = bonobo.Graph()
    graph.add_chain(
        ["Swedbank", "AmericanExpress"],
        get_account_statementfiles,
        # bonobo.Limit(10),
        parse_sf,
        flatten_statements,
        apply_categories,
        # bonobo.PrettyPrinter(),
        write_to_mongodb,
    )
    return graph


def get_services(**options):
    """Return an empty services mapping; ``options`` are accepted but unused."""
    return dict()


if __name__ == "__main__":
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(
            get_graph(**options),
            services=get_services(**options),
        )
    # Cleanup runs once the graph has finished and the options context is closed.
    cleanup_swedbanktransactions()
Beispiel #37
0
import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield the stripped ``line`` unless, once stripped, it starts with ``#``."""
    stripped = line.strip()
    if stripped.startswith('#'):
        return
    yield stripped


def get_graph(*, _limit=(), _print=()):
    """Return a graph extracting the first ``:``-separated field of each non-comment line.

    :param _limit: nodes inserted right after the comment filter
    :param _print: nodes inserted right before the file writer
    """
    nodes = (
        bonobo.FileReader('datasets/passwd.txt'),
        skip_comments,
        *_limit,
        lambda s: s.split(':')[0],
        *_print,
        bonobo.FileWriter('usernames.txt', fs='fs.output'),
    )
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # Translate the parsed CLI options into get_graph keyword arguments.
        bonobo.run(
            get_graph(**examples.get_graph_options(options)),
            services=get_services(),
        )
Beispiel #38
0
        list(filter(None, map(_getlink, json.loads(row.get('links'))))),
        'country':
        pycountry.countries.get(
            alpha_2=row.get('country_code', '').upper()
        ).name,
    }
    return result


def get_graph(graph=None, *, _limit=(), _print=()):
    """Add the fablabs chain to ``graph`` (a new Graph when omitted) and return it.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created only when this is ``None``
    :param _limit: nodes inserted right after the API reader
    :param _print: nodes inserted right before the JSON writer
    :return: the graph the chain was added to
    """
    # Test against None explicitly: Graph supports len(), so an empty graph
    # passed by the caller is falsy and ``graph or bonobo.Graph()`` would
    # silently swap it for a different object.
    if graph is None:
        graph = bonobo.Graph()
    graph.add_chain(
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path='fablabs.json'),
    )
    return graph


if __name__ == '__main__':
    parser = examples.get_argument_parser()

    with bonobo.parse_args(parser) as options:
        # Translate the parsed CLI options into get_graph keyword arguments.
        bonobo.run(get_graph(**examples.get_graph_options(options)), services=get_services())