Ejemplo n.º 1
0
import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield ``line`` stripped of surrounding whitespace unless it is a comment.

    A line counts as a comment when, once stripped, it starts with ``#``; in
    that case nothing is yielded. Any other line (including an empty one) is
    yielded exactly once in its stripped form.
    """
    stripped = line.strip()
    if stripped.startswith('#'):
        # Comment line: end the generator without producing a value.
        return
    yield stripped


def get_graph(*, _limit=(), _print=()):
    """Assemble the graph that extracts the first ``:``-separated field of each line.

    The chain reads ``datasets/passwd.txt``, passes lines through
    ``skip_comments``, keeps the text before the first ``:`` and hands the
    result to a ``FileWriter`` targeting ``usernames.txt`` (created with
    ``fs='fs.output'``).

    :param _limit: extra nodes inserted right after ``skip_comments``.
    :param _print: extra nodes inserted right before the writer.
    :return: the ``bonobo.Graph`` built from the resulting chain.
    """
    nodes = [bonobo.FileReader('datasets/passwd.txt'), skip_comments]
    nodes.extend(_limit)
    nodes.append(lambda s: s.split(':')[0])
    nodes.extend(_print)
    nodes.append(bonobo.FileWriter('usernames.txt', fs='fs.output'))
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    # Script entry point: parse the example command line, then build and run the graph.
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        # The parsed options are converted into keyword arguments for get_graph().
        graph_kwargs = examples.get_graph_options(options)
        passwd_graph = get_graph(**graph_kwargs)
        bonobo.run(passwd_graph, services=get_services())
Ejemplo n.º 2
0
    return category, sms, sms_clean


def get_graph(*, _limit=(), _print=()):
    """Build a graph that reads ``spam.pkl`` and applies ``cleanse_sms``.

    :param _limit: extra nodes inserted between the reader and ``cleanse_sms``.
    :param _print: extra nodes appended after ``cleanse_sms``.
    :return: a new ``bonobo.Graph`` holding that single chain.
    """
    sms_graph = bonobo.Graph()

    # spam.pkl is within the gzipped tarball
    reader = bonobo.PickleReader('spam.pkl')
    chain = (reader, *_limit, cleanse_sms, *_print)
    sms_graph.add_chain(*chain)

    return sms_graph


def get_services():
    """Return the shared example services with ``'fs'`` set to the spam archive.

    The mapping returned by ``._services.get_services`` is copied into a new
    dict whose ``'fs'`` entry is a ``TarFS`` built from the path given by
    ``bonobo.get_examples_path('datasets/spam.tgz')``.
    """
    # Imported locally under an alias because this function shares its name
    # with the helper it wraps.
    from ._services import get_services as _get_base_services

    base_services = _get_base_services()
    archive_path = bonobo.get_examples_path('datasets/spam.tgz')
    return {**base_services, 'fs': TarFS(archive_path)}


if __name__ == '__main__':
    # Script entry point: parse the example command line, then build and run the graph.
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph_kwargs = examples.get_graph_options(options)
        sms_graph = get_graph(**graph_kwargs)
        bonobo.run(sms_graph, services=get_services())
Ejemplo n.º 3
0
        list(filter(None, map(_getlink, json.loads(row.get('links'))))),
        'country':
        pycountry.countries.get(
            alpha_2=row.get('country_code', '').upper()
        ).name,
    }
    return result


def get_graph(graph=None, *, _limit=(), _print=()):
    """Add the fablabs chain to ``graph`` and return that graph.

    The chain starts from ``OpenDataSoftAPI(dataset=API_DATASET)``, applies
    ``normalize`` followed by ``bonobo.UnpackItems(0)`` and ends with a
    ``bonobo.JsonWriter`` created with ``path='fablabs.json'``.

    :param graph: graph to extend; a new ``bonobo.Graph`` is created only
        when this is ``None``.
    :param _limit: extra nodes inserted right after the API reader.
    :param _print: extra nodes inserted right before the JSON writer.
    :return: the graph the chain was added to.
    """
    # Test for None explicitly rather than relying on truthiness: a graph
    # supplied by the caller that evaluates as false (for instance one that is
    # still empty, if the graph type defines a length) must be extended in
    # place, not silently swapped for a brand new graph.
    if graph is None:
        graph = bonobo.Graph()
    graph.add_chain(
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path='fablabs.json'),
    )
    return graph


if __name__ == '__main__':
    # Script entry point: parse the example command line, then build and run the graph.
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph_kwargs = examples.get_graph_options(options)
        fablabs_graph = get_graph(**graph_kwargs)
        bonobo.run(fablabs_graph, services=get_services())
Ejemplo n.º 4
0
# Maps each dataset name selectable on the command line to the factory that
# adds the corresponding chain to a graph.
graph_factories = {
    'coffeeshops': get_coffeeshops_graph,
    'fablabs': get_fablabs_graph
}

if __name__ == '__main__':
    parser = examples.get_argument_parser()
    # --target/-t takes one or more names, restricted to the graph_factories keys.
    parser.add_argument('--target',
                        '-t',
                        choices=graph_factories.keys(),
                        nargs='+')
    # --sync is a boolean flag that is off unless given.
    parser.add_argument('--sync', action='store_true', default=False)

    with bonobo.parse_args(parser) as options:
        graph_options = examples.get_graph_options(options)
        # Without an explicit target, every known graph is built, in sorted name order.
        graph_names = list(options['target'] if options['target'] else sorted(
            graph_factories.keys()))

        # Create a graph with all requested subgraphs
        graph = bonobo.Graph()
        for name in graph_names:
            # Each factory receives the graph built so far; its return value
            # becomes the graph handed to the next factory.
            graph = graph_factories[name](graph, **graph_options)

        bonobo.run(graph, services=get_services())

        if options['sync']:
            # TODO: once a parallel option for nodes is implemented, this needs to be rewritten to use a graph.
            # boto3 is imported here, so it is only loaded when --sync is requested.
            import boto3

            s3 = boto3.client('s3')
Ejemplo n.º 5
0
from bonobo.examples.datasets.services import get_services, get_datasets_dir, get_minor_version

# Maps each dataset name selectable on the command line to the factory that
# adds the corresponding chain to a graph.
graph_factories = {
    'coffeeshops': get_coffeeshops_graph,
    'fablabs': get_fablabs_graph,
}

if __name__ == '__main__':
    parser = examples.get_argument_parser()
    # --target/-t takes one or more names, restricted to the graph_factories keys.
    parser.add_argument(
        '--target', '-t', choices=graph_factories.keys(), nargs='+'
    )
    # --sync is a boolean flag that is off unless given.
    parser.add_argument('--sync', action='store_true', default=False)

    with bonobo.parse_args(parser) as options:
        graph_options = examples.get_graph_options(options)
        # Without an explicit target, every known graph is built, in sorted name order.
        graph_names = list(
            options['target']
            if options['target'] else sorted(graph_factories.keys())
        )

        # Create a graph with all requested subgraphs
        graph = bonobo.Graph()
        for name in graph_names:
            # Each factory receives the graph built so far; its return value
            # becomes the graph handed to the next factory.
            graph = graph_factories[name](graph, **graph_options)

        bonobo.run(graph, services=get_services())

        if options['sync']:
            # TODO: once a parallel option for nodes is implemented, this needs to be rewritten to use a graph.
            # boto3 is imported here, so it is only loaded when --sync is requested.
            import boto3
Ejemplo n.º 6
0
def get_graph(*, _limit=(), _print=()):
    """Build a graph that reads ``spam.pkl`` and applies ``cleanse_sms``.

    :param _limit: extra nodes inserted between the reader and ``cleanse_sms``.
    :param _print: extra nodes appended after ``cleanse_sms``.
    :return: a new ``bonobo.Graph`` holding that single chain.
    """
    sms_graph = bonobo.Graph()

    # spam.pkl is within the gzipped tarball
    reader = bonobo.PickleReader('spam.pkl')
    chain = (reader, *_limit, cleanse_sms, *_print)
    sms_graph.add_chain(*chain)

    return sms_graph


def get_services():
    """Return the shared example services with ``'fs'`` set to the spam archive.

    The mapping returned by ``._services.get_services`` is copied into a new
    dict whose ``'fs'`` entry is a ``TarFS`` built from the path given by
    ``bonobo.get_examples_path('datasets/spam.tgz')``.
    """
    # Imported locally under an alias because this function shares its name
    # with the helper it wraps.
    from ._services import get_services as _get_base_services

    base_services = _get_base_services()
    archive_path = bonobo.get_examples_path('datasets/spam.tgz')
    return {**base_services, 'fs': TarFS(archive_path)}


if __name__ == '__main__':
    # Script entry point: parse the example command line, then build and run the graph.
    parser = examples.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph_kwargs = examples.get_graph_options(options)
        sms_graph = get_graph(**graph_kwargs)
        bonobo.run(sms_graph, services=get_services())