Ejemplo n.º 1
0
import bonobo
from bonobo import examples
from bonobo.examples.files._services import get_services


def skip_comments(line):
    """Yield *line* with surrounding whitespace removed, unless it is a comment.

    A line counts as a comment when, once stripped, it begins with ``#``;
    such lines produce nothing. Any other line (including a blank one, which
    strips down to the empty string) is yielded exactly once.
    """
    stripped = line.strip()
    if stripped.startswith('#'):
        # Comment line: finish the generator without yielding anything.
        return
    yield stripped


def get_graph(*, _limit=(), _print=()):
    """Assemble the graph that extracts the first ``:``-separated field per line.

    The chain reads ``datasets/passwd.txt``, passes each line through
    ``skip_comments``, keeps the text before the first ``:`` and hands the
    result to a ``FileWriter`` targeting ``usernames.txt`` (``fs='fs.output'``).

    :param _limit: iterable of extra nodes spliced in right after comment
        filtering (empty by default).
    :param _print: iterable of extra nodes spliced in just before the writer
        (empty by default).
    :return: the constructed ``bonobo.Graph``.
    """
    # Collect the nodes in chain order first, then hand them over in one go.
    nodes = [
        bonobo.FileReader('datasets/passwd.txt'),
        skip_comments,
    ]
    nodes.extend(_limit)
    nodes.append(lambda s: s.split(':')[0])
    nodes.extend(_print)
    nodes.append(bonobo.FileWriter('usernames.txt', fs='fs.output'))
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    # Script entry point: obtain the examples' argument parser, turn the
    # parsed options into get_graph() keyword arguments, and run the graph
    # with the services returned by get_services().
    arg_parser = examples.get_argument_parser()
    with bonobo.parse_args(arg_parser) as options:
        graph_kwargs = examples.get_graph_options(options)
        graph = get_graph(**graph_kwargs)
        bonobo.run(graph, services=get_services())
Ejemplo n.º 2
0
        bonobo.CsvReader('datasets/coffeeshops.txt'),
        *((bonobo.Limit(_limit), ) if _limit else ()),
        *((bonobo.PrettyPrinter(), ) if _print else ()),
        bonobo.CsvWriter('coffeeshops.csv', fs='fs.output')
    )


if __name__ == '__main__':
    # Script entry point: register the --limit and --print options, then
    # build the graph from the parsed values and run it.
    parser = bonobo.get_argument_parser()

    limit_help = 'If set, limits the number of processed lines.'
    print_help = 'If set, pretty prints before writing to output file.'
    parser.add_argument('--limit', '-l', type=int, default=None, help=limit_help)
    parser.add_argument('--print', '-p', action='store_true', default=False, help=print_help)

    with bonobo.parse_args(parser) as options:
        graph = get_graph(_limit=options['limit'], _print=options['print'])
        bonobo.run(graph, services=get_services())
Ejemplo n.º 3
0
import bonobo
from bonobo.examples.files._services import get_services


def get_graph(*, _limit=None, _print=False):
    """Build a graph that reads ``datasets/theaters.json`` and fans out to writers.

    A trunk chain holds the ``JsonReader`` and, when ``_limit`` is truthy, a
    ``Limit(_limit)`` node. Each of the following is then added as its own
    chain fed from the trunk's output: a ``PrettyPrinter`` (only when
    ``_print`` is truthy), a ``JsonWriter`` for ``theaters.json`` and an
    ``LdjsonWriter`` for ``theaters.ldjson`` (both with ``fs='fs.output'``).

    :param _limit: value passed to ``bonobo.Limit``; a falsy value (``None``
        or ``0``) means no ``Limit`` node is added.
    :param _print: whether to add the ``PrettyPrinter`` branch.
    :return: the constructed ``bonobo.Graph``.
    """
    graph = bonobo.Graph()

    trunk_nodes = [bonobo.JsonReader('datasets/theaters.json')]
    if _limit:
        trunk_nodes.append(bonobo.Limit(_limit))
    trunk = graph.add_chain(*trunk_nodes)

    def attach(node):
        # Each branch is a one-node chain plugged into the trunk's output.
        graph.add_chain(node, _input=trunk.output)

    if _print:
        attach(bonobo.PrettyPrinter())
    attach(bonobo.JsonWriter('theaters.json', fs='fs.output'))
    attach(bonobo.LdjsonWriter('theaters.ldjson', fs='fs.output'))

    return graph


if __name__ == '__main__':
    # Script entry point: declare the command-line options as data, register
    # them on bonobo's argument parser, then build and run the graph.
    parser = bonobo.get_argument_parser()

    cli_options = (
        (
            ('--limit', '-l'),
            dict(type=int, default=None, help='If set, limits the number of processed lines.'),
        ),
        (
            ('--print', '-p'),
            dict(action='store_true', default=False, help='If set, pretty prints before writing to output file.'),
        ),
    )
    for flags, settings in cli_options:
        parser.add_argument(*flags, **settings)

    with bonobo.parse_args(parser) as options:
        graph = get_graph(_limit=options['limit'], _print=options['print'])
        bonobo.run(graph, services=get_services())
Ejemplo n.º 4
0
import bonobo
from bonobo.examples.files._services import get_services


def get_graph(*, _limit=None, _print=False):
    """Build a single-chain graph from ``datasets/coffeeshops.txt`` to ``coffeeshops.csv``.

    The chain starts with a ``CsvReader`` and ends with a ``CsvWriter``
    (``fs='fs.output'``). In between, a ``Limit(_limit)`` node is inserted
    when ``_limit`` is truthy, followed by a ``PrettyPrinter`` when ``_print``
    is truthy.

    :param _limit: value passed to ``bonobo.Limit``; a falsy value (``None``
        or ``0``) means no ``Limit`` node is added.
    :param _print: whether to insert the ``PrettyPrinter`` node.
    :return: the constructed ``bonobo.Graph``.
    """
    chain = [bonobo.CsvReader('datasets/coffeeshops.txt')]
    if _limit:
        chain.append(bonobo.Limit(_limit))
    if _print:
        chain.append(bonobo.PrettyPrinter())
    chain.append(bonobo.CsvWriter('coffeeshops.csv', fs='fs.output'))
    return bonobo.Graph(*chain)


if __name__ == '__main__':
    # Script entry point: add the --limit / --print options to bonobo's
    # argument parser, then build the graph from the parsed values and run it.
    parser = bonobo.get_argument_parser()

    parser.add_argument(
        '--limit', '-l',
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )
    parser.add_argument(
        '--print', '-p',
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )

    with bonobo.parse_args(parser) as options:
        limit = options['limit']
        pretty = options['print']
        graph = get_graph(_limit=limit, _print=pretty)
        bonobo.run(graph, services=get_services())