def get_graph(**options):
    """
    Assemble the bonobo graph for the AWS accounts job.

    The first chain (registered with ``_name="main"``) is
    ``extract_accounts`` -> ``transform`` ->
    ``JsonWriter('aws_accounts_ex.json')`` -> ``valid_aws_account``.
    Three further chains are then attached:

    * ``JsonWriter('aws_accounts.json')`` with ``_input="main"``;
    * ``UnpackItems(0)`` -> ``CsvWriter('aws_accounts.csv')`` with
      ``_input=valid_aws_account``;
    * ``UnpackItems(0)`` -> ``InsertOrUpdate`` with
      ``_input=valid_aws_account``.

    :param options: keyword options; ``options['table_suffix']`` is
        concatenated to ``'aws_accounts'`` to form the table name.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    # Main chain.
    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    pipeline.add_chain(*main_nodes, _name="main")

    # JSON dump hooked onto the node registered as "main".
    json_dump = bonobo.JsonWriter('aws_accounts.json')
    pipeline.add_chain(json_dump, _input="main")

    # CSV export of whatever valid_aws_account emits.
    csv_nodes = (
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('aws_accounts.csv'),
    )
    pipeline.add_chain(*csv_nodes, _input=valid_aws_account)

    # Database upsert of whatever valid_aws_account emits.
    unpack_for_db = bonobo.UnpackItems(0)
    upsert = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='aws_accounts' + options['table_suffix'],
        discriminant=('account_id', ),
        engine='db',
    )
    pipeline.add_chain(unpack_for_db, upsert, _input=valid_aws_account)

    return pipeline
Esempio n. 2
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the "liste-des-cafes-a-un-euro" pipeline with graph cursors.

    The producer chain reads the dataset through ``ODSReader``, then applies
    ``PartialGraph(*_limit)``, ``UnpackItems(0)``, a ``Rename``, a ``Format``
    adding ``city``/``country``, an ``OrderFields`` and finally
    ``PartialGraph(*_print)``.  The producer's output is then connected to a
    CSV writer, a JSON writer and a line-delimited JSON writer.

    :param graph: graph to extend; any falsy value (including ``None``) is
        replaced by a new ``bonobo.Graph()``.
    :param _limit: nodes forwarded to ``PartialGraph`` right after the reader.
    :param _print: nodes forwarded to ``PartialGraph`` at the producer's end.
    :return: the graph that was populated.

    """
    if not graph:
        graph = bonobo.Graph()

    # Grow the producer one node at a time; ``>>`` is left-associative, so
    # this is the same sequence of operations as one long expression.
    producer = graph.get_cursor()
    producer = producer >> ODSReader(dataset="liste-des-cafes-a-un-euro",
                                     netloc="opendata.paris.fr")
    producer = producer >> PartialGraph(*_limit)
    producer = producer >> bonobo.UnpackItems(0)
    producer = producer >> bonobo.Rename(name="nom_du_cafe",
                                         address="adresse",
                                         zipcode="arrondissement")
    producer = producer >> bonobo.Format(city="Paris", country="France")
    producer = producer >> bonobo.OrderFields([
        "name",
        "address",
        "zipcode",
        "city",
        "country",
        "geometry",
        "geoloc",
    ])
    producer = producer >> PartialGraph(*_print)

    tail = producer.output

    # Comma separated values.
    graph.get_cursor(tail) >> bonobo.CsvWriter(
        "coffeeshops.csv",
        fields=["name", "address", "zipcode", "city"],
        delimiter=",",
    )

    # Standard JSON
    graph.get_cursor(tail) >> bonobo.JsonWriter(path="coffeeshops.json")

    # Line-delimited JSON
    graph.get_cursor(tail) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")

    return graph
Esempio n. 3
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the single-chain graph that writes ``fablabs.json``.

    Chain order: ``OpenDataSoftAPI(dataset=API_DATASET)``, the ``_limit``
    nodes, ``normalize``, ``UnpackItems(0)``, the ``_print`` nodes, and a
    ``JsonWriter``.

    :param graph: graph to extend; any falsy value (including ``None``) is
        replaced by a new ``bonobo.Graph()``.
    :param _limit: iterable of nodes inserted right after the reader.
    :param _print: iterable of nodes inserted right before the writer.
    :return: the graph that was populated.

    """
    if not graph:
        graph = bonobo.Graph()

    nodes = [OpenDataSoftAPI(dataset=API_DATASET)]
    nodes.extend(_limit)
    nodes.append(normalize)
    nodes.append(bonobo.UnpackItems(0))
    nodes.extend(_print)
    nodes.append(bonobo.JsonWriter(path='fablabs.json'))

    graph.add_chain(*nodes)
    return graph
Esempio n. 4
0
def get_graph(*, _limit=None, _print=False):
    """
    Build the theaters graph: one reader trunk fanned out to two writers.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node follows the
        JSON reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` is attached to
        the trunk's output.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # Trunk: the reader, optionally followed by a limiter.
    trunk_nodes = [bonobo.JsonReader('datasets/theaters.json')]
    if _limit:
        trunk_nodes.append(bonobo.Limit(_limit))
    trunk = graph.add_chain(*trunk_nodes)

    if _print:
        printer = bonobo.PrettyPrinter()
        graph.add_chain(printer, _input=trunk.output)

    # Both writers branch off the trunk's output.
    json_sink = bonobo.JsonWriter('theaters.json', fs='fs.output')
    graph.add_chain(json_sink, _input=trunk.output)

    ldjson_sink = bonobo.LdjsonWriter('theaters.ldjson', fs='fs.output')
    graph.add_chain(ldjson_sink, _input=trunk.output)

    return graph
Esempio n. 5
0
def get_graph(*, _limit=None, _print=False):
    """
    Build the theaters graph reading from the ``fs.static`` filesystem.

    ``theaters.json`` is read with ``fs="fs.static"``; the trunk's output is
    connected to a JSON writer and a line-delimited JSON writer, both using
    ``fs="fs.output"``.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node follows the
        reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` is attached to
        the trunk's output.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    source = bonobo.JsonReader("theaters.json", fs="fs.static")
    if _limit:
        limiter_nodes = (bonobo.Limit(_limit), )
    else:
        limiter_nodes = ()
    trunk = graph.add_chain(source, *limiter_nodes)

    if _print:
        graph.add_chain(bonobo.PrettyPrinter(), _input=trunk.output)

    json_out = bonobo.JsonWriter("theaters.output.json", fs="fs.output")
    graph.add_chain(json_out, _input=trunk.output)

    ldjson_out = bonobo.LdjsonWriter("theaters.output.ldjson", fs="fs.output")
    graph.add_chain(ldjson_out, _input=trunk.output)

    return graph
Esempio n. 6
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build the "liste-des-cafes-a-un-euro" pipeline with ``add_chain``.

    The producer chain is: ``ODSReader``, the ``_limit`` nodes,
    ``UnpackItems(0)``, a ``Rename``, a ``Format`` adding ``city``/``country``,
    an ``OrderFields`` and the ``_print`` nodes.  Its output feeds a CSV
    writer, a JSON writer and a line-delimited JSON writer.

    :param graph: graph to extend; any falsy value (including ``None``) is
        replaced by a new ``bonobo.Graph()``.
    :param _limit: iterable of nodes inserted right after the reader.
    :param _print: iterable of nodes appended at the end of the producer.
    :return: the graph that was populated.

    """
    if not graph:
        graph = bonobo.Graph()

    # Assemble the producer's nodes in chain order.
    producer_nodes = [
        ODSReader(dataset='liste-des-cafes-a-un-euro',
                  netloc='opendata.paris.fr'),
    ]
    producer_nodes.extend(_limit)
    producer_nodes.append(bonobo.UnpackItems(0))
    producer_nodes.append(
        bonobo.Rename(name='nom_du_cafe',
                      address='adresse',
                      zipcode='arrondissement'))
    producer_nodes.append(bonobo.Format(city='Paris', country='France'))
    producer_nodes.append(
        bonobo.OrderFields([
            'name',
            'address',
            'zipcode',
            'city',
            'country',
            'geometry',
            'geoloc',
        ]))
    producer_nodes.extend(_print)
    producer = graph.add_chain(*producer_nodes)

    # Comma separated values.
    csv_sink = bonobo.CsvWriter('coffeeshops.csv',
                                fields=['name', 'address', 'zipcode', 'city'],
                                delimiter=',')
    graph.add_chain(csv_sink, _input=producer.output)

    # Standard JSON
    json_sink = bonobo.JsonWriter(path='coffeeshops.json')
    graph.add_chain(json_sink, _input=producer.output)

    # Line-delimited JSON
    ldjson_sink = bonobo.LdjsonWriter(path='coffeeshops.ldjson')
    graph.add_chain(ldjson_sink, _input=producer.output)

    return graph
Esempio n. 7
0
import bonobo


def split_one(line):
    """Split *line* at the first ``', '`` and return the pieces as a list.

    The list has two items when the separator is present, otherwise a
    single item holding the whole line.
    """
    head, separator, tail = line.partition(', ')
    if not separator:
        return [head]
    return [head, tail]


# Pipeline: read lines from 'coffeeshops.txt', split each one with
# split_one, and write the result to 'coffeeshops.json'.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json'),
)

if __name__ == '__main__':
    # The 'fs' service is the filesystem returned by
    # bonobo.open_examples_fs('datasets').
    bonobo.run(
        graph,
        services={'fs': bonobo.open_examples_fs('datasets')},
    )
Esempio n. 8
0
def get_graph(**options):
    """
    Assemble the bonobo graph for the AWS billing job.

    A chain registered as ``"main"`` (with ``_input=None``) is created first.
    Then, for each of ``options['months']`` months going backwards from the
    start of the month of ``options['now']``, an ``AwsBillingReader`` chain
    (optionally followed by a ``bonobo.Limit``) is added with
    ``_output="main"``.  Finally an ``InsertOrUpdate`` chain is attached with
    ``_input=parse_dates``.

    Option keys read here: ``table_suffix``, ``now``, ``months``, ``limit``,
    ``aws_account_id`` and ``table``.  A progress line is printed for every
    month processed.

    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # Processing chain; the readers created below are plugged into it.
    main_nodes = (
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' +
            options['table_suffix'],
            discriminant=('productname', 'date_sk', 'account_name_sk'),
            engine='database',
        ),
    )
    graph.add_chain(*main_nodes, _name="main", _input=None)

    # Go to beginning of month
    month_start = options['now'] + relativedelta(
        day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        # Step back one more month on every iteration.
        month_start = month_start + relativedelta(months=-1)
        month_label = month_start.strftime("%Y-%m")
        print("# %d Processing %s" % (index, month_label))

        # A fresh Limit node per reader chain, only when a limit is set.
        limit_nodes = ((bonobo.Limit(options['limit']), )
                       if options['limit'] else ())

        reader = AwsBillingReader(
            '%s-aws-cost-allocation-%s.csv' %
            (options['aws_account_id'], month_label),
            fs='s3',
            skip=1,
        )
        graph.add_chain(reader, *limit_nodes, _output="main")

    # Second upsert, fed from the output of parse_dates.
    detail_upsert = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                      'recordid'),
        engine='database',
    )
    graph.add_chain(detail_upsert, _input=parse_dates)

    return graph
Esempio n. 9
0
                       (row.get('postal_code', None), row.get('city', None)))),
            row.get('county', None),
            row.get('country'),
        )))

    print('  - {}address{}: {address}'.format(Fore.BLUE,
                                              Style.RESET_ALL,
                                              address=', '.join(address)))
    print('  - {}links{}: {links}'.format(Fore.BLUE,
                                          Style.RESET_ALL,
                                          links=', '.join(row['links'])))
    print('  - {}geometry{}: {geometry}'.format(Fore.BLUE, Style.RESET_ALL,
                                                **row))
    print('  - {}source{}: {source}'.format(Fore.BLUE,
                                            Style.RESET_ALL,
                                            source='datanova/' + API_DATASET))


# Pipeline: fetch API_DATASET, normalize rows, keep those whose 'country'
# equals 'France', write them to 'fablabs.txt', then tee into display.
graph = bonobo.Graph()
graph.add_chain(
    OpenDataSoftAPI(
        dataset=API_DATASET,
        netloc=API_NETLOC,
        timezone='Europe/Paris',
    ),
    normalize,
    bonobo.Filter(filter=lambda row: row.get('country') == 'France'),
    bonobo.JsonWriter(path='fablabs.txt', ioformat='arg0'),
    bonobo.Tee(display),
)

if __name__ == '__main__':
    # Services come from get_default_services for this file.
    bonobo.run(
        graph,
        services=get_default_services(__file__),
    )
Esempio n. 10
0
import bonobo


def split_one(line):
    """Split *line* at the first ``', '`` into a name/address mapping.

    The first piece is stored under ``"name"`` and the second, when there is
    one, under ``"address"``.
    """
    pieces = line.split(', ', 1)
    return {key: value for key, value in zip(("name", "address"), pieces)}


# Pipeline: read lines from 'coffeeshops.txt', turn each into a mapping with
# split_one, and write 'coffeeshops.json' through the 'fs.output' service.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json', fs='fs.output'),
)


def get_services():
    """Return the service mapping handed to ``bonobo.run``.

    ``'fs'`` is the result of ``bonobo.open_examples_fs('datasets')`` and
    ``'fs.output'`` is the result of ``bonobo.open_fs()``.
    """
    services = {}
    services['fs'] = bonobo.open_examples_fs('datasets')
    services['fs.output'] = bonobo.open_fs()
    return services


if __name__ == '__main__':
    # Execute the graph only when run as a script, using the services
    # returned by get_services().
    bonobo.run(graph, services=get_services())
Esempio n. 11
0
import bonobo

# Pipeline: read 'Google_facebook_statuses.csv' and write 'output.json'.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.CsvReader('Google_facebook_statuses.csv'),
    bonobo.JsonWriter('output.json'),
)

if __name__ == '__main__':
    # Execute the graph only when run as a script.
    bonobo.run(graph)
Esempio n. 12
0
import bonobo


def split_one(line):
    """Return *line* split on ``', '`` at most once, as a list."""
    pieces = line.split(', ', maxsplit=1)
    return pieces


# Pipeline: read lines from 'coffeeshops.txt', split each one with
# split_one, and write 'coffeeshops.json' through the 'fs.output' service
# with ioformat='arg0'.
graph = bonobo.Graph()
graph.add_chain(
    bonobo.FileReader('coffeeshops.txt'),
    split_one,
    bonobo.JsonWriter('coffeeshops.json', fs='fs.output', ioformat='arg0'),
)


def get_services():
    """Build the services dictionary used when running the graph.

    Keys: ``'fs'`` (from ``bonobo.open_examples_fs('datasets')``) and
    ``'fs.output'`` (from ``bonobo.open_fs()``).
    """
    examples_fs = bonobo.open_examples_fs('datasets')
    output_fs = bonobo.open_fs()
    return {'fs': examples_fs, 'fs.output': output_fs}


if __name__ == '__main__':
    # Execute the graph only when run as a script, using the services
    # returned by get_services().
    bonobo.run(graph, services=get_services())
Esempio n. 13
0
                       (row.get('postal_code', None), row.get('city', None)))),
            row.get('county', None),
            row.get('country'),
        )))

    print('  - {}address{}: {address}'.format(Fore.BLUE,
                                              Style.RESET_ALL,
                                              address=', '.join(address)))
    print('  - {}links{}: {links}'.format(Fore.BLUE,
                                          Style.RESET_ALL,
                                          links=', '.join(row['links'])))
    print('  - {}geometry{}: {geometry}'.format(Fore.BLUE, Style.RESET_ALL,
                                                **row))
    print('  - {}source{}: {source}'.format(Fore.BLUE,
                                            Style.RESET_ALL,
                                            source='datanova/' + API_DATASET))


# Pipeline: fetch API_DATASET, normalize rows, apply filter_france, tee into
# display, then write to 'fablabs.txt'.
graph = bonobo.Graph()
graph.add_chain(
    OpenDataSoftAPI(
        dataset=API_DATASET,
        netloc=API_NETLOC,
        timezone='Europe/Paris',
    ),
    normalize,
    filter_france,
    bonobo.Tee(display),
    bonobo.JsonWriter(path='fablabs.txt'),
)

if __name__ == '__main__':
    # Services come from get_default_services for this file.
    bonobo.run(
        graph,
        services=get_default_services(__file__),
    )