Exemple #1
0
def get_bu_graph(**options):
    """
    Build the business-unit graph.

    A trunk chain runs ``get_business_units`` through ``join_cost_centers``
    and the two Centerstone remap nodes. Two branches then start from
    ``centerstone_BussUnit_remap`` and each ends in a tab-separated
    ``CsvWriter`` using ``fs="brickftp"``.

    :param options: must contain ``suffix``, which is appended to every
        output path.
    :return: bonobo.Graph

    """
    def tsv_writer(path):
        # Both branches write with the same line terminator, delimiter and fs.
        return bonobo.CsvWriter(path + options['suffix'],
                                lineterminator="\n",
                                delimiter="\t",
                                fs="brickftp")

    graph = bonobo.Graph()

    # Trunk: its last node is the input of both branches below.
    trunk = (
        get_business_units,
        join_cost_centers,
        centerstone_BU_SupOrg_Merge_remap,
        centerstone_BussUnit_remap,
    )
    graph.add_chain(*trunk)

    product_line_branch = [
        productLineLevel1_remap,
        unique_product_line,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        tsv_writer('/etl/centerstone/downloads/ProductLineLevel1.txt'),
    ]
    graph.add_chain(*product_line_branch, _input=centerstone_BussUnit_remap)

    team_branch = [
        teamLevel3_remap,
        bonobo.UnpackItems(0),
        tsv_writer('/etl/centerstone/downloads/TeamLevel3.txt'),
    ]
    graph.add_chain(*team_branch, _input=centerstone_BussUnit_remap)

    return graph
Exemple #2
0
def get_costcenter_graph(**options):
    """
    Build the cost-center graph.

    A single chain named ``main`` runs ``get_cost_centers`` through
    ``cache_cost_centers`` and ``centerstone_CostCenter_remap``, unpacks
    item 0, writes the rows twice as tab-separated text (once with
    ``fs="brickftp"``, once with ``fs="centerstone"``) and finishes with
    ``bonobo.count``.

    :param options: must contain ``suffix``, which is appended to both
        output file names.
    :return: bonobo.Graph

    """
    suffix = options['suffix']
    tsv_format = {'lineterminator': "\n", 'delimiter': "\t"}

    nodes = [
        get_cost_centers,
        cache_cost_centers,
        centerstone_CostCenter_remap,
        bonobo.UnpackItems(0),
        # NOTE: the header row cannot be skipped here, although it must be.
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/CostCenterLevel2.txt' + suffix,
            fs="brickftp",
            **tsv_format),
        bonobo.CsvWriter('CostCenterLevel2.txt' + suffix,
                         fs="centerstone",
                         **tsv_format),
        bonobo.count,
    ]

    graph = bonobo.Graph()
    graph.add_chain(*nodes, _name="main")
    return graph
Exemple #3
0
def get_workday_employee_graph(**options):
    """
    Build the Workday employee graph.

    The first chain runs ``get_workday_users`` through
    ``workday_centerstone_employee_remap`` and writes ``workday-users.csv``
    twice (``fs="brickftp"`` and ``fs="centerstone"``). Two further chains
    start from ``workday_centerstone_employee_remap``, pass through
    ``split_active_employee`` / ``split_termed_employee`` and write their
    rows with ``HeaderlessCsvWriter`` to the same two ``fs`` targets.

    :param options: must contain ``suffix``, which is appended to every
        output file name.
    :return: bonobo.Graph

    """
    suffix = options['suffix']
    tsv_format = {'lineterminator': "\n", 'delimiter': "\t"}

    def writer_pair(writer_cls, brickftp_path, centerstone_path):
        # Every export is written twice: first to brickftp, then centerstone.
        return (
            writer_cls(brickftp_path + suffix, fs="brickftp", **tsv_format),
            writer_cls(centerstone_path + suffix,
                       fs="centerstone",
                       **tsv_format),
        )

    graph = bonobo.Graph()

    graph.add_chain(
        get_workday_users,
        workday_centerstone_employee_remap,
        bonobo.UnpackItems(0),
        *writer_pair(bonobo.CsvWriter,
                     '/etl/centerstone/downloads/workday-users.csv',
                     'workday-users.csv'),
    )

    # The active and termed branches are identical apart from the split
    # node and the file names.
    branches = (
        (split_active_employee,
         '/etl/centerstone/downloads/Mozilla_Active_Users.txt',
         'Mozilla_Active_Users.txt'),
        (split_termed_employee,
         '/etl/centerstone/downloads/Mozilla_Termed_Users.txt',
         'Mozilla_Termed_Users.txt'),
    )
    for splitter, brickftp_path, centerstone_path in branches:
        graph.add_chain(
            splitter,
            bonobo.UnpackItems(0),
            *writer_pair(HeaderlessCsvWriter, brickftp_path,
                         centerstone_path),
            _input=workday_centerstone_employee_remap,
        )

    return graph
def get_graph(**options):
    """
    Build the AWS accounts graph.

    The chain named ``main`` runs ``extract_accounts`` through ``transform``,
    writes ``aws_accounts_ex.json`` and ends with ``valid_aws_account``.
    Three more chains are attached: a JSON writer whose input is ``"main"``,
    and a CSV writer and a SQL ``InsertOrUpdate`` that both take their input
    from ``valid_aws_account``.

    :param options: must contain ``table_suffix``, which is appended to the
        ``aws_accounts`` table name.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    graph.add_chain(*main_nodes, _name="main")

    db_upsert = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='aws_accounts' + options['table_suffix'],
        discriminant=('account_id', ),
        engine='db')

    # (nodes, input) for every chain attached to the main one.
    branches = (
        ((bonobo.JsonWriter('aws_accounts.json'), ), "main"),
        ((bonobo.UnpackItems(0), bonobo.CsvWriter('aws_accounts.csv')),
         valid_aws_account),
        ((bonobo.UnpackItems(0), db_upsert), valid_aws_account),
    )
    for nodes, source in branches:
        graph.add_chain(*nodes, _input=source)

    return graph
Exemple #5
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build a graph that reads the "liste-des-cafes-a-un-euro" dataset from
    opendata.paris.fr, renames the name, address and zipcode fields, adds
    city and country, reorders the fields and writes the result as CSV,
    JSON and line-delimited JSON.

    :param graph: existing graph to extend; a new one is created when falsy.
    :param _limit: nodes wrapped in a ``PartialGraph`` right after the reader.
    :param _print: nodes wrapped in a ``PartialGraph`` at the end of the
        producer chain.
    :return: the graph the chains were added to.

    """
    if not graph:
        graph = bonobo.Graph()

    steps = [
        ODSReader(dataset="liste-des-cafes-a-un-euro",
                  netloc="opendata.paris.fr"),
        PartialGraph(*_limit),
        bonobo.UnpackItems(0),
        bonobo.Rename(name="nom_du_cafe",
                      address="adresse",
                      zipcode="arrondissement"),
        bonobo.Format(city="Paris", country="France"),
        bonobo.OrderFields([
            "name", "address", "zipcode", "city", "country", "geometry",
            "geoloc"
        ]),
        PartialGraph(*_print),
    ]

    # Fold the steps onto the cursor left to right, exactly as a single
    # ``a >> b >> c`` expression would.
    producer = graph.get_cursor()
    for step in steps:
        producer = producer >> step

    writers = (
        # Comma separated values.
        bonobo.CsvWriter("coffeeshops.csv",
                         fields=["name", "address", "zipcode", "city"],
                         delimiter=","),
        # Standard JSON
        bonobo.JsonWriter(path="coffeeshops.json"),
        # Line-delimited JSON
        bonobo.LdjsonWriter(path="coffeeshops.ldjson"),
    )
    for writer in writers:
        # Each writer gets its own cursor on the producer's output.
        graph.get_cursor(producer.output) >> writer

    return graph
Exemple #6
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build a graph that reads the "liste-des-cafes-a-un-euro" dataset from
    opendata.paris.fr, renames the name, address and zipcode fields, adds
    city and country, reorders the fields and writes the result as CSV,
    JSON and line-delimited JSON.

    :param graph: existing graph to extend; a new one is created when falsy.
    :param _limit: extra nodes spliced in right after the reader.
    :param _print: extra nodes spliced in at the end of the producer chain.
    :return: the graph the chains were added to.

    """
    if not graph:
        graph = bonobo.Graph()

    producer_nodes = [
        ODSReader(dataset='liste-des-cafes-a-un-euro',
                  netloc='opendata.paris.fr'),
    ]
    producer_nodes.extend(_limit)
    producer_nodes += [
        bonobo.UnpackItems(0),
        bonobo.Rename(name='nom_du_cafe',
                      address='adresse',
                      zipcode='arrondissement'),
        bonobo.Format(city='Paris', country='France'),
        bonobo.OrderFields([
            'name', 'address', 'zipcode', 'city', 'country', 'geometry',
            'geoloc'
        ]),
    ]
    producer_nodes.extend(_print)
    producer = graph.add_chain(*producer_nodes)

    writers = (
        # Comma separated values.
        bonobo.CsvWriter('coffeeshops.csv',
                         fields=['name', 'address', 'zipcode', 'city'],
                         delimiter=','),
        # Standard JSON
        bonobo.JsonWriter(path='coffeeshops.json'),
        # Line-delimited JSON
        bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
    )
    for writer in writers:
        # Every writer is its own chain fed by the producer's output.
        graph.add_chain(writer, _input=producer.output)

    return graph
Exemple #7
0
def get_graph(job, graph=None, *, _limit=(), _print=()):
    """
    Build the execution graph for ``job``.

    Rows are read from ``job.input_file`` (first line skipped, fixed field
    names), passed through ``search``, unpacked and written to
    ``job.output_file``.

    :param job: object providing ``input_file`` and ``output_file``.
    :param graph: existing graph to extend; a new one is created when falsy.
    :param _limit: extra nodes spliced in right after the reader.
    :param _print: extra nodes spliced in after the writer.
    :return: the graph the chain was added to.
    """
    if not graph:
        graph = bonobo.Graph()

    column_names = [
        'integration_id', 'site_name', 'address', 'borough', 'status'
    ]
    reader = bonobo.CsvReader(job.input_file,
                              fs=FS_IN_SERVICE_ID,
                              fields=column_names,
                              skip=1)

    nodes = [reader]
    nodes.extend(_limit)
    nodes.append(search)
    nodes.append(bonobo.UnpackItems(0))
    nodes.append(bonobo.CsvWriter(job.output_file, fs=FS_OUT_SERVICE_ID))
    nodes.extend(_print)

    graph.add_chain(*nodes)
    return graph
Exemple #8
0
def get_graph(**options):
    """
    Build the wishlist graph.

    A single chain named ``main`` runs ``get_cards`` through
    ``wishlist_map``, unpacks item 0 and writes ``Deckbox-wishlist.csv``.

    :param options: accepted but not used.
    :return: bonobo.Graph

    """
    pipeline = [
        get_cards,
        wishlist_map,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('Deckbox-wishlist.csv'),
    ]

    graph = bonobo.Graph()
    graph.add_chain(*pipeline, _name='main')
    return graph
Exemple #9
0
def get_graph(*, _limit=None, _print=False):
    """
    Build a graph that reads ``coffeeshops.csv`` and writes it back out as
    ``coffeeshops.csv`` using ``fs="fs.output"``.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node is inserted
        after the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node is inserted
        before the writer.
    :return: bonobo.Graph
    """
    nodes = [bonobo.CsvReader("coffeeshops.csv")]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter("coffeeshops.csv", fs="fs.output"))
    return bonobo.Graph(*nodes)
Exemple #10
0
def get_graph(**options):
    """
    Build the AWS billing graph.

    The chain named ``main`` has no input of its own (``_input=None``). It
    writes ``billing.csv`` and ``billing.json``, runs the cleaning, lookup
    and summarising nodes and upserts into the
    ``fact_itsm_aws_historical_cost`` table. One ``AwsBillingReader`` chain
    per month feeds ``main``, and a second upsert takes its input from
    ``parse_dates``.

    :param options: must contain ``table_suffix``, ``table``, ``now``,
        ``months``, ``limit`` and ``aws_account_id``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    main_nodes = [
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' +
            options['table_suffix'],
            discriminant=('productname', 'date_sk', 'account_name_sk'),
            engine='database'),
    ]
    graph.add_chain(*main_nodes, _name="main", _input=None)

    # Go to beginning of month, then walk backwards one month per iteration.
    month = options['now']
    month += relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        month += relativedelta(months=-1)
        tstamp = month.strftime("%Y-%m")
        print("# %d Processing %s" % (index, tstamp))

        # A fresh Limit node per reader chain, only when a limit is set.
        limiter = (bonobo.Limit(options['limit']), ) if options['limit'] else ()

        reader = AwsBillingReader('%s-aws-cost-allocation-%s.csv' %
                                  (options['aws_account_id'], tstamp),
                                  fs='s3',
                                  skip=1)
        graph.add_chain(reader, *limiter, _output="main")

    # Second upsert, fed by parse_dates.
    detail_upsert = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                      'recordid'),
        engine='database')
    graph.add_chain(detail_upsert, _input=parse_dates)

    return graph
Exemple #11
0
def get_graph(*, _limit=None, _print=False):
    """
    Build a graph that reads ``datasets/coffeeshops.txt`` and writes it out
    as ``coffeeshops.csv`` using ``fs='fs.output'``.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node is inserted
        after the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node is inserted
        before the writer.
    :return: bonobo.Graph
    """
    nodes = [bonobo.CsvReader('datasets/coffeeshops.txt')]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter('coffeeshops.csv', fs='fs.output'))
    return bonobo.Graph(*nodes)
Exemple #12
0
def get_graph(**options):
    """
    Build the card-collection graph.

    The chain named ``main`` has no input of its own (``_input=None``): it
    writes ``DeckedBuilder.csv``, runs ``metadata`` and ends in a no-op node
    from which the exports start. One reader chain per source CSV tags its
    rows with a ``Language`` value and feeds ``main``. The EchoMTG,
    MTG Studio and Deckbox exports are added only when the ``ECHO_MTG``,
    ``MTG_STUDIO`` and ``DECKBOX`` flags are truthy.

    :param options: accepted but not used.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # Every export below takes this no-op node as its input.
    hub = bonobo.noop

    graph.add_chain(
        bonobo.CsvWriter('DeckedBuilder.csv'),
        metadata,
        hub,
        _input=None,
        _name='main',
    )

    # (source file, Language value) for every reader chain, in the order
    # they are added to the graph.
    sources = (
        ('main-en.csv', 'English'),
        ('main-de.csv', 'German'),
        ('main-ru.csv', 'Russian'),
        ('main-it.csv', 'Italian'),
        ('main-jp.csv', 'Japanese'),
        ('main-fr.csv', 'French'),
        ('main-kr.csv', 'Korean'),
        ('main-cs.csv', 'Chinese'),
        ('Deckbox-extras.csv', 'English'),
    )
    for filename, language in sources:
        graph.add_chain(
            bonobo.CsvReader(filename),
            bonobo.Format(Language=language),
            _output='main',
        )

    if ECHO_MTG:
        # Reg Qty,Foil Qty,Name,Set,Acquired,Language
        echomtg = {'Acquired For': '0.004', 'Language': 'en'}
        echomtg_nodes = [
            # echomtg specific fiddling
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Name='Card'),
            bonobo.Format(**echomtg),
            bonobo.CsvWriter('EchoMTG.csv'),
        ]
        graph.add_chain(*echomtg_nodes, _input=hub)

    # MTG Studio
    if MTG_STUDIO:
        studio_nodes = [
            mtg_studio,
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Edition='Set'),
            bonobo.CsvWriter('MTG-Studio.csv'),
        ]
        graph.add_chain(*studio_nodes, _input=hub)

    if DECKBOX:
        # The inventory writer is shared: it ends the export chain and is
        # also the output of the specials reader.
        csv_out = bonobo.CsvWriter('Deckbox-inventory.csv')

        graph.add_chain(
            deckbox,
            bonobo.UnpackItems(0),
            csv_out,
            _input=hub,
        )
        graph.add_chain(
            bonobo.CsvReader('Deckbox-specials.csv'),
            _output=csv_out,
        )

    return graph
import bonobo

def guess_email(**row):
    """
    Return a copy of ``row`` with an ``email`` field added.

    The address is ``row['name']`` and ``row['domain']`` joined by ``@``.
    An ``email`` value already present in ``row`` is overwritten.

    :raises KeyError: if ``name`` or ``domain`` is missing.
    """
    enriched = dict(row)
    enriched['email'] = row['name'] + '@' + row['domain']
    return enriched

# Copy every row of employees.csv to employees.output.csv except those whose
# position is 'CEO'.
graph = bonobo.Graph(
        bonobo.CsvReader('employees.csv'),
        # The predicate looks fields up by name, so it must collect the row
        # as keyword arguments: with ``*row`` the row is a tuple and
        # ``row['position']`` raises TypeError.
        # NOTE(review): assumes rows reach nodes as keyword arguments, the
        # same convention guess_email relies on; confirm against the
        # installed bonobo version.
        bonobo.Filter(lambda **row: row['position'] != 'CEO'),
        # guess_email,
        bonobo.CsvWriter('employees.output.csv'),
)

if __name__ == "__main__":
    # Parse the command line with bonobo's argument parser and run the graph
    # inside the context it returns.
    with bonobo.parse_args(bonobo.get_argument_parser()):
        bonobo.run(graph)