Ejemplo n.º 1
0
def get_bu_graph(**options):
    """
    Assemble the business-unit graph.

    The first chain runs from ``get_business_units`` to
    ``centerstone_BussUnit_remap``. Two further chains take that last node
    as their input and each end in a ``bonobo.CsvWriter``.

    :param options: keyword options; ``suffix`` is appended to both output
        file names.
    :return: bonobo.Graph

    """
    # Keyword arguments shared by both CSV writers.
    writer_settings = {
        'lineterminator': "\n",
        'delimiter': "\t",
        'fs': "brickftp",
    }
    branch_point = centerstone_BussUnit_remap

    graph = bonobo.Graph()
    graph.add_chain(
        get_business_units,
        join_cost_centers,
        centerstone_BU_SupOrg_Merge_remap,
        branch_point,
    )

    # Product-line branch; rows go through a PrettyPrinter before the writer.
    product_line_path = ('/etl/centerstone/downloads/ProductLineLevel1.txt' +
                         options['suffix'])
    graph.add_chain(
        productLineLevel1_remap,
        unique_product_line,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        bonobo.CsvWriter(product_line_path, **writer_settings),
        _input=branch_point,
    )

    # Team branch.
    team_path = ('/etl/centerstone/downloads/TeamLevel3.txt' +
                 options['suffix'])
    graph.add_chain(
        teamLevel3_remap,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(team_path, **writer_settings),
        _input=branch_point,
    )

    return graph
Ejemplo n.º 2
0
def get_graph(**options):
    """
    Assemble the AWS accounts graph.

    :param options: keyword options; ``table_suffix`` is appended to the
        ``aws_accounts`` table name.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # Primary chain, registered under the name "main".
    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    graph.add_chain(*main_nodes, _name="main")

    # JSON writer attached to the named chain.
    graph.add_chain(bonobo.JsonWriter('aws_accounts.json'), _input="main")

    # CSV writer fed by valid_aws_account.
    graph.add_chain(bonobo.UnpackItems(0),
                    bonobo.CsvWriter('aws_accounts.csv'),
                    _input=valid_aws_account)

    # InsertOrUpdate into the (suffixed) aws_accounts table, also fed by
    # valid_aws_account.
    table = 'aws_accounts' + options['table_suffix']
    graph.add_chain(bonobo.UnpackItems(0),
                    bonobo_sqlalchemy.InsertOrUpdate(
                        table_name=table,
                        discriminant=('account_id', ),
                        engine='db'),
                    _input=valid_aws_account)

    return graph
Ejemplo n.º 3
0
def get_workday_employee_graph(**options):
    """
    Assemble the Workday employee graph.

    The main chain writes the full user list; two further chains take
    ``workday_centerstone_employee_remap`` as input and write the active and
    termed user lists. Every list is written twice, once with
    ``fs="brickftp"`` and once with ``fs="centerstone"``.

    :param options: keyword options; ``suffix`` is appended to every output
        file name.
    :return: bonobo.Graph

    """
    suffix = options['suffix']

    # Writer keyword arguments, one set per target filesystem.
    to_brickftp = {'lineterminator': "\n", 'delimiter': "\t", 'fs': "brickftp"}
    to_centerstone = {
        'lineterminator': "\n",
        'delimiter': "\t",
        'fs': "centerstone",
    }
    remap = workday_centerstone_employee_remap

    graph = bonobo.Graph()

    # All users.
    graph.add_chain(
        get_workday_users,
        remap,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/workday-users.csv' + suffix,
            **to_brickftp),
        bonobo.CsvWriter('workday-users.csv' + suffix, **to_centerstone),
    )

    # Active users, written with HeaderlessCsvWriter.
    graph.add_chain(
        split_active_employee,
        bonobo.UnpackItems(0),
        HeaderlessCsvWriter(
            '/etl/centerstone/downloads/Mozilla_Active_Users.txt' + suffix,
            **to_brickftp),
        HeaderlessCsvWriter('Mozilla_Active_Users.txt' + suffix,
                            **to_centerstone),
        _input=remap,
    )

    # Termed users, written with HeaderlessCsvWriter.
    graph.add_chain(
        split_termed_employee,
        bonobo.UnpackItems(0),
        HeaderlessCsvWriter(
            '/etl/centerstone/downloads/Mozilla_Termed_Users.txt' + suffix,
            **to_brickftp),
        HeaderlessCsvWriter('Mozilla_Termed_Users.txt' + suffix,
                            **to_centerstone),
        _input=remap,
    )

    return graph
Ejemplo n.º 4
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build a graph that reads the ``liste-des-cafes-a-un-euro`` dataset from
    opendata.paris.fr, renames the name, address and zipcode fields, adds
    city and country, reorders the fields and writes the result to CSV, JSON
    and line-delimited JSON files.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        only when this is ``None``.
    :param _limit: nodes inserted (via ``PartialGraph``) right after the
        reader.
    :param _print: nodes inserted (via ``PartialGraph``) at the end of the
        producer chain.
    :return: bonobo.Graph

    """
    # ``graph or bonobo.Graph()`` would silently replace a caller-supplied
    # graph that evaluates as falsy (e.g. one that is still empty), so the
    # caller's object would never be populated. Test for None explicitly.
    if graph is None:
        graph = bonobo.Graph()

    producer = (
        graph.get_cursor() >> ODSReader(dataset="liste-des-cafes-a-un-euro",
                                        netloc="opendata.paris.fr") >>
        PartialGraph(*_limit) >> bonobo.UnpackItems(0) >> bonobo.Rename(
            name="nom_du_cafe", address="adresse", zipcode="arrondissement") >>
        bonobo.Format(city="Paris", country="France") >> bonobo.OrderFields([
            "name", "address", "zipcode", "city", "country", "geometry",
            "geoloc"
        ]) >> PartialGraph(*_print))

    # Comma separated values.
    graph.get_cursor(producer.output) >> bonobo.CsvWriter(
        "coffeeshops.csv",
        fields=["name", "address", "zipcode", "city"],
        delimiter=",")

    # Standard JSON
    graph.get_cursor(
        producer.output) >> bonobo.JsonWriter(path="coffeeshops.json")

    # Line-delimited JSON
    graph.get_cursor(
        producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson")

    return graph
Ejemplo n.º 5
0
def get_graph(**options):
    """
    Assemble the graph: one upstream chain, then one ``InsertOrUpdate``
    chain per distinct engine.

    :param options: keyword options; reads ``engine`` (iterable,
        de-duplicated through ``set``), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # bonobo.noop is the junction node the per-engine writers attach to.
    junction = bonobo.noop

    upstream = (
        extract_accounts,
        transform,
        valid_aws_account,
        bonobo.UnpackItems(0),
        junction,
    )
    graph.add_chain(*upstream, _name="main")

    for target_engine in set(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('linked_account_number', ),
            engine=target_engine)
        graph.add_chain(writer, _input=junction)

    return graph
Ejemplo n.º 6
0
def get_graph(**options):
    """
    Assemble the graph: read ``workday-users.csv``, pass the rows through
    ``employee_active`` and ``find_badge_id``, then add one
    ``InsertOrUpdate`` chain per distinct engine.

    :param options: keyword options; reads ``engine`` (iterable,
        de-duplicated through ``set``), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # bonobo.noop is the junction node the per-engine writers attach to.
    junction = bonobo.noop

    reader = bonobo.CsvReader('/etl/metrics-insights/workday-users.csv',
                              fs='brickftp')
    graph.add_chain(
        reader,
        employee_active,
        find_badge_id,
        bonobo.UnpackItems(0),
        junction,
    )

    for target_engine in set(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('badgeid', ),
            buffer_size=10,
            engine=target_engine)
        graph.add_chain(writer, _input=junction)

    return graph
Ejemplo n.º 7
0
def get_costcenter_graph(**options):
    """
    Assemble the cost-center graph: a single chain named ``main`` that
    fetches, caches and remaps cost centers, writes them with two CSV
    writers and ends in ``bonobo.count``.

    :param options: keyword options; ``suffix`` is appended to both output
        file names.
    :return: bonobo.Graph

    """
    suffix = options['suffix']
    tsv = {'lineterminator': "\n", 'delimiter': "\t"}

    graph = bonobo.Graph()

    # NOTE (from the original author): "Can't skip the header, but must".
    brickftp_writer = bonobo.CsvWriter(
        '/etl/centerstone/downloads/CostCenterLevel2.txt' + suffix,
        fs="brickftp",
        **tsv)
    centerstone_writer = bonobo.CsvWriter('CostCenterLevel2.txt' + suffix,
                                          fs="centerstone",
                                          **tsv)

    graph.add_chain(
        get_cost_centers,
        cache_cost_centers,
        centerstone_CostCenter_remap,
        bonobo.UnpackItems(0),
        brickftp_writer,
        centerstone_writer,
        bonobo.count,
        _name="main",
    )

    return graph
Ejemplo n.º 8
0
def get_sched_graph(**options):
    """
    Assemble the single-chain graph that starts at ``get_sched``, passes
    through ``modified_events`` and ``sync_event``, prints the rows and ends
    in ``bonobo.count``.

    :param options: accepted but not read by this function.
    :return: bonobo.Graph

    """
    pipeline = (
        get_sched,
        bonobo.UnpackItems(0),
        modified_events,
        sync_event,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        bonobo.count,
    )
    return bonobo.Graph(*pipeline)
Ejemplo n.º 9
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build a graph that reads ``API_DATASET`` through ``OpenDataSoftAPI``,
    normalizes the rows and writes them to ``fablabs.json``.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        only when this is ``None``.
    :param _limit: nodes inserted right after the reader.
    :param _print: nodes inserted right before the JSON writer.
    :return: bonobo.Graph

    """
    # ``graph or bonobo.Graph()`` would silently replace a caller-supplied
    # graph that evaluates as falsy (e.g. one that is still empty), so the
    # caller's object would never be populated. Test for None explicitly.
    if graph is None:
        graph = bonobo.Graph()

    graph.add_chain(
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path='fablabs.json'),
    )
    return graph
Ejemplo n.º 10
0
def get_sched_graph(**options):
    """
    Assemble the single-chain graph that starts at ``get_sched``, passes
    through ``cache_sched`` and ends in ``bonobo.count``.

    :param options: accepted but not read by this function.
    :return: bonobo.Graph

    """
    pipeline = (
        get_sched,
        bonobo.UnpackItems(0),
        cache_sched,
        bonobo.count,
    )
    return bonobo.Graph(*pipeline)
Ejemplo n.º 11
0
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Build a graph that reads the ``liste-des-cafes-a-un-euro`` dataset from
    opendata.paris.fr, renames the name, address and zipcode fields, adds
    city and country, reorders the fields and writes the result to CSV, JSON
    and line-delimited JSON files.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        only when this is ``None``.
    :param _limit: nodes inserted right after the reader.
    :param _print: nodes appended at the end of the producer chain.
    :return: bonobo.Graph

    """
    # ``graph or bonobo.Graph()`` would silently replace a caller-supplied
    # graph that evaluates as falsy (e.g. one that is still empty), so the
    # caller's object would never be populated. Test for None explicitly.
    if graph is None:
        graph = bonobo.Graph()

    producer = graph.add_chain(
        ODSReader(dataset='liste-des-cafes-a-un-euro',
                  netloc='opendata.paris.fr'),
        *_limit,
        bonobo.UnpackItems(0),
        bonobo.Rename(name='nom_du_cafe',
                      address='adresse',
                      zipcode='arrondissement'),
        bonobo.Format(city='Paris', country='France'),
        bonobo.OrderFields([
            'name', 'address', 'zipcode', 'city', 'country', 'geometry',
            'geoloc'
        ]),
        *_print,
    )

    # Comma separated values.
    graph.add_chain(
        bonobo.CsvWriter('coffeeshops.csv',
                         fields=['name', 'address', 'zipcode', 'city'],
                         delimiter=','),
        _input=producer.output,
    )

    # Standard JSON
    graph.add_chain(
        bonobo.JsonWriter(path='coffeeshops.json'),
        _input=producer.output,
    )

    # Line-delimited JSON
    graph.add_chain(
        bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
        _input=producer.output,
    )

    return graph
Ejemplo n.º 12
0
Archivo: etl.py Proyecto: mlipper/bray
def get_graph(job, graph=None, *, _limit=(), _print=()):
    """
    Build the execution graph: read ``job.input_file`` as CSV, run each row
    through ``search`` and write the result to ``job.output_file``.

    :param job: object providing ``input_file`` and ``output_file``.
    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        only when this is ``None``.
    :param _limit: nodes inserted right after the reader.
    :param _print: nodes appended after the CSV writer.
    :return: bonobo.Graph

    """
    # ``graph or bonobo.Graph()`` would silently replace a caller-supplied
    # graph that evaluates as falsy (e.g. one that is still empty), so the
    # caller's object would never be populated. Test for None explicitly.
    if graph is None:
        graph = bonobo.Graph()

    graph.add_chain(
        # Field names are given explicitly and the reader is told to skip 1.
        bonobo.CsvReader(job.input_file,
                         fs=FS_IN_SERVICE_ID,
                         fields=[
                             'integration_id', 'site_name', 'address',
                             'borough', 'status'
                         ],
                         skip=1),
        *_limit,
        search,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(job.output_file, fs=FS_OUT_SERVICE_ID),
        *_print,
    )
    return graph
Ejemplo n.º 13
0
def get_graph(**options):
    """
    Assemble the single-chain graph (named ``main``) that runs from
    ``get_cards`` through ``wishlist_map`` to ``Deckbox-wishlist.csv``.

    :param options: accepted but not read by this function.
    :return: bonobo.Graph

    """
    pipeline = (
        get_cards,
        wishlist_map,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('Deckbox-wishlist.csv'),
    )

    graph = bonobo.Graph()
    graph.add_chain(*pipeline, _name='main')
    return graph
Ejemplo n.º 14
0
def get_graph(**options):
    """
    Assemble the graph: read order XML files matching a date-stamped glob
    under ``/etl/ivm``, rename the transaction-log fields, then add one
    ``InsertOrUpdate`` chain per distinct engine.

    :param options: keyword options; reads ``now`` (formatted into the file
        glob), ``engine`` (iterable, de-duplicated through ``set``),
        ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # bonobo.noop is the junction node the per-engine writers attach to.
    junction = bonobo.noop

    # File glob stamped with the year, month and day of options['now'].
    dated_glob = 'Mozilla_Corporation{timestamp:%Y_%m_%d}*.xml'.format(
        timestamp=options['now'])

    # Two separate Rename nodes, applied one after the other.
    first_rename = bonobo.Rename(
        transaction_date='Transactionlog_Tranenddatetime',
        item_number='Transactionlog_Itemnumber',
        transaction_id='Transactionlog_Tlid',
        item_description='Transactionlog_Itemdesc',
    )
    second_rename = bonobo.Rename(
        user_id='Transactionlog_User',
        quantity='Transactionlog_Qty',
        transaction_code='Transactionlog_Transcode',
        description='Vendingmachines_Descr',
    )

    graph.add_chain(
        GetOrderXML(prefix="/etl/ivm", glob=[dated_glob]),
        ParseDates(['Transactionlog_Tranenddatetime']),
        truncate_description,
        bonobo.UnpackItems(0),
        first_rename,
        second_rename,
        junction,
        _name="main",
    )

    # Original author's reference note:
    # insert into ivm (description, transaction_id, item_number,
    # item_description, user_id, quantity, transaction_date,
    # transaction_code) values
    for target_engine in set(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('transaction_id', ),
            engine=target_engine)
        graph.add_chain(writer, _input=junction)

    return graph
Ejemplo n.º 15
0
def get_graph(**options):
    """
    Assemble the graph: select the ``ivm`` rows (joined with ``f_employee``)
    whose transaction date equals the target day, then run them through the
    validation, formatting and ``create_ticket`` nodes.

    :param options: keyword options; reads ``now`` (the target day is derived
        from it) and ``engine`` (passed to ``bonobo_sqlalchemy.Select``).
    :return: bonobo.Graph

    """
    # Null out the time portion and go back 2 days from options['now'].
    target_day = options['now'] + relativedelta(
        days=-2, hour=0, minute=0, second=0, microsecond=0)

    print("# Processing for %s" % target_day.date())

    graph = bonobo.Graph()

    query_template = """
select a.badgeid AS badgeid, b.user_id AS user_id, a.employee_id AS employee_id, a.email AS email, 
b.item_description AS item_description, b.item_number AS item_number , b.transaction_date AS transaction_date,
b.transaction_id AS transaction_id, b.description AS description, '' AS drawer_id, b.quantity AS quantity
from  ivm b , (select badgeid,email, employee_id from f_employee group by badgeid,email ,employee_id) a
where  b.user_id = a.badgeid
and b.transaction_date = '{now}';
"""

    # The target day is interpolated into the statement text.
    statement = query_template.format(now=target_day)

    pipeline = (
        bonobo_sqlalchemy.Select(statement, engine=options['engine']),
        trim_employee_id,
        invalid_badge_id,
        invalid_email,
        format_payload,
        create_ticket,
        bonobo.UnpackItems(0),
    )
    graph.add_chain(*pipeline)

    return graph
Ejemplo n.º 16
0
def get_graph(**options):
    """
    Assemble the graph: read a pipe-delimited input file, pass the rows
    through ``timestamp``, ``card_id`` and ``map_fields``, then add one
    ``InsertOrUpdate`` chain per distinct engine.

    :param options: keyword options; reads ``input_file``, ``engine``
        (iterable, de-duplicated through ``set``), ``table_name`` and
        ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # bonobo.noop is the junction node the per-engine writers attach to.
    junction = bonobo.noop

    # Field names handed to the reader, in order.
    input_fields = ('Admitted', 'blank1', 'Timestamp', 'blank2', 'Name',
                    'card_id', 'Location')
    reader = bonobo.CsvReader(options['input_file'],
                              delimiter='|',
                              fields=input_fields,
                              fs='brickftp')

    graph.add_chain(
        reader,
        timestamp,
        card_id,
        map_fields,
        bonobo.UnpackItems(0),
        junction,
        _name="main",
    )

    for target_engine in set(options['engine']):
        writer = bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table_name'] + options['table_suffix'],
            discriminant=('activitydate', 'badgeid', 'username', 'location'),
            engine=target_engine)
        graph.add_chain(writer, _input=junction)

    return graph
Ejemplo n.º 17
0
def get_graph(**options):
    """
    Assemble the card-collection graph.

    Several CSV reader chains are connected to the chain named ``main``
    through ``_output='main'``. Optional export chains take the last node of
    ``main`` as input, depending on the module-level ``ECHO_MTG``,
    ``MTG_STUDIO`` and ``DECKBOX`` flags.

    :param options: accepted but not read by this function.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    # bonobo.noop is the junction node the optional exports attach to.
    junction = bonobo.noop

    # "main" is added with _input=None; the reader chains below are linked
    # to it through _output='main'.
    graph.add_chain(
        bonobo.CsvWriter('DeckedBuilder.csv'),
        metadata,
        junction,
        _input=None,
        _name='main',
    )

    # (input file, Language value) pairs, in the order the chains are added.
    sources = (
        ('main-en.csv', 'English'),
        ('main-de.csv', 'German'),
        ('main-ru.csv', 'Russian'),
        ('main-it.csv', 'Italian'),
        ('main-jp.csv', 'Japanese'),
        ('main-fr.csv', 'French'),
        ('main-kr.csv', 'Korean'),
        ('main-cs.csv', 'Chinese'),
        ('Deckbox-extras.csv', 'English'),
    )
    for csv_name, language in sources:
        graph.add_chain(
            bonobo.CsvReader(csv_name),
            bonobo.Format(Language=language),
            _output='main',
        )

    if ECHO_MTG:
        # EchoMTG columns: Reg Qty,Foil Qty,Name,Set,Acquired,Language
        echo_constants = {'Acquired For': '0.004', 'Language': 'en'}
        graph.add_chain(
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Name='Card'),
            bonobo.Format(**echo_constants),
            bonobo.CsvWriter('EchoMTG.csv'),
            _input=junction,
        )

    if MTG_STUDIO:
        graph.add_chain(
            mtg_studio,
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Edition='Set'),
            bonobo.CsvWriter('MTG-Studio.csv'),
            _input=junction,
        )

    # A trade-list / buy-list export chain (DeckedBuilder-tradelist.csv,
    # CardKingdom-buylist.csv, mtgprice-buylist.csv) was commented out at
    # this point in the original and remains disabled.

    if DECKBOX:
        inventory_writer = bonobo.CsvWriter('Deckbox-inventory.csv')

        graph.add_chain(
            deckbox,
            bonobo.UnpackItems(0),
            inventory_writer,
            _input=junction,
        )

        # The specials file is linked straight to the inventory writer.
        graph.add_chain(bonobo.CsvReader('Deckbox-specials.csv'),
                        _output=inventory_writer)

    return graph
Ejemplo n.º 18
0
def get_graph(**options):
    """
    Assemble the AWS billing graph.

    The chain named ``main`` is added with ``_input=None``. One
    ``AwsBillingReader`` chain per month is then linked to it through
    ``_output="main"``, walking backwards from the month before
    ``options['now']``. A last chain takes ``parse_dates`` as input and ends
    in a second ``InsertOrUpdate``.

    :param options: keyword options; reads ``table_suffix``, ``now``,
        ``months``, ``limit``, ``aws_account_id`` and ``table``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    cost_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='fact_itsm_aws_historical_cost' + options['table_suffix'],
        discriminant=('productname', 'date_sk', 'account_name_sk'),
        engine='database')

    graph.add_chain(
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        cost_writer,
        _name="main",
        _input=None,
    )

    # Go to the beginning of the month of options['now'].
    month_cursor = options['now'] + relativedelta(
        day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        # Step back one month per iteration.
        month_cursor = month_cursor + relativedelta(months=-1)
        tstamp = month_cursor.strftime("%Y-%m")
        print("# %d Processing %s" % (index, tstamp))

        # A separate Limit node is created for each reader chain.
        limit_nodes = ((bonobo.Limit(options['limit']), )
                       if options['limit'] else ())

        graph.add_chain(
            AwsBillingReader('%s-aws-cost-allocation-%s.csv' %
                             (options['aws_account_id'], tstamp),
                             fs='s3',
                             skip=1),
            *limit_nodes,
            _output="main",
        )

    # Second InsertOrUpdate, fed by parse_dates.
    detail_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                      'recordid'),
        engine='database')
    graph.add_chain(detail_writer, _input=parse_dates)

    return graph