Example 1
def main(argv):

    global g

    parser = argparse.ArgumentParser()
    parser.add_argument("--contributions-filename", required=True, help="Input UTF8 CSV with contributions data "
        "dumped from Servant Keeper")
    parser.add_argument("--split-detail-files", required=False, nargs='*', default=argparse.SUPPRESS,
        help="List of CSV files which have records that can be used to replace top-level 'Split Transaction' "
        "records in the main contributions file.")
    parser.add_argument("--chart-of-accounts-filename", required=True, help="Input UTF8 CSV with Chart of Accounts "
        "data from Servant Keeper")
    parser.add_argument("--output-filename", required=True, help="Output CSV filename which will be loaded with "
        "contributions data in CCB import format ")
    parser.add_argument('--trace', action='store_true', help="If specified, prints tracing/progress messages to "
        "stdout")
    args = parser.parse_args()

    assert os.path.isfile(args.contributions_filename), "Error: cannot open file '" + args.contributions_filename + "'"

    dict_split_transaction_details = load_split_transaction_details(args.split_detail_files)

    table = petl.fromcsv(args.contributions_filename)
    table = petl.rename(table, {
        'Individual ID': 'SK Individual ID',
        'Amount': 'SK Amount'
        })

    trace('REMOVING SPLIT TRANSACTIONS...', args.trace, banner=True)

    table = replace_split_transactions(table, dict_split_transaction_details)

    table_coa = petl.fromcsv(args.chart_of_accounts_filename)
    table = petl.leftjoin(table, table_coa, lkey='Account', rkey='SK Account')

    table = petl.addfield(table, 'Individual ID', lambda rec: rec['SK Individual ID'])
    table = petl.addfield(table, 'Date of Contribution', lambda rec: rec['Batch Date'])
    table = petl.addfield(table, 'Amount', lambda rec: rec['SK Amount'])
    table = petl.addfield(table, 'Type of Gift', lambda rec: rec['Type'])
    table = petl.addfield(table, 'Check Number', lambda rec: rec['Check #'])
    table = petl.addfield(table, 'Fund', convert_fund)
    table = petl.addfield(table, 'Sub Fund', convert_sub_fund)
    table = petl.addfield(table, 'Campus', '')
    table = petl.addfield(table, 'Transaction Grouping', '')
    table = petl.addfield(table, 'Batch Number/Name', '')
    table = petl.addfield(table, 'Tax Deductible', lambda rec: rec['Tax'])
    table = petl.addfield(table, 'Memo', convert_notes)

    trace('CONVERTING AND THEN EMITTING TO CSV FILE...', args.trace, banner=True)

    table.progress(200).tocsv(args.output_filename)

    trace('OUTPUT TO CSV COMPLETE.', args.trace, banner=True)

    if len(g.set_unfound_accounts) > 0:
        trace('UNMATCHED SK ACCOUNTS!', args.trace, banner=True)
        for acct in g.set_unfound_accounts:
            trace(acct, args.trace)

    trace('DONE!', args.trace, banner=True)
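
The trace helper used throughout this example is not part of the excerpt. A minimal sketch consistent with its call sites, trace(message, flag, banner=...), printing only when --trace was passed, might be:

def trace(message, enabled, banner=False):
    # Hypothetical helper (not in the original source): print progress
    # messages only when the --trace flag was given.
    if not enabled:
        return
    if banner:
        print('*' * 60)
    print(message)
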
Example 2
def produce_delivery_count_table():
    log.addFilter(MultilineFilter())  # useful for tables
    log.info('Starting to generate the monthly german payroll table')

    # ------------------------------
    # Extract driver names from Odoo
    # ------------------------------

    log.info('Extracting driver names from Odoo')
    odoo = OdooConnector()

    filters = [('supplier', '=', True),
               ('active', '=', True),
               ('company_id', '=', 5)]  # company_id 5 is Germany
    df = odoo.extract('res.partner', filters)
    odoo_drivers = fromdataframe(df)

    mappings = {
        'driver_app_username': '******',
        'planday_salary_id_in_odoo': 'x_salary_id',
        'odoo_id': 'id',
        'fullname_in_odoo': 'display_name'
    }
    odoo_drivers = odoo_drivers.fieldmap(mappings)

    # cache the results
    odoo_drivers.toxlsx(odoo_cache_file)
    log.info('%s drivers found in Odoo', odoo_drivers.nrows())
    log.debug(odoo_drivers.look())

    # ------------------------------------------
    # Extract delivery counts from the warehouse
    # ------------------------------------------

    log.info('Extracting delivery counts from the DWH')
    dwh = WarehouseConnector()

    query = SQLReader('sql.german_drivers_delivery_counts').statements[0]
    log.debug(query)
    df = dwh.execute(query)
    driver_counts = fromdataframe(df)

    # cache the results
    driver_counts.toxlsx(dwh_cache_file)
    log.info('%s drivers found in the DWH', driver_counts.nrows())
    log.info('Deliveries per driver %s', driver_counts.stats('number_of_deliveries'))
    log.debug(driver_counts.look())

    # ----------------------------
    # Join the two tables together
    # ----------------------------

    payroll = leftjoin(driver_counts, odoo_drivers, key='driver_app_username')
    # Some usernames appear multiple times in Odoo
    payroll = payroll.distinct('driver_app_username')
    log.debug(payroll.look())

    payroll.toxlsx(output_file)
    log.info('Payroll table saved to %s', output_file)
    log.removeFilter(MultilineFilter())
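
Usernames that repeat in Odoo would fan out the left join above, which is why the result is deduplicated afterwards. petl's duplicates() can show which rows are affected before distinct() keeps one row per key; a toy illustration:

import petl as etl

drivers = [['driver_app_username', 'odoo_id'],
           ['anna', 1],
           ['anna', 2],
           ['ben', 3]]

print(etl.nrows(etl.duplicates(drivers, key='driver_app_username')))  # 2: both 'anna' rows
print(etl.nrows(etl.distinct(drivers, key='driver_app_username')))    # 2: 'anna' once, 'ben' once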
Example 3
def join_mine_guids(connection, application_table):
    # DISTINCT ON (minenumber) with this ORDER BY keeps the earliest-created
    # row per mine number, so each application picks up at most one mine_guid.
    current_mines = etl.fromdb(
        connection,
        'select distinct on (minenumber) mine_guid, mine_no as minenumber '
        'from public.mine order by minenumber, create_timestamp;'
    )
    application_table_guid_lookup = etl.leftjoin(application_table,
                                                 current_mines,
                                                 key='minenumber')
    return application_table_guid_lookup
Example 4
def join(data, strategy, source_left, source_right, destination, key_left,
         key_right, prefix_left, prefix_right, presorted, buffersize, tempdir,
         cache, missing):
    """Perform a join on two data tables."""
    source_left = data.get(source_left)
    source_right = data.get(source_right)

    kwargs = {}
    if key_left == key_right:
        kwargs['key'] = key_left
    else:
        kwargs['lkey'] = key_left
        kwargs['rkey'] = key_right

    if presorted is True:
        kwargs['presorted'] = presorted

    if buffersize is not None:
        kwargs['buffersize'] = buffersize

    if tempdir:
        kwargs['tempdir'] = tempdir

    if 'anti' not in strategy:
        if prefix_left is not None:
            kwargs['lprefix'] = prefix_left
        if prefix_right is not None:
            kwargs['rprefix'] = prefix_right

    if strategy not in ['join', 'antijoin', 'hashjoin', 'hashantijoin']:
        kwargs['missing'] = missing

    if strategy == 'join':
        o = petl.join(source_left, source_right, **kwargs)
    elif strategy == 'leftjoin':
        o = petl.leftjoin(source_left, source_right, **kwargs)
    elif strategy == 'lookupjoin':
        o = petl.lookupjoin(source_left, source_right, **kwargs)
    elif strategy == 'rightjoin':
        o = petl.rightjoin(source_left, source_right, **kwargs)
    elif strategy == 'outerjoin':
        o = petl.outerjoin(source_left, source_right, **kwargs)
    elif strategy == 'antijoin':
        o = petl.antijoin(source_left, source_right, **kwargs)
    elif strategy == 'hashjoin':
        o = petl.hashjoin(source_left, source_right, **kwargs)
    elif strategy == 'hashantijoin':
        o = petl.hashantijoin(source_left, source_right, **kwargs)
    elif strategy == 'hashleftjoin':
        o = petl.hashleftjoin(source_left, source_right, **kwargs)
    elif strategy == 'hashlookupjoin':
        o = petl.hashlookupjoin(source_left, source_right, **kwargs)
    elif strategy == 'hashrightjoin':
        o = petl.hashrightjoin(source_left, source_right, **kwargs)

    data.set(destination, o)
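
A sketch of how this dispatcher might be driven. The registry behind data is not shown in the excerpt, so the Data class below is a hypothetical stand-in with the get/set interface the function expects:

import petl

class Data:
    # Hypothetical table registry matching data.get()/data.set() above.
    def __init__(self):
        self._tables = {}

    def get(self, name):
        return self._tables[name]

    def set(self, name, table):
        self._tables[name] = table

data = Data()
data.set('colours', [['id', 'colour'], [1, 'blue'], [2, 'red']])
data.set('shapes', [['id', 'shape'], [1, 'circle'], [3, 'square']])
join(data, 'leftjoin', 'colours', 'shapes', 'joined',
     key_left='id', key_right='id', prefix_left=None, prefix_right=None,
     presorted=False, buffersize=None, tempdir=None, cache=True, missing=None)
print(petl.look(data.get('joined')))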
Example 5
def get_relationships(self):
    "Parses a list of `Relationship` objects."
    core_file = _find_loinc_table_core_file(self.uri.path)
    core = etl.fromcsv(core_file, delimiter=',')
    core = etl.cut(core, ['LOINC_NUM', 'LONG_COMMON_NAME'])
    hierarchy_file = _find_multi_axial_hierarchy_file(self.uri.path)
    hierarchy = etl.fromcsv(hierarchy_file, delimiter=',')
    hierarchy = etl.leftjoin(hierarchy, core, lkey='CODE', rkey='LOINC_NUM')
    hierarchy = etl.cut(hierarchy, ['IMMEDIATE_PARENT', 'CODE', 'CODE_TEXT', 'LONG_COMMON_NAME'])
    hierarchy = etl.fillright(hierarchy)
    hierarchy = etl.cut(hierarchy, ['IMMEDIATE_PARENT', 'CODE', 'LONG_COMMON_NAME'])
    hierarchy = etl.rename(hierarchy, 'LONG_COMMON_NAME', 'CODE_TEXT')
    parents = etl.cut(hierarchy, ['CODE', 'CODE_TEXT'])
    hierarchy = etl.selectne(hierarchy, 'IMMEDIATE_PARENT', '')
    hierarchy = etl.leftjoin(hierarchy, parents, lkey='IMMEDIATE_PARENT', rkey='CODE', lprefix='source.', rprefix='target.')
    hierarchy = etl.distinct(hierarchy)
    if self.versioned:
        version = _parse_version(hierarchy_file)
        hierarchy = etl.addfield(hierarchy, 'version', version)
    hierarchy = etl.rowmapmany(hierarchy, _to_json, ['relationship'])
    return hierarchy
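
The _to_json row mapper is not included in this excerpt. petl's rowmapmany() calls it once per input row and expects it to yield rows matching the ['relationship'] output header, so a hypothetical sketch (field positions assumed from the cuts and joins above) could be:

import json

def _to_json(row):
    # Hypothetical mapper: positionally, row holds (parent code, child code,
    # child name, parent name[, version]); emit one JSON blob per pair.
    parent_code, child_code, child_name, parent_name = row[0], row[1], row[2], row[3]
    yield [json.dumps({
        'source': {'code': child_code, 'display': child_name},
        'target': {'code': parent_code, 'display': parent_name},
    })]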
Example 6
def join_execute(cl, cr, join, **kwargs):
    cl, cr = cl(), cr()
    if 'addLfields' in kwargs:
        cl = etl.addfields(cl, kwargs['addLfields'])
    if 'addRfields' in kwargs:
        cr = etl.addfields(cr, kwargs['addRfields'])
    args = cl, cr
    if join == Join.UNION:
        c = etl.crossjoin(*args)
    else:
        kwargs = filter_keys(kwargs,
                             ("key", "lkey", "rkey", "missing", "presorted",
                              "buffersize", "tempdir", "cache"))
        if join == Join.INNER:
            c = etl.join(*args, **kwargs)
        elif join == Join.LEFT:
            c = etl.leftjoin(*args, **kwargs)
        elif join == Join.RIGHT:
            c = etl.rightjoin(*args, **kwargs)
        elif join == Join.FULL:
            c = etl.outerjoin(*args, **kwargs)
    return c
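
filter_keys is not shown in this excerpt. Judging from the call site, it whittles the keyword arguments down to the names petl's join functions accept; a plausible sketch:

def filter_keys(d, keys):
    # Hypothetical helper: keep only whitelisted keyword arguments.
    return {k: v for k, v in d.items() if k in keys}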
Example 7
# leftjoin

table1 = [['id', 'colour'],
          [1, 'blue'],
          [2, 'red'],
          [3, 'purple']]
table2 = [['id', 'shape'],
          [1, 'circle'],
          [3, 'square'],
          [4, 'ellipse']]

from petl import leftjoin, look
look(table1)
look(table2)
table3 = leftjoin(table1, table2, key='id')
look(table3)


# rightjoin

table1 = [['id', 'colour'],
          [1, 'blue'],
          [2, 'red'],
          [3, 'purple']]
table2 = [['id', 'shape'],
          [1, 'circle'],
          [3, 'square'],
          [4, 'ellipse']]

from petl import rightjoin, look
look(table1)
look(table2)
table3 = rightjoin(table1, table2, key='id')
look(table3)
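
For reference, petl fills unmatched fields with None (unless missing= says otherwise), so the two joins above should produce:

# leftjoin(table1, table2, key='id'):
#   1, 'blue',   'circle'
#   2, 'red',    None
#   3, 'purple', 'square'
#
# rightjoin(table1, table2, key='id'):
#   1, 'blue',   'circle'
#   3, 'purple', 'square'
#   4, None,     'ellipse'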
Example 8
table6 = [['id', 'shape'], [1, 'circle'], [1, 'square'], [2, 'ellipse']]
table7 = etl.join(table5, table6, key='id')
table7
# compound keys are supported
table8 = [['id', 'time', 'height'], [1, 1, 12.3], [1, 2, 34.5], [2, 1, 56.7]]
table9 = [['id', 'time', 'weight'], [1, 2, 4.5], [2, 1, 6.7], [2, 2, 8.9]]
table10 = etl.join(table8, table9, key=['id', 'time'])
table10

# leftjoin()
############

import petl as etl
table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']]
table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']]
table3 = etl.leftjoin(table1, table2, key='id')
table3

# rightjoin()
#############

import petl as etl
table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']]
table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']]
table3 = etl.rightjoin(table1, table2, key='id')
table3

# outerjoin()
#############

import petl as etl
table1 = [['id', 'colour'], [1, 'blue'], [2, 'red'], [3, 'purple']]
table2 = [['id', 'shape'], [1, 'circle'], [3, 'square'], [4, 'ellipse']]
table3 = etl.outerjoin(table1, table2, key='id')
table3
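
All of these join variants accept a missing keyword argument controlling the fill value for unmatched fields (it defaults to None), for example:

table4 = etl.leftjoin(table1, table2, key='id', missing='n/a')
table4  # id 2 now pairs 'red' with 'n/a' instead of None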
Example 9
# Load Master.csv from the Lahman database.
table = etl.fromcsv(sys.argv[1])

# Use US births only
table2 = etl.select(table, lambda rec: rec.birthCountry == 'USA')

# Only use these fields
table3 = etl.cut(table2, 'nameFirst', 'nameLast', 'debut', 'bbrefID', 'weight', 'height', 'finalGame', 'birthCity', 'birthState', 'birthYear')

# Remove null birth city and birth year
table4 = etl.select(table3, lambda rec: rec.birthCity != "" and rec.birthYear != "")

# Add Baseball Reference URL
table5 = etl.addfield(table4, 'baseball_ref_url', add_bbreflink)
# Remove unnecessary bbrefid
table6 = etl.cutout(table5, "bbrefID")

# Load the city/state lat-long lookup table.
city = etl.fromcsv(sys.argv[2])
# Only use these fields
city2 = etl.cut(city, "city", "state", "lat", "long")

# Join tables by two keys
lat_table = etl.leftjoin(table6, city2, lkey=["birthCity", "birthState"], rkey=["city", "state"])

# Output merged file to csv
lat_table.tocsv(sys.argv[3])
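
add_bbreflink is defined elsewhere in this script. A hypothetical version, assuming Baseball-Reference's usual player URL layout (first letter of the player ID as a subdirectory), might be:

def add_bbreflink(rec):
    # Hypothetical: derive the player's Baseball-Reference URL from bbrefID.
    bbref_id = rec['bbrefID']
    return 'https://www.baseball-reference.com/players/%s/%s.shtml' % (bbref_id[0], bbref_id)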


Example 10
# Use US births only
table2 = etl.select(table, lambda rec: rec.birthCountry == 'USA')

# Only use these fields
table3 = etl.cut(table2, 'nameFirst', 'nameLast', 'debut', 'bbrefID', 'weight',
                 'height', 'finalGame', 'birthCity', 'birthState', 'birthYear')

# Remove null birth city and birth year
table4 = etl.select(table3,
                    lambda rec: rec.birthCity != "" and rec.birthYear != "")

# Add Baseball Reference URL
table5 = etl.addfield(table4, 'baseball_ref_url', add_bbreflink)
# Remove unnecessary bbrefid
table6 = etl.cutout(table5, "bbrefID")

# Load the city/state lat-long lookup table.
city = etl.fromcsv(sys.argv[2])
# Only use these fields
city2 = etl.cut(city, "city", "state", "lat", "long")

# Join tables by two keys
lat_table = etl.leftjoin(table6,
                         city2,
                         lkey=["birthCity", "birthState"],
                         rkey=["city", "state"])

# Output merged file to csv
lat_table.tocsv(sys.argv[3])
Example 11
table10


# leftjoin()
############

import petl as etl
table1 = [['id', 'colour'],
          [1, 'blue'],
          [2, 'red'],
          [3, 'purple']]
table2 = [['id', 'shape'],
          [1, 'circle'],
          [3, 'square'],
          [4, 'ellipse']]
table3 = etl.leftjoin(table1, table2, key='id')
table3


# rightjoin()
#############

import petl as etl
table1 = [['id', 'colour'],
          [1, 'blue'],
          [2, 'red'],
          [3, 'purple']]
table2 = [['id', 'shape'],
          [1, 'circle'],
          [3, 'square'],
          [4, 'ellipse']]
table3 = etl.rightjoin(table1, table2, key='id')
table3
Example 12
    table = clean_up(table, 'geo_ind')
    table = clean_up(table, 'cid')
    table = clean_up(table, 'occ_typ')
    print('TRIMMED HEADERS = ' + str(etl.header(table)))

table = etl.select(table, 'occ_dt', lambda x: x > datetime(2000, 1, 1))
print('ROWS POST YR 2000 = ' + str(etl.nrows(table)))

mine_table = etl.fromcsv('mines.csv', encoding='utf-8')

# Handle leading zeros: normalise mine numbers on both sides before joining
mine_table = etl.convert(mine_table, 'mine_no', lambda x: str(int(x)))
table = etl.convert(table, 'mine_no', lambda x: str(int(x)))

# Map mine_no to mine_guid
table = etl.leftjoin(table, mine_table, key='mine_no')
table = clean_up(table, 'mine_no')
# make sure no rows are missing a mine_guid (this count must be 0)
if etl.valuecount(table, 'mine_guid', None)[0] > 0:
    print('mine_guid, mine_no pair missing from mines.csv')
    exit(1)

######
print('CONVERT AND RENAME descript1 to recommendation')
table = etl.addfield(table, 'recommendation', lambda x: x['descript1'])
table = clean_up(table, 'descript1')

######
print('CONVERTING sta_cd to status_code')
table = etl.addfield(table, 'status_code', lambda x: x['sta_cd'])
table = etl.convert(table, 'status_code', 'replace', 'O', 'F')
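
clean_up is not part of this excerpt. Given that each call removes the named column from the table, a plausible sketch is a thin wrapper over petl's cutout():

def clean_up(table, field):
    # Hypothetical helper: drop a column once it is no longer needed.
    return etl.cutout(table, field)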
Example 13
            header=['id', 'external_id'])
        aggregated_summary = etl.unpackdict(aggregated_summary,
                                            'creation_data')

        file_name = 'datasets-%s.csv' % datetime.now().strftime('%Y%m%d%H%M%S')
        directory = 'csv'
        if not os.path.exists(directory):
            os.makedirs(directory)
        # etl.tocsv(aggregated_summary, './%s/%s' % (directory, file_name))
        # logging.info('This %s has been exported' % file_name)

        rooms, participations = storing_data_preparation(aggregated_summary)

        participations = etl.leftjoin(participations,
                                      external_ids,
                                      lkey='participant_id',
                                      rkey='id',
                                      rprefix='r_')
        participations = etl.cutout(participations, 'participant_id')
        participations = etl.rename(participations, 'r_external_id',
                                    'participant_id')

        rooms = etl.leftjoin(rooms,
                             external_ids,
                             lkey='creator',
                             rkey='id',
                             rprefix='r_')
        rooms = etl.cutout(rooms, 'creator')
        rooms = etl.rename(rooms, 'r_external_id', 'creator')

        logging.info('Storing data %s to database' % file_name)
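
The leftjoin/cutout/rename sequence above is a general pattern for swapping an internal key for an external identifier. A self-contained toy version (table contents invented for illustration):

import petl as etl

participations = [['participant_id', 'room'],
                  [10, 'lobby'],
                  [11, 'kitchen']]
external_ids = [['id', 'external_id'],
                [10, 'u-100'],
                [11, 'u-101']]

# Attach the external id, drop the internal one, and take over its name.
t = etl.leftjoin(participations, external_ids,
                 lkey='participant_id', rkey='id', rprefix='r_')
t = etl.cutout(t, 'participant_id')
t = etl.rename(t, 'r_external_id', 'participant_id')
print(etl.look(t))  # room | participant_id, with 'u-...' values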