Example #1
import petl as etl

def table_execute(view, **kwargs):
    # start from the source table (any iterable of rows)
    r = iter(view)
    # optionally add a row number field
    if 'row_number' in kwargs:
        r = etl.addrownumbers(r, field=kwargs['row_number'])
    # optionally remap fields
    if 'fieldmap' in kwargs:
        r = etl.fieldmap(r, kwargs['fieldmap'])
    return r
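
# A minimal usage sketch (the sample data and the 'n' field name are hypothetical):
example = [['foo', 'bar'],
           ['A', 1],
           ['B', 2]]
result = table_execute(example, row_number='n')
print(etl.look(result))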
Example #2
# aggregate: table7 is assumed to be an aggregation view created earlier,
# with strjoin imported from petl (see the sketch below)
table7['maxbar'] = 'bar', max
table7['sumbar'] = 'bar', sum
table7['listbar'] = 'bar'  # default aggregation function is list
table7['bars'] = 'bar', strjoin(', ')
look(table7)
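
# For context, a minimal sketch of how an aggregation view like table7 is
# typically built with petl (table6 is hypothetical sample data):
from petl import aggregate, look, strjoin
table6 = [['foo', 'bar'],
          ['a', 3],
          ['a', 7],
          ['b', 2]]
table7 = aggregate(table6, 'foo')
table7['maxbar'] = 'bar', max
look(table7)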


# addrownumbers
from petl import addrownumbers, look

table1 = (('foo', 'bar'),
          ('A', 9),
          ('C', 2),
          ('F', 1))

look(table1)
table2 = addrownumbers(table1)
look(table2)
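
# addrownumbers prepends a 'row' field counting from 1, so look(table2)
# shows (1, 'A', 9), (2, 'C', 2) and (3, 'F', 1)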


# nthword
from petl import nthword
s = 'foo bar'
f = nthword(0)
f(s)  # -> 'foo'
g = nthword(1)
g(s)  # -> 'bar'


# search
from petl import search, look
table1 = (('foo', 'bar', 'baz'),
          ('orange', 12, 'oranges are nice fruit'))
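# search() performs a regular expression search, returning rows where any
# field matches the pattern, e.g. (hypothetical): look(search(table1, 'nice'))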
Example #4
def append_tailings_reports_to_code_required_reports(connection, commit=False):
    src_table = etl.fromdb(
        connection,
        'SELECT exp_doc.mine_guid, exp_doc.exp_document_guid, req_doc.req_document_name, exp_doc.due_date, exp_doc.exp_document_status_code, exp_doc.received_date, exp_doc.active_ind, exp_doc_x.mine_document_guid, exp_doc.create_user, exp_doc.create_timestamp, exp_doc.update_user, exp_doc.update_timestamp from mine_expected_document exp_doc \
        inner join mine_expected_document_xref exp_doc_x on exp_doc.exp_document_guid = exp_doc_x.exp_document_guid\
        inner join mds_required_document req_doc on req_doc.req_document_guid = exp_doc.req_document_guid'
    )

    req_document_crr_definition_map = [
        ['req_document_name', 'mine_report_definition_id'],
        ['Summary of TSF and Dam Safety Recommendations', 28],
        ['ITRB Activities Report', 27],
        ['Register of Tailings Storage Facilities and Dams', 47],
        ['Dam Safety Inspection (DSI) Report', 26],
        ['Dam Safety Review (DSR) Report', 31],
        ['“As-built” Reports', 32],
        ['Annual Reclamation', 25],
        ['MERP Record of Testing', 3],
        #['Annual Manager\'s Report', __________________ ], no mapping or data, ignore.
        ['OMS Manual', 33],
        ['Annual reconciliation of water balance and water management plans', 44],
        ['TSF risk assessment', 46],
        ['Mine Emergency Preparedness and Response Plan (MERP)', 24],
        ['Performance of high risk dumps', 29]
    ]

    table1 = etl.join(src_table, req_document_crr_definition_map, 'req_document_name')
    mine_report = etl.cutout(table1, 'req_document_name')

    # to be inserted into the db
    mine_report = etl.addfield(mine_report, 'submission_year', 2019)
    mine_report = etl.rename(mine_report, 'exp_document_status_code',
                             'mine_report_submission_status_code')
    mine_report = etl.addfield(mine_report, 'deleted_ind', lambda x: not x.active_ind)
    mine_report = etl.cutout(mine_report, 'active_ind')
    # determine what the FKs will be so we can insert into the related tables
    max_report_id = etl.fromdb(connection,
                               'select last_value from public.mine_report_mine_report_id_seq')[1][0]
    max_report_submission_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_submission_mine_report_submission_id_seq')[1][0]

    # if a sequence hasn't been used yet, fix the off-by-one
    if max_report_id == 1:
        max_report_id = 0
    if max_report_submission_id == 1:
        max_report_submission_id = 0

    # get the one-to-many relationship
    mine_report, mine_report_submission_documents = etl.unjoin(mine_report,
                                                               'mine_document_guid',
                                                               key='exp_document_guid')

    # add PKs for the mappings
    mine_report_with_ids = etl.addrownumbers(mine_report,
                                             start=max_report_id + 1,
                                             step=1,
                                             field='mine_report_id')
    mine_report_with_ids = etl.addrownumbers(mine_report_with_ids,
                                             start=max_report_submission_id + 1,
                                             step=1,
                                             field='mine_report_submission_id')
    print(f'max_report_id= {max_report_id}, max_report_submission_id={max_report_submission_id}')
    # copy out fields for the submission tables
    mine_report_submissions = etl.cut(mine_report_with_ids, [
        'mine_report_id', 'exp_document_guid', 'mine_report_submission_status_code', 'create_user',
        'create_timestamp', 'update_user', 'update_timestamp'
    ])
    mine_report_submissions = etl.addfield(mine_report_submissions,
                                           'submission_date', lambda x: x.create_timestamp)
    # remove fields not in mine_report
    mine_report = etl.cutout(mine_report, 'mine_report_submission_status_code')

    # replace the exp_document_guid FK with the mine_report_submission FK
    submission_id_lookup = etl.cut(mine_report_with_ids,
                                   ['mine_report_submission_id', 'exp_document_guid'])
    mine_report_submission_documents = etl.join(submission_id_lookup,
                                                mine_report_submission_documents,
                                                key='exp_document_guid')
    mine_report_submission_documents = etl.cutout(mine_report_submission_documents,
                                                  'exp_document_guid')

    # remove the original PK
    mine_report = etl.cutout(mine_report, 'exp_document_guid')
    mine_report_submissions = etl.cutout(mine_report_submissions, 'exp_document_guid')

    print(etl.valuecounter(etl.distinct(table1, key='exp_document_guid'), 'req_document_name'))
    print(etl.valuecounter(mine_report, 'mine_report_definition_id'))
    print(table1)
    print(mine_report)
    print(mine_report_submissions)
    print(mine_report_submission_documents)

 
    etl.appenddb(mine_report, connection, 'mine_report', commit=False)
    print('INSERT mine_report staged')
    etl.appenddb(mine_report_submissions, connection, 'mine_report_submission', commit=False)
    print('INSERT mine_report_submission staged')
    etl.appenddb(mine_report_submission_documents,
                 connection,
                 'mine_report_document_xref',
                 commit=False)
    print('INSERT mine_report_document_xref staged')
    if commit:  
        connection.commit()
        print('DATA CREATION COMPLETE')
    else:
        connection.rollback()
        print('NO DATA CREATED: add --commit=true to insert report rows')
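
# A minimal usage sketch (connection parameters are hypothetical); staged
# rows are rolled back unless commit=True:
import psycopg2
connection = psycopg2.connect('dbname=mds user=mds')
append_tailings_reports_to_code_required_reports(connection, commit=True)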
Example #5
def transfer_data(from_db_conn, to_db_conn):
    '''
    Transfer data between databases, given connections to the source and
    destination databases.
    Limitations:
    1. poc.address_id is currently set to -1, since it was not provided in the test data and is a non-null FK constraint
    2. the institution2poc table is not available in the old schema
    3. the role table is already populated by the bill.sql file, so this script skips it
    4. poc_poc_id is currently set to poc_id, since no relevant information is available about the column
    5. the project2moc_project.role_id column is not available in the old schema and is a not-null field in the new schema,
        so we default it to 1 for now
    6. project2moc_project.username is not available from the old schema, so it is currently set to empty
    7. raw_item_ts.item_id has duplicates when imported from item_ts, so we filter them out and insert only unique rows

    :param from_db_conn: source database connection
    :param to_db_conn: destination database connection
    '''

    # Emptying out tables with possible foreign key constraint issues
    fk_dep_tables = [
        'poc2project', 'poc2moc_project', 'poc', 'raw_item_ts', 'item',
        'project', 'institution2moc_project'
    ]
    for table_name in fk_dep_tables:
        table = etl.fromdb(to_db_conn,
                           "select * from {} where 1=0".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # Tables with no change in schema
    insert_as_tables = [
        'institution', 'address', 'item_type', 'item2item', 'catalog_item'
    ]
    for table_name in insert_as_tables:
        table = etl.fromdb(from_db_conn, "select * from {}".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # inserting dummy address for constraint matching
    dummy_address = [{'address_id': -1}]
    dummy_address_table = etl.fromdicts(dummy_address)
    etl.appenddb(dummy_address_table, to_db_conn, 'address')

    poc = etl.fromdb(from_db_conn, 'select * from poc')
    poc_transformed = etl.cutout(poc, 'domain_id', 'user_uid')
    poc_dummy_address = etl.replace(poc_transformed, 'address_id', None, -1)
    etl.todb(poc_dummy_address, to_db_conn, 'poc')

    project_names_table = etl.fromdb(
        from_db_conn, "select distinct project_name from project")
    moc_project_transformed = etl.addrownumbers(project_names_table)
    moc_project_transformed = etl.rename(moc_project_transformed,
                                         {'row': 'moc_project_id'})
    etl.todb(moc_project_transformed, to_db_conn, 'moc_project')

    domain = etl.fromdb(from_db_conn, "select * from domain")
    domain_table_transformed = etl.cutout(domain, 'domain_uid')
    domain_table_transformed = etl.rename(domain_table_transformed, {
        'domain_id': 'service_id',
        'domain_name': 'service_name'
    })
    etl.todb(domain_table_transformed, to_db_conn, 'service')

    project = etl.fromdb(from_db_conn, "select * from project")
    moc_project = etl.fromdb(to_db_conn, "select * from moc_project")
    project_moc_project_joined = etl.join(project,
                                          moc_project,
                                          key='project_name')
    project_table_transformed = etl.cutout(project_moc_project_joined,
                                           'project_name')
    project_table_transformed = etl.rename(project_table_transformed, {
        'domain_id': 'service_id',
        'project_uid': 'project_uuid'
    })
    etl.todb(project_table_transformed, to_db_conn, 'project')

    institution2project = etl.fromdb(from_db_conn,
                                     "select * from institution2project")
    project = etl.fromdb(to_db_conn,
                         "select project_id, moc_project_id from project")
    inst2project_project_joined = etl.join(institution2project,
                                           project,
                                           key='project_id')
    inst2moc_project = etl.cutout(inst2project_project_joined, 'domain_id')
    etl.todb(inst2moc_project, to_db_conn, 'institution2moc_project')

    project2poc = etl.fromdb(from_db_conn, "select * from project2poc")
    project2poc_project_joined = etl.join(project2poc,
                                          project,
                                          key='project_id')
    poc2moc_project = etl.cutout(project2poc_project_joined, 'project_id',
                                 'domain_id')
    poc2moc_project = etl.addfield(poc2moc_project, 'role_id', 1)
    poc2moc_project = etl.addfield(poc2moc_project, 'poc_poc_id',
                                   lambda rec: rec['poc_id'])
    etl.todb(poc2moc_project, to_db_conn, 'poc2moc_project')

    poc2project = etl.cutout(project2poc, 'domain_id')
    poc2project = etl.addfield(poc2project, 'role_id', 1)
    poc2project = etl.addfield(poc2project, 'username', '')
    etl.todb(poc2project, to_db_conn, 'poc2project')

    item = etl.fromdb(from_db_conn, "select * from item")
    item_transformed = etl.cutout(item, 'domain_id')
    etl.todb(item_transformed, to_db_conn, 'item')

    raw_item_ts_unique = etl.fromdb(
        from_db_conn,
        "WITH summary AS ( SELECT its.item_id, its.start_ts, its.end_ts, its.state, its.catalog_item_id, ROW_NUMBER() OVER(PARTITION BY its.item_id) AS rk FROM ITEM_TS its) SELECT s.* FROM summary s WHERE s.rk = 1"
    )
    raw_item_ts_unique = etl.cutout(raw_item_ts_unique, 'rk')
    etl.todb(raw_item_ts_unique, to_db_conn, 'raw_item_ts')
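
# A minimal usage sketch (connection parameters are hypothetical); both
# arguments are DB-API connections, e.g. created with psycopg2:
import psycopg2
old_db = psycopg2.connect('dbname=old_schema user=etl')
new_db = psycopg2.connect('dbname=new_schema user=etl')
transfer_data(old_db, new_db)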
Example #6
print('RENAMING mine_acc_no to proponent_incident_no')
table = etl.rename(table, 'mine_acc_no', 'proponent_incident_no')

print('CREATING create_user and update_user = MMS_DO_IMPORT')
table = etl.addfield(table, 'create_user', 'MMS_DO_IMPORT')
table = etl.addfield(table, 'update_user', 'MMS_DO_IMPORT')

# RENAME SOURCE COLUMNS WE WANT TO PRESERVE
print("RENAME insp_cd to mms_insp_cd")
table = etl.rename(table, 'insp_cd', 'mms_insp_cd')
print("RENAME min_acc_no to mine_incident_no")
table = etl.rename(table, 'min_acc_no', 'mine_incident_no')

# force the id column; SQL will reset the sequence
table = etl.addrownumbers(table, field='mine_incident_id')
table = etl.sort(table, 'incident_timestamp', reverse=True)

print('UNJOIN Recommendations into separate table')
table, recommendation_table = etl.unjoin(table,
                                         'recommendation',
                                         key='mine_incident_id')
recommendation_table = etl.select(recommendation_table, 'recommendation',
                                  lambda x: x is not None and not x.isspace())
recommendation_table = etl.addfield(recommendation_table, 'create_user',
                                    'MMS_DO_IMPORT')
recommendation_table = etl.addfield(recommendation_table, 'update_user',
                                    'MMS_DO_IMPORT')

print("TRUNCATE public.mine_incident_recommendation")
connection.cursor().execute(
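
# For reference, a minimal sketch of etl.unjoin as used above: it splits the
# named value field out into a second table keyed on `key` (sample data is
# hypothetical):
import petl as etl
t = [['id', 'note'],
     [1, 'a'],
     [1, 'b'],
     [2, 'c']]
main, notes = etl.unjoin(t, 'note', key='id')
# main is t without the 'note' field; notes holds the (id, note) pairs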
Example #7
# annex()
#########

import petl as etl
table1 = [['foo', 'bar'], ['A', 9], ['C', 2], ['F', 1]]
table2 = [['foo', 'baz'],
          ['B', 3],
          ['D', 10]]
table3 = etl.annex(table1, table2)
table3


# addrownumbers()
#################

import petl as etl
table1 = [['foo', 'bar'],
          ['A', 9],
          ['C', 2],
          ['F', 1]]
table2 = etl.addrownumbers(table1)
table2


# addcolumn()
#############

import petl as etl
table1 = [['foo', 'bar'],
          ['A', 1],
          ['B', 2]]
col = [True, False]
table2 = etl.addcolumn(table1, 'baz', col)
table2
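
# addcolumn() appends the given sequence as a new field, so table2 is
# [['foo', 'bar', 'baz'], ['A', 1, True], ['B', 2, False]]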