import petl as etl


def table_execute(view, **kwargs):
    # petl tables are lazy views that may be iterated more than once, so pass
    # the table through directly rather than wrapping it in a one-shot iterator
    r = view
    if 'row_number' in kwargs:
        r = etl.addrownumbers(r, field=kwargs['row_number'])
    if 'fieldmap' in kwargs:
        r = etl.fieldmap(r, kwargs['fieldmap'])
    return r
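# A minimal usage sketch of table_execute (the table and field names below are
# illustrative, not from the original source):
import petl as etl

view = [['name', 'qty'], ['apples', 3], ['pears', 7]]
result = table_execute(
    view,
    row_number='row_id',
    fieldmap={'row_id': 'row_id', 'item': 'name', 'quantity': 'qty'})
print(etl.look(result))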
# aggregate (fragment: table7 is an aggregation mapping built up earlier in
# the example; strjoin comes from petl)
table7['maxbar'] = 'bar', max
table7['sumbar'] = 'bar', sum
table7['listbar'] = 'bar'  # default aggregation function is list
table7['bars'] = 'bar', strjoin(', ')
look(table7)

# addrownumbers
from petl import addrownumbers, look

table1 = (('foo', 'bar'),
          ('A', 9),
          ('C', 2),
          ('F', 1))
look(table1)
table2 = addrownumbers(table1)
look(table2)

# nthword
from petl import nthword

s = 'foo bar'
f = nthword(0)
f(s)  # -> 'foo'
g = nthword(1)
g(s)  # -> 'bar'

# search (fragment, truncated in the source)
table1 = (('foo', 'bar', 'baz'),
          ('orange', 12, 'oranges are nice fruit'),
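# The search example above breaks off mid-table. A minimal self-contained
# sketch of petl.search (the rows below are illustrative, not the original data):
import petl as etl

tbl = [['foo', 'bar', 'baz'],
       ['orange', 12, 'oranges are nice fruit'],
       ['mango', 42, 'I like them']]
# select rows where any field matches the regular expression
print(etl.look(etl.search(tbl, '.g.')))
# restrict the search to a single field
print(etl.look(etl.search(tbl, 'baz', 'nice')))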
def append_tailings_reports_to_code_required_reports(connection, commit=False):
    src_table = etl.fromdb(
        connection,
        'SELECT exp_doc.mine_guid, exp_doc.exp_document_guid, req_doc.req_document_name, '
        'exp_doc.due_date, exp_doc.exp_document_status_code, exp_doc.received_date, '
        'exp_doc.active_ind, exp_doc_x.mine_document_guid, exp_doc.create_user, '
        'exp_doc.create_timestamp, exp_doc.update_user, exp_doc.update_timestamp '
        'FROM mine_expected_document exp_doc '
        'INNER JOIN mine_expected_document_xref exp_doc_x ON exp_doc.exp_document_guid = exp_doc_x.exp_document_guid '
        'INNER JOIN mds_required_document req_doc ON req_doc.req_document_guid = exp_doc.req_document_guid'
    )

    req_document_crr_definition_map = [
        ['req_document_name', 'mine_report_definition_id'],
        ['Summary of TSF and Dam Safety Recommendations', 28],
        ['ITRB Activities Report', 27],
        ['Register of Tailings Storage Facilities and Dams', 47],
        ['Dam Safety Inspection (DSI) Report', 26],
        ['Dam Safety Review (DSR) Report', 31],
        ['“As-built” Reports', 32],
        ['Annual Reclamation', 25],
        ['MERP Record of Testing', 3],
        # ['Annual Manager\'s Report', __________________],  no mapping or data, ignore
        ['OMS Manual', 33],
        ['Annual reconciliation of water balance and water management plans', 44],
        ['TSF risk assessment', 46],
        ['Mine Emergency Preparedness and Response Plan (MERP)', 24],
        ['Performance of high risk dumps', 29],
    ]

    table1 = etl.join(src_table, req_document_crr_definition_map, 'req_document_name')
    mine_report = etl.cutout(table1, 'req_document_name')  # to be inserted into db

    mine_report = etl.addfield(mine_report, 'submission_year', 2019)
    mine_report = etl.rename(mine_report, 'exp_document_status_code',
                             'mine_report_submission_status_code')
    mine_report = etl.addfield(mine_report, 'deleted_ind', lambda x: not x.active_ind)
    mine_report = etl.cutout(mine_report, 'active_ind')

    # determine what the FKs will be so we can insert into related tables
    max_report_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_mine_report_id_seq')[1][0]
    max_report_submission_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_submission_mine_report_submission_id_seq')[1][0]

    # if a sequence hasn't been used yet, correct the off-by-one
    if max_report_id == 1:
        max_report_id = 0
    if max_report_submission_id == 1:
        max_report_submission_id = 0

    # split out the one-to-many relationship
    mine_report, mine_report_submission_documents = etl.unjoin(
        mine_report, 'mine_document_guid', key='exp_document_guid')

    # add PKs for mappings
    mine_report_with_ids = etl.addrownumbers(
        mine_report, start=max_report_id + 1, step=1, field='mine_report_id')
    mine_report_with_ids = etl.addrownumbers(
        mine_report_with_ids, start=max_report_submission_id + 1, step=1,
        field='mine_report_submission_id')
    print(f'max_report_id={max_report_id}, max_report_submission_id={max_report_submission_id}')

    # copy out fields for the submission tables
    mine_report_submissions = etl.cut(mine_report_with_ids, [
        'mine_report_id', 'exp_document_guid', 'mine_report_submission_status_code',
        'create_user', 'create_timestamp', 'update_user', 'update_timestamp'
    ])
    mine_report_submissions = etl.addfield(
        mine_report_submissions, 'submission_date', lambda x: x.create_timestamp)

    # remove fields not in mine_report
    mine_report = etl.cutout(mine_report, 'mine_report_submission_status_code')

    # replace the exp_document_guid FK with the mine_report_submission FK
    submission_id_lookup = etl.cut(mine_report_with_ids,
                                   ['mine_report_submission_id', 'exp_document_guid'])
    mine_report_submission_documents = etl.join(
        submission_id_lookup, mine_report_submission_documents, key='exp_document_guid')
    mine_report_submission_documents = etl.cutout(
        mine_report_submission_documents, 'exp_document_guid')

    # remove the original PK
    mine_report = etl.cutout(mine_report, 'exp_document_guid')
    mine_report_submissions = etl.cutout(mine_report_submissions, 'exp_document_guid')

    print(etl.valuecounter(etl.distinct(table1, key='exp_document_guid'), 'req_document_name'))
    print(etl.valuecounter(mine_report, 'mine_report_definition_id'))
    print(table1)
    print(mine_report)
    print(mine_report_submissions)
    print(mine_report_submission_documents)

    etl.appenddb(mine_report, connection, 'mine_report', commit=False)
    print('INSERT mine_report staged')
    etl.appenddb(mine_report_submissions, connection, 'mine_report_submission', commit=False)
    print('INSERT mine_report_submission staged')
    etl.appenddb(mine_report_submission_documents, connection, 'mine_report_document_xref', commit=False)
    print('INSERT mine_report_document_xref staged')

    if commit:
        connection.commit()
        print('DATA CREATION COMPLETE')
    else:
        connection.rollback()
        print('NO DATA CREATED: add --commit=true to insert report rows')
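# A minimal usage sketch, assuming a psycopg2 connection (the DSN below is
# an illustrative placeholder; the real script may obtain its connection
# differently):
import psycopg2

connection = psycopg2.connect('dbname=mds user=mds host=localhost')
try:
    # dry run by default; pass commit=True to persist the staged inserts
    append_tailings_reports_to_code_required_reports(connection, commit=False)
finally:
    connection.close()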
def transfer_data(from_db_conn, to_db_conn):
    '''
    Transfer data between two connected databases, executing queries against each.

    Limitations:
    1. poc.address_id is currently set to -1 since it was not provided in the
       test data and is a non-null FK constraint.
    2. The institution2poc table is not available in the old schema.
    3. The role table is already populated by bill.sql, so it is skipped by
       this script.
    4. poc_poc_id is currently set to poc_id since no relevant information is
       available about the column.
    5. project2moc_project.role_id is not available in the old schema and is a
       not-null field in the new schema, so we default it to 1 for now.
    6. project2moc_project.username is not available from the old schema, so
       it is currently set to empty.
    7. raw_item_ts.item_id has duplicates when imported from item_ts, so we
       filter and insert only unique rows.

    :param from_db_conn: source database connection
    :param to_db_conn: destination database connection
    '''
    # Empty out tables with possible foreign key constraint issues
    fk_dep_tables = [
        'poc2project', 'poc2moc_project', 'poc', 'raw_item_ts', 'item',
        'project', 'institution2moc_project'
    ]
    for table_name in fk_dep_tables:
        table = etl.fromdb(to_db_conn, "select * from {} where 1=0".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # Tables with no change in schema
    insert_as_tables = [
        'institution', 'address', 'item_type', 'item2item', 'catalog_item'
    ]
    for table_name in insert_as_tables:
        table = etl.fromdb(from_db_conn, "select * from {}".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # insert a dummy address for constraint matching
    dummy_address = [{'address_id': -1}]
    dummy_address_table = etl.fromdicts(dummy_address)
    etl.appenddb(dummy_address_table, to_db_conn, 'address')

    poc = etl.fromdb(from_db_conn, 'select * from poc')
    poc_transformed = etl.cutout(poc, 'domain_id', 'user_uid')
    poc_dummy_address = etl.replace(poc_transformed, 'address_id', None, -1)
    etl.todb(poc_dummy_address, to_db_conn, 'poc')

    project_names_table = etl.fromdb(from_db_conn,
                                     "select distinct project_name from project")
    moc_project_transformed = etl.addrownumbers(project_names_table)
    moc_project_transformed = etl.rename(moc_project_transformed,
                                         {'row': 'moc_project_id'})
    etl.todb(moc_project_transformed, to_db_conn, 'moc_project')

    domain = etl.fromdb(from_db_conn, "select * from domain")
    domain_table_transformed = etl.cutout(domain, 'domain_uid')
    domain_table_transformed = etl.rename(domain_table_transformed, {
        'domain_id': 'service_id',
        'domain_name': 'service_name'
    })
    etl.todb(domain_table_transformed, to_db_conn, 'service')

    project = etl.fromdb(from_db_conn, "select * from project")
    moc_project = etl.fromdb(to_db_conn, "select * from moc_project")
    project_moc_project_joined = etl.join(project, moc_project, key='project_name')
    project_table_transformed = etl.cutout(project_moc_project_joined, 'project_name')
    project_table_transformed = etl.rename(project_table_transformed, {
        'domain_id': 'service_id',
        'project_uid': 'project_uuid'
    })
    etl.todb(project_table_transformed, to_db_conn, 'project')

    institution2project = etl.fromdb(from_db_conn, "select * from institution2project")
    project = etl.fromdb(to_db_conn, "select project_id, moc_project_id from project")
    inst2project_project_joined = etl.join(institution2project, project, key='project_id')
    inst2moc_project = etl.cutout(inst2project_project_joined, 'domain_id')
    etl.todb(inst2moc_project, to_db_conn, 'institution2moc_project')

    project2poc = etl.fromdb(from_db_conn, "select * from project2poc")
    project2poc_project_joined = etl.join(project2poc, project, key='project_id')
    poc2moc_project = etl.cutout(project2poc_project_joined, 'project_id', 'domain_id')
    poc2moc_project = etl.addfield(poc2moc_project, 'role_id', 1)
    poc2moc_project = etl.addfield(poc2moc_project, 'poc_poc_id',
                                   lambda rec: rec['poc_id'])
    etl.todb(poc2moc_project, to_db_conn, 'poc2moc_project')

    poc2project = etl.cutout(project2poc, 'domain_id')
    poc2project = etl.addfield(poc2project, 'role_id', 1)
    poc2project = etl.addfield(poc2project, 'username', '')
    etl.todb(poc2project, to_db_conn, 'poc2project')

    item = etl.fromdb(from_db_conn, "select * from item")
    item_transformed = etl.cutout(item, 'domain_id')
    etl.todb(item_transformed, to_db_conn, 'item')

    # raw_item_ts.item_id has duplicates in the source, so keep one row per item_id
    raw_item_ts_unique = etl.fromdb(
        from_db_conn,
        "WITH summary AS ("
        " SELECT its.item_id, its.start_ts, its.end_ts, its.state, its.catalog_item_id,"
        " ROW_NUMBER() OVER (PARTITION BY its.item_id) AS rk"
        " FROM item_ts its)"
        " SELECT s.* FROM summary s WHERE s.rk = 1"
    )
    raw_item_ts_unique = etl.cutout(raw_item_ts_unique, 'rk')
    etl.todb(raw_item_ts_unique, to_db_conn, 'raw_item_ts')
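# A minimal usage sketch, assuming two psycopg2 connections (the DSNs below
# are illustrative placeholders):
import psycopg2

from_db_conn = psycopg2.connect('dbname=old_schema user=etl host=localhost')
to_db_conn = psycopg2.connect('dbname=new_schema user=etl host=localhost')
try:
    transfer_data(from_db_conn, to_db_conn)
finally:
    from_db_conn.close()
    to_db_conn.close()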
print('RENAMING mine_acc_no to proponent_incident_no')
table = etl.rename(table, 'mine_acc_no', 'proponent_incident_no')

print('CREATING create_user = MMS_DO_IMPORT')
table = etl.addfield(table, 'create_user', 'MMS_DO_IMPORT')
table = etl.addfield(table, 'update_user', 'MMS_DO_IMPORT')

# RENAME SOURCE COLUMNS WE WANT TO PRESERVE
print("RENAME insp_cd to mms_insp_cd")
table = etl.rename(table, 'insp_cd', 'mms_insp_cd')
print("RENAME min_acc_no to mine_incident_no")
table = etl.rename(table, 'min_acc_no', 'mine_incident_no')

# force an id column; SQL will reset the sequence afterwards
table = etl.addrownumbers(table, field='mine_incident_id')
table = etl.sort(table, 'incident_timestamp', reverse=True)

print('UNJOIN Recommendations into separate table')
table, recommendation_table = etl.unjoin(table, 'recommendation',
                                         key='mine_incident_id')
recommendation_table = etl.select(recommendation_table, 'recommendation',
                                  lambda x: x is not None and not x.isspace())
recommendation_table = etl.addfield(recommendation_table, 'create_user', 'MMS_DO_IMPORT')
recommendation_table = etl.addfield(recommendation_table, 'update_user', 'MMS_DO_IMPORT')

print("TRUNCATE public.mine_incident_recommendation")
connection.cursor().execute(
table2 = [['foo', 'baz'],
          ['B', 3],
          ['D', 10]]
table3 = etl.annex(table1, table2)
table3


# addrownumbers()
#################

import petl as etl

table1 = [['foo', 'bar'],
          ['A', 9],
          ['C', 2],
          ['F', 1]]
table2 = etl.addrownumbers(table1)
table2


# addcolumn()
#############

import petl as etl

table1 = [['foo', 'bar'],
          ['A', 1],
          ['B', 2]]
col = [True, False]
table2 = etl.addcolumn(table1, 'baz', col)
table2
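# addcolumn() zips a pre-built sequence of values against the rows, whereas
# addfield() computes the new value per row. A small sketch of the latter for
# contrast (this example is not from the original source):
import petl as etl

table1 = [['foo', 'bar'],
          ['A', 1],
          ['B', 2]]
# value computed from each record rather than supplied as a literal column
table2 = etl.addfield(table1, 'baz', lambda rec: rec['bar'] * 2)
print(etl.look(table2))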