def anadirListaTipos(self):
    """Prompt for a preset name and persist the current UI settings.

    Opens an input dialog; on confirmation, appends one row to the
    'tipos' DB table (via petl) holding the chip count, time limit and
    the three chip choices read from the widgets, then adds the preset
    name to the presets list widget.
    """
    global iTipos  # index of the next row in listWidgetPresets — presumably incremented elsewhere; TODO confirm
    text, okPressed = QtWidgets.QInputDialog.getText(
        MainWindow, "Guardar preset", "Nombre del preset:",
        QtWidgets.QLineEdit.Normal, "")
    if okPressed and text != '':
        print(text)
        # Convert the QTimeEdit value to total seconds since 00:00:00.
        seconds = QtCore.QTime(0, 0, 0).secsTo(self.timeEdit.time())
        # petl table layout: first row is the header, second row the values.
        table1 = [[
            'nombre', 'nofichas', 'tiempo', 'ficha1', 'ficha2', 'ficha3'
        ],
                  [
                      text,
                      self.spinBox_2.value(), seconds,
                      self.comboBtnFicha1.currentIndex() + 1,
                      self.comboBtnFicha2.currentIndex() + 1,
                      self.comboBtnFicha3.currentIndex() + 1
                  ]]
        etl.appenddb(table1, connection, 'tipos')
        # Append a fresh item, then set its text via the global index.
        item = QtWidgets.QListWidgetItem()
        self.listWidgetPresets.addItem(item)
        item = self.listWidgetPresets.item(iTipos)
        item.setText(text)
def etl_from_dir(self, data_dir="data"): """Extract, translate, load reinstatements (and not exclusions) from directory DATA_DIR. """ # Get YYYYMM date of most recent reinstatement action most_recent = self.conn.get_latest_reinstatement_date().replace( '-', '')[:6] or "000000000" # Get the data from REIN CSV files. Gather reinstatement actions # since most_recent total_indiv = [] total_bus = [] for fname in sorted(glob.glob(os.path.join(data_dir, "*REIN.csv"))): if int(os.path.basename(fname)[:4]) <= int(most_recent[2:]): continue debug("Processing " + fname) reinstated = etl.fromcsv(fname) individual, business = clean_and_separate(reinstated) total_indiv.append(individual) total_bus.append(business) # Save to db, APPENDING TO existing data tables. Assumes tables # exist. if total_indiv: etl.appenddb(etl.cat(*total_indiv), self.conn.conn, 'individual_reinstatement') if total_bus: etl.appenddb(etl.cat(*total_bus), self.conn.conn, 'business_reinstatement') # It is possible to end up with duplicate rows if, say, an ETL # process is interrupted midway through. So we should find and # remove dupes. self.conn.dedupe_reinstatements()
def synctable(self, sourceDb, targetDb, sourceTable, targetTable):
    """Incrementally sync rows from sourceTable into targetTable.

    Repeatedly pulls timestamp-ordered batches from the source whose
    timestamp is >= the target's last-seen update time, appends them to
    the target table, and advances the target's bookmark until it has
    caught up. Progress is shown with a tqdm bar.
    """
    sourceCursor = sourceDb.cursor()
    targetCursor = targetDb.cursor()
    affected_total = 0
    # Start the progress bar at whichever side currently has fewer rows.
    init_rowCount = targetTable.rowCount if targetTable.rowCount < sourceTable.rowCount else sourceTable.rowCount
    pbar = tqdm(total=sourceTable.rowCount, unit='records')
    pbar.update(init_rowCount)
    while sourceTable.lastUpdatetime > targetTable.lastUpdatetime:
        affected_rows = 0
        batchSize = 100000
        # Oracle-style paging: order by the timestamp column, then cap
        # the batch with ROWNUM.
        sql = "SELECT * FROM (SELECT * FROM {schema}.{tablename} WHERE {timestamp}>=to_timestamp('{last_updatetime}','yyyy-mm-dd hh24:mi:ss.ff6') ORDER BY {timestamp}) WHERE ROWNUM<={batch_size}".format(
            timestamp=sourceTable.timestampField,
            schema=sourceTable.schema,
            tablename=sourceTable.tablename,
            last_updatetime=targetTable.lastUpdatetime,
            batch_size=batchSize)
        sourceRecord = etl.fromdb(lambda: CursorProxy(sourceDb.cursor()), sql)
        # Empty SELECT (WHERE 1=0) is used only to discover the target's
        # column set.
        targetRecord = etl.fromdb(
            lambda: CursorProxy(targetDb.cursor()),
            "SELECT * FROM {schema}.{tablename} WHERE 1=0".format(
                schema=targetTable.schema, tablename=targetTable.tablename))
        sourceTable.columns = etl.header(sourceRecord)
        targetTable.columns = etl.header(targetRecord)
        # Drop source-only columns so the append matches the target schema.
        for column in list(
                set(sourceTable.columns) - set(targetTable.columns)):
            sourceRecord = etl.cutout(sourceRecord, column)
        # Newest timestamp in this batch (skip(1) skips the header row).
        max_updatetime = sourceRecord.cut(
            sourceTable.timestampField).skip(1).max()[0]
        sourceRecord = sourceRecord.sort(sourceTable.timestampField)
        etl.appenddb(sourceRecord,
                     CursorProxy(targetCursor),
                     targetTable.tablename,
                     schema=targetTable.schema,
                     commit=True)
        affected_rows += targetCursor.rowcount
        # Advance the bookmark to the newest timestamp just copied.
        targetTable.lastUpdatetime = max_updatetime.strftime(
            '%Y-%m-%d %H:%M:%S.%f')
        targetTable.rowCount += affected_rows
        # Clamp the progress update so the bar never exceeds its total.
        pbar.update(affected_rows if init_rowCount + affected_total +
                    affected_rows < sourceTable.rowCount else
                    sourceTable.rowCount - init_rowCount - affected_total)
        affected_total += affected_rows
        pbar.set_description("%s |%d records updated."
                             % (targetTable.tablename, affected_total))
    # Post-loop diagnostics: explain why no further copying happened.
    if targetTable.lastUpdatetime > sourceTable.lastUpdatetime:
        pbar.set_description("%s |timestamp >, skip." %
                             (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount == sourceTable.rowCount:
        pbar.set_description("%s |no data change." % (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount > sourceTable.rowCount:
        pbar.set_description("%s |RowCount > but timestamp ==, skip." %
                             (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount < sourceTable.rowCount:
        pbar.set_description("%s |RowCount < but timestamp ==, skip." %
                             (targetTable.tablename))
    pbar.close()
def test_todb_appenddb_cursor():
    """todb then appenddb should both work when handed a DB-API cursor."""
    tmp = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(tmp.name)
    conn.execute("create table foobar (foo, bar)")
    conn.commit()
    cursor = conn.cursor()

    # initial load through the cursor
    rows = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    todb(rows, cursor, "foobar")
    ieq((("a", 1), ("b", 2), ("c", 2)),
        conn.execute("select * from foobar"))

    # append more rows through the same cursor
    more = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appenddb(more, cursor, "foobar")
    ieq((("a", 1), ("b", 2), ("c", 2), ("d", 7), ("e", 9), ("f", 1)),
        conn.execute("select * from foobar"))
def test_todb_appenddb_cursor():
    """Exercise todb/appenddb against a sqlite cursor."""
    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute('create table foobar (foo, bar)')
    conn.commit()
    cur = conn.cursor()

    # write, then verify
    initial = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    todb(initial, cur, 'foobar')
    expect = initial[1:]
    ieq(expect, conn.execute('select * from foobar'))

    # append, then verify both old and new rows are present
    extra = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appenddb(extra, cur, 'foobar')
    ieq(expect + extra[1:], conn.execute('select * from foobar'))
def ETL_MMS_NOW_schema(connection, tables, schema, system_name):
    '''Import all the data from the specified schema and tables.'''
    for destination, source in tables.items():
        try:
            current_table = etl.fromdb(connection,
                                       f'SELECT * from {schema}.{source}')
            print(f' {destination}:{etl.nrows(current_table)}')
            if source == 'application':
                # tag each row with its originating system, then attach
                # mine guids before loading
                with_origin = etl.addfield(current_table,
                                           'originating_system', system_name)
                with_guids = join_mine_guids(connection, with_origin)
                etl.appenddb(with_guids, connection, destination,
                             schema='now_submissions', commit=False)
            else:
                etl.appenddb(current_table, connection, destination,
                             schema='now_submissions', commit=False)
        except Exception as err:
            print(f'ETL Parsing error: {err}')
            raise
def validate(cursor, table, constraints, task_name):
    """Run petl validation on *table* and append any problems to tetl_logs."""
    problems = etl.validate(table, constraints=constraints,
                            header=etl.header(table))
    # annotate each problem row with the owning task and a timestamp
    problems = etl.addfield(problems, 'task_name', task_name)
    problems = etl.addfield(problems, 'create_date', datetime.now())
    etl.appenddb(problems, cursor, 'tetl_logs')
def ETL_MMS_NOW_schema(connection, tables, schema):
    """Copy each source table from *schema* into the now_submissions schema."""
    for key, value in tables.items():
        try:
            rows = etl.fromdb(connection, f'SELECT * from {schema}.{value}')
            etl.appenddb(rows, connection, key,
                         schema='now_submissions', commit=False)
        except Exception as err:
            # best-effort: report the failure and keep processing the rest
            print(f'ETL Parsing error: {err}')
def load(data, data_set):
    """Append *data* to the Postgres table named by *data_set* (lowercased).

    Connection parameters come from the module-level ``config`` object
    ('rpi' section). The connection is opened in autocommit mode and is
    always closed, even if the append fails.
    """
    import psycopg2
    conn = psycopg2.connect(
        host=config.get('rpi', 'server'),
        database=config.get('rpi', 'database'),
        user=config.get('rpi', 'user'),
        password=config.get('rpi', 'passwd')
    )
    try:
        conn.autocommit = True
        etl.appenddb(data, conn, data_set.lower())
    finally:
        # fix: previously the connection leaked if appenddb raised
        conn.close()
    print('Table {0} loaded.'.format(data_set))
    return
def vehicleP():
    """Poll GTFS-RT vehicle positions every 60s and append them to the DB.

    Fetches the protobuf feed, keeps entities whose timestamp falls
    within the last minute, converts each to a dict (position stored as
    an SRID-4326 point), and appends the batch to vehicle_positions.
    Runs forever.
    """
    while True:
        feed.ParseFromString(
            urlopen(
                'http://gtfs.openov.nl/gtfs-rt/vehiclePositions.pb').read())
        data = []
        window_end = datetime.now()
        window_start = datetime.now() - timedelta(minutes=1)
        for entity in feed.entity:
            vp = entity.vehicle
            timex = datetime.fromtimestamp(vp.timestamp)
            # keep only observations from the last minute
            if window_start < timex < window_end:
                x = vp.position.longitude
                y = vp.position.latitude
                geo = shape.from_shape(Point(x, y), srid=4326)
                # fix: the original intermediate assignments ended with
                # stray trailing commas, which wrapped every value in a
                # 1-tuple before insertion; values are now stored directly.
                data.append({
                    'time': timex,
                    'geo_loc': str(geo),
                    'schedule_relationship': vp.trip.schedule_relationship,
                    'direction_id': vp.trip.direction_id,
                    'current_stop_sequence': vp.current_stop_sequence,
                    'current_status': vp.current_status,
                    'trip_id': vp.trip.trip_id,
                    'route_id': vp.trip.route_id,
                    'stop_id': vp.stop_id,
                    # trip_start_time/date kept as raw feed strings
                    'trip_start_time': vp.trip.start_time,
                    'trip_start_date': vp.trip.start_date,
                    'vehicle_label': vp.vehicle.label,
                })
        table1 = petl.fromdicts(data)
        print(petl.nrows(table1))
        petl.appenddb(table1, con, 'vehicle_positions')
        t.sleep(60)
def _test_with_schema(dbo, schema):
    """Verify todb/appenddb honour an explicit schema argument."""
    base = (('foo', 'bar'), ('a', 1), ('b', 2))
    doubled = base + base[1:]

    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(base, dbo, 'test', schema=schema)
    ieq(base, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(base, dbo, 'test', schema=schema)
    ieq(doubled, actual)
    print(etl.look(actual))
def load(data, data_set):
    """Append *data* to the Postgres table named by *data_set* (lowercased).

    Connection parameters come from the module-level ``config`` object
    ('rpi' section). The connection is opened in autocommit mode and is
    always closed, even if the append fails.
    """
    import psycopg2
    conn = psycopg2.connect(
        host=config.get("rpi", "server"),
        database=config.get("rpi", "database"),
        user=config.get("rpi", "user"),
        password=config.get("rpi", "passwd"),
    )
    try:
        conn.autocommit = True
        etl.appenddb(data, conn, data_set.lower())
    finally:
        # fix: close the connection even when appenddb raises
        conn.close()
    print("Table {0} loaded.".format(data_set))
    return
def _test_dbo(write_dbo, read_dbo=None):
    """Exercise todb/appenddb round-trips against a DB-API object.

    *write_dbo* is used for writes; reads go through *read_dbo*
    (defaults to the same object). Assumes an empty table named 'test'
    already exists.
    """
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    # lazy view over the table; re-evaluated on each ieq below
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    # column order in the input must not matter
    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
def anadirListaEmpresas(self):
    """Append the company described by the form widgets to the DB and UI list."""
    global iEmpresa  # index of the next row in listEmpresas — presumably incremented elsewhere; TODO confirm
    # petl table layout: header row + one data row from the widgets
    table1 = [['nombre', 'sucursal', 'direccion'],
              [
                  self.txtNombreEmpresa.toPlainText(),
                  self.spinBoxEmpresa.value(),
                  self.txtDireccionEmpresa.toPlainText()
              ]]
    etl.appenddb(table1, connection, 'empresas')
    # Append a fresh item, then set its text via the global index.
    item = QtWidgets.QListWidgetItem()
    self.listEmpresas.addItem(item)
    item = self.listEmpresas.item(iEmpresa)
    item.setText(self.txtNombreEmpresa.toPlainText())
def write(cursor, table, table_name, mode='insert', module='MySQLdb'):
    """
    load table to $table_name.
    :param cursor: database agent
        :type Cursor
    :param table: data container
        :type table ``petl.util.base.Table``
        or double list like this: [['field_name', ...], ['value_object', ...], ...]
    :param table_name: table name
        :type table_name: ``str``
    :param mode truncate and than insert if mode equal 'trunc';
        insert data if mode equal 'insert';
        insert and replace row where pk has exit if mode equal 'replace'
        :type mode ``str``={'trunc'|'insert'|'replace'}
    """
    if 'MYSQL' in module.upper():
        # ANSI_QUOTES so petl's double-quoted identifiers work on MySQL
        cursor.execute('SET SQL_MODE=ANSI_QUOTES')
    if mode == 'trunc':
        res = petl.todb(table, cursor, table_name)
    elif mode == 'insert':
        res = petl.appenddb(table, cursor, table_name)
    elif mode == 'replace':
        with _LOCK:
            # Temporarily swap petl's module-global INSERT template for
            # REPLACE (MySQL upsert).
            petl.io.db.SQL_INSERT_QUERY = 'REPLACE INTO %s (%s) VALUES (%s)'
            try:
                res = petl.appenddb(table, cursor, table_name)
            finally:
                # fix: restore the template even if appenddb raises;
                # previously a failure left every later insert running
                # as REPLACE.
                petl.io.db.SQL_INSERT_QUERY = 'INSERT INTO %s (%s) VALUES (%s)'
    else:
        raise ValueError(
            "Argument mode must be {'trunc'|'insert'|'replace'}, not '%s'"
            % mode)
    return res
def exercise_with_schema(dbo, db):
    """Run the todb/appenddb exercise using an explicit schema name."""
    banner = '=' * len(repr(dbo))
    print(banner)
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print(banner)
    print()

    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = expect + expect[1:]
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))

    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
def exercise_ss_cursor(setup_dbo, ss_dbo):
    # Exercise petl's todb/appenddb helpers through a server-side cursor
    # (Python 2 print statements). Writes go through setup_dbo, reads
    # through ss_dbo; assumes an empty table named 'test' exists.
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    # lazy view; re-evaluated by each ieq below
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    # column order in the input must not matter
    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
def load_fornecedor_comportamento():
    """Append disqualification, continued-contract and appeal data to the
    _FORNECEDOR_COMPORTAMENTO staging table in the data lake."""
    comprasnet_conn = get_comprasnet_connection()
    siasg_dw_conn = get_siasg_dw_connection()
    lake_engine = get_data_lake_engine()

    # DESCLASSIFICACAO_FORNECEDORES -> FORNECEDOR_COMPORTAMENTO
    print("Iniciando carga desclassificação ...")
    rows = etl.fromdb(comprasnet_conn, Comprasnet.DESCLASSIFICACAO_FORNECEDORES)
    etl.appenddb(rows, lake_engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga desclassificação executada com sucesso")

    # CONTRATO_CONTINUADO -> FORNECEDOR_COMPORTAMENTO
    print("Iniciando carga \"contrato continuado\"")
    rows = etl.fromdb(siasg_dw_conn, Siasg_DW.CONTRATO_CONTINUADO)
    etl.appenddb(rows, lake_engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga \"contrato continuado\" executada com sucesso")

    # RECURSOS -> FORNECEDOR_COMPORTAMENTO
    # This query will need adjusting once the missing tables exist in the
    # data lake; today it lives in Quartzo (Postgres) and will migrate to
    # the data lake (SQL Server).
    print("Iniciando carga recursos ...")
    rows = etl.fromdb(comprasnet_conn, Comprasnet.RECURSOS)
    etl.appenddb(rows, lake_engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga recursos executada com sucesso")
    return 0
def exercise(dbo):
    # Exercise petl's todb/appenddb round-trips against dbo (Python 2
    # print statements). Assumes an empty table named 'test' exists.
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    # lazy view; re-evaluated by each ieq below
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    # column order in the input must not matter
    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
def load_fornecedor_historico():
    """Load supplier-history data into the data lake.

    Runs the contract/bidding SQL loads directly on the engine, then
    appends current and expired price-registration minutes (atas) from
    Siasgnet into _FORNECEDOR_HISTORICO_CONTRATO.
    """
    # Siasgnet
    connQSiasgnet = get_siasgnet_connection()
    engine = get_data_lake_engine()

    # CONTRATO_ATIVOS -> FORNECEDOR_HISTORICO
    print("Iniciando carga contratos ativos")
    execute_sql(Painel_Fornecedor.CONTRATO_ATIVOS, engine)
    print("Carga contratos ativos realizada com sucesso")

    # CONTRATO_VENCIDOS -> FORNECEDOR_HISTORICO
    print("Iniciando carga contratos vencidos")
    execute_sql(Painel_Fornecedor.CONTRATO_VENCIDOS, engine)
    print("Carga contratos vencidos realizada com sucesso")

    # LICITACOES -> FORNECEDOR_HISTORICO
    print("Iniciando carga licitações")
    execute_sql(Painel_Fornecedor.LICITACOES, engine)
    print("Carga licitações realizada com sucesso")

    ## ATA_VIGENTES -> FORNECEDOR_HISTORICO
    # This query will need adjusting once the missing tables exist in the
    # data lake; today it lives in Quartzo (Postgres) and will migrate to
    # the data lake (SQL Server).
    print("Iniciando carga atas vigentes")
    tabela_sql = etl.fromdb(connQSiasgnet, Siasgnet.ATA_VIGENTES)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_HISTORICO_CONTRATO')
    print("Carga atas vigentes realizada com sucesso")

    ## ATA_VENCIDAS -> FORNECEDOR_HISTORICO
    # Same migration caveat as above.
    print("Iniciando carga atas vencidas")
    tabela_sql = etl.fromdb(connQSiasgnet, Siasgnet.ATA_VENCIDAS)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_HISTORICO_CONTRATO')
    # fix: this message previously said "atas vigentes" (copy-paste error)
    print("Carga atas vencidas realizada com sucesso")
    return 0
import petl as etl

# --- SQL Server connection settings (BikeStores sample database) ---
server = 'localhost'
database = 'BikeStores'
username = '******'
password = '******'
connection = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' +
                            server + ';DATABASE=' + database + ';UID=' +
                            username + ';PWD=' + password)

# petl accepts a cursor factory instead of a bare connection
mkcursor = lambda: connection.cursor()
table = etl.fromdb(mkcursor, 'select * from production.brands')
table

# fix: the original referenced the undefined name `cnxn`; the pyodbc
# connection created above is bound to `connection`.
cursor = connection.cursor()

# build a one-column table and append it to production.categories
categories = [['yuchan', 'raum', 'hyejin']]
table = etl.fromcolumns(categories)
table = etl.rename(table, 'f0', 'category_name')
res = etl.appenddb(table, connection, 'categories', 'production')
res

#df = pd.DataFrame(columns=['id','list','dict'])
#for i in range(5):
#    id = i
#    a = np.array([k for k in range(i+10)]).tobytes()
#    b = json.dumps({'ab':[1,2,3],'cd':[4,5,6]})
#    df.loc[i] = (i,a,b)
#from sqlalchemy import create_engine
#df
##https://stackoverflow.com/questions/15750711/connecting-to-sql-server-2012-using-sqlalchemy-and-pyodbc/36747352
## create sqlalchemy engine
########## Json extraction and maping tableJ = etl.fromjson('cust_data.json', header=['id','gender','first_name','last_name', 'email','ville']) tableJ = etl.movefield(tableJ, 'gender', 4) ########## CSV extraction and conversion tableCSV = etl.fromcsv('week_cust.csv') tableCSV = etl.convert(tableCSV, 'id', int) ########### Sqlserver connection and extraction connectionSqlServer=pyodbc.connect("Driver={SQL Server Native Client 11.0};" "Server=81_64_msdn;" "Database=BD4client;" "Trusted_Connection=yes;" "convert_unicode =True;") cursor = connectionSqlServer.cursor() cursor.execute('SELECT id, first_name, last_name, email, gender, ville FROM client_DATA') tableSqlServer = cursor.fetchall() tableSqlServer =[('id','first_name','last_name', 'email','gender','ville')]+tableSqlServer cursor.close() connectionSqlServer.close() ######### Staging area transforming and concatenation StagingArea = etl.cat(tableCSV, tableJ,tableSqlServer) StagingArea = etl.convert(StagingArea, 'gender', {'Male': 'M', 'Female': 'F', 'male': 'M', 'female': 'F', None: 'N'}) StagingArea = etl.rename(StagingArea, 'ville', 'city') ######## mysql connection = mysql.connect(host="localhost", user="******", passwd="", db="customerdatabase") curseur = connection.cursor() curseur.execute('SET SQL_MODE=ANSI_QUOTES') #### load data, assuming table " CustomerData" already exists in the database etl.appenddb(StagingArea, connection, 'customerdata', schema='customerdatabase', commit='commit') curseur.close() connection.close()
stock) list_of_sentiments = ['bearishPercent', 'bullishPercent'] important_sentiments = {} for sentiments in list_of_sentiments: important_sentiments[sentiments] = sentiment['sentiment'][sentiments] important_sentiments['date_and_time'] = str(datetime.datetime.now())[:19] table_sentiment = etl.fromdicts([important_sentiments]) table_sentiment = etl.addfield(table_sentiment, 'symbol', stock) tables_without_symbol = [ table_price, table_insiders, table_financial_metrics, table_sentiment ] db = Database('postgres', 'postgres', '123456f') db_connection = db.con etl.appenddb(table_price, db_connection, 'price') etl.todb(table_recommendation, db_connection, 'recommendation') etl.todb(table_insiders, db_connection, 'insiders') etl.appenddb(table_financial_metrics, db_connection, 'financial_metrics') etl.appenddb(table_sentiment, db_connection, 'sentiment') # data for first request db.cursor.execute('select NAME from INSIDERS') data1 = db.cursor.fetchall() data1 = list(set(data1)) # data for second request db.cursor.execute('select * from INSIDERS') data2 = db.cursor.fetchall() data2_modify = {} for row in data2:
def append_tailings_reports_to_code_required_reports(connection, commit=False):
    """Migrate tailings expected-document rows into the code-required
    report tables (mine_report, mine_report_submission,
    mine_report_document_xref).

    Rows are staged with appenddb and only committed when *commit* is
    True; otherwise everything is rolled back.
    """
    src_table = etl.fromdb(
        connection,
        'SELECT exp_doc.mine_guid, exp_doc.exp_document_guid, req_doc.req_document_name, exp_doc.due_date, exp_doc.exp_document_status_code, exp_doc.received_date, exp_doc.active_ind, exp_doc_x.mine_document_guid, exp_doc.create_user, exp_doc.create_timestamp, exp_doc.update_user, exp_doc.update_timestamp from mine_expected_document exp_doc \
        inner join mine_expected_document_xref exp_doc_x on exp_doc.exp_document_guid = exp_doc_x.exp_document_guid\
        inner join mds_required_document req_doc on req_doc.req_document_guid = exp_doc.req_document_guid'
    )

    # Lookup table mapping legacy req_document_name values to the new
    # mine_report_definition_id (first row is the header).
    req_document_crr_defintion_map = [
        ['req_document_name', 'mine_report_definition_id'],
        ['Summary of TSF and Dam Safety Recommendations', 28],
        ['ITRB Activities Report', 27],
        ['Register of Tailings Storage Facilities and Dams', 47],
        ['Dam Safety Inspection (DSI) Report', 26],
        ['Dam Safety Review (DSR) Report', 31],
        ['“As-built” Reports', 32],
        ['Annual Reclamation', 25],
        ['MERP Record of Testing', 3],
        #['Annual Manager\'s Report', __________________ ], no mapping or data, ignore.
        ['OMS Manual', 33],
        ['Annual reconciliation of water balance and water management plans', 44],
        ['TSF risk assessment', 46],
        ['Mine Emergency Preparedness and Response Plan (MERP)', 24],
        ['Performance of high risk dumps', 29]
    ]

    table1 = etl.join(src_table, req_document_crr_defintion_map,
                      'req_document_name')
    mine_report = etl.cutout(table1, 'req_document_name')

    #to be inserted into db
    mine_report = etl.addfield(mine_report, 'submission_year', 2019)
    mine_report = etl.rename(mine_report, 'exp_document_status_code',
                             'mine_report_submission_status_code')
    mine_report = etl.addfield(mine_report, 'deleted_ind',
                               lambda x: not x.active_ind)
    mine_report = etl.cutout(mine_report, 'active_ind')

    #to determine what FK's will be so can insert into related tables
    max_report_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_mine_report_id_seq')[1][0]
    max_report_submission_id = etl.fromdb(
        connection,
        'select last_value from public.mine_report_submission_mine_report_submission_id_seq')[1][0]

    #if sequence hasn't been used yet, fix off by one
    if max_report_id == 1:
        max_report_id = 0
    if max_report_submission_id == 1:
        max_report_submission_id = 0

    #get one-to-many
    mine_report, mine_report_submission_documents = etl.unjoin(
        mine_report, 'mine_document_guid', key='exp_document_guid')

    #add PK's for mappings
    mine_report_with_ids = etl.addrownumbers(mine_report,
                                             start=max_report_id + 1,
                                             step=1,
                                             field='mine_report_id')
    mine_report_with_ids = etl.addrownumbers(mine_report_with_ids,
                                             start=max_report_submission_id + 1,
                                             step=1,
                                             field='mine_report_submission_id')
    print(f'max_report_id= {max_report_id}, max_report_submission_id={max_report_submission_id}')

    #copy out fields for submission tables
    mine_report_submissions = etl.cut(mine_report_with_ids, [
        'mine_report_id', 'exp_document_guid',
        'mine_report_submission_status_code', 'create_user',
        'create_timestamp', 'update_user', 'update_timestamp'
    ])
    mine_report_submissions = etl.addfield(mine_report_submissions,
                                           'submission_date',
                                           lambda x: x.create_timestamp)

    #remove fields not in mine_report
    mine_report = etl.cutout(mine_report, 'mine_report_submission_status_code')

    #replace exp_document_guid FK with mine_report_submission FK
    submission_id_lookup = etl.cut(mine_report_with_ids,
                                   ['mine_report_submission_id', 'exp_document_guid'])
    mine_report_submission_documents = etl.join(submission_id_lookup,
                                                mine_report_submission_documents,
                                                key='exp_document_guid')
    mine_report_submission_documents = etl.cutout(mine_report_submission_documents,
                                                  'exp_document_guid')

    #removed original PK
    mine_report = etl.cutout(mine_report, 'exp_document_guid')
    mine_report_submissions = etl.cutout(mine_report_submissions,
                                         'exp_document_guid')

    # diagnostics before staging the inserts
    print(etl.valuecounter(etl.distinct(table1, key='exp_document_guid'), 'req_document_name'))
    print(etl.valuecounter(mine_report, 'mine_report_definition_id'))
    print(table1)
    print(mine_report)
    print(mine_report_submissions)
    print(mine_report_submission_documents)

    # stage all three inserts on the one connection; commit/rollback below
    etl.appenddb(mine_report, connection, 'mine_report', commit=False)
    print('INSERT mine_report staged')
    etl.appenddb(mine_report_submissions, connection, 'mine_report_submission',
                 commit=False)
    print('INSERT mine_report_submission staged')
    etl.appenddb(mine_report_submission_documents, connection,
                 'mine_report_document_xref', commit=False)
    print('INSERT mine_report_document_xref staged')

    if commit:
        connection.commit()
        print('DATA CREATION COMPLETE')
    else:
        connection.rollback()
        print('NO DATA CREATED: add --commit=true to insert report rows')
# Set up a fresh two-column test table on the shared connection.
cursor = connection.cursor()
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
connection.commit()
cursor.close()

print 'exercise the petl functions using a connection'
from petl import look, fromdb, todb, appenddb
# lazy view; re-evaluated by each ieq below
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))

# write, verify; append, verify; overwrite, verify
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

print 'exercise the petl functions using a cursor'
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, cursor, 'test')
# MySQL setup: ANSI_QUOTES lets petl's double-quoted identifiers work.
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
# lazy view; re-evaluated by each ieq below
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))

# write, verify; append, verify; overwrite, verify
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, cursor, 'test')
def transfer_data(from_db_conn, to_db_conn):
    '''
    Transfer data from databases given cursor to execute queries to connected databases
    Limitations:
    1. poc.address_id is currently marked as -1 since it was not provided in test data and is a FK non-null constraint
    2. institution2poc table is not available in old schema
    3. role table is already populated in bill.sql file so that table is skipped by this script
    4. poc_poc_id is currently set to be poc_id since no relevant information is available about the column
    5. project2moc_project.role_id column is not available in old schema and is a not null field in new schema so we default it to 1 for now.
    6. project2moc_project.username is not available from old schema so currently set to empty
    7. raw_item_ts.item_id has duplicates when imported from item_ts. So we currently filter out and insert only uniques.
    :param from_db_conn: source database connection
    :param to_db_conn: destination database connection
    '''
    # Emptying out tables with possible foreign key constraint issues
    # (todb with a WHERE 1=0 result overwrites the destination with an
    # empty table of the same shape).
    fk_dep_tables = [
        'poc2project', 'poc2moc_project', 'poc', 'raw_item_ts', 'item',
        'project', 'institution2moc_project'
    ]
    for table_name in fk_dep_tables:
        table = etl.fromdb(to_db_conn,
                           "select * from {} where 1=0".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # Tables with no change in schema — copied straight across.
    insert_as_tables = [
        'institution', 'address', 'item_type', 'item2item', 'catalog_item'
    ]
    for table_name in insert_as_tables:
        table = etl.fromdb(from_db_conn, "select * from {}".format(table_name))
        etl.todb(table, to_db_conn, table_name)

    # inserting dummy address for constraint matching (limitation 1)
    dummy_address = [{'address_id': -1}]
    dummy_address_table = etl.fromdicts(dummy_address)
    etl.appenddb(dummy_address_table, to_db_conn, 'address')

    # poc: drop columns absent in the new schema, point NULL addresses
    # at the dummy address row
    poc = etl.fromdb(from_db_conn, 'select * from poc')
    poc_transformed = etl.cutout(poc, 'domain_id', 'user_uid')
    poc_dummy_address = etl.replace(poc_transformed, 'address_id', None, -1)
    etl.todb(poc_dummy_address, to_db_conn, 'poc')

    # moc_project: distinct legacy project names, row-numbered to mint
    # the new surrogate key
    project_names_table = etl.fromdb(
        from_db_conn, "select distinct project_name from project")
    moc_project_transformed = etl.addrownumbers(project_names_table)
    moc_project_transformed = etl.rename(moc_project_transformed,
                                         {'row': 'moc_project_id'})
    etl.todb(moc_project_transformed, to_db_conn, 'moc_project')

    # domain -> service (id/name columns renamed)
    domain = etl.fromdb(from_db_conn, "select * from domain")
    domain_table_transformed = etl.cutout(domain, 'domain_uid')
    domain_table_transformed = etl.rename(domain_table_transformed, {
        'domain_id': 'service_id',
        'domain_name': 'service_name'
    })
    etl.todb(domain_table_transformed, to_db_conn, 'service')

    # project: join to moc_project to pick up the new moc_project_id
    project = etl.fromdb(from_db_conn, "select * from project")
    moc_project = etl.fromdb(to_db_conn, "select * from moc_project")
    project_moc_project_joined = etl.join(project, moc_project,
                                          key='project_name')
    project_table_transformed = etl.cutout(project_moc_project_joined,
                                           'project_name')
    project_table_transformed = etl.rename(project_table_transformed, {
        'domain_id': 'service_id',
        'project_uid': 'project_uuid'
    })
    etl.todb(project_table_transformed, to_db_conn, 'project')

    # institution2moc_project via join against the freshly loaded project
    institution2project = etl.fromdb(from_db_conn,
                                     "Select * from institution2project")
    project = etl.fromdb(to_db_conn,
                         "select project_id, moc_project_id from project")
    inst2project_project_joined = etl.join(institution2project, project,
                                           key='project_id')
    inst2moc_project = etl.cutout(inst2project_project_joined, 'domain_id')
    etl.todb(inst2moc_project, to_db_conn, 'institution2moc_project')

    # poc2moc_project: role_id defaults to 1 (limitation 5), poc_poc_id
    # mirrors poc_id (limitation 4)
    project2poc = etl.fromdb(from_db_conn, "select * from project2poc")
    project2poc_project_joined = etl.join(project2poc, project,
                                          key='project_id')
    poc2moc_project = etl.cutout(project2poc_project_joined, 'project_id',
                                 'domain_id')
    poc2moc_project = etl.addfield(poc2moc_project, 'role_id', 1)
    poc2moc_project = etl.addfield(poc2moc_project, 'poc_poc_id',
                                   lambda rec: rec['poc_id'])
    etl.todb(poc2moc_project, to_db_conn, 'poc2moc_project')

    # poc2project: username not available in the old schema (limitation 6)
    poc2project = etl.cutout(project2poc, 'domain_id')
    poc2project = etl.addfield(poc2project, 'role_id', 1)
    poc2project = etl.addfield(poc2project, 'username', '')
    etl.todb(poc2project, to_db_conn, 'poc2project')

    item = etl.fromdb(from_db_conn, "select * from item")
    item_transformed = etl.cutout(item, 'domain_id')
    etl.todb(item_transformed, to_db_conn, 'item')

    # Deduplicate item_ts on item_id with ROW_NUMBER (limitation 7),
    # then drop the helper rank column before loading.
    raw_item_ts_unique = etl.fromdb(
        from_db_conn,
        "WITH summary AS ( SELECT its.item_id, its.start_ts, its.end_ts, its.state, its.catalog_item_id, ROW_NUMBER() OVER(PARTITION BY its.item_id) AS rk FROM ITEM_TS its) SELECT s.* FROM summary s WHERE s.rk = 1"
    )
    raw_item_ts_unique = etl.cutout(raw_item_ts_unique, 'rk')
    etl.todb(raw_item_ts_unique, to_db_conn, 'raw_item_ts')
# Connect to the local Postgres database used by the demo.
connection = psycopg2.connect('dbname=twelveBD user=postgres password=admin')
table = etl.fromdb(connection, 'SELECT * FROM personas')

# NOTE(review): the assignments below repeatedly rebind table1; only the
# LAST value is ever written to the database, so the persona/empresa
# shaped tables above it are dead code — confirm intent.
table1 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [1, 'Rafael Perez Aguirre', 'm', 24]]
table2 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [2, 'Eduardo Cantoran Flores', 'm', 25]]
table3 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [3, 'Adriana Lopez Montiel', 'm', 30]]
table1 = [
    ['idEmpresa', 'nombre', 'sucursal', 'direccion'],
    [
        'IDIT', 1,
        'Blvrd del Niño Poblano 2901, Reserva Territorial Atlixcáyotl, Centro Comercial Puebla, 72810 San Andrés Cholula, Pue.'
    ]
]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['fácil', 30, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['intermedio', 35, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dificil', 45, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Veterano', 50, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dios', 55, 1, 40]]

# Write the final table1 (the 'Dios' difficulty row) to personas, then
# append it again.  NOTE(review): these column names do not match the
# idPersona/nombre/sexo/edad shape read above — verify the target table.
etl.todb(table1, connection, 'personas')
etl.appenddb(table1, connection, 'personas')