def test_todb_appenddb_cursor():
    f = NamedTemporaryFile(delete=False)
    conn = sqlite3.connect(f.name)
    conn.execute('create table foobar (foo, bar)')
    conn.commit()

    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    cursor = conn.cursor()
    todb(table, cursor, 'foobar')

    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1), ('b', 2), ('c', 2))
    ieq(expect, actual)

    # try appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appenddb(table2, cursor, 'foobar')

    # check what it did
    actual = conn.execute('select * from foobar')
    expect = (('a', 1), ('b', 2), ('c', 2), ('d', 7), ('e', 9), ('f', 1))
    ieq(expect, actual)
def etl_from_table(self, table, force_reload=False):
    """Extract, translate, load exclusions (and not reinstatements) from
    a petl TABLE.

    Set FORCE_RELOAD to True to turn off the protections against reading
    the same csv file twice.  There is no harm in redoing a csv file,
    since the csv contents replaces the db table entirely.  We avoid
    reloading because it is faster and because it prevents the db from
    having an empty table for a moment between blowing away and
    refilling it.
    """
    if not force_reload:
        # If UPDATED.csv has the same number of rows and the same most
        # recent date as our db, we've already snarfed this csv file and
        # can skip it.
        db_latest = self.conn.get_latest_exclusion_date().replace('-', '')
        db_num_rows = self.conn.count_exclusions()
        updated_latest = etl.cut(etl.sort(table, 'EXCLDATE'),
                                 'EXCLDATE')[len(table) - 1][0]
        updated_num_rows = len(table) - 1
        if (db_num_rows == updated_num_rows
                and db_latest == updated_latest):
            return

    # Massage data
    individual, business = clean_and_separate(table)

    # Save to db, BLOWING AWAY data in the existing tables.  If tables
    # don't exist, will create them, but without any constraints.
    info("Replacing individual_exclusion and business_exclusion tables.")
    etl.todb(individual, self.conn.conn, 'individual_exclusion')
    etl.todb(business, self.conn.conn, 'business_exclusion')
def writeToDB(self):
    '''
    Retrieve the hockey data from the CSV file, print a snippet of the
    data to the console for visual feedback on progress, and write the
    data (with petl) to the hockey database.
    '''
    data = fromcsv(self.csvPath)
    print(look(data, style='minimal'))
    todb(data, connection, self.tableName)
def _save_chunk(self, data, chunk_info):
    print(data)
    table = pd.DataFrame.from_records(data)
    print('chunk_i', chunk_info['chunk_i'])
    table['chunk_i'] = [chunk_info['chunk_i'], ] * len(data)
    print(table.head())
    print(table.columns)
    etl.todb(data, self.connection, str(self.key), create=True)
    return dict()
def _load_db(self, taiwan_date_str=None, is_delete=False):
    if not (taiwan_date_str is None):
        self.set_trade_date(taiwan_date_str)

    raw_file = self.source_file
    tse = etl.fromcsv(raw_file)
    connection = self._get_connection()  # psycopg2.connect(_CONNECTION)

    if is_delete:
        self.clean_db(self._trade_date)

    # assuming table "quotes" already exists in the database, and tse needs to have the header.
    # petl.io.db.todb(table, dbo, tablename, schema=None, commit=True, create=False, drop=False,
    #                 constraints=True, metadata=None, dialect=None, sample=1000)
    etl.todb(tse, connection, 'quotes', drop=False)  # , truncate=False
def _test_unicode(dbo):
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = etl.fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(etl.look(actual))
def load_pessoa():
    connSicaf = get_sicaf_dw_connection()
    engine = get_data_lake_engine()

    print("Consultando DW SICAF")
    # Pessoa
    tabela_sql = etl.fromdb(connSicaf, Pessoa.PESSOA)
    print("realizando carga na tabela _PESSOA")
    etl.todb(tabela_sql, engine, '_PESSOA')
    return 0
def extract_result():
    try:
        db_connect = create_engine(
            'postgres://*****:*****@ec2-34-204-22-76.compute-1.amazonaws.com:5432/dabbhqt4pegslv'
        )
        table = etl.fromdb(db_connect, "select * from result")
        # print(etl.look(table))
        connection = sqlite3.connect('example.db')
        etl.todb(table, connection, 'agrobean_results')
    except:
        return 1


# extract_result()
def _test_with_schema(dbo, schema):
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
def load_raw_data():
    """Function to load raw extracted data into the configured raw database."""

    def extract_raw_csv():
        """Nested function to load extracted raw processed csv data to a table data frame."""
        table = etl.fromcsv(raw_csv_data, header=raw_datatable_headers, encoding='utf8')
        return table

    raw_dataframe = extract_raw_csv()
    conn = sqlite3.connect(raw_database_path)
    etl.todb(raw_dataframe, conn, raw_tablename)
def load_to_staging_warehouse(self):
    db_info = self.conf.get_staging_db_info()
    connection = pymysql.connect(
        host=db_info['host'],
        user=db_info['user'],
        password=db_info['passwd'],
        db=db_info['db'],
    )
    connection.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
    # TODO: ask Yang about peerlogic set up, why is there an error for duplicate
    # entries in actor IDs, can't different roles share primary keys for IDs
    for table in self.UPDATE_ORDER:
        data = self.TABLES[table]()
        if data:
            print(f'Loading {table}...\n{self.TABLES[table]()}')
            etl.todb(data, connection, table)
    connection.close()
def load(tb, pkey, hst, usr, pwd, dtb):
    db = pymysql.connect(host=hst, user=usr, password=pwd, db=dtb,
                         charset='utf8mb4',
                         cursorclass=pymysql.cursors.DictCursor)
    cur = db.cursor()
    if cur.execute("SHOW TABLES LIKE 'ETL'") == 0:
        cur.execute(crtTable(tb[0], dmap(tb[0], tb[1]), tname))
    print "No. of data to be processed: " + str(len(tb))
    cur.execute('SET SQL_MODE=ANSI_QUOTES')
    etl.todb(tb, db, tname)
    print "Done loading."
    db.commit()
    """s = int()
def exercise_unicode(dbo):
    print('=' * len(repr(dbo)))
    print('EXERCISE UNICODE')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()

    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(look(actual))
def load_fornecedor_contrato_item():
    connSiasg_DW = get_siasg_dw_connection()
    engine = get_data_lake_engine()

    print("Selecionando faixas de itens contrato")
    faixa = pd.read_sql_query(con=connSiasg_DW, sql="""
        SELECT cast((D.ID_ITCT_ITEM_CONTRATO / 500000) as integer) as faixa
        FROM D_ITCT_ITEM_CONTRATO D
        group by cast((D.ID_ITCT_ITEM_CONTRATO / 500000) as integer)
        order by cast((D.ID_ITCT_ITEM_CONTRATO / 500000) as integer)
    """)

    for index, row in faixa.iterrows():
        print("faixa", index + 1)
        print(
            f"Selecionando faixa de {int(index * 500000)} ate {int(index * 500000 + 499999)}..."
        )
        n = row['faixa']
        sql = """
            SELECT I.ID_ITCT_ITEM_CONTRATO,
                   I.ID_CNTR_CONTRATO,
                   I.ID_ITCP_ITEM_COMPRA,
                   CH_ITCT_ITEM_CONTRATO_EDIT,
                   DS_ITCP_ITEM_COMPRA as NO_ITCP_ITEM_CONTRATO,
                   I.ID_ITCP_TP_COD_MAT_SERV,
                   CASE WHEN TI.ID_ITCP_TP_MATERIAL_SERVICO = '1' THEN 'Material'
                        WHEN TI.ID_ITCP_TP_MATERIAL_SERVICO = '2' THEN 'Serviço'
                        else 'Não informado' end as NO_ITCP_TP_COD_MAT_SERV,
                   I.QT_ITCT_ITENS_CONTRATO,
                   I.QT_ITCT_CONTRATADA,
                   I.VL_ITCT_CONTRATADO
            FROM D_CNTR_CONTRATO CO
            INNER JOIN F_ITEM_CONTRATO I
                ON CO.ID_CNTR_CONTRATO = I.ID_CNTR_CONTRATO
                AND I.ID_ITCT_ITEM_CONTRATO between {0} * 500000 and {0} * 500000 + 499999
            inner join D_ITCT_ITEM_CONTRATO D
                ON D.ID_ITCT_ITEM_CONTRATO = I.ID_ITCT_ITEM_CONTRATO
            INNER JOIN D_ITCP_ITEM_COMPRA C
                ON C.ID_ITCP_ITEM_COMPRA = I.ID_ITCP_ITEM_COMPRA
            INNER JOIN D_ITCP_MATERIAL_SERVICO TI
                ON I.ID_ITCP_TP_COD_MAT_SERV = TI.ID_ITCP_TP_COD_MAT_SERV;
        """.format(n)
        tabela_sql = etl.fromdb(connSiasg_DW, sql)
        print("Inserindo faixa na base de dados...")
        etl.todb(tabela_sql, engine, '_FORNECEDOR_CONTRATO_ITEM')

    print("Carga realizada com sucesso")
    return 0
def execute(self, context):
    engine = create_engine(
        "postgresql+psycopg2://giovani.stefani@prd-dictas-postgresql-server.postgres.database.azure.com:B7hqx9mFi19J8BTE@prd-dictas-postgresql-server.postgres.database.azure.com:5432/dictas"
    )
    try:
        nm_arq = 'VIDAS.csv'
        nm_tbl = 'vidas'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')
        etl.todb(table, engine, f'{nm_tbl}')
        self.logger.info(f"tabela {nm_tbl} carregada")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')
def execute(self):
    if self.destination is None:
        self.destination = get_default_destination()

    for source in self.sources:
        conn = source.get_connection()
        dest_conn = self.destination.get_connection()

        print source.test_connection()
        print self.destination.test_connection()

        if source.dbms == 'mysql':
            conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES;')

        for table in source.tables:
            table_data = petl.fromdb(conn, 'SELECT * FROM %s' % table)
            petl.todb(table_data, dest_conn, table, commit=True)

        dest_conn.commit()
        dest_conn.close()
    return
def exercise_with_schema(dbo, db):
    print('=' * len(repr(dbo)))
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()

    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))

    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
def load(self):
    with open(os.path.join(dir, 'configuration.json'), 'r') as f:
        data = json.load(f)

    s_credentials = {
        "database": data['source']['database'],
        "host": data['source']['host'],
        "port": data['source']['port'],
        "user": data['source']['username'],
        "password": data['source']['password']
    }
    t_credentials = {
        "database": data['target']['database'],
        "host": data['target']['host'],
        "port": data['target']['port'],
        "user": data['target']['username'],
        "password": data['target']['password']
    }

    u = Utility()
    source_id = u.get_source(vendor=data['source']['vendor'])["id"]
    target_id = u.get_source(vendor=data['target']['vendor'])["id"]
    source = u.get_source_connection(vendor_id=source_id, credentials=s_credentials).connect()
    target = u.get_target_connection(vendor_id=target_id, credentials=t_credentials).connect()
    tables = u.get_tables('public')

    # table = etl.fromdb(source, 'select * from vendor_modules')
    # etl.todb(table, target, 'vendor_modules')
    for doc in tables:
        query = "select * from " + doc['name']
        table = etl.fromdb(source, query)
        if len(table) > 1:
            etl.todb(table, target, doc['name'])
            # print len(table)
        print "Loading of table " + doc['name'] + " completed"
def todb(table, dbo, tablename, schema=None, commit=True, create=False,
         drop=False, constraints=True, metadata=None, dialect=None,
         sample=1000):
    """
    Drop-in replacement for :func:`petl.todb` which also supports automatic
    table creation.

    Parameters
    ----------
    table : sequence of sequences (petl table)
        Table data to load
    dbo : database object
        DB-API 2.0 connection, callable returning a DB-API 2.0 cursor, or
        SQLAlchemy connection, engine or session
    tablename : string
        Name of the table
    schema : string
        Name of the database schema to create the table in
    commit : bool
        If True commit the changes
    create : bool
        If True attempt to create the table before loading, inferring types
        from a sample of the data
    drop : bool
        If True attempt to drop the table before recreating (only relevant
        if create=True)
    constraints : bool
        If True use length and nullable constraints (only relevant if
        create=True)
    metadata : sqlalchemy.MetaData
        Custom table metadata (only relevant if create=True)
    dialect : string
        One of {'access', 'sybase', 'sqlite', 'informix', 'firebird',
        'mysql', 'oracle', 'maxdb', 'postgresql', 'mssql'} (only relevant
        if create=True)
    sample : int
        Number of rows to sample when inferring types etc. Set to 0 to use
        the whole table.

    """
    if create:
        if drop:
            drop_table(dbo, tablename, schema=schema, commit=commit)
        create_table(table, dbo, tablename, schema=schema, commit=commit,
                     constraints=constraints, metadata=metadata,
                     dialect=dialect, sample=sample)
    petl.todb(table, dbo, tablename, schema=schema, commit=commit)
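# A minimal usage sketch of the wrapper above (hedged): it assumes the
# drop_table/create_table helpers it relies on are importable and behave as
# described in the docstring.  The connection, table name and rows below are
# illustrative only, not part of the original project.
import sqlite3

example_rows = (('foo', 'bar'),
                ('a', 1),
                ('b', 2))
example_conn = sqlite3.connect(':memory:')
# create=True infers column types from a sample of the rows and creates the
# table before loading; pass drop=True as well to recreate an existing table.
todb(example_rows, example_conn, 'example_table', create=True)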
def load_fornecedor_sumario_compra():
    connSiasg_DW = get_siasg_dw_connection()
    engine = get_data_lake_engine()

    # FORNECEDOR_SUMARIO_COMPRA
    # tabela_sql = etl.fromdb(connSiasg_DW, Siasg_DW.FORNECEDOR_SUMARIO_COMPRA)
    # etl.todb(tabela_sql, engine, '_FORNECEDOR_SUMARIO_COMPRA')

    print("Selecionando faixas de itens contrato")
    faixa = pd.read_sql_query(con=connSiasg_DW, sql="""
        SELECT cast((D.ID_ITFN_ITEM_FORNECEDOR / 10000000) as integer) as faixa
        FROM F_ITEM_FORNECEDOR D
        group by cast((D.ID_ITFN_ITEM_FORNECEDOR / 10000000) as integer)
        order by cast((D.ID_ITFN_ITEM_FORNECEDOR / 10000000) as integer)
    """)

    for index, row in faixa.iterrows():
        print("faixa", index + 1)
        print(
            f"Selecionando faixa de {int(index * 10000000)} ate {int(index * 10000000 + 9999999)}..."
        )
        n = row['faixa']
        sql = """
            SELECT F.ID_FRND_FORNECEDOR_COMPRA as ID_FORNECEDOR,
                   count(Distinct F.ID_CMPR_COMPRA) as QTD_COMPRA,
                   count(Distinct F.ID_ITCP_ITEM_COMPRA) as QTD_ITEM_COMPRA,
                   count(Distinct case when F.VL_PRECO_TOTAL_HOMOLOG > 0
                                       then F.ID_CMPR_COMPRA end) as QTD_COMPRA_HOMOLOG,
                   count(Distinct case when F.VL_PRECO_TOTAL_HOMOLOG > 0
                                       then F.ID_ITCP_ITEM_COMPRA end) as QTD_ITEM_COMPRA_HOMOLOG,
                   sum(F.VL_PRECO_TOTAL_HOMOLOG) as VALOR_HOMOLOGADO
            from F_ITEM_FORNECEDOR F
            WHERE F.ID_ITFN_ITEM_FORNECEDOR between {0} * 10000000 and {0} * 10000000 + 9999999
            group by F.ID_FRND_FORNECEDOR_COMPRA;
        """.format(n)
        tabela_sql = etl.fromdb(connSiasg_DW, sql)
        print("Inserindo faixa na base de dados...")
        etl.todb(tabela_sql, engine, '_FORNECEDOR_SUMARIO_COMPRA')

    print("Carga realizada com sucesso")
    return 0
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'),
              ('a', 1),
              ('b', 2))
    expect_appended = (('foo', 'bar'),
                       ('a', 1),
                       ('b', 2),
                       ('a', 1),
                       ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
def write(cursor, table, table_name, mode='insert', module='MySQLdb'):
    """
    Load table into $table_name.

    :param cursor: database agent
    :type cursor: ``Cursor``
    :param table: data container
    :type table: ``petl.util.base.Table`` or a nested list like:
        [['field_name', ...], ['value_object', ...], ...]
    :param table_name: table name
    :type table_name: ``str``
    :param mode: truncate and then insert if mode equals 'trunc';
        insert data if mode equals 'insert';
        insert, replacing rows whose pk already exists, if mode equals 'replace'
    :type mode: ``str``={'trunc'|'insert'|'replace'}
    """
    if 'MYSQL' in module.upper():
        cursor.execute('SET SQL_MODE=ANSI_QUOTES')
    if mode == 'trunc':
        res = petl.todb(table, cursor, table_name)
    elif mode == 'insert':
        res = petl.appenddb(table, cursor, table_name)
    elif mode == 'replace':
        with _LOCK:
            petl.io.db.SQL_INSERT_QUERY = 'REPLACE INTO %s (%s) VALUES (%s)'
            res = petl.appenddb(table, cursor, table_name)
            petl.io.db.SQL_INSERT_QUERY = 'INSERT INTO %s (%s) VALUES (%s)'
    else:
        raise ValueError(
            "Argument mode must be {'trunc'|'insert'|'replace'}, not '%s'" % mode)
    return res
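# A hedged usage sketch for write() above, assuming a pymysql connection;
# the host, credentials, table name and rows are illustrative only, not part
# of the original project.
import pymysql

demo_rows = [['id', 'name'],
             [1, 'alpha'],
             [2, 'beta']]
demo_conn = pymysql.connect(host='localhost', user='etl',
                            password='secret', db='demo')
demo_cursor = demo_conn.cursor()

write(demo_cursor, demo_rows, 'demo_table', mode='trunc', module='pymysql')    # truncate, then insert
write(demo_cursor, demo_rows, 'demo_table', mode='insert', module='pymysql')   # plain append
write(demo_cursor, demo_rows, 'demo_table', mode='replace', module='pymysql')  # REPLACE INTO upsert
demo_conn.commit()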
def load_stage_data():
    """Function to load stage data into the configured stage database."""

    def extract_staging_csv(dataframe):
        """Nested function to load processed stage csv data to a table data frame."""
        table = etl.fromcsv(dataframe, encoding='utf8')
        return table

    conn = psycopg2.connect(database=stage_db_name,
                            user=stage_db_user,
                            password=stage_db_pass)
    etl.todb(extract_staging_csv(stage_tweet_csv), conn, stage_tweet_table)
    etl.todb(extract_staging_csv(stage_user_csv), conn, stage_user_table)
    etl.todb(extract_staging_csv(stage_date_csv), conn, stage_date_table)
def exercise_ss_cursor(setup_dbo, ss_dbo):
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
def writedata(conection, table):
    etl.todb(table, conection, 'son', create=True)
import petl as etl, psycopg2 as pg, sys
from sqlalchemy import *

reload(sys)
sys.setdefaultencoding('utf8')

dbCnxns = {
    'operations': 'dbname=operations user=etl host=127.0.0.1',
    'python': 'dbname=python user=etl host=127.0.0.1',
    'production': 'dbname=production user=etl host=127.0.0.1'
}
sourceConn = pg.connect(dbCnxns['operations'])
targetConn = pg.connect(dbCnxns['python'])
sourceCursor = sourceConn.cursor()
targetCursor = targetConn.cursor()

sourceCursor.execute(
    """select table_name from information_schema.columns
       where table_name in ('orders','returns') group by 1""")
sourceTables = sourceCursor.fetchall()

for t in sourceTables:
    targetCursor.execute("drop table if exists %s" % (t[0]))
    sourceDs = etl.fromdb(sourceConn, 'select * from %s' % (t[0]))
    etl.todb(sourceDs, targetConn, t[0], create=True, sample=10000)
t_confirmed = etl.rename(t_confirmed, {'index': 'Country'})

# Then add the date columns as data and rename the new columns
t_confirmed = etl.melt(t_confirmed, 'Country')
t_confirmed = etl.rename(t_confirmed, {'variable': 'Date'})
t_confirmed = etl.rename(t_confirmed, {'value': 'Cases'})

# Then add the continent so we can group by it
t_confirmed = etl.addfield(t_confirmed, 'Continent',
                           lambda rec: get_continent_code(rec['Country']))

# And again make sure the fields have the data types they should have
t_confirmed = etl.convert(t_confirmed, 'Cases', int)
t_confirmed = etl.convert(t_confirmed, 'Date',
                          lambda v: datetime.datetime.strptime(v, '%Y-%m-%d'))

# Finally, load the table into the data repository
conn = pymysql.connect(password='******', database='covid', user='******')
conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES')
etl.todb(t_confirmed, conn, 'confirmed', create=True, drop=True)
conn.close()

# Visualization examples for debugging
# print(etl.header(t_confirmed))
# print(etl.records(t_confirmed))
# print(t_confirmed.lookall())
# etl.tocsv(t_confirmed, 'confirmados.csv')
# df.to_csv(r'confirmados.csv', index=True, header=True)
# cols = etl.columns(t_confirmed)
"""Test python etl library https://petl.readthedocs.io/en/stable/io.html""" import psycopg2 from petl import todb, look create_table_script = """ CREATE TABLE public.petl_test ( petl_id serial NOT NULL, description varchar NULL, external_id int4 NULL ); """ # source list of data including headers source = [['description', 'external_id'], ['teach', 2], ['learn', 3]] look(source) # debugging dest_db_table = 'petl_test' # this db table must already exist with psycopg2.connect( "host=localhost dbname=mydb user=guest password=12345") as connection: with connection.cursor() as db_cur: # truncate the table and insert the data todb(table=source, dbo=db_cur, tablename=dest_db_table, schema='public')
prompt('setup table')
cursor = connection.cursor()
# deal with quote compatibility
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
todb(t2, cursor, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, cursor, 'test')
cs = "dbname=%s user=%s password=%s host=%s port=%s" % (dn, du, dp, dh, dbp) connection = psycopg2.connect(cs) # a = np.random.rand(100000, 200).tolist() b = np.random.rand(100000, 10).tolist() etl.fromcolumns() #pandas -> table -> db import pandas as pd df = pd.DataFrame(columns=['id', 'features', 'model_id', 'regdate']) for i in range(1, 10000): df.loc[i] = [i, np.random.rand(10).tolist(), 'test1', datetime.now()] pddf = etl.fromdataframe(df) etl.todb(pddf, connection, 'defect_features', 'public') #select query mkcursor = lambda: connection.cursor(name='arbitrary') table = etl.fromdb(mkcursor, 'select * from public.defect_features') table #primitive -> db import numpy as np from datetime import datetime datatable = [['id', 'features', 'model_id', 'regdate']] for i in range(1, 10): datatable.append([i, np.random.rand(10).tolist(), 'test1', datetime.now()]) etl.todb(datatable, connection, 'defect_features', 'public')
#!/usr/bin/env python3

import petl as etl
import pandas as pd
import psycopg2 as pg

NASA_SOURCE = "../kepler_accepted_results.csv"

k_conn = pg.connect(user='******', password='******',
                    host='localhost', database='kepler')

# Not using petl to pull the data in from CSV because its CSV methods
# are a bit lacking.  Pandas does a better job here.
df = pd.read_csv(NASA_SOURCE, skiprows=31)
# df = df.set_index('kepid')
stbl = etl.fromdataframe(df, include_index=False)

# Not doing any transformation at all yet
etl.todb(stbl, k_conn, 'nasa_result', create=True, drop=False, sample=0)

k_conn.close()
connection = psycopg2.connect('dbname=twelveBD user=postgres password=admin')
table = etl.fromdb(connection, 'SELECT * FROM personas')

table1 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [1, 'Rafael Perez Aguirre', 'm', 24]]
table2 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [2, 'Eduardo Cantoran Flores', 'm', 25]]
table3 = [['idPersona', 'nombre', 'sexo', 'edad'],
          [3, 'Adriana Lopez Montiel', 'm', 30]]

table1 = [['idEmpresa', 'nombre', 'sucursal', 'direccion'],
          ['IDIT', 1, 'Blvrd del Niño Poblano 2901, Reserva Territorial Atlixcáyotl, Centro Comercial Puebla, 72810 San Andrés Cholula, Pue.']]

table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['fácil', 30, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['intermedio', 35, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dificil', 45, 1, 40]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Veterano', 50, 1, 45]]
table1 = [['nombre', 'nofichas', 'secuencia', 'tiempo'], ['Dios', 55, 1, 40]]

etl.todb(table1, connection, 'personas')
etl.appenddb(table1, connection, 'personas')