def synctable(self, sourceDb, targetDb, sourceTable, targetTable):
    sourceCursor = sourceDb.cursor()
    targetCursor = targetDb.cursor()
    affected_total = 0
    init_rowCount = targetTable.rowCount if targetTable.rowCount < sourceTable.rowCount else sourceTable.rowCount
    pbar = tqdm(total=sourceTable.rowCount, unit='records')
    pbar.update(init_rowCount)
    while sourceTable.lastUpdatetime > targetTable.lastUpdatetime:
        affected_rows = 0
        batchSize = 100000
        sql = "SELECT * FROM (SELECT * FROM {schema}.{tablename} WHERE {timestamp}>=to_timestamp('{last_updatetime}','yyyy-mm-dd hh24:mi:ss.ff6') ORDER BY {timestamp}) WHERE ROWNUM<={batch_size}".format(
            timestamp=sourceTable.timestampField,
            schema=sourceTable.schema,
            tablename=sourceTable.tablename,
            last_updatetime=targetTable.lastUpdatetime,
            batch_size=batchSize)
        sourceRecord = etl.fromdb(lambda: CursorProxy(sourceDb.cursor()), sql)
        targetRecord = etl.fromdb(
            lambda: CursorProxy(targetDb.cursor()),
            "SELECT * FROM {schema}.{tablename} WHERE 1=0".format(
                schema=targetTable.schema, tablename=targetTable.tablename))
        sourceTable.columns = etl.header(sourceRecord)
        targetTable.columns = etl.header(targetRecord)
        # drop source columns that do not exist on the target
        for column in list(set(sourceTable.columns) - set(targetTable.columns)):
            sourceRecord = etl.cutout(sourceRecord, column)
        max_updatetime = sourceRecord.cut(sourceTable.timestampField).skip(1).max()[0]
        sourceRecord = sourceRecord.sort(sourceTable.timestampField)
        etl.appenddb(sourceRecord, CursorProxy(targetCursor), targetTable.tablename,
                     schema=targetTable.schema, commit=True)
        affected_rows += targetCursor.rowcount
        targetTable.lastUpdatetime = max_updatetime.strftime('%Y-%m-%d %H:%M:%S.%f')
        targetTable.rowCount += affected_rows
        pbar.update(affected_rows if init_rowCount + affected_total + affected_rows < sourceTable.rowCount
                    else sourceTable.rowCount - init_rowCount - affected_total)
        affected_total += affected_rows
        pbar.set_description("%s |%d records updated." % (targetTable.tablename, affected_total))
    if targetTable.lastUpdatetime > sourceTable.lastUpdatetime:
        pbar.set_description("%s |timestamp >, skip." % (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount == sourceTable.rowCount:
        pbar.set_description("%s |no data change." % (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount > sourceTable.rowCount:
        pbar.set_description("%s |RowCount > but timestamp ==, skip." % (targetTable.tablename))
    elif targetTable.lastUpdatetime == sourceTable.lastUpdatetime and targetTable.rowCount < sourceTable.rowCount:
        pbar.set_description("%s |RowCount < but timestamp ==, skip." % (targetTable.tablename))
    pbar.close()
def load_fornecedor_comportamento():
    connComprasnet = get_comprasnet_connection()
    connSiasg_DW = get_siasg_dw_connection()
    engine = get_data_lake_engine()

    print("Iniciando carga desclassificação ...")
    ## DESCLASSIFICACAO_FORNECEDORES -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connComprasnet, Comprasnet.DESCLASSIFICACAO_FORNECEDORES)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga desclassificação executada com sucesso")

    print("Iniciando carga \"contrato continuado\"")
    ## CONTRATO_CONTINUADO -> FORNECEDOR_COMPORTAMENTO
    tabela_sql = etl.fromdb(connSiasg_DW, Siasg_DW.CONTRATO_CONTINUADO)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga \"contrato continuado\" executada com sucesso")

    print("Iniciando carga recursos ...")
    ## RECURSOS -> FORNECEDOR_COMPORTAMENTO
    # This query will need to be adjusted once the missing tables are implemented in the data lake.
    # Today it lives in Quartzo (Postgres) and will be migrated to the Data Lake (SQL Server).
    tabela_sql = etl.fromdb(connComprasnet, Comprasnet.RECURSOS)
    etl.appenddb(tabela_sql, engine, '_FORNECEDOR_COMPORTAMENTO')
    print("Carga recursos executada com sucesso")

    return 0
def insert_data(df, int):
    connection = psycopg2.connect(dbname='voyager', user='******', password='******',
                                  host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    # next free ids for the temperature and humidity tables
    latest_temp = 'SELECT id FROM temperature ORDER BY id DESC LIMIT 1'
    latest_hum = 'SELECT id FROM humidity ORDER BY id DESC LIMIT 1'
    temp = etl.fromdb(connection, latest_temp)
    hum = etl.fromdb(connection, latest_hum)
    temp = temp['id'][0] + 1
    hum = hum['id'][0] + 1

    df_loc = df[['time', 'latitude', 'longitude']]

    if int == 1:
        df_temp = df[['temperature']]
        df_temp.index = np.arange(temp, len(df_temp) + temp)
        df_temp['id'] = df_temp.index
        df_temp.rename(columns={'temperature': 'value'}, inplace=True)
        df_loc['temperature_id'] = df_temp.index
        df_temp.to_sql('temperature', engine, if_exists='append', index=False, method='multi')
        print(df_temp)

        df_hum = df[['humidity']]
        df_hum.index = np.arange(hum, len(df_hum) + hum)
        df_hum['id'] = df_hum.index
        df_hum.rename(columns={'humidity': 'value'}, inplace=True)
        df_loc['humidity_id'] = df_hum.index
        print(df_hum)
        df_hum.to_sql('humidity', engine, if_exists='append', index=False, method='multi')

    if int == 2:
        df_temp = df[['temperature']]
        df_temp.index = np.arange(temp, len(df_temp) + temp)
        df_temp['id'] = df_temp.index
        df_temp.rename(columns={'temperature': 'value'}, inplace=True)
        df_loc['temperature_id'] = df_temp.index
        print(df_temp)
        df_temp.to_sql('temperature', engine, if_exists='append', index=False, method='multi')

    if int == 3:
        df_hum = df[['humidity']]
        df_hum.index = np.arange(hum, len(df_hum) + hum)
        df_hum['id'] = df_hum.index
        df_hum.rename(columns={'humidity': 'value'}, inplace=True)
        df_loc['humidity_id'] = df_hum.index
        print(df_hum)
        df_hum.to_sql('humidity', engine, if_exists='append', index=False, method='multi')

    df_loc['research_id'] = research
    df_loc['voyager_id'] = voyager
    print(df_loc)
    df_loc.to_sql('location', engine, if_exists='append', index=False, method='multi')
def etl_(query):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    with source_db() as source:
        etl.fromdb(source, extract_query) \
            .topickle(f'temp/{query.target_table}.p')

    with GISLNIDB.GISLNIDB() as target:
        etl.frompickle(f'temp/{query.target_table}.p') \
            .todb(get_cursor(target), query.target_table.upper())
def dimension_values():
    connection = psycopg2.connect(dbname='voyager', user='******', password='******',
                                  host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    knmi_dim_time = "select distinct((to_date(CAST(date AS text), 'YYYYMMDD'))::timestamp + interval '1h' * hour) as time from knmi_station_data"
    lmn_dim_time = "SELECT Distinct(left(timestamp, -6)) as time from luchtmeetnet_data"

    knmi_table = etl.fromdb(connection, knmi_dim_time)
    lmn_table = etl.fromdb(connection, lmn_dim_time)

    df_knmi = pd.DataFrame(knmi_table)
    df_lmn = pd.DataFrame(lmn_table)
    df_knmi.columns = df_knmi.iloc[0]
    df_knmi = df_knmi.drop(0)
    df_lmn.columns = df_lmn.iloc[0]
    df_lmn = df_lmn.drop(0)

    df_total = df_knmi.append(df_lmn)
    df_total = df_total.drop_duplicates()
    df_total = df_total.reset_index()
    df_total = df_total.drop('index', axis=1)
    df_total = df_total.reset_index()
    df_total = df_total.rename(columns={'index': 'id'})
    df_total['time'] = df_total['time'].astype(str)
    df_total[['Year', 'Month', 'Day']] = df_total.time.str.split("-", expand=True)
    df_total[['Day', 'Hour']] = df_total.Day.str.split(" ", expand=True)
    df_total[['Hour', 'Minute', 'Second']] = df_total.Hour.str.split(":", expand=True)

    df_total.to_sql('dim_time', engine, if_exists='append', index=False, method='multi')
    print(df_total)
def etl_(query, logger):
    source_db = get_source_db(query)
    extract_query = get_extract_query(query)

    logger.info(f'{query.target_table} - extracting data into pickle file...')
    with source_db() as source:
        etl.fromdb(source, extract_query).topickle(f'temp/{query.target_table}.p')

    logger.info(f'{query.target_table} - loading data from pickle file...')
    with PERMITP.PERMITP() as target:
        etl.frompickle(f'temp/{query.target_table}.p').todb(
            get_cursor(target), query.target_table.upper())
def dimension_values():
    connection = psycopg2.connect(dbname='voyager', user='******', password='******',
                                  host='172.16.0.45')
    engine = create_engine('postgresql://*****:*****@172.16.0.45:5432/voyager')

    knmi_dim_com = "SELECT column_name as name FROM information_schema.columns " \
                   "WHERE table_schema = 'public' " \
                   "AND table_name = 'knmi_station_data'"
    lmn_dim_com = "SELECT name FROM luchtmeetnet_sensors"

    knmi_table = etl.fromdb(connection, knmi_dim_com)
    lmn_table = etl.fromdb(connection, lmn_dim_com)

    df_knmi = pd.DataFrame(knmi_table)
    df_lmn = pd.DataFrame(lmn_table)
    df_knmi.columns = df_knmi.iloc[0]
    df_knmi = df_knmi.drop(0)
    df_lmn.columns = df_lmn.iloc[0]
    df_lmn = df_lmn.drop(0)

    # drop columns that are not measurements
    wid = df_knmi.loc[df_knmi['name'] == 'weather_station_id'].index
    df_knmi = df_knmi.drop(wid)
    date = df_knmi.loc[df_knmi['name'] == 'date'].index
    df_knmi = df_knmi.drop(date)
    hour = df_knmi.loc[df_knmi['name'] == 'hour'].index
    df_knmi = df_knmi.drop(hour)
    index = df_knmi.loc[df_knmi['name'] == 'index'].index
    df_knmi = df_knmi.drop(index)

    df_total = df_knmi.append(df_lmn)
    df_total = df_total.reset_index()
    df_total = df_total.drop('index', axis=1)
    df_total = df_total.reset_index()
    df_total = df_total.rename(columns={'index': 'id'})
    print(df_total)

    df_total.to_sql('dim_com', engine, if_exists='append', index=False, method='multi')
def llenarListaEmpresas(self):
    global iEmpresa
    self.listEmpresas = QtWidgets.QListWidget(self.scrollAreaWidgetContents_2)
    self.listEmpresas.setObjectName("listEmpresas")

    empresas = etl.fromdb(connection, 'SELECT * FROM empresas')
    for empresa in etl.data(empresas):
        item = QtWidgets.QListWidgetItem()
        self.listEmpresas.addItem(item)
    self.horizontalLayout_3.addWidget(self.listEmpresas)

    __sortingEnabled = self.listEmpresas.isSortingEnabled()
    self.listEmpresas.setSortingEnabled(False)
    iEmpresa = 0
    for empresa in etl.data(empresas):
        item = self.listEmpresas.item(iEmpresa)
        item.setText(empresa[1])
        iEmpresa += 1
    self.listEmpresas.setSortingEnabled(__sortingEnabled)
def run(self, driver, task, log):
    input_driver = driver.get_driver(task["source"]["connection"])
    sql = self._parse_sql(task["source"])
    db = input_driver.get_db()
    record_set = etl.fromdb(db, sql)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        fld = task["target"].get("folder", "output")
        fld = compat.translate_unicode(fld)
        target = task["target"]["file"]
        target = compat.translate_unicode(target)
        out = "{}/{}".format(fld, target)

        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")

        task_log = "log/db-csv_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            if "truncate" in task["target"] and task["target"]["truncate"]:
                record_set.progress(10000, out=lg).tocsv(out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(out, encoding=enc, delimiter=separator)
    db.close()
def ETL_MMS_NOW_schema(connection, tables, schema, system_name):
    '''Import all the data from the specified schema and tables.'''
    for destination, source in tables.items():
        try:
            current_table = etl.fromdb(connection, f'SELECT * from {schema}.{source}')
            print(f' {destination}:{etl.nrows(current_table)}')
            if (source == 'application'):
                # add originating source
                table_plus_os = etl.addfield(current_table, 'originating_system', system_name)
                table_plus_os_guid = join_mine_guids(connection, table_plus_os)
                etl.appenddb(table_plus_os_guid, connection, destination,
                             schema='now_submissions', commit=False)
            else:
                etl.appenddb(current_table, connection, destination,
                             schema='now_submissions', commit=False)
        except Exception as err:
            print(f'ETL Parsing error: {err}')
            raise
def generateCSV(request):
    connection = psycopg2.connect("dbname=TaaS \
                                   user=postgres \
                                   password=python \
                                   host=localhost")
    table = fromdb(connection, 'select * from cars')
    #table2 = head(table, 2)
    #petl.tail(table, n=10)
    #table3 = select(table, lambda rec: rec['name'] == 'Skoda' and rec['baz'] > 88.1)
    #table3 = select(table, lambda rec: rec['name'] == 'Skoda')
    #print look(table)
    tocsv(table, '/home/kannan/Documents/openproject/mysite/output.csv')
    data = open("/home/kannan/Documents/openproject/mysite/output.csv", "r")
    #data = f.readlines()
    #f = StringIO.StringIO()
    #template_name = 'GDR.html'
    #response = HttpResponse(f, content_type='text/csv')
    #response['Content-Disposition'] = 'attachment; \
    #    filename=output.csv'
    #return response
    resp = HttpResponse(data, mimetype='text/csv')
    resp['Content-Disposition'] = 'attachment;filename=output.csv'
    return resp
def test_fromdb_mkcursor():
    # initial data
    data = (("a", 1), ("b", 2), ("c", 2.0))
    connection = sqlite3.connect(":memory:")
    c = connection.cursor()
    c.execute("create table foobar (foo, bar)")
    for row in data:
        c.execute("insert into foobar values (?, ?)", row)
    connection.commit()
    c.close()

    # test the function
    mkcursor = lambda: connection.cursor()
    actual = fromdb(mkcursor, "select * from foobar")
    expect = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

    # test iterators are isolated
    i1 = iter(actual)
    i2 = iter(actual)
    eq_(("foo", "bar"), i1.next())
    eq_(("a", 1), i1.next())
    eq_(("foo", "bar"), i2.next())
    eq_(("b", 2), i1.next())
def test_fromdb_mkcursor():
    # initial data
    data = (('a', 1), ('b', 2), ('c', 2.0))
    connection = sqlite3.connect(':memory:')
    c = connection.cursor()
    c.execute('create table foobar (foo, bar)')
    for row in data:
        c.execute('insert into foobar values (?, ?)', row)
    connection.commit()
    c.close()

    # test the function
    mkcursor = lambda: connection.cursor()
    actual = fromdb(mkcursor, 'select * from foobar')
    expect = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

    # test iterators are isolated
    i1 = iter(actual)
    i2 = iter(actual)
    eq_(('foo', 'bar'), i1.next())
    eq_(('a', 1), i1.next())
    eq_(('foo', 'bar'), i2.next())
    eq_(('b', 2), i1.next())
def game_buckets(game_param, game_bins, position):
    # extract data
    table_name = '{}_games'.format(position)
    games = petl.fromdb(conn, 'select * from lombardi.{}'.format(table_name))
    vals = [float(y) for y in games[game_param]]

    # calculate params for our model
    min_v = min(vals)
    max_v = max(vals)
    bin_size = (max_v - min_v) / game_bins

    def rebucket(v):
        if v == game_bins:
            return v - 1
        return v

    # bucket vals
    bucketed_games = (
        games
        .addfield('normed_v', lambda r: float(r[game_param]) - min_v)
        .addfield('bucket', lambda r: int(r['normed_v'] // bin_size))
        .convert(game_param, float)
        # gnarly hack to account for bin logic weirdness on last value
        .convert('bucket', rebucket)
        .cut(('year', 'name', 'week', game_param, 'bucket'))
    )

    return bucketed_games
def selectRows(self, line):
    currentCount = 0
    listLength = 0
    list1 = []
    fromTable = petl.fromdb(self.FROM_DB_CON, line)
    it = iter(fromTable)
    hdr = next(it)
    for one in it:
        currentCount += 1
        listLength += 1
        if self.bigFields is not None:
            bigFields = self.bigFields
            one = list(one)
            if 'BLOB' in bigFields:
                for n in bigFields['BLOB']:
                    try:
                        one[n] = one[n].read()
                    except Exception as e:
                        print(e)
        list1.append(one)
        if listLength == self.getDataLength:
            #print(self.manager.getDispatchedJobQueue().qsize())
            qList = list1
            self.manager.getDispatchedJobQueue().put(qList)
            list1 = []
            listLength = 0
    if len(list1):
        self.manager.getDispatchedJobQueue().put(list1)
    data = self.manager.getConfigQueue().get(1)
    data['endFlag'] = True
    self.manager.getConfigQueue().put(data)
def extract(self, model, record_id):
    table = model.name
    sql = f'''select POINT_ID, WQ_{table}.Latitude, WQ_{table}.Longitude, SiteNames, WellDepth
    from dbo.WQ_{table}
    join NM_Aquifer.dbo.Location on NM_Aquifer.dbo.Location.PointID = dbo.WQ_{table}.POINT_ID
    where PublicRelease=1
    and POINT_ID = %d'''
    table = petl.fromdb(nm_quality_connection(), sql, record_id)
    return table
def load_db_data(db, dispensary_id, table_name):
    """
    Data extracted from source db
    """
    sql = ("SELECT * from {0} WHERE "
           "dispensary_id={1}").format(table_name, dispensary_id)
    return etl.fromdb(db, sql)
def _has_observations(self, record):
    sql = '''select count(PointID) from dbo.WaterLevelsContinuous_Pressure
    where PointID=%s'''
    pid = record['PointID']
    table = petl.fromdb(nm_aquifier_connection(), sql, (pid, ))
    nobs = petl.values(table, '')[0]
    print(f'{pid} has nobs={nobs}')
    return bool(nobs)
def get_data_table(tableName):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    tabla = etl.fromdb(conex, "SELECT * FROM " + tableName)
    etl.tojson(tabla, "./static/data/" + tableName + '.json')
    conex.close()
    rv = showjson(str(tableName))
    return jsonify(rv)
def join_mine_guids(connection, application_table):
    current_mines = etl.fromdb(
        connection,
        'select distinct on (minenumber) mine_guid, mine_no as minenumber from public.mine order by minenumber, create_timestamp;'
    )
    application_table_guid_lookup = etl.leftjoin(application_table, current_mines, key='minenumber')
    return application_table_guid_lookup
def make_runlist():
    sql = '''select DISTINCT Location.PointID from dbo.WaterLevelsContinuous_Pressure
    join dbo.Location on Location.PointID = dbo.WaterLevelsContinuous_Pressure.PointID
    where dbo.Location.LatitudeDD is not null and dbo.Location.PublicRelease=1
    group by Location.PointID
    order by Location.PointID'''

    table = petl.fromdb(nm_aquifier_connection(), sql)
    obj = petl.tojson(table, 'record_ids.json', indent=2)
    print(obj)
def get_atributosTable(tableName):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    query = "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME =" + "'" + str(tableName) + "'"
    atributos = etl.fromdb(conex, query)
    etl.tojson(atributos, "./static/data/" + tableName + '_atrib.json')
    conex.close()
    rv = showjson(str(tableName) + "_atrib")
    return jsonify(rv)
def get_data(self, min_dt, max_dt):
    """
    Generates the SQL and extracts our data
    :param min_dt:
    :param max_dt:
    :return:
    """
    sql = "select dt, total, duration from ol_transactions where dt > '{0}' and dt <= '{1}'".format(min_dt, max_dt)
    self.log("SQL: {0}".format(sql))
    self.table = petl.fromdb(self.connection, sql)
def get_max_dt(self):
    """
    Gets the current maximum date in the table
    :return:
    """
    sql = 'select max(dt) as max_dt from ol_transactions'
    self.log("SQL: {0}".format(sql))
    table = petl.fromdb(self.connection, sql)
    max_dt = petl.values(table, 'max_dt')[0]
    return max_dt
def cambiarFichas(self, tipo):
    tipos = etl.fromdb(connection, 'SELECT * FROM tipos')
    b = tipos.data()
    self.imgFicha1.setPixmap(QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][4])))
    self.imgFicha2.setPixmap(QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][5])))
    self.imgFicha3.setPixmap(QtGui.QPixmap(self.ponerImagenFicha(b[midTipo][6])))
def get_cambiar_valor(cambio):
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    myCambio = cambio.split("_")
    tabla = etl.fromdb(conex, "SELECT * FROM " + myCambio[3])
    tablaCambiada = etl.convert(tabla, str(myCambio[0]), 'replace', str(myCambio[1]), str(myCambio[2]))
    etl.tojson(tablaCambiada, "./static/data/cambiarValor.json")
    conex.close()
    rv = showjson("cambiarValor")
    return jsonify(rv)
def _extract(self, thing, model, skip):
    point_id = thing['@nmbgmr.point_id']
    sql = f'''select DateMeasured, {model.mapped_column} from dbo.WaterLevelsContinuous_Pressure
    join dbo.Location on dbo.Location.PointID = dbo.WaterLevelsContinuous_Pressure.PointID
    where dbo.Location.PointID = %d and QCed=1
    order by DateMeasured
    offset %d rows
    '''
    return petl.fromdb(nm_aquifier_connection(), sql, (point_id, skip))
def get_all_tables():
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    listTable = "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA =" + "'" + str(dDBI["db"]) + "'"
    tablas = etl.fromdb(conex, listTable)
    # NOTE: add an identifier for the database here
    etl.tojson(tablas, './static/data/tablas.json')
    conex.close()
    aTablas = showjson('tablas')
    return jsonify(aTablas)
def run_backup(sqlite_db, backup_path):
    """backs-up each table in the inventory database to a csv, zips them all up,
    and saves the zip with a timestamp-derived name.
    """
    ts = timestamp()

    # SET UP THE FOLDERS -----------------------------------------------------
    # check for backup folder, make if it doesn't exist
    if not os.path.exists(backup_path):
        os.makedirs(backup_path)
    # make a folder for this backup
    this_backup_path = os.path.join(backup_path, "backup_{0}".format(ts))
    if not os.path.exists(this_backup_path):
        os.makedirs(this_backup_path)
    click.echo(this_backup_path)

    # GET THE DATA OUT -------------------------------------------------------
    # temporarily store extracted csv files. (use this to delete them later)
    csvs = []
    # connect to the DB, get each table, save out as a csv.
    conn = sqlite3.connect(sqlite_db)
    for table in ['product', 'product_tags', 'sale', 'staff', 'supplier', 'tag']:
        t = etl.fromdb(lambda: conn.cursor(), """SELECT * FROM {0}""".format(table))
        out_csv = os.path.join(this_backup_path, '{0}.csv'.format(table))
        etl.tocsv(t, out_csv)
        csvs.append(out_csv)

    # ZIP THE DATA UP --------------------------------------------------------
    # make a zip file in the main backup location
    zipfile_directory = os.path.join(backup_path, "inventory_backup_{0}.zip".format(ts))
    # create a zip file object
    zf = zipfile.ZipFile(zipfile_directory, mode="w")
    for each in csvs:
        click.echo(each)
        zf.write(filename=each, arcname=os.path.basename(each), compress_type=compression)
    zf.close()

    # REMOVE TEMP FILES -------------------------------------------------------
    for each in csvs:
        os.remove(each)
    os.rmdir(this_backup_path)
def __iter__(self):
    """Proxy iteration to core petl."""
    # form sql statement
    stmt = self.stmt()
    # get petl iterator
    dbo = self.db.dbo
    db_view = etl.fromdb(dbo, stmt)
    iter_fn = db_view.__iter__()
    return iter_fn
def get_all_atributos():
    conex = get_conex(dDBI["mod"], dDBI["host"], dDBI["port"], dDBI["user"],
                      dDBI["passwd"], dDBI["db"])
    listAtrib = "SELECT COLUMN_NAME,TABLE_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA =" + "'" + str(dDBI["db"]) + "'"
    allAtributos = etl.fromdb(conex, listAtrib)
    # NOTE: add an identifier for the database here
    etl.tojson(allAtributos, './static/data/allAtributos.json')
    conex.close()
    myAtributos = showjson('allAtributos')
    return jsonify(myAtributos)
def test_sync_users(src_conn, dest_conn):
    src_table = etl.fromdb(src_conn, 'select * from auth_user limit 8')
    mapping = OrderedDict()
    mapping['org_id'] = lambda x: 1
    mapping['username'] = '******'
    mapping['name'] = 'full_name'
    mapping['status'] = lambda x: 'ACTIVE'
    mapping['uid'] = 'username'
    mapping['type'] = lambda x: "STUDENT"
    dst_table = etl.fieldmap(src_table, mapping)
    upsert_many(dest_conn['auth_user'], dst_table, keys=['username'])
def _medical_limits(id, source_db):
    """
    get the member limits
    """
    sql = ("SELECT dispensary_id, daily_purchase_limit, visit_purchase_limit, "
           "daily_visit_limit, two_week_purchase_limit "
           "FROM red_flags "
           "WHERE dispensary_id={0}").format(id)
    data = etl.fromdb(source_db, sql)
    limits = etl.select(data, lambda rec: rec.dispensary_id == id)
    return etl.dicts(limits)
def ETL_MMS_NOW_schema(connection, tables, schema):
    for key, value in tables.items():
        try:
            current_table = etl.fromdb(connection, f'SELECT * from {schema}.{value}')
            etl.appenddb(current_table, connection, key, schema='now_submissions', commit=False)
        except Exception as err:
            print(f'ETL Parsing error: {err}')
def _test_unicode(dbo):
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = etl.fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(etl.look(actual))
def pg_extract(service):
    """
    Returns the full contents of a postgresql table
    :param service: object with the pgsql connection service
    :return: data
    """
    schema = config.postgres_info['schema']
    table = config.postgres_info['table']
    try:
        data = petl.fromdb(service, 'SELECT * FROM %s.%s;' % (schema, table))
    except Exception, e:
        print "Error extracting data from postgres %s.%s %s" % (schema, table, e)
        service.rollback()
        exit(1)
def get_min_dt(self, last):
    """
    Gets the minimum date considering previous extractions from the table.
    :param last:
    :return:
    """
    if last is None or len(last) == 0:
        sql = "select min(dt) as min_dt from ol_transactions"
    else:
        sql = "select min(dt) as min_dt from ol_transactions where dt >= '{0}'".format(last)
    self.log("SQL: {0}".format(sql))
    table = petl.fromdb(self.connection, sql)
    extract_dt = petl.values(table, 'min_dt')[0]
    return extract_dt
def _test_dbo(write_dbo, read_dbo=None):
    if read_dbo is None:
        read_dbo = write_dbo

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')

    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)

    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)

    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)

    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
def test_fromdb_withargs():
    # initial data
    data = (("a", 1), ("b", 2), ("c", 2.0))
    connection = sqlite3.connect(":memory:")
    c = connection.cursor()
    c.execute("create table foobar (foo, bar)")
    for row in data:
        c.execute("insert into foobar values (?, ?)", row)
    connection.commit()
    c.close()

    # test the function
    actual = fromdb(connection, "select * from foobar where bar > ? and bar < ?", (1, 3))
    expect = (("foo", "bar"), ("b", 2), ("c", 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def exercise_unicode(dbo):
    print('=' * len(repr(dbo)))
    print('EXERCISE UNICODE')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()

    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = fromdb(dbo, 'SELECT * FROM test_unicode')

    print('write some data and verify...')
    todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print(look(actual))
def execute(self):
    if self.destination is None:
        self.destination = get_default_destination()

    for source in self.sources:
        conn = source.get_connection()
        dest_conn = self.destination.get_connection()
        print source.test_connection()
        print self.destination.test_connection()

        if source.dbms == 'mysql':
            conn.cursor().execute('SET SQL_MODE=ANSI_QUOTES;')

        for table in source.tables:
            table_data = petl.fromdb(conn, 'SELECT * FROM %s' % table)
            petl.todb(table_data, dest_conn, table, commit=True)

        dest_conn.commit()
        dest_conn.close()
    return
def exercise_with_schema(dbo, db):
    print('=' * len(repr(dbo)))
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print()

    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    todb(expect, dbo, 'test', schema=db)
    ieq(expect, actual)
    print(look(actual))

    print('append some data and verify...')
    appenddb(expect, dbo, 'test', schema=db)
    ieq(expect_appended, actual)
    print(look(actual))
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print

    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_extended = (('foo', 'bar', 'baz'), ('a', 1, 2.3), ('b', 1, 4.1))
    actual = fromdb(dbo, 'SELECT * FROM testx')

    print "verify table doesn't exist to start with"
    try:
        print look(actual)
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')

    print "verify cannot write without create"
    try:
        todb(expect, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')

    print 'create table and verify...'
    todb(expect, dbo, 'testx', create=True)
    ieq(expect, actual)
    print look(actual)

    print 'verify cannot overwrite with new cols without recreate...'
    try:
        todb(expect_extended, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')

    print 'verify recreate...'
    todb(expect_extended, dbo, 'testx', create=True, drop=True)
    ieq(expect_extended, actual)
    print look(actual)
def _test_with_schema(dbo, schema):
    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    actual = etl.fromdb(dbo, 'SELECT * FROM test')

    print('write some data and verify...')
    etl.todb(expect, dbo, 'test', schema=schema)
    ieq(expect, actual)
    print(etl.look(actual))

    print('append some data and verify...')
    etl.appenddb(expect, dbo, 'test', schema=schema)
    ieq(expect_appended, actual)
    print(etl.look(actual))
def test_fromdb_withargs():
    # initial data
    data = (('a', 1), ('b', 2), ('c', 2.0))
    connection = sqlite3.connect(':memory:')
    c = connection.cursor()
    c.execute('create table foobar (foo, bar)')
    for row in data:
        c.execute('insert into foobar values (?, ?)', row)
    connection.commit()
    c.close()

    # test the function
    actual = fromdb(connection, 'select * from foobar where bar > ? and bar < ?', (1, 3))
    expect = (('foo', 'bar'), ('b', 2), ('c', 2.0))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def exercise_ss_cursor(setup_dbo, ss_dbo):
    print '=' * len(repr(ss_dbo))
    print 'EXERCISE WITH SERVER-SIDE CURSOR'
    print repr(ss_dbo)
    print '=' * len(repr(ss_dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(ss_dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
def exercise(dbo):
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print

    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')

    print 'verify empty to start with...'
    ieq(expect_empty, actual)
    print look(actual)

    print 'write some data and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'append some data and verify...'
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)

    print 'overwrite and verify...'
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, overwrite and verify'
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print look(actual)

    print 'cut, append and verify'
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print look(actual)
user = sys.argv[1]
passwd = sys.argv[2]

import MySQLdb

# assume database petl_test already created
connection = MySQLdb.connect(user=user, passwd=passwd, db='petl_test')

print 'setup table'
cursor = connection.cursor()
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
connection.commit()
cursor.close()

print 'exercise the petl functions using a connection'
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

print 'exercise the petl functions using a cursor'
cursor = connection.cursor()
def collect_import_objects(self):
    """
    Query and put together all QA objects which will be imported.
    """
    def get_tables(mappings):
        """
        Recursively collect all WOCAT tables of the mappings.

        Args:
            mappings: list.

        Returns:
            list. A list of tables.
        """
        tables = []
        for mapping in mappings:
            table = mapping.get('wocat_table')
            if table:
                tables.append(table)
            tables.extend(get_tables(mapping.get('mapping', [])))
            tables.extend(get_tables(mapping.get('conditions', [])))
        return tables

    self.output('Fetching data from WOCAT QA database.', v=1)

    # Extend the default tables by adding the ones from the mapping.
    tables = self.default_tables
    for qg_properties in self.mapping.values():
        questions = qg_properties.get('questions', {})
        for q_properties in questions.values():
            tables.extend(get_tables(q_properties.get('mapping', [])))

    # Remove duplicates
    tables = list(set(tables))

    # Try to query the lookup table and collect its values.
    try:
        lookup_query = """
            SELECT * FROM {schema}.{table_name};
        """.format(schema=self.schema, table_name=self.lookup_table_name)
        lookup_table = {}
        for row in petl.dicts(petl.fromdb(self.connection, lookup_query)):
            lookup_table[row.get('id')] = row
    except AttributeError:
        lookup_table = {}

    # So far, lookup_text is never used. Therefore it can be left empty.
    lookup_table_text = {}

    # Try to query file infos
    try:
        lookup_query_files = """
            SELECT * FROM {schema}.{table_name};
        """.format(schema=self.schema, table_name=self.file_info_table)
        file_infos = {}
        for row in petl.dicts(petl.fromdb(self.connection, lookup_query_files)):
            file_infos[row.get('blob_id')] = row
    except AttributeError:
        file_infos = {}

    for table_name in tables:
        query = 'SELECT {columns} FROM {schema}.{table_name};'.format(
            columns='*', schema=self.schema, table_name=table_name)
        queried_table = petl.fromdb(self.connection, query)

        row_errors = False
        for row in petl.dicts(queried_table):
            if row_errors is True:
                continue

            # Inconsistent naming throughout the tables
            questionnaire_identifier = self.questionnaire_identifier
            if table_name == 'approach':
                questionnaire_identifier = 'id'
            elif table_name == 'qa_quality_review':
                questionnaire_identifier = 'qa_id'

            identifier = row.get(questionnaire_identifier)
            if identifier is None:
                self.output('No identifier found for table "{}".'.format(
                    table_name), v=1, l='error')
                row_errors = True

            if identifier in self.import_objects_exclude:
                continue

            import_object = self.get_import_object(identifier)
            if import_object is None:
                import_object = QAImportObject(
                    identifier, self.command_options, lookup_table,
                    lookup_table_text, file_infos, self.image_url)
                import_object.add_custom_mapping_messages(
                    self.custom_mapping_messages)
                self.import_objects.append(import_object)

            # Set the code if it is available in the current table
            code = row.get(self.questionnaire_code)
            if code:
                import_object.set_code(code)

            # The main contributor is the compiler
            compiler_id = row.get(self.questionnaire_owner)
            if compiler_id:
                # If the main contributer is "Not registered" (ID 661), use
                # the default compiler
                if compiler_id == 661:
                    compiler_id = self.default_compiler_id
                    import_object.add_mapping_message(
                        'Using "Unknown User" as compiler in QCAT as main '
                        'contributor in QA was "Not registered"')
                # The following QAs have a main contributor which is not
                # available through the API call. Set the default user and
                # add a mapping message.
                elif identifier in [131, 128, 89, 47, 106, 82, 195, 212, 76,
                                    107, 84, 139, 130, 276, 72, 147, 138, 43,
                                    44, 46, 49, 50, 52, 57, 173, 171, 170, 166,
                                    125, 78, 102, 45, 197, 48]:
                    compiler_id = self.default_compiler_id
                    import_object.add_mapping_message(
                        'The compiler needs to be set manually. Use the '
                        'main contributor of QA.')
                import_object.set_owner(compiler_id)

            # Use the creation date available on the approach table
            created = row.get('date')
            if created and table_name == 'approach':
                creation_time = datetime.strptime(created, WOCAT_DATE_FORMAT)
                import_object.created = timezone.make_aware(
                    creation_time, timezone.get_current_timezone())

            import_object.add_wocat_data(table_name, row)
id_to_predict = 11111

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                               port=dbconfig.db_port,
                               charset="utf8",
                               user=dbconfig.db_user,
                               password=dbconfig.db_pass,
                               db=dbconfig.db_name)

df = pd.read_csv('trained_data.csv', index_col=False)

query = "SELECT id,names FROM {} WHERE id = {} ".format(dbconfig.db_table_items, id_to_predict)
items_to_predict = etl.fromdb(read_db_conn, query)
print items_to_predict.values('name')

similiar_items = df.loc[lambda df: df.id == id_to_predict, 'similiar_items']
similiar_items = json.loads(similiar_items.values[0])

results_ids = []
for similarity, item_id in similiar_items:
    print similarity, item_id
    if similarity > 0.04:  # put some threshold
        results_ids.append(str(item_id))

query = "SELECT image, name, vendor_id FROM {} WHERE id IN ({})".format(dbconfig.db_table_items, ",".join(results_ids))
def standardize_name(name):
    tmp = name.strip()
    # Name standardization:
    tmp_list = re.sub('[' + string.punctuation + ']', '', tmp).split()
    std = StandardName(tmp_list, False).output
    std_name = ' '.join(std)
    return std_name


alias_stmt = '''
    select trim(ala.pre_dir) as PRE_DIR,
        trim(ala.name) as ST_NAME,
        trim(ala.type_) as ST_TYPE,
        trim(ala.suf_dir) as SUF_DIR,
        sc.l_f_add, sc.l_t_add, sc.r_f_add, sc.r_t_add,
        sc.st_code, ala.seg_id,
        trim(sc.responsibl) as RESPONSIBL,
        trim(sc.PRE_DIR) as CL_PRE_DIR,
        trim(sc.ST_NAME) as CL_ST_NAME,
        trim(sc.ST_TYPE) as CL_ST_TYPE,
        trim(sc.SUF_DIR) as CL_SUF_DIR
    from {alias_table} ala
    inner join {cl_table} sc on sc.seg_id = ala.seg_id
    order by st_name, pre_dir, st_type, suf_dir, l_f_add, l_t_add, r_f_add, r_t_add, st_code, seg_id
'''.format(cl_table=street_centerline_table_name, alias_table=alias_table_name)

alias_rows = etl.fromdb(dbo, alias_stmt).convert('SEG_ID', int) \
    .convert('ST_NAME', lambda s: standardize_name(s)) \
    .convert('CL_ST_NAME', lambda s: standardize_name(s)) \
    .addfield('CL_STREET_FULL', lambda a: concat_cl_streetname(a))
alias_rows.tocsv(alias_csv)

alias_centerline_street_rows = alias_rows.cut('PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('STREET_FULL', lambda a: concat_al_streetname(a)) \
    .cut('STREET_FULL', 'PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('POST_DIR', '')
alias_centerline_street_rows.tocsv(alias_streets_csv, write_header=False)
    i = raw_input(msg + '? ([y]/n)\n')
    if i not in ('', 'y', 'Y'):
        sys.exit(0)


prompt('setup table')
cursor = connection.cursor()
# deal with quote compatibility
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
        # print(url)
        r = requests.get(url)
        return r.status_code
    except requests.exceptions.HTTPError as e:
        error = [e, '', '']
        query_errors[url] = error
    except requests.exceptions.RequestException as e:
        error = [e, '', '']
        query_errors[url] = error
    except JSONDecodeError as e:
        error = [e, r.raw.data, r.raw.read(100)]
        query_errors[url] = error


read_conn = psycopg2.connect("dbname=ais_engine user=ais_engine")
address_count = etl.fromdb(read_conn, 'select count(*) as N from {}'.format(warmup_address_table_name))
n = list(address_count.values('n'))[0]
warmup_rows = etl.fromdb(read_conn, 'select {address_field} from {table} OFFSET floor(random()*{n}) limit {limit}'.format(
    address_field=warmup_address_field, table=warmup_address_table_name, n=n, limit=warmup_row_limit))
# print(etl.look(warmup_rows))
responses = warmup_rows.addfield('response_status', (lambda a: query_address(a['street_address']))).progress(100)
# print(etl.look(responses))
eval = responses.aggregate('response_status', len)
print(etl.look(eval))
f_200 = [(count / warmup_row_limit) for status, count in eval[1:] if status == 200][0]
print(f_200)

############################
# WRITE ERRORS OUT TO FILE #
############################
print("Writing errors to file...")
error_table = []
for url, error_vals in query_errors.items():
dbo = cx_Oracle.connect(dsn)
source_table_name = source_def['table']
source_field_map = source_def['field_map']
source_field_map_upper = {}
for k, v in source_field_map.items():
    source_field_map_upper[k] = v.upper()

# Read DOR CONDO rows from source
print("Reading condos...")
# TODO: get fieldnames from source_field_map
dor_condo_read_stmt = '''
    select condounit, objectid, mapref from {dor_condo_table}
    where status in (1,3)
'''.format(dor_condo_table=source_table_name)
source_dor_condo_rows = etl.fromdb(dbo, dor_condo_read_stmt).fieldmap(source_field_map_upper)
if DEV:
    print(etl.look(source_dor_condo_rows))

# Read DOR Parcel rows from engine db
print("Reading parcels...")
dor_parcel_read_stmt = '''
    select parcel_id, street_address, address_low, address_low_suffix, address_low_frac,
        address_high, street_predir, street_name, street_suffix, street_postdir, street_full
    from {dor_parcel_table}
'''.format(dor_parcel_table='dor_parcel')
engine_dor_parcel_rows = etl.fromdb(pg_db, dor_parcel_read_stmt)
if DEV:
    print(etl.look(engine_dor_parcel_rows))

# Get duplicate parcel_ids:
non_unique_parcel_id_rows = engine_dor_parcel_rows.duplicates(key='parcel_id')
    if standardize_nulls(unit_num):
        unit_full = '# {}'.format(unit_num)
    if address_full and street_full:
        source_address_comps = [address_full, street_full, unit_full]
        source_address = ' '.join([x for x in source_address_comps if x])
    return source_address if source_address != None else ''


#############################################
# Read in files, format and write to tables #
#############################################

##############
# TRUE RANGE #
##############
print("Writing true_range table...")
etl.fromdb(read_conn, 'select * from true_range').tooraclesde(write_dsn, true_range_write_table_name)

########################
# SERVICE AREA SUMMARY #
########################
print("Writing service_area_summary table...")
etl.fromdb(read_conn, 'select * from service_area_summary')\
    .rename({'neighborhood_advisory_committee': 'neighborhood_advisory_committe'}, )\
    .tooraclesde(write_dsn, service_area_summary_write_table_name)

########################
# ADDRESS AREA SUMMARY #
########################
print("Creating transformed address_summary table...")
address_summary_out_table = etl.fromdb(read_conn, 'select * from address_summary') \
    .addfield('address_full', (lambda a: make_address_full(
        {'address_low': a['address_low'],
         'address_low_suffix': a['address_low_suffix'],
         'address_low_frac': a['address_low_frac'],
         'address_high': a['address_high']}))) \
    return ' '.join(stnam_list)


def standardize_name(name):
    tmp = name.strip()
    # Name standardization:
    tmp_list = re.sub('[' + string.punctuation + ']', '', tmp).split()
    std = StandardName(tmp_list, False).output
    std_name = ' '.join(std)
    return std_name


centerline_stmt = '''select trim(PRE_DIR) AS PRE_DIR,trim(ST_NAME) AS ST_NAME,trim(ST_TYPE) AS ST_TYPE,trim(SUF_DIR) AS SUF_DIR,
    L_F_ADD,L_T_ADD,R_F_ADD,R_T_ADD,ST_CODE,SEG_ID,trim(RESPONSIBL) AS RESPONSIBL
    from {}
    order by st_name, st_type, pre_dir, suf_dir, l_f_add, l_t_add, r_f_add, r_t_add, st_code, seg_id'''.format(street_centerline_table_name)

centerline_rows = etl.fromdb(dbo, centerline_stmt).convert('ST_NAME', lambda s: standardize_name(s))
print(etl.look(centerline_rows))
centerline_rows.tocsv(centerline_csv)

# Centerline_streets
centerline_street_rows = centerline_rows.cut('PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('STREET_FULL', lambda a: concat_streetname(a)) \
    .addfield('POST_DIR', '') \
    .cut('STREET_FULL', 'PRE_DIR', 'ST_NAME', 'ST_TYPE', 'POST_DIR') \
    .distinct() \
    .sort(key=['ST_NAME', 'ST_TYPE', 'PRE_DIR', 'POST_DIR'])
print(etl.look(centerline_street_rows))
centerline_street_rows.tocsv(centerline_streets_csv, write_header=False)
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import petl
import pymysql

# Fetch our database access configuration from environment variables
host = os.environ['DB_HOST']
user = os.environ['DB_USER']
password = os.environ['DB_PASSWORD']
db = os.environ['DB_DATABASE']

# Connect to the database using the PyMySQL package
connection = pymysql.connect(host=host, user=user, password=password, db=db)

# Extract the data using both PyMySQL and PETL
table = petl.fromdb(connection, 'SELECT dt, total, duration FROM ol_transactions')
print(table)

connection.close()