def accdb_to_csv(directory, get_out, warn, file_type, get_in):
    """Export every table of a Microsoft Access (.mdb/.accdb) file to CSV.

    Arguments:
        directory -- base directory prepended to the output prefix
        get_out   -- output file prefix, relative to *directory*
        warn      -- log destination handle forwarded to meta.write_logs
        file_type -- human-readable description of the input (logging only)
        get_in    -- path of the Access database to read
    """
    meta.write_logs(warn, 'DEBUG', 'accdb_to_csv', 'Iniciando processo', debug=DEBUG)
    get_out = directory + get_out
    meta.write_logs(warn, 'INFO', 'Arquivo encontrado ' + file_type,
                    'Formato Micrsoft Access (mdb,accdb).')
    for table in mdb.list_tables(get_in):
        try:
            meta.write_logs(warn, 'INFO', 'Tabela ' + table,
                            'Tabela encontrado dentro do arquivo.')
            csv.access_to_csv(get_in, get_out, table)
            meta.write_logs(warn, 'INFO',
                            'Chave ùnica criada ' + get_out + '_' + table + '.csv',
                            'SHA256(' + meta.md5_hash(get_out + '_temp_' + table + '.csv') + ')')
        # Fixed: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        # Catch Exception only, so a bad table is logged and the loop continues.
        except Exception:
            meta.write_logs(warn, 'ERROR',
                            'accdb_to_csv - Falha ao exportar tabela ' + table,
                            'Erro de leitura na tabela')
    meta.write_logs(warn, 'DEBUG', 'accdb_to_csv', 'Finalizando processo', debug=DEBUG)
def ParseSynergiDatabase(self):
    """
    Use Pandas Access to convert the MDB tables to Pandas DataFrames.
    """
    synergi_path = self.paths["Synergi File"]
    print("Opening synergie database - ", synergi_path)
    # Collect (path, table-names) pairs; the warehouse database is optional.
    sources = [(synergi_path, mdb.list_tables(synergi_path))]
    if "warehouse" in self.paths:
        warehouse_path = self.paths["warehouse"]
        print("Opening warehouse database - ", warehouse_path)
        sources.append((warehouse_path, mdb.list_tables(warehouse_path)))
    # Read each table into a lower-cased DataFrame keyed by table name.
    for path, tables in sources:
        for table in tables:
            self.SynergiDictionary[table] = self.ToLowerCase(
                mdb.read_table(path, table)
            )
    return
def get_mdb_data(self, payload):
    '''
    Returns list of Entity object(s) (str,dataframe) from a successful extraction

    Arguments:
    payload -- payload object (str,binary)
    '''
    # TODO: find a way to directly pass byteio to reading utility without writing to disk
    try:
        # Fixed: use a context manager so the handle is flushed and closed
        # deterministically (the original `open(...).write(...)` leaked it).
        with open(payload.filename, 'wb') as handle:
            handle.write(payload.data.getvalue())
        # Get database schema
        mdb_schema = pandas_access.read_schema(payload.filename)
        # Get attributes that are of integer type
        integer_attributes = self.get_attributes_by_data_type(
            mdb_schema, 'Long Integer')
        # Declare entity dict
        entity_dict = dict()
        # Get list of table from database
        table_list = pandas_access.list_tables(payload.filename)
        # Update progress bar job count
        self.job_count += len(table_list)
        self.pbar.total = self.job_count
        # Iterate through each table in database
        for tbl in table_list:
            self.logger.debug('Extracting table: \'%s\' from file: %s...',
                              tbl, payload.filename)
            # Issue: Default pandas integer type is not nullable - null values in integer column causes read error
            # Workaround: Read integer as Int64 (pandas nullable integer type in pandas)
            dtype_input = {
                attribute: 'Int64'
                for attribute in integer_attributes[tbl]
            }
            df = pandas_access.read_table(payload.filename, tbl, dtype=dtype_input)
            entity_dict.update({tbl: df})
            # update progress bar
            self.pbar.update()
        return entity_dict
    finally:
        # Intermediate file is removed even when extraction fails.
        self.logger.debug('Removing intermediate file: %s...', payload.filename)
        utils.silentremove(payload.filename)
def from_SQLT(args):
    """Load a table from the SQL source described by *args*.

    Arguments:
        args -- namespace with `sql_table`, `sql_kind`, and (for mdb) `sql_file`.

    Returns:
        A DataFrame on success, otherwise a human-readable error string.
    """
    sql_table = args.sql_table
    if sql_table is None or sql_table == "":
        return "MUST SUPPLY --sql_table"
    sql_type = args.sql_kind.lower()
    if sql_type == "psql":
        return sql_type + " Coming Soon"
    elif sql_type == "msql":
        return sql_type + " Coming Soon"
    elif sql_type == "sqll":
        return sql_type + " Coming Soon"
    elif sql_type == "mdb":
        sql_file = args.sql_file
        if sql_file is None or sql_file == "":
            return "MUST SUPPLY --sql_file"
        try:
            tables = mdb.list_tables(sql_file)
            # Fixed: originally referenced an undefined name `from_table`
            # (NameError at runtime); the validated --sql_table value is the
            # intended table name.
            if sql_table not in tables:
                return "TABLE NOT FOUND. Available Tables:\n\t" + "\n\t".join(
                    tables)
            df = mdb.read_table(sql_file, sql_table)
        except Exception:
            traceback.print_exc()
            return "ERROR"
    else:
        return "UNSUPPORTED SQL. Available Kinds: PSQL (PostGreSQL), MSQL (MySQL), MDB (Access DB), SQLL (SQLite)"
    return df
# Input files should be (*.mdb or *.accdb)
inputfile = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/working.mdb"
outputfile = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/Output.csv"
outputdir = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/"
outputcl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/CRASH_LEVEL.csv"
outputvl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/VEHICLE_LEVEL.csv"
outputpl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/PERSON_LEVEL.csv"

# In[2]:

import pandas_access as mdb

db_filename = inputfile

# Listing the tables.
for tbl in mdb.list_tables(db_filename):
    print(tbl)

# import all the necessary libraries
import csv
import pyodbc
import pandas as pd
import numpy as np

# In[4]:

dfcl = mdb.read_table(db_filename, table_name="CRASH_LEVEL")
dfcl.to_csv(outputcl, index=False)

# In[5]:

dfvl = mdb.read_table(db_filename, table_name="VEHICLE_LEVEL")
# Fixed: VEHICLE_LEVEL was written to outputcl, silently overwriting the
# CRASH_LEVEL export just produced above; write it to its own outputvl path.
dfvl.to_csv(outputvl, index=False)
# Download the bus-stop Access database (up to three attempts), load its
# first table, drop duplicate stop rows, then clear the SQL Server staging
# table before it is repopulated further down the script.
local_mdb = expanduser('~/Desktop/RSTOP_BUS.mdb')
while attempts < 3:
    try:
        reply = urlopen(
            'http://static.data.gov.hk/td/routes-and-fares/RSTOP_BUS.mdb',
            timeout=10)
        payload = reply.read()
        with open(local_mdb, 'wb') as out:
            out.write(payload)
        break
    except URLError as e:
        # Give up after the third failed attempt.
        if attempts == 2:
            exit(1)
        attempts += 1
first_table = mdb.list_tables(local_mdb)[0]
df = mdb.read_table(local_mdb, first_table)
df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'STOP_SEQ'],
                   keep='last', inplace=True)
#df.fillna(-1, inplace=True)
conn = pymssql.connect(server='192.168.13.60', user='******',
                       password='******', database='DWHRMS_DEMO')
last = len(df) - 1
sql = 'DELETE FROM [t_MobBusStops]'
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()
folder = expanduser('~/Desktop/rstops')
def read_tables(db_path):
    """Return the list of table names in the Access database at *db_path*.

    Arguments:
        db_path -- path to the .mdb/.accdb file; names are decoded as UTF-8.
    """
    # Idiom: the manual append loop is just a list() conversion.
    return list(mdb.list_tables(db_path, encoding="utf-8"))
# Downloads the minibus routes-and-fares Access database (3 attempts), loads
# its first table, clears the SQL Server target table, then begins batching
# INSERT statements (1000 rows per statement). NOTE(review): this chunk is cut
# off mid-loop (`else:` has no visible body), so the code is left byte-identical.
while attempts < 3: try: response = urlopen( 'http://static.data.gov.hk/td/routes-and-fares/ROUTE_GMB.mdb', timeout=10) content = response.read() with open(expanduser('~/Desktop/ROUTE_GMB.mdb'), 'wb') as f: f.write(content) break except URLError as e: if attempts == 2: exit(1) attempts += 1 df = mdb.read_table(expanduser('~/Desktop/ROUTE_GMB.mdb'), mdb.list_tables(expanduser('~/Desktop/ROUTE_GMB.mdb'))[0]) #df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'STOP_SEQ'], keep='last', inplace=True) #df.fillna(-1, inplace=True) conn = pymssql.connect(server=r'192.168.9.75\sql2012', user='******', password='******', database='DWHRMS_DEMO') last = len(df) - 1 sql = 'DELETE FROM [t_MobMinibusFares]' cursor = conn.cursor() cursor.execute(sql) conn.commit() for i, r in df.iterrows(): if i % 1000 == 0: sql = f'INSERT INTO [t_MobMinibusFares] ([Route ID], [District], [Route Name], [Source zh-hk], [Source zh-cn], [Source en-us], [Dest zh-hk], [Dest zh-cn], [Dest en-us], [Fare]) \n' else:
# Download the bus-fare Access database (up to three attempts), load its
# first table, drop duplicate fare-section rows, then clear the SQL Server
# staging table before it is repopulated further down the script.
local_mdb = expanduser('~/Desktop/FARE_BUS.mdb')
while attempts < 3:
    try:
        reply = urlopen(
            'http://static.data.gov.hk/td/routes-and-fares/FARE_BUS.mdb',
            timeout=10)
        payload = reply.read()
        with open(local_mdb, 'wb') as out:
            out.write(payload)
        break
    except URLError as e:
        # Give up after the third failed attempt.
        if attempts == 2:
            exit(1)
        attempts += 1
first_table = mdb.list_tables(local_mdb)[0]
df = mdb.read_table(local_mdb, first_table)
df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'ON_SEQ', 'OFF_SEQ'],
                   keep='last', inplace=True)
#df.fillna(-1, inplace=True)
conn = pymssql.connect(server='192.168.13.60', user='******',
                       password='******', database='DWHRMS_DEMO')
last = len(df) - 1
sql = 'DELETE FROM [t_MobBusFaresSection]'
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()
folder = expanduser('~/Desktop/fares')
# Does what it says on the tin.
# Note that it takes a while to read all the mdb thingies.
# To run pandas_access successfully, install mdbtools through whatever you need.
# -Matt
import pandas
import numpy as np
import pandas_access

DATASET = "./FDOC_January_2017.mdb"

if __name__ == "__main__":
    # Enumerate every table in the database, echoing each name as we go.
    table_list = list(pandas_access.list_tables(DATASET))
    for name in table_list:
        print(name)
    # Only load the first two tables; doing more makes it take forever.
    tables_dict = {
        name: pandas_access.read_table(DATASET, name)
        for name in table_list[0:2]
    }