Exemple #1
0
def accdb_to_csv(directory, get_out, warn, file_type, get_in):
    """Export every table of a Microsoft Access file via ``csv.access_to_csv``.

    Each table reported by ``mdb.list_tables(get_in)`` is exported on its own.
    A failure on one table is logged as an error and the remaining tables are
    still processed.

    Args:
        directory: Prefix prepended (plain string concatenation) to ``get_out``.
        get_out: Output name; joined with ``directory`` and handed to the exporter.
        warn: Log target passed unchanged to ``meta.write_logs``.
        file_type: Label of the detected file type, used only in the log text.
        get_in: Path of the Access database (mdb/accdb) to read.
    """
    meta.write_logs(warn, 'DEBUG', 'accdb_to_csv','Iniciando processo', debug=DEBUG)
    out_prefix = directory + get_out
    meta.write_logs(warn, 'INFO', 'Arquivo encontrado ' + file_type, 'Formato Micrsoft Access (mdb,accdb).')
    for table in mdb.list_tables(get_in):
        try:
            meta.write_logs(warn, 'INFO', 'Tabela ' + table, 'Tabela encontrado dentro do arquivo.')
            csv.access_to_csv(get_in, out_prefix, table)
            # NOTE(review): the log label says SHA256 but the helper is named
            # md5_hash, and the hashed path contains '_temp_' while the logged
            # file name does not -- confirm both are intended.
            meta.write_logs(warn, 'INFO', 'Chave ùnica criada ' + out_prefix + '_' + table + '.csv','SHA256(' + meta.md5_hash(out_prefix + '_temp_' + table + '.csv') + ')')
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still stop the export instead of being logged as a
            # table read error.
            meta.write_logs(warn, 'ERROR','accdb_to_csv - Falha ao exportar tabela ' + table, 'Erro de leitura na tabela')
    meta.write_logs(warn, 'DEBUG', 'accdb_to_csv','Finalizando processo', debug=DEBUG)
Exemple #2
0
    def ParseSynergiDatabase(self):
        """
        Load the Synergi database tables, and those of the optional warehouse
        database, into ``self.SynergiDictionary`` using Pandas Access.

        Every table is passed through ``self.ToLowerCase`` and stored under its
        table name. Warehouse tables are stored after the Synergi ones, so a
        warehouse table replaces a Synergi table that has the same name.
        """
        synergi_path = self.paths["Synergi File"]
        print("Opening synergie database - ", synergi_path)
        sources = [(synergi_path, mdb.list_tables(synergi_path))]

        # The warehouse database is optional; it is listed up front and read
        # only after all Synergi tables have been loaded.
        if "warehouse" in self.paths:
            warehouse_path = self.paths["warehouse"]
            print("Opening warehouse database - ", warehouse_path)
            sources.append((warehouse_path, mdb.list_tables(warehouse_path)))

        for db_path, table_names in sources:
            for table_name in table_names:
                frame = mdb.read_table(db_path, table_name)
                self.SynergiDictionary[table_name] = self.ToLowerCase(frame)
        return
Exemple #3
0
    def get_mdb_data(self, payload):
        '''
        Extract every table of an Access database payload into DataFrames.

        The payload bytes are written to ``payload.filename`` on disk because
        the reading utility is given a file path. That intermediate file is
        removed again before this method returns, whether or not the
        extraction succeeded.

        Arguments:
        payload -- object exposing ``filename`` (path to write) and ``data``
                   (buffer providing ``getvalue()``)

        Returns:
        dict mapping each table name to the DataFrame read from that table
        '''
        # TODO: find a way to directly pass byteio to reading utility without writing to disk
        try:
            # Write the bytes to disk; the context manager guarantees the
            # handle is flushed and closed before the reader opens the file.
            with open(payload.filename, 'wb') as mdb_file:
                mdb_file.write(payload.data.getvalue())

            # Get database schema
            mdb_schema = pandas_access.read_schema(payload.filename)
            # Get attributes that are of integer type
            integer_attributes = self.get_attributes_by_data_type(
                mdb_schema, 'Long Integer')

            # Table name -> DataFrame
            entity_dict = {}

            # Get list of tables from database
            table_list = pandas_access.list_tables(payload.filename)

            # Update progress bar job count
            self.job_count += len(table_list)
            self.pbar.total = self.job_count

            # Iterate through each table in database
            for tbl in table_list:
                self.logger.debug('Extracting table: \'%s\' from file: %s...',
                                  tbl, payload.filename)
                # Issue: Default pandas integer type is not nullable - null values in integer column causes read error
                # Workaround: Read integer as Int64 (pandas nullable integer type)
                dtype_input = {
                    attribute: 'Int64'
                    for attribute in integer_attributes[tbl]
                }
                entity_dict[tbl] = pandas_access.read_table(payload.filename,
                                                            tbl,
                                                            dtype=dtype_input)
                # update progress bar
                self.pbar.update()
            return entity_dict
        finally:
            self.logger.debug('Removing intermediate file: %s...',
                              payload.filename)
            utils.silentremove(payload.filename)
Exemple #4
0
def from_SQLT(args):
    """Read one table from a SQL source into a DataFrame.

    Only Access databases (``sql_kind == "mdb"``) are implemented. Every
    failure is reported by returning a message string instead of raising.

    Args:
        args: Namespace-like object providing ``sql_table`` (table to read),
            ``sql_kind`` (case-insensitive source kind) and, for ``mdb``,
            ``sql_file`` (path of the database file).

    Returns:
        The table as returned by ``mdb.read_table`` on success, otherwise a
        string describing what is missing, unsupported or failed.
    """
    sql_table = args.sql_table

    if sql_table is None or sql_table == "":
        return "MUST SUPPLY --sql_table"

    # A missing kind is treated like an unknown one instead of crashing on
    # ``None.lower()``.
    sql_type = (args.sql_kind or "").lower()

    if sql_type in ("psql", "msql", "sqll"):
        return sql_type + " Coming Soon"

    if sql_type != "mdb":
        return "UNSUPPORTED SQL. Available Kinds: PSQL (PostGreSQL), MSQL (MySQL), MDB (Access DB), SQLL (SQLite)"

    sql_file = args.sql_file

    if sql_file is None or sql_file == "":
        return "MUST SUPPLY --sql_file"

    try:
        tables = mdb.list_tables(sql_file)

        # Look up the table the caller asked for; the previous code used an
        # undefined name here, so this branch always ended in "ERROR".
        if sql_table not in tables:
            return "TABLE NOT FOUND. Available Tables:\n\t" + "\n\t".join(
                tables)

        return mdb.read_table(sql_file, sql_table)
    except Exception:
        # Keep the best-effort contract (report, don't raise) while letting
        # KeyboardInterrupt/SystemExit propagate.
        traceback.print_exc()
        return "ERROR"
# Input files should be (*.mdb or *.accdb)
inputfile = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/working.mdb"
# Output locations: one CSV per exported table.
outputfile = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/Output.csv"
outputdir = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/"
outputcl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/CRASH_LEVEL.csv"
outputvl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/VEHICLE_LEVEL.csv"
outputpl = "/lss/research/itrns-otohpc/CrashTools/NonMotoristCrashTool/PERSON_LEVEL.csv"

# In[2]:
import pandas_access as mdb

db_filename = inputfile

# Listing the tables.
for tbl in mdb.list_tables(db_filename):
    print(tbl)

# import all the necessary libraries
import csv
import pyodbc
import pandas as pd
import numpy as np

# In[4]:
# Export the CRASH_LEVEL table to its own CSV.
dfcl = mdb.read_table(db_filename, table_name="CRASH_LEVEL")
dfcl.to_csv(outputcl, index=False)

# In[5]:
# Export the VEHICLE_LEVEL table to its own CSV. This previously wrote to
# outputcl and so overwrote the CRASH_LEVEL export.
dfvl = mdb.read_table(db_filename, table_name="VEHICLE_LEVEL")
dfvl.to_csv(outputvl, index=False)
Exemple #6
0
# Local copy of the downloaded bus-stop database.
mdb_path = expanduser('~/Desktop/RSTOP_BUS.mdb')

# Download with up to three tries. `attempts` must already be defined by the
# surrounding script (not shown in this fragment).
while attempts < 3:
    try:
        # The context manager closes the HTTP response once the body has been
        # read; it was previously left open.
        with urlopen(
                'http://static.data.gov.hk/td/routes-and-fares/RSTOP_BUS.mdb',
                timeout=10) as response:
            content = response.read()
        with open(mdb_path, 'wb') as f:
            f.write(content)
        break
    except URLError:
        # Give up with a non-zero exit status after the third failure.
        if attempts == 2:
            exit(1)
        attempts += 1

# Load the first table listed in the database.
df = mdb.read_table(mdb_path, mdb.list_tables(mdb_path)[0])
# Keep only the last row for each (route, direction, stop sequence) key.
df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'STOP_SEQ'],
                   keep='last',
                   inplace=True)
#df.fillna(-1, inplace=True)
conn = pymssql.connect(server='192.168.13.60',
                       user='******',
                       password='******',
                       database='DWHRMS_DEMO')
# Position of the final row; presumably used by later code not shown here.
last = len(df) - 1
# Empty the target table and commit immediately.
sql = 'DELETE FROM [t_MobBusStops]'
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()

folder = expanduser('~/Desktop/rstops')
Exemple #7
0
def read_tables(db_path):
    """Return the table names of the Access database at ``db_path`` as a new list.

    The names are obtained from ``mdb.list_tables`` with UTF-8 encoding.
    """
    return list(mdb.list_tables(db_path, encoding="utf-8"))
Exemple #8
0
# Local copy of the downloaded minibus-route database.
mdb_path = expanduser('~/Desktop/ROUTE_GMB.mdb')

# Download with up to three tries. `attempts` must already be defined by the
# surrounding script (not shown in this fragment).
while attempts < 3:
    try:
        # The context manager closes the HTTP response once the body has been
        # read; it was previously left open.
        with urlopen(
                'http://static.data.gov.hk/td/routes-and-fares/ROUTE_GMB.mdb',
                timeout=10) as response:
            content = response.read()
        with open(mdb_path, 'wb') as f:
            f.write(content)
        break
    except URLError:
        # Give up with a non-zero exit status after the third failure.
        if attempts == 2:
            exit(1)
        attempts += 1

# Load the first table listed in the database.
df = mdb.read_table(mdb_path, mdb.list_tables(mdb_path)[0])
#df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'STOP_SEQ'], keep='last', inplace=True)
#df.fillna(-1, inplace=True)
conn = pymssql.connect(server=r'192.168.9.75\sql2012',
                       user='******',
                       password='******',
                       database='DWHRMS_DEMO')
# Position of the final row; presumably used by later code not shown here.
last = len(df) - 1
# Empty the target table and commit immediately, before the inserts below.
sql = 'DELETE FROM [t_MobMinibusFares]'
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()
for i, r in df.iterrows():
    if i % 1000 == 0:
        sql = f'INSERT INTO [t_MobMinibusFares] ([Route ID], [District], [Route Name], [Source zh-hk], [Source zh-cn], [Source en-us], [Dest zh-hk], [Dest zh-cn], [Dest en-us], [Fare]) \n'
    else:
Exemple #9
0
# Local copy of the downloaded bus-fare database.
mdb_path = expanduser('~/Desktop/FARE_BUS.mdb')

# Download with up to three tries. `attempts` must already be defined by the
# surrounding script (not shown in this fragment).
while attempts < 3:
    try:
        # The context manager closes the HTTP response once the body has been
        # read; it was previously left open.
        with urlopen(
                'http://static.data.gov.hk/td/routes-and-fares/FARE_BUS.mdb',
                timeout=10) as response:
            content = response.read()
        with open(mdb_path, 'wb') as f:
            f.write(content)
        break
    except URLError:
        # Give up with a non-zero exit status after the third failure.
        if attempts == 2:
            exit(1)
        attempts += 1

# Load the first table listed in the database.
df = mdb.read_table(mdb_path, mdb.list_tables(mdb_path)[0])
# Keep only the last row for each (route, direction, boarding, alighting) key.
df.drop_duplicates(subset=['ROUTE_ID', 'ROUTE_SEQ', 'ON_SEQ', 'OFF_SEQ'],
                   keep='last',
                   inplace=True)
#df.fillna(-1, inplace=True)
conn = pymssql.connect(server='192.168.13.60',
                       user='******',
                       password='******',
                       database='DWHRMS_DEMO')
# Position of the final row; presumably used by later code not shown here.
last = len(df) - 1
# Empty the target table and commit immediately.
sql = 'DELETE FROM [t_MobBusFaresSection]'
cursor = conn.cursor()
cursor.execute(sql)
conn.commit()

folder = expanduser('~/Desktop/fares')
# Lists the tables of an Access (.mdb) dataset and loads the first few into pandas DataFrames.
# Note that reading every table in the .mdb file takes a while.
# pandas_access requires mdbtools; install it with your system's package manager.
# -Matt

import pandas
import numpy as np
import pandas_access

DATASET = "./FDOC_January_2017.mdb"

if __name__ == "__main__":
    # Collect the table names of the dataset and echo each one.
    table_list = list(pandas_access.list_tables(DATASET))
    for tablename in table_list:
        print(tablename)

    # Table name -> DataFrame. Only the first two tables are read because
    # loading more takes a very long time.
    tables_dict = {}
    for table in table_list[:2]:
        tables_dict.update({table: pandas_access.read_table(DATASET, table)})