Code example #1
 def __init__(self):
     self.__db_name = dbConfig['database']
     # Keep the full config on the instance; the format string below reads from it
     self.__config = dbConfig
     LogHandler.log_msg("DB connection initializing...")
     self.__engine = create_engine(
         'mysql+pymysql://%s:%s@%s:%s/%s?charset=%s' % (
             self.__config['user'],
             self.__config['password'],
             self.__config['host'],
             self.__config['port'],
             self.__config['database'],
             self.__config['charset'],
         ),
         echo=False)
     LogHandler.log_msg("Done.")
Code example #2
 def __init__(self):
     self.__db_name = dbConfig['database']
     LogHandler.log_msg("DB engine initializing...")
     try:
         self.engine = create_engine(
             'mysql://%s:%s@%s:%s/%s' % (
                 dbConfig['user'],
                 dbConfig['password'],
                 dbConfig['host'],
                 dbConfig['port'],
                 dbConfig['database'],
             ),
             echo=False)
     except DBAPIError as err:
         LogHandler.log_exceptions(err)
     finally:
         LogHandler.log_msg("Done.")
Code example #3
 def __init__(self):
     LogHandler.log_msg("Initializing tools...")
     self.mp = {}  # table-attribute map, populated by loadModelProperties() below
     self.__con = dbConnectionEngine()
     LogHandler.log_msg("Fetch current table attributes")
     self.__con.loadModelProperties(self.mp)
     LogHandler.log_msg("Done.")
     self.__mysqlCon = dbConnection()
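
Code example #3 fills a table-attribute map via loadModelProperties(), and code example #4 later reads it through mytools.getMp() as a mapping from table name to that table's field names (it is indexed with fileProperty['table'] and used in isin() checks). A sketch of that assumed structure is shown below; the table and column names are placeholders, not taken from the original code.

# Hypothetical contents of the map filled by loadModelProperties() and returned by getMp().
# Table and column names are placeholders for illustration only.
mp = {
    'balance_sheet': ['TotalAssets', 'TotalLiabilities', 'TotalEquity'],
    'stock_price': ['Code', 'Date', 'Open', 'Close', 'Volume'],
}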
Code example #4
    def run(self):
        success_msg = ''
        LogHandler.log_msg("[%s]: %s" %
                           (self.threadCode, time.ctime(time.time())))
        t_start = time.process_time_ns()
        mytools = myTools()
        number = 0
        data_count = 0
        file_nums = len(self.sublist)
        for filename in self.sublist:
            number += 1
            LogHandler.log_msg('[%s]: Extracting from files %s/%s %s\n' % (
                self.threadCode,
                number,
                len(self.sublist),
                filename,
            ))
            t3 = time.process_time_ns()

            # Read the CSV; skip files that contain no data
            csvdata = pd.read_csv("csv/" + filename)
            if csvdata.empty:
                continue

            # Check file type
            if mytools.checkIfIsDate(csvdata.columns):
                fileProperty = mytools.matchFile(filename, isPrice=False)
                # Pre-processing: drop rows whose field name is not a column of the SQL table
                csvdata['name'] = csvdata['name'].str.strip()
                cols = csvdata['name']
                csvdata = csvdata[csvdata['name'].isin(
                    mytools.getMp()[fileProperty['table']])]

                # Strip thousands separators and convert every element to float
                dataframe = csvdata.T.iloc[1:].applymap(
                    lambda x: float(x.replace(',', '')) if isinstance(x, str) else float(x))

                # Rename the integer column labels to the corresponding field names
                dataframe.rename(columns=cols.to_dict(), inplace=True)

                # Keep only the columns that exist in the target table
                table_cols = mytools.getMp()[fileProperty['table']]
                dataframe = dataframe.loc[:, cols.loc[cols.isin(table_cols)].to_list()]

                # Append Code, ReportDate and (optionally) ValuationMethod
                dataframe['Code'] = fileProperty['cols']['Code']
                dataframe['ReportDate'] = csvdata.columns.to_series().iloc[1:].apply(
                    lambda x: '0000-00-00' if x == 'ttm'
                    else datetime.strptime(x, '%m/%d/%Y').strftime('%Y-%m-%d'))
                if 'ValuationMethod' in fileProperty['cols']:
                    dataframe['ValuationMethod'] = fileProperty['cols']['ValuationMethod']

                # Replace all NaN with None so the database receives NULL
                dataframe = dataframe.where(dataframe.notna(), None)

                # Save to database
                mytools.save(dataframe=dataframe,
                             table=fileProperty['table'],
                             filename=filename)
                data_count += dataframe.size
            else:
                fileProperty = mytools.matchFile(filename, isPrice=True)
                # Strip spaces from the column names
                csvdata.rename(columns={c: c.replace(" ", "") for c in csvdata.columns},
                               inplace=True)

                # Append Code
                csvdata['Code'] = fileProperty['cols']['Code']

                # Replace all NaN with None so the database receives NULL
                csvdata = csvdata.where(csvdata.notna(), None)

                # Save to database
                mytools.save(dataframe=csvdata,
                             table=fileProperty['table'],
                             filename=filename)
                data_count += csvdata.size
            # END

            t4 = time.process_time_ns()
            LogHandler.log_msg('[%s]: %s values parsed so far, file finished in %sms\n' %
                               (self.threadCode, data_count, round((t4 - t3) / 1000000, 5)))

        t_end = time.process_time_ns()
        success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
        success_msg += "    END, total time: %sms\r\n" % round(
            (t_end - t_start) / 1000000, 5)
        success_msg += "    Main thread performance average/file: %sms" % round(
            (t_end - t_start) / 1000000 * file_nums, 5)
        success_msg += "    Parsed data total: %s\r\n" % data_count
        success_msg += "    Performance average/data: %sms\r\n" % round(
            (t_end - t_start) / 1000000 * data_count, 5)
        success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
        LogHandler.success(success_msg)
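
run() in code example #4 relies on two attributes that are set elsewhere: self.sublist (the slice of CSV file names this worker handles) and self.threadCode (a label used in the log lines). A minimal sketch of a driver that could provide them is shown below; the class name CsvImportThread, its constructor, start_workers(), and the chunking scheme are all assumptions for illustration, not part of the original code.

# Hypothetical driver for the worker in code example #4.
# Only run(), self.sublist and self.threadCode appear in the original snippet;
# everything else here is an assumption for illustration.
import os
import threading


class CsvImportThread(threading.Thread):
    def __init__(self, thread_code, sublist):
        super().__init__()
        self.threadCode = thread_code
        self.sublist = sublist

    def run(self):
        ...  # body as shown in code example #4


def start_workers(csv_dir='csv', n_threads=4):
    files = sorted(f for f in os.listdir(csv_dir) if f.endswith('.csv'))
    # Deal the file names round-robin into one sublist per worker
    sublists = [files[i::n_threads] for i in range(n_threads)]
    workers = [CsvImportThread('T%d' % i, sub)
               for i, sub in enumerate(sublists) if sub]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

One design note: time.process_time_ns(), as used in run(), reports CPU time for the whole process, so per-worker timings overlap and exclude time spent waiting on I/O; time.perf_counter_ns() would give wall-clock figures per worker.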
Code example #5
import os
import time
import locale

import pandas as pd

from logHandler import LogHandler
from mytools import myTools

# 'English_US' is a Windows-style locale name; use e.g. 'en_US.UTF-8' on Linux/macOS
locale.setlocale(locale.LC_NUMERIC, 'English_US')
pd.set_option('display.max_columns', 10)

# Global variables
mytools = myTools()
table_org_attributes = {}
success_msg = ''
number = 1
file_nums = 0
single_file_data_count = 0
data_count = 0
datamapping_performance = 0.0

# Program starts here
LogHandler.log_msg("Start...")
t_start = time.process_time_ns()
for root, directories, files in os.walk("csv"):
    file_nums = len(files)
    LogHandler.log_msg("{0} files in directory".format(len(files)))
    for filename in files:

        LogHandler.log_msg('Extracting from files %s/%s %s\n' %
                           (number, len(files), filename))

        t3 = time.process_time_ns()
        file_path = os.path.join(root, filename)
        # Read the CSV file into a DataFrame; skip it if it contains no data
        csvdata = pd.read_csv(file_path, parse_dates=True)
        if csvdata.empty:
            continue