def __init__(self):
    """Open a SQLAlchemy engine for the configured MySQL database.

    Connection settings (user, password, host, port, database, charset)
    are read from the module-level ``dbConfig`` mapping.
    """
    self.__db_name = dbConfig['database']
    LogHandler.log_msg("DB connection initializing...")
    # Bug fix: the original read self.__config, which is never assigned in
    # this initializer; the settings actually live in the module-level
    # dbConfig mapping (already used for __db_name above, and used directly
    # by the sibling engine initializer).
    self.__engine = create_engine(
        'mysql+pymysql://%s:%s@%s:%s/%s?charset=%s' % (
            dbConfig['user'],
            dbConfig['password'],
            dbConfig['host'],
            dbConfig['port'],
            dbConfig['database'],
            dbConfig['charset'],
        ),
        echo=False)  # echo=False: keep SQLAlchemy statement logging off
    LogHandler.log_msg("Done.")
def __init__(self):
    """Create the SQLAlchemy engine from the module-level ``dbConfig``.

    Logs and swallows ``DBAPIError`` raised during engine construction;
    the trailing "Done." message is emitted either way.
    """
    self.__db_name = dbConfig['database']
    LogHandler.log_msg("DB engine initializing...")
    try:
        # Bug fix: the original wrapped create_engine() inside a second
        # create_engine() call, passing an Engine object where a database
        # URL string is expected. A single call is correct.
        self.engine = create_engine(
            'mysql://%s:%s@%s:%s/%s' % (
                dbConfig['user'],
                dbConfig['password'],
                dbConfig['host'],
                dbConfig['port'],
                dbConfig['database'],
            ),
            echo=False)
    except DBAPIError as err:
        # Best-effort: log the driver error instead of propagating, as in
        # the original. Callers must cope with a missing self.engine.
        LogHandler.log_exceptions(err)
    finally:
        LogHandler.log_msg("Done.")
def __init__(self):
    """Initialize the tool set: engine-backed connection, model-property
    map, and a plain MySQL connection.
    """
    LogHandler.log_msg("Initializing tools..")
    # Engine-backed connection wrapper (project class; semantics opaque here).
    self.__con = dbConnectionEngine()
    LogHandler.log_msg("Fetch current table attribute")
    # NOTE(review): passes self.mp into loadModelProperties — presumably the
    # call populates it with the current table attributes, which implies
    # self.mp must already exist (class attribute?) at this point. Confirm
    # against the dbConnectionEngine implementation.
    self.__con.loadModelProperties(self.mp)
    LogHandler.log_msg("Done.")
    # Separate plain connection, presumably used for data writes — confirm.
    self.__mysqlCon = dbConnection()
def run(self):
    """Worker-thread entry point: parse every CSV in ``self.sublist``,
    reshape it for its target table, and persist it via ``myTools.save``.

    Two file shapes are handled: "date" files (wide, one column per report
    date, rows keyed by a ``name`` field) are transposed and coerced to
    floats; "price" files are saved mostly as-is. A per-file timing line
    and a final summary are logged.
    """
    success_msg = ''
    LogHandler.log_msg("[%s]: %s" % (self.threadCode, time.ctime(time.time())))
    t_start = time.process_time_ns()
    mytools = myTools()
    number = 0
    data_count = 0
    # Hoisted out of the loop: the sublist length is loop-invariant.
    file_nums = len(self.sublist)
    for filename in self.sublist:
        number += 1
        LogHandler.log_msg('[%s]: Extracting from files %s/%s %s\n' % (
            self.threadCode,
            number,
            len(self.sublist),
            filename,
        ))
        t3 = time.process_time_ns()
        # checkCSVInTable
        csvdata = pd.read_csv("csv/" + filename)
        if csvdata.empty:
            continue
        # Check file type
        if mytools.checkIfIsDate(csvdata.columns):
            fileProperty = mytools.matchFile(filename, isPrice=False)
            # Pre-processing data: remove fields which are not in sql table.
            csvdata['name'] = csvdata['name'].str.strip()
            cols = csvdata['name'].str.strip()
            # Bug fix: the original used
            #   csvdata.iloc[(csvdata['name'].isin(...)).index]
            # but .index of a boolean Series is the FULL index, so the
            # intended row filter was a no-op. Index with the boolean mask
            # itself to actually drop rows absent from the table's field map.
            csvdata = csvdata[csvdata['name'].isin(
                mytools.getMp()[fileProperty['table']])]
            # Remove ',' of each element and convert to type float
            dataframe = csvdata.T.iloc[1:] \
                .applymap(
                lambda x: float(x.replace(',', ''))
                if type(x) != float else float(x))
            # Rename columns' names to string
            dataframe.rename(columns=cols.to_dict(), inplace=True)
            # Remove unrelated table fields
            dataframe = dataframe.loc[:, cols.loc[(cols.isin(mytools.getMp(
            )[fileProperty['table']]))].str.strip().to_list()]
            # Append Code, ReportDate, ValuationMethod to dataframe
            dataframe['Code'] = fileProperty['cols']['Code']
            # 'ttm' (trailing-twelve-months) columns get a sentinel date.
            dataframe['ReportDate'] = csvdata.columns.to_series(
            ).iloc[1:].apply(lambda x: datetime.strptime(x, '%m/%d/%Y').
                             strftime('%Y-%m-%d')
                             if x != 'ttm' else '0000-00-00')
            if 'ValuationMethod' in tuple(fileProperty['cols'].keys()):
                dataframe['ValuationMethod'] = fileProperty['cols'][
                    'ValuationMethod']
            # Replace all NaN to None
            dataframe = dataframe.where(dataframe.notna(), None)
            # Save in Database
            mytools.save(dataframe=dataframe,
                         table=fileProperty['table'],
                         filename=filename)
            data_count += dataframe.size
        else:
            fileProperty = mytools.matchFile(filename, isPrice=True)
            # Strip spaces out of column names so they match table fields.
            csvdata.rename(columns={
                csvdata.columns[x]: csvdata.columns[x].replace(" ", "")
                for x in range(csvdata.columns.size)
            }, inplace=True)
            # Append Code
            csvdata['Code'] = fileProperty['cols']['Code']
            # Replace all NaN to None
            csvdata = csvdata.where(csvdata.notna(), None)
            # Save in Database
            mytools.save(dataframe=csvdata,
                         table=fileProperty['table'],
                         filename=filename)
            data_count += csvdata.size
        # END
        t4 = time.process_time_ns()
        LogHandler.log_msg('%s data parsed, finished in %sms\n' %
                           (data_count, round((t4 - t3) / 1000000, 5)))
    t_end = time.process_time_ns()
    total_ms = (t_end - t_start) / 1000000
    success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
    success_msg += " END, total time: %sms\r\n" % round(total_ms, 5)
    # Bug fix: the original MULTIPLIED the total time by the file/data
    # counts; an average is total divided by count (guarded against zero).
    success_msg += " Main thread performance average/file: %sms" % round(
        total_ms / file_nums if file_nums else 0.0, 5)
    success_msg += " Parsed data total: %s\r\n" % data_count
    success_msg += " Performance average/data: %sms\r\n" % round(
        total_ms / data_count if data_count else 0.0, 5)
    success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
    LogHandler.success(success_msg)
import time from locale import * import pandas as pd from logHandler import LogHandler from mytools import myTools setlocale(LC_NUMERIC, 'English_US') pd.set_option('display.max_columns', 10) # Global variables mytools, table_org_attributes, success_msg = myTools(), {}, '' number, file_nums, single_file_data_count, data_count, datamapping_performance = 1, 0, 0, 0, 0.0 # Program start here LogHandler.log_msg("Start...") t_start = time.process_time_ns() for root, directories, files in os.walk("csv"): file_nums = len(files) LogHandler.log_msg("{0} files in directory".format(len(files))) for filename in files: LogHandler.log_msg('Extracting from files %s/%s %s\n' % (number, len(files), filename)) t3 = time.process_time_ns() file_path = os.path.join(root, filename) # open and extract data from csv and return dataframe object csvdata = pd.read_csv(file_path, parse_dates=True) if csvdata.empty: continue