def run(self):
    """Extract every CSV file assigned to this worker, persist the parsed
    data, and log per-file and aggregate timing statistics.

    Reads:
        self.threadCode: tag used to prefix log messages.
        self.sublist: iterable of CSV filenames (resolved under ``csv/``).

    Side effects:
        Writes rows to the database via ``myTools.save`` and emits
        progress/summary messages through ``LogHandler``.
    """
    LogHandler.log_msg("[%s]: %s" % (self.threadCode, time.ctime(time.time())))
    t_start = time.process_time_ns()
    mytools = myTools()
    # Hoisted out of the loop: the original recomputed len(self.sublist)
    # on every iteration.
    file_nums = len(self.sublist)
    data_count = 0

    for number, filename in enumerate(self.sublist, start=1):
        LogHandler.log_msg('[%s]: Extracting from files %s/%s %s\n' % (
            self.threadCode, number, file_nums, filename))
        t3 = time.process_time_ns()
        csvdata = pd.read_csv("csv/" + filename)
        if csvdata.empty:
            continue

        # File type: date-like column headers mean a report-style file
        # (fields as rows, dates as columns); otherwise a price file.
        if mytools.checkIfIsDate(csvdata.columns):
            fileProperty = mytools.matchFile(filename, isPrice=False)
            table_fields = mytools.getMp()[fileProperty['table']]

            # Pre-processing: keep only fields that exist in the SQL table.
            csvdata['name'] = csvdata['name'].str.strip()
            cols = csvdata['name']
            # BUGFIX: the original used `.iloc[(mask).index]`, which selects
            # every row (a boolean Series' .index is the full index, not the
            # True positions). Use the boolean mask itself so excluded rows
            # never reach the float conversion below.
            csvdata = csvdata.loc[csvdata['name'].isin(table_fields)]

            # Transpose (dates become the index), drop the 'name' row, and
            # coerce each cell to float, stripping thousands separators.
            dataframe = csvdata.T.iloc[1:].applymap(
                lambda x: float(x.replace(',', ''))
                if type(x) != float else float(x))

            # Rename positional columns (original row index) to field names,
            # then order/select exactly the table's fields.
            dataframe.rename(columns=cols.to_dict(), inplace=True)
            dataframe = dataframe.loc[:, cols.loc[cols.isin(table_fields)].to_list()]

            # Append Code, ReportDate, ValuationMethod.
            dataframe['Code'] = fileProperty['cols']['Code']
            dataframe['ReportDate'] = csvdata.columns.to_series().iloc[1:].apply(
                lambda x: datetime.strptime(x, '%m/%d/%Y').strftime('%Y-%m-%d')
                if x != 'ttm' else '0000-00-00')
            if 'ValuationMethod' in fileProperty['cols']:
                dataframe['ValuationMethod'] = fileProperty['cols']['ValuationMethod']

            # Replace all NaN with None so the DB layer stores NULLs.
            dataframe = dataframe.where(dataframe.notna(), None)
            mytools.save(dataframe=dataframe, table=fileProperty['table'],
                         filename=filename)
            single_file_data_count = dataframe.size
        else:
            fileProperty = mytools.matchFile(filename, isPrice=True)
            # Strip spaces out of column names so they match table fields.
            csvdata.rename(columns={c: c.replace(" ", "")
                                    for c in csvdata.columns}, inplace=True)
            csvdata['Code'] = fileProperty['cols']['Code']
            # Replace all NaN with None so the DB layer stores NULLs.
            csvdata = csvdata.where(csvdata.notna(), None)
            mytools.save(dataframe=csvdata, table=fileProperty['table'],
                         filename=filename)
            single_file_data_count = csvdata.size

        data_count += single_file_data_count
        t4 = time.process_time_ns()
        # BUGFIX: log this file's own count, not the running total.
        LogHandler.log_msg('%s data parsed, finished in %sms\n' % (
            single_file_data_count, round((t4 - t3) / 1000000, 5)))

    t_end = time.process_time_ns()
    total_ms = (t_end - t_start) / 1000000
    sep = "\r\n-------------------------------------------------------------------------------\r\n"
    success_msg = sep
    success_msg += " END, total time: %sms\r\n" % round(total_ms, 5)
    # BUGFIX: the averages multiplied instead of divided (total * file_nums /
    # total * data_count); also guard against empty sublist / zero rows to
    # avoid ZeroDivisionError.
    success_msg += " Main thread performance average/file: %sms\r\n" % round(
        total_ms / file_nums if file_nums else 0, 5)
    success_msg += " Parsed data total: %s\r\n" % data_count
    success_msg += " Performance average/data: %sms\r\n" % round(
        total_ms / data_count if data_count else 0, 5)
    success_msg += sep
    LogHandler.success(success_msg)