def insert(self, table, dataframe, **args):
    """Bulk-insert *dataframe* into *table* with ON DUPLICATE KEY UPDATE.

    Key-like columns (ReportDate/Code/Date/ValuationMethod) are excluded
    from the update clause so they are never overwritten on conflict.
    Errors are logged (not raised); the connection is committed on success.

    :param table: target table name (interpolated into the SQL text)
    :param dataframe: pandas DataFrame whose columns match the table
    :param args: expects ``filename`` for the error-log message
    """
    key_columns = ('ReportDate', 'Code', 'Date', 'ValuationMethod')
    onDupUpdateKey = [
        '%s=VALUES(%s)' % (c.replace(" ", ""), c.replace(" ", ""))
        for c in dataframe.columns if c not in key_columns
    ]
    sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
        table,
        ','.join(dataframe.columns),
        ','.join(['%s'] * len(dataframe.columns)),
        'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
    )
    # Bug fix: the original called self.__sqlConnect.cursor() twice — once
    # for executemany and again in ``finally`` — so it closed a brand-new
    # cursor and leaked the one actually used. Hold a single cursor.
    cursor = self.__sqlConnect.cursor()
    try:
        rows = [tuple(r) for r in dataframe.values]
        cursor.executemany(sql_insert, rows)
        # NB : you won't get an IntegrityError when reading
        self.__sqlConnect.commit()
    except mysql.connector.Error as err:
        LogHandler.log_exceptions(""" Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n """.format(args['filename'], sql_insert, err))
    finally:
        cursor.close()
def saveDataFrame(self, dataframe, table):
    """Append *dataframe* to *table* through the SQLAlchemy engine.

    Column dtypes come from the connection's per-table dtype map.
    DBAPI errors are logged and swallowed (best-effort save, original
    behavior preserved); the dead ``pass`` and empty ``finally`` block
    from the original are removed.
    """
    try:
        dataframe.to_sql(name=table,
                         con=self.__con.getEngine(),
                         index=False,
                         if_exists='append',
                         dtype=self.__con.getDbType()[table])
    except DBAPIError as e:
        LogHandler.log_exceptions("Sql Exceptions: %s\r\n" % e)
def __init__(self):
    """Build the pymysql-backed SQLAlchemy engine from connection settings.

    NOTE(review): reads ``self.__config``, which is not assigned in this
    initializer — presumably a class attribute or set elsewhere; confirm.
    """
    self.__db_name = dbConfig['database']
    LogHandler.log_msg("DB connection initializing...")
    cfg = self.__config
    url = 'mysql+pymysql://%s:%s@%s:%s/%s?charset=%s' % (
        cfg['user'],
        cfg['password'],
        cfg['host'],
        cfg['port'],
        cfg['database'],
        cfg['charset'],
    )
    self.__engine = create_engine(url, echo=False)
    LogHandler.log_msg("Done.")
def __init__ (self, service, configFile):
    # Create the LogHandler for this service plus thread-local storage.
    # Raises IncorrectConfigException when the handler cannot be built
    # from configFile (the underlying error is logged first).
    # NOTE: Python 2 syntax (print statement, backtick repr).
    try:
        print "logger instantiated"  # debug trace
        self.logHandler = LogHandler(service, configFile)
    except Exception as error:
        monitorLog.logError("Cannot Instantiate Logger with configFile : " + configFile, `error`)
        raise IncorrectConfigException("Cannot Instantiate Logger with configFile : " + configFile)
    self.threadLocal = threading.local()  # per-thread state (e.g. timers)
    self.counter = 0;
def engine_insert_update(self, dataframe, table, **args):
    """Bulk INSERT ... ON DUPLICATE KEY UPDATE via the SQLAlchemy engine.

    Key-like columns (ReportDate/Code/Date/ValuationMethod) are excluded
    from the update clause. Errors are logged, not raised.

    :param dataframe: pandas DataFrame whose columns match the table
    :param table: target table name (interpolated into the SQL text)
    :param args: expects ``filename`` for the error-log message
    """
    key_columns = ('ReportDate', 'Code', 'Date', 'ValuationMethod')
    onDupUpdateKey = [
        '%s=VALUES(%s)' % (c.replace(" ", ""), c.replace(" ", ""))
        for c in dataframe.columns if c not in key_columns
    ]
    sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
        table,
        ','.join(dataframe.columns),
        ','.join(['%s'] * len(dataframe.columns)),
        'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
    )
    try:
        # Connection is released automatically by the context manager.
        with self.__engine.connect() as con:
            rows = [tuple(x) for x in dataframe.values]
            con.execute(sql_insert, *rows)
    except DBAPIError as err:
        # Fix: DataError and OperationalError are subclasses of DBAPIError,
        # so the original's second and third except blocks (with identical
        # bodies) were unreachable; one handler covers all three.
        LogHandler.log_exceptions(""" Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n """.format(args['filename'], sql_insert, err))
def __init__(self):
    """Wire up the engine-based and raw DB connections and load table metadata."""
    LogHandler.log_msg("Initializing tools..")
    self.__con = dbConnectionEngine()
    LogHandler.log_msg("Fetch current table attribute")
    # presumably fills self.mp with per-table model properties — confirm
    # against dbConnectionEngine.loadModelProperties
    self.__con.loadModelProperties(self.mp)
    LogHandler.log_msg("Done.")
    self.__mysqlCon = dbConnection()
def __init__(self):
    """Build the raw ``mysql://`` engine from the module-level dbConfig.

    Bug fix: the original wrapped ``create_engine`` in a *second*
    ``create_engine`` call, passing an Engine instance where a URL
    string is expected; the outer call is removed.
    """
    self.__db_name = dbConfig['database']
    LogHandler.log_msg("DB engine initializing...")
    try:
        self.engine = create_engine(
            'mysql://%s:%s@%s:%s/%s' % (
                dbConfig['user'],
                dbConfig['password'],
                dbConfig['host'],
                dbConfig['port'],
                dbConfig['database'],
            ),
            echo=False)
    except DBAPIError as err:
        LogHandler.log_exceptions(err)
    finally:
        LogHandler.log_msg("Done.")
thread_num = 4
file_nums = 0
threaded_file = []

if __name__ == '__main__':
    start_time = time.process_time_ns()
    for root, directories, files in os.walk("csv"):
        all_files = len(files)
        # Fix: with fewer files than threads, round(len/4) could be 0 and
        # range(..., 0) raises ValueError — clamp the chunk size to >= 1.
        sublist_size = max(1, round(len(files) / thread_num))
        # Split the file list into roughly equal chunks, one per thread.
        threaded_file = [
            files[i:i + sublist_size]
            for i in range(0, len(files), sublist_size)
        ]
    threads = []
    for index, tf in enumerate(threaded_file):
        p = StreamThread(sublist=tf, threadCode="Thread-{}".format(index))
        threads.append(p)
    # Fix: the original called start() immediately followed by join() in
    # the same loop, which ran the workers strictly one after another.
    # Start every thread first, then join them all, so they run in parallel.
    for th in threads:
        try:
            th.start()
        except Error as err:
            LogHandler.log_exceptions(
                "Error: unable to start thread, msg: {0}".format(err))
    for th in threads:
        if th.is_alive() or th.ident is not None:  # skip threads that never started
            th.join()
    end_time = time.process_time_ns()
    print("All threads time spend: %sms" % round(
        (end_time - start_time) / 1000000, 5))
def __init__(self, *args, **kwargs):
    """Set up the per-instance logger and an empty placeholder response."""
    # file=False: presumably disables file output on LogHandler — confirm.
    self.log = LogHandler(self.name, file=False)
    # Overwritten by each request method with the live response.
    self.response = Response()
class WebRequest(object):
    """Retrying HTTP helper around ``requests``; exposes the last response."""

    name = 'web_request'

    def __init__(self, *args, **kwargs):
        # file=False: presumably console-only logging — confirm in LogHandler.
        self.log = LogHandler(self.name, file=False)
        self.response = Response()  # placeholder until a request is made

    @property
    def header(self):
        """Default headers used by JSON posts."""
        return {'Connection': 'close', 'Content-Type': 'application/json'}

    def get(self, url, header=None, retry_time=6, retry_interval=6,
            timeout=10, *args, **kwargs):
        """
        get method
        :param url: target url
        :param header: headers
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response``)
        """
        while True:
            try:
                self.response = requests.get(url, headers=header,
                                             timeout=timeout, *args, **kwargs)
                return self
            except Exception as e:
                self.log.error("请求URL地址: %s 错误是: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Fix: the original built this synthetic 200 response but
                    # never assigned it to self.response, leaving stale state
                    # from the previous request.
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("重新连接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)

    def post_data_json(self, url, header=None, retry_time=3, retry_interval=5,
                       timeout=8, *args, **kwargs):
        """
        post method
        :param url: target url
        :param header: headers
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response``)
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.post(url, headers=headers,
                                              timeout=timeout, *args, **kwargs)
                return self
            except Exception as e:
                print("请求: %s 错误: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Same fix as get(): expose the synthetic response.
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                print("重新链接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """Parsed lxml tree of the last response body."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Decoded text of the last response body."""
        return self.response.text
import os  # Fix: os.walk / os.path.join were used below but os was never imported
import time
from locale import *

import pandas as pd

from logHandler import LogHandler
from mytools import myTools

setlocale(LC_NUMERIC, 'English_US')
pd.set_option('display.max_columns', 10)

# Global variables
mytools, table_org_attributes, success_msg = myTools(), {}, ''
number, file_nums, single_file_data_count, data_count, datamapping_performance = 1, 0, 0, 0, 0.0

# Program start here
LogHandler.log_msg("Start...")
t_start = time.process_time_ns()
for root, directories, files in os.walk("csv"):
    file_nums = len(files)
    LogHandler.log_msg("{0} files in directory".format(len(files)))
    for filename in files:
        LogHandler.log_msg('Extracting from files %s/%s %s\n' %
                           (number, len(files), filename))
        t3 = time.process_time_ns()
        file_path = os.path.join(root, filename)
        # open and extract data from csv and return dataframe object
        csvdata = pd.read_csv(file_path, parse_dates=True)
        if csvdata.empty:
            continue
# Python 2 metrics logger: counts failures/latency and escalates to
# CRITICAL (email-triggering) severity past the thresholds below.
class Logger:
    #beyond these, email will be triggered.
    threshold_failure = 4
    threshold_latency = 4
    threshold_count = 4
    '''
    It :
    - Creates LogHandler instance to write metrics to log file.
    - Creates threading.local() instance to create thread specific variables.
    '''
    def __init__ (self, service, configFile):
        try:
            print "logger instantiated"  # debug trace (Python 2 print)
            self.logHandler = LogHandler(service, configFile)
        except Exception as error:
            # `error` is Python 2 backtick repr()
            monitorLog.logError("Cannot Instantiate Logger with configFile : " + configFile, `error`)
            raise IncorrectConfigException("Cannot Instantiate Logger with configFile : " + configFile)
        self.threadLocal = threading.local()
        self.counter = 0;
    '''
    If the given action is failed, then it will log the failure count uptil now.
    It will also return the updated counter value.
    '''
    def logIfFail (self, name, expectedReturn, counter, action, severity = 20, *args, **kwargs):
        # Run the action via reportCountNE, then log the accumulated count.
        count = self.reportCountNE(expectedReturn, counter, action, *args, **kwargs)
        if count > 0:
            try:
                print "logging failure"
                self.logHandler.appendFailCountLog(name, count, severity)
            except Exception as error:
                monitorLog.logError("Failed to append log for metric: " + name, `error`)
                raise LoggingException("Failed to append log for metric: " + name)
        return count
    def logFailure (self, name, counter, severity = 20):
        # Returns 1 if a failure count was logged, 0 otherwise.
        if counter > 0:
            try:
                # Escalate to CRITICAL once the failure threshold is reached.
                if counter >= Logger.threshold_failure:
                    self.logHandler.appendFailCountLog(name, counter, 'CRITICAL')
                self.logHandler.appendFailCountLog(name, counter, severity)
                print "logging failure"
            except Exception as error:
                monitorLog.logError("Failed to append log for metric: " + name, `error`)
                raise LoggingException("Failed to append log for metric: " + name)
            return 1
        return 0
    def logCount (self, name, counter, severity = 20):
        # Same shape as logFailure but for plain counts.
        if counter > 0:
            try:
                if counter >= Logger.threshold_count:
                    self.logHandler.appendCountLog(name, counter, 'CRITICAL')
                self.logHandler.appendCountLog(name, counter, severity)
            except Exception as error:
                monitorLog.logError("Failed to append log for metric: " + name, `error`)
                raise LoggingException("Failed to append log for metric: " + name)
            return 1
        return 0
    '''
    Report the incremented counter if the action has failed to pass the expectation.
    '''
    def reportCountEqual(self, expectedReturn, counter, action, *args, **kwargs):
        # Increment when the action raises OR returns the expected value.
        try:
            actualReturn = action(*args, **kwargs)
        except:
            return counter + 1
        if actualReturn == expectedReturn:
            return counter + 1
        return counter
    '''
    Report the incremented counter if the action has passed the expectation.
    '''
    def reportCountNE(self, expectedReturn, counter, action, *args, **kwargs):
        # Increment when the action raises OR returns something unexpected.
        try:
            actualReturn = action(*args, **kwargs)
        except:
            return counter + 1
        if actualReturn == expectedReturn:
            return counter
        return counter + 1
    '''
    Starts the thread local timer.
    '''
    def startTime (self):
        #using thread local storage for start time
        self.threadLocal.startTime = time.time()
    '''
    Stops the thread local timer and logs the execution time.
    '''
    def reportTime (self, name, severity = 20):
        endTime = time.time()
        runTime = endTime - self.threadLocal.startTime
        try:
            # Escalate slow runs to CRITICAL past the latency threshold.
            if runTime >= Logger.threshold_latency:
                self.logHandler.appendTimeLog(name, runTime, 'CRITICAL')
            self.logHandler.appendTimeLog(name, runTime, severity)
        except Exception as error:
            monitorLog.logError("Failed to append log for metric: " + name, `error`)
            raise LoggingException("Failed to append log for metric: " + name)
    '''
    Logs the execution time of the given action and returns the value of action.
    '''
    def reportLatency (self, name, action, severity = 20, *args, **kwargs):
        self.startTime()
        try:
            actualReturn = action(*args, **kwargs)
        except Exception as error:
            monitorLog.logError("Failed Action " + `action`, `error`)
            raise Exception("Failed Action :" + `action`)
        self.reportTime(name, severity)
        return actualReturn
# Python 2 metrics logger (simpler variant: no thresholds / severities).
class Logger:
    '''
    It :
    - Creates LogHandler instance to write metrics to log file.
    - Creates threading.local() instance to create thread specific variables.
    '''
    def __init__ (self, service):
        self.logHandler = LogHandler(service)
        self.threadLocal = threading.local()
    '''
    If the given action is failed, then it will log the failure count uptil now.
    It will also return the updated counter value.
    '''
    def logIfFail (self, name, metricType, expectedReturn, counter, action, *args, **kwargs):
        count = self.reportCountNE(expectedReturn, counter, action, *args, **kwargs)
        if count > 0:
            self.logHandler.appendCountLog(name, metricType, count)
        return count
    '''
    Report the incremented counter if the action has failed to pass the expectation.
    '''
    def reportCountEqual(self, expectedReturn, counter, action, *args, **kwargs):
        # Increment when the action raises OR returns the expected value.
        try:
            actualReturn = action(*args, **kwargs)
        except:
            return counter + 1
        if actualReturn == expectedReturn:
            return counter + 1
        return counter
    '''
    Report the incremented counter if the action has passed the expectation.
    '''
    def reportCountNE(self, expectedReturn, counter, action, *args, **kwargs):
        # Increment when the action raises OR returns something unexpected.
        try:
            actualReturn = action(*args, **kwargs)
        except:
            return counter + 1
        if actualReturn == expectedReturn:
            return counter
        return counter + 1
    '''
    Starts the thread local timer.
    '''
    def startTime (self):
        #using thread local storage for start time
        self.threadLocal.startTime = time.time()
    '''
    Stops the thread local timer and logs the execution time.
    '''
    def reportTime (self, name, metricType):
        endTime = time.time()
        runTime = endTime - self.threadLocal.startTime
        self.logHandler.appendTimeLog(name, metricType, runTime)
    '''
    Logs the execution time of the given action and returns the value of action.
    '''
    def reportLatency (self, name, metricType, action, *args, **kwargs):
        self.startTime()
        try:
            print "Inside reportLatency try block"
            actualReturn = action(*args, **kwargs)
            print actualReturn
        except:
            # NOTE(review): if action() raises, actualReturn is never bound
            # and the return below raises NameError — confirm intent.
            print "Inside reportLatency except block"
            #monitorLogs.logError("Error")
        self.reportTime(name, metricType)
        return actualReturn
def __init__ (self, service):
    """Create the service's LogHandler and thread-local storage."""
    self.logHandler = LogHandler(service)
    # per-thread state (e.g. timer start times)
    self.threadLocal = threading.local()
def run(self):
    """Thread worker: parse each CSV in self.sublist and save it to the DB."""
    success_msg = ''
    LogHandler.log_msg("[%s]: %s" % (self.threadCode, time.ctime(time.time())))
    t_start = time.process_time_ns()
    mytools = myTools()
    number = 0
    data_count = 0
    file_nums = 0
    datamapping_performance = 0
    for filename in self.sublist:
        file_nums = len(self.sublist)
        number += 1
        LogHandler.log_msg('[%s]: Extracting from files %s/%s %s\n' % (
            self.threadCode,
            number,
            len(self.sublist),
            filename,
        ))
        t3 = time.process_time_ns()
        # checkCSVInTable
        csvdata = pd.read_csv("csv/" + filename)
        if csvdata.empty:
            continue
        # Check file type
        if mytools.checkIfIsDate(csvdata.columns):
            # Date-columned report file (not a price file).
            fileProperty = mytools.matchFile(filename, isPrice=False)
            # Pre-processing data: remove fields which is not in sql table
            csvdata['name'] = csvdata['name'].str.strip()
            cols = csvdata['name'].str.strip()
            # NOTE(review): .isin(...) returns a boolean Series whose .index
            # is the full row index, so this iloc selects every row —
            # presumably a boolean mask was intended; confirm.
            csvdata = csvdata.iloc[(csvdata['name'].isin(
                mytools.getMp()[fileProperty['table']])).index]
            # Remove ',' of each element and convert to type float
            dataframe = csvdata.T.iloc[1:].applymap(
                lambda x: float(x.replace(',', ''))
                if type(x) != float else float(x))
            # Rename columns' names to string
            dataframe.rename(columns=cols.to_dict(), inplace=True)
            # Remove unrelated table fields
            dataframe = dataframe.loc[:, cols.loc[(cols.isin(mytools.getMp(
            )[fileProperty['table']]))].str.strip().to_list()]
            # Append Code, ReportDate, ValuationMethod to dataframe
            dataframe['Code'] = fileProperty['cols']['Code']
            # 'ttm' columns get the sentinel date '0000-00-00'.
            dataframe['ReportDate'] = csvdata.columns.to_series(
            ).iloc[1:].apply(lambda x: datetime.strptime(x, '%m/%d/%Y').
                             strftime('%Y-%m-%d') if x != 'ttm' else '0000-00-00')
            if 'ValuationMethod' in tuple(fileProperty['cols'].keys()):
                dataframe['ValuationMethod'] = fileProperty['cols'][
                    'ValuationMethod']
            # Replace all NaN to None
            dataframe = dataframe.where(dataframe.notna(), None)
            # Save in Database
            mytools.save(dataframe=dataframe,
                         table=fileProperty['table'],
                         filename=filename)
            data_count += dataframe.size
        else:
            # Price file: normalize column names (strip spaces) and save as-is.
            fileProperty = mytools.matchFile(filename, isPrice=True)
            csvdata.rename(columns={
                csvdata.columns[x]: csvdata.columns[x].replace(" ", "")
                for x in range(csvdata.columns.size)
            }, inplace=True)
            # Append Code
            csvdata['Code'] = fileProperty['cols']['Code']
            # Replace all NaN to None
            csvdata = csvdata.where(csvdata.notna(), None)
            # Save in Database
            mytools.save(dataframe=csvdata,
                         table=fileProperty['table'],
                         filename=filename)
            data_count += csvdata.size
        # END
        t4 = time.process_time_ns()
        LogHandler.log_msg('%s data parsed, finished in %sms\n' %
                           (data_count, round((t4 - t3) / 1000000, 5)))
    t_end = time.process_time_ns()
    success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
    success_msg += " END, total time: %sms\r\n" % round(
        (t_end - t_start) / 1000000, 5)
    # NOTE(review): "average/file" multiplies by file_nums — an average
    # would divide; same for the per-data average below. Confirm intent.
    success_msg += " Main thread performance average/file: %sms" % round(
        (t_end - t_start) / 1000000 * file_nums, 5)
    success_msg += " Parsed data total: %s\r\n" % data_count
    success_msg += " Performance average/data: %sms\r\n" % round(
        (t_end - t_start) / 1000000 * data_count, 5)
    success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
    LogHandler.success(success_msg)
# NOTE(review): fragment — the matching `if` for other PLATFORM values
# lies outside this chunk; indentation below is reconstructed.
elif PLATFORM == "Linux":
    if self.fileName.endswith("py"):
        # Launch the target script in a new gnome-terminal window.
        os.system("cd %s && gnome-terminal -- python3 %s" %
                  (DIR, os.path.join(DIR, self.fileName)))
    else:
        # Non-Python target: execute the file directly in a terminal.
        os.system("cd %s && gnome-terminal -- %s" %
                  (DIR, os.path.join(DIR, self.fileName)))
# win32api.ShellExecute(0, 'open', self.fileName, '', '', 1)
print('start the target successfully...')

outer_starttime = time()

if __name__ == '__main__':
    expired = 3
    # Log file capped at 500 MB
    maxSize = 500
    # Polling interval: 1 minute
    interval = 60
    # Create the log handler object
    logobj = LogHandler(expired, maxSize, interval)
    # Start the log-monitoring thread
    logobj.start()
    # Seconds to wait after the program hangs before restarting it
    restartTime = 20
    # Name of the target file to launch
    targetFileName = 'sps'
    # Communication port number
    port = 8889
    obj = WatchDog(port, targetFileName, restartTime)
    obj.waitFood()
class WebRequest(object):
    """Retrying GET helper around ``requests`` with a randomized User-Agent."""

    name = "web_request"

    def __init__(self, *args, **kwargs):
        # file=False: presumably console-only logging — confirm in LogHandler.
        self.log = LogHandler(self.name, file=False)
        self.response = Response()  # placeholder until a request is made

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return:
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }

    def get(self, url, header=None, retry_time=3, retry_interval=5,
            timeout=5, *args, **kwargs):
        """
        get method
        :param url: target url
        :param header: headers
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response``)
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url, headers=headers,
                                             timeout=timeout, *args, **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Fix: the original built this synthetic 200 response but
                    # never assigned it to self.response, leaving stale state
                    # from the previous request.
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """Parsed lxml tree of the last response body."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Decoded text of the last response body."""
        return self.response.text