Ejemplo n.º 1
0
    def insert(self, table, dataframe, **args):
        """Upsert *dataframe* rows into MySQL *table*.

        Builds an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement that
        updates every non-key column on conflict, then bulk-executes it.

        :param table: target table name (interpolated into the SQL text).
        :param dataframe: pandas DataFrame whose columns match the table.
        :param args: must contain 'filename' (used in the error log message).
        """
        # Build "col=VALUES(col)" terms for every non-key column so a
        # duplicate key updates the data columns instead of failing.
        on_dup_update_key = []
        for c in dataframe.columns:
            if c not in ('ReportDate', 'Code', 'Date', 'ValuationMethod'):
                col = c.replace(" ", "")
                on_dup_update_key.append('%s=VALUES(%s)' % (col, col))

        sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
            table,
            ','.join(dataframe.columns),
            ','.join(['%s'] * len(dataframe.columns)),
            'ON DUPLICATE KEY UPDATE ' + ','.join(on_dup_update_key),
        )

        # Create the cursor once: the original opened one cursor for
        # executemany and a second, unrelated cursor in ``finally`` — so the
        # cursor actually used was never closed.
        cursor = self.__sqlConnect.cursor()
        try:
            rows = [tuple(r) for r in dataframe.values]
            cursor.executemany(sql_insert, rows)
            # NB : you won't get an IntegrityError when reading
            self.__sqlConnect.commit()
        except mysql.connector.Error as err:
            LogHandler.log_exceptions("""
                    Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
            """.format(args['filename'], sql_insert, err))
        finally:
            cursor.close()
Ejemplo n.º 2
0
 def saveDataFrame(self, dataframe, table):
     """Append *dataframe* to *table* through ``pandas.DataFrame.to_sql``.

     Uses the engine and per-table dtype map held by ``self.__con``.
     Errors are logged via LogHandler and swallowed deliberately so one
     failed save does not abort the surrounding batch.
     """
     try:
         dataframe.to_sql(name=table,
                          con=self.__con.getEngine(),
                          index=False,
                          if_exists='append',
                          dtype=self.__con.getDbType()[table])
     except DBAPIError as e:
         # Best-effort: log and continue (the original's extra `pass` and
         # empty `finally: pass` were dead code and have been removed).
         LogHandler.log_exceptions("Sql Exceptions: %s\r\n" % e)
Ejemplo n.º 3
0
 def __init__(self):
     """Build the SQLAlchemy MySQL (pymysql) engine for this wrapper."""
     self.__db_name = dbConfig['database']
     LogHandler.log_msg("DB connection initializing...")
     # NOTE(review): the URL below reads self.__config, which this __init__
     # never assigns (only module-level dbConfig is used above). Unless
     # __config is a class attribute defined outside this view, this raises
     # AttributeError — confirm; it probably should read dbConfig.
     self.__engine = create_engine(
         'mysql+pymysql://%s:%s@%s:%s/%s?charset=%s' % (
             self.__config['user'],
             self.__config['password'],
             self.__config['host'],
             self.__config['port'],
             self.__config['database'],
             self.__config['charset'],
         ),
         echo=False)
     LogHandler.log_msg("Done.")
Ejemplo n.º 4
0
 def __init__ (self, service, configFile):
   try:
     print "logger instantiated"
     self.logHandler = LogHandler(service, configFile)
   except Exception as error:
     monitorLog.logError("Cannot Instantiate Logger with configFile : " + configFile, `error`)
     raise IncorrectConfigException("Cannot Instantiate Logger with configFile : " + configFile)
   self.threadLocal = threading.local()
   self.counter = 0;
Ejemplo n.º 5
0
    def engine_insert_update(self, dataframe, table, **args):
        """Upsert *dataframe* into MySQL *table* via the SQLAlchemy engine.

        Builds an ``INSERT ... ON DUPLICATE KEY UPDATE`` statement updating
        every non-key column on conflict and executes it with one row tuple
        per DataFrame row.

        :param dataframe: pandas DataFrame whose columns match the table.
        :param table: target table name (interpolated into the SQL text).
        :param args: must contain 'filename' (used in the error log message).
        """
        # Build "col=VALUES(col)" terms for every non-key column.
        onDupUpdateKey = []
        for c in dataframe.columns:
            if c not in ('ReportDate', 'Code', 'Date', 'ValuationMethod'):
                col = c.replace(" ", "")
                onDupUpdateKey.append('%s=VALUES(%s)' % (col, col))

        sql_insert = 'INSERT INTO %s(%s) VALUES(%s) %s' % (
            table,
            ','.join(dataframe.columns),
            ','.join(['%s'] * len(dataframe.columns)),
            'ON DUPLICATE KEY UPDATE ' + ','.join(onDupUpdateKey),
        )

        try:
            with self.__engine.connect() as con:
                rows = [tuple(x) for x in dataframe.values]
                con.execute(sql_insert, *rows)
        # DataError and OperationalError are subclasses of DBAPIError in
        # SQLAlchemy, so the original's three separate handlers were dead
        # code after the first; one handler covers all three identically.
        except DBAPIError as err:
            LogHandler.log_exceptions("""
                            Parsing file {}\nSQL Query: {}\nSomething went wrong: {}\n
                    """.format(args['filename'], sql_insert, err))
Ejemplo n.º 6
0
 def __init__(self):
     """Wire up DB access: an engine-based connection, table metadata, and
     a plain MySQL connection."""
     LogHandler.log_msg("Initializing tools..")
     self.__con = dbConnectionEngine()
     LogHandler.log_msg("Fetch current table attribute")
     # Populate the model-properties/table-attribute map.
     # NOTE(review): self.mp is read here but never assigned in this
     # __init__ — presumably a class attribute defined elsewhere; confirm.
     self.__con.loadModelProperties(self.mp)
     LogHandler.log_msg("Done.")
     self.__mysqlCon = dbConnection()
Ejemplo n.º 7
0
 def __init__(self):
     """Create the plain-MySQL SQLAlchemy engine from module-level dbConfig."""
     self.__db_name = dbConfig['database']
     LogHandler.log_msg("DB engine initializing...")
     try:
         # The original wrapped create_engine() inside a second
         # create_engine() call, passing an Engine object where a URL
         # string is expected; a single call is the correct construction.
         self.engine = create_engine(
             'mysql://%s:%s@%s:%s/%s' % (
                 dbConfig['user'],
                 dbConfig['password'],
                 dbConfig['host'],
                 dbConfig['port'],
                 dbConfig['database'],
             ),
             echo=False)
     except DBAPIError as err:
         LogHandler.log_exceptions(err)
     finally:
         # Runs even on failure, so "Done." is logged regardless of success.
         LogHandler.log_msg("Done.")
Ejemplo n.º 8
0
thread_num = 4

file_nums = 0
threaded_file = []

if __name__ == '__main__':
    start_time = time.process_time_ns()
    for root, directories, files in os.walk("csv"):
        all_files = len(files)
        # Guard against fewer files than threads: round() could yield 0,
        # and range() with step 0 raises ValueError.
        sublist_size = max(1, round(len(files) / thread_num))
        threaded_file = [
            files[i:i + sublist_size]
            for i in range(0, len(files), sublist_size)
        ]
        threads = []
        for index, tf in enumerate(threaded_file):
            p = StreamThread(sublist=tf, threadCode="Thread-{}".format(index))
            threads.append(p)

        # Start every worker first, then join them all: the original called
        # start() immediately followed by join() inside one loop, which ran
        # the threads strictly one after another (no concurrency at all).
        started = []
        for th in threads:
            try:
                th.start()
                started.append(th)
            except Error as err:
                # NOTE(review): `Error` is not defined in this view —
                # confirm which exception type is intended here.
                LogHandler.log_exceptions(
                    "Error: unable to start thread, msg: {0}".format(err))
        for th in started:
            th.join()

    end_time = time.process_time_ns()
    print("All threads time spend: %sms" % round(
        (end_time - start_time) / 1000000, 5))
Ejemplo n.º 9
0
 def __init__(self, *args, **kwargs):
     """Create a per-instance logger (console-only) and an empty Response."""
     self.log = LogHandler(self.name, file=False)
     self.response = Response()
Ejemplo n.º 10
0
class WebRequest(object):
    """Thin wrapper around ``requests`` adding retry loops and logging.

    The instance keeps the last ``requests`` response in ``self.response``;
    ``tree`` and ``text`` expose parsed views of that response.
    """

    name = 'web_request'

    def __init__(self, *args, **kwargs):
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def header(self):
        """Default headers used by JSON POST requests."""
        return {'Connection': 'close', 'Content-Type': 'application/json'}

    def get(self,
            url,
            header=None,
            retry_time=6,
            retry_interval=6,
            timeout=10,
            *args,
            **kwargs):
        """
        get method
        :param url: target url
        :param header: headers
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response`` for the result)
        """
        # NOTE(review): unlike post_data_json, this passes `header` straight
        # through without merging self.header — confirm that is intended.
        while True:
            try:
                self.response = requests.get(url,
                                             headers=header,
                                             timeout=timeout,
                                             *args,
                                             **kwargs)
                return self
            except Exception as e:
                self.log.error("请求URL地址: %s 错误是: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Retries exhausted: hand back a synthetic 200 response
                    # instead of the stale previous one. The original built
                    # this object and then discarded it (dead `resp` local).
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("重新连接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)

    def post_data_json(self,
                       url,
                       header=None,
                       retry_time=3,
                       retry_interval=5,
                       timeout=8,
                       *args,
                       **kwargs):
        """
        post method
        :param url: target url
        :param header: headers (merged over the default JSON headers)
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response`` for the result)
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.post(url,
                                              headers=headers,
                                              timeout=timeout,
                                              *args,
                                              **kwargs)
                return self
            except Exception as e:
                print("请求: %s 错误: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Same fix as in get(): publish the synthetic response.
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                print("重新链接 %s 秒后" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """lxml-parsed tree of the last response body."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Text body of the last response."""
        return self.response.text
Ejemplo n.º 11
0
# `os` is required by the directory scan (os.walk / os.path.join) further
# down this script but was missing from the original import list.
import os
import time
# NOTE(review): wildcard import kept as-is — unseen code may rely on other
# locale names; explicit `from locale import setlocale, LC_NUMERIC` preferred.
from locale import *

import pandas as pd

from logHandler import LogHandler
from mytools import myTools

# Parse numbers with US-style separators (e.g. "1,234.56").
setlocale(LC_NUMERIC, 'English_US')
pd.set_option('display.max_columns', 10)

# Global variables
mytools, table_org_attributes, success_msg = myTools(), {}, ''
number, file_nums, single_file_data_count, data_count, datamapping_performance = 1, 0, 0, 0, 0.0

# Program start here
LogHandler.log_msg("Start...")
t_start = time.process_time_ns()
# Walk the csv/ directory and parse each file into a DataFrame.
# NOTE(review): `os` is used here but does not appear in the visible import
# block above — confirm it is imported elsewhere.
for root, directories, files in os.walk("csv"):
    file_nums = len(files)
    LogHandler.log_msg("{0} files in directory".format(len(files)))
    for filename in files:

        LogHandler.log_msg('Extracting from files %s/%s %s\n' %
                           (number, len(files), filename))

        t3 = time.process_time_ns()
        file_path = os.path.join(root, filename)
        # open and extract data from csv and return dataframe object
        csvdata = pd.read_csv(file_path, parse_dates=True)
        # Skip empty files; the loop body continues beyond this excerpt.
        if csvdata.empty:
            continue
Ejemplo n.º 12
0
class Logger:
  
  #beyond these, email will be triggered.
  threshold_failure = 4
  threshold_latency = 4
  threshold_count = 4

  '''
    It :
      - Creates LogHandler instance to write metrics to log file.
      - Creates threading.local() instance to create thread specific variables.
  '''
  def __init__ (self, service, configFile):
    try:
      print "logger instantiated"
      self.logHandler = LogHandler(service, configFile)
    except Exception as error:
      monitorLog.logError("Cannot Instantiate Logger with configFile : " + configFile, `error`)
      raise IncorrectConfigException("Cannot Instantiate Logger with configFile : " + configFile)
    self.threadLocal = threading.local()
    self.counter = 0;

  '''
    If the given action is failed, then it will log the failure count uptil now.
    It will also return the updated counter value.
  '''
  def logIfFail (self, name, expectedReturn, counter, action, severity = 20, *args, **kwargs):
    count = self.reportCountNE(expectedReturn, counter, action, *args, **kwargs)
    if count > 0:
      try:
        print "logging failure"
        self.logHandler.appendFailCountLog(name, count, severity)	
      except Exception as error:
        monitorLog.logError("Failed to append log for metric: " + name, `error`)
        raise LoggingException("Failed to append log for metric: " + name)
    return count

  def logFailure (self, name, counter, severity = 20):
    if counter > 0:
      try:
        if counter >= Logger.threshold_failure:
          self.logHandler.appendFailCountLog(name, counter,  'CRITICAL')
        self.logHandler.appendFailCountLog(name, counter, severity)
        print "logging failure"
      except Exception as error:
        monitorLog.logError("Failed to append log for metric: " + name, `error`)
        raise LoggingException("Failed to append log for metric: " + name)
      return 1
    return 0

  def logCount (self, name, counter, severity = 20):
    if counter > 0:
      try:
        if counter >= Logger.threshold_count:
          self.logHandler.appendCountLog(name, counter,  'CRITICAL')
        self.logHandler.appendCountLog(name, counter, severity)
      except Exception as error:
        monitorLog.logError("Failed to append log for metric: " + name, `error`)
        raise LoggingException("Failed to append log for metric: " + name)
      return 1
    return 0


  '''
    Report the incremented counter if the action has failed to pass the expectation.

  '''
  def reportCountEqual(self, expectedReturn, counter, action, *args, **kwargs):
    try:
      actualReturn = action(*args, **kwargs)
    except:
      return counter + 1
    if actualReturn == expectedReturn:
      return counter + 1
    return counter 

  '''
    Report the incremented counter if the action has passed the expectation.
  '''  
  def reportCountNE(self, expectedReturn, counter, action, *args, **kwargs):
    try:
      actualReturn = action(*args, **kwargs)
    except:
      return counter + 1
    if actualReturn == expectedReturn:
      return counter
    return counter + 1

  '''
    Starts the thread local timer.
  '''
  def startTime (self):
    #using thread local storage for start time 
    self.threadLocal.startTime = time.time()

  '''
    Stops the thread local timer and logs the execution time. 
  '''
  def reportTime (self, name, severity = 20):
    endTime = time.time()
    runTime = endTime - self.threadLocal.startTime
    try:
      if runTime >= Logger.threshold_latency:
        self.logHandler.appendTimeLog(name, runTime, 'CRITICAL')
      self.logHandler.appendTimeLog(name, runTime, severity)
    except Exception as error:
      monitorLog.logError("Failed to append log for metric: " + name, `error`)
      raise LoggingException("Failed to append log for metric: " + name)

  '''
    Logs the execution time of the given action and returns the value of action.
  '''
  def reportLatency (self, name, action, severity = 20, *args, **kwargs):
    self.startTime()
    try:
      actualReturn = action(*args, **kwargs)
    except Exception as error:
      monitorLog.logError("Failed Action " + `action`, `error`)
      raise Exception("Failed Action :" + `action`)
    self.reportTime(name, severity)
    return actualReturn
Ejemplo n.º 13
0
class Logger:


  '''
    It :
      - Creates LogHandler instance to write metrics to log file.
      - Creates threading.local() instance to create thread specific variables.
  '''
  def __init__ (self, service):
    self.logHandler = LogHandler(service)
    self.threadLocal = threading.local()


  '''
    If the given action is failed, then it will log the failure count uptil now.
    It will also return the updated counter value.
  '''
  def logIfFail (self, name, metricType, expectedReturn, counter, action, *args, **kwargs):
    count = self.reportCountNE(expectedReturn, counter, action, *args, **kwargs)
    if count > 0:
      self.logHandler.appendCountLog(name, metricType, count)	
    return count


  '''
    Report the incremented counter if the action has failed to pass the expectation.
  '''
  def reportCountEqual(self, expectedReturn, counter, action, *args, **kwargs):
    try:
      actualReturn = action(*args, **kwargs)
    except:
      return counter + 1
    if actualReturn == expectedReturn:
      return counter + 1
    return counter 

  '''
    Report the incremented counter if the action has passed the expectation.
  '''  
  def reportCountNE(self, expectedReturn, counter, action, *args, **kwargs):
    try:
      actualReturn = action(*args, **kwargs)
    except:
      return counter + 1
    if actualReturn == expectedReturn:
      return counter
    return counter + 1

  '''
    Starts the thread local timer.
  '''
  def startTime (self):
#using thread local storage for start time 
    self.threadLocal.startTime = time.time()

  '''
    Stops the thread local timer and logs the execution time. 
  '''
  def reportTime (self, name, metricType):
    endTime = time.time()
    runTime = endTime - self.threadLocal.startTime
    self.logHandler.appendTimeLog(name, metricType, runTime)

  '''
    Logs the execution time of the given action and returns the value of action.
  '''
  def reportLatency (self, name, metricType, action, *args, **kwargs):
    self.startTime()
    try:
      print "Inside reportLatency try block"
      actualReturn = action(*args, **kwargs)
      print actualReturn
    except:
      print "Inside reportLatency except block"
      #monitorLogs.logError("Error")
    self.reportTime(name, metricType)
    return actualReturn
Ejemplo n.º 14
0
 def __init__ (self, service):
   """Create the LogHandler for *service* and per-thread (thread-local) storage."""
   self.logHandler = LogHandler(service)
   self.threadLocal = threading.local()
Ejemplo n.º 15
0
    def run(self):
        """Worker entry point: parse every CSV in self.sublist and persist it.

        Two file shapes are handled: date-columned financial reports
        (transposed, cleaned, enriched with Code/ReportDate/ValuationMethod)
        and price files (columns de-spaced, Code appended). A per-thread
        performance summary is logged at the end.
        """
        success_msg = ''
        LogHandler.log_msg("[%s]: %s" %
                           (self.threadCode, time.ctime(time.time())))
        t_start = time.process_time_ns()
        mytools = myTools()
        number = 0
        data_count = 0
        file_nums = 0
        for filename in self.sublist:
            file_nums = len(self.sublist)
            number += 1
            LogHandler.log_msg('[%s]: Extracting from files %s/%s %s\n' % (
                self.threadCode,
                number,
                len(self.sublist),
                filename,
            ))
            t3 = time.process_time_ns()

            # checkCSVInTable
            csvdata = pd.read_csv("csv/" + filename)
            if csvdata.empty:
                continue

            # Check file type
            if mytools.checkIfIsDate(csvdata.columns):
                fileProperty = mytools.matchFile(filename, isPrice=False)
                # Pre-processing data: remove fields which is not in sql table
                csvdata['name'] = csvdata['name'].str.strip()
                cols = csvdata['name'].str.strip()
                # NOTE(review): `.index` of the boolean isin() Series is the
                # full original index, so this iloc keeps every row — the
                # intended filter appears to be a no-op; confirm.
                csvdata = csvdata.iloc[(csvdata['name'].isin(
                    mytools.getMp()[fileProperty['table']])).index]

                # Remove ',' of each element and convert to type float
                dataframe = csvdata.T.iloc[1:] \
                    .applymap(
                    lambda x: float(x.replace(',', '')) if type(x) != float else float(x))

                # Rename columns' names to string
                dataframe.rename(columns=cols.to_dict(), inplace=True)

                # Remove unrelated table fields
                dataframe = dataframe.loc[:, cols.loc[(cols.isin(mytools.getMp(
                )[fileProperty['table']]))].str.strip().to_list()]

                # Append Code, ReportDate, ValuationMethod to dataframe
                dataframe['Code'] = fileProperty['cols']['Code']
                dataframe['ReportDate'] = csvdata.columns.to_series(
                ).iloc[1:].apply(lambda x: datetime.strptime(x, '%m/%d/%Y').
                                 strftime('%Y-%m-%d')
                                 if x != 'ttm' else '0000-00-00')
                if 'ValuationMethod' in tuple(fileProperty['cols'].keys()):
                    dataframe['ValuationMethod'] = fileProperty['cols'][
                        'ValuationMethod']

                # Replace all NaN to None
                dataframe = dataframe.where(dataframe.notna(), None)
                # Save in Database
                mytools.save(dataframe=dataframe,
                             table=fileProperty['table'],
                             filename=filename)
                data_count += dataframe.size
            else:
                fileProperty = mytools.matchFile(filename, isPrice=True)
                csvdata.rename(columns={
                    csvdata.columns[x]: csvdata.columns[x].replace(" ", "")
                    for x in range(csvdata.columns.size)
                },
                               inplace=True)

                # Append Code
                csvdata['Code'] = fileProperty['cols']['Code']

                # Replace all NaN to None
                csvdata = csvdata.where(csvdata.notna(), None)

                # Save in Database
                mytools.save(dataframe=csvdata,
                             table=fileProperty['table'],
                             filename=filename)
                data_count += csvdata.size
            # END

            t4 = time.process_time_ns()
            LogHandler.log_msg('%s data parsed, finished in %sms\n' %
                               (data_count, round((t4 - t3) / 1000000, 5)))

        t_end = time.process_time_ns()
        success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
        success_msg += "    END, total time: %sms\r\n" % round(
            (t_end - t_start) / 1000000, 5)
        # An average divides by the count; the original multiplied by it,
        # inflating both figures. max(..., 1) guards the empty-sublist case.
        success_msg += "    Main thread performance average/file: %sms" % round(
            (t_end - t_start) / 1000000 / max(file_nums, 1), 5)
        success_msg += "    Parsed data total: %s\r\n" % data_count
        success_msg += "    Performance average/data: %sms\r\n" % round(
            (t_end - t_start) / 1000000 / max(data_count, 1), 5)
        success_msg += "\r\n-------------------------------------------------------------------------------\r\n"
        LogHandler.success(success_msg)
Ejemplo n.º 16
0
                    elif PLATFORM == "Linux":
                        if self.fileName.endswith("py"):
                            os.system("cd %s && gnome-terminal -- python3 %s" %
                                      (DIR, os.path.join(DIR, self.fileName)))
                        else:
                            os.system("cd %s && gnome-terminal -- %s" %
                                      (DIR, os.path.join(DIR, self.fileName)))
                    # win32api.ShellExecute(0, 'open',  self.fileName, '', '', 1)
                    print('start the target successfully...')
                    outer_starttime = time()


if __name__ == '__main__':
    # Log retention period: 3 (units defined by LogHandler — presumably
    # days; confirm against its implementation).
    expired = 3
    # Maximum log file size: 500 MB
    maxSize = 500
    # Polling interval: 1 minute
    interval = 60
    # Create the log-maintenance object
    logobj = LogHandler(expired, maxSize, interval)
    # Start the log-monitoring thread
    logobj.start()
    # Seconds to wait after the watched program hangs before restarting it
    restartTime = 20
    # Name of the target program file to launch
    targetFileName = 'sps'
    # Communication port number for the watchdog
    port = 8889
    obj = WatchDog(port, targetFileName, restartTime)
    obj.waitFood()
Ejemplo n.º 17
0
class WebRequest(object):
    """Thin wrapper around ``requests`` adding retries, logging, and a
    randomized User-Agent.

    The instance keeps the last ``requests`` response in ``self.response``;
    ``tree`` and ``text`` expose parsed views of that response.
    """

    name = "web_request"

    def __init__(self, *args, **kwargs):
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return:
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }

    def get(self,
            url,
            header=None,
            retry_time=3,
            retry_interval=5,
            timeout=5,
            *args,
            **kwargs):
        """
        get method
        :param url: target url
        :param header: headers (merged over the default headers)
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self (inspect ``self.response`` for the result)
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url,
                                             headers=headers,
                                             timeout=timeout,
                                             *args,
                                             **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Retries exhausted: hand back a synthetic 200 response
                    # instead of the stale previous one. The original built
                    # this object and then discarded it (dead `resp` local).
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """lxml-parsed tree of the last response body."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Text body of the last response."""
        return self.response.text