# Example no. 1
# 0
class FetcherBase:
    """Base class for fetchers that detect new tracking items and store them.

    Holds a logger, an HTTP client and (once ``SetDBConn`` is called) a
    tracking-database wrapper. ``Fetch`` calls ``self._fetch()``, which is
    not defined in this class and must be supplied by a subclass.
    """

    # Directory (relative to the current working directory) in which
    # ``_dump_error`` saves pages; subclasses may override it.
    SAVEPATH = r'ex-pages'

    def __init__(self, item_id, track_type, log=None):
        """Set up the fetcher state.

        :param item_id: identifier used in log messages and passed to the
            database layer.
        :param track_type: tracking type passed to the database layer.
        :param log: object wrapped by ``LogAdapter``; may be ``None``.
        """
        self.log = LogAdapter(log)
        self.item_id = item_id
        self.track_type = track_type
        self.name = "FetcherBase"
        self.proxy_dict = ""
        # Items already seen; loaded from the database on the first call
        # to _chk_new_items and extended as new items arrive.
        self.track_dict = {}
        self.http = HttpClient()
        self.http.req_timeout = 30
        self.conn = None
        self.db = None
        self.debug_level = 0
        # Becomes True once track_dict has been loaded from the database.
        self.initialised = False

    def SetProxy(self, proxy_dict):
        """Remember ``proxy_dict`` and, when non-empty, register it with the HTTP client."""
        self.proxy_dict = proxy_dict
        if proxy_dict:
            self.http.AddProxy(self.proxy_dict)

    def SetDBConn(self, conn):
        """Store the connection and build the ``TrackDB`` wrapper around it."""
        self.conn = conn
        self.db = track_db.TrackDB(db_conn=self.conn, log=self.log)

    def Fetch(self):
        """Run ``self._fetch()`` and route any exception to ``_error_handle``.

        ``_fetch`` is expected to be provided by a subclass.
        """
        try:
            self._fetch()
        except Exception as e:
            self._error_handle(e)

    def _chk_new_items(self, item_list):
        """Store every item of ``item_list`` that has not been seen before.

        On the first call the set of known items is loaded from the
        database via ``item_get_top_n``.

        :param item_list: sized collection of hashable items exposing the
            attributes read by ``_new_data``.
        :return: ``True`` when the list was processed, ``False`` when it is
            empty or an exception occurred (the exception is reported via
            ``_error_handle``).
        """
        try:
            counter = len(item_list)
            if counter == 0:
                self.log.debug("[%s] Item info chk over with result is 0.", self.item_id )
                return False

            if not self.initialised:
                self.track_dict = self.db.item_get_top_n(
                    self.item_id, counter, self.track_type)
                self.initialised = True

            for item_info in item_list:
                if item_info in self.track_dict:
                    continue
                # Mark as seen before storing so it is not handled twice.
                self.track_dict[item_info] = item_info
                result = self._new_data(item_info)
                if result == -1:
                    self.log.warn("[%s] Item info already in db, track_time=%s",
                                  self.name, str(item_info.track_time))
                elif result == 2:
                    self.log.info("[%s] Tracking ended for track_time=%s",
                                  self.name, str(item_info.track_time))

            self.log.debug("[%s] Item info chk over.", self.item_id )
            return True
        except Exception as e:
            self._error_handle(str(e))
            return False

    def _new_data(self, item):
        """Log ``item`` and insert it through the database wrapper.

        :return: whatever ``sp_insert_new_item`` returns. According to the
            original note the expected codes are ``1`` (new data stored),
            ``2`` (tracking ended) and ``-1`` (already in db).
        :raises Exception: when no database has been attached.
        """
        self.log.info("[%s] New track info coming:[type=%s|delivered=%d] ITEM=%s,%s,%s,%s",
                      self.name, self.track_type, item.is_ended, item.name,
                      item.description, item.location, item.track_time)
        if not self.db:
            raise Exception("db did not initialised")
        return self.db.sp_insert_new_item(
            self.track_type, item.is_ended, item.name,
            item.track_time, item.description, item.location)

    def _error_handle(self, msg):
        """Log an error and update the item status in the database.

        :param msg: an error message string or an exception instance.
        :return: the result of ``sp_update_item_status`` when a database is
            attached, otherwise ``None``.
        """
        if msg:
            self.log.error("[%s] error happend:%s", self.item_id, msg )
        # msg may be an exception *instance*; issubclass() would raise
        # TypeError for it, so isinstance() is the correct check. Done
        # before the database update so it is never skipped.
        if isinstance(msg, BaseException):
            self.log.exception(msg)
        if self.db:
            return self.db.sp_update_item_status(self.track_type, self.item_id)
        self.log.error("[%s] error happend: db access error at meantime...",self.item_id)

    def _dump_error(self, item_id, fetch_url, text="", e=None):
        """Save the page that caused an exception for later inspection.

        Writes ``text`` to ``./<SAVEPATH>/<item_id>_<YYYYmmddHHMMSS>.html``
        (UTF-8), preceded by an HTML comment holding ``fetch_url``, and logs
        the absolute path. Does nothing when ``text`` is empty or ``None``.

        :param e: unused; kept for interface compatibility.
        """
        from time import localtime, strftime
        import codecs, os
        if not text:
            return
        # Zero-padded timestamp so distinct times never yield the same name.
        t_str = strftime("%Y%m%d%H%M%S", localtime())
        filename = r'./%s/%s_%s.html' % (self.SAVEPATH, item_id, t_str)
        vavava.util.assure_path("./%s" % self.SAVEPATH)
        file_full_name = os.path.abspath(filename)
        # The context manager closes the file even if a write fails.
        with codecs.open(filename, "w", 'utf-8') as f:
            f.write('<!--%s-->%s' % (fetch_url, os.linesep))
            f.write(text)
        self.log.error(r"page saved at %s",file_full_name)