def run_job(self, jobd): self.dump_jobid(jobd) GenQueries.run_job(self, jobd) if not isinstance(jobd, dict): return if jobd.get('type') == 'loadpage': o = self.load_page(jobd.get('url'), jobd.get('page')) if o is None: self.add_job(jobd) if jobd.get('type') == 'cvurl': url = jobd.get('url') m = re.search(r'res_id_encode=([a-z0-9A-Z]+)', url) if m: cvid = m.group(1) qstring = "liepincv://" + cvid cnt = spider.util.HashChecker().query(qstring) if cnt is not None and int(cnt) != 0: print '======%s hash downloaded=====' % qstring return o = self.al_request(jobd.get('url'), headers=Cdata.headers, allow_redirects=False) if o is None: return None print '==========saveing======' spider.util.HashChecker().add(qstring) time.sleep(5) Cdata.lpcvstore.save(url, cvid, o.text) print '==========save done===='
def __init__(self, thcnt, ac):
    """Set up the query generator plus a zhaopin login manager.

    :param thcnt: worker thread count, forwarded to GenQueries.
    :param ac: account list handed to the login manager.
    """
    GenQueries.__init__(self, thcnt)
    self.zlm = MRLManager(ac, new_ZLLogin)
    # Referer header expected by the resume-search backend.
    self.headers = {'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom'}
    self.search_cnt = 0
    self._last_time = 0.0  # NOTE(review): presumably a request timestamp for throttling -- confirm
def __init__(self, thcnt):
    """Initialize the jobui query generator and its output file sinks."""
    GenQueries.__init__(self, thcnt)
    self.thread_count = 1  # NOTE(review): overrides the thcnt argument -- confirm intended
    self._name = "jobui_queries"
    # One FileSaver per crawl outcome.
    self.no_match_url = FileSaver("not_match_%s.txt" % self._name)
    self.bs2 = FileSaver("failed_urls.txt")
    self.job_url = FileSaver("job_url.txt")
    self.domains = FileSaver("domains.txt")
    self.cnt = 0
    self.start_time = time.time()
def __init__(self, thcnt):
    """Minimal generator; only sets the job name."""
    GenQueries.__init__(self, thcnt)
    self._name = "test_set"
def __init__(self):
    """Single-threaded jobui query generator."""
    GenQueries.__init__(self)
    self._name = "jobui_queries"
    self.thread_count = 1
def __init__(self, thcnt=8):
    """Wenshuwang query generator; defaults to 8 worker threads."""
    GenQueries.__init__(self, thcnt)
    self._name = 'WenshuwangGenQueries'
def __init__(self, thcnt):
    """Initialize both bases: the query generator and the liepin
    account/login manager."""
    GenQueries.__init__(self, thcnt)
    MRLManager.__init__(self, Cdata.accounts, LPRequest)
    self._name = 'lp_queries'
    self.baseurl = {}
def __init__(self, thcnt=20):
    """Liepin company-page query generator; defaults to 20 threads."""
    GenQueries.__init__(self, thcnt)
    self._name = "lp_qiye_queries"
def __init__(self):
    """Zhilian query generator running 8 worker threads."""
    GenQueries.__init__(self)
    self._name = "zhilian_queries"
    self.thread_count = 8