# ac_pay(ac, cm, '310', 'nx2016e2', '宁夏') # ac_pay(ac, cm, '510', 'gz2016e4', '贵州') # ac_pay(ac, cm, '510', 'gz2016e5', '贵州') # ac_pay(ac, cm, '510', 'hainan2016e5', '海南') # ac_pay(ac, cm, '150', 'sxty2016e6', '山西') # ac_pay(ac, cm, '150', 'sxty2016e7', '山西') # ac_pay(ac, cm, '150', 'hunan2016e7', '湖南') # ac_pay(ac, cm, '150', 'sxxa2016e7', '陕西') # ac_pay(ac, cm, '310', 'gs2016e6', '甘肃') # ac_pay(ac, cm, '150', 'gs2016e6', '甘肃') # ac_pay(ac, cm, '150', 'qh2016e6', '青海') # ac_pay(ac, cm, '510', 'tj2016e6', '天津') # ac_pay(ac, cm, '150', 'sxty2016e5', '山西') # ac_pay(ac, cm, '150', 'gx2016e5', '广西') # ac_pay(ac, cm, '150', 'nm2016e5', '内蒙古') # ac_pay(ac, cm, '150', 'hebei2016e6', '河北') # ac_pay(ac, cm, '150', 'hebei2016e7', '河北') # ac_pay(ac, cm, '150', 'hebei2016e1', '河北') # pay_time('510', '福建', ac, cm, 1) # pay_time('510', '福建', ac, cm, 1) # pay_time('510', '福建', ac, cm, 1) # pay_time('310', '福建', ac, cm, 1) # pay_time('150', '福建', ac, cm, 1) pay_time('150', '福建', ac, cm, 2) cm.save() ac.save() pass
class QueueJobManager():
    """Drive recruit jobs for a list of provinces through a shared queue.

    Each entry of ``jobs`` is a dict with at least ``'name'`` (a key of the
    module-level ``provinces`` mapping) and ``'count'`` (how many accounts to
    request from the ``AccountManager``).  ``run()`` turns every job that has
    accounts into a parameter dict, queues it, and starts up to ``thcnt``
    worker threads, one per proxy handed out by the ``ProxyManager``.  A
    queued job is passed to ``recruit_jobs`` ``times`` times and then to
    ``recheck`` until the returned level reaches 3.
    """

    # 'score' value stored in the job parameters, with per-province overrides.
    DEFAULT_SCORE = 750
    SCORE_BY_PROVINCE = {'海南': 900, '上海': 600, '江苏': 500}

    # How long a worker waits on an empty queue before re-checking whether
    # every queued job has been completed.
    POLL_SECONDS = 1

    def __init__(self, jobs, thcnt=2, proxy='proxy_s', times=3):
        """Create the account/proxy managers and load the proxy list ``proxy``."""
        self.ac = AccountManager()
        self.pq = ProxyManager()
        self.pq.load(proxy)
        self.jobs = jobs
        self.threads = []
        self.running = False
        self.thread_cnt = thcnt
        self.times = times
        self.job_queue = Queue.Queue()
        # done_job counts finished jobs, queued_jobs counts jobs actually put
        # on the queue; workers stop once the two are equal.
        self.done_job = 0
        self.queued_jobs = 0
        self.job_lock = threading.RLock()

    def distpatch(self):
        """Build the parameter dict of every job and queue those with accounts.

        Jobs for which the account manager returns no accounts are skipped;
        they are not counted in ``queued_jobs`` so workers do not wait for
        them.
        """
        for job in self.jobs:
            name = job['name']
            ac = {
                'accounts': [],
                'name': name,
                'prefix': provinces[name],
                'score': self.SCORE_BY_PROVINCE.get(name, self.DEFAULT_SCORE),
                'times': 0,
            }
            acs = self.ac.get(name, job['count'])
            if not acs:
                continue
            for a in acs:
                ac['accounts'].append(a.gen_run_param())
            with self.job_lock:
                self.queued_jobs += 1
            self.job_queue.put(ac)

    def run(self):
        """Queue all jobs, run the workers to completion and save accounts.

        Returns immediately when a run is already in progress.  The
        ``running`` flag and the thread list are reset afterwards, even when
        an exception escapes, so the manager can be run again.
        """
        if self.running:
            return
        self.running = True
        try:
            self.distpatch()
            for _ in range(self.thread_cnt):
                proxy = self.pq.get_good_proxy()
                if proxy is None:
                    # No more usable proxies: run with the workers we have.
                    break
                worker = threading.Thread(target=self.runner, args=(proxy,))
                self.threads.append(worker)
            for worker in self.threads:
                worker.start()
                time.sleep(2)
            for worker in self.threads:
                worker.join()
            self.ac.save()
        finally:
            self.threads = []
            self.running = False

    def runner(self, proxy):
        """Worker loop: process queued jobs through ``proxy`` until all are done.

        The queue is polled with a timeout instead of a blocking ``get()``:
        a job that is being processed by another worker is temporarily absent
        from the queue, and a worker must neither exit early nor block
        forever once the last job has been completed.
        """
        while True:
            with self.job_lock:
                if self.done_job >= self.queued_jobs:
                    break
            try:
                ac = self.job_queue.get(timeout=self.POLL_SECONDS)
            except Queue.Empty:
                continue

            ac['proxy'] = proxy
            if ac.get('times', 0) < self.times:
                print('%s start %d' % (ac['name'], ac['times']))
                try:
                    recruit_jobs(ac)
                finally:
                    # Always hand the job back, so a failure in this worker
                    # does not strand the remaining workers.
                    ac['times'] += 1
                    ac['proxy'] = None
                    self.job_queue.put(ac)
            else:
                print('%s start check crawling' % ac['name'])
                try:
                    level = 0
                    while level < 3:
                        level = recheck(ac, level=level)
                finally:
                    with self.job_lock:
                        self.done_job += 1
class JobManager():
    """Run one ``smart_full_job`` crawl thread per job.

    Each entry of ``jobs`` is a dict with at least ``'name'`` (a key of the
    module-level ``provinces`` mapping) and ``'count'`` (how many accounts to
    request from the ``AccountManager``).

    NOTE(review): this file defines a second class named ``JobManager``
    further down; if both live in the same module, the later definition
    replaces this one.
    """

    def __init__(self, jobs, proxy='proxy_r'):
        """Create the managers and load the proxy list ``proxy`` into both."""
        self.ac = AccountManager()
        self.pm = ProxyManager()
        self.pq = ProxyQueue()
        self.pq.load(proxy)
        self.pm.load(proxy)
        self.jobs = jobs
        self.threads = []
        self.running = False

    def init(self):
        """Assign a proxy and a random user agent to the accounts of every job.

        The first two accounts of each job get ``get_good_proxy(7)``; every
        remaining account that still has no proxy gets ``get_good_proxy(1)``.
        """
        random.seed(int(time.time()))
        for job in self.jobs:
            for ac in self.ac.get(job['name'], 2):
                ac.proxy = self.pm.get_good_proxy(7)
                # random.choice is uniform; randint(0, len) % len counted
                # index 0 twice because randint includes both endpoints.
                ac.user_agent = random.choice(ua)
        for job in self.jobs:
            for ac in self.ac.get(job['name'], job['count']):
                if ac.proxy is None:
                    ac.proxy = self.pm.get_good_proxy(1)
                    # NOTE(review): the original indentation was lost; the
                    # user agent is assumed to be assigned together with the
                    # proxy so the first pass is not overwritten -- confirm.
                    ac.user_agent = random.choice(ua)

    def run(self):
        """Prepare accounts, run ``runner`` once per job and save accounts.

        Returns immediately when a run is already in progress.
        """
        if self.running:
            return
        self.running = True
        try:
            self.init()
            # Start from a fresh list: a Thread object cannot be started twice.
            self.threads = [
                threading.Thread(target=self.runner, args=(tid,))
                for tid in range(len(self.jobs))
            ]
            for t in self.threads:
                t.start()
                time.sleep(2)
            for t in self.threads:
                t.join()
            self.ac.save()
        finally:
            self.running = False

    def random_run(self):
        """Run ``rand_runner`` once per job in daemon threads and save accounts.

        Returns immediately when a run is already in progress.
        """
        if self.running:
            return
        self.running = True
        try:
            self.threads = []
            for tid in range(len(self.jobs)):
                t = threading.Thread(target=self.rand_runner, args=(tid,))
                # The daemon flag must be set before start(); setting it on a
                # running thread raises RuntimeError.
                t.daemon = True
                t.start()
                time.sleep(1)
                self.threads.append(t)
            time.sleep(2)
            for t in self.threads:
                t.join()
            self.ac.save()
        finally:
            self.running = False

    def _crawl(self, ac):
        """Call ``smart_full_job`` for ``ac`` until the returned level reaches 3."""
        level = 0
        print('%s start ' % ac['name'])
        while level < 3:
            if ac['name'] != '海南':
                level = smart_full_job(ac['accounts'], ac['name'], ac['prefix'], level=level)
            else:
                level = smart_full_job(ac['accounts'], ac['name'], ac['prefix'], 900, level=level)

    def rand_runner(self, tid):
        """Crawl job ``tid`` with a proxy leased from the queue for each account.

        Every leased proxy is released and cleared from its account when the
        crawl ends, including when it ends with an exception.
        """
        job = self.jobs[tid]
        ac = {
            'accounts': [],
            'name': job['name'],
            'prefix': provinces[job['name']],
        }
        acs = self.ac.get(job['name'], job['count'])
        leased = []
        try:
            for a in acs:
                a.proxy = self.pq.get_good_proxy()
                leased.append(a)
                ac['accounts'].append(a.gen_run_param())
            self._crawl(ac)
        finally:
            for a in leased:
                self.pq.release(a.proxy)
                a.proxy = None

    def runner(self, tid):
        """Crawl job ``tid`` using the proxies already stored on its accounts."""
        job = self.jobs[tid]
        ac = {
            'accounts': [],
            'name': job['name'],
            'prefix': provinces[job['name']],
        }
        for a in self.ac.get(job['name'], job['count']):
            ac['accounts'].append(a.gen_run_param())
        self._crawl(ac)
class JobManager():
    """Run one ``recruit_jobs`` thread per job.

    Each entry of ``jobs`` is a dict with at least ``'name'`` (a key of the
    module-level ``provinces`` mapping) and ``'count'`` (how many accounts to
    request from the ``AccountManager``).

    NOTE(review): an earlier class in this file is also named
    ``JobManager``; if both live in the same module, this later definition
    is the one bound to the name.
    """

    # 'score' value stored in the job parameters, with per-province overrides.
    DEFAULT_SCORE = 750
    SCORE_BY_PROVINCE = {'海南': 900, '上海': 600, '江苏': 500}

    def __init__(self, jobs, proxy='proxy_s'):
        """Create the managers and load the proxy list ``proxy`` into both."""
        self.ac = AccountManager()
        self.pm = ProxyManager()
        self.pq = ProxyQueue()
        self.pq.load(proxy)
        self.pm.load(proxy)
        self.jobs = jobs
        self.threads = []
        self.running = False
        self.job_queue = Queue.Queue()

    def init(self):
        """Give every job a proxy and every account a random user agent."""
        random.seed(int(time.time()))
        for job in self.jobs:
            job['proxy'] = self.pm.get_good_proxy(1)
            for ac in self.ac.get(job['name'], job['count']):
                # random.choice is uniform; randint(0, len) % len counted
                # index 0 twice because randint includes both endpoints.
                ac.user_agent = random.choice(ua)

    def run(self):
        """Prepare jobs, run ``runner`` once per job and save accounts.

        Returns immediately when a run is already in progress.
        """
        if self.running:
            return
        self.running = True
        try:
            self.init()
            # Start from a fresh list: a Thread object cannot be started twice.
            self.threads = [
                threading.Thread(target=self.runner, args=(tid,))
                for tid in range(len(self.jobs))
            ]
            for t in self.threads:
                t.start()
                time.sleep(2)
            for t in self.threads:
                t.join()
            self.ac.save()
        finally:
            self.running = False

    def random_run(self):
        """Run ``rand_runner`` once per job and save accounts.

        Returns immediately when a run is already in progress.
        """
        if self.running:
            return
        self.running = True
        try:
            self.threads = []
            for tid in range(len(self.jobs)):
                t = threading.Thread(target=self.rand_runner, args=(tid,))
                t.start()
                time.sleep(1)
                self.threads.append(t)
            time.sleep(2)
            for t in self.threads:
                t.join()
            self.ac.save()
        finally:
            self.running = False

    def _job_params(self, job, proxy):
        """Build the parameter dict handed to ``recruit_jobs`` for ``job``."""
        name = job['name']
        params = {
            'accounts': [],
            'name': name,
            'prefix': provinces[name],
            'proxy': proxy,
            'score': self.SCORE_BY_PROVINCE.get(name, self.DEFAULT_SCORE),
        }
        for a in self.ac.get(name, job['count']) or []:
            params['accounts'].append(a.gen_run_param())
        return params

    def rand_runner(self, tid):
        """Run job ``tid`` through a proxy leased from the proxy queue.

        The leased proxy is stored in the job parameters before the job
        starts (previously it was leased but the job ran with ``proxy=None``)
        and is released afterwards, including when the job raises.
        """
        ac = self._job_params(self.jobs[tid], None)
        proxy = self.pq.get_good_proxy()
        ac['proxy'] = proxy
        try:
            print('%s start ' % ac['name'])
            recruit_jobs(ac)
        finally:
            self.pq.release(proxy)

    def runner(self, tid):
        """Run job ``tid`` through the proxy that ``init()`` stored on the job."""
        job = self.jobs[tid]
        ac = self._job_params(job, job['proxy'])
        print('%s start ' % ac['name'])
        recruit_jobs(ac)