def get_proxy(kw):
    """Run the proxy-fetch job in a child process and report completion.

    Spawns a daemon subprocess around ``_get_proxy``, waits for it to
    finish, then notifies the job-status service that the task whose id
    is ``kw['id']`` is done.
    """
    worker = mp.Process(target=_get_proxy, args=(kw,))
    worker.daemon = True
    worker.start()
    worker.join()
    # The subprocess has exited; report the task's terminal status.
    jobstatus.done(kw['id'])
def run(self):
    """Drive the keyword-filter job: fan DB rows out to worker processes.

    Reads rows from the input DB in id-ranged batches, distributes them
    round-robin onto per-worker queues consumed by ``fielt_srv`` daemon
    processes, and optionally runs an output process that drains the
    shared result queue.  Honors pause/stop codes in
    ``global_list.TASK_STAUS`` (1 = paused, 2 = stopped) and the
    instance/global exit flags, then joins or terminates all children
    and reports the task as done via ``jobstatus``.
    """
    manager = mp.Manager()
    queue = manager.Queue()     # shared result queue drained by output_data
    out_db_exit = mp.Event()    # tells the output process producers are done
    input_db = mysqlwrap.get_db(self._input['dbserver'])
    sql_item = self._input['data']
    # Determine the id range to scan: an explicit 'range' entry wins,
    # otherwise query min/max id from the source table.
    if 'range' in sql_item and sql_item['range']:
        minid, maxid = sql_item.pop('range')
    else:
        res, desc = input_db.query("select min(id) as min,max(id) as max from %s" % sql_item['table'])
        minid, maxid = desc[0]['min'], desc[0]['max']
    sql_where = sql_item.pop('where', '')
    # Per-field weights used by the workers when scoring rows.
    weight = sql_item.pop('weight')
    if sql_where:
        sql_where += " and "
    # Worker count is derived from the task's power level; workers run as
    # daemons so they cannot outlive this driver.
    pnum = get_process_count(self.powerlevel)
    pool = []     # list of mp.Process workers (NOT an mp.Pool)
    q_pool = []   # one input queue per worker
    for i in range(0, pnum):
        q = manager.Queue()
        q_pool.append(q)
        pool.append(mp.Process(target=fielt_srv,
                               args=(self.keywords, self.keyweight, weight,
                                     q, queue, self.exit_flag)))
        pool[i].daemon = True
        pool[i].start()
    # Start the output process if an output target is configured.
    p = None
    if self._output:
        p = mp.Process(target=self.output_data, args=(queue, out_db_exit))
        p.start()
    p_start = time.time()
    while minid < maxid:
        # External shutdown request (per-task flag or global exit).
        if self.exit_flag.is_set() or global_list.G_EXIT:
            # BUG FIX: the original called pool.close()/pool.join() here,
            # but ``pool`` is a plain list of Process objects, so those
            # calls raised AttributeError.  The join/terminate loop after
            # this while loop performs the actual cleanup (and self.close()
            # is called there too), so we only need to leave the loop.
            break
        # Task status codes: 1 = paused (poll again), 2 = stopped.
        if global_list.TASK_STAUS[self.taskid] == 1:
            time.sleep(1)
            continue
        if global_list.TASK_STAUS[self.taskid] == 2:
            break
        stop_id = minid + self.rowstep
        if stop_id > maxid:
            stop_id = maxid
        # Copy sql_item per batch: the dict is shared, and mutating its
        # 'where' entry in place would race with other users of it.
        _sql_item = sql_item.copy()
        _sql_item['where'] = "%s id >= %s and id < %s" % (sql_where, minid, stop_id)
        res, desc = input_db.query(_sql_item)
        minid = stop_id
        if res == -1:
            # Query failed; log the error and move on to the next range
            # after a short pause.  (-1/err-msg is this DB wrapper's
            # error convention — TODO confirm against mysqlwrap.)
            print(desc)
            time.sleep(3)
            continue
        # Round-robin every fetched row onto the per-worker queues.
        q_index = 0
        for row in desc:
            if q_index == len(q_pool):
                q_index = 0
            q_pool[q_index].put(row)
            q_index += 1
    # All ranges dispatched (or the loop was aborted): close our own
    # resources, then collect the workers.
    self.close()
    print(len(pool))
    for i in range(0, len(pool)):
        print('join..', pool[i].pid)
        if global_list.TASK_STAUS[self.taskid] == 2:
            # Hard stop requested: kill workers instead of waiting for
            # them to drain their queues.
            pool[i].terminate()
        else:
            pool[i].join()
    print("is done")
    # Signal the output process that no more results will arrive, then
    # wait for it to flush and exit.
    out_db_exit.set()
    if p:
        p.join()
    print("finish job user :", time.time() - p_start)
    # Report task completion to the job-status service.
    jobstatus.done(self.taskid)