class DataDownloader:
    """Download URLs into a working directory through a worker pool.

    Each call to :meth:`download_url` is submitted with ``Pool.apply``, so the
    caller blocks until that single download has finished (or failed).
    """

    def __init__(self, work_dir: str, pool_size: int):
        """Remember the target directory and create a pool of ``pool_size`` workers."""
        self._work_dir = work_dir
        self._pool = Pool(pool_size)

    def stop(self):
        """Terminate the worker pool immediately."""
        logger.info('Stopping data downloader')
        self._pool.terminate()

    def download_url(self, url: str, output_name: str):
        """Download ``url`` into ``<work_dir>/<output_name>`` and wait for completion.

        Raises:
            CustomException: re-raised from the worker when the server answers
                with an HTTP error or when the downloaded file is empty.
        """
        self._pool.apply(self._download_url,
                         (self._work_dir, url, output_name))

    @staticmethod
    def _download_url(work_dir: str, url: str, output_name: str):
        """Worker body: stream ``url`` to disk and check the result is non-empty.

        Kept as a static method so only plain strings are sent to the pool
        rather than the instance itself.
        """
        path = os.path.join(work_dir, output_name)
        logger.debug('Downloading URL %s into %s', url, path)
        try:
            with urlopen(url) as response, open(path, 'wb') as output:
                copyfileobj(response, output)
        except HTTPError as e:
            logger.debug('Cannot download %s: %s', url, e)
            # Chain the original HTTP error so the cause stays visible in the
            # traceback instead of being replaced by a bare "remote error".
            raise CustomException('remote error') from e

        size = os.path.getsize(path)
        if not size:
            raise CustomException('data seems empty')
        logger.debug('Download of %s complete (%s bytes total)', url, size)
def process_two():
    """Exercise the four basic ``Pool`` submission calls with ``task``.

    Every return value is discarded; the function only demonstrates the
    blocking and non-blocking call styles and then shuts the pool down.
    """
    # multiprocessing
    pool = Pool(3)
    for number in range(1, 6):
        # Non-blocking: hands back a result handle, which is ignored here.
        pool.apply_async(task, (number,))
        # Blocking: waits for this single call before continuing.
        pool.apply(task, (number,))

    numbers = list(range(1, 6))
    # Blocking: waits until every item has been processed.
    pool.map(task, numbers)
    # Non-blocking: returns an iterator, which is never consumed here.
    pool.imap(task, numbers)

    pool.close()
    pool.join()
def manager_process(dir_queue, file_queue, out_queue):
    """Dispatch and manage the path-exploring and scanning workers.

    First runs ``explore_path`` synchronously in the pool to fill
    ``file_queue``, then starts ``options.num_threads`` asynchronous
    ``parallel_scan`` jobs, waits for them, and finally puts the
    ``StopIteration`` sentinel on ``out_queue``.
    """
    pool = Pool(options.num_threads)
    # Registered right after creation so the pool is handed to
    # at_exit_manager at interpreter exit.
    atexit.register(at_exit_manager, pool)

    logging.info('Gathering Files...')
    # Blocking call: the file queue is fully populated before scanning starts.
    pool.apply(explore_path, (dir_queue, file_queue))
    logging.info('Files gathered. Scanning %s files...', file_queue.qsize())

    logging.info('Starting %s scan processes...', options.num_threads)
    print('~' * 80)
    thread.start_new_thread(print_status, (file_queue,))

    for _worker in range(options.num_threads):
        pool.apply_async(parallel_scan, (file_queue, out_queue))
    pool.close()
    pool.join()

    # Sentinel put on the output queue once all scan jobs have finished.
    out_queue.put(StopIteration)
def ecb_encryption_mp(self):
    """Encrypt every block of ``self.input_list`` in a process pool.

    Each element is passed to ``self.encryption`` and the per-block outputs
    are concatenated in input order.

    Returns:
        The concatenated cipher text, or ``''`` when there is nothing to
        encrypt.
    """
    blocks = self.input_list
    if not blocks:
        # Pool(processes=0) raises ValueError; nothing to do for empty input.
        return ''

    pool = Pool(processes=len(blocks))
    try:
        # map() keeps the input order and actually runs the blocks in
        # parallel; one blocking apply() per block ran them one at a time.
        cipher = pool.map(self.encryption, blocks)
    finally:
        # Always release the worker processes, even if encryption failed.
        pool.close()
        pool.join()
    return ''.join(cipher)
def show_logbook():
    """
    Show information about any jobs currently running

    Submits ``query_and_print`` to a pool of ``SHOW_POLLERS`` workers, one
    blocking call at a time with a short pause after each. The original note
    says a pool is used because job-board latency can be high while the
    processing cost is low.
    NOTE(review): whether ``Pool`` is a thread or process pool is not visible
    from here.

    :return None:
    """
    print("Not connected to jobboard")
    pool = Pool(processes=SHOW_POLLERS)
    try:
        for _attempt in range(SHOW_POLLERS):
            pool.apply(query_and_print)
            sleep(0.1)
    except (KeyboardInterrupt, Exception):
        # Both an interrupt and any ordinary error stop the workers quietly.
        pool.terminate()
    finally:
        pool.close()
        pool.join()
def pool02():
    """Run ``long_time_task`` for 0..9 through a 7-worker pool and print the results.

    ``Pool.apply`` blocks, so each value is already final when it is stored.
    """
    print("Parent process %s." % os.getpid())
    workers = Pool(7)
    result = [workers.apply(long_time_task, args=(index, ))
              for index in range(10)]
    print('waiting for all subProcesses done...')
    workers.close()
    workers.join()
    print('All subProcesses done')
    # The blocking calls returned each final result directly.
    for name in result:
        print(name)
def query_vehicle_trajetory(client,numb,ptype,stime,etime):
    """Query every checkpoint from ``read_tgs_info`` and merge the records.

    ``_query_single_bay`` is called once per key of the checkpoint mapping
    through a process pool (blocking ``apply``), and the per-checkpoint
    result sequences are flattened into one list, which is returned.
    ``client`` is accepted but not used by this function.
    """
    tgsinfo = read_tgs_info()
    # from multiprocessing.dummy import Pool as ThreadPool
    from multiprocessing import Pool

    pool = Pool()
    result = [
        pool.apply(_query_single_bay, (cid, numb, ptype, stime, etime))
        for cid in tgsinfo
    ]
    pool.close()
    pool.join()

    # Flatten the per-checkpoint batches into a single record list.
    traj = [record for batch in result for record in batch]
    print('totally %d records.' % (len(traj)))
    return traj
"""
Create by zipee on 2019/1/6.
"""
__author__ = 'zipee'

from multiprocessing.pool import Pool


def work(a):
    """Placeholder task: accept one argument and return ``None``."""
    return None


if __name__ == '__main__':
    pool = Pool(3)

    # 1) Blocking apply(): each call returns the task's result directly.
    for i in range(10):
        result = pool.apply(work, (i, ))
        print(result)
    print("apply all done")

    ###########################################
    # 2) apply_async(): collect the handles first, then wait on each one.
    results = [pool.apply_async(work, (number, )) for number in range(10)]
    for result in results:
        print(result.get())
    print("apply all done")

    ###########################################
    # 3) map(): ``i`` is the value left over from the first loop (9), so the
    #    iterable holds a single item.
    results = pool.map(work, (i, ))
    print(results)
    ###########################################
type=str, help='parent output folder of all converted videos') parser.add_argument('--n_worker', type=int, help='number of workers', default=2) args = parser.parse_args() if not os.path.exists(args.ffmpeg_path): print('please install ffmpeg add provide its path!') parser.print_help() exit(-1) video_dirs, output_paths = get_video_dir_list(args.img_basedir, args.output_basedir) print('{} img sequences to be converted.'.format(len(video_dirs))) workers = Pool(args.n_worker) # chunk jobs chunks = [ list(zip(video_dirs, output_paths))[i::args.n_worker] for i in range(args.n_worker) ] for i in range(args.n_worker): workers.apply(convert_job, (chunks[i], args.ffmpeg_path)) workers.close() workers.join()
boo = 'true' else: boo = 'false' url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false&isSchoolJob=0' datas = {'first': boo, 'pn': x, 'kd': kd, 'city': city} info = get_json(url, datas) info_result = info_result + info # 创建workbook,即excel workbook = xlwt.Workbook(encoding='utf-8') # 创建表,第二参数用于确认同一个cell单元是否可以重设值 worksheet = workbook.add_sheet('lagouzp', cell_overwrite_ok=True) for i, row in enumerate(info_result): for j, col in enumerate(row): worksheet.write(i, j, col) workbook.save('F:\\Data\\lagouzp.xls') if __name__ == '__main__': kd = ['Java', 'Python', 'PHP', 'C++', 'C#', 'Ruby'] place = ['北京', '上海', '广州', '长沙', '深圳', '杭州', '成都', '武汉', '苏州'] ip_list = ['119.6.136.122', '114.106.77.14'] pool = Pool() for k in kd: for city in place: pool.apply(main, ( k, city, ))
"""
# author Liu shi hao
# date: 2019/12/11 15:43
# file_name: process_pool_test (process pool)
"""
import os
import time
from multiprocessing.pool import Pool


# The work each pool process should carry out.
def task():
    """Print this process's pid with a step counter three times, 0.2 s apart."""
    for step in range(3):
        print(os.getpid(), step)
        time.sleep(0.2)


if __name__ == '__main__':
    pool1 = Pool(3)
    for _ in range(15):
        # pool1.apply_async(task)  # asynchronous
        pool1.apply(task)  # synchronous
    pool1.close()
    pool1.join()
    print('finish')
from multiprocessing.pool import Pool

import numpy as np

from fire import generate_fire, generate_gif

# One random seed shared by all frames of this run.
seed = np.random.randint(0, 100)
frames = 13

if __name__ == '__main__':
    pool = Pool(processes=frames)
    # Render each frame with a blocking call, then assemble the GIF.
    for n in range(frames):
        pool.apply(generate_fire, args=(n, 512, 50, seed))
    generate_gif(frames)
import time


def fun(i):
    """Sleep one second, sum 0..99999 and print the last loop value.

    NOTE: the loop reuses the name ``i``, so the value printed is the final
    loop index rather than the argument that was passed in.
    """
    time.sleep(1)
    total = 0
    for i in range(0, 100000):
        total += i
    print(i)


def fun2(x):
    """Return ``x`` squared."""
    return x**2


if __name__ == "__main__":
    # Process pool (the original comment said 5 processes; 1 is created).
    pool = Pool(1)
    # apply
    start = time.time()
    for number in range(100):
        # pool.apply(fun)
        pool.apply(fun, args=(number,))
    # res = list(map(lambda x,y:y*x,[1,2,3,4,5],["a","b","c","d"]))
    # print(res)
    # res =pool.map(fun2,[1,2,3,4,5])
    # print(res)
    end = time.time()
    print("运行总时间%s"%(end-start))
def spider(url):
    """
    Crawl the model site in stages, driving three process pools from one loop.

    Original author's note: on a 4-core CPU this barely manages to cover all
    four requirements.

    Flow:
      1. Crawl the model index pages first (``list_spider``) to collect the
         link of every model's personal page.
      2. Once that is done, crawl the info pages and the show pages together.

    Worker functions submitted to the pools:
      model_post      ==> gets the information needed from the info page and
                          saves it to the corresponding model's model_info
                          table
      model_show_list ==> gets all sub-album links of a show page
      photo_list      ==> gets the picture links inside each sub-album

    :param url: first index page; repeatedly replaced by the return value of
        ``list_spider`` until that value is falsy.
    :return: None. Any exception escaping the main body is printed after the
        DB session is rolled back.
    """
    manager = Manager()
    profile_q = manager.Queue()
    profile_p = Pool(processes=1)
    show_q = manager.Queue()
    show_p = Pool(processes=1)
    photo_q = manager.Queue()
    photo_p = Pool(processes=2)
    photo_url_q = manager.Queue()
    # Status flags: 1 while the pool is still in use, 0 once it has been
    # shut down.
    show_p_live = 1
    profile_p_live = 1
    try:
        # Stage 1: walk the index pages until list_spider returns a falsy url.
        while 1:
            url = list_spider(url)
            if not url:
                print('Finish')
                break
            time.sleep(1)
        # Author's note: the index has few pages, so just wait for it to
        # finish before starting the other spiders (about 40 seconds); this
        # also avoids worrying about database conflicts. A lazy shortcut :-)
        time.sleep(10)
        print('======首页爬完,开始model_info 和 show_list======')
        # Lower bound of the next window of 10 ids to load for each spider.
        profile_new_id = 0
        show_new_id = 0
        while 1:
            # While the profile_p pool still has work to do
            if profile_p_live:
                if profile_q.empty():
                    # (string below) take rows from the main table and put
                    # them into the info spider's queue
                    """拿主表数据put进info spider的食盘"""
                    print('profile query sql')
                    model_post_url_list = db_session.query(WomanModels.model_home) \
                        .filter(WomanModels.id.in_(range(profile_new_id, profile_new_id + 10)))
                    model_post_url_list = list(model_post_url_list)
                    print('profile query sql end')
                    if model_post_url_list:
                        [
                            profile_q.put(post_url)
                            for post_url in model_post_url_list
                        ]
                        profile_new_id += 10
                    else:
                        # An empty id window is treated as "no more models":
                        # shut the pool down and stop this branch.
                        profile_p.close()
                        profile_p.terminate()
                        profile_p_live = 0
                else:
                    # (string below) start the info spider
                    """开启info spider"""
                    print("===model个人信息spider===", '\n')
                    # The queued item itself is passed as the positional
                    # argument sequence of model_post.
                    profile_p.apply(model_post, profile_q.get())
            # While the show_p pool still has work to do
            if show_p_live:
                if show_q.empty():
                    # (string below) this duplicates the profile code on
                    # purpose: a show page may span several pages, and if it
                    # shared one Queue with profile, profile could already
                    # have taken several URLs by the time those pages were
                    # crawled, which would be unpredictable
                    """ 有重复代码,但因为show页面可能有好几页,如果和profile用同一个Queue 当它把这几个页面爬完的时候,可能profile已经从Queue取走好几个URL,充满了不确定性 """
                    print('show query sql')
                    model_show_url_list = db_session.query(WomanModels.model_home, WomanModels.id) \
                        .filter(WomanModels.id.in_(range(show_new_id, show_new_id + 10)))
                    model_show_url_list = list(model_show_url_list)
                    print('show query sql end')
                    if model_show_url_list:
                        [
                            show_q.put(show_url_and_id)
                            for show_url_and_id in model_show_url_list
                        ]
                        show_new_id += 10
                    else:
                        show_p.close()
                        show_p.terminate()
                        show_p_live = 0
                else:
                    # (string below) start the show pages spider
                    """开启show pages spider"""
                    print("===model_show的spider===", '\n')
                    res = show_p.apply(model_show_list,
                                       args=(show_q.get(), ))
                    # NOTE(review): res appears to be
                    # (next_page_or_falsy, (album_urls, model_id)) — confirm
                    # against model_show_list.
                    if res[1:]:
                        if res[0]:
                            show_q.put(res[0])
                        # Put the sub-album URLs into the photo spider's queue
                        [
                            photo_q.put((photo_list_url, res[1][1]))
                            for photo_list_url in res[1][0]
                        ]
            if not photo_q.empty():
                # (string below) start the photo spider
                """开启photo spider"""
                print("===子相册的图片spider first===", '\n')
                # .get() is called immediately, so this waits for the result.
                res = photo_p.apply_async(func=photo_list,
                                          args=(photo_q.get(), )).get()
                if res:
                    [photo_url_q.put(photo_url) for photo_url in res]
            # Author's note: start two at once to speed things up
            if not photo_q.empty():
                print("===子相册的图片spider second===", '\n')
                res = photo_p.apply_async(func=photo_list,
                                          args=(photo_q.get(), )).get()
                if res:
                    [photo_url_q.put(photo_url) for photo_url in res]
            if not photo_url_q.empty():
                # Persist one collected photo record per loop iteration.
                q_get = photo_url_q.get()
                print('q_get', q_get)
                try:
                    db_session.add(
                        ModelShow(href=q_get['href'],
                                  create_time=q_get['create_time'],
                                  title=q_get['title'],
                                  hits=q_get['hits'],
                                  model_id=q_get['model_id']))
                    db_session.commit()
                except Exception as ModelShow_error:
                    db_session.rollback()
                    print(ModelShow_error)
            print('photo_url_q', photo_url_q.empty())
            # When both profile_p and show_p have finished their work and no
            # photo record is left to store
            if not profile_p_live and not show_p_live and photo_url_q.empty():
                # Wait for the photo processes to end
                photo_p.close()
                photo_p.terminate()
                profile_p.join()
                show_p.join()
                photo_p.join()
                break
    except Exception as e:
        db_session.rollback()
        print(e)
#
# if __name__ == "__main__":
#     pass
from multiprocessing import Pool
import time


def fun():
    """Sleep for one second and return the constant 123."""
    time.sleep(1)
    return 123


def fun2(item):
    """Return ``item`` plus one."""
    return item + 1


if __name__ == "__main__":
    # Create the process pool (the original comment mentioned 5 processes;
    # the pool is created with 100).
    pool = Pool(100)

    # Blocking call; its result is immediately replaced by the async handle.
    res = pool.apply(fun)
    res = pool.apply_async(fun)
    res.wait()
    print(res.get())

    start = time.time()
    res1 = pool.map(fun2, range(100000))
    # res = map(fun2,range(100000))
    for value in res1:
        print(value)
    # print(res)
    end = time.time()
    print('运行总时间:%s' % (end - start))
def task(name):
    """Announce mission ``name``, sleep a random fraction of a second, report the time spent."""
    print('start mission {}'.format(name))
    start = time.time()
    time.sleep(random())
    elapsed = time.time() - start
    print('mission accomplished {}, time cost:{}'.format(name, elapsed),
          os.getpid())
    # return 'mission accomplished {}, time cost:{}'.format(name,(end-start)),os.getpid()


# Shared list filled by callback_func.
container = []


def callback_func(content):
    """Append ``content`` to the module-level ``container`` and return that list."""
    container.append(content)
    return container


if __name__ == '__main__':
    pool = Pool(3)
    tasks = ['刷牙', '洗脸', '吃早饭', '喝水', '上班', '下班', '睡觉']
    for mission in tasks:
        pool.apply(task, args=(mission, ))
    pool.close()
    pool.join()
    # for c in container:
    #     print(c)
    print('tasks over! 好处是可以重复利用创建好的进程,进程的开销太大')
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy, _Resultable):
    """Run callables through a ``multiprocessing`` pool and record the outcomes.

    Every running method first calls ``reset_result()`` and then appends
    records of the form ``{"successful", "result", "exception"}`` through
    :meth:`_result_saving`. ``get_result()`` exposes them via ``result()``.
    """

    # Pool built by initialization(); None until then.
    _Processors_Pool: Pool = None
    # Handles returned by the latest apply_async fan-out.
    _Processors_List: List[Union[ApplyResult, AsyncResult]] = None

    def __init__(self, pool_size: int):
        super().__init__(pool_size=pool_size)

    def initialization(self,
                       queue_tasks: Optional[Union[_BaseQueueTask, _BaseList]] = None,
                       features: Optional[Union[_BaseFeatureAdapterFactory, _BaseList]] = None,
                       *args, **kwargs) -> None:
        """Run the parent initialization, start the manager server and build the pool.

        Recognised keyword arguments:
            pool_initializer: callable run in every worker at start-up.
            pool_initargs: positional arguments for ``pool_initializer``.
        """
        super(ProcessPoolStrategy, self).initialization(queue_tasks=queue_tasks,
                                                        features=features,
                                                        *args, **kwargs)

        # Activate multiprocessing.managers.BaseManager server
        activate_manager_server()

        # Initialize and build the Processes Pool.
        _pool_initializer: Callable = kwargs.get("pool_initializer", None)
        # The pool calls initializer(*initargs); None cannot be unpacked, so
        # fall back to an empty tuple when no initargs were supplied.
        _pool_initargs: IterableType = kwargs.get("pool_initargs", None) or ()
        self._Processors_Pool = Pool(processes=self.pool_size,
                                     initializer=_pool_initializer,
                                     initargs=_pool_initargs)

    def apply(self, tasks_size: int, function: Callable, args: Tuple = (),
              kwargs: Optional[Dict] = None) -> None:
        """Call ``function`` ``tasks_size`` times with blocking ``Pool.apply``.

        One record is saved for the whole batch: the list of return values on
        success, or the raised exception on failure.
        """
        self.reset_result()
        kwargs = {} if kwargs is None else kwargs
        _running_result = None
        _exception = None

        try:
            _running_result = [
                self._Processors_Pool.apply(func=function, args=args, kwds=kwargs)
                for _ in range(tasks_size)
            ]
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=_run_successful,
                            result=_running_result,
                            exception=_exception)

    def async_apply(self, tasks_size: int, function: Callable, args: Tuple = (),
                    kwargs: Optional[Dict] = None, callback: Callable = None,
                    error_callback: Callable = None) -> None:
        """Submit ``function`` ``tasks_size`` times with ``apply_async`` and wait for all.

        One record is saved per submitted task.
        """
        self.reset_result()
        kwargs = {} if kwargs is None else kwargs
        self._Processors_List = [
            self._Processors_Pool.apply_async(func=function,
                                              args=args,
                                              kwds=kwargs,
                                              callback=callback,
                                              error_callback=error_callback)
            for _ in range(tasks_size)
        ]
        self._collect_async_results()

    def apply_with_iter(self, functions_iter: List[Callable],
                        args_iter: List[Tuple] = None,
                        kwargs_iter: List[Dict] = None) -> None:
        """Call each function with its own args/kwargs using blocking ``Pool.apply``.

        Missing ``args_iter`` / ``kwargs_iter`` default to empty arguments for
        every function. One record is saved for the whole batch.
        """
        self.reset_result()
        _running_result = None
        _exception = None

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        try:
            _running_result = [
                self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs)
                for _func, _args, _kwargs in zip(functions_iter, args_iter, kwargs_iter)
            ]
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        # Save Running result state and Running result value as dict
        self._result_saving(successful=_run_successful,
                            result=_running_result,
                            exception=_exception)

    def async_apply_with_iter(self, functions_iter: List[Callable],
                              args_iter: List[Tuple] = None,
                              kwargs_iter: List[Dict] = None,
                              callback_iter: List[Callable] = None,
                              error_callback_iter: List[Callable] = None) -> None:
        """Submit each function with its own arguments and callbacks, then wait for all.

        Any omitted iterable defaults to "nothing" for every function. One
        record is saved per submitted task.
        """
        self.reset_result()

        if args_iter is None:
            args_iter = [() for _ in functions_iter]

        if kwargs_iter is None:
            kwargs_iter = [{} for _ in functions_iter]

        if callback_iter is None:
            callback_iter = [None for _ in functions_iter]

        if error_callback_iter is None:
            error_callback_iter = [None for _ in functions_iter]

        self._Processors_List = [
            self._Processors_Pool.apply_async(func=_func,
                                              args=_args,
                                              kwds=_kwargs,
                                              callback=_callback,
                                              error_callback=_error_callback)
            for _func, _args, _kwargs, _callback, _error_callback in zip(
                functions_iter, args_iter, kwargs_iter, callback_iter,
                error_callback_iter)
        ]
        self._collect_async_results()

    def map(self, function: Callable, args_iter: IterableType = (),
            chunksize: int = None) -> None:
        """Run ``Pool.map`` and save one record per returned element."""
        self.reset_result()
        _running_result = None
        _exception = None

        try:
            _running_result = self._Processors_Pool.map(
                func=function, iterable=args_iter, chunksize=chunksize)
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        self._save_batch_results(_running_result, _run_successful, _exception)

    def async_map(self, function: Callable, args_iter: IterableType = (),
                  chunksize: int = None, callback: Callable = None,
                  error_callback: Callable = None) -> None:
        """Run ``Pool.map_async``, wait for it, and save one record per element."""
        self.reset_result()
        _running_result = None
        _exception = None

        _map_result = self._Processors_Pool.map_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)

        try:
            _running_result = _map_result.get()
            _run_successful = _map_result.successful()
        except Exception as e:
            _exception = e
            _run_successful = False

        self._save_batch_results(_running_result, _run_successful, _exception)

    def map_by_args(self, function: Callable,
                    args_iter: IterableType[IterableType] = (),
                    chunksize: int = None) -> None:
        """Run ``Pool.starmap`` and save one record per returned element."""
        self.reset_result()
        _running_result = None
        _exception = None

        try:
            _running_result = self._Processors_Pool.starmap(
                func=function, iterable=args_iter, chunksize=chunksize)
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        self._save_batch_results(_running_result, _run_successful, _exception)

    def async_map_by_args(self, function: Callable,
                          args_iter: IterableType[IterableType] = (),
                          chunksize: int = None, callback: Callable = None,
                          error_callback: Callable = None) -> None:
        """Run ``Pool.starmap_async``, wait for it, and save one record per element.

        Unlike the other map-style methods, an exception raised while waiting
        for the result is not captured here and propagates to the caller.
        """
        self.reset_result()

        _map_result = self._Processors_Pool.starmap_async(
            func=function,
            iterable=args_iter,
            chunksize=chunksize,
            callback=callback,
            error_callback=error_callback)
        _running_result = _map_result.get()
        _run_successful = _map_result.successful()

        self._save_batch_results(_running_result, _run_successful, None)

    def imap(self, function: Callable, args_iter: IterableType = (),
             chunksize: int = 1) -> None:
        """Run ``Pool.imap``, drain the iterator, and save one record per element."""
        self.reset_result()
        _running_result = None
        _exception = None

        try:
            _running_result = list(self._Processors_Pool.imap(
                func=function, iterable=args_iter, chunksize=chunksize))
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        self._save_batch_results(_running_result, _run_successful, _exception)

    def imap_unordered(self, function: Callable, args_iter: IterableType = (),
                       chunksize: int = 1) -> None:
        """Run ``Pool.imap_unordered``, drain it, and save one record per element."""
        self.reset_result()
        _running_result = None
        _exception = None

        try:
            _running_result = list(self._Processors_Pool.imap_unordered(
                func=function, iterable=args_iter, chunksize=chunksize))
            _run_successful = True
        except Exception as e:
            _exception = e
            _run_successful = False

        self._save_batch_results(_running_result, _run_successful, _exception)

    def _collect_async_results(self) -> None:
        """Wait on every handle in ``_Processors_List`` and save one record each."""
        for process in self._Processors_List:
            _running_result = None
            _exception = None

            try:
                _running_result = process.get()
                _run_successful = process.successful()
            except Exception as e:
                _exception = e
                _run_successful = False

            # Save Running result state and Running result value as dict
            self._result_saving(successful=_run_successful,
                                result=_running_result,
                                exception=_exception)

    def _save_batch_results(self, results, successful: bool,
                            exception: Optional[Exception]) -> None:
        """Save one record per element of ``results``, or one failure record.

        When the batch raised there are no elements to iterate over, so a
        single record carrying the exception is stored; otherwise the failure
        would leave no trace in the saved results.
        """
        if exception is not None:
            self._result_saving(successful=False, result=None,
                                exception=exception)
            return

        for _result in (results or []):
            self._result_saving(successful=successful, result=_result,
                                exception=None)

    def _result_saving(self, successful: bool, result: List,
                       exception: Exception) -> None:
        """Append one ``{"successful", "result", "exception"}`` record."""
        _process_result = {
            "successful": successful,
            "result": result,
            "exception": exception
        }
        self._Processors_Running_Result.append(_process_result)

    def close(self) -> None:
        """Stop accepting work and wait for the worker processes to exit."""
        self._Processors_Pool.close()
        self._Processors_Pool.join()

    def terminal(self) -> None:
        """Terminate the pool immediately."""
        self._Processors_Pool.terminate()

    def get_result(self) -> List[_ProcessPoolResult]:
        """Return the saved results as provided by ``result()``."""
        return self.result()

    def _saving_process(self) -> List[_ProcessPoolResult]:
        """Convert the saved record dicts into ``_ProcessPoolResult`` objects.

        Only the success flag and the result value are copied over.
        """
        _pool_results = []
        for _record in self._Processors_Running_Result:
            _pool_result = _ProcessPoolResult()
            _pool_result.is_successful = _record["successful"]
            _pool_result.data = _record["result"]
            _pool_results.append(_pool_result)
        return _pool_results
from multiprocessing.pool import Pool
import os
import sys


def find_item(d):
    """Print directory ``d`` and its entries, submitting each subdirectory to the pool.

    A directory that cannot be listed is reported as ``<path>: <error>`` and
    skipped.
    """
    try:
        files = os.listdir(d)
    except OSError as err:
        print("%s: %s" % (d, err))
        return

    print(d)
    for item in files:
        fullitem = os.path.join(d, item)
        if not os.path.isdir(fullitem):
            print(fullitem)
            continue
        # Subdirectories are handed to the shared module-level pool.
        tPool.apply_async(find_item, [fullitem])
        # findItem(fullitem)


# Start directory: first command-line argument, else the current directory.
s_dir = sys.argv[1] if len(sys.argv) > 1 else "./"

tPool = Pool(2)
tPool.apply(find_item, [s_dir])
tPool.close()
tPool.join()
# findItem(d)