def main():
    pool = Pool(multiprocessing.cpu_count())
    for url in urls:
        pool.apply(Request, (url, ))
    # pool.map(Request, urls)
    pool.close()
    pool.join()
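The commented-out pool.map call hints at the more idiomatic form: apply blocks on each URL in turn, while map hands the whole list to the pool at once. A minimal sketch of that variant, assuming Request is the per-URL worker from the snippet and that a thread-backed pool is acceptable; fetch_all is an illustrative name introduced here.

import multiprocessing
from multiprocessing.dummy import Pool  # thread-backed pool; an assumption for this sketch


def fetch_all(urls):
    pool = Pool(multiprocessing.cpu_count())
    try:
        # map fans the URLs out across the workers and returns results in input order
        return pool.map(Request, urls)
    finally:
        pool.close()
        pool.join()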
def RunClientCode(self):
    pool = Pool(1)
    try:
        pool.apply(self._client_app.payload)
        self._base.NextPage()
        self.ShowGoodFinishedDialog()
    except:
        self.ShowBadFinishedDialog(traceback.format_exc())
def RunClientCode(self):
    _pool = Pool(1)
    try:
        _pool.apply(self._client_app.payload)
        self._base.NextPage()
        self.ShowGoodFinishedDialog()
    except:
        self.ShowBadFinishedDialog(traceback.format_exc())
def apply():
    pool = ThreadPool(3)
    for i in range(3):
        pool.apply(work, (3, ))
    print('close')
    pool.close()
    print('join')
    pool.join()
def get_pictures_by_request_multiprocessing(host, url_dict, dir):
    if not os.path.exists(dir):
        os.makedirs(dir)
    # pool = Pool(multiprocessing.cpu_count())
    pool = Pool(1024)
    for key in url_dict:
        s_t = time.time()
        pool.apply(get_pic, (key, ))
        e_t = time.time()
        print("Time cost: " + str(e_t - s_t))
def test_DirectoryWatcherFileCountChanged(self):
    self.watcher.addEventObserver(
        self.watcher.IncomingFileCountChangedEvent,
        lambda event, caller, callData: setattr(self, "fileCountChangedEmitted", True))
    mrHead = self.sampleDataLogic.sourceForSampleName('MRHead')
    self.sampleDataLogic.downloadFile(mrHead.uris[0], self.tempDir, mrHead.fileNames[0])
    mrHeadPath = os.path.join(self.tempDir, mrHead.fileNames[0])
    shutil.copy(mrHeadPath, os.path.join(self.watchedDirectory, mrHead.fileNames[0]))

    def checkTimerExecuted():
        while not self.fileCountChangedEmitted:
            print "timer not done"
            time.sleep(1)
        print "timer done"

    pool = ThreadPool(4)
    results = pool.apply(checkTimerExecuted)
    pool.close()
    pool.join()
    # t = Thread(target=checkTimerExecuted)
    # t.start()
    # t.run()
    self.assertTrue(self.fileCountChangedEmitted)
    self.watcher.removeEventObservers()
def target_process(target_path):
    process_result = []
    thread_pool = Pool(cpu_count() * 10)
    for target in target_path:
        process_result.append(thread_pool.apply(exp_string, args=(target, )))
    thread_pool.close()
    thread_pool.join()
    return process_result
def run(self):
    # Upload basic network-device information on every agent start, then once every 4 hours
    if self.net_equip_details:
        for value in self.net_equip_details.values():
            target = value.get('target')
            if target and target != "unknown":
                target.last_time_basis_info_collected = 0
    ping_timeout = int(self.config['ping'][0]['timeout'])
    ping_retries = int(self.config['ping'][0]['retries'])
    '''
    Scan the network segment, fetch and save network-device node information,
    and upload device online status. This metric is collected very quickly,
    so no thread pool is needed. A Target instance is created for every
    network device and placed in the targets collection.
    '''
    network_scan = Thread(target=self.net_segment_scan, args=(ping_timeout, ping_retries))
    network_scan.start()
    '''
    Take targets from target_queue and, according to each collection interval,
    put collection tasks into task_queue.
    '''
    task_generation = Thread(target=self.collect_task_producer)
    task_generation.start()
    '''
    Take tasks from task_queue and process them concurrently with a thread pool.
    '''
    pool = ThreadPool(self.pool_size)
    while self.continue_running:
        task = self.task_queue.get()
        try:
            pool.apply(task[1][0], (task[1][1], ))
        except Exception as e:
            log.warn('Task {0} error {1}'.format(task, e))
        '''Record to the log the number of each metric sent during the past RECORD_INTERVAL.'''
        self.metrics_amount_record()
    pool.close()
    pool.join()
def __init__(self, session, database):
    print(colored(' processing...', 'green'))

    # load processing settings from yaml file
    self.settings = load_yaml(processing_options['cfg'])
    self.session = session
    self.database = database

    # Process tracking data
    for data_name, tracking_data in sorted(list(self.session.Tracking.items())):
        try:
            if data_name == 'Exploration' or data_name == 'Whole Session':
                print(colored(' processing currently only supports processing of trial data, not {}'.format(
                    data_name), 'yellow'))
                continue

            print(colored(' Trial {}'.format(data_name), 'green'))
            self.tracking_data = tracking_data

            # Save info about processing options in the metadata
            self.define_processing_metadata()

            # Apply processes in parallel
            # TODO use decorator to make sure that functions are automatically added to the list, avoid bugs
            funcs = [self.extract_bodylength, self.extract_velocity,
                     self.extract_location_relative_shelter, self.extract_orientation]
            pool = ThreadPool(len(funcs))
            [pool.apply(func) for func in funcs]

            # Other processing steps will not be done in parallel
            self.extract_ang_velocity()

            # PoseReconstructor(self.tracking_data.dlc_tracking['Posture'])  # WORK IN PROGRESS, buggy
        except:
            warnings.warn('Could not analyse this trial!!!')  # but avoid crashing the whole analysis
            print(colored(' trial {} could not be processed!'.format(data_name), 'yellow'))
            slack_chat_messenger('Could not process trial {}'.format(data_name))

    # Call experiment specific processing tools [only implemented for maze experiments]
    if self.settings['apply exp-specific']:
        ProcessingTrialsMaze(self.session, debugging=self.settings['debug exp-specific'])
        ProcessingSessionMaze(self.session)
    else:
        from warnings import warn
        warn('Experiment type {} is not supported yet'.format(exp_type))
# list of pmid ids
lst = ['9749847','9755759','9757885','9766300','9769004','9784814','9800512','9804224','9877391','9879865','9888139','9918973','9919913']
lst = ['9749847']
lst = [['9749847','9755759'],['9757885','9766300','9769004'],['9784814','9800512','9804224','9877391'],['9879865','9888139','9918973','9919913']]
lst = ['9749847','9755759','9757885','9766300','9769004','9784814','9800512','9804224','9877391','9879865','9888139','9918973','9919913']


# define function
def list_records(item):
    record = Entrez.read(Entrez.elink(dbfrom="pubmed", db="pmc",
                                      LinkName="pubmed_pmc_refs", from_uid=item))
    # print(item)
    return(record)


pool = mp.Pool(processes=4)
results = [pool.apply(list_records, args=(lst, ))]
print(results)
output = results  # pool.apply returns the result directly, so no .get() is needed
print(output)


# define an example function
def rand_string(length, output):
    """ Generates a random string of numbers, lower- and uppercase chars. """
    rand_str = ''.join(random.choice(
        string.ascii_lowercase + string.ascii_uppercase + string.digits)
        for i in range(length))
    output.put(rand_str)
# apply_async
print('\n------apply_async-------')
async_pool = ThreadPool(processes=4)
results = []
for i in range(5):
    msg = 'msg: %d' % i
    result = async_pool.apply_async(fun, (msg, ))
    results.append(result)
print('apply_async: does not block')

# async_pool.close()
# async_pool.join()
for i in results:
    i.wait()  # wait for the worker function to finish
for i in results:
    if i.ready():  # has the call completed?
        if i.successful():  # did the call complete without raising?
            print(i.get())  # return value of the worker function

# apply
print('\n------apply-------')
pool = ThreadPool(processes=4)
results = []
for i in range(5):
    msg = 'msg: %d' % i
    result = pool.apply(fun, (msg, ))
    results.append(result)
print('apply: blocks')
print(results)
pre_url = "https://zh.wikipedia.org"
test_url = "https://zh.wikipedia.org/wiki/Category:日本的大學教師"
test2_url = "https://zh.wikipedia.org/wiki/Category:各國政治人物"
start_url = "https://zh.wikipedia.org/wiki/Category:按國籍分類"

wiki_of = open('process.txt', 'w+')
urlqueue = Queue.LifoQueue()
start_time = time.time()
root = category(start_url)
while not urlqueue.empty():
    pool.apply(new_category, (urlqueue.get(),))
    sleep(delay)
pool.close()
pool.join()
print time.time() - start_time
print "All subprocesses done."
wiki_of.close()

# def get_info(url):
#     r = requests.get(url)
#     print r.text
#     print r.encoding
class ParallelDownloader(URL_Fetcher):
    'Parallel threaded web page downloader'

    def __init__(self, db_name, proc_count, site_base_url, fUseCache=True,
                 fCacheSearchPages=True, fUseCookies=False,
                 timeout=secHTTP_WAIT_TIMEOUT, search_proc_count=2, proxies=None):
        self.proxies = proxies
        self.queue = Queue()
        self.fSaveSearchPages = fCacheSearchPages
        self.site_base_url = site_base_url
        self.pool = Pool(processes=proc_count)
        self.search_queue = Queue()
        self.url_extract_pool = Pool(processes=search_proc_count)
        URL_Fetcher.__init__(self, db_name, fUseCache, fUseCookies, timeout=timeout, proxies=proxies)

    def process_urls_from_search_queue(self):
        while not self.search_queue.empty():
            search_page_url = self.search_queue.get()
            # logOut('search pages queue size: %d' % self.search_queue.qsize())
            logDbg('search page: %s' % search_page_url)
            search_page = self.get_page(search_page_url, fUseCache=self.fSaveSearchPages)
            rel_urls = extract_data_xpath(search_page, self.url_extract_xpath)
            # rel_urls = self.extract_page_xpath(self.url_extract_xpath, search_page_url)
            # logOut('URLs from %s extracted' % search_page_url)
            logOut('%d urls extracted from [%s]. Queuing...' % (len(rel_urls), search_page_url))
            logDbg('Extracted urls: %s. Queuing to download...' % rel_urls)
            list(map(self.queue.put, self.prefix_site_base_url(rel_urls)))
            self.queue.put(None)
            self.postprocess_search_page_list(rel_urls, search_page)

    def queue_pages(self, url_list):
        list(map(self.queue.put, url_list))
        # sentinel marking the end of the task queue
        self.queue.put(None)

    def postprocess_search_page_list(self, url, page):
        pass

    def prefix_site_base_url(self, rel_urls):
        return [self.site_base_url + url for url in rel_urls]

    def process_pages(self, page_processor, *add_processor_args):
        self.page_processor = page_processor
        self.add_pprocessor_args = add_processor_args
        self.pool.apply(self.process_page)

    def process_page(self):
        while True:
            url = self.queue.get()
            logDbg('Url got from queue: %s' % url)
            if not url:
                break
            page = self.get_page(url)  # , proxies=self.proxies
            # logOut('pp_arg_list: [%s]' % pp_arg_list)
            if page:
                self.page_processor(url, page, *self.add_pprocessor_args)
    prob = [i[1] for i in prob]
    prob = np.array(prob, dtype='float32')
    probs.append(prob)
    # print probs
    prob_s = np.array(probs)
    prob_s = sum(prob_s) / np.sum(probs)
    prob_s = [float(i) for i in prob_s]
    return dict(zip(['0', '1'], prob_s))


def worker(i):
    print i['stream']['id']
    print i['stream']['timestamp']
    probs = ftpredict(i['stream']['tweet'])
    print probs
    db.stream.find_one_and_update({'_id': i['_id'], 'probs': None},
                                  {'$set': {'probs': probs}})
    print 'done!'


if __name__ == '__main__':
    while True:
        pool = Pool(1)
        latest = db.time.find_one({'name': 'stream_score_entropy'})['time']
        latest_stream = db.stream.find_one({'stream.timestamp': {'$gt': latest}},
                                           sort=[('stream.timestamp', pymongo.DESCENDING)])
        if latest_stream != None:
            db.time.find_one_and_update({'name': 'stream_score_entropy'},
                                        {'$set': {'time': latest_stream['stream']['timestamp']}})
            [pool.apply(worker, (i,)) for i in db.stream.find({'stream.timestamp': {'$gt': latest}},
                                                              sort=[('stream.timestamp', pymongo.DESCENDING)])]
            pool.close()
            pool.join()
            time.sleep(60 * 60)
        else:
            time.sleep(5 * 60)
class RedisServer:
    def __init__(self, selector, sock, host='127.0.0.1', port=8880):
        self.datas = {
            'ZSET': ZSetStore(),
            'STR': StrStore(),
            'SET': SetStore(),
            'HASH': HashStore(),
            'LIST': ListStore()
        }
        self.host = host
        self.port = port
        self.selector = selector
        self.sock = sock
        self.commands_map = {}
        self.pool = ThreadPool(processes=4)
        self.lock = Lock()

    def load(self):
        if os.path.exists('redis.db'):
            with open('redis.db', 'rb') as f:
                datas = pickle.load(f)
            for k in self.datas:
                self.datas[k].load(datas[k])
        else:
            self.dump()

    def dump(self):
        with self.lock:
            datas = {}
            for k in self.datas:
                datas[k] = self.datas[k].dump()
            with open('redis.db', 'wb') as f:
                pickle.dump(datas, f)

    def run(self):
        self.register_commands()
        self.load()
        self.process_request()

    def register_commands(self):
        for k in self.datas:
            command_map = self.datas[k].register_command()
            self.commands_map.update(command_map)

    def execute_command(self, command):
        commands = command.split('\r\n')
        rows = int(commands[0][1])
        method = commands[2].upper()
        if rows == 2:
            method, key = method, commands[4]
            logger.info("execute %s", ' '.join([method, key]))
            try:
                message = self.commands_map[method](key)
            except Exception:
                logger.error("execute %s", ' '.join([method, key]))
                return 'Error'
            return message
        elif rows == 3:
            method, key, value = method, commands[4], commands[6]
            logger.info("execute %s", ' '.join([method, key, value]))
            try:
                message = self.commands_map[method](key, value)
                if message == None:
                    message = 'OK'
            except Exception:
                logger.error("execute %s", ' '.join([method, key, value]))
                return 'Error'
            return message
        elif rows == 4:
            method, key, value, value2 = method, commands[4], commands[6], commands[8]
            logger.info("execute %s", ' '.join([method, key, value, value2]))
            try:
                message = self.commands_map[method](key, value, value2)
                if message == None:
                    message = 'OK'
            except Exception:
                logger.error("execute %s", ' '.join([method, key, value, value2]))
                return 'Error'
            return message
        else:
            logger.error("execute %s", ''.join(commands))
            return 'Error'

    def process_request(self):
        logger.info("listen to %s:%s" % (self.host, self.port))
        self.sock.bind((self.host, self.port))
        self.sock.listen(1000)
        self.sock.setblocking(False)
        self.selector.register(self.sock, selectors.EVENT_READ, self.accept)
        while True:
            events = self.selector.select()
            for key, mask in events:
                callback = key.data
                callback(key.fileobj, mask)

    def accept(self, sock, mask):
        conn, addr = sock.accept()
        logger.info("accepted conn from %s", addr)
        conn.setblocking(False)
        self.selector.register(conn, selectors.EVENT_READ, self.read)

    def read(self, conn, mask):
        data = conn.recv(1024)
        command = str(data, encoding="utf8")
        if command != 'exit':
            message = self.pool.apply(self.execute_command, (command, ))
            self.dump()
            conn.send(message.encode('utf8'))
        elif command == 'exit':
            print('closing', conn)
            self.selector.unregister(conn)
            conn.close()
# -*- coding: utf-8 -*-
# @Time   : 2020/9/21 10:57
# @Author : Fcvane
# @Param  :
# @File   : tmp_1.py
from multiprocessing.dummy import Pool
import time
import os


def talk(msg):
    print('msg:', msg)
    time.sleep(3)
    print('end')


if __name__ == "__main__":
    print('Starting the program:')
    start_time = time.time()
    pool = Pool(3)
    print('Starting three child processes')
    for i in range(6):
        pool.apply(talk, [i])
    print('pid %s: main process finished, total time: %s' % (os.getpid(), time.time() - start_time))
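Because apply blocks for the full three-second sleep on every call, the six calls above take roughly 18 seconds despite the pool of 3. A hedged sketch of the asynchronous variant using apply_async, so the workers actually overlap; this is an illustration, not the original script.

from multiprocessing.dummy import Pool
import os
import time


def talk(msg):
    print('msg:', msg)
    time.sleep(3)
    print('end')


if __name__ == "__main__":
    start_time = time.time()
    pool = Pool(3)
    results = [pool.apply_async(talk, [i]) for i in range(6)]
    pool.close()
    pool.join()  # 3 workers x 6 three-second tasks finish in about 6 seconds
    print('pid %s: total time: %s' % (os.getpid(), time.time() - start_time))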
def parse_link(response):
    print(response.url)
    content = response.content
    selector = etree.HTML(content)
    urls = selector.xpath('//img/@src')
    urls = [url for url in urls if 'sinaimg' in url]
    if urls:
        for url in urls:
            URI.put_nowait((url, 'img'))


def images(response):
    print(response.url)
    content = response.content
    try:
        with open(response.url.split(r'/')[-1], 'wb') as img:
            img.write(content)
    except Exception as e:
        print(e)


if __name__ == '__main__':
    pool = Pool(10)
    for num in range(1, 98):
        URI.put_nowait((URL.format(num), 'first'))
    pool.apply(main)
    pool.close()
    pool.join()
    print(time.time() - start)
def save(self, suffix, services=None, only_tag=False, only_push=False,
         no_interaction=False, text=False):
    if only_tag and only_push:
        print 'Please note: only one of [--only-tag|--only-push] can be used.'
        return
    _show = self._probe(services, merge=False)
    table_data = []
    if not _show:
        return
    longest_image = max([len(v['image']) for v in _show])
    longest_service = max([len(v['service']) for v in _show])
    longest_imageId = max([len(v['Id']) for v in _show])
    longest_match = max([len(v['Match']) for v in _show])
    title = '\n {image:<{longest_image}} | {service:<{longest_service}} | {imageid:<{longest_imageId}} | {match:<{longest_match}}' \
            '\n {ind:-<{wedth}}'.format(
                image='Image',
                service='Service',
                imageid='Image-Id',
                match='Match',
                longest_image=longest_image,
                longest_service=longest_service,
                longest_imageId=longest_imageId,
                longest_match=longest_match,
                ind='-',
                wedth=longest_image + longest_service + longest_imageId + longest_match + 9)
    for v in _show:
        table_data.append(
            '{image:<{longest_image}} | {service:<{longest_service}} | {imageid:<{longest_imageId}} | {match:<{longest_match}}'
            .format(image=v['image'],
                    service=v['service'],
                    imageid=v['Id'],
                    match=v['Match'],
                    longest_image=longest_image,
                    longest_service=longest_service,
                    longest_imageId=longest_imageId,
                    longest_match=longest_match))
    selected_service = []
    if no_interaction:
        selected_service.extend(_show)
    else:
        try:
            selected = pick(
                table_data,
                'Please choose your images for save (press SPACE to mark, ENTER to continue, Ctrl+C to exit): ' + title,
                indicator='*',
                multi_select=True,
                min_selection_count=0)
        except KeyboardInterrupt:
            return
        if selected:
            for s in selected:
                v = _show[s[1]]
                selected_service.append(v)
        else:
            print 'Select 0 image.'
            return
    # confirm_input(msg='Select these images.')
    print 'List:'
    _skip = False
    not_ready = []
    for s in selected_service:
        _action = 'skip'
        if s['Id'] == '':
            _action += '(not exist)'
            _skip = True
        elif s['Match'] != '':
            _action += '(not match)'
            _skip = True
        else:
            _action = 'do'
        s['Action'] = _action
        if _action == 'do':
            _action = Color('{autogreen}%s{/autogreen}' % (_action))
        else:
            _action = Color('{autored}%s{/autored}' % (_action))
            not_ready.append(s)
        print '{action:<25} {image_old:<{longest_image}} => {image_new:<{longest_image}}'.format(
            action=_action,
            longest_image=longest_image,
            image_old=s['image'],
            image_new=s['image'] + suffix)
    if only_tag:
        _msg = 'Tag these images.'
    elif only_push:
        _msg = 'Push these images.'
    else:
        _msg = 'Tag and Push these images.'
    if _skip:
        if no_interaction:
            _msg += Color(
                '\n{autored}These service`s image is not ready, please fix it first.{/autored}'
            )
            print _msg
            table_data = []
            table_instance = SingleTable(table_data, 'Not Ready')
            table_data.append(
                ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])
            for s in not_ready:
                table_data.append([
                    s['image'] + '\n' + s['Match'], s['service'], s['Id'],
                    s['Created'], s['Labels']
                ])
            table_instance.inner_heading_row_border = False
            table_instance.inner_row_border = True
            print table_instance.table
            sys.exit(-1)
        else:
            _msg += Color('\n{autored}Some service`s image is not ready. \n'
                          'You can use k2-compose pull/up to fix it, otherwise these images will be skipped.{/autored}\n')
    confirm_input(msg=_msg)
    if not only_push:
        pool = ThreadPool(len(selected_service))
        for s in selected_service:
            if s['Action'] == 'do':
                container = self.get_container_instance_by_service_name(
                    s['service'])
                pool.apply(container.tag, (suffix, ))
        pool.close()
        pool.join()
        if only_tag:
            return
    _result = []
    pool = ThreadPool(len(selected_service))
    print 'Pushing...'
    for s in selected_service:
        if s['Action'] == 'do':
            container = self.get_container_instance_by_service_name(
                s['service'])
            _result.append(pool.apply_async(container.push, (suffix, )))
    pool.close()
    pool.join()
    for r in _result:
        print r.get()
    print Color('{autogreen}Push all done.{/autogreen}\n')
    if text:
        print "#".join(
            ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])
        for s in selected_service:
            print "#".join((s['image'], s['service'], s['Id'],
                            s['Created'], s['Labels'].replace('\n', ' ')))
    else:
        table_data = []
        table_instance = SingleTable(table_data, 'Done')
        table_data.append(
            ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])
        for s in selected_service:
            table_data.append([
                s['image'], s['service'], s['Id'], s['Created'],
                s['Labels']
            ])
        table_instance.inner_heading_row_border = False
        table_instance.inner_row_border = True
        print table_instance.table
    return
from multiprocessing.dummy import Pool as ThreadPool
import requests

URL_TO_TEST = "http://127.0.0.1:8009"


def hammer_it():
    with open("test_http/index.html.zip", "rb") as f:
        files = {"file": f}
        res = requests.post(URL_TO_TEST, files=files)
        print(res.status_code)


pool = ThreadPool(8)
for _ in range(1, 10):
    pool.apply(hammer_it)
class ThreadPool:
    # multiprocessing.dummy.Pool with exc_info in error_callback
    def __init__(self,name=None,processes=None):
        self._processes=processes
        self._pool=NamedPool(self._processes,name=name)
        self._lock=Lock()  # lock for self
        self._cblock=Lock()  # lock for callback
        self._errcblock=Lock()  # lock for error_callback
        self._closed=False
        self.name=name

    def apply(self,*args,**kwargs):
        return self._pool.apply(*args,**kwargs)

    def map(self,*args,**kwargs):
        return self._pool.map(*args,**kwargs)

    def map_async(self,*args,**kwargs):
        return self._pool.map_async(*args,**kwargs)

    def imap(self,*args,**kwargs):
        return self._pool.imap(*args,**kwargs)

    def imap_unordered(self,*args,**kwargs):
        return self._pool.imap_unordered(*args,**kwargs)

    def starmap(self,*args,**kwargs):
        return self._pool.starmap(*args,**kwargs)

    def starmap_async(self,*args,**kwargs):
        return self._pool.starmap_async(*args,**kwargs)

    def join(self):
        return self._pool.join()

    def _uiter(self,iterable):
        buf=[]
        for item in iterable:
            if item in buf:
                continue
            yield item
            buf.append(item)
        buf.clear()

    def _trycall(self,func,args=(),kwargs={},lock=None):
        if not callable(func):
            return
        with lock:
            try:
                return func(*args,**kwargs)
            except:
                pass

    def _caller(self,func,args,kwargs,callback,error_callback,exc_raise):
        try:
            result=func(*args,**kwargs)
        except:
            etype,value,tb=sys.exc_info()
            self._trycall(error_callback,args=(self.name,etype,value,tb),
                          lock=self._errcblock)
            if exc_raise:
                raise etype(value)
        else:
            self._trycall(callback,args=(result,),
                          lock=self._cblock)
            return result

    def apply_async(self,func,args=(),kwargs={},
                    callback=None,error_callback=None):
        # run error_callback with ThreadPool.name and exc_info if func failed,
        # callback and error_callback will *not* run in multi thread.
        # other arguments is same as Pool.apply_async
        return self._pool.apply_async(
            self._caller,(func,args,kwargs,None,error_callback,True),
            callback=callback)

    def cbmap(self,func,iterable,callback=None,error_callback=None):
        # shortcut of:
        #
        # for item in iterable:
        #     apply_async(func,args=(item,),kwargs={},
        #                 callback=callback,error_callback=error_callback)
        #
        # always return None
        for item in iterable:
            self.apply_async(func,args=(item,),
                             callback=callback,error_callback=error_callback)

    def ucbmap(self,func,iterable,callback=None,error_callback=None):
        # unique version of ThreadPool.cbmap
        return self.cbmap(func,self._uiter(iterable),callback,error_callback)

    def umap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.map
        return self.map(func,self._uiter(iterable),chunksize=chunksize)

    def umap_async(self,func,iterable,chunksize=None,
                   callback=None,error_callback=None):
        # unique version of ThreadPool.map_async
        return self.map_async(
            func,self._uiter(iterable),chunksize,
            callback,error_callback)

    def uimap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.imap
        return self.imap(func,self._uiter(iterable),chunksize)

    def uimap_unordered(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.imap_unordered
        return self.imap_unordered(func,self._uiter(iterable),chunksize)

    def ustarmap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.starmap
        return self.starmap(func,self._uiter(iterable),chunksize)

    def ustarmap_async(self,func,iterable,chunksize=None,
                       callback=None,error_callback=None):
        # unique version of ThreadPool.starmap_async
        return self.starmap_async(
            func,self._uiter(iterable),chunksize,
            callback,error_callback)

    def close(self):
        # same as Pool.close
        self._closed=True
        return self._pool.close()

    def terminate(self):
        # same as Pool.terminate
        self._closed=True
        return self._pool.terminate()

    def renew(self):
        # terminate all processes and start a new clean pool
        with self._lock:
            self.terminate()
            self._pool=Pool(self._processes)
            self._closed=False

    @property
    def closed(self):
        # True if ThreadPool closed
        return self._closed

    def __enter__(self):
        return self

    def __exit__(self,etype,value,tb):
        self.terminate()
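A minimal usage sketch for the wrapper above, assuming the module's own imports (NamedPool, Lock, Pool, sys) are available; the fetch worker and on_error handler are illustrative names introduced here to show how the (name, exc_info) error callback is consumed.

import traceback


def fetch(item):
    # illustrative worker: fail on one input to exercise error_callback
    if item == "bad":
        raise ValueError("cannot fetch %r" % item)
    return item.upper()


def on_error(pool_name, etype, value, tb):
    # receives the pool name plus exc_info, as passed by ThreadPool._caller
    print("pool %s failed:" % pool_name)
    traceback.print_exception(etype, value, tb)


pool = ThreadPool(name="fetchers", processes=4)
pool.ucbmap(fetch, ["a", "bad", "a", "b"],  # unique map: the duplicate "a" is skipped
            callback=print, error_callback=on_error)
pool.close()
pool.join()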
class Spider(Counter, HTML, File):
    '''This class is used to crawl web page'''
    allow_crawl = True

    def __init__(self):
        HTML.__init__(self)
        Counter.__init__(self)
        File.__init__(self)
        self.pool = None
        self.p = None
        self.last_links = list()
        self.crawled = set()
        self.dir_name = None

    # Create a directory of start crawling point
    def create_dir_name(self, url):
        url = url.replace('//', '_')
        url = url.replace('/', '_')
        url = url.replace('.', '_')
        url = url.replace(':', '_')
        return 'data/' + url

    # Get and set environment for crawling
    def start_crawl(self):
        last_file = None
        url = ui.lineEdit.text()
        self.dir_name = self.create_dir_name(url)
        self._create_directory(self.dir_name)
        if os.path.exists(self.dir_name):
            last_file = self.get_last_file(self.dir_name)
        if url != '':
            if last_file is not None:
                file = self.load_file(last_file)
                self.rank = file['rank']
                self.http_error = file['http_error']
                self.url_error = file['url_error']
                self.last_links = file['last_links']
                self.crawled = file['crawled']
            Spider.allow_crawl = True
            if len(self.last_links) != 0:
                self.p = Thread(target=self.crawl, args=(self.last_links, ))
                self.p.start()
            else:
                self.p = Thread(target=self.crawl, args=(url, ))
                self.p.start()

    # Stop crawling
    def stop_crawl(self):
        Spider.allow_crawl = False
        self.pool.terminate()
        self.pool.join()
        self.p.join(timeout=5)
        data = {
            'rank': self.rank,
            'http_error': self.http_error,
            'url_error': self.url_error,
            'last_links': self.last_links,
            'crawled': self.crawled
        }
        file_path = self.dir_name + '/' + self.create_file_name()
        self.save_file(file_path, data)

    # Process of each crawling
    def process(self, url):
        if url not in self.crawled:
            error, html = self.get_html(url)
            if html is not None:
                if type(html) is str:
                    if ui.bot_list_widget.count() > ui.item_limit:
                        pool = Pool(1)
                        pool.apply_async(ui.delete_list_widget, args=(ui.bot_list_widget, ))
                    text = 'Retrieved {}.'.format(url)
                    text_item = QtWidgets.QListWidgetItem(text)
                    ui.bot_list_widget.addItem(text_item)
                    plain_text = self.clean_html_tag(html)
                    self.count(plain_text, url)
                    return [url, self.get_link(html)]
            else:
                if ui.bot_list_widget.count() > ui.item_limit:
                    pool = Pool(1)
                    pool.apply_async(ui.delete_list_widget, args=(ui.bot_list_widget, ))
                text = '{}'.format(error)
                text_item = QtWidgets.QListWidgetItem(text)
                ui.bot_list_widget.addItem(text_item)

    # Second crawl step
    def _continue_crawl(self, url):
        url = list(filter(None, url))
        links = list()
        file_path = self.dir_name + '/' + self.create_file_name()
        for each_link in self.pool.imap_unordered(self.process, url):
            if each_link is not None:
                if each_link[1] is not None:
                    links.extend(each_link[1])
                self.crawled.add(each_link[0])
        if len(links) != 0:
            self.last_links = links
        data = {
            'rank': self.rank,
            'http_error': self.http_error,
            'url_error': self.url_error,
            'last_links': self.last_links,
            'crawled': self.crawled
        }
        self.save_file(file_path, data)
        while Spider.allow_crawl and len(self.last_links) != 0:
            links = list()
            # url = list(filter(None, url))
            # url = list(itertools.chain.from_iterable(url))
            for each_link in self.pool.imap_unordered(
                    self.process, self.last_links):
                if each_link is not None:
                    if each_link[1] is not None:
                        links.extend(each_link[1])
                    self.crawled.add(each_link[0])
            data = {
                'rank': self.rank,
                'http_error': self.http_error,
                'url_error': self.url_error,
                'last_links': self.last_links,
                'crawled': self.crawled
            }
            self.save_file(file_path, data)
            if len(links) != 0:
                self.last_links = links

    # First crawl step
    def crawl(self, url):
        self.pool = Pool(30)
        if type(url) is not list:
            links = self.pool.apply(self.process, args=(url, ))
            if links is not None:
                links[1] = list(filter(None, links[1]))
                if len(links[1]) != 0:
                    self.last_links = links[1]
                    self.crawled.add(links[0])
                    self._continue_crawl(links[1])
        else:
            self._continue_crawl(url)
def pr_j():
    if i % 2 == 1:
        print(os.getpid(), i)


def pr_o():
    if i % 2 == 0:
        print(os.getpid(), i)


if __name__ == '__main__':
    # create a process pool with the specified number of processes
    pool1 = Pool(3)
    for i in range(3):
        pool1.apply(task)
        pool1.apply(pr_j)
        pool1.apply(pr_o)
    # all tasks have been scheduled to the pool; it needs to be closed
    pool1.close()
    pool1.join()
    print("The process pool has finished all tasks!")

# 2. Process A sends data to queue Q1 and reads from queue Q2; process B sends data to Q2 and reads from Q1
# 3. Process A sends integers to queue Q1; process B reads from Q1, multiplies each number by 2 and puts it on Q2;
#    process C reads from Q2, squares the values and prints them
# 4. Use a process pool and queues to implement the following:
class Downloader:
    """ Downloader main class """

    def __init__(self, base_url='{}', base_path='', logger=logging.getLogger('log')):
        """ Generate downloader function via params """
        self.logger = logger
        self.logger.info("Downloader Starting...")
        self.pool = Pool(10)  # 10 processes, configurable later
        self.dl_list = []
        self.base_url = str(base_url)
        self.base_path = str(base_path)

    def get_status(self, clear=False):
        downloaded = []
        downloading = []
        failed = []
        for i in range(len(self.dl_list) - 1, -1, -1):
            result = self.dl_list[i]
            if result[1].ready() is False:
                downloading.append(result[0])
                if clear:
                    self.dl_list.pop(i)
                continue
            if result[1].get() is False:
                failed.append(result[0])
                continue
            downloaded.append(result[0])
            if clear:
                self.dl_list.pop(i)
        return {
            'Downloaded': downloaded,
            'Downloading': downloading,
            'Failed': failed
        }

    def dl_sync(self, url, path='', fn=''):
        return self.pool.apply(self.dl, (url, path, fn, ))

    def download(self, url, path='', fn='', referer=''):
        r = self.pool.apply_async(self.dl, (url, path, fn, referer, ))
        self.dl_list.append((url, r))

    def dl(self, url, path='', fn='', referer=''):
        try:
            path = self.make_sure_path(os.path.join(self.base_path, path))
            if isinstance(url, str):
                url = [url]
            url = self.base_url.format(*url)
            if fn == '':
                fn = os.path.basename(url)
            req = urllib.request.Request(url)
            req.add_header('Referer', referer)
            img = urllib.request.urlopen(req)
            if img.status != 200:
                raise Exception('Image cannot be reached({})'.format(img.status))
            with open(os.path.join(path, fn), 'wb') as f:
                f.write(img.read())
        except Exception as e:
            if os.path.exists(os.path.join(path, fn)):
                os.remove(os.path.join(path, fn))
            self.logger.error(
                "Error downloading image: " + url + ' ; err: ' + str(e))
            return False
        self.logger.info('Downloaded: ' + url)
        return True

    def close(self):
        try:
            self.pool.close()
            self.logger.info("Pool closed, waiting for downloads to complete.")
            self.pool.join()
        except Exception as e:
            self.logger.error("Error stopping the downloader: " + str(e))
            self.pool.terminate()
            return False
        self.logger.info("Downloader Stopped...")
        return True

    def make_sure_path(self, path):
        """ Make sure the path exists """
        path = str(path)  # anaconda-mode hasn't supported PEP-0484 yet, QAQ
        path = path.strip()
        if path != '' and not os.path.exists(path):
            try:
                os.makedirs(path)
            except Exception as e:
                self.logger.fatal(
                    "Cannot create dirs: " + path + " ; err: " + str(e))
                self.logger.info("Exiting Program")
                import sys
                sys.exit()
            finally:
                self.logger.info("Created dirs: " + path)
        return path
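A minimal usage sketch for the Downloader above, assuming the module's imports (urllib.request, os, logging) and a reachable image URL; the URL and directory names here are hypothetical. dl_sync blocks on pool.apply and returns the worker's True/False result, while download queues work via apply_async and is polled with get_status.

import logging

logging.basicConfig(level=logging.INFO)
dl = Downloader(base_url='{}', base_path='downloads')

ok = dl.dl_sync('https://example.com/a.png')  # hypothetical URL, blocking call
print('sync download ok:', ok)

dl.download('https://example.com/b.png', referer='https://example.com/')
print(dl.get_status())  # Downloaded / Downloading / Failed lists

dl.close()  # close the pool and wait for queued jobs to finish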
class Service:
    def __init__(self, db, prom, targets, latency_collection_interval, speed_collection_interval):
        """
        Data collection service.

        :param db: SQLite DB file name
        :param prom: Prometheus collector
        :param targets: host to use for collecting latency data
        :param latency_collection_interval: latency data collection interval (in seconds)
        :param speed_collection_interval: network speed data collection interval (in seconds)
        """
        self.db = db
        self.prom = prom
        self.targets = targets
        self.latency_collection_interval = latency_collection_interval
        self.speed_collection_interval = speed_collection_interval
        self.pool = Pool(processes=len(os.sched_getaffinity(0)))
        self.log = logging.getLogger("collection-service")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.pool.close()
        self.pool.terminate()

    def start_latency_collection(self):
        """
        Starts latency data collection and stores it in the configured SQLite database
        (table must already exist).

        **Note**: `Blocks the current thread indefinitely.`

        :return: `nothing`
        """
        with SQLite(self.db) as store:
            while True:
                latencies = self.pool.map(
                    lambda target: (target, from_ping(target)), self.targets)
                for target, latency in latencies:
                    if latency:
                        self.log.debug(
                            "Collected latency data for target [{}]: [{}/{} ({}) | {}] (min/max (avg) | loss)"
                            .format(latency.target, latency.minimum, latency.maximum,
                                    latency.average, latency.loss))
                        store.latency_add(latency)
                        self.prom.latency_add(latency)
                    else:
                        self.log.error(
                            "Failed to collect latency data for target [{}]".format(target))
                time.sleep(self.latency_collection_interval)

    def start_speed_collection(self):
        """
        Starts network speed data collection and stores it in the configured SQLite database
        (table must already exist).

        **Note**: `Blocks the current thread indefinitely.`

        :return: `nothing`
        """
        with SQLite(self.db) as store:
            while True:
                speed = self.pool.apply(from_speedtest)
                if speed:
                    self.log.debug(
                        "Collected speed data for server [{}]: [{}/{}] (down/up)"
                        .format(speed.server, speed.download, speed.upload))
                    store.speed_add(speed)
                    self.prom.speed_add(speed)
                else:
                    self.log.error("Failed to collect speed data")
                time.sleep(self.speed_collection_interval)
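One caveat worth noting: the lambda passed to pool.map only works if this pool is the thread-based multiprocessing.dummy.Pool, since a process pool would have to pickle it. If a process pool were ever wanted, a hedged alternative is a module-level helper, sketched here; collect_latency is a name introduced for illustration, not part of the original service.

def collect_latency(target):
    # module-level and therefore picklable, so it also works with multiprocessing.Pool
    return target, from_ping(target)

# inside start_latency_collection the map call would then read:
#     latencies = self.pool.map(collect_latency, self.targets)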