class BBQ(object):
    def __init__(self, t=180, count=8):
        self.time = float(t)
        self.count = int(count)
        self.ThreadPool = ThreadPool(self.count)

    ''' Function that actually handles a barbecue task '''
    def handle(self, task):
        time.sleep(self.time)  # simulate grilling time
        try:
            task[0] = True
        except:
            pass
        return

    ''' Add a barbecue task.
        Task format: [True/False]; True means the task is finished,
        False means it is waiting to be processed '''
    def addTask(self, task):
        self.ThreadPool.addTask(self.handle, task)
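# A minimal usage sketch for the BBQ class above, assuming a compatible
# ThreadPool implementation (and the time module) is importable; the pool's
# own join/wait call is not shown in the original snippet.
if __name__ == '__main__':
    bbq = BBQ(t=1, count=4)                 # short grill time for illustration
    tasks = [[False] for _ in range(8)]     # False = waiting, True = finished
    for task in tasks:
        bbq.addTask(task)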
def threaddo(self):
    pool = ThreadPool(10)
    todolist = self.getsalebilllist(2)
    print(todolist, len(todolist))
    request = threadpool.makeRequests(self.salebilloutdemo, todolist)
    [pool.putRequest(req) for req in request]
    pool.wait()
def convertWMA2MP3underDir(path):
    if not os.path.isdir(path):
        if existFile(path):
            print " Path:[" + path + "] is not a directory, exit!\n"
            return
        else:
            os.makedirs(path)
    pool = ThreadPool(6)
    MP3_CMD = '''ffmpeg.exe -i "%s" -f mp3 "%s"'''
    DEL_CMD = '''del %s'''
    for file_name in os.listdir(path):
        wma_path = path + '\\' + file_name
        if os.path.isfile(wma_path) and wma_path.lower().endswith('.wma'):
            mp3_file_name = file_name[:file_name.rfind('.')] + '.mp3'
            mp3_save_path = path + '\\' + mp3_file_name
            if os.path.exists(mp3_save_path):
                print " File:[" + mp3_save_path + "] already exists, pass.\n"
            else:
                cmd1 = MP3_CMD % (wma_path, mp3_save_path)
                #cmd2 = DEL_CMD % (wma_path)
                print cmd1
                pool.queueTask(run_cmd, (cmd1))
    pool.joinAll()
def testImageDownload(self):
    logging.debug('Start at %s', datetime.now())
    url = 'http://f1.163.com'
    # url = 'http://news.sina.com.cn/photo/'
    work_request = WorkRequest(spider, url)
    pool = ThreadPool(10, work_request)
    pool.poll()
def run(self, env, json_writer):
    '''
    Schedule all tests in profile for execution.

    See ``Test.schedule`` and ``Test.run``.
    '''
    self.prepare_test_list(env)

    # If using concurrency, add all the concurrent tests to the pool and
    # execute that pool
    if env.concurrent:
        pool = ThreadPool(multiprocessing.cpu_count())
        for (path, test) in self.test_list.items():
            if test.runConcurrent:
                pool.add(test.execute, (env, path, json_writer))
        pool.join()

    # Run any remaining tests serially from a single thread pool after the
    # concurrent tests have finished
    pool = ThreadPool(1)
    for (path, test) in self.test_list.items():
        if not env.concurrent or not test.runConcurrent:
            pool.add(test.execute, (env, path, json_writer))
    pool.join()
def convertFlv2Mp4underDir(path):
    if not os.path.isdir(path):
        if os.path.exists(path):
            print " Path:[" + path + "] is not a directory, exit!\n"
            return
        else:
            os.makedirs(path)
    pool = ThreadPool(6)
    MP4_CMD = '''D:\\Program\\tools\\ffmpeg.exe -i "%s" -vcodec mpeg4 -b 1200kb -mbd 2 -aic 2 -cmp 2 -subcmp 2 -acodec libfaac -ac 2 -ab 128000 -y "%s"'''
    MP3_CMD = '''D:\\Program\\tools\\ffmpeg.exe -i "%s" -vn -ar 44100 -ac 2 -f mp3 "%s"'''
    for file_name in os.listdir(path):
        flv_path = path + '\\' + file_name
        if os.path.isfile(flv_path):
            mp4_file_name = file_name[:file_name.rfind('.')] + '.mp4'
            mp4_save_path = path + '\\mp4\\' + mp4_file_name
            if os.path.exists(mp4_save_path):
                print " File:[" + mp4_save_path + "] already exists, pass.\n"
            else:
                cmd = MP4_CMD % (flv_path, mp4_save_path)
                #print cmd
                #pool.queueTask(run_cmd, (cmd))
            mp3_file_name = file_name[:file_name.rfind('.')] + '.mp3'
            mp3_save_path = path + '\\mp3\\' + mp3_file_name
            if os.path.exists(mp3_save_path):
                print " File:[" + mp3_save_path + "] already exists, pass.\n"
            else:
                cmd = MP3_CMD % (flv_path, mp3_save_path)
                print cmd
                pool.queueTask(run_cmd, (cmd))
    pool.joinAll()
class Spider:
    def __init__(self, depth=2):
        self.threadPool = ThreadPool(10)
        self.depth = depth

    def start(self, currentLevel, url):
        self.threadPool.initPool()
        self.threadPool.putTask(self.crawlPage,
                                currentLevel=currentLevel,
                                url=url)

    def crawlPage(self, args):
        print 'crawlPage', args
        currentLevel = args['currentLevel']
        url = args['url']
        req = urllib2.Request(url=url, headers=header)
        try:
            resp = urllib2.urlopen(req, timeout=10)
        except urllib2.HTTPError as e:
            # XXX The except HTTPError must come first,
            # otherwise except URLError will also catch an HTTPError
            pass
        except urllib2.URLError as e:
            pass
        except Exception, e:
            print e
        else:
            pass
def search(song, n, processes=config.search_processes, returnGen=False):
    '''
    Function searches song and returns n valid .mp3 links.

    @param song: Search string.
    @param n: Number of songs.
    @param processes: Number of processes to launch in the subprocessing pool.
    @param returnGen: If true, a generator of the links will be returned,
                      and not the calculated list itself.
    '''
    sources_list = [x for x in config.search_sources_const if config.search_sources[x]]
    log.debug("Using sources: %s" % sources_list)

    # IMPROVE: better handling of slicing.
    pool = ThreadPool(max_threads=min(processes, len(sources_list)),
                      catch_returns=True, logger=log)
    args_list = []
    for source in sources_list:
        args_list.append([song, source, n / len(sources_list)])
    if n % len(sources_list):
        args_list[-1][2] += 1

    for args in args_list:
        pool(parse)(*args)

    gen = pool.iter()
    if returnGen:
        return gen
    return list(gen)
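# Illustrative call to search() above; the song title is made up, and
# config.search_processes plus the configured search sources are assumed
# to be set up elsewhere in the project.
links = search("some song title", 5)
for link in links:
    print link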
def multiThread(poolSize):
    keyword_list = tools.readKeyword("/home/panshan/keywords.txt")  # read the keywords
    # keyword_list = tools.readKeyword('D:\develop\pycharm\workspace\ProTest\src1\keyword.txt')
    pool = ThreadPool(poolSize)  # set up the thread pool
    requests = makeRequests(process, keyword_list)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def downloadAllPagesVideos(url):
    global proxy, host, thread_count
    print url
    content = getContent(url, None, proxy)
    all_page_content = ''
    matched_groups = re.findall('''<a href="(.*?)" title='第\d+页' charset=".*?">\d+</a>''', content)
    for matched in matched_groups:
        page_url = 'http://so.youku.com' + matched.strip()
        all_page_content += getContent(page_url, None, proxy)
    pool = ThreadPool(thread_count)
    video_url_set = set()
    matched_groups = re.findall('''<a href="(http\://v\.youku\.com/v_show/id_.*?=\.html)"''', all_page_content)
    for matched in matched_groups:
        #print matched.strip()
        video_url = matched.strip()
        video_url_set.add(video_url)
    for video_url in video_url_set:
        print video_url
        log(video_url)
        pool.queueTask(downloadVideo, (video_url))
    pool.joinAll()
def download_by_threadpool(download_task):
    size = 50
    pool = ThreadPool(size)
    # print [([t[0], t[1]]) for t in download_task]
    requests = makeRequests(download, [([t[0], t[1]], None) for t in download_task])
    [pool.putRequest(req) for req in requests]
    pool.wait()
def __init__(self, kb, cfg, targets, edispatcher):
    self.kb = kb
    self.cfg = cfg
    self.targets = targets
    self.pool = ThreadPool(window_size=self.cfg.Threads, prototype=ScannerThread, async=False)
    self.ed = edispatcher
def __init__(self, seed, depth, pool_size=10):
    self.seed = seed
    self.depth = depth
    self.all_url_list = [seed]
    self.finished_url_list = []
    self.failure_url_list = []
    self.pool = ThreadPool(pool_size)
def pickle_all_companies():
    tpool = ThreadPool(50)
    companies = Company.objects.all()
    for c in companies:
        tpool.add_task(pickle_company, c.symbol)
    tpool.wait_completion()
    return None
def render_rap(self, msg_id, words):
    # Make the length of words fit the melody
    notes = sum(1 for pitch, beats in self._melody if pitch != REST)
    diff = notes - len(words)
    if diff < 0:
        words = words[:diff]
    else:
        words = words + ['la'] * diff

    delay = 0
    offsets = {}
    word_index = 0
    word_count = len(words)
    word_delays = []
    word_paths = []
    pool = ThreadPool(min(word_count, self._thread_pool))

    for pitch, beats in self._melody:
        duration = beats * self._spb
        if pitch != REST:
            word = words[word_index]
            word_delays.append(delay)
            word_path = '/tmp/%s-%s.wav' % (msg_id, word_index)
            word_paths.append(word_path)
            ssml = '<s><prosody pitch="%sHz" range="x-low">%s</prosody></s>' \
                % (pitch, word)

            def task(word_id, ssml, word_path):
                offsets[word_id] = self._swift.tts_file(ssml, word_path)

            pool.queue_task(task, (word_index, ssml, word_path))
            word_index += 1
        delay += duration
        if word_index == word_count:
            # Break here, rather than inside the if statement above, so that
            # delay is updated and equals the duration of the rap.
            break

    pool.join_all()
    if not word_index:
        # Didn't render any words!
        return

    # Mix the rap and the backing track
    mix_path = '/tmp/%s-mix.wav' % msg_id
    sox_args = [self.sox_path, '-M'] + word_paths \
        + [self._backing_sample, mix_path, 'delay'] \
        + [str(delay + offsets[i]) for i, delay in enumerate(word_delays)] \
        + ['remix', ','.join(str(channel) for channel in range(1, word_count + 2)), 'norm']
    print(' '.join(sox_args))
    subprocess.check_call(sox_args)
    return mix_path
def startWork(self, work, argsList, resultCallback=None):
    try:
        requests = makeRequests(work, argsList, resultCallback, None)
        job = ThreadPool(self.threadNum)
        for req in requests:
            job.putRequest(req)
        job.wait()
    except:
        print sys.exc_info()
def __init__(self, name, daemon=None):
    self.daemon = daemon
    self.name = str(name)
    self.simstack = [None]
    import logging
    setLogger(str(name), ('localhost', 514), logging.DEBUG)
    MPIRedirect.local = self
    if middleware.USE_MPI:
        self.threadpool = ThreadPool(5)
def thread_web_socket():
    pool = ThreadPool(thread_num)
    num = list()
    for ir in range(thread_num):
        num.append(ir)
    requests = makeRequests(on_start, num)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def get_baidu_index_by_date_range(self, keyword, start_date, end_date, type_name, area):
    # Get the index values for the keyword over the given date range
    url = ini_config.time_range_trend_url.format(
        start_date=start_date,
        end_date=end_date,
        word=urllib.parse.quote(keyword.encode('gbk')),
        area=area
    )
    self.browser.get(url)
    if ini_config.browser_sleep:
        time.sleep(float(ini_config.browser_sleep))
    if u'未被收录' in self.browser.page_source:
        return {}
    # Run JavaScript to obtain the res and res2 values needed below
    res = self.browser.execute_script('return PPval.ppt;')
    res2 = self.browser.execute_script('return PPval.res2;')
    # Get the date list for the requested range, for use in the loop below
    start_date, end_date, date_list = self.get_date_info(
        start_date, end_date
    )
    # Build the API url
    url = ini_config.all_index_url.format(
        res=res,
        res2=res2,
        start_date=start_date,
        end_date=end_date
    )
    # Fetch the API result, which holds the encrypted values for the date nodes
    all_index_info = self.api.get_all_index_html(url)
    indexes_enc = all_index_info['data'][type_name][0]['userIndexes_enc']
    enc_list = indexes_enc.split(',')
    pool = ThreadPool(int(ini_config.num_of_threads))
    # wm = WorkManager(int(ini_config.num_of_threads))
    # Iterate over the enc values; each one is used to build an API url
    # (that page returns the image data plus the CSS slicing information)
    list_of_args = []
    for index, _ in enumerate(enc_list):
        url = ini_config.index_show_url.format(
            res=res,
            res2=res2,
            enc_index=_,
            t=int(time.time()) * 1000
        )
        # Look up the date for this enc value by its position in the list
        date = date_list[index]
        # Add the task to the multithreaded download model
        item = (None, dict(date=date, url=url, keyword=keyword,
                           type_name=type_name, area=area))
        list_of_args.append(item)

    baidu_index_dict = {}

    def callback(*args, **kwargs):
        req, val = args[0], args[1]
        baidu_index_dict[req.kwds['date']] = val

    req_list = makeRequests(self.get_one_day_index, list_of_args, callback)
    [pool.putRequest(req) for req in req_list]
    pool.wait()
    return baidu_index_dict
def thread_web_socket():
    # thread pool
    pool_list = ThreadPool(thread_num)
    num = list()
    # set the number of threads to start
    for ir in range(thread_num):
        num.append(ir)
    requests = makeRequests(on_start, num)
    [pool_list.putRequest(req) for req in requests]
    pool_list.wait()
def __init__(self, host, port):
    self.max_threads = 8
    self.host = host
    self.port = port
    self.chatrooms = {}     # room_name -> room
    self.chatroom_ids = {}  # id -> name
    self.client_ids = {}
    self.setup_socket()
    self.tp = ThreadPool(self.max_threads)
    self.accept_connections()
class ConcurrentTestPool(Singleton):
    @synchronized_self
    def init(self):
        self.pool = ThreadPool(multiprocessing.cpu_count())

    @synchronized_self
    def put(self, callable_, args=None, kwds=None):
        self.pool.putRequest(WorkRequest(callable_, args=args, kwds=kwds))

    def join(self):
        self.pool.wait()
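# Hypothetical use of the ConcurrentTestPool singleton above; the work
# function is an assumption, and whether init() is invoked automatically
# depends on the Singleton base class in the original project.
def work(name):
    print name

pool = ConcurrentTestPool()
pool.put(work, args=['demo'])
pool.join()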
def bfTest():
    pool = ThreadPool(100)
    for j in range(100):
        alltime = []
        for i in range(bingfa):
            work = WorkRequest(threads, args=(int(random.random() * portnum) % portnum,))
            pool.putRequest(work)
            sleep((1.0 / bingfa) * random.random())
            # threading.Thread(target=threads, args=(i % portnum,)).start()
        pool.wait()
        printdata(alltime)
def __init__(self, searcher):
    self.searcher = searcher
    self._thread_pool = ThreadPool(THREAD_POOL_WORKS)
    # Add a watch to the root of the dir
    self.watch_manager = WatchManager()
    self.notifier = ThreadedNotifier(self.watch_manager, FileProcessEvent(self))
    self.notifier.start()
    self._build_exclude_list()
def prime_cache(self):
    """Ensures that the webpage cache is filled in the quickest time
    possible by making many requests in parallel"""
    print "Getting data for parts from suppliers' websites"
    pool = ThreadPool(NUM_THREADS)
    for srcode, pg in self.iteritems():
        print srcode
        pool.add_task(pg.get_price)
    pool.wait_completion()
def parse_soundcloud_api2(title):
    '''
    Function connects to soundcloud.com and returns the .mp3 links in it.

    API method 2: Parsing player's json data.
    '''
    links = search_soundcloud(title)
    pool = ThreadPool(max_threads=5, catch_returns=True, logger=log)
    for link in links:
        pool(get_soundcloud_dl_link)(link)
    return pool.iter()
def download_images(prd_ser, base_url, cache_update, auth_key_papy, sum_page):
    pool = ThreadPool(poolsize)
    args_list = []
    for page in range(1, sum_page + 1):
        url = base_url % page + "?"
        if cache_update:
            url += "date=" + cache_update + '&'
        url += auth_key_papy + "&origin=s_dre-viewer.papy.co.jp"
        args_list.append(((url, page, prd_ser), None))
    requests = makeRequests(download_one_page, args_list)
    [pool.putRequest(i) for i in requests]
    pool.wait()
def __init__(self):
    '''
    Initialization
    self.url      root url
    self.deep     crawl depth
    self.db       database access class
    self._thread  thread pool
    '''
    logger.info('init control class')
    self.url = conf['url']
    self.deep = conf['deep']
    self.db = operate['db']
    self._thread = ThreadPool(conf['thread'], self.get_html)
class TaskPool(object):

    def __init__(self, limit, logger=None, **kwargs):
        self.limit = limit
        self.logger = logger or log.get_default_logger()
        self._pool = None

    def start(self):
        self._pool = ThreadPool(self.limit)

    def stop(self):
        self._pool.dismissWorkers(self.limit, do_join=True)

    def apply_async(self, target, args=None, kwargs=None, callbacks=None,
                    errbacks=None, accept_callback=None, **compat):
        args = args or []
        kwargs = kwargs or {}
        callbacks = callbacks or []
        errbacks = errbacks or []
        on_ready = curry(self.on_ready, callbacks, errbacks)

        self.logger.debug("ThreadPool: Apply %s (args:%s kwargs:%s)" % (
            target, args, kwargs))

        req = WorkRequest(do_work, (target, args, kwargs, on_ready,
                                    accept_callback))
        self._pool.putRequest(req)

        # threadpool also has callback support,
        # but for some reason the callback is not triggered
        # before you've collected the results.
        # Clear the results (if any), so it doesn't grow too large.
        self._pool._results_queue.queue.clear()
        return req

    def on_ready(self, callbacks, errbacks, ret_value):
        """What to do when a worker task is ready and its return value has
        been collected."""
        if isinstance(ret_value, ExceptionInfo):
            if isinstance(ret_value.exception,
                          (SystemExit, KeyboardInterrupt)):  # pragma: no cover
                raise ret_value.exception
            [errback(ret_value) for errback in errbacks]
        else:
            [callback(ret_value) for callback in callbacks]
def same_ms(product_id):
    data = {'product_id': product_id, 'address_id': '72858'}
    url = 'http://payment.ohsame.com/order_create'
    time_s = time.time()
    pool = ThreadPool(20)
    reqs = makeRequests(same_ms_req, [((url, data), {}) for i in range(200)], same_ms_callback)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
    time_e = time.time()
    print('Flash-sale product: %s\n' % str(product_id))
    print('Flash-sale result: %s\n' % rs_ms)
    print('Flash-sale time elapsed: %s\n' % (time_e - time_s))
def start(self):
    pool = ThreadPool(_thread_num)
    href_and_name = self.getProvince()  # len = 31
    # demo: params_seq = [(['/0/2/0/0/', ['广东省']], None)]
    params_seq = [([i[0], [i[1]]], None) for i in href_and_name]
    # params_seq = [(['/0/2/0/0/', ['广东省']], None)]
    for group in PhoneBook.splitGroups(params_seq, 3):
        self.initItems()
        requests = makeRequests(self.process, group)
        [pool.putRequest(req) for req in requests]
        pool.wait()
        self.saveItems()  # persist the results
        time.sleep(random.uniform(2, 5))
    PhoneBook.client.close()  # close the connection
def run_prod():
    cycle_count = 1
    main = ThreadPool(num_workers=PARSER_THREAD_COUNT)
    while True:
        ADMIN_LOGGER.info("Starting cycle : " + str(cycle_count))
        reload(P_ROOT)
        process_list = [[e, __import__(P_ROOT.__name__ + '.' + e + '.main', fromlist=e)]
                        for e in P_ROOT.__all__]
        process_dict = dict(process_list)
        ADMIN_LOGGER.info("Executing process list : " + str(process_dict.items()))
        for proc_name in process_dict.keys():
            proc = getattr(process_dict.get(proc_name), 'Parser', 'None')
            main.putRequest(WorkRequest(proc_runner, args=(1, proc), callback=None))
            ADMIN_LOGGER.info("Started thread : " + proc_name)
        try:
            main.poll()
        except NoResultsPending:
            pass
        except:
            ADMIN_LOGGER.error(traceback.format_exc())
        main.wait()
        ADMIN_LOGGER.info("Sleeping for default LISTING_SLEEP_TIME : " + str(GLOBAL_SLEEP_TIME))
        time.sleep(GLOBAL_SLEEP_TIME)
        cycle_count = 1 if cycle_count > 9999 else cycle_count + 1
def mutilpool(url):
    from threadpool import ThreadPool, makeRequests
    task_pool = ThreadPool(8)
    request_list = []  # holds the task list
    urls = []
    # first, build the task list
    for device in range(20):
        urls.append(url)
        request_list.append(makeRequests(view, url))
    # map(task_pool.putRequest, request_list)
    requests = makeRequests(view, urls)
    [task_pool.putRequest(req) for req in requests]
    task_pool.wait()
def from_file(m163, option):
    """ download objects (songs, albums...) from an input file. """
    urls = []
    with open(option.inFile) as f:
        urls = f.readlines()
    global total, done, xiami_obj
    total = len(urls)
    print border
    LOG.info(msgTxt.fmt_links_in_file % total)
    print border
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    for link in [u for u in urls if u]:
        link = link.rstrip('\n')
        # if it is a xiami link, init the xiami object
        if re.match(pat_xm, link):
            __init_xiami_obj(option)
            pool.add_task(from_url_xm, xiami_obj, link, verbose=False)
        elif re.match(pat_163, link):
            pool.add_task(from_url_163, m163, link, verbose=False)
        else:
            LOG.warning(msgTxt.fmt_skip_unknown_url % link)
    pool.wait_completion()
def refresh_tunnels(args):
    tunnels = db.store.find(Tunnel)
    if tunnels:
        pool = ThreadPool(tunnels.count())
        for tunnel in tunnels:
            request = WorkRequest(tunnel.check_available)
            pool.putRequest(request)
        pool.wait()
    for tunnel in tunnels:
        host = db.store.get(Host, tunnel.hostid)
        record = AvailabilityRecord.register(host, tunnel, check=False)
        print record
def aggregate_all(client, iterator, connection_factory):
    """
    Aggregate all feeds returned by the generator.

    The generator should contain pairs of two elements (feed_url, categories)
    """
    def attach_connection(thread):
        thread.hbase = connection_factory()
        return thread

    pool = ThreadPool(10, thread_init=attach_connection)
    for feed, categs in iterator:
        pool.queueTask(lambda worker, p: aggregate(worker.hbase, *p), (feed, categs))
    pool.joinAll()
def start_thread(self):
    args_list = []
    ips = self.parse_ip()
    for ip in ips:
        args = self.args.copy()
        args['ip'] = ip
        args_list.append(args)
    self.cui.w('Proxy Scanner started')
    self.cui.i('Nums: %s' % len(args_list))
    self.cui.i('Port: %s' % self.args['port'])
    self.cui.i('Thread: %s' % self.args['thread'])
    pool = ThreadPool(self.args['thread'])
    reqs = makeRequests(self.run, args_list)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
def getSongsFromHTML(htmlcontent, save_path):
    global thread_count
    pool = ThreadPool(thread_count)
    matched_groups = re.findall("""W[LS]\("(\d+)",\s*"(\d+)",\s*"(.*?)\s+",""", htmlcontent)
    for matched in matched_groups:
        print "-" * 2, matched
        order = matched[0].strip()
        song_id = matched[1].strip()
        song_name = matched[2].strip()
        # getSong(song_id, order, save_path)
        pool.queueTask(getSongThread, (song_id, order, save_path))
    pool.joinAll()
def get_default_threadpool():
    global default_threadpool
    if default_threadpool is None:
        default_threadpool = ThreadPool(minthreads=20, maxthreads=100, queuesize=100)
    return default_threadpool
def __init__(self, threads, output, limits):
    self.threads = threads                      # number of worker threads
    self.output = output                        # directory where images are saved
    self.limits = limits                        # limit on the number of images to fetch
    self.tasks = 0                              # number of completed tasks
    self.stop = False                           # stop flag
    self.threadpool = ThreadPool(self.threads)  # initialize the thread pool
def __init__(self, url, domain, depth, threadNum):
    # number of threads currently running
    self.currentRunning = 0
    # condition variable for synchronizing the pool-manager thread with the
    # crawler threads; used to lock updates to currentRunning and to wake
    # the manager thread
    self.processCondition = Condition()
    # queue of URLs waiting to be visited
    self.urlQueue = Queue()
    # URLs that have already been visited
    self.readUrls = []
    # key/value pairs of URL host, path and params
    self.urls = {}
    # number of threads
    self.threadNum = threadNum
    # thread pool sized to the requested number of threads
    # self.threadPool = ThreadPool(self.threadNum)
    self.pool = ThreadPool(self.threadNum)
    # seed the URL queue
    self.urlQueue.put({'url': url, "depth": 1})
    # intended crawl depth
    self.depth = depth
    # current crawl depth
    # self.currentDepth = 1
    # current running state
    self.state = False
    # DOMAIN
    self.domain = domain
    # initialize the database
    self.db = mongodb(self.domain)
    self.db.clean()
def __init__(self, driver=None, database=None, user=None, password=None,
             host='localhost', ioloop=tornado.ioloop.IOLoop.instance(),
             num_threads=10, tx_connection_pool_size=5, queue_timeout=1,
             thread_idle_life=60*60):
    if not driver:
        raise ValueError("Missing 'driver' argument")
    self._driver = driver
    self._database = database
    self._user = user
    self._password = password
    self._host = host
    self._threadpool = ThreadPool(
        per_thread_init_func=self.create_connection,
        per_thread_close_func=self.close_connection,
        num_threads=num_threads,
        queue_timeout=queue_timeout,
        thread_idle_life=thread_idle_life)
    self._ioloop = ioloop
    # Connection pool for transactions
    self._connection_pool = []
    for i in xrange(tx_connection_pool_size):
        conn = self.create_connection()
        self._connection_pool.append(conn)
    self._waiting_on_connection = deque()
def configure(self, gconfig={}, **options):
    """
    Reconfigures the scheduler with the given options. Can only be done
    when the scheduler isn't running.
    """
    if self.running:
        raise SchedulerAlreadyRunningError

    # Set general options
    config = combine_opts(gconfig, 'apscheduler.', options)
    self.misfire_grace_time = int(config.pop('misfire_grace_time', 1))
    self.coalesce = asbool(config.pop('coalesce', True))
    self.daemonic = asbool(config.pop('daemonic', True))

    # Configure the thread pool
    if 'threadpool' in config:
        self._threadpool = maybe_ref(config['threadpool'])
    else:
        threadpool_opts = combine_opts(config, 'threadpool.')
        self._threadpool = ThreadPool(**threadpool_opts)

    # Configure job stores
    jobstore_opts = combine_opts(config, 'jobstore.')
    jobstores = {}
    for key, value in jobstore_opts.items():
        store_name, option = key.split('.', 1)
        opts_dict = jobstores.setdefault(store_name, {})
        opts_dict[option] = value

    for alias, opts in jobstores.items():
        classname = opts.pop('class')
        cls = maybe_ref(classname)
        jobstore = cls(**opts)
        self.add_jobstore(jobstore, alias, True)
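# A hypothetical options dict for configure() above, following the
# 'apscheduler.' / 'threadpool.' / 'jobstore.<alias>.' key prefixes it
# parses; the concrete option names and the job store path are assumptions.
scheduler.configure({
    'apscheduler.misfire_grace_time': '5',
    'apscheduler.threadpool.core_threads': '10',
    'apscheduler.jobstore.default.class':
        'apscheduler.jobstores.ram_store:RAMJobStore',
})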
def __init__(self, path):
    self.input_data_path = path
    self.file_list = deque()
    self.getAllDataDir()
    self.post = True
    self.backup_tag_list = self.getAllBackUpTag(path)
    self.tag_info = defaultdict(lambda: {})
    self.check_true_file_list = []
    self.check_false_file_list = []
    self.false_check_reasion = []
    self.auto_module_ = loadTag('config/auto_module.json', '')
    self.config_ = loadTag('config/data_pipeline_config.json', '')
    self.end_point_30 = self.config_["end_point_30"]
    self.end_point_40 = self.config_["end_point_40"]
    self.end_point_21 = self.config_["end_point_21"]
    self.end_point = self.end_point_21
    self.check_file_name_list = self.config_["check_file"]
    self.headerdata = {"Data-tag-type": "application/json"}
    self.tag_module_list = loadTag(
        tag_file_name='config/tag_module.json'
    )  # special manual tagging, take over, dangerous driving etc
    self.tprofile_thresh = loadTag('config/tprofile_thresh.json', '')
    self.readShellFile('config/download_logs.sh')
    self.pool = ThreadPool(int(multiprocessing.cpu_count() * 0.6))
    self.auto_module_ = loadTag('config/auto_module.json', '')
    self.pred_eval_thresh = loadTag('config/pred_eval_thresh.json', '')
    self.case_tagging = TaggingMain(self.pool, self.config_, self.auto_module_, self.tag_module_list)
    self.case_toss = TossMain(self.config_, self.auto_module_, self.pred_eval_thresh)
def job():
    """ Updater job for periodic repetitions. """
    print('[+] PUT UPDATER INTO THREADPOOL AT [%s]' % (now()))
    pool = ThreadPool(POOL_COUNT)
    pool.add_task(populate, None)
    pool.wait_completion()
    print('[+] COMPLETE POPULATE AT [%s]' % (now()))
    pool.add_task(update, None)
    pool.wait_completion()
    print('[+] COMPLETE UPDATE AT [%s]' % (now()))
    del pool
    cache.rpush(
        'incomingQueue',
        'StartUpdateVulnerabilityDataBase')
def after_properties_set(self):
    """ Run by Spring Python after all the JMS container's properties have
    been set. """
    for idx in range(self.concurrent_listeners):
        # Create as many Circuits managers as there are JMS listeners.
        manager = Manager()
        manager.start()

        # A pool of handler threads for each listener.
        handlers_pool = ThreadPool(self.handlers_per_listener)

        # Each manager gets assigned its own listener.
        listener = WebSphereMQListener()

        # Assign the listener and a debugger component to the manager.
        manager += listener
        manager += Debugger(logger=self.logger)

        listener.factory = self.factory
        listener.destination = self.destination
        listener.handler = self.handler
        listener.handlers_pool = handlers_pool
        listener.wait_interval = self.wait_interval
        listener.start()
def start_download(songs, skipped_hists):
    """
    Start multi-threaded downloading of the songs and generate a summary file.

    songs: the list of songs that need to be downloaded

    Calls the finish hook, passing skipped_hists.
    """
    global total
    total = len(songs)
    LOG.debug('init thread pool (%d) for downloading' % config.THREAD_POOL_SIZE)
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    downloader = Downloader(songs, pool)

    LOG.debug('Start downloading')
    downloader.start()

    while done < total:
        time.sleep(1)
        print_progress()

    # handle lyrics downloading
    download_lyrics(songs)

    print log.hl(msg.fmt_insert_hist, 'warning')
    hist_handler.insert_hist(songs)
    print log.hl(msg.fmt_all_finished, 'warning')

    # call the finish hook
    finish_summary(skipped_hists)
def __init__(self, t=180, count=8):
    self.time = float(t)
    self.count = int(count)
    self.ThreadPool = ThreadPool(self.count)
class TaskPool(object):

    def __init__(self, limit, logger=None, **kwargs):
        self.limit = limit
        self.logger = logger or log.get_default_logger()
        self._pool = None

    def start(self):
        self._pool = ThreadPool(self.limit)

    def stop(self):
        self._pool.dismissWorkers(self.limit, do_join=True)

    def apply_async(self, target, args=None, kwargs=None, callbacks=None,
                    errbacks=None, accept_callback=None, **compat):
        args = args or []
        kwargs = kwargs or {}
        callbacks = callbacks or []
        errbacks = errbacks or []
        on_ready = partial(self.on_ready, callbacks, errbacks)

        self.logger.debug("ThreadPool: Apply %s (args:%s kwargs:%s)" % (
            target, args, kwargs))

        req = WorkRequest(do_work, (target, args, kwargs, on_ready,
                                    accept_callback))
        self._pool.putRequest(req)

        # threadpool also has callback support,
        # but for some reason the callback is not triggered
        # before you've collected the results.
        # Clear the results (if any), so it doesn't grow too large.
        self._pool._results_queue.queue.clear()
        return req

    def on_ready(self, callbacks, errbacks, ret_value):
        """What to do when a worker task is ready and its return value has
        been collected."""
        if isinstance(ret_value, ExceptionInfo):
            if isinstance(ret_value.exception,
                          (SystemExit, KeyboardInterrupt)):  # pragma: no cover
                raise ret_value.exception
            [errback(ret_value) for errback in errbacks]
        else:
            [callback(ret_value) for callback in callbacks]
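# Sketch of how the TaskPool above might be driven; the target function,
# callback and pool size are illustrative and not part of the original code,
# and the surrounding module is assumed to provide do_work/WorkRequest.
def fetch(x):
    return x * 2

def on_success(result):
    print(result)

pool = TaskPool(limit=4)
pool.start()
pool.apply_async(fetch, args=[21], callbacks=[on_success])
pool.stop()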
def from_file(xm_obj, infile):
    """ download objects (songs, albums...) from an input file. """
    urls = []
    with open(infile) as f:
        urls = f.readlines()
    global total, done
    total = len(urls)
    print border
    LOG.info(u' Total number of links in file: %d' % total)
    print border
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    for link in [u for u in urls if u]:
        pool.add_task(from_url, xm_obj, link.rstrip('\n'), verbose=False)
    pool.wait_completion()
def startup(self):
    ok = self.load_plugins()
    if not ok:
        sys.stderr.write("Some plugins failed to load, please check the logs. Aborting.\n")
        self.logger.info('postomaat shut down after fatal error condition')
        sys.exit(1)

    self.logger.info("Init Threadpool")
    try:
        minthreads = self.config.getint('performance', 'minthreads')
        maxthreads = self.config.getint('performance', 'maxthreads')
    except ConfigParser.NoSectionError:
        self.logger.warning('Performance section not configured, using default thread numbers')
        minthreads = 1
        maxthreads = 3

    queuesize = maxthreads * 10
    self.threadpool = ThreadPool(minthreads, maxthreads, queuesize)

    self.logger.info("Init policyd Engine")

    ports = self.config.get('main', 'incomingport')
    for portconfig in ports.split():
        # plugins
        plugins = self.plugins
        if ':' in portconfig:
            port, pluginlist = portconfig.split(':')
            port = int(port.strip())
            plugins, ok = self._load_all(pluginlist)
            if not ok:
                self.logger.error("Could not startup engine on port %s, some plugins failed to load" % port)
                continue
        else:
            port = int(portconfig.strip())

        server = PolicyServer(self, port=port,
                              address=self.config.get('main', 'bindaddress'),
                              plugins=plugins)
        thread.start_new_thread(server.serve, ())
        self.servers.append(server)

    self.logger.info('Startup complete')
    if self.debugconsole:
        self.run_debugconsole()
    else:
        while self.stayalive:
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                self.shutdown()
class Spider(object):
    def __init__(self, seed, depth, pool_size=10):
        self.seed = seed
        self.depth = depth
        self.all_url_list = [seed]
        self.finished_url_list = []
        self.failure_url_list = []
        self.pool = ThreadPool(pool_size)

    def crawl(self):
        base_deep_size = 0
        while base_deep_size <= self.depth:
            for url in self.all_url_list:
                if url not in self.finished_url_list:
                    self.pool.add_task(self.download, url)
            self.pool.close()
            self.depth -= 1

    def download(self, url):
        try:
            data = urllib2.urlopen(url)
            page = data.read()
            self.finished_url_list.append(url)
            links = self.get_urls(page)
            return page, links
        except Exception as e:
            print 'open url:%s raise exception(%s)' % (url, e)
            return None

    def get_urls(self, page):
        soup = BeautifulSoup(page, fromEncoding="gb18030")
        if soup.title:
            print soup.title.string
        links = []
        for item in soup.findAll('a'):
            link = item.get('href')
            if link and link.startswith('http://') and link not in self.finished_url_list:
                links.append(link)
        print links
        return links

    def get_next_url(self):
        pass
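# A short usage sketch for the Spider class above; the seed URL is
# illustrative, and urllib2, BeautifulSoup and a compatible ThreadPool
# implementation (with add_task/close) must be importable.
if __name__ == '__main__':
    spider = Spider(seed='http://example.com', depth=2, pool_size=5)
    spider.crawl()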