def run(self, env, json_writer):
    ''' Schedule all tests in profile for execution.

    See ``Test.schedule`` and ``Test.run``.
    '''
    self.prepare_test_list(env)

    # If using concurrency, add all the concurrent tests to the pool and
    # execute that pool
    if env.concurrent:
        pool = ThreadPool(multiprocessing.cpu_count())
        for (path, test) in self.test_list.items():
            if test.runConcurrent:
                pool.add(test.execute, (env, path, json_writer))
        pool.join()

    # Run any remaining tests serially from a single thread pool after the
    # concurrent tests have finished
    pool = ThreadPool(1)
    for (path, test) in self.test_list.items():
        if not env.concurrent or not test.runConcurrent:
            pool.add(test.execute, (env, path, json_writer))
    pool.join()
def run():
    logging.info('start subscribe server.....')

    # Create the thread pool for fetching web pages
    grab_pool = ThreadPool(GRAB_NUM)
    for i in range(GRAB_NUM):
        grab_pool.add_task(do_grab, None, id=i + 1)

    # Create the thread pool for parsing web pages
    paser_pool = ThreadPool(PASER_NUM)
    for i in range(PASER_NUM):
        paser_pool.add_task(do_paser, None, id=i + 1)

    # Create the thread pool for sending mail
    send_pool = ThreadPool(MAIL_SENDER_NUM)
    for i in range(MAIL_SENDER_NUM):
        send_pool.add_task(do_send, None, id=i + 1)

    # Join and destroy all threads
    grab_pool.destroy()
    paser_pool.destroy()
    send_pool.destroy()
def start(self):
    self.connect()
    # DOC(sumitjami) This code is to be executed only once in the lifetime,
    # when the bot starts executing.
    if not self.pool:
        self.pool = ThreadPool(self.pool_size)
    self.load_plugins()
    while True:
        try:
            for reply in self.slack_client.rtm_read():
                self.input(reply)
            self.crons()
        except:
            self.connected = False
            self.connect()
            self.crons()
        else:
            self.autoping()
            self.output()
        time.sleep(.1)
        if self.reload:
            self.bot_plugins = []
            if not self.pool:
                self.pool = ThreadPool(self.pool_size)
            vv('reloading')
            self.load_plugins()
def from_file(m163, option):
    """ download objects (songs, albums...) from an input file. """
    urls = []
    with open(option.inFile) as f:
        urls = f.readlines()

    global total, done, xiami_obj
    total = len(urls)
    print border
    LOG.info(msgTxt.fmt_links_in_file % total)
    print border

    pool = ThreadPool(config.THREAD_POOL_SIZE)
    for link in [u for u in urls if u]:
        link = link.rstrip('\n')
        # if it is a xiami link, init xiami object
        if re.match(pat_xm, link):
            __init_xiami_obj(option)
            pool.add_task(from_url_xm, xiami_obj, link, verbose=False)
        elif re.match(pat_163, link):
            pool.add_task(from_url_163, m163, link, verbose=False)
        else:
            LOG.warning(msgTxt.fmt_skip_unknown_url % link)

    pool.wait_completion()
def run(self):
    socket_server = self.server_socket
    socket_server.listen()
    task_pool = ThreadPool()
    while True:
        client_socket, _ = socket_server.accept()
        task_pool.add_task((client_socket, self.job))
def get_default_threadpool():
    global default_threadpool
    if default_threadpool is None:
        default_threadpool = ThreadPool(minthreads=20, maxthreads=100, queuesize=100)
    return default_threadpool
def multiThread(poolSize):
    # Read the keyword list
    keyword_list = tools.readKeyword("/home/panshan/keywords.txt")
    # keyword_list = tools.readKeyword('D:\develop\pycharm\workspace\ProTest\src1\keyword.txt')
    pool = ThreadPool(poolSize)  # set up the thread pool
    requests = makeRequests(process, keyword_list)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def search(song, n, processes=config.search_processes, returnGen=False):
    '''
    Function searches song and returns n valid .mp3 links.
    @param song: Search string.
    @param n: Number of songs.
    @param processes: Number of processes to launch in the subprocessing pool.
    @param returnGen: If true, a generator of the links will be returned,
                      and not the calculated list itself.
    '''
    sources_list = [x for x in config.search_sources_const if config.search_sources[x]]
    log.debug("Using sources: %s" % sources_list)

    # IMPROVE: better handling of slicing.
    pool = ThreadPool(max_threads=min(processes, len(sources_list)),
                      catch_returns=True, logger=log)
    args_list = []
    for source in sources_list:
        args_list.append([song, source, n / len(sources_list)])
    if n % len(sources_list):
        args_list[-1][2] += 1

    for args in args_list:
        pool(parse)(*args)

    gen = pool.iter()
    if returnGen:
        return gen
    return list(gen)
def __init__(self, bot_config):
    log.debug("ErrBot init.")
    super().__init__(bot_config)
    self.bot_config = bot_config
    self.prefix = bot_config.BOT_PREFIX
    if bot_config.BOT_ASYNC:
        self.thread_pool = ThreadPool(3)
        log.debug('created the thread pool ' + str(self.thread_pool))
    self.commands = {}  # the dynamically populated list of commands available on the bot
    self.re_commands = {}  # the dynamically populated list of regex-based commands available on the bot
    self.command_filters = []  # the dynamically populated list of filters
    self.MSG_UNKNOWN_COMMAND = 'Unknown command: "%(command)s". ' \
                               'Type "' + bot_config.BOT_PREFIX + 'help" for available commands.'
    if bot_config.BOT_ALT_PREFIX_CASEINSENSITIVE:
        self.bot_alt_prefixes = tuple(prefix.lower() for prefix in bot_config.BOT_ALT_PREFIXES)
    else:
        self.bot_alt_prefixes = bot_config.BOT_ALT_PREFIXES
    self.repo_manager = None
    self.plugin_manager = None
    self.storage_plugin = None
    self._plugin_errors_during_startup = None
    self.flow_executor = FlowExecutor(self)
def threaddo(self):
    pool = ThreadPool(10)
    todolist = self.getsalebilllist(2)
    print(todolist, len(todolist))
    request = threadpool.makeRequests(self.salebilloutdemo, todolist)
    [pool.putRequest(req) for req in request]
    pool.wait()
def run_prod():
    cycle_count = 1
    main = ThreadPool(num_workers=PARSER_THREAD_COUNT)
    while True:
        ADMIN_LOGGER.info("Starting cycle : " + str(cycle_count))
        reload(P_ROOT)
        process_list = [[e, __import__(P_ROOT.__name__ + '.' + e + '.main', fromlist=e)]
                        for e in P_ROOT.__all__]
        process_dict = dict(process_list)
        ADMIN_LOGGER.info("Executing process list : " + str(process_dict.items()))
        for proc_name in process_dict.keys():
            proc = getattr(process_dict.get(proc_name), 'Parser', 'None')
            main.putRequest(WorkRequest(proc_runner, args=(1, proc), callback=None))
            ADMIN_LOGGER.info("Started thread : " + proc_name)
        try:
            main.poll()
        except NoResultsPending:
            pass
        except:
            ADMIN_LOGGER.error(traceback.format_exc())
        main.wait()
        ADMIN_LOGGER.info("Sleeping for default LISTING_SLEEP_TIME : " + str(GLOBAL_SLEEP_TIME))
        time.sleep(GLOBAL_SLEEP_TIME)
        cycle_count = 1 if cycle_count > 9999 else cycle_count + 1
def __init__(self, driver=None, database=None, user=None, password=None,
             host='localhost', ioloop=tornado.ioloop.IOLoop.instance(),
             num_threads=10, tx_connection_pool_size=5, queue_timeout=1):
    if not driver:
        raise ValueError("Missing 'driver' argument")
    self._driver = driver
    self._database = database
    self._user = user
    self._password = password
    self._host = host
    self._threadpool = ThreadPool(
        per_thread_init_func=self.create_connection,
        per_thread_close_func=self.close_connection,
        num_threads=num_threads,
        queue_timeout=queue_timeout)
    self._ioloop = ioloop
    # Connection pool for transactions
    self._connection_pool = []
    for i in xrange(tx_connection_pool_size):
        conn = self.create_connection()
        self._connection_pool.append(conn)
    self._waiting_on_connection = deque()
def __init__(self, startUrl, maxDepth=3, threads=10):
    self.startUrl = startUrl
    self.maxDepth = maxDepth
    self.threads = threads
    self.urlManager = UrlManager()
    self.urlManager.add_new_url(self.startUrl)
    self.threadpool = ThreadPool(threads)
def get_one_month_by_threadpool(tasks):
    size = 12
    pool = ThreadPool(size)
    requests = makeRequests(get_one_month_background,
                            [([t[0], t[1]], None) for t in tasks])
    [pool.putRequest(req) for req in requests]
    pool.wait()
def __init__(self, path):
    self.input_data_path = path
    self.file_list = deque()
    self.getAllDataDir()
    self.post = True
    self.backup_tag_list = self.getAllBackUpTag(path)
    self.tag_info = defaultdict(lambda: {})
    self.check_true_file_list = []
    self.check_false_file_list = []
    self.false_check_reasion = []
    self.auto_module_ = loadTag('config/auto_module.json', '')
    self.config_ = loadTag('config/data_pipeline_config.json', '')
    self.end_point_30 = self.config_["end_point_30"]
    self.end_point_40 = self.config_["end_point_40"]
    self.end_point_21 = self.config_["end_point_21"]
    self.end_point = self.end_point_21
    self.check_file_name_list = self.config_["check_file"]
    self.headerdata = {"Data-tag-type": "application/json"}
    # special manual tagging: take over, dangerous driving, etc.
    self.tag_module_list = loadTag(tag_file_name='config/tag_module.json')
    self.tprofile_thresh = loadTag('config/tprofile_thresh.json', '')
    self.readShellFile('config/download_logs.sh')
    self.pool = ThreadPool(int(multiprocessing.cpu_count() * 0.6))
    self.auto_module_ = loadTag('config/auto_module.json', '')
    self.pred_eval_thresh = loadTag('config/pred_eval_thresh.json', '')
    self.case_tagging = TaggingMain(self.pool, self.config_, self.auto_module_,
                                    self.tag_module_list)
    self.case_toss = TossMain(self.config_, self.auto_module_, self.pred_eval_thresh)
def __init__(self, url, depth, threadNum, file, cookie):
    """Initialization parameters"""
    # Operating status
    self.status = False
    # Condition variable used to wake up and wait on the thread pool
    self.processcondition = Condition()
    # Number of currently running threads
    self.currentrun = 0
    # Queue of URLs waiting to be crawled
    self.urlQue = Queue()
    # URLs that have already been requested
    self.visitedurl = []
    # Number of threads
    self.threadNum = threadNum
    # Thread pool with the configured number of threads
    self.pool = ThreadPool(self.threadNum)
    # Expected crawl depth
    self.depth = depth
    # Initialize the browser
    self.browser = ''
    self.tasksall = []
    # Open the JSON pattern file
    self.re_json = json.load(open('patten.json', 'r', encoding='utf-8'))
    self.resfile = 'resault.txt'
    self.Cookie = {'Cookie': ''}
    # Initialize the queue
    for url in urllist:
        self.urlQue.put({'url': url, "depth": int(depth)})
    # URL currently being crawled
    self.spiderdomain = 'start'
def testImageDownload(self):
    logging.debug('Start at %s', datetime.now())
    url = 'http://f1.163.com'
    # url = 'http://news.sina.com.cn/photo/'
    work_request = WorkRequest(spider, url)
    pool = ThreadPool(10, work_request)
    pool.poll()
def start_download(songs, skipped_hists):
    """
    start multi-threaded downloading of the songs and generate a summary file.

    songs: the list of songs to be downloaded

    calls the finish_hook function, passing skipped_hists
    """
    global total
    total = len(songs)
    LOG.debug('init thread pool (%d) for downloading' % config.THREAD_POOL_SIZE)
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    downloader = Downloader(songs, pool)

    LOG.debug('Start downloading')
    downloader.start()

    while done < total:
        time.sleep(1)
        print_progress()

    # handle lyrics downloading
    download_lyrics(songs)

    print log.hl(msg.fmt_insert_hist, 'warning')
    hist_handler.insert_hist(songs)
    print log.hl(msg.fmt_all_finished, 'warning')

    # call finish hook
    finish_summary(skipped_hists)
def __init__(self, url, domain, depth, threadNum):
    # Number of threads currently running
    self.currentRunning = 0
    # Condition variable that synchronizes the pool manager thread with the crawler
    # threads; locks updates to currentRunning and wakes the manager thread
    self.processCondition = Condition()
    # Queue of URLs waiting to be visited
    self.urlQueue = Queue()
    # URLs that have already been visited
    self.readUrls = []
    # Key-value pairs of URL host, path and params
    self.urls = {}
    # Number of threads
    self.threadNum = threadNum
    # Thread pool with the configured number of threads
    # self.threadPool = ThreadPool(self.threadNum)
    self.pool = ThreadPool(self.threadNum)
    # Initialize the URL queue
    self.urlQueue.put({'url': url, "depth": 1})
    # Expected crawl depth
    self.depth = depth
    # Current crawl depth
    # self.currentDepth = 1
    # Current running state
    self.state = False
    # DOMAIN
    self.domain = domain
    # Initialize the database
    self.db = mongodb(self.domain)
    self.db.clean()
def program(self, create=1, mode='time'):
    '''
    create: 1 = clear the existing data and re-crawl; 0 = append and de-duplicate.
    mode: 'time' = set the date range by loop count; 's_e' = set the date range by start/end time.
    '''
    self.__get_namecode__(mode)
    self.create = create
    if create == 1:
        if os.path.exists(conf.get('dir', 'stock')):
            shutil.rmtree(conf.get('dir', 'stock'))
            os.mkdir(conf.get('dir', 'stock'))
        else:
            os.mkdir(conf.get('dir', 'stock'))
    pool = ThreadPool(10)
    param = []
    for i, code in enumerate(self.ts_code):
        for [st, et] in self.timerank:
            url = conf.get('config', 'req_url').format(code, st, et, 'ts_code')
            param.append(([i, code, url, st, et], None))
    reqs = makeRequests(self.get_data, param)
    [pool.putRequest(req) for req in reqs]
    pool.wait()
def after_properties_set(self):
    """ Run by Spring Python after all the JMS container's properties have been set. """
    for idx in range(self.concurrent_listeners):
        # Create as many Circuits managers as there are JMS listeners.
        manager = Manager()
        manager.start()

        # A pool of handler threads for each listener.
        handlers_pool = ThreadPool(self.handlers_per_listener)

        # Each manager gets assigned its own listener.
        listener = WebSphereMQListener()

        # Assign the listener and a debugger component to the manager.
        manager += listener
        manager += Debugger(logger=self.logger)

        listener.factory = self.factory
        listener.destination = self.destination
        listener.handler = self.handler
        listener.handlers_pool = handlers_pool
        listener.wait_interval = self.wait_interval
        listener.start()
def zhixingSpiderAPI(goal, thread_num):
    '''
    :param goal: number of ids to crawl
    :param thread_num: number of threads
    :return: None
    '''
    makeDirs()
    spider = ZhiXingSpider()
    pool = ThreadPool(thread_num)
    start_id = spider.getStartID()
    for group in spider.idQueue(start_id, goal):
        spider.__init__()
        t_begin = time.time()
        print time.ctime() + u'\tBegin: start id: {0}, target count: {1}'.format(start_id, goal)
        requests = makeRequests(spider.getJson, group)
        [pool.putRequest(req) for req in requests]
        pool.wait()
        result = spider.saveItems()
        log_id = list()
        log_id.extend((group[0], group[-1]))
        clawLog(log_id, result)
        spider.saveClawedID(start_id + goal)
        print time.ctime() + u'\tDone: finished, cost: {0}\n'.format(time.time() - t_begin)
        removeAllFiles([spider.dire_code, spider.dire_temp])
def configure(self, gconfig={}, **options):
    """
    Reconfigures the scheduler with the given options. Can only be done
    when the scheduler isn't running.
    """
    if self.running:
        raise SchedulerAlreadyRunningError

    # Set general options
    config = combine_opts(gconfig, 'apscheduler.', options)
    self.misfire_grace_time = int(config.pop('misfire_grace_time', 1))
    self.coalesce = asbool(config.pop('coalesce', True))
    self.daemonic = asbool(config.pop('daemonic', True))

    # Configure the thread pool
    if 'threadpool' in config:
        self._threadpool = maybe_ref(config['threadpool'])
    else:
        threadpool_opts = combine_opts(config, 'threadpool.')
        self._threadpool = ThreadPool(**threadpool_opts)

    # Configure job stores
    jobstore_opts = combine_opts(config, 'jobstore.')
    jobstores = {}
    for key, value in jobstore_opts.items():
        store_name, option = key.split('.', 1)
        opts_dict = jobstores.setdefault(store_name, {})
        opts_dict[option] = value

    for alias, opts in jobstores.items():
        classname = opts.pop('class')
        cls = maybe_ref(classname)
        jobstore = cls(**opts)
        self.add_jobstore(jobstore, alias, True)
def download_by_threadpool(download_task):
    size = 50
    pool = ThreadPool(size)
    # print [([t[0], t[1]]) for t in download_task]
    requests = makeRequests(download, [([t[0], t[1]], None) for t in download_task])
    [pool.putRequest(req) for req in requests]
    pool.wait()
def __init__(self, kb, cfg, targets, edispatcher):
    self.kb = kb
    self.cfg = cfg
    self.targets = targets
    self.pool = ThreadPool(window_size=self.cfg.Threads,
                           prototype=ScannerThread, async=False)
    self.ed = edispatcher
def __init__(self, seed, depth, pool_size=10):
    self.seed = seed
    self.depth = depth
    self.all_url_list = [seed]
    self.finished_url_list = []
    self.failure_url_list = []
    self.pool = ThreadPool(pool_size)
def startup(self):
    ok = self.load_plugins()
    if not ok:
        sys.stderr.write("Some plugins failed to load, please check the logs. Aborting.\n")
        self.logger.info('postomaat shut down after fatal error condition')
        sys.exit(1)

    self.logger.info("Init Threadpool")
    try:
        minthreads = self.config.getint('performance', 'minthreads')
        maxthreads = self.config.getint('performance', 'maxthreads')
    except ConfigParser.NoSectionError:
        self.logger.warning('Performance section not configured, using default thread numbers')
        minthreads = 1
        maxthreads = 3

    queuesize = maxthreads * 10
    self.threadpool = ThreadPool(minthreads, maxthreads, queuesize)

    self.logger.info("Init policyd Engine")
    ports = self.config.get('main', 'incomingport')
    for portconfig in ports.split():
        # plugins
        plugins = self.plugins
        if ':' in portconfig:
            port, pluginlist = portconfig.split(':')
            port = int(port.strip())
            plugins, ok = self._load_all(pluginlist)
            if not ok:
                self.logger.error("Could not startup engine on port %s, some plugins failed to load" % port)
                continue
        else:
            port = int(portconfig.strip())

        server = PolicyServer(self, port=port,
                              address=self.config.get('main', 'bindaddress'),
                              plugins=plugins)
        thread.start_new_thread(server.serve, ())
        self.servers.append(server)

    self.logger.info('Startup complete')
    if self.debugconsole:
        self.run_debugconsole()
    else:
        while self.stayalive:
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                self.shutdown()
def __init__(self, name, daemon=None):
    self.daemon = daemon
    self.name = str(name)
    self.simstack = [None]
    import logging
    setLogger(str(name), ('localhost', 514), logging.DEBUG)
    MPIRedirect.local = self
    if middleware.USE_MPI:
        self.threadpool = ThreadPool(5)
def thread_web_socket():
    pool = ThreadPool(thread_num)
    num = list()
    for ir in range(thread_num):
        num.append(ir)
    requests = makeRequests(on_start, num)
    [pool.putRequest(req) for req in requests]
    pool.wait()
def start_jobs(jobs, title='', threadpool_size=1):
    Terminate_Watcher()
    pool = ThreadPool(threadpool_size)
    executer = Executer(jobs, pool)
    executer.start()

    progress_bar = Progressbar(jobs, title, threadpool_size)
    while progress_bar.has_work_to_do():
        time.sleep(1)
        progress_bar.print_progress()