Example #1
import time  # used by handle(); ThreadPool is assumed to come from the project's thread-pool module


class BBQ(object):

    def __init__(self, t=180, count=8):
        self.time = float(t)
        self.count = int(count)
        self.ThreadPool = ThreadPool(self.count)

    def handle(self, task):
        '''The function that actually processes a barbecue task.'''
        time.sleep(self.time)  # simulate grilling time
        try:
            task[0] = True
        except Exception:
            pass
        return

    def addTask(self, task):
        '''Add a barbecue task.

        Task format: [True/False]
        True  means the task has been processed
        False means the task is waiting to be processed
        '''
        self.ThreadPool.addTask(self.handle, task)
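A minimal usage sketch for the class above, assuming the same ThreadPool whose addTask(func, task) runs func(task) on a worker thread, as the snippet implies; the values are illustrative.

# Hypothetical driver for the BBQ class.
bbq = BBQ(t=2, count=4)                # 2-second "grill" time, 4 worker threads
tasks = [[False] for _ in range(8)]    # [False] = waiting, [True] = done
for task in tasks:
    bbq.addTask(task)
# once the pool drains, every task should read [True]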
Example #2
 def threaddo(self):
     pool = ThreadPool(10)
     todolist = self.getsalebilllist(2)
     print(todolist, len(todolist))
     request = threadpool.makeRequests(self.salebilloutdemo, todolist)
     [pool.putRequest(req) for req in request]
     pool.wait()
Example #3
def convertWMA2MP3underDir(path):
    if not os.path.isdir(path):
        if existFile(path):
            print "  Path:["+ path+ "] is not a directory, exit!\n"
            return
        else:
            os.makedirs(path)
    
    pool = ThreadPool(6)
    
    MP3_CMD = '''ffmpeg.exe -i "%s" -f mp3 "%s"'''
    DEL_CMD = '''del %s'''
    for file_name in os.listdir(path):
        wma_path = path+'\\'+file_name
        if os.path.isfile(wma_path) and wma_path.lower().endswith('.wma'):
            mp3_file_name = file_name[:file_name.rfind('.')]+'.mp3'
            mp3_save_path = path+'\\'+mp3_file_name
            if os.path.exists(mp3_save_path):
                print "  File:[" + mp3_save_path+ "] already exists, pass.\n"
            else:
                cmd1 = MP3_CMD%(wma_path, mp3_save_path)
                #cmd2 = DEL_CMD%(wma_path)
                print cmd1
                pool.queueTask(run_cmd, (cmd1))
        
    pool.joinAll()
Example #4
 def testImageDownload(self):
     logging.debug('Start at %s', datetime.now())
     url = 'http://f1.163.com'
     # url = 'http://news.sina.com.cn/photo/'
     work_request = WorkRequest(spider, url)
     pool = ThreadPool(10, work_request)
     pool.poll()
Example #5
File: core.py Project: RAOF/piglit
    def run(self, env, json_writer):
        '''
        Schedule all tests in profile for execution.

        See ``Test.schedule`` and ``Test.run``.
        '''

        self.prepare_test_list(env)

        # If using concurrency, add all the concurrent tests to the pool and
        # execute that pool
        if env.concurrent:
            pool = ThreadPool(multiprocessing.cpu_count())
            for (path, test) in self.test_list.items():
                if test.runConcurrent:
                    pool.add(test.execute, (env, path, json_writer))
            pool.join()

        # Run any remaining tests serially from a single thread pool after the
        # concurrent tests have finished
        pool = ThreadPool(1)
        for (path, test) in self.test_list.items():
            if not env.concurrent or not test.runConcurrent:
                pool.add(test.execute, (env, path, json_writer))
        pool.join()
Example #6
def convertFlv2Mp4underDir(path):
    if not os.path.isdir(path):
        if os.path.exists(path):
            print "  Path:["+ path+ "] is not a directory, exit!\n"
            return
        else:
            os.makedirs(path)
    
    pool = ThreadPool(6)
    
    MP4_CMD = '''D:\\Program\\tools\\ffmpeg.exe -i "%s" -vcodec mpeg4 -b 1200kb -mbd 2 -aic 2 -cmp 2 -subcmp 2 -acodec libfaac -ac 2 -ab 128000 -y "%s"'''
    MP3_CMD = '''D:\\Program\\tools\\ffmpeg.exe -i "%s" -vn -ar 44100 -ac 2 -f mp3 "%s"'''
    for file_name in os.listdir(path):
        flv_path = path+'\\'+file_name
        if os.path.isfile(flv_path):
            mp4_file_name = file_name[:file_name.rfind('.')]+'.mp4'
            mp4_save_path = path+'\\mp4\\'+mp4_file_name
            if os.path.exists(mp4_save_path):
                print "  File:[" + mp4_save_path+ "] already exists, pass.\n"
            else:
                cmd = MP4_CMD%(flv_path, mp4_save_path)
                #print cmd
                #pool.queueTask(run_cmd, (cmd))
            
            mp3_file_name = file_name[:file_name.rfind('.')]+'.mp3'
            mp3_save_path = path+'\\mp3\\'+mp3_file_name
            if os.path.exists(mp3_save_path):
                print "  File:[" + mp3_save_path+ "] already exists, pass.\n"
            else:
                cmd = MP3_CMD%(flv_path, mp3_save_path)
                print cmd
                pool.queueTask(run_cmd, (cmd))
            
    pool.joinAll()
Example #7
class Spider:
	def __init__(self, depth=2):
		self.threadPool = ThreadPool(10)
		self.depth = depth

	def start(self, currentLevel, url):
		self.threadPool.initPool()
		self.threadPool.putTask(self.crawlPage, \
							currentLevel = currentLevel, \
							url = url)

	def crawlPage(self, args):
		print 'crawlPage', args
		currentLevel = args['currentLevel']
		url = args['url']
		req = urllib2.Request(url=url, headers=header)
		try:
			resp = urllib2.urlopen(req, timeout=10)
		except urllib2.HTTPError as e:
			# XXX The except HTTPError must come first
			# otherwise except URLError will also catch an HTTPError
			pass
		except urllib2.URLError as e:
			pass
		except Exception, e:
			print e
		else:
			pass  # response received; page processing would continue here
Example #8
def search(song, n, processes=config.search_processes, returnGen=False):
	'''
	Function searches song and returns n valid .mp3 links.
	@param song: Search string.
	@param n: Number of songs.
	@param processes: Number of processes to launch in the subprocessing pool.
	@param returnGen: If true, a generator of the links will be returned,
						and not the calculated list itself.
	'''
	sources_list = [x for x in config.search_sources_const if config.search_sources[x]]
	log.debug("Using sources: %s" % sources_list)
	
	# IMPROVE: better handling of slicing.
	pool = ThreadPool(max_threads=min(processes, len(sources_list)), catch_returns=True, logger=log)
	args_list = []
	for source in sources_list:
		args_list.append([song, source, n/len(sources_list)])
	if n % len(sources_list):
		args_list[-1][2] += 1
	
	for args in args_list:
		pool(parse)(*args)
	
	gen = pool.iter()

	if returnGen:
		return gen
	return list(gen)
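A hedged usage sketch of the function above; the song title is illustrative, and the config/log objects are assumed to be set up as in the snippet.

# Hypothetical calls; parameter names follow the docstring above.
links = search("some song title", n=5)                      # eager: list of up to 5 links
link_gen = search("some song title", n=5, returnGen=True)   # lazy: generator of links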
Example #9
def multiThread(poolSize):
    keyword_list = tools.readKeyword("/home/panshan/keywords.txt")      # load the keywords
    # keyword_list = tools.readKeyword('D:\develop\pycharm\workspace\ProTest\src1\keyword.txt')
    pool = ThreadPool(poolSize)     # set up the thread pool
    requests = makeRequests(process, keyword_list)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #10
def downloadAllPagesVideos(url):
    global proxy, host, thread_count
    print url
    content = getContent(url, None, proxy)
    
    all_page_content = ''
    matched_groups = re.findall('''<a href="(.*?)" title='第\d+页' charset=".*?">\d+</a>''', content)
    for matched in matched_groups:
        page_url = 'http://so.youku.com'+matched.strip()
        all_page_content += getContent(page_url, None, proxy)
    
    
    pool = ThreadPool(thread_count)

    video_url_set = set()
    matched_groups = re.findall('''<a href="(http\://v\.youku\.com/v_show/id_.*?=\.html)"''', all_page_content)
    for matched in matched_groups:
        #print matched.strip()
        video_url = matched.strip()
        video_url_set.add(video_url)

    for video_url in video_url_set:
        print video_url
        log(video_url)
        pool.queueTask(downloadVideo, (video_url))

    pool.joinAll()
Example #11
def download_by_threadpool(download_task):
    size = 50
    pool = ThreadPool(size)
    # print [([t[0], t[1]]) for t in download_task]
    requests = makeRequests(download,
                            [([t[0], t[1]], None) for t in download_task])
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #12
 def __init__(self, kb, cfg, targets, edispatcher):
     self.kb = kb
     self.cfg = cfg
     self.targets = targets
     self.pool = ThreadPool(window_size=self.cfg.Threads,
                            prototype=ScannerThread,
                            async=False)
     self.ed = edispatcher
Example #13
 def __init__(self,seed,depth,pool_size=10):
     
     self.seed = seed
     self.depth = depth
     self.all_url_list = [seed]
     self.finished_url_list = []
     self.failure_url_list = []
     self.pool = ThreadPool(pool_size)
Example #14
def pickle_all_companies():
    tpool = ThreadPool(50)
    companies = Company.objects.all()
    for c in companies:
        tpool.add_task(pickle_company, c.symbol)
    tpool.wait_completion()

    return None
Example #15
    def render_rap(self, msg_id, words):
        # Make the length of words fit the melody
        notes = sum(1 for pitch, beats in self._melody if pitch != REST)
        diff = notes - len(words)
        if diff < 0:
            words = words[:diff]
        else:
            words = words + ['la'] * diff

        delay = 0
        offsets = {}
        word_index = 0
        word_count = len(words)
        word_delays = []
        word_paths = []

        pool = ThreadPool(min(word_count, self._thread_pool))

        for pitch, beats in self._melody:
            duration = beats * self._spb

            if pitch != REST:
                word = words[word_index]
                word_delays.append(delay)
                word_path = '/tmp/%s-%s.wav' % (msg_id, word_index)
                word_paths.append(word_path)
                ssml = '<s><prosody pitch="%sHz" range="x-low">%s</prosody></s>' \
                    % (pitch, word)
                def task(word_id, ssml, word_path):
                    offsets[word_id] = self._swift.tts_file(ssml, word_path)
                pool.queue_task(task, (word_index, ssml, word_path))
                word_index += 1

            delay += duration

            if word_index == word_count:
                # Break here, rather than inside the if statement above, so
                # that delay is updated and equals the duration of the rap.
                break

        pool.join_all()

        if not word_index:
            # Didn't render any words!
            return

        # Mix the rap and the backing track
        mix_path = '/tmp/%s-mix.wav' % msg_id
        sox_args = [self.sox_path, '-M'] + word_paths \
            + [self._backing_sample, mix_path, 'delay'] \
            + [str(delay + offsets[i]) for i, delay in enumerate(word_delays)] \
            + ['remix',
                ','.join(str(channel) for channel in range(1, word_count + 2)),
                'norm']
        print(' '.join(sox_args))
        subprocess.check_call(sox_args)

        return mix_path
Example #16
 def startWork(self, work, argsList, resultCallback=None):
   try:
     requests = makeRequests(work, argsList, resultCallback, None)
     job = ThreadPool(self.threadNum)
     for req in requests:
       job.putRequest(req)
     job.wait()
   except:
     print sys.exc_info()
Example #17
 def __init__(self, name, daemon = None):
     self.daemon = daemon
     self.name = str(name)
     self.simstack = [None]
     import logging
     setLogger(str(name), ('localhost', 514), logging.DEBUG)
     MPIRedirect.local = self
     if middleware.USE_MPI:
         self.threadpool = ThreadPool(5)
Example #18
def thread_web_socket():
    pool = ThreadPool(thread_num)
    num = list()

    for ir in range(thread_num):
        num.append(ir)
    requests = makeRequests(on_start, num)
    [pool.putRequest(req) for req in requests]
    pool.wait()
Example #19
    def get_baidu_index_by_date_range(self, keyword, start_date, end_date,
                                      type_name, area):
        # Fetch the keyword's index values for the given date range
        url = ini_config.time_range_trend_url.format(
            start_date=start_date, end_date=end_date,
            word=urllib.parse.quote(keyword.encode('gbk')),
            area=area
        )
        self.browser.get(url)
        if ini_config.browser_sleep:
            time.sleep(float(ini_config.browser_sleep))
        if u'未被收录' in self.browser.page_source:
            return {}
        # Run JS in the browser to obtain the res and res2 values needed below
        res = self.browser.execute_script('return PPval.ppt;')
        res2 = self.browser.execute_script('return PPval.res2;')

        # Build the date list for the range, used by the loop below
        start_date, end_date, date_list = self.get_date_info(
            start_date, end_date
        )

        # Assemble the API URL
        url = ini_config.all_index_url.format(
            res=res, res2=res2, start_date=start_date, end_date=end_date
        )
        # Fetch the API result, which holds the encrypted values for the later date nodes
        all_index_info = self.api.get_all_index_html(url)
        indexes_enc = all_index_info['data'][type_name][0]['userIndexes_enc']
        enc_list = indexes_enc.split(',')

        pool = ThreadPool(int(ini_config.num_of_threads))
        # wm = WorkManager(int(ini_config.num_of_threads))

        # Iterate over the enc values; each builds an API URL (the page returns the image plus the CSS crop info)
        list_of_args = []
        for index, _ in enumerate(enc_list):
            url = ini_config.index_show_url.format(
                res=res, res2=res2, enc_index=_, t=int(time.time()) * 1000
            )
            # Map the enc value's position in the list to its date
            date = date_list[index]
            # Add the task to the multithreaded download queue
            item = (None, dict(date=date, url=url, keyword=keyword, type_name=type_name, area=area))
            list_of_args.append(item)

        baidu_index_dict = {}

        def callback(*args, **kwargs):
            req, val = args[0], args[1]
            baidu_index_dict[req.kwds['date']] = val

        req_list = makeRequests(self.get_one_day_index, list_of_args, callback)
        [pool.putRequest(req) for req in req_list]
        pool.wait()

        return baidu_index_dict
Example #20
File: 1.py Project: kalan898/study
def thread_web_socket():
    # thread pool
    pool_list = ThreadPool(thread_num)
    num = list()
    # set how many threads to start
    for ir in range(thread_num):
        num.append(ir)
    requests = makeRequests(on_start, num)
    [pool_list.putRequest(req) for req in requests]
    pool_list.wait()
Example #21
 def __init__(self, host, port):
     self.max_threads = 8
     self.host = host
     self.port = port
     self.chatrooms = {}  # room_name -> room
     self.chatroom_ids = {}  # id -> name
     self.client_ids = {}
     self.setup_socket()
     self.tp = ThreadPool(self.max_threads)
     self.accept_connections()
Example #22
class ConcurrentTestPool(Singleton):
    @synchronized_self
    def init(self):
        self.pool = ThreadPool(multiprocessing.cpu_count())

    @synchronized_self
    def put(self, callable_, args=None, kwds=None):
        self.pool.putRequest(WorkRequest(callable_, args=args, kwds=kwds))

    def join(self):
        self.pool.wait()
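A brief usage sketch for the class above, assuming the Singleton base returns the shared instance and that init() is called once before use; run_case and env are placeholder names.

# Hypothetical usage of ConcurrentTestPool.
pool = ConcurrentTestPool()
pool.init()
pool.put(run_case, args=(env,))
pool.join()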
Example #24
File: test.py Project: dabeike/waf
def bfTest():
    pool = ThreadPool(100)
    for j in range(100):
        alltime = []
        for i in range(bingfa):
            work = WorkRequest(threads, args=(int(random.random() * portnum) % portnum,))
            pool.putRequest(work)
            sleep((1.0 / bingfa) * random.random())
            # threading.Thread(target=threads, args=(i % portnum,)).start()
        pool.wait()
        printdata(alltime)
Example #25
    def __init__(self, searcher):
        self.searcher = searcher

        self._thread_pool = ThreadPool(THREAD_POOL_WORKS)

        # Add a watch to the root of the dir
        self.watch_manager = WatchManager()
        self.notifier = ThreadedNotifier(self.watch_manager,
                                         FileProcessEvent(self))
        self.notifier.start()

        self._build_exclude_list()
Example #26
    def prime_cache(self):
        """Ensures that the webpage cache is filled in the
        quickest time possible by making many requests in
        parallel"""

        print "Getting data for parts from suppliers' websites"
        pool = ThreadPool(NUM_THREADS)

        for srcode, pg in self.iteritems():
            print srcode
            pool.add_task(pg.get_price)

        pool.wait_completion()
Example #27
def parse_soundcloud_api2(title):
	'''
	Function connects to soundcloud.com and returns the .mp3 links in it.
	
	API method 2: Parsing player's json data.
	'''
	links = search_soundcloud(title)
	
	pool = ThreadPool(max_threads=5, catch_returns=True, logger=log)
	for link in links:
		pool(get_soundcloud_dl_link)(link)
	
	return pool.iter()
Example #28
def parse_soundcloud_api2(title):
    '''
    Function connects to soundcloud.com and returns the .mp3 links in it.

    API method 2: Parsing player's json data.
    '''
    links = search_soundcloud(title)

    pool = ThreadPool(max_threads=5, catch_returns=True, logger=log)
    for link in links:
        pool(get_soundcloud_dl_link)(link)

    return pool.iter()
Example #29
def download_images(prd_ser, base_url, cache_update, auth_key_papy, sum_page):
    pool = ThreadPool(poolsize)
    args_list = []
    for page in range(1, sum_page + 1):
        url = base_url % page + "?"
        if cache_update:
            url += "date=" + cache_update + '&'
        url += auth_key_papy + "&origin=s_dre-viewer.papy.co.jp"
        args_list.append(((url, page, prd_ser), None))

    requests = makeRequests(download_one_page, args_list)
    [pool.putRequest(i) for i in requests]
    pool.wait()
Example #30
 def __init__(self):
     '''
     Initialization:
     self.url      root URL
     self.deep     crawl depth
     self.db       database access class
     self._thread  thread pool
     '''
     logger.info('init control class')
     self.url = conf['url']
     self.deep = conf['deep']
     self.db = operate['db']
     self._thread = ThreadPool(conf['thread'], self.get_html)
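A sketch of the conf mapping the constructor above expects, with illustrative values; the 'db' entry in operate is assumed to be set up elsewhere.

# Hypothetical configuration; keys follow the docstring above.
conf = {
    'url': 'http://example.com',   # root URL
    'deep': 2,                     # crawl depth
    'thread': 10,                  # thread pool size
}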
Example #31
class TaskPool(object):
    def __init__(self, limit, logger=None, **kwargs):
        self.limit = limit
        self.logger = logger or log.get_default_logger()
        self._pool = None

    def start(self):
        self._pool = ThreadPool(self.limit)

    def stop(self):
        self._pool.dismissWorkers(self.limit, do_join=True)

    def apply_async(self,
                    target,
                    args=None,
                    kwargs=None,
                    callbacks=None,
                    errbacks=None,
                    accept_callback=None,
                    **compat):
        args = args or []
        kwargs = kwargs or {}
        callbacks = callbacks or []
        errbacks = errbacks or []

        on_ready = curry(self.on_ready, callbacks, errbacks)

        self.logger.debug("ThreadPool: Apply %s (args:%s kwargs:%s)" %
                          (target, args, kwargs))

        req = WorkRequest(do_work,
                          (target, args, kwargs, on_ready, accept_callback))
        self._pool.putRequest(req)
        # threadpool also has callback support,
        # but for some reason the callback is not triggered
        # before you've collected the results.
        # Clear the results (if any), so it doesn't grow too large.
        self._pool._results_queue.queue.clear()
        return req

    def on_ready(self, callbacks, errbacks, ret_value):
        """What to do when a worker task is ready and its return value has
        been collected."""

        if isinstance(ret_value, ExceptionInfo):
            if isinstance(ret_value.exception,
                          (SystemExit, KeyboardInterrupt)):  # pragma: no cover
                raise ret_value.exception
            [errback(ret_value) for errback in errbacks]
        else:
            [callback(ret_value) for callback in callbacks]
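A short usage sketch of the wrapper above; some_task and handle_result are placeholder callables, and do_work/WorkRequest come from the surrounding module as the snippet implies.

# Hypothetical usage of TaskPool.
pool = TaskPool(limit=4)
pool.start()
pool.apply_async(some_task, args=(1, 2), callbacks=[handle_result])
pool.stop()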
Example #32
def same_ms(product_id):
	data = {'product_id': product_id, 'address_id': '72858'}
	url = 'http://payment.ohsame.com/order_create'
	
	time_s = time.time()
	pool = ThreadPool(20)
	reqs = makeRequests(same_ms_req, [((url, data), {}) for i in range(200)], same_ms_callback)
	[pool.putRequest(req) for req in reqs]
	pool.wait()
	time_e = time.time()

	print('Flash-sale product: %s\n' % str(product_id))
	print('Flash-sale result: %s\n' % rs_ms)
	print('Flash-sale time taken: %s\n' % (time_e-time_s))
Example #33
 def start(self):
     pool = ThreadPool(_thread_num)
     href_and_name = self.getProvince()  # len =31
     #demo: params_seq = [(['/0/2/0/0/', ['广东省']], None)]
     params_seq = [([i[0], [i[1]]], None) for i in href_and_name]
     # params_seq = [(['/0/2/0/0/', ['广东省']], None)]
     for group in PhoneBook.splitGroups(params_seq, 3):
         self.initItems()
         requests = makeRequests(self.process, group)
         [pool.putRequest(req) for req in requests]
         pool.wait()
         self.saveItems()  # persist the scraped items
         time.sleep(random.uniform(2, 5))
     PhoneBook.client.close()  # close the connection
Example #34
def run_prod():
    cycle_count = 1
    main = ThreadPool(num_workers=PARSER_THREAD_COUNT)
    while True:
        ADMIN_LOGGER.info("Starting cycle : " + str(cycle_count))
        reload(P_ROOT)
        process_list = [[
            e, __import__(P_ROOT.__name__ + '.' + e + '.main', fromlist=e)
        ] for e in P_ROOT.__all__]
        process_dict = dict(process_list)
        ADMIN_LOGGER.info("Executing process list : " +
                          str(process_dict.items()))
        for proc_name in process_dict.keys():
            proc = getattr(process_dict.get(proc_name), 'Parser', 'None')
            main.putRequest(
                WorkRequest(proc_runner, args=(1, proc), callback=None))
            ADMIN_LOGGER.info("Started thread : " + proc_name)
            try:
                main.poll()
            except NoResultsPending:
                pass
            except:
                ADMIN_LOGGER.error(traceback.format_exc())
        main.wait()
        ADMIN_LOGGER.info("Sleeping for default LISTING_SLEEP_TIME : " +
                          str(GLOBAL_SLEEP_TIME))
        time.sleep(GLOBAL_SLEEP_TIME)
        cycle_count = 1 if cycle_count > 9999 else cycle_count + 1
Example #35
def mutilpool(url):
    from threadpool import ThreadPool, makeRequests

    task_pool = ThreadPool(8)
    request_list = []  # holds the task list
    urls = []
    # first build the task list
    for device in range(20):
        urls.append(url)
        request_list.append(makeRequests(view, url))
    # map(task_pool.putRequest, request_list)
    requests = makeRequests(view, urls)
    [task_pool.putRequest(req) for req in requests]
    task_pool.wait()
Example #36
def from_file(m163,option):
    """ download objects (songs, albums...) from an input file.  """

    urls = []
    with open(option.inFile) as f:
        urls = f.readlines() 

    global total, done, xiami_obj
    total = len(urls)
    print border
    LOG.info(msgTxt.fmt_links_in_file % total)
    print border
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    for link in [u for u in urls if u]:
        link = link.rstrip('\n')
        #if it is a xiami link, init xiami object
        if re.match(pat_xm, link):
            __init_xiami_obj(option)
            pool.add_task(from_url_xm, xiami_obj,link, verbose=False)
        elif re.match(pat_163, link):
            pool.add_task(from_url_163, m163,link, verbose=False)
        else:
            LOG.warning(msgTxt.fmt_skip_unknown_url % link)

    pool.wait_completion()
Example #37
def refresh_tunnels(args):
    tunnels = db.store.find(Tunnel)
    if tunnels:
        pool = ThreadPool(tunnels.count())
        for tunnel in tunnels:
            request = WorkRequest(tunnel.check_available)
            pool.putRequest(request)

        pool.wait()
        
    for tunnel in tunnels:
        host = db.store.get(Host, tunnel.hostid)
        record = AvailabilityRecord.register(host, tunnel, check=False)
        print record
Example #38
def aggregate_all(client, iterator, connection_factory):
    """
    Aggregate all feeds returned by the generator.

    The generator should contain pairs of two elements (feed_url, categories)
    """

    def attach_connection(thread):
        thread.hbase = connection_factory()
        return thread

    pool = ThreadPool(10, thread_init=attach_connection)
    for feed, categs in iterator:
        pool.queueTask(lambda worker, p: aggregate(worker.hbase, *p), (feed, categs))
    pool.joinAll()
Example #39
 def start_thread(self):
     args_list = []
     ips = self.parse_ip()
     for ip in ips:
         args = self.args.copy()
         args['ip'] = ip
         args_list.append(args)
     self.cui.w('Proxy Scanner started')
     self.cui.i('Nums: %s' % len(args_list))
     self.cui.i('Port: %s' % self.args['port'])
     self.cui.i('Thread: %s' % self.args['thread'])
     pool = ThreadPool(self.args['thread'])
     reqs = makeRequests(self.run, args_list)
     [pool.putRequest(req) for req in reqs]
     pool.wait()
Example #40
def getSongsFromHTML(htmlcontent, save_path):
    global thread_count

    pool = ThreadPool(thread_count)

    matched_groups = re.findall("""W[LS]\("(\d+)",\s*"(\d+)",\s*"(.*?)\s+",""", htmlcontent)
    for matched in matched_groups:
        print "-" * 2, matched
        order = matched[0].strip()
        song_id = matched[1].strip()
        song_name = matched[2].strip()
        # getSong(song_id, order, save_path)
        pool.queueTask(getSongThread, (song_id, order, save_path))

    pool.joinAll()
Example #41
def get_default_threadpool():
    global default_threadpool
    if default_threadpool is None:
        default_threadpool = ThreadPool(minthreads=20,
                                        maxthreads=100,
                                        queuesize=100)
    return default_threadpool
Example #42
 def __init__(self,threads,output,limits):
     self.threads = threads  # number of worker threads
     self.output = output  # directory where fetched images are saved
     self.limits = limits  # limit on how many images to fetch
     self.tasks = 0  # number of completed tasks
     self.stop = False  # stop flag
     self.threadpool = ThreadPool(self.threads)  # initialize the thread pool
Example #43
    def __init__(self, url, domain, depth, threadNum):
        # number of threads currently running
        self.currentRunning = 0
        # condition variable that synchronizes the pool-manager thread with the crawler
        # threads; guards updates to currentRunning and wakes the manager thread
        self.processCondition = Condition()
        # queue of URLs waiting to be visited
        self.urlQueue = Queue()
        # URLs that have already been visited
        self.readUrls = []
        # mapping of URL host, path and params
        self.urls = {}
        # number of threads
        self.threadNum = threadNum
        # thread pool sized to the configured thread count
        #self.threadPool = ThreadPool(self.threadNum)
        self.pool = ThreadPool(self.threadNum)

        # seed the URL queue
        self.urlQueue.put({'url':url,"depth":1})
        # target crawl depth
        self.depth = depth
        # current crawl depth
        #self.currentDepth = 1
        # current running state
        self.state = False
        # DOMAIN
        self.domain = domain
        # initialize the database
        self.db = mongodb(self.domain)
        self.db.clean()
Example #44
    def __init__(self,
                 driver=None,
                 database=None, user=None, password=None,
                 host='localhost',
                 ioloop=tornado.ioloop.IOLoop.instance(),
                 num_threads=10,
                 tx_connection_pool_size=5,
                 queue_timeout=1,
                 thread_idle_life=60*60):
        if not(driver):
            raise ValueError("Missing 'driver' argument")
        self._driver = driver
        self._database = database
        self._user = user
        self._password = password
        self._host = host
        self._threadpool = ThreadPool(
            per_thread_init_func=self.create_connection,
            per_thread_close_func=self.close_connection,
            num_threads=num_threads,
            queue_timeout=queue_timeout,
            thread_idle_life=thread_idle_life)
        self._ioloop = ioloop

        # Connection pool for transactions
        self._connection_pool = []
        for i in xrange(tx_connection_pool_size):
            conn = self.create_connection()
            self._connection_pool.append(conn)
        self._waiting_on_connection = deque()
Example #45
    def configure(self, gconfig={}, **options):
        """
        Reconfigures the scheduler with the given options. Can only be done
        when the scheduler isn't running.
        """
        if self.running:
            raise SchedulerAlreadyRunningError

        # Set general options
        config = combine_opts(gconfig, 'apscheduler.', options)
        self.misfire_grace_time = int(config.pop('misfire_grace_time', 1))
        self.coalesce = asbool(config.pop('coalesce', True))
        self.daemonic = asbool(config.pop('daemonic', True))

        # Configure the thread pool
        if 'threadpool' in config:
            self._threadpool = maybe_ref(config['threadpool'])
        else:
            threadpool_opts = combine_opts(config, 'threadpool.')
            self._threadpool = ThreadPool(**threadpool_opts)

        # Configure job stores
        jobstore_opts = combine_opts(config, 'jobstore.')
        jobstores = {}
        for key, value in jobstore_opts.items():
            store_name, option = key.split('.', 1)
            opts_dict = jobstores.setdefault(store_name, {})
            opts_dict[option] = value

        for alias, opts in jobstores.items():
            classname = opts.pop('class')
            cls = maybe_ref(classname)
            jobstore = cls(**opts)
            self.add_jobstore(jobstore, alias, True)
Example #46
    def __init__(self, path):
        self.input_data_path = path
        self.file_list = deque()
        self.getAllDataDir()
        self.post = True
        self.backup_tag_list = self.getAllBackUpTag(path)
        self.tag_info = defaultdict(lambda: {})

        self.check_true_file_list = []
        self.check_false_file_list = []
        self.false_check_reasion = []

        self.auto_module_ = loadTag('config/auto_module.json', '')
        self.config_ = loadTag('config/data_pipeline_config.json', '')
        self.end_point_30 = self.config_["end_point_30"]
        self.end_point_40 = self.config_["end_point_40"]
        self.end_point_21 = self.config_["end_point_21"]
        self.end_point = self.end_point_21
        self.check_file_name_list = self.config_["check_file"]

        self.headerdata = {"Data-tag-type": "application/json"}
        self.tag_module_list = loadTag(
            tag_file_name='config/tag_module.json'
        )  # special manual tagging, take over, dangerous driving etc
        self.tprofile_thresh = loadTag('config/tprofile_thresh.json', '')
        self.readShellFile('config/download_logs.sh')
        self.pool = ThreadPool(int(multiprocessing.cpu_count() * 0.6))
        self.auto_module_ = loadTag('config/auto_module.json', '')
        self.pred_eval_thresh = loadTag('config/pred_eval_thresh.json', '')
        self.case_tagging = TaggingMain(self.pool, self.config_,
                                        self.auto_module_,
                                        self.tag_module_list)
        self.case_toss = TossMain(self.config_, self.auto_module_,
                                  self.pred_eval_thresh)
Example #47
def job():
    """
    Updater job for periodic repetitions.
    """
    print('[+] PUT UPDATER INTO THREADPOOL AT [%s]' % (now()))
    pool = ThreadPool(POOL_COUNT)
    pool.add_task(populate, None)
    pool.wait_completion()
    print('[+] COMPLETE POPULATE AT [%s]' % (now()))
    pool.add_task(update, None)
    pool.wait_completion()
    print('[+] COMPLETE UPDATE AT [%s]' % (now()))
    del pool
    cache.rpush(
        'incomingQueue',
        'StartUpdateVulnerabilityDataBase')
Example #48
    def after_properties_set(self):
        """ Run by Spring Python after all the JMS container's properties have
        been set.
        """

        for idx in range(self.concurrent_listeners):
            # Create as many Circuits managers as there are JMS listeners.
            manager = Manager()
            manager.start()

            # A pool of handler threads for each listener.
            handlers_pool = ThreadPool(self.handlers_per_listener)

            # Each manager gets assigned its own listener.
            listener = WebSphereMQListener()

            # Assign the listener and a debugger component to the manager.
            manager += listener
            manager += Debugger(logger=self.logger)

            listener.factory = self.factory
            listener.destination = self.destination
            listener.handler = self.handler
            listener.handlers_pool = handlers_pool
            listener.wait_interval = self.wait_interval
            listener.start()
Example #49
def start_download(songs, skipped_hists):
    """
    start multi-threading downloading songs. and generate a summary file
    songs: the list of songs need to be downloaded

    call the finish_hook function, pass skipped_hist
    """
    global total
    total = len(songs)
    LOG.debug('init thread pool (%d) for downloading'% config.THREAD_POOL_SIZE)
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    downloader = Downloader(songs, pool)

    LOG.debug('Start downloading' )
    downloader.start()

    while done < total:
        time.sleep(1)
        print_progress()

    # handling lyrics downloading
    download_lyrics(songs)

    print log.hl(msg.fmt_insert_hist, 'warning')
    hist_handler.insert_hist(songs)
    print log.hl(msg.fmt_all_finished, 'warning')
    #call finish hook
    finish_summary(skipped_hists)
Example #50
    def __init__(self, t=180, count=8):
        self.time = float(t)
        self.count = int(count)
        self.ThreadPool = ThreadPool(self.count)
Example #51
class TaskPool(object):

    def __init__(self, limit, logger=None, **kwargs):
        self.limit = limit
        self.logger = logger or log.get_default_logger()
        self._pool = None

    def start(self):
        self._pool = ThreadPool(self.limit)

    def stop(self):
        self._pool.dismissWorkers(self.limit, do_join=True)

    def apply_async(self, target, args=None, kwargs=None, callbacks=None,
            errbacks=None, accept_callback=None, **compat):
        args = args or []
        kwargs = kwargs or {}
        callbacks = callbacks or []
        errbacks = errbacks or []

        on_ready = partial(self.on_ready, callbacks, errbacks)

        self.logger.debug("ThreadPool: Apply %s (args:%s kwargs:%s)" % (
            target, args, kwargs))

        req = WorkRequest(do_work, (target, args, kwargs, on_ready,
                                    accept_callback))
        self._pool.putRequest(req)
        # threadpool also has callback support,
        # but for some reason the callback is not triggered
        # before you've collected the results.
        # Clear the results (if any), so it doesn't grow too large.
        self._pool._results_queue.queue.clear()
        return req

    def on_ready(self, callbacks, errbacks, ret_value):
        """What to do when a worker task is ready and its return value has
        been collected."""

        if isinstance(ret_value, ExceptionInfo):
            if isinstance(ret_value.exception, (
                    SystemExit, KeyboardInterrupt)):    # pragma: no cover
                raise ret_value.exception
            [errback(ret_value) for errback in errbacks]
        else:
            [callback(ret_value) for callback in callbacks]
Example #52
def from_file(xm_obj, infile):
    """ download objects (songs, albums...) from an input file.  """

    urls = []
    with open(infile) as f:
        urls = f.readlines() 

    global total, done
    total = len(urls)
    print border
    LOG.info(u'Total links in the file: %d' % total)
    print border
    pool = ThreadPool(config.THREAD_POOL_SIZE)
    for link in [u for u in urls if u]:
        pool.add_task(from_url, xm_obj,link.rstrip('\n'), verbose=False)

    pool.wait_completion()
Example #53
    def startup(self):
        ok = self.load_plugins()
        if not ok:
            sys.stderr.write(
                "Some plugins failed to load, please check the logs. Aborting.\n"
            )
            self.logger.info('postomaat shut down after fatal error condition')
            sys.exit(1)

        self.logger.info("Init Threadpool")
        try:
            minthreads = self.config.getint('performance', 'minthreads')
            maxthreads = self.config.getint('performance', 'maxthreads')
        except ConfigParser.NoSectionError:
            self.logger.warning(
                'Performance section not configured, using default thread numbers'
            )
            minthreads = 1
            maxthreads = 3

        queuesize = maxthreads * 10
        self.threadpool = ThreadPool(minthreads, maxthreads, queuesize)

        self.logger.info("Init policyd Engine")

        ports = self.config.get('main', 'incomingport')
        for portconfig in ports.split():
            #plugins
            plugins = self.plugins
            if ':' in portconfig:
                port, pluginlist = portconfig.split(':')
                port = int(port.strip())
                plugins, ok = self._load_all(pluginlist)
                if not ok:
                    self.logger.error(
                        "Could not startup engine on port %s, some plugins failed to load"
                        % port)
                    continue
            else:
                port = int(portconfig.strip())

            server = PolicyServer(self,
                                  port=port,
                                  address=self.config.get(
                                      'main', 'bindaddress'),
                                  plugins=plugins)

            thread.start_new_thread(server.serve, ())
            self.servers.append(server)
        self.logger.info('Startup complete')
        if self.debugconsole:
            self.run_debugconsole()
        else:
            while self.stayalive:
                try:
                    time.sleep(10)
                except KeyboardInterrupt:
                    self.shutdown()
Example #54
class Spider(object):

    def __init__(self,seed,depth,pool_size=10):
        
        self.seed = seed
        self.depth = depth
        self.all_url_list = [seed]
        self.finished_url_list = []
        self.failure_url_list = []
        self.pool = ThreadPool(pool_size)

    def crawl(self):
        base_deep_size = 0
        while base_deep_size <= self.depth:
            for url in self.all_url_list:
                if url not in self.finished_url_list:
                    self.pool.add_task(self.download,url)
            self.pool.close()
            self.depth-=1

    def download(self,url):
        try:
            data = urllib2.urlopen(url)
            page = data.read()
            self.finished_url_list.append(url)
            links = self.get_urls(page)
            return page,links
        except Exception as e:
            print 'open url:%s raise exception(%s)'%(url,e)
            return None

    def get_urls(self,page):
        soup = BeautifulSoup(page,fromEncoding="gb18030")
        if soup.title:
            print soup.title.string
        links = []
        for item in soup.findAll('a'):
            link=item.get('href')
            if link and link.startswith('http://') and link not in self.finished_url_list:
                links.append(link)
        print links
        return links

    def get_next_url(self):
        pass
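A minimal driver for the class above, assuming ThreadPool, urllib2 and BeautifulSoup are imported as the snippet implies; the seed URL is illustrative.

# Hypothetical driver for the Spider class.
spider = Spider(seed='http://example.com', depth=2, pool_size=5)
spider.crawl()
print spider.finished_url_list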