Example 1
def main():
    pool = Pool(multiprocessing.cpu_count())
    for url in urls:
        pool.apply(Request, (url, ))
    # pool.map(Request, urls)
    pool.close()
    pool.join()
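Note that pool.apply blocks until each call returns, so the loop above fetches the URLs one at a time; the commented-out pool.map (or apply_async) is what actually spreads the work across the pool. A minimal self-contained sketch of the asynchronous variant, using a stand-in fetch function instead of the original Request:

from multiprocessing.dummy import Pool  # thread-backed pool with the same API

def fetch(url):
    # stand-in for the real Request(url) call
    return "fetched %s" % url

if __name__ == "__main__":
    urls = ["http://example.com/%d" % i for i in range(8)]
    pool = Pool(4)
    # apply_async returns immediately, so the calls overlap instead of running serially
    async_results = [pool.apply_async(fetch, (url,)) for url in urls]
    pool.close()
    pool.join()
    print([r.get() for r in async_results])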
Example 2
def RunClientCode(self):
    pool = Pool(1)
    try:
        pool.apply(self._client_app.payload)
        self._base.NextPage()
        self.ShowGoodFinishedDialog()
    except Exception:
        self.ShowBadFinishedDialog(traceback.format_exc())
Example 3
def RunClientCode(self):
    _pool = Pool(1)
    try:
        _pool.apply(self._client_app.payload)
        self._base.NextPage()
        self.ShowGoodFinishedDialog()
    except Exception:
        self.ShowBadFinishedDialog(traceback.format_exc())
Example 4
def apply():
    pool = ThreadPool(3)
    for i in range(3):
        pool.apply(work, (3, ))
    print('close')
    pool.close()
    print('join')
    pool.join()
Example 5
def get_pictures_by_request_multiprocessing(host, url_dict, dir):
    if not os.path.exists(dir):
        os.makedirs(dir)
    # pool = Pool(multiprocessing.cpu_count())
    pool = Pool(1024)
    for key in url_dict:
        s_t = time.time()
        pool.apply(get_pic, (key, ))
        e_t = time.time()
        print("Time cost: " + str(e_t - s_t))
Example 6
    def test_DirectoryWatcherFileCountChanged(self):
        self.watcher.addEventObserver(
            self.watcher.IncomingFileCountChangedEvent, lambda event, caller,
            callData: setattr(self, "fileCountChangedEmitted", True))
        mrHead = self.sampleDataLogic.sourceForSampleName('MRHead')
        self.sampleDataLogic.downloadFile(mrHead.uris[0], self.tempDir,
                                          mrHead.fileNames[0])
        mrHeadPath = os.path.join(self.tempDir, mrHead.fileNames[0])
        shutil.copy(mrHeadPath,
                    os.path.join(self.watchedDirectory, mrHead.fileNames[0]))

        def checkTimerExecuted():
            while not self.fileCountChangedEmitted:
                print "timer not done"
                time.sleep(1)
            print "timer done"

        pool = ThreadPool(4)
        results = pool.apply(checkTimerExecuted)
        pool.close()
        pool.join()
        #
        # t = Thread(target=checkTimerExecuted)
        # t.start()
        # t.run()

        self.assertTrue(self.fileCountChangedEmitted)
        self.watcher.removeEventObservers()
Example 7
def target_process(target_path):
    process_result = []
    thread_pool = Pool(cpu_count() * 10)
    for target in target_path:
        process_result.append(thread_pool.apply(exp_string, args=(target, )))
    thread_pool.close()
    thread_pool.join()
    return process_result
Example 8
    def run(self):
        # Upload basic network device info every time the agent starts, then once every 4 hours
        if self.net_equip_details:
            for value in self.net_equip_details.values():
                target = value.get('target')
                if target and target != "unknown":
                    target.last_time_basis_info_collected = 0

        ping_timeout = int(self.config['ping'][0]['timeout'])
        ping_retries = int(self.config['ping'][0]['retries'])
        '''
        Network segment scan:
        collect and save network device node info, and upload the devices' online status.
        This metric is quick to collect, so no multithreading is needed.
        Instantiate a Target for each network device and put it into the targets set.
        '''
        network_scan = Thread(target=self.net_segment_scan,
                              args=(ping_timeout, ping_retries))
        network_scan.start()
        '''
        Take targets from the target_queue queue and, based on each collection
        interval, put collection tasks into the task_queue queue.
        '''
        task_generation = Thread(target=self.collect_task_producer)
        task_generation.start()
        '''
        Take tasks from task_queue;
        the thread pool handles multiple tasks concurrently.
        '''
        pool = ThreadPool(self.pool_size)
        while self.continue_running:
            task = self.task_queue.get()
            try:
                pool.apply(task[1][0], (task[1][1], ))
            except Exception as e:
                log.warn('Task {0} error {1}'.format(task, e))
            '''Log the number of each metric sent
            during the last RECORD_INTERVAL'''
            self.metrics_amount_record()

        pool.close()
        pool.join()
Example 9
    def __init__(self, session, database):
        print(colored('        processing...', 'green'))

        # load processing settings from yaml file
        self.settings = load_yaml(processing_options['cfg'])

        self.session = session
        self.database = database

        # Process tracking data
        for data_name, tracking_data in sorted(list(self.session.Tracking.items())):
            try:
                if data_name == 'Exploration' or data_name == 'Whole Session':
                    print(colored('          processing currently only supports processing of trial data, not {}'.format(
                        data_name), 'yellow'))
                    continue

                print(colored('        Trial {}'.format(data_name), 'green'))
                self.tracking_data = tracking_data

                # Save info about processing options in the metadata
                self.define_processing_metadata()
                # Apply processing steps via the thread pool
                # (note: pool.apply blocks per call, so these still run one after another)
                # TODO use decorator to make sure that functions are automatically added to the list, avoid bugs
                funcs = [self.extract_bodylength, self.extract_velocity,
                         self.extract_location_relative_shelter, self.extract_orientation]
                pool = ThreadPool(len(funcs))
                [pool.apply(func) for func in funcs]

                # Other processing steps will not be done in parallel
                self.extract_ang_velocity()
                # PoseReconstructor(self.tracking_data.dlc_tracking['Posture'])  # WORK IN PROGRESS, buggy

            except Exception:
                warnings.warn('Could not analyse this trial!!!')  # but avoid crashing the whole analysis
                print(colored('          trial {} could not be processed!'.format(data_name), 'yellow'))
                slack_chat_messenger('Could not process trial {}'.format(data_name))

        # Call experiment specific processing tools [only implemented for maze experiments]
        if self.settings['apply exp-specific']:
            ProcessingTrialsMaze(self.session, debugging=self.settings['debug exp-specific'])
            ProcessingSessionMaze(self.session)

        else:
            from warnings import warn
            warn('Experiment type {} is not supported yet'.format(exp_type))
Example 10
# list of pmid ids
lst = ['9749847','9755759','9757885','9766300','9769004','9784814','9800512','9804224','9877391','9879865','9888139','9918973','9919913']
lst = ['9749847']

lst =  [['9749847','9755759'],['9757885','9766300','9769004'],['9784814','9800512','9804224','9877391'],['9879865','9888139','9918973','9919913']]

lst = ['9749847','9755759','9757885','9766300','9769004','9784814','9800512','9804224','9877391','9879865','9888139','9918973','9919913']

# define function
def list_records(item):
    record = Entrez.read(Entrez.elink(dbfrom="pubmed", db="pmc", LinkName="pubmed_pmc_refs",from_uid=item))
    #print(item)
    return(record)

pool = mp.Pool(processes=4)
# Pool.apply blocks and returns the function's result directly,
# so there is no AsyncResult and no .get() to call afterwards
results = [pool.apply(list_records, args=(item, )) for item in lst]
print(results)


# define an example function
def rand_string(length, output):
    """ Generates a random string of numbers, lower- and uppercase chars. """
    rand_str = ''.join(random.choice(
                    string.ascii_lowercase
                    + string.ascii_uppercase
                    + string.digits)
               for i in range(length))
    output.put(rand_str)
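If the PubMed lookups above should actually run in parallel, apply_async is the variant that returns AsyncResult objects whose .get() yields the value. A minimal self-contained sketch of that pattern, with a stand-in function in place of the real Entrez call:

from multiprocessing import Pool

def fetch(item):
    # stand-in for the real list_records / Entrez lookup
    return {"pmid": item, "length": len(item)}

if __name__ == "__main__":
    ids = ['9749847', '9755759', '9757885']
    with Pool(processes=4) as pool:
        async_results = [pool.apply_async(fetch, args=(i,)) for i in ids]
        output = [r.get() for r in async_results]  # .get() is valid on AsyncResult
    print(output)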
Example 11
# apply_async
print('\n------apply_async-------')
async_pool = ThreadPool(processes=4)
results = []
for i in range(5):
    msg = 'msg: %d' % i
    result = async_pool.apply_async(fun, (msg, ))
    results.append(result)

print('apply_async: does not block')
# async_pool.close()
# async_pool.join()
for i in results:
    i.wait()  # wait for the worker function to finish

for i in results:
    if i.ready():  # has the call completed?
        if i.successful():  # did it complete without raising?
            print(i.get())  # return value of the worker function

# apply
print('\n------apply-------')
pool = ThreadPool(processes=4)
results = []
for i in range(5):
    msg = 'msg: %d' % i
    result = pool.apply(fun, (msg, ))
    results.append(result)

print('apply: blocks')
print(results)
Example 12


pre_url="https://zh.wikipedia.org"
test_url="https://zh.wikipedia.org/wiki/Category:日本的大學教師"
test2_url="https://zh.wikipedia.org/wiki/Category:各國政治人物"
start_url="https://zh.wikipedia.org/wiki/Category:按國籍分類"

wiki_of = open('process.txt','w+')

urlqueue = Queue.LifoQueue()

start_time = time.time()
root = category(start_url)

while not urlqueue.empty():
    pool.apply(new_category, (urlqueue.get(),))
    sleep(delay)

pool.close()
pool.join()
print time.time()-start_time
print "All subprocesses done."
wiki_of.close()

# def get_info(url):
# 	r = requests.get(url)
# 	print r.text
# 	print r.encoding 
Example 13
class ParallelDownloader(URL_Fetcher):
    'Parallel threaded web page downloader'

    def __init__(self,
                 db_name,
                 proc_count,
                 site_base_url,
                 fUseCache=True,
                 fCacheSearchPages=True,
                 fUseCookies=False,
                 timeout=secHTTP_WAIT_TIMEOUT,
                 search_proc_count=2,
                 proxies=None):

        self.proxies = proxies
        self.queue = Queue()
        self.fSaveSearchPages = fCacheSearchPages
        self.site_base_url = site_base_url
        self.pool = Pool(processes=proc_count)

        self.search_queue = Queue()
        self.url_extract_pool = Pool(processes=search_proc_count)

        URL_Fetcher.__init__(self,
                             db_name,
                             fUseCache,
                             fUseCookies,
                             timeout=timeout,
                             proxies=proxies)

    def process_urls_from_search_queue(self):
        while not self.search_queue.empty():
            search_page_url = self.search_queue.get()
            #			logOut('search pages queue size: %d'%self.search_queue.qsize())
            logDbg('search page: %s' % search_page_url)

            search_page = self.get_page(search_page_url,
                                        fUseCache=self.fSaveSearchPages)
            rel_urls = extract_data_xpath(search_page, self.url_extract_xpath)
            #rel_urls = self.extract_page_xpath(self.url_extract_xpath, search_page_url)
            #			logOut('URLs from %s extracted'%search_page_url)

            logOut('%d urls extracted from [%s]. Queuing...' %
                   (len(rel_urls), search_page_url))
            logDbg('Extracted urls: %s. Queuing to download...' % rel_urls)
            list(map(self.queue.put, self.prefix_site_base_url(rel_urls)))

        self.queue.put(None)
        self.postprocess_search_page_list(rel_urls, search_page)

    def queue_pages(self, url_list):
        list(map(self.queue.put, url_list))

        # sentinel marking the end of the task queue
        self.queue.put(None)

    def postprocess_search_page_list(self, url, page):
        pass

    def prefix_site_base_url(self, rel_urls):
        return [self.site_base_url + url for url in rel_urls]

    def process_pages(self, page_processor, *add_processor_args):
        self.page_processor = page_processor
        self.add_pprocessor_args = add_processor_args
        self.pool.apply(self.process_page)

    def process_page(self):
        while True:
            url = self.queue.get()
            logDbg('Url got from queue: %s' % url)
            if not url:
                break

            page = self.get_page(url)  #, proxies=self.proxies

            #logOut('pp_arg_list: [%s]'%pp_arg_list)
            if page:
                self.page_processor(url, page, *self.add_pprocessor_args)
Example 14
        prob = [i[1] for i in prob]
        prob = np.array(prob,dtype='float32')
        probs.append(prob)
    #print probs
    prob_s = np.array(probs)
    prob_s = sum(prob_s)/np.sum(probs)
    prob_s = [float(i) for i in prob_s]
    return dict(zip(['0','1'],prob_s))
    
def worker(i):
    print i['stream']['id']
    print i['stream']['timestamp']
    probs = ftpredict(i['stream']['tweet'])
    print probs
    db.stream.find_one_and_update({'_id': i['_id'], 'probs': None}, {'$set': {'probs': probs}})
    print 'done!'

    
if __name__ == '__main__':
    while True:
        pool = Pool(1)
        latest = db.time.find_one({'name':'stream_score_entropy'})['time']
        latest_stream = db.stream.find_one({'stream.timestamp':{'$gt':latest}},sort=[('stream.timestamp',pymongo.DESCENDING)])
        if latest_stream != None:
            db.time.find_one_and_update({'name':'stream_score_entropy'},{'$set':{'time':latest_stream['stream']['timestamp']}})
            [pool.apply(worker,(i,)) for i in db.stream.find({'stream.timestamp':{'$gt':latest}},sort=[('stream.timestamp',pymongo.DESCENDING)])]
            pool.close()
            pool.join()
            time.sleep(60*60)
        else:
            time.sleep(5*60)
Example 15
class RedisServer:
    def __init__(self, selector, sock, host='127.0.0.1', port=8880):
        self.datas = {
            'ZSET': ZSetStore(),
            'STR': StrStore(),
            'SET': SetStore(),
            'HASH': HashStore(),
            'LIST': ListStore()
        }
        self.host = host
        self.port = port
        self.selector = selector
        self.sock = sock
        self.commands_map = {}
        self.pool = ThreadPool(processes=4)
        self.lock = Lock()

    def load(self):
        if os.path.exists('redis.db'):
            with open('redis.db', 'rb') as f:
                datas = pickle.load(f)
            for k in self.datas:
                self.datas[k].load(datas[k])
        else:
            self.dump()

    def dump(self):
        with self.lock:
            datas = {}
            for k in self.datas:
                datas[k] = self.datas[k].dump()
            with open('redis.db', 'wb') as f:
                pickle.dump(datas, f)

    def run(self):
        self.register_commands()
        self.load()
        self.process_request()

    def register_commands(self):
        for k in self.datas:
            command_map = self.datas[k].register_command()
            self.commands_map.update(command_map)

    def execute_command(self, command):
        commands = command.split('\r\n')
        rows = int(commands[0][1:])  # number of RESP array elements (handles multi-digit counts)
        method = commands[2].upper()

        if rows == 2:
            method, key = method, commands[4]
            logger.info("execute %s", ' '.join([method, key]))
            try:
                message = self.commands_map[method](key)
            except Exception:
                logger.error("execute %s", ' '.join([method, key]))
                return 'Error'
            return message
        elif rows == 3:
            method, key, value = method, commands[4], commands[6]
            logger.info("execute %s", ' '.join([method, key, value]))
            try:
                message = self.commands_map[method](key, value)
                if message is None:
                    message = 'OK'
            except Exception:
                logger.error("execute %s", ' '.join([method, key, value]))
                return 'Error'
            return message
        elif rows == 4:
            method, key, value, value2 = method, commands[4], commands[
                6], commands[8]
            logger.info("execute %s", ' '.join([method, key, value, value2]))
            try:
                message = self.commands_map[method](key, value, value2)
                if message is None:
                    message = 'OK'
            except Exception:
                logger.error("execute %s",
                             ' '.join([method, key, value, value2]))
                return 'Error'
            return message
        else:
            logger.error("execute %s", ''.join(commands))
            return 'Error'

    def process_request(self):
        logger.info("listen  to %s:%s" % (self.host, self.port))
        self.sock.bind((self.host, self.port))
        self.sock.listen(1000)
        self.sock.setblocking(False)
        self.selector.register(self.sock, selectors.EVENT_READ, self.accept)
        while True:
            events = self.selector.select()
            for key, mask in events:
                callback = key.data
                callback(key.fileobj, mask)

    def accept(self, sock, mask):
        conn, addr = sock.accept()
        logger.info("accepted conn from %s", addr)
        conn.setblocking(False)
        self.selector.register(conn, selectors.EVENT_READ, self.read)

    def read(self, conn, mask):
        data = conn.recv(1024)
        command = str(data, encoding="utf8")
        if command != 'exit':
            message = self.pool.apply(self.execute_command, (command, ))
            self.dump()
            conn.send(message.encode('utf8'))
        elif command == 'exit':
            print('closing', conn)
            self.selector.unregister(conn)
            conn.close()
Example 16
# -*- coding: utf-8 -*-
# @Time    : 2020/9/21 10:57
# @Author  : Fcvane
# @Param   :
# @File    : tmp_1.py

from multiprocessing.dummy import Pool
import time
import os


def talk(msg):
    print('msg:', msg)
    time.sleep(3)
    print('end')


if __name__ == "__main__":
    print('Starting the program:')
    start_time = time.time()
    pool = Pool(3)
    print('Starting three workers')
    for i in range(6):
        # pool.apply blocks, so the six calls run one after another (~3 s each)
        pool.apply(talk, [i])
    print('pid: %s, main process finished, total time: %s' % (os.getpid(), time.time() - start_time))
Example 17
def parse_link(response):
    print(response.url)
    content = response.content
    selector = etree.HTML(content)
    urls = selector.xpath('//img/@src')
    urls = [url for url in urls if 'sinaimg' in url]
    if urls:
        for url in urls:
            URI.put_nowait((url, 'img'))


def images(response):
    print(response.url)
    content = response.content
    try:
        with open(response.url.split(r'/')[-1], 'wb') as img:
            img.write(content)
    except Exception as e:
        print(e)


if __name__ == '__main__':
    pool = Pool(10)
    for num in range(1, 98):
        URI.put_nowait((URL.format(num), 'first'))
    pool.apply(main)
    pool.close()
    pool.join()
    print(time.time() - start)
Example 18
    def save(self,
             suffix,
             services=None,
             only_tag=False,
             only_push=False,
             no_interaction=False,
             text=False):

        if only_tag and only_push:
            print 'Please note: only one of [--only-tag|--only-push] can be used.'
            return
        _show = self._probe(services, merge=False)
        table_data = []
        if not _show:
            return

        longest_image = max([len(v['image']) for v in _show])
        longest_service = max([len(v['service']) for v in _show])
        longest_imageId = max([len(v['Id']) for v in _show])
        longest_match = max([len(v['Match']) for v in _show])

        title = '\n  {image:<{longest_image}} | {service:<{longest_service}} | {imageid:<{longest_imageId}} | {match:<{longest_match}}' \
                '\n  {ind:-<{width}}'.format(
            image='Image', service='Service', imageid='Image-Id', match='Match',
            longest_image=longest_image, longest_service=longest_service,
            longest_imageId=longest_imageId,
            longest_match=longest_match,
            ind='-',
            width=longest_image + longest_service + longest_imageId + longest_match + 9)

        for v in _show:
            table_data.append(
                '{image:<{longest_image}} | {service:<{longest_service}} | {imageid:<{longest_imageId}} | {match:<{longest_match}}'
                .format(image=v['image'],
                        service=v['service'],
                        imageid=v['Id'],
                        match=v['Match'],
                        longest_image=longest_image,
                        longest_service=longest_service,
                        longest_imageId=longest_imageId,
                        longest_match=longest_match))

        selected_service = []
        if no_interaction:
            selected_service.extend(_show)
        else:
            try:
                selected = pick(
                    table_data,
                    'Please choose your images for save (press SPACE to mark, ENTER to continue, Ctrl+C to exit): '
                    + title,
                    indicator='*',
                    multi_select=True,
                    min_selection_count=0)
            except KeyboardInterrupt:
                return
            if selected:
                for s in selected:
                    v = _show[s[1]]
                    selected_service.append(v)
            else:
                print 'No images selected.'
                return
        # confirm_input(msg='Select these images.')

        print 'List:'
        _skip = False
        not_ready = []

        for s in selected_service:
            _action = 'skip'
            if s['Id'] == '':
                _action += '(not exist)'
                _skip = True
            elif s['Match'] != '':
                _action += '(not match)'
                _skip = True
            else:
                _action = 'do'

            s['Action'] = _action
            if _action == 'do':
                _action = Color('{autogreen}%s{/autogreen}' % (_action))
            else:
                _action = Color('{autored}%s{/autored}' % (_action))
                not_ready.append(s)
            print '{action:<25} {image_old:<{longest_image}} => {image_new:<{longest_image}}'.format(
                action=_action,
                longest_image=longest_image,
                image_old=s['image'],
                image_new=s['image'] + suffix)
        if only_tag:
            _msg = 'Tag these images.'
        elif only_push:
            _msg = 'Push these images.'
        else:
            _msg = 'Tag and Push these images.'
        if _skip:
            if no_interaction:
                _msg += Color(
                    '\n{autored}These services\' images are not ready; please fix them first.{/autored}'
                )

                print _msg
                table_data = []
                table_instance = SingleTable(table_data, 'Not Ready')
                table_data.append(
                    ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])

                for s in not_ready:
                    table_data.append([
                        s['image'] + '\n' + s['Match'], s['service'], s['Id'],
                        s['Created'], s['Labels']
                    ])
                table_instance.inner_heading_row_border = False
                table_instance.inner_row_border = True
                print table_instance.table
                sys.exit(-1)
            else:
                _msg += Color('\n{autored}Some services\' images are not ready. \n' \
                              'You can use k2-compose pull/up to fix them, otherwise these images will be skipped.{/autored}\n')
                confirm_input(msg=_msg)

        if not only_push:
            pool = ThreadPool(len(selected_service))
            for s in selected_service:
                if s['Action'] == 'do':
                    container = self.get_container_instance_by_service_name(
                        s['service'])
                    pool.apply(container.tag, (suffix, ))
            pool.close()
            pool.join()

        if only_tag:
            return

        _result = []
        pool = ThreadPool(len(selected_service))
        print 'Pushing...'
        for s in selected_service:
            if s['Action'] == 'do':
                container = self.get_container_instance_by_service_name(
                    s['service'])
                _result.append(pool.apply_async(container.push, (suffix, )))
        pool.close()
        pool.join()
        for r in _result:
            print r.get()
        print Color('{autogreen}Push all done.{/autogreen}\n')
        if text:
            print "#".join(
                ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])
            for s in selected_service:
                print "#".join((s['image'], s['service'], s['Id'],
                                s['Created'], s['Labels'].replace('\n', ' ')))
        else:
            table_data = []
            table_instance = SingleTable(table_data, 'Done')
            table_data.append(
                ['Image', 'Service', 'Image-Id', 'Created', 'Labels'])

            for s in selected_service:
                table_data.append([
                    s['image'], s['service'], s['Id'], s['Created'],
                    s['Labels']
                ])
            table_instance.inner_heading_row_border = False
            table_instance.inner_row_border = True
            print table_instance.table
        return
Example 19
from multiprocessing.dummy import Pool as ThreadPool

import requests

URL_TO_TEST = "http://127.0.0.1:8009"


def hammer_it():
    with open("test_http/index.html.zip", "rb") as f:
        files = {"file": f}
        res = requests.post(URL_TO_TEST, files=files)
        print(res.status_code)


pool = ThreadPool(8)

for _ in range(1, 10):
    pool.apply(hammer_it)
Example 20
class ThreadPool:
    # multiprocessing.dummy.Pool with exc_info in error_callback
    def __init__(self,name=None,processes=None):

        self._processes=processes
        self._pool=NamedPool(self._processes,name=name)
        self._lock=Lock() # lock for self
        self._cblock=Lock() # lock for callback
        self._errcblock=Lock() # lock for error_callback
        self._closed=False

        self.name=name

    def apply(self,*args,**kwargs):
        return self._pool.apply(*args,**kwargs)

    def map(self,*args,**kwargs):
        return self._pool.map(*args,**kwargs)

    def map_async(self,*args,**kwargs):
        return self._pool.map_async(*args,**kwargs)

    def imap(self,*args,**kwargs):
        return self._pool.imap(*args,**kwargs)

    def imap_unordered(self,*args,**kwargs):
        return self._pool.imap_unordered(*args,**kwargs)

    def starmap(self,*args,**kwargs):
        return self._pool.starmap(*args,**kwargs)

    def starmap_async(self,*args,**kwargs):
        return self._pool.starmap_async(*args,**kwargs)

    def join(self):
        return self._pool.join()

    def _uiter(self,iterable):
        buf=[]
        for item in iterable:
            if item in buf:
                continue
            yield item
            buf.append(item)
        buf.clear()

    def _trycall(self,func,args=(),kwargs={},lock=None):
        if not callable(func):
            return
        with lock:
            try:
                return func(*args,**kwargs)
            except:
                pass

    def _caller(self,func,args,kwargs,callback,error_callback,exc_raise):
        try:
            result=func(*args,**kwargs)
        except:
            etype,value,tb=sys.exc_info()
            self._trycall(error_callback,args=(self.name,etype,value,tb),
                          lock=self._errcblock)
            if exc_raise:
                raise  # re-raise the original exception, preserving its traceback
        else:
            self._trycall(callback,args=(result,),
                          lock=self._cblock)
            return result

    def apply_async(self,func,args=(),kwargs={},
                    callback=None,error_callback=None):
        # run error_callback with ThreadPool.name and exc_info if func failed,
        # callback and error_callback will *not* run in multi thread.
        # other arguments is same as Pool.apply_async
        return self._pool.apply_async(
            self._caller,(func,args,kwargs,None,error_callback,True),
            callback=callback)

    def cbmap(self,func,iterable,callback=None,error_callback=None):
        # shortcut of:
        #
        # for item in iterable:
        #     apply_async(func,args=(item,),kwargs={},
        #                 callback=callback,error_callback=error_callback)
        #
        # always return None
        for item in iterable:
            self.apply_async(func,args=(item,),
                             callback=callback,error_callback=error_callback)

    def ucbmap(self,func,iterable,callback=None,error_callback=None):
        # unique version of ThreadPool.cbmap
        return self.cbmap(func,self._uiter(iterable),callback,error_callback)

    def umap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.map
        return self.map(func,self._uiter(iterable),chunksize=chunksize)

    def umap_async(self,func,iterable,chunksize=None,
                   callback=None,error_callback=None):
        # unique version of ThreadPool.map_async
        return self.map_async(
            func,self._uiter(iterable),chunksize,
            callback,error_callback)

    def uimap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.imap
        return self.imap(func,self._uiter(iterable),chunksize)

    def uimap_unordered(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.imap_unordered
        return self.imap_unordered(func,self._uiter(iterable),chunksize)

    def ustarmap(self,func,iterable,chunksize=None):
        # unique version of ThreadPool.starmap
        return self.starmap(func,self._uiter(iterable),chunksize)

    def ustarmap_async(self,func,iterable,chunksize=None,
                       callback=None,error_callback=None):
        # unique version of ThreadPool.starmap_async
        return self.starmap_async(
            func,self._uiter(iterable),chunksize,
            callback,error_callback)

    def close(self):
        # same as Pool.close
        self._closed=True
        return self._pool.close()

    def terminate(self):
        # same as Pool.terminate
        self._closed=True
        return self._pool.terminate()

    def renew(self):
        # terminate all process and start a new clean pool
        with self._lock:
            self.terminate()
            self._pool=NamedPool(self._processes,name=self.name)
            self._closed=False

    @property
    def closed(self):
        # True if ThreadPool closed
        return self._closed

    def __enter__(self):
        return self

    def __exit__(self,etype,value,tb):
        self.terminate()
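For comparison, the standard multiprocessing.dummy.Pool that this wrapper builds on already accepts an error_callback on apply_async; it only receives the exception instance, without the pool name and full exc_info the wrapper forwards. A minimal sketch of that baseline behaviour:

from multiprocessing.dummy import Pool

def risky(x):
    if x == 2:
        raise ValueError("bad input: %r" % x)
    return x * x

def on_error(exc):
    # the plain error_callback only gets the exception instance
    print("task failed:", exc)

if __name__ == "__main__":
    with Pool(4) as pool:
        results = [pool.apply_async(risky, (i,), error_callback=on_error) for i in range(4)]
        for r in results:
            r.wait()
            if r.successful():
                print(r.get())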
Example 21
class Spider(Counter, HTML, File):
    '''This class is used to crawl web page'''
    allow_crawl = True

    def __init__(self):
        HTML.__init__(self)
        Counter.__init__(self)
        File.__init__(self)
        self.pool = None
        self.p = None
        self.last_links = list()
        self.crawled = set()
        self.dir_name = None

    # Create a directory of start crawling point
    def create_dir_name(self, url):
        url = url.replace('//', '_')
        url = url.replace('/', '_')
        url = url.replace('.', '_')
        url = url.replace(':', '_')
        return 'data/' + url

    # Get and set environment for crawling
    def start_crawl(self):
        last_file = None
        url = ui.lineEdit.text()
        self.dir_name = self.create_dir_name(url)
        self._create_directory(self.dir_name)
        if os.path.exists(self.dir_name):
            last_file = self.get_last_file(self.dir_name)

        if url != '':
            if last_file is not None:
                file = self.load_file(last_file)

                self.rank = file['rank']
                self.http_error = file['http_error']
                self.url_error = file['url_error']
                self.last_links = file['last_links']

                self.crawled = file['crawled']

            Spider.allow_crawl = True

            if len(self.last_links) != 0:
                self.p = Thread(target=self.crawl, args=(self.last_links, ))
                self.p.start()
            else:
                self.p = Thread(target=self.crawl, args=(url, ))
                self.p.start()

    # Stop crawling
    def stop_crawl(self):
        Spider.allow_crawl = False
        self.pool.terminate()
        self.pool.join()
        self.p.join(timeout=5)
        data = {
            'rank': self.rank,
            'http_error': self.http_error,
            'url_error': self.url_error,
            'last_links': self.last_links,
            'crawled': self.crawled
        }
        file_path = self.dir_name + '/' + self.create_file_name()

        self.save_file(file_path, data)

    # Process of each crawling
    def process(self, url):
        if url not in self.crawled:
            error, html = self.get_html(url)
            if html is not None:
                if type(html) is str:
                    if ui.bot_list_widget.count() > ui.item_limit:
                        pool = Pool(1)
                        pool.apply_async(ui.delete_list_widget,
                                         args=(ui.bot_list_widget, ))

                    text = 'Retrieved {}.'.format(url)
                    text_item = QtWidgets.QListWidgetItem(text)
                    ui.bot_list_widget.addItem(text_item)
                    plain_text = self.clean_html_tag(html)
                    self.count(plain_text, url)
                    return [url, self.get_link(html)]

            else:
                if ui.bot_list_widget.count() > ui.item_limit:
                    pool = Pool(1)
                    pool.apply_async(ui.delete_list_widget,
                                     args=(ui.bot_list_widget, ))

                text = '{}'.format(error)
                text_item = QtWidgets.QListWidgetItem(text)
                ui.bot_list_widget.addItem(text_item)

    # Second crawl step
    def _continue_crawl(self, url):
        url = list(filter(None, url))
        links = list()
        file_path = self.dir_name + '/' + self.create_file_name()
        for each_link in self.pool.imap_unordered(self.process, url):
            if each_link is not None:
                if each_link[1] is not None:
                    links.extend(each_link[1])
                    self.crawled.add(each_link[0])

        if len(links) != 0:
            self.last_links = links

            data = {
                'rank': self.rank,
                'http_error': self.http_error,
                'url_error': self.url_error,
                'last_links': self.last_links,
                'crawled': self.crawled
            }
            self.save_file(file_path, data)

            while Spider.allow_crawl and len(self.last_links) != 0:
                links = list()
                # url = list(filter(None, url))
                # url = list(itertools.chain.from_iterable(url))
                for each_link in self.pool.imap_unordered(
                        self.process, self.last_links):
                    if each_link is not None:
                        if each_link[1] is not None:
                            links.extend(each_link[1])
                            self.crawled.add(each_link[0])

                data = {
                    'rank': self.rank,
                    'http_error': self.http_error,
                    'url_error': self.url_error,
                    'last_links': self.last_links,
                    'crawled': self.crawled
                }
                self.save_file(file_path, data)

                if len(links) != 0:
                    self.last_links = links

    # First crawl step
    def crawl(self, url):
        self.pool = Pool(30)

        if type(url) is not list:
            links = self.pool.apply(self.process, args=(url, ))
            if links is not None:
                links[1] = list(filter(None, links[1]))
                if len(links[1]) != 0:
                    self.last_links = links[1]
                    self.crawled.add(links[0])
                    self._continue_crawl(links[1])
        else:
            self._continue_crawl(url)
Example 22
def pr_j():
    if i % 2 == 1:
        print(os.getpid(), i)


def pr_o():
    if i % 2 == 0:
        print(os.getpid(), i)


if __name__ == '__main__':
    # create a process pool with the specified number of workers
    pool1 = Pool(3)
    for i in range(3):
        pool1.apply(task)
        pool1.apply(pr_j)
        pool1.apply(pr_o)

    # all work has been submitted to the pool, so it must be closed
    pool1.close()
    pool1.join()
    print("进程池完成所有的任务!")

# 2. Process A sends data to queue Q1 and reads from Q2; process B sends data to Q2 and reads from Q1


# 3. Process A sends integers to queue Q1; process B reads from Q1, multiplies each number by 2 and puts it into Q2;
#    process C reads from Q2, squares each value and prints it (a sketch follows below)

# 4. Use a process pool and queues to implement the following:
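A minimal sketch of exercise 3 above, using multiprocessing.Process and Queue; all names are illustrative:

from multiprocessing import Process, Queue

def producer(q1):
    # Process A: send integers to Q1, then a sentinel
    for n in range(5):
        q1.put(n)
    q1.put(None)

def doubler(q1, q2):
    # Process B: read from Q1, multiply by 2, put into Q2
    while True:
        n = q1.get()
        if n is None:
            q2.put(None)
            break
        q2.put(n * 2)

def squarer(q2):
    # Process C: read from Q2, square and print
    while True:
        n = q2.get()
        if n is None:
            break
        print(n ** 2)

if __name__ == '__main__':
    q1, q2 = Queue(), Queue()
    procs = [Process(target=producer, args=(q1,)),
             Process(target=doubler, args=(q1, q2)),
             Process(target=squarer, args=(q2,))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()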
Example 23
class Downloader:
    """ Downloader main class """

    def __init__(self,
                 base_url='{}',
                 base_path='',
                 logger=logging.getLogger('log')):
        """ Generate downloader function via params """
        self.logger = logger
        self.logger.info("Downloader Starting...")
        self.pool = Pool(10)  # 10 processes, configurable later
        self.dl_list = []
        self.base_url = str(base_url)
        self.base_path = str(base_path)

    def get_status(self, clear=False):
        downloaded = []
        downloading = []
        failed = []
        for i in range(len(self.dl_list) - 1, -1, -1):
            result = self.dl_list[i]
            if result[1].ready() is False:
                downloading.append(result[0])
                if clear:
                    self.dl_list.pop(i)
                continue
            if result[1].get() is False:
                failed.append(result[0])
                continue
            downloaded.append(result[0])
            if clear:
                self.dl_list.pop(i)
        return {
            'Downloaded': downloaded,
            'Downloading': downloading,
            'Failed': failed
        }

    def dl_sync(self, url, path='', fn=''):
        return self.pool.apply(self.dl, (
            url,
            path,
            fn,
        ))

    def download(self, url, path='', fn='', referer=''):
        r = self.pool.apply_async(self.dl, (
            url,
            path,
            fn,
            referer,
        ))
        self.dl_list.append((url, r))

    def dl(self, url, path='', fn='', referer=''):
        try:
            path = self.make_sure_path(os.path.join(self.base_path, path))
            if isinstance(url, str):
                url = [url]
            url = self.base_url.format(*url)
            if fn == '':
                fn = os.path.basename(url)
            req = urllib.request.Request(url)
            req.add_header('Referer', referer)
            img = urllib.request.urlopen(req)
            if img.status != 200:
                raise Exception('Image cannot be reached({})'.format(
                    img.status))
            with open(os.path.join(path, fn), 'wb') as f:
                f.write(img.read())
        except Exception as e:
            if os.path.exists(os.path.join(path, fn)):
                os.remove(os.path.join(path, fn))
            self.logger.error(
                "Error downloading image: " + url + ' ; err: ' + str(e))
            return False
        self.logger.info('Downloaded: ' + url)
        return True

    def close(self):
        try:
            self.pool.close()
            self.logger.info("Pool closed, wating for completing download.")
            self.pool.join()
        except Exception as e:
            self.logger.error("Error stopping the downloader: " + str(e))
            self.pool.terminate()
            return False
        self.logger.info("Downloader Stopped...")
        return True

    def make_sure_path(self, path):
        """ Make sure the path is exists"""
        path = str(path)  # anaconda-mode hasn't supported PEP-0484 yet, QAQ
        path = path.strip()

        if path != '' and not os.path.exists(path):
            try:
                os.makedirs(path)
            except Exception as e:
                self.logger.fatal(
                    "Cannot create dirs: " + path + " ; err: " + str(e))
                self.logger.info("Exiting Program")
                import sys
                sys.exit()
            else:
                self.logger.info("Created dirs: " + path)

        return path
Example 24
class Service:
    def __init__(self, db, prom, targets, latency_collection_interval,
                 speed_collection_interval):
        """
        Data collection service.

        :param db: SQLite DB file name
        :param prom: Prometheus collector
        :param targets: hosts to use for collecting latency data
        :param latency_collection_interval: latency data collection interval (in seconds)
        :param speed_collection_interval: network speed data collection interval (in seconds)
        """

        self.db = db
        self.prom = prom
        self.targets = targets
        self.latency_collection_interval = latency_collection_interval
        self.speed_collection_interval = speed_collection_interval
        self.pool = Pool(processes=len(os.sched_getaffinity(0)))
        self.log = logging.getLogger("collection-service")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.pool.close()
        self.pool.terminate()

    def start_latency_collection(self):
        """
        Starts latency data collection and stores it in the configured SQLite database (table must already exist).

        **Note**: `Blocks the current thread indefinitely.`

        :return: `nothing`
        """

        with SQLite(self.db) as store:
            while True:
                latencies = self.pool.map(
                    lambda target: (target, from_ping(target)), self.targets)

                for target, latency in latencies:
                    if latency:
                        self.log.debug(
                            "Collected latency data for target [{}]: [{}/{} ({}) | {}] (min/max (avg) | loss)"
                            .format(latency.target, latency.minimum,
                                    latency.maximum, latency.average,
                                    latency.loss))
                        store.latency_add(latency)
                        self.prom.latency_add(latency)
                    else:
                        self.log.error(
                            "Failed to collect latency data for target [{}]".
                            format(target))

                time.sleep(self.latency_collection_interval)

    def start_speed_collection(self):
        """
        Starts network speed data collection and stores it in the configured SQLite database (table must already exist).

        **Note**: `Blocks the current thread indefinitely.`

        :return: `nothing`
        """

        with SQLite(self.db) as store:
            while True:
                speed = self.pool.apply(from_speedtest)

                if speed:
                    self.log.debug(
                        "Collected speed data for server [{}]: [{}/{}] (down/up)"
                        .format(speed.server, speed.download, speed.upload))
                    store.speed_add(speed)
                    self.prom.speed_add(speed)
                else:
                    self.log.error("Failed to collect speed data")

                time.sleep(self.speed_collection_interval)