Ejemplo n.º 1
0
 def __init__(self, pool_size=4, task_size=100):
     """Create the worker pool, pending-event pool and handler table.

     :param pool_size: Value handed to ``ThreadPool``.
     :param task_size: Value handed to ``TaskPool``.
     """
     super(EventBus, self).__init__()
     self.pool = ThreadPool(pool_size)
     self.async_events = TaskPool(task_size)
     self.event_handlers = {}
     self.con = Condition()
     # Runs last, once every attribute above has been assigned.
     self.init()
Ejemplo n.º 2
0
class ImageDown:
    """Download images from a list of URLs into a local directory.

    Fetches are queued on a ``TaskPool``. Each fetched body is written below
    ``location`` under a name made of the MD5 hex digest of the data plus the
    extension taken from the URL, and the URL -> path pair is stored in the
    supplied cache.
    """

    def __init__(self, urls, location, urlCache):
        """Remember the work list and make sure the target directory exists.

        :param urls: Iterable of image URLs; duplicates are dropped.
        :param location: Directory receiving the files. It is created,
            including missing parent directories, when it does not exist.
        :param urlCache: Object providing ``get(url)`` and ``set(url, path)``.
        :raises OSError: If the directory cannot be created.
        """
        self._urls = list(set(urls))
        self._location = location

        if not os.path.exists(location):
            try:
                os.makedirs(location)
            except OSError:
                # Something else may have created it between the check and
                # the call; only a genuinely missing directory is an error.
                if not os.path.isdir(location):
                    raise

        self._cache = urlCache

        self._taskpool = TaskPool(10)

    def addUrls(self, urls):
        """Queue additional URLs, skipping those that are already pending.

        :param urls: Iterable of image URLs.
        """
        pending = set(self._urls)
        self._urls.extend(set(urls) - pending)

    def run(self):
        """Schedule a download for every pending URL, then run the pool."""
        # Swap in a fresh list so URLs added from now on form the next batch.
        batch, self._urls = self._urls, []

        for url in batch:
            self._down(url)

        self._taskpool.run()

    def _down(self, url):
        """Spawn a fetch for ``url`` unless the cache already has an entry."""
        if self._cache.get(url):
            return

        def callback(response):
            if response.error:
                print('Error %s %s' % (response.error, url))
            else:
                self._writeImage(url, response.body)

        self._taskpool.spawn(url, callback)

    def _writeImage(self, url, data):
        """Store ``data`` on disk and record its path in the cache.

        Failures are reported on stdout with a traceback instead of being
        raised, so one bad image does not abort the remaining downloads.

        :param url: URL the data was fetched from.
        :param data: Raw body of the response.
        """
        try:
            fileName = url.split('/')[-1]
            fileExt = os.path.splitext(fileName)[-1]

            # Content-addressed name: identical bodies share one file.
            fileName = hashlib.md5(data).hexdigest() + fileExt

            fullName = os.path.join(self._location, fileName)

            if not os.path.exists(fullName):
                with open(fullName, 'wb') as f:
                    f.write(data)

            self._cache.set(url, fullName)

        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # still be able to stop the program.
            print('write image %s error %s' % (url, traceback.format_exc()))
Ejemplo n.º 3
0
def run_tests(tests, summary):
    """Run ``tests`` in a ``TaskPool``, serially when GDB's Python is old.

    GDB is started once to print the ``sys.hexversion`` of its embedded
    Python interpreter. When that version is older than 3.3 the job count is
    forced to 1; otherwise ``OPTIONS.workercount`` is used.

    :param tests: Tests handed to ``TaskPool``.
    :param summary: Not used by this function.
    :raises subprocess.CalledProcessError: If the GDB probe exits non-zero.
    """
    jobs = OPTIONS.workercount
    # python 3.3 fixed a bug with concurrently writing .pyc files.
    # https://bugs.python.org/issue13146
    embedded_version = int(
        subprocess.check_output([
            OPTIONS.gdb_executable,
            "--batch",
            "--ex",
            "python import sys; print(sys.hexversion)",
        ]).decode("ascii").strip()
    )
    # Compare as integers: comparing hex() strings is lexicographic and only
    # matches numeric order while both strings have the same length.
    if embedded_version < 0x3030000:
        jobs = 1

    pool = TaskPool(tests, job_limit=jobs, timeout=OPTIONS.timeout)
    pool.run_all()
Ejemplo n.º 4
0
 def __init__(self, pool_size=4, task_size=100):
     """Prepare the pools, the handler mapping and the condition variable.

     :param pool_size: Argument forwarded to ``ThreadPool``.
     :param task_size: Argument forwarded to ``TaskPool``.
     """
     super(EventBus, self).__init__()
     self.pool = ThreadPool(pool_size)
     self.async_events = TaskPool(task_size)
     self.event_handlers = {}
     self.con = Condition()
     # Invoked only after all attributes above exist.
     self.init()
Ejemplo n.º 5
0
    def __init__(self, urls, location, urlCache):
        """Remember the work list and make sure the target directory exists.

        :param urls: Iterable of URLs; duplicates are dropped.
        :param location: Target directory. It is created, including missing
            parent directories, when it does not exist.
        :param urlCache: Cache object stored for later use.
        :raises OSError: If the directory cannot be created.
        """
        self._urls = list(set(urls))
        self._location = location

        if not os.path.exists(location):
            try:
                os.makedirs(location)
            except OSError:
                # Something else may have created it between the check and
                # the call; only a genuinely missing directory is an error.
                if not os.path.isdir(location):
                    raise

        self._cache = urlCache

        self._taskpool = TaskPool(10)
Ejemplo n.º 6
0
class Crawler:
    """Follow links from a start URL and collect the images found.

    Pages are fetched through a ``TaskPool``. Every fetched body is parsed
    with ``URLLister``; the images it reports are accumulated and the links
    it reports are crawled with the depth reduced by one.
    """

    def __init__(self, beginUrl):
        """
        :param beginUrl: URL where the crawl starts.
        """
        self._beginUrl = beginUrl
        self._urlTasks = []
        self._urlMarked = {}   # URL -> True for every URL already scheduled
        self._imgs = []
        self._taskpool = TaskPool(10)

    def run(self, depth):
        """Crawl from the start URL and run the task pool.

        :param depth: Number of link levels to follow; values of zero or
            less schedule nothing.
        """
        self._crawl(self._beginUrl, depth)
        self._taskpool.run()

    def getImgs(self):
        """Return the list of images collected so far."""
        return self._imgs

    def _crawl(self, url, depth):
        """Schedule a fetch of ``url`` and, on success, of the links it has.

        :param url: Page to fetch.
        :param depth: Remaining link levels to follow.
        """
        # ``<=`` rather than ``==``: a negative depth would otherwise never
        # reach the base case and the crawl would be unbounded.
        if depth <= 0:
            return

        if url in self._urlMarked:
            return

        self._urlMarked[url] = True

        def callback(response):
            if response.error:
                print('Error %s %s' % (response.error, url))
                return

            lister = URLLister()
            lister.feed(response.body)

            urls = lister.getUrls()
            imgs = lister.getImgs()

            self._imgs.extend(imgs)

            for newUrl in urls:
                self._crawl(newUrl, depth - 1)

        self._taskpool.spawn(url, callback)
Ejemplo n.º 7
0
    def __worker_prepare(self, taskpool=None):
        """Configure ``Worker``: settings, the initial task and log handlers.

        :param taskpool: Task pool to use. When it is falsy a new ``TaskPool``
            is created with a ``filter_dump_<NOW>`` distinct-filter dump name.

        Handlers are registered as follows:

        * ``--debug``: one file handler per ``--dbglog`` entry plus a stream
          handler, all on the debug log.
        * ``--syslog`` / ``--errlog``: one file handler per entry. When the
          option is empty and ``--debug`` is off, a stream handler is
          registered instead.
        """
        config = self.__config
        debug = config["--debug"]

        Worker.setWorkerConfig(
            rootlogger=LOGGER,
            basedomain=config["--basedomain"] or None,
            errorpage=config["--errorpage"] or None,
            taskpool=taskpool if taskpool else TaskPool(
                distinct_filter_dump="filter_dump_{now}".format(now=NOW)))

        if not self.resume:
            Worker.Worker_taskpool.put(config["<src>"])

        # Explicit loops instead of map(): on Python 3 map() is lazy, so
        # calling it only for its side effects would register nothing.
        if debug:
            for dbg_log_file in config["--dbglog"]:
                Worker.add_debuglog_handler(
                    getHandler(logging.FileHandler, filename=dbg_log_file))
            Worker.add_debuglog_handler(getHandler(logging.StreamHandler))

        if config["--syslog"]:
            for sys_log_file in config["--syslog"]:
                Worker.add_syslog_handler(
                    getHandler(hdlr_cls=logging.FileHandler,
                               filename=sys_log_file))
        elif not debug:
            Worker.add_syslog_handler(
                getHandler(hdlr_cls=logging.StreamHandler))

        if config["--errlog"]:
            for err_log_file in config["--errlog"]:
                Worker.add_errorlog_handler(
                    getHandler(hdlr_cls=logging.FileHandler,
                               filename=err_log_file))
        elif not debug:
            Worker.add_errorlog_handler(
                getHandler(hdlr_cls=logging.StreamHandler))
Ejemplo n.º 8
0
def run_tests(tests, summary):
    """Run ``tests`` through a ``TaskPool``.

    The pool gets ``OPTIONS.workercount`` as its job limit and
    ``OPTIONS.timeout`` as its timeout. ``summary`` is not used here.
    """
    pool_options = {
        "job_limit": OPTIONS.workercount,
        "timeout": OPTIONS.timeout,
    }
    TaskPool(tests, **pool_options).run_all()
Ejemplo n.º 9
0
import threading
import os
import sys
import json
from BaseHTTPServer import HTTPServer

from taskpool import TaskPool
from HTTPlistener import httpServerFactory

# Load the master configuration. The context manager guarantees the file is
# closed; the previous bare ``f.close`` (no parentheses) never closed it.
with open('taskpool_config', 'r') as f:
    master_config = json.load(f)


tp = TaskPool(master_config)
# Thread.start() returns None, so bind the Thread object before starting it.
taskpool_thread = threading.Thread(target=tp.loop, args=())
taskpool_thread.start()

ip = master_config['master address']
port = master_config['master port']
HTTPListener = httpServerFactory({'taskpool': tp})
serv = HTTPServer((ip, port), HTTPListener)
print('Starting master http server at port %d ....' % port)


try:
    serv.serve_forever()
except KeyboardInterrupt:
    tp.close_connection()
    print('KeyboardInterrupt 8P')
    # Exit immediately, without waiting for the task pool thread.
    os._exit(0)
Ejemplo n.º 10
0
def run_tests(tests, summary):
    """Execute every test in ``tests`` using a ``TaskPool``.

    The job limit and timeout come from ``OPTIONS.workercount`` and
    ``OPTIONS.timeout``. The ``summary`` argument is not used here.
    """
    worker_count = OPTIONS.workercount
    time_limit = OPTIONS.timeout
    runner = TaskPool(tests, job_limit=worker_count, timeout=time_limit)
    runner.run_all()
Ejemplo n.º 11
0
        return 'content for key %s: %s' % (inner_key, random.randint(
            1, 100000))

    content = read_cached(inner_open_test_random, key)
    return content


if __name__ == '__main__':
    # Console output and the log file use the same record layout.
    log_format = '%(asctime)s:%(name)s:%(levelname)s:%(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)
    # Only WARNING and above from the 'requests' logger.
    logging.getLogger('requests').setLevel(logging.WARNING)

    # mode='w' starts the log file afresh on every run.
    file_handler = logging.FileHandler(
        'download-vessels-details.log', mode='w')
    file_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(file_handler)

    # Begin with an empty cache under output/tests.
    set_cache_path('output/tests', max_node_files=400, rebalancing_limit=1000)
    delete_cache()

    tasks = TaskPool(30)

    for index in range(10000):
        tasks.add_task(open_test_random, index)

    results = tasks.execute()
    logging.info('results: %s', results)
    # Remove the cache again once the run is finished.
    delete_cache()
Ejemplo n.º 12
0
class EventBus(object):
    """Deliver events to registered handlers, directly or through a queue.

    Handlers are kept in ``event_handlers`` under the event id. ``post``
    runs them in the calling thread; ``async_post`` queues the event for
    ``loop``, which the constructor starts in a separate thread via ``init``.
    """
    __metaclass__ = Singleton

    def __init__(self, pool_size=4, task_size=100):
        """Create the pools, the handler table and start the loop thread.

        :param pool_size: Value handed to ``ThreadPool``.
        :param task_size: Value handed to ``TaskPool``.
        """
        super(EventBus, self).__init__()
        self.pool = ThreadPool(pool_size)
        self.async_events = TaskPool(task_size)
        self.event_handlers = {}
        self.con = Condition()
        self.init()

    def init(self):
        """Start ``loop`` in a new thread."""
        thread.start_new_thread(self.loop, ())

    @check_listener
    def register(self, listener):
        """Add the listener's handlers to the bus; do this before posting.

        Entries already stored under the same key are replaced.

        :param listener: Object exposing an ``event_handlers`` mapping.
        :return: None
        """
        self.event_handlers.update(listener.event_handlers)

    @check_listener
    def unregister(self, listener):
        """Remove the listener's entries so it no longer receives events.

        :param listener: Object exposing an ``event_handlers`` mapping.
        :return: None
        :raises UnregisterError: If removing any entry fails.
        """
        try:
            registry = self.event_handlers
            for event_key in listener.event_handlers:
                del registry[event_key]
        except Exception:
            raise UnregisterError

    def process(self, event):
        """Call every handler stored under ``event.id()`` with ``event``.

        :raises ProcessException: If the stored handler collection is empty.
        """
        callbacks = self.event_handlers[event.id()]
        if not callbacks:
            raise ProcessException

        for callback in callbacks:
            callback(event)

    @check_event
    def post(self, event):
        """Deliver ``event`` synchronously.

        The handlers run in the calling thread, so this returns only after
        all of them have completed.

        :param event: The event to deliver.
        :return: None
        """
        self.process(event)

    @check_event
    def async_post(self, event):
        """Queue ``event`` for asynchronous delivery.

        The event is added to the pending pool and waiters on the condition
        are notified; the handlers are not run by this call.

        :param event: The event to deliver.
        :return: None
        """
        with self.con:
            self.async_events.add_task(event)
            self.con.notifyAll()

    def loop(self):
        """Process queued events forever.

        Waits on the condition while the pending pool is empty, then maps
        ``process`` over the pending tasks on the thread pool and calls
        ``remove_task``.

        :raises TaskFullException: If the pending pool reports it is full.
        """
        while True:
            with self.con:
                while self.async_events.isempty():
                    self.con.wait()
                if self.async_events.isfull():
                    raise TaskFullException
                self.pool.map(self.process, self.async_events.tasks)
                self.async_events.remove_task()

    def destroy(self):
        """Release the bus's resources when it is no longer needed.

        Clears the handler table, closes the thread pool and destroys the
        pending-event pool.

        :return: None
        """
        self.event_handlers.clear()
        self.pool.close()
        self.async_events.destroy()
Ejemplo n.º 13
0
class EventBus(object):
    """Event dispatcher with a synchronous and a queued delivery path.

    ``event_handlers`` maps an event id to its handlers. ``post`` invokes
    them immediately; ``async_post`` hands the event to ``loop``, which runs
    in a thread started from the constructor through ``init``.
    """
    __metaclass__ = Singleton

    def __init__(self, pool_size=4, task_size=100):
        """Build the pools and handler mapping, then start the loop thread.

        :param pool_size: Argument forwarded to ``ThreadPool``.
        :param task_size: Argument forwarded to ``TaskPool``.
        """
        super(EventBus, self).__init__()
        self.pool = ThreadPool(pool_size)
        self.async_events = TaskPool(task_size)
        self.event_handlers = {}
        self.con = Condition()
        self.init()

    def init(self):
        """Launch ``loop`` in a new thread."""
        thread.start_new_thread(self.loop, ())

    @check_listener
    def register(self, listener):
        """Register a listener with the bus; call this before posting.

        The listener's ``event_handlers`` entries are merged into the bus,
        replacing entries that already use the same key.

        :param listener: Object exposing an ``event_handlers`` mapping.
        :return: None
        """
        self.event_handlers.update(listener.event_handlers)

    @check_listener
    def unregister(self, listener):
        """Detach a listener so that it stops processing events.

        :param listener: Object exposing an ``event_handlers`` mapping.
        :return: None
        :raises UnregisterError: If any of its entries cannot be removed.
        """
        try:
            for key in listener.event_handlers:
                self.event_handlers.pop(key)
        except Exception:
            raise UnregisterError

    def process(self, event):
        """Invoke each handler registered under ``event.id()``.

        :raises ProcessException: If the registered collection is empty.
        """
        targets = self.event_handlers[event.id()]
        if not targets:
            raise ProcessException

        for target in targets:
            target(event)

    @check_event
    def post(self, event):
        """Post an event and block until its handlers have finished.

        :param event: The event to deliver.
        :return: None
        """
        self.process(event)

    @check_event
    def async_post(self, event):
        """Post an event without running its handlers in this call.

        The event is stored in the pending pool under the condition lock and
        all threads waiting on the condition are notified.

        :param event: The event to deliver.
        :return: None
        """
        with self.con:
            self.async_events.add_task(event)
            self.con.notifyAll()

    def loop(self):
        """Endless worker loop behind ``async_post``.

        While holding the condition: wait until the pending pool is not
        empty, map ``process`` over its tasks on the thread pool, then call
        ``remove_task``.

        :raises TaskFullException: If the pending pool reports it is full.
        """
        while True:
            with self.con:
                while self.async_events.isempty():
                    self.con.wait()
                if self.async_events.isfull():
                    raise TaskFullException
                self.pool.map(self.process, self.async_events.tasks)
                self.async_events.remove_task()

    def destroy(self):
        """Free the bus's resources once it is no longer used.

        Empties the handler mapping, closes the thread pool and destroys the
        pending-event pool.

        :return: None
        """
        self.event_handlers.clear()
        self.pool.close()
        self.async_events.destroy()
Ejemplo n.º 14
0
 def __init__(self, beginUrl):
     """Store the start URL and create empty crawl state.

     :param beginUrl: URL kept as the starting point.
     """
     self._beginUrl = beginUrl
     self._urlTasks = list()
     self._urlMarked = dict()
     self._imgs = list()
     self._taskpool = TaskPool(10)