Example #1
0
class UIDStack(object):
    """LIFO container backed by gevent.queue.LifoQueue.

    Entries are (uid, metadata) pairs; metadata may be None.
    """

    def __init__(self):
        self._lifoqueue = LifoQueue()

    def empty(self):
        """Return True when no entries remain."""
        return self._lifoqueue.empty()

    def get(self):
        """Pop and return the most recently pushed (uid, metadata) pair.

        Raises Empty when the stack has no entries.
        """
        return self._lifoqueue.get_nowait()

    def peek(self):
        """Return the top entry without removing it.

        Reaches into the backing list directly because
        LifoQueue.peek_nowait() is buggy in gevent before 1.0.2.
        """
        return self._lifoqueue.queue[-1]

    def put(self, uid, metadata):
        """Push a (uid, metadata) pair onto the stack."""
        self._lifoqueue.put((uid, metadata))

    def discard(self, objects):
        """Remove every stacked entry that appears in *objects*."""
        kept = []
        for entry in self._lifoqueue.queue:
            if entry not in objects:
                kept.append(entry)
        self._lifoqueue.queue = kept

    def qsize(self):
        """Return the number of stacked entries."""
        return self._lifoqueue.qsize()

    def __iter__(self):
        """Iterate entries bottom-to-top without consuming them."""
        return iter(self._lifoqueue.queue)
Example #2
0
class UIDStack(object):
    """Stack of (uid, metadata) tuples built on gevent.queue.LifoQueue.

    The metadata component of each pair is allowed to be None.
    """

    def __init__(self):
        self._lifoqueue = LifoQueue()

    def empty(self):
        """True iff the stack holds no pairs."""
        return self._lifoqueue.empty()

    def get(self):
        """Remove and return the topmost pair; raises Empty if none."""
        return self._lifoqueue.get_nowait()

    def peek(self):
        """Look at the topmost pair without popping it.

        Accesses the backing list because gevent's peek_nowait() is
        broken before release 1.0.2.
        """
        return self._lifoqueue.queue[-1]

    def put(self, uid, metadata):
        """Push the pair (uid, metadata)."""
        self._lifoqueue.put((uid, metadata))

    def discard(self, objects):
        """Drop every stacked pair contained in *objects*."""
        filtered = [pair for pair in self._lifoqueue.queue
                    if pair not in objects]
        self._lifoqueue.queue = filtered

    def qsize(self):
        """Return how many pairs are currently stacked."""
        return self._lifoqueue.qsize()

    def __iter__(self):
        """Yield stacked pairs in insertion (bottom-first) order."""
        for pair in self._lifoqueue.queue:
            yield pair
Example #3
0
class Stack(object):
    """Thin convenience wrapper around gevent.queue.LifoQueue.

    Bulk inserts are ordered by *key*, so the element with the largest
    key ends up on top of the stack.
    """

    def __init__(self, key, initial_elements=None):
        # *key* orders elements whenever the stack is populated in bulk.
        self.key = key
        self._lifoqueue = LifoQueue()
        if initial_elements is not None:
            ordered = sorted(initial_elements, key=self.key)
            self._lifoqueue.queue = ordered

    def empty(self):
        """Return whether the stack has no elements."""
        return self._lifoqueue.empty()

    def get(self):
        """Pop the top element; raises Empty when nothing is stacked."""
        return self._lifoqueue.get_nowait()

    def peek(self):
        """Return (without removing) the top element.

        Reads the backing list directly: LifoQueue.peek_nowait() is
        buggy in gevent until version 1.0.2.
        """
        return self._lifoqueue.queue[-1]

    def put(self, obj):
        """Push *obj* on top of the stack."""
        self._lifoqueue.put(obj)

    def update_from(self, objects):
        """Push *objects* in self.key order (largest key lands on top)."""
        for element in sorted(objects, key=self.key):
            self._lifoqueue.put(element)

    def discard(self, objects):
        """Remove every stacked element that occurs in *objects*."""
        remaining = []
        for element in self._lifoqueue.queue:
            if element not in objects:
                remaining.append(element)
        self._lifoqueue.queue = remaining

    def qsize(self):
        """Return the element count."""
        return self._lifoqueue.qsize()

    def __iter__(self):
        """Iterate elements bottom-to-top without consuming them."""
        return iter(self._lifoqueue.queue)
Example #4
0
class BaseProcessor(LoggerMixin):
    """Base class for gevent-based task processors.

    Spawns a pool of Worker greenlets that consume callables from a LIFO
    task queue, tracks progress, and logs a heartbeat every 30 seconds.
    Subclasses must implement populate_tasks().
    """

    # Identifier prefix used to build processor_name.
    name = 'base-processor'

    @classmethod
    def from_engine(cls, engine, *args, **kwargs):
        # Alternate constructor: build a processor bound to *engine*.
        return cls(engine, *args, **kwargs)

    def _request(self):
        # Delegates to the engine's request object.
        return self.engine.request

    # Read-only convenience accessor for the engine's request.
    request = property(_request)

    def __init__(self, engine, *args, **kwargs):
        from time import time
        from hashlib import md5
        from threading import Lock
        from gevent.queue import LifoQueue

        # Unique-ish processor name: "<name>:<first 6 hex chars of
        # md5(current time)>".
        self.processor_name = '%s:%s' % (self.name, md5(str(
            time())).hexdigest()[:6])

        LoggerMixin.__init__(self)

        self.engine = engine

        # Lazily-initialized redis handle, guarded by redis_lock.
        self.__redis = None
        self.redis_lock = Lock()

        # Number of items processed so far.
        self.progress = 0

        self.total = 0
        # Count of bypassed (skipped) items.
        self.bypassed_cnt = 0

        # When the queue exceeds this size, add_task pauses before
        # adding further tasks (flow control).
        self.maxsize = 1000
        self.tasks = LifoQueue()
        self.workers = []

        # Default polling interval: 1 second.
        self.polling_interval = 1

        import argparse

        arg_parser = argparse.ArgumentParser()
        # Concurrency level (number of worker greenlets).
        arg_parser.add_argument('--concur', type=int)
        args, leftover = arg_parser.parse_known_args()

        from core import dhaulagiri_settings

        # A --concur command-line flag overrides the configured value.
        if args.concur:
            dhaulagiri_settings['core']['concur'] = args.concur
        self.concur = dhaulagiri_settings['core']['concur']

        # Checkpoint (timestamp, progress) used by the heartbeat to
        # compute the processing rate between two ticks.
        self.checkpoint_ts = None
        self.checkpoint_prog = None
        self.init_ts = time()

        # Heartbeat greenlet (spawned in _start_workers).
        self.heart_beat = None

        # Worker monitor: each worker refreshes its entry here at the
        # start of every loop iteration (worker name -> last-seen time).
        self.worker_monitor = {}

    def update_worker_status(self, worker):
        """
        Record a status update (heartbeat) for *worker*.

        :param worker: the worker reporting in; keyed by worker_name.
        :return: None
        """
        from time import time

        name = worker.worker_name
        self.worker_monitor[name] = time()

    def get_worker_stat(self):
        """
        Return the state of the worker pool.

        :return: dict with 'active' and 'zombie' maps of
            worker name -> last-seen timestamp.
        """
        from time import time

        # Workers with no status update within `time_window` seconds are
        # considered zombies.  (NOTE(review): an earlier comment said 60
        # seconds, but the window used here is 90.)
        time_window = 90

        cur = time()
        active = dict(
            filter(lambda item: item[1] >= cur - time_window,
                   self.worker_monitor.items()))
        zombie = dict(
            filter(lambda item: item[1] < cur - time_window,
                   self.worker_monitor.items()))

        return {'zombie': zombie, 'active': active}

    def incr_progress(self):
        # Bump the processed-items counter by one.
        self.progress += 1

    def _start_workers(self):
        def timer():
            """
            Runs every 30 seconds and logs the current progress.
            """
            import time

            while True:
                msg = 'Progress: %d / %d.' % (self.progress, self.total)
                cts = time.time()

                # Rate since the previous heartbeat, in items/minute.
                if self.checkpoint_prog is not None and self.checkpoint_ts is not None:
                    rate = (self.progress - self.checkpoint_prog) / (
                        cts - self.checkpoint_ts) * 60
                    msg = '%s %s' % (msg, 'Processing rate: %d items/min' %
                                     int(rate))

                self.checkpoint_ts = cts
                self.checkpoint_prog = self.progress

                # Collect worker-monitor statistics.
                stat = self.get_worker_stat()
                msg += ', active workers: %d, zombie workers: %d' % (len(
                    stat['active']), len(stat['zombie']))

                self.log(msg)
                gevent.sleep(30)

        self.heart_beat = gevent.spawn(timer)

        # NOTE(review): SIGKILL cannot be caught by a process; this
        # registration looks ineffective or may raise — confirm.
        gevent.signal(signal.SIGKILL, gevent.kill)
        gevent.signal(signal.SIGQUIT, gevent.kill)

        for i in xrange(self.concur):
            worker = Worker.from_processor(self, self.tasks)
            self.workers.append(worker)

    def add_task(self, task, *args, **kwargs):
        """Wrap *task* in a zero-arg callable and enqueue it, applying
        flow control when the queue is over self.maxsize."""
        # Whether flow control is enabled.
        flow_control = True
        while flow_control:
            # If the task queue holds too many items, pause before
            # adding more.
            if self.tasks.qsize() > self.maxsize:
                gevent.sleep(self.polling_interval)
            else:
                break

        func = lambda: task(*args, **kwargs)
        # Propagate the task's task_key (if any) onto the wrapper so
        # workers/logs can identify it.
        task_key = getattr(task, 'task_key', None)
        if task_key:
            setattr(func, 'task_key', task_key)
        self.tasks.put(func, timeout=120)
        self.logger.debug(
            'New task%s added to the queue. Remaining: %d' %
            ('(%s)' % task_key if task_key else '', self.tasks.qsize()))
        # Yield to let a worker pick the task up.
        gevent.sleep(0)

    def _wait_for_workers(self):
        """
        Wait until all workers have finished.  Criterion: every worker
        is idle and the task queue is empty.
        :return: None
        """
        while True:
            if not self.tasks.empty():
                gevent.sleep(self.polling_interval)
                continue

            completed = True
            for w in self.workers:
                if not w.idle:
                    gevent.sleep(self.polling_interval)
                    completed = False
                    break

            if completed:
                break

        # Tear down worker greenlets and the heartbeat.
        gevent.killall([w.gevent for w in self.workers])
        gevent.kill(self.heart_beat)

    def run(self):
        """Start workers, populate the queue, wait for completion, and
        log a final summary."""
        self._start_workers()
        self.populate_tasks()
        self._wait_for_workers()

        import time

        self.log(
            'Processor ended: %d items processed(%d bypassed) in %d minutes' %
            (self.progress, self.bypassed_cnt,
             int((time.time() - self.init_ts) / 60.0)))

    def populate_tasks(self):
        # Subclasses must fill self.tasks (typically via add_task).
        raise NotImplementedError
class AbstractDatabaseConnectionPool(object):
    def __init__(self, maxsize=100, maxwait=1.0, expires=None, cleanup=None):
        """
        The pool manages opened connections to the database.  The main
        strategy is to keep the smallest number of alive connections
        required for good service performance.  In most cases connections
        are taken from the pool; during peaks the pool creates some extra
        connections to keep the service available, and in times of low
        traffic unnecessary connections are released.

        Parameters
        ----------
        maxsize : int
                  Soft limit on the number of created connections.  After
                  reaching this limit, taking the next connection first
                  waits `maxwait` time for any returned slot.
        maxwait : float
                  The time in seconds to wait before creating a new
                  connection after the pool gets empty.  It may be 0, in
                  which case new connections are created immediately.
        expires : float
                  The time in seconds indicating how long a connection
                  should stay alive.  It is also used to close unneeded
                  slots.
        cleanup : float
                  Idle time in seconds after which an unused connection
                  is closed.  (NOTE(review): inferred from the
                  _latest_use check in _cleanup_queue — confirm.)
        """
        if not isinstance(maxsize, integer_types):
            raise TypeError('Expected integer, got %r' % (maxsize, ))

        self._maxsize = maxsize
        self._maxwait = maxwait
        self._expires = expires
        self._cleanup = cleanup
        # Per-connection bookkeeping, keyed by id(connection).
        self._created_at = {}
        self._latest_use = {}
        self._pool = LifoQueue()
        self._size = 0
        # With neither `expires` nor `cleanup` configured, push the next
        # cleanup time effectively to infinity so _cleanup_queue no-ops.
        self._latest_cleanup = 0 if self._expires or self._cleanup else 0xffffffffffffffff
        # Cleanup interval = the smaller of the two configured periods,
        # tolerating either one being None (falls back to the other).
        self._interval_cleanup = min(
            self._expires or self._cleanup, self._cleanup
            or self._expires) if self._expires or self._cleanup else 0
        # Serializes _cleanup_queue between greenlets.
        self._cleanup_lock = Semaphore(value=1)

    def create_connection(self):
        # Subclasses must return a new open DB connection.
        raise NotImplementedError()

    def close_connection(self, item):
        """Close *item* and drop its bookkeeping; errors are ignored."""
        try:
            self._size -= 1
            self._created_at.pop(id(item), None)
            self._latest_use.pop(id(item), None)
            item.close()
        except Exception:
            # Best-effort close: a failing connection is discarded anyway.
            pass

    def cleanup(self):
        """Force an immediate pass of expired/idle-connection cleanup."""
        self._cleanup_queue(time.time())

    def _cleanup_queue(self, now):
        """Close pooled connections that are expired or idle too long.

        Rate-limited: runs at most once per _interval_cleanup seconds.
        """
        # Not yet due for a cleanup pass.
        if self._latest_cleanup > now:
            return

        with self._cleanup_lock:

            # Double-check after acquiring the lock: another greenlet may
            # have just run the cleanup.
            if self._latest_cleanup > now:
                return

            self._latest_cleanup = now + self._interval_cleanup

            # Cut-off timestamps; None disables the corresponding check.
            cleanup = now - self._cleanup if self._cleanup else None
            expires = now - self._expires if self._expires else None

            # Instead of creating a new LIFO for self._pool, the old one
            # is reused, because someone else might be waiting for a
            # connection on it.
            fresh_slots = []

            try:
                # Try to refill self._pool ASAP, preventing creation of
                # new connections; after this loop the LIFO would be in
                # reversed order.
                while not self._pool.empty():
                    item = self._pool.get_nowait()
                    if cleanup and self._latest_use.get(id(item), 0) < cleanup:
                        self.close_connection(item)
                    elif expires and self._created_at.get(id(item),
                                                          0) < expires:
                        self.close_connection(item)
                    else:
                        fresh_slots.append(item)
            except Empty:
                pass

            # Reverse the order back (freshest connections should be at
            # the beginning).
            for conn in reversed(fresh_slots):
                self._pool.put_nowait(conn)

    def get(self):
        """Take a connection from the pool, creating one if needed.

        Raises OperationalError when the pool is exhausted and no slot
        is returned within `maxwait` seconds.
        """
        # Fast path: an idle connection is already available.
        try:
            return self._pool.get_nowait()
        except Empty:
            pass

        # Soft limit reached: wait up to maxwait for a returned slot.
        if self._size >= self._maxsize:
            try:
                return self._pool.get(timeout=self._maxwait)
            except Empty:
                pass

        # It is possible that after waiting self._maxwait time no
        # connection has been returned, because old ones get cleaned up
        # on put(); then there is no connection but the LIFO is not full
        # either.  In that case a new connection should be created,
        # otherwise an exception is raised.
        if self._size >= self._maxsize:
            raise OperationalError(
                "Too many connections created: {} (maxsize is {})".format(
                    self._size, self._maxsize))

        # Reserve the slot before connecting; roll back on failure.
        try:
            self._size += 1
            conn = self.create_connection()
        except:
            self._size -= 1
            raise

        now = time.time()
        self._created_at[id(conn)] = now
        self._latest_use[id(conn)] = now
        return conn

    def put(self, conn):
        """Return *conn* to the pool and trigger rate-limited cleanup."""
        now = time.time()
        self._pool.put(conn)
        self._latest_use[id(conn)] = now

        self._cleanup_queue(now)

    def closeall(self):
        """Close every pooled connection and reset the pool size."""
        while not self._pool.empty():
            conn = self._pool.get_nowait()
            try:
                conn.close()
            except Exception:
                # Best-effort: keep draining even if a close fails.
                pass
        self._size = 0

    @contextlib.contextmanager
    def connection(self, isolation_level=None):
        """Context manager yielding a connection.

        Commits on success, rolls back on error, and restores the
        original isolation level before returning the connection to the
        pool.
        """
        conn = self.get()
        try:
            if isolation_level is not None:
                if conn.isolation_level == isolation_level:
                    # Already at the requested level: nothing to restore.
                    isolation_level = None
                else:
                    conn.set_isolation_level(isolation_level)
            yield conn
        except:
            if conn.closed:
                # The whole pool is likely broken; drop everything.
                conn = None
                self.closeall()
            else:
                conn = self._rollback(conn)
            raise
        else:
            if conn.closed:
                raise OperationalError(
                    "Cannot commit because connection was closed: %r" %
                    (conn, ))
            conn.commit()
        finally:
            if conn is not None and not conn.closed:
                if isolation_level is not None:
                    conn.set_isolation_level(isolation_level)
                self.put(conn)

    @contextlib.contextmanager
    def cursor(self, *args, **kwargs):
        """Context manager yielding a cursor on a pooled connection."""
        isolation_level = kwargs.pop('isolation_level', None)
        with self.connection(isolation_level) as conn:
            yield conn.cursor(*args, **kwargs)

    def _rollback(self, conn):
        """Roll back *conn*; returns None if the rollback itself fails."""
        try:
            conn.rollback()
        except:
            gevent.get_hub().handle_error(conn, *sys.exc_info())
            return
        return conn

    def execute(self, *args, **kwargs):
        """Execute a statement and return the affected row count."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.rowcount

    def fetchone(self, *args, **kwargs):
        """Execute a query and return its first row."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchone()

    def fetchall(self, *args, **kwargs):
        """Execute a query and return all rows as a list."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            return cursor.fetchall()

    def fetchiter(self, *args, **kwargs):
        """Execute a query and lazily yield rows in fetchmany() batches."""
        with self.cursor(**kwargs) as cursor:
            cursor.execute(*args)
            while True:
                items = cursor.fetchmany()
                if not items:
                    break
                for item in items:
                    yield item