Example #1
0
class PlotlyStreamProducer(object):
    """Implements a producer that copies from a buffer to a plot.ly
    connection.

    Items appended to ``buffer`` are JSON-encoded and written to the
    consumer one per line; a ``None`` entry writes a bare newline
    (used as a keepalive).
    """
    implements(IBodyProducer)
    length = UNKNOWN_LENGTH

    def __init__(self, buffer, start_callback=None):
        self.buffer = buffer
        self._done = False
        # _flush starts acquired: flush()/stopProducing() release it to
        # wake the write loop parked in startProducing().
        self._flush = DeferredSemaphore(1)
        # _waiter implements pause/resume: pauseProducing() holds the
        # only token so the write loop blocks until resumeProducing().
        self._waiter = DeferredSemaphore(1)
        self._flush.acquire()
        self._started = start_callback
        self._keepalive = LoopingCall(self._send_keepalive)

    @inlineCallbacks
    def startProducing(self, consumer):
        self._keepalive.start(60)
        # BUG FIX: only fire the start callback when one was supplied;
        # the default of None previously raised AttributeError here.
        if self._started is not None:
            self._started.callback(None)
        while True:
            # if paused, this will block
            yield self._waiter.acquire()
            while len(self.buffer):
                v = self.buffer.pop(0)
                if v is not None:
                    consumer.write(json.dumps(v))
                consumer.write("\n")
            yield self._waiter.release()

            if self._done:
                return
            yield self._flush.acquire()

    def pauseProducing(self):
        return self._waiter.acquire()

    def resumeProducing(self):
        return self._waiter.release()

    def stopProducing(self):
        self._done = True
        if self._keepalive.running:
            self._keepalive.stop()
        # BUG FIX: wake the startProducing() loop so it can observe
        # _done and terminate; previously it blocked on _flush forever.
        self.flush()

    def _send_keepalive(self):
        # A None entry becomes a bare newline on the wire.
        self.buffer.append(None)
        self.flush()

    def flush(self):
        # Release the flush semaphore only if the write loop is parked
        # on it (tokens == 0); a double release would raise.
        if self._flush.tokens == 0:
            self._flush.release()
class ThreadedUrllib2TestMixin(object):
    """Mixin that fetches pages with blocking urlopen() calls pushed to
    worker threads, limiting concurrency via a DeferredSemaphore."""

    def setUp(self):
        # At most two concurrent getPage() calls.
        self._semaphore = DeferredSemaphore(2)

    def tearDown(self):
        pass

    def getPages(self, count, url):
        """Fetch *url* *count* times; fires with a list of bodies."""
        return gatherResults([self.getPage(url) for i in xrange(0, count)])

    @inlineCallbacks
    def getPage(self, url):
        """Fetch a single page under the concurrency semaphore."""
        yield self._semaphore.acquire()
        try:
            page = yield deferToThread(self._openPage, url)
        finally:
            # BUG FIX: release even when the fetch fails; previously a
            # failed request permanently consumed a semaphore token.
            self._semaphore.release()
        returnValue(page)

    def _openPage(self, url):
        # Runs in a worker thread (blocking I/O).
        log.msg("Opening url: %r" % url)
        return urlopen(url).read()

    @inlineCallbacks
    def getPageLength(self, url):
        """Fetch *url* and fire with the length of its body."""
        response = yield self.getPage(url)
        returnValue(len(response))
Example #3
0
File: util.py Project: e000/prickle
class DeferredConcurrencyLimiter:
    """Initialize me, and then use me as a decorator, to limit the amount
    of deferreds that can execute concurrently."""

    def __init__(self, tokens=5):
        """
        :param tokens: maximum number of concurrent executions (>= 1).
        :raises ValueError: if tokens < 1.
        """
        if tokens < 1:
            raise ValueError("tokens must be > 0")

        # A DeferredLock is the cheaper special case of a 1-token semaphore.
        if tokens == 1:
            self.lock = DeferredLock()
        else:
            self.lock = DeferredSemaphore(tokens)

    def _releaseLock(self, response, lock):
        # Release on success and failure alike; pass the result through.
        lock.release()
        return response

    def _lockAcquired(self, lock, f, *a, **kw):
        # Run f (sync or async) while holding the lock.
        d = maybeDeferred(f, *a, **kw)
        d.addBoth(self._releaseLock, lock)
        return d

    def __call__(self, f):
        @wraps(f)
        def wrapped(*a, **kw):
            d = self.lock.acquire()
            d.addCallback(self._lockAcquired, f, *a, **kw)
            return d

        return wrapped
class TwistedWebTestMixin(object):
    """Mixin that fetches pages with twisted.web's getPage, limiting
    concurrency via a DeferredSemaphore."""

    def setUp(self):
        # At most two concurrent getPage() calls.
        self._semaphore = DeferredSemaphore(2)

    def tearDown(self):
        pass

    def getPages(self, count, url):
        """Fetch *url* *count* times; fires with a list of bodies.

        BUG FIX: removed the spurious @inlineCallbacks decorator — the
        body has no yield, so it is not a generator; it already returns
        a Deferred directly (matching ThreadedUrllib2TestMixin).
        """
        return gatherResults([self.getPage(url) for i in xrange(0, count)])

    @inlineCallbacks
    def getPage(self, url):
        """Fetch a single page under the concurrency semaphore."""
        yield self._semaphore.acquire()
        try:
            page = yield tx_getPage(url)
        finally:
            # BUG FIX: release on failure too; previously a failed
            # request permanently consumed a semaphore token.
            self._semaphore.release()
        returnValue(page)

    @inlineCallbacks
    def getPageLength(self, url):
        """Fetch *url* and fire with the length of its body."""
        response = yield self.getPage(url)
        returnValue(len(response))
Example #5
0
class IndxConnectionPool:
    """ A wrapper for txpostgres connection pools, which auto-reconnects.

    Keeps one DBConnectionPool per connection string; a single-token
    DeferredSemaphore serialises all pool bookkeeping in _connect().
    """
    def __init__(self):
        logging.debug("IndxConnectionPool starting. ")
        self.connections = {}  # by connection string
        self.conn_strs = {}  # by db_name
        # One-token semaphore: serialises _connect()/_newConnections().
        self.semaphore = DeferredSemaphore(1)
        self.subscribers = {}  # by db name

    def removeAll(self, db_name):
        """ Remove all connections for a named database - used before deleting that database. """
        logging.debug("IndxConnectionPool removeAll {0}".format(db_name))
        d_list = []
        if db_name in self.conn_strs:
            for conn_str in self.conn_strs[db_name]:
                # Close both in-use and free connections for this conn_str.
                for conn in self.connections[conn_str].getInuse():
                    d_list.append(conn.close())
                for conn in self.connections[conn_str].getFree():
                    d_list.append(conn.close())

                del self.connections[conn_str]
            del self.conn_strs[db_name]

        # Fires once every close() above has completed.
        dl = DeferredList(d_list)
        return dl

    def connect(self, db_name, db_user, db_pass, db_host, db_port):
        """ Returns an IndxConnection (Actual connection and pool made when query is made). """

        return_d = Deferred()
        # log_conn_str masks the password for logging only.
        log_conn_str = "dbname='{0}' user='******' password='******' host='{3}' port='{4}' application_name='{5}'".format(
            db_name, db_user, "XXXX", db_host, db_port,
            indx_pg2.APPLICATION_NAME)
        conn_str = "dbname='{0}' user='******' password='******' host='{3}' port='{4}' application_name='{5}'".format(
            db_name, db_user, db_pass, db_host, db_port,
            indx_pg2.APPLICATION_NAME)
        logging.debug("IndxConnectionPool connect: {0}".format(log_conn_str))

        if db_name not in self.conn_strs:
            self.conn_strs[db_name] = []
        self.conn_strs[db_name].append(conn_str)

        def free_cb(conn):
            """ Called back when this IndxConnection has finished querying, so
                we put the real connection back into the pool. """
            logging.debug("IndxConnectionPool free_cb, conn: {0}".format(conn))

            self.connections[conn_str].freeConnection(
                conn)  # no dealing with callbacks, just carry on

        def alloc_cb(conn_str):
            # a query was called - allocate a connection now and pass it back
            return self._connect(conn_str)

        # The IndxConnection defers real allocation until first use.
        indx_connection = IndxConnection(conn_str, alloc_cb, free_cb)
        return_d.callback(indx_connection)
        return return_d

    def _connect(self, conn_str):
        """ Connect and return a free Connection.
            Figures out whether to make new connections, use the pool, or wait in a queue.
        """
        logging.debug("IndxConnectionPool _connect ({0})".format(conn_str))
        return_d = Deferred()

        def err_cb(failure):
            logging.error(
                "IndxConnectionPool _connect err_cb: {0}".format(failure))
            # Release before propagating so the pool is not wedged.
            self.semaphore.release()
            return_d.errback(failure)

        def succeed_cb(empty):
            # Runs while holding self.semaphore; every exit path below
            # must release it (or hand off to a callback that does).
            logging.debug("IndxConnectionPool _connect succeed_cb")
            # TODO pass a Connection back

            if len(self.connections[conn_str].getFree()) > 0:
                # free connection, use it straight away
                conn = self.connections[conn_str].getFree().pop()

                self.connections[conn_str].getInuse().append(conn)
                self.semaphore.release()
                return_d.callback(conn)
                return

            if len(self.connections[conn_str].getInuse()) < MAX_CONNS:
                # not at max connections for this conn_str

                # create a new one
                d = self._newConnection(conn_str)

                def connected_cb(indx_conn):
                    logging.debug(
                        "IndxConnectionPool _connect connected_cb ({0})".
                        format(indx_conn))
                    # Move the fresh connection from 'free' to 'inuse'.
                    self.connections[conn_str].getFree().remove(indx_conn)
                    self.connections[conn_str].getInuse().append(indx_conn)
                    self.semaphore.release()
                    return_d.callback(indx_conn)
                    return

                d.addCallbacks(connected_cb, err_cb)
                return

            # wait for a connection
            def wait_cb(conn):
                logging.debug(
                    "IndxConnectionPool _connect wait_cb ({0})".format(conn))
                # already put in 'inuse'
                return_d.callback(conn)
                return

            self.semaphore.release()
            self.connections[conn_str].getWaiting().append(wait_cb)
            return

        def locked_cb(empty):
            logging.debug("IndxConnectionPool _connect locked_cb")
            if conn_str not in self.connections:
                self._newConnections(conn_str).addCallbacks(succeed_cb, err_cb)
            else:
                # NOTE(review): succeed_cb is run via a worker thread
                # here rather than called inline - presumably to avoid
                # re-entrancy; confirm intent before changing.
                threads.deferToThread(succeed_cb, None)


#                succeed_cb(None)

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)
        return return_d

    def _closeOldConnection(self):
        """ Close the oldest connection, so we can open a new one up. """
        # is already in a semaphore lock, from _newConnection
        logging.debug("IndxConnectionPool _closeOldConnection")

        ### we could force quite them through postgresql like this - but instead we kill them from inside
        #query = "SELECT * FROM pg_stat_activity WHERE state = 'idle' AND application_name = %s AND query != 'LISTEN wb_new_version' ORDER BY state_change LIMIT 1;"
        #params = [indx_pg2.APPLICATION_NAME]

        return_d = Deferred()

        def err_cb(failure):
            return_d.errback(failure)

        # Bucket pools by their last-used timestamp.
        ages = {}
        for conn_str, dbpool in self.connections.items():
            lastused = dbpool.getTime()
            if lastused not in ages:
                ages[lastused] = []
            ages[lastused].append(dbpool)

        # NOTE(review): list-style .keys()/.sort() is a Python 2 idiom.
        times = ages.keys()
        times.sort()

        # Oldest pools first.
        pool_queue = []
        for timekey in times:
            pools = ages[timekey]
            pool_queue.extend(pools)

        def removed_cb(count):
            # Frees pools one at a time until REMOVE_AT_ONCE connections
            # have been closed or there are no more pools.

            if count < REMOVE_AT_ONCE and len(pool_queue) > 0:
                pool = pool_queue.pop(0)
                # NOTE(review): return value unused; getFree() also
                # bumps the pool's last-used time - confirm intent.
                pool.getFree()
                pool.removeAll(count).addCallbacks(removed_cb, err_cb)
            else:
                return_d.callback(None)

        removed_cb(0)
        return return_d

    def _newConnection(self, conn_str):
        """ Makes a new connection to the DB
            and then puts it in the 'free' pool of this conn_str.
        """
        logging.debug("IndxConnectionPool _newConnection")
        # lock with the semaphore before calling this
        return_d = Deferred()

        def close_old_cb(failure):
            # NOTE(review): trapping Exception as well means effectively
            # every failure takes this close-and-retry path.
            failure.trap(psycopg2.OperationalError, Exception)
            # couldn't connect, so close an old connection first
            logging.error(
                "IndxConnectionPool error close_old_cb: {0} - state of conns is: {1}"
                .format(failure.value, self.connections))

            logging.error("IndxConnectionPool connections: {0}".format(
                "\n".join(
                    map(lambda name: self.connections[name].__str__(),
                        self.connections))))

            def closed_cb(empty):
                # closed, so try connecting again
                self._newConnection(conn_str).addCallbacks(
                    return_d.callback, return_d.errback)

            closed_d = self._closeOldConnection()
            closed_d.addCallbacks(closed_cb, return_d.errback)

        try:
            # try to connect
            def connected_cb(connection):
                logging.debug(
                    "IndxConnectionPool _newConnection connected_cb, connection: {0}"
                    .format(connection))
                self.connections[conn_str].getFree().append(connection)
                return_d.callback(connection)

            conn = txpostgres.Connection()
            connection_d = conn.connect(conn_str)
            connection_d.addCallbacks(connected_cb, close_old_cb)
        except Exception as e:
            # close an old connection first
            logging.debug(
                "IndxConnectionPool Exception, going to call close_old_cb: ({0})"
                .format(e))
            close_old_cb(Failure(e))

        return return_d

    def _newConnections(self, conn_str):
        """ Make a pool of new connections. """
        # lock with the semaphore before calling this
        logging.debug("IndxConnectionPool _newConnections")
        return_d = Deferred()

        self.connections[conn_str] = DBConnectionPool(conn_str)

        try:
            d_list = []
            # Pre-warm the pool with MIN_CONNS connections.
            for i in range(MIN_CONNS):
                connection_d = self._newConnection(conn_str)
                d_list.append(connection_d)

            dl = DeferredList(d_list)
            dl.addCallbacks(return_d.callback, return_d.errback)

        except Exception as e:
            logging.error(
                "IndxConnectionPool error in _newConnections: {0}".format(e))
            return_d.errback(Failure(e))

        return return_d
Example #6
0
class DBConnectionPool():
    """ A pool of DB connections for a specific connection string / DB.

    Tracks waiting callbacks, in-use connections and free connections;
    a one-token DeferredSemaphore guards mutation of those lists.
    """
    def __init__(self, conn_str):
        # BUG FIX: keep the connection string (it was previously
        # accepted and silently discarded) - useful for debugging.
        self.conn_str = conn_str
        self.waiting = []  # callbacks waiting for a connection
        self.inuse = []
        self.free = []

        self.semaphore = DeferredSemaphore(1)
        self.updateTime()

    def __unicode__(self):
        return self.__str__()

    def __str__(self):
        return "waiting: {0}, inuse: {1}, free: {2}, semaphore: {3}, lastused: {4}".format(
            self.waiting, self.inuse, self.free, self.semaphore, self.lastused)

    def updateTime(self):
        # Record the last time this pool was touched.
        self.lastused = time.mktime(time.gmtime())  # epoch time

    def getTime(self):
        return self.lastused

    def getWaiting(self):
        # The accessors bump the last-used timestamp as a side effect.
        self.updateTime()
        return self.waiting

    def getInuse(self):
        self.updateTime()
        return self.inuse

    def getFree(self):
        self.updateTime()
        return self.free

    def freeConnection(self, conn):
        """ Free a connection from this DBPool.

        If a caller is queued in 'waiting', the connection is handed
        straight to it (staying in 'inuse'); otherwise it is returned
        to 'free'.
        """
        def locked_cb(empty):
            logging.debug("DBConnectionPool locked_cb")
            self.getInuse().remove(conn)

            if len(self.getWaiting()) > 0:
                callback = self.getWaiting().pop()
                self.getInuse().append(conn)
                self.semaphore.release()
                callback(conn)
            else:
                self.getFree().append(conn)
                self.semaphore.release()

        def err_cb(failure):
            failure.trap(Exception)
            logging.error("DBConnectionPool free, err_cb: {0}".format(
                failure.value))
            self.semaphore.release()

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)

    def removeAll(self, count):
        """ Remove all free connections (usually because they're old and
            we're in a freeing-up period).

        Returns a Deferred firing with *count* plus the number of
        connections closed here.
        """
        logging.debug(
            "DBConnectionPool removeAll called, count: {0}".format(count))
        return_d = Deferred()
        self.updateTime()

        def err_cb(failure):
            self.semaphore.release()
            return_d.errback(failure)

        def locked_cb(count):
            # immediately close the free connections
            while len(self.free) > 0:
                conn = self.free.pop(0)
                conn.close()
                count += 1

            self.semaphore.release()
            return_d.callback(count)

        self.semaphore.acquire().addCallbacks(lambda s: locked_cb(count),
                                              err_cb)
        return return_d
Example #7
0
class RateLimitedClient(object):
    """A Web client with per-second request limit.

    Concurrency is capped by a DeferredSemaphore; when the server
    throttles us, all new requests wait out a grace period.
    """

    # Max number of requests per second (can be < 1.0)
    rate_limit = None
    # Grace delay (seconds) when the server throttles us
    grace_delay = 30
    # Max number of parallel requests
    max_concurrency = 5

    def __init__(self, time=None):
        self.sem = DeferredSemaphore(self.max_concurrency)
        self.grace_deferred = None
        self.logger = logging.getLogger("webclient")
        # Injectable clock (needs seconds()/callLater); defaults to the
        # global reactor.
        self.time = time or reactor
        self.last_request = 0.0

    def _enable_grace_delay(self, delay):
        """Pause all new requests for *delay* seconds."""
        if self.grace_deferred:
            # Already enabled by an earlier concurrent request
            return
        self.grace_deferred = Deferred()

        def expire():
            g = self.grace_deferred
            self.grace_deferred = None
            g.callback(None)

        # BUG FIX: honour the *delay* argument and the injected clock;
        # previously this was reactor.callLater(self.grace_delay, ...),
        # ignoring both the parameter and self.time.
        self.time.callLater(delay, expire)

    def _delay_if_necessary(self, func, *args, **kwargs):
        """Return a Deferred that calls *func* once any active grace
        period or rate-limit delay has elapsed."""
        d = Deferred()
        d.addCallback(lambda _: func(*args, **kwargs))
        trigger = None
        if self.grace_deferred:
            trigger = self.grace_deferred
        elif self.rate_limit:
            delay = (self.last_request + 1.0 / self.rate_limit) - self.time.seconds()
            if delay > 0:
                self.logger.debug("inserting rate limit delay of %.1f", delay)
                trigger = Deferred()
                self.time.callLater(delay, trigger.callback, None)
        # No trigger means fire immediately.
        (trigger or maybeDeferred(lambda: None)).chainDeferred(d)
        return d

    def get_page(self, url, *args, **kwargs):
        """Fetch *url*, respecting concurrency, rate and grace limits.

        Throttling responses (see trap_throttling) start a grace delay
        and are retried automatically.
        """
        if isinstance(url, unicode):
            url = url.encode("utf8")

        def schedule_request(_):
            return self._delay_if_necessary(issue_request, None)

        def issue_request(_):
            self.last_request = self.time.seconds()
            self.logger.debug("fetching %r", url)
            return getPage(url, *args, **kwargs)

        def handle_success(value):
            self.sem.release()
            self.logger.debug("got %d bytes for %r", len(value), url)
            return value

        def handle_error(failure):
            self.sem.release()
            failure.trap(HTTPError)
            self.logger.debug("got HTTP error %s", failure.value)
            # Re-raises unless this is a throttling response.
            self.trap_throttling(failure)
            delay = self.grace_delay
            self.logger.warning("we are throttled, delaying by %.1f seconds", delay)
            self._enable_grace_delay(delay)
            # auto-retry
            return do_get_page()

        def do_get_page():
            # We acquire the semaphore *before* seeing if we should delay
            # the request, so that we avoid pounding on the server when
            # the grace period is entered.
            d = self.sem.acquire()
            d.addCallback(schedule_request)
            d.addCallbacks(handle_success, handle_error)
            return d

        return do_get_page()

    def trap_throttling(self, failure):
        """Trap HTTP failures and return if we are
        throttled by the distant site, else re-raise.
        """
        e = failure.value
        if e.status in ("400", "420", "500", "503"):
            return
        failure.raiseException()
class AggregationResponseCache(object):
    '''
    This holds all the responses being aggregated for a single destination.

    One of the main challenges here is to make sure while we're sending the responses,
    we don't get a new response in and not send it.
    '''

    def __init__(self, numSecondsToWait, numMessagesToWaitFor, chordNode):
        '''
        Constructor.

        numSecondsToWait -- flush timeout once a response is queued.
        numMessagesToWaitFor -- flush as soon as this many responses queue up.
        chordNode -- node used to send the aggregated message and the ACKs.
        '''
        self.numSecondsToWait = numSecondsToWait
        self.numMessagesToWaitFor = numMessagesToWaitFor
        # BUG FIX: removed a duplicated assignment of numSecondsToWait.
        self.chordNode = chordNode
        # One-token semaphore: guards messageList against concurrent
        # mutation while a send is in progress.
        self.semaphore = DeferredSemaphore(1)
        self.messageList = []  # Holds tuples of (message, envelope)

        # Construct a timer to wait
        self.timerID = None

    def addResponse(self, message, envelope):
        '''We use a semaphore to ensure we don't modify the list while sending.'''
        d = self.semaphore.acquire()
        d.addCallback(self._addResponse, message, envelope)

    def _addResponse(self, dummy_defResult, message, envelope):
        '''This is called only once we have the semaphore.'''
        self.messageList.append((message, envelope))

        print("DEBUG: AggRespCache: %s  adding message %s " % (self.chordNode.nodeLocation.port, message))

        if len(self.messageList) >= self.numMessagesToWaitFor:
            # Send it! (_sendResponse releases the semaphore itself.)
            self._sendResponse()
        else:
            # Make sure a timer is running
            if self.timerID is None or not self.timerID.active():
                self.timerID = reactor.callLater(self.numSecondsToWait, self.sendResponse)

            # We're done.
            self.semaphore.release()

    def sendResponse(self):
        '''Acquire the semaphore, then send whatever has accumulated.'''
        d = self.semaphore.acquire()
        d.addCallback(self._sendResponse)

    def _sendResponse(self, dummy_deferResult=None):
        '''Send the queued responses; must be called holding the semaphore
        (it releases it once the list has been swapped out).
        '''
        # Copy the list
        messagesListCopy = self.messageList
        self.messageList = []

        # Release the semaphore
        self.semaphore.release()

        # Stop the timer if it's still going
        if self.timerID is not None and self.timerID.active():
            self.timerID.cancel()
            self.timerID = None

        print("DEBUG: AggResponseCache-Sending %d Messages %s" % (len(messagesListCopy), self.chordNode.nodeLocation.port))

        # Send a P2P message to the dest with all the responses
        d = self.chordNode.sendSyncMultipleMessage(messagesListCopy, 'p2p')  # Will this break message authentication?
        d.addCallback(self.sendAcks, messagesListCopy)
        d.addErrback(self.sendResponseFailed)

    def sendAcks(self, resultsDict, messageList):
        # Send ACK messages to the nodes for which we aggregated
        for (_message, envelope) in messageList:
            # Get the status to return; missing IDs count as failure.
            msgID = envelope['msgID']
            if msgID not in resultsDict:
                status = False
            else:
                status = resultsDict[msgID]

            d = self.chordNode.sendSingleAck(msgID, envelope['source'], status)
            d.addErrback(self.sendAckFailed, envelope['source'])

    def sendAckFailed(self, fail, sourceNode):
        log.err("We failed to SendAck for source %s" % sourceNode, fail)

    def sendResponseFailed(self, theFailure):
        log.err(theFailure)
        
        
        

        
Example #9
0
class IndxConnectionPool:
    """ A wrapper for txpostgres connection pools, which auto-reconnects.

    Keeps one DBConnectionPool per connection string; a single-token
    DeferredSemaphore serialises all pool bookkeeping in _connect().
    """

    def __init__(self):
        logging.debug("IndxConnectionPool starting. ")
        self.connections = {} # by connection string
        self.conn_strs = {} # by db_name
        # One-token semaphore: serialises _connect()/_newConnections().
        self.semaphore = DeferredSemaphore(1)
        self.subscribers = {} # by db name

    def removeAll(self, db_name):
        """ Remove all connections for a named database - used before deleting that database. """
        logging.debug("IndxConnectionPool removeAll {0}".format(db_name))
        d_list = []
        if db_name in self.conn_strs:
            for conn_str in self.conn_strs[db_name]:
                # Close both in-use and free connections for this conn_str.
                for conn in self.connections[conn_str].getInuse():
                    d_list.append(conn.close())
                for conn in self.connections[conn_str].getFree():
                    d_list.append(conn.close())

                del self.connections[conn_str]
            del self.conn_strs[db_name]

        # Fires once every close() above has completed.
        dl = DeferredList(d_list)
        return dl

    def connect(self, db_name, db_user, db_pass, db_host, db_port):
        """ Returns an IndxConnection (Actual connection and pool made when query is made). """

        return_d = Deferred()
        # log_conn_str masks the password for logging only.
        log_conn_str = "dbname='{0}' user='******' password='******' host='{3}' port='{4}' application_name='{5}'".format(db_name, db_user, "XXXX", db_host, db_port, indx_pg2.APPLICATION_NAME)
        conn_str = "dbname='{0}' user='******' password='******' host='{3}' port='{4}' application_name='{5}'".format(db_name, db_user, db_pass, db_host, db_port, indx_pg2.APPLICATION_NAME)
        logging.debug("IndxConnectionPool connect: {0}".format(log_conn_str))

        if db_name not in self.conn_strs:
            self.conn_strs[db_name] = []
        self.conn_strs[db_name].append(conn_str)

        def free_cb(conn):
            """ Called back when this IndxConnection has finished querying, so
                we put the real connection back into the pool. """
            logging.debug("IndxConnectionPool free_cb, conn: {0}".format(conn))

            self.connections[conn_str].freeConnection(conn) # no dealing with callbacks, just carry on


        def alloc_cb(conn_str):
            # a query was called - allocate a connection now and pass it back
            return self._connect(conn_str)

        # The IndxConnection defers real allocation until first use.
        indx_connection = IndxConnection(conn_str, alloc_cb, free_cb)
        return_d.callback(indx_connection)
        return return_d


    def _connect(self, conn_str):
        """ Connect and return a free Connection.
            Figures out whether to make new connections, use the pool, or wait in a queue.
        """
        logging.debug("IndxConnectionPool _connect ({0})".format(conn_str))
        return_d = Deferred()

        def err_cb(failure):
            logging.error("IndxConnectionPool _connect err_cb: {0}".format(failure))
            # Release before propagating so the pool is not wedged.
            self.semaphore.release()
            return_d.errback(failure)

        def succeed_cb(empty):
            # Runs while holding self.semaphore; every exit path below
            # must release it (or hand off to a callback that does).
            logging.debug("IndxConnectionPool _connect succeed_cb")
            # TODO pass a Connection back

            if len(self.connections[conn_str].getFree()) > 0:
                # free connection, use it straight away
                conn = self.connections[conn_str].getFree().pop()

                self.connections[conn_str].getInuse().append(conn)
                self.semaphore.release()
                return_d.callback(conn)
                return

            if len(self.connections[conn_str].getInuse()) < MAX_CONNS:
                # not at max connections for this conn_str

                # create a new one
                d = self._newConnection(conn_str)

                def connected_cb(indx_conn):
                    logging.debug("IndxConnectionPool _connect connected_cb ({0})".format(indx_conn))
                    # Move the fresh connection from 'free' to 'inuse'.
                    self.connections[conn_str].getFree().remove(indx_conn)
                    self.connections[conn_str].getInuse().append(indx_conn)
                    self.semaphore.release()
                    return_d.callback(indx_conn)
                    return

                d.addCallbacks(connected_cb, err_cb)
                return

            # wait for a connection
            def wait_cb(conn):
                logging.debug("IndxConnectionPool _connect wait_cb ({0})".format(conn))
                # already put in 'inuse'
                return_d.callback(conn)
                return

            self.semaphore.release()
            self.connections[conn_str].getWaiting().append(wait_cb)
            return

        def locked_cb(empty):
            logging.debug("IndxConnectionPool _connect locked_cb")
            if conn_str not in self.connections:
                self._newConnections(conn_str).addCallbacks(succeed_cb, err_cb)
            else:
                # NOTE(review): succeed_cb is run via a worker thread
                # here rather than called inline - presumably to avoid
                # re-entrancy; confirm intent before changing.
                threads.deferToThread(succeed_cb, None)
#                succeed_cb(None)

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)
        return return_d

    def _closeOldConnection(self):
        """ Close the oldest connection, so we can open a new one up. """
        # is already in a semaphore lock, from _newConnection
        logging.debug("IndxConnectionPool _closeOldConnection")

        ### we could force quite them through postgresql like this - but instead we kill them from inside
        #query = "SELECT * FROM pg_stat_activity WHERE state = 'idle' AND application_name = %s AND query != 'LISTEN wb_new_version' ORDER BY state_change LIMIT 1;"
        #params = [indx_pg2.APPLICATION_NAME]

        return_d = Deferred()

        def err_cb(failure):
            return_d.errback(failure)

        # Bucket pools by their last-used timestamp.
        ages = {}
        for conn_str, dbpool in self.connections.items():
            lastused = dbpool.getTime()
            if lastused not in ages:
                ages[lastused] = []
            ages[lastused].append(dbpool)

        # NOTE(review): list-style .keys()/.sort() is a Python 2 idiom.
        times = ages.keys()
        times.sort()

        # Oldest pools first.
        pool_queue = []
        for timekey in times:
            pools = ages[timekey]
            pool_queue.extend(pools)

        def removed_cb(count):
            # Frees pools one at a time until REMOVE_AT_ONCE connections
            # have been closed or there are no more pools.

            if count < REMOVE_AT_ONCE and len(pool_queue) > 0:
                pool = pool_queue.pop(0)
                # NOTE(review): return value unused; getFree() also
                # bumps the pool's last-used time - confirm intent.
                pool.getFree()
                pool.removeAll(count).addCallbacks(removed_cb, err_cb)
            else:
                return_d.callback(None)

        removed_cb(0)
        return return_d

    def _newConnection(self, conn_str):
        """ Makes a new connection to the DB
            and then puts it in the 'free' pool of this conn_str.
        """
        logging.debug("IndxConnectionPool _newConnection")
        # lock with the semaphore before calling this
        return_d = Deferred()

        def close_old_cb(failure):
            # NOTE(review): trapping Exception as well means effectively
            # every failure takes this close-and-retry path.
            failure.trap(psycopg2.OperationalError, Exception)
            # couldn't connect, so close an old connection first
            logging.error("IndxConnectionPool error close_old_cb: {0} - state of conns is: {1}".format(failure.value, self.connections))

            logging.error("IndxConnectionPool connections: {0}".format("\n".join(map(lambda name: self.connections[name].__str__(), self.connections))))

            def closed_cb(empty):
                # closed, so try connecting again
                self._newConnection(conn_str).addCallbacks(return_d.callback, return_d.errback)

            closed_d = self._closeOldConnection()
            closed_d.addCallbacks(closed_cb, return_d.errback)

        try:
            # try to connect
            def connected_cb(connection):
                logging.debug("IndxConnectionPool _newConnection connected_cb, connection: {0}".format(connection))
                self.connections[conn_str].getFree().append(connection)
                return_d.callback(connection)

            conn = txpostgres.Connection()
            connection_d = conn.connect(conn_str)
            connection_d.addCallbacks(connected_cb, close_old_cb)
        except Exception as e:
            # close an old connection first
            logging.debug("IndxConnectionPool Exception, going to call close_old_cb: ({0})".format(e))
            close_old_cb(Failure(e))

        return return_d

    def _newConnections(self, conn_str):
        """ Make a pool of new connections. """
        # lock with the semaphore before calling this
        logging.debug("IndxConnectionPool _newConnections")
        return_d = Deferred()

        self.connections[conn_str] = DBConnectionPool(conn_str)

        try:
            d_list = []
            # Pre-warm the pool with MIN_CONNS connections.
            for i in range(MIN_CONNS):
                connection_d = self._newConnection(conn_str)
                d_list.append(connection_d)

            dl = DeferredList(d_list)
            dl.addCallbacks(return_d.callback, return_d.errback)

        except Exception as e:
            logging.error("IndxConnectionPool error in _newConnections: {0}".format(e))
            return_d.errback(Failure(e))

        return return_d
Example #10
0
class DBConnectionPool():
    """ A pool of DB connections for a specific connection string / DB.

        The waiting/inuse/free lists are guarded by a one-token
        DeferredSemaphore: acquire it before mutating them, release when done.
    """

    def __init__(self, conn_str):
        self.waiting = []  # callbacks queued up for a connection
        self.inuse = []  # connections currently handed out to clients
        self.free = []  # idle connections ready for reuse

        self.semaphore = DeferredSemaphore(1)
        self.updateTime()

    def __unicode__(self):
        return self.__str__()

    def __str__(self):
        return "waiting: {0}, inuse: {1}, free: {2}, semaphore: {3}, lastused: {4}".format(self.waiting, self.inuse, self.free, self.semaphore, self.lastused)

    def updateTime(self):
        """ Record now as the pool's last-used time. """
        self.lastused = time.mktime(time.gmtime()) # epoch time

    def getTime(self):
        """ Return the pool's last-used time (epoch seconds). """
        return self.lastused

    def getWaiting(self):
        """ Return the waiting-callback list, touching the last-used time. """
        self.updateTime()
        return self.waiting

    def getInuse(self):
        """ Return the in-use list, touching the last-used time. """
        self.updateTime()
        return self.inuse

    def getFree(self):
        """ Return the free list, touching the last-used time. """
        self.updateTime()
        return self.free

    def freeConnection(self, conn):
        """ Free a connection from this DBPool.

            If a caller is waiting for a connection it is handed straight
            over, otherwise the connection goes back onto the free list.
        """

        def locked_cb(empty):
            logging.debug("DBConnectionPool locked_cb")
            callback = None
            try:
                self.getInuse().remove(conn)

                if len(self.getWaiting()) > 0:
                    # hand the connection straight to the next waiter
                    callback = self.getWaiting().pop()
                    self.getInuse().append(conn)
                else:
                    self.getFree().append(conn)
            finally:
                # Fix: always release, even if conn was missing from inuse
                # (the original would strand the semaphore on that ValueError
                # and deadlock the whole pool).
                self.semaphore.release()
            if callback is not None:
                callback(conn)

        def err_cb(failure):
            failure.trap(Exception)
            logging.error("DBConnectionPool free, err_cb: {0}".format(failure.value))
            # Fix: the acquire failed, so this code path does not hold the
            # semaphore -- releasing it here would corrupt its token count.

        self.semaphore.acquire().addCallbacks(locked_cb, err_cb)


    def removeAll(self, count):
        """ Remove all free connections (usually because they're old and
            we're in a freeing up period).

            Returns a deferred firing with `count` plus the number of
            connections closed.
        """
        logging.debug("DBConnectionPool removeAll called, count: {0}".format(count))
        return_d = Deferred()
        self.updateTime()

        def err_cb(failure):
            # Fix: the acquire failed, so the semaphore is not held here and
            # must not be released.
            return_d.errback(failure)

        def locked_cb(count):
            # immediately close the free connections
            try:
                while len(self.free) > 0:
                    conn = self.free.pop(0)
                    conn.close()
                    count += 1
            except Exception:
                # Fix: don't strand the semaphore if close() blows up.
                self.semaphore.release()
                return_d.errback(Failure())
                return

            self.semaphore.release()
            return_d.callback(count)

        self.semaphore.acquire().addCallbacks(lambda s: locked_cb(count), err_cb)
        return return_d
Example #11
0
class BaseQtWebKitMiddleware(object):
    """Scrapy downloader-middleware base that renders requests in QtWebKit
    web pages (see process_request/create_page), limiting the number of
    simultaneously open pages with a semaphore.
    """

    # Network access manager class instantiated for each new page.
    nam_cls = ScrapyNetworkAccessManager

    @classmethod
    def from_crawler(cls, crawler):
        """Scrapy factory hook: build the middleware from crawler settings."""
        settings = crawler.settings

        # Optionally wire Scrapy's cookie handling into the QtWebKit pages.
        # (Consistency fix: use the local `settings` alias throughout instead
        # of mixing it with `crawler.settings`.)
        if settings.getbool('QTWEBKIT_COOKIES_ENABLED', False):
            cookies_middleware = CookiesMiddleware(
                settings.getbool('COOKIES_DEBUG')
            )
        else:
            cookies_middleware = None

        qt_platform = settings.get("QTWEBKIT_QT_PLATFORM", "minimal")
        if qt_platform == "default":
            qt_platform = None

        ext = cls(
            crawler,
            show_window=settings.getbool("QTWEBKIT_SHOW_WINDOW", False),
            qt_platform=qt_platform,
            # Fix: read as a bool (the original used get(), which would pass
            # a raw string through when the setting is defined).
            enable_webkit_dev_tools=settings.getbool(
                "QTWEBKIT_ENABLE_DEV_TOOLS", False),
            page_limit=settings.getint("QTWEBKIT_PAGE_LIMIT", 4),
            cookies_middleware=cookies_middleware
        )

        return ext

    @staticmethod
    def engine_stopped():
        """Quit the Qt application, if one is running, on engine shutdown."""
        app = QApplication.instance()
        if app:
            app.quit()

    def __init__(self, crawler, show_window=False, qt_platform="minimal",
                 enable_webkit_dev_tools=False, page_limit=4,
                 cookies_middleware=None):
        """Store configuration and set up the page-limit semaphore."""
        super(BaseQtWebKitMiddleware, self).__init__()
        self._crawler = crawler
        self.show_window = show_window
        self.qt_platform = qt_platform
        self.enable_webkit_dev_tools = enable_webkit_dev_tools
        # Whenever more than one page may exist (including "no limit"),
        # disable WebKit's global object cache.
        if page_limit != 1 and QWebSettings is not None:
            QWebSettings.setObjectCacheCapacities(0, 0, 0)
        # page_limit=None means unlimited pages.
        self.semaphore = (DummySemaphore() if page_limit is None
                          else DeferredSemaphore(page_limit))
        self.cookies_middleware = cookies_middleware
        self._references = set()

    @staticmethod
    def _schedule_qt_event_loop(app):
        """Pump a QApplication's event loop from within Twisted.

        Should be called at most once per QApplication.
        """
        # XXX: This is ugly but I don't know another way to do it.
        pump = LoopingCall(app.processEvents)
        pump.start(0.02, False)
        app.aboutToQuit.connect(pump.stop)

    def _setup_page(self, page, extra_settings):
        """Apply the middleware's standard QWebSettings to *page*, then any
        per-request overrides from *extra_settings*.
        """
        settings = page.settings()
        defaults = (
            (QWebSettings.JavaEnabled, False),
            (QWebSettings.PluginsEnabled, False),
            (QWebSettings.PrivateBrowsingEnabled, True),
            (QWebSettings.LocalStorageEnabled, True),
            (QWebSettings.LocalContentCanAccessRemoteUrls, True),
            (QWebSettings.LocalContentCanAccessFileUrls, True),
            (QWebSettings.NotificationsEnabled, False),
            (QWebSettings.DeveloperExtrasEnabled,
             self.enable_webkit_dev_tools),
        )
        for attribute, value in defaults:
            settings.setAttribute(attribute, value)
        # Request-specific overrides win over the defaults above.
        for attribute, value in extra_settings.items():
            settings.setAttribute(attribute, value)

    @staticmethod
    def _make_qt_request(scrapy_request):
        """Build a QNetworkRequest from a Scrapy request.

        Returns a (qt_request, operation, body) triple.
        """
        qt_request = QNetworkRequest(QUrl(scrapy_request.url))
        for header, values in scrapy_request.headers.items():
            qt_request.setRawHeader(header, b', '.join(values))

        method = scrapy_request.method
        if method in HTTP_METHOD_TO_QT_OPERATION:
            operation = HTTP_METHOD_TO_QT_OPERATION[method]
        else:
            # Methods Qt has no dedicated operation for go through the
            # custom-verb mechanism.
            operation = QNetworkAccessManager.CustomOperation
            qt_request.setAttribute(QNetworkRequest.CustomVerbAttribute,
                                    method)

        qt_request.setAttribute(QNetworkRequest.CacheSaveControlAttribute,
                                False)

        return qt_request, operation, QByteArray(scrapy_request.body)

    @inlineCallbacks
    def process_request(self, request, spider):
        """Downloader-middleware hook: handle QtWebKitRequests in a web page.

        Non-QtWebKitRequest requests fall through (the generator returns
        None), only passing through the cookies middleware when enabled.
        """
        if self.cookies_middleware:
            yield self.cookies_middleware.process_request(request, spider)

        if isinstance(request, QtWebKitRequest):
            if request.webpage:
                # Request is to continue processing with an existing webpage
                # object.
                webpage = request.webpage
                request = request.replace(webpage=None)
                webpage.networkAccessManager().request = request
                returnValue(self._handle_page_request(spider, request,
                                                      webpage))
            else:
                # One semaphore slot per live page; the slot is released in
                # _close_page().
                # NOTE(review): if create_page() fails, the slot acquired
                # here appears never to be released — possible leak; confirm.
                yield self.semaphore.acquire()
                response = yield self.create_page(request, spider)
                returnValue(response)

    def process_response(self, request, response, spider):
        """Delegate to the cookies middleware when enabled, otherwise pass
        the response through unchanged.
        """
        if not self.cookies_middleware:
            return response
        return self.cookies_middleware.process_response(request, response,
                                                        spider)

    def ensure_qapplication(self):
        """Create and setup a QApplication if one does not already exist."""
        if QApplication.instance():
            return
        argv = ["scrapy"]
        if self.qt_platform is not None:
            argv += ["-platform", self.qt_platform]
        application = QApplication(argv)
        self._schedule_qt_event_loop(application)
        # Hooks Scrapy signals up to stop the application on shutdown.
        _QApplicationStopper(self._crawler.signals, application)

    def create_page(self, request, spider):
        """

        Create a webpage object, load a request on it, return a deferred that
        fires with a response on page load.

        """

        self.ensure_qapplication()

        webpage = WebPage()
        self._setup_page(webpage,
                         request.meta.get('qwebsettings_settings', {}))
        # Keep a strong reference so the page outlives this call; dropped
        # again in _close_page().
        self._references.add(webpage)

        if self.show_window:
            webview = QWebView()
            webview.setPage(webpage)
            webpage.webview = webview
            self._add_webview_to_window(webview, spider.name)

        # Optionally replace the request's User-Agent with the page's own.
        if request.meta.get('qtwebkit_user_agent', False):
            request.headers['User-Agent'] = webpage.userAgentForUrl(
                QUrl(request.url)
            )

        nam = self.nam_cls(spider, request, request.headers.get('User-Agent'),
                           parent=webpage)
        if ((self.cookies_middleware and
             'dont_merge_cookies' not in request.meta)):
            cookiejarkey = request.meta.get("cookiejar")
            cookiejar = ScrapyAwareCookieJar(self.cookies_middleware,
                                             cookiejarkey, parent=nam)
            nam.setCookieJar(cookiejar)
        webpage.setNetworkAccessManager(nam)

        # Build the response once the page signals that loading finished
        # (successfully or not).
        d = deferred_for_signal(webpage.load_finished_with_error)
        d.addCallback(partial(self._handle_page_request, spider, request,
                              webpage))
        webpage.mainFrame().load(*self._make_qt_request(request))
        return d

    def _add_webview_to_window(self, webview, title=""):
        """Display *webview* in a window. No-op here; subclasses override."""
        pass

    def _remove_webview_from_window(self, webview):
        """Remove *webview* from its window. No-op here; subclasses override."""
        pass

    def _handle_page_request(self, spider, request, webpage,
                             load_result=(True, None)):
        """

        Handle a request for a web page, either a page load or a request to
        continue using an existing page object.

        Returns a response built from the page contents, or a Failure when
        the load failed or response construction raised.

        """

        try:
            ok, error = load_result

            if ok:
                # The error object is not available if a page load was not
                # requested.
                if error and error.domain == QWebPage.Http:
                    status = error.error
                else:
                    status = 200
                if error:
                    url = error.url
                    headers = error.headers
                else:
                    url = webpage.mainFrame().url()
                    # Fix: the original read error.headers unconditionally,
                    # which raised AttributeError (and thus produced a
                    # Failure) whenever error was None — e.g. on the
                    # continue-with-existing-page path where load_result
                    # keeps its default.
                    headers = None

                qwebpage_response = request.meta.get('qwebpage_response', False)
                if qwebpage_response:
                    respcls = QtWebKitResponse
                else:
                    respcls = HtmlResponse

                response = respcls(status=status,
                                   url=url.toString(),
                                   headers=headers,
                                   body=webpage.mainFrame().toHtml(),
                                   encoding='utf-8',
                                   request=request)

                if qwebpage_response:
                    # Caller keeps using the page; close it after the spider
                    # callback runs instead of now.
                    response.webpage = webpage
                    request.callback = partial(self._request_callback, spider,
                                               request.callback or 'parse')
                else:
                    self._close_page(webpage)

            else:
                raise self._exception_from_errorpageextensionoption(error)

        except Exception as err:
            # Returning a Failure from this Deferred callback propagates the
            # error down the errback chain.
            response = Failure(err)

        return response

    @inlineCallbacks
    def _request_callback(self, spider, original_callback, response):
        """

        Close the page (lose the reference to it so it is garbage collected)
        when the callback returns.

        The original callback may prevent page closing by setting the
        should_close_webpage attribute in responses. This is useful for
        example if the page is stored somewhere else (e.g. request meta) to be
        used later. The page then needs to be closed manually at some point by
        calling its close_page() function, which is created here.

        """

        # Spider callbacks may be referenced by method name; resolve here.
        if isinstance(original_callback, basestring):
            original_callback = getattr(spider, original_callback)

        webpage = response.webpage
        response.should_close_webpage = True
        try:
            # Run the wrapped callback and hand its result (normalised to an
            # iterable) back to Scrapy.
            returnValue(arg_to_iter((yield maybeDeferred(original_callback,
                                                         response))))
        finally:
            # FIXME: sometimes this section is reached before the wrapped
            # callback finishes, when it returns a Deferred.
            if response.should_close_webpage:
                self._close_page(webpage)
            else:
                # Caller opted to keep the page alive; give it an explicit
                # close hook bound to this page.
                webpage.close_page = partial(self._close_page, webpage)
                webpage.close_page.__doc__ = ("Lose the reference to the "
                                              "webpage object and allow it "
                                              "to be garbage collected.")

    def _close_page(self, webpage):
        """Drop the reference to *webpage* and release its semaphore slot.

        NOTE(review): raises KeyError if called twice for the same page, and
        assumes WebPage always defines a `webview` attribute (None when no
        window was shown) — confirm against the WebPage class.
        """
        self._references.remove(webpage)
        # Resetting the main frame URL prevents it from making any more
        # requests, which would cause Qt errors after the webpage is deleted.
        webpage.mainFrame().setUrl(QUrl())
        if webpage.webview is not None:
            self._remove_webview_from_window(webpage.webview)
        self.semaphore.release()

    # Maps QNetworkReply error codes to the Twisted/Scrapy exception classes
    # instantiated by _exception_from_errorpageextensionoption().
    _qt_error_exc_mapping = {
        QNetworkReply.ConnectionRefusedError: ConnectionRefusedError,
        QNetworkReply.RemoteHostClosedError: ConnectionLost,
        QNetworkReply.HostNotFoundError: DNSLookupError,
        QNetworkReply.TimeoutError: TimeoutError,
        QNetworkReply.OperationCanceledError: ConnectingCancelledError,
        QNetworkReply.SslHandshakeFailedError: SSLError,
        QNetworkReply.ProtocolUnknownError: NotSupported
    }

    def _exception_from_errorpageextensionoption(self, option):
        """Translate an error-page extension option into an exception
        instance, mapping Qt network errors to specific exception classes.
        """
        exc_cls = Exception
        if option.domain == QWebPage.QtNetwork:
            exc_cls = self._qt_error_exc_mapping.get(option.error,
                                                     ConnectError)
        return exc_cls(option.errorString)