Python exception Examples

Programming Language: Python

Namespace/Package Name: downpour.logger

Method/Function: exception

Examples at hotexamples.com: 3

Python exception - 3 examples found. These are the top-rated real-world Python examples of downpour.logger.exception extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def pop(self, polite=True):
        '''Hand out the next request to issue, or None if nothing is ready.

        Walks the PLD queue until it can return a robots.txt request or a
        regular request, or until it finds that the caller has to wait.

        When `polite` is true and the head of the PLD queue is scheduled
        for later, a call to self.serveNext is scheduled through the
        reactor (unless a timer is already active) and None is returned.
        '''
        while True:

            # Stage 1: while holding the pld lock, inspect the head of
            # pldQueue and only take it once its scheduled time is here.
            with self.pld_lock:
                pld, due = self.pldQueue.peek(withscores=True)
                if not pld:
                    # Nothing in pldQueue.
                    return None
                now = time.time()
                if polite and due > now:
                    # Too early for this one. Make sure a wake-up call is
                    # pending (without scheduling a second one), then
                    # give up for now.
                    with self.twi_lock:
                        if not self.timer or not self.timer.active():
                            delay = due - now
                            logger.debug('Waiting %f seconds on %s' %
                                         (delay, pld))
                            self.timer = reactor.callLater(
                                delay, self.serveNext)
                    return None
                # No waiting required. Something else might still be
                # waiting on the timer, so the reference is dropped only
                # when the timer is missing or no longer active.
                with self.twi_lock:
                    if not self.timer or not self.timer.active():
                        self.timer = None
                # The peek showed that the entry is due, so take it.
                pld = self.pldQueue.pop()

            # Stage 2: work on the request queue that belongs to this PLD
            # while holding the request lock.
            queue = qr.Queue(pld)
            with self.req_lock:
                if not len(queue):
                    # No requests are queued for this PLD.
                    try:
                        if Counter.len(self.r, pld) == 0:
                            # Nothing in flight either: report the empty
                            # queue, then call clear_ph for this PLD.
                            logger.debug('Calling onEmptyQueue for %s' % pld)
                            self.onEmptyQueue(pld)
                            try:
                                with self.pld_lock:
                                    self.pldQueue.clear_ph(pld)
                            except ValueError:
                                logger.error(
                                    'pldQueue.clear_ph failed for %s' % pld)
                        else:
                            # Requests are still in flight; look at this
                            # PLD again in 20 seconds to see whether the
                            # last one has finished.
                            with self.pld_lock:
                                self.pldQueue.push_unique(
                                    pld,
                                    time.time() + 20)
                            logger.debug(
                                'Requests still in flight for %s. Waiting' %
                                pld)
                    except Exception:
                        logger.exception('onEmptyQueue failed for %s' % pld)
                    continue

                # The parallel-request budget for this PLD is used up:
                # push it back by 20 seconds and move on. Logic elsewhere
                # advances it sooner when one of those requests completes
                # before the 20 seconds elapse.
                if Counter.len(self.r, pld) >= self.maxParallelRequests:
                    logger.debug('maxParallelRequests exceeded for %s' %
                                 pld)
                    with self.pld_lock:
                        self.pldQueue.push_unique(pld, time.time() + 20)
                    continue

                # Look up the robots information for the domain of the
                # request at the head of the queue.
                head = queue.peek()
                domain = urlparse.urlparse(head.url).netloc
                robot = reppy.findRobot('http://' + domain)
                if self.allowAll or (robot and not robot.expired):
                    logger.debug('Popping next request from %s' % pld)
                    request = queue.pop()
                    # Remember the key this request was popped from. A
                    # redirect can rewrite the request's url (through
                    # urlparse.urljoin in onURL); bookkeeping done against
                    # the new hostname instead of the original one made
                    # the pld queue slowly drain.
                    request._originalKey = pld
                    # One more request in flight.
                    Counter.put(self.r, request)
                    # Schedule the next request to this domain.
                    with self.pld_lock:
                        self.pldQueue.push_unique(
                            pld,
                            time.time() + self.crawlDelay(request))
                    return request

                # Robots information is missing or expired (and allowAll
                # is off), so robots.txt has to be requested first.
                logger.debug('Making robots request for %s' % pld)
                robots_request = RobotsRequest(
                    'http://' + domain + '/robots.txt')
                robots_request._originalKey = pld
                # One more request in flight.
                Counter.put(self.r, robots_request)
                return robots_request

        logger.debug('Returning None (should not happen).')
        return None

Example #2

Show file

File: PoliteFetcher.py Project: seomoz/downpour

    def pop(self, polite=True):
        '''Return the next request that should be made, or None.

        Each pass looks at the head of self.pldQueue. None is returned
        when that queue is empty, or when `polite` is true and the head is
        not due yet (a reactor call to self.serveNext is scheduled unless
        a timer is already active). Otherwise the PLD is popped and either
        a RobotsRequest or the next request of that PLD is returned; PLDs
        that cannot be served right now are skipped and the loop goes on.
        '''
        while True:

            # Pop the head of pldQueue, but only if it is not a premature
            # fetch. Peek and pop both happen under the pld lock.
            with self.pld_lock:
                key, ready_at = self.pldQueue.peek(withscores=True)
                if not key:
                    # Nothing in pldQueue.
                    return None
                now = time.time()
                if polite and ready_at > now:
                    # Not due yet. If a timer is already waiting, do not
                    # schedule a second callLater.
                    with self.twi_lock:
                        timer_busy = self.timer and self.timer.active()
                        if not timer_busy:
                            wait = ready_at - now
                            logger.debug('Waiting %f seconds on %s' % (wait, key))
                            self.timer = reactor.callLater(wait, self.serveNext)
                    return None
                # The entry is due. Something else might still be waiting
                # on the timer, so clear it only when it is not holding
                # some other pending call.
                with self.twi_lock:
                    timer_busy = self.timer and self.timer.active()
                    if not timer_busy:
                        self.timer = None
                key = self.pldQueue.pop()

            # Fetch the queue for this PLD and take the request lock.
            pending = qr.Queue(key)
            with self.req_lock:
                if not len(pending):
                    # The queue for this PLD holds no requests.
                    try:
                        if Counter.len(self.r, key) == 0:
                            logger.debug('Calling onEmptyQueue for %s' % key)
                            self.onEmptyQueue(key)
                            try:
                                with self.pld_lock:
                                    self.pldQueue.clear_ph(key)
                            except ValueError:
                                logger.error('pldQueue.clear_ph failed for %s' % key)
                        else:
                            # Requests are still in flight, so try again
                            # in a little bit and see if they finished.
                            with self.pld_lock:
                                self.pldQueue.push_unique(key, time.time() + 20)
                            logger.debug('Requests still in flight for %s. Waiting' % key)
                    except Exception:
                        logger.exception('onEmptyQueue failed for %s' % key)
                    continue

                # Parallel requests for this PLD are saturated: retry it
                # after a short delay. Logic elsewhere advances the PLD
                # if one of those requests completes before that.
                if Counter.len(self.r, key) >= self.maxParallelRequests:
                    logger.debug('maxParallelRequests exceeded for %s' % key)
                    with self.pld_lock:
                        self.pldQueue.push_unique(key, time.time() + 20)
                    continue

                # A robots request comes first when robots have not been
                # fetched for this domain or have expired.
                candidate = pending.peek()
                domain = urlparse.urlparse(candidate.url).netloc
                robot = reppy.findRobot('http://' + domain)
                if not self.allowAll and (not robot or robot.expired):
                    logger.debug('Making robots request for %s' % key)
                    robots_req = RobotsRequest('http://' + domain + '/robots.txt')
                    robots_req._originalKey = key
                    # Count it among the requests currently in flight.
                    Counter.put(self.r, robots_req)
                    return robots_req

                logger.debug('Popping next request from %s' % key)
                req = pending.pop()
                # Record the key the request came from. Redirects can
                # change the request's url (urlparse.urljoin in onURL),
                # and popping off the queue for the new hostname instead
                # of the original one caused the pld queue to slowly
                # drain.
                req._originalKey = key
                # Count it among the requests currently in flight.
                Counter.put(self.r, req)
                # Schedule the next request to this domain.
                with self.pld_lock:
                    self.pldQueue.push_unique(key, time.time() + self.crawlDelay(req))
                return req

        logger.debug('Returning None (should not happen).')
        return None

Example #3

Show file

File: PoliteFetcher.py Project: mt3/downpour

 def pop(self, polite=True):
     '''Get the next request, or None when nothing can be served yet.

     Peeks at the head of self.pldQueue. Returns None if it is empty. If
     `polite` is true and the head is scheduled for later, makes sure a
     reactor.callLater for self.serveNext is pending and returns None.
     Otherwise pops the PLD and, holding self.lock, returns either a
     RobotsRequest for the domain's robots.txt or the next request from
     that PLD's queue. A PLD with too many requests in flight, or with an
     empty queue but requests still in flight, is pushed back 20 seconds
     and the loop moves on to the next PLD.
     '''
     while True:
         # Get the next plds we might want to fetch from
         pld, when = self.pldQueue.peek(withscores=True)
         if not pld:
             return None
         # Read the clock on every pass, so that a pass reached through
         # `continue` does not compare against a stale timestamp.
         now = time.time()
         # If the next-fetchable is not soon enough, then wait. If a timer
         # is already active, don't schedule a second callLater.
         if polite and when > now:
             with self.tlock:
                 if not (self.timer and self.timer.active()):
                     logger.debug('Waiting %f seconds on %s' % (when - now, pld))
                     self.timer = reactor.callLater(when - now, self.serveNext)
             return None

         # Go ahead and pop this item
         pld = self.pldQueue.pop()
         # Only drop the timer reference when it is not holding a pending
         # call; forgetting an active timer would let a later pass
         # schedule a duplicate wake-up.
         with self.tlock:
             if not (self.timer and self.timer.active()):
                 self.timer = None
         q = qr.Queue(pld)

         with self.lock:
             if len(q):
                 # If we've already saturated our parallel requests, then we'll
                 # wait some short amount of time before we make our next request.
                 # There is logic elsewhere so that if one of these requests
                 # completes before this small amount of time elapses, then it
                 # will be advanced accordingly.
                 if Counter.len(self.r, pld) >= self.maxParallelRequests:
                     self.pldQueue.push(pld, time.time() + 20)
                     continue

                 # If the robots for this particular request is not fetched
                 # or it's expired, then we'll have to make a request for it
                 v = q.peek()
                 domain = urlparse.urlparse(v.url).netloc
                 robot = reppy.findRobot('http://' + domain)
                 if not self.allowAll and (not robot or robot.expired):
                     logger.debug('Making robots request for %s' % pld)
                     r = RobotsRequest('http://' + domain + '/robots.txt')
                     r._originalKey = pld
                     # Increment the number of requests we currently have in flight
                     Counter.put(self.r, r)
                     return r
                 else:
                     logger.debug('Popping next request from %s' % pld)
                     v = q.pop()
                     # This was the source of a rather difficult-to-track bug
                     # wherein the pld queue would slowly drain, despite there
                     # being plenty of logical queues to draw from. The problem
                     # was introduced by calling urlparse.urljoin when invoking
                     # the request's onURL method. As a result, certain redirects
                     # were making changes to the url, saving it as an updated
                     # value, but we'd then try to pop off the queue for the new
                     # hostname, when in reality, we should pop off the queue
                     # for the original hostname.
                     v._originalKey = pld
                     # Increment the number of requests we currently have in flight
                     Counter.put(self.r, v)
                     # At this point, we should also schedule the next request
                     # to this domain.
                     self.pldQueue.push(pld, time.time() + self.crawlDelay(v))
                     return v
             else:
                 try:
                     if Counter.len(self.r, pld) == 0:
                         logger.debug('Calling onEmptyQueue for %s' % pld)
                         self.onEmptyQueue(pld)
                     else:
                         # Otherwise, we should try again in a little bit, and
                         # see if the last request has finished.
                         self.pldQueue.push(pld, time.time() + 20)
                         logger.debug('Requests still in flight for %s. Waiting' % pld)
                 except Exception:
                     logger.exception('onEmptyQueue failed for %s' % pld)
                 continue