Beispiel #1
0
    def put_visited(self, task, changed):
        """Put a task for a visited directory.

        If the directory is visited for the first time the `changed` argument
        will be ignored.  If the directory was previously visited the `changed`
        argument should be `True` if the directory changed, `False` otherwise.
        This information will be used to estimate the change frequency.  In
        both cases the `TaskQueue` will schedule a task to revisit the
        directory.

        The visit is reported to the task with `task.report_visit(changed)`
        and the revisit wait is then chosen as follows:

        - `task.revisit_count == 0`: the site's `default_revisit_wait`.
        - `task.revisit_count >= self._revisits`: the value returned by
          `self._estimate_revisit_wait(task)`, clamped to the site's
          `min_revisit_wait` and `max_revisit_wait`; the change count of the
          task is reset afterwards.
        - otherwise: the current `task.revisit_wait` is left untouched.

        Finally the task is handed to `self._put()` together with its
        `revisit_wait`.  Everything runs while holding `self._mutex`.
        """
        # The `with` block releases the mutex even if any step below raises.
        with self._mutex:
            site_info = self._sites_info[task.site_id]
            task.report_visit(changed)
            if task.revisit_count == 0:
                # First visit.  Set default values.
                task.revisit_wait = site_info['default_revisit_wait']
                logging.info('Setting revisit frequency for "%s" to %s',
                             task.url, secs_to_readable(task.revisit_wait))
            elif task.revisit_count >= self._revisits:
                # Enough visits were recorded: re-estimate the wait and keep
                # it inside the limits configured for the site.
                minimum = site_info['min_revisit_wait']
                maximum = site_info['max_revisit_wait']
                estimated = self._estimate_revisit_wait(task)
                task.revisit_wait = min(maximum, max(minimum, estimated))
                task.reset_change_count()
                logging.info('Changing revisit frequency for "%s" to %s',
                             task.url, secs_to_readable(task.revisit_wait))
            else:
                # Message arguments are passed lazily so the logging module
                # only formats them when the record is actually emitted.
                logging.info('Missing %s visits to "%s" before estimating '
                             'change frequency.',
                             self._revisits - task.revisit_count, task.url)
            self._put(task, task.revisit_wait)
Beispiel #2
0
 def test_secs_to_readable(self):
     """Check `secs_to_readable()` against a table of expected strings.

     The table covers singular and plural forms of each unit (second,
     minute, hour, day) as well as values combining several units.
     """
     values = {
         1: '1 second',
         60: '1 minute',
         60 * 60: '1 hour',
         60 * 60 * 24: '1 day',
         2: '2 seconds',
         60 * 2: '2 minutes',
         60 * 60 * 2: '2 hours',
         60 * 60 * 24 * 2: '2 days',
         60 + 1: '1 minute and 1 second',
         60 * 60 + 60 + 1: '1 hour, 1 minute and 1 second',
         60 * 60 * 24 + 60 * 60 + 60 + 1:
             '1 day, 1 hour, 1 minute and 1 second',
     }
     # `items()` and `assertEqual()` exist on both Python 2 and Python 3,
     # unlike `iteritems()` and the deprecated `assertEquals()` alias.
     for secs, readable in values.items():
         # The message identifies which input failed.
         self.assertEqual(secs_to_readable(secs), readable,
                          'unexpected result for %r seconds' % (secs,))
Beispiel #3
0
    def put_visited(self, task, changed):
        """Put a task for a visited directory.

        If the directory is visited for the first time the `changed` argument
        will be ignored.  If the directory was previously visited the `changed`
        argument should be `True` if the directory changed, `False` otherwise.
        This information will be used to estimate the change frequency.  In
        both cases the `TaskQueue` will schedule a task to revisit the
        directory.

        After calling `task.report_visit(changed)` one of three things
        happens, depending on `task.revisit_count`:

        - it is 0: `task.revisit_wait` becomes the site's
          `default_revisit_wait`;
        - it is at least `self._revisits`: `task.revisit_wait` becomes the
          result of `self._estimate_revisit_wait(task)` limited to the range
          given by the site's `min_revisit_wait` and `max_revisit_wait`, and
          `task.reset_change_count()` is called;
        - anything else: `task.revisit_wait` is not modified.

        The task is then passed to `self._put()` with its `revisit_wait`.
        `self._mutex` is held for the whole operation.
        """
        # Using the mutex as a context manager guarantees it is released on
        # every exit path, including exceptions.
        with self._mutex:
            site_info = self._sites_info[task.site_id]
            task.report_visit(changed)
            if task.revisit_count == 0:
                # First visit.  Set default values.
                task.revisit_wait = site_info['default_revisit_wait']
                logging.info(
                    'Setting revisit frequency for "%s" to %s',
                    task.url, secs_to_readable(task.revisit_wait))
            elif task.revisit_count >= self._revisits:
                # Clamp the new estimate to the limits of the site.
                lower = site_info['min_revisit_wait']
                upper = site_info['max_revisit_wait']
                estimated = self._estimate_revisit_wait(task)
                task.revisit_wait = min(upper, max(lower, estimated))
                task.reset_change_count()
                logging.info(
                    'Changing revisit frequency for "%s" to %s',
                    task.url, secs_to_readable(task.revisit_wait))
            else:
                # Arguments are handed to `logging` unformatted so that the
                # message is only built if the record is emitted.
                logging.info(
                    'Missing %s visits to "%s" before estimating change '
                    'frequency.',
                    self._revisits - task.revisit_count, task.url)
            self._put(task, task.revisit_wait)
Beispiel #4
0
 def test_secs_to_readable(self):
     """Verify the text produced by `secs_to_readable()` for known inputs.

     Each key of the table is a number of seconds and each value is the
     string `secs_to_readable()` is expected to return for it.
     """
     expected_by_secs = {
         1: '1 second',
         60: '1 minute',
         60 * 60: '1 hour',
         60 * 60 * 24: '1 day',
         2: '2 seconds',
         60 * 2: '2 minutes',
         60 * 60 * 2: '2 hours',
         60 * 60 * 24 * 2: '2 days',
         60 + 1: '1 minute and 1 second',
         60 * 60 + 60 + 1: '1 hour, 1 minute and 1 second',
         60 * 60 * 24 + 60 * 60 + 60 + 1:
             '1 day, 1 hour, 1 minute and 1 second',
     }
     # `dict.iteritems()` only exists on Python 2 and `assertEquals()` is a
     # deprecated alias; `items()` and `assertEqual()` work on 2 and 3.
     for secs, readable in expected_by_secs.items():
         # Include the input in the failure message to ease debugging.
         self.assertEqual(secs_to_readable(secs), readable,
                          'unexpected result for %r seconds' % (secs,))