Example #1
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True,
                      INSTALLED_APPS=['raven.contrib.django']):
            client = TestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get,
                              reverse('sentry-raise-exc'))

            assert len(self.raven.events) == 2

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            self.assertIn('sentry.interfaces.Exception', event)
            exc = event['sentry.interfaces.Exception']
            self.assertEqual(exc['type'], 'Exception')
            self.assertEqual(exc['value'], 'view exception')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'Exception: view exception')
            self.assertEqual(event['culprit'],
                             'tests.contrib.django.views in raise_exc')

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            self.assertIn('sentry.interfaces.Exception', event)
            exc = event['sentry.interfaces.Exception']
            self.assertEqual(exc['type'], 'ValueError')
            self.assertEqual(exc['value'], 'handler500')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'ValueError: handler500')
            self.assertEqual(event['culprit'],
                             'tests.contrib.django.urls in handler500')
Example #2
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True):
            client = TestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get,
                              reverse('sentry-raise-exc'))

            self.assertEqual(len(self.raven.events), 2)

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            self.assertIn('sentry.interfaces.Exception', event)
            exc = event['sentry.interfaces.Exception']
            self.assertEqual(exc['type'], 'Exception')
            self.assertEqual(exc['value'], 'view exception')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'Exception: view exception')
            self.assertEqual(event['culprit'],
                             'tests.contrib.django.views.raise_exc')

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            self.assertIn('sentry.interfaces.Exception', event)
            exc = event['sentry.interfaces.Exception']
            self.assertEqual(exc['type'], 'ValueError')
            self.assertEqual(exc['value'], 'handler500')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'ValueError: handler500')
            self.assertEqual(event['culprit'],
                             'tests.contrib.django.urls.handler500')
Example #3
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True):
            client = TestClient(REMOTE_ADDR="127.0.0.1")
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get, reverse("sentry-raise-exc"))

            self.assertEqual(len(self.raven.events), 2)

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            self.assertIn("sentry.interfaces.Exception", event)
            exc = event["sentry.interfaces.Exception"]
            self.assertEqual(exc["type"], "Exception")
            self.assertEqual(exc["value"], "view exception")
            self.assertEqual(event["level"], logging.ERROR)
            self.assertEqual(event["message"], "Exception: view exception")
            self.assertEqual(event["culprit"], "tests.contrib.django.views.raise_exc")

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            self.assertIn("sentry.interfaces.Exception", event)
            exc = event["sentry.interfaces.Exception"]
            self.assertEqual(exc["type"], "ValueError")
            self.assertEqual(exc["value"], "handler500")
            self.assertEqual(event["level"], logging.ERROR)
            self.assertEqual(event["message"], "ValueError: handler500")
            self.assertEqual(event["culprit"], "tests.contrib.django.urls.handler500")
Example #4
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockOpbeatMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with self.settings(BREAK_THAT_500=True):
            client = TestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockOpbeatMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get, reverse('opbeat-raise-exc'))

            self.assertEqual(len(self.opbeat.events), 2)

            # First event: the exception raised by the view itself.
            event = self.opbeat.events.pop(0)

            self.assertIn('exception', event)
            exc = event['exception']
            self.assertEqual(exc['type'], 'Exception')
            self.assertEqual(exc['value'], 'view exception')
            self.assertEqual(event['level'], 'error')
            self.assertEqual(event['message'], 'Exception: view exception')
            self.assertEqual(event['culprit'], 'tests.contrib.django.testapp.views.raise_exc')

            # Second event: the error attributed to handler500.
            event = self.opbeat.events.pop(0)

            self.assertIn('exception', event)
            exc = event['exception']
            self.assertEqual(exc['type'], 'ValueError')
            self.assertEqual(exc['value'], 'handler500')
            self.assertEqual(event['level'], 'error')
            self.assertEqual(event['message'], 'ValueError: handler500')
            self.assertEqual(event['culprit'], 'tests.contrib.django.testapp.urls.handler500')
Example #5
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, the first two
        recorded events are expected to be the view's ``Exception`` followed
        by the ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True,
                      INSTALLED_APPS=['raven.contrib.django']):
            client = DjangoTestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get,
                              reverse('sentry-raise-exc'))

            # Either 2 or 4 events are accepted. The previous form
            # (`== 2 or 4`) was always true because `4` is truthy, so the
            # count was never actually checked.
            # TODO: ash remove duplicate client events
            assert len(self.raven.events) in (2, 4)

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            assert 'exception' in event
            exc = event['exception']['values'][-1]
            assert exc['type'] == 'Exception'
            assert exc['value'] == 'view exception'
            assert event['level'] == logging.ERROR
            assert event['message'] == 'Exception: view exception'

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            assert 'exception' in event
            exc = event['exception']['values'][-1]
            assert exc['type'] == 'ValueError'
            assert exc['value'] == 'handler500'
            assert event['level'] == logging.ERROR
            assert event['message'] == 'ValueError: handler500'
Example #6
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True, INSTALLED_APPS=['raven.contrib.django']):
            client = TestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get, reverse('sentry-raise-exc'))

            assert len(self.raven.events) == 2

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            assert 'exception' in event
            exc = event['exception']['values'][0]
            self.assertEqual(exc['type'], 'Exception')
            self.assertEqual(exc['value'], 'view exception')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'Exception: view exception')
            self.assertEqual(event['culprit'], 'tests.contrib.django.views in raise_exc')

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            assert 'exception' in event
            exc = event['exception']['values'][0]
            self.assertEqual(exc['type'], 'ValueError')
            self.assertEqual(exc['value'], 'handler500')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'ValueError: handler500')
            self.assertEqual(event['culprit'], 'tests.contrib.django.urls in handler500')
Example #7
0
    def test_broken_500_handler_with_middleware(self):
        """Verify both errors are reported when the 500 handler is broken.

        Two events are expected, in order: the ``Exception`` raised by the
        view, then the ``ValueError`` attributed to ``handler500``.
        """
        # (exception type, exception value, event message), in arrival order.
        expected_events = (
            ('Exception', 'view exception', 'Exception: view exception'),
            ('ValueError', 'handler500', 'ValueError: handler500'),
        )

        with Settings(BREAK_THAT_500=True, INSTALLED_APPS=['raven.contrib.django']):
            test_client = DjangoTestClient(REMOTE_ADDR='127.0.0.1')
            test_client.handler = MockSentryMiddleware(MockClientHandler())

            target_url = reverse('sentry-raise-exc')
            self.assertRaises(Exception, test_client.get, target_url)

            assert len(self.raven.events) == 2

            for exc_type, exc_value, message in expected_events:
                captured = self.raven.events.pop(0)

                assert 'exception' in captured
                # Only the last entry of the exception list is inspected.
                last_exc = captured['exception']['values'][-1]
                assert last_exc['type'] == exc_type
                assert last_exc['value'] == exc_value
                assert captured['level'] == logging.ERROR
                assert captured['message'] == message
Example #8
0
def client():
    """Return a Django test ``Client`` whose handler is a ``TestClienHandler``."""
    from django.test.client import Client

    test_client = Client()
    # Swap the default handler for the project's own handler class.
    test_client.handler = TestClienHandler()
    return test_client
Example #9
0
    def test_broken_500_handler_with_middleware(self):
        """Verify both errors are reported when the 500 handler is broken.

        Two events are expected, in order: the ``Exception`` raised by the
        view, then the ``ValueError`` attributed to ``handler500``.
        """
        # (exception type, exception value, event message, culprit), in
        # arrival order.
        expected_events = (
            (
                "Exception",
                "view exception",
                "Exception: view exception",
                "tests.contrib.django.views in raise_exc",
            ),
            (
                "ValueError",
                "handler500",
                "ValueError: handler500",
                "tests.contrib.django.urls in handler500",
            ),
        )

        with Settings(BREAK_THAT_500=True, INSTALLED_APPS=["raven.contrib.django"]):
            test_client = TestClient(REMOTE_ADDR="127.0.0.1")
            test_client.handler = MockSentryMiddleware(MockClientHandler())

            target_url = reverse("sentry-raise-exc")
            self.assertRaises(Exception, test_client.get, target_url)

            assert len(self.raven.events) == 2

            for exc_type, exc_value, message, culprit in expected_events:
                captured = self.raven.events.pop(0)

                assert "exception" in captured
                # Only the first entry of the exception list is inspected.
                first_exc = captured["exception"]["values"][0]
                assert first_exc["type"] == exc_type
                assert first_exc["value"] == exc_value
                assert captured["level"] == logging.ERROR
                assert captured["message"] == message
                assert captured["culprit"] == culprit
Example #10
0
def admin_client(db, admin_user):
    """Return a Django test ``Client`` logged in with ``admin_user``'s username.

    ``db`` is accepted but not used in the body.
    """
    from django.test.client import Client

    logged_in_client = Client()
    # Swap the default handler for the project's own handler class.
    logged_in_client.handler = TestClienHandler()
    credentials = {'username': admin_user.username, 'password': '******'}
    logged_in_client.login(**credentials)
    return logged_in_client
Example #11
0
    def test_broken_500_handler_with_middleware(self):
        """Check that the view error and the handler500 error are both captured.

        With the ``BREAK_THAT_500`` setting overridden to ``True`` and the
        request routed through ``MockSentryMiddleware``, two events are
        expected in order: the view's ``Exception`` followed by the
        ``ValueError`` attributed to ``handler500``.
        """
        with Settings(BREAK_THAT_500=True):
            client = TestClient(REMOTE_ADDR='127.0.0.1')
            client.handler = MockSentryMiddleware(MockClientHandler())

            # The exception is expected to propagate out of the test client.
            self.assertRaises(Exception, client.get, reverse('sentry-raise-exc'))

            self.assertEqual(len(self.raven.events), 2)

            # First event: the exception raised by the view itself.
            event = self.raven.events.pop(0)

            self.assertEqual(event['class_name'], 'Exception')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'view exception')
            self.assertEqual(event['view'], 'tests.contrib.django.views.raise_exc')

            # Second event: the error attributed to handler500.
            event = self.raven.events.pop(0)

            self.assertEqual(event['class_name'], 'ValueError')
            self.assertEqual(event['level'], logging.ERROR)
            self.assertEqual(event['message'], 'handler500')
            self.assertEqual(event['view'], 'tests.contrib.django.urls.handler500')
Example #12
0
    def _check_internal(self, tested_url):
        """Classify and check an internal URL, then save the result.

        Sets ``self.message`` for every kind of link and ``self.status`` in
        the branches that decide one (``True``, ``False`` or ``None`` for
        "not automatically checked").  Branches that only set a message leave
        ``self.status`` untouched -- presumably the caller initialises it;
        TODO confirm.  Finally stamps ``self.last_checked`` and calls
        ``self.save()``.

        :param tested_url: the URL string to check.
        """

        from linkcheck.utils import LinkCheckHandler

        if not(tested_url):
            self.message = 'Empty link'

        elif tested_url.startswith('mailto:'):
            self.status = None
            self.message = 'Email link (not automatically checked)'

        elif tested_url.startswith('#'):
            self.status = None
            self.message = 'Link to within the same page (not automatically checked)'

        elif tested_url.startswith(MEDIA_PREFIX):
            # TODO Assumes a direct mapping from media url to local filesystem path. This will break quite easily for alternate setups
            path = settings.MEDIA_ROOT + urlunquote(tested_url)[len(MEDIA_PREFIX)-1:]
            decoded_path = html_decode(path)
            # Accept the file under either its raw or its HTML-decoded name.
            if os.path.exists(path) or os.path.exists(decoded_path):
                self.message = 'Working file link'
                self.status = True
            else:
                self.message = 'Missing Document'

        elif getattr(self, '_internal_hash', False) and getattr(self, '_instance', None):
            # This is a hash link pointing to itself
            from linkcheck import parse_anchors

            # Named anchor_hash rather than hash to avoid shadowing the builtin.
            anchor_hash = self._internal_hash
            instance = self._instance
            if anchor_hash == '#': # special case, point to #
                self.message = 'Working internal hash anchor'
                self.status = True
            else:
                anchor_name = anchor_hash[1:] #'#something' => 'something'
                # Search every HTML field of the owning instance for the anchor.
                html_content = ''
                for field in instance._linklist.html_fields:
                    html_content += getattr(instance, field, '')
                names = parse_anchors(html_content)
                if anchor_name in names:
                    self.message = 'Working internal hash anchor'
                    self.status = True
                else:
                    self.message = 'Broken internal hash anchor'

        elif tested_url.startswith('/'):
            old_prepend_setting = settings.PREPEND_WWW
            settings.PREPEND_WWW = False
            # try/finally guarantees the global setting is restored even if
            # the test-client request (or anchor parsing) raises.
            try:
                c = Client()
                c.handler = LinkCheckHandler()
                response = c.get(tested_url)
                if USE_REVERSION:
                    # using test client will clear the RevisionContextManager stack.
                    revision_context_manager.start()

                if response.status_code == 200:
                    self.message = 'Working internal link'
                    self.status = True
                    # see if the internal link points an anchor
                    if tested_url[-1] == '#': # special case, point to #
                        self.message = 'Working internal hash anchor'
                    elif tested_url.count('#'):
                        anchor = tested_url.split('#')[1]
                        from linkcheck import parse_anchors
                        names = parse_anchors(response.content)
                        if anchor in names:
                            self.message = 'Working internal hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken internal hash anchor'
                            self.status = False

                elif response.status_code == 302 or response.status_code == 301:
                    self.status = None
                    self.message = 'This link redirects: code %d (not automatically checked)' % (response.status_code, )
                else:
                    self.message = 'Broken internal link'
            finally:
                settings.PREPEND_WWW = old_prepend_setting
        else:
            self.message = 'Invalid URL'

        self.last_checked = now()
        self.save()
    def check_url(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL):
        """Check this URL and record the outcome in ``status`` and ``message``.

        ``self.status`` starts as ``False`` and is set to ``True`` (working),
        ``False`` (broken) or ``None`` (not automatically checked) by the
        branch that handles the link.

        Internal links (``not self.external``) are resolved against the media
        root, the owning instance's HTML fields, or the Django test client;
        the result is then stamped with ``last_checked`` and saved.  External
        links are fetched with ``urllib2``.

        :param check_internal: check the link when it is internal.
        :param check_external: check the link when it is external.
        :param external_recheck_interval: minutes; an external link checked
            more recently than this is not fetched again.
        :returns: ``self.status`` when an external link was checked within
            the recheck interval.  No other ``return`` is visible in this
            block, so other paths return ``None``.
        """

        from linkcheck.utils import LinkCheckHandler
        external_recheck_datetime = now() - timedelta(minutes=external_recheck_interval)
        self.status = False

        # Remove current domain from URLs as the test client chokes when trying to test them during a page save
        # They shouldn't generally exist but occasionally slip through
        # If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN
        # but also check for variants: example.org, www.example.org, test.example.org

        original_url = None # used to restore the original url afterwards

        if SITE_DOMAINS: #if the setting is present
            internal_exceptions = SITE_DOMAINS

        else: # try using SITE_DOMAIN
            root_domain = settings.SITE_DOMAIN
            if root_domain.startswith('www.'):
                root_domain = root_domain[4:]
            elif root_domain.startswith('test.'):
                root_domain = root_domain[5:]
            internal_exceptions = ['http://'+root_domain, 'http://www.'+root_domain, 'http://test.'+root_domain]

        for ex in internal_exceptions:
            if ex and self.url.startswith(ex):
                original_url = self.url
                self.url = self.url.replace(ex, '', 1)

        if check_internal and (not self.external):
            if not(self.url):
                self.message = 'Empty link'

            elif self.url.startswith('mailto:'):
                self.status = None
                self.message = 'Email link (not automatically checked)'

            elif self.url.startswith('#'):
                self.status = None
                self.message = 'Link to within the same page (not automatically checked)'

            elif self.url.startswith(MEDIA_PREFIX):
                #TODO Assumes a direct mapping from media url to local filesystem path. This will break quite easily for alternate setups
                path = settings.MEDIA_ROOT + self.url_unquoted()[len(MEDIA_PREFIX)-1:]
                decoded_path = html_decode(path)
                # Accept the file under either its raw or its HTML-decoded name.
                if os.path.exists(path) or os.path.exists(decoded_path):
                    self.message = 'Working file link'
                    self.status = True
                else:
                    self.message = 'Missing Document'

            elif getattr(self, '_internal_hash', False) and getattr(self, '_instance', None):
                # This is a hash link pointing to itself
                from linkcheck import parse_anchors

                # Named anchor_hash rather than hash to avoid shadowing the builtin.
                anchor_hash = self._internal_hash
                instance = self._instance
                if anchor_hash == '#': # special case, point to #
                    self.message = 'Working internal hash anchor'
                    self.status = True
                else:
                    anchor_name = anchor_hash[1:] #'#something' => 'something'
                    # Search every HTML field of the owning instance for the anchor.
                    html_content = ''
                    for field in instance._linklist.html_fields:
                        html_content += getattr(instance, field, '')
                    names = parse_anchors(html_content)
                    if anchor_name in names:
                        self.message = 'Working internal hash anchor'
                        self.status = True
                    else:
                        self.message = 'Broken internal hash anchor'

            elif self.url.startswith('/'):
                old_prepend_setting = settings.PREPEND_WWW
                settings.PREPEND_WWW = False
                # try/finally guarantees the global setting is restored even
                # if the test-client request (or anchor parsing) raises.
                try:
                    c = Client()
                    c.handler = LinkCheckHandler()
                    response = c.get(self.url, follow=True)
                    #using test client will clear the RevisionContextManager stack.
                    from reversion.revisions import revision_context_manager
                    revision_context_manager.start()

                    if response.status_code == 200:
                        self.message = 'Working internal link'
                        self.status = True
                        # see if the internal link points an anchor
                        if self.url[-1] == '#': # special case, point to #
                            self.message = 'Working internal hash anchor'
                        elif self.url.count('#'):
                            anchor = self.url.split('#')[1]
                            from linkcheck import parse_anchors
                            names = parse_anchors(response.content)
                            if anchor in names:
                                self.message = 'Working internal hash anchor'
                                self.status = True
                            else:
                                self.message = 'Broken internal hash anchor'
                                self.status = False

                    elif (response.status_code == 302 or response.status_code == 301):
                        self.status = None
                        self.message = 'This link redirects: code %d (not automatically checked)' % (response.status_code, )
                    else:
                        self.message = 'Broken internal link'
                finally:
                    settings.PREPEND_WWW = old_prepend_setting
            else:
                self.message = 'Invalid URL'

            if original_url: # restore the original url before saving
                self.url = original_url

            self.last_checked = now()
            self.save()

        elif check_external and self.external:
            logger.info('checking external link: %s', self.url)
            # Skip the fetch when the link was checked recently enough.
            if self.last_checked and (self.last_checked > external_recheck_datetime):
                return self.status

            try:

                # Remove URL fragment identifiers
                url = self.url.rsplit('#')[0]

                if self.url.count('#'):
                    # We have to get the content so we can check the anchors
                    if TIMEOUT_SUPPORT:
                        response = urllib2.urlopen(
                            url,
                            timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT
                        )
                    else:
                        response = urllib2.urlopen(url)
                else:
                    # Might as well just do a HEAD request
                    req = HeadRequest(url, headers={'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN})
                    try:
                        if TIMEOUT_SUPPORT:
                            response = urllib2.urlopen(
                                req,
                                timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT
                            )
                        else:
                            response = urllib2.urlopen(req)
                    except (ValueError, urllib2.HTTPError):
                        _, error, _ = sys.exc_info()
                        # ...except sometimes it triggers a bug in urllib2
                        # Retry with GET when HEAD is not allowed, otherwise
                        # with the plain URL.
                        if hasattr(error, 'code') and error.code == METHOD_NOT_ALLOWED:
                            req = GetRequest(url, headers={'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN})
                        else:
                            req = url
                        if TIMEOUT_SUPPORT:
                            response = urllib2.urlopen(
                                req,
                                timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT
                            )
                        else:
                            response = urllib2.urlopen(req)

                self.message = ' '.join([str(response.code), response.msg])
                self.status = True

                if self.url.count('#'):

                    anchor = self.url.split('#')[1]
                    from linkcheck import parse_anchors
                    try:
                        names = parse_anchors(response.read())
                        if anchor in names:
                            self.message = 'Working external hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken external hash anchor'
                            self.status = False

                    except Exception:
                        # Deliberately broad, but no longer swallows
                        # KeyboardInterrupt / SystemExit.
                        # The external web page is mal-formatted #or maybe other parse errors like encoding
                        # I reckon a broken anchor on an otherwise good URL should count as a pass
                        self.message = "Page OK but anchor can't be checked"
                        self.status = True

            except BadStatusLine:
                self.message = "Bad Status Line"

            except urllib2.HTTPError as e:
                if hasattr(e, 'code') and hasattr(e, 'msg'):
                    self.message = ' '.join([str(e.code), e.msg])
                else:
                    self.message = "Unknown Error"

            except urllib2.URLError as e:
                if hasattr(e, 'reason'):
                    self.message = 'Unreachable: '+str(e.reason)
                elif hasattr(e, 'code') and e.code!=301:
                    self.message = 'Error: '+str(e.code)
                elif hasattr(e, 'code'):
                    self.message = 'Redirect. Check manually: '+str(e.code)
                else:
                    # Neither reason nor code is available; reading e.code
                    # here would raise AttributeError.
                    self.message = "Unknown Error"
Example #14
0
    def check(self,
              check_internal=True,
              check_external=True,
              external_recheck_interval=EXTERNAL_RECHECK_INTERVAL):

        from linkcheck.utils import LinkCheckHandler
        external_recheck_datetime = now() - timedelta(
            minutes=external_recheck_interval)
        self.status = False

        # Remove current domain from URLs as the test client chokes when trying to test them during a page save
        # They shouldn't generally exist but occasionally slip through
        # If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN
        # but also check for variants: example.org, www.example.org, test.example.org

        original_url = None  # used to restore the original url afterwards

        if SITE_DOMAINS:  #if the setting is present
            internal_exceptions = SITE_DOMAINS

        else:  # try using SITE_DOMAIN
            root_domain = settings.SITE_DOMAIN
            if root_domain.startswith('www.'):
                root_domain = root_domain[4:]
            elif root_domain.startswith('test.'):
                root_domain = root_domain[5:]
            internal_exceptions = [
                'http://' + root_domain, 'http://www.' + root_domain,
                'http://test.' + root_domain
            ]

        for ex in internal_exceptions:
            if ex and self.url.startswith(ex):
                original_url = self.url
                self.url = self.url.replace(ex, '', 1)

        if check_internal and (not self.external):
            if not (self.url):
                self.message = 'Empty link'

            elif self.url.startswith('mailto:'):
                self.status = None
                self.message = 'Email link (not automatically checked)'

            elif self.url.startswith('#'):
                self.status = None
                self.message = 'Link to within the same page (not automatically checked)'

            elif self.url.startswith(MEDIA_PREFIX):
                #TODO Assumes a direct mapping from media url to local filesystem path. This will break quite easily for alternate setups
                if os.path.exists(settings.MEDIA_ROOT +
                                  self.url_unquoted()[len(MEDIA_PREFIX) - 1:]):
                    self.message = 'Working file link'
                    self.status = True
                else:
                    self.message = 'Missing Document'

            elif getattr(self, '_internal_hash', False) and getattr(
                    self, '_instance', None):
                # This is a hash link pointing to itself
                from linkcheck import parse_anchors

                hash = self._internal_hash
                instance = self._instance
                if hash == '#':  # special case, point to #
                    self.message = 'Working internal hash anchor'
                    self.status = True
                else:
                    hash = hash[1:]  #'#something' => 'something'
                    html_content = ''
                    for field in instance._linklist.html_fields:
                        html_content += getattr(instance, field, '')
                    names = parse_anchors(html_content)
                    if hash in names:
                        self.message = 'Working internal hash anchor'
                        self.status = True
                    else:
                        self.message = 'Broken internal hash anchor'

            elif self.url.startswith('/'):
                old_prepend_setting = settings.PREPEND_WWW
                settings.PREPEND_WWW = False
                c = Client()
                c.handler = LinkCheckHandler()
                response = c.get(self.url, follow=True)
                if response.status_code == 200:
                    self.message = 'Working internal link'
                    self.status = True
                    # see if the internal link points an anchor
                    if self.url[-1] == '#':  # special case, point to #
                        self.message = 'Working internal hash anchor'
                    elif self.url.count('#'):
                        anchor = self.url.split('#')[1]
                        from linkcheck import parse_anchors
                        names = parse_anchors(response.content)
                        if anchor in names:
                            self.message = 'Working internal hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken internal hash anchor'
                            self.status = False

                elif (response.status_code == 302
                      or response.status_code == 301):
                    self.status = None
                    self.message = 'This link redirects: code %d (not automatically checked)' % (
                        response.status_code, )
                else:
                    self.message = 'Broken internal link'
                settings.PREPEND_WWW = old_prepend_setting
            else:
                self.message = 'Invalid URL'

            if original_url:  # restore the original url before saving
                self.url = original_url

            self.last_checked = now()
            self.save()

        elif check_external and self.external:
            logger.info('checking external link: %s' % self.url)
            if self.last_checked and (self.last_checked >
                                      external_recheck_datetime):
                return self.status

            try:

                # Remove URL fragment identifiers
                url = self.url.rsplit('#')[0]

                if self.url.count('#'):
                    # We have to get the content so we can check the anchors
                    if TIMEOUT_SUPPORT:
                        response = urllib2.urlopen(
                            url, timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT)
                    else:
                        response = urllib2.urlopen(url)
                else:
                    # Might as well just do a HEAD request
                    req = HeadRequest(url,
                                      headers={
                                          'User-Agent':
                                          "http://%s Linkchecker" %
                                          settings.SITE_DOMAIN
                                      })
                    try:
                        if TIMEOUT_SUPPORT:
                            response = urllib2.urlopen(
                                req,
                                timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT)
                        else:
                            response = urllib2.urlopen(req)
                    except (ValueError, urllib2.HTTPError) as error:
                        # ...except sometimes it triggers a bug in urllib2
                        if hasattr(
                                error,
                                'code') and error.code == METHOD_NOT_ALLOWED:
                            req = GetRequest(url,
                                             headers={
                                                 'User-Agent':
                                                 "http://%s Linkchecker" %
                                                 settings.SITE_DOMAIN
                                             })
                        else:
                            req = url
                        if TIMEOUT_SUPPORT:
                            response = urllib2.urlopen(
                                req,
                                timeout=LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT)
                        else:
                            response = urllib2.urlopen(req)

                self.message = ' '.join([str(response.code), response.msg])
                self.status = True

                if self.url.count('#'):

                    anchor = self.url.split('#')[1]
                    from linkcheck import parse_anchors
                    try:
                        names = parse_anchors(response.read())
                        if anchor in names:
                            self.message = 'Working external hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken external hash anchor'
                            self.status = False

                    except:
                        # The external web page is mal-formatted #or maybe other parse errors like encoding
                        # I reckon a broken anchor on an otherwise good URL should count as a pass
                        self.message = "Page OK but anchor can't be checked"
                        self.status = True

            except BadStatusLine:
                self.message = "Bad Status Line"

            except urllib2.HTTPError, e:
                if hasattr(e, 'code') and hasattr(e, 'msg'):
                    self.message = ' '.join([str(e.code), e.msg])
                else:
                    self.message = "Unknown Error"

            except urllib2.URLError, e:
                if hasattr(e, 'reason'):
                    self.message = 'Unreachable: ' + str(e.reason)
                elif hasattr(e, 'code') and e.code != 301:
                    self.message = 'Error: ' + str(e.code)
                else:
                    self.message = 'Redirect. Check manually: ' + str(e.code)
Example #15
0
    def _check_internal(self, tested_url):
        """Check an internal link and store the verdict on this instance.

        ``tested_url`` is classified by its prefix and the result is written
        to ``self.message`` and, where a verdict is reached, ``self.status``:

        * ``True``  -- the target was found (file, anchor, page or redirect).
        * ``False`` -- an anchor or redirect target turned out to be broken.
        * ``None``  -- the link type is not checked automatically
          (``mailto:``, ``tel:`` and same-page ``#`` links).

        Branches that only set a message ('Empty link', 'Missing Document',
        'Broken internal link', 'Invalid URL', ...) leave ``self.status`` at
        whatever value it held on entry.

        Afterwards ``self.last_checked`` is stamped and the instance is saved.
        Exceptions raised while requesting the page propagate to the caller,
        but ``settings.PREPEND_WWW`` is always restored first.
        """
        # Imported lazily, inside the method, as the original code did.
        from linkcheck import parse_anchors
        from linkcheck.utils import LinkCheckHandler

        if not tested_url:
            self.message = 'Empty link'

        elif tested_url.startswith('mailto:'):
            self.status = None
            self.message = 'Email link (not automatically checked)'

        elif tested_url.startswith('tel:'):
            self.status = None
            self.message = 'Phone number (not automatically checked)'

        elif tested_url.startswith('#'):
            self.status = None
            self.message = 'Link to within the same page (not automatically checked)'

        elif tested_url.startswith(MEDIA_PREFIX):
            # TODO Assumes a direct mapping from media url to local filesystem path. This will break quite easily for alternate setups
            path = settings.MEDIA_ROOT + urlunquote(
                tested_url)[len(MEDIA_PREFIX) - 1:]
            # The file counts as present if either the raw path or its
            # HTML-decoded form exists on disk.
            decoded_path = html_decode(path)
            if os.path.exists(path) or os.path.exists(decoded_path):
                self.message = 'Working file link'
                self.status = True
            else:
                self.message = 'Missing Document'

        elif getattr(self, '_internal_hash', False) and getattr(
                self, '_instance', None):
            # This is a hash link pointing to itself
            fragment = self._internal_hash
            instance = self._instance
            if fragment == '#':  # special case, point to #
                self.message = 'Working internal hash anchor'
                self.status = True
            else:
                fragment = fragment[1:]  # '#something' => 'something'
                # Search every HTML field of the owning instance for the anchor.
                html_content = ''
                for field in instance._linklist.html_fields:
                    html_content += getattr(instance, field, '')
                try:
                    names = parse_anchors(html_content)
                    if fragment in names:
                        self.message = 'Working internal hash anchor'
                        self.status = True
                    else:
                        self.message = 'Broken internal hash anchor'
                except UnicodeDecodeError:
                    self.message = 'Failed to parse HTML for anchor'

        elif tested_url.startswith('/'):
            # PREPEND_WWW is switched off while the test client fetches the
            # page. The override is process-wide, so it must be undone even
            # when the request (or the view behind it) raises.
            saved_prepend_www = settings.PREPEND_WWW
            settings.PREPEND_WWW = False
            try:
                client = Client()
                client.handler = LinkCheckHandler()
                response = client.get(tested_url)
                if response.status_code == 200:
                    self.message = 'Working internal link'
                    self.status = True
                    # see if the internal link points an anchor
                    if tested_url[-1] == '#':  # special case, point to #
                        self.message = 'Working internal hash anchor'
                    elif tested_url.count('#'):
                        anchor = tested_url.split('#')[1]
                        try:
                            names = parse_anchors(response.content)
                            if anchor in names:
                                self.message = 'Working internal hash anchor'
                                self.status = True
                            else:
                                self.message = 'Broken internal hash anchor'
                                self.status = False
                        except UnicodeDecodeError:
                            # The page itself answered 200, so status stays True.
                            self.message = 'Failed to parse HTML for anchor'

                elif response.status_code in (301, 302):
                    # Follow the redirect chain to see whether it ends on a
                    # working page.
                    redir_response = client.get(tested_url, follow=True)
                    if redir_response.status_code == 200:
                        redir_state = 'Working redirect'
                        self.status = True
                    else:
                        redir_state = 'Broken redirect'
                        self.status = False
                    self.message = 'This link redirects: code %d (%s)' % (
                        response.status_code, redir_state)
                else:
                    self.message = 'Broken internal link'
            finally:
                settings.PREPEND_WWW = saved_prepend_www
        else:
            self.message = 'Invalid URL'

        if USE_REVERSION:
            # using test client will clear the RevisionContextManager stack.
            revision_context_manager.start()

        self.last_checked = now()
        self.save()
Example #16
0
    def check(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL):
        """Check this URL and record the outcome in ``self.status`` and ``self.message``.

        ``self.status`` starts as ``False`` and is then set to ``True`` (link
        works), left/set ``False`` (link broken) or set to ``None`` (link type
        is not automatically checked).

        Parameters:
            check_internal: when true, links for which ``self.external`` is
                falsy are checked (file existence or a test-client request)
                and the instance is saved.
            check_external: when true, links for which ``self.external`` is
                truthy are fetched with ``urllib2``.
            external_recheck_interval: age in minutes; an external link whose
                ``last_checked`` is more recent than this is not re-fetched.

        Returns:
            ``self.status`` when an external link was checked recently enough
            to be skipped.  No other ``return`` statement is visible in this
            part of the method.
        """

        from linkcheck.utils import LinkCheckHandler
        # Cut-off timestamp: external links checked after this are considered fresh.
        external_recheck_datetime = datetime.now() - timedelta(minutes=external_recheck_interval)
        # Pessimistic default; branches below overwrite it when they reach a verdict.
        self.status  = False

        # Remove current domain from URLs as the test client chokes when trying to test them during a page save
        # They shouldn't generally exist but occasionally slip through
        # If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN
        # but also check for variants: example.org, www.example.org, test.example.org

        original_url = None # used to restore the original url afterwards

        if SITE_DOMAINS: #if the setting is present
            internal_exceptions = SITE_DOMAINS

        else: # try using SITE_DOMAIN
            root_domain = settings.SITE_DOMAIN
            if root_domain.startswith('www.'):
                root_domain = root_domain[4:]
            elif root_domain.startswith('test.'):
                root_domain = root_domain[5:]
            internal_exceptions = ['http://'+root_domain, 'http://www.'+root_domain, 'http://test.'+root_domain]

        # Strip a matching site prefix from the front of the URL (first occurrence only).
        # NOTE(review): the loop does not stop after the first match, and the
        # original URL is only restored inside the internal-check branch below.
        for ex in internal_exceptions:
            if ex and self.url.startswith(ex):
                original_url = self.url
                self.url = self.url.replace(ex, '', 1)

        if check_internal and (not self.external):
            if not(self.url):
                self.message = 'Empty link'

            elif self.url.startswith('mailto:'):
                self.status = None
                self.message = 'Email link (not automatically checked)'

            elif self.url.startswith('#'):
                self.status = None
                self.message = 'Link to within the same page (not automatically checked)'

            elif self.url.startswith(MEDIA_PREFIX):
                #TODO Assumes a direct mapping from media url to local filesystem path. This will break quite easily for alternate setups
                if os.path.exists(settings.MEDIA_ROOT + self.url_unquoted()[len(MEDIA_PREFIX)-1:]):
                    self.message = 'Working file link'
                    self.status = True
                else:
                    self.message = 'Missing Document'

            elif self.url.startswith('/'):
                # PREPEND_WWW is switched off while the test client requests the page.
                # NOTE(review): there is no try/finally here, so an exception
                # raised by the request leaves PREPEND_WWW set to False.
                old_prepend_setting = settings.PREPEND_WWW
                settings.PREPEND_WWW = False
                c = Client()
                c.handler = LinkCheckHandler()
                # follow=True: redirects are followed, so status_code is that of the final response.
                response = c.get(self.url, follow=True)
                if response.status_code == 200:
                    self.message = 'Working internal link'
                    self.status = True
                    # see if the internal link points an anchor
                    if self.url[-1] == '#': # special case, point to #
                        self.message = 'Working internal hash anchor'
                    elif self.url.count('#'):
                        # Text between the first and second '#' is used as the anchor name.
                        anchor = self.url.split('#')[1]
                        from linkcheck import parse_anchors
                        names = parse_anchors(response.content)
                        if anchor in names:
                            self.message = 'Working internal hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken internal hash anchor'
                            self.status = False

                elif (response.status_code == 302 or response.status_code == 301):
                    self.status = None
                    self.message = 'This link redirects: code %d (not automatically checked)' % (response.status_code, )
                else:
                    self.message = 'Broken internal link'
                settings.PREPEND_WWW = old_prepend_setting
            else:
                self.message = 'Invalid URL'

            if original_url: # restore the original url before saving
                self.url = original_url

            self.last_checked  = datetime.now()
            self.save()

        elif check_external and self.external:

            # Recently checked external links are not fetched again.
            # NOTE(review): self.status was reset to False at the top of this
            # method, so that reset value is what gets returned here.
            if self.last_checked and (self.last_checked > external_recheck_datetime):
                return self.status

            try:

                # Remove URL fragment identifiers
                url = self.url.rsplit('#')[0]

                if self.url.count('#'):
                    # We have to get the content so we can check the anchors
                    response = urllib2.urlopen(url)
                else:
                    # Might as well just do a HEAD request
                    req = HeadRequest(url, headers={'User-Agent' : "http://%s Linkchecker" % settings.SITE_DOMAIN})
                    try:
                        response = urllib2.urlopen(req)
                    except ValueError:
                        # ...except sometimes it triggers a bug in urllib2
                        # Fall back to opening the plain URL instead of the HeadRequest.
                        response = urllib2.urlopen(url)

                # Reaching this point means urlopen did not raise: record "<code> <msg>" as a pass.
                self.message = ' '.join([str(response.code), response.msg])
                self.status = True

                if self.url.count('#'):

                    anchor = self.url.split('#')[1]
                    from linkcheck import parse_anchors
                    try:
                        names = parse_anchors(response.read())
                        if anchor in names:
                            self.message = 'Working external hash anchor'
                            self.status = True
                        else:
                            self.message = 'Broken external hash anchor'
                            self.status = False

                    except HTMLParseError:
                        # The external web page is mal-formatted
                        # I reckon a broken anchor on an otherwise good URL should count as a pass
                        self.message = "Page OK but anchor can't be checked"
                        self.status = True

            # The handlers below only set a message; self.status keeps the
            # False assigned at the top of the method.
            except BadStatusLine:
                    self.message = "Bad Status Line"

            # NOTE(review): "except X, e" is Python 2-only syntax ("except X as e" works on 2.6+ and 3).
            except urllib2.HTTPError, e:
                if hasattr(e, 'code') and hasattr(e, 'msg'):
                    self.message = ' '.join([str(e.code), e.msg])
                else:
                    self.message = "Unknown Error"

            except urllib2.URLError, e:
                if hasattr(e, 'reason'):
                    self.message = 'Unreachable: '+str(e.reason)
                elif hasattr(e, 'code') and e.code!=301:
                    self.message = 'Error: '+str(e.code)
                else:
                    self.message = 'Redirect. Check manually: '+str(e.code)