Ejemplo n.º 1
0
    def __call__(self, url):
        """Fetch a resource of this site through an internal subrequest.

        The URL is first passed through ``normalize_url``.  ``None`` is
        returned without fetching anything when the normalized URL is
        empty, does not start with ``self.public_url``, or is rejected by
        the blacklist check applied to ``++plone++production`` URLs.  A
        subrequest answering 404 also yields ``None``.

        Only the path component of the (possibly rewritten) URL is
        requested, prefixed with ``self.vhm_base``.

        Returns a dict with the keys ``data`` (response body), ``headers``
        and ``code`` (response status), or ``None`` as described above.
        """
        url = normalize_url(url)
        if not url or not url.startswith(self.public_url):
            return None

        if '++plone++production' in url:
            # Blacklist entries are matched against the URL relative to
            # everything that precedes its last path segment.
            parent, _ = url.rsplit('/', 1)
            if any(url.startswith(parent + entry)
                   for entry in self._blacklisted_content):
                return None

        if '++plone++' in url:
            # ++plone++ resources can always be addressed from the site
            # root, so rebuild the URL there from the last marker onwards.
            resource = url.rpartition('++plone++')[2]
            url = self.public_url + '/++plone++' + resource

        path = urlparse(url).path
        response = subrequest(self.vhm_base + path)
        if response.getStatus() != 404:
            return {
                'data': response.getBody(),
                'headers': response.headers,
                'code': response.getStatus()
            }
        return None
Ejemplo n.º 2
0
    def __call__(self, url):
        """Download ``url`` over HTTP and return its payload.

        The URL is first passed through ``normalize_url``; an empty result
        returns ``None`` without making a request.

        ``None`` is also returned when:

        * the final status code is one of 401, 403, 404, 500, 501 or 502;
        * the request was redirected, the last redirect was a 301/302 and
          the final URL contains ``came_from`` (presumably a redirect to a
          login form -- confirm against the target site).

        Otherwise returns a dict with the keys ``data`` (raw response
        bytes), ``headers`` and ``code`` (final status code).

        Exceptions raised by ``requests.get`` (connection errors, timeouts)
        are not caught here and propagate to the caller.
        """
        url = normalize_url(url)
        if not url:
            return

        # requests waits forever by default; without an explicit timeout a
        # stalled server would block this call (and its caller) indefinitely.
        request_timeout = 30  # seconds
        resp = requests.get(url, timeout=request_timeout)
        if resp.status_code in (404, 403, 401, 500, 501, 502):
            return

        # Only inspect the redirect chain when there actually was one.
        if (resp.history
                and resp.history[-1].status_code in (301, 302)
                and 'came_from' in resp.url):
            return

        return {
            'data': resp.content,
            'headers': resp.headers,
            'code': resp.status_code
        }