Esempio n. 1
0
    def _fetch_blocked(self, code):
        """Pull the target URL out of the page returned for ``code``.

        The page is requested through ``self._http_fetch`` with the "GET"
        method. The URL is the text between ``self.BLOCKED_STRING_START`` and
        the next ``self.BLOCKED_STRING_END``; HTML entities in it are
        unescaped and the result is returned UTF-8 encoded.

        Raises:
            exceptions.ServiceException: the status is not 200, or one of
                the two markers is missing from the body.
            exceptions.CodeBlockedException: the body is empty.
            exceptions.BlockedException: the body contains
                ``self.RATE_LIMIT_STRING``.
        """
        response = self._http_fetch(code, "GET")
        # The body is read before the status is inspected, as in every
        # other outcome below.
        body = response.read()

        if response.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                response.status)
        if not body:
            raise exceptions.CodeBlockedException(
                "Empty response on status 200")

        if self.RATE_LIMIT_STRING in body:
            raise exceptions.BlockedException()

        # Drop everything up to and including the start marker.
        start = body.find(self.BLOCKED_STRING_START)
        if start < 0:
            raise exceptions.ServiceException(
                "Unexpected response on status 200")
        tail = body[start + len(self.BLOCKED_STRING_START):]

        # The URL ends where the end marker begins.
        end = tail.find(self.BLOCKED_STRING_END)
        if end < 0:
            raise exceptions.ServiceException(
                "Unexpected response on status 200")

        escaped_url = tail[:end].decode("utf-8")
        unescaped_url = HTMLParser.HTMLParser().unescape(escaped_url)
        return unescaped_url.encode("utf-8")
Esempio n. 2
0
    def fetch(self, code):
        """Resolve ``code`` and return the URL it redirects to.

        The response comes from ``self._http_fetch(code)`` and is classified
        by HTTP status:

        * 301 with reason "Moved": the ``Location`` header is returned.
        * 301 with reason "Moved Permanently": ``self._conn`` is closed and
          ``exceptions.CodeBlockedException`` is raised.
        * 302: the ``Location`` header is handed to
          ``self._parse_warning_url`` and its result is returned.
        * 403 / 404 / 410: ``BlockedException`` / ``NoRedirectException`` /
          ``CodeBlockedException`` respectively.

        Raises:
            exceptions.ServiceException: a 301/302 without a ``Location``
                header, a 301 with an unknown reason, or any other status.
        """
        response = self._http_fetch(code)
        status = response.status

        if status == 301:
            target = response.getheader("Location")
            if not target:
                raise exceptions.ServiceException(
                    "No Location header after HTTP status 301")

            reason = response.reason
            if reason == "Moved":
                # Normal bit.ly redirect
                return target
            if reason != "Moved Permanently":
                raise exceptions.ServiceException(
                    "Unknown HTTP reason %s after HTTP status 301" % reason)

            # Weird "bundles" redirect, forces connection close despite
            # sending Keep-Alive header
            self._conn.close()
            raise exceptions.CodeBlockedException()

        if status == 302:
            target = response.getheader("Location")
            if not target:
                raise exceptions.ServiceException(
                    "No Location header after HTTP status 302")
            return self._parse_warning_url(code, target)

        if status == 403:
            raise exceptions.BlockedException()
        if status == 404:
            raise exceptions.NoRedirectException()
        if status == 410:
            raise exceptions.CodeBlockedException()

        raise exceptions.ServiceException("Unknown HTTP status %i" % status)
Esempio n. 3
0
    def unexpected_http_status(self, code, resp):
        """Resolve ``code`` when ``resp`` has a 302 or 200 status.

        * 302: the ``Location`` header is returned as-is (``None`` when the
          header is absent), unless it contains "sharedby" or "visibli"
          (``NoRedirectException``) or starts with "http://yahoo.com"
          (``BlockedException``).
        * Any status other than 302 and 200 is delegated to the parent
          class implementation.
        * 200: the page is fetched again with ``self._http_get`` and the
          ``src`` of the first matching ``<iframe id=... src=...>`` tag is
          returned, HTML-unescaped and UTF-8 encoded.

        Raises:
            exceptions.NoRedirectException: see the 302 case above.
            exceptions.BlockedException: see the 302 case above.
            exceptions.ServiceException: the second request is not a 200,
                or no iframe URL is found in the page.
        """
        if resp.status == 302:
            target = resp.getheader("Location")

            if target:
                if "sharedby" in target or "visibli" in target:
                    raise exceptions.NoRedirectException()
                if target.startswith("http://yahoo.com"):
                    raise exceptions.BlockedException(
                        "Banned (location=%s)" % target)

            # Guess it be an override for site that busts out iframes
            return target

        if resp.status != 200:
            parent = super(BaseVisbliService, self)
            return parent.unexpected_http_status(code, resp)

        # A 200 status means the real URL is embedded in the page body.
        resp, data = self._http_get(code)
        if resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                resp.status)

        found = re.search(r'<iframe id="[^"]+" src="([^"]+)">', data)
        if found:
            escaped_url = found.group(1).decode("utf-8")
            unescaped_url = HTMLParser.HTMLParser().unescape(escaped_url)
            return unescaped_url.encode("utf-8")

        # No iframe: single out the known PHP notice for a clearer message.
        if 'Undefined index:  HTTP_USER_AGENT' in data:
            raise exceptions.ServiceException(
                "Website broken about user-agent")

        raise exceptions.ServiceException("No iframe url found")
Esempio n. 4
0
    def fetch(self, code):
        """Resolve ``code`` and return the URL it redirects to.

        The response comes from ``self._http_head(code)`` and its status is
        matched against the status collections configured on the instance:

        * ``self.http_status_redirect``: the ``Location`` header is returned.
        * ``self.http_status_no_redirect``: ``NoRedirectException``.
        * ``self.http_status_code_blocked``: ``CodeBlockedException``.
        * ``self.http_status_blocked``: ``BlockedException``.
        * anything else: the result of
          ``self.unexpected_http_status(code, resp)`` is returned.

        Raises:
            exceptions.ServiceException: a redirect status arrived without a
                ``Location`` header.
            exceptions.NoRedirectException: see above.
            exceptions.CodeBlockedException: see above.
            exceptions.BlockedException: see above.
        """
        resp = self._http_head(code)
        status = resp.status

        if status in self.http_status_redirect:
            location = resp.getheader("Location")
            if not location:
                # Report the status actually received: this branch runs for
                # every configured redirect status, not only 301.
                raise exceptions.ServiceException(
                    "No Location header after HTTP status %i" % status)
            return location
        elif status in self.http_status_no_redirect:
            raise exceptions.NoRedirectException()
        elif status in self.http_status_code_blocked:
            raise exceptions.CodeBlockedException()
        elif status in self.http_status_blocked:
            raise exceptions.BlockedException()
        else:
            return self.unexpected_http_status(code, resp)
Esempio n. 5
0
    def unexpected_http_status(self, code, resp):
        """Resolve ``code`` when ``resp`` came back with status 200.

        Any other status is delegated to the parent class implementation.
        For a 200 the page is fetched again with ``self._http_get`` and its
        body is classified by the markup it contains:

        * rate-limit notice: ``BlockedException``.
        * "Link Disabled" box: result of ``self._parse_blocked(code, data)``.
        * preview page: result of ``self._parse_preview(code, data)``.

        Raises:
            exceptions.ServiceException: the second request is not a 200, or
                the body matches none of the known page layouts.
            exceptions.CodeBlockedException: the body is empty.
            exceptions.BlockedException: the rate-limit notice is present.
        """
        if resp.status != 200:
            return super(Isgd, self).unexpected_http_status(code, resp)

        resp, data = self._http_get(code)
        if resp.status != 200:
            raise exceptions.ServiceException(
                "HTTP status changed from 200 to %i on second request" %
                resp.status)

        if not data:
            raise exceptions.CodeBlockedException(
                "Empty response on status 200")
        if "<div id=\"main\"><p>Rate limit exceeded - please wait 1 minute before accessing more shortened URLs</p></div>" in data:
            raise exceptions.BlockedException()
        if "<div id=\"disabled\"><h2>Link Disabled</h2>" in data:
            return self._parse_blocked(code, data)
        if "<p>The full original link is shown below. <b>Click the link</b> if you'd like to proceed to the destination shown:" in data:
            return self._parse_preview(code, data)

        # An unrecognised page must not fall through and hand None back to
        # the caller as if it were a resolved URL.
        raise exceptions.ServiceException(
            "Unexpected response on status 200")
Esempio n. 6
0
        try:
            self._conn.request(
                "GET", "/urlshortener/v1/url?shortUrl=http://goo.gl/%s" % code)
            resp = self._conn.getresponse()
            data = resp.read()
        except httplib.HTTPException, e:
            self._conn.close()
            raise exceptions.ServiceException("HTTP exception: %s" % e)
        except socket.error, e:
            self._conn.close()
            raise exceptions.ServiceException("Socket error: %s" % e)

        if resp.status == 200:
            return self._parse_json(data)
        elif resp.status == 403:
            raise exceptions.BlockedException()
        elif resp.status == 404:
            raise exceptions.NoRedirectException()
        else:
            raise exceptions.ServiceException("Unexpected HTTP status %i" %
                                              resp.status)

    def _parse_json(self, data):
        try:
            data = json.loads(data)
        except ValueError:
            raise exceptions.ServiceException("Could not decode response")

        if not "kind" in data or data["kind"] != "urlshortener#url":
            raise exceptions.ServiceException("No/bad type given")
        if not "status" in data: