Ejemplo n.º 1
0
    def search(self, query):
        """Do a Yahoo! BOSS web search for *query*.

        The query is wrapped in double quotes before being sent, and the
        request is issued through an OAuth client built from the ``"key"``
        and ``"secret"`` entries of ``self.cred``.

        Returns a list of URLs, no more than fifty, ranked by relevance (as
        determined by Yahoo). An empty list is returned when the decoded
        response has no ``bossresponse -> web -> results`` entry. Raises
        :py:exc:`~earwigbot.exceptions.SearchQueryError` if the response
        status is not ``"200"`` or the body is not valid JSON.
        """
        base_url = "http://yboss.yahooapis.com/ysearch/web"
        # str.join() accepts exactly one iterable argument, so the previous
        # query.join('"', '"') raised TypeError on every call. Build the
        # quoted phrase by plain concatenation instead.
        # NOTE(review): the value is percent-encoded here and then again by
        # urlencode() below; that pre-existing double encoding is kept as-is
        # -- confirm it is what the API expects.
        query = quote_plus('"' + query + '"')
        params = {"q": query, "type": "html,text", "format": "json"}
        url = "{0}?{1}".format(base_url, urlencode(params))

        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])
        client = oauth.Client(consumer)
        headers, body = client.request(url, "GET")

        # The status is compared as a string, exactly as the client reports
        # it in the response headers.
        if headers["status"] != "200":
            e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
            raise SearchQueryError(e.format(headers["status"], body))

        try:
            res = loads(body)
        except ValueError:
            e = "Yahoo! BOSS Error: JSON could not be decoded"
            raise SearchQueryError(e)

        # A missing key anywhere along this path is treated as "no results"
        # rather than as an error.
        try:
            results = res["bossresponse"]["web"]["results"]
        except KeyError:
            return []
        return [result["url"] for result in results]
Ejemplo n.º 2
0
    def search(self, query):
        """Run a Yandex web search for *query*.

        Every character other than ASCII letters, digits and spaces is
        removed from the query, which is then sent wrapped in double quotes.
        The host defaults to ``yandex.com`` unless ``self.cred`` has a
        ``"proxy"`` entry.

        Returns a list of URLs ranked by relevance (as determined by Yandex).
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors,
        including a response that cannot be parsed as XML.
        """
        host = self.cred.get("proxy", "yandex.com")
        endpoint = "https://{0}/search/xml?".format(host)
        phrase = re_sub(r"[^a-zA-Z0-9 ]", "", query).encode("utf8")
        params = {
            "user": self.cred["user"],
            "key": self.cred["key"],
            "query": '"' + phrase + '"',
            "l10n": "en",
            "filter": "none",
            "maxpassages": "1",
            "groupby": "mode=flat.groups-on-page={0}".format(self.count)
        }

        raw = self._open(endpoint + urlencode(params))

        # Both parsing and the XPath lookup stay inside the try block so that
        # any lxml error from either step is reported the same way.
        try:
            tree = lxml.etree.fromstring(raw)
            urls = []
            for node in tree.xpath(".//url"):
                urls.append(node.text)
            return urls
        except lxml.etree.Error as exc:
            raise SearchQueryError("Yandex XML parse error: " + str(exc))
Ejemplo n.º 3
0
    def search(self, query):
        """Run a Google web search for *query*.

        Double quotes are removed from the query, which is then sent wrapped
        in double quotes. The host defaults to ``www.googleapis.com`` unless
        ``self.cred`` has a ``"proxy"`` entry.

        Returns a list of URLs ranked by relevance (as determined by Google),
        or an empty list when the decoded response lacks an ``"items"`` key
        or an item lacks ``"link"``.
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        host = self.cred.get("proxy", "www.googleapis.com")
        endpoint = "https://{0}/customsearch/v1?".format(host)
        params = {
            "cx": self.cred["id"],
            "key": self.cred["key"],
            "q": '"' + query.replace('"', "").encode("utf8") + '"',
            "alt": "json",
            "num": str(self.count),
            "safe": "off",
            "fields": "items(link)"
        }

        raw = self._open(endpoint + urlencode(params))

        try:
            decoded = loads(raw)
        except ValueError:
            raise SearchQueryError("Google Error: JSON could not be decoded")

        # Any KeyError while collecting links discards everything gathered so
        # far and yields an empty result list.
        links = []
        try:
            for item in decoded["items"]:
                links.append(item["link"])
        except KeyError:
            return []
        return links
Ejemplo n.º 4
0
    def search(self, query):
        """Run a Bing web search for *query*.

        The ``SearchWeb`` service is used when ``self.cred["type"]`` equals
        ``"searchweb"``; otherwise the ``Search`` service is used. Double
        quotes are removed from the query before it is sent as a quoted
        phrase.

        Returns a list of URLs ranked by relevance (as determined by Bing),
        or an empty list when the decoded response has no ``d -> results``
        entry.
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        service = "Search"
        if self.cred["type"] == "searchweb":
            service = "SearchWeb"
        endpoint = "https://api.datamarket.azure.com/Bing/{0}/Web?".format(
            service)
        params = {
            "$format": "json",
            "$top": str(self.count),
            "Query": "'\"" + query.replace('"', "").encode("utf8") + "\"'",
            "Market": "'en-US'",
            "Adult": "'Off'",
            "Options": "'DisableLocationDetection'",
            "WebSearchOptions": "'DisableHostCollapsing+DisableQueryAlterations'"
        }

        raw = self._open(endpoint + urlencode(params))

        try:
            decoded = loads(raw)
        except ValueError:
            raise SearchQueryError("Bing Error: JSON could not be decoded")

        try:
            entries = decoded["d"]["results"]
        except KeyError:
            return []

        # Collected outside the try block: an entry without "Url" still
        # raises KeyError to the caller.
        urls = []
        for entry in entries:
            urls.append(entry["Url"])
        return urls
Ejemplo n.º 5
0
    def search(self, query):
        """Run a Yahoo! BOSS web search for *query*.

        The query is sent wrapped in double quotes in an OAuth request signed
        with HMAC-SHA1 using the ``"key"`` and ``"secret"`` entries of
        ``self.cred``. A response sent with ``Content-Encoding: gzip`` is
        decompressed before decoding.

        Returns a list of URLs, no more than five, ranked by relevance
        (as determined by Yahoo), or an empty list when the decoded response
        has no ``bossresponse -> web -> results`` entry.
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])

        endpoint = "http://yboss.yahooapis.com/ysearch/web"
        params = {
            "oauth_version": oauth.OAUTH_VERSION,
            "oauth_nonce": oauth.generate_nonce(),
            "oauth_timestamp": oauth.Request.make_timestamp(),
            "oauth_consumer_key": consumer.key,
            "q": '"' + query.encode("utf8") + '"',
            "count": "5",
            "type": "html,text,pdf",
            "format": "json",
        }

        request = oauth.Request(method="GET", url=endpoint, parameters=params)
        request.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None)

        try:
            response = self.opener.open(self._build_url(endpoint, request))
            body = response.read()
        except (URLError, error) as exc:
            raise SearchQueryError("Yahoo! BOSS Error: " + str(exc))

        if response.headers.get("Content-Encoding") == "gzip":
            body = GzipFile(fileobj=StringIO(body)).read()

        status = response.getcode()
        if status != 200:
            template = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
            raise SearchQueryError(template.format(status, body))

        try:
            decoded = loads(body)
        except ValueError:
            raise SearchQueryError(
                "Yahoo! BOSS Error: JSON could not be decoded")

        try:
            entries = decoded["bossresponse"]["web"]["results"]
        except KeyError:
            return []

        # Collected outside the try block: an entry without "url" still
        # raises KeyError to the caller.
        urls = []
        for entry in entries:
            urls.append(entry["url"])
        return urls
Ejemplo n.º 6
0
    def _open(self, *args):
        """Fetch a URL through ``self.opener`` and return the response body.

        All positional arguments are forwarded unchanged to
        ``self.opener.open``. A body sent with ``Content-Encoding: gzip`` is
        decompressed before it is returned.

        Raises ``SearchQueryError``, with ``self.name`` as the message
        prefix, when opening or reading fails with ``URLError`` or ``error``,
        or when the response code is not 200.
        """
        try:
            response = self.opener.open(*args)
            body = response.read()
        except (URLError, error) as exc:
            raise SearchQueryError("{0} Error: {1}".format(self.name, exc))

        if response.headers.get("Content-Encoding") == "gzip":
            body = GzipFile(fileobj=StringIO(body)).read()

        status = response.getcode()
        if status == 200:
            return body

        # The (possibly decompressed) body is included in the error message.
        template = "{0} Error: got response code '{1}':\n{2}'"
        raise SearchQueryError(template.format(self.name, status, body))
Ejemplo n.º 7
0
    def search(self, query):
        """Run a Yahoo! BOSS web search for *query*.

        The query is sent wrapped in double quotes in an OAuth request signed
        with HMAC-SHA1 using the ``"key"`` and ``"secret"`` entries of
        ``self.cred``; ``self.count`` is passed as the ``count`` parameter.
        The request itself is fetched via ``self._open``.

        Returns a list of URLs ranked by relevance (as determined by Yahoo),
        or an empty list when the decoded response has no
        ``bossresponse -> web -> results`` entry.
        Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` on errors.
        """
        consumer = oauth.Consumer(key=self.cred["key"],
                                  secret=self.cred["secret"])

        endpoint = "http://yboss.yahooapis.com/ysearch/web"
        params = {
            "oauth_version": oauth.OAUTH_VERSION,
            "oauth_nonce": oauth.generate_nonce(),
            "oauth_timestamp": oauth.Request.make_timestamp(),
            "oauth_consumer_key": consumer.key,
            "q": '"' + query.encode("utf8") + '"',
            "count": str(self.count),
            "type": "html,text,pdf",
            "format": "json",
        }

        request = oauth.Request(method="GET", url=endpoint, parameters=params)
        request.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None)

        raw = self._open(self._build_url(endpoint, request))

        try:
            decoded = loads(raw)
        except ValueError:
            raise SearchQueryError(
                "Yahoo! BOSS Error: JSON could not be decoded")

        try:
            entries = decoded["bossresponse"]["web"]["results"]
        except KeyError:
            return []

        # Collected outside the try block: an entry without "url" still
        # raises KeyError to the caller.
        urls = []
        for entry in entries:
            urls.append(entry["url"])
        return urls