Exemple #1
0
    def search(self, query, start=0, count=10, filter=True):
        """
        search(query, start = 0, count = 10, filter = True) -> results

        Search the web with Google.

        Fetches the Google XHTML results page for query and scrapes it.
        start and count are sent as the 'start' and 'num' request
        parameters; filter is sent as 'filter' (1 when true, 0 otherwise).

        Returns a list of (url, title, excerpt) tuples, one per matched
        result link. Raises ValueError if a matched link's href does not
        contain ';u='.
        """

        params = http.encode_url({
            'q': query,
            'start': start,
            'num': count,
            'filter': 1 if filter else 0,
        })

        content = self.open_url('http://www.google.com/xhtml?' + params).read()

        # Raw strings so that \d reaches the regex engine as written instead
        # of relying on Python passing an unknown string escape through.
        pattern = (r'<a href="(.*?)" accesskey="\d+">(.*?)</a>(.*?)'
                   r'<span class="url">')

        results = []

        for href, title, excerpt in re.findall(pattern, content):
            # The target address is whatever follows ';u=' in the href.
            url = href[href.index(';u=') + 3:]

            # Prepend a scheme only when the target has none; 'http://' is
            # checked too so an already-qualified address is not doubled.
            if not url.startswith(('http://', 'https://', 'ftp://')):
                url = 'http://' + url

            results.append((url, title, excerpt))

        return results
	def search(self, query, start = 0, count = 10, filter = True):
		"""
		search(query, start = 0, count = 10, filter = True) -> results

		Search the web with Google.

		Fetches the Google XHTML results page for query and scrapes it.
		start and count are sent as the 'start' and 'num' request
		parameters; filter is sent as 'filter' (1 when true, 0 otherwise).

		Returns a list of (url, title, excerpt) tuples, one per matched
		result link. Raises ValueError if a matched link's href does not
		contain ';u='.
		"""

		params = http.encode_url({
			'q': query,
			'start': start,
			'num': count,
			'filter': 1 if filter else 0,
		})

		content = self.open_url('http://www.google.com/xhtml?' + params).read()

		# Raw strings so that \d reaches the regex engine as written instead
		# of relying on Python passing an unknown string escape through.
		pattern = (r'<a href="(.*?)" accesskey="\d+">(.*?)</a>(.*?)'
			r'<span class="url">')

		results = []

		for href, title, excerpt in re.findall(pattern, content):
			# The target address is whatever follows ';u=' in the href.
			url = href[href.index(';u=') + 3:]

			# Prepend a scheme only when the target has none; 'http://' is
			# checked too so an already-qualified address is not doubled.
			if not url.startswith(('http://', 'https://', 'ftp://')):
				url = 'http://' + url

			results.append((url, title, excerpt))

		return results
	def translate_url(self, url, from_lang = 'en', to_lang = 'de'):
		"""
		translate_url(url, from_lang = 'en', to_lang = 'de') -> string

		Translate the page at url from from_lang to to_lang.

		The language pair is sent as 'langpair' in the form 'from|to' and
		the page address as 'u'. Returns whatever is read from the response.
		"""

		endpoint = 'http://66.249.93.104/translate_c?'
		params = {'langpair': from_lang + '|' + to_lang, 'u': url}

		response = self.open_url(endpoint + http.encode_url(params))

		return response.read()
	def translate_text(self, text, from_lang = 'en', to_lang = 'de'):
		"""
		translate_text(text, from_lang = 'en' , to_lang = 'de') -> string

		Translate text from from_lang to to_lang.

		The language pair is sent as 'langpair' in the form 'from|to'.
		Returns the contents of the first <textarea> matched in the
		response, with surrounding whitespace stripped. Raises IndexError
		when the response contains no matching <textarea>.
		"""

		endpoint = 'http://translate.google.com/translate_t?'
		params = {'langpair': from_lang + '|' + to_lang, 'text': text}

		page = self.open_url(endpoint + http.encode_url(params)).read()

		# Only the first matched textarea is used.
		first_area = re.findall('<textarea .*?>(.*?)</textarea>', page)[0]

		return first_area.strip()
Exemple #5
0
    def search(self, *terms):
        """
        search(term1, term2, term3, ...) -> []

        Extract set from terms.

        Each term is sent as a numbered request parameter ('q1', 'q2', ...)
        in the order given. Returns the list of strings captured from the
        '<center>...</center></a>' fragments of the response, with
        whitespace before the closing tag left out.
        """

        params = [('q' + str(position + 1), term)
                  for position, term in enumerate(terms)]

        endpoint = 'http://labs.google.com/sets?'
        content = self.open_url(endpoint + http.encode_url(params)).read()

        # Raw string so that \s reaches the regex engine as written instead
        # of relying on Python passing an unknown string escape through.
        return re.findall(r'<center>(.*?)\s*</center></a>', content)
Exemple #6
0
    def translate_url(self, url, from_lang='en', to_lang='de'):
        """
        translate_url(url, from_lang = 'en', to_lang = 'de') -> string

        Translate the page at url from from_lang to to_lang.

        The language pair is sent as 'langpair' in the form 'from|to' and
        the page address as 'u'. Returns whatever is read from the response.
        """

        endpoint = 'http://66.249.93.104/translate_c?'
        params = {'langpair': from_lang + '|' + to_lang, 'u': url}

        response = self.open_url(endpoint + http.encode_url(params))

        return response.read()
	def search(self, query):
		"""
		search(query) -> []

		Suggest queries based on query.

		Collects every double-quoted string in the response and returns a
		slice of that list: the first three strings are dropped, and so are
		the last (len - 3) // 2 + 1 strings.
		"""

		endpoint = 'http://www.google.com/complete/search?'
		params = http.encode_url({'qu': query})

		content = self.open_url(endpoint + params).read()
		quoted = re.findall('"(.*?)",?', content)

		# NOTE(review): presumably the dropped head and tail are response
		# framing and per-suggestion metadata -- confirm against the
		# actual response format.
		# Floor division keeps the slice bound an integer under both
		# classic and true division semantics.
		tail = (len(quoted) - 3) // 2 + 1

		return quoted[3:-tail]
	def search(self, *terms):
		"""
		search(term1, term2, term3, ...) -> []

		Extract set from terms.

		Each term is sent as a numbered request parameter ('q1', 'q2', ...)
		in the order given. Returns the list of strings captured from the
		'<center>...</center></a>' fragments of the response, with
		whitespace before the closing tag left out.
		"""

		params = [('q' + str(position + 1), term)
			for position, term in enumerate(terms)]

		endpoint = 'http://labs.google.com/sets?'
		content = self.open_url(endpoint + http.encode_url(params)).read()

		# Raw string so that \s reaches the regex engine as written instead
		# of relying on Python passing an unknown string escape through.
		return re.findall(r'<center>(.*?)\s*</center></a>', content)
Exemple #9
0
    def search(self, query):
        """
        search(query) -> []

        Suggest queries based on query.

        Collects every double-quoted string in the response and returns a
        slice of that list: the first three strings are dropped, and so are
        the last (len - 3) // 2 + 1 strings.
        """

        endpoint = 'http://www.google.com/complete/search?'
        params = http.encode_url({'qu': query})

        content = self.open_url(endpoint + params).read()
        quoted = re.findall('"(.*?)",?', content)

        # NOTE(review): presumably the dropped head and tail are response
        # framing and per-suggestion metadata -- confirm against the
        # actual response format.
        # Floor division keeps the slice bound an integer under both
        # classic and true division semantics.
        tail = (len(quoted) - 3) // 2 + 1

        return quoted[3:-tail]
	def query_dns(self, query, last = '', page = 0):
		"""
		query_dns(query, last = '', page = 0) -> results

		Query the dns database of netcraft.

		query is either a bare host pattern or 'kind:host', where kind is
		one of 'contains', 'starts', 'ends' or 'subdomain'; any other kind
		falls back to 'site contains'. last is sent through unchanged and
		page is turned into the 'from' offset (page * 20 + 1).

		Returns a list of 5-tuples, one per matched table row: the host
		part of the row's first link followed by four captured cell values.
		"""

		restrictions = {
			'contains': 'site contains',
			'starts': 'site starts with',
			'ends': 'site ends with',
			'subdomain': 'subdomain matches',
		}

		parts = query.split(':')

		if len(parts) == 1:
			restriction = 'site contains'
			host = parts[0]
		else:
			# Only the first two colon-separated fields are used.
			restriction = restrictions.get(parts[0], 'site contains')
			host = parts[1]

		params = {
			'host': host,
			'last': last,
			'from': (page * 20) + 1,
			'restriction': restriction,
		}

		endpoint = 'http://searchdns.netcraft.com/?'
		content = self.open_url(endpoint + http.encode_url(params)).read()

		# One match per result row; five groups are captured per row.
		row_pattern = (
			'<td align="left">\n'
			'<a href="(.*?)">.*?</a></td>\n'
			'<td align="center">.*?</td>\n'
			'<td>(.*?)</td>\n'
			'<td><a href=".*?q=(.*?)">(.*?)</a></td>\n'
			'<td><a href=".*?">(.*?)</a></td>')

		results = []

		for href, cell, link_query, link_text, last_cell in re.findall(
			row_pattern, content):
			# Keep only the host portion of the first link.
			site = re.match('.*?://(.*?)/', href).group(1)

			# The link text is placed ahead of the value taken from its href.
			results.append((site, cell, link_text, link_query, last_cell))

		return results
Exemple #11
0
    def translate_text(self, text, from_lang='en', to_lang='de'):
        """
        translate_text(text, from_lang = 'en' , to_lang = 'de') -> string

        Translate text from from_lang to to_lang.

        The language pair is sent as 'langpair' in the form 'from|to'.
        Returns the contents of the first <textarea> matched in the
        response, with surrounding whitespace stripped. Raises IndexError
        when the response contains no matching <textarea>.
        """

        endpoint = 'http://translate.google.com/translate_t?'
        params = {'langpair': from_lang + '|' + to_lang, 'text': text}

        page = self.open_url(endpoint + http.encode_url(params)).read()

        # Only the first matched textarea is used.
        first_area = re.findall('<textarea .*?>(.*?)</textarea>', page)[0]

        return first_area.strip()
Exemple #12
0
    def query_dns(self, query, last='', page=0):
        """
        query_dns(query, last = '', page = 0) -> results

        Query the dns database of netcraft.

        query is either a bare host pattern or 'kind:host', where kind is
        one of 'contains', 'starts', 'ends' or 'subdomain'; any other kind
        falls back to 'site contains'. last is sent through unchanged and
        page is turned into the 'from' offset (page * 20 + 1).

        Returns a list of 5-tuples, one per matched table row: the host
        part of the row's first link followed by four captured cell values.
        """

        restrictions = {
            'contains': 'site contains',
            'starts': 'site starts with',
            'ends': 'site ends with',
            'subdomain': 'subdomain matches',
        }

        parts = query.split(':')

        if len(parts) == 1:
            restriction = 'site contains'
            host = parts[0]
        else:
            # Only the first two colon-separated fields are used.
            restriction = restrictions.get(parts[0], 'site contains')
            host = parts[1]

        params = {
            'host': host,
            'last': last,
            'from': (page * 20) + 1,
            'restriction': restriction,
        }

        endpoint = 'http://searchdns.netcraft.com/?'
        content = self.open_url(endpoint + http.encode_url(params)).read()

        # One match per result row; five groups are captured per row.
        row_pattern = (
            '<td align="left">\n'
            '<a href="(.*?)">.*?</a></td>\n'
            '<td align="center">.*?</td>\n'
            '<td>(.*?)</td>\n'
            '<td><a href=".*?q=(.*?)">(.*?)</a></td>\n'
            '<td><a href=".*?">(.*?)</a></td>')

        results = []

        for href, cell, link_query, link_text, last_cell in re.findall(
                row_pattern, content):
            # Keep only the host portion of the first link.
            site = re.match('.*?://(.*?)/', href).group(1)

            # The link text is placed ahead of the value taken from its href.
            results.append((site, cell, link_text, link_query, last_cell))

        return results