Beispiel #1
0
class uol():
	"""Collect result URLs from the UOL search JSON endpoint, page by page.

	``search`` requests consecutive pages until one comes back with an empty
	``results`` entry, then publishes everything gathered as ``finalResults``.
	"""
	# Class-level values are defaults only: search() rebinds per-instance
	# copies so that instances and successive searches never share state.
	headers = util.getHeaders()
	results = list()
	finalResults = list()
	counter = 1

	def search(self,query,pageId=1,verbose=False):
		"""Crawl the result pages for ``query`` starting at ``pageId``.

		Args:
			query: Search term; interpolated into the URL as-is (no escaping
				is applied here).
			pageId: Number of the first page to request.
			verbose: When true, print a progress line for every parsed page.

		Returns:
			The list of collected URLs, or ``None`` when nothing was found.
		"""
		self.verbose = verbose
		self.query = query
		# Start from a clean slate so that earlier searches (on this or any
		# other instance) cannot leak into this one.
		self.results = list()
		self.finalResults = list()
		# Keep the page counter in sync with the requested start page.
		self.counter = pageId
		# Copy before adding the Referer so the shared class-level headers
		# mapping is never mutated.
		self.headers = dict(self.headers)
		self.headers['Referer']='https://busca.uol.com.br/result.html?term=%s' % query
		self._fetchPage()
		if self.finalResults:
			return self.finalResults

	def _fetchPage(self):
		"""Request page ``self.counter`` of the current query and parse it."""
		url = "https://busca.uol.com.br/search?client=uol&term=%s&page=%d" % (self.query,self.counter)
		r = requests.get(url,headers=self.headers)
		self.searchParser(r.json())

	def searchParser(self,content):
		"""Harvest the URLs of one decoded JSON page and request the next one.

		Args:
			content: Decoded JSON body; its ``results`` entry is read as a
				sequence of mappings that each carry a ``url`` key.
		"""
		entries = content['results']
		if not entries:
			# An empty page marks the end of the listing.
			self.finalResults = self.results
			return
		if self.verbose: print('UOL - Searching page %d' % self.counter)
		self.results.extend(entry['url'] for entry in entries)
		self.counter = self.counter + 1
		self._fetchPage()
Beispiel #2
0
class ask():
	"""Collect result URLs from Ask.com HTML result pages, following pagination."""
	# Class-level values are defaults only: search() rebinds per-instance
	# copies so that instances and successive searches never share state.
	headers = util.getHeaders()
	results = list()
	finalResults = list()
	counter = 1

	def search(self,query,pageId=1,verbose=False):
		"""Crawl the result pages for ``query`` starting at ``pageId``.

		Args:
			query: Search term; interpolated into the URL as-is (no escaping
				is applied here).
			pageId: Value sent as the ``page`` parameter of the first request.
			verbose: When true, print a progress line for every parsed page.

		Returns:
			The list of collected URLs, or ``None`` when nothing was found.
		"""
		self.verbose = verbose
		# Start from a clean slate; a stale counter would also break the
		# "first page" test in searchParser.
		self.results = list()
		self.finalResults = list()
		self.counter = 1
		self._fetchPage(query,pageId)
		if self.finalResults:
			return self.finalResults

	def _fetchPage(self,query,pageId):
		"""Request one result page and hand its HTML to ``searchParser``."""
		url = "https://www.search.ask.com/web?q=%s&o=&tpr=10&page=%d" % (query,pageId)
		r = requests.get(url,headers=self.headers)
		self.searchParser(r.content)

	def searchParser(self,content):
		"""Extract result links from one HTML page and decide whether to go on.

		Args:
			content: Raw HTML of a result page.
		"""
		tree = html.fromstring(content)
		urls = tree.xpath('//*[@id="algo-container"]/ol/li/div/a/@href')
		nextPage = tree.xpath('//*[@id="pagination-nav-block"]/div/a[2]/@href')
		if self.verbose: print('Ask - Searching page %d' % self.counter)
		firstPage = self.counter == 1
		self.counter = self.counter + 1
		self.results.extend(urls)
		# Continue only when two pagination links matched or this was the first
		# page parsed.
		# NOTE(review): presumably two matches mean "previous" and "next" are
		# both present - confirm against the live markup.
		if nextPage and (len(nextPage) == 2 or firstPage):
			self.checkNextPage(nextPage)
		else:
			self.finalResults = self.results

	def checkNextPage(self,nextPage):
		"""Fetch the page referenced by the last pagination link.

		Args:
			nextPage: Non-empty list of pagination hrefs; only the last is used.
		"""
		# The query and page number are read positionally: the 3rd and 4th
		# '&'-separated parameters of the link.
		# NOTE(review): raises IndexError/ValueError if the link has another
		# layout - verify the parameter order against the live markup.
		params = nextPage[-1].split("&")
		query = params[2].split("=")[1]
		pageId = params[3].split("=")[1]
		self._fetchPage(query,int(pageId))
Beispiel #3
0
class yahoo():
	"""Collect result URLs from Yahoo (br) HTML result pages via the "next" link."""
	# Class-level values are defaults only: search() rebinds per-instance
	# copies so that instances and successive searches never share state.
	headers = util.getHeaders()
	results = list()
	finalResults = list()
	counter = 1

	def search(self,query=None,url=None,verbose=False):
		"""Crawl result pages for ``query``, or starting from an explicit ``url``.

		Args:
			query: Search term; interpolated into the URL as-is. Ignored when
				``url`` is given.
			url: Full URL of the first page to request; takes precedence over
				``query``.
			verbose: When true, print a progress line for every parsed page.

		Returns:
			The list of collected URLs, or ``None`` when nothing was found.
		"""
		self.verbose = verbose
		# Start from a clean slate so earlier searches cannot leak in.
		self.results = list()
		self.finalResults = list()
		self.counter = 1
		self._fetchPage(url if url else 'https://br.search.yahoo.com/search?&p=%s' % query)
		if self.finalResults:
			return self.finalResults

	def _fetchPage(self,url):
		"""Request one result page and hand its HTML to ``searchParser``."""
		r = requests.get(url,headers=self.headers)
		self.searchParser(r.content)

	def searchParser(self,content):
		"""Extract result links from one HTML page and follow the "next" link.

		Args:
			content: Raw HTML of a result page.
		"""
		tree = html.fromstring(content)
		urls = tree.xpath('//*[@class=" td-u"]/@href')
		nextPage = tree.xpath('//*[@class="next"]/@href')
		if self.verbose: print('Yahoo - Searching page %d' % self.counter)
		self.counter = self.counter + 1

		for url in urls:
			decoded = unquote(url)
			if "RU=" in decoded:
				# The target sits between the "RU=" and "/RK=" markers of the
				# decoded link.
				self.results.append(decoded.split("RU=")[1].split("/RK=")[0])
			else:
				# No marker to unwrap: keep the href instead of aborting the
				# whole search with an IndexError.
				self.results.append(url)

		if nextPage:
			self.checkNextPage(nextPage)
		else:
			self.finalResults = self.results

	def checkNextPage(self,nextPage):
		"""Fetch the page referenced by the first "next" href.

		Args:
			nextPage: Non-empty list of hrefs; only the first is used.
		"""
		self._fetchPage(nextPage[0])
Beispiel #4
0
class bing():
    """Collect result URLs from Bing HTML result pages, following pagination."""

    # Class-level values are defaults only: search() rebinds per-instance
    # copies so that instances and successive searches never share state.
    headers = util.getHeaders()
    results = list()
    finalResults = list()
    counter = 1

    def search(self, query, verbose=False):
        """Crawl the result pages for ``query``.

        Args:
            query: Search term; interpolated into the URL as-is (no escaping
                is applied here).
            verbose: When true, print a progress line for every parsed page.

        Returns:
            The list of collected URLs, or ``None`` when nothing was found.
        """
        self.verbose = verbose
        # Start from a clean slate so earlier searches cannot leak in.
        self.results = list()
        self.finalResults = list()
        self.counter = 1
        self._fetchPage(query)
        if self.finalResults:
            return self.finalResults

    def _fetchPage(self, query):
        """Request one result page and hand its HTML to ``searchParser``."""
        url = 'https://www.bing.com/search?q=%s' % query
        r = requests.get(url, headers=self.headers)
        self.searchParser(r.content)

    def searchParser(self, content):
        """Extract result links from one HTML page and follow pagination.

        Args:
            content: Raw HTML of a result page.
        """
        tree = html.fromstring(content)
        urls = tree.xpath('//*[@id="b_results"]/li/h2/a/@href')
        nextPage = tree.xpath(
            '//*[@id="b_results"]/li[11]/nav/ul/li[6]/a/@href')
        if self.verbose:
            print('Bing - Searching page %d' % self.counter)
        self.counter = self.counter + 1

        self.results.extend(urls)

        if nextPage:
            self.checkNextPage(nextPage)
        else:
            self.finalResults = self.results

    def checkNextPage(self, nextPage):
        """Fetch the page referenced by the first pagination href.

        Args:
            nextPage: Non-empty list of hrefs; only the first is used.
        """
        # Everything after the first '=' (the term plus any trailing
        # parameters) is reused verbatim as the next "query" string.
        query = nextPage[0].split("=", 1)[1]
        self._fetchPage(query)
Beispiel #5
0
class duckDuckGo():
    """Collect result URLs from DuckDuckGo's HTML pages via their nav forms."""

    # Class-level values are defaults only: search() rebinds per-instance
    # copies so that instances and successive searches never share state.
    headers = util.getHeaders()
    results = list()
    finalResults = list()
    counter = 1

    def search(self, query, data=None, verbose=False):
        """Crawl the result pages for ``query``.

        Args:
            query: Search term; interpolated into the URL as-is (no escaping
                is applied here).
            data: Optional form data; when non-empty the first request is a
                POST carrying it, otherwise a GET.
            verbose: When true, print a progress line for every parsed page.

        Returns:
            The list of collected URLs, or ``None`` when nothing was found.
        """
        self.verbose = verbose
        self.query = query
        # Start from a clean slate so earlier searches cannot leak in.
        self.results = list()
        self.finalResults = list()
        self.counter = 1
        self._fetchPage(data)
        if self.finalResults:
            return self.finalResults

    def _fetchPage(self, data=None):
        """Request one result page (POST when ``data`` is non-empty) and parse it."""
        request = requests.post if data else requests.get
        url = "https://duckduckgo.com/html/?q=%s&s=0" % (self.query)
        r = request(url, headers=self.headers, data=data)
        self.searchParser(r.content)

    def searchParser(self, content):
        """Extract result links from one HTML page and look for a nav form.

        Args:
            content: Raw HTML of a result page.
        """
        tree = html.fromstring(content)
        urls = tree.xpath('//*[@id="links"]/div/div/h2/a/@href')
        nextPage = tree.xpath('//div[@class="nav-link"]/form')
        if self.verbose:
            print('Duck Duck Go - Searching page %d' % self.counter)
        self.counter = self.counter + 1
        self.results.extend(urls)

        self.checkNextPage(nextPage)

    def checkNextPage(self, nextPage):
        """Submit the navigation form(s) to fetch another page, or finish.

        Args:
            nextPage: List of ``<form>`` elements (possibly empty).
        """
        # NOTE(review): fields of every matched form are merged into a single
        # dict, so later forms override earlier ones - confirm this always
        # selects the "next" form.
        formData = dict()
        for form in nextPage:
            for field in form:
                name = field.get('name')
                # Controls without a name are not part of a form submission.
                if name is not None:
                    formData[name] = field.get('value')
        if formData:
            self._fetchPage(formData)
        else:
            # No form, or nothing to submit: an empty payload would fall back
            # to a GET of the same URL and recurse forever.
            self.finalResults = self.results
Beispiel #6
0
class className():
    """Template for a new search-engine scraper; fill in the URL and XPaths."""

    # Class-level values are defaults only: search() rebinds per-instance
    # copies so that instances and successive searches never share state.
    headers = util.getHeaders()
    results = list()
    finalResults = list()
    counter = 1

    def search(self, query, verbose=False):
        """Crawl the result pages for ``query``.

        Args:
            query: Search term; interpolated into the URL as-is.
            verbose: When true, print a progress line for every parsed page.

        Returns:
            The list of collected URLs, or ``None`` when nothing was found.
        """
        self.verbose = verbose
        # Remembered so that checkNextPage can build follow-up requests.
        self.query = query
        # Start from a clean slate so earlier searches cannot leak in.
        self.results = list()
        self.finalResults = list()
        self.counter = 1
        self._fetchPage(query)
        if self.finalResults:
            return self.finalResults

    def _fetchPage(self, query):
        """Request one result page and hand its HTML to ``searchParser``."""
        url = "http://url.goes.here/search?p=%s" % query
        r = requests.get(url, headers=self.headers)
        self.searchParser(r.content)

    def searchParser(self, content):
        """Extract result links from one HTML page and follow pagination.

        Args:
            content: Raw HTML of a result page.
        """
        tree = html.fromstring(content)
        urls = tree.xpath(
            '')  # You can get it using the Google Chrome developer tools
        nextPage = tree.xpath(
            '')  # You can get it using the Google Chrome developer tools
        if self.verbose:
            print('Search Engine Name - Searching page %d' % self.counter)
        self.counter = self.counter + 1

        self.results.extend(urls)

        if nextPage:  # or any logic you need
            self.checkNextPage(
                nextPage)  # Pass the new link or data to fetch the next page
        else:
            self.finalResults = self.results  # Publish the collected URLs

    def checkNextPage(self, nextPage):
        """Fetch the next page described by ``nextPage``.

        Args:
            nextPage: Whatever ``searchParser`` extracted for pagination.
        """
        # Your code goes here: derive the next request from nextPage.
        # The stored query is used because no local ``query`` exists here.
        self._fetchPage(self.query)