from googlesearch import GoogleSearch

def googleSearch(self):
    # Query can be site-specific -> "site:link word"
    google = GoogleSearch(self.query)
    ct = google.count()  # reuse the same search object instead of re-querying
    print "No. Of Google Results: %d" % ct
    return google.top_urls()
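# A minimal usage sketch (the class and query below are hypothetical):
# googleSearch expects to live on an object carrying a `query` attribute.
class Searcher(object):
    def __init__(self, query):
        self.query = query

Searcher.googleSearch = googleSearch  # attach the function as a method
for url in Searcher("site:python.org tutorial").googleSearch():
    print url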
from googlesearch import GoogleSearch

def Search(paper):
    # Find the paper on IEEE Xplore and take the article number from
    # the end of the top URL (the part after the last "=").
    gs = GoogleSearch(paper + " ieeexplore.ieee.org")
    url = gs.top_urls()[0]
    artNumber = url.split("=")[-1]
    return artNumber
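# Usage sketch: the paper title below is made up, and the function assumes
# the IEEE Xplore result URL ends in an "arnumber=<id>"-style parameter.
print Search("A hypothetical paper title")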
from openpyxl import load_workbook
from googlesearch import GoogleSearch

wb = load_workbook('Attendees.xlsx')
for ws in wb:  # iterating a workbook yields its worksheets directly
    for i in range(1, 290):
        nameCell = ws.cell(row=i, column=1)
        jobCell = ws.cell(row=i, column=3)
        search = str(nameCell.value) + " " + str(jobCell.value) + " email"
        gs = GoogleSearch(search)
        print search
        print gs.top_urls()[0]
        print "\n"
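# A variant sketch on the same idea: rather than hard-coding 290 rows,
# openpyxl reports the populated extent of each sheet via ws.max_row.
wb = load_workbook('Attendees.xlsx')
for ws in wb:
    for i in range(1, ws.max_row + 1):
        print ws.cell(row=i, column=1).value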
from googlesearch import GoogleSearch
from requests.exceptions import ProxyError  # assumed source of ProxyError

def url_search(query, lucky=True):
    gs = GoogleSearch(query)
    try:
        # "Lucky" mimics I'm Feeling Lucky: return only the top hit.
        return [gs.top_url()] if lucky else gs.top_urls()
    except ProxyError:
        raise ValueError("search failed for query: %s" % query)
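# Usage sketch (query strings are hypothetical):
print url_search("openpyxl tutorial")                # -> [single top URL]
print url_search("openpyxl tutorial", lucky=False)   # -> full top-URL list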
from googlesearch import GoogleSearch

def get_url_by_name(name, elem):
    # elem is the Goodreads resource type in the URL path, e.g. 'book'
    gs = GoogleSearch('site:goodreads.com/{0}/show {1}'.format(elem, name))
    for url in gs.top_urls():
        if 'goodreads.com/{0}/show'.format(elem) in url:
            return str(url)
    return 0  # falsy sentinel: no matching URL found
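# Usage sketch: the title is hypothetical; 'book' is one of the Goodreads
# path segments (goodreads.com/book/show/...) this helper is built around.
print get_url_by_name("Some Book Title", "book")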
from googlesearch import GoogleSearch
from readability.readability import Document
from bs4 import BeautifulSoup
import sys
import urllib

def remove_non_ascii(text):
    return ''.join(i for i in text if ord(i) < 128)

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print 'no urls found'
        sys.exit(0)
    query = ' '.join(sys.argv[1:])
    gs = GoogleSearch(query)
    urls = gs.top_urls()
    if len(urls) < 1:
        print 'no urls found'
        sys.exit(0)
    # Fetch the top hit, strip it down to its main article text,
    # and print the first 1000 ASCII characters.
    html = urllib.urlopen(urls[0]).read()
    soup = BeautifulSoup(Document(html).summary(), "lxml")
    print remove_non_ascii(soup.get_text()[0:1000])
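# Usage sketch: assuming the script above is saved as summarize.py (a
# hypothetical name), pass the search terms as command-line arguments:
#
#   python summarize.py python requests tutorial
#
# It prints roughly the first 1000 ASCII characters of the main article
# text of the top search result.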