def searchWeb(text, output, c):
    """Query the RapidAPI Google Search endpoint for ``text`` and fold the
    top results into the running plagiarism statistics.

    Args:
        text: candidate sentence/passage to search for.
        output: dict, result URL -> number of times that URL matched.
        c: dict, result URL -> running mean cosine similarity between
           ``text`` and the result's description snippet.

    Returns:
        None. ``output`` and ``c`` are updated in place.
    """
    # NOTE(review): API key hard-coded in source — move to config/env.
    headers = {
        "x-rapidapi-key": "cef4531821mshdd00775dfb4fa11p120050jsn333223957d0a",
        "x-rapidapi-host": "google-search3.p.rapidapi.com",
    }
    query = {"q": text, "num": 5, "lr": "lang_en"}
    url = 'https://rapidapi.p.rapidapi.com/api/v1/search/'
    # BUG FIX: the original concatenated urlencode(query) directly onto the
    # path with no '?' separator, yielding ".../search/q=..." — an invalid
    # URL. Passing params= lets requests build "?q=...&num=5&lr=lang_en".
    resp = requests.get(url, params=query, headers=headers)
    try:
        results = resp.json()
        for ele in results['results']:
            link = ele['link']
            snippet = ele['description']
            sim = cosineSim(text, strip_tags(snippet))
            if link in output:
                # Incremental mean of similarity scores seen for this URL.
                output[link] += 1
                c[link] = (c[link] * (output[link] - 1) + sim) / output[link]
            else:
                output[link] = 1
                c[link] = sim
    except (ValueError, KeyError, TypeError):
        # Malformed or unexpected API response: best-effort, leave the
        # statistics untouched (mirrors the original silent-return).
        return
    return
def searchWeb(text, output, c):
    """Search the legacy Google AJAX web-search API for ``text`` (as an
    exact phrase) and fold the hits into the running plagiarism statistics.

    Args:
        text: candidate sentence/passage (str or unicode).
        output: dict, result URL -> number of times that URL matched.
        c: dict, result URL -> running mean cosine similarity between
           ``text`` and the matched result's content snippet.

    Returns:
        (output, c), updated in place.
    """
    try:
        text = text.encode('utf-8')
    except (AttributeError, UnicodeError):
        pass  # already bytes / not encodable: use as-is
    query = urllib.quote_plus(text)
    if len(query) > 60:
        # Query too long for the API; skip this passage.
        return output, c
    # using googleapis for searching web
    base_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q='
    url = base_url + '%22' + query + '%22'  # %22 = '"' for exact-phrase search
    request = urllib2.Request(url, None, {'Referer': 'Google Chrome'})
    response = urllib2.urlopen(request)
    results = simplejson.load(response)
    try:
        if (len(results) and 'responseData' in results
                and 'results' in results['responseData']
                and results['responseData']['results'] != []):
            # BUG FIX: the original loop re-read results[...]['results'][0]
            # on every iteration, so only the FIRST hit was ever processed
            # (counted len(results) times). Iterate the actual hits.
            for Match in results['responseData']['results']:
                content = Match['content']
                sim = cosineSim(text, strip_tags(content))
                link = Match['url']
                if link in output:
                    output[link] += 1
                    # Incremental mean of similarity scores for this URL.
                    c[link] = (c[link] * (output[link] - 1) + sim) / output[link]
                else:
                    output[link] = 1
                    c[link] = sim
    except (KeyError, TypeError):
        # Unexpected response shape: best-effort, return what we have.
        return output, c
    return output, c
def searchWeb(text, output, c):
    """Search the legacy Google AJAX web-search API for ``text`` (exact
    phrase) and accumulate per-URL match counts and mean cosine similarity.

    Args:
        text: candidate sentence/passage (str or unicode).
        output: dict, result URL -> match count.
        c: dict, result URL -> running mean cosineSim(text, snippet).

    Returns:
        (output, c), updated in place.
    """
    try:
        text = text.encode('utf-8')
    except (AttributeError, UnicodeError):
        pass  # already encoded / not a text object: leave unchanged
    query = urllib.quote_plus(text)
    if len(query) > 60:
        return output, c  # query too long for the API; skip
    # using googleapis for searching web
    base_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q='
    url = base_url + '%22' + query + '%22'  # quote the phrase (%22 == '"')
    request = urllib2.Request(url, None, {'Referer': 'Google Chrome'})
    response = urllib2.urlopen(request)
    results = simplejson.load(response)
    try:
        hits = results.get('responseData', {}).get('results') if len(results) else None
        if hits:
            # BUG FIX: the loop variable was ignored in the original and
            # hits[0] was processed on every pass, so all hits after the
            # first were dropped. Process each hit.
            for Match in hits:
                content = Match['content']
                sim = cosineSim(text, strip_tags(content))
                link = Match['url']
                if link in output:
                    output[link] = output[link] + 1
                    # Running mean of the similarity for this URL.
                    c[link] = (c[link] * (output[link] - 1) + sim) / output[link]
                else:
                    output[link] = 1
                    c[link] = sim
    except (KeyError, TypeError, AttributeError):
        # Unexpected response shape: best-effort, keep current stats.
        return output, c
    return output, c
def searchWeb(text, output, c):
    """Search the web via Azure Cognitive Services (Bing Web Search) for
    ``text`` and fold the page snippets into the plagiarism statistics.

    Args:
        text: candidate sentence/passage.
        output: dict, page URL -> number of times that URL matched.
        c: dict, page URL -> running mean cosine similarity between
           ``text`` and the page snippet.

    Returns:
        (output, c), updated in place.
    """
    try:
        text = text.encode('utf-8')
    except (AttributeError, UnicodeError):
        pass  # already bytes / not encodable: use as-is
    try:
        # FIXME (from original): update to the project's own Azure key.
        # NOTE(review): key is hard-coded in source — move to config/env.
        subscription_key = "9ea45659ed914562b85ed5b1a1feb1f1"
        client = WebSearchAPI(CognitiveServicesCredentials(subscription_key))
        response = client.web.search(query=text)
        if hasattr(response.web_pages, 'value'):
            for page in response.web_pages.value:
                # Debug trace kept from the original implementation.
                print("page name: {} ".format(page.name))
                print("page URL: {} ".format(page.url))
                content = page.snippet
                sim = cosineSim(text, strip_tags(content))
                if page.url in output:
                    output[page.url] += 1
                    # Incremental mean of similarity scores for this URL.
                    c[page.url] = (c[page.url] * (output[page.url] - 1) + sim) / output[page.url]
                else:
                    output[page.url] = 1
                    c[page.url] = sim
        else:
            print("no match found...")
    except Exception:
        # Best-effort: any API/auth/parse failure leaves stats unchanged.
        return output, c
    return output, c
def searchWeb(text, output, c):
    """Search Google Custom Search for ``text`` (exact phrase, quoted) and
    fold the result titles into the plagiarism statistics.

    Args:
        text: candidate sentence/passage (str or unicode).
        output: dict, result link -> number of times that link matched.
        c: dict, result link -> running mean cosine similarity between
           ``text`` and the result title.

    Returns:
        (output, c), updated in place.
    """
    try:
        text = text.encode('utf-8')
    except (AttributeError, UnicodeError):
        pass  # already encoded: use as-is
    query = urllib.quote_plus(text)
    if len(query) > 60:
        return output, c  # query too long; skip this passage
    # using googleapis for searching web
    payload = {
        'q': query,
        'cx': settings.GOOGLECUSTOMSEARCH_CXKEY,
        'key': settings.GOOGLECUSTOMSEARCH_APIKEY,
    }
    base_url = 'https://www.googleapis.com/customsearch/v1'
    custom_headers = {'Referer': 'Google Chrome'}
    response = requests.get(base_url, params=payload, headers=custom_headers)
    results = response.json()
    try:
        # BUG FIX: the CSE API returns totalResults as a STRING; comparing
        # it with `> 0` is always-True on Py2 and a TypeError on Py3 (which
        # the old bare except silently swallowed, so nothing was processed).
        total = int(results.get('searchInformation', {}).get('totalResults', 0))
        if total > 0 and 'items' in results:
            for Match in results['items']:
                content = Match['title']
                sim = cosineSim(text, strip_tags(content))
                link = Match['link']
                if link in output:
                    output[link] += 1
                    # Incremental mean of similarity scores for this link.
                    c[link] = (c[link] * (output[link] - 1) + sim) / output[link]
                else:
                    output[link] = 1
                    c[link] = sim
    except (KeyError, TypeError, ValueError):
        # Unexpected response shape: best-effort, keep current stats.
        return output, c
    return output, c
def searchWeb(text, output, c, encode=False):
    """Search the legacy Google AJAX web-search API for ``text`` (exact
    phrase) and accumulate per-URL match counts and mean similarity.

    Args:
        text: candidate sentence/passage.
        output: dict, result URL -> match count.
        c: dict, result URL -> running mean cosineSim(text, snippet).
        encode: when True, UTF-8 encode ``text`` before quoting.

    Returns:
        (output, c), updated in place.
    """
    if encode:
        text = text.encode('utf-8')
    query = urllib.quote_plus(text)
    # using googleapis for searching web
    base_url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q='
    url = base_url + '%22' + query + '%22'  # %22 = '"' for exact-phrase search
    request = urllib2.Request(url, None, {'Referer': 'http://www.rvce.edu.in'})
    response = urllib2.urlopen(request)
    results = simplejson.load(response)
    hits = results['responseData']['results']
    if hits:
        # BUG FIX: the original read hits[0] on every loop pass, so only
        # the first hit was ever processed; iterate the real hits instead.
        for Match in hits:
            content = Match['content']
            sim = cosineSim(text, strip_tags(content))
            link = Match['url']
            if link in output:
                output[link] = output[link] + 1
                # Running mean of the similarity for this URL.
                c[link] = (c[link] * (output[link] - 1) + sim) / output[link]
            else:
                output[link] = 1
                c[link] = sim
    return output, c