def googleTextSearch(query):
    query = query.replace('+', ' ')
    try:
        try:
            search_results = google.search(query)
        except Exception:
            # Retry with a slightly modified query if the first attempt fails.
            search_results = google.search(query + ' j')
        if len(search_results) > 0:
            return search_results
        else:
            return False
    except Exception:
        return False
def main():
    '''
    Main function.
    Takes a CSV file as input, runs a Google search for each row,
    and writes the results to an output CSV file.
    '''
    # Check arguments
    if len(sys.argv) != 3:
        print('[ERROR] We need two arguments')
        sys.exit(0)
    print('We get two arguments', sys.argv[1], sys.argv[2])

    # Open the CSV files
    ifile = open(sys.argv[1], 'r')
    creader = csv.reader(ifile, delimiter=',', quotechar='"')
    ofile = open(sys.argv[2], 'w')
    cwriter = csv.writer(ofile, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)

    # Loop over the CSV rows
    for row in creader:
        gresult = google.search(row[0], 1)
        cwriter.writerow([row[0], len(gresult)])
        time.sleep(random.randint(1, 3))

    # Close the files
    ifile.close()
    ofile.close()
def run(self, query, count=10):
    num_page = 1
    results = google.search(query, num_page)[:count]
    result = [google_result_to_dict(obj=obj) for obj in results]
    result = json.dumps(result)
    return result
def test_standard_search(self, html_f):
    """Test method to search in google."""
    # Replace the fetch method so the HTML comes from a test fixture file
    google.standard_search.get_html = \
        Mock(return_value=html_f.read().decode('utf8'))
    search = google.search("github")
    self.assertNotEqual(len(search), 0)
def google_search(self, keyword, n_results=1):
    n_pages = 1
    lang = 'en'
    search_results = google.search(keyword, n_pages, lang)
    properties = []
    for result in search_results[:n_results]:
        p = {'title': str.str_encode(result.name),
             'link': result.link,
             'desc': str.str_encode(result.description),
             'img': result.thumb}
        properties.append(p)
    return properties
def search(self, query, **kwargs):
    query = self._query(query, **kwargs)
    scraper = {'query': query, 'result': []}
    found = 0
    gs = google.search(query, lang='de')
    if gs:
        for entry in gs:
            found += 1
            result = dict(name=entry.name, link=entry.link)
            scraper['result'].append(result)
    self._data['scraper'].update({'GoogleHTTP': scraper})
    return found
def google(self):
    print(bcolors.UNDERLINE + "------------Google module---------------------" + bcolors.ENDC)
    print("First Page")
    search_results = google.search("\"" + self.email_address + "\"", 1)
    results = {}
    others = []
    for result in search_results:
        print(bcolors.OKGREEN + result.name + bcolors.ENDC)
        print(bcolors.FAIL + result.description + bcolors.ENDC)
        print(result.link)
        others.append(result.name)
        others.append(result.description)
        others.append(result.link)
        # Accumulate one entry per result instead of overwriting the dict
        results[result.link] = others
        others = []
    return results
def doGoogleSearch():
    responseMessage = ""
    if 'Body' not in request.values:
        return "No message Body"
    else:
        requestBody = request.values.get('Body')
        requestParams = requestBody.split(' ')
        # Join with spaces so multi-word queries stay intact
        keyword = ' '.join(requestParams[1:])
        num_page = 1
        search_results = google.search(keyword, num_page)
        if len(search_results) > 0:
            responseMessage = search_results[0].description
        else:
            responseMessage = "Error, no such results"
        return responseMessage
def filter_search(search_list=[],file_name="lolicon.url",keywords='lolicon loli pictures'): file_name = cache_dir + file_name # adds path to cache folder # Check path - existance of a file if os.path.exists(file_name): f = open(file_name, "r+") else: f = open(file_name, "wt") for url in search(keywords, stop=256): for x in range(0,len(common_finds)-1): if common_finds[x] in url: url = "" # checks common finds without lolicon content f_url = url + "\n" # newline added - f=formated f.write(f_url) # saves url to file search_list.append(url) # add a url to search list
def get_pesqs(self, temas):
    temas_pesq = []
    for tema in temas:
        pesq = google.search(tema, 1)
        i = 0
        for p in pesq:
            if i < 3:
                if p.name and p.description:
                    obj = {
                        "nome": p.name,
                        "link": p.link,
                        "desc": p.description,
                        "tema": tema
                    }
                    temas_pesq.append(obj)
                    i = i + 1
    return temas_pesq
def getEmail(business_name, city, num_page, title_info=False):
    """
    Takes a business name and city, runs a Google search for the business's
    email address, extracts email addresses from the result descriptions
    (or from the result titles when title_info is True), and returns the
    first match, or False if none are found.
    """
    #current_address = str(current_address)
    num_page = int(num_page)

    # Construct the query
    name = str(business_name)
    location_string = str(city)
    address_string = ' email address'
    query = name + ' ' + location_string + address_string
    #print(query)

    # Search the final query on Google
    search_results = google.search(query, num_page)

    # Parse the search results to extract email addresses
    google_results = []
    for result in search_results:
        #print(result.description)
        if title_info == False:
            googles = re.findall(r'[\w\.-]+@[\w\.-]+', result.description)
            if len(googles) != 0:
                google_results.append(googles)
        if title_info == True:
            googles = re.findall(r'[\w\.-]+@[\w\.-]+', result.name)
            if len(googles) != 0:
                google_results.append(googles)
    print(google_results)

    if len(google_results) == 0:
        return False
    else:
        return google_results[0][0]
def googleHacking(domain, dork, numP):
    results = google.search('site:' + domain + ' ' + dork, numP)
    print("-> Google Hacking Results")
    if not len(results) == 0:
        for i in range(len(results)):
            print("[*] Name: " + str(results[i].name))
            print("[*] Link: " + str(results[i].link))
            print("[*] URL: " + str(results[i].google_link))
            print("[*] Description: " + str(results[i].description))
            print("[*] Thumb: " + str(results[i].thumb))
            print("[*] Cached: " + str(results[i].cached))
            print("[*] Page: " + str(results[i].page))
            print("[*] Index: " + str(results[i].index))
            print("[*] Number of Results: " + str(results[i].number_of_results) + "\n")
    else:
        print("[!] Nothing was retrieved.")
def filter_search(search_list=[], file_name="lolicon.url", keywords='lolicon loli pictures'): file_name = cache_dir + file_name # adds path to cache folder # Check path - existance of a file if os.path.exists(file_name): f = open(file_name, "r+") else: f = open(file_name, "wt") for url in search(keywords, stop=256): for x in range(0, len(common_finds) - 1): if common_finds[x] in url: url = "" # checks common finds without lolicon content f_url = url + "\n" # newline added - f=formated f.write(f_url) # saves url to file search_list.append(url) # add a url to search list
def search_texts(attribute):
    try:
        time.sleep(10)
        arxiv_id = attribute[0].replace('arXiv:', '').strip()
        title = attribute[1].replace('Title:', '').strip()
        search_term = "github:\"" + arxiv_id + "\""
        search_term = purify(search_term)
        print(search_term)
        search_results = google.search(search_term, 1)
        found = False
        for index, gResult in enumerate(search_results):
            if re.search(repourl_pattern, gResult.link):
                found = True
                print(search_term, index, " ====> ", gResult.link)
                break
        if found == False:
            print(search_term, " ====> ", "NOT FOUND")
    except Exception:
        traceback.print_exc()
def searcher(slide):
    query = slide.title
    query = query[0:-1]
    my_path = 's' + str(slide.slideNum)
    if query.endswith(" "):
        query = query[:-1]  # drop the trailing space
    if not os.path.isdir(my_path):
        os.makedirs(my_path)
    s1 = "googleimagesdownload --keywords \"" + query + "\" --limit 1 -o " + my_path
    s2 = "googleimagesdownload --keywords \"" + query + " meme\" --limit 1 -o " + my_path
    os.system(s1)  # save pic
    os.system(s2)
    advancedQuery = query
    google_result = google.search(advancedQuery, 2)
    i = 0
    for res in google_result:
        if i == 10:
            break
        slide.url.append(res.link)
        i += 1
def google_wiki(ques, options, neg, sal):
    spinner = Halo(text='Googling and searching Wikipedia', spinner='dots2')
    spinner.start()
    num_pages = 1
    points = list()
    content = ""
    maxo = ""
    maxp = -sys.maxsize
    words = split_string(ques)
    for o in options:
        o = o.lower()
        original = o
        o += ' wiki'
        # Get Google search results for option + ' wiki'
        search_wiki = google.search(o, num_pages)
        link = search_wiki[0].link
        content = get_page(link)
        soup = BeautifulSoup(content, "lxml")
        page = soup.get_text().lower()
        temp = 0
        # Only count words that appear in the sal dictionary
        for word in words:
            if word in sal:
                temp = temp + page.count(word)
        temp += smart_answer(page, words)
        if neg:
            temp *= -1
        points.append(temp)
        if temp > maxp:
            maxp = temp
            maxo = original
    spinner.succeed()
    spinner.stop()
    return points, maxo
def getResponseBody():
    commandMessage = "Command: \n 1. Navigate from {from} to {to}.\n 2. Google {keyword}.\n 3. Tweet {message}.\n"
    if 'Body' not in request.values:
        return "No message Body"
    else:
        requestBody = request.values.get('Body')
        requestParams = requestBody.split(' ')
        responseMessage = ""
        if len(requestParams) == 0:
            responseMessage = commandMessage
        elif requestParams[0].lower() == "navigate":
            # Get directions
            fromIndex = requestBody.index('from')
            toIndex = requestBody.index('to')
            origin = requestBody[fromIndex + 5:toIndex]
            destination = requestBody[toIndex + 3:]
            responseMessage = getDirections(origin, destination)
        elif requestParams[0].lower() == "google":
            # Join with spaces so multi-word queries stay intact
            keyword = ' '.join(requestParams[1:])
            num_page = 1
            search_results = google.search(keyword, num_page)
            # print(search_results[0].description)
            if len(search_results) > 0:
                responseMessage = search_results[0].description
        elif requestParams[0].lower() == "tweet":
            t = getTweetClient()
            responseMessage = "Twitter updated"
            twitterMessageBody = requestBody[5:]
            t.statuses.update(status=twitterMessageBody)
        elif requestParams[0].lower() == "hehe":
            responseMessage = ":)"
        else:
            # Give options
            responseMessage = commandMessage
        return responseMessage
async def googler(context):
    a = context.message.content
    stuff = ""
    try:
        b = int(a[8])
    except (ValueError, IndexError):
        query = a[7:]
        num = 2
    else:
        query = a[9:]
        num = b
    search_results = google.search(query)
    for i in search_results[:num]:
        name = i.name[:i.name.index("/") - 6]
        stuff += f"**{name}**\n{i.description} \n(<{i.link}>)\n\n"
    if len(stuff) > 2000:
        await client.say(
            "Bro your request size is too **big**, try and tone it down maybe a little bit"
        )
    else:
        await client.say(stuff)
def return_articles(list_keywords, num_page, list_newspaper_website):
    """Return links of newspaper articles matching a given Google request."""
    if isinstance(list_keywords, str):
        list_keywords = [list_keywords]
    dic_google_search_filtered = {}
    for keywords in list_keywords:
        list_search_results = google.search(keywords, num_page)
        for search in list_search_results:
            if (search.link is not None) \
                    and any(newspaper_website in search.link
                            for newspaper_website in list_newspaper_website):
                article = ArticleFromSearch(
                    link=search.link,
                    google_search_title=search.name,
                    google_search_description=search.description
                )
                try:
                    article.extract_article_from_link('fr')
                except BaseException as e:
                    print('link: %s not working with error %s' % (search.link, e))
                dic_google_search_filtered[search.link] = article.concatenate_output()
    return dic_google_search_filtered
def _search_supplier_data(self, supplier, url, data):
    info = {'url': None, 'sku': None}
    if supplier == 'Farnell':
        self.app.print('Searching Farnell...')
        result = google.search('{} - {} site:{}'.format('Farnell', data.mpn, url))
        if not result:
            return None
        for i, val in enumerate(result):
            self.app.print(f'{i} {val.link}')
        self.app.print('Pick a result [number]: ')
        choice_word = input()
        if choice_word.isdecimal():
            choice_index = int(choice_word)
            info['url'] = result[choice_index].link.strip('RL')
            info['sku'] = re.search(r'.*/(\d*)', info['url']).group(1)
            return info
        else:
            return None
def ask_google(self):
    phrase = self._get_phrase_from_words_list(self.words_list, self.min_words, self.max_words)
    links = []
    if phrase is False:
        return phrase
    else:
        self.log.info(f"Searching '{phrase}'")
        try:
            search_results = google.search(phrase, self.pages)
        except Exception as e:
            self.log.warning(
                f"Probably you have exceeded the maximum number of requests ({e})"
            )
            return links
        for result in search_results:
            self._add_result_to_queries(self.language_slug, phrase, result)
            links.append(result.link)
        self._save_queries_to_file(self.queries)
        return links
def _who_is(self, question):
    theme = interpreters.who_is_interpreter(question)
    try:
        search = 'quem é ' + theme  # "quem é" = "who is" (Portuguese)
        print('[+] Searching: ', search)
        links = (res.link if res.link not in LINK_FILTER else ''
                 for res in google.search(search))
        for link in links:
            content = request(url=link)
            if content:
                clear_answer = get_answer_clean(theme=theme, content=content)
                if clear_answer:
                    return clear_answer
        # "Please try to be more specific in your question..."
        return "Por favor, tente especificar a sua pergunta..."
    except KeyboardInterrupt:
        pass
    except Exception as ex:
        print('Error >>>', ex)
def search_and_print(query, o1, o2, o3, num_pages):
    search_results = google.search(query, num_pages)
    desc = ' '.join([res.description.lower() for res in search_results])
    desc += (' ' + ' '.join([res.name.lower() for res in search_results]))
    # print(search_results[0].name)
    oc1, oc2, oc3 = clean_opts(o1, o2, o3)
    print(query)
    print(oc1, oc2, oc3, sep="|")
    print(desc.count(oc1), desc.count(oc2), desc.count(oc3))
    oc1_c, oc2_c, oc3_c = desc.count(o1), desc.count(o2), desc.count(o3)
    print(oc1_c, oc2_c, oc3_c)
    print(*get_word_wise(desc, oc1, oc2, oc3))
    if isNot:
        print("NOT ### NOT ### NOT")
    if num_pages == 1 and sum([oc1_c, oc2_c, oc3_c]) == 0:
        search_and_print(query, o1, o2, o3, 2)
def alfred_items_for_query(query):
    alfred_results = []

    with open('port', 'r') as infile:
        port = int(infile.read())

    search_results = google.search(query, port)
    for result in search_results:
        title = result.get('title', '')  # .decode('utf-8')
        href = result.get('href', '')  # .decode('utf-8')

        # Output href & set title variable
        attr = {}
        attr['arg'] = ("{\"alfredworkflow\": {\"arg\": \"" + href
                       + "\", \"variables\": {\"title\": \"" + title + "\"}}}")

        alfred_results.append(alfred.Item(
            title=title,
            subtitle=href,
            attributes=attr,
            icon='icon.png',
        ))

    return alfred_results
def doi_search(ref_data, missing_DOIs, num_page=1, num_links=2):
    try:
        for iter, index in enumerate(missing_DOIs):
            try:
                print(f"{iter} Searching Google for article at index {index}...")
                # print(ref_data[index]["text"])
                search_results = google.search(ref_data[index]["text"], num_page)
                print(f"{len(search_results)} results found")
                c = 0
                for result in search_results:
                    if result and result.link[-4:] in [".pdf", ".xls"]:
                        print("PDF encountered and ignored")
                        continue
                    print(f"Searching link {c+1} for the DOI...")
                    print(result.link[-10:])
                    try:
                        html = requests.get(result.link, verify=False, timeout=(5, 10))
                        matches = re.findall(doi_reg, html.text)
                        if matches:
                            ref_data[index]["doi"] = matches[0]
                            save_ref_data(ref_data)
                            print(f"DOI found: {matches[0]} and saved!")
                            break
                    except Exception:
                        print('Something went wrong in http request. Skipping')
                        continue
                    c += 1
                    if c == num_links:
                        break
                print("#" * 90)
            except Exception as e:
                print(str(e))
                continue
    except Exception as e:
        print(str(e))
        exit(1)
def search_single_name(self, name, term): search_results = google.search('"' + name + '" AND "' + term + '"', 1) for result in search_results: find_phone = [] find_cpf = [] find_cnpj = [] try: link = result.link html_text = self.response_link(result=result, name=name) for m in re.finditer(name.upper(), html_text.upper()): [start, end] = m.span() html = html_text[start - 200:end + 200] find_phone = find_phone + re.findall( r"(\(\d{2}\)\s?\d{4,5}-?\d{4})", html) find_cpf = find_cpf + re.findall( r"(\d{3}\.\d{3}\.\d{3}-\d{2})", html) find_cnpj = find_cnpj + re.findall( r"\d{2}\.\d{3}\.\d{3}\/\d{4}\-\d{2}", html) valid_if_exists_url = self.connection.table().find({ "link": link }).count() == 0 if valid_if_exists_url and (len(find_phone) or len(find_cpf)): find_phone = list(dict.fromkeys(find_phone)) find_cpf = list(dict.fromkeys(find_cpf)) find_cnpj = list(dict.fromkeys(find_cnpj)) self.save_table_phone_and_cpf_cpnj(name, find_phone, find_cpf, find_cnpj, link) print(name, str(find_phone), str(find_cpf), str(find_cnpj)) except Exception: Logger.log(name=name, link=result.link) break time.sleep(random.randint(1, 30))
def start_search(id, connection, cursor):
    sql_query = "SELECT Request FROM Requests WHERE chat_id = ?"
    sql_data = (id,)
    requests = cursor.execute(sql_query, sql_data).fetchall()
    for req in requests:
        result = google.search(query=req[0], time_interval='w',
                               pages=config.google_pages, timeout=20)
        # 'Результаты по запросу' = 'Results for the query'
        msg_array = ['Результаты по запросу:\n' + req[0] + '\n']
        need_send = False
        for res in result:
            # Check whether this link has already been sent to the chat
            sql_query = "SELECT chat_id FROM Found WHERE chat_id = ? AND link = ?"
            sql_data = (id, res.link)
            sql_result = cursor.execute(sql_query, sql_data).fetchall()
            if len(sql_result) == 0:
                # This link was found for the first time
                need_send = True
                # Save it to the database
                sql_query = "INSERT INTO Found (chat_id, link) VALUES (?, ?)"
                sql_data = (id, res.link)
                cursor.execute(sql_query, sql_data)
                connection.commit()
                # Build the message
                msg_array.append('**' + res.name + '**\n')
                msg_array[-1] += res.description + '\n'
                msg_array[-1] += res.link + '\n'
        # Send the results
        if need_send:
            # Messages must be cut into blocks of at most 4096 characters
            limit = 4096
            msg = ''
            for block in msg_array:
                if len(msg) + len(block) <= 4096 - 4:  # minus '\n\n' x2
                    msg += block + '\n\n'
                else:
                    bot.send_message(id, msg)
                    msg = block + '\n\n'
                    time.sleep(5)
            if msg != '':
                bot.send_message(id, msg)
        time.sleep(60)  # so Google doesn't ban us
def search(site, search):
    num_page = 3
    search_results = google.search("inurl:" + site + " intext:" + search, num_page)
    search_results_list = []
    subjectivity_list = []
    polarity_list = []
    num = []
    number = 1
    for result in search_results:
        description = result.description
        search_results_list.append(description)
        analysis = TextBlob(description)
        subjectivity = analysis.sentiment.subjectivity
        subjectivity_list.append(subjectivity)
        polarity = analysis.sentiment.polarity  # use polarity here, not subjectivity
        polarity_list.append(polarity)
        number = number + 1
        num.append(number)
        sleep(5)
    tab = tt.Texttable()
    headings = ['Number', 'Results', 'Subjectivity', 'Polarity']
    tab.header(headings)
    for row in zip(num, search_results_list, subjectivity_list, polarity_list):
        tab.add_row(row)
    avg_subjectivity = (sum(subjectivity_list) / len(subjectivity_list))
    avg_polarity = (sum(polarity_list) / len(polarity_list))
    table = tab.draw()
    print(site)
    print(search)
    print(table)
    print(site + " average subjectivity: " + str(avg_subjectivity))
    print(site + " average polarity: " + str(avg_polarity))
def crawl(word, num_page=10):
    # Fetch the data from Google
    search_results = google.search(word, num_page)
    result = []
    # Add each query result to `result`, keeping its title, description and url
    for item in search_results:
        result.append({
            'title': item.name,
            'description': item.description,
            'url': item.link
        })
    # Convert the list of dicts to JSON and save it to a file
    with open('result/{}.txt'.format(word), 'wt') as fw:
        fw.write(json.dumps(result))
    # Unlike above, write one JSON object per entry (no enclosing list brackets)
    with open('result2/{}.txt'.format(word), 'at') as fw:
        for x in result:
            fw.write(json.dumps(x))
def get_lyrics(song, artists):
    try:
        num_page = 1
        search_results = google.search(f"site:azlyrics.com {song} {artists}", num_page)
        lyricslink = search_results[0].link
    except Exception as e:
        print("Something went wrong gathering the lyrics.")
        print("Check if you have a working internet connection.")
        return
    source = requests.get(lyricslink).text
    soup = BeautifulSoup(source, 'lxml')
    lyricsdiv = soup.find('div', class_="col-xs-12 col-lg-8 text-center")
    lyrics = lyricsdiv.find('div', class_=None).text
    lyrics_available = does_song_have_lyrics(lyrics)
    if lyrics_available:
        return "Lyrics:\n" + lyrics
    else:
        return "This song doesn't have any lyrics!"
def parallel_for(o):
    original = o.lower()
    o += ' wiki'
    # Get Google search results for option + ' wiki'
    search_wiki = google.search(o, 1)
    link = search_wiki[0].link
    content = get_page(link)  # Multiprocessing is not multithreading.
    soup = BeautifulSoup(content, "lxml")
    page = soup.get_text().lower()
    temp = 0
    # Count is faster than collections.Counter in this case.
    for word in words:
        temp = temp + page.count(word)
    temp += multiple_coincidence(page, words)
    print(original, " ", temp)
    return [original, temp]
def returnNewsJsonResult():
    if request.method == 'POST':
        data = request.data
        decodeddata = data.decode('utf-8')
        jsondata = json.loads(decodeddata)
        each_query = jsondata.get('query')
        num_page = 1
        # query = "Modi"
        # Note the leading space so the query reads "<query> news"
        search_results = google.search(each_query + " news", num_page)
        newsList = list()
        for result in search_results:
            newsJson = dict()
            http_loc = result.name.find("http")
            newsJson['title'] = result.name[0:http_loc]
            newsJson['desc'] = result.description
            newsJson['url'] = result.link
            newsList.append(newsJson)
        # print(json.dumps(newsList))
        return json.dumps(newsList)
def method2(self):
    search_results = google.search(self.searchExactlyFor, 1, 'en')
    words = ""
    for result in search_results:
        words += result.name + "\n"
        words += result.description + "\n"
    words = words.lower().split()
    prediction = []
    for answer in self.answers:
        answerWords = answer['text'].lower().split()
        count = 0
        for word in words:
            for answerWord in answerWords:
                if answerWord in word:
                    count += 1
        prediction.append(count)
    return prediction
def search1(site, search):
    num_page = 5
    text = 'inurl:' + site + ' intext:' + search
    search_results = google.search(text)
    search_results_list = []
    subjectivity_list = []
    polarity_list = []
    num = []
    number = 1
    for report in search_results:
        description = report.description
        search_results_list.append(description)
        analysis = TextBlob(description)
        subjectivity = analysis.sentiment.subjectivity
        subjectivity_list.append(subjectivity)
        polarity = analysis.sentiment.polarity
        polarity_list.append(polarity)
        number = number + 1
        num.append(number)
        sleep(5)
    tab = tt.Texttable()
    heading = ['Number', 'Result', 'Subjectivity', 'Polarity']
    tab.header(heading)
    for row in zip(num, search_results_list, subjectivity_list, polarity_list):
        tab.add_row(row)
    avg_subjectivity = sum(subjectivity_list) / len(subjectivity_list)
    avg_polarity = sum(polarity_list) / len(polarity_list)
    table = tab.draw()
    print(site)
    print(search)
    print(table)
    print(site + " avg_subjectivity: " + str(avg_subjectivity))
    print(site + " avg_polarity: " + str(avg_polarity))
def search_google(self, query, pages=3, print_results=False):
    """Query Google for search results.

    Args:
        query (String): query to send to Google
        pages (Number): number of pages to parse from the Google result
        print_results (Bool): whether to print the raw results

    Returns:
        (Bool): True on success, False on failure
    """
    self.query = query
    self.inversion = False

    # Check for inversion language and mark it if found
    colored_query = query.split(" ")
    query_without_inversion = query.split(" ")
    for i, word in enumerate(colored_query):
        for inversion in kInversionWords:
            if inversion in word.lower():
                self.inversion = True
                colored_query[i] = termcolor.colored(colored_query[i], "red")
                # Since inversions don't help in our queries, we'll just drop them
                query_without_inversion[i] = ""
    colored_query_str = " ".join(colored_query)
    query_without_inversion_str = " ".join(query_without_inversion)

    self.logger.info("=================================")
    self.logger.info("Query: \"{}\"".format(colored_query_str))

    try:
        self.results = google.search(query_without_inversion_str, pages)
    except Exception as e:
        self.logger.error("Caught exception in google query: {}".format(e))
        return False

    self.logger.info("Got {} results from the googz".format(len(self.results)))

    if print_results:
        print(self.results)

    return True
def link_to_use():
    if not show_name_exceptions():
        pass
    else:
        show_name = show_name_exceptions()
    if show_name.endswith("all eps"):
        show_name = show_name.replace(" all eps", "")
    else:
        pass
    # In case WatchSeries changes their URL, this might need modifications
    search_results = google.search("ewatchseries .to " + show_name)
    for url in search_results:
        link_found = url.link
        break
    if "season" in link_found:
        link_found, season = link_found.rsplit('/', 1)
        # In case WatchSeries changes their URL, this might need modifications
        link_found = link_found.replace('.to/', '.to/serie/')
        return link_found
    else:
        return link_found
async def api_google_search(client, conn, logger, context, name):
    message = context.message
    server_id = message.server.id
    const = await get_cached_server(conn, server_id)
    lang = const["locale"]
    if lang not in locale:
        # 'ошибка локализации' = 'localization error'
        em = discord.Embed(
            description="{who}, {response}.".format(
                who=message.author.display_name + "#" + message.author.discriminator,
                response="ошибка локализации"),
            colour=0xC5934B)
        await client.send_message(message.channel, embed=em)
        return
    em = discord.Embed(colour=int(const["em_color"], 16) + 512)
    try:
        await client.delete_message(message)
    except Exception:
        pass
    dat = google.search(name, 1)
    if not dat:
        em.description = locale[lang]["api_data_not_found"].format(
            who=message.author.display_name + "#" + message.author.discriminator,
            data=name
        )
        await client.send_message(message.channel, embed=em)
        return
    logger.info("google = {data}".format(data=str(dat)))
    em.set_footer(text="{name}#{discriminator}".format(
        name=message.author.name,
        discriminator=message.author.discriminator
    ))
    em.add_field(
        name="Response",
        value=str(dat),
        inline=True
    )
    await client.send_message(message.channel, embed=em)
def google_wiki(sim_ques, options):
    num_pages = 1
    points = list()
    content = ""
    maxo = ""
    maxp = 0
    words = split_string(sim_ques)
    for o in options:
        o = o.lower()
        original = o
        # search_results = google.search(o, num_pages)
        o += ' wiki'
        search_wiki = google.search(o, num_pages)
        link = search_wiki[0].link
        content = get_page(link)
        soup = BeautifulSoup(content, "lxml")
        page = soup.get_text().lower()
        # Too slow to check another page
        '''
        # search a non-wiki page.. searching becoming too slow
        link = search_results[0].link
        content = get_page(link)
        soup = BeautifulSoup(content)
        page = page + soup.get_text().lower()
        '''
        temp = 0
        for word in words:
            temp = temp + page.count(word)
        temp += smart_answer(page, words)
        points.append(temp)
        if temp > maxp:
            maxp = temp
            maxo = original
    return points, maxo
def api_search(o, sim_ques, neg):
    num_pages = 1
    words = split_string(sim_ques)
    content = ""
    o = o.lower()
    original = o
    o += ' wiki'
    # Get Google search results for option + ' wiki'
    search_wiki = google.search(o, num_pages)
    link = search_wiki[0].link
    content = get_page(link)
    soup = BeautifulSoup(content, "lxml")
    page = soup.get_text().lower()
    temp = 0
    for word in words:
        temp = temp + page.count(word)
    temp += smart_answer(page, words)
    if neg:
        temp *= -1
    return [temp, original]
def test_standard_search(self):
    """Test method to search in google."""
    search = google.search("github")
    self.assertNotEqual(len(search), 0)
allContents.extend(themesInPage)
print(str(page) + " link processed")

nonTopics = ['Sign in with your Email, Facebook, Google, Twitter or Microsoft Live account',
             'Back to top of the page', 'Close Sign In', 'Privacy', 'Terms of Use', 'Search',
             'None', 'Donate', 'Help', 'Dashboard', 'Groups', 'Library', 'Browse', 'Help', 'Donate']
cleanedContents = [contentItem for contentItem in allContents if contentItem not in nonTopics]
regex = re.compile(r'CK-12|[1-9]|Teacher|Book')
cleanedContents = filter(lambda i: not regex.search(i), cleanedContents)
cleanedContents = [contentItem.split(" and ")[0] for contentItem in cleanedContents]

suggestedArticles = []
for content in cleanedContents:
    # content = cleanedContents[102]
    try:
        search_results = google.search("wikipedia.org " + content, 1)
        sleep(randint(10, 20))
        if len(search_results) > 2:
            search_results = search_results[0:2]
        for result in search_results:
            # result = search_results[0]
            suggestedArticle = str(result.link)
            suggestedArticles.append(suggestedArticle)
            print(content + " searched." + " Suggested article is: " + suggestedArticle)
    except Exception:
        pass
# Remove repeated results
from google import google

num_page = 1
search_results = google.search("MIT", num_page)
for i in search_results:
    print(i)
    # i.name               # The title of the link
    # i.link               # The external link (NOT implemented yet)
    # i.google_link        # The google link
    # i.description        # The description of the link
    # i.thumb              # The link to a thumbnail of the website (NOT implemented yet)
    # i.cached             # A link to the cached version of the page (NOT implemented yet)
    # i.page               # What page this result was on (when searching more than one page)
    # i.index              # What index on this page it was on
    print()
    print(i.google_link)
    print(i.description)
    for j in range(3):
        print("--------------------------------------------------------------")
from google import google

num_page = 3
search_results = google.search("This is my query", num_page)
print(search_results)