async def googletest(context):
    """ Searches Google for a string.

    The query comes from the command arguments, falling back to the text of
    the replied-to message. The message is edited in place to show the
    results as ``[title](link)`` entries, and the query is logged afterwards.
    """
    mg = MagicGoogle()
    reply = await context.get_reply_message()
    query = context.arguments
    if not query:
        # Fall back to the replied-to message; a reply without text is
        # treated the same as a missing argument.
        query = reply.text if reply else None
    if not query:
        await context.edit(lang('arg_error'))
        return
    query = query.replace(' ', '+')
    if not silent:
        await context.edit(lang('google_processing'))
    # Parsed outside the try so a bad config value is not reported as a
    # connection error.
    limit = int(config['result_length'])
    entries = []
    try:
        # The search generator is consumed inside the try, so failures raised
        # while fetching results are handled, not only malformed entries.
        for item in mg.search(query=query, num=limit):
            title = item['text'][0:30] + '...'
            link = item['url']
            entries.append(f"\n[{title}]({link}) \n")
    except Exception:
        await context.edit(lang('google_connection_error'))
        return
    results = "".join(entries)
    await context.edit(f"**Google** |`{query}`| 🎙 🔍 \n"
                       f"{results}",
                       link_preview=False)
    await log(f"{lang('google_success')} `{query}`")
async def googletest(context):
    """ Searches Google for a string.

    The query comes from the command arguments, falling back to the text of
    the replied-to message. The message is edited in place to show the
    results as ``[title](link)`` entries, and the query is logged afterwards.
    """
    mg = MagicGoogle()
    reply = await context.get_reply_message()
    query = context.arguments
    if not query:
        # Fall back to the replied-to message; a reply without text is
        # treated the same as a missing argument.
        query = reply.text if reply else None
    if not query:
        await context.edit("出错了呜呜呜 ~ 无效的参数。")
        return
    query = query.replace(' ', '+')
    await context.edit("正在拉取结果 . . .")
    # Parsed outside the try so a bad config value is not reported as a
    # connection error.
    limit = int(config['result_length'])
    entries = []
    try:
        # The search generator is consumed inside the try, so failures raised
        # while fetching results are handled, not only malformed entries.
        for item in mg.search(query=query, num=limit):
            title = item['text'][0:30] + '...'
            link = item['url']
            entries.append(f"\n[{title}]({link}) \n")
    except Exception:
        await context.edit("连接到 google服务器 失败")
        return
    results = "".join(entries)
    await context.edit(f"**Google** |`{query}`| 🎙 🔍 \n"
                       f"{results}",
                       link_preview=False)
    await log(f"在Google搜索引擎上查询了 `{query}`")
class TestMagicGoogle(unittest.TestCase):
    """Tests for the MagicGoogle class."""

    def setUp(self):
        # Build a client configured with a single proxy entry on 127.0.0.1:1087.
        proxy_pool = [{'http': '127.0.0.1:1087', 'https': '127.0.0.1:1087'}]
        self.mg = MagicGoogle(proxy_pool)

    def tearDown(self):
        # Drop the reference to the client created in setUp.
        self.mg = None

    def test_search_url(self):
        # A random pause between 2 and 15 seconds is passed to the search call.
        pause_seconds = random.randint(2, 15)
        found = self.mg.search_url(query='python', num=1, pause=pause_seconds)
        urls = list(found)
        self.assertEqual(urls[0], 'https://www.python.org/',
                         'test search_url fail')
import os
import sys
import time
import random
# import pprint
import codecs

# Make the parent of this file's directory importable.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

# https://github.com/howie6879/magic_google
# pip install magic_google
from magic_google import MagicGoogle

mg = MagicGoogle()

# Page index: selects both the output file name and the search start offset.
x = 0

# The context manager closes the file even if the search raises part-way
# through, so the URLs written so far are flushed and the handle is released.
with codecs.open("result-" + str(x) + ".txt", "w", "utf-8") as file:
    for url in mg.search_url(query='Github', num=100, start=x * 100):
        file.write(url)
        print(url)
        # Separator line between consecutive URLs.
        file.write("\n----------------\n")
import pprint

# Make the parent of this file's directory importable.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from magic_google import MagicGoogle

################################################
# Usage:
#   cd magic_google
#   python examples/google_search.py
################################################

# One proxy entry, the same endpoint for both schemes.
PROXIES = [dict.fromkeys(('http', 'https'), 'http://127.0.0.1:1087')]

# Or MagicGoogle()
mg = MagicGoogle(PROXIES)

# The first page of results
# result = mg.search_page(query='python')
# print(result)
#
# time.sleep(random.randint(1, 5))

# Get {'title','url','text'}
hits = mg.search(query='python', num=1, language='en')
for hit in hits:
    pprint.pprint(hit)

# Pause for a random 1-5 seconds.
time.sleep(random.randint(1, 5))

# Output
# {'text': 'The official home of the Python Programming Language.',
################################################
# Usage:
#   cd magic_google
#   python examples/google_search.py
################################################

# One proxy entry, the same endpoint for both schemes. Not passed to the
# client created below.
PROXIES = [dict.fromkeys(('http', 'https'), 'http://127.0.0.1:1087')]

# Or MagicGoogle()
# mg = MagicGoogle(PROXIES)
mg = MagicGoogle()

# Read the keyword to search for from standard input.
search_key = input("Enter the Search Keyword: ")

# The first page of results
# result = mg.search_page(query='python')
# print(result)
#
# time.sleep(random.randint(1, 5))

# Get {'title','url','text'}
hits = mg.search(query=str(search_key), num=1, language='en')
for hit in hits:
    pprint.pprint(hit)

# Pause for a random 1-5 seconds.
time.sleep(random.randint(1, 5))

# Output
# {'text': 'The official home of the Python Programming Language.',
import sys
from magic_google import MagicGoogle
import pprint

# Pretty-print up to 10 search results for the query given as the first
# command-line argument.

# Exit with a usage hint instead of an IndexError traceback when the query
# argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s QUERY" % sys.argv[0])

mg = MagicGoogle()
for i in mg.search(query=sys.argv[1], num=10):
    pprint.pprint(i)
def search(keyword, num=num, ty='google'):
    """Search for *keyword*, fetch each result URL and append its text to corpus files.

    Args:
        keyword: Search query string.
        num: Number of results to request; only passed to the ``'google'``
            backend.
        ty: Backend selector. ``'google'`` uses ``MagicGoogle(PROXIES)``,
            ``'gcs'`` uses ``search_google.api.results``, and any other value
            uses ``MagicBaidu``.

    Returns:
        None. Pages whose processed text has more than 5 sentences are
        appended to ``corpu<timestamp>.txt`` files under ``PATH``. After the
        last URL the function sleeps for a random 30-100 seconds.
    """
    logging.info('搜索关键词:' + keyword)
    if ty == 'google':
        mg = MagicGoogle(PROXIES)
        urls = mg.search_url(query=keyword, num=num, start=0, pause=5)
    elif ty == 'gcs':
        # NOTE(review): keyword is not passed on this branch; the query
        # presumably comes from CSEARGS -- confirm against the caller.
        results = search_google.api.results(BUILDARGS, CSEARGS)
        urls = results.get_values('items', 'link')
    else:
        mg = MagicBaidu()
        urls = mg.search_url(query=keyword, start=0, pause=5)

    accepted = 0
    file_name = PATH + 'corpu' + str(time.time()) + ".txt"
    for url in urls:
        logging.info(url)
        try:
            items = url_text(url=url)
        except Exception as exc:
            # Best effort: skip pages that cannot be fetched. Catching
            # Exception (not a bare except) lets Ctrl-C and SystemExit
            # interrupt the crawl.
            logging.warning('skipping %s: %s', url, exc)
            continue
        items = text_pre(items)
        logging.info('句子数目为: ' + str(items['sentences_num']))
        if items['sentences_num'] > 5:
            accepted += 1
            if accepted % 5 == 0:
                # Start a new corpus file on every fifth accepted page.
                file_name = PATH + 'corpu' + str(time.time()) + ".txt"
            logging.info('写入文件: ' + file_name)
            # The context manager closes the file even if the write fails.
            with open(file_name, 'a') as corpus_file:
                corpus_file.write(str(items['text']) + '\n\n')

    # Rest for a random interval once the search is finished.
    t = random.randint(30, 100)
    logging.info('搜索结束休息中 ' + str(t) + 's')
    logging.info("Start : %s" % time.ctime())
    time.sleep(t)
    logging.info("End : %s" % time.ctime())
    return
from magic_google import MagicGoogle

################################################
# Usage:
#   cd magic_google
#   python examples/google_search.py
################################################

# One proxy entry, the same endpoint for both schemes. Not passed to the
# client created below.
PROXIES = [dict.fromkeys(('http', 'https'), 'http://127.0.0.1:1087')]

# Or MagicGoogle()
mg = MagicGoogle()

# The first page of results
result = mg.search_page(query='python')
print(result)

# time.sleep(random.randint(1, 5))

# Get {'title','url','text'}
hits = mg.search(query='python', num=1, language='en')
for hit in hits:
    pprint.pprint(hit)

# Pause for a random 1-5 seconds.
time.sleep(random.randint(1, 5))

# Output
# {'text': 'The official home of the Python Programming Language.',
def setUp(self):
    """Create the MagicGoogle client used by the tests, with one proxy entry."""
    proxy_address = '127.0.0.1:1087'
    proxy_pool = [{'http': proxy_address, 'https': proxy_address}]
    self.mg = MagicGoogle(proxy_pool)