Esempio n. 1
0
async def googletest(context):
    """Search Google for a string and edit the message with the results.

    The query comes from ``context.arguments``; when that is empty, the text
    of the replied-to message is used instead. If neither yields a query,
    the message is edited to the ``arg_error`` text and nothing is searched.

    Each result is rendered as a Markdown link whose label is the first 30
    characters of the result's ``text`` followed by ``...``. Any failure
    while fetching or formatting results edits the message to the
    ``google_connection_error`` text and stops.
    """
    mg = MagicGoogle()
    reply = await context.get_reply_message()
    query = context.arguments
    if not query and reply:
        query = reply.text
    if not query:
        # Covers both "no arguments, no reply" and a reply that has no text.
        await context.edit(lang('arg_error'))
        return

    query = query.replace(' ', '+')
    if not silent:
        await context.edit(lang('google_processing'))

    # Evaluated outside the try so a bad config value is not misreported
    # as a connection problem.
    limit = int(config['result_length'])
    entries = []
    try:
        # The whole iteration is guarded: the search call itself is where a
        # network failure surfaces, not only the per-item field lookups.
        for item in mg.search(query=query, num=limit):
            title = item['text'][0:30] + '...'
            link = item['url']
            entries.append(f"\n[{title}]({link}) \n")
    except Exception:
        # `Exception` (not a bare except) lets task cancellation propagate.
        await context.edit(lang('google_connection_error'))
        return

    results = "".join(entries)
    await context.edit(f"**Google** |`{query}`| 🎙 🔍 \n"
                       f"{results}",
                       link_preview=False)
    await log(f"{lang('google_success')} `{query}`")
Esempio n. 2
0
async def googletest(context):
    """Search Google for a string and edit the message with the results.

    The query comes from ``context.arguments``; when that is empty, the text
    of the replied-to message is used instead. If neither yields a query,
    the message is edited to an invalid-argument notice and nothing is
    searched.

    Each result is rendered as a Markdown link whose label is the first 30
    characters of the result's ``text`` followed by ``...``. Any failure
    while fetching or formatting results edits the message to a
    connection-failure notice and stops.
    """
    mg = MagicGoogle()
    reply = await context.get_reply_message()
    query = context.arguments
    if not query and reply:
        query = reply.text
    if not query:
        # Covers both "no arguments, no reply" and a reply that has no text.
        await context.edit("出错了呜呜呜 ~ 无效的参数。")
        return

    query = query.replace(' ', '+')
    await context.edit("正在拉取结果 . . .")

    # Evaluated outside the try so a bad config value is not misreported
    # as a connection problem.
    limit = int(config['result_length'])
    entries = []
    try:
        # The whole iteration is guarded: the search call itself is where a
        # network failure surfaces, not only the per-item field lookups.
        for item in mg.search(query=query, num=limit):
            title = item['text'][0:30] + '...'
            link = item['url']
            entries.append(f"\n[{title}]({link}) \n")
    except Exception:
        # `Exception` (not a bare except) lets task cancellation propagate.
        await context.edit("连接到 google服务器 失败")
        return

    results = "".join(entries)
    await context.edit(f"**Google** |`{query}`| 🎙 🔍 \n"
                       f"{results}",
                       link_preview=False)
    await log(f"在Google搜索引擎上查询了 `{query}`")
Esempio n. 3
0
class TestMagicGoogle(unittest.TestCase):
    """Test case for the MagicGoogle client, built with a loopback proxy."""

    def setUp(self):
        """Create a fresh proxied MagicGoogle instance for each test."""
        proxy_pool = [
            {'http': '127.0.0.1:1087', 'https': '127.0.0.1:1087'},
        ]
        self.mg = MagicGoogle(proxy_pool)

    def tearDown(self):
        """Drop the reference to the client created in setUp."""
        self.mg = None

    def test_search_url(self):
        """The first URL returned for 'python' must be the python.org home page."""
        # A random pause between 2 and 15 is handed to search_url.
        pause_seconds = random.randint(2, 15)
        found_urls = list(
            self.mg.search_url(query='python', num=1, pause=pause_seconds)
        )
        first_url = found_urls[0]
        self.assertEqual(
            first_url,
            'https://www.python.org/',
            'test search_url fail',
        )
Esempio n. 4
0
import os
import sys
import time
import random
# import pprint
import codecs

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
# https://github.com/howie6879/magic_google
# pip install magic_google
from magic_google import MagicGoogle

mg = MagicGoogle()

# Page index: selects both the output file name and the search offset.
x = 0

# Write every URL returned for the query to result-<x>.txt (UTF-8), echoing
# each one to stdout. The context manager closes the file even if the
# search raises partway through.
with codecs.open("result-" + str(x) + ".txt", "w", "utf-8") as file:
    for url in mg.search_url(query='Github', num=100, start=x * 100):
        file.write(url)
        print(url)
        # Separator line between consecutive URLs in the output file.
        file.write("\n----------------\n")
import pprint

sys.path.append(os.path.dirname(os.path.dirname(__file__)))
from magic_google import MagicGoogle

################################################
# """
# cd magic_google
# python examples/google_search.py
# """
#################################################

# Proxy list handed to the client; both schemes point at the loopback
# address 127.0.0.1:1087.
PROXIES = [
    {
        'http': 'http://127.0.0.1:1087',
        'https': 'http://127.0.0.1:1087',
    },
]

# MagicGoogle() with no arguments is the alternative to passing PROXIES.
mg = MagicGoogle(PROXIES)

# Pretty-print each result of an English-language search for 'python'
# with one result requested. Per the original note, each result is a
# {'title', 'url', 'text'} mapping.
for hit in mg.search(query='python', num=1, language='en'):
    pprint.pprint(hit)

# Sleep for a random whole number of seconds between 1 and 5.
pause_seconds = random.randint(1, 5)
time.sleep(pause_seconds)

# Output
# {'text': 'The official home of the Python Programming Language.',
Esempio n. 6
0
################################################
# """
# cd magic_google
# python examples/google_search.py
# """
#################################################

# Proxy list for the loopback address 127.0.0.1:1087; only relevant when
# the client is constructed as MagicGoogle(PROXIES).
PROXIES = [
    {'http': 'http://127.0.0.1:1087', 'https': 'http://127.0.0.1:1087'},
]

# This script builds the client without proxies.
mg = MagicGoogle()
search_key = input("Enter the Search Keyword: ")

# Pretty-print each result of an English-language search for the entered
# keyword with one result requested. Per the original note, each result
# is a {'title', 'url', 'text'} mapping.
query_text = str(search_key)
for hit in mg.search(query=query_text, num=1, language='en'):
    pprint.pprint(hit)

# Sleep for a random whole number of seconds between 1 and 5.
pause_seconds = random.randint(1, 5)
time.sleep(pause_seconds)

# Output
# {'text': 'The official home of the Python Programming Language.',
Esempio n. 7
0
import sys
from magic_google import MagicGoogle
import pprint
# The query is the first command-line argument; exit with a usage message
# instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s QUERY" % sys.argv[0])

mg = MagicGoogle()

# Pretty-print up to 10 results for the query.
for i in mg.search(query=sys.argv[1], num=10):
    pprint.pprint(i)
Esempio n. 8
0
def search(keyword, num=num, ty='google'):
    """Search for *keyword*, fetch each result URL and append its text to corpus files.

    Args:
        keyword: Query string handed to the selected search backend.
        num: Result count passed to the 'google' backend. The default is the
            module-level ``num`` as it was bound when this function was
            defined. The other backends do not receive it.
        ty: Backend selector. ``'google'`` uses ``MagicGoogle(PROXIES)``,
            ``'gcs'`` uses ``search_google.api.results(BUILDARGS, CSEARGS)``,
            and any other value uses ``MagicBaidu()``.

    Returns:
        None. Output goes to files named ``PATH + 'corpu' + <timestamp> + '.txt'``
        and to the log. After processing, the function sleeps for a random
        30-100 seconds before returning.
    """
    logging.info('搜索关键词:' + keyword)

    if ty == 'google':
        mg = MagicGoogle(PROXIES)
        urls = mg.search_url(query=keyword, num=num, start=0, pause=5)
    elif ty == 'gcs':
        results = search_google.api.results(BUILDARGS, CSEARGS)
        urls = results.get_values('items', 'link')
    else:
        mg = MagicBaidu()
        urls = mg.search_url(query=keyword, start=0, pause=5)

    # NOTE(review): this extractor is never used below; the construction is
    # kept only in case it has side effects — confirm and remove.
    cx = tkit.CxExtractor()

    # Count of pages accepted so far; drives the output-file rotation.
    n = 0
    file_name = PATH + 'corpu' + str(time.time()) + ".txt"

    for url in urls:
        logging.info(url)

        try:
            items = url_text(url=url)
        except Exception:
            # Best-effort crawl: a page that cannot be fetched is skipped,
            # but the failure is logged instead of vanishing silently.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            logging.warning('skipping %s: fetching page text failed', url,
                            exc_info=True)
            continue

        items = text_pre(items)
        logging.info('句子数目为: ' + str(items['sentences_num']))

        # Only pages with more than 5 sentences are written out.
        if items['sentences_num'] > 5:
            n = n + 1
            if n % 5 == 0:
                # Start a new corpus file on every fifth accepted page.
                file_name = PATH + 'corpu' + str(time.time()) + ".txt"

            logging.info('写入文件:  ' + file_name)
            # The context manager closes the file even if the write fails.
            with open(file_name, 'a') as my_open:
                my_open.write(str(items['text']) + '\n\n')

    # Random pause after the search finishes.
    t = random.randint(30, 100)

    logging.info('搜索结束休息中 ' + str(t) + 's')
    logging.info("Start : %s" % time.ctime())
    time.sleep(t)
    logging.info("End : %s" % time.ctime())
Esempio n. 9
0
from magic_google import MagicGoogle

################################################
# """
# cd magic_google
# python examples/google_search.py
# """
#################################################

# Proxy list for the loopback address 127.0.0.1:1087; only relevant when
# the client is constructed as MagicGoogle(PROXIES).
PROXIES = [
    {'http': 'http://127.0.0.1:1087', 'https': 'http://127.0.0.1:1087'},
]

# This script builds the client without proxies.
mg = MagicGoogle()

# Fetch the first page of results for 'python' and print it.
result = mg.search_page(query='python')
print(result)

# Sleep for a random whole number of seconds between 1 and 5.
first_pause = random.randint(1, 5)
time.sleep(first_pause)

# Pretty-print each result of an English-language search for 'python'
# with one result requested. Per the original note, each result is a
# {'title', 'url', 'text'} mapping.
for hit in mg.search(query='python', num=1, language='en'):
    pprint.pprint(hit)

# Same random 1-5 second sleep after the second query.
second_pause = random.randint(1, 5)
time.sleep(second_pause)

# Output
# {'text': 'The official home of the Python Programming Language.',
Esempio n. 10
0
 def setUp(self):
     """Create the MagicGoogle instance under test, built with a loopback proxy."""
     proxy_pool = [
         {'http': '127.0.0.1:1087', 'https': '127.0.0.1:1087'},
     ]
     self.mg = MagicGoogle(proxy_pool)