Example #1
def googleTextSearch(query):
    query = query.replace('+', ' ')
    try:
        try:
            search_results = google.search(query)
        except:
            search_results = google.search(query+' j')

        if len(search_results) > 0:
            return search_results
        else:
            return False

    except:
        return False
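A hypothetical call to the function above, assuming the same google package the other examples import; the query string and its contents are made up:

results = googleTextSearch("python+csv+parser")  # hypothetical query; '+' becomes ' '
if results:
    print(results[0].name)  # GoogleResult attributes are listed in Example #44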
Example #2
def main():
	'''
	Main function.
	Takes a CSV file as input, runs a Google search for each row,
	and writes the results to an output CSV file.
	'''
	# check the arguments
	if(len(sys.argv) != 3):
		print '[ERROR] We need two arguments'
		sys.exit(0)

	print 'We get two arguments', sys.argv[1], sys.argv[2]

	# open the CSV file
	ifile = open(sys.argv[1], 'r')
	creader = csv.reader(ifile, delimiter = ',', quotechar = '"')

	ofile = open(sys.argv[2], 'w')
	cwriter = csv.writer(ofile, delimiter = ',', quotechar = '"', quoting = csv.QUOTE_ALL)

	# loop over the CSV rows
	for row in creader:
		gresult = google.search(row[0], 1)
		cwriter.writerow([row[0], len(gresult)])
		time.sleep(random.randint(1, 3))

	# close the files
	ifile.close()
	ofile.close()
Example #3
    def run(self, query, count=10):
        num_page = 1

        results = google.search(query, num_page)[:count]
        result = [google_result_to_dict(obj=obj) for obj in results]
        result = json.dumps(result)
        return result
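The run() method above depends on a google_result_to_dict helper that the snippet does not include; a minimal sketch, assuming the GoogleResult attributes documented in Example #44:

def google_result_to_dict(obj):
    # Hypothetical helper, not part of the original project: flattens a
    # GoogleResult into a JSON-serializable dict.
    return {
        'name': obj.name,                # title of the link
        'link': obj.link,                # external URL
        'description': obj.description,  # snippet text
        'page': obj.page,                # which results page it came from
        'index': obj.index,              # position on that page
    }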
Example #4
    def test_standard_search(self, html_f):
        """Test method to search in google."""

        # replace method to get html from a test html file
        google.standard_search.get_html = \
            Mock(return_value=html_f.read().decode('utf8'))

        search = google.search("github")
        self.assertNotEqual(len(search), 0)
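html_f looks like a pytest fixture that hands the test a saved results page; a sketch of what it might be, with a made-up file path:

import pytest

@pytest.fixture
def html_f():
    # Hypothetical fixture: open a saved Google results page in binary mode
    # so that .read().decode('utf8') in the test works as written.
    with open('tests/html_files/github_results.html', 'rb') as f:
        yield f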
Example #5
 def google_search(self, keyword, n_results=1):
     n_pages = 1
     lang = 'en'
     search_results = google.search(keyword, n_pages, lang)
     properties = [] 
     for result in search_results[:n_results]:
         p = {'title':str.str_encode(result.name),
              'link':result.link,
              'desc':str.str_encode(result.description),
              'img':result.thumb}
         properties.append(p)
     return properties 
Example #6
    def search(self, query, **kwargs):

        query = self._query(query, **kwargs)
        scraper = {'query': query, 'result': []}
        found = 0

        gs = google.search(query, lang='de')
        if gs:
            for entry in gs:
                found += 1
                result = dict(name=entry.name, link=entry.link)
                scraper['result'].append(result)

        self._data['scraper'].update({'GoogleHTTP': scraper})
        return found
Example #7
    def google(self):
        print bcolors.UNDERLINE + "------------Google module---------------------" + bcolors.ENDC
        print "First Page"
        search_results = google.search("\"" + self.email_address + "\"", 1)
        results = {}
        others = []
        for result in search_results:
            print bcolors.OKGREEN + result.name + bcolors.ENDC
            print bcolors.FAIL + result.description + bcolors.ENDC
            print result.link
            others.append(result.name)
            others.append(result.description)
            others.append(result.link)
            results[result.link] = others  # accumulate instead of overwriting the dict
            others = []

        return results
Example #8
def doGoogleSearch():
	responseMessage = ""
	if 'Body' not in request.values:
		return "No message Body"
	else:
		requestBody = request.values.get('Body').encode('utf-8')

	requestParams = requestBody.split(' ')
	keyword = ""
	for item in requestParams[1:]:
		keyword += item
	num_page = 1
	search_results = google.search(keyword, num_page)
	if len(search_results) > 0:
		responseMessage = unicode(search_results[0].description, "utf-8")
	else:
		responseMessage = "Error, no such results"
	return responseMessage
Example #9
File: spam.py Project: kh0p/toraeru
def filter_search(search_list=[],file_name="lolicon.url",keywords='lolicon loli pictures'):
	file_name = cache_dir + file_name # adds path to cache folder

	# Check path - existence of a file
	if os.path.exists(file_name):
		f = open(file_name, "r+")
	else:
		f = open(file_name, "wt")

	for url in search(keywords, stop=256):
		for x in range(0, len(common_finds)):
			if common_finds[x] in url:
				url = ""
			# checks common finds without lolicon content

		f_url = url + "\n" 			# newline added - f=formated
		f.write(f_url) 				# saves url to file
		search_list.append(url) 	# add a url to search list
Example #10
    def get_pesqs(self, temas):
        temas_pesq = []

        for tema in temas:
            pesq = google.search(tema, 1)
            i = 0
            for p in pesq:
                if i < 3:
                    if p.name and p.description:
                        obj = {
                            "nome": p.name,
                            "link": p.link,
                            "desc": p.description,
                            "tema": tema
                        }
                        temas_pesq.append(obj)
                        i = i + 1

        return temas_pesq
Example #11
def getEmail(business_name, city, num_page, title_info=False):
    """
    This program takes in a business name, 
    runs a google search, 
    then extracts every address associated with 
    that business name, stores them in a dictionary,
    and returns that dictionary
    
    """
    #current_address = str(current_address)
    num_page = int(num_page)

    # construct query
    name = str(business_name)
    location_string = str(city)
    address_string = ' email address'

    # Final Query
    query = name + ' ' + location_string + address_string
    #print(query)

    # Search "Final Query" in Google
    search_results = google.search(query, num_page)

    # Parse search results to extract address

    google_results = []
    for result in search_results:
        #print (result.description)
        if title_info == False:
            googles = re.findall(r'[\w\.-]+@[\w\.-]+', result.description)
            if len(googles) != 0:
                google_results.append(googles)
        if title_info == True:
            googles = re.findall(r'[\w\.-]+@[\w\.-]+', result.name)
            if len(googles) != 0:
                google_results.append(googles)

    print(google_results)
    if len(google_results) == 0:
        return False
    else:
        return google_results[0][0]
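A hypothetical call, with invented inputs:

# Hypothetical usage; the business name and city are made up.
email = getEmail("Acme Corp", "Boston", 1)
if email:
    print("Found: " + email)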
Example #12
def googleHacking(domain, dork, numP):

    results = google.search('site:' + domain + ' ' + dork, numP)

    print("-> Google Hacking Resuts")

    if not len(results) == 0:
        for i in range(len(results)):
            print("[*] Name: " + str(results[i].name))
            print("[*] Link: " + str(results[i].link))
            print("[*] URL: " + str(results[i].google_link))
            print("[*] Description: " + str(results[i].description))
            print("[*] Thumb: " + str(results[i].thumb))
            print("[*] Cached: " + str(results[i].cached))
            print("[*] Page: " + str(results[i].page))
            print("[*] Index: " + str(results[i].index))
            print("[*] Number of Results: " +
                  str(results[i].number_of_results) + "\n")
    else:
        print("[!] Nothing was retrieved.")
Example #13
def filter_search(search_list=[],
                  file_name="lolicon.url",
                  keywords='lolicon loli pictures'):
    file_name = cache_dir + file_name  # adds path to cache folder

    # Check path - existence of a file
    if os.path.exists(file_name):
        f = open(file_name, "r+")
    else:
        f = open(file_name, "wt")

    for url in search(keywords, stop=256):
        for x in range(0, len(common_finds)):
            if common_finds[x] in url:
                url = ""
            # checks common finds without lolicon content

        f_url = url + "\n"  # newline added - f=formated
        f.write(f_url)  # saves url to file
        search_list.append(url)  # add a url to search list
Example #14
def search_texts(attribute):
	try:
		time.sleep(10)
		arxiv_id = attribute[0].replace('arXiv:','').strip()
		title = attribute[1].replace('Title:','').strip()
		search_term = "github:\"" + arxiv_id + "\""
		search_term = purify(search_term)
		print(search_term)
		search_results = google.search(search_term, 1)
		found = False
		for index, gResult in enumerate(search_results):
			if re.search(repourl_pattern, gResult.link):
				found = True
				print(search_term, index, " ====> ", gResult.link)
				break
		if found==False:
			print(search_term, " ====> ", "NOT FOUND")

	except Exception:
		traceback.print_exc()
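purify and repourl_pattern are defined elsewhere in that project; minimal stand-ins, offered purely as assumptions, might look like this:

import re

# Hypothetical stand-ins for names the snippet uses but does not define.
repourl_pattern = re.compile(r'https?://github\.com/[\w.-]+/[\w.-]+')

def purify(term):
    # Strip characters that tend to break the query string.
    return re.sub(r'[^\w\s:".-]', '', term)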
Example #15
def searcher(slide):
    query = slide.title
    query = query[0:-1]
    my_path = 's' + str(slide.slideNum)
    if query.endswith(" "):
        query = my_path[:-1]
    if not os.path.isdir(my_path):
        os.makedirs(my_path)
    s1 = "googleimagesdownload --keywords \"" + query + "\" --limit 1 -o " + my_path
    s2 = "googleimagesdownload --keywords \"" + query + " meme\" --limit 1 -o " + my_path
    os.system(s1)  # save pic
    os.system(s2)
    advancedQuery = query
    google_result = google.search(advancedQuery, 2)
    i = 0
    for res in google_result:
        if i == 10:
            break
        slide.url.append(res.link)
        i += 1
Example #16
def google_wiki(ques, options, neg, sal):
    spinner = Halo(text='Googling and searching Wikipedia', spinner='dots2')
    spinner.start()
    num_pages = 1
    points = list()
    content = ""
    maxo = ""
    maxp = -sys.maxsize
    words = split_string(ques)
    for o in options:

        o = o.lower()
        original = o
        o += ' wiki'

        # get google search results for option + 'wiki'
        search_wiki = google.search(o, num_pages)

        link = search_wiki[0].link
        content = get_page(link)
        soup = BeautifulSoup(content, "lxml")
        page = soup.get_text().lower()

        temp = 0

        for word in words:
            if word in sal:
                temp = temp + page.count(word)
        temp += smart_answer(page, words)
        if neg:
            temp *= -1
        points.append(temp)
        if temp > maxp:
            maxp = temp
            maxo = original
    spinner.succeed()
    spinner.stop()
    return points, maxo
Example #17
def getResponseBody():

    commandMessage = "Command: \n 1. Navigate from {from} to {to}.\n 2. Google {keyword}.\n 3. Tweet {message}.\n"

    if 'Body' not in request.values:
        return "No message Body"
    else:
        requestBody = request.values.get('Body').encode('utf-8')

    requestParams = requestBody.split(' ')
    responseMessage = ""

    if len(requestParams) == 0:
        responseMessage = commandMessage
    elif requestParams[0].lower() == "navigate":
        # Get directions
        fromIndex = requestBody.index('from')
        toIndex = requestBody.index('to')
        origin = requestBody[fromIndex + 5:toIndex]
        destination = requestBody[toIndex + 3:]
        responseMessage = getDirections(origin, destination)
    elif requestParams[0].lower() == "google":
        keyword = ""
        for item in requestParams[1:]:
            keyword += item
        num_page = 1
        search_results = google.search(keyword, num_page)
        # print search_results[0].description
        if len(search_results) > 0:
            responseMessage = unicode(search_results[0].description, "utf-8")
    elif requestParams[0].lower() == "tweet":
        t = getTweetClient()
        responseMessage = "Twitter updated"
        twitterMessageBody = requestBody[5:]
        t.statuses.update(status=twitterMessageBody)
    elif requestParams[0].lower() == "hehe":
        responseMessage = ":)"
    else:
        # Give options:
        responseMessage = commandMessage
    return responseMessage
Example #18
def getResponseBody():

	commandMessage = "Command: \n 1. Navigate from {from} to {to}.\n 2. Google {keyword}.\n 3. Tweet {message}.\n"

	if 'Body' not in request.values:
		return "No message Body"
	else:
		requestBody = request.values.get('Body').encode('utf-8')

	requestParams = requestBody.split(' ')
	responseMessage = ""

	if len(requestParams) == 0:
		responseMessage = commandMessage
	elif requestParams[0].lower() == "navigate" :
		# Get directions
		fromIndex = requestBody.index('from')
		toIndex = requestBody.index('to')
		origin = requestBody[fromIndex+5:toIndex]
		destination = requestBody[toIndex+3:]
		responseMessage = getDirections(origin,destination)
	elif requestParams[0].lower() == "google" :
		keyword = ""
		for item in requestParams[1:]:
			keyword += item
		num_page = 1
		search_results = google.search(keyword, num_page)
		# print search_results[0].description
		if len(search_results) > 0:
			responseMessage = unicode(search_results[0].description, "utf-8")
	elif requestParams[0].lower() == "tweet":
		t = getTweetClient()
		responseMessage = "Twitter updated"
		twitterMessageBody = requestBody[5:]
		t.statuses.update(status=twitterMessageBody)
	elif requestParams[0].lower() == "hehe" :
		responseMessage = ":)"
	else:
		# Give options:
		responseMessage = commandMessage
	return responseMessage
Example #19
async def googler(context):
    a = context.message.content
    stuff = ""
    try:
        b = int(a[8])
    except:
        query = a[7:]
        num = 2
    else:
        query = a[9:]
        num = b
    search_results = google.search(query)
    for i in search_results[:num]:
        name = i.name[:i.name.index("/") - 6]
        stuff += f"**{name}**\n{i.description} \n(<{i.link}>)\n\n"
    if len(stuff) > 2000:
        await client.say(
            "Bro your request size is to **big**, try and tone it down maybe a little bit"
        )
    else:
        await client.say(stuff)
Example #20
def return_articles(list_keywords, num_page, list_newspaper_website):
    """return link of newspaper related to a given google request"""
    if isinstance(list_keywords, str):
        list_keywords = [list_keywords]
    dic_google_search_filtered = {}
    for keywords in list_keywords:
        list_search_results = google.search(keywords, num_page)
        for search in list_search_results:
            if (search.link is not None) \
                    and any(newspaper_website in search.link for newspaper_website in list_newspaper_website):
                article = ArticleFromSearch(
                    link=search.link,
                    google_search_title=search.name,
                    google_search_description=search.description
                )
                try:
                    article.extract_article_from_link('fr')
                except BaseException as e:
                    print('link: %s not working with error %s' % (search.link, e))
                dic_google_search_filtered[search.link] = article.concatenate_output()
    return dic_google_search_filtered
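ArticleFromSearch is project-specific and not shown; a minimal sketch of the interface the loop above assumes (the newspaper3k suggestion is only a guess):

class ArticleFromSearch:
    # Hypothetical minimal version of the class the snippet assumes.
    def __init__(self, link, google_search_title, google_search_description):
        self.link = link
        self.google_search_title = google_search_title
        self.google_search_description = google_search_description
        self.text = ''

    def extract_article_from_link(self, lang):
        # A real implementation might download and parse the page here,
        # e.g. with newspaper3k's Article(self.link, language=lang).
        raise NotImplementedError

    def concatenate_output(self):
        return '\n'.join([self.google_search_title,
                          self.google_search_description, self.text])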
Example #21
    def _search_supplier_data(self, supplier, url, data):
        info = {'url': None, 'sku': None}

        if supplier == 'Farnell':
            self.app.print('Searching Farnell...')
            result = google.search('{} - {} site:{}'.format(
                'Farnell', data.mpn, url))
            if not result:
                return None

            for i, val in enumerate(result):
                self.app.print(f'{i} {val.link}')
            self.app.print('Pick a result [number]: ')
            choice_word = input()
            if choice_word.isdecimal():
                choice_index = int(choice_word)
                info['url'] = result[choice_index].link.strip('RL')
                info['sku'] = re.search(r'.*/(\d*)', info['url']).group(1)
                return info
        else:
            return None
Example #22
 def ask_google(self):
     phrase = self._get_phrase_from_words_list(self.words_list,
                                               self.min_words,
                                               self.max_words)
     links = []
     if phrase is False:
         return phrase
     else:
         self.log.info(f"Searching '{phrase}'")
         try:
             search_results = google.search(phrase, self.pages)
         except Exception as e:
             self.log.warning(
                 f"Probably you have exceeded the maximum number of requests ({e})"
             )
             return links
         for result in search_results:
             self._add_result_to_queries(self.language_slug, phrase, result)
             links.append(result.link)
         self._save_queries_to_file(self.queries)
     return links
Example #23
    def _who_is(self, question):
        theme = interpreters.who_is_interpreter(question)
        try:
            search = 'quem é ' + theme
            print('[+] Searching: ', search)
            links = (res.link if res.link not in LINK_FILTER else ''
                     for res in google.search(search))
            for link in links:
                content = request(url=link)
                if content:
                    clear_answer = get_answer_clean(theme=theme,
                                                    content=content)
                    if clear_answer:
                        return clear_answer

            return "Por favor, tente especificar a sua pergunta..."

        except KeyboardInterrupt:
            pass
        except Exception as ex:
            print('Error >>>', ex)
Example #24
def search_and_print(query, o1, o2, o3, num_pages):
    search_results = google.search(query, num_pages)

    desc = ' '.join([res.description.lower() for res in search_results])
    desc += (' ' + ' '.join([res.name.lower() for res in search_results]))
    # print(search_results[0].name)

    oc1, oc2, oc3 = clean_opts(o1, o2, o3)
    print(query)
    print(oc1, oc2, oc3, sep="|")

    print(desc.count(oc1), desc.count(oc2), desc.count(oc3))
    oc1_c, oc2_c, oc3_c = desc.count(o1), desc.count(o2), desc.count(o3)
    print(oc1_c, oc2_c, oc3_c)
    print(*get_word_wise(desc, oc1, oc2, oc3))

    if isNot:
        print("NOT ### NOT ### NOT")

    if num_pages == 1 and sum([oc1_c, oc2_c, oc3_c]) == 0:
        search_and_print(query, o1, o2, o3, 2)
Example #25
def alfred_items_for_query(query):
    alfred_results = []
    with open('port', 'r') as infile:
        port = int(infile.read())

    search_results = google.search(query, port)
    for result in search_results:
        title = result.get('title', '') #.decode('utf-8')
        href = result.get('href', '') #.decode('utf-8')

        # output href & set title variable
        attr = {}
        attr['arg'] = "{\"alfredworkflow\": {\"arg\": \"" + href + "\", \"variables\": {\"title\": \"" + title + "\"}}}"
        alfred_results.append(alfred.Item(
            title=title,
            subtitle=href,
            attributes=attr,
            icon='icon.png',
        ))

    return alfred_results
Example #26
def doi_search(ref_data, missing_DOIs, num_page=1, num_links=2):
    try:
        for iter, index in enumerate(missing_DOIs):
            try:
                print(
                    f"{iter} Searching Google for article at index {index}...")
                #print (ref_data[index]["text"])
                search_results = google.search(ref_data[index]["text"],
                                               num_page)
                print(f"{len(search_results)} results found")
                c = 0
                for result in search_results:
                    if result and result.link[-4:] in [".pdf", ".xls"]:
                        print("PDF encountered and ignored")
                        continue
                    print(f"Searching link {c+1} for the DOI...")
                    print(result.link[-10:])
                    try:
                        html = requests.get(result.link,
                                            verify=False,
                                            timeout=(5, 10))
                        matches = re.findall(doi_reg, html.text)
                        if matches:
                            ref_data[index]["doi"] = matches[0]
                            save_ref_data(ref_data)
                            print(f"DOI found: {matches[0]} and saved!")
                            break
                    except Exception:
                        print('Something went wrong in http request. Skipping')
                        continue
                    c += 1
                    if c == num_links:
                        break
                print("#" * 90)
            except Exception as e:
                print(str(e))
                continue
    except Exception as e:
        print(str(e))
        exit(1)
Example #27
    def search_single_name(self, name, term):

        search_results = google.search('"' + name + '" AND "' + term + '"', 1)

        for result in search_results:
            find_phone = []
            find_cpf = []
            find_cnpj = []
            try:
                link = result.link
                html_text = self.response_link(result=result, name=name)

                for m in re.finditer(name.upper(), html_text.upper()):
                    [start, end] = m.span()

                    html = html_text[start - 200:end + 200]

                    find_phone = find_phone + re.findall(
                        r"(\(\d{2}\)\s?\d{4,5}-?\d{4})", html)
                    find_cpf = find_cpf + re.findall(
                        r"(\d{3}\.\d{3}\.\d{3}-\d{2})", html)
                    find_cnpj = find_cnpj + re.findall(
                        r"\d{2}\.\d{3}\.\d{3}\/\d{4}\-\d{2}", html)

                valid_if_exists_url = self.connection.table().find({
                    "link": link
                }).count() == 0
                if valid_if_exists_url and (len(find_phone) or len(find_cpf)):
                    find_phone = list(dict.fromkeys(find_phone))
                    find_cpf = list(dict.fromkeys(find_cpf))
                    find_cnpj = list(dict.fromkeys(find_cnpj))
                    self.save_table_phone_and_cpf_cpnj(name, find_phone,
                                                       find_cpf, find_cnpj,
                                                       link)
                    print(name, str(find_phone), str(find_cpf), str(find_cnpj))
            except Exception:
                Logger.log(name=name, link=result.link)
                break

        time.sleep(random.randint(1, 30))
Example #28
def start_search(id, connection, cursor):
    sql_query = "SELECT Request FROM Requests WHERE chat_id = ?"
    sql_data = (id,)
    requests = cursor.execute(sql_query, sql_data).fetchall()
    for req in requests:
        result = google.search(query=req[0], time_interval='w', pages=config.google_pages, timeout=20)
        msg_array = ['Результаты по запросу:\n' + req[0] + '\n']  # "Results for query:"
        need_send = False
        for res in result:
            # check whether this link has already been sent to this chat
            sql_query = "SELECT chat_id FROM Found WHERE chat_id = ? AND link = ?"
            sql_data = (id, res.link)
            sql_result = cursor.execute(sql_query, sql_data).fetchall()
            if len(sql_result) == 0:  # this link is seen for the first time
                need_send = True
                # save it to the DB
                sql_query = "INSERT INTO Found (chat_id, link) VALUES (?, ?)"
                sql_data = (id, res.link)
                cursor.execute(sql_query, sql_data)
                connection.commit()
                # build the message
                msg_array.append('**' + res.name + '**\n')
                msg_array[-1] += res.description + '\n'
                msg_array[-1] += res.link + '\n'

        # send the results
        if need_send:
            # messages must be sliced into blocks of at most 4096 characters
            limit = 4096
            msg = ''
            for block in msg_array:
                if len(msg) + len(block) <= 4096 - 4:  # minus '\n\n' twice
                    msg += block + '\n\n'
                else:
                    bot.send_message(id, msg)
                    msg = block + '\n\n'
                    time.sleep(5)
            if msg != '':
                bot.send_message(id, msg)
        time.sleep(60)  # so Google does not ban us
Example #29
def search(site, search):
    num_page = 3
    search_results = google.search("inurl:" + site + " intext:" + search, num_page)
    search_results_list = []
    subjectivity_list = []
    polarity_list = []
    num = []
    number = 1

    for result in search_results:
        search_results = result.description
        search_results_list.append(search_results)

        analysis = TextBlob(search_results)
        subjectivity = analysis.sentiment.subjectivity
        subjectivity_list.append(subjectivity)
        polarity = analysis.sentiment.polarity
        polarity_list.append(polarity)
        number = number + 1
        num.append(number)
        sleep(5)

    tab = tt.Texttable()
    headings = ['Number', 'Results', 'Subjectivity', 'Polarity']
    tab.header(headings)

    for row in zip(num, search_results_list, subjectivity_list, polarity_list):
        tab.add_row(row)

    avg_subjectivity = (sum(subjectivity_list) / len(subjectivity_list))
    avg_polarity = (sum(polarity_list) / len(polarity_list))

    table = tab.draw()
    print site
    print search
    print table
    print(site + " average subjectivity: " + str(avg_subjectivity))
    print(site + " average polarity: " + str(avg_polarity))
Example #30
def crawl(word, num_page=10):

    # fetch results from Google
    search_results = google.search(word, num_page)
    result = []

    # collect title, description and url for every result of the query
    for item in search_results:
        result.append({
            'title': item.name,
            'description': item.description,
            'url': item.link
        })

    # dump the list of dicts straight to a file as JSON
    with open('result/{}.txt'.format(word), 'wt') as fw:
        fw.write(json.dumps(result))

    # unlike above, written without the enclosing list brackets
    with open('result2/{}.txt'.format(word), 'at') as fw:
        for x in result:
            fw.write(json.dumps(x))
Example #31
def get_lyrics(song, artists):
	try:
		num_page = 1
		search_results = google.search(f"site:azlyrics.com {song} {artists}", num_page)
		lyricslink = search_results[0].link
	except Exception as e:
		print("Something went wrong gathering the lyrics.")
		print("Check if you have a working internet connection.")
		return

	source = requests.get(lyricslink).text
	soup = BeautifulSoup(source, 'lxml')

	lyricsdiv = soup.find('div', class_="col-xs-12 col-lg-8 text-center")
	lyrics = lyricsdiv.find('div', class_=None).text

	lyrics_available = does_song_have_lyrics(lyrics)

	if lyrics_available:
		return "Lyrics:\n" + lyrics
	else:
		return "This song doesn't have any lyrics!"
Example #32
def parallel_for(o):
    original = o.lower()
    o += ' wiki'

    # get google search results for option + 'wiki'
    search_wiki = google.search(o, 1)
    link = search_wiki[0].link
    content = get_page(link)  #Multiprocessing is not multithreading.
    soup = BeautifulSoup(content, "lxml")
    page = soup.get_text().lower()

    temp = 0

    # Count is faster than collections.counter in this case.
    for word in words:
        temp = temp + page.count(word)

    temp += multiple_coincidence(page, words)

    print(original, " ", temp)

    return [original, temp]
Example #33
def returnNewsJsonResult():
    if request.method == 'POST':
        data = request.data
        decodeddata = data.decode('utf-8')
        jsondata = json.loads(decodeddata)

        each_query = jsondata.get('query')

        num_page = 1
        # query = "Modi"
        search_results = google.search(each_query + " news", num_page)
        newsList = list()
        for result in search_results:
            newsJson = dict()
            http_loc = result.name.find("http")
            newsJson['title'] = result.name[0:http_loc]
            newsJson['desc'] = result.description
            newsJson['url'] = result.link
            newsList.append(newsJson)

        # print(json.dumps(newsList))
        return json.dumps(newsList)
Example #34
    def method2(self):
        search_results = google.search(self.searchExactlyFor, 1, 'en')

        words = ""
        for result in search_results:
            words += result.name + "\n"
            words += result.description + "\n"

        words = words.lower().split()

        prediction = []
        for answer in self.answers:
            answerWords = answer['text'].lower().split()

            count = 0
            for word in words:
                for answerWord in answerWords:
                    if answerWord in word:
                        count += 1
            prediction.append(count)

        return prediction
Example #35
def search1(site, search):
    num_page = 5
    text = 'inurl:' + site + ' intext:' + search
    search_results = google.search(text, num_page)
    search_results_list = []
    subjectivity_list = []
    polarity_list = []
    num = []
    number = 1

    for reports in search_results:
        search_results = reports.description
        search_results_list.append(search_results)
        analysis = TextBlob(search_results)
        subjectivity = analysis.sentiment.subjectivity
        subjectivity_list.append(subjectivity)
        polarity = analysis.sentiment.polarity
        polarity_list.append(polarity)
        number = number + 1
        num.append(number)
        sleep(5)

    tab = tt.Texttable()
    heading = ['Number', 'Result', 'Subjectivity', 'Polarity']
    tab.header(heading)

    for row in zip(num, search_results_list, subjectivity_list, polarity_list):
        tab.add_row(row)

    avg_subjectivity = sum(subjectivity_list) / len(subjectivity_list)
    avg_polarity = sum(polarity_list) / len(polarity_list)
    table = tab.draw()
    print(site)
    print(search)
    print(table)
    print(site + "avg_subjectivity " + str(avg_subjectivity))
    print(site + "avg_polarity " + str(avg_polarity))
Example #36
def alfred_items_for_query(query):
    alfred_results = []
    with open('port', 'r') as infile:
        port = int(infile.read())

    search_results = google.search(query, port)
    for result in search_results:
        title = result.get('title', '')  #.decode('utf-8')
        href = result.get('href', '')  #.decode('utf-8')

        # output href & set title variable
        attr = {}
        attr[
            'arg'] = "{\"alfredworkflow\": {\"arg\": \"" + href + "\", \"variables\": {\"title\": \"" + title + "\"}}}"
        alfred_results.append(
            alfred.Item(
                title=title,
                subtitle=href,
                attributes=attr,
                icon='icon.png',
            ))

    return alfred_results
Example #37
    def search_google(self, query, pages=3, print_results=False):
        """Query google for search results
        Args:
            query (String): to send to google
            pages (Number): of pages to parse from google result
        Returns:
            (Bool): On Success or failure
        """
        self.query = query
        self.inversion = False
        # check for inversion language and mark it if found
        colored_query = query.split(" ")
        query_without_inversion = query.split(" ")
        for i, word in enumerate(colored_query):
            for inversion in kInversionWords:
                if inversion in word.lower():
                    self.inversion = True
                    colored_query[i] = termcolor.colored(
                        colored_query[i], "red")
                    # since inversions don't help in our queries,
                    # we'll just drop them
                    query_without_inversion[i] = ""

        colored_query_str = " ".join(colored_query)
        query_without_inversion_str = " ".join(query_without_inversion)
        self.logger.info("=================================")
        self.logger.info("Query: \"{}\"".format(colored_query_str))
        try:
            self.results = google.search(query_without_inversion_str, pages)
        except Exception as e:
            self.logger.error("Caught exception in google query: {}".format(e))
            return False
        self.logger.info("Got {} results from the googz".format(
            len(self.results)))
        if print_results:
            print(self.results)
        return True
Example #38
def link_to_use():
    if not show_name_exceptions():
        pass
    else:
        show_name = show_name_exceptions()

    if show_name.endswith("all eps"):
        show_name = show_name.replace(" all eps", "")
    else:
        pass
    search_results = google.search(
        "ewatchseries .to " + show_name
    )  # in case WatchSeries changes their URL, this might need modifications
    for url in search_results:
        link_found = url.link
        break
    if "season" in link_found:
        link_found, season = link_found.rsplit('/', 1)
        link_found = link_found.replace(
            '.to/', '.to/serie/'
        )  # in case WatchSeries changes their URL, this might need modifications
        return link_found
    else:
        return link_found
Example #39
async def api_google_search(client, conn, logger, context, name):
    message = context.message
    server_id = message.server.id
    const = await get_cached_server(conn, server_id)
    lang = const["locale"]
    if lang not in locale:
        em = discord.Embed(description="{who}, {response}.".format(
            who=message.author.display_name+"#"+message.author.discriminator,
            response="ошибка локализации"),  # "localization error"
            colour=0xC5934B)
        await client.send_message(message.channel, embed=em)
        return
    em = discord.Embed(colour=int(const["em_color"], 16) + 512)
    try:
        await client.delete_message(message)
    except:
        pass
    dat = google.search(name, 1)
    if not dat:
        em.description = locale[lang]["api_data_not_found"].format(
            who=message.author.display_name+"#"+message.author.discriminator,
            data=name
        )
        await client.send_message(message.channel, embed=em)
        return
    logger.info("google = {data}".format(data=str(dat)))
    em.set_footer(text="{name}#{discriminator}".format(
        name=message.author.name,
        discriminator=message.author.discriminator
    ))
    em.add_field(
        name="Response",
        value=str(dat),
        inline=True
    )
    await client.send_message(message.channel, embed=em)
Example #40
def google_wiki(sim_ques, options):
	num_pages = 1
	points = list()
	content = ""
	maxo=""
	maxp=0
	words = split_string(sim_ques)
	for o in options:
		o = o.lower()
		original=o
		#search_results = google.search(o, num_pages)
		o += ' wiki'
		search_wiki = google.search(o, num_pages)

		link = search_wiki[0].link
		content = get_page(link)
		soup = BeautifulSoup(content,"lxml")
		page = soup.get_text().lower()

		#Too slow to check another page
		'''
		#search a non wiki page.. searching becoming too slow
		link = search_results[0].link
		content = get_page(link)
		soup= BeautifulSoup(content)
		page= page + soup.get_text().lower()
		'''
		temp=0
		for word in words:
			temp = temp + page.count(word)
		temp+=smart_answer(page, words)
		points.append(temp)
		if temp>maxp:
			maxp=temp
			maxo=original
	return points,maxo
Example #41
def api_search(o, sim_ques, neg):
    num_pages = 1
    words = split_string(sim_ques)
    content = ""
    o = o.lower()
    original = o
    o += ' wiki'

    # get google search results for option + 'wiki'
    search_wiki = google.search(o, num_pages)

    link = search_wiki[0].link
    content = get_page(link)
    soup = BeautifulSoup(content, "lxml")
    page = soup.get_text().lower()

    temp = 0

    for word in words:
        temp = temp + page.count(word)
    temp += smart_answer(page, words)
    if neg:
        temp *= -1
    return [temp, original]
Example #42
    def test_standard_search(self):
        """Test method to search in google."""

        search = google.search("github")
        self.assertNotEqual(len(search), 0)
Example #43
    allContents.extend(themesInPage)
    print str(page) + " link processed"

nonTopics = ['Sign in with your Email, Facebook, Google, Twitter or Microsoft Live account', 'Back to top of the page',
             'Close Sign In', 'Privacy', 'Terms of Use', 'Search',
             'None', 'Donate', 'Help', 'Dashboard', 'Groups', 'Library', 'Browse', 'Help', 'Donate']
cleanedContents = [contentItem for contentItem in allContents if contentItem not in nonTopics]
regex = re.compile(r'CK-12|[1-9]|Teacher|Book')
cleanedContents = filter(lambda i: not regex.search(i), cleanedContents)
cleanedContents = [contentItem.split(" and ")[0] for contentItem in cleanedContents]

suggestedArticles = []
for content in cleanedContents:
    #content = cleanedContents[102]
    try:
        search_results = google.search("wikipedia.org " + content, 1)
        sleep(randint(10, 20))
        if len(search_results) > 2:
            search_results = search_results[0:2]
            for result in search_results:
                #result = search_results[0]
                suggestedArticle = str(result.link)
                suggestedArticles.append(suggestedArticle)

                print content + " searched." + " Suggested article is: " + suggestedArticle

    except:
        pass


#Remove repeated results
Example #44
from google import google
num_page = 1
search_results = google.search("MIT", num_page)

for i in search_results:
	print i
	# Available GoogleResult attributes:
	#   i.name         # The title of the link
	#   i.link         # The external link (NOT implemented yet)
	#   i.google_link  # The google link
	#   i.description  # The description of the link
	#   i.thumb        # A link to a thumbnail of the website (NOT implemented yet)
	#   i.cached       # A link to the cached version of the page (NOT implemented yet)
	#   i.page         # What page this result was on (when searching more than one page)
	#   i.index        # What index on this page it was on
	print
	print i.google_link
	print i.description
	for j in range(3):
		print "--------------------------------------------------------------"
Example #45
from google import google
num_page = 3
search_results = google.search("This is my query", num_page)
print search_results
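Several examples above sleep between queries so Google does not rate-limit them; a minimal version of that pattern, assuming the same package:

import random
import time

from google import google

# Sketch of the throttling pattern used in several examples above:
# pause for a random interval between consecutive searches.
for query in ["first query", "second query"]:
    results = google.search(query, 1)
    print(query, len(results))
    time.sleep(random.randint(1, 3))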