コード例 #1
0
    def __init__(self, politician_name):
        """Collect recent news articles and thumbnails for a politician.

        Runs a GoogleNews search (language "uk", period "3d") for
        ``politician_name`` and scrapes a Google image-search page for
        thumbnail URLs. ``self.articles`` ends up holding at most five
        entries of the form ``[title, link, image_src]``; ``image_src`` is
        ``None`` when the image page did not yield enough usable ``<img>``
        tags.

        Args:
            politician_name: Whitespace-separated name. Only the first two
                words are used to build the image-search URL.
        """
        news = GoogleNews()
        news.setlang("uk")
        news.setencode("utf-8")
        news.setperiod("3d")
        news.search(politician_name)
        info = news.result()
        self.articles = []

        # Use at most the first two words so a single-word name no longer
        # raises IndexError; two-word names give the same URL as before.
        query = "+".join(politician_name.split()[:2])
        self.link = f"https://www.google.com/search?q=+{query}+новини&source=lnms&tbm=isch"

        # A timeout keeps construction from hanging on a stalled request.
        response = requests.get(self.link, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")

        # Keep the first six images that actually carry a "src" attribute;
        # indexing a tag without one used to raise KeyError.
        image_links = []
        for item in soup.find_all("img"):
            src = item.get("src")
            if src is None:
                continue
            image_links.append(src)
            if len(image_links) == 6:
                break

        # The first image is skipped, matching the previous
        # image_links[i + 1] indexing (presumably a non-result image —
        # confirm against the scraped page).
        thumbnails = image_links[1:]

        # Slicing instead of a fixed range(5) tolerates short result lists.
        for i, text in enumerate(info[:5]):
            image = thumbnails[i] if i < len(thumbnails) else None
            self.articles.append([text["title"], text["link"], image])
コード例 #2
0
def initalize_google_news(start_date, end_date):
    """Build a GoogleNews client limited to a caller-supplied date range.

    Args:
        start_date: Lower bound handed to ``setTimeRange``.
        end_date: Upper bound handed to ``setTimeRange``.

    Returns:
        The configured ``GoogleNews`` instance.
    """
    print("initalize_google_news...")

    client = GoogleNews(encode="utf-8")
    client.setlang("en")
    # NOTE(review): a period ("d") and an explicit time range are both set
    # below; confirm which of the two the library actually applies.
    client.setperiod("d")
    client.setencode("utf-8")
    client.setTimeRange(start_date, end_date)
    return client
コード例 #3
0
class Engine:
    """Paging wrapper around a GoogleNews client configured for English."""

    def __init__(self):
        self.news = GoogleNews()
        self.news.setlang('en')
        self.news.setencode('utf-8')
        # Page most recently requested; search() does not reset it.
        self.pageNumber = 1
        # Last term passed to search(); stays empty until the first search.
        self.searchTerm = ""

    def _requireSearch(self):
        """Raise RuntimeError unless search() has recorded a term."""
        # The previous check compared the bound method ``news.result`` to
        # None, which is never true, so this error could not be raised.
        if not self.searchTerm:
            raise RuntimeError("Engine has not searched yet")

    def _loadPage(self):
        """Clear the client, request ``self.pageNumber``, report non-emptiness."""
        self.news.clear()
        self.news.getpage(self.pageNumber)
        return bool(self.news.result())

    def nextPage(self):
        """Advance one page.

        Returns:
            True if the requested page produced results, otherwise False.

        Raises:
            RuntimeError: If no search has been performed yet.
        """
        self._requireSearch()
        self.pageNumber += 1
        return self._loadPage()

    def previousPage(self):
        """Go back one page.

        Returns:
            True if the requested page produced results; False if it did not
            or if the engine is already on the first page.

        Raises:
            RuntimeError: If no search has been performed yet.
        """
        self._requireSearch()
        # Never request page 0 or below; stay on the first page instead.
        if self.pageNumber <= 1:
            return False
        self.pageNumber -= 1
        return self._loadPage()

    def search(self, term):
        """Search for ``term`` and remember it.

        Returns:
            The client's result list, or False when that list is empty.
        """
        self.news.search(term)
        self.searchTerm = term
        results = self.news.result()
        return results if results else False

    def getPageNumber(self):
        """Return the current page number."""
        return self.pageNumber

    def getResults(self):
        """Return whatever the client currently reports as results."""
        return self.news.result()

    def clear(self):
        """Forward to the client's clear()."""
        self.news.clear()

    def resetPageNumber(self):
        """Set the page counter back to 1 without fetching anything."""
        self.pageNumber = 1
コード例 #4
0
'''
Example of GoogleNews usage.
'''

from GoogleNews import GoogleNews
from pprint import pprint

news = GoogleNews()

# Client settings: language 'en', encoding 'utf-8', period '3d'.
news.setlang('en')
news.setencode('utf-8')
news.setperiod('3d')

# Run the query, then print what total_count() reports and how many
# results are currently held.
news.search('Programming')
info = news.result()
print(news.total_count(), len(info), sep='\n')

# Request page 2 and re-read the results.
news.get_page(2)
info = news.result()
print(len(info))
pprint(info)
コード例 #5
0
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
import jieba
from GoogleNews import GoogleNews
from bs4 import BeautifulSoup
import requests

googlenews = GoogleNews()

# NOTE(review): 'cn' is passed through as the language code unchanged;
# confirm it is a value the GoogleNews client recognises.
googlenews.setlang('cn')
googlenews.setperiod('d')
googlenews.setencode('utf-8')
googlenews.clear()

# Prompt (in Chinese) for the keyword to search for.
x = input("請輸入要搜尋的關鍵字,將為你搜集相關字詞內容:")
googlenews.search(x)

# alldata is not used in the lines visible here; kept in case later code
# reads it.
alldata = googlenews.result()
result = googlenews.gettext()
links = googlenews.get_links()

print()

# Print each text followed by its link, pairing the two lists directly
# instead of indexing both by position.
for text, link in zip(result, links):
    print(text)
    print(link)