コード例 #1
0
    def __init__(self, politician_name):
        """Collect recent news articles and images for *politician_name*.

        Populates:
        - ``self.link``: a Google Images search URL for the politician;
        - ``self.articles``: up to five ``[title, article_link, image_link]``
          entries built from Ukrainian-language Google News results of the
          last three days.
        """
        news = GoogleNews()
        news.setlang("uk")
        news.setencode("utf-8")
        news.setperiod("3d")
        news.search(politician_name)
        info = news.result()
        self.articles = []

        # Split once instead of twice; still assumes at least two name parts.
        parts = politician_name.split()
        name, surname = parts[0], parts[1]
        self.link = f"https://www.google.com/search?q=+{name}+{surname}+новини&source=lnms&tbm=isch"

        # Fetch the image-search page directly; the original wrapped this one
        # call in a needless nested function that took ``self`` explicitly.
        html_data = requests.get(self.link).text
        soup = BeautifulSoup(html_data, "html.parser")
        # Keep at most the first six <img> tags. Index 0 is typically the
        # Google logo, so article images are taken starting at index 1.
        image_links = [img["src"] for img in soup.find_all("img")[:6]]

        # Bound the loop by what was actually returned so sparse search
        # results no longer raise IndexError (defect in the original).
        count = min(5, len(info), max(len(image_links) - 1, 0))
        for i in range(count):
            entry = info[i]
            self.articles.append([entry["title"], entry["link"], image_links[i + 1]])
コード例 #2
0
  def job(self):    
      """Run one collection cycle.

      Downloads the current database file, prints a header and line count,
      scrapes Google News for every tag in ``self.newsTags`` (last 24 hours,
      language ``self.newsLang``), appends the results to the raw file,
      removes duplicates into the final file, renames the final file back
      over the raw one, and backs the database up.
      """
      # Download current database
      self.getDB()
      self.print_header(self.rawFileName)
      self.lineCounter(self.rawFileName)
      # Running total of headlines collected across all tags.
      x = 0
      for tag in self.newsTags:
        #print("Collecting newses from tag: " + tag + "...")
        self.logger.info(f"Collecting newses from tag: {tag}")
        # Fresh client per tag; clear() guards against leftover results.
        googlenews = GoogleNews()
        googlenews.clear()
        googlenews.set_lang(self.newsLang)
        # NOTE(review): mixes the newer set_lang with the older setperiod
        # alias — both exist in the GoogleNews package; confirm the
        # installed version supports both spellings.
        googlenews.setperiod('1d')
        googlenews.get_news(tag)
        output = googlenews.results(sort=True)
        output = pd.DataFrame(output)
        # Count this tag's headlines into the running total.
        x = x + len(output['title'])
        self.saveToFile(output, self.rawFileName)
      self.logger.info(f"Collected amount of news:  {x}")
      self.removeDuplicates(self.rawFileName, self.finalFileName)

      #os.remove(rawFileName) #delete bufor file
      #logger.info(f"Removed file with duplicates:  {rawFileName}")
      os.rename(self.finalFileName, self.rawFileName) #rename final file to bufor name
      self.logger.info(f"Renamed: {self.finalFileName} to: {self.rawFileName}")
      self.backupDB()
コード例 #3
0
def initalize_google_news(start_date, end_date):
    """Initialize and return a GoogleNews client restricted to the given
    date range (English results, UTF-8 encoding).

    Fixes over the original:
    - the encoding was configured twice (constructor *and* ``setencode``);
    - ``setperiod("d")`` was combined with ``setTimeRange``; the GoogleNews
      package treats the period filter as an alternative to an explicit
      range, so the one-day period silently overrode the requested range.
    """
    print("initalize_google_news...")

    googlenews = GoogleNews(encode="utf-8")  # create googlenews object
    googlenews.setlang("en")
    googlenews.setTimeRange(start_date, end_date)  # user-specified date range only

    return googlenews
コード例 #4
0
class GoogleNewsMethods():
    """Thin wrapper around a shared GoogleNews client."""

    # Creates a googlenews object
    def __init__(self):
        self.googlenews = GoogleNews(lang="en")

    # This will return a list of news for perticular stock on a given date
    def newscollection(self, stock, date):
        """Return the list of Google News results for *stock* on *date*.

        Fixes over the original:
        - filters were applied *after* ``search()``, so they never affected
          the results that were returned;
        - the *date* argument was ignored in favour of hard-coded May-2020
          dates, and ``setperiod`` was fed a date string instead of a
          period such as ``'7d'``.
        """
        # Restrict the search window to the requested day, then search.
        self.googlenews.setTimeRange(date, date)
        self.googlenews.search(stock)
        self.newsList = self.googlenews.result()
        return (self.newsList)
コード例 #5
0
"""Minimal GoogleNews walkthrough: configure, search, paginate, inspect."""

from GoogleNews import GoogleNews
from pprint import pprint

news = GoogleNews()

# Configure the client before issuing the query: English results,
# UTF-8 encoding, limited to the last three days.
news.setlang('en')
news.setencode('utf-8')
news.setperiod('3d')

news.search('Programming')

# First page of results.
info = news.result()
print(news.total_count())
print(len(info))

# Fetching page 2 appends its entries to the accumulated result list.
news.get_page(2)
info = news.result()
print(len(info))

pprint(info)
コード例 #6
0
    return dataset



if __name__ == '__main__':
    import time
    import requests
    from bs4 import BeautifulSoup    
    from GoogleNews import GoogleNews
    googlenews = GoogleNews()
    googlenews = GoogleNews(lang='en')
    googlenews = GoogleNews(period='d')
    googlenews = GoogleNews(start='02/01/2020',end='02/28/2020')
    googlenews.setlang('en')
    googlenews.setperiod('d')
    googlenews.setTimeRange('02/01/2020','02/28/2020')
    googlenews.search('APPL')
    googlenews.getpage(2)
    x = googlenews.result()
    for item in x:
        web_link = item['link']
        
        start = time.time()
        page_source = requests.get(web_link)
        soup = BeautifulSoup(page_source.text, "lxml")
        print('s: ', time.time()-start)
        try: 
            text = soup.find('article').text
            #print(text)
        except:
コード例 #7
0
from tinydb import TinyDB, Query
import random
from GoogleNews import GoogleNews

# setting up database: links.json caches previously fetched topic links.
links_db = TinyDB("links.json")
Topic = Query()

# setting up google news api
googlenews = GoogleNews(lang="en")
# NOTE(review): GoogleNews periods usually carry a unit suffix (e.g. "3d");
# a bare "3" may not restrict the search window as intended — confirm.
googlenews.setperiod("3")


def return_random_link(topic):
    """Return a news link for *topic*.

    Prefers a randomly chosen cached link from the TinyDB database; when
    the topic has no cached links, falls back to the top Google News result.

    Bug fixed: ``random.randrange(0, len(links) - 1)`` excluded the last
    cached link and raised ValueError when exactly one link was stored;
    ``random.choice`` samples every entry uniformly.
    """
    links = links_db.search(Topic.topic == topic)
    if len(links) == 0:
        # return the google news link
        print("fetching google link")
        googlenews.search(topic)
        google_links = googlenews.result()
        return google_links[0]["link"]
    else:
        print("fetching link from database")
        return random.choice(links)["link"]


# Demo: print one link for the "latinx" topic (network call on first run).
print(return_random_link("latinx"))
コード例 #8
0
ファイル: webapp.py プロジェクト: manrj/twitter-streamlit
            self.i += 1
            current_prog = self.i / self.length
            self.prog_bar.progress(current_prog)


st.title('Public sentiments')
st.sidebar.title('User Inputs')

searchInput = st.sidebar.text_input('search query')
val = len(searchInput)
if val > 0:
    agree = st.sidebar.checkbox('frequency')
    if agree:
        option = st.sidebar.selectbox('How would you like to be contacted?',
                                      ('1h', '1d', '7d', '1y'))
        googlenews.setperiod(option)
    else:
        st.sidebar.markdown('Select the time range for the search')
        dt1 = st.sidebar.date_input('from date', datetime.date.today())
        dt2 = st.sidebar.date_input('till date', datetime.date.today())
        if dt1 > dt2:
            st.sidebar.error('SELECT A VALID "FROM" DATE')
        else:
            googlenews.setTimeRange(dt1, dt2)
    with st.spinner('Getting data...'):
        googlenews.search(searchInput)
        news_content = []

        ## ''' Google News start '''
        for i in range(1, 1 + 1):
            googlenews.getpage(i)