Esempio n. 1
0
 def __init__(self, start_date, end_date):
     """Set up NewsAPI clients, magazine lists, and working directories.

     Args:
         start_date (str): inclusive crawl start date, "YYYY-MM-DD".
         end_date (str): inclusive crawl end date, "YYYY-MM-DD".
     """
     # NOTE(security): API keys are hard-coded; move them to env vars or a
     # config file before publishing this code.
     self.API_KEY1 = '9382dd6539f448e59de4ab7c8c214f6f'  # owner: 김민수
     self.API_KEY2 = '08fe48df23494ab0bb4faa1162fee7fa'  # owner: 이명훈
     self.API_KEY3 = '0bc1cc3aff43418ba35488984b6742a4'  # owner: 최범석
     self.API_KEY4 = 'f996355abde44786b91bdef6bc92ee62'  # owner: 이명훈2
     self.API_KEY5 = '2533fbe4f09e4d9dbc51905dcd13d4a3'  # owner: 최범석2
     # One client per key so each crawl stream has its own request quota.
     self.tech_newsapi = NewsApiClient(api_key=self.API_KEY1)
     self.sources = self.tech_newsapi.get_sources()
     self.general_newsapi_1 = NewsApiClient(api_key=self.API_KEY2)
     self.general_newsapi_2 = NewsApiClient(api_key=self.API_KEY3)
     self.general_newsapi_3 = NewsApiClient(api_key=self.API_KEY4)
     self.google_newsapi = NewsApiClient(api_key=self.API_KEY5)
     # Magazine lists, split into chunks (one chunk per general client).
     self.general_magazine1 = [
         "ABC News", "Associated Press", "Business Insider", "CBS News",
         "CNN"
     ]
     self.general_magazine2 = [
         "Mashable", "NBC News", "The New York Times", "Reuters",
         "The Economist"
     ]
     self.general_magazine3 = [
         "The Washington Post", "The Washington Times", "Time", "USA Today"
     ]
     self.tech_magazine = [
         "Ars Technica", "Engadget", "Hacker News", "TechCrunch",
         "TechRader", "The Next Web", "The Verge", "Wired"
     ]
     self.today = datetime.date.today()
     self.start_date = datetime.datetime.strptime(start_date, "%Y-%m-%d")
     self.end_date = datetime.datetime.strptime(end_date, "%Y-%m-%d")
     # +1 makes the day count inclusive of both endpoints; .days is
     # already an int, so the old int() cast was redundant.
     self.timedelta = (self.end_date - self.start_date).days + 1
     # company_list
     self.cor_list = pd.read_csv(
         './company_data/Company.csv')['Name'].tolist()
     # makedirs(exist_ok=True) creates parents as needed, replacing four
     # `exists() == False` / mkdir() pairs and avoiding the check/create race.
     day = self.today.strftime("%Y-%m-%d")
     os.makedirs('./source/{}'.format(day), exist_ok=True)
     os.makedirs('./backup/{}'.format(day), exist_ok=True)
     print("news_crawler start! From: {}, to: {}, {}days".format(
         self.start_date.strftime("%Y-%m-%d"),
         self.end_date.strftime("%Y-%m-%d"), self.timedelta))
Esempio n. 2
0
def main():
    """Build the smart-mirror UI, wire up its refresh timers, run the Qt loop."""
    app = QApplication(sys.argv)
    form = SmartMirrorApp()

    # Clock widget: top-right corner, refreshed twice a second.
    form.create_time_widget()
    move_top_right(form.timeWidget, form)
    clock_timer = QtCore.QTimer()
    clock_timer.timeout.connect(lambda: update_time_label(form))
    clock_timer.start(500)

    # Weather widget: bottom-left corner, refreshed once a minute.
    form.create_weather_widget()
    move_bottom_left(form.weatherWidget, form)
    update_weather_widget(form)
    weather_timer = QtCore.QTimer()
    weather_timer.timeout.connect(lambda: update_weather_widget(form))
    weather_timer.start(60 * 1000)

    # News widget: top-left corner, refreshed every three minutes.
    news_client = NewsApiClient(api_key='a151e158d26740219c7d611284d01989')
    form.create_news_widget()
    move_top_left(form.newsWidget, form)
    update_news_widget(form, news_client)
    news_timer = QtCore.QTimer()
    news_timer.timeout.connect(lambda: update_news_widget(form, news_client))
    news_timer.start(3 * 60 * 1000)

    app.exec_()
Esempio n. 3
0
def top_headlines(outputfile, query, sources, language, country, category):
    """Fetch top headlines, store them in the database, and export to CSV.

    Args:
        outputfile: path of the CSV file to write.
        query: free-text search query (NewsAPI ``q``).
        sources: comma-separated source ids.
        language / country / category: NewsAPI filter parameters.
    """
    api_key = const.SAMPLE_API_KEY
    newsapi = NewsApiClient(api_key)
    try:
        # BUG FIX: get_top_headlines' parameter order is
        # (q, qintitle, sources, language, country, category, ...), so the
        # old positional call shifted every argument one slot
        # (sources -> qintitle, language -> sources, ...). Use keywords.
        headlines = newsapi.get_top_headlines(q=query,
                                              sources=sources,
                                              language=language,
                                              country=country,
                                              category=category,
                                              page_size=100)
        if headlines['status'] == "ok":
            sqlclient = SqlClient(const.DB_NAME)
            connection = sqlclient.create_connection()
            sqlclient.create_table(connection, const.TABLE)
            sqlclient.add_newsitem(connection, const.TABLE, headlines)

            # Export news to csv file
            sqlclient.export_to_csv(connection, const.TABLE, outputfile)
        else:
            print(headlines['message'])
    except NewsAPIException as inst:
        print("Error: " + inst.get_message())
    except Exception as inst:  # best-effort CLI flow: report and continue
        print(inst)
Esempio n. 4
0
def news_agg(request):
    """Render the news-aggregator page with Recode's current top headlines."""
    newsapi = NewsApiClient(api_key='3fe5b067769946879821f0ee5afdab83')
    top_news = newsapi.get_top_headlines(sources='recode')
    articles = top_news['articles']
    # BUG FIX: was print(len(top_news)), which counts the response dict's
    # keys (always 3), not the number of articles.
    print(len(articles))
    # Build one (url, image, description, title) tuple per article. The old
    # loop rebuilt zip() on every iteration and left `mylist` undefined
    # (NameError) whenever zero articles came back.
    mylist = [(a['url'], a['urlToImage'], a['description'], a['title'])
              for a in articles]

    return render(request,
                  'base/news_aggregator.html',
                  context={'mylist': mylist})
Esempio n. 5
0
def index(request):
    """Render the index page with TechCrunch's current top headlines."""
    api_key = os.getenv('key')

    newsapi = NewsApiClient(api_key)
    top = newsapi.get_top_headlines(sources='techcrunch')

    # One (title, desc, img, url, author, source) tuple per article,
    # replacing six parallel lists driven by range(len(...)).
    mylist = [(a['title'], a['description'], a['urlToImage'], a['url'],
               a['author'], a['source']['name'])
              for a in top['articles']]

    return render(request, 'index.html', context={"mylist": mylist})
Esempio n. 6
0
 def get(self,
         fields=None,
         limit=20,
         **kargs):
     """Fetch articles from the "everything" endpoint and reshape them.

     Args:
         fields: article keys to keep; defaults to image/title/link.
             (The old default was a shared mutable list — classic pitfall;
             it also listed "image" twice, which is preserved here.)
         limit: maximum number of articles to return.
         **kargs: forwarded unchanged to NewsApiClient.get_everything.
     """
     if fields is None:
         fields = ["image", "title", "image", "link"]
     newsapi = NewsApiClient(api_key=self.__key)
     all_news = newsapi.get_everything(**kargs)
     return self.__transformDate(all_news.get("articles"), fields, limit)
Esempio n. 7
0
 def get_sources(self,
                 fields=None,
                 limit=20,
                 **kargs):
     """Fetch the available news sources and reshape them.

     Args:
         fields: source keys to keep; defaults to image/title/link.
             (The old default was a shared mutable list — classic pitfall;
             it also listed "image" twice, which is preserved here.)
         limit: maximum number of sources to return.
         **kargs: forwarded unchanged to NewsApiClient.get_sources.
     """
     if fields is None:
         fields = ["image", "title", "image", "link"]
     newsapi = NewsApiClient(api_key=self.__key)
     sources = newsapi.get_sources(**kargs)
     return self.__transformDate(sources.get("sources"), fields, limit)
Esempio n. 8
0
def getresource(searchfor):
    """Return NewsAPI's US/English sources for the *searchfor* category as JSON."""
    # (Dropped the unused local `dicc` from the original.)
    newsapi = NewsApiClient(api_key="d11761b89fdb4599b1497bf951690000")
    sources = newsapi.get_sources(category=searchfor,
                                  language="en",
                                  country="us")
    return jsonify(sources)
Esempio n. 9
0
def gen_sentiment_df(stock="DJI"):
    """Build a per-day headline-sentiment DataFrame for *stock*.

    Covers the fixed window 2020-02-29 .. 2020-03-27.

    Returns:
        pd.DataFrame indexed by day with "magnitude" and "score" columns.
    """
    newsapi = NewsApiClient(api_key='f8970a68f49e43a18c9b5aff8e2bcfe1')
    start = date(2020, 2, 29)
    end = date(2020, 3, 27)
    sentiments = {}
    query = "stocks & " + stock
    for dt in rrule(DAILY, dtstart=start, until=end):
        str_date = dt.strftime("%Y-%m-%d")
        all_articles = newsapi.get_everything(q=query,
                                              from_param=str_date,
                                              to=str_date,
                                              language='en',
                                              sort_by='relevancy',
                                              page=1)
        # Concatenate all string titles for the day into one text blob.
        # (Renamed the loop variable: the original reused `a`, clobbering
        # the start date.)
        headlines = "".join(art["title"] for art in all_articles['articles']
                            if isinstance(art["title"], str))
        result = analyze_sentiment(headlines)
        # BUG FIX: the original stored {magnitude, score} — a *set* — whose
        # iteration order is arbitrary and which collapses equal values, so
        # the DataFrame columns could end up swapped. A tuple keeps order.
        sentiments[dt] = (result.magnitude, result.score)
    sentiment_df = pd.DataFrame(list(sentiments.values()),
                                columns=["magnitude", "score"],
                                index=sentiments.keys())
    return sentiment_df.fillna(0)
    def get_news(self):
        """
        Request news articles for this instance's queries over the last week.

        :return: dataframe with Source/Title/Url/Published/Content columns
            (empty, but with the same columns, when nothing matched)
        """
        start = (datetime.today() - timedelta(days=7)).strftime('%Y-%m-%d')
        end = datetime.today().strftime('%Y-%m-%d')
        # Initialise NewsApiClient with an api key
        newsapi = NewsApiClient(api_key=nc.api_key)

        # OR all query terms together, e.g. ["a", "b"] -> "(a OR b)".
        # (The original wrapped this in a one-element ' '.join — a no-op.)
        query = '(' + ' OR '.join(self.queries) + ')'

        # Query for articles using keyword
        all_articles = newsapi.get_everything(q=query,
                                              from_param=start,
                                              to=end,
                                              language='en',
                                              sort_by='relevancy',
                                              page_size=100)
        articles = all_articles['articles']
        columns = ['Source', 'Title', 'Url', 'Published', 'Content']
        # Guard: with zero hits, the column selection below would raise a
        # KeyError on an empty, column-less DataFrame.
        if not articles:
            return pd.DataFrame(columns=columns)
        articles_df = pd.DataFrame(articles)
        # Keep only the human-readable name from the nested source dict
        articles_df['source'] = articles_df.source.map(lambda x: x['name'])
        # Select relevant columns for analysis
        articles_df = articles_df[[
            'source', 'title', 'url', 'publishedAt', 'content'
        ]]
        articles_df.columns = columns

        return articles_df
Esempio n. 11
0
def index(request):
    """Render the accounts index with the top three Indian business headlines."""
    newsapi = NewsApiClient(api_key="a59e5f24831a4322b535578654582973")
    topheadlines = newsapi.get_top_headlines(category='business', country='in')
    articles = topheadlines['articles']

    # The original zipped `news[:3]` against full-length lists; zip stops at
    # the shortest input, so the page was capped at three stories. Keep that
    # cap, but build the rows in one pass instead of six parallel lists.
    mylist = [(a['title'], a['description'], a['urlToImage'], a['author'],
               a['publishedAt'], a['url'])
              for a in articles[:3]]

    return render(request, 'accounts/index.html', context={"mylist": mylist})
Esempio n. 12
0
 def get_top(self,
             fields=None,
             limit=20,
             **kargs):
     """Fetch top headlines and reshape them.

     Args:
         fields: article keys to keep; defaults to image/title/link.
             (The old default was a shared mutable list — classic pitfall;
             it also listed "image" twice, which is preserved here.)
         limit: maximum number of articles to return.
         **kargs: forwarded unchanged to NewsApiClient.get_top_headlines.
     """
     if fields is None:
         fields = ["image", "title", "image", "link"]
     newsapi = NewsApiClient(api_key=self.__key)
     tops = newsapi.get_top_headlines(**kargs)
     return self.__transformDate(tops.get("articles"), fields, limit)
Esempio n. 13
0
def customsearchresults(request):
    """Return articles matching the user's query, wrapped with placeholder
    hero/victim/villain analysis fields for later processing."""
    user_query = request.GET['search']
    newsapi = NewsApiClient(api_key='e714e075a7534f85b7e0bdfd2330c611')
    all_articles = newsapi.get_everything(q=user_query,
                                          language='en',
                                          sort_by='relevancy')
    # Iterate articles directly instead of indexing via range(len(...)).
    # (Dead commented-out BeautifulSoup scraping code removed.)
    json_content = [{
        "heroes": "To be decided",
        "victim": "To be decided",
        "villian": "To be decided",  # key spelling kept: clients depend on it
        "source": article["source"]["name"],
        "author": article["author"],
        "title": article['title'],
        "shortdescription": article['description'],
        "urlToImage": article["urlToImage"],
        "url": article["url"],
    } for article in all_articles['articles']]

    return Response(json_content)
Esempio n. 14
0
def newsapi(stock):
    """Score recent news sentiment for *stock* on a 0-100 scale.

    Returns:
        (output, title, desc):
            output — sentiment percentage (100 = all positive, 0 = all
                negative, 50 = neutral / no articles);
            title — list of "title : \\ndescription" strings;
            desc — kept for interface compatibility; always empty.
    """
    newsapi = NewsApiClient(api_key='861ff0ffbaaa4eaa9571ce516cc5e088')

    all_articles = newsapi.get_everything(q=stock,
                                          language='en',
                                          sort_by='publishedAt',
                                          page_size=100)
    # (Dropped the original's unused get_sources() call — a wasted network
    # request — plus the unused loop counter and source/date locals.)

    title = []
    desc = []  # never populated, but part of the public return contract

    pos, neg, neu = 0, 0, 0

    for article in all_articles['articles']:
        content = str(article['content'])
        title.append(
            str(article['title']) + ' : \n' + str(article['description']))

        polarity = TextBlob(content).sentiment.polarity
        if polarity > 0:
            pos += 1
        elif polarity == 0:
            neu += 1
        else:
            neg += 1

    total = pos + neg + neu
    # Guard: zero articles used to crash with ZeroDivisionError; report
    # neutral sentiment instead.
    if total == 0:
        return 50.0, title, desc

    pos_news, neg_news = pos / total, neg / total
    # Map the net polarity share [-1, 1] onto [0, 100]. (The original
    # if/else computed this identical expression in both branches.)
    output = ((pos_news - neg_news + 1) * 100) / 2
    return output, title, desc
Esempio n. 15
0
def topheadlines(request):
    """Return the raw NewsAPI response for US business top headlines."""
    client = NewsApiClient(api_key='e714e075a7534f85b7e0bdfd2330c611')
    headlines = client.get_top_headlines(category='business',
                                         language='en',
                                         country='us')
    return Response(headlines)
Esempio n. 16
0
def searchresults(request):
    """Return every article matching the user's search query, most relevant first."""
    query = request.GET['search']
    client = NewsApiClient(api_key='e714e075a7534f85b7e0bdfd2330c611')
    matches = client.get_everything(q=query,
                                    language='en',
                                    sort_by='relevancy')
    return Response(matches)
Esempio n. 17
0
def get_news():
    """Fetch up to 70 US business top headlines from NewsAPI."""
    client = NewsApiClient(api_key='23e4c7e51a9a49d39dc4e7261305dd02')
    return client.get_top_headlines(country='us',
                                    category='business',
                                    page_size=70,
                                    language='en')
Esempio n. 18
0
def get_news(charity_name):
    """Return [title, description] pairs for articles mentioning *charity_name*.

    The name is wrapped in double quotes so NewsAPI matches the exact phrase.
    """
    articles = NewsApiClient(
        api_key="6223800a4d1b497597b28a33cd56e043"
        # api_key="54c63a9d4bad47bf97a59c8764581008" backup
    ).get_everything(q='"' + charity_name + '"')["articles"]
    # Comprehension replaces the manual append loop.
    return [[article["title"], article["description"]] for article in articles]
Esempio n. 19
0
 def __init__(self):
     """Load the NewsAPI key from config.json and fetch the source list.

     On an invalid key the error is printed and `newsapi`/`sources` are
     left as None (the old code left them undefined, so any later access
     raised a confusing AttributeError).
     """
     with open('config.json') as f:
         config = json.load(f)
     self.API_KEY = config['NEWS_API_KEY']
     # Defaults first so a failed request leaves the object in a sane state.
     self.newsapi = None
     self.sources = None
     try:
         self.newsapi = NewsApiClient(api_key=self.API_KEY)
         self.sources = self.newsapi.get_sources()
     except NewsAPIException as e:
         print("Invalid API key:", e)
Esempio n. 20
0
 def get_news(self, src):
     """Fetch top headlines from source *src* and store one at random on self.

     Sets all_news, articles, headline, source, author, and news_name.

     Raises:
         IndexError: when the source returns no articles at all.
     """
     NEWS_API = NewsApiClient(api_key=NEWS_KEY)
     self.all_news = NEWS_API.get_top_headlines(sources=src)
     self.articles = self.all_news['articles']
     if not self.articles:
         raise IndexError("no articles returned for source: %s" % src)
     # BUG FIX: randint(0, 9) raised IndexError whenever fewer than 10
     # articles came back; keep the "first 10" preference but bound it.
     rand_headline = random.randint(0, min(9, len(self.articles) - 1))
     self.headline = self.articles[rand_headline]
     self.source = self.headline['source']
     self.author = self.headline['author']
     self.news_name = self.source['name']
Esempio n. 21
0
def getsearchresult():
    """Flask endpoint: search articles and return only fully-populated ones.

    Query params: keyword, from, todate, source ("all" = no source filter).
    Articles missing any required field (or carrying the literal string
    "null") are filtered out of the response.
    """
    searchup = defaultdict(list)
    newsapi = NewsApiClient(api_key="d11761b89fdb4599b1497bf951690000")
    keyword_ = request.args.get('keyword')
    print("keyword", keyword_)
    from_ = request.args.get('from')
    print("keyword", from_)
    to_ = request.args.get('todate')
    print("keyword", to_)
    source_ = request.args.get('source')
    print("keyword", source_)

    # "all" means no source filter; the two original branches differed only
    # in the sources argument, so build a single call.
    sources_arg = "" if source_ == "all" else source_
    try:
        final_result = newsapi.get_everything(q=keyword_,
                                              from_param=from_,
                                              to=to_,
                                              sources=sources_arg,
                                              language="en",
                                              page_size=30,
                                              sort_by="publishedAt")
    except NewsAPIException as error:
        return str(error)

    # Keep an article only when every required field is present; the
    # upstream API sometimes sends "" or the literal string "null".
    # (Replaces the original 13-line chained condition.)
    required = ('title', 'author', 'description', 'source', 'url',
                'urlToImage', 'publishedAt')
    for item in final_result["articles"]:
        if all(item[field] not in (None, "", "null") for field in required):
            searchup['articles'].append(item)

    return jsonify(searchup)
Esempio n. 22
0
 def __init__(self, api_key):
     """Create the NewsAPI client, the S3 handle, and empty result buffers."""
     super(Headlines, self).__init__()
     self.news_cli = NewsApiClient(api_key=api_key)
     # Target S3 region and the dated output object name (one CSV per run).
     self.region = 'us-west-1'
     self.upload_cli = boto3.resource('s3', self.region)
     self.file_name = '{}_{}'.format(log_date, 'top_headlines.csv')
     self.bucket_dict = defaultdict(list)
     # Accumulators filled by later processing steps.
     self.en_sources = []
     self.headlines = []
     self.paths = []
Esempio n. 23
0
def main():
    """Authenticate with Twitter and NewsAPI, then tweet the top articles."""
    # Twitter OAuth handshake.
    authenticator = tweepy.OAuthHandler(api_key, api_secret)
    authenticator.set_access_token(access_token, access_secret)
    twitter_api = tweepy.API(authenticator)

    news_client = NewsApiClient(api_key=news_api_key)

    top_articles = get_top_articles(news_client)
    tweet_top_articles(twitter_api, top_articles)
Esempio n. 24
0
def get_articles(stock):
    """Fetch up to 100 newest English articles mentioning *stock*, echoing the
    raw response to stdout before returning it."""
    client = NewsApiClient(api_key='861ff0ffbaaa4eaa9571ce516cc5e088')
    results = client.get_everything(q=stock,
                                    language='en',
                                    sort_by='publishedAt',
                                    page_size=100)
    print(results)
    return results
Esempio n. 25
0
    def __init__(self, API_Key, keyword_list):
        """
        Initialize the news api client, query it with all keywords ANDed
        together, and store up to five results' titles, descriptions, URLs
        and source names on the instance.

        :type keyword_list: list
        :param keyword_list: list of keywords to query the api
        """
        # initialize news client with the api key
        self.news_api = NewsApiClient(api_key=API_Key)

        # AND the keywords together, e.g. ["a", "b"] -> "a AND b"
        query_string = " AND ".join(keyword_list)

        # query the api
        response = self.news_api.get_everything(q=query_string,
                                                sort_by='relevancy')

        # keep at most the first five articles (slice replaces the old
        # `5 if len(...) >= 5 else len(...)` parse_length dance)
        articles = response["articles"][:5]

        # one pass per field instead of four index-driven append loops
        self.title_list = [a["title"] for a in articles]
        self.descriptions_list = [a["description"] for a in articles]
        self.Urls_list = [a["url"] for a in articles]
        self.sources_list = [a['source']['name'] for a in articles]
Esempio n. 26
0
def get_news():
    """Return up to 20 English health-and-fitness articles as a JSON string."""
    client = NewsApiClient(api_key='29b0d1fda8b6452fb4df7d86a3dc5b9a')
    response = client.get_everything(q='health and fitness',
                                     language='en',
                                     page_size=20)
    # Each article is a plain dict, so the list serializes directly.
    return json.dumps(response['articles'])
Esempio n. 27
0
def news_update(n):
    """Build the Covid-19 Canada news block: a header plus ten headline links."""
    load_dotenv()
    client = NewsApiClient(api_key=os.environ.get("NEWS_API_KEY"))
    headlines = client.get_top_headlines(
        q="covid-19", language="en", country="ca", page_size=10
    )
    links = [
        html.H6(html.A(entry["title"], href=entry["url"], target="_blank"))
        for entry in headlines["articles"]
    ]
    return [html.H5("News about Covid-19 in Canada")] + links
def getInfo(query):
    """Collect up to five news items about *query*.

    Tries top headlines first; if fewer than five usable articles are found,
    falls back to a general search over the last 24 hours. Articles from
    aggregator sources (Google News, Reuters, Financial Times) are skipped.

    Returns:
        (titles, contents, sources, urls) — four parallel lists.
    """
    query = query.lower()
    newsapi = NewsApiClient(api_key='edf0afe93d6644d198d8539e640134c9')

    blocked_sources = {"Google News", "Reuters", "Financial Times"}
    newsTitles = list()
    newsContent = list()
    newsSources = list()
    newsURL = list()

    def _collect(articles, want):
        # Append usable articles until `want` items were added or the
        # supply runs out — never indexes past the end of `articles`.
        added = 0
        for art in articles:
            if added >= want:
                break
            source = art['source']['name']
            if source in blocked_sources:
                print(source)
                continue
            newsTitles.append(art['title'])
            newsContent.append(art['content'])
            newsSources.append(source)
            newsURL.append(art['url'])
            added += 1

    headlines = newsapi.get_top_headlines(q=query, language='en')
    # The original scanned at most the first five headlines; keep that cap.
    _collect(headlines['articles'][:5], 5)

    if len(newsTitles) < 5:
        today = datetime.datetime.today()
        start_day = today - datetime.timedelta(days=1)
        headlines_all = newsapi.get_everything(q=query,
                                               from_param=str(start_day),
                                               to=str(today),
                                               language='en',
                                               sort_by='relevancy')
        # BUG FIX: the old loop indexed range(5 - len(newsTitles)) blindly
        # and raised IndexError whenever the fallback returned too few
        # results; it also lost a slot for every skipped source.
        _collect(headlines_all['articles'], 5 - len(newsTitles))

    return newsTitles, newsContent, newsSources, newsURL
def get_news_data(_category='general', _language='en', _country='us', _pagesize=100, _page=1):
    """Fetch one page of top headlines for the given category/language/country."""
    # Client is keyed from the application config.
    client = NewsApiClient(api_key=config["news_api_key"])
    return client.get_top_headlines(
        category=_category,
        language=_language,
        country=_country,
        page_size=_pagesize,
        page=_page
    )
def search_articles(query_string, domain_blacklist_string,
                    domain_whitelist_string):
    """Search whitelisted domains for *query_string*, most relevant first.

    Note: domain_blacklist_string is accepted but currently unused
    (domain exclusion is disabled).
    """
    client = NewsApiClient(api_key='391c4cadc42a4a42aaf1ea266df4adfc')
    return client.get_everything(
        q=query_string,
        language='en',
        sort_by='relevancy',
        page_size=100,
        domains=domain_whitelist_string
    )