Example #1
def get_news_search():
    get_everything_filtered = ''
    if request.method == 'GET':
        newsapi = NewsApiClient(api_key=NEWS_API_KEY)
        request_object = {'language':'en', 'page_size': 30, 'sort_by': 'publishedAt'}
        keys = ['author', 'description', 'title', 'url', 'urlToImage', 'publishedAt', 'source']
        error_message = ''
        get_everything = ''

        try:
            if 'keyword' in request.args:
                keyword = request.args['keyword']
                request_object['q'] = keyword
            else:
                error_message = 'Provide keyword'

            if 'from' in request.args:
                from_date = request.args['from']
                request_object['from_param'] = from_date
            else:
                error_message = 'Provide from_date'

            if 'to' in request.args:
                to_date = request.args['to']
                request_object['to'] = to_date
            else:
                error_message = 'Provide to_date'

            if error_message != '':
                # A required parameter is missing: report it instead of calling the API.
                return json.dumps({"data": {'error': error_message}})

            # Optional params
            if 'sources' in request.args and request.args['sources'] != 'all':
                sources = request.args['sources']
                request_object['sources'] = sources

            if 'sources' in request_object:
                get_everything = newsapi.get_everything(
                    sources=request_object['sources'],
                    language=request_object['language'],
                    q=request_object['q'],
                    from_param=request_object['from_param'],
                    to=request_object['to'],
                    page_size=request_object['page_size'],
                    sort_by=request_object['sort_by'])
            else:
                get_everything = newsapi.get_everything(
                    language=request_object['language'],
                    q=request_object['q'],
                    from_param=request_object['from_param'],
                    to=request_object['to'],
                    page_size=request_object['page_size'],
                    sort_by=request_object['sort_by'])

            get_everything_filtered = transform_data(get_everything['articles'], keys)
        except Exception as e:
            if error_message == '':
                error_message = e.args[0]
            get_everything_filtered = {'error': error_message}
        return json.dumps({"data" : get_everything_filtered})
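The two get_everything branches in Example #1 differ only in whether sources is passed. A minimal sketch of the same request, assuming the request_object keys match newsapi-python's keyword arguments (q, from_param, to, language, page_size, sort_by, sources), unpacks one dict into a single call; search_everything is a hypothetical helper, not code from the example.

from newsapi import NewsApiClient

def search_everything(api_key, keyword, from_date, to_date, sources=None):
    # Collect the request parameters once; keys mirror get_everything's keyword arguments.
    newsapi = NewsApiClient(api_key=api_key)
    params = {
        'q': keyword,
        'from_param': from_date,
        'to': to_date,
        'language': 'en',
        'page_size': 30,
        'sort_by': 'publishedAt',
    }
    if sources and sources != 'all':
        params['sources'] = sources          # only include the optional key when it is set
    return newsapi.get_everything(**params)  # one call replaces both branches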
Example #2
def make_api_calls(name):
    api = NewsApiClient(api_key=NEWS_API)
    query = '+"{}"'.format(name.replace("_", " "))
    articles_en_1 = api.get_everything(q=query,
                                       language='en',
                                       page_size=20,
                                       page=1)

    articles_en_2 = api.get_everything(q=query,
                                       language='en',
                                       page_size=20,
                                       page=2)

    articles_de_1 = api.get_everything(q=query,
                                       language='de',
                                       page_size=20,
                                       page=1)

    articles_de_2 = api.get_everything(q=query,
                                       language='de',
                                       page_size=20,
                                       page=2)

    return {
        "en":
        extract_articles(articles_en_1) + extract_articles(articles_en_2),
        "de": extract_articles(articles_de_1) + extract_articles(articles_de_2)
    }
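The four calls in Example #2 differ only in language and page number, so the same result can be assembled with two small loops. This is a sketch under that assumption; fetch_by_language is a hypothetical name and the stub extract_articles stands in for the helper the example relies on.

from newsapi import NewsApiClient

def extract_articles(resp):
    # Stand-in for the extract_articles helper used by Example #2.
    return resp.get('articles', [])

def fetch_by_language(api_key, name, languages=('en', 'de'), pages=(1, 2)):
    api = NewsApiClient(api_key=api_key)
    query = '+"{}"'.format(name.replace('_', ' '))
    results = {}
    for lang in languages:
        collected = []
        for page in pages:
            resp = api.get_everything(q=query, language=lang, page_size=20, page=page)
            collected.extend(extract_articles(resp))
        results[lang] = collected
    return results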
Example #3
def info():
    api = NewsApiClient(api_key='0c121bea533546759ad2551c94d28118')
    api.get_sources()
    api.get_everything(sources='bbc-news', q='women AND safety')

    apiKey_list = [
        '20d469827dbb4eb384d22490ea5df888', '75d16a33351a44969f3a5ac41eb7cf20',
        '6496b9cb73c34054a8b58a3dee86c672', '5e6b9203fe4247369e70351f0ab2b1b3',
        '3907a8165aec4be89b2e12f3a5ad541a', '79bbb20ec53e4d1b85c2caca76402488',
        '388eff313e1a4d399d55ebb19d4db4cd', 'a0936894b7904a03a4c35ca6627ebc33',
        'c15a4b03480c4081bd3d184bc8559f23', 'c05beec776fa4b1fbcc46bdad8efa951',
        'be98dcb51dd64998ad08a6dd2c5f9e80', '376c9dfc704748279df3e6f30a751a1e',
        '46660f56bd6e45f986fea91dc87b1fc1', '5dbf8944da394e4ca003b7fea5b736c5',
        '31ba8f79f57d41c8b03d3334760154b3', '723db6ceb2e8465daffa882be629d6fb',
        '1a88fcc99b0b41de902fcdbc45bd4a97', '06d039549c914c78a46d2c0c137b7f7c',
        '1b47a4f26fc949c4ad280f9bfb81cd5d'
    ]  # List of different API Keys

    a, b, c = fetch_keyword_news(
        [
            "women", "women empowerment", "menstural cycle", "periods",
            "girl power", "women in business", "health"
        ],
        datetime.now(pytz.timezone('US/Eastern')).date(),
        apiKey_list[4])  # List which will contain all the latest news inputs

    print(a, b, c)
    arr = [a[0:3], b[0:3], c[0:3]]
    print(arr)
    # session['final'] = final

    return render_template('info.html', final=arr)
Example #4
def processget():
    newsapi = NewsApiClient(api_key="6c70eb3b2c36455ca87436fecbd77761")
    if request.method == 'GET':
        query = request.args.get('query')
        sources = request.args.get('sources')
        from_param = request.args.get('from_param')
        to = request.args.get('to')
        if query and sources != 'all' and from_param and to:
            try:
                output1 = newsapi.get_everything(q=query,
                                                 sources=sources,
                                                 from_param=from_param,
                                                 to=to,
                                                 language='en',
                                                 sort_by='publishedAt',
                                                 page_size=30)

                return jsonify({'output': output1})
            except NewsAPIException as err:
                print(err.args)
                return (jsonify({'erroroutput': err.args}))

        elif query and sources == 'all' and from_param and to:
            try:
                output1 = newsapi.get_everything(q=query,
                                                 from_param=from_param,
                                                 sources=None,
                                                 to=to,
                                                 language='en',
                                                 sort_by='publishedAt',
                                                 page_size=30)

                return jsonify({'output': output1})
            except NewsAPIException as err:
                return (jsonify({'erroroutput': err.args}))
Example #5
def search():

    # get form
    form = request.form
    search_input_keyword = form['search_input_keyword']
    search_input_from = form['search_input_from']
    search_input_to = form['search_input_to']
    search_select_source = form['search_select_source']
    
    # init api client
    newsapi = NewsApiClient(api_key='cf6de82e0a034204b073376d55103619')

    # get articles
    if 'all' in search_select_source:
        all_articles = newsapi.get_everything(q=search_input_keyword,
                                        from_param=search_input_from,
                                        to=search_input_to,
                                        language='en',
                                        sort_by='publishedAt',
                                        page_size=30)
    else:
        all_articles = newsapi.get_everything(q=search_input_keyword,
                                            sources=search_select_source,
                                            from_param=search_input_from,
                                            to=search_input_to,
                                            language='en',
                                            sort_by='publishedAt',
                                            page_size=30)
    return all_articles
Example #6
    async def _news_(self,ctx,arg1):
        newsapi = NewsApiClient(os.getenv('NEWS_API_KEY'))

        top_headlines = newsapi.get_everything(q=f'{arg1}',sources='newsweek')

        articles = top_headlines['articles']
        article1 = articles[0] 
        headline1 = article1['title'] 
        content1 = article1['content'] 
        url1=article1['url'] 

        top_headlines = newsapi.get_everything(q=f'{arg1}',sources='the-hindu')
        articles = top_headlines['articles']
        article2 = articles[0] 
        headline2 = article2['title'] 
        content2 = article2['content'] 
        url2=article2['url'] 

        top_headlines = newsapi.get_everything(q=f'{arg1}',sources='the-times-of-india')
        articles = top_headlines['articles']
        articles3 = articles[0] 
        headline3 = articles3['title'] 
        content3 = articles3['content'] 
        url3=articles3['url'] 

        embed = discord.Embed(title="Search results",
                              description=f"{ctx.author.mention}, here's what I got!",
                              color=ctx.author.color)
        embed.add_field(name='\u200b', value=f'**[{headline1}]({url1})**\n {content1[:-13]}', inline=False)
        embed.add_field(name='\u200b', value=f'**[{headline2}]({url2})**\n {content2[:-13]}', inline=False)
        embed.add_field(name='\u200b', value=f'**[{headline3}]({url3})**\n {content3[:-13]}', inline=False)

        await ctx.send(embed=embed)
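The three per-source blocks in Example #6 repeat the same fetch-and-unpack steps. A hedged condensation loops over the source ids and collects (headline, content, url) tuples; first_article_per_source is a hypothetical helper, and the [:-13] slice mirrors the example's trimming of the "[+N chars]" suffix.

from newsapi import NewsApiClient

def first_article_per_source(api_key, query,
                             sources=('newsweek', 'the-hindu', 'the-times-of-india')):
    newsapi = NewsApiClient(api_key=api_key)
    results = []
    for source in sources:
        resp = newsapi.get_everything(q=query, sources=source)
        articles = resp['articles']
        if not articles:
            continue  # skip sources with no match instead of raising IndexError
        first = articles[0]
        results.append((first['title'], (first['content'] or '')[:-13], first['url']))
    return results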
Example #7
def create_labeled_data():
    news_api = NewsApiClient(api_key='063f02817dbb49528058d7372964f645')
    for x in range(1, 11):
        b_headlines = \
            news_api.get_everything(sources='buzzfeed', from_param=monthago_str, to=now_str,
                                    language='en',
                                    sort_by='relevancy', page_size=100, page=x)['articles']
        r_headlines = \
            news_api.get_everything(sources='reuters', from_param=monthago_str, to=now_str,
                                    language='en', sort_by='relevancy', page_size=100, page=x)['articles']
        a_headlines = \
            news_api.get_everything(sources='associated-press', from_param=monthago_str, to=now_str,
                                    language='en', sort_by='relevancy', page_size=100, page=x)['articles']
        b_titles = [article['title'] for article in b_headlines]
        b_titles = [t for t in b_titles if t is not None]  # drop articles with missing titles
        b_titles = [(title, 'bait') for title in b_titles]
        bait_headlines.extend(b_titles)
        r_titles = [(article['title'], 'not_bait') for article in r_headlines]
        legit_headlines.extend(r_titles)
        a_titles = [(article['title'], 'not_bait') for article in a_headlines]
        legit_headlines.extend(a_titles)

    all_headlines = bait_headlines + legit_headlines
    random.shuffle(all_headlines)
    pickle.dump(all_headlines, open('headlines.p', 'wb'))
Example #8
def headline_extraction(day, month, term):
    """iterates through an entire months worth of dates and extracts the articles containing
     specified keywords for each given day"""
    """6876de1e74b64e4e862fc0b7943133be"""
    """1f66071454d94bf49a229003cc0a66d0"""
    """b87aacedc4db4880b592647e573fffd3"""
    num_articles = 100

    start = f"2020-{month}-{day}"
    end = f"2020-{month}-{day}"
    try:
        news = NewsApiClient(api_key="b87aacedc4db4880b592647e573fffd3")
        data = news.get_everything(q=term,
                                   sources=news_sites(),
                                   from_param=start,
                                   to=end,
                                   language="en",
                                   page_size=num_articles,
                                   sort_by='relevancy')

        print(f"Analyzing month {month} / day {day}: Found " +
              str(data["totalResults"]))
        news_df = df_setup(data)
        save_df(news_df, term)
    except Exception:
        try:
            news = NewsApiClient(api_key="1f66071454d94bf49a229003cc0a66d0")
            data = news.get_everything(q=term,
                                       sources=news_sites(),
                                       from_param=start,
                                       to=end,
                                       language="en",
                                       page_size=num_articles,
                                       sort_by='relevancy')

            print(f"Analyzing month {month} / day {day}: Found " +
                  str(data["totalResults"]))
            news_df = df_setup(data)
            save_df(news_df, term)
        except Exception:
            try:
                news = NewsApiClient(
                    api_key="6876de1e74b64e4e862fc0b7943133be")
                data = news.get_everything(q=term,
                                           sources=news_sites(),
                                           from_param=start,
                                           to=end,
                                           language="en",
                                           page_size=num_articles,
                                           sort_by='relevancy')

                print(f"Analyzing month {month} / day {day}: Found " +
                      str(data["totalResults"]))
                news_df = df_setup(data)
                save_df(news_df, term)
            except Exception:
                print(f"skipped {start}/{end}")
Example #9
def pull_one_day(the_day, key):
    # Init
    newsapi = NewsApiClient(api_key=key)
    ps = 100

    # Init big list to hold results
    # Each entry is itself a list of: title, description, content, published at
    results = []

    # Run first request to determine size of response
    the_day_articles = newsapi.get_everything(q='crude AND oil',
                                             from_param=the_day,
                                             to=the_day,
                                             language='en',
                                             page_size=ps,
                                             sort_by='relevancy')

    # How many pages
    pages = int(the_day_articles['totalResults'] / ps + 1)
    last_page_no = the_day_articles['totalResults'] % ps
    print('Number of articles: ' + str(the_day_articles['totalResults']))
    print('Number of pages: ' + str(pages))
    print('Number of articles on last page: ' + str(last_page_no))

    # Crawl each page (can only crawl 10 pages x 100 articles with free subscription)
    for i in range(1, pages+1):
        # Request the page
        print('Processing page number ' + str(i))
        this_page = newsapi.get_everything(q='crude AND oil',
                                            from_param=the_day,
                                            to=the_day,
                                            language='en',
                                            page_size=ps,
                                            sort_by='relevancy',
                                            page=i)
        # Process each article into a row
        x = ps
        
        if i == pages:
            x = last_page_no
        for j in range(0, x):
            row = []
            for k in ['title', 'description', 'content', 'publishedAt']:
                row.append(this_page['articles'][j][k])
            results.append(row)
            
    # Confirm results were loaded correctly
    print(str(len(results)) + ' articles loaded')

    return results
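The page arithmetic in Example #9 (int(totalResults / ps + 1)) requests one extra, empty page whenever totalResults is an exact multiple of the page size. A small sketch of the same calculation with math.ceil; page_plan is a hypothetical helper, and the free plan's cap on retrievable results still applies.

import math

def page_plan(total_results, page_size=100):
    # Number of full or partial pages, and how many articles sit on the last one.
    pages = math.ceil(total_results / page_size)
    last_page_count = total_results - (pages - 1) * page_size if pages else 0
    return pages, last_page_count

print(page_plan(230))  # -> (3, 30)
print(page_plan(200))  # -> (2, 100)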
Example #10
def new(message):
    # Russian UI strings: "Получить новости" = "Get news", "Назад" = "Back",
    # "Новости получены" = "News delivered", "Выберите действие:" = "Choose an action:",
    # "Что-то пошло не так..." = "Something went wrong...",
    # menu row: "Bot info" / "Quotes" / "Forex" / "Portfolio"
    try:
        text = message.text
        if text == "Получить новости":
            mark = types.ReplyKeyboardMarkup()
            mark.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            t =0
            newsapi = NewsApiClient(api_key='cfa09a3adc7f47eab3d72f016392e170')
            n = newsapi.get_everything(q=slov,
                               language='en',
                               sort_by='publishedAt',
                               page_size=4,
                               page=1)
            while t < len(n['articles']):
                bot.send_message(message.chat.id, (n['articles'][t]['url']))
                t += 1
            bot.send_message(message.chat.id, "Новости получены", reply_markup=mark)
        elif text == "Назад":
            mar = types.ReplyKeyboardMarkup()
            mar.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            bot.send_message(message.chat.id, "Выберите действие:", reply_markup=mar)
    except Exception as e:
        m = types.ReplyKeyboardMarkup()
        m.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
        bot.send_message(message.chat.id, 'Что-то пошло не так...', reply_markup=m)
Example #11
async def process_articles(search_query: str,
                           language: str = "en",
                           API_KEY: str = None):
    #TODO: flesh out swagger defs
    #TODO: sanitize inputs
    #TODO: error handling
    if API_KEY:  #start news client with provided API keys
        newsapi_p = NewsApiClient(api_key=API_KEY)
        all_articles = newsapi_p.get_everything(q=search_query,
                                                language=language,
                                                sort_by='relevancy',
                                                page_size=100,
                                                page=1)
    else:
        all_articles = newsapi.get_everything(q=search_query,
                                              language=language,
                                              sort_by='relevancy',
                                              page_size=100,
                                              page=1)
    dup_assnd_arts, _ = dedup_svc.assign_duplicates(all_articles['articles'])
    gen = rel_clf.determine_relevance_batch(search_query, dup_assnd_arts)
    resp_articles = list(gen)
    resp_len = len(resp_articles)

    return {
        "query": search_query,
        "results": resp_len,
        "articles": resp_articles,
    }
Example #12
def fetchNews():
    # Init
    newsapi = NewsApiClient(api_key='80979f75db4c46198e1fb95d6238d0b1')

    query = "Phoenix"

    all_articles = newsapi.get_everything(q=query,
                                          from_param='2020-07-15',
                                          to='2020-07-16',  # one-day window; "to" must not precede "from"
                                          language='en',
                                          sort_by='relevancy')
    js = json.dumps(all_articles)

    f = open("outputFile.txt", "a")
    stop_words = set(stopwords.words('english'))

    for val in json.loads(js)["articles"]:
        descrip = val["description"]
        table = str.maketrans('', '', string.punctuation)
        s = re.split(r'\W+', descrip)
        stripped = [w.translate(table) for w in s]
        words = [word.lower() for word in stripped]
        for word in words:
            if word not in stop_words:
                if word != "get":
                    if word != query.lower():
                        f.write("(news)"+word+"\n")
Example #13
def news(message):
    # Russian UI strings: "Назад" = "Back", "Добавить акцию" = "Add a stock",
    # "Получить новости" = "Get news", "Акция добавлена" = "Stock added",
    # "Новости получены" = "News delivered", "Выберите действие:" = "Choose an action:",
    # "Что-то пошло не так..." = "Something went wrong...",
    # menu row: "Bot info" / "Quotes" / "Forex" / "Portfolio"
    try:
        text = message.text
        if text == "Назад":
            mar = types.ReplyKeyboardMarkup()
            mar.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            bot.send_message(message.chat.id, "Выберите действие:", reply_markup=mar)
        elif text == "Добавить акцию":
            markup = types.ReplyKeyboardMarkup()
            markup.row("Получить новости", "Назад")
            items.append(ticket)
            print(items)
            mes = bot.send_message(message.chat.id,"Акция добавлена", reply_markup=markup)
            bot.register_next_step_handler(mes, news_next)
        elif text == "Получить новости":
            mark = types.ReplyKeyboardMarkup()
            mark.row("Добавить акцию", "Назад")
            t =0
            newsapi = NewsApiClient(api_key='cfa09a3adc7f47eab3d72f016392e170')
            n = newsapi.get_everything(q=name,
                               language='en',
                               sort_by='publishedAt',
                               page_size=4,
                               page=1)
            while t < len(n['articles']):
                bot.send_message(message.chat.id, (n['articles'][t]['url']))
                t += 1
            msg = bot.send_message(message.chat.id, "Новости получены", reply_markup=mark)
            bot.register_next_step_handler(msg, news_next)
    except Exception as e:
        m = types.ReplyKeyboardMarkup()
        m.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
        bot.send_message(message.chat.id, 'Что-то пошло не так...', reply_markup=m)
Example #14
def index():
    today = date.today()
    this_day = today.strftime("%Y-%m-%d")
    # Init
    NEWS_KEY = os.environ.get("NEWS_KEY")
    newsapi = NewsApiClient(api_key=NEWS_KEY)

    # /v2/top-headlines
    # top_headlines = newsapi.get_top_headlines(q='music',
    #                                           #   sources='bbc-news,the-verge',
    #                                           #   category='business',
    #                                           #   category='entertainment',
    #                                           language='en',
    #                                           country='us')

    # /v2/everything
    all_articles = newsapi.get_everything(
        q='music',
        #   sources='bbc-news,the-verge',
        #   domains='bbc.co.uk,techcrunch.com',
        #   domains='billboard.com,digitalmusicnews.com, complex.com',
        #   from_param='2017-12-01',
        from_param=this_day,
        #   to='2017-12-12',
        language='en',
        #   sort_by='relevancy',
        sort_by='popularity',
        #   page=2
    )
    print("-------------------------------------------------")
    print(all_articles)
    print("-------------------------------------------------")
    return (all_articles)
Example #15
def get_urls(category):
    """Open file with list of topics. Read each topic and call google news api to request articles ranked relevant to
    this topic.  Write the result set of urls to a new file (one file per topic). With 20 topics and 2 categories, 40
    files will be created.
    :param category:
    """
    news_api = NewsApiClient(api_key=config.get_newsapi_key())
    topics_list_file_path = os.path.join(config.get_app_root(), 'topics',
                                         category)

    with open(topics_list_file_path, 'r') as topics_file:
        for topic in topics_file:
            # remove trailing whitespace (including the newline) from the topic string;
            # rstrip() with no arguments strips only whitespace, so "Brexit" keeps its final "t"
            topic = topic.rstrip()

            topic_file_name = format_file_name(topic)

            # Create a new file to save result set
            urls_file_path = os.path.join(config.get_app_root(), 'urls',
                                          category.split('.')[0],
                                          topic_file_name)
            # Write result set of urls to new file
            with open(urls_file_path, 'w') as urls_file:
                all_articles = news_api.get_everything(q=topic,
                                                       sort_by='relevancy',
                                                       page_size=100)
                for article in all_articles['articles']:
                    urls_file.write(article['url'])
                    urls_file.write('\n')
Example #16
def collect_news_data(dates, query, sources, ticker):
    """News collection via NewsAPI.org

    Args:
        dates ([array]): array of dates
        query (string): query string
        sources ([array]): list of sources to collect news from
        ticker (string): ticker symbol used to build the news data save location
    """
    newsapi = NewsApiClient(api_key='XXX')

    for date in dates:
        articles = newsapi.get_everything(q=query,
                                          language='en',
                                          from_param=date,
                                          to=date,
                                          sources=sources,
                                          sort_by="relevancy",
                                          page_size=100)

        if not os.path.exists("../data/news/{}".format(ticker)):
            os.makedirs("../data/news/{}".format(ticker))

        with open("../data/news/{}/{}.json".format(ticker, date), "w") as f:
            f.write(json.dumps(articles, indent=4))
Example #17
def newsFetcher(txt, top=False):

    newsapi = NewsApiClient(api_key='932e630a539a47308e1cef5d6eb05ed6')
    if top:
        data = newsapi.get_top_headlines(language='en',
                                         sources='bbc-news,the-verge')
        results = dict()
        for i in data['articles']:
            if i['title'] not in results:
                results[i['title']] = i['url']
        return results

    blob = TextBlob(txt)
    b = blob.noun_phrases
    s = ''
    x = 0
    for i in b:
        s = s + i + " "
        if (x > 5):
            break
        x += 1
    data = newsapi.get_everything(q=s,
                                  language='en',
                                  page_size=100,
                                  sort_by='relevancy')
    results = dict()
    for i in data['articles']:
        if i['title'] not in results:
            results[i['title']] = i['url']
    return results
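The manual counter in Example #17 collects roughly the first half-dozen noun phrases before breaking. A slice-and-join sketch of the same truncation; query_from_text is a hypothetical name, and TextBlob's noun_phrases behaves like a list of strings.

from textblob import TextBlob

def query_from_text(txt, max_phrases=7):
    # Take the leading noun phrases and join them into a single search string.
    phrases = TextBlob(txt).noun_phrases
    return ' '.join(phrases[:max_phrases])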
Example #18
def newsapi_get_urls(search_words, agency_urls):
    if len(search_words) == 0 or agency_urls is None:
        return None
    print("Searching agencies for pages containing:", search_words)
    # This is my API key, each user must request their own
    # API key from https://newsapi.org/account
    api = NewsApiClient(api_key='6f174feb5d05447d920d538d45718afa')
    api_urls = []
    # Iterate over agencies and search words to pull more url's
    # Limited to 1,000 requests/day - Likely to be exceeded
    for agency in agency_urls:
        domain = agency_urls[agency].replace("http://", "")
        print(agency, domain)
        for word in search_words:
            # Get articles with q= in them, Limits to 20 URLs
            try:
                articles = api.get_everything(q=word, language='en',
                                              sources=agency, domains=domain)
            except Exception:
                print("--->Unable to pull news from:", agency, "for", word)
                continue
            # Pull the URL from these articles (limited to 20)
            d = articles['articles']
            for i in range(len(d)):
                url = d[i]['url']
                api_urls.append([agency, word, url])
    df_urls = pd.DataFrame(api_urls, columns=['agency', 'word', 'url'])
    n_total = len(df_urls)
    # Remove duplicates
    df_urls = df_urls.drop_duplicates('url')
    n_unique = len(df_urls)
    print("\nFound a total of", n_total, " URLs, of which", n_unique,\
          " were unique.")
    return df_urls
Example #19
def hello():

    # Init
    newsapi = NewsApiClient(api_key='966611677d694731833118dddf7bd7fc')

    # /v2/everything
    forecast = newsapi.get_everything(q='forecast',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_param='2021-03-10',
                                      to='2021-12-11',
                                      language='en',
                                      sort_by='relevancy',
                                      page=1)
    # Find out what data type we are working with
    print(type(forecast))

    # get_everything already returns a plain dict, so it can be serialized directly
    output = json.dumps(forecast)
    print('type of output: ', type(output))
    return output  # return the JSON-encoded response
Example #20
def index(request):

    newsapi = NewsApiClient(api_key=settings.API_KEY)

    top_headlines = newsapi.get_top_headlines(
        sources='bbc-news,the-verge,techcrunch')
    pprint(top_headlines)
    all_articles = newsapi.get_everything(
        sources='bbc-news,the-verge,techcrunch')
    # pprint(all_articles)
    # /v2/sources
    sources = newsapi.get_sources()
    # pprint(sources)
    # pprint(dir(newsapi))

    articles = top_headlines['articles']
    desc = []
    news = []
    img = []
    url = []

    for article in articles:
        news.append(article['title'])
        desc.append(article['description'])
        img.append(article['urlToImage'])
        url.append(article['url'])
    mylist = zip(news, desc, img, url)

    return render(request, 'index.html', context={"mylist": mylist})
Example #21
def news_api(request, token):
    """
        returns featured news article for the user's field of study
    """
    # query the user's field of study by token; the token is passed as a bound
    # parameter rather than interpolated into the SQL string
    sql = "SELECT field_of_study FROM `" + USER_TABLENAME + "` WHERE token = %s"
    cursor = connection.cursor()
    cursor.execute(sql, [token])
    field_of_study = cursor.fetchall()[0][0]
    newsapi = NewsApiClient(api_key=NEWS_API_KEY)
    all_articles = newsapi.get_everything(q=field_of_study,
                                          sources='bbc-news',
                                          language='en',
                                          sort_by='relevancy')
    title_list = []
    url_list = []
    url_to_image_list = []
    feed = all_articles["articles"]
    if len(all_articles["articles"]) < 10:
        top_headlines = newsapi.get_top_headlines(sources='bbc-news')
        feed = top_headlines["articles"]
    for key in feed:
        title_list.append(key["title"])
        url_list.append(key["url"])
        url_to_image_list.append(key["urlToImage"])

    context = {
        'url': url_list,
        'imageUrl': url_to_image_list,
        'title': title_list
    }
    return Response(json.dumps(context), 200)
Example #22
def search(request):
    if request.method == "GET":
        count = 0
        var = request.GET.get('u')
        newsapi = NewsApiClient(api_key="e3af2b7c171c4a8a9101eb0c4bae5280")
        topheadlines = newsapi.get_everything(q=var, language='en')
        articles = topheadlines['articles']

        desc = []
        news = []
        img = []
        link = []

        for i in range(len(articles)):
            myarticles = articles[i]
            content = myarticles['description']
            if content and var in content:  # descriptions can be None
                count += 1
                news.append(myarticles['title'])
                desc.append(myarticles['description'])
                img.append(myarticles['urlToImage'])
                link.append(myarticles['url'])

        print(desc)
        mylist = zip(news, desc, img, link)
        if count == 0:
            return HttpResponse("No results found")
        else:
            return render(request,
                          'newsblog/newsource.html',
                          context={"mylist": mylist})
Example #23
def getArticles():
    # Don't worry, I update my API key every time I publish this on GitHub
    news = NewsApiClient(api_key='405cbb818b64428ab934b95bfec1426d')

    # NewsAPI source identifiers are hyphenated slugs
    newsSources = 'bbc-news,the-verge,abc-news,ary-news,associated-press,wired,aftenposten,bild,blasting-news,bloomberg,business-insider,engadget,google-news'

    toDate = datetime.datetime.now().date()
    fromDate = toDate - datetime.timedelta(weeks=4)  # four weeks in the past, not the future

    ## Collect new contents
    all_news = []
    for i in range(1, 11):
        all_news.append(
            news.get_everything(sources=newsSources,
                                from_param=format(fromDate),
                                to=format(toDate),
                                language='en',
                                page_size=100,
                                page=i))

    ## Create List of Articles
    rawArticles = []
    for response in all_news:  # avoid shadowing the NewsApiClient instance named `news`
        rawArticles = rawArticles + response['articles']

    # dict comprehension syntax - similar to usage in f#, or the foreach library in R
    rawArticles = {i: rawArticles[i] for i in range(len(rawArticles))}

    return rawArticles
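The closing dict comprehension in Example #23 simply re-keys the article list by its index; dict(enumerate(...)) builds the same mapping, as this quick check shows.

raw_articles = ['a', 'b', 'c']
assert {i: raw_articles[i] for i in range(len(raw_articles))} == dict(enumerate(raw_articles))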
Example #24
def get_news_results(api_key: str,
                     city: str,
                     state: str,
                     country: str,
                     article_limit: int = 5) -> str:
    key = (city, state, country)
    if key in NEWSPAPER_RESULTS:
        return NEWSPAPER_RESULTS[key]

    news_api = NewsApiClient(api_key=api_key)
    articles = news_api.get_everything(
        q=f'coronavirus+{city}+{state}')['articles'][:article_limit]

    article_template = '<li><a href="{url}" target="_blank">{title}</a></li>'
    articles_html_parts = [
        article_template.format(url=article['url'], title=article['title'])
        for article in articles
    ]
    articles_html = '\n'.join(articles_html_parts)

    news_results = f"""
        <ul>
            {articles_html}
        </ul>
    """

    NEWSPAPER_RESULTS[key] = news_results

    return news_results
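The NEWSPAPER_RESULTS dict in Example #24 is a hand-rolled memoisation cache keyed on location. Under the same assumption (repeated lookups for the same place), functools.lru_cache expresses the caching declaratively; fetch_news_urls below is a hypothetical sketch, not the example's function.

from functools import lru_cache
from newsapi import NewsApiClient

@lru_cache(maxsize=128)
def fetch_news_urls(api_key: str, city: str, state: str, article_limit: int = 5):
    # All arguments are hashable strings/ints, so repeat calls for the same location hit the cache.
    client = NewsApiClient(api_key=api_key)
    articles = client.get_everything(q=f'coronavirus {city} {state}')['articles']
    return tuple(article['url'] for article in articles[:article_limit])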
Example #25
def newsByCompanyName(companies, N):
    '''
    Get news articles for given companies and given amount of days
    
    param: companies (dict) of company names as keys and position as values
    param: N (int) number of days of news to search for
    '''
    from_date = (datetime.now() - timedelta(days=N)).strftime("%Y-%m-%d")
    newsClient = NewsApiClient(api_key='551a38056949460ea29156d24df9735f')
    result= dict()
    for company in companies:
        wordsToSearchFor = company  # TODO: enhance search by searching for keywords related to the company and not just the name
        all_articles = newsClient.get_everything(q=wordsToSearchFor,
                                              sources='bbc-news, cnbc',
                                              # sources='cnbc',
                                              from_param=from_date,
                                              language='en',
                                              sort_by='relevancy')

        if len(all_articles['articles']) > 0:
            # Only want to keep description, link and timestamp
            keysToKeep = ['title', 'url', 'publishedAt', 'source']
            articles = all_articles['articles']
            refinedArticles = []
            for article in articles:
                refinedArticle = {}
                for key in keysToKeep:
                    refinedArticle[key] = article[key]
                refinedArticles.append(refinedArticle)
            # result[company] = all_articles['articles']
            result[company] = refinedArticles
    return result
Example #26
def display_exam():
    Subject = request.form['Subject']
    if not Subject.strip():  # blank or whitespace-only input falls back to a default topic
        Subject = 'America'
        print(Subject)
    # Init
    newsapi = NewsApiClient(api_key='a9288315ab104f7aa9564ca6d08b8ce4')

    # /v2/top-headlines
    top_headlines = newsapi.get_top_headlines(q=str(Subject), language='en')

    # /v2/everything
    all_articles = newsapi.get_everything(q=str(Subject), language='en')

    # /v2/sources
    sources = newsapi.get_sources()

    #print(sources)
    print(type(all_articles))
    #print(top_headlines)
    #print(newsapi)

    return render_template('home.html',
                           sources=sources,
                           all_articles=json.dumps(all_articles),
                           top_headlines=top_headlines,
                           newsapi=newsapi)
Example #27
class GoogleNews():
    def __init__(self):
        self.news_api = NewsApiClient(
            api_key='cb7a4ae15a98429890aeedb9a7b460a0')
        self.key_word = [
            'Latest', 'World', 'U.S.', 'Business', 'Technology',
            'Entertainment', 'Sports', 'Science', 'Health'
        ]
        self.t = time.time()
        self.point_time = time.strftime('%Y-%m-%d', time.localtime(self.t))
        self.google_crawler = 1

    def googleNews(self):

        if self.google_crawler == 1:
            # Fetch the top stories from the Google News RSS feed
            news_url = "https://news.google.com/news/rss"
            ssl._create_default_https_context = ssl._create_unverified_context
            Client = urllib.request.urlopen(news_url)
            xml_page = Client.read()
            Client.close()
            soup_page = soup(xml_page, "xml")
            news_list = soup_page.findAll("item")
            return news_list

        elif self.google_crawler == 2:
            # Return the google-news articles for the specified date and category keywords
            today = self.point_time
            url_list = []
            for kw in self.key_word:
                all_articles = self.news_api.get_everything(
                    q=kw,
                    sources='google-news',
                    domains='news.google.com',
                    from_param=today,
                    to=today[:-1] + str(int(today[-1]) - 1),  # naive "previous day" string; invalid when the day ends in 0 or 1
                    language='en',
                    sort_by='relevancy',
                    page_size=100,
                )
                articles = all_articles['articles']
                for i in range(len(articles)):
                    url = articles[i]['url']
                    url_list.append(url)
            return url_list

        else:
            # Return the google-news top headlines
            top_headlines = self.news_api.get_top_headlines(
                sources='google-news',
                language='en',
                page_size=100,
            )

            articles = top_headlines['articles']
            url_list = []
            for i in range(len(articles)):
                url = articles[i]['url']
                url_list.append(url)
            return url_list
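The to= value in Example #27 decrements the last character of the date string, which produces invalid dates whenever the day ends in 0 or 1. A safer sketch of "yesterday" with datetime arithmetic:

from datetime import date, timedelta

today = date.today()
yesterday = today - timedelta(days=1)  # correct across month and year boundaries
print(today.isoformat(), yesterday.isoformat())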
Example #28
class BlogContentTemplate:

    def __init__(self):
        self.newsapi = NewsApiClient(api_key=settings.NEWS_API_KEY)

    def get_content(self, context, tag="", page=1):
        result = []
        try:
            all_articles = self.newsapi.get_everything(q=context, page=page)
        except (ConnectionError, ReadTimeout, NewsAPIException):
            pass  # network or API failure: fall through and return an empty result list
        else:
            if all_articles['status'] == 'ok':
                articles = all_articles['articles']
                for item in articles:
                    title = item['title']
                    content = item['content']
                    user = item['author']
                    image = item['urlToImage']
                    tags = tag
                    url = item['url']
                    date = item['publishedAt']
                    new_content = Content(title, content, user, image, url, tags, date)
                    result.append(new_content)
        return result
Example #29
def get_news(search_term, considered_sources):
    newsapi = NewsApiClient(api_key=config.NEWS_API_KEY)
    d = {
        'id': [],
        'name': [],
        'author': [],
        'publication_bias': [],
        'title': [],
        'url': [],
        'description': [],
        'publishedAt': [],
        'content': [],
        'fulltext': []
    }

    for source in considered_sources:
        newspkg = newsapi.get_everything(q=search_term, sources=source)
        articlepkg = newspkg['articles']
        print("Gathering Articles from " + source)
        for a in articlepkg:
            sourcepkg = a['source']
            d['id'].append(sourcepkg['id'])
            d['name'].append(sourcepkg['name'])
            d['author'].append(a['author'])
            d['publication_bias'].append(get_bias(sourcepkg['id']))
            d['title'].append(a['title'])
            d['url'].append(a['url'])
            d['description'].append(a['description'])
            d['publishedAt'].append(a['publishedAt'])
            d['content'].append(a['content'])
            d['fulltext'].append(download_fulltext(a['url']))

    df = pd.DataFrame.from_dict(d)

    return df
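Example #29 appends to ten parallel lists before building a DataFrame. An equivalent sketch that collects one dict per article and constructs the frame in a single call; the bias and full-text columns are omitted because they depend on the example's get_bias and download_fulltext helpers.

import pandas as pd

def articles_to_frame(articles):
    # One row dict per NewsAPI article, keyed like the columns in Example #29.
    rows = []
    for a in articles:
        rows.append({
            'id': a['source']['id'],
            'name': a['source']['name'],
            'author': a['author'],
            'title': a['title'],
            'url': a['url'],
            'description': a['description'],
            'publishedAt': a['publishedAt'],
            'content': a['content'],
        })
    return pd.DataFrame(rows)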
Example #30
def sensex(request):
    newsapi = NewsApiClient(api_key="92bfa5b740c3487390408cb5721efd7f")
    
    topheadlines = newsapi.get_everything(q="sensex", language='en', page=1, qintitle='sensex')

    articles = topheadlines['articles']
 
    desc = []
    news = []
    img = []
    post = []

    for i in range(len(articles)):
        myarticles = articles[i]
        img.append(myarticles['urlToImage'])
        news.append(myarticles['title'])
        desc.append(myarticles['content'])
        post.append(myarticles['publishedAt'])
    
    mylist = zip(news, desc, img , post)

    print(topheadlines)

    return render(request, 'sensex.html', context={"mylist":mylist})