Ejemplo n.º 1
0
    def get_everything(self,
                       q=None,
                       sources=None,
                       domains=None,
                       exclude_domains=None,
                       from_param=None,
                       to=None,
                       language=None,
                       sort_by=None,
                       page=None,
                       page_size=None):
        """
            Search through millions of articles from over 5,000 large and small news sources and blogs.

            Every parameter is optional; a parameter left as None is not sent with the request.

            Optional parameters:
                (str) q - return articles matching the given keyword or phrase, e.g.
                            'bitcoin', 'trump', 'tesla', 'ethereum', etc

                (str) sources - return articles of the given news sources! Some valid values are:
                            'bbc-news', 'the-verge', 'abc-news', 'crypto coins news',
                            'ary news','associated press','wired','aftenposten','australian financial review','axios',
                            'bbc news','bild','blasting news','bloomberg','business insider','engadget','google news',
                            'hacker news','info money','recode','techcrunch','techradar','the next web','the verge' etc.

                (str) domains - A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com)
                            to restrict the search to.

                (str) exclude_domains - A comma-separated string of domains to be excluded from the search.

                (str) from_param - A date and optional time for the oldest article allowed.
                            (e.g. 2018-03-05 or 2018-03-05T03:46:15)

                (str) to - A date and optional time for the newest article allowed (same format as from_param).

                (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for.
                            Must be a member of const.languages. Valid values are:
                            'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh'

                (str) sort_by - The order to sort the articles in. Must be a member of const.sort_method.
                            Valid values are: 'relevancy','popularity','publishedAt'

                (int) page_size - The number of results to return per page (request), from 1 to 100.
                            20 is the default, 100 is the maximum.

                (int) page - Use this to page through the results if the total results found is greater
                            than the page size. Must be greater than 0.

            Returns the JSON body of the response as decoded by r.json().

            Raises:
                TypeError - a parameter has the wrong type.
                ValueError - a parameter has the right type but an unacceptable value.
                NewsAPIException - the response status code is not OK.
        """

        # Define Payload
        payload = {}

        # Keyword/Phrase
        if q is not None:
            if not isinstance(q, str):
                raise TypeError('keyword/phrase q param should be of type str')
            payload['q'] = q

        # Sources
        if sources is not None:
            if not isinstance(sources, str):
                raise TypeError('sources param should be of type str')
            payload['sources'] = sources

        # Domains To Search
        if domains is not None:
            if not isinstance(domains, str):
                raise TypeError('domains param should be of type str')
            payload['domains'] = domains

        # Domains To Exclude
        if exclude_domains is not None:
            if not isinstance(exclude_domains, str):
                raise TypeError('exclude_domains param should be of type str')
            payload['excludeDomains'] = exclude_domains

        # Search From This Date ...
        if from_param is not None:
            if not isinstance(from_param, str):
                raise TypeError('from_param should be of type str')
            # Only the shape is checked: at least 10 characters, with dashes
            # where YYYY-MM-DD puts them. Anything after that is passed on.
            if (len(from_param) < 10 or from_param[4] != '-'
                    or from_param[7] != '-'):
                raise ValueError(
                    'from_param should be in the format of YYYY-MM-DD')
            payload['from'] = from_param

        # ... To This Date
        if to is not None:
            if not isinstance(to, str):
                raise TypeError('to param should be of type str')
            # Same shape check as from_param; the two failure modes keep
            # their historically distinct messages.
            if len(to) < 10:
                raise ValueError(
                    'to param should be in the format of YYYY-MM-DD')
            if to[4] != '-' or to[7] != '-':
                raise ValueError('to should be in the format of YYYY-MM-DD')
            payload['to'] = to

        # Language
        if language is not None:
            if not isinstance(language, str):
                raise TypeError('language param should be of type str')
            if language not in const.languages:
                raise ValueError('invalid language')
            payload['language'] = language

        # Sort Method
        if sort_by is not None:
            if not isinstance(sort_by, str):
                raise TypeError('sort_by param should be of type str')
            if sort_by not in const.sort_method:
                raise ValueError('invalid sort')
            payload['sortBy'] = sort_by

        # Page Size
        if page_size is not None:
            # bool is a subclass of int; keep rejecting True/False.
            if not isinstance(page_size, int) or isinstance(page_size, bool):
                raise TypeError('page_size param should be an int')
            # The lower bound is 1, matching the error message; 0 used to
            # slip through.
            if not 1 <= page_size <= 100:
                raise ValueError(
                    'page_size param should be an int between 1 and 100')
            payload['pageSize'] = page_size

        # Page
        if page is not None:
            if not isinstance(page, int) or isinstance(page, bool):
                raise TypeError('page param should be an int')
            if page <= 0:
                raise ValueError('page param should be an int greater than 0')
            payload['page'] = page

        # Send Request
        r = requests.get(const.EVERYTHING_URL,
                         auth=self.auth,
                         timeout=30,
                         params=payload)

        # Check Status of Request
        if r.status_code != requests.codes.ok:
            raise NewsAPIException(r.json())

        return r.json()
Ejemplo n.º 2
0
    def get_sources(self, category=None, language=None, country=None):
        """
            Returns the subset of news publishers that top headlines are available from.

            Every parameter is optional; a parameter left as None is not sent with the request.

            Optional parameters:
                (str) category - The category you want to get headlines for! Must be a member of
                                const.categories. Valid values are:
                                'business','entertainment','general','health','science','sports','technology'

                (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for.
                                Must be a member of const.languages. Valid values are:
                                'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh'

                (str) country - The 2-letter ISO 3166-1 code of the country you want to get headlines!
                                Must be a member of const.countries. Valid values are:
                                'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr',
                                'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz',
                                'ph','pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us'

            Returns the JSON body of the response as decoded by r.json().

            Raises:
                TypeError - a parameter is not a str.
                ValueError - a parameter is not one of the allowed values.
                NewsAPIException - the response status code is not OK.
        """

        # Define Payload
        payload = {}

        # Language
        if language is not None:
            if not isinstance(language, str):
                raise TypeError('language param should be of type str')
            if language not in const.languages:
                raise ValueError('invalid language')
            payload['language'] = language

        # Country
        if country is not None:
            if not isinstance(country, str):
                raise TypeError('country param should be of type str')
            if country not in const.countries:
                raise ValueError('invalid country')
            payload['country'] = country

        # Category
        if category is not None:
            if not isinstance(category, str):
                raise TypeError('category param should be of type str')
            if category not in const.categories:
                raise ValueError('invalid category')
            payload['category'] = category

        # Send Request
        r = requests.get(const.SOURCES_URL,
                         auth=self.auth,
                         timeout=30,
                         params=payload)

        # Check Status of Request
        if r.status_code != requests.codes.ok:
            raise NewsAPIException(r.json())

        return r.json()
Ejemplo n.º 3
0
    def get_top_headlines(self,
                          q=None,
                          sources=None,
                          language='en',
                          country=None,
                          category=None,
                          page_size=None,
                          page=None):
        """
            Returns live top and breaking headlines for a country, specific category in a country, single source, or multiple sources.

            Every parameter is optional; a parameter passed as None is not sent with the request.
            Note that language defaults to 'en', so it is sent unless None is passed explicitly.

            Optional parameters:
                (str) q - return headlines w/ specific keyword or phrase. For example:
                          'bitcoin', 'trump', 'tesla', 'ethereum', etc.

                (str) sources - return headlines of news sources! Some valid values are:
                                'bbc-news', 'the-verge', 'abc-news', 'crypto coins news',
                                'ary news','associated press','wired','aftenposten','australian financial review','axios',
                                'bbc news','bild','blasting news','bloomberg','business insider','engadget','google news',
                                'hacker news','info money','recode','techcrunch','techradar','the next web','the verge' etc.
                                Cannot be combined with country or category.

                (str) language - The 2-letter ISO-639-1 code of the language you want to get headlines for.
                                Must be a member of const.languages. Valid values are:
                                'ar','de','en','es','fr','he','it','nl','no','pt','ru','se','ud','zh'

                (str) country - The 2-letter ISO 3166-1 code of the country you want to get headlines!
                                Must be a member of const.countries. Valid values are:
                                'ae','ar','at','au','be','bg','br','ca','ch','cn','co','cu','cz','de','eg','fr','gb','gr',
                                'hk','hu','id','ie','il','in','it','jp','kr','lt','lv','ma','mx','my','ng','nl','no','nz',
                                'ph','pl','pt','ro','rs','ru','sa','se','sg','si','sk','th','tr','tw','ua','us'

                (str) category - The category you want to get headlines for! Must be a member of
                                const.categories. Valid values are:
                                'business','entertainment','general','health','science','sports','technology'

                (int) page_size - The number of results to return per page (request), from 1 to 100.
                                20 is the default, 100 is the maximum.

                (int) page - Use this to page through the results if the total results found is greater
                                than the page size. Must be greater than 0.

            Returns the JSON body of the response as decoded by r.json().

            Raises:
                TypeError - a parameter has the wrong type.
                ValueError - a parameter has an unacceptable value, or sources is combined
                             with country/category.
                NewsAPIException - the response status code is not OK.
        """

        # Define Payload
        payload = {}

        # Keyword/Phrase
        if q is not None:
            if not isinstance(q, str):
                raise TypeError(
                    'keyword/phrase q param should be a of type str')
            payload['q'] = q

        # Sources are mutually exclusive with country/category
        if sources is not None and (country is not None
                                    or category is not None):
            raise ValueError(
                'cannot mix country/category param with sources param.')

        # Sources
        if sources is not None:
            if not isinstance(sources, str):
                raise TypeError('sources param should be of type str')
            payload['sources'] = sources

        # Language
        if language is not None:
            if not isinstance(language, str):
                raise TypeError('language param should be of type str')
            if language not in const.languages:
                raise ValueError('invalid language')
            payload['language'] = language

        # Country
        if country is not None:
            if not isinstance(country, str):
                raise TypeError('country param should be of type str')
            if country not in const.countries:
                raise ValueError('invalid country')
            payload['country'] = country

        # Category
        if category is not None:
            if not isinstance(category, str):
                raise TypeError('category param should be of type str')
            if category not in const.categories:
                raise ValueError('invalid category')
            payload['category'] = category

        # Page Size
        if page_size is not None:
            # bool is a subclass of int; keep rejecting True/False.
            if not isinstance(page_size, int) or isinstance(page_size, bool):
                raise TypeError('page_size param should be an int')
            # The lower bound is 1, matching the error message; 0 used to
            # slip through.
            if not 1 <= page_size <= 100:
                raise ValueError(
                    'page_size param should be an int between 1 and 100')
            payload['pageSize'] = page_size

        # Page
        if page is not None:
            if not isinstance(page, int) or isinstance(page, bool):
                raise TypeError('page param should be an int')
            if page <= 0:
                raise ValueError('page param should be an int greater than 0')
            payload['page'] = page

        # Send Request
        r = requests.get(const.TOP_HEADLINES_URL,
                         auth=self.auth,
                         timeout=30,
                         params=payload)

        # Check Status of Request
        if r.status_code != requests.codes.ok:
            raise NewsAPIException(r.json())

        return r.json()
Ejemplo n.º 4
0
    def get_sources(self,
                    category=None,
                    language=None,
                    country=None):  # noqa: C901
        """Query the `/sources` endpoint.

        Each filter is optional; a filter left as ``None`` is not sent with the request.

        :param category: Restrict the result to sources of this category.
            Must be a member of :data:`newsapi.const.categories`.
        :type category: str or None

        :param language: Restrict the result to sources in this language.
            Must be a member of :data:`newsapi.const.languages`.
        :type language: str or None

        :param country: Restrict the result to sources of this country.
            Must be a member of :data:`newsapi.const.countries`.
        :type country: str or None

        :return: The JSON body of the response, as decoded by ``response.json()``.
        :raises TypeError: If a filter fails the ``is_valid_string`` check.
        :raises ValueError: If a filter is not one of the allowed values.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """

        params = {}

        # Filters are validated in a fixed order: language, country, category.
        if language is not None:
            if not is_valid_string(language):
                raise TypeError("language param should be of type str")
            if language not in const.languages:
                raise ValueError("invalid language")
            params["language"] = language

        if country is not None:
            if not is_valid_string(country):
                raise TypeError("country param should be of type str")
            if country not in const.countries:
                raise ValueError("invalid country")
            params["country"] = country

        if category is not None:
            if not is_valid_string(category):
                raise TypeError("category param should be of type str")
            if category not in const.categories:
                raise ValueError("invalid category")
            params["category"] = category

        # Issue the request through the configured transport.
        response = self.request_method.get(
            const.SOURCES_URL,
            auth=self.auth,
            timeout=30,
            params=params,
        )

        # Any non-OK status is surfaced with the decoded error body.
        if response.status_code == requests.codes.ok:
            return response.json()
        raise NewsAPIException(response.json())
Ejemplo n.º 5
0
    def get_top_headlines(  # noqa: C901
            self,
            q=None,
            qintitle=None,
            sources=None,
            language="en",
            country=None,
            category=None,
            page_size=None,
            page=None):
        """Call the `/top-headlines` endpoint.

        Fetch live top and breaking headlines.

        This endpoint provides live top and breaking headlines for a country, specific category in a country,
        single source, or multiple sources. You can also search with keywords.  Articles are sorted by the earliest
        date published first.

        A parameter passed as ``None`` is not sent with the request.

        :param q: Keywords or a phrase to search for in the article title and body.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type q: str or None

        :param qintitle: Keywords or a phrase to search for in the article title.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type qintitle: str or None

        :param sources: A comma-separated string of identifiers for the news sources or blogs you want headlines from.
            Use :meth:`NewsApiClient.get_sources` to locate these programmatically, or look at the
            `sources index <https://newsapi.org/sources>`_.  **Note**: you can't mix this param with the
            ``country`` or ``category`` params.
        :type sources: str or None

        :param language: The 2-letter ISO-639-1 code of the language you want to get headlines for.
            See :data:`newsapi.const.languages` for the set of allowed values.
            The default for this method is ``"en"`` (English).  **Note**: this parameter is not mentioned in the
            `/top-headlines documentation <https://newsapi.org/docs/endpoints/top-headlines>`_ as of Sep. 2019,
            but *is* supported by the API.
        :type language: str or None

        :param country: The 2-letter ISO 3166-1 code of the country you want to get headlines for.
            See :data:`newsapi.const.countries` for the set of allowed values.
            **Note**: you can't mix this parameter with the ``sources`` param.
        :type country: str or None

        :param category: The category you want to get headlines for.
            See :data:`newsapi.const.categories` for the set of allowed values.
            **Note**: you can't mix this parameter with the ``sources`` param.
        :type category: str or None

        :param page_size: The number of results to return per page (request), from 1 to 100.
            20 is the default, 100 is the maximum.
        :type page_size: int or None

        :param page: Use this to page through the results if the total results found is
            greater than the page size.  Must be greater than 0.
        :type page: int or None

        :return: JSON response as nested Python dictionary.
        :rtype: dict
        :raises TypeError: If a parameter has the wrong type.
        :raises ValueError: If a parameter has an unacceptable value, or ``sources`` is combined with
            ``country``/``category``.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """

        payload = {}

        # Keyword/Phrase
        if q is not None:
            if not is_valid_string(q):
                raise TypeError("keyword/phrase q param should be of type str")
            payload["q"] = q

        # Keyword/Phrase in Title
        if qintitle is not None:
            if not is_valid_string(qintitle):
                raise TypeError(
                    "keyword/phrase qintitle param should be of type str")
            payload["qintitle"] = qintitle

        # Sources are mutually exclusive with country/category
        if sources is not None and (country is not None
                                    or category is not None):
            raise ValueError(
                "cannot mix country/category param with sources param.")

        # Sources
        if sources is not None:
            if not is_valid_string(sources):
                raise TypeError("sources param should be of type str")
            payload["sources"] = sources

        # Language
        if language is not None:
            if not is_valid_string(language):
                raise TypeError("language param should be of type str")
            if language not in const.languages:
                raise ValueError("invalid language")
            payload["language"] = language

        # Country
        if country is not None:
            if not is_valid_string(country):
                raise TypeError("country param should be of type str")
            if country not in const.countries:
                raise ValueError("invalid country")
            payload["country"] = country

        # Category
        if category is not None:
            if not is_valid_string(category):
                raise TypeError("category param should be of type str")
            if category not in const.categories:
                raise ValueError("invalid category")
            payload["category"] = category

        # Page Size
        if page_size is not None:
            # bool is a subclass of int; keep rejecting True/False.
            if not isinstance(page_size, int) or isinstance(page_size, bool):
                raise TypeError("page_size param should be an int")
            # The lower bound is 1, matching the error message; 0 used to
            # slip through.
            if not 1 <= page_size <= 100:
                raise ValueError(
                    "page_size param should be an int between 1 and 100")
            payload["pageSize"] = page_size

        # Page
        if page is not None:
            if not isinstance(page, int) or isinstance(page, bool):
                raise TypeError("page param should be an int")
            if page <= 0:
                raise ValueError("page param should be an int greater than 0")
            payload["page"] = page

        # Send Request
        r = self.request_method.get(const.TOP_HEADLINES_URL,
                                    auth=self.auth,
                                    timeout=30,
                                    params=payload)

        # Check Status of Request
        if r.status_code != requests.codes.ok:
            raise NewsAPIException(r.json())

        return r.json()
Ejemplo n.º 6
0
    def get_everything(  # noqa: C901
        self,
        q=None,
        qintitle=None,
        sources=None,
        domains=None,
        exclude_domains=None,
        from_param=None,
        to=None,
        language=None,
        sort_by=None,
        page=None,
        page_size=None,
    ):
        """Call the `/everything` endpoint.

        Search through millions of articles from over 30,000 large and small news sources and blogs.

        A parameter left as ``None`` is not sent with the request.

        :param q: Keywords or a phrase to search for in the article title and body.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type q: str or None

        :param qintitle: Keywords or a phrase to search for in the article title.  See the official News API
            `documentation <https://newsapi.org/docs/endpoints/everything>`_ for search syntax and examples.
        :type qintitle: str or None

        :param sources: A comma-separated string of identifiers for the news sources or blogs you want headlines from.
            Use :meth:`NewsApiClient.get_sources` to locate these programmatically, or look at the
            `sources index <https://newsapi.org/sources>`_.
        :type sources: str or None

        :param domains:  A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com)
            to restrict the search to.
        :type domains: str or None

        :param exclude_domains:  A comma-separated string of domains (eg bbc.co.uk, techcrunch.com, engadget.com)
            to remove from the results.
        :type exclude_domains: str or None

        :param from_param: A date and optional time for the oldest article allowed.
            The value is converted by ``stringify_date_param`` before being sent.
            If a str, the format must conform to ISO-8601 specifically as one of either
            ``%Y-%m-%d`` (e.g. *2019-09-07*) or ``%Y-%m-%dT%H:%M:%S`` (e.g. *2019-09-07T13:04:15*).
            An int or float is assumed to represent a Unix timestamp.  All datetime inputs are assumed to be UTC.
        :type from_param: str or datetime.datetime or datetime.date or int or float or None

        :param to: A date and optional time for the newest article allowed.
            The value is converted by ``stringify_date_param`` before being sent.
            If a str, the format must conform to ISO-8601 specifically as one of either
            ``%Y-%m-%d`` (e.g. *2019-09-07*) or ``%Y-%m-%dT%H:%M:%S`` (e.g. *2019-09-07T13:04:15*).
            An int or float is assumed to represent a Unix timestamp.  All datetime inputs are assumed to be UTC.
        :type to: str or datetime.datetime or datetime.date or int or float or None

        :param language: The 2-letter ISO-639-1 code of the language you want to get headlines for.
            See :data:`newsapi.const.languages` for the set of allowed values.
        :type language: str or None

        :param sort_by: The order to sort articles in.
            See :data:`newsapi.const.sort_method` for the set of allowed values.
        :type sort_by: str or None

        :param page: Use this to page through the results if the total results found is
            greater than the page size.  Must be greater than 0.
        :type page: int or None

        :param page_size: The number of results to return per page (request).  Must be between 1 and 100.
        :type page_size: int or None

        :return: JSON response as nested Python dictionary.
        :rtype: dict
        :raises TypeError: If a parameter has the wrong type.
        :raises ValueError: If a parameter has an unacceptable value.
        :raises NewsAPIException: If the HTTP status code of the response is not OK.
        """

        payload = {}

        # Keyword/Phrase
        if q is not None:
            if not is_valid_string(q):
                raise TypeError("keyword/phrase q param should be of type str")
            payload["q"] = q

        # Keyword/Phrase in Title
        if qintitle is not None:
            if not is_valid_string(qintitle):
                raise TypeError(
                    "keyword/phrase qintitle param should be of type str")
            payload["qintitle"] = qintitle

        # Sources
        if sources is not None:
            if not is_valid_string(sources):
                raise TypeError("sources param should be of type str")
            payload["sources"] = sources

        # Domains To Search
        if domains is not None:
            if not is_valid_string(domains):
                raise TypeError("domains param should be of type str")
            payload["domains"] = domains

        # Domains To Exclude
        if exclude_domains is not None:
            if not isinstance(exclude_domains, str):
                raise TypeError("exclude_domains param should be of type str")
            payload["excludeDomains"] = exclude_domains

        # Search From This Date ...
        if from_param is not None:
            payload["from"] = stringify_date_param(from_param)

        # ... To This Date
        if to is not None:
            payload["to"] = stringify_date_param(to)

        # Language
        if language is not None:
            if not is_valid_string(language):
                raise TypeError("language param should be of type str")
            if language not in const.languages:
                raise ValueError("invalid language")
            payload["language"] = language

        # Sort Method
        if sort_by is not None:
            if not is_valid_string(sort_by):
                raise TypeError("sort_by param should be of type str")
            if sort_by not in const.sort_method:
                raise ValueError("invalid sort")
            payload["sortBy"] = sort_by

        # Page Size
        if page_size is not None:
            # bool is a subclass of int; keep rejecting True/False.
            if not isinstance(page_size, int) or isinstance(page_size, bool):
                raise TypeError("page_size param should be an int")
            # The lower bound is 1, matching the error message; 0 used to
            # slip through.
            if not 1 <= page_size <= 100:
                raise ValueError(
                    "page_size param should be an int between 1 and 100")
            payload["pageSize"] = page_size

        # Page
        if page is not None:
            if not isinstance(page, int) or isinstance(page, bool):
                raise TypeError("page param should be an int")
            if page <= 0:
                raise ValueError("page param should be an int greater than 0")
            payload["page"] = page

        # Send Request
        r = self.request_method.get(const.EVERYTHING_URL,
                                    auth=self.auth,
                                    timeout=30,
                                    params=payload)

        # Check Status of Request
        if r.status_code != requests.codes.ok:
            raise NewsAPIException(r.json())

        return r.json()
Ejemplo n.º 7
0
def search():
    """Run an article search and return the outcome as a JSON response.

    Reads ``keyword``, ``fromDate``, ``toDate`` and ``sources`` from the
    request's query arguments and forwards them to
    ``newsapi.get_everything`` (English only, sorted by ``publishedAt``,
    page size 30).  When ``sources`` is ``"all"`` no source filter is sent.

    Articles with a missing, ``None`` or ``"null"`` author, content,
    description, publishedAt, url, urlToImage, title or source name are
    dropped from the result.

    The response always has HTTP status 200; its JSON body carries
    ``responseCode`` 1 with ``searchResults`` on success, or
    ``responseCode`` 2 with ``errorMessage`` and an empty ``searchResults``
    on failure.
    """
    result = {}
    keyword = request.args.get('keyword')
    from_date = request.args.get('fromDate')
    to_date = request.args.get('toDate')
    source = request.args.get('sources')

    query = {
        'q': keyword,
        'from_param': from_date,
        'to': to_date,
        'sort_by': 'publishedAt',
        'page_size': 30,
        'language': 'en',
    }
    # "all" means no source restriction, so the parameter is simply omitted.
    if source != "all":
        query['sources'] = source

    required_fields = ("author", "content", "description", "publishedAt",
                       "url", "urlToImage", "title")

    try:
        search_results = newsapi.get_everything(**query)

        valid_articles = []
        for article in search_results['articles']:
            # "source" may itself be missing or null.
            source_info = article.get("source") or {}
            values = [article.get(field) for field in required_fields]
            values.append(source_info.get("name"))
            if any(value is None or value == "null" for value in values):
                continue
            valid_articles.append(article)

        result['responseCode'] = 1
        result['searchResults'] = valid_articles
    except Exception as exc:
        # Top-level boundary: every failure becomes an error payload.
        # get_message is defined elsewhere and is only applied to
        # NewsAPIException instances; other errors (e.g. a ValueError raised
        # while validating arguments) presumably lack the state it reads, so
        # they are reported through str() instead -- TODO confirm.
        if isinstance(exc, NewsAPIException):
            error_message = NewsAPIException.get_message(exc)
        else:
            error_message = str(exc)
        result['responseCode'] = 2
        result['errorMessage'] = error_message
        result['searchResults'] = []
        print(result)

    response = application.response_class(response=json.dumps(result),
                                          status=200,
                                          mimetype='application/json')
    print(response)
    return response