def search(self):
    """Search NYT for "Covid" news articles in this object's date range.

    Reads ``self.startDate`` and ``self.endDate``; their ``str()`` form must
    look like ``YYYY-MM-DD``. The range is widened to cover both days in
    full (00:00:00 through 23:59:59).

    Returns:
        The articles returned by the search, ordered by publication day,
        newest first. Each article's ``pub_date`` is rewritten in place
        from its leading ``YYYY-MM-DD`` to ``DD-Mon-YYYY``.
    """
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    nyt = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")
    startDate = str(self.startDate) + " 00:00:00"
    endDate = str(self.endDate) + " 23:59:59"
    articles = nyt.article_search(
        query="Covid",
        results=10,
        dates={
            "begin": datetime.strptime(startDate, '%Y-%m-%d %H:%M:%S'),
            "end": datetime.strptime(endDate, '%Y-%m-%d %H:%M:%S'),
        },
        options={
            "sort": "relevance",
            "sources": [
                "New York Times",
                "AP",
                "Reuters",
                "International Herald Tribune",
            ],
            "type_of_material": ["News"],
        })

    def published_day(article):
        # Only the leading YYYY-MM-DD of pub_date is used; the time part is
        # ignored for both sorting and display.
        return datetime.strptime(article['pub_date'][0:10], '%Y-%m-%d')

    sorted_articles = sorted(articles, key=published_day, reverse=True)
    for article in sorted_articles:
        article['pub_date'] = published_day(article).strftime('%d-%b-%Y')
    return sorted_articles
def search(self):
    """Run the fixed "Covid" news search over this object's date range.

    ``self.startDate`` and ``self.endDate`` are stringified and must read as
    ``YYYY-MM-DD``; the search window runs from the first second of the
    start day to the last second of the end day.

    Returns:
        Whatever ``NYTAPI.article_search`` returns for the query, unmodified.
    """
    # NOTE(review): the API key is hard-coded in source; consider moving it
    # to configuration.
    client = NYTAPI("wbWOIDwmGPWGQALhXbfC3BDK3EMtFBMA")

    stamp_format = '%Y-%m-%d %H:%M:%S'
    window_start = datetime.datetime.strptime(
        str(self.startDate) + " 00:00:00", stamp_format)
    window_end = datetime.datetime.strptime(
        str(self.endDate) + " 23:59:59", stamp_format)

    wanted_sources = [
        "New York Times",
        "AP",
        "Reuters",
        "International Herald Tribune",
    ]
    search_options = {
        "sort": "relevance",
        "sources": wanted_sources,
        "type_of_material": ["News"],
    }

    return client.article_search(
        query="Covid",
        results=10,
        dates={"begin": window_start, "end": window_end},
        options=search_options,
    )
# Calls a series of client endpoints and prints the total elapsed time.
# `nyt` (the API client) and `begin` (the start timestamp) are defined
# earlier in the script, outside this excerpt.
# A five-second pause follows every second request - presumably to stay
# within the API rate limit; confirm before changing the cadence.
nyt.top_stories(section="science")
nyt.most_viewed(days=30)
time.sleep(5)

nyt.most_shared(days=30, method="email")
nyt.book_reviews(author="Michelle Obama")
time.sleep(5)

nyt.best_sellers_lists()
nyt.best_sellers_list(date=datetime.datetime(2019, 1, 1),
                      name="hardcover-fiction")
time.sleep(5)

nyt.movie_reviews(keyword="FBI", options={"order": "by-opening-date"})
nyt.article_metadata(
    url=
    "https://www.nytimes.com/2019/10/20/world/middleeast/erdogan-turkey-nuclear-weapons-trump.html"
)
time.sleep(5)

nyt.tag_query("Pentagon", max_results=20)
nyt.archive_metadata(date=datetime.datetime(2019, 1, 1))
time.sleep(5)

# The `dates` dict is keyed by "begin" / "end"; the former
# "begin_date" / "end_date" keys were not recognised, so the search ran
# without any date restriction.
nyt.article_search(query="Trump",
                   results=20,
                   dates={
                       "begin": datetime.datetime(2019, 1, 1),
                       "end": datetime.datetime(2019, 2, 1)
                   },
                   options={"sort": "oldest"})

end = datetime.datetime.now()
print(end - begin)
class TestNewYorkTimes(unittest.TestCase):
    """Tests for the ``NYTAPI`` client methods.

    Every test gets a fresh client built with ``parse_dates=True`` in
    ``setUp`` and closed again in ``tearDown``. The assertions inspect real
    response data, so the tests presumably need network access and a valid
    ``API_KEY`` - confirm against the test configuration.
    """

    def setUp(self):
        self.nyt = NYTAPI(API_KEY, parse_dates=True)

    def tearDown(self):
        self.nyt.close()

    def test_empty_api_key(self):
        with self.assertRaises(ValueError):
            NYTAPI()

    def test_top_stories(self):
        top_stories = self.nyt.top_stories()
        self.assertIsInstance(top_stories, list)
        self.assertGreater(len(top_stories), 0)

        for top_story in top_stories:
            self.assertIsInstance(top_story, dict)
            # With parse_dates=True the date fields must be datetime objects.
            self.assertIsInstance(top_story["created_date"],
                                  datetime.datetime)
            self.assertIsInstance(top_story["published_date"],
                                  datetime.datetime)

    def test_top_stories_section(self):
        section = "world"
        top_stories_section = self.nyt.top_stories(section=section)
        self.assertIsInstance(top_stories_section, list)
        self.assertGreater(len(top_stories_section), 0)

        for top_story in top_stories_section:
            self.assertIsInstance(top_story, dict)

    def test_top_stories_wrong_section(self):
        with self.assertRaises(ValueError):
            self.nyt.top_stories("abcdfsda")

        with self.assertRaises(TypeError):
            self.nyt.top_stories(section=123)

    def test_most_viewed(self):
        most_viewed = self.nyt.most_viewed()
        self.assertIsInstance(most_viewed, list)
        self.assertGreater(len(most_viewed), 0)

        for most in most_viewed:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["media"], list)

    def test_most_viewed_invalid_days(self):
        with self.assertRaises(ValueError):
            self.nyt.most_viewed(2)

        with self.assertRaises(TypeError):
            self.nyt.most_viewed(days="1")

    def test_most_shared(self):
        most_shared = self.nyt.most_shared()
        self.assertIsInstance(most_shared, list)
        self.assertGreater(len(most_shared), 0)

        for most in most_shared:
            self.assertIsInstance(most, dict)
            self.assertIsInstance(most["published_date"], datetime.date)
            self.assertIsInstance(most["updated"], datetime.datetime)
            self.assertIsInstance(most["media"], list)

    def test_most_shared_invalid(self):
        with self.assertRaises(ValueError):
            self.nyt.most_shared(method="twitter")

        with self.assertRaises(ValueError):
            self.nyt.most_shared(days=2)

        with self.assertRaises(TypeError):
            self.nyt.most_shared(days="2")

    def test_book_reviews(self):
        author = "Barack Obama"
        book_reviews = self.nyt.book_reviews(author=author)
        self.assertIsInstance(book_reviews, list)
        self.assertGreater(len(book_reviews), 0)

        for book_review in book_reviews:
            self.assertIsInstance(book_review, dict)
            self.assertEqual(book_review["book_author"], author)

    def test_book_reviews_invalid(self):
        with self.assertRaises(ValueError):
            self.nyt.book_reviews()

        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789, author="author")

        with self.assertRaises(ValueError):
            self.nyt.book_reviews(isbn=213789)

    def test_best_sellers_lists(self):
        best_sellers_lists = self.nyt.best_sellers_lists()
        self.assertIsInstance(best_sellers_lists, list)
        self.assertGreater(len(best_sellers_lists), 0)

    def test_best_seller_list(self):
        best_seller_list = self.nyt.best_sellers_list(
            date=datetime.datetime(2019, 1, 1), name="hardcover-fiction")
        self.assertIsInstance(best_seller_list, list)
        self.assertEqual(best_seller_list[0]["primary_isbn13"],
                         "9780385544153")

    def test_best_seller_list_invalid(self):
        with self.assertRaises(ValueError):
            self.nyt.best_sellers_list(name="not a name")

        with self.assertRaises(TypeError):
            self.nyt.best_sellers_list(date="123")

    def test_movie_reviews(self):
        movie_reviews = self.nyt.movie_reviews()
        self.assertIsInstance(movie_reviews, list)
        self.assertGreater(len(movie_reviews), 0)

        for movie_review in movie_reviews:
            self.assertIsInstance(movie_review, dict)

    def test_movie_reviews_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.movie_reviews(keyword=123)

    def test_article_metadata(self):
        article_metadata = self.nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )
        self.assertIsInstance(article_metadata, list)

        for article in article_metadata:
            self.assertIsInstance(article, dict)

        title = "Prosecutors argue that Trump ‘became the inciter in chief’ and retell riot with explicit video."
        # -1 day + 68400 s is an offset of -5 hours (UTC-05:00).
        creation_datetime = datetime.datetime(
            2021,
            2,
            10,
            11,
            4,
            8,
            tzinfo=datetime.timezone(
                datetime.timedelta(days=-1, seconds=68400)),
        )
        self.assertEqual(article_metadata[0]["title"], title)
        self.assertEqual(
            article_metadata[0]["created_date"],
            creation_datetime,
        )

    def test_article_metadata_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.article_metadata()

        with self.assertRaises(TypeError):
            self.nyt.article_metadata(123)

        with self.assertRaises(ValueError):
            self.nyt.article_metadata("text")

    def test_archive_metadata(self):
        archive_metadata = self.nyt.archive_metadata(
            date=datetime.date.today())
        self.assertIsInstance(archive_metadata, list)
        self.assertGreater(len(archive_metadata), 0)

        for metadata in archive_metadata:
            self.assertIsInstance(metadata, dict)
            # Nothing in this month's archive may predate the first instant
            # of the current month (UTC).
            self.assertGreaterEqual(
                metadata["pub_date"],
                datetime.datetime.now(tz=datetime.timezone.utc).replace(
                    day=1, hour=0, minute=0, second=0, microsecond=0),
            )

    def test_archive_metadata_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.archive_metadata("string")

        with self.assertRaises(TypeError):
            self.nyt.archive_metadata(123)

    def test_article_search(self):
        search = self.nyt.article_search("Joe Biden", results=80)
        self.assertIsInstance(search, list)
        self.assertEqual(80, len(search))

        for article in search:
            self.assertIsInstance(article, dict)

    def test_article_search_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.article_search(123)

        with self.assertRaises(TypeError):
            self.nyt.article_search("query", datetime.date.today())

    def test_section_list(self):
        section_list = self.nyt.section_list()
        self.assertIsInstance(section_list, list)
        self.assertGreater(len(section_list), 0)

        for section in section_list:
            self.assertIsInstance(section, dict)

    def test_latest_articles(self):
        latest_articles = self.nyt.latest_articles()
        self.assertIsInstance(latest_articles, list)

        for article in latest_articles:
            self.assertIsInstance(article, dict)

    def test_latest_articles_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.latest_articles(source=123)

    def test_tag_query(self):
        tags = self.nyt.tag_query("Obama", max_results=2)
        self.assertIsInstance(tags, list)
        # Compare by value: identity of equal ints is an implementation
        # detail, so assertIs is the wrong assertion here.
        self.assertEqual(2, len(tags))

    def test_tag_query_invalid(self):
        with self.assertRaises(TypeError):
            self.nyt.tag_query(123)

        with self.assertRaises(TypeError):
            self.nyt.tag_query("Obama", max_results="2")

    def test_parse_dates_disabled(self):
        local_nyt = NYTAPI(API_KEY)
        # tearDown only closes self.nyt, so release this extra client too.
        self.addCleanup(local_nyt.close)
        data = local_nyt.article_metadata(
            "https://www.nytimes.com/live/2021/02/10/us/impeachment-trial/prosecutors-begin-arguments-against-trump-saying-he-became-the-inciter-in-chief-of-a-dangerous-insurrection"
        )

        # Without parse_dates the timestamp stays a raw string.
        self.assertEqual(data[0]["created_date"], "2021-02-10T11:04:08-05:00")
from pynytimes import NYTAPI

nyt = NYTAPI("YOUR_API_KEY")

# Pass only the search term. The client builds the request (including the
# API key) itself; the previous argument was a complete REST URL, which was
# sent as the literal search text.
# NOTE(review): the old URL used `fq=indigo`; if a filter query rather than
# a plain search term was intended, confirm how the client exposes it.
articles = nyt.article_search("indigo")
#print(articles)

# One {'url': ...} record per article.
news = [{'url': article['web_url']} for article in articles]
#print(news)

# Flatten the records to a plain list of URLs and print one per line.
urls = [url for record in news for url in record.values()]
for url in urls:
    print(url)

# article :indigo
# Sample output recorded from an earlier run; results will vary.
"""
https://www.nytimes.com/1889/07/14/archives/no-robbery.html
https://www.nytimes.com/1858/10/29/archives/central-america-crops-and-contracts-in-costa-ricathe-proposed.html
https://www.nytimes.com/1898/08/20/archives/reviews-of-books-dialect-tales-justly-praised.html
https://www.nytimes.com/1890/10/19/archives/anne-bissell.html
https://www.nytimes.com/1859/10/27/archives/european-news-the-jason-at-st-johns-further-by-the-persia-the-great.html
https://www.nytimes.com/1859/06/27/archives/from-the-pacific-coast-nicaragua-rejects-the-american-ultimatum.html
"""
import pandas as pd

# NOTE(review): the API key is hard-coded in source; consider moving it to
# configuration.
nyt = NYTAPI("WxQXsVSaIIlTgEfG0VnrlP7JhOVYYL0j")

search = "US Embassy move to Jerusalem"
start_date = datetime(2015, 1, 1)
end_date = datetime(2019, 12, 31)

# Up to 50 New York Times articles for the search phrase, 2015 through 2019,
# ordered by relevance.
articles = nyt.article_search(
    query=search,
    results=50,
    dates={
        "begin": start_date,
        "end": end_date
    },
    options={
        "sort": "relevance",
        "sources": [
            "New York Times",
        ],
    }
)


def clean(text):
    """Return *text* with @mentions, URLs and non-alphanumerics removed.

    Each match is replaced by a space, then runs of whitespace are collapsed
    to single spaces and the ends are trimmed.

    The pattern is a raw string so the regex escapes are not treated as
    (invalid) string escapes. The alternation bars carry no surrounding
    spaces: with the spaces, a punctuation character was only removed when
    followed by a space and a URL only when preceded by one.
    """
    return ' '.join(
        re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ",
               text).split())


headlines = []
pub_dates = []

# Collect the main headline of every article returned by the search.
for article in articles:
    headlines.append(article['headline']['main'])
from datetime import date, datetime

from pynytimes import NYTAPI

# Make sure to set parse dates to True so that the dates
# are parsed into datetime.datetime or datetime.date objects
nyt = NYTAPI(
    key="Your API Key",  # Get your API Key at https://developer.nytimes.com
    parse_dates=True,
)

# Search articles about President Biden
biden = nyt.article_search("biden")

# You can optionally define the dates between which you want the articles
# to be. The range is given under the keys "begin" and "end"; the former
# "start" key was not recognised, leaving the search without a lower bound.
biden_january = nyt.article_search(
    query="biden",
    dates={
        "begin": date(2021, 1, 1),
        "end": date(2021, 1, 31)
    },
)

# Optionally you can also define further search parameters; none are passed
# here yet, so this call repeats the plain query above.
biden = nyt.article_search(
    "biden",
)