def test_scrape_spreadsheet(self):
    """Scraping the fixture workbook yields 4 rows with the expected fields.

    Checks the first and last story; removed a second, verbatim-duplicated
    assertion on stories[0]['date'] that added no coverage (it was likely
    intended for stories[3]).
    """
    scraper = SpreadsheetScraper()
    stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    self.assertEqual(len(stories), 4)

    # First row of the sheet.
    self.assertEqual(stories[0]['date'], '42467')  # Crappy excel date format
    self.assertEqual(stories[0]['graphic_slug'], 'voting-wait-20160404')
    self.assertEqual(stories[0]['graphic_type'], 'Graphic')
    self.assertEqual(
        stories[0]['story_headline'],
        'What Keeps Election Officials Up At Night? Fear Of Long Lines At The Polls'
    )
    self.assertEqual(
        stories[0]['story_url'],
        'http://www.npr.org/2016/04/07/473293026/what-keeps-election-officials-up-at-night-fear-of-long-lines-at-the-polls'
    )
    self.assertEqual(stories[0]['contact'], 'Alyson Hurt')

    # Last row of the sheet.
    self.assertEqual(stories[3]['graphic_slug'], 'seed-market-20160405')
    self.assertEqual(stories[3]['graphic_type'], 'Graphic')
    self.assertEqual(
        stories[3]['story_headline'],
        'Big Seed: Consolidation Is Shrinking The Industry Even Further')
    self.assertEqual(
        stories[3]['story_url'],
        'http://www.npr.org/sections/thesalt/2016/04/06/472960018/big-seed-consolidation-is-shrinking-the-industry-even-further'
    )
    self.assertEqual(stories[3]['contact'], 'Alyson Hurt')
def test_get_story_stats(self, mock_time_bucket, mock_upload, mock_update, mock_linger):
    """get_story_stats() should pass the written story, the linger data and
    the time bucket straight through to the updater."""
    # Canned analytics payload returned by the mocked linger-rate fetch.
    fake_linger = [{
        'slug': 'slug-here',
        'stats': {
            'total_people': 100,
            'raw_avg_seconds': 330,
            'minutes': 5,
            'seconds': 30,
        },
    }]
    mock_upload.return_value = 'http://image-url-here'
    mock_linger.return_value = fake_linger
    mock_time_bucket.return_value = 'time bucket'

    # Load a single fake story into a clean table.
    clear_stories()
    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    written = scraper.write([scraped[0]])

    get_story_stats()

    # The updater must be invoked exactly once with the mocked values.
    mock_update.assert_called_once_with(written[0], fake_linger, 'time bucket')
def scrape_spreadsheet():
    """
    Scrape 'Did we touch it?' spreadsheet
    """
    # NOTE(review): this variant of write() takes the db handle first —
    # other call sites in the project pass only the stories; confirm which
    # SpreadsheetScraper version this file targets.
    database = dataset.connect(app_config.POSTGRES_URL)

    get_document(app_config.STORIES_GOOGLE_DOC_KEY, app_config.STORIES_PATH)

    scraper = SpreadsheetScraper()
    parsed = scraper.scrape_spreadsheet(app_config.STORIES_PATH)
    scraper.write(database, parsed)
def test_write_spreadsheet(self, mock_upload):
    """Writing scraped rows persists one Story per row with matching fields."""
    mock_upload.return_value = 'http://image-url-here'

    # Start from an empty table, then scrape and persist the fixture rows.
    clear_stories()
    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    scraper.write(scraped)

    saved = Story.select()
    self.assertEqual(len(saved), 4)

    # Each persisted record mirrors its source row, in order.
    for position, row in enumerate(scraped):
        self.assertEqual(saved[position].name, row['story_headline'])
        self.assertEqual(saved[position].url, row['story_url'])
class TestScrapeSpreadsheet(unittest.TestCase):
    """Exercise SpreadsheetScraper against the snapshot workbook."""

    def setUp(self):
        self.scraper = SpreadsheetScraper()
        self.stories = self.scraper.scrape_spreadsheet(
            filename='tests/snapshots/did-visuals-touch-it.xlsx')

    def test_seamus_id(self):
        self.assertEqual(self.stories[1].seamus_id, '403291009')

    def test_duration(self):
        # This one is a none that should be zero
        self.assertEqual(self.stories[0].duration, 0)
        # Correct entry
        self.assertEqual(self.stories[1].duration, 0.5)
        # The next one was entered as a negative
        # self.assertEqual(self.stories[14].duration, 1)

    def test_contribution(self):
        self.assertEqual(
            self.stories[1].contribution,
            'Request / tone / edit reporter or producer images, Source pre-existing images (photo or illustration) from the wires / Flickr / other newspapers / archives')

    def test_contributors(self):
        # Single contributor and a comma-joined multi-contributor row.
        self.assertEqual(self.stories[0].contributors, 'Kainaz Amaria')
        self.assertEqual(
            self.stories[10].contributors,
            'Ariel Zambelich, Emily Bogle, Ryan Kellman')

    def test_timestamp(self):
        self.assertEqual(
            self.stories[0].timestamp,
            datetime(2015, 4, 29, 14, 38, 32))
def test_scrape_spreadsheet(self):
    """Scraping the fixture workbook yields 4 rows with the expected fields.

    Checks the first and last story; removed a second, verbatim-duplicated
    assertion on stories[0]['date'] that added no coverage (it was likely
    intended for stories[3]).
    """
    scraper = SpreadsheetScraper()
    stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    self.assertEqual(len(stories), 4)

    # First row of the sheet.
    self.assertEqual(stories[0]['date'], '42467')  # Crappy excel date format
    self.assertEqual(stories[0]['graphic_slug'], 'voting-wait-20160404')
    self.assertEqual(stories[0]['graphic_type'], 'Graphic')
    self.assertEqual(
        stories[0]['story_headline'],
        'What Keeps Election Officials Up At Night? Fear Of Long Lines At The Polls')
    self.assertEqual(
        stories[0]['story_url'],
        'http://www.npr.org/2016/04/07/473293026/what-keeps-election-officials-up-at-night-fear-of-long-lines-at-the-polls')
    self.assertEqual(stories[0]['contact'], 'Alyson Hurt')

    # Last row of the sheet.
    self.assertEqual(stories[3]['graphic_slug'], 'seed-market-20160405')
    self.assertEqual(stories[3]['graphic_type'], 'Graphic')
    self.assertEqual(
        stories[3]['story_headline'],
        'Big Seed: Consolidation Is Shrinking The Industry Even Further')
    self.assertEqual(
        stories[3]['story_url'],
        'http://www.npr.org/sections/thesalt/2016/04/06/472960018/big-seed-consolidation-is-shrinking-the-industry-even-further')
    self.assertEqual(stories[3]['contact'], 'Alyson Hurt')
def test_write_spreadsheet_duplicates(self, mock_upload):
    """Writing the same scraped rows twice must not create duplicate Stories."""
    mock_upload.return_value = 'http://image-url-here'

    clear_stories()
    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

    # First write populates the table.
    scraper.write(scraped)
    self.assertEqual(len(Story.select()), 4)

    # A second identical write must leave the count unchanged.
    scraper.write(scraped)
    self.assertEqual(len(Story.select()), 4)
def load_spreadsheet(source):
    """Download the source's Google Doc, scrape it, and persist the stories.

    Returns whatever SpreadsheetScraper.write() reports as newly written.
    """
    get_document(source['doc_key'], app_config.STORIES_PATH)

    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet(app_config.STORIES_PATH)
    return scraper.write(scraped, team=source['team'])
def setUp(self):
    """Scrape the snapshot workbook once per test."""
    self.scraper = SpreadsheetScraper()
    self.stories = self.scraper.scrape_spreadsheet(
        filename='tests/snapshots/did-visuals-touch-it.xlsx')