def scrape_spreadsheet():
    """Scrape the 'Did we touch it?' spreadsheet into the database."""
    # Pull the latest copy of the spreadsheet down first.
    get_document(app_config.STORIES_GOOGLE_DOC_KEY, app_config.STORIES_PATH)
    database = dataset.connect(app_config.POSTGRES_URL)
    sheet_scraper = SpreadsheetScraper()
    scraped_stories = sheet_scraper.scrape_spreadsheet(app_config.STORIES_PATH)
    sheet_scraper.write(database, scraped_stories)
def scrape_spreadsheet():
    """Scrape 'Did we touch it?' spreadsheet."""
    # Download the current sheet, then parse and persist its rows.
    db = dataset.connect(app_config.POSTGRES_URL)
    get_document(app_config.STORIES_GOOGLE_DOC_KEY, app_config.STORIES_PATH)
    worker = SpreadsheetScraper()
    worker.write(db, worker.scrape_spreadsheet(app_config.STORIES_PATH))
def test_write_spreadsheet(self, mock_upload):
    """Stories scraped from the fixture sheet are persisted with matching fields."""
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()

    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    scraper.write(scraped)

    saved = Story.select()
    self.assertEqual(len(saved), 4)
    # Each persisted row should mirror the corresponding spreadsheet entry.
    for i, row in enumerate(scraped):
        self.assertEqual(saved[i].name, row['story_headline'])
        self.assertEqual(saved[i].url, row['story_url'])
def test_write_spreadsheet(self, mock_upload):
    """Writing scraped stories stores name/url from the spreadsheet columns."""
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()

    sheet_scraper = SpreadsheetScraper()
    sheet_rows = sheet_scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    sheet_scraper.write(sheet_rows)

    persisted = Story.select()
    self.assertEqual(len(persisted), 4)
    for position, sheet_row in enumerate(sheet_rows):
        record = persisted[position]
        self.assertEqual(record.name, sheet_row['story_headline'])
        self.assertEqual(record.url, sheet_row['story_url'])
def test_get_story_stats(self, mock_time_bucket, mock_upload, mock_update, mock_linger):
    """get_story_stats() passes linger data and a time bucket to the updater."""
    # Canned analytics that the mocked linger fetcher will return.
    fake_linger = [{
        'slug': 'slug-here',
        'stats': {
            'total_people': 100,
            'raw_avg_seconds': 330,
            'minutes': 5,
            'seconds': 30,
        },
    }]
    mock_linger.return_value = fake_linger
    mock_time_bucket.return_value = 'time bucket'
    mock_upload.return_value = 'http://image-url-here'

    # Seed the database with a single story from the fixture sheet.
    clear_stories()
    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    written = scraper.write([scraped[0]])

    get_story_stats()

    # The updater should run exactly once, for the one seeded story.
    mock_update.assert_called_once_with(written[0], fake_linger, 'time bucket')
def test_write_spreadsheet_duplicates(self, mock_upload):
    """Writing the same batch of stories twice must not create duplicate rows."""
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()

    scraper = SpreadsheetScraper()
    scraped = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

    # After each write pass the table should hold exactly one row per story.
    for _ in range(2):
        scraper.write(scraped)
        self.assertEqual(len(Story.select()), 4)
def test_write_spreadsheet_duplicates(self, mock_upload):
    """Re-writing already-stored stories leaves the row count unchanged."""
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()

    sheet_scraper = SpreadsheetScraper()
    sheet_rows = sheet_scraper.scrape_spreadsheet('tests/data/stories.xlsx')

    # First insert: four fixture stories land in the table.
    sheet_scraper.write(sheet_rows)
    self.assertEqual(len(Story.select()), 4)

    # Second insert of the identical batch: still four rows, no duplicates.
    sheet_scraper.write(sheet_rows)
    self.assertEqual(len(Story.select()), 4)
def load_spreadsheet(source):
    """Download and scrape one source's spreadsheet, persisting its stories.

    Args:
        source: Mapping with a 'doc_key' (Google doc key to download) and a
            'team' label attached to the written stories.

    Returns:
        The stories newly written by the scraper.
    """
    get_document(source['doc_key'], app_config.STORIES_PATH)
    scraper = SpreadsheetScraper()
    return scraper.write(
        scraper.scrape_spreadsheet(app_config.STORIES_PATH),
        team=source['team'],
    )
def load_spreadsheet(source):
    """Fetch a source's sheet, scrape it, and write the stories for its team."""
    # Download the sheet identified by this source's doc key.
    get_document(source['doc_key'], app_config.STORIES_PATH)
    sheet_scraper = SpreadsheetScraper()
    parsed_rows = sheet_scraper.scrape_spreadsheet(app_config.STORIES_PATH)
    freshly_written = sheet_scraper.write(parsed_rows, team=source['team'])
    return freshly_written