def collect():
    """Fetch articles from all registered sources and save new ones to the db.

    For each ``Source``, pulls its feed entries via ``feed.articles`` and
    persists any article whose ``ext_url`` is not already stored. Feed parse
    failures are logged and counted on the source rather than aborting the run.

    Returns:
        list: the newly added Article objects (existing ones are skipped).
    """
    results = []
    logger.info('Fetching articles...')

    # Fetch entries for each source.
    for source in Source.query.all():
        try:
            logger.info('Fetching from {0}...'.format(source.ext_url))
            articles = feed.articles(source)

            # Skip articles we already have a copy of (matched by ext_url).
            for article in articles:
                if not Article.query.filter_by(ext_url=article.url).count():
                    db.session.add(article)
                    results.append(article)
        except feed.SAXException:
            # Error with the feed; make a note on the source and keep going.
            logger.info('Error fetching from {0}.'.format(source.ext_url))
            source.errors += 1

    logger.info('Finished fetching articles.')
    # Single commit covers both new articles and incremented error counts.
    db.session.commit()
    return results
def test_articles_skips_short_articles(self):
    """Articles whose extracted full text is too short should be dropped."""
    extracted_data = MagicMock()
    extracted_data.cleaned_text = 'short full text'
    self.create_patch('argos.core.membrane.feed.extract_entry_data',
                      return_value=(extracted_data, 'short full text'))

    articles = feed.articles(self.source)

    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(articles), 0)
def test_articles_skips_404_articles(self):
    """Articles whose pages return HTTP 404 should be skipped."""
    from urllib import error
    self.create_patch('argos.core.membrane.feed.extract_entry_data',
                      side_effect=error.HTTPError(url=None, code=404,
                                                  msg=None, hdrs=None, fp=None))

    articles = feed.articles(self.source)

    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(articles), 0)
def test_articles(self):
    """A source with one valid entry yields exactly one article."""
    extracted_data = MagicMock()
    extracted_data.cleaned_text = full_text

    # Mock the download method to return some local image path.
    self.create_patch('argos.core.membrane.feed.download',
                      return_value='/foo/bar/image.jpg')
    self.create_patch('argos.core.membrane.feed.extract_entry_data',
                      return_value=(extracted_data, full_text))

    articles = feed.articles(self.source)

    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(articles), 1)
def test_articles_skips_unreachable_articles(self):
    """Articles whose pages are unreachable (URLError) should be skipped."""
    from urllib import error
    self.create_patch('argos.core.membrane.feed.extract_entry_data',
                      side_effect=error.URLError('unreachable'))

    articles = feed.articles(self.source)

    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(len(articles), 0)