예제 #1
0
def collect():
    """
    Fetch articles from every source and save the new ones to the db.

    For each ``Source`` row, ``feed.articles`` is asked for that source's
    articles. An article is added to the session only when no ``Article``
    with a matching ``ext_url`` is already stored. A single commit is
    issued after all sources have been processed.

    A source whose feed raises ``feed.SAXException`` is skipped: the
    failure is logged and the source's ``errors`` counter is incremented.

    Returns:
        A list of every article fetched, including those that already
        existed in the db and so were not added again.
    """
    results = []

    logger.info('Fetching articles...')

    # Fetch entries for each source
    for source in Source.query.all():
        try:
            logger.info('Fetching from {0}...'.format(source.ext_url))
            articles = feed.articles(source)

            # Only stage articles that have no existing copy.
            for article in articles:
                if not Article.query.filter_by(ext_url=article.url).count():
                    db.session.add(article)
                results.append(article)

        except feed.SAXException as e:
            # Error with the feed: record what went wrong and carry on with
            # the remaining sources rather than aborting the whole run.
            logger.error('Error fetching from {0}: {1}'.format(source.ext_url, e))
            source.errors += 1

    logger.info('Finished fetching articles.')

    # One commit for the whole run; presumably this also persists the
    # updated error counters, since the sources were loaded via the query
    # API -- TODO confirm they are attached to db.session.
    db.session.commit()

    return results
예제 #2
0
 def test_articles_skips_short_articles(self):
     """
     feed.articles should return no articles when the extracted text is
     only 'short full text'.
     """
     extracted_data = MagicMock()
     extracted_data.cleaned_text = 'short full text'
     # create_patch presumably swaps the extractor for a mock for the
     # duration of the test -- verify against the test case's helper.
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       return_value=(extracted_data, 'short full text'))
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)
예제 #3
0
 def test_articles_skips_404_articles(self):
     """
     feed.articles should return no articles when extracting an entry
     raises an HTTP 404 error.
     """
     from urllib import error
     # Make every call to the extractor raise HTTPError with code 404.
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       side_effect=error.HTTPError(url=None,
                                                   code=404,
                                                   msg=None,
                                                   hdrs=None,
                                                   fp=None))
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)
예제 #4
0
    def test_articles(self):
        """
        feed.articles should return exactly one article for the test
        source when extraction yields `full_text`.
        """
        extracted_data = MagicMock()
        extracted_data.cleaned_text = full_text

        # Mock the download method to return some local image path.
        self.create_patch('argos.core.membrane.feed.download',
                          return_value='/foo/bar/image.jpg')

        # Have the extractor hand back the mock data and the full text.
        self.create_patch('argos.core.membrane.feed.extract_entry_data',
                          return_value=(extracted_data, full_text))
        articles = feed.articles(self.source)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(len(articles), 1)
예제 #5
0
    def test_articles(self):
        """
        feed.articles should return exactly one article for the test
        source when extraction yields `full_text`.
        """
        extracted_data = MagicMock()
        extracted_data.cleaned_text = full_text

        # Mock the download method to return some local image path.
        self.create_patch('argos.core.membrane.feed.download',
                          return_value='/foo/bar/image.jpg')

        # Have the extractor hand back the mock data and the full text.
        self.create_patch('argos.core.membrane.feed.extract_entry_data',
                          return_value=(extracted_data, full_text))
        articles = feed.articles(self.source)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(len(articles), 1)
예제 #6
0
 def test_articles_skips_unreachable_articles(self):
     """
     feed.articles should return no articles when extracting an entry
     raises URLError.
     """
     from urllib import error
     # Make every call to the extractor raise URLError('unreachable').
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       side_effect=error.URLError('unreachable'))
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)
예제 #7
0
 def test_articles_skips_unreachable_articles(self):
     """
     feed.articles should return no articles when extracting an entry
     raises URLError.
     """
     from urllib import error
     # Make every call to the extractor raise URLError('unreachable').
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       side_effect=error.URLError('unreachable'))
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)
예제 #8
0
 def test_articles_skips_404_articles(self):
     """
     feed.articles should return no articles when extracting an entry
     raises an HTTP 404 error.
     """
     from urllib import error
     # Make every call to the extractor raise HTTPError with code 404.
     not_found = error.HTTPError(url=None, code=404, msg=None,
                                 hdrs=None, fp=None)
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       side_effect=not_found)
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)
예제 #9
0
 def test_articles_skips_short_articles(self):
     """
     feed.articles should return no articles when the extracted text is
     only 'short full text'.
     """
     extracted_data = MagicMock()
     extracted_data.cleaned_text = 'short full text'
     # create_patch presumably swaps the extractor for a mock for the
     # duration of the test -- verify against the test case's helper.
     self.create_patch('argos.core.membrane.feed.extract_entry_data',
                       return_value=(extracted_data, 'short full text'))
     articles = feed.articles(self.source)
     # assertEqual: the assertEquals alias was removed in Python 3.12.
     self.assertEqual(len(articles), 0)