Ejemplo n.º 1
0
    def test_fetch(self, mock_nntp):
        """Check that a plain fetch returns the expected set of articles"""

        mock_nntp.return_value = MockNNTPLib()

        backend = NNTP(NNTP_SERVER, NNTP_GROUP)
        fetched = list(backend.fetch(offset=None))

        origin = NNTP_SERVER + '-' + NNTP_GROUP

        # One (message_id, offset, uuid, updated_on) tuple per expected article
        wanted = [
            ('<*****@*****.**>', 1,
             'd088688545d7c2f3733993e215503b367193a26d', 1458039948.0),
            ('<*****@*****.**>', 2,
             '8a20c77405349f442dad8e3ee8e60d392cc75ae7', 1458076496.0)
        ]

        # Although there are 4 messages available on the server,
        # only two are valid
        self.assertEqual(len(fetched), 2)

        # Both lists hold two entries at this point, so pairing them
        # up visits every fetched article
        for item, (message_id, offset, uuid, updated_on) in zip(fetched, wanted):
            self.assertEqual(item['data']['message_id'], message_id)
            self.assertEqual(item['offset'], offset)
            self.assertEqual(item['uuid'], uuid)
            self.assertEqual(item['origin'], origin)
            self.assertEqual(item['updated_on'], updated_on)
            self.assertEqual(item['category'], 'article')
            self.assertEqual(item['tag'], origin)
Ejemplo n.º 2
0
    def test_fetch_empty(self, mock_nntp):
        """Check that fetching from offset 3 produces no articles"""

        mock_nntp.return_value = MockNNTPLib()

        backend = NNTP(NNTP_SERVER, NNTP_GROUP)

        # Drain the generator so the number of yielded items can be counted
        fetched = list(backend.fetch(offset=3))

        self.assertEqual(len(fetched), 0)
Ejemplo n.º 3
0
    def test_search_fields(self, mock_nntp):
        """Check that every fetched article carries the right search_fields"""

        mock_nntp.return_value = MockNNTPLib()

        backend = NNTP(NNTP_SERVER, NNTP_GROUP)
        fetched = list(backend.fetch(offset=None))

        # Newsgroups header expected for the first and second article
        newsgroups_by_position = (
            'example.dev.project-link',
            'mozilla.dev.project-link',
        )

        for position, newsgroups in enumerate(newsgroups_by_position):
            item = fetched[position]
            self.assertEqual(backend.metadata_id(item['data']), item['search_fields']['item_id'])
            self.assertEqual(item['data']['Newsgroups'], newsgroups)
            self.assertEqual(item['data']['Newsgroups'], item['search_fields']['newsgroups'])
Ejemplo n.º 4
0
    def test_fetch_from_cache(self, mock_nntp):
        """Check that articles read back from the cache match the fetched ones"""

        mock_nntp.return_value = MockNNTPLib()

        # Fetch from the (mocked) server first; the backend is given
        # a cache to store the articles in
        storage = Cache(self.tmp_path)
        backend = NNTP(NNTP_SERVER, NNTP_GROUP, cache=storage)
        fetched = list(backend.fetch())

        self.assertEqual(len(fetched), 2)

        # Reading from the cache must give back the same number
        # of articles
        from_cache = list(backend.fetch_from_cache())
        self.assertEqual(len(from_cache), len(fetched))

        origin = NNTP_SERVER + '-' + NNTP_GROUP

        # One (message_id, offset, uuid, updated_on) tuple per expected article
        wanted = [
            ('<*****@*****.**>', 1,
             'd088688545d7c2f3733993e215503b367193a26d', 1458039948.0),
            ('<*****@*****.**>', 2,
             '8a20c77405349f442dad8e3ee8e60d392cc75ae7', 1458076496.0)
        ]

        self.assertEqual(len(from_cache), len(wanted))

        # All three lists have equal length here, so zip() covers
        # every cached article
        for cached, original, (message_id, offset, uuid, updated_on) in zip(from_cache, fetched, wanted):
            self.assertEqual(cached['data']['message_id'], message_id)
            self.assertEqual(cached['offset'], offset)
            self.assertEqual(cached['uuid'], uuid)
            self.assertEqual(cached['origin'], origin)
            self.assertEqual(cached['updated_on'], updated_on)
            self.assertEqual(cached['category'], 'article')
            self.assertEqual(cached['tag'], origin)

            # Compare the cached article with the fetched one
            self.assertDictEqual(cached['data'], original['data'])
Ejemplo n.º 5
0
    def test_fetch_from_offset(self, mock_nntp):
        """Check that fetching from offset 2 returns only the second article"""

        mock_nntp.return_value = MockNNTPLib()

        backend = NNTP(NNTP_SERVER, NNTP_GROUP)
        fetched = list(backend.fetch(offset=2))

        # Values the single returned article is expected to carry
        message_id = '<*****@*****.**>'
        offset = 2
        uuid = '8a20c77405349f442dad8e3ee8e60d392cc75ae7'
        updated_on = 1458076496.0
        origin = NNTP_SERVER + '-' + NNTP_GROUP

        self.assertEqual(len(fetched), 1)

        item = fetched[0]
        self.assertEqual(item['data']['message_id'], message_id)
        self.assertEqual(item['offset'], offset)
        self.assertEqual(item['uuid'], uuid)
        self.assertEqual(item['origin'], origin)
        self.assertEqual(item['updated_on'], updated_on)
        self.assertEqual(item['category'], 'article')
        self.assertEqual(item['tag'], origin)
Ejemplo n.º 6
0
class NntpImporter(MailImporter):
    """Mailing list importer using NNTP.

    The server and group to read from are taken from the ``nntp://``
    archive URL found on ``self.object`` (see ``_parse_url``).
    """

    # Pattern removed from every Message-ID before a message is built:
    # two underscores, one or more non-'$' characters, then '$gmane$org'.
    # NOTE(review): presumably undoes Gmane's address mangling - confirm.
    gmane_mangler_regex = re.compile(r'__[^\$]+\$gmane\$org')

    def __init__(self, mailing_list):
        """
        Construct an NntpImporter for the given mailing list.

        mailing_list: Mailing list object; See Importer.__init__

        The connection details are not passed in directly; they are
        parsed from the archive URL by ``_parse_url`` and stored on the
        instance (``server``, ``port``, ``group``, ``username``,
        ``password``).
        """
        super().__init__(mailing_list)
        self._parse_url()
        # Only the host name and the group are handed to the backend;
        # the parsed port and credentials are stored but not used here.
        self.backend = NNTP(self.server, self.group)

    def _parse_url(self):
        """
        Parse ``self.object.archive_url`` and populate the following fields:
        - self.server: host name of the NNTP server
        - self.port: TCP port from the URL, or 119 when the URL has none
        - self.username: user name from the URL, or None
        - self.password: password from the URL, or None
        - self.group: URL path with its leading slashes removed

        Raises AssertionError if the URL scheme is not ``nntp``.
        """
        url = urllib.parse.urlparse(self.object.archive_url)
        # NOTE(review): assert statements are stripped when Python runs
        # with -O, so this check is skipped there; kept as an assert so
        # the exception type seen by callers does not change.
        assert url.scheme == 'nntp'

        self.group = url.path.lstrip('/')
        self.username = url.username
        self.password = url.password
        self.server = url.hostname
        self.port = url.port if url.port else 119

    def get_messages(self):
        """
        Iterate through all messages in the NNTP group.

        Yields one ``Message`` per article returned by the backend, built
        from the From, Date, Subject, Message-ID and References entries of
        the article data. Articles for which building the message fails
        are logged as a warning and skipped.
        """
        for article in self.backend.fetch():
            data = article['data']
            # The cleaned Message-ID is written back into the article data
            data['Message-ID'] = self.gmane_mangler_regex.sub('', data['Message-ID'])

            try:
                msg = Message(data['From'], data['Date'], data['Subject'],
                              data['Message-ID'], data['References'])
            except Exception:
                # Catch Exception rather than using a bare except, so that
                # KeyboardInterrupt and SystemExit still stop the import.
                # NOTE(review): a missing header (e.g. References) raises
                # KeyError here and the article is skipped - confirm that
                # this is intended.
                logger.warning('Malformed message found, skipping:\n%s', article['updated_on'],
                               exc_info=True)
                continue

            # yield outside the try block to avoid capturing exceptions
            # that should terminate the loop instead
            yield msg