Ejemplo n.º 1
0
    def test_fetch_from_date(self):
        """Check that fetching with a start date only returns recent archives.

        The index page and three monthly archives are registered with
        httpretty. Fetching from 2016-03-30 is expected to return two
        links (April first, then March), while ``mboxes`` is expected to
        list March before April.
        """
        index_body = read_file('data/pipermail/pipermail_index.html')
        november_body = read_file('data/pipermail/pipermail_2015_november.mbox')
        march_body = read_file('data/pipermail/pipermail_2016_march.mbox')
        april_body = read_file('data/pipermail/pipermail_2016_april.mbox')

        httpretty.register_uri(httpretty.GET, PIPERMAIL_URL, body=index_body)

        archives = (
            ('2015-November.txt.gz', november_body),
            ('2016-March.txt', march_body),
            ('2016-April.txt', april_body),
        )
        for filename, body in archives:
            httpretty.register_uri(httpretty.GET, PIPERMAIL_URL + filename, body=body)

        mailing_list = PipermailList('http://example.com/', self.tmp_path)
        start = datetime.datetime(2016, 3, 30)
        fetched = mailing_list.fetch(from_date=start)

        # Each fetched entry is checked as a (url, local path) pair.
        self.assertEqual(len(fetched), 2)
        self.assertEqual(fetched[0][0], PIPERMAIL_URL + '2016-April.txt')
        self.assertEqual(fetched[0][1], os.path.join(self.tmp_path, '2016-April.txt'))
        self.assertEqual(fetched[1][0], PIPERMAIL_URL + '2016-March.txt')
        self.assertEqual(fetched[1][1], os.path.join(self.tmp_path, '2016-March.txt'))

        stored = mailing_list.mboxes
        self.assertEqual(stored[0].filepath, os.path.join(self.tmp_path, '2016-March.txt'))
        self.assertEqual(stored[1].filepath, os.path.join(self.tmp_path, '2016-April.txt'))
Ejemplo n.º 2
0
    def test_fetch_http_403_error(self):
        """Check how ``fetch`` copes with an archive answering HTTP 403.

        Three archives are registered with httpretty; the April 2016 one
        replies with status 403. The test expects ``fetch`` to return
        two links rather than raising.
        """
        index_body = read_file('data/pipermail/pipermail_index.html')
        november_body = read_file('data/pipermail/pipermail_2015_november.mbox')
        march_body = read_file('data/pipermail/pipermail_2016_march.mbox')
        april_body = read_file('data/pipermail/pipermail_2016_april.mbox')

        httpretty.register_uri(httpretty.GET, PIPERMAIL_URL, body=index_body)

        # Archives that are served normally.
        served = (
            ('2015-November.txt.gz', november_body),
            ('2016-March.txt', march_body),
        )
        for filename, body in served:
            httpretty.register_uri(httpretty.GET, PIPERMAIL_URL + filename, body=body)

        # The archive that answers with "forbidden".
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + '2016-April.txt',
                               body=april_body,
                               status=403)

        mailing_list = PipermailList('http://example.com/', self.tmp_path)
        fetched = mailing_list.fetch()

        self.assertEqual(len(fetched), 2)
Ejemplo n.º 3
0
    def test_search_fields(self):
        """Check that ``search_fields['item_id']`` matches the metadata id.

        For every item produced by ``fetch``, the value returned by
        ``metadata_id`` for the item's ``'data'`` entry must equal the
        item's ``search_fields['item_id']``.
        """
        # NOTE(review): this method registers no HTTP mock of its own and
        # indexes each fetched item as a mapping — confirm against what
        # fetch() actually yields and against the fixture setup.
        mailing_list = PipermailList('http://example.com/', self.tmp_path)

        for item in mailing_list.fetch():
            expected_id = mailing_list.metadata_id(item['data'])
            self.assertEqual(expected_id, item['search_fields']['item_id'])
Ejemplo n.º 4
0
    def test_fetch_empty(self):
        """Check that an index listing no archives produces no links."""
        empty_index = read_file('data/pipermail/pipermail_index_empty.html')
        httpretty.register_uri(httpretty.GET, PIPERMAIL_URL, body=empty_index)

        mailing_list = PipermailList('http://example.com/', self.tmp_path)
        fetched = mailing_list.fetch()

        self.assertEqual(len(fetched), 0)
Ejemplo n.º 5
0
    def test_fetch_no_existing_dir(self):
        """Test whether the dir_path where to store the archives is created if it doesn't exist.

        The temporary directory is removed before the list is built, and
        the test checks that it exists again once ``fetch`` has run.
        """
        pipermail_index = read_file('data/pipermail/pipermail_index_empty.html')
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL,
                               body=pipermail_index)

        # Remove only the temporary directory itself. os.removedirs() would
        # additionally prune every empty parent directory, which could
        # delete paths this test does not own.
        os.rmdir(self.tmp_path)

        self.assertFalse(os.path.exists(self.tmp_path))
        pmls = PipermailList('http://example.com/', self.tmp_path)
        # The returned links are irrelevant here; only the side effect on
        # the filesystem is checked.
        pmls.fetch()
        self.assertTrue(os.path.exists(self.tmp_path))
Ejemplo n.º 6
0
    def test_fetch_http_errors(self):
        """Test whether an exception is thrown when the HTTP error is not 403.

        The April 2016 archive is registered with status 404 and ``fetch``
        is expected to raise ``requests.exceptions.HTTPError``.
        """
        pipermail_index = read_file('data/pipermail/pipermail_index.html')
        mbox_april = read_file('data/pipermail/pipermail_2016_april.mbox')

        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL,
                               body=pipermail_index)
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + '2016-April.txt',
                               body=mbox_april,
                               status=404)

        pmls = PipermailList('http://example.com/', self.tmp_path)

        # The call is expected to raise, so its result is never bound.
        with self.assertRaises(requests.exceptions.HTTPError):
            pmls.fetch()