def test_fetch_from_date(self):
    """Check that fetching with `from_date` returns only the 2016 archives."""

    # Load every fixture first, then mock the index page (empty suffix)
    # and each archive URL it is paired with.
    fixtures = [
        ('', read_file('data/pipermail/pipermail_index.html')),
        ('2015-November.txt.gz', read_file('data/pipermail/pipermail_2015_november.mbox')),
        ('2016-March.txt', read_file('data/pipermail/pipermail_2016_march.mbox')),
        ('2016-April.txt', read_file('data/pipermail/pipermail_2016_april.mbox')),
    ]
    for suffix, content in fixtures:
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + suffix,
                               body=content)

    archive_list = PipermailList('http://example.com/', self.tmp_path)
    fetched = archive_list.fetch(from_date=datetime.datetime(2016, 3, 30))

    # The returned (url, path) pairs are expected with April before March.
    expected_names = ['2016-April.txt', '2016-March.txt']
    self.assertEqual(len(fetched), 2)
    for position, filename in enumerate(expected_names):
        self.assertEqual(fetched[position][0], PIPERMAIL_URL + filename)
        self.assertEqual(fetched[position][1],
                         os.path.join(self.tmp_path, filename))

    # The mboxes attribute is expected in the opposite order: March, then April.
    stored = archive_list.mboxes
    for position, filename in enumerate(reversed(expected_names)):
        self.assertEqual(stored[position].filepath,
                         os.path.join(self.tmp_path, filename))
def test_fetch_http_403_error(self):
    """Check that fetch still returns two links when one archive answers 403."""

    index_page = read_file('data/pipermail/pipermail_index.html')
    november = read_file('data/pipermail/pipermail_2015_november.mbox')
    march = read_file('data/pipermail/pipermail_2016_march.mbox')
    april = read_file('data/pipermail/pipermail_2016_april.mbox')

    # (url, keyword arguments) for each mocked endpoint; only the April
    # archive is served with a 403 status.
    endpoints = [
        (PIPERMAIL_URL, {'body': index_page}),
        (PIPERMAIL_URL + '2015-November.txt.gz', {'body': november}),
        (PIPERMAIL_URL + '2016-March.txt', {'body': march}),
        (PIPERMAIL_URL + '2016-April.txt', {'body': april, 'status': 403}),
    ]
    for url, options in endpoints:
        httpretty.register_uri(httpretty.GET, url, **options)

    archive_list = PipermailList('http://example.com/', self.tmp_path)
    fetched = archive_list.fetch()

    self.assertEqual(len(fetched), 2)
def test_search_fields(self):
    """Check that each entry's `item_id` search field equals its metadata id."""

    # NOTE(review): no HTTP mocks are registered in this test, and entries are
    # indexed by key here while the other tests index them by position --
    # confirm this is the intended object and fixture setup.
    archive_list = PipermailList('http://example.com/', self.tmp_path)

    for entry in archive_list.fetch():
        expected_id = archive_list.metadata_id(entry['data'])
        self.assertEqual(expected_id, entry['search_fields']['item_id'])
def test_fetch_empty(self):
    """Check that fetch returns no links when the index lists no archives."""

    # Serve the empty index fixture as the only mocked page.
    empty_index = read_file('data/pipermail/pipermail_index_empty.html')
    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=empty_index)

    archive_list = PipermailList('http://example.com/', self.tmp_path)

    self.assertEqual(len(archive_list.fetch()), 0)
def test_fetch_no_existing_dir(self):
    """Test whether the dir_path where to store the archives is created if it doesn't exist"""

    pipermail_index = read_file('data/pipermail/pipermail_index_empty.html')
    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=pipermail_index)

    # Delete only the target directory. os.rmdir is used instead of
    # os.removedirs because the latter also prunes any empty parent
    # directories, which this test has no reason to touch.
    os.rmdir(self.tmp_path)
    self.assertFalse(os.path.exists(self.tmp_path))

    pmls = PipermailList('http://example.com/', self.tmp_path)
    # The return value is irrelevant here; only the side effect on the
    # filesystem is checked.
    _ = pmls.fetch()

    self.assertTrue(os.path.exists(self.tmp_path))
def test_fetch_http_errors(self):
    """Test whether an exception is thrown when the HTTP error is not 403"""

    pipermail_index = read_file('data/pipermail/pipermail_index.html')
    mbox_april = read_file('data/pipermail/pipermail_2016_april.mbox')

    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=pipermail_index)
    # The April archive is served with a 404 status to trigger the error path.
    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL + '2016-April.txt',
                           body=mbox_april,
                           status=404)

    pmls = PipermailList('http://example.com/', self.tmp_path)

    with self.assertRaises(requests.exceptions.HTTPError):
        # The result is never inspected; bind it to the throwaway name
        # instead of an unused local.
        _ = pmls.fetch()