def test_search_fields(self):
    """Check that every fetched item carries its metadata id in `search_fields`.

    The index page and three mbox archives are served through httpretty;
    all eight messages are fetched and, for each one, the `item_id` search
    field must equal what `backend.metadata_id` returns for the item data.
    """
    index_page = read_file('data/pipermail/pipermail_index.html')
    # Archive name (as appended to the base URL) -> fixture content.
    archives = {
        '2015-November.txt.gz': read_file('data/pipermail/pipermail_2015_november.mbox'),
        '2016-March.txt': read_file('data/pipermail/pipermail_2016_march.mbox'),
        '2016-April.txt': read_file('data/pipermail/pipermail_2016_april.mbox'),
    }

    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=index_page)
    for archive_name, content in archives.items():
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + archive_name,
                               body=content)

    backend = Pipermail('http://example.com/', self.tmp_path)
    messages = list(backend.fetch())

    self.assertEqual(len(messages), 8)

    for item in messages:
        metadata_id = backend.metadata_id(item['data'])
        self.assertEqual(metadata_id, item['search_fields']['item_id'])
def test_fetch_from_date(self):
    """Check that only messages sent on or after `from_date` are fetched.

    With a start date of 2016-03-04 the test verifies two things: which
    archive files end up under `self.tmp_path`, and which messages (with
    their uuid, timestamp, origin, category and tag) are returned.
    """
    index_page = read_file('data/pipermail/pipermail_index.html')
    # Archive name (as appended to the base URL) -> fixture content.
    archives = {
        '2015-November.txt.gz': read_file('data/pipermail/pipermail_2015_november.mbox'),
        '2016-March.txt': read_file('data/pipermail/pipermail_2016_march.mbox'),
        '2016-April.txt': read_file('data/pipermail/pipermail_2016_april.mbox'),
    }

    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=index_page)
    for archive_name, content in archives.items():
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + archive_name,
                               body=content)

    since = datetime.datetime(2016, 3, 4)

    backend = Pipermail('http://example.com/', self.tmp_path)
    messages = list(backend.fetch(from_date=since))

    # Only the March and April mboxes should have been downloaded;
    # collect whatever actually landed on disk to compare.
    downloaded = []
    for root, _, files in os.walk(self.tmp_path):
        downloaded.extend(os.path.join(root, name) for name in sorted(files))

    self.assertListEqual(downloaded,
                         [os.path.join(self.tmp_path, '2016-April.txt'),
                          os.path.join(self.tmp_path, '2016-March.txt')])

    # The March mbox does contain a message, but it predates the given
    # date, so it must not show up in the results.
    # NOTE(review): some Message-ID literals below look like redaction
    # placeholders rather than real ids -- confirm against the fixtures.
    expected = [
        ('<*****@*****.**>', 'b5320132f853e08d587fc24e46827b0084e0c752', 1460624816.0),
        ('<[email protected]>', '7a30847c497645d773d7ceb73b414887153bdbd3', 1461428336.0),
        ('<*****@*****.**>', '8aa40b01acbdd987208fab4d724b9ddddf5e60fe', 1461621607.0),
    ]

    self.assertEqual(len(messages), 3)

    for message, (message_id, uuid, updated_on) in zip(messages, expected):
        self.assertEqual(message['data']['Message-ID'], message_id)
        self.assertEqual(message['origin'], 'http://example.com/')
        self.assertEqual(message['uuid'], uuid)
        self.assertEqual(message['updated_on'], updated_on)
        self.assertEqual(message['category'], 'message')
        self.assertEqual(message['tag'], 'http://example.com/')
def test_fetch_apache(self):
    """Check fetching from an Apache-style index with `YYYYMM.mbox` archives.

    The index fixture links to `201511.mbox`, `201603.mbox` and
    `201604.mbox`; all eight messages must be returned with the expected
    Message-ID, uuid, timestamp, origin, category and tag.
    """
    index_page = read_file('data/pipermail/pipermail_apache_index.html')
    # Archive name (as appended to the base URL) -> fixture content.
    archives = {
        '201511.mbox': read_file('data/pipermail/pipermail_2015_november.mbox'),
        '201603.mbox': read_file('data/pipermail/pipermail_2016_march.mbox'),
        '201604.mbox': read_file('data/pipermail/pipermail_2016_april.mbox'),
    }

    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=index_page)
    for archive_name, content in archives.items():
        httpretty.register_uri(httpretty.GET,
                               PIPERMAIL_URL + archive_name,
                               body=content)

    backend = Pipermail('http://example.com/', self.tmp_path)
    messages = list(backend.fetch())

    # NOTE(review): some Message-ID literals below look like redaction
    # placeholders rather than real ids -- confirm against the fixtures.
    expected = [
        ('<CACRHdMaObu7Dc0FWTWEesvRCzUNDG=7oA7KFqAgtOs_UKjb3Og@mail.gmail.com>', '9221eb7884be6f6b91fccd5d64107ce6c7f15e4d', 1447532968.0),
        ('<*****@*****.**>', 'd1b79ef1562b7caf4e4a99e3b7c391e5f733c0ff', 1447627429.0),
        ('<[email protected]>', '48d348ef11e8ad3f7688b645dc71d93ecde9ae57', 1448107551.0),
        ('<*****@*****.**>', '8c057f129fe161452ed2192ef5dce9bcfa10928a', 1448742330.0),
        ('<*****@*****.**>', '61d76ca22803b22937aa98f0b7d551ba6bfc7fb1', 1457025635.0),
        ('<*****@*****.**>', 'b5320132f853e08d587fc24e46827b0084e0c752', 1460624816.0),
        ('<[email protected]>', '7a30847c497645d773d7ceb73b414887153bdbd3', 1461428336.0),
        ('<*****@*****.**>', '8aa40b01acbdd987208fab4d724b9ddddf5e60fe', 1461621607.0),
    ]

    self.assertEqual(len(messages), 8)

    for message, (message_id, uuid, updated_on) in zip(messages, expected):
        self.assertEqual(message['data']['Message-ID'], message_id)
        self.assertEqual(message['origin'], 'http://example.com/')
        self.assertEqual(message['uuid'], uuid)
        self.assertEqual(message['updated_on'], updated_on)
        self.assertEqual(message['category'], 'message')
        self.assertEqual(message['tag'], 'http://example.com/')
def test_fetch_empty(self):
    """Check that fetching yields nothing when the index lists no mboxes."""
    empty_index = read_file('data/pipermail/pipermail_index_empty.html')
    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=empty_index)

    backend = Pipermail('http://example.com/', self.tmp_path)
    fetched = list(backend.fetch())

    self.assertListEqual(fetched, [])