def test_init(self):
    """Check that the constructor stores its arguments as attributes.

    The list is built twice: once relying on the default value of
    `verify` and once passing `verify=False` explicitly.
    """
    # Each scenario pairs the extra constructor keyword arguments with
    # the assertion that must hold for the resulting `verify` attribute.
    scenarios = (
        ({}, self.assertTrue),
        ({'verify': False}, self.assertFalse),
    )

    for extra_kwargs, check_verify in scenarios:
        mailing_list = PipermailList(PIPERMAIL_URL, self.tmp_path, **extra_kwargs)

        self.assertIsInstance(mailing_list, MailingList)
        self.assertEqual(mailing_list.uri, PIPERMAIL_URL)
        self.assertEqual(mailing_list.dirpath, self.tmp_path)
        self.assertEqual(mailing_list.url, PIPERMAIL_URL)
        check_verify(mailing_list.verify)
def test_fetch_http_errors(self):
    """Test whether an exception is thrown when the HTTP error is not 403

    The index page is served normally, while the only archive mocked
    here ('2016-April.txt') answers with a 404 status. Fetching is
    therefore expected to raise `requests.exceptions.HTTPError`.
    """
    pipermail_index = read_file('data/pipermail/pipermail_index.html')
    mbox_april = read_file('data/pipermail/pipermail_2016_april.mbox')

    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL,
                           body=pipermail_index)
    httpretty.register_uri(httpretty.GET,
                           PIPERMAIL_URL + '2016-April.txt',
                           body=mbox_april,
                           status=404)

    pmls = PipermailList('http://example.com/', self.tmp_path)

    with self.assertRaises(requests.exceptions.HTTPError):
        # The call is expected to raise, so its result is deliberately
        # not bound to a name.
        pmls.fetch()
def test_fetch(self):
    """Test whether archives are fetched

    Three monthly archives are mocked behind the index page. The test
    checks the (url, local path) pairs returned by `fetch()` and the
    file paths exposed afterwards through the `mboxes` attribute.
    """
    index_body = read_file('data/pipermail/pipermail_index.html')

    # (archive file name, fixture used as its body), oldest archive first
    fixtures = (
        ('2015-November.txt.gz', 'data/pipermail/pipermail_2015_november.mbox'),
        ('2016-March.txt', 'data/pipermail/pipermail_2016_march.mbox'),
        ('2016-April.txt', 'data/pipermail/pipermail_2016_april.mbox'),
    )

    # Load every fixture before registering any mocked URL
    bodies = [(archive, read_file(fixture)) for archive, fixture in fixtures]

    httpretty.register_uri(httpretty.GET, PIPERMAIL_URL, body=index_body)
    for archive, content in bodies:
        httpretty.register_uri(httpretty.GET, PIPERMAIL_URL + archive, body=content)

    pmls = PipermailList('http://example.com/', self.tmp_path)
    links = pmls.fetch()

    oldest_first = [archive for archive, _ in fixtures]

    self.assertEqual(len(links), 3)

    # The returned links are expected with the most recent archive first
    for position, archive in enumerate(reversed(oldest_first)):
        self.assertEqual(links[position][0], PIPERMAIL_URL + archive)
        self.assertEqual(links[position][1], os.path.join(self.tmp_path, archive))

    # The mboxes are expected in the opposite order, oldest first
    mboxes = pmls.mboxes
    for position, archive in enumerate(oldest_first):
        self.assertEqual(mboxes[position].filepath, os.path.join(self.tmp_path, archive))
def test_mboxes(self):
    """Test whether it returns the mboxes ordered by the date on their filenames"""

    # Simulate the fetch process by copying the fixtures into the
    # working directory under the names of the archives.
    # NOTE(review): these fixtures live directly under 'data/', whereas
    # the fetch tests read theirs from 'data/pipermail/' -- confirm that
    # both locations exist.
    staged = (
        ('data/pipermail_2015_november.mbox', '2015-November.txt.gz'),
        ('data/pipermail_2016_march.mbox', '2016-March.txt'),
        ('data/pipermail_2016_april.mbox', '2016-April.txt'),
    )
    for fixture, archive in staged:
        shutil.copy(fixture, os.path.join(self.tmp_path, archive))

    pmls = PipermailList('http://example.com/', self.tmp_path)
    mboxes = pmls.mboxes

    # `staged` is already listed oldest first, which is the expected order
    for position, (_, archive) in enumerate(staged):
        self.assertEqual(mboxes[position].filepath, os.path.join(self.tmp_path, archive))
commit: aaa7a9209f096aaaadccaaa7089aaaa3f758a703 Author: John Smith <*****@*****.**> AuthorDate: Tue Aug 14 14:30:13 2012 -0300 Commit: John Smith <*****@*****.**> CommitDate: Tue Aug 14 14:30:13 2012 -0300 ''' for commit in repo.fetch(): #print("ugh") print(commit['data']['Author']) print("Starting 2") # Url for the mailing list to analyze mail_repo_url = 'https://mail-archives.apache.org/mod_mbox/httpd-dev/' # Directory for letting Perceval clone the mailing list mail_repo_dir = '/tmp/perceval/' repo = PipermailList(url=mail_repo_url, dirpath=mail_repo_dir) #Does not seem to affect what repositories are printed k = str_to_datetime("1996-04") k = datetime_to_utc(k) print(k) for message in repo.fetch(from_date=k): print(message[0]) print("Done") ''' p2o.py --enrich --index git_raw --index-enrich git \ -e http://localhost:9200 --no_inc --debug \ git https://github.com/mozilla/addons-server.git '''