def test_mailman_chain():
    """Round-trip an archive through CSV and check the shape survives.

    Constructs an ``Archive`` from the ipython-dev pipermail URL, saves it
    to ``test.csv``, reloads it with ``archive.load`` and asserts that the
    reloaded data has the same shape as the original.
    """
    url = "http://mail.scipy.org/pipermail/ipython-dev/"
    csv_path = "test.csv"

    arx = archive.Archive(url)
    try:
        arx.save(csv_path)
        arx2 = archive.load(csv_path)

        # Constructing from the bare list name is exercised only for
        # "does not raise"; nothing is asserted about its contents.
        archive.Archive("ipython-dev")

        # Compare against the restored archive (arx2), not arx itself:
        # a self-comparison is always true and verifies nothing.
        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"
    finally:
        # Remove the scratch file even when a step above fails.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test_mailman_chain():
    """Round-trip the bigbang-dev test mbox through CSV and compare results.

    Loads ``bigbang-dev-test.txt`` from ``tests/data`` as an mbox archive,
    saves it to ``test.csv``, reloads it, and checks that shape, index and
    per-thread message counts agree between the two. Finishes with a smoke
    call to ``resolve_entities`` on the restored archive.
    """
    name = "bigbang-dev-test.txt"
    csv_path = "test.csv"

    # archive loaded from mbox
    arx = archive.Archive(name, archive_dir="tests/data", mbox=True)
    try:
        arx.save(csv_path)

        # archive loaded from stored csv
        arx2 = archive.load(csv_path)

        # Function-call form of print: valid on Python 3 and, with a single
        # argument, prints the same text on Python 2.
        print(arx.data.dtypes)
        print(arx.data.shape)

        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"

        assert (arx2.data.index == arx.data.index).all(), \
            "Original and restored archives have nonidentical indices"

        assert [t.get_num_messages() for t in arx.get_threads()] == [3, 1, 2], \
            "Thread message count in mbox archive is off"

        assert [t.get_num_messages() for t in arx2.get_threads()] == [3, 1, 2], \
            "Thread message count in restored archive is off"

        # smoke test entity resolution
        arx2.resolve_entities()
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test_empty_list_compute_activity_issue_246():
    """Expect MissingDataException for the empty-archive fixture.

    Reads ``empty-archive-df.csv`` from the configured test data path and
    requires that building an ``Archive`` from it and asking for its
    activity raises ``mailman.MissingDataException``. The test name points
    at issue 246.
    """
    csv_file = os.path.join(CONFIG.test_data_path, 'empty-archive-df.csv')
    empty_frame = pd.read_csv(csv_file)

    # Either the constructor or get_activity() may be the call that raises;
    # both stay inside the context manager.
    with assert_raises(mailman.MissingDataException):
        archive.Archive(empty_frame).get_activity()
def test__empty_list_compute_activity_issue_246(self):
    """Expect MissingDataException for the empty-archive fixture.

    Reads ``empty-archive-df.csv`` from the configured test data path and
    requires that constructing an ``Archive`` from it and calling
    ``get_activity`` raises ``archive.MissingDataException``. The test name
    points at issue 246.
    """
    frame = pd.read_csv(
        os.path.join(CONFIG.test_data_path, "empty-archive-df.csv")
    )

    # Constructor and get_activity() both run inside the context manager,
    # so either one raising satisfies the test.
    with self.assertRaises(archive.MissingDataException):
        archive.Archive(frame).get_activity()
def test_email_entity_resolution(self):
    """Smoke-test sender entity resolution on the 2001-November mbox.

    Resolves sender entities from the unresolved activity table and then
    repartitions the default activity table with the result. The closing
    assertion is always true, so the test fails only if a call raises.
    """
    mbox_name = "2001-November.txt"
    arx = archive.Archive(mbox_name, archive_dir="tests/data", mbox=True)

    unresolved_activity = arx.get_activity(resolved=False)
    entities = process.resolve_sender_entities(unresolved_activity)

    activity = arx.get_activity()
    utils.repartition_dataframe(activity, entities)

    self.assertTrue(True, msg="email entity resolution crashed")
def test_clean_message():
    """Check that ``utils.clean_message`` drops a known quoted line.

    Picks one message body from the 2001-November mbox, confirms it is the
    expected message and that it contains the quoted line, then confirms
    the quoted line is absent after cleaning.
    """
    arx = archive.Archive("2001-November.txt", archive_dir="tests/data", mbox=True)
    body = arx.data['Body']['<*****@*****.**>']

    own_text = "But seemingly it is even stranger than this."
    quoted_text = "Is it a problem of lapack3.0 of of"

    assert own_text in body, "Selected wrong message"
    assert quoted_text in body, "Quoted text is not in uncleaned message"

    # Clean only after the raw body has been validated, so a wrong fixture
    # is reported before any cleaning failure.
    cleaned = utils.clean_message(body)
    assert quoted_text not in cleaned, "Quoted text is in cleaned message"
def test_mailman_chain():
    """Collect, unzip, archive and CSV-round-trip the ipython-dev list.

    Calls ``mailman.collect_from_url`` and ``mailman.unzip_archive`` for the
    pipermail URL, constructs an ``Archive`` from the same URL, saves it to
    ``test.csv``, reloads it and asserts that the reloaded data has the
    same shape as the original.
    """
    url = "http://mail.scipy.org/pipermail/ipython-dev/"
    csv_path = "test.csv"

    mailman.collect_from_url(url)
    mailman.unzip_archive(url)

    arx = archive.Archive(url)
    try:
        arx.save(csv_path)
        arx2 = archive.load(csv_path)

        # Compare against the restored archive (arx2), not arx itself:
        # a self-comparison is always true and verifies nothing.
        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"
    finally:
        # Remove the scratch file even when a step above fails.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test__open_list_archives(self):
    """Check ``open_list_archives`` frame sizes and ``add_affiliation``.

    Opens the 3GPP fixtures twice, once with ``mbox=True`` for a single
    ``.mbox`` name and once with ``mbox=False`` for the ``3GPP_mbox`` name,
    and checks the column and row counts of each result. Then wraps the
    second frame in an ``Archive``, applies one email-to-affiliation record
    and checks the resulting ``affiliation`` values for two index labels.
    """
    # mbox=True, addressed by the .mbox file name inside 3GPP_mbox/.
    single_mbox = archive.open_list_archives(
        archive_name="3GPP_TSG_SA_WG4_EVS.mbox",
        archive_dir=CONFIG.test_data_path + "3GPP_mbox/",
        mbox=True,
    )
    assert len(single_mbox.columns.values) == 6
    assert len(single_mbox.index.values) == 50

    # mbox=False, addressed by the 3GPP_mbox name under the test data path.
    all_lists = archive.open_list_archives(
        archive_name="3GPP_mbox",
        archive_dir=CONFIG.test_data_path,
        mbox=False,
    )
    assert len(all_lists.columns.values) == 6
    assert len(all_lists.index.values) == 108

    ## Testing add_affiliation
    # NOTE(review): the address/ID literals below look like redaction
    # placeholders in this copy -- confirm against the real fixture values.
    affiliation_record = {
        "email": "*****@*****.**",
        "affiliation": "TestOrg",
        "min_date": dateutil.parser.parse("2019-04-18"),
        "max_date": dateutil.parser.parse("2021-04-18"),
    }
    rel_email_affil = pd.DataFrame.from_records([affiliation_record])

    arx = archive.Archive(all_lists)
    arx.add_affiliation(rel_email_affil)

    matched = arx.data.loc["[email protected]"]["affiliation"]
    assert matched == "TestOrg"

    unmatched = arx.data.loc["*****@*****.**"]["affiliation"]
    assert unmatched is None
def test_mailman_chain(self):
    """Round-trip the bigbang-dev test mbox through CSV and compare results.

    Loads ``bigbang-dev-test.txt`` from the configured test data path as an
    mbox archive, saves it to ``test.csv``, reloads it, and checks that
    shape, index and per-thread message counts agree between the two.
    Finishes with a smoke call to ``resolve_entities`` on the restored
    archive. The scratch CSV is removed whether or not the checks pass.
    """
    name = "bigbang-dev-test.txt"
    csv_path = "test.csv"

    # archive loaded from mbox
    arx = archive.Archive(name, archive_dir=CONFIG.test_data_path, mbox=True)
    try:
        arx.save(csv_path)

        # archive loaded from stored csv
        arx2 = archive.load(csv_path)

        print(arx.data.dtypes)
        print(arx.data.shape)

        # assertEqual reports both operands on failure, unlike
        # assertTrue(a == b), which only reports "False is not true".
        self.assertEqual(
            arx.data.shape,
            arx2.data.shape,
            msg="Original and restored archives are different shapes",
        )

        self.assertTrue(
            (arx2.data.index == arx.data.index).all(),
            msg="Original and restored archives have nonidentical indices",
        )

        self.assertEqual(
            [t.get_num_messages() for t in arx.get_threads()],
            [3, 1, 2],
            msg="Thread message count in mbox archive is off",
        )

        self.assertEqual(
            [t.get_num_messages() for t in arx2.get_threads()],
            [3, 1, 2],
            msg="Thread message count in restored archive is off",
        )

        # smoke test entity resolution
        arx2.resolve_entities()
    finally:
        # Remove the scratch file even when an assertion above fails.
        if os.path.exists(csv_path):
            os.remove(csv_path)
def test_clean_message(self):
    """Check that ``utils.clean_message`` drops a known quoted line.

    Picks one message body from the 2001-November mbox in the configured
    test data path, confirms it is the expected message and that it
    contains the quoted line, then confirms the quoted line is absent
    after cleaning.
    """
    arx = archive.Archive(
        "2001-November.txt", archive_dir=CONFIG.test_data_path, mbox=True
    )
    body = arx.data["Body"]["<*****@*****.**>"]

    own_text = "But seemingly it is even stranger than this."
    quoted_text = "Is it a problem of lapack3.0 of of"

    found_own_text = own_text in body
    self.assertTrue(found_own_text, msg="Selected wrong message")

    found_quoted_text = quoted_text in body
    self.assertTrue(
        found_quoted_text, msg="Quoted text is not in uncleaned message"
    )

    # Clean only after the raw body has been validated, so a wrong fixture
    # is reported before any cleaning failure.
    cleaned = utils.clean_message(body)
    self.assertTrue(
        quoted_text not in cleaned, msg="Quoted text is in cleaned message"
    )