Beispiel #1
0
def test_mailman_chain():
    """Round-trip a mailing-list archive through CSV and compare shapes.

    An archive is built from the list URL, saved to a CSV file, and loaded
    back. The restored archive must have the same data shape as the
    original. The CSV file is removed even when a step fails.
    """
    url = "http://mail.scipy.org/pipermail/ipython-dev/"
    csv_path = "test.csv"

    arx = archive.Archive(url)

    try:
        arx.save(csv_path)

        arx2 = archive.load(csv_path)

        # Smoke check only: constructing an archive from the bare list name
        # must not raise. The result is intentionally not inspected.
        archive.Archive("ipython-dev")

        # Compare the original against the *restored* archive; comparing
        # arx with itself can never fail.
        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"
    finally:
        # Guard the removal so a failure in save() is not masked by a
        # secondary "file not found" error.
        if os.path.exists(csv_path):
            os.remove(csv_path)
Beispiel #2
0
def test_mailman_chain():
    """Round-trip an mbox-backed archive through CSV and compare the results.

    The archive is loaded from an mbox file, saved to CSV, and loaded back.
    Shape, index and per-thread message counts of the original and the
    restored archive are checked, followed by a smoke run of entity
    resolution on the restored archive. The CSV file is removed even when
    an assertion fails.
    """
    name = "bigbang-dev-test.txt"
    csv_path = "test.csv"

    # archive loaded from mbox
    arx = archive.Archive(name, archive_dir="tests/data", mbox=True)

    try:
        arx.save(csv_path)

        # archive loaded from stored csv
        arx2 = archive.load(csv_path)

        # Single-argument call form prints identically on Python 2 and 3.
        print(arx.data.dtypes)
        print(arx.data.shape)

        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"

        assert (arx2.data.index == arx.data.index).all(), \
            "Original and restored archives have nonidentical indices"

        assert [t.get_num_messages() for t in arx.get_threads()] == [3, 1, 2], \
            "Thread message count in mbox archive is off"
        assert [t.get_num_messages() for t in arx2.get_threads()] == [3, 1, 2], \
            "Thread message count in restored archive is off"

        # smoke test entity resolution
        arx2.resolve_entities()
    finally:
        # Guard the removal so a failure in save() is not masked by a
        # secondary "file not found" error.
        if os.path.exists(csv_path):
            os.remove(csv_path)
Beispiel #3
0
def test_empty_list_compute_activity_issue_246():
    """Regression check for issue 246.

    Building an archive from the empty-archive CSV fixture and asking for
    its activity is expected to raise ``mailman.MissingDataException``.
    """
    fixture_path = os.path.join(CONFIG.test_data_path, 'empty-archive-df.csv')
    frame = pd.read_csv(fixture_path)

    # Either the constructor or get_activity() may raise; both are inside
    # the expectation, as in the original test.
    with assert_raises(mailman.MissingDataException):
        archive.Archive(frame).get_activity()
Beispiel #4
0
    def test__empty_list_compute_activity_issue_246(self):
        """Regression check for issue 246.

        Building an archive from the empty-archive CSV fixture and asking
        for its activity is expected to raise
        ``archive.MissingDataException``.
        """
        fixture_path = os.path.join(CONFIG.test_data_path, "empty-archive-df.csv")
        frame = pd.read_csv(fixture_path)

        # Either the constructor or get_activity() may raise; both are
        # inside the expectation.
        with self.assertRaises(archive.MissingDataException):
            archive.Archive(frame).get_activity()
Beispiel #5
0
    def test_email_entity_resolution(self):
        """Smoke test: sender entity resolution and repartitioning must not raise.

        Loads the ``2001-November.txt`` mbox fixture, resolves sender
        entities from the unresolved activity, and repartitions the
        archive's activity with the result.
        """
        arx = archive.Archive(
            "2001-November.txt", archive_dir="tests/data", mbox=True
        )

        unresolved_activity = arx.get_activity(resolved=False)
        entities = process.resolve_sender_entities(unresolved_activity)

        activity = arx.get_activity()
        utils.repartition_dataframe(activity, entities)

        # Reaching this line means none of the calls above raised.
        self.assertTrue(True, msg="email entity resolution crashed")
Beispiel #6
0
def test_clean_message():
    """Check that ``utils.clean_message`` drops a quoted line from a body.

    A message body is picked from the ``2001-November.txt`` mbox fixture.
    The test first confirms it is the expected message and that the quoted
    line is present, then that the line is absent after cleaning.
    """
    arx = archive.Archive("2001-November.txt",
                          archive_dir="tests/data",
                          mbox=True)

    bodies = arx.data['Body']
    raw_body = bodies['<*****@*****.**>']
    quoted_line = "Is it a problem of lapack3.0 of of"

    assert "But seemingly it is even stranger than this." in raw_body, \
        "Selected wrong message"

    assert quoted_line in raw_body, \
        "Quoted text is not in uncleaned message"

    cleaned_body = utils.clean_message(raw_body)
    assert quoted_line not in cleaned_body, \
        "Quoted text is in cleaned message"
Beispiel #7
0
def test_mailman_chain():
    """Collect a mailing list, then round-trip its archive through CSV.

    The list at ``url`` is collected and unzipped via ``mailman``, an
    archive is built from it, saved to CSV and loaded back. The restored
    archive must have the same data shape as the original. The CSV file is
    removed even when a step fails.
    """
    url = "http://mail.scipy.org/pipermail/ipython-dev/"
    csv_path = "test.csv"

    mailman.collect_from_url(url)
    mailman.unzip_archive(url)

    arx = archive.Archive(url)

    try:
        arx.save(csv_path)

        arx2 = archive.load(csv_path)

        # Compare the original against the *restored* archive; comparing
        # arx with itself can never fail.
        assert arx.data.shape == arx2.data.shape, \
            "Original and restored archives are different shapes"
    finally:
        # Guard the removal so a failure in save() is not masked by a
        # secondary "file not found" error.
        if os.path.exists(csv_path):
            os.remove(csv_path)
Beispiel #8
0
    def test__open_list_archives(self):
        """Exercise ``archive.open_list_archives`` and ``Archive.add_affiliation``.

        Opens one mbox file and then the whole ``3GPP_mbox`` directory,
        checking the column and row counts of each result. An archive built
        from the directory result then receives a one-row affiliation table,
        and the ``affiliation`` value is checked for two index entries.
        """
        expected_columns = 6

        # A single mbox file inside the 3GPP_mbox directory.
        single_list = archive.open_list_archives(
            archive_name="3GPP_TSG_SA_WG4_EVS.mbox",
            archive_dir=CONFIG.test_data_path + "3GPP_mbox/",
            mbox=True,
        )
        assert len(single_list.columns.values) == expected_columns
        assert len(single_list.index.values) == 50

        # The directory as a whole, opened with mbox=False.
        all_lists = archive.open_list_archives(
            archive_name="3GPP_mbox",
            archive_dir=CONFIG.test_data_path,
            mbox=False,
        )
        assert len(all_lists.columns.values) == expected_columns
        assert len(all_lists.index.values) == 108

        ## Testing add_affilation

        # NOTE(review): the address literals below ("*****@*****.**",
        # "[email protected]") look redacted/obfuscated -- confirm them
        # against the real fixture before relying on this test.
        affiliation_record = {
            "email": "*****@*****.**",
            "affiliation": "TestOrg",
            "min_date": dateutil.parser.parse("2019-04-18"),
            "max_date": dateutil.parser.parse("2021-04-18"),
        }
        rel_email_affil = pd.DataFrame.from_records([affiliation_record])

        arx = archive.Archive(all_lists)
        arx.add_affiliation(rel_email_affil)

        matched_row = arx.data.loc["[email protected]"]
        assert matched_row["affiliation"] == "TestOrg"

        unmatched_row = arx.data.loc["*****@*****.**"]
        assert unmatched_row["affiliation"] is None
Beispiel #9
0
    def test_mailman_chain(self):
        """Round-trip an mbox-backed archive through CSV and compare the results.

        The archive is loaded from an mbox file, saved to CSV, and loaded
        back. Shape, index and per-thread message counts of the original
        and the restored archive are checked, followed by a smoke run of
        entity resolution on the restored archive. The CSV file is removed
        even when an assertion fails.
        """
        name = "bigbang-dev-test.txt"
        csv_path = "test.csv"

        # archive loaded from mbox
        arx = archive.Archive(name,
                              archive_dir=CONFIG.test_data_path,
                              mbox=True)

        try:
            arx.save(csv_path)

            # archive loaded from stored csv
            arx2 = archive.load(csv_path)

            print(arx.data.dtypes)
            print(arx.data.shape)

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b).
            self.assertEqual(
                arx.data.shape,
                arx2.data.shape,
                msg="Original and restored archives are different shapes",
            )

            self.assertTrue(
                (arx2.data.index == arx.data.index).all(),
                msg="Original and restored archives have nonidentical indices",
            )

            self.assertEqual(
                [t.get_num_messages() for t in arx.get_threads()],
                [3, 1, 2],
                msg="Thread message count in mbox archive is off",
            )
            self.assertEqual(
                [t.get_num_messages() for t in arx2.get_threads()],
                [3, 1, 2],
                msg="Thread message count in restored archive is off",
            )

            # smoke test entity resolution
            arx2.resolve_entities()
        finally:
            # Guard the removal so a failure in save() is not masked by a
            # secondary "file not found" error.
            if os.path.exists(csv_path):
                os.remove(csv_path)
Beispiel #10
0
    def test_clean_message(self):
        """Check that ``utils.clean_message`` drops a quoted line from a body.

        A message body is picked from the ``2001-November.txt`` mbox
        fixture. The test first confirms it is the expected message and
        that the quoted line is present, then that the line is absent
        after cleaning.
        """
        arx = archive.Archive(
            "2001-November.txt", archive_dir=CONFIG.test_data_path, mbox=True
        )

        bodies = arx.data["Body"]
        raw_body = bodies["<*****@*****.**>"]
        quoted_line = "Is it a problem of lapack3.0 of of"

        is_expected_message = (
            "But seemingly it is even stranger than this." in raw_body
        )
        self.assertTrue(is_expected_message, msg="Selected wrong message")

        self.assertTrue(
            quoted_line in raw_body,
            msg="Quoted text is not in uncleaned message",
        )

        cleaned_body = utils.clean_message(raw_body)
        self.assertTrue(
            quoted_line not in cleaned_body,
            msg="Quoted text is in cleaned message",
        )