Example #1
0
def test_build_extraction_dataset():
    """Build the extraction dataset from the emails/P folder and load it.

    A pre-existing ``extraction.data`` file in TMP_DIR is removed first.
    The freshly written file is then loaded as a SparseDataSet and its
    size and feature count are checked.
    """
    dataset_path = os.path.join(TMP_DIR, 'extraction.data')

    # make sure the builder writes into a file that does not exist yet
    if os.path.exists(dataset_path):
        os.remove(dataset_path)

    source_dir = os.path.join(EMAILS_DIR, 'P')
    d.build_extraction_dataset(source_dir, dataset_path, 1)

    loaded = SparseDataSet(dataset_path, labelsColumn=-1)

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    expected_size = 32
    eq_(loaded.size(), expected_size)

    # the dataset has one feature per entry returned by features('')
    eq_(len(features('')), loaded.numFeatures)
Example #2
0
def test_build_extraction_dataset():
    """Check that build_extraction_dataset writes a loadable dataset.

    The output file ``extraction.data`` in TMP_DIR is deleted if it is
    already there, rebuilt from the emails/P folder, loaded back as a
    SparseDataSet, and checked for its size and number of features.
    """
    output_file = os.path.join(TMP_DIR, 'extraction.data')

    # remove an already existing output file before building
    if os.path.exists(output_file):
        os.remove(output_file)

    emails_folder = os.path.join(EMAILS_DIR, 'P')
    d.build_extraction_dataset(emails_folder, output_file, 1)

    dataset = SparseDataSet(output_file, labelsColumn=-1)

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(dataset.size(), 32)

    # feature count must equal the length of features('')
    feature_count = len(features(''))
    eq_(feature_count, dataset.numFeatures)
Example #3
0
def test_build_extraction_dataset():
    """Build the extraction dataset from the emails/P folder and parse it.

    A pre-existing ``extraction.data`` file in TMP_DIR is removed first.
    The freshly written file is read back as comma-delimited values and
    the row and column counts are checked with the last column dropped.
    """
    dataset_path = os.path.join(TMP_DIR, "extraction.data")

    # make sure the builder writes into a file that does not exist yet
    if os.path.exists(dataset_path):
        os.remove(dataset_path)

    source_dir = os.path.join(EMAILS_DIR, "P")
    d.build_extraction_dataset(source_dir, dataset_path, 1)

    parsed = genfromtxt(dataset_path, delimiter=",")
    # drop the last column (presumably the label -- confirm against the
    # dataset builder) so that only the feature columns remain
    feature_rows = parsed[:, :-1]
    row_count, column_count = feature_rows.shape

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(row_count, 32)
    eq_(len(features("")), column_count)
Example #4
0
def test_build_extraction_dataset():
    """Check that build_extraction_dataset writes a parseable dataset.

    The output file ``extraction.data`` in TMP_DIR is deleted if it is
    already there, rebuilt from the emails/P folder, and read back as
    comma-delimited values. Row and column counts are then checked on
    the data with its last column dropped.
    """
    output_file = os.path.join(TMP_DIR, 'extraction.data')

    # remove an already existing output file before building
    if os.path.exists(output_file):
        os.remove(output_file)

    emails_folder = os.path.join(EMAILS_DIR, 'P')
    d.build_extraction_dataset(emails_folder, output_file, 1)

    table = genfromtxt(output_file, delimiter=",")
    # keep every column but the last one (presumably the label column;
    # verify against the dataset builder)
    samples = table[:, :-1]
    sample_count, feature_count = samples.shape

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(sample_count, 32)
    eq_(len(features('')), feature_count)