Beispiel #1
0
def test_threading():
    """Check that e-mail threading yields the expected five-node tree.

    Ingests ``data_dir`` into a fresh ``FeatureVectorizer`` dataset, parses
    the e-mail headers, runs ``_EmailThreadingWrapper.thread()`` and compares
    the resulting tree (via ``to_dict()``) against a hand-written reference.
    """

    def _node(node_id, parent, children=()):
        # Build one reference entry in the shape compared against to_dict().
        return {'id': node_id, 'parent': parent, 'children': list(children)}

    cache_dir = check_cache()

    vectorizer = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    dataset_id = vectorizer.setup()
    # NOTE(review): data_dir is not defined inside this function; presumably
    # it is a module-level constant -- confirm against the full file.
    vectorizer.ingest(data_dir=data_dir)
    vectorizer.parse_email_headers()

    threader = _EmailThreadingWrapper(cache_dir=cache_dir,
                                      parent_id=dataset_id)

    tree = threader.thread()
    threader.get_params()

    # One root (0) with a leaf child (1) and a child (2) that itself has
    # two leaf children (3 and 4).
    expected_tree = [
        _node(0, None, [
            _node(1, 0),
            _node(2, 0, [
                _node(3, 2),
                _node(4, 2),
            ]),
        ]),
    ]

    actual_tree = [root.to_dict() for root in tree]
    assert actual_tree == expected_tree

    # Every ingested file must be accounted for in the threaded tree.
    assert len(vectorizer.filenames_) == sum(root.tree_size for root in tree)
    assert len(vectorizer.filenames_) == 5
    assert len(tree[0].flatten()) == 5
Beispiel #2
0
def test_email_parsing():
    """Check that header parsing returns one entry per ingested file.

    Ingests the ``fedora-devel-list-2008-October`` data directory into a
    fresh ``FeatureVectorizer`` dataset, parses the e-mail headers, asserts
    the result has the same length as ``filenames_``, then deletes the
    dataset.
    """
    path_parts = (basename, "..", "..", "data",
                  "fedora-devel-list-2008-October")
    data_dir = os.path.join(*path_parts)
    cache_dir = check_cache()

    vectorizer = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    # The value returned by setup() is not needed in this test.
    vectorizer.setup()
    vectorizer.ingest(data_dir)

    parsed_headers = vectorizer.parse_email_headers()
    assert len(vectorizer.filenames_) == len(parsed_headers)

    # Remove the dataset created for this test.
    vectorizer.delete()