def test_threading():
    """Thread the parsed e-mails and compare the tree with a reference.

    The reference consists of a single root message (id 0) with two
    replies (ids 1 and 2); message 2 in turn has two replies (ids 3 and 4).
    """
    cache_dir = check_cache()

    fe = EmailParser(cache_dir=cache_dir)
    # Raw string: in a plain literal "\d" is an invalid escape sequence,
    # which newer Python versions warn about at compile time.
    uuid = fe.transform(data_dir, file_pattern=r'.*\d')
    filenames, _ = fe.load(uuid)

    cat = _EmailThreadingWrapper(cache_dir=cache_dir, parent_id=uuid)
    tree = cat.thread()
    # Smoke check only: the call must not raise; its result is not inspected.
    cat.get_params()

    tree_ref = [{
        'id': 0,
        'parent': None,
        'children': [
            {'id': 1, 'children': [], 'parent': 0},
            {
                'id': 2,
                'parent': 0,
                'children': [
                    {'id': 3, 'children': [], 'parent': 2},
                    {'id': 4, 'children': [], 'parent': 2},
                ],
            },
        ],
    }]

    assert [el.to_dict() for el in tree] == tree_ref
    # The sizes of the top-level containers must add up to the number of
    # parsed files.
    assert len(filenames) == sum(el.size for el in tree)
    assert len(filenames) == 5
def test_search_filenames():
    """Check that ``search`` maps file names back to their indices.

    Several index ranges (ascending and descending) are looked up by file
    name; an unknown file name must raise ``KeyError``.
    """
    cache_dir = check_cache()

    fe = EmailParser(cache_dir=cache_dir)
    # Raw string: in a plain literal "\d" is an invalid escape sequence,
    # which newer Python versions warn about at compile time.
    fe.transform(data_dir, file_pattern=r'.*\d')

    filenames = fe._pars['filenames']

    for low, high, step in [(0, 1, 1), (0, 4, 1), (3, 1, -1)]:
        idx_slice = list(range(low, high, step))
        filenames_slice = [filenames[idx] for idx in idx_slice]
        idx0 = fe.search(filenames_slice)
        # The lookup must return the indices in the order requested ...
        assert_equal(idx0, idx_slice)
        # ... and indexing the parser with them must give the names back.
        assert_equal(filenames_slice, fe[idx0])

    with pytest.raises(KeyError):
        fe.search(['DOES_NOT_EXIST.txt'])

    # Smoke check only: the call must not raise; its result is not inspected.
    fe.list_datasets()
def test_email_parser():
    """Parse the sample e-mails and check what ``load`` returns.

    The loaded file names must match the ones stored in the parser's
    parameters, there must be one parsed object per file (five in total),
    and every parsed object must be a ``Message`` instance.
    """
    cache_dir = check_cache()

    fe = EmailParser(cache_dir=cache_dir)
    # Raw string: in a plain literal "\d" is an invalid escape sequence,
    # which newer Python versions warn about at compile time.
    uuid = fe.transform(data_dir, file_pattern=r'.*\d')

    filenames, res = fe.load(uuid)
    assert_equal(filenames, fe._pars['filenames'])
    assert len(filenames) == len(res)
    assert len(filenames) == 5

    for message in res:
        assert isinstance(message, Message)

    # Exercise delete() once all checks on the loaded data are done.
    fe.delete()