def test__parse_multi_layer_file__is_output_all_unicode():
    """Check that every string value in the parsed sample email is unicode.

    The attachment filename and the raw body of each parsed document are
    overwritten with empty unicode strings before the check, so those two
    fields are excluded from the assertion.
    """
    sample_path = join(TESTDIR, 'sample_enron_email.')
    parsed_docs = parse.parse_multi_layer_file(sample_path)
    for doc in parsed_docs:
        # NOTE(review): presumably these two fields may legitimately carry
        # non-unicode data; confirm against the parser before relying on it.
        doc['rawbody'] = u''
        doc['content']['attachment']['filename'] = u''
        for value in flatten_handle_all(doc, dictvalues=True):
            if not isinstance(value, _STRINGTYPES):
                # Only string-like values are subject to the unicode check.
                continue
            assert isinstance(value, unicode)
def test__parse_large_email_with_large_mix_of_files():
    """Check that parts with an accepted extension yield a non-trivial body.

    Every part of the parsed MINIEMLWithAttch message whose lower-cased file
    extension is listed in ``parse.OKEXT`` must have a body longer than 20
    characters; parts with other extensions are ignored.
    """
    parts = parse.parse_multi_layer_file(MINIEMLWithAttch)
    for part in parts:
        extension = utils.file_exts(part['filename']).lower()
        if extension not in parse.OKEXT:
            continue
        body = part['content']['body']
        assert len(body) > 20
def test__parse_multi_layer_file():
    """Compare the parsed sample email body against the cached JSON result.

    Both fixtures are resolved relative to TESTDIR so the test does not
    depend on the directory pytest is launched from.
    """
    cached = utils.load_json(join(TESTDIR, 'sample_parsed_email.json'),
                             mode='r')
    # Resolve the input email against TESTDIR, like the cached JSON above;
    # a bare relative path only works when the cwd is the test directory.
    prsd = parse.parse_multi_layer_file(join(TESTDIR, 'sample_enron_email.'))
    # Round-trip the fresh result through JSON before comparing.
    # NOTE(review): presumably this normalises the parser output to the same
    # types the cached JSON fixture has after loading; confirm.
    new = json.loads(json.dumps(prsd))
    assert new[0]['content']['body'] == cached[0]['content']['body']