def test__convert_datastruct_text():
    """Check that converted copies of the sample structure compare equal to it.

    The sample is run through ``convert_datastruct_text`` with ``unicode``,
    ``str`` and an identity function; each result must equal the loaded
    sample.
    """
    sample = utils.load_json("sample_parsed_email.json", mode="r")

    # Run every conversion before asserting anything, in the order
    # unicode -> str -> identity.
    converted = [
        convert_datastruct_text(sample, convertfunc=converter)
        for converter in (unicode, str, lambda a: a)
    ]

    for result in converted:
        assert sample == result
def test__parse_multi_layer_file():
    """Compare the first parsed body of the sample file with the cached JSON."""
    expected = utils.load_json(join(TESTDIR, 'sample_parsed_email.json'),
                               mode='r')
    parsed = parse.parse_multi_layer_file('sample_enron_email.')

    # Round-trip through JSON so the freshly parsed result is compared in the
    # same JSON-native form as the cached fixture.
    roundtripped = json.loads(json.dumps(parsed))

    parsed_body = roundtripped[0]['content']['body']
    expected_body = expected[0]['content']['body']
    assert parsed_body == expected_body
from os.path import dirname from magic import from_buffer from types import NoneType from estratto.fixEncoding import(auto_unicode_dang_it, sane_unicode) from estratto.parseBinary import parse_binary # from estratto.fixEncoding import make_unicode_dang_it from estratto.parseEmail import(is_an_email, email_whole_parse) from estratto import utils CONFFILE = dirname(utils.__file__) + '/defconf.json' OKEXT = set(utils.load_json(CONFFILE, mode='r')['ok_ext_set']) def get_file_info_from_buffer(txt): # Consider putting in utils info = sane_unicode(from_buffer(txt)) mime = sane_unicode(from_buffer(txt, True)) ftype = sane_unicode(mime.split(u'/')[-1]) return info, mime, ftype # Consider using a namedtuple. def fit_into_data_mold(parseddict, txt, uri, ftype, mime, info): return {u'content': parseddict, u'rawbody': txt, u'filename': uri, u'type': ftype, u'mime': mime,
def test__load_json():
    """The sample parsed-email fixture should load as a list."""
    fixture_path = TEST_DIR + "sample_parsed_email.json"
    loaded = utils.load_json(fixture_path, mode="r")
    assert isinstance(loaded, list)
if sys.version_info[0] < 3: from email.Header import decode_header else: from email.header import decode_header from email.utils import parseaddr from base64 import b64decode from re import search, IGNORECASE, match from estratto.fixEncoding import auto_unicode_dang_it, sane_unicode, open_to_unicode from estratto import utils from estratto.utils import normize_dtime_tmzn_nrth_am # from estratto.utils import sopen CONFDICT = utils.load_json(dirname(utils.__file__) + "/defconf.json", mode="r") EMAILEXTS = set(CONFDICT["email_ext_set"]) EXTRA_HEADERS = CONFDICT["email_extra_headers"] EXTRA_ADDRESS_HEADERS = CONFDICT["email_extra_address_headers"] if sys.version_info[0] < 3: _STRINGTYPES = (basestring,) else: unicode = str # adjusting to python3 _STRINGTYPES = (str, bytes) # ---------------------------------------------------------------------------- # Basic email parsing def atch_fname_from_dispositions(dispositions):