def test__convert_datastruct_text():
    """Check that converting the sample structure leaves it equal to itself.

    The fixture is run through ``convert_datastruct_text`` with three
    converters (``unicode``, ``str`` and an identity function); each result
    must compare equal to the structure that was loaded.
    """
    sample = utils.load_json("sample_parsed_email.json", mode="r")
    converters = (unicode, str, lambda a: a)
    # Run every conversion before asserting anything, so all three calls
    # happen even when an early comparison would fail.
    converted = [convert_datastruct_text(sample, convertfunc=func)
                 for func in converters]
    for result in converted:
        assert sample == result
Example #2
0
def test__parse_multi_layer_file():
    """Compare the parsed body of the sample email with the cached fixture.

    Only the ``['content']['body']`` entry of the first element is compared.
    """
    fixture_path = join(TESTDIR, 'sample_parsed_email.json')
    expected = utils.load_json(fixture_path, mode='r')
    parsed = parse.parse_multi_layer_file('sample_enron_email.')
    # Round-trip through JSON before comparing (presumably so the parser
    # output has the same shape as the JSON-loaded fixture -- confirm).
    roundtripped = json.loads(json.dumps(parsed))
    expected_body = expected[0]['content']['body']
    assert roundtripped[0]['content']['body'] == expected_body
Example #3
0

from os.path import dirname

from magic import from_buffer
from types import NoneType

from estratto.fixEncoding import(auto_unicode_dang_it, sane_unicode)
from estratto.parseBinary import parse_binary
# from estratto.fixEncoding import make_unicode_dang_it
from estratto.parseEmail import(is_an_email,
                                email_whole_parse)
from estratto import utils

# Path of the default configuration JSON, located in the same directory as
# the ``utils`` module.
CONFFILE = dirname(utils.__file__) + '/defconf.json'
# Set built from the 'ok_ext_set' entry of that configuration
# (presumably the file extensions this module accepts -- TODO confirm).
OKEXT = set(utils.load_json(CONFFILE, mode='r')['ok_ext_set'])


def get_file_info_from_buffer(txt):  # Consider putting in utils
    """Return ``(info, mime, ftype)`` for the buffer ``txt``.

    ``info`` is the result of ``from_buffer(txt)`` and ``mime`` the result
    of ``from_buffer(txt, True)`` (the second argument is presumably the
    mime flag -- confirm against the ``magic`` API). ``ftype`` is whatever
    follows the last ``/`` in ``mime``. All three values are passed through
    ``sane_unicode`` before being returned.
    """
    description = sane_unicode(from_buffer(txt))
    mime_type = sane_unicode(from_buffer(txt, True))
    # Keep only the part after the final slash of the mime string.
    subtype = mime_type.split(u'/')[-1]
    # Consider using a namedtuple.
    return description, mime_type, sane_unicode(subtype)


def fit_into_data_mold(parseddict, txt, uri, ftype, mime, info):
    return {u'content': parseddict,
            u'rawbody': txt,
            u'filename': uri,
            u'type': ftype,
            u'mime': mime,
Example #4
0
def test__load_json():
    """Check that loading the sample parsed-email fixture yields a list."""
    fixture_path = TEST_DIR + "sample_parsed_email.json"
    loaded = utils.load_json(fixture_path, mode="r")
    assert isinstance(loaded, list)
Example #5
0
if sys.version_info[0] < 3:
    from email.Header import decode_header
else:
    from email.header import decode_header
from email.utils import parseaddr
from base64 import b64decode
from re import search, IGNORECASE, match


from estratto.fixEncoding import auto_unicode_dang_it, sane_unicode, open_to_unicode
from estratto import utils
from estratto.utils import normize_dtime_tmzn_nrth_am

# from estratto.utils import sopen

# Default configuration, loaded from the defconf.json file located in the
# same directory as the ``utils`` module.
CONFDICT = utils.load_json(dirname(utils.__file__) + "/defconf.json", mode="r")
# Set built from the 'email_ext_set' configuration entry
# (presumably file extensions treated as emails -- TODO confirm).
EMAILEXTS = set(CONFDICT["email_ext_set"])

# Additional header configuration, taken verbatim from the config file
# (presumably header names to extract beyond the defaults -- TODO confirm).
EXTRA_HEADERS = CONFDICT["email_extra_headers"]
EXTRA_ADDRESS_HEADERS = CONFDICT["email_extra_address_headers"]

# Pick the tuple of string types for the running interpreter's major version.
if sys.version_info[0] >= 3:
    unicode = str  # adjusting to python3
    _STRINGTYPES = (str, bytes)
else:
    _STRINGTYPES = (basestring,)


# ----------------------------------------------------------------------------
# Basic email parsing
def atch_fname_from_dispositions(dispositions):