Ejemplo n.º 1
0
def _split_into_parts(raw):
    """
    Parse a raw message and build the documents that describe it.

    :param raw: the raw message, handed as-is to ``_parse_msg``.
    :return: a ``(mdoc, fdoc, hdoc, cdocs)`` tuple holding the meta,
             flags and headers documents, plus a dict with the content
             documents keyed by their one-based position.
    """
    # TODO signal that we can delete the original message
    # once all the processing is done.
    # TODO add the linked-from info!
    # TODO add a reference to the original message?
    # TODO populate default FLAGS/TAGS (unseen?)
    # TODO properly seed the content docs with defaults?

    message, chash, multi = _parse_msg(raw)
    size = len(message.as_string())

    tree = walk.get_tree(message)
    content_docs = list(walk.get_raw_docs(message))
    phashes = [doc['phash'] for doc in content_docs]
    body_phash = walk.get_body_phash(message)

    meta_doc = _build_meta_doc(chash, phashes)
    flags_doc = _build_flags_doc(chash, size, multi)
    headers_doc = _build_headers_doc(message, chash, body_phash, tree)

    # The MessageWrapper expects a dict, one-indexed
    numbered_docs = {}
    for position, doc in enumerate(content_docs, 1):
        numbered_docs[position] = doc

    return meta_doc, flags_doc, headers_doc, numbered_docs
Ejemplo n.º 2
0
FILENAME = "rfc822.plain.message"
FILENAME = "rfc822.multi-minimal.message"
"""

# Parse inside a context manager so the file handle is closed as soon as
# ``p.parse`` returns, instead of being left open for the whole run.
with open(FILENAME) as message_file:
    msg = p.parse(message_file)
DO_CHECK = False
#################################################

parts = W.get_parts(msg)

if DEBUG:
    def trim(item):
        # Return the first 10 characters of the given hash.
        return item[:10]

    # Store the shortened hash back into each part: slicing a local copy
    # and discarding the result leaves the parts untouched.
    for part in parts:
        if part.get('phash', None):
            part["phash"] = trim(part["phash"])

raw_docs = list(W.get_raw_docs(msg, parts))

# Select the body-hash helper by index: 0 when the message is not
# multipart, 1 when it is.
body_phash_fun = [W.get_body_phash_simple,
                  W.get_body_phash_multi][int(msg.is_multipart())]
body_phash = body_phash_fun(W.get_payloads(msg))
parts_map = W.walk_msg_tree(parts, body_phash=body_phash)


# TODO add missing headers!
expected = {
    'body': '1ddfa80485',
    'multi': True,
    'part_map': {
        1: {
            'headers': {'Content-Disposition': 'inline',
                        'Content-Type': 'multipart/mixed; '