Ejemplo n.º 1
0
def unpack_message(data):
    """
    Parse a raw email into a Bunch describing one post.

    @param data: raw message content handed to pyzmail.PyzMessage
    @returns: a Bunch with name, email, date, id, reply_to, subj and body
        attributes, or None when the message must be skipped (missing or
        unparseable Date header, missing subject, no text part, or a body
        that cannot be converted to unicode)
    @raises Exception: whatever decoding the body raised, after logging it
    """
    msg = pyzmail.PyzMessage(data)

    # Get the name and email the message is coming from
    name, email = msg.get_address('from')
    email = email.lower()

    # A message without a date or a subject is skipped
    date = msg.get_decoded_header("Date")
    subj = msg.get_subject()
    if not date or not subj:
        return None

    # Parse the date. parsedate() reports an unparseable header by
    # returning None (email.utils semantics assumed), and datetime()
    # rejects out-of-range fields with ValueError; both are treated like
    # a missing date instead of crashing the import.
    parsed = parsedate(date)
    if parsed is None:
        return None
    try:
        date = datetime(*parsed[:6])
    except ValueError:
        return None
    date = timezone.make_aware(date, timezone=utc)

    b = Bunch(name=name, email=email, date=date)
    b.id = msg.get_decoded_header("Message-ID")
    b.reply_to = msg.get_decoded_header('In-Reply-To')

    # Strip every known pattern from the subject
    b.subj = subj
    for patt in REPLACE_PATT:
        b.subj = b.subj.replace(patt, "")

    # Get the body of the message
    if not msg.text_part:
        return None

    body = msg.text_part.get_payload()
    # Fall back to utf-8 when the detector cannot name an encoding
    charset = detect(body)['encoding'] or 'utf-8'

    try:
        body = body.decode(charset, "replace")
        body = fix_accents(body)
    except Exception:
        logger.error("error decoding message %s", b.id)
        # Bare raise keeps the original traceback intact
        raise

    # Checks for remote body for bioconductor import
    body = bioc_remote_body(body)

    # Reformat the body
    body = format_text(body)

    try:
        b.body = to_unicode_or_bust(body)
    except UnicodeDecodeError:
        # Ignore this post
        return None

    return b
Ejemplo n.º 2
0
def unpack_message(data):
    """
    Parse a raw email and decode its text body.

    @param data: raw message content handed to pyzmail.PyzMessage
    @returns: None on every non-raising path visible in this snippet
        (missing or unparseable Date header, missing subject, no text
        part, or after the body has been decoded)
    @raises Exception: whatever decoding the body raised, after logging it
    """
    msg = pyzmail.PyzMessage(data)

    # Get the name and email the message is coming from
    name, email = msg.get_address('from')
    email = email.lower()

    # A message without a date or a subject is skipped
    date = msg.get_decoded_header("Date")
    subj = msg.get_subject()
    if not date or not subj:
        return None

    # Parse the date. parsedate() reports an unparseable header by
    # returning None (email.utils semantics assumed), and datetime()
    # rejects out-of-range fields with ValueError; both are treated like
    # a missing date instead of crashing the import.
    parsed = parsedate(date)
    if parsed is None:
        return None
    try:
        date = datetime(*parsed[:6])
    except ValueError:
        return None
    date = timezone.make_aware(date, timezone=utc)

    b = Bunch(name=name, email=email, date=date)
    b.id = msg.get_decoded_header("Message-ID")
    b.reply_to = msg.get_decoded_header('In-Reply-To')

    # Strip every known pattern from the subject
    b.subj = subj
    for patt in REPLACE_PATT:
        b.subj = b.subj.replace(patt, "")

    # Get the body of the message
    if not msg.text_part:
        return None

    body = msg.text_part.get_payload()
    # Fall back to utf-8 when the detector cannot name an encoding
    charset = detect(body)['encoding'] or 'utf-8'

    try:
        body = body.decode(charset, "replace")
        body = fix_accents(body)
    except Exception:
        # "except X as e" / bare raise work on Python 2.6+ and Python 3,
        # unlike the old "except X, e" spelling
        logger.error("error decoding message %s", b.id)
        raise
    # NOTE(review): the visible snippet stops here; the decoded body and
    # the Bunch are never returned -- confirm against the full function.
Ejemplo n.º 3
0
def msg_to_dict(msg):
    """
    Convert a PyZmail message to a dictionary

    Every header becomes a key (with '.' replaced by ',') holding its
    decoded value re-encoded as UTF-8. The body is stored under 'Body'
    and, when the message carries nested parts, their dictionaries are
    stored as a list under 'Attachments'.

    @type msg: PyzMessage
    @param msg: email to convert
    @returns: {'Header': 'content'}
    """
    # FIXME: any repeated header will be ignored
    # Usually it is only 'Received' header
    d = {}

    if msg.text_part:
        body = msg.text_part.get_payload()
        charset = msg.text_part.charset
    else:
        body = msg.get_payload()
        charset = msg.get_charset()
    if charset:
        # get_charset() may hand back a Charset object rather than a plain
        # string; str() yields the charset name in both cases
        charset = str(charset).lower()
        i = charset.find('iso')
        u = charset.find('utf')
        # Drop any prefix in front of the 'iso...' / 'utf...' name
        if i > 0:
            charset = charset[i:]
        elif u > 0:
            charset = charset[u:]
        # Some old emails say it's ascii or unknown but in reality it is
        # not, so do not use any charset that is neither iso nor utf
        elif i != 0 and u != 0:
            charset = None

    for header in msg.keys():
        value = msg.get_decoded_header(header)
        value, _ = pyzmail.decode_text(value, charset, None)
        value = value.encode('UTF-8')
        header = header.replace('.', ',')    # mongoDB doesn't like '.' in keys
        d[header] = value

    attach = []
    if isinstance(body, str):
        body, _ = pyzmail.decode_text(body, charset, None)
        body = body.encode('UTF-8')
    # For attached emails the payload sometimes ends up being a list of
    # email.message objects
    elif isinstance(body, list):
        for part in body:
            attach.append(msg_to_dict(pyzmail.PyzMessage(part)))
        # The first part supplies the body; an empty list has none to offer
        body = attach[0]['Body'] if attach else ''
    d['Body'] = body

    if len(msg.mailparts) > 1:
        for mailpart in msg.mailparts:
            attach.append(msg_to_dict(pyzmail.PyzMessage(mailpart.part)))

    if attach:
        d['Attachments'] = attach

    return d