コード例 #1
0
    def get_main_content(self, msg: email.message.EmailMessage):
        """Return the body text, content type, and charset of *msg*.

        The preferred body part is located with ``msg.get_body()`` and decoded
        with ``get_content()``. If there is no body part, or decoding raises,
        the first ``text/plain`` part found by ``msg.walk()`` is returned
        without decoding instead.

        Args:
            msg: The parsed e-mail message.

        Returns:
            A ``(main_content, format_, charset)`` tuple. When no usable part
            is found, the placeholders ``'解析失敗'`` (content) and ``'不明'``
            (format and charset) are returned.
        """
        try:
            body_part = msg.get_body()
            # get_body() returns None when the message has no body candidate;
            # in that case go straight to the fallback below.
            if body_part is not None:
                return (
                    body_part.get_content(),
                    body_part.get_content_type(),
                    body_part.get_content_charset(),
                )
        except Exception as error:
            # Best effort: per the original author's note, this mostly happens
            # for mail whose charset declaration is broken. Report it and fall
            # back to the raw payload rather than failing.
            print(error)

        main_content = '解析失敗'
        format_ = '不明'
        charset = '不明'
        for part in msg.walk():
            if part.get_content_type() == 'text/plain':
                format_ = part.get_content_type()
                # Return the text part as-is, without charset decoding.
                main_content = str(part.get_payload())
                charset = part.get_content_charset()
                # Stop at the first text part so later text/plain parts
                # (e.g. text attachments) cannot overwrite the body.
                break

        return main_content, format_, charset
コード例 #2
0
def filter_email(eml: email.message.EmailMessage) -> str:
    """Download the page behind the first link in an e-mail's plain-text body.

    Args:
        eml: The parsed e-mail message.

    Returns:
        The downloaded page, parsed with BeautifulSoup (lxml) and rendered
        with ``prettify()``.

    Raises:
        EmailParseException: If the message has no ``text/plain`` body part.
        LinkNotFoundException: If no link is found in the body text.
    """
    # get_body() expects a sequence of subtypes, not a bare string.
    body = eml.get_body(("plain",))
    if body is None:
        raise EmailParseException()

    # Find a link in the body
    link_re = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"

    # Only the first link is used, so stop at the first match. Group 1 is the
    # whole URL.
    match = re.search(link_re, body.get_content())
    if match is None:
        raise LinkNotFoundException()

    url: str = match.group(1)

    # The pattern also accepts scheme-less links ("www.example.com/x",
    # "example.com/x"); urllib rejects those with ValueError, so give them a
    # default scheme.
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url

    # Some sites require this header
    headers = {
        'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    }

    # Download the contents at that URL with urllib
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        webpage = response.read()

    # Parse with BeautifulSoup once the connection has been released
    bs = BeautifulSoup(webpage, features='lxml')
    return bs.prettify()
コード例 #3
0
def _header_or_none(msg, name):
    """Return header *name* of *msg*, or None if it is absent or unreadable."""
    try:
        return msg.get(name)
    except Exception:
        # Best effort: a header that cannot be read is reported as missing.
        return None


def _to_local_datetime(raw_date):
    """Convert a Date header value to a naive local datetime, or None on failure."""
    if raw_date is None:
        return None
    try:
        date_tuple = email.utils.parsedate_tz(raw_date)
        if not date_tuple:
            return None
        return datetime.datetime.fromtimestamp(
            email.utils.mktime_tz(date_tuple))
    except Exception:
        # Unparseable or out-of-range dates simply have no local equivalent.
        return None


def process_message(uid, msg: email.message.EmailMessage):
    """Summarise a message's headers and the links found in its body.

    Extraction is best effort: a header that cannot be read is reported as
    ``None`` and a body that cannot be processed yields an empty link list.

    Args:
        uid: Identifier of the message, copied unchanged into the result.
        msg: The parsed e-mail message.

    Returns:
        A dict with the keys ``uid``, ``to``, ``from``, ``subject``,
        ``raw_date``, ``local_date``, ``message-id``, ``links`` and
        ``num-links``.
    """
    # Keep the raw Date header even when it cannot be converted; only
    # local_date depends on the conversion succeeding.
    raw_date = _header_or_none(msg, 'Date')
    local_date = _to_local_datetime(raw_date)

    links = []
    try:
        body = msg.get_body(('html', 'plain'))
        # Compare against None explicitly: a message object's truthiness is
        # its header count, so a header-less body part would look "empty".
        if body is not None:
            content_type = body.get_content_type()
            if content_type == 'text/plain':
                links = links_from_plaintext(body.get_content())
            elif content_type == 'text/html':
                links = links_from_html(body.get_content())
    except Exception:
        # Best effort: a body that cannot be decoded or scanned has no links.
        links = []

    info = {
        'uid': uid,
        'to': _header_or_none(msg, 'To'),
        'from': _header_or_none(msg, 'From'),
        'subject': _header_or_none(msg, 'Subject'),
        'raw_date': raw_date,
        'local_date': local_date,
        'message-id': _header_or_none(msg, 'Message-ID'),
        'links': links,
        'num-links': len(links)
    }

    return info