Ejemplo n.º 1
0
    def get_main_content(self, msg: email.message.EmailMessage):
        """Return the message body, its content type and its character set.

        The part selected by ``msg.get_body()`` is decoded with
        ``get_content()``. If anything on that path raises (including
        ``get_body()`` returning ``None``), the error is printed and the first
        ``text/plain`` part found by ``msg.walk()`` is used instead, with its
        payload left undecoded.

        Args:
            msg: The parsed message to inspect.

        Returns:
            A ``(main_content, format_, charset)`` tuple. When neither path
            yields a part, the placeholder strings assigned in the ``except``
            branch are returned.
        """
        try:
            body_part = msg.get_body()
            main_content = body_part.get_content()
            format_ = body_part.get_content_type()
            charset = body_part.get_content_charset()

        except Exception as error:
            print(error)
            main_content = '解析失敗'
            format_ = '不明'
            charset = '不明'
            # get_body()/get_content() typically fail on mails whose charset
            # declaration is broken, so return the text part as-is without
            # decoding it.
            for part in msg.walk():
                if part.get_content_type() == 'text/plain':
                    format_ = part.get_content_type()
                    main_content = str(part.get_payload())
                    charset = part.get_content_charset()
                    # Stop at the first text part: continuing would let a
                    # later text/plain part (e.g. a .txt attachment) replace
                    # the actual body.
                    break

        return main_content, format_, charset
Ejemplo n.º 2
0
    async def handle_message(self, message: email.message.EmailMessage):
        """Forward *message* via SMTP and log where it went.

        A message carrying a non-empty ``Remove-List-Unsubscribe`` header is
        treated as transactional: that header and ``List-Unsubscribe`` are
        stripped before sending. The ``X-Peer``, ``X-MailFrom`` and
        ``X-RcptTo`` headers are always stripped (presumably they are added by
        the receiving side -- confirm against the caller).

        When ``LOG_CONTENT`` is truthy, the payload of every ``text/plain`` and
        ``text/html`` part is logged after sending.

        :param message: The message to relay; it is modified in place.
        """
        is_transactional = bool(message.get("Remove-List-Unsubscribe", None))
        if is_transactional:
            del message["List-Unsubscribe"]
            del message["Remove-List-Unsubscribe"]

        # Deleting a header that is absent is a no-op, so no guards are needed.
        for internal_header in ('X-Peer', 'X-MailFrom', 'X-RcptTo'):
            del message[internal_header]

        await aiosmtplib.send(message, hostname=SMTP_HOST, port=SMTP_PORT)

        if not is_transactional:
            log.info(
                f'Non-Transactional Mail has been sent to {",".join(message.get_all("to", []))}'
            )
        else:
            log.info(
                f'Transactional Mail has been sent to {",".join(message.get_all("to", []))}'
            )

        if not LOG_CONTENT:
            return

        # walk() visits the whole MIME tree; only the textual leaves are logged.
        for part in message.walk():
            if part.get_content_type() in ('text/plain', 'text/html'):
                log.info(part.get_payload())
    def _convert_message_part_to_str(message_part: email.message.EmailMessage) -> Tuple[str, bool]:
        """Turn the content of one message part into a string.

        :param message_part: The MIME part whose content should be converted.
        :return: ``(text, is_base64)``. Content that ``get_content()`` already
            returns as ``str`` is passed through with ``False``. Binary content
            of type ``application/xml`` is decoded to text (``False``); any
            other binary content is base64-encoded (``True``).
        :raises ebxml_envelope.EbXmlParsingError: if the binary content cannot
            be decoded.
        """
        payload: Union[str, bytes] = message_part.get_content()
        part_type = message_part.get_content_type()
        transfer_encoding = message_part['Content-Transfer-Encoding']
        log_fields = {'ContentType': part_type, 'ContentTransferEncoding': transfer_encoding}

        if isinstance(payload, str):
            logger.info('Successfully decoded message part with {ContentType} {ContentTransferEncoding} as string',
                        fparams=log_fields)
            return payload, False

        try:
            if part_type != 'application/xml':
                # Arbitrary binary data: ship it as base64 text and flag it.
                encoded = base64.b64encode(payload).decode()
                logger.info('Successfully encoded binary message part with {ContentType} {ContentTransferEncoding} as '
                            'a base64 string', fparams=log_fields)
                return encoded, True

            # XML delivered as bytes: decode with the default codec.
            text = payload.decode()
            logger.info('Successfully decoded message part with {ContentType} {ContentTransferEncoding} '
                        'as a string', fparams=log_fields)
            return text, False
        except UnicodeDecodeError as decode_error:
            logger.error('Failed to decode ebXML message part with {ContentType} {ContentTransferEncoding}.',
                         fparams=log_fields)
            raise ebxml_envelope.EbXmlParsingError(f'Failed to decode ebXML message part with '
                                                   f'Content-Type: {part_type} and '
                                                   f'Content-Transfer-Encoding: {transfer_encoding}') from decode_error
    def _extract_message_parts(msg: email.message.EmailMessage) -> Tuple[str, str, List[Dict[str, Union[str, bool]]]]:
        """Split a multipart message into its ebXML, payload and attachment parts.

        Per EIS section 2.5.4 the first MIME part must hold the ebXML SOAP
        message and the message payload, if any, must be the first additional
        attachment.

        :param msg: The message to extract parts from.
        :return: A tuple ``(ebxml_part, payload_part, attachments)``.
            ``payload_part`` is ``None`` and ``attachments`` is empty when the
            message has only one part.
        :raises ebxml_envelope.EbXmlParsingError: if the message is not multipart.
        """
        if not msg.is_multipart():
            logger.error('Non-multipart message received')
            raise ebxml_envelope.EbXmlParsingError("Non-multipart message received")

        parts = tuple(msg.iter_parts())
        EbxmlRequestEnvelope._report_any_defects_in_message_parts(parts)

        # Part 1: the ebXML envelope itself.
        ebxml_part = EbxmlRequestEnvelope._extract_ebxml_part(parts[0])

        if len(parts) <= 1:
            return ebxml_part, None, []

        # Part 2: the HL7 payload. Parts 3..n: additional attachments.
        payload_part = EbxmlRequestEnvelope._extract_hl7_payload_part(parts[1])
        attachments = []
        attachments.extend(EbxmlRequestEnvelope._extract_additional_attachments_parts(parts[2:]))

        return ebxml_part, payload_part, attachments
Ejemplo n.º 5
0
    def _get_payload(self, message: email.message.EmailMessage) -> str:
        """
        Get the body of the email.

        For a multipart message this is whatever ``get_payload()`` returns for
        its first sub-part; for any other message it is the message's own
        payload. No transfer-decoding is requested.

        NOTE(review): this docstring previously warned that the fetched
        message gets marked as seen. Nothing in this method does that;
        presumably the remark refers to how the caller fetches the message --
        confirm.

        :param message: The parsed email.
        :return: The payload described above.
        """
        if message.is_multipart():
            return message.get_payload(0).get_payload()
        # get_payload(i) raises TypeError when the payload is not a list of
        # sub-parts, so a non-multipart message must be read without an index.
        return message.get_payload()
Ejemplo n.º 6
0
    def get_attachments(self, msg: email.message.EmailMessage, b_num: bytes):
        """Write every named attachment of *msg* to disk and describe the files.

        Files are stored under ``./tmp/<mail number>/``, where the mail number
        is *b_num* decoded as UTF-8. Attachments without a filename are
        skipped. A failure while handling one attachment is printed and does
        not stop the remaining attachments from being processed.

        Args:
            msg: The parsed message whose attachments should be saved.
            b_num: The mail number as bytes; used as the directory name.

        Returns:
            A list of ``{'file_path': <absolute path>, 'file_name': <name>}``
            dicts, one per file written. ``file_name`` is the name actually
            used on disk (directory components of the attachment name are
            dropped).
        """
        files = []
        for part in msg.iter_attachments():
            try:
                filename = part.get_filename()
                if not filename:
                    continue

                # The filename is taken from the message and therefore
                # untrusted: keep only its last component so that names such
                # as "../../x" or "/etc/x" cannot escape the mail directory.
                safe_name = os.path.basename(filename.replace('\\', '/'))
                if safe_name in ('', '.', '..'):
                    continue

                payload = part.get_payload(decode=True)
                if payload is None:
                    # No single decodable payload (e.g. a multipart
                    # attachment): skip instead of leaving an empty file.
                    continue

                # Create the directory named after the mail number.
                new_dir = os.path.join('./tmp/{}/'.format(
                    b_num.decode('utf-8')))
                os.makedirs(new_dir, exist_ok=True)

                # Write the attachment.
                file_path = os.path.join(new_dir, safe_name)
                with open(file_path, 'wb') as fp:
                    fp.write(payload)

                # Record the absolute path and the file name.
                abs_path = os.path.abspath(file_path)
                files.append({'file_path': abs_path, 'file_name': safe_name})

            except Exception as error:
                print(error)

        return files
Ejemplo n.º 7
0
def filter_email(eml: email.message.EmailMessage) -> str:
    """Download the page behind the first link in the plain-text body of *eml*.

    The first URL-like string in the ``text/plain`` body is fetched with
    ``urllib`` and the response is returned as BeautifulSoup's prettified
    markup.

    NOTE(security): the URL is taken from the email, i.e. from untrusted
    input, and is fetched as-is. Callers that process mail from arbitrary
    senders should restrict which hosts may be contacted.

    Args:
        eml: The parsed email to scan.

    Returns:
        The prettified markup of the downloaded page.

    Raises:
        EmailParseException: If the message has no plain-text body.
        LinkNotFoundException: If the body contains no link.
    """
    # get_body() expects a sequence of subtypes, not a bare string.
    body = eml.get_body(("plain",))
    if body is None:
        raise EmailParseException()

    # Find a link in the body
    link_re = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"

    # Only the first link is used, so stop scanning at the first match.
    match = re.search(link_re, body.get_content())
    if match is None:
        raise LinkNotFoundException()

    url: str = match.group(1)

    # The pattern also accepts links without a scheme ("www.example.com/x",
    # "example.com/x"); urllib rejects those with ValueError, so add one.
    if not re.match(r"(?i)https?://", url):
        url = "http://" + url

    # Some sites require this header
    headers = {
        'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
    }

    # Download the contents at that URL with urllib
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        webpage = response.read()

    # Parse with BeautifulSoup once the connection has been released.
    bs = BeautifulSoup(webpage, features='lxml')
    return bs.prettify()
Ejemplo n.º 8
0
    def get_header_text(self, msg: email.message.EmailMessage):
        """Return all headers of *msg* as one string.

        Each header becomes one ``"<name>: <value>"`` line terminated by a
        newline, in the order ``msg.items()`` yields them. A message without
        headers gives an empty string.
        """
        header_lines = [
            '{}: {}\n'.format(name, content) for name, content in msg.items()
        ]
        return ''.join(header_lines)
Ejemplo n.º 9
0
    def _get_body(msg: email.message.EmailMessage) -> Optional[str]:
        """Return the text of the single inline ``text/plain`` part of *msg*.

        A part qualifies when its content type is ``text/plain``, it is not
        multipart and it carries no ``Content-Disposition`` header. The payload
        is transfer-decoded and then decoded with the part's declared charset;
        if that charset is missing, unknown or does not fit the bytes, UTF-8
        is used.

        :param msg: The parsed message to search.
        :return: The decoded text, or ``None`` when no part qualifies.
        :raises Exception: If more than one part qualifies.
        """
        candidate = None

        for part in msg.walk():
            if part.get_content_type() != "text/plain" or part.is_multipart():
                continue
            if part.get("Content-Disposition") is not None:
                continue

            # Compare against None: an empty first body is still a candidate,
            # so a second qualifying part must be reported as ambiguous.
            if candidate is not None:
                raise Exception("two possible candidates!")

            raw = part.get_payload(decode=True)
            charset = part.get_content_charset() or "utf-8"
            try:
                candidate = raw.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # Unknown or wrong charset label: fall back to UTF-8.
                candidate = raw.decode("utf-8")

        return candidate
Ejemplo n.º 10
0
    def _store_fault(item: Item, number: int,
                     mail: email.message.EmailMessage) -> str:
        """Dump *mail* to an ``.eml`` file and return an error description.

        The file is written to
        ``<pmlib.config.target_path>/<item.name>_<number>.eml`` (normalised to
        an absolute path) as UTF-8 text.

        :param item: Object whose ``name`` is used in the file name and in the
            returned text.
        :param number: Number of the mail within *item*.
        :param mail: The message to store.
        :return: The text ``"Unable to decode mail <number> in <item.name>"``.
        """
        filename = "{0:s}/{1:s}_{2:d}.eml".format(pmlib.config.target_path,
                                                  item.name, number)
        filename = os.path.abspath(os.path.normpath(filename))

        error_text = "Unable to decode mail {0:d} in {1:s}".format(
            number, item.name)

        # Serialise before opening the file so that a failure in as_string()
        # does not leave an empty file behind.
        serialized = mail.as_string()

        # The context manager closes the handle even if the write fails.
        with open(filename, mode="w", encoding='utf-8') as f:
            f.write(serialized)
        return error_text
Ejemplo n.º 11
0
def _header_or_none(msg, name):
    """Return header *name* of *msg*, or None if it is absent or unreadable."""
    try:
        return msg.get(name)
    except Exception:
        return None


def process_message(uid, msg: email.message.EmailMessage):
    """Summarise *msg* as a plain dict.

    Every piece of information is gathered on a best-effort basis: a failure
    while reading one field yields ``None`` (or an empty link list) for that
    field instead of aborting the whole summary.

    Args:
        uid: Identifier of the message; copied into the result unchanged.
        msg: The parsed message.

    Returns:
        A dict with the keys ``uid``, ``to``, ``from``, ``subject``,
        ``raw_date`` (the ``Date`` header), ``local_date`` (that date as a
        naive local ``datetime``, or ``None``), ``message-id``, ``links``
        (links found in the preferred HTML or plain-text body) and
        ``num-links``.
    """
    try:
        raw_date = msg.get('Date')
        local_date = None

        # Now convert to local date-time
        date_tuple = email.utils.parsedate_tz(raw_date)
        if date_tuple:
            local_date = datetime.datetime.fromtimestamp(
                email.utils.mktime_tz(date_tuple))
    except Exception:
        raw_date = None
        local_date = None

    links = []
    try:
        body = msg.get_body(('html', 'plain'))
        # Compare against None: a message object is falsy when it has no
        # headers, which would wrongly skip a header-less body part.
        if body is not None:
            if body.get_content_type() == 'text/plain':
                links = links_from_plaintext(body.get_content())
            elif body.get_content_type() == 'text/html':
                links = links_from_html(body.get_content())
    except Exception:
        # Link extraction is optional; an unparsable body just means no links.
        links = []

    info = {
        'uid': uid,
        'to': _header_or_none(msg, 'To'),
        'from': _header_or_none(msg, 'From'),
        'subject': _header_or_none(msg, 'Subject'),
        'raw_date': raw_date,
        'local_date': local_date,
        'message-id': _header_or_none(msg, 'Message-ID'),
        'links': links,
        'num-links': len(links)
    }

    return info
Ejemplo n.º 12
0
 def email_message_to_plain(em: email.message.EmailMessage) -> str:
     """
     Extract plain text from an EmailMessage object.

     Only the first ``text/plain`` or ``text/html`` part found by ``walk()``
     is considered. Plain text is returned as-is (or ``"empty"`` if the
     content is ``None``); HTML is converted with ``Formatter.html_to_plain``.
     If the message has no such part, nothing is returned (``None``).

     @ param em: the EmailMessage object
     @ return: the plain text of the given object
     """
     text_types = ['text/plain', 'text/html']
     first_text = next(
         (candidate for candidate in em.walk()
          if candidate.get_content_type() in text_types),
         None,
     )
     if first_text is None:
         return None

     try:
         body = first_text.get_content()
     except Exception:
         # Content that cannot be decoded is used in its raw payload form.
         body = str(first_text.get_payload())

     if first_text.get_content_type() != 'text/plain':
         return Formatter.html_to_plain(body)
     return "empty" if body is None else body