예제 #1
0
 def guessCharsetAndConvert(document, text_content, content_type):
   """
   Try to re-encode text_content to UTF-8 using a guessed source codec.

   The codec is first guessed with the given content_type; when decoding
   with that codec fails, a second guess is made with 'text/plain'.

   Returns a (text_content, message) tuple: the converted text when a
   conversion succeeded (the unchanged input otherwise) and a
   human-readable message describing the outcome. The document argument
   is not used by this function.
   """
   failure_template = 'Conversion to base format with codec %r fails'
   success_template = 'Conversion to base format with codec %r succeeds'

   guessed = guessEncodingFromText(text_content, content_type)
   if guessed is None:
     return text_content, 'Conversion to base format without codec fails'

   try:
     converted = text_content.decode(guessed).encode('utf-8')
   except (UnicodeDecodeError, LookupError):
     # First guess was unusable; fall through to the second attempt below.
     pass
   else:
     return converted, success_template % guessed

   # Keep the failure message of the first codec in case no second codec
   # can be guessed.
   message = failure_template % guessed
   # try again with another guesser based on file command
   fallback = guessEncodingFromText(text_content, 'text/plain')
   if fallback is None:
     return text_content, message

   try:
     converted = text_content.decode(fallback).encode('utf-8')
   except (UnicodeDecodeError, LookupError):
     return text_content, failure_template % fallback
   else:
     return converted, success_template % fallback
예제 #2
0
 def guessCharsetAndConvert(document, text_content, content_type):
   """
   Try to re-encode text_content to UTF-8 using a guessed source codec.

   The codec is first guessed with the given content_type; when decoding
   with that codec fails, a second guess is made with 'text/plain'.

   Returns a (text_content, message) tuple: the converted text when a
   conversion succeeded (the unchanged input otherwise) and a
   human-readable message describing the outcome. The document argument
   is not used by this function.
   """
   failure_template = 'Conversion to base format with codec %r fails'
   success_template = 'Conversion to base format with codec %r succeeds'

   guessed = guessEncodingFromText(text_content, content_type)
   if guessed is None:
     return text_content, 'Conversion to base format without codec fails'

   try:
     converted = text_content.decode(guessed).encode('utf-8')
   except (UnicodeDecodeError, LookupError):
     # First guess was unusable; fall through to the second attempt below.
     pass
   else:
     return converted, success_template % guessed

   # Keep the failure message of the first codec in case no second codec
   # can be guessed.
   message = failure_template % guessed
   # try again with another guesser based on file command
   fallback = guessEncodingFromText(text_content, 'text/plain')
   if fallback is None:
     return text_content, message

   try:
     converted = text_content.decode(fallback).encode('utf-8')
   except (UnicodeDecodeError, LookupError):
     return text_content, failure_template % fallback
   else:
     return converted, success_template % fallback
예제 #3
0
  def getContentInformation(self):
    """
    Returns the content information from the header information.
    This is used by the metadata discovery system.

    Header information is converted in UTF-8 since this is the standard
    way of representing strings in ERP5.

    Each header value of self._getMessage() is passed to decode_header,
    and every resulting (text, encoding) fragment is re-encoded to UTF-8.
    When the declared (or default) codec fails, the codec is guessed with
    guessEncodingFromText; when that fails too, an escaped repr() of the
    raw text is kept instead.

    NOTE(review): this excerpt ends before anything is stored in
    ``result`` or returned -- the rest of the method is not visible here.
    NOTE(review): ``except X, name`` is Python 2-only syntax.
    """
    result = {}
    for (name, value) in self._getMessage().items():
      try: 
        decoded_header = decode_header(value)
      except HeaderParseError, error_message:
        # Unparseable header: log it and treat it as having no fragments,
        # so the inner loop below is simply skipped for this header.
        decoded_header = ()
        LOG('EmailDocument.getContentInformation', INFO,
            'Failed to decode %s header of %s with error: %s' %
            (name, self.getPath(), error_message))
      for text, encoding in decoded_header:
        try:
          if encoding is not None:
            text = text.decode(encoding).encode('utf-8')
          else:
            # No charset declared for this fragment: decode with the
            # interpreter's default codec.
            text = text.decode().encode('utf-8')
        except (UnicodeDecodeError, LookupError), error_message:
          # Declared/default codec is wrong or unknown: guess one from the
          # raw bytes instead.
          encoding = guessEncodingFromText(text, content_type='text/plain')
          if encoding is not None:
            try:
              text = text.decode(encoding).encode('utf-8')
            except (UnicodeDecodeError, LookupError), error_message:
              # Last resort: escaped representation; [1:-1] drops the first
              # and last character of repr() (presumably the quotes).
              text = repr(text)[1:-1]
          else:
            # No codec could be guessed: same escaped-repr fallback.
            text = repr(text)[1:-1]
예제 #4
0
  def getTextContent(self, default=_MARKER):
    """
    Returns the content of the email as text. This is useful
    to display the content of an email.

    According to rfc, (http://tools.ietf.org/html/rfc2046#section-5.1.4)
    getTextContent should return html part of multipart/alternative couple
    If multipart/mixed, the html part is an attachment. So return the
    main content (text/plain).

    Without a file, the call is delegated to _baseGetTextContent (the
    default is forwarded only when the caller supplied one). With a file,
    the text part returned by _getMessageTextPart is converted: HTML parts
    go through self.convert, other parts are re-encoded to UTF-8.

    NOTE(review): this excerpt ends before ``text_result`` is returned --
    the rest of the method is not visible here.
    NOTE(review): ``except X, name`` is Python 2-only syntax.
    """
    # NOTE(review): presumably raises when the caller may not access the
    # unconverted (format=None) content -- confirm against the helper.
    self._checkConversionFormatPermission(None)
    if not self.hasFile():
      # Return the standard text content if no file was provided
      # Or standard text content is not empty.
      if default is _MARKER:
        return self._baseGetTextContent()
      else:
        return self._baseGetTextContent(default)

    else:
      part = self._getMessageTextPart()
      if part is None:
        # No text part found in the message: empty text.
        text_result = ""
      else:
        part_encoding = part.get_content_charset()
        # NOTE(review): assumes part is an email.message.Message, where
        # decode=1 undoes the Content-Transfer-Encoding -- confirm.
        message_text = part.get_payload(decode=1)
        if part.get_content_type() == 'text/html':
          # HTML part: let the conversion machinery produce the HTML text;
          # the returned mime type is not used in the visible code.
          mime, text_result = self.convert(format='html',
                                           text_content=message_text,
                                           charset=part_encoding)
        else:
          if part_encoding != 'utf-8':
            try:
              if part_encoding is not None:
                text_result = message_text.decode(part_encoding).encode('utf-8')
              else:
                # No charset declared: decode with the default codec.
                text_result = message_text.decode().encode('utf-8')
            except (UnicodeDecodeError, LookupError), error_message:
              LOG('EmailDocument.getTextContent', INFO, 
                  'Failed to decode %s TEXT message of %s with error: %s' % 
                  (part_encoding, self.getPath(), error_message))
              # Declared charset failed: guess one from the payload itself.
              codec = guessEncodingFromText(message_text,
                                            content_type=part.get_content_type())
              if codec is not None:
                try:
                  text_result = message_text.decode(codec).encode('utf-8')
                except (UnicodeDecodeError, LookupError):
                  # Last resort: full repr() of the payload (quotes kept).
                  text_result = repr(message_text)
              else:
                # No codec could be guessed: same repr() fallback.
                text_result = repr(message_text)
          else:
            # Already declared as utf-8: use the payload unchanged.
            text_result = message_text
예제 #5
0
def testCharsetAndConvert(text_content, content_type, encoding):
  """
  Re-encode text_content to UTF-8, guessing the codec when needed.

  text_content is decoded with ``encoding`` (or the default codec when
  ``encoding`` is None) and encoded to UTF-8. When that fails, a codec is
  guessed with guessEncodingFromText(text_content, content_type); when the
  guess is missing or fails too, an escaped repr() of the text is used.

  NOTE(review): no return statement is visible in this excerpt, so the
  converted ``text_content`` / ``encoding`` never reach the caller as
  shown -- confirm against the full source.
  NOTE(review): ``except X, name`` is Python 2-only syntax.
  """
  try:
    if encoding is not None:
      text_content = text_content.decode(encoding).encode('utf-8')
    else:
      # No codec given: decode with the interpreter's default codec.
      text_content = text_content.decode().encode('utf-8')
  except (UnicodeDecodeError, LookupError), error_message:
    # Given/default codec is wrong or unknown: guess one from the content.
    encoding = guessEncodingFromText(text_content, content_type)
    if encoding is not None:
      try:
        text_content = text_content.decode(encoding).encode('utf-8')
      except (UnicodeDecodeError, LookupError):
        # Last resort: escaped representation; [1:-1] drops the first and
        # last character of repr() (presumably the quotes).
        text_content = repr(text_content)[1:-1]
    else:
      # No codec could be guessed: same escaped-repr fallback.
      text_content = repr(text_content)[1:-1]
예제 #6
0
파일: EmailDocument.py 프로젝트: poses/erp5
def testCharsetAndConvert(text_content, content_type, encoding):
    """
    Re-encode text_content to UTF-8, guessing the codec when needed.

    text_content is decoded with ``encoding`` (or the default codec when
    ``encoding`` is None) and encoded to UTF-8. When that fails, a codec is
    guessed with guessEncodingFromText(text_content, content_type); when
    the guess is missing or fails too, an escaped repr() of the text is
    used.

    NOTE(review): no return statement is visible in this excerpt, so the
    converted ``text_content`` / ``encoding`` never reach the caller as
    shown -- confirm against the full source.
    NOTE(review): ``except X, name`` is Python 2-only syntax.
    """
    try:
        if encoding is not None:
            text_content = text_content.decode(encoding).encode('utf-8')
        else:
            # No codec given: decode with the interpreter's default codec.
            text_content = text_content.decode().encode('utf-8')
    except (UnicodeDecodeError, LookupError), error_message:
        # Given/default codec is wrong or unknown: guess one from the
        # content.
        encoding = guessEncodingFromText(text_content, content_type)
        if encoding is not None:
            try:
                text_content = text_content.decode(encoding).encode('utf-8')
            except (UnicodeDecodeError, LookupError):
                # Last resort: escaped representation; [1:-1] drops the
                # first and last character of repr() (presumably quotes).
                text_content = repr(text_content)[1:-1]
        else:
            # No codec could be guessed: same escaped-repr fallback.
            text_content = repr(text_content)[1:-1]
예제 #7
0
 def _guessEncoding(self, string, mime='text/html'):
     """
     Deprecated method.

     Delegates to guessEncodingFromText, passing ``mime`` as the
     content_type, and returns that function's result unchanged.
     """
     guessed_encoding = guessEncodingFromText(string, content_type=mime)
     return guessed_encoding
예제 #8
0
파일: Document.py 프로젝트: smetsjp/erp5
 def _guessEncoding(self, string, mime='text/html'):
   """
   Deprecated method.

   Delegates to guessEncodingFromText, passing ``mime`` as the
   content_type, and returns that function's result unchanged.
   """
   guessed_encoding = guessEncodingFromText(string, content_type=mime)
   return guessed_encoding