예제 #1
0
def decodeMessageAsString(msg):
    """Render a message as text with its leaf parts re-encoded as quoted-printable.

    ``msg`` is either a Message object or the raw message text (which is
    parsed with ``Parser``).  The argument itself is never modified: all
    changes are applied to a deep copy.

    The ``From`` and ``Subject`` headers of the copy are replaced by the
    first fragment returned by ``decode_header`` (a ``From`` header can
    look like '=?utf-8?q?Site_Administrator_=3C=3E?=').

    Returns the copy flattened with ``as_string()``.
    """
    parsed = Parser().parsestr(msg) if isinstance(msg, str) else msg
    clone = deepcopy(parsed)

    # Only the first decoded fragment of each header is kept.
    for header_name in ('From', 'Subject'):
        first_fragment = decode_header(clone[header_name])[0]
        clone.replace_header(header_name, first_fragment[0])

    # utf-8 charset configured to emit quoted-printable bodies
    qp_utf8 = Charset('utf-8')
    qp_utf8.header_encoding = SHORTEST
    qp_utf8.body_encoding = QP
    qp_utf8.output_charset = 'utf-8'

    for part in clone.walk():
        if part.get_content_maintype() != "multipart":
            raw_payload = part.get_payload(decode=1)
            # the old header no longer describes the decoded payload
            del part['Content-Transfer-Encoding']
            part.set_payload(raw_payload, qp_utf8)

    return clone.as_string()
예제 #2
0
def decodeMessageAsString(msg):
    """Return ``msg`` as a string whose leaf parts are no longer base64 encoded.

    A raw string argument is parsed into a Message first; a Message argument
    is deep-copied so the caller's object stays untouched.  On the copy the
    ``From`` and ``Subject`` headers are overwritten with the first fragment
    produced by ``decode_header`` (a ``From`` header may arrive as
    '=?utf-8?q?Site_Administrator_=3C=3E?='), and every non-multipart part
    gets its transfer-decoded payload re-set with a utf-8 /
    quoted-printable charset.
    """
    source = msg
    if isinstance(source, str):
        source = Parser().parsestr(source)
    copy_of_msg = deepcopy(source)

    for name in ('From', 'Subject'):
        decoded_text = decode_header(copy_of_msg[name])[0][0]
        copy_of_msg.replace_header(name, decoded_text)

    body_charset = Charset('utf-8')
    body_charset.header_encoding = SHORTEST
    body_charset.body_encoding = QP
    body_charset.output_charset = 'utf-8'

    for leaf in copy_of_msg.walk():
        is_container = leaf.get_content_maintype() == "multipart"
        if is_container:
            continue
        payload = leaf.get_payload(decode=1)
        # remove the stale transfer-encoding header before re-setting the payload
        del leaf['Content-Transfer-Encoding']
        leaf.set_payload(payload, body_charset)

    return copy_of_msg.as_string()
예제 #3
0
def parseRequest(msg):
	"""Turn a request mail into ``(target, fromlist, cclist)``.

	Returns None when the ``Subject`` or ``From`` header is missing or when
	the decoded subject does not match the module-level pattern ``p``.
	Only the first fragment of the first ``Subject`` header is examined.

	``target`` is "<DELIVERY_TYPE>.<locale>" with an optional "+audio"
	suffix; ``fromlist`` and ``cclist`` hold the fully decoded ``From`` and
	``Cc`` header values as unicode strings.
	"""
	# check mandatory header fields
	for required in ("Subject", "From"):
		if not msg.has_key(required):
			return None

	def as_unicode(raw, charset):
		# fragments without a charset were not RFC 2047 encoded
		if charset:
			return unicode(raw, charset)
		return unicode(raw)

	def decoded_entries(header_name):
		# one unicode string per occurrence of the header
		return [
			u"".join(as_unicode(chunk, cs) for chunk, cs in decode_header(entry))
			for entry in msg.get_all(header_name, [])
		]

	# parse the first subject header only
	first_chunk, first_charset = decode_header(msg.get_all("Subject")[0])[0]
	match = p.match(as_unicode(first_chunk, first_charset))
	if match is None:
		return None

	# normalize request
	groups = match.groupdict()
	delivery = groups["delivery_type"].upper()
	if delivery == "CANDC":
		delivery = "CandC"
	loc = groups["locale"].lower()
	loc = loc[:3] + loc[3:].upper()
	target = delivery + "." + loc
	if groups.get("audio"):
		target += "+audio"

	# normalize "From" entries, then "Cc" entries
	fromlist = decoded_entries("From")
	cclist = decoded_entries("Cc")

	return (target, fromlist, cclist)
예제 #4
0
def decode_subject(subject):
    """Return ``subject`` as a unicode string.

    A value that starts with '=?' and ends with '?=' is decoded fragment by
    fragment with ``decode_header`` (fragments without a charset are read
    as us-ascii); anything else goes through ``collapse_rfc2231_value``.
    """
    # Slices (not startswith/endswith) so non-string values simply compare
    # unequal and fall through to collapse_rfc2231_value.
    wrapped_in_markers = subject[:2] == '=?' and subject[-2:] == '?='
    if not wrapped_in_markers:
        return unicode(collapse_rfc2231_value(subject))

    pieces = []
    for chunk, charset in decode_header(subject):
        pieces.append(unicode(chunk, charset or 'us-ascii'))
    return u''.join(pieces)
예제 #5
0
def handle_attachment(message, content, related=False):
    """Store one MIME part on disk and record it as an ``Attachment``.

    ``message`` is the owning record (its ``id`` names the sub-directory of
    ``files_dir``), ``content`` is the MIME part and ``related`` is stored
    as ``mime_related``.

    The file name is taken from the part; when the part carries no file
    name (or an empty one) a random UUID is used instead.  The payload is
    transfer-decoded, hashed with MD5 and written to
    ``files_dir/<message.id>/<valid file name>``.
    """
    # decode_header() must only see a real string: given None it either
    # raises or yields the literal text "None", depending on the Python
    # version, and the UUID fallback below would never trigger.
    filename = None
    raw_name = content.get_filename()
    if raw_name:
        filename, encoding = decode_header(raw_name)[0]
        if encoding:
            filename = filename.decode(encoding, errors='replace')

    target_dir = os.path.join(files_dir, str(message.id))

    a = Attachment()
    # TODO need to parse weird strings from this
    a.filename = filename or str(uuid.uuid4())
    a.content_type = content.get_content_type()
    a.stored_location = os.path.join(target_dir, get_valid_filename(a.filename))
    a.mime_related = related

    # load the file (whole payload in memory) and fingerprint it
    file_content = content.get_payload(decode=1)
    a.file_md5 = hashlib.md5(file_content).hexdigest()

    # actually write it to disk - should wrap this in a try/except too
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    with open(a.stored_location, 'wb') as fp:
        fp.write(file_content)

    a.message = message
    a.save()
    def test_japanese_codecs(self):
        """Check Header encoding/decoding with euc-jp and iso-8859-1 chunks.

        Covers three things: the encoded form of a header built from a plain
        chunk plus one euc-jp and one iso-8859-1 chunk, the fragments that
        decode_header() returns for that encoded form, and the encoding of a
        long euc-jp ``Subject`` header including its round trip back to
        euc-jp bytes.
        """
        eq = self.ndiffAssertEqual
        j = Charset("euc-jp")
        g = Charset("iso-8859-1")
        h = Header("Hello World!")
        # byte strings in the respective charsets (euc-jp / iso-8859-1)
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(
            h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
        # decoding the encoded form yields one (bytes, charset) pair per charset run
        eq(decode_header(h.encode()),
           [('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        h = Header(long, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(
            enc, """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(h.__unicode__().encode('euc-jp'), long)
예제 #7
0
    def process_incoming_mail(self, msg):
        """Extract addressing data, the decoded subject and the plain-text body.

        The visible code gathers the ``To`` / ``In-Reply-To`` identifiers,
        the raw ``From`` header, the RFC 2047 decoded ``Subject`` and the
        concatenated text of all ``text/plain`` parts.
        NOTE(review): nothing is returned or stored in the visible portion --
        the snippet looks truncated; confirm against the full source.
        """
        to = self.get_email_address_ident(msg, 'To')
        sender = msg.get('From')
        reply_to = self.get_email_address_ident(msg, 'In-Reply-To')

        title = msg.get('Subject')
        if title:
            # glue the decoded fragments together without a separator
            title = u''.join(
                unicode(text, enc) if enc else unicode(text)
                for text, enc in decode_header(title))

        content = u''
        plain_parts = (p for p in msg.walk()
                       if p.get_content_type() == 'text/plain')
        for part in plain_parts:
            raw = part.get_payload(decode=True)
            # try the part's charsets first, then the message's; the first
            # charset that decodes cleanly wins
            for charset in part.get_charsets() + msg.get_charsets():
                try:
                    if charset is None:
                        content += unicode(raw)
                    else:
                        content += unicode(raw, charset)
                except UnicodeError as e:
                    self.logger.warning('Unicode error: %s' % e)
                except Exception as e:
                    self.logger.exception(e)
                else:
                    break
예제 #8
0
def getDecodedHeaders(msg, cset='utf-8'):
    """Returns a unicode containing all the headers of msg, unfolded and
    RFC 2047 decoded, normalized and separated by new lines.

    ``cset`` is accepted for interface compatibility; it is not used here
    because the result is unicode rather than an encoded byte string.
    """

    lines = []
    for h, v in msg.items():
        try:
            # unfold continuation lines before decoding
            fragments = decode_header(re.sub(r'\n\s', ' ', v))
        except HeaderParseError:
            fragments = [(v, 'us-ascii')]
        uvalue = u''
        for frag, cs in fragments:
            try:
                uvalue += unicode(frag, cs or 'us-ascii', 'replace')
            except LookupError:
                # The encoding charset is unknown.  At this point, frag
                # has been QP or base64 decoded into a byte string whose
                # charset we don't know how to handle.  We will try to
                # unicode it as iso-8859-1 which may result in a garbled
                # mess, but we have to do something.
                uvalue += unicode(frag, 'iso-8859-1', 'replace')
        # Use the safely decoded header name: formatting the raw byte string
        # into a unicode template would raise on a non-ASCII header name.
        uhdr = h.decode('us-ascii', 'replace')
        lines.append(
            u'%s: %s\n' % (uhdr, normalize(mm_cfg.NORMALIZE_FORM, uvalue)))
    return u''.join(lines)
예제 #9
0
    def add_sender(self, message):
        """Register the sender of ``message`` in the user database.

        The display name of the ``From`` header is RFC 2047 decoded (only
        the first decoded fragment is used; utf-8 is assumed when no charset
        is given) and stored together with the address.  ``email_location``
        is handed to ``User`` as ``location_func``.
        """
        def email_location():
            """Return ``((latitude, longitude), 0)`` or None when unknown.

            The last non-private address found in the ``Original-Received``
            headers is looked up through the freegeoip JSON service.  The
            lookup is best effort: any failure is logged and yields None.
            """
            candidates = []
            # get_all() returns None without a default when the header is absent
            for header in message.get_all('Original-Received', []):
                found = IP.findall(header)
                # first address of the header, unless it is in a private range
                if found and not found[0].startswith(("10.", "192.168")):
                    candidates.append(found[0])
            if not candidates:
                return None
            likely = candidates[-1]
            try:
                logger.info("geocoder: Getting location for %s" % (likely))
                url = "http://freegeoip.net/json/%s" % likely
                logger.debug("geocoder: Fetching %s" % (url))
                loc = json.loads(urllib2.urlopen(url).read())
                ll = float(loc['latitude']), float(loc['longitude'])
                if any(ll):
                    return ll, 0
            except Exception:
                logger.debug("geocoder: lookup failed for %s" % likely,
                             exc_info=True)
            return None

        users = getUtility(IUserDatabase)
        name, address = email.utils.parseaddr(message.get("From", ""))

        # Remove quoted printable
        raw_name, encoding = decode_header(name)[0]
        try:
            display_name = raw_name.decode(encoding or "utf-8", "replace")
        except LookupError:
            # charset label unknown to Python: fall back to utf-8
            display_name = raw_name.decode("utf-8", "replace")

        users.add_user(User(display_name, address, location_func=email_location))
def _get_header(str):
   '''Return the decoded text of a header with CR and LF characters removed.

   ``str`` is the raw header value handed to ``decode_header``; the charset
   of each decoded fragment is ignored and the fragments are concatenated.
   '''
   fragments = decode_header(str)
   return ''.join(
       text.replace("\n", '').replace("\r", '') for text, _charset in fragments
   )
예제 #11
0
def get_header_content(name='', str_encoded=''):
    """Return the decoded text of header ``name`` from the mail in ``sys.argv[1]``.

    The header value is split with the module-level ``NEWLINE`` pattern,
    each piece is run through ``decode_header`` and the decoded fragments
    are appended to ``str_encoded`` (which acts as a prefix).  Runs of two
    or more whitespace characters in the result are collapsed to one space.
    A missing header contributes nothing.
    """
    # close the file deterministically instead of leaking the handle
    with open(sys.argv[1]) as fp:
        message = Parser().parse(fp)
    # default to '' so a missing header does not pass None to NEWLINE.split
    header_value = message.get(name, '')
    for line in NEWLINE.split(header_value):
        str_encoded += ''.join(part[0] for part in decode_header(line))
    return re.sub(r'(\r\n|\s|\t){2,}', ' ', str_encoded)
예제 #12
0
파일: utils.py 프로젝트: jmehnle/pymilter
def parse_header(val):
  """Decode headers gratuitously encoded to hide the content.

  Returns ``val`` unchanged when it decodes to nothing or to a single
  fragment without a charset, and also when decoding fails.  Otherwise the
  decoded fragments are joined and re-encoded with the first of us-ascii,
  iso-8859-1 and utf8 that can represent the text.
  """
  try:
    chunks = decode_header(val)
    if len(chunks) == 0 or (len(chunks) == 1 and not chunks[0][1]):
      return val
    decoded = []
    for raw, charset in chunks:
      if not charset:
        decoded.append(unicode(raw))
        continue
      try:
        decoded.append(unicode(raw, charset, 'replace'))
      except LookupError:
        # charset label unknown: decode with the default codec instead
        decoded.append(unicode(raw))
    text = ''.join(decoded)
    for target in ('us-ascii', 'iso-8859-1', 'utf8'):
      try:
        return text.encode(target)
      except UnicodeError:
        pass
  except UnicodeDecodeError:
    pass
  except LookupError:
    pass
  except ValueError:
    pass
  except email.Errors.HeaderParseError:
    pass
  return val
예제 #13
0
파일: common.py 프로젝트: miracle2k/stgit
 def __decode_header(header):
     """Decode a qp-encoded e-mail header as per rfc2047

     Any failure while decoding is re-raised as ``CmdException``.
     NOTE(review): the visible code builds the Header object but does not
     return it -- the snippet looks truncated; confirm against the source.
     """
     try:
         hobj = make_header(decode_header(header))
     except Exception as ex:
         raise CmdException("header decoding error: %s" % str(ex))
예제 #14
0
파일: mailread.py 프로젝트: twol/Workspace
def mailread(src):
    """Extract the subject, body and attached images from a raw mail.

    ``src`` is the raw message text.  The decoded ``Subject`` header is
    printed, followed by a separator line; every ``image`` part is saved to
    the current directory and every ``text`` part is printed.
    """
    import os  # function-scope import: only used to sanitise file names

    # Build the Message object
    m = email.message_from_string(src)
    # Decode the header (a missing Subject is treated as empty)
    subj = decode_header(m["Subject"] or "")
    # Show the header; printing is best effort and must not abort the scan
    try:
        print(unicode(make_header(subj)))
    except Exception:
        pass
    print("-" * 70)
    # Scan all parts
    for part in m.walk():
        maintype = part.get_content_maintype()
        if maintype and maintype.find("image") != -1:
            # An image attachment was found: save it to a file.  The name
            # comes from the message itself, so keep only its last path
            # component to stay inside the current directory.
            filename = (os.path.basename(part.get_filename("notitle.img"))
                        or "notitle.img")
            with open(filename, "wb") as f:
                f.write(part.get_payload(decode=True))
        elif maintype and maintype.find("text") != -1:
            # Text is printed.  decode=True undoes base64/quoted-printable
            # before the charset decoding.
            enc = part.get_charsets()[0] or "us-ascii"
            print(part.get_payload(decode=True).decode(enc, "ignore"))
예제 #15
0
def main(args):
    """Command-line entry point: extract messages matching a set of features.

    Options: ``-d/--database`` map file (required), ``-S/--spamfile`` and
    ``-H/--hamfile`` output files (at least one required), ``-f/--feature``
    (repeatable) and ``-h/--help``.  Without ``-f`` the features are taken
    from the ``X-Spambayes-Evidence`` header of the message files given as
    positional arguments.

    Returns 1 after reporting a usage error, 0 after ``--help`` and None
    once ``extractmessages`` has run.
    """
    try:
        opts, args = getopt.getopt(args, "hd:S:H:f:",
                                   ["help", "database=", "spamfile=",
                                    "hamfile=", "feature="])
    except getopt.GetoptError as msg:
        usage(msg)
        return 1
    charset = locale.getdefaultlocale()[1] or 'us-ascii'
    mapfile = spamfile = hamfile = None
    features = set()
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-d", "--database"):
            mapfile = arg
        elif opt in ("-H", "--hamfile"):
            hamfile = arg
        elif opt in ("-S", "--spamfile"):
            spamfile = arg
        elif opt in ("-f", "--feature"):
            # str(arg, charset) raises TypeError for text; only byte strings
            # need decoding
            if isinstance(arg, bytes):
                arg = arg.decode(charset)
            features.add(arg)
    if hamfile is None and spamfile is None:
        usage("At least one of -S or -H are required")
        return 1
    if mapfile is None:
        usage("'-d mapfile' is required")
        return 1
    try:
        mapd = pickle_read(mapfile)
    except IOError:
        usage("Mapfile %s does not exist" % mapfile)
        return 1
    if not features and not args:
        usage("Require at least one feature (-f) arg or one message file")
        return 1
    if not features:
        for f in args:
            for msg in getmbox(f):
                evidence = msg.get("X-Spambayes-Evidence", "")
                evidence = re.sub(r"\s+", " ", evidence)
                # drop the first two entries and the trailing ": value" part
                clues = [e.rsplit(": ", 1)[0]
                         for e in evidence.split("; ")[2:]]
                for s in clues:
                    try:
                        s = str(make_header(decode_header(s)))
                    except Exception:
                        # undecodable clue: keep it, replacing bad bytes
                        if isinstance(s, bytes):
                            s = s.decode('us-ascii', 'replace')
                    features.add(s)
        if not features:
            usage("No X-Spambayes-Evidence headers found")
            return 1
    opened = []
    try:
        if spamfile is not None:
            spamfile = open(spamfile, "w")
            opened.append(spamfile)
        if hamfile is not None:
            hamfile = open(hamfile, "w")
            opened.append(hamfile)
        extractmessages(features, mapd, hamfile, spamfile)
    finally:
        # flush and release the output files even if extraction fails
        for handle in opened:
            handle.close()
예제 #16
0
    def parse_header_field(self, field):
        """Return the RFC 2047 decoded text of a header value.

        ``field`` is the raw header value; None is passed through as None.
        Encoded fragments are decoded with their declared charset (invalid
        bytes are replaced); fragments without a charset, or with a charset
        label Python does not know, are read as latin1.  The fragments are
        joined with single spaces and line breaks are removed.
        """
        if field is None:
            return None

        # preprocess head field: make sure an encoded word is followed by a space
        # see http://stackoverflow.com/questions/7331351/python-email-header-decoding-utf-8
        field = re.sub(r"(=\?.*\?=)(?!$)", r"\1 ", field)

        fragments = []
        for s, enc in decode_header(field):
            # already-decoded text needs no work; only byte strings do
            if isinstance(s, bytes):
                try:
                    s = s.decode(enc or "latin1", "replace")
                except LookupError:
                    # Unknown charset label.  With errors="replace" a
                    # UnicodeDecodeError cannot happen, so this is the only
                    # failure that needs the latin1 fallback.
                    s = s.decode("latin1")
            fragments.append(s)
        field = u' '.join(fragments)
        return field.replace('\n\t', " ").replace('\n', '').replace('\r', '')
예제 #17
0
 def add_sender(self, message):
     """Register the sender of ``message`` in the user database.

     The display name of the ``From`` header is RFC 2047 decoded (only the
     first decoded fragment is used; utf-8 is assumed when no charset is
     given) and stored together with the address.  ``email_location`` is
     handed to ``User`` as ``location_func``.
     """
     def email_location():
         """Return ``((latitude, longitude), 0)`` or None when unknown.

         The last non-private address found in the ``Original-Received``
         headers is looked up through the freegeoip JSON service.  The
         lookup is best effort: any failure is logged and yields None.
         """
         candidates = []
         # get_all() returns None without a default when the header is absent
         for header in message.get_all('Original-Received', []):
             found = IP.findall(header)
             # first address of the header, unless it is in a private range
             if found and not found[0].startswith(("10.", "192.168")):
                 candidates.append(found[0])
         if not candidates:
             return None
         likely = candidates[-1]
         try:
             logger.info("geocoder: Getting location for %s" % (likely))
             url = "http://freegeoip.net/json/%s"%likely
             logger.debug("geocoder: Fetching %s" % (url))
             loc = json.loads(urllib2.urlopen(url).read())
             ll = float(loc['latitude']), float(loc['longitude'])
             if any(ll):
                 return ll, 0
         except Exception:
             logger.debug("geocoder: lookup failed for %s" % likely,
                          exc_info=True)
         return None

     users = getUtility(IUserDatabase)
     name, address = email.utils.parseaddr(message.get("From", ""))

     # Remove quoted printable
     raw_name, encoding = decode_header(name)[0]
     try:
         display_name = raw_name.decode(encoding or "utf-8", "replace")
     except LookupError:
         # charset label unknown to Python: fall back to utf-8
         display_name = raw_name.decode("utf-8", "replace")

     users.add_user(User(display_name, address, location_func=email_location))
예제 #18
0
def mailread(src):
    """Extract the subject, body and attached images from a raw mail.

    ``src`` is the raw message text.  The decoded ``Subject`` header is
    printed, followed by a separator line; every ``image`` part is saved to
    the current directory and every ``text`` part is printed.
    """
    import os  # function-scope import: only used to sanitise file names

    # Build the Message object
    m = email.message_from_string(src)
    # Decode the header (a missing Subject is treated as empty)
    subj = decode_header(m["Subject"] or "")
    # Show the header; printing is best effort and must not abort the scan
    try:
        print(unicode(make_header(subj)))
    except Exception:
        pass
    print("-" * 70)
    # Scan all parts
    for part in m.walk():
        maintype = part.get_content_maintype()  # get the main type
        if maintype and maintype.find("image") != -1:
            # An image attachment was found: save it to a file.  The name
            # comes from the message itself, so keep only its last path
            # component to stay inside the current directory.
            filename = (os.path.basename(part.get_filename("notitle.img"))
                        or "notitle.img")
            with open(filename, "wb") as f:
                f.write(part.get_payload(decode=True))
        elif maintype and maintype.find("text") != -1:
            # Text is printed.  The assignment below used to read
            # "enc ~ ...", which is a syntax error.
            enc = part.get_charsets()[0] or "us-ascii"
            # decode=True undoes base64/quoted-printable before the charset
            # decoding
            print(part.get_payload(decode=True).decode(enc, "ignore"))
예제 #19
0
    def finishHeader(self):
        """Flush the pending header (if any) into ``self.part`` and reset it.

        The pending value is RFC 2047 decoded fragment by fragment; when
        decoding raises ValueError the raw value is decoded as ascii with
        replacement instead.  Headers listed in ``self._normalizeHeaders``
        are split on the configured separator and added once per piece under
        the lower-cased name; all other headers are added unchanged.
        """
        if self.prevheader is not None:
            lowered = self.prevheader.lower()

            try:
                pieces = []
                for item in decode_header(self.prevvalue):
                    if isinstance(item, unicode):
                        pieces.append(item)
                        continue
                    raw, charset = item
                    pieces.append(_safelyDecode(
                        raw, 'ascii' if charset is None else charset))
            except ValueError:  # XXX where is this ValueError coming from?
                                # -glyph
                text = self.prevvalue.decode('ascii', 'replace')
            else:
                text = u''.join(pieces)

            if lowered in self._normalizeHeaders:
                separator = self._normalizeHeaders[lowered]
                for piece in text.split(separator):
                    self.part.addHeader(lowered, piece)
            else:
                self.part.addHeader(self.prevheader, text)
        self.prevheader = self.prevvalue = None
예제 #20
0
  def getContentInformation(self):
    """
    Returns the content information from the header information.
    This is used by the metadata discovery system.

    Header information is converted to UTF-8 since this is the standard
    way of representing strings in ERP5.

    NOTE(review): the visible code never stores anything in ``result`` and
    has no return statement -- the snippet looks truncated; confirm against
    the full source.
    """
    result = {}
    for (name, value) in self._getMessage().items():
      try: 
        decoded_header = decode_header(value)
      except HeaderParseError, error_message:
        # An unparsable header is logged and then treated as having no
        # fragments at all.
        decoded_header = ()
        LOG('EmailDocument.getContentInformation', INFO,
            'Failed to decode %s header of %s with error: %s' %
            (name, self.getPath(), error_message))
      for text, encoding in decoded_header:
        try:
          # Re-encode each fragment as UTF-8, using the declared charset
          # when there is one and the default codec otherwise.
          if encoding is not None:
            text = text.decode(encoding).encode('utf-8')
          else:
            text = text.decode().encode('utf-8')
        except (UnicodeDecodeError, LookupError), error_message:
          # Declared charset is wrong or unknown: try a guessed encoding,
          # and as a last resort keep the repr() of the bytes without its
          # surrounding quotes.
          encoding = guessEncodingFromText(text, content_type='text/plain')
          if encoding is not None:
            try:
              text = text.decode(encoding).encode('utf-8')
            except (UnicodeDecodeError, LookupError), error_message:
              text = repr(text)[1:-1]
          else:
            text = repr(text)[1:-1]
예제 #21
0
    def finishHeader(self):
        """Store the header collected so far on ``self.part``, then clear it.

        Nothing is stored when no header is pending.  The pending value is
        decoded with ``decode_header``; fragments that are already unicode
        are kept, the others go through ``_safelyDecode`` (ascii when no
        charset is given).  A ValueError during decoding falls back to an
        ascii decode with replacement of the raw value.
        """
        pending = self.prevheader
        if pending is not None:
            key = pending.lower()

            try:
                decoded_parts = []
                for fragment in decode_header(self.prevvalue):
                    if not isinstance(fragment, unicode):
                        data, codec = fragment
                        if codec is None:
                            codec = 'ascii'
                        fragment = _safelyDecode(data, codec)
                    decoded_parts.append(fragment)
            except ValueError:  # XXX where is this ValueError coming from?
                # -glyph
                value = self.prevvalue.decode('ascii', 'replace')
            else:
                value = u''.join(decoded_parts)

            if key in self._normalizeHeaders:
                # one header entry per separated piece, under the lower-cased name
                for piece in value.split(self._normalizeHeaders[key]):
                    self.part.addHeader(key, piece)
            else:
                self.part.addHeader(self.prevheader, value)
        self.prevheader = self.prevvalue = None
예제 #22
0
    def test_japanese_codecs(self):
        """Check Header encoding/decoding with euc-jp and iso-8859-1 chunks.

        Covers three things: the encoded form of a header built from a plain
        chunk plus one euc-jp and one iso-8859-1 chunk, the fragments that
        decode_header() returns for that encoded form, and the encoding of a
        long euc-jp ``Subject`` header including its round trip back to
        euc-jp bytes.
        """
        eq = self.ndiffAssertEqual
        j = Charset("euc-jp")
        g = Charset("iso-8859-1")
        h = Header("Hello World!")
        # byte strings in the respective charsets (euc-jp / iso-8859-1)
        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
        ghello = 'Gr\xfc\xdf Gott!'
        h.append(jhello, j)
        h.append(ghello, g)
        # BAW: This used to -- and maybe should -- fold the two iso-8859-1
        # chunks into a single encoded word.  However it doesn't violate the
        # standard to have them as two encoded chunks and maybe it's
        # reasonable <wink> for each .append() call to result in a separate
        # encoded word.
        eq(h.encode(), """\
Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
 =?iso-8859-1?q?Gr=FC=DF?= =?iso-8859-1?q?_Gott!?=""")
        # decoding the encoded form yields one (bytes, charset) pair per charset run
        eq(decode_header(h.encode()),
           [('Hello World!', None),
            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
        long = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
        h = Header(long, j, header_name="Subject")
        # test a very long header
        enc = h.encode()
        # TK: splitting point may differ by codec design and/or Header encoding
        eq(enc , """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKGyhC?=
 =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
        # TK: full decode comparison
        eq(h.__unicode__().encode('euc-jp'), long)
예제 #23
0
 def getFrom(self):
     """Return ``(display name, address)`` taken from the ``from`` header.

     The display name is RFC 2047 decoded; fragments whose charset is
     rejected by ``self.codecs_lookup`` are dropped.  The joined text is
     passed through ``to_entities_quote``; the address is returned as parsed.
     """
     name, address = parseaddr(self._msg.get('from', ''))
     chunks = []
     for text, charset in decode_header(name):
         if self.codecs_lookup(charset):
             chunks.append(to_unicode(text, charset))
     return (to_entities_quote(''.join(chunks)), address)
예제 #24
0
def getDecodedHeaders(msg, cset='utf-8'):
    """Return all headers of ``msg`` as one string, one "name: value" per line.

    Each value is unfolded, RFC 2047 decoded and then encoded in ``cset``
    (characters that cannot be encoded are replaced).
    """
    lines = []
    for name, raw in msg.items():
        try:
            fragments = decode_header(re.sub('\n\s', ' ', raw))
        except HeaderParseError:
            fragments = [(raw, 'us-ascii')]
        pieces = []
        for frag, cs in fragments:
            try:
                pieces.append(unicode(frag, cs or 'us-ascii', 'replace'))
            except LookupError:
                # Unknown charset label: the fragment is already QP/base64
                # decoded, so read the bytes as iso-8859-1.  The result may
                # be garbled, but something has to be produced.
                pieces.append(unicode(frag, 'iso-8859-1', 'replace'))
        uvalue = u''.join(pieces)
        lines.append('%s: %s\n' % (name, uvalue.encode(cset, 'replace')))
    return ''.join(lines)
예제 #25
0
	def email_parse(self, content):
		"""Parse raw mail text into a dict of its main pieces.

		Returns a dict with the keys 'subject' (utf-8 byte string, or None
		when the header is missing), 'body_text' and 'body_html' (utf-8
		byte strings built from the text/plain and text/html parts), 'from'
		and 'to' (address parts only) and 'attachments' (whatever
		``self.email_parse_attachment`` returned for the parts it accepted).
		"""
		def to_utf8(raw, charset):
			# A part may declare no charset, or one Python does not know;
			# both used to make unicode() raise.  Fall back to utf-8 and
			# replace undecodable bytes.
			try:
				text = unicode(raw, charset or 'utf-8', 'replace')
			except LookupError:
				text = unicode(raw, 'utf-8', 'replace')
			return text.encode('utf8', 'replace')

		msgobj = Parser().parsestr(content)
		if msgobj['Subject'] is not None:
			subj_fragments = []
			for s, enc in decode_header(msgobj['Subject']):
				if enc:
					s = to_utf8(s, enc)
				subj_fragments.append(s)
			subject = ''.join(subj_fragments)
		else:
			subject = None

		attachments = []
		body_text = ""
		body_html = ""
		for part in msgobj.walk():
			attachment = self.email_parse_attachment(part)
			if attachment:
				attachments.append(attachment)
				continue
			content_type = part.get_content_type()
			if content_type == "text/plain":
				body_text += to_utf8(part.get_payload(decode=True), part.get_content_charset())
			elif content_type == "text/html":
				body_html += to_utf8(part.get_payload(decode=True), part.get_content_charset())
		return {
			'subject': subject,
			'body_text': body_text,
			'body_html': body_html,
			# default to '' so a missing header does not hand None to parseaddr
			'from': parseaddr(msgobj.get('From', ''))[1],
			'to': parseaddr(msgobj.get('To', ''))[1],
			'attachments': attachments,
		}
예제 #26
0
def initializeObject(context, fields, message, defaultCharset='utf-8'):
    """Demarshal the headers of ``message`` into the matching fields.

    ``fields`` is an iterable of ``(name, field)`` pairs.  Primary fields
    (``IPrimaryField``) are collected separately; every other field is
    matched to message headers by lower-cased name, in order of occurrence.
    For each matched header an ``IFieldMarshaler`` adapter of
    ``(context, field)`` is asked to demarshal the first decoded fragment of
    the header value.  Missing fields or marshalers and ValueError raised by
    ``demarshal`` are logged at debug level and skipped.

    NOTE(review): ``primary`` is collected but not used in the visible
    portion -- the snippet looks truncated; confirm against the full source.
    """
    contentType = message.get_content_type()

    # charset of the message, falling back to the Content-Type parameter
    # and finally to defaultCharset
    charset = message.get_charset()
    if charset is None:
        charset = message.get_param('charset')
    charset = defaultCharset if charset is None else str(charset)

    headerFields = {}
    primary = []

    for name, field in fields:
        if IPrimaryField.providedBy(field):
            primary.append((name, field))
            continue
        headerFields.setdefault(name.lower(), []).append(field)

    # Demarshal each header

    for name, value in message.items():
        name = name.lower()
        candidates = headerFields.get(name)
        if not candidates:
            LOG.debug("No matching field found for header %s" % name)
            continue

        # repeated headers consume the fields registered under that name in order
        field = candidates.pop(0)

        marshaler = queryMultiAdapter((context, field), IFieldMarshaler)
        if marshaler is None:
            LOG.debug("No marshaler found for field %s of %s" %
                      (name, repr(context)))
            continue

        headerValue, headerCharset = decode_header(value)[0]
        if headerCharset is None:
            headerCharset = charset

        # MIME messages always use CRLF. For headers, we're probably safer with
        # \n
        headerValue = headerValue.replace('\r\n', '\n')

        try:
            marshaler.demarshal(
                headerValue,
                message=message,
                charset=headerCharset,
                contentType=contentType,
                primary=False
            )
        except ValueError as e:
            # interface allows demarshal() to raise ValueError to indicate
            # marshalling failed
            LOG.debug("Demarshalling of %s for %s failed: %s" %
                      (name, repr(context), str(e)))
예제 #27
0
 def getSubject(self):
     """Return the decoded ``subject`` header, passed through ``to_entities_quote``.

     Fragments whose charset is rejected by ``self.codecs_lookup`` are
     dropped; a missing header yields the result for an empty string.
     """
     raw_subject = self._msg.get('subject', '')
     chunks = []
     for text, charset in decode_header(raw_subject):
         if self.codecs_lookup(charset):
             chunks.append(to_unicode(text, charset))
     return to_entities_quote(''.join(chunks))
예제 #28
0
    def __call__(self, request):
        """Process one ``(headers, msg)`` request.

        When ``self.get_partners`` finds partners the mail is saved with its
        decoded subject; otherwise a warning mail is sent back to the
        addresses found in the ``From`` header.  Afterwards every attached
        ``message/rfc822`` part of a multipart message is processed
        recursively with the same headers.
        """
        headers, msg = request
        partners = self.get_partners(headers, msg)
        subject = u''.join(
            text.decode(charset) if charset else unicode(text)
            for text, charset in decode_header(msg['Subject']))

        if not partners:
            warning = MIMEText((warn_msg % (subject, )).encode('utf-8'),
                               'plain', 'utf-8')
            warning['Subject'] = 'Message de PengERP'
            warning['From'] = '*****@*****.**'
            warning['To'] = msg['From']
            s = smtplib.SMTP()
            s.connect()
            s.sendmail('*****@*****.**', self.email_re.findall(msg['From']),
                       warning.as_string())
            s.close()
        else:
            self.save_mail(msg, subject, partners)

        if msg.is_multipart():
            # select the embedded messages first, then recurse into each
            embedded = [
                sub for sub in msg.get_payload()
                if sub.get_content_type() == 'message/rfc822'
            ]
            for wrapper in embedded:
                self((headers, wrapper.get_payload()[0]))
예제 #29
0
def _decodeHeaders(msg, defaultCharacterSet='ascii'):
    """Decode message into (header, value) pairs.

    Each header value is RFC 2047 decoded; fragments without a charset are
    read with ``defaultCharacterSet`` and the fragments are joined with
    single spaces.  Headers that fail to decode are silently skipped.
    NOTE(review): the visible code builds ``headers_values`` but does not
    return it -- the snippet looks truncated; confirm against the source.
    """
    # Collected as a list of one-entry dicts, e.g.
    # [{'From': '*****@*****.**'}, {'To': '*****@*****.**'}]
    headers_values = []

    for h in msg.keys():
        # Skip non-exist headers.
        if h not in msg.keys():
            continue

        try:
            # decode_header() yields (decoded_string, charset) pairs;
            # convert each of them into unicode.
            fragments = [
                unicode(text, charset or defaultCharacterSet)
                for text, charset in decode_header(msg[h])
            ]
            headers_values.append({h: u' '.join(fragments)})
        except Exception:
            pass
예제 #30
0
    def parse_header_field(self, field):
        """Return the RFC 2047 decoded text of a header value.

        ``field`` is the raw header value; None is passed through as None.
        Encoded fragments are decoded with their declared charset (invalid
        bytes are replaced); fragments without a charset, or with a charset
        label Python does not know, are read as latin1.  The fragments are
        joined with single spaces and line breaks are removed.
        """
        if field is None:
            return None

        # preprocess head field: make sure an encoded word is followed by a space
        # see http://stackoverflow.com/questions/7331351/python-email-header-decoding-utf-8
        field = re.sub(r"(=\?.*\?=)(?!$)", r"\1 ", field)

        fragments = []
        for s, enc in decode_header(field):
            # already-decoded text needs no work; only byte strings do
            if isinstance(s, bytes):
                try:
                    s = s.decode(enc or "latin1", "replace")
                except LookupError:
                    # Unknown charset label.  With errors="replace" a
                    # UnicodeDecodeError cannot happen, so this is the only
                    # failure that needs the latin1 fallback.
                    s = s.decode("latin1")
            fragments.append(s)
        field = u' '.join(fragments)
        return field.replace('\n\t', " ").replace('\n', '').replace('\r', '')
예제 #31
0
파일: libString.py 프로젝트: CoachCoen/ECL
def ClearSubjectHeader(strSubject):
    """Return the subject without the list-name tags.

    The tags "[UKCoach]", "[ec-l]" and "[eurocoach-list]" are removed, then
    the first fragment produced by ``decode_header`` is returned (its
    charset is ignored).
    """
    cleaned = strSubject
    for tag in ("UKCoach", "ec-l", "eurocoach-list"):
        cleaned = cleaned.replace("[%s]" % tag, "")
    first_text, _charset = decode_header(cleaned)[0]
    return first_text
예제 #32
0
파일: bugmail.py 프로젝트: affix/Fedbot
def _get_header(str):
    '''Return the decoded text of a header with every newline removed.'''
    # NOTE: the parameter name shadows the builtin; it is kept so existing
    # callers are not affected.
    fragments = decode_header(str)
    return ''.join([text.replace("\n", '') for text, _charset in fragments])
예제 #33
0
    def process_message(self, remoteHosts, mailfrom, rcpttos, data):
        """Relay a message to the known real addresses and log it.

        The message is forwarded, one recipient at a time, to every address
        of ``rcpttos`` that is also in ``BlackHoleSmtp.real_address_list``.
        It is then stored as a ``LoggedMail`` row: the headers and the text
        parts go onto the row, every other part is written below
        ``ATTACH_DIR`` and recorded through ``log.attatchment_set``.
        """
        if not BlackHoleSmtp.real_address_list:
            # Fill the class-level address cache on first use.
            BlackHoleSmtp.real_address_list = set([
                a.email
                for a in RealAddress.objects.filter(suspend__exact=False)
            ])
        recipients = ','.join(rcpttos)
        real_addresses = BlackHoleSmtp.real_address_list & set(
            rcpttos)  #TODO check rcpttos data
        for real_address in real_addresses:
            smtpd.PureProxy.process_message(self, remoteHosts, mailfrom,
                                            [real_address],
                                            data)  #check real_address data
            debug('send email to: %s', (real_address, ))
        msg = message_from_string(data)
        log = LoggedMail(from_address=mailfrom, to_address=recipients)
        # Charset of the last encoded header seen; used for text parts that
        # do not declare a charset of their own.
        charset = 'latin1'

        log.raw_header = '\n'.join(
            ['%s:%s' % (key, msg.get(key)) for key in msg.keys()])
        header = ''
        for key in msg.keys():
            # Only the first decoded fragment of each header is used.
            value, header_charset = decode_header(msg.get(key))[0]
            if not header_charset:
                value = unicode(value)
            else:
                value = unicode(value, header_charset)
                charset = header_charset
            header += '%s:%s(mime:%s)\n' % (key, value, header_charset)
            if key == 'Subject':
                log.subject = value
        log.header = header
        log.charset = charset
        log.save()

        file_name_base = 'msg_%07d_' % log.id
        # Numbers the attachments that carry no file name.  The original
        # code used this counter without ever defining it.
        file_count = 0
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                continue
            if maintype == 'text':
                log.raw_body += part.get_payload(decode=0)
                # Prefer the charset the part declares for itself.
                part_charset = part.get_content_charset() or charset
                log.body += unicode(part.get_payload(decode=1), part_charset)
                log.save()
                continue
            file_count += 1
            file_name = part.get_filename()
            if not file_name:
                # guess_extension returns None for unknown types.
                ext = mimetypes.guess_extension(
                    part.get_content_type()) or ".bin"
                file_name = 'part-%03d%s' % (file_count, ext)
            at = log.attatchment_set.create(origin_name=file_name)
            # The name comes from the (untrusted) mail: drop any directory
            # component before using it on disk.
            file_name = file_name_base + os.path.basename(file_name)
            payload = part.get_payload(decode=1) or ''
            # Attachments are binary data, so write them in binary mode and
            # close the file even when the write fails.
            f = open(os.path.join(ATTACH_DIR, file_name), 'wb')
            try:
                f.write(payload)
            finally:
                f.close()
            at.file = u'attach/%s' % file_name
            at.save()
예제 #34
0
def initializeObject(context, fields, message, defaultCharset='utf-8'):
    """Demarshal the headers of ``message`` through the given fields.

    ``fields`` yields (name, field) pairs.  Fields providing IPrimaryField
    are set aside; the others are queued per lower-cased name and consumed,
    in order, by the message headers of the same name.  Each header value
    is handed to the IFieldMarshaler adapter found for (context, field).
    Headers without a queued field or without a marshaler are skipped, and
    so are values whose demarshalling raises ValueError.
    """
    contentType = message.get_content_type()

    # Charset precedence: the message charset, then the "charset" content
    # type parameter, then the caller supplied default.
    charset = message.get_charset()
    if charset is None:
        charset = message.get_param('charset')
    if charset is None:
        charset = defaultCharset
    else:
        charset = str(charset)

    primary = []
    headerFields = {}
    for name, field in fields:
        if not IPrimaryField.providedBy(field):
            headerFields.setdefault(name.lower(), []).append(field)
        else:
            primary.append((name, field))

    # Demarshal each header
    for name, value in message.items():
        name = name.lower()

        pending = headerFields.get(name, None)
        if not pending:
            LOG.debug("No matching field found for header %s" % name)
            continue

        # A field is consumed even when no marshaler turns up for it.
        field = pending.pop(0)
        marshaler = queryMultiAdapter((context, field), IFieldMarshaler)
        if marshaler is None:
            LOG.debug("No marshaler found for field %s of %s" %
                      (name, repr(context)))
            continue

        # Only the first decoded fragment of the header is used.
        headerValue, headerCharset = decode_header(value)[0]
        if headerCharset is None:
            headerCharset = charset

        # MIME messages always use CRLF. For headers, we're probably safer with \n
        headerValue = headerValue.replace('\r\n', '\n')

        try:
            marshaler.demarshal(headerValue,
                                message=message,
                                charset=headerCharset,
                                contentType=contentType,
                                primary=False)
        except ValueError as e:
            # interface allows demarshal() to raise ValueError to indicate marshalling failed
            LOG.debug("Demarshalling of %s for %s failed: %s" %
                      (name, repr(context), str(e)))
            continue
예제 #35
0
def parse(content):
    """Parse the raw text of an e-mail and return its pieces as a dict.

    The result has the keys 'subject', 'body', 'html', 'from', 'to',
    'attachments' and 'images'.  'subject', 'body' and 'html' are None when
    the message has no such header or part.  Text parts are re-encoded as
    UTF-8; parts recognised by parse_attachment go to 'attachments' and
    parts with one of the listed image/pdf content types go to 'images'
    as StringIO objects.
    """
    msgobj = EmailParser().parsestr(content)

    subject = None
    if msgobj['Subject'] is not None:
        subj_fragments = []
        for s, enc in decode_header(msgobj['Subject']):
            if enc:
                s = unicode(s, enc).encode('utf8', 'replace')
            subj_fragments.append(s)
        subject = ''.join(subj_fragments)

    def text_as_utf8(part):
        # A part without a charset parameter used to hand None to unicode()
        # as the encoding, which raises TypeError.  Fall back to us-ascii;
        # bytes that cannot be decoded are replaced.
        charset = part.get_content_charset('us-ascii')
        text = unicode(part.get_payload(decode=True), charset, 'replace')
        return text.encode('utf8', 'replace')

    attachments = []
    body = None
    html = None
    images = []
    # The comma after "image/tiff" was missing, which silently merged the
    # last two entries into "image/tiffapplication/pdf".
    images_content_type = [
        "image/jpg",
        "image/jpeg",
        "image/png",
        "image/tiff",
        "application/pdf",
    ]

    for part in msgobj.walk():
        content_type = part.get_content_type()
        attachment = parse_attachment(part)
        if attachment:
            attachments.append(attachment)
        elif content_type == "text/plain":
            body = (body or "") + text_as_utf8(part)
        elif content_type == "text/html":
            html = (html or "") + text_as_utf8(part)
        elif content_type in images_content_type:
            images.append(StringIO(part.get_payload(decode=True)))

    return {
        'subject': subject,
        'body': body,
        'html': html,
        'from': parseaddr(msgobj.get('From'))[1],
        'to': parseaddr(msgobj.get('To'))[1],
        'attachments': attachments,
        'images': images,
    }
 def getCC(self):
     """Return the Cc recipients as (display name, address) pairs.

     The display name is decoded with decode_header and then passed
     through to_entities_quote.
     """
     recipients = []
     for realname, address in getaddresses(self._msg.get_all('cc', '')):
         pieces = []
         for text, charset in decode_header(realname):
             # Fragments whose charset fails self.codecs_lookup are dropped.
             if self.codecs_lookup(charset):
                 pieces.append(to_unicode(text, charset))
         recipients.append((to_entities_quote(''.join(pieces)), address))
     return recipients
예제 #37
0
def decode_QP(string):
    """Decode an RFC 2047 encoded header value into text.

    Byte fragments without a declared charset are read as iso-8859-15 and
    undecodable bytes are replaced.  The fragments are joined with single
    spaces.
    """
    parts = []
    for decoded, charset in decode_header(string):
        # decode_header hands back text (not bytes) for a header without
        # any encoded word; str(text, charset) would raise TypeError, so
        # only byte fragments are decoded.
        if isinstance(decoded, bytes):
            decoded = str(decoded, charset or 'iso-8859-15', 'replace')
        parts.append(decoded)

    return u' '.join(parts)
예제 #38
0
파일: tests.py 프로젝트: luxcas/collective
 def send(self, from_, to, message):
     print '*TestingMailDelivery sending*:'
     print 'From:', decode_header(from_)[0][0]
     print 'To:', ', '.join(to)
     print 'Message follows:'
     decoded = decodeMessageAsString(message)
     print decoded
     self.sent.append(decoded)
예제 #39
0
파일: message.py 프로젝트: paulojc32/baruwa
    def get_header(self, header_text, default="ascii"):
        """Decode and return the header.

        An empty or missing ``header_text`` is returned unchanged.  Each
        byte fragment is decoded with its declared charset, or ``default``
        when none is declared or the declared one is unknown; undecodable
        bytes are replaced.  Fragments are joined with single spaces.
        """
        if not header_text:
            return header_text

        parts = []
        for section, enc in decode_header(header_text):
            # Fragments that already are text need no decoding.
            if isinstance(section, bytes):
                try:
                    section = section.decode(enc or default, 'replace')
                except LookupError:
                    # The header names a charset Python does not know.
                    section = section.decode(default, 'replace')
            parts.append(section)
        return ' '.join(parts)
예제 #40
0
 def send(self, from_, to, message):
     print '*TestingMailDelivery sending*:'
     print 'From:', decode_header(from_)[0][0]
     print 'To:', ', '.join(to)
     print 'Message follows:'
     decoded = decodeMessageAsString(message)
     print decoded
     self.sent.append(decoded)
예제 #41
0
def decode_QP(string):
    """Return the text of an RFC 2047 encoded header value.

    Byte fragments are decoded with their declared charset, or with
    iso-8859-15 when none is declared; undecodable bytes are replaced.
    The fragments are joined with single spaces.
    """
    parts = []
    for decoded, charset in decode_header(string):
        if not isinstance(decoded, bytes):
            # A header without encoded words comes back as text already;
            # calling str(text, charset, ...) on it raises TypeError.
            parts.append(decoded)
            continue
        if charset is None:
            charset = 'iso-8859-15'
        parts.append(str(decoded, charset, 'replace'))

    return u' '.join(parts)
예제 #42
0
파일: message.py 프로젝트: haugvald/baruwa
    def get_header(self, header_text, default="ascii"):
        """Decode and return the header.

        A false ``header_text`` (None or empty) is returned as it is.
        Byte fragments are decoded with the charset they declare; when
        they declare none, or one Python does not know, ``default`` is
        used.  Undecodable bytes are replaced and the fragments are
        joined with single spaces.
        """
        if not header_text:
            return header_text

        def as_text(section, enc):
            if not isinstance(section, bytes):
                # Already text: nothing to decode.
                return section
            try:
                return section.decode(enc or default, 'replace')
            except LookupError:
                # Unknown charset name in the header.
                return section.decode(default, 'replace')

        sections = decode_header(header_text)
        return ' '.join([as_text(section, enc) for section, enc in sections])
예제 #43
0
def parse(content):
    """Parse the raw text of an e-mail and return its pieces as a dict.

    The result has the keys 'subject', 'body', 'html', 'from', 'to',
    'date' and 'attachments'.  'subject', 'body' and 'html' are None when
    the message has no such header or part.  All text/plain parts are
    concatenated into 'body' and all text/html parts into 'html'.
    """
    # content is parsed as message text (parsestr), not as a file.
    msgobj = EmailParser().parsestr(content)

    subject = None
    if msgobj['Subject'] is not None:
        subj_fragments = []
        for s, enc in decode_header(msgobj['Subject']):
            if enc:
                s = unicode(s, enc).encode('utf8', 'replace')
            subj_fragments.append(s)
        subject = ''.join(subj_fragments)

    def payload_as_utf8(part):
        # Re-encode the payload as UTF-8 when the part declares a charset;
        # otherwise hand the decoded payload back unchanged.  The original
        # tested the bound method ``part.get_content_charset`` (always
        # true) and passed a None charset to unicode() for html parts.
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset()
        if not charset:
            return payload
        return unicode(payload, charset, 'replace').encode('utf8', 'replace')

    attachments = []
    body = None
    html = None
    for part in msgobj.walk():
        content_type = part.get_content_type()
        attachment = parse_attachment(part)
        if attachment:
            attachments.append(attachment)
        elif content_type == "text/plain":
            # Every text/plain part is appended, mirroring the html branch
            # (previously only the first one was kept).
            body = (body or "") + payload_as_utf8(part)
        elif content_type == "text/html":
            html = (html or "") + payload_as_utf8(part)

    return {
        'subject': subject,
        'body': body,
        'html': html,
        'from': parseaddr(msgobj.get('From'))[1],
        'to': parseaddr(msgobj.get('To'))[1],
        'date': parse_date(msgobj.get('Date')),
        'attachments': attachments,
    }
예제 #44
0
 def decode_helper(self, headerpart):
     """Return the decoded header, stripped and encoded as UTF-8."""
     pieces = []
     for text, charset in decode_header(headerpart):
         # Fragments without a declared charset are used as they are.
         if charset:
             text = text.decode(charset)
         pieces.append(text)
     return ''.join(pieces).strip().encode('UTF-8', 'replace')
예제 #45
0
파일: http.py 프로젝트: carvalhomb/tsmells
def decode_TEXT(value):
    r"""Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    Atoms that declare a charset are decoded with it; the others are
    concatenated unchanged.
    """
    pieces = []
    for text, charset in decode_header(value):
        if charset is not None:
            text = text.decode(charset)
        pieces.append(text)
    return "".join(pieces)
예제 #46
0
 def decode_email(self, file):
   """Split a raw e-mail into headers, body and attachments.

   Returns a dict with the keys 'headers' (lower-cased name -> UTF-8
   value), 'body' (payload of the first text/plain or text/html part),
   'attachment_list' ((file name, content type, payload) tuples for every
   other leaf part) and '__original__' (the unmodified input).
   """
   theMail = {
     'attachment_list': [],
     'body': '',
     'headers': {},
     # Back up original file
     '__original__': file,
   }
   headers = theMail['headers']
   attachments = theMail['attachment_list']

   msg = email.message_from_string(file)

   # Recode headers to UTF-8 if needed
   for key, value in msg.items():
     unicode_value = make_header(decode_header(value))
     headers[key.lower()] = unicode_value.__unicode__().encode('utf-8')

   # Address headers are reduced to the bare e-mail address
   address_headers = ('resent-to', 'resent-from', 'resent-cc',
                      'resent-sender', 'to', 'from', 'cc', 'sender',
                      'reply-to')
   for header in address_headers:
     header_field = headers.get(header)
     if header_field:
       headers[header] = parseaddr(header_field)[1]

   body_found = False
   for part in msg.walk():
     content_type = part.get_content_type()
     file_name = part.get_filename()
     # multipart/* are just containers; message/rfc822 wraps an attached
     # message whose own parts are visited next, so both are skipped.
     if content_type.startswith('multipart'):
       continue
     if content_type == 'message/rfc822':
       continue
     payload = part.get_payload(decode=True)
     if content_type in ("text/plain", "text/html"):
       charset = part.get_content_charset()
       if charset:
         payload = unicode(payload, charset).encode('utf-8')
       if not body_found:
         # The first text part becomes the body.
         theMail['body'] = payload
         body_found = True
         continue
     # Everything else is kept together with its content type.
     attachments.append((file_name, content_type, payload))
   return theMail
예제 #47
0
파일: http.py 프로젝트: Juanvvc/scfs
def decode_TEXT(value):
    r"""Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    Returns the concatenation of all atoms found by decode_header; atoms
    with a declared charset are decoded first.
    """
    result = ""
    for pair in decode_header(value):
        text, charset = pair
        if charset is None:
            # No charset declared: append the atom untouched.
            result = result + text
            continue
        result = result + text.decode(charset)
    return result
예제 #48
0
파일: httputil.py 프로젝트: 2mny/mylar
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    Atoms that declare a charset are decoded with it; atoms without one
    are concatenated unchanged.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Python < 2.5 only provides the old capitalised module name.
        from email.Header import decode_header
    pieces = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        pieces.append(atom)
    return "".join(pieces)
예제 #49
0
 def decoded_header(self,msgrep,header):
     """Return header ``header`` of ``msgrep`` as one decoded string.

     Returns None when the header lookup yields None.  Fragments that
     declare a charset are re-encoded as UTF-8; the others are kept as
     they are.
     """
     raw_value = msgrep[header]
     if raw_value is None:
         return None

     def to_utf8(text, charset):
         if not charset:
             return text
         return unicode(text, charset).encode('utf8', 'replace')

     return ''.join([to_utf8(s, enc) for s, enc in decode_header(raw_value)])
예제 #50
0
 def decoded_header(self,msgrep,header):
     """Return the decoded value of header ``header`` of ``msgrep``.

     None is returned when the header lookup yields None.  Fragments with
     a declared charset are converted to UTF-8 (undecodable bytes are
     replaced); fragments without one are appended unchanged.
     """
     value = msgrep[header]
     if value is None:
         return None
     pieces = []
     for text, charset in decode_header(value):
         if charset:
             decoded = str(text).decode(charset, 'replace')
             text = decoded.encode('utf8', 'replace')
         pieces.append(text)
     return ''.join(pieces)
예제 #51
0
def decode_TEXT(value):
    """Decode RFC 2047 encoded text and return the concatenated atoms.

    Atoms that declare a charset are decoded with it; atoms without one
    are appended unchanged.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Python < 2.5 only provides the old capitalised module name.
        from email.Header import decode_header
    decodedvalue = ''
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        decodedvalue += atom

    return decodedvalue
예제 #52
0
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    Each atom reported by decode_header is decoded with its declared
    charset, if any, and the atoms are concatenated.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Python < 2.5 only provides the old capitalised module name.
        from email.Header import decode_header

    def as_text(atom, charset):
        if charset is None:
            return atom
        return atom.decode(charset)

    return "".join([as_text(atom, charset)
                    for atom, charset in decode_header(value)])
예제 #53
0
파일: httputil.py 프로젝트: Pluckyduck/eve
def decode_TEXT(value):
    """Return ``value`` with its RFC 2047 encoded words decoded.

    Atoms with a declared charset are decoded with it; the remaining
    atoms are concatenated as they are.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Python < 2.5 only provides the old capitalised module name.
        from email.Header import decode_header
    atoms = []
    for atom, charset in decode_header(value):
        if charset is None:
            atoms.append(atom)
        else:
            atoms.append(atom.decode(charset))

    return ''.join(atoms)
예제 #54
0
파일: Mail.py 프로젝트: tuchang/ZWiki
 def __init__(self, context, message):  # -> none
     """Extract the bits of interest from an RFC2822 message string.
     context should be a wiki page. This perhaps should do the isJunk
     test up front to avoid unnecessary resource usage.

     Sets, among others: date, subject, realSubject, messageid,
     inreplyto, From/FromRealName/FromEmail/FromUserName, sender,
     senderEmail, recipients, xbeenthere and body.
     """
     DEBUG('mailin.py processing incoming message:\n%s' % message)
     self.context = context
     self.original = message
     self.msg = email.message_from_string(self.original)
     self.date = self.msg['Date']
     # flatten a multi-line subject into one line
     s = re.sub('\n', '', self.msg.get('Subject', ''))
     # convert the possibly RFC2047-encoded subject to unicode.
     # Only the first encoded part is used if there is more than one.
     # misencoded subjects are ignored.
     (s, enc) = decode_header(s)[0]
     try:
         self.subject = tounicode(s, enc or 'ascii')
     except UnicodeDecodeError:
         self.subject = ''
     # drop everything up to and including the first "[...]" tag
     self.realSubject = re.sub(r'.*?\[.*?\] ?(.*)', r'\1', self.subject)
     self.messageid = self.msg.get('Message-id', '')
     self.inreplyto = self.msg.get('In-reply-to', '')
     self.From = self.msg.get('From')
     self.FromRealName, self.FromEmail = parseaddr(self.From)
     # fall back to the local part of the address when no name is given
     self.FromUserName = (self.FromRealName
                          or re.sub(r'@.*$', r'', self.FromEmail))
     self.sender = self.msg.get('Sender')
     self.senderEmail = (self.sender and parseaddr(self.sender)[1]) or None
     tos = self.msg.get_all('to', [])
     ccs = self.msg.get_all('cc', [])
     resent_tos = self.msg.get_all('resent-to', [])
     resent_ccs = self.msg.get_all('resent-cc', [])
     self.recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
     # mailing list support
     # XXX x-beenthere is mailman-specific - need to support ezmlm & others here
     # (falling back to Delivered-To raised a TypeError, so ezmlm was
     # configured to provide X-BeenThere instead)
     self.xbeenthere = self.msg.get('X-BeenThere')
     # the mailin body will be the message's first text/plain part
     # (or a null string if there is none or it's misencoded)
     try:
         firstplaintextpart = typed_subpart_iterator(
             self.msg, 'text', 'plain').next()
         # first decoding, from the content-transfer-encoding, eg
         # quoted-printable
         payload = firstplaintextpart.get_payload(decode=1)
         # second decoding, from utf8 or whatever to unicode. The charset
         # belongs to the part itself: a multipart container usually has
         # none, which used to force 'ascii' and lose non-ascii bodies.
         charset = (firstplaintextpart.get_content_charset()
                    or self.msg.get_content_charset('ascii'))
         payloadutf8 = payload.decode(charset).encode('utf-8')
     except (StopIteration, UnicodeDecodeError, LookupError):
         # LookupError: the part names a charset Python does not know
         payloadutf8 = ''
     self.body = cleanupBody(payloadutf8)
예제 #55
0
 def getCC(self):
     """Return one (display name, address) pair per Cc recipient.

     Display names are decoded with decode_header, converted with
     to_unicode and passed through to_entities_quote.
     """
     def decoded_name(raw_name):
         # Only fragments whose charset passes self.codecs_lookup are kept.
         kept = [to_unicode(text, charset)
                 for text, charset in decode_header(raw_name)
                 if self.codecs_lookup(charset)]
         return ''.join(kept)

     pairs = getaddresses(self._msg.get_all('cc', ''))
     return [(to_entities_quote(decoded_name(pair[0])), pair[1])
             for pair in pairs]
예제 #56
0
파일: msg.py 프로젝트: davideuler/dipi
def decode(x):
    """Return header value ``x`` with its encoded words decoded.

    Fragments that declare a charset are decoded with it; the others are
    appended unchanged.
    """
    result = u''
    for text, charset in decode_header(x):
        if charset:
            text = text.decode(charset)
        result += text
    return result
예제 #57
0
def get_header(header_text, default="ascii"):
    """Decode and return the header.

    A false ``header_text`` is returned unchanged.  Every fragment is
    decoded with its declared charset, or with ``default`` when none is
    declared or the declared one is unknown; undecodable bytes are
    replaced.  The fragments are joined with single spaces.
    """
    if not header_text:
        return header_text

    def to_text(raw, charset):
        try:
            return raw.decode(charset or default, 'replace')
        except LookupError:
            # Unknown charset name: retry with the default.
            return raw.decode(default, 'replace')

    return u' '.join(
        [to_text(raw, charset) for raw, charset in decode_header(header_text)])
예제 #58
0
 def decode_charset(self, field):
     """Convert ``field`` into a one-line unicode string.

     The value is decoded through decode_header/make_header.  When that
     fails, the raw value is decoded with the charset of the list's
     preferred language (iso-8859-1 instead of us-ascii), replacing
     undecodable bytes.
     """
     try:
         ustr = make_header(decode_header(field)).__unicode__()
     except (LookupError, UnicodeError, ValueError, HeaderParseError):
         # assume list's language
         fallback = Utils.GetCharSet(self._mlist.preferred_language)
         if fallback == 'us-ascii':
             fallback = 'iso-8859-1'  # assume this for English list
         ustr = unicode(field, fallback, 'replace')
     lines = ustr.splitlines()
     return u''.join(lines)