def decode_headers(msg):
    """Apply the globally configured header operations to ``msg``.

    Four option lists on ``g`` are processed in order: ``remove_headers``,
    ``remove_headers_params``, ``decode_headers`` and
    ``decode_header_params``.  Header and parameter names arrive as
    comma-separated strings; when the first name is ``'*'`` the operation
    applies to everything except the names ``_get_exceptions`` returns.
    """

    def _drop(message, name):
        del message[name]

    def _apply(names, handler, *extra):
        # Call handler(msg, name, *extra) for each selected header name.
        if names[0] != '*':
            # Explicit list: act on exactly the listed headers.
            for name in names:
                handler(msg, name, *extra)
            return
        # Wildcard: act on every header of the message except the listed ones.
        excluded = _get_exceptions(names)
        for name in msg.keys():
            if name.lower() not in excluded:
                handler(msg, name, *extra)

    def _split_params(spec):
        # Return (wildcard flag, parameter names or wildcard exceptions).
        names = spec.split(',')
        wildcard = names[0] == '*'
        if wildcard:
            names = _get_exceptions(names)
        return wildcard, names

    # 1. Remove whole headers.
    for headers in g.remove_headers:
        _apply(headers.split(','), _drop)

    # 2. Remove parameters from headers.
    for headers, params in g.remove_headers_params:
        header_names = headers.split(',')
        all_params, param_names = _split_params(params)
        _apply(header_names, _remove_headers_params, all_params, param_names)

    # 3. Decode whole headers.
    for headers in g.decode_headers:
        _apply(headers.split(','), decode_header)

    # 4. Decode parameters of headers.
    for headers, params in g.decode_header_params:
        header_names = headers.split(',')
        all_params, param_names = _split_params(params)
        _apply(header_names, _decode_headers_params, all_params, param_names)
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the "maintype/subtype" value of a header, keeping its params.

    type must contain exactly one '/' (i.e. look like "maintype/subtype");
    anything else raises ValueError.

    The header named by the header argument (Content-Type by default) is
    rewritten with the new type and every parameter it previously carried
    is re-attached through set_param().  requote is forwarded both when the
    old parameters are read and when they are written back: True (the
    default) re-quotes them, False leaves the existing quoting alone.

    Whenever the Content-Type header is the one being set, the MIME-Version
    header is replaced with "1.0" as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Touching Content-Type always refreshes MIME-Version.
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The leading entry is the previous type itself, not a parameter.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        # Nothing to preserve: just create the header.
        self[header] = type
def check_unresolved_template(self, msg, target=None):
    """Report whether the raw message holds an unexpanded template tag.

    Every line of ``msg.raw_msg`` is scanned for a ``%`` followed by an
    upper-case letter and then an upper-case letter, ``_`` or ``-``.
    Lines that start (case-insensitively) with one of a fixed set of
    header names are exempt.  Returns True at the first non-exempt line
    that matches, otherwise False.
    """
    placeholder = r"%[A-Z][A-Z_-]"
    exempt_headers = (r"^(?:x-vms-to|x-uidl|x-face|to|cc|from|subject|"
                      r"references|in-reply-to|(?:x-|resent-|"
                      r"x-original-)?message-id):")
    for line in msg.raw_msg.split("\n"):
        if not re.search(placeholder, line):
            continue
        if re.search(exempt_headers, line.lower()):
            continue
        return True
    return False
def check_unresolved_template(self, msg, target=None):
    """Report whether the raw message holds an unexpanded template tag.

    Every line of ``msg.raw_msg`` is scanned for a ``%`` followed by an
    upper-case letter and then an upper-case letter, ``_`` or ``-``.
    Lines that start (case-insensitively) with one of a fixed set of
    header names are exempt.  Returns True at the first non-exempt line
    that matches, otherwise False.
    """
    for line in msg.raw_msg.split("\n"):
        has_placeholder = Regex(r"%[A-Z][A-Z_-]").search(line)
        if not has_placeholder:
            continue
        is_exempt = Regex(
            r"^(?:x-vms-to|x-uidl|x-face|to|cc|from|subject|"
            r"references|in-reply-to|(?:x-|resent-|"
            r"x-original-)?message-id):").search(line.lower())
        if not is_exempt:
            return True
    return False
def format_headers(buffer, tree):
    """Append the interesting headers of ``tree`` to ``buffer``.

    Nothing is appended when ``is_image(tree)`` is true.  Otherwise each
    header whose lower-cased name is in ``interesting_headers`` is decoded
    with ``email.header.decode_header``, passed through
    ``sanitize_substrings`` and appended as a ``"Name: value\\n"`` line,
    followed by one blank line.
    """
    if is_image(tree):
        return
    for name, raw_value in tree.items():
        if header_is_boring(name):
            continue
        decoded_pairs = email.header.decode_header(raw_value)
        line = "%s: %s\n" % (name, sanitize_substrings(decoded_pairs))
        buffer.append(line)
    # Blank line separating the header block from whatever follows.
    buffer.append('\n')


def header_is_boring(name):
    """Return True when ``name`` is not listed in ``interesting_headers``."""
    return name.lower() not in interesting_headers
def check_illegal_chars(self, msg, header, ratio, count, target=None):
    """Check whether a header carries too many non-ASCII characters.

    Looks for 8-bit characters that should have been MIME encoded.

    :param msg: message object; ``msg.raw_headers`` is read when ``header``
        is ``'ALL'``, otherwise ``msg.get_raw_header(header)`` is called.
    :param header: header name, or ``'ALL'`` for every header except
        Subject and From.
    :param ratio: minimum fraction of illegal characters (converted with
        ``float``).
    :param count: minimum number of illegal characters (converted with
        ``int``).
    :return: True when both thresholds are reached, False otherwise
        (including when ``ratio``/``count`` cannot be converted, in which
        case a warning is logged, or when the header text is empty).
    """
    try:
        ratio = float(ratio)
    except ValueError:
        self.ctxt.log.warn("HeaderEval::Plugin check_illegal_chars "
                           "invalid option: %s", ratio)
        return False
    try:
        count = int(count)
    except ValueError:
        self.ctxt.log.warn("HeaderEval::Plugin check_illegal_chars "
                           "invalid option: %s", count)
        return False

    if header == 'ALL':
        # Build a filtered copy.  Deleting Subject/From from
        # msg.raw_headers itself would strip them from the message for
        # every later consumer.
        raw_headers = {
            name: value for name, value in msg.raw_headers.items()
            if name.lower() not in ("subject", "from")
        }
    else:
        raw_headers = {header: msg.get_raw_header(header)}

    # Count illegal characters (RFC 2045).
    # NOTE(review): only non-ASCII characters (ord >= 128) are counted;
    # C0 control characters are not.
    raw_str = ''.join([''.join(value) for value in raw_headers.values()])
    try:
        raw_str = raw_str.decode("utf-8")
    except AttributeError:
        # Python 3: str is already unicode and has no decode().
        pass
    illegal = sum(1 for char in raw_str if ord(char) >= 128)

    if illegal > 0 and header.lower() == "subject":
        # Only exempt a single cent sign, pound sign, or registered sign.
        exempt = sum(1 for sign in (u'\xa2', u'\xa3', u'\xae')
                     if sign in raw_str)
        if exempt == 1:
            illegal -= exempt

    if not raw_str:
        return False
    return (illegal / len(raw_str)) >= ratio and illegal >= count
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Add or replace one parameter of a header (Content-Type by default).

    An existing parameter of the same name (compared case-insensitively)
    has its value replaced; otherwise the parameter is appended.  When the
    header is Content-Type and the message does not have one yet, the
    header starts out as "text/plain" before the parameter is appended, as
    per RFC 2045.

    header selects an alternate header.  Parameters are quoted as necessary
    unless requote is False.

    If charset is given and value is not already a tuple, the value is
    passed on as a (charset, language, value) tuple so it is encoded
    according to RFC 2231; language defaults to the empty string.  Both
    charset and language should be strings.
    """
    if charset and not isinstance(value, tuple):
        value = (charset, language, value)

    if header.lower() == 'content-type' and header not in self:
        current = 'text/plain'
    else:
        current = self.get(header)

    if not self.get_param(param, header=header):
        # Parameter not present yet: append it to whatever is there.
        addition = _formatparam(param, value, requote)
        updated = SEMISPACE.join([current, addition]) if current else addition
    else:
        # Rebuild the header piece by piece, substituting the new value.
        updated = ''
        for old_name, old_value in self.get_params(header=header,
                                                   unquote=requote):
            if old_name.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_name, old_value, requote)
            updated = SEMISPACE.join([updated, piece]) if updated else piece

    # Only touch the message when the header text actually changed.
    if updated != self.get(header):
        del self[header]
        self[header] = updated
def check_illegal_chars(self, msg, header, ratio, count, target=None):
    """Check whether a header carries too many non-ASCII characters.

    Looks for 8-bit characters that should have been MIME encoded.

    :param msg: message object; ``msg.raw_headers`` is read when ``header``
        is ``'ALL'``, otherwise ``msg.get_raw_header(header)`` is called.
    :param header: header name, or ``'ALL'`` for every header except
        Subject and From.
    :param ratio: minimum fraction of illegal characters (converted with
        ``float``).
    :param count: minimum number of illegal characters (converted with
        ``int``).
    :return: True when both thresholds are reached, False otherwise
        (including when ``ratio``/``count`` cannot be converted, in which
        case a warning is logged, or when the header text is empty).
    """
    try:
        ratio = float(ratio)
    except ValueError:
        self.ctxt.log.warn("HeaderEval::Plugin check_illegal_chars "
                           "invalid option: %s", ratio)
        return False
    try:
        count = int(count)
    except ValueError:
        self.ctxt.log.warn("HeaderEval::Plugin check_illegal_chars "
                           "invalid option: %s", count)
        return False

    if header == 'ALL':
        # Build a filtered copy.  Deleting Subject/From from
        # msg.raw_headers itself would strip them from the message for
        # every later consumer.
        raw_headers = {
            name: value for name, value in msg.raw_headers.items()
            if name.lower() not in ("subject", "from")
        }
    else:
        raw_headers = {header: msg.get_raw_header(header)}

    # Count illegal characters (RFC 2045).
    # NOTE(review): only non-ASCII characters (ord >= 128) are counted;
    # C0 control characters are not.
    raw_str = ''.join([''.join(value) for value in raw_headers.values()])
    illegal = sum(1 for char in raw_str if ord(char) >= 128)

    if illegal > 0 and header.lower() == "subject":
        # Only exempt a single cent sign, pound sign, or registered sign.
        exempt = sum(1 for sign in (u'\xa2', u'\xa3', u'\xae')
                     if sign in raw_str)
        if exempt == 1:
            illegal -= exempt

    if not raw_str:
        return False
    return (illegal / len(raw_str)) >= ratio and illegal >= count
def __parse_native_mail(self):
    """Fill ``_headers`` and ``_body`` from ``self.mail_native``.

    ``self.mail_native`` is treated as a native (email.message.Message())
    object.  The body is only read for non-multipart messages.  If the
    message has no Message-ID header, ``reset_message_id(target='native')``
    is called.
    """
    self._headers = CaseInsensitiveDict()
    self._body = ''

    # TODO handle multipart mails
    if not self.mail_native.is_multipart():
        charset = self.mail_native.get_content_charset()
        if python_version[1] != 2 and charset is not None:
            payload = self.mail_native.get_payload(decode=True).decode(charset)
        else:
            payload = self.mail_native.get_payload()  # pragma: no cover
        self.set_body(payload)

    # Headers that could contain encoded strings get decoded to text.
    maybe_encoded = ['Subject', 'From', 'To', 'Cc', 'Bcc']
    for field_name in self.mail_native.keys():
        if field_name in self._headers.keys():
            continue
        values = self.mail_native.get_all(field_name)
        if field_name in maybe_encoded:
            decoded = email.header.decode_header(
                self.mail_native.get(field_name))
            self._headers[field_name] = str(email.header.make_header(decoded))
        elif len(values) > 1:
            # Repeated header: keep every occurrence.
            self._headers[field_name] = values
        else:
            self._headers[field_name] = values[0]

    native_names = [header.lower() for header in self.mail_native.keys()]
    if 'message-id' not in native_names:
        self.reset_message_id(target='native')
def get_native(self):
    """Return this mail as a native (email.message.Message()) object.

    An existing, truthy ``self.mail_native`` is returned unchanged.
    Otherwise a new message is built with ``self.charset`` registered as
    quoted-printable, the stored headers (a Message-ID is generated via
    ``reset_message_id()`` when none is present) and ``self._body`` as
    payload; the result is kept in ``self.mail_native``.
    """
    if self.mail_native:
        return self.mail_native

    self.mail_native = email.message.Message()
    email.charset.add_charset(self.charset, email.charset.QP,
                              email.charset.QP)
    self.mail_native.set_charset(email.charset.Charset(self.charset))

    known_names = [name.lower() for name in self.get_headers()]
    if 'message-id' not in known_names:
        self.reset_message_id()

    for name, value in self.get_headers().items():
        self.mail_native.add_header(name, value)
    self.mail_native.set_payload(self._body, charset=self.charset)
    return self.mail_native
def __parse_native_mail(self):
    """Load headers and body from the native message into this object.

    ``self.mail_native`` is treated as a native (email.message.Message())
    object.  ``_headers`` is rebuilt as a fresh CaseInsensitiveDict and
    ``_body`` is reset; the body is only read for non-multipart messages.
    A missing Message-ID triggers ``reset_message_id(target='native')``.
    """
    self._headers = CaseInsensitiveDict()
    self._body = ''

    # TODO handle multipart mails
    if not self.mail_native.is_multipart():
        charset = self.mail_native.get_content_charset()
        use_raw_payload = python_version[1] == 2 or charset is None
        if use_raw_payload:
            body = self.mail_native.get_payload()  # pragma: no cover
        else:
            body = self.mail_native.get_payload(decode=True).decode(charset)
        self.set_body(body)

    for field_name in self.mail_native.keys():
        if field_name in self._headers.keys():
            continue
        all_values = self.mail_native.get_all(field_name)
        # Headers that could contain encoded strings are parsed differently.
        if field_name in ['Subject', 'From', 'To', 'Cc', 'Bcc']:
            parts = email.header.decode_header(
                self.mail_native.get(field_name))
            stored = str(email.header.make_header(parts))
        elif len(all_values) > 1:
            stored = all_values
        else:
            stored = all_values[0]
        self._headers[field_name] = stored

    lowered = [header.lower() for header in self.mail_native.keys()]
    if 'message-id' not in lowered:
        self.reset_message_id(target='native')
def get_native(self):
    """Build (once) and return the native email.message.Message() object.

    When ``self.mail_native`` is already truthy it is returned as is.
    Otherwise a message is created, ``self.charset`` is registered with
    quoted-printable header and body encoding, the stored headers are
    copied over (after ``reset_message_id()`` if no Message-ID exists) and
    ``self._body`` becomes the payload.
    """
    if self.mail_native:
        return self.mail_native

    qp = email.charset.QP
    self.mail_native = email.message.Message()
    email.charset.add_charset(self.charset, qp, qp)
    native_charset = email.charset.Charset(self.charset)
    self.mail_native.set_charset(native_charset)

    lowered = [header.lower() for header in self.get_headers()]
    if 'message-id' not in lowered:
        self.reset_message_id()

    for field_name, field_value in self.get_headers().items():
        self.mail_native.add_header(field_name, field_value)
    self.mail_native.set_payload(self._body, charset=self.charset)
    return self.mail_native
def _decode_gpg(self, message, decrypted): header, body = message.replace('\r\n', '\n').split('\n\n', 1) for line in header.lower().split('\n'): if line.startswith('charset:'): return decrypted.decode(line.split()[1]) return decrypted.decode('utf-8')
def get_header(msg, header):
    """Return the decoded value of ``header`` from ``msg``, or None.

    The header name is lower-cased before the lookup.  When ``msg``
    contains it, the stored value is passed through ``decode_header`` and
    the result returned; otherwise None is returned.
    """
    name = header.lower()
    # ``in`` replaces has_key(), which Python 3 mappings and
    # email.message.Message no longer provide.
    if name in msg:
        return decode_header(msg[name])
    return None
def _decode_gpg(self, message, decrypted): header, body = message.replace("\r\n", "\n").split("\n\n", 1) for line in header.lower().split("\n"): if line.startswith("charset:"): return decrypted.decode(line.split()[1]) return decrypted.decode("utf-8")