def coerce_string(value, otherwise = "", separator = None, strip = False, at_key = '@'):
    """Reduce ``value`` to a single string, or return ``otherwise``.

    The value is unwrapped step by step until something scalar remains:

    * an ``AsIs`` wrapper is replaced by its ``.value`` attribute;
    * anything ``is_list_like`` is materialised with ``list()``;
    * an empty list yields ``otherwise``;
    * a non-empty list is either joined with ``separator`` (each item
      coerced recursively) or, when ``separator`` is None, replaced by its
      first item;
    * a dict is replaced by the value stored under ``at_key``, or yields
      ``otherwise`` when that key is absent.

    Whatever remains is passed to ``bm_text.tounicode``, unless it is None,
    in which case ``otherwise`` is returned.

    NOTE(review): ``AsIs``, ``is_list_like``, ``is_list``, ``is_dict`` and
    ``bm_text.tounicode`` are defined outside this block; their exact
    semantics are assumed from their names -- confirm against their
    definitions.

    Parameters:
        value: the object to coerce.
        otherwise: returned when no usable value is found (None, an empty
            list, or a dict without ``at_key``).
        separator: when not None, list items are coerced individually and
            joined with this string instead of taking only the first item.
        strip: when true, the result string is ``.strip()``-ed. The
            ``otherwise`` fallback is returned untouched.
        at_key: the dict key that holds the text of a dict value.

    Returns:
        The coerced string, or ``otherwise``.
    """
    import bm_text

    # A plain dict is looked up once up front; a missing key gives None,
    # which falls through to the ``otherwise`` return below.
    if isinstance(value, dict):
        value = value.get(at_key)

    while True:
        if isinstance(value, AsIs):
            value = value.value
            continue

        if is_list_like(value):
            value = list(value)

        if is_list(value):
            if not value:
                return otherwise

            if separator is not None:
                # Items are coerced with the default ``otherwise`` (""), so
                # unusable items contribute an empty string to the join.
                value = separator.join(
                    coerce_string(item, strip=strip, at_key=at_key)
                    for item in value
                )
                if strip:
                    value = value.strip()

                return value

            # No separator: only the first item is kept, then re-examined.
            value = value[0]
            continue
        elif is_dict(value):
            # ``in`` replaces dict.has_key(), which no longer exists in
            # Python 3; it behaves identically on Python 2.
            if at_key not in value:
                return otherwise

            value = value.get(at_key)
            continue

        break

    if value is None:
        return otherwise

    value = bm_text.tounicode(value)
    if strip:
        value = value.strip()

    return value
def _ExtractHTML(self, msg_obj, msg_original):
    """Return the scrubbed HTML body of ``msg_obj``.

    The body lines of ``msg_obj`` are joined, converted with
    ``bm_text.tounicode`` (given the message's declared charset and the
    charset found in the HTML itself as candidates), and passed through
    ``self.ScrubHTML``. One extra mailer-specific scrubber is then applied,
    selected from the headers of ``msg_original``.

    Parameters:
        msg_obj: the message part holding the HTML body; must work with
            ``email.Iterators.body_line_iterator`` and provide
            ``get_charset()``.
        msg_original: object whose ``get(header_name)`` returns the
            'X-Mailer', 'Message-ID' and 'User-Agent' header values (or a
            false value when absent). Presumably the top-level message --
            confirm against the caller.

    Returns:
        The scrubbed HTML text.
    """
    # The second argument (True) asks the iterator to decode the payload.
    html = "".join(email.Iterators.body_line_iterator(msg_obj, True))
    html = bm_text.tounicode(html, [
        msg_obj.get_charset(),
        self._ExtractCharsetFromHTML(html),
    ])
    html = self.ScrubHTML(html)

    #
    #   Per-mailer post processing
    #
    # Missing headers are normalised to "" so the substring tests are safe.
    mailer = msg_original.get('X-Mailer') or ""
    mid = msg_original.get('Message-ID') or ""
    ua = msg_original.get('User-Agent') or ""

    # Only the first matching mailer's scrubber runs.
    if 'Outlook Express' in mailer:
        html = self.ScrubHTMLOutlookExpress(html)
    elif 'mail.gmail.com' in mid:
        html = self.ScrubHTMLGMail(html)
    elif 'Mozilla Thunderbird' in ua:
        # Previously misspelled 'Thunderbord', so this branch never matched.
        html = self.ScrubHTMLThunderbird(html)
    elif 'Microsoft-Entourage' in ua:
        html = self.ScrubHTMLEntourage(html)

    return html