def description(self):
    """Return a one-line, human-readable summary of the predicates.

    The predicates from ``self.get_preds()`` are processed first, followed
    by those in ``self.doc_preds``.  A predicate whose ``unicode()`` form is
    empty is skipped; every other one is passed through ``rmsp()`` and
    wrapped in double quotes.  The quoted items are joined with a single
    space, and ``'all'`` is returned when no item is left.
    """
    def quoted(preds):
        # Yield the quoted text of each predicate with a non-empty form.
        for pred in preds:
            if unicode(pred):
                yield '"%s"' % rmsp(unicode(pred))

    parts = list(quoted(self.get_preds()))
    parts.extend(quoted(self.doc_preds))
    return ' '.join(parts) or 'all'
def genlines(msg):
    """Generate the display lines for each part of ``msg``.

    ``term``, ``selection``, ``headerlevel`` and ``MAX_PP_SIZE`` are not
    parameters; they are taken from the enclosing or module scope.

    For every ``(i, mpart, level)`` produced by ``enum_message_parts``:

    * a MIME-tree marker line is yielded when the part has an index and a
      non-zero level, followed (if ``headerlevel`` is set) by its folded,
      highlighted headers and an empty line;
    * for ``text/*`` parts the body text is yielded, either verbatim
      (bodies longer than ``MAX_PP_SIZE``) or folded and highlighted,
      followed by an empty line.
    """
    charset = msg.get_content_charset(config.MESSAGE_CHARSET)
    # Show part(s).
    for (i, mpart, level) in enum_message_parts(msg, favor='text/plain'):
        if i is not None and level:
            yield term.color(config.COLOR4MIMETREE,
                             '--- ' + get_mime_info(i, mpart))
            # Show the child headers.
            # NOTE(review): the nesting here was reconstructed from a
            # whitespace-collapsed source; confirm that headers are meant
            # to be shown for child parts only.
            if headerlevel:
                for (h, v) in get_headers(mpart):
                    s = '%s: %s' % (h, rmsp(v))
                    color = config.HEADER_COLOR.get(h.lower(), '')
                    for line in fold_text(term, s, indent2=' '):
                        yield highlight(term, selection, color, line.rstrip())
                yield ''
        # Show the payload.
        if mpart.get_content_maintype() == 'text':
            text = get_body_text(mpart, charset)
            if MAX_PP_SIZE < len(text):
                # Too large to fold/highlight: emit the raw lines.
                for line in text.splitlines():
                    yield term.normal(line)
            else:
                # Join once instead of growing a string with += per line.
                folded = u''.join(line + '\n'
                                  for line in fold_text(term, text))
                for line in highlight(term, selection, '', folded).splitlines():
                    yield line
            yield ''
def genlines(msg):
    """Generate the display lines for each part of ``msg``.

    ``term``, ``selection``, ``headerlevel`` and ``MAX_PP_SIZE`` are not
    parameters; they are taken from the enclosing or module scope.

    For every ``(i, mpart, level)`` produced by ``enum_message_parts``:

    * a MIME-tree marker line is yielded when the part has an index and a
      non-zero level, followed (if ``headerlevel`` is set) by its folded,
      highlighted headers and an empty line;
    * for ``text/*`` parts the body text is yielded, either verbatim
      (bodies longer than ``MAX_PP_SIZE``) or folded and highlighted,
      followed by an empty line.
    """
    charset = msg.get_content_charset(config.MESSAGE_CHARSET)
    # Show part(s).
    for (i, mpart, level) in enum_message_parts(msg, favor="text/plain"):
        if i is not None and level:
            yield term.color(config.COLOR4MIMETREE,
                             "--- " + get_mime_info(i, mpart))
            # Show the child headers.
            # NOTE(review): the nesting here was reconstructed from a
            # whitespace-collapsed source; confirm that headers are meant
            # to be shown for child parts only.
            if headerlevel:
                for (h, v) in get_headers(mpart):
                    s = "%s: %s" % (h, rmsp(v))
                    color = config.HEADER_COLOR.get(h.lower(), "")
                    for line in fold_text(term, s, indent2=" "):
                        yield highlight(term, selection, color, line.rstrip())
                yield ""
        # Show the payload.
        if mpart.get_content_maintype() == "text":
            text = get_body_text(mpart, charset)
            if MAX_PP_SIZE < len(text):
                # Too large to fold/highlight: emit the raw lines.
                for line in text.splitlines():
                    yield term.normal(line)
            else:
                # Join once instead of growing a string with += per line.
                folded = u"".join(line + "\n"
                                  for line in fold_text(term, text))
                for line in highlight(term, selection, "", folded).splitlines():
                    yield line
            yield ""
def create_message(corpus, data, msg0=None):
    """Build a message object from edited unicode text.

    ``data`` is parsed with ``email.Parser.FeedParser``.  Its headers are
    cleaned with ``rmsp()`` and re-encoded with ``encode_header()``; the
    ``X-Forward-Msg`` and ``Label`` headers are consumed here and not kept
    in the result.  The body is labelled ``ascii`` when it encodes as such,
    otherwise it is re-encoded with the charset derived from
    ``config.MESSAGE_CHARSET`` (falling back to utf-8).

    Parameters:
      corpus -- object whose ``get_message(loc)`` supplies the text of each
                message referenced by an ``X-Forward-Msg`` header.
      data   -- the message text; must be a ``unicode`` object.
      msg0   -- optional original message; when it is multipart, every
                payload item except the first is added to the result.

    Returns a tuple ``(msg1, labels)`` where ``labels`` is the
    comma-split value of the ``Label`` header (an empty list if absent).

    Raises TypeError when ``data`` is not unicode.
    """
    from email import Charset, Parser, MIMEMessage
    if not isinstance(data, unicode):
        raise TypeError("data must be a unicode.")
    p = Parser.FeedParser()
    p.feed(data)
    msg1 = p.close()
    csmap = Charset.Charset(config.MESSAGE_CHARSET)
    attach_msgs = []
    # Re-encode the headers.
    headers = []
    labels = []
    for (k, v) in msg1.items():
        v = rmsp(v)
        # Headers that are empty after cleaning are dropped.
        if not v:
            continue
        kl = k.lower()
        if kl == "x-forward-msg":
            # Collect the locations of the messages to attach later.
            attach_msgs.extend(get_numbers(v))
            continue
        if kl == "label":
            # A later Label header replaces the value of an earlier one.
            labels = v.split(",")
            continue
        headers.append((k.encode("ascii", "strict"), encode_header(v, csmap)))
    # Remove all the existing headers.
    for k in msg1.keys():
        del msg1[k]
    # Reattach the headers.
    for (k, v) in headers:
        msg1[k] = v
    # Change the body.
    data = msg1.get_payload(decode=False)
    try:
        # First try to encode with us-ascii.
        data.encode("ascii", "strict")
        # Succeed.
        msg1.set_charset("ascii")
    except UnicodeError:
        # Re-encode the body.
        if not csmap.output_charset:
            csmap = Charset.Charset("utf-8")
        msg1.set_charset(str(csmap.output_charset))
        data = data.encode(str(csmap.output_codec), "replace")
        # NOTE(review): placement of set_payload() inside this handler was
        # reconstructed from a whitespace-collapsed source -- confirm.
        msg1.set_payload(data)
    # Attach other messages (for forwarding).
    if attach_msgs:
        for loc in attach_msgs:
            p = Parser.FeedParser()
            p.feed(corpus.get_message(loc))
            msg1 = mime_add(msg1, MIMEMessage.MIMEMessage(p.close()))
    # Integrate other mime objects.
    if msg0 and msg0.is_multipart():
        # The first payload item of msg0 is skipped.
        for obj in msg0.get_payload()[1:]:
            msg1 = mime_add(msg1, obj)
    validate_message_structure(msg1)
    return (msg1, labels)
def create_message(corpus, data, msg0=None):
    """Build a message object from edited unicode text.

    ``data`` is parsed with ``email.Parser.FeedParser``.  Its headers are
    cleaned with ``rmsp()`` and re-encoded with ``encode_header()``; the
    ``X-Forward-Msg`` and ``Label`` headers are consumed here and not kept
    in the result.  The body is labelled ``ascii`` when it encodes as such,
    otherwise it is re-encoded with the charset derived from
    ``config.MESSAGE_CHARSET`` (falling back to utf-8).

    Parameters:
      corpus -- object whose ``get_message(loc)`` supplies the text of each
                message referenced by an ``X-Forward-Msg`` header.
      data   -- the message text; must be a ``unicode`` object.
      msg0   -- optional original message; when it is multipart, every
                payload item except the first is added to the result.

    Returns a tuple ``(msg1, labels)`` where ``labels`` is the
    comma-split value of the ``Label`` header (an empty list if absent).

    Raises TypeError when ``data`` is not unicode.
    """
    from email import Charset, Parser, MIMEMessage
    if not isinstance(data, unicode):
        raise TypeError('data must be a unicode.')
    p = Parser.FeedParser()
    p.feed(data)
    msg1 = p.close()
    csmap = Charset.Charset(config.MESSAGE_CHARSET)
    attach_msgs = []
    # Re-encode the headers.
    headers = []
    labels = []
    for (k, v) in msg1.items():
        v = rmsp(v)
        # Headers that are empty after cleaning are dropped.
        if not v:
            continue
        kl = k.lower()
        if kl == 'x-forward-msg':
            # Collect the locations of the messages to attach later.
            attach_msgs.extend(get_numbers(v))
            continue
        if kl == 'label':
            # A later Label header replaces the value of an earlier one.
            labels = v.split(',')
            continue
        headers.append((k.encode('ascii', 'strict'), encode_header(v, csmap)))
    # Remove all the existing headers.
    for k in msg1.keys():
        del msg1[k]
    # Reattach the headers.
    for (k, v) in headers:
        msg1[k] = v
    # Change the body.
    data = msg1.get_payload(decode=False)
    try:
        # First try to encode with us-ascii.
        data.encode('ascii', 'strict')
        # Succeed.
        msg1.set_charset('ascii')
    except UnicodeError:
        # Re-encode the body.
        if not csmap.output_charset:
            csmap = Charset.Charset('utf-8')
        msg1.set_charset(str(csmap.output_charset))
        data = data.encode(str(csmap.output_codec), 'replace')
        # NOTE(review): placement of set_payload() inside this handler was
        # reconstructed from a whitespace-collapsed source -- confirm.
        msg1.set_payload(data)
    # Attach other messages (for forwarding).
    if attach_msgs:
        for loc in attach_msgs:
            p = Parser.FeedParser()
            p.feed(corpus.get_message(loc))
            msg1 = mime_add(msg1, MIMEMessage.MIMEMessage(p.close()))
    # Integrate other mime objects.
    if msg0 and msg0.is_multipart():
        # The first payload item of msg0 is skipped.
        for obj in msg0.get_payload()[1:]:
            msg1 = mime_add(msg1, obj)
    validate_message_structure(msg1)
    return (msg1, labels)
def main(argv):
    """Command-line entry point for the message database tool.

    Usage: ``prog [-b basedir] cmd [arg ...]`` where ``cmd`` is one of:

      create  -- create the database under ``basedir`` (default ``'msg'``).
      import  -- add every member of each tar archive given in ``arg``
                 (each member is passed through ``gzip2bytes``).
      add     -- add the contents of each file given in ``arg``.
      search  -- print the first 80 characters of each search result.

    The ``-d`` option is accepted but currently has no effect.

    Returns 0 on success, or 100 after printing the usage line when the
    options are invalid, no command is given, or the command is unknown.
    """
    import getopt

    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    basedir = 'msg'
    for (k, v) in opts:
        # '-d' is parsed for compatibility; its value was never used.
        if k == '-b':
            basedir = v
    if not args:
        return usage()
    cmd = args.pop(0)
    msgdb = MessageDB(basedir)
    if cmd == 'create':
        msgdb.create()
    elif cmd == 'import':
        msgdb.open()
        for path in args:
            tar = TarFile(path)
            try:
                while True:
                    info = tar.next()
                    if info is None:
                        break
                    # Read the raw member bytes straight from the archive:
                    # the data is taken to start one block after the header.
                    fp = tar.fileobj
                    fp.seek(info.offset+BLOCKSIZE)
                    data = fp.read(info.size)
                    recno = msgdb.add_file(gzip2bytes(data))
                    print(recno)
            finally:
                # Release the archive's file handle even if a member fails.
                # (TarFile is presumably tarfile.TarFile -- verify.)
                tar.close()
            # NOTE(review): flush-per-archive placement was reconstructed
            # from a whitespace-collapsed source -- confirm.
            msgdb.flush()
        msgdb.close()
    elif cmd == 'add':
        msgdb.open()
        for path in args:
            with open(path, 'r') as fp:
                data = fp.read()
            recno = msgdb.add_file(data)
            print(recno)
        msgdb.close()
    elif cmd == 'search':
        msgdb.open()
        for data in msgdb.search_text(args):
            print(rmsp(data)[:80])
        msgdb.close()
    else:
        return usage()
    return 0
def show_headers(term, doc, headers):
    """Display the requested headers of ``doc`` on ``term``.

    ``headers`` is a sequence of ``(name, showname)`` pairs.  For the name
    ``label`` (case-insensitive) the single value ``doc.get_labels()`` is
    used; for any other name the values come from
    ``unicode_getall(msg, name)``.  Each value is cleaned with ``rmsp()``
    and displayed on its own line, prefixed with ``"name: "`` when
    ``showname`` is true.
    """
    msg = doc.get_msg()
    for (name, showname) in headers:
        is_label = (name.lower() == "label")
        found = [doc.get_labels()] if is_label else unicode_getall(msg, name)
        for raw in found:
            cleaned = rmsp(raw)
            line = ("%s: %s" % (name, cleaned)) if showname else cleaned
            term.display(term.normal(line + "\n"))
    return
def show_headers(term, doc, headers):
    """Display the requested headers of ``doc`` on ``term``.

    ``headers`` is a sequence of ``(name, showname)`` pairs.  For the name
    ``label`` (case-insensitive) the single value ``doc.get_labels()`` is
    used; for any other name the values come from
    ``unicode_getall(msg, name)``.  Each value is cleaned with ``rmsp()``
    and displayed on its own line, prefixed with ``'name: '`` when
    ``showname`` is true.
    """
    msg = doc.get_msg()
    for (name, showname) in headers:
        is_label = (name.lower() == 'label')
        found = [doc.get_labels()] if is_label else unicode_getall(msg, name)
        for raw in found:
            cleaned = rmsp(raw)
            line = ('%s: %s' % (name, cleaned)) if showname else cleaned
            term.display(term.normal(line + '\n'))
    return
def index_doc(self, doc, maxsents=100000):
    """Add ``doc`` (and, recursively, its sub-documents) to the index.

    The document gets the next id (``len(self.docinfo)+1``) and is recorded
    in ``self.docinfo``.  Its label features and ``doc.get_feats()`` are
    registered under sentence id 0; then the title (if any) and each
    non-empty sentence are stored through ``self.maker.add`` and their terms
    registered, up to ``maxsents`` sentences.  ``self.flush()`` is called
    once either configured threshold (documents or terms) is reached.

    Parameters:
      doc      -- the document to index.
      maxsents -- maximum number of sentences (title included) to store.

    Always returns True.
    """
    if self.maker is None:
        self.create_new_idx()
    docid = len(self.docinfo)+1
    self.docinfo.append((docid, doc))
    # Progress report: full line at verbosity >= 2, a dot at verbosity 1.
    if 2 <= self.verbose:
        sys.stderr.write('Reading: %r\n' % (doc,))
    elif 1 <= self.verbose:
        sys.stderr.write('.')
        sys.stderr.flush()
    terms = self.terms
    # other features
    add_features(terms, docid, 0,
                 (PROP_LABEL+x for x in self.corpus.loc_labels(doc.loc)))
    add_features(terms, docid, 0, doc.get_feats())
    # sents
    sentid = 0
    title = doc.get_title()
    if title and sentid < maxsents:
        title = zen2han(rmsp(title))
        self.maker.add(pack('>cii', PROP_SENT, docid, sentid),
                       title.encode('utf-8'))
        add_features(terms, docid, sentid, set(doc.splitterms(title)))
        sentid += 1
    for sent in doc.get_sents():
        sent = zen2han(rmsp(sent))
        if not sent:
            continue
        self.maker.add(pack('>cii', PROP_SENT, docid, sentid),
                       sent.encode('utf-8'))
        add_features(terms, docid, sentid, set(doc.splitterms(sent)))
        sentid += 1
        if maxsents <= sentid:
            break
    # Flush once either threshold is set and has been reached.
    if ((self.max_docs_threshold and
         self.max_docs_threshold <= len(self.docinfo)) or
        (self.max_terms_threshold and
         self.max_terms_threshold <= len(terms))):
        self.flush()
    for subdoc in doc.get_subdocs():
        if subdoc:
            self.index_doc(subdoc, maxsents=maxsents)
    return True
def get_editable_string(msg, labels):
    """Return the unicode text presented to the user for editing ``msg``.

    The text consists of one ``Name: value`` line for every value of each
    header listed in ``config.EDITABLE_HEADERS``, an optional ``Label:``
    line (when ``labels`` is non-empty), an empty line, and the body text.
    For a multipart message the body is taken from the part returned by
    ``get_message_part(msg, 1)``.

    Raises MessageStructureError when the chosen part is not of main type
    ``text``.
    """
    if msg.is_multipart():
        mpart = get_message_part(msg, 1)
    else:
        mpart = msg
    # NOTE(review): this check is applied to single-part messages as well;
    # the nesting was reconstructed from a whitespace-collapsed source.
    if mpart.get_content_maintype() != "text":
        raise MessageStructureError("The first part is not text??.")
    # Construct text.
    pieces = [u""]
    for name in config.EDITABLE_HEADERS:
        for value in unicode_getall(msg, name):
            pieces.append(u"%s: %s\n" % (name, rmsp(value)))
    if labels:
        pieces.append(u"Label: %s\n" % get_label_names(labels))
    pieces.append(u"\n" + get_body_text(mpart, msg.get_content_charset()))
    return u"".join(pieces)
def get_editable_string(msg, labels):
    """Return the unicode text presented to the user for editing ``msg``.

    The text consists of one ``Name: value`` line for every value of each
    header listed in ``config.EDITABLE_HEADERS``, an optional ``Label:``
    line (when ``labels`` is non-empty), an empty line, and the body text.
    For a multipart message the body is taken from the part returned by
    ``get_message_part(msg, 1)``.

    Raises MessageStructureError when the chosen part is not of main type
    ``text``.
    """
    if msg.is_multipart():
        mpart = get_message_part(msg, 1)
    else:
        mpart = msg
    # NOTE(review): this check is applied to single-part messages as well;
    # the nesting was reconstructed from a whitespace-collapsed source.
    if mpart.get_content_maintype() != 'text':
        raise MessageStructureError('The first part is not text??.')
    # Construct text.
    pieces = [u'']
    for name in config.EDITABLE_HEADERS:
        for value in unicode_getall(msg, name):
            pieces.append(u'%s: %s\n' % (name, rmsp(value)))
    if labels:
        pieces.append(u'Label: %s\n' % get_label_names(labels))
    pieces.append(u'\n' + get_body_text(mpart, msg.get_content_charset()))
    return u''.join(pieces)
def show_mime_part(term, msg, part, headerlevel=0, charset=None):
    """Display one MIME part of ``msg`` on ``term``.

    The part is looked up with ``get_message_part(msg, part)``.  Anything
    other than ``text/plain`` is decoded and handed to
    ``term.show_binary`` together with its content type.  A ``text/plain``
    part is displayed as text; when ``headerlevel`` is 2 or more, all of the
    part's headers and an empty line are displayed first.

    ``charset`` (or ``config.MESSAGE_CHARSET`` when it is not given) is the
    fallback passed to ``msg.get_content_charset``.
    """
    mpart = get_message_part(msg, part)
    if mpart.get_content_type() == "text/plain":
        # Headers are normally not displayed.
        if headerlevel >= 2:
            for (name, value) in mpart.items():
                header_line = "%s: %s\n" % (name, rmsp(value))
                term.display(term.normal(header_line))
            term.display("\n")
        fallback = charset or config.MESSAGE_CHARSET
        charset = msg.get_content_charset(fallback)
        body = get_body_text(mpart, charset)
        term.display(term.normal(body))
    else:
        # Binary - might get InterfaceError.
        payload = mpart.get_payload(decode=True)
        term.show_binary(payload, mpart.get_content_type())
    return
def show_mime_part(term, msg, part, headerlevel=0, charset=None):
    """Display one MIME part of ``msg`` on ``term``.

    The part is looked up with ``get_message_part(msg, part)``.  Anything
    other than ``text/plain`` is decoded and handed to
    ``term.show_binary`` together with its content type.  A ``text/plain``
    part is displayed as text; when ``headerlevel`` is 2 or more, all of the
    part's headers and an empty line are displayed first.

    ``charset`` (or ``config.MESSAGE_CHARSET`` when it is not given) is the
    fallback passed to ``msg.get_content_charset``.
    """
    mpart = get_message_part(msg, part)
    if mpart.get_content_type() == 'text/plain':
        # Headers are normally not displayed.
        if headerlevel >= 2:
            for (name, value) in mpart.items():
                header_line = '%s: %s\n' % (name, rmsp(value))
                term.display(term.normal(header_line))
            term.display('\n')
        fallback = charset or config.MESSAGE_CHARSET
        charset = msg.get_content_charset(fallback)
        body = get_body_text(mpart, charset)
        term.display(term.normal(body))
    else:
        # Binary - might get InterfaceError.
        payload = mpart.get_payload(decode=True)
        term.show_binary(payload, mpart.get_content_type())
    return
def send_message(msg, fromaddr, rcpts):
    """Finalize the headers of ``msg`` and send it through SMTP.

    Before sending, a Message-ID is assigned if the message has none, the
    Date header is set to the current local time, the Bcc header is removed,
    and every header whose value is empty after ``rmsp()`` is removed.
    The server settings come from ``config.SMTP_HOST``, a tuple
    ``(host, port, user, password, tls)``.

    Parameters:
      msg      -- the message object; it is modified in place.
      fromaddr -- envelope sender passed to ``smtplib.SMTP.sendmail``.
      rcpts    -- envelope recipients passed to ``sendmail``.

    Raises MessageTransportError when smtplib reports an SMTPException.
    """
    import time
    import smtplib

    def getlogin():
        # Return the login name used to build the Message-ID.
        import os
        if hasattr(os, "getlogin"):
            return os.getlogin()
        try:
            # WARNING: untested code!
            import _winreg
            key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, "SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer")
            (name, _) = _winreg.QueryValueEx(key, "Logon User Name")
            key.Close()
            return name
        except ImportError:
            return "???"

    # Assign a message-id if there isn't one.
    if not msg["message-id"]:
        msg["Message-ID"] = make_msgid(getlogin())
    # Assign the date.
    msg["Date"] = formatdate(time.time(), localtime=True)
    # Now remove BCC and other redundant (empty) headers.
    del msg["bcc"]
    # Iterate over a snapshot because headers are deleted inside the loop.
    for (k, v) in list(msg.items()):
        if not rmsp(v):
            del msg[k]
    data = msg_repr(msg)
    # Send the message.
    smtp = smtplib.SMTP()
    (host, port, user, password, tls) = config.SMTP_HOST
    try:
        smtp.connect(host, port)
        if tls:
            smtp.ehlo()
            smtp.starttls()
            smtp.ehlo()
        if user and password:
            smtp.login(user, password)
        smtp.sendmail(fromaddr, rcpts, data)
    except smtplib.SMTPException as e:
        raise MessageTransportError(str(e))
    finally:
        # Release the socket on every path; close() is harmless when the
        # connection was never established.
        smtp.close()
def send_message(msg, fromaddr, rcpts):
    """Finalize the headers of ``msg`` and send it through SMTP.

    Before sending, a Message-ID is assigned if the message has none, the
    Date header is set to the current local time, the Bcc header is removed,
    and every header whose value is empty after ``rmsp()`` is removed.
    The server settings come from ``config.SMTP_HOST``, a tuple
    ``(host, port, user, password, tls)``.

    Parameters:
      msg      -- the message object; it is modified in place.
      fromaddr -- envelope sender passed to ``smtplib.SMTP.sendmail``.
      rcpts    -- envelope recipients passed to ``sendmail``.

    Raises MessageTransportError when smtplib reports an SMTPException.
    """
    import time
    import smtplib

    def getlogin():
        # Return the login name used to build the Message-ID.
        import os
        if hasattr(os, 'getlogin'):
            return os.getlogin()
        try:
            # WARNING: untested code!
            import _winreg
            key = _winreg.OpenKey(
                _winreg.HKEY_CURRENT_USER,
                'SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Explorer')
            (name, _) = _winreg.QueryValueEx(key, 'Logon User Name')
            key.Close()
            return name
        except ImportError:
            return '???'

    # Assign a message-id if there isn't one.
    if not msg['message-id']:
        msg['Message-ID'] = make_msgid(getlogin())
    # Assign the date.
    msg['Date'] = formatdate(time.time(), localtime=True)
    # Now remove BCC and other redundant (empty) headers.
    del msg['bcc']
    # Iterate over a snapshot because headers are deleted inside the loop.
    for (k, v) in list(msg.items()):
        if not rmsp(v):
            del msg[k]
    data = msg_repr(msg)
    # Send the message.
    smtp = smtplib.SMTP()
    (host, port, user, password, tls) = config.SMTP_HOST
    try:
        smtp.connect(host, port)
        if tls:
            smtp.ehlo()
            smtp.starttls()
            smtp.ehlo()
        if user and password:
            smtp.login(user, password)
        smtp.sendmail(fromaddr, rcpts, data)
    except smtplib.SMTPException as e:
        raise MessageTransportError(str(e))
    finally:
        # Release the socket on every path; close() is harmless when the
        # connection was never established.
        smtp.close()
def description(self):
    """Return a one-line, human-readable summary of the predicates.

    The predicates from ``self.get_preds()`` are processed first, followed
    by those in ``self.doc_preds``.  A predicate whose ``unicode()`` form is
    empty is skipped; every other one is passed through ``rmsp()`` and
    wrapped in double quotes.  The quoted items are joined with a single
    space, and ``'all'`` is returned when no item is left.
    """
    def quoted(preds):
        # Yield the quoted text of each predicate with a non-empty form.
        for pred in preds:
            if unicode(pred):
                yield '"%s"' % rmsp(unicode(pred))

    parts = list(quoted(self.get_preds()))
    parts.extend(quoted(self.doc_preds))
    return ' '.join(parts) or 'all'