def resolve_address(self, addr):
    """Look up addresses in the corpus that contain *addr*.

    Runs an ``addr:`` search over the corpus and scans the From/To/Cc
    addresses of every hit.  Each formatted address that contains *addr*
    (case-insensitive, literal match) is tallied under its lower-cased
    address; the first formatted spelling seen for an address is the one
    that is kept.

    Scanning stops once ``config.RESOLVE_ADDRESS_NADDRS`` documents have
    been examined, or when the search raises ``SearchTimeout`` (in which
    case whatever has been collected so far is returned).

    Returns a list of ``(count, formatted_address)`` tuples sorted in
    descending order.
    """
    matcher = re.compile(re.escape(addr), re.I | re.UNICODE)
    predicates = [EMailPredicate('addr:' + addr)]
    corpus = self.get_corpus()
    tally = {}
    try:
        selection = Selection(corpus, predicates,
                              doc_preds=[DEFAULT_FILTER_WITH_SENT],
                              safe=False)
        for (index, doc) in selection.iter(timeout=1):
            message = doc.get_msg()
            for (name, address) in unicode_getalladdrs(message, 'from', 'to', 'cc'):
                formatted = formataddr((name, address))
                if pat_found(matcher, formatted):
                    key = address.lower()
                    # Keep the spelling recorded first; only bump the count.
                    (seen, spelling) = tally.get(key, (0, formatted))
                    tally[key] = (seen + 1, spelling)
            # Limit on the number of documents examined, not on addresses found.
            if config.RESOLVE_ADDRESS_NADDRS <= index + 1:
                break
    except SearchTimeout:
        # Best effort: fall through and return the partial tally.
        pass
    # ambiguous?
    return sorted(tally.values(), reverse=True)


def pat_found(matcher, text):
    """Return True when the compiled pattern *matcher* occurs in *text*."""
    return matcher.search(text) is not None
def __init__(self, corpus, term_preds, doc_preds=None, disjunctive=False, window_size=0):
    """Initialise the window mixin first, then the underlying Selection.

    corpus, term_preds, doc_preds and disjunctive are forwarded to
    Selection.__init__ unchanged; window_size goes to WindowMixin.__init__.
    """
    WindowMixin.__init__(self, window_size)
    # safe=False : we don't want to skip "future" messages.
    Selection.__init__(self, corpus, term_preds, doc_preds,
                       disjunctive=disjunctive, safe=False)
def resolve_address(self, addr):
    """Look up addresses in the corpus that contain *addr*.

    Runs an ``addr:`` search over the corpus and scans the From/To/Cc
    addresses of every hit.  Each formatted address that contains *addr*
    (case-insensitive, literal match) is tallied under its lower-cased
    address; the first formatted spelling seen for an address is the one
    that is kept.

    Scanning stops once ``config.RESOLVE_ADDRESS_NADDRS`` documents have
    been examined, or when the search raises ``SearchTimeout`` (in which
    case whatever has been collected so far is returned).

    Returns a list of ``(count, formatted_address)`` tuples sorted in
    descending order.
    """
    matcher = re.compile(re.escape(addr), re.I | re.UNICODE)
    predicates = [EMailPredicate('addr:' + addr)]
    corpus = self.get_corpus()
    tally = {}
    try:
        selection = Selection(corpus, predicates,
                              doc_preds=[DEFAULT_FILTER_WITH_SENT],
                              safe=False)
        for (index, doc) in selection.iter(timeout=1):
            message = doc.get_msg()
            for (name, address) in unicode_getalladdrs(message, 'from', 'to', 'cc'):
                formatted = formataddr((name, address))
                if matcher.search(formatted):
                    key = address.lower()
                    # Keep the spelling recorded first; only bump the count.
                    (seen, spelling) = tally.get(key, (0, formatted))
                    tally[key] = (seen + 1, spelling)
            # Limit on the number of documents examined, not on addresses found.
            if config.RESOLVE_ADDRESS_NADDRS <= index + 1:
                break
    except SearchTimeout:
        # Best effort: fall through and return the partial tally.
        pass
    # ambiguous?
    return sorted(tally.values(), reverse=True)
def search(argv):
    """Command-line driver: run a keyword search and print the results.

    argv is a full argument vector (argv[0] is the program name).  Either
    an index directory (optionally followed by keywords) or ``-c savefile``
    must be given; otherwise the usage message is printed and the process
    exits with status 2.  The same happens for an unknown option or a
    non-numeric value for ``-T`` / ``-n``.

    Options:
      -T timeout   passed to selection.set_timeout()
      -s / -Y      use StrictKeywordPredicate / YomiKeywordPredicate
      -D           build a disjunctive selection
      -a           print elapsed time and hit statistics at the end
      -c savefile  load the selection from this file when no index
                   directory is given, and save the selection to it afterwards
      -p prefix    passed to IndexDB
      -t doctype   looked up with document.get_doctype(); when it is
                   document.EMailDocument, EMailPredicate is always used
      -e encoding  used to decode the keywords and passed to show_results()
      -n results   passed to show_results()
      -d, -b       accepted, but have no effect in this function
    """
    import getopt
    import locale
    import time

    def usage():
        # Single parenthesised argument: same output as the print statement.
        print('usage: %s [-d] [-T timeout] [-s|-Y] [-D] [-a] '
              '[-c savefile] [-b basedir] [-p prefix] [-t doctype] '
              '[-e encoding] [-n results] idxdir [keyword ...]' % argv[0])
        sys.exit(2)

    def int_option(value):
        # A malformed number is a usage error, not a crash.
        try:
            return int(value)
        except ValueError:
            usage()

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dT:sYDac:b:p:t:e:n:')
    except getopt.GetoptError:
        usage()

    timeout = 0
    stat = False
    disjunctive = False
    savefile = ''
    prefix = ''
    doctype = None      # resolved to document.PlainTextDocument below
    predtype = None     # resolved to KeywordPredicate below
    encoding = locale.getpreferredencoding()
    n = 10
    for (k, v) in opts:
        if k == '-T':
            timeout = int_option(v)
        elif k == '-D':
            disjunctive = True
        elif k == '-a':
            stat = True
        elif k == '-Y':
            predtype = YomiKeywordPredicate
        elif k == '-s':
            predtype = StrictKeywordPredicate
        elif k == '-c':
            savefile = v
        elif k == '-p':
            prefix = v
        elif k == '-t':
            doctype = document.get_doctype(v)
        elif k == '-e':
            encoding = v
        elif k == '-n':
            n = int_option(v)

    # Defaults are filled in only after all options parsed cleanly.
    if doctype is None:
        doctype = document.PlainTextDocument
    if doctype == document.EMailDocument:
        # E-mail documents override any -s / -Y choice.
        predtype = EMailPredicate
    elif predtype is None:
        predtype = KeywordPredicate

    t0 = time.time()
    if args:
        idxdir = args[0]
        keywords = args[1:]
        indexdb = IndexDB(idxdir, prefix)
        indexdb.open()
        preds = [predtype(unicode(kw, encoding)) for kw in keywords]
        selection = Selection(indexdb, preds, disjunctive=disjunctive)
    elif savefile:
        selection = load_selection(savefile)
    else:
        usage()

    selection.set_timeout(timeout)
    try:
        show_results(selection, n, encoding)
    except SearchTimeout:
        print('SearchTimeout.')

    if savefile:
        save_selection(savefile, selection)
    if stat:
        print('%.2f sec, %d/%d hit' % (time.time() - t0,
                                       len(selection.found_docs),
                                       selection.narrowed))
    return