Esempio n. 1
0
 def resolve_address(self, addr):
     """Find addresses containing ``addr`` among the matching messages.

     Builds a selection over the corpus with an ``addr:`` e-mail predicate
     and, for each document it yields, looks at the (name, address) pairs
     that ``unicode_getalladdrs`` returns for the 'from', 'to' and 'cc'
     fields.  Each formatted address that contains ``addr`` (literal,
     case-insensitive match) is counted under its lowercased address.

     Returns a list of ``(count, formatted_address)`` tuples sorted in
     descending order.  The formatted form kept for an address is the first
     one encountered.  A ``SearchTimeout`` is swallowed, so whatever was
     gathered before the timeout is still returned.
     """
     matcher = re.compile(re.escape(addr), re.I | re.UNICODE)
     term_preds = [EMailPredicate('addr:' + addr)]
     corpus = self.get_corpus()
     # lowercased address -> (number of hits, first formatted form seen)
     tally = {}
     try:
         selection = Selection(corpus,
                               term_preds,
                               doc_preds=[DEFAULT_FILTER_WITH_SENT],
                               safe=False)
         for (idx, doc) in selection.iter(timeout=1):
             pairs = unicode_getalladdrs(doc.get_msg(), 'from', 'to', 'cc')
             for (name, address) in pairs:
                 display = formataddr((name, address))
                 if matcher.search(display) is None:
                     continue
                 key = address.lower()
                 (hits, first_seen) = tally.get(key, (0, display))
                 tally[key] = (hits + 1, first_seen)
             # Cap the scan: stop once idx + 1 reaches the configured limit.
             if (idx + 1) >= config.RESOLVE_ADDRESS_NADDRS:
                 break
     except SearchTimeout:
         # Best effort: keep the partial tally collected so far.
         pass
     # NOTE(review): the original author left an open "ambiguous?" question
     # here; no disambiguation is performed.
     return sorted(tally.itervalues(), reverse=True)
Esempio n. 2
0
 def __init__(self, corpus, term_preds, doc_preds=None,
              disjunctive=False, window_size=0):
   """Initialise both base classes of this selection.

   ``window_size`` is handed to ``WindowMixin.__init__``; ``corpus``,
   ``term_preds``, ``doc_preds`` and ``disjunctive`` are forwarded to
   ``Selection.__init__``, which is always called with ``safe=False``.
   """
   WindowMixin.__init__(self, window_size)
   # safe=False : we don't want to skip "future" messages.
   Selection.__init__(
     self, corpus, term_preds, doc_preds,
     disjunctive=disjunctive, safe=False)
Esempio n. 3
0
 def resolve_address(self, addr):
   """Return the addresses matching ``addr``, ranked by occurrence count.

   A selection restricted by an ``addr:`` e-mail predicate is iterated
   (with ``timeout=1``).  For every document, the (name, address) pairs
   obtained from ``unicode_getalladdrs`` for 'from', 'to' and 'cc' are
   formatted, and those whose formatted text contains ``addr`` (escaped,
   case-insensitive) are counted per lowercased address.

   The result is a list of ``(count, formatted_address)`` tuples in
   descending order; the formatted text is the first one seen for that
   address.  If ``SearchTimeout`` is raised, the partial result is
   returned.
   """
   needle = re.compile(re.escape(addr), re.I | re.UNICODE)
   predicates = [ EMailPredicate('addr:'+addr) ]
   corpus = self.get_corpus()
   # lowercased address -> (count, first formatted form)
   counts = {}
   try:
     selection = Selection(corpus, predicates,
                           doc_preds=[ DEFAULT_FILTER_WITH_SENT ],
                           safe=False)
     for (pos, doc) in selection.iter(timeout=1):
       message = doc.get_msg()
       for (name, address) in unicode_getalladdrs(message, 'from', 'to', 'cc'):
         formatted = formataddr((name, address))
         if needle.search(formatted):
           key = address.lower()
           if key in counts:
             (seen, kept) = counts[key]
             counts[key] = (seen+1, kept)
           else:
             counts[key] = (1, formatted)
       # Stop as soon as pos+1 reaches the configured limit.
       if (pos+1) >= config.RESOLVE_ADDRESS_NADDRS:
         break
   except SearchTimeout:
     # Timeouts are not fatal; fall through with what we have.
     pass
   # NOTE(review): the original carried an unresolved "ambiguous?" remark
   # at this point; nothing is done about ambiguity here.
   ranked = list(counts.itervalues())
   ranked.sort(reverse=True)
   return ranked
Esempio n. 4
0
 def __init__(self,
              corpus,
              term_preds,
              doc_preds=None,
              disjunctive=False,
              window_size=0):
     """Set up the window mixin and the underlying selection.

     ``WindowMixin.__init__`` receives ``window_size``.  The other
     arguments go to ``Selection.__init__``, together with a fixed
     ``safe=False``.
     """
     WindowMixin.__init__(self, window_size)
     # safe=False : we don't want to skip "future" messages.
     Selection.__init__(self, corpus, term_preds, doc_preds,
                        disjunctive=disjunctive,
                        safe=False)
Esempio n. 5
0
def search(argv):
  """Command-line entry point: run a keyword search or replay a saved one.

  ``argv`` is the full argument vector (``argv[0]`` is the program name).
  Recognised options:

    -d           increment the debug level (parsed, not used further here)
    -T timeout   integer timeout passed to ``selection.set_timeout``
    -s | -Y      use StrictKeywordPredicate / YomiKeywordPredicate
    -D           build a disjunctive selection
    -a           print timing and hit statistics at the end
    -c savefile  load the selection from / save it to this file
    -b basedir   parsed, not used further in this function
    -p prefix    prefix passed to ``IndexDB``
    -t doctype   document type name, resolved via ``document.get_doctype``
    -e encoding  encoding used to decode keywords and passed to
                 ``show_results`` (defaults to the locale's preferred one)
    -n results   integer passed to ``show_results``

  When positional arguments are given, the first is the index directory and
  the rest are keywords.  Otherwise, if ``-c`` was given, the selection is
  loaded from that file.  With neither, the usage text is printed.

  Usage errors (unknown option, non-integer ``-T``/``-n`` value, nothing to
  search) print the usage line and exit with status 2 via ``sys.exit``.
  """
  import getopt
  import locale
  import time

  def usage():
    print('usage: %s [-d] [-T timeout] [-s|-Y] [-D] [-a] '
          '[-c savefile] [-b basedir] [-p prefix] [-t doctype] '
          '[-e encoding] [-n results] idxdir [keyword ...]' % argv[0])
    sys.exit(2)

  def parse_int(value):
    # A non-numeric option value is a usage error, not a traceback.
    try:
      return int(value)
    except ValueError:
      usage()

  try:
    (opts, args) = getopt.getopt(argv[1:], 'dT:sYDac:b:p:t:e:n:')
  except getopt.GetoptError:
    usage()
  debug = 0
  timeout = 0
  stat = False
  disjunctive = False
  savefile = ''
  basedir = ''
  prefix = ''
  doctype = document.PlainTextDocument
  predtype = KeywordPredicate
  encoding = locale.getpreferredencoding()
  n = 10
  for (k, v) in opts:
    if k == '-d': debug += 1
    elif k == '-T': timeout = parse_int(v)
    elif k == '-D': disjunctive = True
    elif k == '-a': stat = True
    elif k == '-Y': predtype = YomiKeywordPredicate
    elif k == '-s': predtype = StrictKeywordPredicate
    elif k == '-c': savefile = v
    elif k == '-b': basedir = v
    elif k == '-p': prefix = v
    elif k == '-t': doctype = document.get_doctype(v)
    elif k == '-e': encoding = v
    elif k == '-n': n = parse_int(v)

  # E-mail documents always use the e-mail predicate, overriding -s / -Y.
  if doctype == document.EMailDocument:
    predtype = EMailPredicate

  t0 = time.time()
  if args:
    # Fresh search: first positional is the index directory, rest keywords.
    idxdir = args[0]
    keywords = args[1:]
    indexdb = IndexDB(idxdir, prefix)
    indexdb.open()
    preds = [ predtype(unicode(kw, encoding)) for kw in keywords ]
    selection = Selection(indexdb, preds, disjunctive=disjunctive)
  elif savefile:
    # No positionals: continue from a previously saved selection.
    selection = load_selection(savefile)
  else:
    usage()  # exits, so `selection` is always bound below

  selection.set_timeout(timeout)
  try:
    show_results(selection, n, encoding)
  except SearchTimeout:
    print('SearchTimeout.')

  if savefile:
    save_selection(savefile, selection)

  if stat:
    print('%.2f sec, %d/%d hit' % (time.time()-t0, len(selection.found_docs), selection.narrowed))
  return