def main(argv):
    from minds import proxy
    proxy.init('')
    print

    if len(argv) <= 1:
        print __doc__
        sys.exit(-1)

    option = argv[1]

    if option == '-q':
        print 'getQueueStatus numIndexed %s numQueued %s' % getQueueStatus()

    elif option == '-t':
        logdir  = cfg.getPath('logs')
        qlogs = _getQueuedLogs(logdir)
        transformed, discarded = TransformProcess().run(logdir, qlogs)
        print transformed, discarded

    elif option == '-i':
        logdir  = cfg.getPath('logs')
        qtxts = _getQueuedText(logdir)
        indexed, discarded = IndexProcess().run(logdir, qtxts)
        print indexed, discarded

    elif option == '-b':
        messagelog.mlog.lastRequest = datetime.datetime(1990,1,1)   # enable _shouldTransform
        result = backgroundIndexTask()
        print result
def getQueueStatus():
    """ Return the number of docs indexed and number of docs queued. """

    from minds import lucene_logic

    global totalIndexed
    if totalIndexed < 0:
        # lazily read the doc count from the archive index, then cache it
        # in the module-level totalIndexed for subsequent calls
        reader = lucene_logic.Reader(cfg.getPath('archiveindex'))
        totalIndexed = reader.reader.numDocs()
        reader.close()

    log_path = cfg.getPath('logs')
    pending = len(_getQueuedText(log_path)) + len(_getQueuedLogs(log_path))
    return totalIndexed, pending
    def _open(self):
        """ Open a Lucene writer and searcher over the archive index.

            Side effects: sets self.writer and self.searcher.
            NOTE(review): Writer is created before Searcher — presumably
            deliberate w.r.t. index locking; confirm before reordering.
        """

        from minds import lucene_logic

        dbindex = cfg.getPath('archiveindex')
        self.writer = lucene_logic.Writer(dbindex)
        self.searcher = lucene_logic.Searcher(pathname=dbindex)
 def _save(self, filename):
     """ Write self.buf to filename under the logs directory.

         Writes to filename+'.tmp' first and then renames, so a reader
         never observes a partially written file. The file handle is
         closed even if the write raises (the original leaked it).
     """
     pathname = os.path.join(cfg.getPath('logs'), filename)
     tmppathname = pathname + '.tmp'
     # write to a tmp file first and then rename to make it more atomic.
     fp = open(tmppathname, 'wb')
     try:
         fp.write(self.buf.getvalue())
     finally:
         fp.close()
     os.rename(tmppathname, pathname)
def parseId(id):
    """ Return arc_path, filename represented by id.
        e.g. id=123456789 -> $archive/123456.zip/789

        Raises KeyError if id is not a string of exactly 9 digits.
    """
    # validate before building any filesystem path
    if not id.isdigit() or len(id) != 9:
        # call form of raise (the old `raise KeyError, msg` syntax is
        # Python-2-only; this form behaves identically)
        raise KeyError('Invalid id: %s' % str(id))

    apath = cfg.getPath('archive')
    # first 6 digits select the zip archive; last 3 the member filename
    return os.path.join(apath, id[:6] + '.zip'), id[6:]
    def _findIdRange(self):
        """ Scan the $archive directory for zip files for the begin and end id.

            Side effects: sets self._beginId (inclusive) and self._endId
            (exclusive); both 0 when the archive holds no matching zips.
        """

        apath = cfg.getPath('archive')
        zips = [name for name in os.listdir(apath) if self.arc_pattern.match(name)]
        if not zips:
            self._beginId = 0
            self._endId = 0
            return

        lo_arc = min(zips)
        hi_arc = max(zips)

        # smallest member of the first zip, largest member of the last zip
        lo = self._findId(os.path.join(apath, lo_arc), min)
        hi = self._findId(os.path.join(apath, hi_arc), max)

        # 6 digits from the zip name + 3 digits from the member -> 9 digit id
        self._beginId = int(lo_arc[:6] + lo)
        self._endId   = int(hi_arc[:6] + hi) + 1
def backgroundIndexTask(forceIndex=False):
    """ This is the main task of qmsg_processor. The task has two phases.

    I. Transform phase

        Parse *.qlog
        Filter out unwanted docs
        Transform into *.qtxt
        Add into archive

        This phase is suspended while the user accesses the proxy.


    II. Index phase

        Add *.qtxt into index
        Optimize

        During optimize, searching is blocked.
        (12/03/04 note: Due to GIL and PyLucene implementation, it
        will actually block out every thing, including proxy.)

        Returns transformed, indexed, discarded
    """

    interval = cfg.getint('indexing','interval',3)
    logdir   = cfg.getPath('logs')
    now = datetime.datetime.now()

    transformed = discarded_t = 0
    indexed     = discarded_i = 0

    # phase I: qlog -> qtxt + archive
    queued_logs = _getQueuedLogs(logdir)
    if forceIndex or _shouldTransform(now, interval):
        transformed, discarded_t = TransformProcess().run(logdir, queued_logs)

    # phase II: qtxt -> index; _shouldTransform is rechecked first as a
    # cheap probe for new activity before the more specific _shouldIndex
    queued_texts = _getQueuedText(logdir)
    if forceIndex or (_shouldTransform(now, interval) and
                      _shouldIndex(now, logdir, queued_texts)):
        indexed, discarded_i = IndexProcess().run(logdir, queued_texts)

    return transformed, indexed, discarded_t + discarded_i
def main(argv):

    if len(argv) < 2:
        print __doc__
        sys.exit(-1)

    from minds import proxy
    proxy.init(proxy.CONFIG_FILENAME)

    index_path = argv[1]
    shutil.rmtree(index_path, True)

    starttime = datetime.datetime.now()
    dbdoc = cfg.getPath('archive')
    idc = docarchive.idCounter
    idc._findIdRange()
    beginId = idc.beginId
    endId   = idc.endId
    print 'Reindex %s(#%d-%d) -> %s' % (dbdoc, beginId, endId, index_path)
    reindex(dbdoc, beginId, endId, index_path)
    print 'Reindex finished:', datetime.datetime.now() - starttime
def forwardTmpl(wfile, env, tmpl, renderMod, *args):
    """ Render the HTML template tmpl via renderMod.render(*args) and write
        the result to wfile.

        @param wfile - output file object the rendered HTML is written to
        @param env - CGI-style environment dict (kept for interface
            compatibility; no longer consulted here — the original computed
            SCRIPT_NAME parts from it but never used them)
        @param tmpl - template path relative to the configured docBase,
            e.g. 'tmpl/home.html'
        @param renderMod - module providing the render() callback
    """
    tmplPathname = os.path.join(cfg.getPath("docBase"), tmpl)

    # reloading good for development time
    try:
        reload(renderMod)
    except Exception:  # todo: HACK HACK reload does not work in py2exe service version. But it is OK not to reload.
        pass

    # read the template text, closing the file even on error
    # (the original never closed this handle)
    fp = open(tmplPathname)
    try:
        source = fp.read()
    finally:
        fp.close()

    template = HTMLTemplate.Template(renderMod.render, source)
    wfile.write(template.render(*args))
def openDomainFp(*args):
    """ Open the domain data file (DOMAINFILE under the logs directory);
        extra positional args are passed through as the file mode etc.
    """
    return file(os.path.join(cfg.getPath('logs'), DOMAINFILE), *args)