Example #1
def proxyMain():
    port = cfg.getint('http.proxy_port')
    numThreads = cfg.getint('http.proxy_threads', 1)
    server_address = ('', port)
    global proxy_httpd
    proxy_httpd = httpserver.PooledHTTPServer(server_address, proxyhandler.ProxyHandler, numThreads)
    log.info('Proxy: %s', proxy_httpd.report_config())
    proxy_httpd.serve_forever()
Example #2
    def handle_one_request(self):
        """ Create logfp to log message. Wrap rfile to wiretap it. """

        messagelog.mlog.lastRequest = datetime.datetime.now()

        self.minfo = messagelog.MessageInfo()
        max_messagelog = cfg.getint('messagelog.max_messagelog', 2048)
        self.logfp = cachefile.CacheFile(max_messagelog*1024)

        try:
            # wiretap self.rfile using RecordFile; backup rfile in rfile0
            self.reqLogFp = multiblockfile.MbWriter(self.logfp)
            self.rfile0 = self.rfile
            self.rfile  = fileutil.RecordFile(self.rfile, self.reqLogFp)
            try:
                BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)
            finally:
                # disconnect rfile from RecordFile if it has not already been done.
                self.rfile = self.rfile0
        except:
            log.exception("Problem in handling request")

        if hasattr(self, 'command'):                    # note: command only assigned in BaseHTTPRequestHandler.handle_one_request()
            if self.command != 'CONNECT':
                messagelog.mlog.dispose(self.minfo, self.logfp, self.starttime)
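The wiretap above works by swapping self.rfile for a wrapper that copies every byte read into a log writer. fileutil.RecordFile is project-specific; a minimal sketch of such a tee-style wrapper might look like this (illustrative, not the project's actual implementation):

class RecordFile(object):
    """ Wrap a file-like object and echo everything read into 'recorder'. """

    def __init__(self, fp, recorder):
        self.fp = fp                    # the wrapped file (e.g. the request rfile)
        self.recorder = recorder        # log writer (e.g. an MbWriter block)

    def read(self, size=-1):
        data = self.fp.read(size)
        self.recorder.write(data)       # tee a copy into the log
        return data

    def readline(self, size=-1):
        data = self.fp.readline(size)
        self.recorder.write(data)
        return data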
Example #3
def adminMain():
    port = cfg.getint('http.admin_port')
    server_address = ('', port)
    global admin_httpd
    admin_httpd = httpserver.HTTPServer(server_address, app_httpserver.AppHTTPRequestHandler)
    log.info("Start admin on '%s' port %s" % server_address)
    app_httpserver.log.info('app_httpserver setup: docBase=%s',
        app_httpserver.AppHTTPRequestHandler.docBase)

    admin_httpd.serve_forever()
Example #4
def main(argv):
    option = argv[1]

    if 'lib' not in sys.path:
        sys.path.append('lib')

    if option == '--start':
        from minds import proxy
        proxy.main()

    elif option == '--inproc_stop':
        from minds.config import cfg
        port = cfg.getint('http','admin_port',0)
        url = 'http://localhost:%d/config?action=shutdown' % port
        print url
        fp = urllib.urlopen(url)
        print fp.read()
        fp.close()

    elif option == '--stop':
        from minds.config import cfg
        port = cfg.getint('http','admin_port',0)
        url = 'http://localhost:%d/config?action=shutdown' % port
        print url
        fp = urllib.urlopen(url)
        print fp.read()
        fp.close()

    elif option == '--san_test':
        from minds import san_test
        del sys.argv[1:2]
        san_test.main(argv)

    elif option == '--test':
        testmain(argv)

    elif option == '--run':
        run(argv)

    elif option == '--help':
        print __doc__
        print sys.getdefaultencoding()
Example #5
def indexMain():
    interval = cfg.getint('indexing.interval',3)
    interval = min(interval, MAX_INDEX_INTERVAL)
    log.info('Scheduled index thread to run every %s minutes' % interval)
    while not _shutdownEvent.wait(interval * 60):
        if _shutdownEvent.isSet():
            break
        try:
            qmsg_processor.backgroundIndexTask()
            # reset interval after a successful process
            interval = cfg.getint('indexing.interval',3)
            interval = min(interval, MAX_INDEX_INTERVAL)
        except:
            # log error, do not let the indexMain thread die
            import traceback
            traceback.print_exc()
            # exponential backoff
            interval *= 2
            interval = min(interval, MAX_INDEX_INTERVAL)
            log.info('Restart index thread in %s minutes' % interval)
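The retry schedule above doubles the interval on every failure but never exceeds MAX_INDEX_INTERVAL. A quick illustration, assuming a base interval of 3 minutes and a hypothetical cap of 360:

interval, MAX_INDEX_INTERVAL = 3, 360
delays = []
for _ in range(8):
    interval = min(interval * 2, MAX_INDEX_INTERVAL)
    delays.append(interval)
print delays        # [6, 12, 24, 48, 96, 192, 360, 360]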
Example #6
    def load(self):
        # note: this is designed to work even if all upgrade.* fields are not specified
        self.current_version = cfg.get   ('_system.version')
        self.feed_url        = cfg.get   ('upgrade_notification.feed_url','')
        _fetch_date_str      = cfg.get   ('upgrade_notification.fetch_date','')
        self.fetch_frequency = cfg.getint('upgrade_notification.fetch_frequency',10)
        self.last_entry_date = cfg.get   ('upgrade_notification.last_entry_date','')
        try:
            fdate = dateutil.parse_iso8601_date(_fetch_date_str).date()
        except ValueError:
            self.fetch_date = today_func()
            self.next_fetch = today_func()
        else:
            self._set_fetch_date(fdate)
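dateutil.parse_iso8601_date is the project's own helper; load() only needs it to turn the stored fetch_date string back into a date, falling back to today_func() on a ValueError. A minimal stand-in built on the standard library might look like this (hypothetical):

import datetime

def parse_iso8601_date(s):
    # accept 'YYYY-MM-DDTHH:MM:SS' or bare 'YYYY-MM-DD'; raise ValueError otherwise
    for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.datetime.strptime(s, fmt)
        except ValueError:
            pass
    raise ValueError('invalid ISO 8601 date: %r' % s)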
Example #7
    def run(self, logdir, qlogs):
        """ Transform documents in qlogs. Returns (transformed, discarded) counts. """

        qlogs = filter(None, qlogs)         # defensively remove '' entries. Otherwise path would point to logdir for '' entry.
        if not qlogs: return 0, 0

        log.info('Transforming %s documents starting from %s' % (len(qlogs), qlogs[0]))

        # initialize configuration parameters
        global g_maxuri, g_archive_interval
        g_maxuri = cfg.getint('messagelog','maxuri',1024)
        g_archive_interval = cfg.getint('indexing','archive_interval',1)

        for filename in qlogs:              # main loop

            inpath = os.path.join(logdir,filename)
            outpath = os.path.splitext(inpath)[0] + '.qtxt'

            transformed = False             # transform
            try:
                transformed = self.transformDoc(inpath, outpath)
            except messagelog.ParseMessageLogError, e:
                log.warn('Error %s: %s', str(e), filename)
            except:
Example #8
def backgroundIndexTask(forceIndex=False):
    """ This is the main task of qmsg_processor. The tasks has two phrases.

    I. Transform phrase

        Parse *.qlog
        Filtered out unwanted docs
        Transform into *.qtxt
        Add into archive

        Suspense this process when user access proxy.


    II. Index phrase

        Add *.qtxt into index
        Optimize

        During optimize, block out searching.
        (12/03/04 note: Due to GIL and PyLucene implementation, it
        will actually block out every thing, including proxy.)

        Returns transformed, index, discarded
    """

    interval = cfg.getint('indexing','interval',3)
    logdir  = cfg.getPath('logs')
    now = datetime.datetime.now()

    transformed = 0
    discarded_t = 0
    indexed = 0
    discarded_i = 0

    qlogs = _getQueuedLogs(logdir)
    if forceIndex or _shouldTransform(now, interval):
        transformed, discarded_t = TransformProcess().run(logdir, qlogs)

    qtxts = _getQueuedText(logdir)
    if forceIndex or \
        (_shouldTransform(now, interval) and _shouldIndex(now, logdir, qtxts)): # the first check tests for new activity
        indexed, discarded_i = IndexProcess().run(logdir, qtxts)

    return transformed, indexed, discarded_t + discarded_i
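A caller such as the index thread in Example #5 would invoke the task and unpack its counters; a usage sketch based on the return statement above:

transformed, indexed, discarded = backgroundIndexTask()
log.info('transformed %s, indexed %s, discarded %s',
         transformed, indexed, discarded)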
Example #9
def _shouldIndex(now, logdir, queued):
    """ Checks queue status. Returns a flag indicates whether background
        indexing should be invoked.

        @param now - current time (e.g. from datetime.datetime.now())
        @param queued - list of queued files (e.g. from _getQueuedLogs() )

        @return - detail return code
         0: do not index
         1: index (numDoc has met)
         2: index (max_interval has reached)
        -1: index (fail to evaluate time elapsed since lastIssued and numDoc has met)
        -2: index (fail to evaluete time elapsed since first queued)
    """

    numQueued = len(queued)
    if numQueued < 1:
        return 0

    # todo: add some logging for the decision process?

    # Read config. Note interval and max_interval are compared against
    # different bases. See rules 1 & 2 for detail.
    numDoc       = cfg.getint('indexing','numDoc',50)
    interval     = datetime.timedelta( seconds=cfg.getint('indexing','interval',3      )*60 )
    max_interval = datetime.timedelta( seconds=cfg.getint('indexing','max_interval',360)*60 )


    # Rule 1. time elapsed since 'lastIssued' > 'interval' and has 'numDoc'
    lastIssued = messagelog.mlog.lastIssued
    if lastIssued is None: lastIssued = now - interval

    # Detail of rule 1's 'now' v.s. 'lastIssued' timing chart
    #
    #
    #   -interval     lastIssued     +interval
    #       |             |              |
    # ------+-------------+--------------+---------
    #    ^        ^               ^           ^
    #    |        |               |           |
    #    |        |               |       1. Quiet enough. Check numDoc
    #    |        |               |
    #    |        |         2. Has recent activity. Wait till 1.
    #    |        |
    #    |  3. Assume a very recent activity just happened after 'now' is set. Wait till 1.
    #    |
    # 4. lastIssued is too far into the future.
    #    This cannot be explained by 3.
    #    Assume the clock has been reset to an earlier time.
    #    Check numDoc now to avoid stagnation.
    #
    #
    # Note: 3 and 4 are unusual scenarios (with lastIssued in the future w.r.t. now)


    if now >= lastIssued + interval and numQueued >= numDoc:    # case 1
        return 1
    if now + interval < lastIssued and numQueued >= numDoc:     # case 4
        return -1


    # Rule 2. time elapsed since first queued > 'max_interval'
    firstfile = min(queued)
    mtime = os.path.getmtime( os.path.join(logdir, firstfile))
    d0 = datetime.datetime.fromtimestamp(mtime)
    elapsed = now - d0
    if elapsed.days < 0:
        return -2           # if elapsed < 0, the system clock must have been reset
    if elapsed >= max_interval:
        return 2

    return 0
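To make rule 1 concrete, here is a small self-contained check of case 1 versus case 4 from the timing chart (illustrative values only; numDoc and the queue length are stand-ins):

import datetime

interval = datetime.timedelta(minutes=3)
numDoc, numQueued = 50, 60                  # enough documents are queued

now = datetime.datetime(2005, 1, 1, 12, 0)

# case 1: quiet for longer than 'interval' -> return 1
lastIssued = now - datetime.timedelta(minutes=5)
print now >= lastIssued + interval and numQueued >= numDoc      # True

# case 4: lastIssued beyond 'interval' in the future (clock reset) -> return -1
lastIssued = now + datetime.timedelta(minutes=10)
print now + interval < lastIssued and numQueued >= numDoc       # True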
Example #10
def getMindRetrieveBaseURL():
    port = cfg.getint('http.admin_port')
    return 'http://localhost:%s' % port
Example #11
    def render(self, node):
        node.mindretrieveURL.atts['href'] = '%s/' % response.getMindRetrieveBaseURL()
        node.importURL.atts['href'] = '%s/weblib/import' % response.getMindRetrieveBaseURL()
        node.bookmarkletURL.atts['href'] = response.buildBookmarklet()
        node.proxyPort.content = str(cfg.getint('http.proxy_port'))
        node.proxyInstructionURL.atts['href'] = '%s/help/ProxyInstruction' % response.getMindRetrieveBaseURL()
Example #12
    def _transfer_data(self, soc):
        """ Transfers data across:

            browser  <-- self.connection -->  proxy  <-- soc -->  destination

            Returns rspBuf, header_size, bytes_received (content)
        """

        # these are data to keep track of the response stream
        rspBuf = cStringIO.StringIO()   # buffer until the end of header (\r\n\r\n) is found
        rspBuf_tail = ''                # header end may span two buffers, keep last 3 chars
        rspSize = 0                     # total count of response data read
        bodyPos = 0                     # position of response message body, 0 means not yet read

        proxy_pump = self._proxy_pump(self.connection, soc)

        # read until message body is found
        for cData, sData in proxy_pump:
            if cData:
                self.reqLogFp.write(cData)
            elif sData:
                bufPos = rspSize
                rspBuf.write(sData)
                rspSize += len(sData)
                bodyPos = self._findHeaderEnd(rspBuf_tail, sData, bufPos)
                if bodyPos:
                    break
                rspBuf_tail = sData[-3:]
                # todo: make the response parsing line driven rather than buffer read driven?
                # No need for tricky _findHeaderEnd()
        else:
            # socket closed but header end still not found?
            bodyPos = rspSize

        # complete the request block
        self.reqLogFp.complete()

        # As a request/response protocol, there should not be any more request data once the response is sent.
        # But disconnect rfile from reqLogFp in any case.
        self.rfile = self.rfile0

        # write response header in a new block
        fp = multiblockfile.MbWriter(self.logfp)
        rspBuf.seek(0)
        copyfileobj(rspBuf, fp, bodyPos)
        fp.complete()

        # write response body in a new block
        maxresponse = cfg.getint('messagelog.maxresponse', 1024)    # max response size in KB
        rspLogFp = multiblockfile.MbWriter(self.logfp)
        rspLogFp = fileutil.BoundedFile(rspLogFp, maxresponse*1024)
        rspBuf.seek(bodyPos)
        copyfileobj(rspBuf, rspLogFp, rspSize-bodyPos)

        # read the remaining message body (could be nothing)
        for cData, sData in proxy_pump:
            if cData:
                pass                    # no more request data expected; don't log it
            elif sData:
                rspSize += len(sData)
                rspLogFp.write(sData)

        rspLogFp.complete()

        rspBuf.seek(0)
        return rspBuf, bodyPos, rspSize - bodyPos
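The loop above depends on a _findHeaderEnd() helper to locate the blank line ('\r\n\r\n') that terminates the response headers, which may straddle two reads; that is why the last 3 characters of the previous buffer are carried in rspBuf_tail. A minimal sketch of such a helper (written as a plain function; the project's actual method may differ):

def _findHeaderEnd(tail, data, bufPos):
    # search tail+data so a header end split across two reads is still found;
    # 'bufPos' is the offset in the response stream where 'data' begins
    chunk = tail + data
    i = chunk.find('\r\n\r\n')
    if i < 0:
        return 0                            # header end not found yet
    # map the match back to a stream offset: position just past '\r\n\r\n'
    return bufPos - len(tail) + i + 4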