def proxyMain():
    """Start the pooled HTTP proxy server and block serving requests forever.

    Reads http.proxy_port and http.proxy_threads from cfg; publishes the
    server instance via the module global proxy_httpd.
    """
    global proxy_httpd
    server_address = ('', cfg.getint('http.proxy_port'))
    thread_count = cfg.getint('http.proxy_threads', 1)
    proxy_httpd = httpserver.PooledHTTPServer(
        server_address, proxyhandler.ProxyHandler, thread_count)
    log.info('Proxy: %s', proxy_httpd.report_config())
    proxy_httpd.serve_forever()
def handle_one_request(self):
    """ Create logfp to log message. Wrap rfile to wiretap it.

    Records the raw request stream into a multi-block cache file while
    delegating the actual request handling to the base class, then hands
    the captured log over to messagelog for disposal (except for CONNECT
    tunnels, which carry no loggable HTTP message).
    """
    messagelog.mlog.lastRequest = datetime.datetime.now()
    self.minfo = messagelog.MessageInfo()
    max_messagelog = cfg.getint('messagelog.max_messagelog', 2048)
    self.logfp = cachefile.CacheFile(max_messagelog*1024)
    try:
        # wiretap self.rfile using RecordFile; backup rfile in rfile0
        self.reqLogFp = multiblockfile.MbWriter(self.logfp)
        self.rfile0 = self.rfile
        self.rfile = fileutil.RecordFile(self.rfile, self.reqLogFp)
        try:
            BaseHTTPServer.BaseHTTPRequestHandler.handle_one_request(self)
        finally:
            # disconnect rfile from RecordFile if it has not already been done.
            self.rfile = self.rfile0
    except Exception:
        # was a bare 'except:', which would also swallow SystemExit and
        # KeyboardInterrupt; narrow to Exception and keep logging the error
        log.exception("Problem in handling request")
    if hasattr(self, 'command'):
        # note: command only assigned in BaseHTTPRequestHandler.handle_one_request()
        if self.command != 'CONNECT':
            messagelog.mlog.dispose(self.minfo, self.logfp, self.starttime)
def adminMain():
    """Start the admin HTTP server on http.admin_port and serve forever.

    Publishes the server instance via the module global admin_httpd.
    """
    global admin_httpd
    server_address = ('', cfg.getint('http.admin_port'))
    admin_httpd = httpserver.HTTPServer(
        server_address, app_httpserver.AppHTTPRequestHandler)
    log.info("Start admin on '%s' port %s" % server_address)
    app_httpserver.log.info('app_httpserver setup: docBase=%s',
                            app_httpserver.AppHTTPRequestHandler.docBase)
    admin_httpd.serve_forever()
def main(argv): option = argv[1] if 'lib' not in sys.path: sys.path.append('lib') if option == '--start': from minds import proxy proxy.main() elif option == '--inproc_stop': from minds.config import cfg port = cfg.getint('http','admin_port',0) url = 'http://localhost:%d/config?action=shutdown' % port print url fp = urllib.urlopen(url) print fp.read() fp.close() elif option == '--stop': from minds.config import cfg port = cfg.getint('http','admin_port',0) url = 'http://localhost:%d/config?action=shutdown' % port print url fp = urllib.urlopen(url) print fp.read() fp.close() elif option == '--san_test': from minds import san_test del sys.argv[1:2] san_test.main(argv) elif option == '--test': testmain(argv) elif option == '--run': run(argv) elif option == '--help': print __doc__ print sys.getdefaultencoding()
def indexMain():
    """Background index thread body.

    Runs qmsg_processor.backgroundIndexTask() every 'indexing.interval'
    minutes (capped at MAX_INDEX_INTERVAL) until _shutdownEvent is set.
    On failure the interval doubles (exponential backoff, still capped)
    so the thread never dies.
    """
    interval = cfg.getint('indexing.interval',3)
    interval = min(interval, MAX_INDEX_INTERVAL)
    log.info('Scheduled index thread to run every %s minutes' % interval)
    while not _shutdownEvent.wait(interval * 60):
        if _shutdownEvent.isSet():
            break
        try:
            qmsg_processor.backgroundIndexTask()
            # reset interval after a successful process
            interval = cfg.getint('indexing.interval',3)
            interval = min(interval, MAX_INDEX_INTERVAL)
        except Exception:
            # log error, do not let the indexMain thread die
            # (was a bare 'except:', which would also swallow
            #  SystemExit/KeyboardInterrupt)
            import traceback
            traceback.print_exc()
            # exponential backoff
            interval *= 2
            interval = min(interval, MAX_INDEX_INTERVAL)
            log.info('Restart index thread in %s minutes' % interval)
def load(self):
    """Read upgrade-notification settings from cfg into this object.

    Designed to work even when all upgrade.* fields are unspecified:
    an unparsable (or empty) fetch_date falls back to today for both
    fetch_date and next_fetch.
    """
    self.current_version = cfg.get('_system.version')
    self.feed_url = cfg.get('upgrade_notification.feed_url', '')
    raw_fetch_date = cfg.get('upgrade_notification.fetch_date', '')
    self.fetch_frequency = cfg.getint('upgrade_notification.fetch_frequency', 10)
    self.last_entry_date = cfg.get('upgrade_notification.last_entry_date', '')
    try:
        parsed_date = dateutil.parse_iso8601_date(raw_fetch_date).date()
    except ValueError:
        # cannot parse -> start from scratch today
        self.fetch_date = today_func()
        self.next_fetch = today_func()
    else:
        self._set_fetch_date(parsed_date)
def run(self, logdir, qlogs):
    """ Transform documents in qlogs. Returns number of transformed, discarded """
    # NOTE(review): this block is truncated in the available view — it ends at
    # a bare 'except:' with no handler body visible. Code below is documented
    # as-is; the missing tail (handler, counters, return) cannot be verified.
    qlogs = filter(None, qlogs)  # defensively remove '' entries. Otherwise path would point to logdir for '' entry.
    if not qlogs:
        return 0, 0
    log.info('Transforming %s documents starting from %s' % (len(qlogs), qlogs[0]))
    # initialize configuration parameters
    global g_maxuri, g_archive_interval
    g_maxuri = cfg.getint('messagelog','maxuri',1024)
    g_archive_interval = cfg.getint('indexing','archive_interval',1)
    for filename in qlogs:  # main loop
        inpath = os.path.join(logdir,filename)
        # output name: same path with the extension swapped to .qtxt
        outpath = os.path.splitext(inpath)[0] + '.qtxt'
        transformed = False
        # transform
        try:
            transformed = self.transformDoc(inpath, outpath)
        except messagelog.ParseMessageLogError, e:
            # parse errors are expected for malformed logs: warn and continue
            log.warn('Error %s: %s', str(e), filename)
        except:
def backgroundIndexTask(forceIndex=False):
    """ Main task of qmsg_processor; runs in two phases.

        I. Transform phase
            Parse *.qlog, filter out unwanted docs, transform into *.qtxt
            and add into archive. Suspend this process when user accesses
            proxy.
        II. Index phase
            Add *.qtxt into index, then optimize. During optimize, block
            out searching. (12/03/04 note: Due to GIL and PyLucene
            implementation, it will actually block out every thing,
            including proxy.)

        Returns transformed, index, discarded
    """
    interval = cfg.getint('indexing','interval',3)
    logdir = cfg.getPath('logs')
    now = datetime.datetime.now()

    transformed = discarded_t = 0
    indexed = discarded_i = 0

    queued_logs = _getQueuedLogs(logdir)
    if forceIndex or _shouldTransform(now, interval):
        transformed, discarded_t = TransformProcess().run(logdir, queued_logs)

    queued_texts = _getQueuedText(logdir)
    # _shouldTransform is checked first: it tells whether there is new activity
    if forceIndex or (_shouldTransform(now, interval)
                      and _shouldIndex(now, logdir, queued_texts)):
        indexed, discarded_i = IndexProcess().run(logdir, queued_texts)

    return transformed, indexed, discarded_t + discarded_i
def _shouldIndex(now, logdir, queued):
    """ Checks queue status. Returns a flag indicates whether background
        indexing should be invoked.

        @param now - current time (e.g. from datetime.datetime.now())
        @param logdir - directory containing the queued files
        @param queued - list of queued files (e.g. from _getQueuedLogs() )
        @return - detail return code
            0: do not index
            1: index (numDoc has met)
            2: index (max_interval has reached)
           -1: index (fail to evaluate time elapsed since lastIssued and numDoc has met)
           -2: index (fail to evaluate time elapsed since first queued)
    """
    numQueued = len(queued)
    if numQueued < 1:
        return 0

    # todo: add some logging for the decision process?

    # Read config. Note interval and max_interval are compared against
    # different base. See rule 1 & 2 for detail.
    numDoc = cfg.getint('indexing','numDoc',50)
    interval = datetime.timedelta(seconds=cfg.getint('indexing','interval',3)*60)
    max_interval = datetime.timedelta(seconds=cfg.getint('indexing','max_interval',360)*60)

    # Rule 1. time elapsed since 'lastIssued' > 'interval' and has 'numDoc'
    lastIssued = messagelog.mlog.lastIssued
    if lastIssued is None:      # idiom fix: was '== None'
        lastIssued = now - interval

    # Detail of rule 1's 'now' v.s. 'lastIssued' timing chart
    #
    #   -interval          lastIssued     +interval
    #       |                   |             |
    # ------+-------------------+-------------+---------
    #  ^           ^                  ^             ^
    #  |           |                  |             |
    #  |           |                  |   1. Quiet enough. Check numDoc
    #  |           |                  |
    #  |           |   2. Has recent activity. Wait till 1.
    #  |           |
    #  |   3. Assume a very recent activity just happened after 'now' is set.
    #  |      Wait till 1.
    #  |
    #  4. lastIssued is too far into the future.
    #     This cannot be explained by 3.
    #     Assume the clock has been reset to earily time.
    #     Check numDoc now to avoid stagnant.
    #
    # Note: 3 and 4 are unusual scenarios (with lastIssued in the future
    # w.r.t now)
    if now >= lastIssued + interval and numQueued >= numDoc:    # case 1
        return 1
    if now + interval < lastIssued and numQueued >= numDoc:     # case 4
        return -1

    # Rule 2. time elapsed since first queued > 'max_interval'
    firstfile = min(queued)
    mtime = os.path.getmtime(os.path.join(logdir, firstfile))
    d0 = datetime.datetime.fromtimestamp(mtime)
    elapsed = now - d0
    if elapsed.days < 0:
        return -2   # if elapsed < 0, the system clock must has been reset
    if elapsed >= max_interval:
        return 2
    return 0
def getMindRetrieveBaseURL():
    """Build the base URL of the local MindRetrieve admin server."""
    admin_port = cfg.getint('http.admin_port')
    return 'http://localhost:%s' % admin_port
def render(self, node):
    """Fill in the help-page URLs and the proxy port on the template node."""
    # hoist the base URL: the original called getMindRetrieveBaseURL()
    # four times for the same value
    base_url = response.getMindRetrieveBaseURL()
    node.mindretrieveURL.atts['href'] = '%s/' % base_url
    node.importURL.atts['href'] = '%s/weblib/import' % base_url
    node.bookmarkletURL.atts['href'] = response.buildBookmarklet()
    node.proxyPort.content = str(cfg.getint('http.proxy_port'))
    node.proxyInstructionURL.atts['href'] = '%s/help/ProxyInstruction' % base_url
def _transfer_data(self, soc): """ Transfers data across: browser <-- self.connection --> proxy <-- soc --> destination Returns rspBuf, header_size, bytes_received (content) """ # these are data to keep track of the response stream rspBuf = cStringIO.StringIO() # buffer until the end of header (\r\n\r\n) is found rspBuf_tail = '' # header end may span two buffers, keep last 3 chars rspSize = 0 # total count of response data read bodyPos = 0 # position of response message body, 0 means not yet read proxy_pump = self._proxy_pump(self.connection, soc) # read until message body is found for cData, sData in proxy_pump: if cData: self.reqLogFp.write(cData) elif sData: bufPos = rspSize rspBuf.write(sData) rspSize += len(sData) bodyPos = self._findHeaderEnd(rspBuf_tail, sData, bufPos) if bodyPos: break rspBuf_tail = sData[-3:] # todo: make the respone parsing line driven rather than buffer read driven? # No need for tricky _findHeaderEnd() else: # socket closed but header end still not found? bodyPos = rspSize # complete the request block self.reqLogFp.complete() # As a request/response protocol there should not be any more request data once response is sent. # But disconnect rfile from reqLogFp in anycase. self.rfile = self.rfile0 # write response header in a new block fp = multiblockfile.MbWriter(self.logfp) rspBuf.seek(0) copyfileobj(rspBuf, fp, bodyPos) fp.complete() # write reposne body in a new block maxresponse = cfg.getint('messagelog.maxresponse', 1024) # max maxresponse size in kb rspLogFp = multiblockfile.MbWriter(self.logfp) rspLogFp = fileutil.BoundedFile(rspLogFp, maxresponse*1024) rspBuf.seek(bodyPos) copyfileobj(rspBuf, rspLogFp, rspSize-bodyPos) # read the remaining message body (could be nothing) for cData, sData in proxy_pump: if cData: pass elif sData: rspSize += len(sData) rspLogFp.write(sData) rspLogFp.complete() rspBuf.seek(0) return rspBuf, bodyPos, rspSize - bodyPos