def main():
    # Initialize up front so the KeyboardInterrupt handler can't hit a NameError
    # if shutdown happens before these are created.
    file_observer = None
    server = None
    try:
        klogger.logger = klogger.get_logger("ketchlip", "webserver.log")
        klogger.info("Starting webserver on port " + str(PORT))

        MyHandler.set_www_root(config.config.www_root)

        # Load the search index and url lookup table produced by the indexer.
        index_file = config.config.base_dir + "index"
        url_lookup_file = config.config.base_dir + "url_lookup"
        SearchSingleton().load(index_file, url_lookup_file)

        # Reload the search data whenever the index file changes on disk.
        file_observer = FileObserver(index_file)
        file_observer.register_listener(SearchSingleton())
        file_observer.start_observe()

        server = HTTPServer(('', PORT), MyHandler)
        klogger.info("HTTP server ready to serve on port " + str(PORT))
        server.serve_forever()
    except KeyboardInterrupt:
        klogger.info('^C received, shutting down server')
        if file_observer:
            file_observer.unregister_listener(SearchSingleton())
            file_observer.stop_observe()
        if server:
            server.socket.close()
    except Exception as e:
        klogger.exception(e)
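# A minimal sketch of the file-watching pattern wired up in main() above. The real
# FileObserver and SearchSingleton live elsewhere in this repo; the polling interval,
# the listener.notify(path) callback name, and the threading approach are assumptions
# made for illustration only, not the project's actual API.
import os
import threading
import time

class FileObserver(object):
    """Polls a file's mtime and notifies registered listeners when it changes."""

    def __init__(self, path, interval=5):
        self.path = path
        self.interval = interval
        self.listeners = []
        self._stop = threading.Event()
        self._last_mtime = None

    def register_listener(self, listener):
        if listener not in self.listeners:
            self.listeners.append(listener)

    def unregister_listener(self, listener):
        if listener in self.listeners:
            self.listeners.remove(listener)

    def start_observe(self):
        thread = threading.Thread(target=self._poll)
        thread.daemon = True  # don't keep the webserver process alive on shutdown
        thread.start()

    def stop_observe(self):
        self._stop.set()

    def _poll(self):
        while not self._stop.is_set():
            try:
                mtime = os.path.getmtime(self.path)
            except OSError:
                mtime = None  # file may not have been written yet
            if mtime is not None and mtime != self._last_mtime:
                self._last_mtime = mtime
                for listener in self.listeners:
                    listener.notify(self.path)  # assumed callback name
            time.sleep(self.interval)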
def test_logging():
    c = LogCapture()
    parser = OptionParser()
    c.addOptions(parser, {})

    # Exercise both factory functions; the module logger is the one under test.
    logger = klogger.get_logger("foo")
    logger = klogger.get_module_logger(__name__)

    c.start()
    logger.info("Goodbye")
    c.end()

    records = c.formatLogRecords()
    eq_("Goodbye", c.handler.buffer[0].msg)
    eq_("test.helpers.klogger_test", c.handler.buffer[0].name)
    eq_("test.helpers.klogger_test: INFO: Goodbye", records[0])
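# The klogger helpers exercised by the test above are defined elsewhere in the repo.
# The sketch below is one plausible stdlib-logging implementation, assuming get_logger
# attaches a file handler when a filename is given and get_module_logger simply returns
# a logger named after the module; the format string and level are illustrative guesses.
import logging

def get_logger(name, filename=None):
    """Return a named logger, optionally logging to a file as well."""
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if filename and not logger.handlers:
        handler = logging.FileHandler(filename)
        handler.setFormatter(logging.Formatter("%(name)s: %(levelname)s: %(message)s"))
        logger.addHandler(handler)
    return logger

def get_module_logger(module_name):
    """Return a logger named after the calling module (pass __name__)."""
    return logging.getLogger(module_name)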
def main(): """ tweet_indexer consumes the output (tweetfile) created by tweet_scanner and creates: * indexfile: searchable dictionary - {word: [position: url_id] * graphfile: each url and their outbound links {url: [list of urls]} * url_lookupfile: dictionary containing url ids - {url_id: url} """ try: klogger.logger = klogger.get_logger("ketchlip", "indexer.log") input_queue = Queue() output_queue = Queue() base_dir = config.config.base_dir tweetfile = base_dir + "tweets.txt" # timestamp \t url indexfile = base_dir + "index" graphfile = base_dir + "graph" url_lookupfile = base_dir + "url_lookup" lookup_urlfile = base_dir + "lookup_url" since_file = base_dir + "since" index_persister = Persister(indexfile) graph_persister = Persister(graphfile) url_lookup_persister = Persister(url_lookupfile) lookup_url_persister = Persister(lookup_urlfile) since_persister = Persister(since_file) index = index_persister.load({}) graph = graph_persister.load({}) lookup_url = lookup_url_persister.load({}) since = since_persister.load() indexer = Indexer() indexer.index = index indexer.graph = graph indexer.lookup_url = lookup_url klogger.info("Indexing " + tweetfile) if since: klogger.info("Since " + str(since)) url_list = open(tweetfile, "r") include_count = 0 exclude_count = 0 for timestamp_url in url_list: timestamp, url = timestamp_url.split("\t") url = url.strip() if not url in lookup_url and (not since or since <= timestamp): input_queue.put_nowait(url) since = timestamp include_count += 1 else: exclude_count += 1 klogger.info("Including: " + str(include_count) + " Excluding: " + str(exclude_count)) if include_count <= 0: klogger.info("Nothting to index") return # Spawn off crawler and indexer gevent.joinall([ gevent.spawn(Crawler().gevent_crawl, input_queue, output_queue), gevent.spawn(indexer.gevent_index, input_queue, output_queue) ]) if not indexer.done: return klogger.info("Indexing failed") index = indexer.index graph = indexer.graph url_lookup = indexer.url_lookup lookup_url = indexer.lookup_url index_persister.save(index) graph_persister.save(graph) url_lookup_persister.save(url_lookup) lookup_url_persister.save(lookup_url) since_persister.save(since) klogger.info("Saved index in " + indexfile + " (length " + str(len(index)) + ")") klogger.info("Saved graph in " + graphfile + " (length " + str(len(graph)) + ")") klogger.info("Saved lookup url in " + lookup_urlfile + " (length " + str(len(lookup_url)) + ")") klogger.info("Saved url lookup in " + url_lookupfile + " (length " + str(len(url_lookup)) + ")") klogger.info("Indexing completed") except KeyboardInterrupt: klogger.info('^C received, shutting down indexer') except Exception, e: klogger.exception(e)