def get_taxbot_knowledge():
    """Return the taxonomy collected from the taxonomy log, caching it.

    The first call feeds /home/sioclog/taxonomy.log through a fresh
    TaxonomySink via run() and stores the sink's ``taxonomy`` attribute in
    the module-level ``taxbot_knowledge``.  Later calls return that stored
    value as long as it is not None.

    Loading is best-effort: if opening or processing the log raises, the
    traceback is printed and whatever the sink holds at that point is
    stored and returned.
    """
    global taxbot_knowledge
    if taxbot_knowledge is not None:
        return taxbot_knowledge

    sink = TaxonomySink()
    try:
        logfile = open("/home/sioclog/taxonomy.log")
        try:
            run(logfile, sink)
        finally:
            # Always release the file handle, even when run() fails.
            logfile.close()
    except Exception:
        # Deliberately best-effort, but do not swallow KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` would.
        print_exc()

    taxbot_knowledge = sink.taxonomy
    return taxbot_knowledge
def _negotiate_format(request_path, http_accept, user_agent):
    """Pick the ``(extension, format)`` pair for a request.

    request_path is the request URI without its query string.  A known
    file extension on it wins; otherwise the Accept header is inspected
    with simple substring checks.  format is one of "html", "turtle" or
    "raw"; extension is "" when the format was not chosen by extension.
    """
    by_extension = (
        (".html", "html"),
        (".turtle", "turtle"),
        (".ttl", "turtle"),
        (".txt", "raw"),
    )
    for extension, fmt in by_extension:
        if request_path.endswith(extension):
            return extension, fmt

    # XXX do real content negotiation, e.g. mimeparse.py
    # Order matters: "text" is also a substring of "text/html".
    by_accept = (("turtle", "turtle"), ("html", "html"), ("text", "raw"))
    for token, fmt in by_accept:
        if token in http_accept:
            return "", fmt

    if "Googlebot" in user_agent:
        return "", "html"  # Accept: */* isn't the full truth...
    return "", "turtle"  # default


def _strip_extension(text, extension):
    """Return text without a trailing extension; unchanged if none applies."""
    if extension and text.endswith(extension):
        return text[:-len(extension)]
    return text


def runcgi(datarooturi, logfiles):
    """Serve one CGI request, printing the response to standard output.

    The request is described by the CGI environment (HTTP_HOST,
    REQUEST_URI, HTTP_ACCEPT, HTTP_USER_AGENT, PATH_INFO) and the
    ``up_to`` query parameter.  PATH_INFO is read as
    ``/<restype>/<channel>/<timeprefix>``, where restype defaults to
    "channels" unless it is "channels", "users" or "backlog".

    datarooturi is passed through to the sinks and renderers; logfiles is
    passed to run() as the input for each pipeline.

    Single-argument ``print(...)`` is used throughout because it produces
    the same output as the print statement on Python 2.
    """
    http_host = os.environ.get('HTTP_HOST', "")
    request_uri = os.environ.get('REQUEST_URI', "")
    http_accept = os.environ.get('HTTP_ACCEPT', "")
    user_agent = os.environ.get('HTTP_USER_AGENT', "")
    path_info = os.environ.get('PATH_INFO', "")

    query = cgi.FieldStorage()
    up_to = query.getfirst("up_to", None)

    # Fixed resources that need no format negotiation.
    if path_info == "/styles.css":
        print("Content-type: text/css")
        # NOTE(review): this keeps the literal reading of the source,
        # ``print css_stylesheet()``.  Confirm that css_stylesheet()
        # returns the stylesheet text (including the blank line that ends
        # the headers) rather than printing it itself.
        print(css_stylesheet())
        return
    elif path_info == "/sitemap.xml":
        print("Content-type: text/xml")
        # NOTE(review): no blank line is printed after the header here;
        # presumably sitemap_index() emits it -- confirm.
        sink = ChannelsAndDaysSink()
        run(logfiles, sink)
        sitemap_index(sink, datarooturi)
        return

    # Look for the extension on the URI *without* its query string, so a
    # request like ".../x.html?up_to=..." is still recognised as HTML.
    request_path = request_uri.split("?")[0]
    extension, fmt = _negotiate_format(request_path, http_accept, user_agent)

    parts = path_info.split('/')
    # remove extension if any:
    parts[-1] = _strip_extension(parts[-1], extension)

    if len(parts) > 1 and parts[1] not in ["channels", "users", "backlog"]:
        parts.insert(1, "channels")  # XXX default type for now

    # Missing trailing path segments default to "".
    restype, channel, timeprefix = (parts + ["", "", ""])[1:4]

    # XXX the following assumes http over port 80, no QUERY_STRING
    requesturi = "http://" + http_host + request_uri
    datauri = requesturi.split("?")[0]  # exclude QUERY_STRING
    # remove extension if any, to reset content negotiation in datauri:
    datauri = _strip_extension(datauri, extension)

    crumbs = list(create_index_crumbs(datarooturi, datauri, restype,
                                      channel, timeprefix))

    if fmt == "html":
        print("Content-type: text/html; charset=utf-8")
        print("")
    elif fmt == "turtle":
        print("Content-type: application/x-turtle; charset=utf-8")
        print("")
    elif fmt == "raw":
        print("Content-type: text/plain; charset=utf-8")
        print("")

    if restype == "users" and channel:
        # Single user page; the second path segment holds the user here.
        sink = ChannelsAndDaysSink()
        run(logfiles, sink)
        latestsink = EventSink(datarooturi, None, None, datauri)
        latestpipeline = OffFilter(
            UserFilter(channel,
                       ChannelMessageTailFilter(1,
                                                AddLinksFilter(latestsink))))
        run(logfiles, latestpipeline)
        render_user(sink, fmt, crumbs, datarooturi, channel, datauri,
                    latestsink)
    elif restype == "users":
        sink = ChannelsAndDaysSink()
        run(logfiles, sink)
        render_user_index(sink, fmt, crumbs, datarooturi, datauri)
    elif channel and timeprefix:
        # show log
        if fmt == "html":
            if restype == "backlog":
                # FIXME temporary hack to get the params right:
                # the path is /backlog/<nick>/<channel>, so swap the
                # segments and then drop the time restriction.
                nick = channel
                channel = timeprefix
                timeprefix = nick
                sink = AddLinksFilter(
                    BackLogHtmlSink(nick, up_to, crumbs, datarooturi,
                                    channel, timeprefix, datauri))
                timeprefix = ""
            else:
                sink = AddLinksFilter(
                    HtmlSink(crumbs, datarooturi, channel, timeprefix,
                             datauri))
        elif fmt == "turtle":
            sink = AddLinksFilter(TurtleSink(datarooturi, channel,
                                             timeprefix))
        elif fmt == "raw":
            sink = RawSink()

        pipeline = OffFilter(
            ChannelFilter('#' + channel, TimeFilter(timeprefix, sink)))
        run(logfiles, pipeline)
    else:
        # show index
        sink = ChannelsAndDaysSink()
        if channel:
            pipeline = ChannelFilter('#' + channel, sink)
        elif timeprefix:
            pipeline = TimeFilter(timeprefix, sink)
        else:
            pipeline = sink
        run(logfiles, pipeline)

        if fmt == "html":
            html_index(sink, crumbs, datarooturi, datauri, channel)
        elif fmt == "turtle":
            turtle_index(sink, datarooturi, datauri, channel)