from streamdrill import StreamDrillClient

def __init__(self, trend=None):
    # Default to None to avoid a shared mutable default argument.
    if trend is None:
        trend = {'name': 'my_trend', 'columns': 'hashtag'}
    self.trend = trend
    # trend = { 'name': 'my_trend', 'columns': ['hashtag']}
    client = StreamDrillClient("http://localhost:9669")
    client.delete(self.trend['name'])
    client.create(self.trend['name'], self.trend['columns'], 1000, ("hour", "minute", "second"))
    self.stream = client.stream()
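# Hypothetical usage sketch: the enclosing class is not shown above, so the
# name TrendTracker below is an assumption; the update/close calls mirror the
# stream API used in the other examples.
#
#   tracker = TrendTracker({'name': 'my_trend', 'columns': 'hashtag'})
#   tracker.stream.update(tracker.trend['name'], ['#streamdrill'])
#   tracker.stream.close()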
print("""Usage: %s <log-file> for example: %s http-blog.mikiobraun.de.log""".replace("%s", sys.argv[0])) sys.exit(0) site = "http://blog.mikiobraun.de/" pageViews = "page-views" referers = "referers" visitors = "visitors" # 65.55.215.69 - - [01/Aug/2013:00:02:07 +0200] "GET /robots.txt HTTP/1.1" 410 1129 "-" "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)" # group 1 2 3 4 5 6 7 8 9 logline = re.compile(r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (\S+) (\S+) \[([^\]]+)\] \"GET ([^\"]+) HTTP/1\.1\" ([0-9]+) ([0-9]+) \"([^\"]+)\" \"([^\"]+)\"") client = StreamDrillClient("http://localhost:9669") client.delete(pageViews) client.delete(referers) client.delete(visitors) client.create(pageViews, "path", 1000, ("hour", "minute", "second")) client.create(referers, "path:referer", 1000, ("hour", "minute", "second")) client.create(visitors, "path:addr", 1000, ("hour", "minute", "second")) stream = client.stream() for line in open(sys.argv[1]): result = logline.match(line) if result: #print(result.groups()) addr = result.group(1) path = result.group(5)
import datetime
import email.utils
import sys

from streamdrill import StreamDrillClient

#
# main
#
if len(sys.argv) != 3:
    print("""Usage: %s username password
""" % sys.argv[0])
    sys.exit(0)

# Set up trends. These are the default API access codes for the demo instance.
key = "f9aaf865-b89a-444d-9070-38ec6666e539"
secret = "9e13e4ac-ad93-4c8f-a896-d5a937b84c8a"
c = StreamDrillClient("http://localhost:9669", key, secret)

# Create a trend.
#
# The reason we're storing host and path separately is that it allows
# us to filter for the host.
c.create("twitter-links", "host:path", 10000, ("day", "hour", "minute"))
c.clear("twitter-links")

# This will create a link-out button linking to the real-time search page
# of Twitter for that link. Works like a charm (well, mostly).
c.setMeta("twitter-links", "linkTemplate", "http://twitter.com/search/realtime?q=http://$1$2")

def parsedate(ds):
    return datetime.datetime(*email.utils.parsedate(ds)[:6])
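# Quick check of parsedate (the example input below is assumed, not from the
# original): email.utils.parsedate understands RFC 2822-style date strings,
# as used in email and HTTP headers.
#
#   parsedate("Thu, 01 Aug 2013 00:02:07 +0200")
#   # -> datetime.datetime(2013, 8, 1, 0, 2, 7)  (the timezone offset is dropped)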
#!/usr/bin/env python
# encoding=utf-8
#
# very simple example which constructs a table with three elements and randomly generates
# three numbers in the range 0..10, 0..5, 0..20 with means at

import sys

from streamdrill import StreamDrillClient

if len(sys.argv) != 3:
    print("""Usage: %s <trend-name> <column-names-separated-by-colons>
for example: %s my-trend name:number:room""".replace("%s", sys.argv[0]))
    sys.exit(0)

trend = sys.argv[1]
columns = sys.argv[2]
# Count the columns; len(columns) alone would count characters, not fields.
ncolumns = len(columns.split(":"))

client = StreamDrillClient("http://localhost:9669")
client.delete(trend)
client.create(trend, columns, 1000, ("hour", "minute", "second"))

stream = client.stream()
for line in sys.stdin:
    # strip() drops the trailing newline so the last value stays clean.
    vals = line.strip().split(",")[0:ncolumns]
    stream.update(trend, vals)
stream.close()
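# A companion generator sketch, inferred from the header comment above (the
# original's exact distributions and means are elided, so plain uniform
# randints are used here as an assumption). Pipe its output into the script,
# e.g.:
#   python generate.py | python simple.py my-trend a:b:c
import random
import sys

while True:
    print("%d,%d,%d" % (random.randint(0, 10),
                        random.randint(0, 5),
                        random.randint(0, 20)))
    sys.stdout.flush()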