コード例 #1
0
 def __init__(self, trend=None):
     """Recreate the configured trend on the local server and open a stream.

     Args:
         trend: Mapping with a ``'name'`` entry and a ``'columns'`` entry;
             both are passed unchanged to ``client.create``. When omitted,
             ``{'name': 'my_trend', 'columns': 'hashtag'}`` is used.
     """
     # Build the default per call: a dict literal in the signature would be
     # one shared object stored on every instance created without arguments.
     if trend is None:
         trend = {'name': 'my_trend', 'columns': 'hashtag'}
     self.trend = trend
     # NOTE(review): an earlier comment here showed 'columns' as a list
     # (['hashtag']) rather than a string -- confirm which form
     # StreamDrillClient.create expects.
     client = StreamDrillClient("http://localhost:9669")
     # Delete first, then create -- presumably so a trend left over from a
     # previous run is replaced rather than reused (TODO confirm).
     client.delete(self.trend['name'])
     # NOTE(review): 1000 looks like the trend size and the tuple like the
     # tracked timescales -- verify against the StreamDrill client API.
     client.create(self.trend['name'], self.trend['columns'], 1000, ("hour", "minute", "second"))
     self.stream = client.stream()
コード例 #2
0
# Trend names used with the StreamDrill client below.
referers = "referers"
visitors = "visitors"

# Sample access-log line and the capture group each field lands in:
# 65.55.215.69 - - [01/Aug/2013:00:02:07 +0200] "GET /robots.txt HTTP/1.1" 410 1129 "-" "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)"
# group 1      2 3 4                                 5                     6   7     8   9
# Only lines whose request is `GET ... HTTP/1.1` match this pattern.
logline = re.compile(r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (\S+) (\S+) \[([^\]]+)\] \"GET ([^\"]+) HTTP/1\.1\" ([0-9]+) ([0-9]+) \"([^\"]+)\" \"([^\"]+)\"")

# NOTE(review): `pageViews` and `site` are used below but are not defined in
# this excerpt; they are expected to be defined earlier in the file.
client = StreamDrillClient("http://localhost:9669")
# Delete each trend before creating it -- presumably so leftovers from a
# previous run are replaced (TODO confirm against the client API).
client.delete(pageViews)
client.delete(referers)
client.delete(visitors)
client.create(pageViews, "path", 1000, ("hour", "minute", "second"))
client.create(referers, "path:referer", 1000, ("hour", "minute", "second"))
client.create(visitors, "path:addr", 1000, ("hour", "minute", "second"))

stream = client.stream()

# Read the log file named by the first command-line argument. The `with`
# block closes the file even if an update raises part-way through.
with open(sys.argv[1]) as logfile:
    for line in logfile:
        result = logline.match(line)
        if not result:
            # Line does not match the expected log format; skip it.
            continue

        addr = result.group(1)
        path = result.group(5)
        referer = result.group(8)

        # Only requests for .html paths are recorded.
        if not path.endswith(".html"):
            continue

        print(addr, path, referer)
        stream.update(pageViews, [path])
        # Record a referer only when one is present ("-" means none) and it
        # does not start with `site`.
        if referer != "-" and not referer.startswith(site):
            stream.update(referers, [path, referer])
        stream.update(visitors, [path, addr])
コード例 #3
0
# Sample access-log line and the capture group each field lands in:
# 65.55.215.69 - - [01/Aug/2013:00:02:07 +0200] "GET /robots.txt HTTP/1.1" 410 1129 "-" "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)"
# group 1      2 3 4                                 5                     6   7     8   9
# Only lines whose request is `GET ... HTTP/1.1` match this pattern.
logline = re.compile(
    r"([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+) (\S+) (\S+) \[([^\]]+)\] \"GET ([^\"]+) HTTP/1\.1\" ([0-9]+) ([0-9]+) \"([^\"]+)\" \"([^\"]+)\""
)

# NOTE(review): `pageViews` and `site` are used below but are not defined in
# this excerpt; they are expected to be defined earlier in the file.
client = StreamDrillClient("http://localhost:9669")
# Delete each trend before creating it -- presumably so leftovers from a
# previous run are replaced (TODO confirm against the client API).
client.delete(pageViews)
client.delete(referers)
client.delete(visitors)
client.create(pageViews, "path", 1000, ("hour", "minute", "second"))
client.create(referers, "path:referer", 1000, ("hour", "minute", "second"))
client.create(visitors, "path:addr", 1000, ("hour", "minute", "second"))

stream = client.stream()

# Read the log file named by the first command-line argument. The `with`
# block closes the file even if an update raises part-way through.
with open(sys.argv[1]) as logfile:
    for line in logfile:
        result = logline.match(line)
        if not result:
            # Line does not match the expected log format; skip it.
            continue

        addr = result.group(1)
        path = result.group(5)
        referer = result.group(8)

        # Only requests for .html paths are recorded.
        if not path.endswith(".html"):
            continue

        print(addr, path, referer)
        stream.update(pageViews, [path])
        # Record a referer only when one is present ("-" means none) and it
        # does not start with `site`.
        if referer != "-" and not referer.startswith(site):
            stream.update(referers, [path, referer])
        stream.update(visitors, [path, addr])