for stat in stats: yield { '@source': 'stuff://', '@type': 'combined', '@tags': [], '@fields': stat['fields'], '@timestamp': stat['date'], '@message': stat['raw'] } if __name__ == "__main__": import sys from poteau import Kibana from logging import DEBUG, basicConfig basicConfig(filename='poteau.log', level=DEBUG) from pyelasticsearch import ElasticSearch es = ElasticSearch(sys.argv[1], timeout=240, max_retries=10) k = Kibana(es) k.mapping['@fields']['properties']['path'] = { 'type': 'string', 'analyzer': 'path' } k.mapping['@fields']['properties']['query'] = {'type': 'string'} k.mapping['@fields']['properties']['ip'] = {'type': 'ip'} k.mapping['@fields']['properties']['geo'] = {'type': 'geo_point'} for day, size in k.index_documents( 'page', documents_from_phpstat(phpstat(sys.stdin))): print("[%s] #%i" % (day, size))
def documents_from_phpstat(stats):
    """Lazily convert phpstat records into '@'-keyed documents.

    ``stats`` is any iterable whose items can be indexed with the keys
    ``'fields'``, ``'date'`` and ``'raw'``.  One dictionary is yielded per
    item, in input order.
    """
    for record in stats:
        # Constant part first, then the per-record values, keeping the
        # same key order as the lookups are performed in.
        document = {
            '@source': 'stuff://',
            '@type': 'combined',
            '@tags': [],
        }
        document['@fields'] = record['fields']
        document['@timestamp'] = record['date']
        document['@message'] = record['raw']
        yield document


if __name__ == "__main__":
    # Script mode: read phpstat data from stdin and index it through the
    # ElasticSearch instance whose URL is the first command-line argument.
    import sys
    from poteau import Kibana
    from logging import DEBUG, basicConfig

    basicConfig(filename='poteau.log', level=DEBUG)

    from pyelasticsearch import ElasticSearch

    client = ElasticSearch(sys.argv[1], timeout=240, max_retries=10)
    kibana = Kibana(client)

    # Extra field mappings, registered one by one in this order.
    extra_properties = [
        ('path', {'type': 'string', 'analyzer': 'path'}),
        ('query', {'type': 'string'}),
        ('ip', {'type': 'ip'}),
        ('geo', {'type': 'geo_point'}),
    ]
    for field_name, field_mapping in extra_properties:
        kibana.mapping['@fields']['properties'][field_name] = field_mapping

    page_documents = documents_from_phpstat(phpstat(sys.stdin))
    for day, size in kibana.index_documents('page', page_documents):
        print("[%s] #%i" % (day, size))
def documents_from_session(sessions, domain=""):
    """Yield the ElasticSearch document of every session.

    Each item of ``sessions`` must provide a ``to_es(source=...)`` method;
    its return value is yielded unchanged, in input order.

    :param sessions: iterable of session objects.
    :param domain: value forwarded to ``to_es`` as ``source`` (default ``""``).
    """
    for session in sessions:
        yield session.to_es(source=domain)


if __name__ == "__main__":
    # Usage: <script> LOGFILE [ELASTICSEARCH_URL]
    #
    # With a URL, every document parsed from LOGFILE is indexed under
    # 'page'.  Without one, this is a dry run that prints the 'extension'
    # field of at most the first 100 documents.
    import sys
    from poteau import Kibana
    from pyelasticsearch import ElasticSearch

    idx = len(sys.argv) > 2
    if idx:
        es = ElasticSearch(sys.argv[2], timeout=240, max_retries=10)
        k = Kibana(es)

    with open(sys.argv[1], 'r') as f:
        docs = documents_from_combined(
            combined(f, user_agent=True, geo=True, date=parse_date))
        if idx:
            for day, size in k.index_documents('page', docs):
                print("[%s] #%i" % (day, size))
        else:
            # print() with a single argument behaves the same on Python 2
            # and Python 3, unlike the former print statements.
            for cpt, doc in enumerate(docs, 1):
                print(doc['@fields']['extension'])
                if cpt == 100:
                    # Only reported when the 100-document limit is reached.
                    print(len(UA_CACHE))
                    break
from pyelasticsearch import ElasticSearch
from myslow import MySlow
import time


def documents(lines):
    """Lazily build one 'myslow' document per parsed entry.

    ``lines`` is an iterable of ``(ts, header, command)`` triples, where
    ``ts`` provides a ``timetuple()`` method.  The timestamp is rendered as
    ``YYYY-mm-ddTHH:MM:SS``; ``command`` becomes the message and ``header``
    the fields.
    """
    for when, fields, statement in lines:
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S", when.timetuple())
        yield {
            '@type': 'myslow',
            '@timestamp': stamp,
            '@message': statement,
            '@fields': fields,
        }


if __name__ == '__main__':
    # Script mode: parse stdin with MySlow and index the result.
    import sys
    from poteau import Kibana

    # The first command-line argument is the URL given to ElasticSearch.
    client = ElasticSearch(sys.argv[1], timeout=240, max_retries=10)
    kibana = Kibana(client)

    slow_documents = documents(MySlow(sys.stdin))
    for day, size in kibana.index_documents('myslow', slow_documents):
        print("[%s] #%i" % (day, size))
from mbox import Mbox


def documents_from_mails(mails):
    """Yield one 'mailadmin' document per ``(timestamp, mail)`` pair.

    ``mails`` is an iterable of ``(ts, mail)`` pairs.  ``ts`` is an
    indexable time tuple that ``time.strftime`` accepts; ``mail`` exposes
    ``headers`` (indexed by ``'From'`` and ``'Message-Id'``) and ``body``.
    """
    for stamp, message in mails:
        if stamp[3] < 0:
            # Index 3 (the hour slot of the tuple handed to time.strftime)
            # sometimes arrives negative; force it to 12 on a rebuilt tuple.
            patched = list(stamp)
            patched[3] = 12
            stamp = tuple(patched)

        document = {
            '@source': 'stuff://',
            '@type': 'mailadmin',
        }
        document['@tags'] = [message.headers['From']]
        document['@fields'] = message.headers
        document['@timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S", stamp)
        document['@message'] = message.body
        document['id'] = message.headers['Message-Id']
        yield document


if __name__ == '__main__':
    # Script mode: argv[1] is the ElasticSearch URL, argv[2] is handed to Mbox.
    import sys
    from poteau import Kibana

    client = ElasticSearch(sys.argv[1], timeout=240, max_retries=10)
    kibana = Kibana(client)
    # Kibana needs this kind of name
    mailbox = Mbox(sys.argv[2])

    mail_documents = documents_from_mails(mailbox)
    for day, size in kibana.index_documents('email', mail_documents):
        print("[%s] #%i" % (day, size))