def main(): project = "haproxy" tablename = "http_host" datalogger = DataLoggerWeb("https://datalogger-api.tirol-kliniken.cc/DataLogger") # datestring = datalogger.get_last_business_day_datestring() # two days back for haproxy logs datestring = (datetime.date.today() - datetime.timedelta(int(2))).isoformat() caches = datalogger.get_caches(project, tablename, datestring) vhosts = [eval(key)[0].split(":")[0] for key in caches["ts"]["keys"].keys()] index = 1 out_data = [] out_data.append(("index", "vhost", "domain", "fqdn", "ip", "ip_reverse_hostname", "status_code", "x_backend_server", "duration")) filter_vhost = generate_filter_vhost() for vhost in vhosts: if filter_vhost(vhost) is True: logging.info("vhost %s filtered out", vhost) continue ip = "unknown" hostname = "unknown" duration = -1.0 status_code = 0 x_backend_server = None domain = ".".join(vhost.split(".")[1:]) try: fqdn = socket.getfqdn(vhost) ip = socket.gethostbyname(vhost) hostname = socket.gethostbyaddr(ip)[0] except (socket.herror, socket.gaierror): pass if (ip == "unknown") or (not ip.startswith("10.")): logging.info("could not resolv hostname %s , probably fake", vhost) continue # could be obsolete elif (not ip.startswith("10.")): logging.info("%s is external, skipping", vhost) continue try: starttime = time.time() res = requests.request("GET", "http://%s/" % vhost, timeout=10, stream=False) duration = time.time()-starttime status_code = res.status_code except (requests.exceptions.ConnectionError, requests.exceptions.InvalidURL): logging.info("ConnectionError or InvalidURL occured %s", vhost) except requests.exceptions.ReadTimeout: logging.info("RequestTimeout occured %s", vhost) try: x_backend_server = res.headers['x-backend-server'] if len(x_backend_server) == 8: # TODO not exact, hack ip_backend_server = decode_ip(x_backend_server) x_backend_server = socket.gethostbyaddr(ip_backend_server)[0] # only hostname part else: x_backend_server = socket.getfqdn(x_backend_server) except KeyError: pass logging.debug("%40s : %20s : %40s : %15s : %40s : %d : %s : %02f", vhost, domain, fqdn, ip, hostname, status_code, x_backend_server, duration) out_data.append((index, vhost, domain, fqdn, ip, hostname, status_code, x_backend_server, duration)) index += 1 json.dump({"last_update_ts" : str(datetime.date.today()), "data" : out_data}, open("/var/www/webapps/webmap/webmap.json", "w"))
def main(): """ what do you think, what main should do """ yesterday_datestring = (datetime.date.today() - datetime.timedelta(1)).isoformat() parser = argparse.ArgumentParser(description='generate TimeseriesArrays on local backend') parser.add_argument('--url', default="https://datalogger-api.tirol-kliniken.cc/DataLogger", help="url of DataLogger Webapplication") parser.add_argument('--logdir', default="/data1/haproxy_daily/", help="directory where to find day sorted haproxylogs") parser.add_argument("-b", '--back', help="how many days back from now") parser.add_argument("-s", '--startdate', help="start date in isoformat YYY-MM-DD") parser.add_argument("-e", '--enddate', default=yesterday_datestring, help="stop date in isoformat YYY-MM-DD") parser.add_argument("-q", '--quiet', action='store_true', help="set to loglevel ERROR") parser.add_argument("-v", '--verbose', action='store_true', help="set to loglevel DEBUG") args = parser.parse_args() if args.quiet is True: logging.getLogger("").setLevel(logging.ERROR) if args.verbose is True: logging.getLogger("").setLevel(logging.DEBUG) if (args.back is not None) == (args.startdate is not None): logging.error("option -b and -e are mutual exclusive, use only one") sys.exit(1) startdate = None if args.back is not None: startdate = (datetime.date.today() - datetime.timedelta(int(args.back))).isoformat() elif args.startdate is not None: startdate = args.startdate else: logging.error("you have to provide either -b or -s") sys.exit(1) # lets get started datalogger = DataLoggerWeb(args.url) project = "haproxy" tablename = "http_host" baseurl = "%s/upload_raw_file/" % args.url logdir = args.logdir # where to find haproxy logs keys = ("http_host", ) values = ("bytes_read", "rsp_1xx", "rsp_2xx", "rsp_3xx", "rsp_4xx", "rsp_5xx", "rsp_other", "srv_queue", "backend_queue", "actconn", "feconn", "beconn", "srv_conn", "retries", "tq", "tw", "tc", "tr", "tt", "hits") ts_keyname = "ts" for datestring in datewalk(startdate, args.enddate): caches = datalogger.get_caches(project, tablename, datestring) if caches["tsa"]["raw"] is not None: logging.info("Skipping this datestring, raw data is already available") continue try: stringio = generate_datalogger_csv(logdir, datestring, keys, values, ts_keyname) #upload data files = {'myfile': stringio} url = "/".join((baseurl, project, tablename, datestring)) logging.info("calling %s", url) response = requests.post(url, files=files) print response.content except StandardError as exc: logging.error("Exception on file datestring %si, skipping this date", datestring) except zlib.error as exc: logging.error(exc)
def main(): project = "vicenter" tablename = "virtualMachineMemoryStats" dataloggerweb = DataLoggerWeb(DATALOGGER_URL) datestring = dataloggerweb.get_last_business_day_datestring() year, month, day = datestring.split("-") date1 = datetime.date(int(year), int(month), int(day)) print date1 date2 = date1 - datetime.timedelta(days=7) print date2.isoformat() # report_group("vicenter", "virtualMachineCpuStats", datestring, date2.isoformat(), "cpu.used.summation") # report(project, tablename, datestring, date2.isoformat(), "mem.active.average") report( "vicenter", "virtualMachineDatastoreStats", datestring, date2.isoformat(), "datastore.totalReadLatency.average" )
def main(): #project = "vicenter" #tablename = "virtualMachineMemoryStats" dataloggerweb = DataLoggerWeb(DATALOGGER_URL) datestring = dataloggerweb.get_last_business_day_datestring() year, month, day = datestring.split("-") date1 = datetime.date(int(year), int(month), int(day)) date2 = date1 - datetime.timedelta(days=7) print "Comparing %s with %s" % (date1, date2.isoformat()) #report_group("vicenter", "virtualMachineCpuStats", datestring, date2.isoformat(), "cpu.used.summation") report("vicenter", "virtualMachineCpuStats", datestring, date2.isoformat(), "cpu.used.summation") report("vicenter", "virtualMachineMemoryStats", datestring, date2.isoformat(), "mem.active.average") report("vicenter", "virtualMachineDatastoreStats", datestring, date2.isoformat(), "datastore.totalReadLatency.average") report("vicenter", "virtualMachineDatastoreStats", datestring, date2.isoformat(), "datastore.write.average") report("vicenter", "virtualMachineNetworkStats", datestring, date2.isoformat(), "net.usage.average") report("sanportperf", "fcIfC3AccountingTable", datestring, date2.isoformat(), "fcIfC3InOctets")
def report_group(project, tablename, datestring1, datestring2, value_key):
    # get data, from datalogger, or dataloggerhelper
    datalogger = DataLogger(BASEDIR, project, tablename)
    dataloggerweb = DataLoggerWeb(DATALOGGER_URL)
    print "loading data"
    starttime = time.time()
    #tsa1 = datalogger.load_tsa(datestring1)
    tsa1 = dataloggerweb.get_tsa(project, tablename, datestring1)
    tsa1 = datalogger.group_by(datestring1, tsa1, ("hostname",), lambda a, b: (a + b) / 2)
    #tsa2 = datalogger.load_tsa(datestring2)
    tsa2 = dataloggerweb.get_tsa(project, tablename, datestring2)
    tsa2 = datalogger.group_by(datestring2, tsa2, ("hostname",), lambda a, b: (a + b) / 2)
    print "Duration load %f" % (time.time() - starttime)
    starttime = time.time()
    cm = CorrelationMatrixTime(tsa1, tsa2, value_key)
    print "TOP most differing keys between %s and %s" % (datestring1, datestring2)
    for key, coefficient in sorted(cm.items(), key=lambda items: items[1], reverse=True)[:20]:
        print key, coefficient
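# The `report` function called in the two main() variants above is not shown.
# Judging from report_group it presumably runs the same two-day correlation
# comparison without the group_by step. A minimal sketch under that assumption,
# using only calls that appear elsewhere in these snippets:

def report(project, tablename, datestring1, datestring2, value_key):
    """compare value_key between two days and print the 20 most differing keys (assumed behaviour)"""
    dataloggerweb = DataLoggerWeb(DATALOGGER_URL)
    tsa1 = dataloggerweb.get_tsa(project, tablename, datestring1)
    tsa2 = dataloggerweb.get_tsa(project, tablename, datestring2)
    cm = CorrelationMatrixTime(tsa1, tsa2, value_key)
    print "TOP most differing keys between %s and %s" % (datestring1, datestring2)
    for key, coefficient in sorted(cm.items(), key=lambda items: items[1], reverse=True)[:20]:
        print key, coefficient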
def main(project, tablename, datestring, datalogger):
    # suffix is assumed to identify the dataset in the printed status lines
    suffix = "%s/%s/%s" % (datestring, project, tablename)
    caches = datalogger.get_caches(project, tablename, datestring)
    if len(caches["tsa"]["keys"]) == 0:
        print(suffix, "TSA Archive missing, calling get_tsastats")
        # datalogger.get_tsa(project, tablename, datestring)
        datalogger.get_tsastats(project, tablename, datestring)
    else:
        # print("TSA filename : %s" % caches["tsa"]["keys"])
        if len(caches["tsastat"]["keys"]) == 0:
            print(suffix, "TSASTAT Archive missing, calling get_tsastats")
            datalogger.get_tsastats(project, tablename, datestring)
        else:
            # print("TSASTAT filename : %s" % caches["tsastat"]["keys"])
            if len(caches["ts"]["keys"]) == 0:
                print(suffix, "there are no ts archives, something went wrong, or tsa is completely empty, calling get_tsastats")
                datalogger.get_tsastats(project, tablename, datestring)
            else:
                # print("TS filename : %s" % len(caches["ts"]["keys"]))
                # print("TSSTAT filename : %s" % len(caches["tsstat"]["keys"]))
                print(suffix, "All fine")

if __name__ == "__main__":
    datalogger = DataLoggerWeb()
    # for datestring in DataLogger.datewalker("2015-09-01", datalogger.get_last_business_day_datestring()):
    for datestring in datalogger.get_datewalk("2015-11-01", datalogger.get_last_business_day_datestring()):
        for project in datalogger.get_projects():
            for tablename in datalogger.get_tablenames(project):
                # datalogger = DataLogger(BASEDIR, project, tablename)
                main(project, tablename, datestring, datalogger)
    # cProfile.run("main()")
if len(caches["tsa"]["keys"]) == 0: print(suffix, "TSA Archive missing, calling get_tsa and get_tsastats") #datalogger.get_tsa(project, tablename, datestring) datalogger.get_tsastats(project, tablename, datestring) else: #print("TSA filename : %s" % caches["tsa"]["keys"]) if len(caches["tsastat"]["keys"]) == 0: print(suffix, "TSASTAT Archive missing, calling get_tsastats") datalogger.get_tsastats(project, tablename, datestring) else: #print("TSASTAT filename : %s" % caches["tsastat"]["keys"]) if len(caches["ts"]["keys"]) == 0: print(suffix, "there are no ts archives, something went wrong, or tsa is completely empty, calling get_tsastats") datalogger.get_tsastats(project, tablename, datestring) else: #print("TS filename : %s" % len(caches["ts"]["keys"])) #print("TSSTAT filename : %s" % len(caches["tsstat"]["keys"])) print(suffix, "All fine") if __name__ == "__main__": datalogger = DataLoggerWeb() #for datestring in DataLogger.datewalker("2015-09-01", datalogger.get_last_business_day_datestring()): yesterday_datestring = (datetime.date.today() - datetime.timedelta(1)).isoformat() two_weeks_ago_daetstring = (datetime.date.today() - datetime.timedelta(28)).isoformat() for datestring in datalogger.get_datewalk(two_weeks_ago_daetstring, yesterday_datestring): for project in datalogger.get_projects(): for tablename in datalogger.get_tablenames(project): #datalogger = DataLogger(BASEDIR, project, tablename) main(project, tablename, datestring, datalogger) #cProfile.run("main()")
        for value_key in tsastat[key].keys():
            for stat_func, value in tsastat[key][value_key].items():
                # group values by function
                grouped_value = group_funcs[stat_func](value, data[group_key][value_key][stat_func])
                # store
                data[group_key][value_key][stat_func] = grouped_value
    # get to same format as TimeseriesArrayStats.to_json returns
    outdata = [tsastat.index_keys, tsastat.value_keys]
    outdata.append([(key, json.dumps(value)) for key, value in data.items()])
    # use TimeseriesArrayStats.from_json to get a TimeseriesArrayStats object
    new_tsastat = TimeseriesArrayStats.from_json(json.dumps(outdata))
    return new_tsastat

if __name__ == "__main__":
    datalogger = DataLoggerWeb(DATALOGGER_URL)
    #caches = datalogger.get_caches("sanportperf", "fcIfC3AccountingTable", datalogger.get_last_business_day_datestring())
    tsastats = datalogger.get_tsastats("sanportperf", "fcIfC3AccountingTable", datalogger.get_last_business_day_datestring())
    g_tsastat1 = groupby(tsastats, (u'hostname',))
    tsastats = datalogger.get_tsastats("sanportperf", "fcIfC3AccountingTable", datalogger.get_last_business_day_datestring())
    g_tsastat2 = tsastats.group_by_index_keys((u'hostname',))
    print(g_tsastat1.keys())
    print(g_tsastat2.keys())
    assert g_tsastat1 == g_tsastat2
    g_tsastat = groupby(tsastats, (u'ifDescr',))
    print(g_tsastat.keys())
    g_tsastat = groupby(tsastats, (u'hostname', u'ifDescr',))
    assert g_tsastat == tsastats
    print(g_tsastat.keys())
    #tsastats.remove_by_value(u'fcIfC3InOctets', "sum", 0.0)
    #csvdata = tsastats.to_csv("sum", u'fcIfC3OutOctets', reverse=True)
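# The opening of groupby() is missing above: the fragment starts inside its
# per-key loop, so `group_funcs`, `data`, and `group_key` appear without their
# definitions. For reference, a hypothetical self-contained reconstruction of
# the whole function; the group_funcs mapping and the group_key projection are
# assumptions (the project's real set of stat functions is likely larger), the
# inner loops and the to_json/from_json round-trip mirror the code above:

def groupby(tsastat, index_keys):
    """group a TimeseriesArrayStats-like object by a subset of its index_keys (sketch)"""
    group_funcs = {
        # hypothetical pairwise combiners per stat function name
        "min": min,
        "max": max,
        "sum": lambda a, b: a + b,
        "avg": lambda a, b: (a + b) / 2.0,
        "mean": lambda a, b: (a + b) / 2.0,
    }
    data = {}
    for key in tsastat.keys():
        # project the full index key onto the requested subset of index_keys
        group_key = tuple(key[tsastat.index_keys.index(index_key)] for index_key in index_keys)
        if group_key not in data:
            # first occurrence: copy the stats as the running aggregate
            data[group_key] = dict((value_key, dict(tsastat[key][value_key])) for value_key in tsastat[key].keys())
            continue
        for value_key in tsastat[key].keys():
            for stat_func, value in tsastat[key][value_key].items():
                data[group_key][value_key][stat_func] = group_funcs[stat_func](value, data[group_key][value_key][stat_func])
    # same format as TimeseriesArrayStats.to_json returns
    outdata = [tsastat.index_keys, tsastat.value_keys]
    outdata.append([(key, json.dumps(value)) for key, value in data.items()])
    return TimeseriesArrayStats.from_json(json.dumps(outdata))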