def get_data(cat=None): if cat == None : qry = "select user,ucat_term, seslen,bin+bout, arr_ts from data_log group by user" else: qry = "select user,ucat_term,seslen,bin+bout, arr_ts, from data_log where ucat_term = '" + cat + "' group by user" s = sql.RunSQL("azure.db") x = s.sqlq(qry) acct_data = dict() for r in x: usr = r[0].strip() cat = r[1].strip() slen = float(r[2]) b = float(r[3]) ats = float(r[4]) if usr not in acct_data: sa=[slen] ba=[b] atsa=[ats] acct_data[usr] = [cat, sa, ba, atsa] else: acct_data[usr][1].append(slen) acct_data[usr][2].append(b) acct_data[usr][3].append(ats) fx = [] for item in acct_data.items(): usr = item[0] val = item[1] cat = val[0] nslen = np.array(val[1]) nb = np.array(val[2]) nats = np.array(val[3]) nslen.sort() nb.sort() nats.sort() ifx = [usr, cat, np.sum(nslen), np.sum(nb), nats.min(), nats.max()] fx.append(ifx) y = s.sqlq("select ucat,price from user_pfx") pmap = dict() for r1 in y: pmap[r1[0].strip()] = float(r1[1]) return (fx, pmap)
def flow_ia_count(ap=None): ps = sql.RunSQL("unc-proc.db") pf = sql.RunSQL("unc.db") if ap == None: qry = "select * from sessions" else: qry = "select * from sessions where ap = " + str(int(ap)) ses1 = ps.sqlq(qry) seslist = np.array(ses1) counts = [] ia = [] qry = "select ts from flows where client = %%client%% and ap = %%ap%% and ts >= %%sts%% and ts <= %%ets%%" for ses in seslist: sts = ses[TS] ets = ses[TS] + ses[SLEN] + 300 client = ses[CL] sap = ses[AP] this_q = qry.replace("%%client%%", str(client)).replace( "%%ap%%", str(sap)).replace("%%sts%%", str(sts)).replace("%%ets%%", str(ets)) flows1 = pf.sqlq(this_q) if len(flows1) == 0: pass else: flows = np.array(flows1) n = len(flows) counts.append(n) if n > 1: this_ia = flows[1:n] - flows[0:n - 1] for each_ia in this_ia: ia.append(each_ia) return (counts, ia)
def main(): day_list = [13, 14, 15, 16, 17, 18, 19, 20] sql_str = "select bytes from flows where day = " unc = sql.RunSQL("unc.db") for d in day_list: fname = "flow_" + str(d) dsql_str = sql_str + str(d) print "Running query --> SQL: ", dsql_str f = unc.sqlq(dsql_str) print "Writing data --> file: ", fname util.write_data(fname, f) del (f)
def get_data1(cat=None): if cat == None : qry = "select user,ucat_term, sum(seslen),sum(bin+bout), min(arr_ts),max(arr_ts+seslen) from data_log where seslen <= 18100 group by user" else: qry = "select user,ucat_term, sum(seslen),sum(bin+bout),min(arr_ts),max(arr_ts+seslen) from data_log where seslen <= 18100 and ucat_term = '" + cat + "' group by user" # print qry s = sql.RunSQL("azure.db") x = s.sqlq(qry) fx = [] for r in x: ifx = [r[0].strip(), r[1].strip(), float(r[2]), float(r[3]), float(r[4]), float(r[5])] fx.append(ifx) y = s.sqlq("select ucat,price from user_pfx") pmap = dict() for r1 in y: pmap[r1[0].strip()] = float(r1[1]) return (fx, pmap)
def main(dt): slen = {"catm": "select seslen from data_log where ucat_term='catm\n' and seslen>0 and seslen<18000 order by seslen", \ "catd": "select seslen from data_log where ucat_term='catd\n' and seslen>0 and seslen<18000 order by seslen", \ "cath": "select seslen from data_log where ucat_term='cath\n' and seslen>0 and seslen<18000 order by seslen", \ "all": "select seslen from data_log where seslen>0 and seslen<18000 order by seslen"} inb = {"catm": "select bin from data_log where ucat_term='catm\n' and bin>0 and seslen>0 and seslen<18000 order by bin", \ "catd": "select bin from data_log where ucat_term='catd\n' and bin>0 and seslen>0 and seslen<18000 order by bin", \ "cath": "select bin from data_log where ucat_term='cath\n' and bin>0 and seslen>0 and seslen<18000 order by bin", \ "all": "select bin from data_log where bin>0 and seslen>0 and seslen<18000 order by bin"} outb = {"catm": "select bout from data_log where ucat_term='catm\n' and bout>0 and seslen>0 and seslen<18000 order by bout", \ "catd": "select bout from data_log where ucat_term='catd\n' and bout>0 and seslen>0 and seslen<18000 order by bout", \ "cath": "select bout from data_log where ucat_term='cath\n' and bout>0 and seslen>0 and seslen<18000 order by bout", \ "all": "select bout from data_log where bout>0 and seslen>0 and seslen<18000 order by bout"} to_inb = {"all": "select bin from data_log where seslen >= 18000 and bin>0 order by bin"} to_outb = {"all": "select bout from data_log where seslen >= 18000 and bout>0 order by bout"} tslen = {"catm": "select sum(seslen) t from data_log where ucat_term='catm\n' and seslen>0 and seslen<18000 group by user order by t", \ "catd": "select sum(seslen) t from data_log where ucat_term='catd\n' and seslen>0 and seslen<18000 group by user order by t", \ "cath": "select sum(seslen) t from data_log where ucat_term='cath\n' and seslen>0 and seslen<18000 group by user order by t", \ "all": "select sum(seslen) t from data_log where seslen>0 and seslen<18000 group by user order by t"} tinb = {"catm": "select sum(bin) t from data_log where ucat_term='catm\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t", \ "catd": "select sum(bin) t from data_log where ucat_term='catd\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t", \ "cath": "select sum(bin) t from data_log where ucat_term='cath\n' and bin > 0 and seslen>0 and seslen<18000 group by user order by t", \ "all": "select sum(bin) t from data_log where bin > 0 and seslen>0 and seslen<18000 group by user order by t"} toutb = {"catm": "select sum(bout) t from data_log where ucat_term='catm\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t", \ "catd": "select sum(bout) t from data_log where ucat_term='catd\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t", \ "cath": "select sum(bout) t from data_log where ucat_term='cath\n' and bout>0 and seslen>0 and seslen<18000 group by user order by t", \ "all": "select sum(bout) t from data_log where bout>0 and seslen>0 and seslen<18000 group by user order by t"} dtmap = {"slen": slen, "inb": inb, "outb": outb, "to_inb": to_inb, "to_outb": to_outb, "tslen": tslen, "tinb": tinb, "toutb": toutb} if dt not in dtmap: raise NotImplementedError("Type - " + dt + " - is not implemented") qmap = dtmap[dt] s = sql.RunSQL("azure.db") for i in qmap.items(): q = i[1] y = s.sqlq(q) x = np.array(y) x.sort() # just making sure df = i[0] + "_" + dt ccf = i[0] + "_ccdf" ecf = i[0] + "_ecdf" cc = util.ccdf(x) ec = util.ecdf(x) util.write_data(df, x) util.write_data(ccf, cc) util.write_data(ecf, ec) mle = ml.ModLav.fromFit(x,fit="mlefit") mme = ml.ModLav.fromFit(x,fit="mmefit") mle_mt = ml.ModLav.fromFit(x,fit="mlefit",mt=True) mme_mt = ml.ModLav.fromFit(x,fit="mmefit",mt=True) omle = ml.optfit(x,0.1*x.max(),10*x.max(),500,mlefit=True,mt=False); omle_mt = ml.optfit(x,0.1*x.max(),10*x.max(),500,mlefit=True,mt=True); omme = ml.optfit(x,0.1*x.max(),10*x.max(),500,mlefit=False,mt=False); omme_mt = ml.optfit(x,0.1*x.max(),10*x.max(),500,mlefit=False,mt=True); mle_opt = omle["fit"][0] xm_mle_opt = omle["fit"][1] mle_opt_mt = omle_mt["fit"][0] xm_mle_opt_mt = omle_mt["fit"][1] mme_opt = omme["fit"][0] xm_mme_opt = omme["fit"][1] mme_opt_mt = omme_mt["fit"][0] xm_mme_opt_mt = omme_mt["fit"][1] yyy = [("MLE", mle, x.max()), ("MME", mme, x.max()), ("MLE-MT", mle_mt, x.max()), ("MME-MT", mme_mt, x.max()), ("MLE-OPT", mle_opt, xm_mle_opt), ("MLE-OPT-MT", mle_opt_mt, xm_mle_opt_mt), ("MME-OPT", mme_opt, xm_mme_opt), ("MME-OPT-MT", mme_opt_mt, xm_mme_opt_mt)] n,amin,amax,mu,sigma = len(x), x.min(), x.max(), x.mean(), x.std() cv = sigma/mu q = ms.mquantiles(x, [0.1, 0.5, 0.9]) op_str = [] op_str.append("BASIC STATISTICS") op_str.append("----------------------------------------------------------------------") op_str.append("Size: " + str(n)) op_str.append("Range: " + str(amin) + " - " + str(amax)) op_str.append("Quantiles: 10% - " + str(q[0]) + " 50% - " + str(q[1]) + " 90% - " + str(q[2])) op_str.append("Mean: " + str(mu)) op_str.append("Sigma: " + str(sigma)) op_str.append("CV: " + str(cv)) op_str.append("\n") for yy in yyy: typ = i[0] lbl = yy[0] m = yy[1] xmx = yy[2] op_str.append(lbl) op_str.append("----------------------------------------------------------------------") op_str.append("Modlav params: " + str(m)) op_str.append("Xmax: " + str(xmx)) op_str.append("Xmax/Max: " + str(xmx/amax)) op_str.append("FIT metric: " + str(m.fitmetric(points=x))) op_str.append("K-S metric: " + str(m.ksmetric(points=x))) op_str.append("----------------------------------------------------------------------") op_str.append("\n") flbl = lbl.lower().replace("-", "_") fname_pfx = typ + "_" + flbl lx = util.gen_points(math.log10(x.min()),math.log10(xmx),2000) ex = np.power(10, lx) mcc = m.ccdf(ex) mec = m.cdf(ec[:,0]) fmcc = np.array([ex, mcc]).transpose() fmec = np.array([ec[:,0], mec]).transpose() util.write_data(fname_pfx+"_ccdf", fmcc) util.write_data(fname_pfx+"_ecdf", fmec) op1_str = [] for s1 in op_str: op1_str.append(s1 + "\n") txf = open(typ+"_metric", "w+") txf.writelines(op1_str) txf.close()
def main(): r = sql.RunSQL("azure.db") wday=["sun","mon","tue","wed","thu","fri","sat"] for w in wday: chkskew_for_wday(r,w)