def run_combine(setts, fn_b, i, counts_cmp, pool): fn_basis = "%s-%s" % (fn_b, i) k = numpy.random.randint(setts["k_low"], setts["k_up"]+1) patt_fn = [] combine_seqs = {} patterns_list = [] offset_t0 = 0 prev_span = 0 t0_list = [] for ii in numpy.random.choice(len(pool), size=k): fn_sub_basis = re.sub("_ds.txt", "", pool[ii]) seqs = readSequence({"filename": fn_sub_basis+"_ds.txt", "SEP": " "}) ds = DataSequence(seqs) pcsH = load_pc(fn_sub_basis+"_pcH.txt", ds) patt_fn.append(fn_sub_basis) next_t0 = offset_t0 + numpy.random.randint(numpy.ceil(prev_span*setts["t0_low"]), numpy.ceil(prev_span*setts["t0_up"])+1) t0_list.append(next_t0) for ev, seq in seqs.items(): if ev not in combine_seqs: combine_seqs[ev] = set() combine_seqs[ev].update(seq+next_t0) for (p, pt0, pE) in pcsH.getPatterns(): patterns_list.append((p, pt0+next_t0, pE)) offset_t0 = next_t0 prev_span = ds.getTend() comb_setts = {"t0s": t0_list, "patt_fn": patt_fn, "k": k} combine_ss = dict([(ev, numpy.array(sorted(s))) for (ev, s) in combine_seqs.items()]) ds = DataSequence(combine_ss) pcH = PatternCollection(patterns_list) writeSYNTHin(ds, pcH, fn_basis) #### mine data sequence SXPS = SyntheXPS() mine_seqs(ds, fn_basis, writePCout_fun=SXPS.addPC) pcF = SXPS.getPC() ##### out_v, stats, results = compare_pcs(ds, pcH, pcF) writeSYNTHout(setts, ds, pcH, pcF, out_v, fn_basis, save_pc=True, comb_setts=comb_setts) counts_cmp[out_v] = counts_cmp.get(out_v, 0)+1 if out_v != 0 : #== -1: print "RUN %s\tcl: %f vs. %f\t%d >> %s" % ( i, stats[0]["cl"], stats[1]["cl"], out_v, CMP_OUT_CODES[out_v]) return stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals()
def prepare_synthetic_seq(patts_dets, bck_dets):
    """Build a synthetic data sequence with planted patterns plus background noise.

    Parameters:
        patts_dets: list of keyword-dicts, each passed to prepare_pattern();
            must plant at least one occurrence (crashes on empty otherwise).
        bck_dets: list of keyword-dicts, each passed to prepare_bck() to add
            background occurrences spanning the planted time range.

    Returns (pattern collection of the planted patterns, data sequence).
    """
    planted = []
    occurrences = set()
    for dets in patts_dets:
        patt, occs = prepare_pattern(**dets)
        occurrences.update(occs)
        planted.append(patt)
    # planted occurrences fix the time span the background noise must cover
    ordered = sorted(occurrences)
    T_first, T_last = ordered[0][0], ordered[-1][0]
    for dets in bck_dets:
        occurrences.update(prepare_bck(T_first, T_last, len(ordered), **dets))
    ds = DataSequence(sorted(occurrences))
    # remap pattern event labels to the numeric ids assigned by the sequence
    ev_to_num = ds.getEvToNum()
    for (Ptree, tp0, Ep) in planted:
        Ptree.mapEvents(ev_to_num)
    return PatternCollection(planted), ds
def run_one(setts, fn_b, i, counts_cmp): fn_basis = "%s-%s" % (fn_b, i) if os.path.isfile(fn_basis+"_pcH.txt"): ds = DataSequence(readSequence({"filename": fn_basis+"_ds.txt", "SEP": " "})) pcH = load_pc(fn_basis+"_pcH.txt", ds) else: #### generate data sequence k = setts["level"] Rs = [] if k == 1: Rs = [numpy.random.randint(int(.66*setts["nb_occs"]), setts["nb_occs"])] elif k > 1: mm = int(numpy.floor((setts["nb_occs"]/(1.*numpy.prod(range(1,k+1))))**(1./k))) if mm < 3: nn = int(numpy.floor(setts["nb_occs"]**(1./k))) if nn < 3: Rs = [3 for kk in range(k)] else: xx = [(.33*nn, nn+1) for kk in range(k)] Rs = [numpy.random.randint(max(3, numpy.ceil(.33*nn)), nn+1) for kk in range(k)][::-1] else: xx = [((kk+.33)*mm, (kk+1.)*mm) for kk in range(k)] Rs = [numpy.random.randint(max(3, numpy.ceil((kk+.33)*mm)), (kk+1)*mm+1) for kk in range(k)][::-1] Ps = [numpy.random.randint(setts["p_down"], setts["p_up"])] for kk in range(1,k): prev = Ps[-1]*.5*Rs[kk-1] if not setts.get("overlap", False): prev = Ps[-1]*Rs[kk-1] tt = numpy.random.randint(prev, prev+100) ik = 0 while any([tt % p == 0 for p in Ps]) and ik < 100: ik += 1 tt = numpy.random.randint(prev, prev+100) Ps.append(tt) patts_dets = [{"inner": setts["inner"], "t0": 0, "Rs": Rs, "Ps": Ps, "noise_lvl": setts["noise_lvl"], "noise_dens": setts["noise_dens"]}] bck_dets = [] for (e, c) in setts.get("add_noise", []): bck_dets.append({"event": e, "nb_occs": c}) print patts_dets, bck_dets, numpy.prod(Rs) pcH, ds = prepare_synthetic_seq(patts_dets, bck_dets) writeSYNTHin(ds, pcH, fn_basis) ##### save_pcF = True if os.path.isfile(fn_basis+"_pcF.txt"): pcF = load_pc(fn_basis+"_pcF.txt", ds) save_pcF = False else: #### mine data sequence SXPS = SyntheXPS() mine_seqs(ds, fn_basis, writePCout_fun=SXPS.addPC) pcF = SXPS.getPC() ##### if os.path.isfile(fn_basis+"_summary.txt"): out_v = -1 with open(fn_basis+"_summary.txt") as fp: out_v = int(fp.readline().split()[0]) stats = {0: {"cl": pcH.codeLength(ds)}, 1: {"cl": 
pcF.codeLength(ds)}} else: out_v, stats, results = compare_pcs(ds, pcH, pcF) writeSYNTHout(setts, ds, pcH, pcF, out_v, fn_basis, save_pc=save_pcF) counts_cmp[out_v] = counts_cmp.get(out_v, 0)+1 if out_v != 0 : #== -1: print "RUN %s\tcl: %f vs. %f\t%d >> %s" % ( i, stats[0]["cl"], stats[1]["cl"], out_v, CMP_OUT_CODES[out_v]) return stats[0]["cl"], stats[1]["cl"], ds.codeLengthResiduals()
"max_len": 3000 } # fparams = {"filename": "/home/egalbrun/TKTL/misc/itrami/per-pat/data/traces/prepared/trace_bugzilla_1_data.dat", # "timestamp": False, "events": ["*"], "min_len": 30, "max_len": 3000} # seqs = readSequence(fparams) seq = [] for i in range(15): seq.append(i * 400 + numpy.arange(0, 20, 2)) seqs = {0: numpy.hstack(seq)} for ci, seq in seqs.items(): dets = None ds = DataSequence({"a": seq}) print "------------" if len(seq) < 30: print "SEQUENCE %s: (%d)\t %s" % (ci, len(seq), seq) else: print "SEQUENCE %s: (%d)" % (ci, len(seq)) data_details = ds.getDetails() results = run_test(seq, "a", data_details, dets) for result in results: nb_cycl = len([c for c in result["cycles"] if c.get("cp") > 0]) nb_res = numpy.sum( [len(c["occs"]) for c in result["cycles"] if c.get("cp") <= 0]) print "%s:\tCL=%f nC=%d nR=%d RT=%f" % ( result["meth"], result["CL"], nb_cycl, nb_res, result["RT"])