def run(self, basis, conf=1.0, show=False, outrules=False, verbose=True):
    """
    Mine one basis of representative rules and report how many there are.

    the run method consists of
      target basis ("GenRR", "RRGenerator", or "RRClosureGenerator")
      confidence threshold (documented as in [0,1); the default is 1.0)
      show: whether rules will be shown interactively
      outrules: whether rules will be stored in a file named
        <datasetfilename><basis>_c<conf>_s<supp>.txt
      verbose: and-ed with self.verb to set the verbosity of the lattice

    Returns the count given back by printrules, or 0 when the basis
    name is not one of the available options.
    """
    # NOTE(review): the reviewed copy of this method had lost its
    # indentation; xmlize() and the verbosity update are assumed to run
    # on every call, not only when the lattice is first built - confirm.
    if basis == "GenRR" or basis == "RRGenerator":
        # both bases are mined from the same, lazily created lattice
        if self.rerulatt is None:
            self.rerulatt = rerulattice(self.supp, self.datasetfilename, xmlinput=True)
        self.rerulatt.xmlize()
        self.rerulatt.v.verb = verbose and self.verb
        latt = self.rerulatt
        if basis == "GenRR":
            rules = latt.mineKrRR(self.supp, conf)
        else:
            rules = latt.mineRR(self.supp, conf)
    elif basis == "RRClosureGenerator":
        if self.clrerulatt is None:
            self.clrerulatt = clrerulattice(self.supp, self.datasetfilename, xmlinput=True)
        self.clrerulatt.xmlize()
        self.clrerulatt.v.verb = verbose and self.verb
        latt = self.clrerulatt
        rules = latt.mineClosureRR(self.supp, conf)
    else:
        # a print because there may be no lattice and no verbosity - to correct soon
        print("Basis unavailable; options: GenRR, RRGenerator, RRClosureGenerator")
        return 0

    count = None
    if outrules:
        rulesfilename = (self.datasetfilename + basis + ("_c%2.3f" % conf) +
                         ("_s%2.3f" % self.supp) + ".txt")
        # the with-block closes (and flushes) the rules file deterministically
        with open(rulesfilename, "w") as outrulesfile:
            count = printrules(rules, latt.nrtr, outrulesfile, doprint=True)
    if show:
        print("\n\n")
        count = printrules(rules, latt.nrtr, outfile=None, doprint=True)
    if not count:
        # nothing counted so far (or zero rules): count without printing
        count = printrules(rules, latt.nrtr, outfile=None, doprint=False)
    # single-argument print(...) emits the same text as the former print
    # statement (note the double blank after "has") and parses on 2 and 3
    print("%s basis on %s.txt has  %s rules of confidence at least %s"
          % (basis, self.datasetfilename, count, conf))
    return count
import sys
import time
from slarule import printrules

# Timing script: for every dataset/support pair in `tests`, build a
# clrerulattice and mine ClosureRR at several confidence thresholds.
# Timings and rule counts go to the log file `out_file` (first command
# line argument, "output.txt" by default); the rules themselves go to
# one text file per dataset/confidence/support combination.

out_file = "output.txt"
if len(sys.argv) > 1:
    out_file = sys.argv[1]
output = open(out_file, "w")
current_dir = "./datasets/"
tests = {"test": [0.20]}
## tests={"retail":[0.001,0.0005],"adult":[0.01,0.005],"accidents":[0.5,0.4]}

try:
    for filename in tests:
        output.write(filename + "\n")
        for supp in tests[filename]:
            output.write(str(supp) + "\n")
            time1 = time.time()
            rl = clrerulattice(supp, current_dir + filename, v=False)
            time2 = time.time() - time1
            output.write("rerulattice: %3.3f" % time2 + "\n")
            for ccc in [0.9, 0.8, 0.7]:
                output.write(str(ccc) + "\n")
                time1 = time.time()
                ClosureRR = rl.mineClosureRR(supp, ccc)
                time2 = time.time() - time1
                rulesfilename = (current_dir + filename + "ClosureRR" +
                                 ("_c%2.3f" % ccc) + ("_s%2.4f" % supp) + ".txt")
                # close each rules file as soon as its rules are written
                with open(rulesfilename, "w") as outrulesfile:
                    output.write("ClosureRR time: %.3f" % time2 + "\n")
                    found = printrules(ClosureRR, rl.nrtr, outrulesfile, doprint=True)
                output.write("%d repr rules found with ClosureRR at conf %.2f" % (found, ccc) + "\n")
finally:
    # keep whatever was logged even if lattice construction or mining fails
    output.close()
from slarule import printrules ## forget = True forget = False ## filename = "pumsb_star" ## supp = 0.4 filename = "e13" supp = 1.0 / 13 rl = rerulattice(supp, filename) ## print printrules(rl.mingens,rl.nrtr,file(filename+"_IFrl30s.txt","w")), "rules in the iteration free basis." print printrules(rl.mingens, rl.nrtr), "rules in the iteration free basis." rl.findGDgens() ## print printrules(rl.GDgens,rl.nrtr,file(filename+"_GDrl30s.txt","w")), "rules in the GD basis." print printrules(rl.GDgens, rl.nrtr), "rules in the GD basis." ccc = 0.81 KrRRants = rl.mineKrRR(supp, ccc) print printrules(KrRRants, rl.nrtr), "repr rules found with Kr at conf", ccc RRants = rl.mineRR(supp, ccc)
def run(self, basis, conf=1.0, boost=0.0, show=False, outrules=False, verbose=True):
    """
    Mine one basis of rules, optionally filter it by confidence boost,
    and report how many rules remain.

    the run method consists of
      target basis ("B*", "RR", or "GD")
      confidence threshold in [0,1]; forced to 1.0 for the "GD" basis
      confidence boost threshold in [1,infty]
        recommended in [1,2], say 1.1; the default 0.0 (any value
        that is not positive) skips the filtering step
      show: whether rules will be shown interactively
      outrules: whether rules will be stored in a file named
        <datasetfilename><basis>_c<conf>_s<supp>[_b<boost>].txt
        where "B*" is spelled "Bstar"
      verbose: and-ed with self.verb to set the verbosity of the lattice

    Returns the count given back by printrules, or 0 when the basis
    name is not one of the available options.
    """
    # NOTE(review): the reviewed copy of this method had lost its
    # indentation; xmlize() and the verbosity update are assumed to run
    # on every call, not only when the lattice is first built - confirm.
    basis2 = basis
    if basis == "B*":
        basis2 = "Bstar"  # for filenames
        if self.brulatt is None:
            self.brulatt = brulattice(self.supp, self.datasetfilename, xmlinput=True)
        self.brulatt.xmlize()
        self.brulatt.v.verb = verbose and self.verb
        latt = self.brulatt
        rules = latt.mineBstar(self.supp, conf, cboobd=boost)  # careful here
        secondminer = latt.mineBstar
    elif basis == "RR":
        if self.rerulatt is None:
            self.rerulatt = rerulattice(self.supp, self.datasetfilename, xmlinput=True)
        self.rerulatt.xmlize()
        self.rerulatt.v.verb = verbose and self.verb
        latt = self.rerulatt
        rules = latt.mineRR(self.supp, conf)
        secondminer = latt.mineRR
    elif basis == "GD":
        conf = 1.0
        if self.rerulatt is None:
            self.rerulatt = rerulattice(self.supp, self.datasetfilename)
        self.rerulatt.v.verb = verbose and self.verb
        latt = self.rerulatt
        latt.findGDgens(self.supp)
        rules = latt.GDgens
        secondminer = latt.mineRR
    else:
        # a print because there may be no lattice and no verbosity - to correct soon
        print("Basis unavailable; options: B*, RR, GD")
        return 0

    warn = ""
    bv = ""
    if boost > 0:
        print("Filtering rules at confidence boost %s" % (boost,))
        warn = "Confidence-boost filtered "
        bv = "_b%2.3f" % boost
        cb = cboost(rules)
        # float() guards against Python 2 integer division when both
        # conf and boost are passed as ints
        seconf = float(conf) / boost
        # rules mined at the lowered confidence are handed to the
        # filter as potential blockers
        blockers = secondminer(self.supp, seconf)
        rules = cb.filt(boost, latt, blockers)

    count = None
    if outrules:
        rulesfilename = (self.datasetfilename + basis2 + ("_c%2.3f" % conf) +
                         ("_s%2.3f" % self.supp) + bv + ".txt")
        # the with-block closes (and flushes) the rules file deterministically
        with open(rulesfilename, "w") as outrulesfile:
            count = printrules(rules, latt.nrtr, outrulesfile, doprint=True)
    if show:
        print("\n\n")
        count = printrules(rules, latt.nrtr, outfile=None, doprint=True)
    if not count:
        # nothing counted so far (or zero rules): count without printing
        count = printrules(rules, latt.nrtr, outfile=None, doprint=False)
    # single-argument print(...) emits the same text as the former print
    # statement (note the double blank after "has") and parses on 2 and 3
    print("%s%s basis on %s.txt has  %s rules of confidence at least %s"
          % (warn, basis, self.datasetfilename, count, conf))
    return count
# For every support of the current dataset: build the lattice, then time
# the three miners at each confidence threshold.  Timings and rule
# counts go to the `output` log; the rules go to one file per
# algorithm/confidence/support combination.
for supp in tests[filename]:
    output.write(str(supp) + "\n")
    time00 = time.time()
    rl = rerulattice(supp, current_dir + filename, v=False)
    time0 = time.time()
    output.write("rerulattice: %3.3f" % (time0 - time00) + "\n")
    for ccc in [0.9, 0.8, 0.7]:
        output.write(str(ccc) + "\n")
        time1 = time.time()
        KrRRants = rl.mineKrRR(supp, ccc)
        time2 = time.time()
        RRants = rl.mineRR(supp, ccc)
        time3 = time.time()
        ClosureRR = rl.mineClosureRR(supp, ccc)
        time4 = time.time()
        times = [time2 - time1, time3 - time2, time4 - time3]
        # NOTE(review): the third label is "Bstar" although the third
        # result comes from mineClosureRR - confirm the intended name
        # before renaming, since it ends up in the output file names.
        algorithms = ["Kr", "RR", "Bstar"]
        miners = [KrRRants, RRants, ClosureRR]
        for i, alg in enumerate(algorithms):
            rulesfilename = (current_dir + filename + alg +
                             ("_c%2.3f" % ccc) + ("_s%2.4f" % supp) + ".txt")
            # close each rules file as soon as its rules are written
            with open(rulesfilename, "w") as outrulesfile:
                output.write("%s time: %.3f" % (alg, times[i]) + "\n")
                found = printrules(miners[i], rl.nrtr, outrulesfile, doprint=True)
            output.write("%d repr rules found with %s at conf %.2f" % (found, alg, ccc) + "\n")
output.close()
return p + q return [set([])] if __name__ == "__main__": from slarule import printrules ## forget = True forget = False ## filename = "pumsb_star" ## supp = 0.4 filename = "e13" supp = 1.0 / 13 rl = brulattice(supp, filename) ## rl.v.verb = False ccc = 0.7 cbb = 1.05 b = rl.mineBstar(supp, ccc, cboobd=cbb) print "\n", printrules(b, rl.nrtr) ## for ccc in [0.7,0.75,0.8]: ## for cbb in [1,1.05,1.1,1.15,1.2,1.25,1.3,1.35,1.4,1.45,1.5]: ## b = rl.mineBstar(supp,ccc,cboobd=cbb) ## print printrules(b,rl.nrtr,doprint=False), "B* rules found at conf", ccc, "boost", cbb
## ccc = 0.7 ## ccc = 0.8 ## filename = "cestapos" ## supp = 0.05 #### supp = 0.10 #### supp = 0.15 ## ccc = 0.7 #### ccc = 0.8 #### ccc = 0.9 rl = rerulattice(supp, filename) time1 = time.time() QrRRants = rl.mineQrRR(supp, ccc) time2 = time.time() print printrules(QrRRants, rl.nrtr), "repr rules found with Qr at conf", ccc time3 = time.time() KrRRants = rl.mineKrRR(supp, ccc) time4 = time.time() print printrules(KrRRants, rl.nrtr), "repr rules found with Kr at conf", ccc time5 = time.time() RRants = rl.mineRR(supp, ccc) time6 = time.time() print printrules(RRants, rl.nrtr), "repr rules found at conf", ccc print "Qr", time2 - time1 print "Kr", time4 - time3 print "RR", time6 - time5
self.outcome[cn].append((an, clift, clev)) return self.outcome if __name__ == "__main__": from rerulattice import rerulattice from slarule import printrules filename = "e13" supp = 0.99 / 13 #was 24 but... rl = rerulattice(supp, filename) print "Iteration-free basis:" print printrules(rl.mingens, rl.nrtr) conf = 0.75 RR = rl.mineRR(supp, conf) print "At confidence", conf print printrules(RR, rl.nrtr) cb = cboost(RR) boost = 1.05 seconf = conf / boost