Esempio n. 1
0
 def run(self,basis,conf=1.0,show=False,outrules=False,verbose=True):
     """
     the run method consists of
       target basis ("GenRR", "RRGenerator", or "RRClosureGenerator")
       confidence threshold in [0,1),
       show: whether rules will be shown interactively
       outrules: whether rules will be stored in a file
     """
     basis2 = basis
     if basis == "GenRR":
         if self.rerulatt is None:
             self.rerulatt = rerulattice(self.supp,self.datasetfilename,xmlinput=True)
         self.rerulatt.xmlize()
         self.rerulatt.v.verb = verbose and self.verb
         latt = self.rerulatt
         rules = self.rerulatt.mineKrRR(self.supp,conf)
         secondminer = self.rerulatt.mineKrRR
     elif basis == "RRGenerator":
         if self.rerulatt is None:
             self.rerulatt = rerulattice(self.supp,self.datasetfilename,xmlinput=True)
         self.rerulatt.xmlize()
         self.rerulatt.v.verb = verbose and self.verb 
         latt = self.rerulatt
         rules = self.rerulatt.mineRR(self.supp,conf)
         secondminer = self.rerulatt.mineRR
     elif basis == "RRClosureGenerator":
         if self.clrerulatt is None:
             self.clrerulatt = clrerulattice(self.supp,self.datasetfilename,xmlinput=True)
         self.clrerulatt.xmlize()
         self.clrerulatt.v.verb = verbose and self.verb 
         latt = self.clrerulatt
         rules = self.clrerulatt.mineClosureRR(self.supp,conf)
         secondminer = self.clrerulatt.mineClosureRR
     else:
         "a print because there may be no lattice and no verbosity - to correct soon"
         print "Basis unavailable; options: GenRR, RRGenerator, RRClosureGenerator"
         return 0
     count = None
     if outrules:
         outrulesfile = file(self.datasetfilename+basis2+("_c%2.3f"%conf)+("_s%2.3f"%self.supp)+".txt","w")
         count = printrules(rules,latt.nrtr,outrulesfile,doprint=True)
     if show:
         print "\n\n"
         count = printrules(rules,latt.nrtr,outfile=None,doprint=True)
     if not count:
         count = printrules(rules,latt.nrtr,outfile=None,doprint=False)
     print basis+" basis on "+self.datasetfilename+".txt has ", count, "rules of confidence at least", conf
     return count
Esempio n. 2
0
    import time
    from slarule import printrules
    out_file = "output.txt"
    if len(sys.argv)>1:
        out_file=sys.argv[1]
    output=open(out_file,"w")
    current_dir="./datasets/"
    tests={"test":[0.20]}
##    tests={"retail":[0.001,0.0005],"adult":[0.01,0.005],"accidents":[0.5,0.4]}
    for filename in tests.keys():
        output.write(filename+"\n")
        for supp in tests[filename]:
            output.write(str(supp)+"\n")
            time1=time.time()
            rl = clrerulattice(supp,current_dir+filename,v=False)
            time2=time.time()-time1
            output.write("rerulattice: %3.3f"%time2+"\n")
            for ccc in [0.9,0.8,0.7]:
                output.write(str(ccc)+"\n")
                time1=time.time()
                ClosureRR =rl.mineClosureRR(supp,ccc)
                time2=time.time()-time1
                outrulesfile = file(current_dir+filename+"ClosureRR"+("_c%2.3f"%ccc)+("_s%2.4f"%supp)+".txt","w")
                output.write("ClosureRR time: %.3f"%time2+"\n")
                output.write("%d repr rules found with ClosureRR at conf %.2f"%(printrules(ClosureRR,rl.nrtr,outrulesfile,doprint=True),ccc)+"\n")

    output.close()



Esempio n. 3
0
    from slarule import printrules

    # Demo script: build a rerulattice for one dataset, print two of its
    # bases, then mine representative rules at a fixed confidence.

    ##    forget = True
    forget = False

    ##    filename = "pumsb_star"
    ##    supp = 0.4

    # dataset name and support threshold passed to the lattice constructor
    filename = "e13"
    supp = 1.0 / 13

    rl = rerulattice(supp, filename)

    ##    print printrules(rl.mingens,rl.nrtr,file(filename+"_IFrl30s.txt","w")), "rules in the iteration free basis."
    # the value returned by printrules is printed in front of the label
    print printrules(rl.mingens, rl.nrtr), "rules in the iteration free basis."

    # presumably fills in rl.GDgens, which is read just below - TODO confirm
    rl.findGDgens()

    ##    print printrules(rl.GDgens,rl.nrtr,file(filename+"_GDrl30s.txt","w")), "rules in the GD basis."
    print printrules(rl.GDgens, rl.nrtr), "rules in the GD basis."

    # confidence threshold used by the miners below
    ccc = 0.81

    KrRRants = rl.mineKrRR(supp, ccc)

    print printrules(KrRRants,
                     rl.nrtr), "repr rules found with Kr at conf", ccc

    RRants = rl.mineRR(supp, ccc)
Esempio n. 4
0
 def run(self,
         basis,
         conf=1.0,
         boost=0.0,
         show=False,
         outrules=False,
         verbose=True):
     """
     the run method consists of
       target basis ("B*", "RR", or "GD")
       confidence threshold in [0,1],
       confidence boost threshold in [1,infty] recommended in [1,2], say 1.1
       show: whether rules will be shown interactively
       outrules: whether rules will be stored in a file
     """
     basis2 = basis
     if basis == "B*":
         basis2 = "Bstar"  # for filenames
         if self.brulatt is None:
             self.brulatt = brulattice(self.supp,
                                       self.datasetfilename,
                                       xmlinput=True)
         self.brulatt.xmlize()
         self.brulatt.v.verb = verbose and self.verb
         latt = self.brulatt
         rules = self.brulatt.mineBstar(self.supp, conf,
                                        cboobd=boost)  # careful here
         secondminer = self.brulatt.mineBstar
     elif basis == "RR":
         if self.rerulatt is None:
             self.rerulatt = rerulattice(self.supp,
                                         self.datasetfilename,
                                         xmlinput=True)
         self.rerulatt.xmlize()
         self.rerulatt.v.verb = verbose and self.verb
         latt = self.rerulatt
         rules = self.rerulatt.mineRR(self.supp, conf)
         secondminer = self.rerulatt.mineRR
     elif basis == "GD":
         conf = 1.0
         if self.rerulatt is None:
             self.rerulatt = rerulattice(self.supp, self.datasetfilename)
         self.rerulatt.v.verb = verbose and self.verb
         latt = self.rerulatt
         self.rerulatt.findGDgens(self.supp)
         rules = self.rerulatt.GDgens
         secondminer = self.rerulatt.mineRR
     else:
         "a print because there may be no lattice and no verbosity - to correct soon"
         print "Basis unavailable; options: B*, RR, GD"
         return 0
     warn = ""
     bv = ""
     if boost > 0:
         print "Filtering rules at confidence boost", boost
         warn = "Confidence-boost filtered "
         bv = "_b%2.3f" % boost
         cb = cboost(rules)
         seconf = conf / boost
         blockers = secondminer(self.supp, seconf)
         survived = cb.filt(boost, latt, blockers)
         rules = survived
     count = None
     if outrules:
         outrulesfile = file(
             self.datasetfilename + basis2 + ("_c%2.3f" % conf) +
             ("_s%2.3f" % self.supp) + bv + ".txt", "w")
         count = printrules(rules, latt.nrtr, outrulesfile, doprint=True)
     if show:
         print "\n\n"
         count = printrules(rules, latt.nrtr, outfile=None, doprint=True)
     if not count:
         count = printrules(rules, latt.nrtr, outfile=None, doprint=False)
     print warn + basis + " basis on " + self.datasetfilename + ".txt has ", count, "rules of confidence at least", conf
     return count
Esempio n. 5
0
        # For each support threshold of the current dataset: time the lattice
        # construction, then time the three miners at several confidence
        # levels, logging timings and rule counts to `output` and writing
        # each mined rule set to its own text file.
        for supp in tests[filename]:
            output.write(str(supp) + "\n")
            time00 = time.time()
            rl = rerulattice(supp, current_dir + filename, v=False)
            time0 = time.time()
            output.write("rerulattice: %3.3f" % (time0 - time00) + "\n")
            for ccc in [0.9, 0.8, 0.7]:
                output.write(str(ccc) + "\n")
                time1 = time.time()
                KrRRants = rl.mineKrRR(supp, ccc)
                time2 = time.time()
                RRants = rl.mineRR(supp, ccc)
                time3 = time.time()
                ClosureRR = rl.mineClosureRR(supp, ccc)
                time4 = time.time()
                times = [time2 - time1, time3 - time2, time4 - time3]
                # NOTE(review): the third label is "Bstar" although the rules
                # come from mineClosureRR - confirm the intended label before
                # renaming, since it is part of the output file names
                algorithms = ["Kr", "RR", "Bstar"]
                miners = [KrRRants, RRants, ClosureRR]
                for i, alg in enumerate(algorithms):
                    output.write("%s time: %.3f" % (alg, times[i]) + "\n")
                    rulesname = (current_dir + filename + alg +
                                 ("_c%2.3f" % ccc) + ("_s%2.4f" % supp) +
                                 ".txt")
                    # close each rules file as soon as it is written instead
                    # of leaving three open handles per confidence level
                    with open(rulesname, "w") as outrulesfile:
                        count = printrules(
                            miners[i], rl.nrtr, outrulesfile, doprint=True)
                    output.write(
                        "%d repr rules found with %s at conf %.2f" %
                        (count, alg, ccc) + "\n")

    output.close()
Esempio n. 6
0
        return p + q
    return [set([])]


if __name__ == "__main__":
    # Demo run when the module is executed as a script: build a brulattice
    # for one dataset, mine its B* rules and print them.

    from slarule import printrules

    ##    forget = True
    forget = False

    ##    filename = "pumsb_star"
    ##    supp = 0.4

    # dataset name and support threshold passed to the lattice constructor
    filename = "e13"
    supp = 1.0 / 13

    rl = brulattice(supp, filename)

    ##    rl.v.verb = False

    # ccc is the confidence given to mineBstar; cbb is passed as its
    # cboobd keyword (the "boost" value in the commented-out sweep below)
    ccc = 0.7
    cbb = 1.05
    b = rl.mineBstar(supp, ccc, cboobd=cbb)
    # prints a newline item followed by the value printrules returns
    print "\n", printrules(b, rl.nrtr)

##    for ccc in [0.7,0.75,0.8]:
##        for cbb in [1,1.05,1.1,1.15,1.2,1.25,1.3,1.35,1.4,1.45,1.5]:
##            b = rl.mineBstar(supp,ccc,cboobd=cbb)
##            print printrules(b,rl.nrtr,doprint=False), "B* rules found at conf", ccc, "boost", cbb
Esempio n. 7
0
    ##    ccc = 0.7
    ##    ccc = 0.8

    ##    filename = "cestapos"
    ##    supp = 0.05
    ####    supp = 0.10
    ####    supp = 0.15
    ##    ccc = 0.7
    ####    ccc = 0.8
    ####    ccc = 0.9

    # Build the lattice once, then run the Qr, Kr and RR miners on it in
    # turn, timing each mining call on its own; supp, filename and ccc are
    # set earlier in the script.
    rl = rerulattice(supp, filename)
    time1 = time.time()
    QrRRants = rl.mineQrRR(supp, ccc)
    time2 = time.time()
    # printing stays outside the timed intervals: the next start time is
    # only taken after each print
    print printrules(QrRRants,
                     rl.nrtr), "repr rules found with Qr at conf", ccc
    time3 = time.time()
    KrRRants = rl.mineKrRR(supp, ccc)
    time4 = time.time()
    print printrules(KrRRants,
                     rl.nrtr), "repr rules found with Kr at conf", ccc
    time5 = time.time()
    RRants = rl.mineRR(supp, ccc)
    time6 = time.time()
    print printrules(RRants, rl.nrtr), "repr rules found at conf", ccc

    # elapsed seconds of each mining call
    print "Qr", time2 - time1
    print "Kr", time4 - time3
    print "RR", time6 - time5
Esempio n. 8
0
                self.outcome[cn].append((an, clift, clev))
        return self.outcome

if __name__ == "__main__":
    # Demo run when the module is executed as a script: print the
    # iteration-free basis and the rules mined by mineRR for one dataset,
    # then set up the values for a confidence-boost step.

    from rerulattice import rerulattice
    from slarule import printrules

    # dataset name and support threshold passed to the lattice constructor
    filename = "e13"
    supp = 0.99 / 13  #was 24 but...

    rl = rerulattice(supp, filename)

    print "Iteration-free basis:"

    print printrules(rl.mingens, rl.nrtr)

    # confidence threshold for the rules mined next
    conf = 0.75

    RR = rl.mineRR(supp, conf)

    print "At confidence", conf

    print printrules(RR, rl.nrtr)

    # cboost object built from the mined rules
    cb = cboost(RR)

    boost = 1.05

    # confidence threshold lowered by the boost factor
    seconf = conf / boost