Exemple #1
0
 def filt(self, boost, clatt, rrseconf):
     """
     Rebuild self.outcome as a fresh corr and return it - SLOW.

     Every consequent cn of self.ants gets an entry; the entry lists, in
     their original order, the antecedents an of cn for which no
     competing rule derived from rrseconf reaches
     conf1 <= conf2 * boost, where conf1 = supp(cn) / supp(an).
     Closures and their supports are obtained through clatt.close.
     """

     def outdone(cn, an):
         "BRUTE FORCE search for one competing rule that blocks an -> cn"
         conf1 = float(cn.supp) / an.supp
         for cn2 in rrseconf.keys():
             for an2 in rrseconf[cn2]:
                 if not (cn.difference(an) <= cn2 and an2 <= an):
                     continue
                 # first try proper sub-antecedents that extend an2
                 for ss in allsubsets(set(an.difference(an2))):
                     an3 = ss.union(an2)
                     if an3 < an:
                         cn3 = cn.difference(an).union(an3)
                         conf2 = float(clatt.close(cn3).supp) / clatt.close(an3).supp
                         if conf1 <= conf2 * boost:
                             return True
                 # none blocked: try enlarging the consequent by one item
                 for elem in cn2.difference(cn):
                     cn3 = set([elem]).union(cn)
                     conf2 = float(clatt.close(cn3).supp) / an.supp
                     if conf1 <= conf2 * boost:
                         return True
         return False

     self.outcome = corr()
     for cn in self.ants.keys():
         self.outcome[cn] = []
         for an in self.ants[cn]:
             if not outdone(cn, an):
                 self.outcome[cn].append(an)
     return self.outcome
Exemple #2
0
 def mineKrRR(self, suppthr, confthr, forget=False):
     """
     Representative rules at the given confidence through the incomplete
     heuristic of Kryszkiewicz (IDA 2001).

     suppthr, confthr: thresholds expected in [0,1], rescaled here by
     self.scale. At confidence exactly 1 the call is handed over to
     self.findmingens(suppthr) and its result is returned.
     forget: accepted but not read by this method.
     Returns a corr mapping each closure that passes the Prop 9 test to
     the list of minimal generators accepted as its antecedents.
     """
     if confthr == 1:
         return self.findmingens(suppthr)
     sthr = int(self.scale * suppthr)  # rescaled as well, never read below
     cthr = int(self.scale * confthr)
     self.v.zero(100)
     self.v.inimessg("Computing representative rules at confidence " + str(confthr) + " using Kryszkiewicz's incomplete heuristic...")
     ants = corr()
     self.v.messg("computing antecedents...")
     for nod in self.closeds:
         self.v.tick()
         scaled_mxs = self.scale * nod.mxs
         scaled_supp = self.scale * nod.supp
         if not (scaled_supp >= cthr * nod.mns and scaled_mxs < cthr * nod.mns):
             # fails the test of Prop 9 in Kryszkiewicz's paper
             continue
         ants[nod] = []
         # valid antecedents: mingens of the predecessors and of nod itself
         for node in self.preds[nod] + [nod]:
             for m in self.mingens[node]:
                 if m < nod and scaled_mxs < cthr * m.supp <= scaled_supp < cthr * m.mns:
                     ants[nod].append(m)
     self.v.zero(500)
     self.v.messg("...done.\n")
     return ants
Exemple #3
0
 def __init__(self,supp,datasetfile="",v=None,xmlinput=False,externalminer=True):
     """
     Initialise the underlying clattice with the same arguments, then
     find the minimal generators and set their mns.

     Also creates the empty caches hist_cuts and hist_trnsl, and GDgens,
     which stays None until it is computed.
     """
     clattice.__init__(
         self, supp, datasetfile, v,
         xmlinput=xmlinput, externalminer=externalminer)
     # caches, empty until the corresponding computations are requested
     self.hist_trnsl = {}
     self.hist_cuts = {}
     self.GDgens = None # will be a corr() once computed
     # the container has to exist before findmingens() runs
     self.mingens = corr()
     self.findmingens()
     self.setmns()
Exemple #4
0
 def setcuts(self,scsthr,sccthr,forget=False,skip=None,skippar=0):
     """
     Return the cuts (pos, neg) of the closures for already-scaled
     support/confidence thresholds in [0,self.scale].

     Both components are corrs, the positive and negative border:
       hist_cuts : supp/conf thr -> (pos,neg)
       pos : node -> min ants,  neg : node -> max nonants
     A pair found in self.hist_cuts is returned right away; a freshly
     computed pair is stored there unless forget holds.

     skip, skippar: when skip is given AND skippar is nonzero, every
     closure with skip(node, skippar, self.scale) true is left out and
     the (possibly partial) result is not cached; in any other case the
     predicate named never is used instead.

     Open doubts kept from the original notes: it is unclear whether
     this works for a support different from self.minsupp; Bstar may
     need a support improvement wrt larger closures (skippar = improv)
     and the Kr/BC heuristics may need a conf-based check on nodes
     (skippar = conf), and both probably do not work now.
     """
     key = (scsthr, sccthr)
     if key in self.hist_cuts:
         # cached version is there, use it
         return self.hist_cuts[key]
     if skip is None or skippar == 0:
         # no skipping requested
         skip = never
     else:
         # risk of not all closures traversed, don't cache the result
         forget = True
     cpos = corr()
     cneg = corr()
     self.v.zero(500)
     self.v.messg("...computing (non-)antecedents...")
     for nod in self.closeds:
         if skip(nod,skippar,self.scale):
             # no rules will be computed from this closure
             continue
         self.v.tick()
         if self.scale*nod.supp < self.nrtr*scsthr:
             # below the requested support, no cut is stored
             continue
         cpos[nod], cneg[nod] = self._cut(nod,sccthr)
     if not forget:
         self.hist_cuts[key] = cpos, cneg
     self.v.messg("...done;")
     return cpos, cneg
 def mineClosureRR(self,suppthr,confthr,forget=False):
     """
     compute the representative rules for the given confidence using
     the closure-aware approach;
     works only for confidence thresholds strictly smaller than 1:
     at confidence 1 a message is issued and an empty corr is returned
     thresholds expected in [0,1] to rescale here
     forget is accepted but not read by this method
     returns a corr: closure -> list of predecessor closures accepted
     as antecedents
     """
     if confthr == 1:
         self.v.inimessg("This algorithm works only for confidence thresholds strictly smaller than 1")
         return corr()
     # sthr is computed but not read again in this method
     sthr = int(self.scale*suppthr)
     cthr = int(self.scale*confthr)
     self.v.zero(100)
     self.v.inimessg("Computing representative rules at confidence "+str(confthr)+" using our closure-aware approach...")
     ants = corr()
     self.v.messg("computing antecedents...")
     for nod in self.closeds:
         self.v.tick()
         # mxgs: largest support among the predecessors of nod whose
         # rule into nod reaches the confidence threshold
         mxgs=0
         foundyesants=False
         for node in self.preds[nod]:
             if mxgs<node.supp and cthr*node.supp<=self.scale*nod.supp:
                 mxgs=node.supp
                 foundyesants=True
         if not foundyesants:
             # NOTE(review): in this branch mxgs ends as 0 or nod.supp, so the
             # "mxgs>nod.supp" test below cannot hold (assuming supports are
             # non-negative numbers) and no entry is created for nod;
             # confirm whether that test was meant to be different.
             # NOTE(review): if hyedges is empty, mingens[0] raises
             # IndexError - confirm the transversal is never empty here.
             mingens = self._faces(nod,list(self.impreds[nod])).transv().hyedges
             if len(mingens)>1:
                 mxgs=nod.supp
             elif len(mingens[0])<nod.card:
                 mxgs=nod.supp
         # scaled mxs and support of nod, compared against cthr multiples
         c1=self.scale*nod.mxs
         c2=self.scale*nod.supp
         if cthr*mxgs>c1 and mxgs>nod.supp:
             "this is the test of Prop 3 in our IEEE Trans paper"
             ants[nod] = []
             "computing valid antecedents (Prop 4 in our IEEE Trans paper)"
             for node in self.preds[nod]:
                 if c1<cthr*node.supp and cthr*node.supp<=c2 and c2<cthr*node.bmns:
                     ants[nod].append(node)
     self.v.zero(500)
     self.v.messg("...done.\n")
     return ants
Exemple #6
0
 def __init__(self, supp, datasetfile="", v=None, xmlinput=False, externalminer=True):
     """
     Set up the closures through clattice.__init__ (same arguments),
     then find the minimal generators and set their mns.
     """
     lattice_options = {"xmlinput": xmlinput, "externalminer": externalminer}
     clattice.__init__(self, supp, datasetfile, v, **lattice_options)
     # the container has to exist before findmingens() runs
     self.mingens = corr()
     self.findmingens()
     self.setmns()
Exemple #7
0
 def mineRR(self, suppthr, confthr, forget=False):
     """
     compute the representative rules for the given confidence;
     will provide the iteration-free basis if called with conf 1
     (delegating to self.findmingens(suppthr))
     thresholds expected in [0,1] to rescale here

     forget: when true, neither the cuts (see setcuts) nor the result
     are remembered; otherwise the result is stored in self.hist_RR
     under the rescaled thresholds, which is where later calls look
     it up before recomputing.
     Returns a corr: closure -> list of antecedents taken from mingens,
     after ants.tighten has been applied.
     """
     if confthr == 1:
         return self.findmingens(suppthr)
     sthr = int(self.scale * suppthr)
     cthr = int(self.scale * confthr)
     if (sthr, cthr) in self.hist_RR:
         return self.hist_RR[sthr, cthr]
     self.v.zero(100)
     self.v.inimessg("Computing representative rules at confidence " +
                     str(confthr) + "...")
     nonants = self.setcuts(sthr, cthr, forget)[1]
     ants = corr()
     self.v.messg("computing potential antecedents...")
     for nod in self.closeds:
         # careful, assuming nodes ordered by size here;
         # find all free noncl antecs as cut transv and
         # get associated data by search on mingens;
         # alternative algorithms exist to avoid the
         # slow call to _findinmingens - must try them.
         # A support constraint could be added here if convenient.
         self.v.tick()
         ants[nod] = []
         for m in self._faces(nod, nonants[nod]).transv().hyedges:
             if not m < nod:
                 continue
             mm = self._findinmingens(nod, m)
             if mm is None:
                 # report the failed lookup and leave it out: a None
                 # entry is not an antecedent and must not be tightened
                 self.v.errmessg(
                     str(m) + " not found among mingens at " +
                     str(nod))
                 continue
             ants[nod].append(mm)
     self.v.zero(500)
     self.v.messg("...checking valid antecedents...")
     ants.tighten(self.v)
     self.v.messg("...done.\n")
     if not forget:
         # populate the cache that the lookup above relies on
         self.hist_RR[sthr, cthr] = ants
     return ants
Exemple #8
0
 def mineRR(self, suppthr, confthr, forget=False):
     """
     Representative rules at the given confidence through our heuristic
     (Props 5 and 6 of our EGC paper).

     suppthr, confthr: thresholds expected in [0,1], rescaled here by
     self.scale. At confidence exactly 1 the call is handed over to
     self.findmingens(suppthr), which provides the iteration-free basis.
     forget: accepted but not read by this method.
     Returns a corr mapping each closure that passes the Prop 5 test to
     the list of minimal generators accepted as its antecedents.
     """
     if confthr == 1:
         return self.findmingens(suppthr)
     sthr = int(self.scale * suppthr)  # rescaled as well, never read below
     cthr = int(self.scale * confthr)
     self.v.zero(100)
     self.v.inimessg("Computing representative rules at confidence " + str(confthr) + " using our heuristic...")
     ants = corr()
     self.v.messg("computing antecedents...")
     for nod in self.closeds:
         self.v.tick()
         scaled_supp = self.scale * nod.supp
         # start from nod's own support when it has a proper generator
         best = nod.supp if any(m < nod for m in self.mingens[nod]) else 0
         # a larger support among confident enough predecessors wins
         for pred in self.preds[nod]:
             if best < pred.supp and cthr * pred.supp <= scaled_supp:
                 best = pred.supp
         scaled_mxs = self.scale * nod.mxs
         if not (cthr * best > scaled_mxs):
             # fails the test of Prop 5 in our EGC paper
             continue
         ants[nod] = []
         # valid antecedents (Prop 6 in our EGC paper)
         for node in self.preds[nod] + [nod]:
             for m in self.mingens[node]:
                 if m < nod and scaled_mxs < cthr * m.supp <= scaled_supp < cthr * m.mns:
                     ants[nod].append(m)
     self.v.zero(500)
     self.v.messg("...done.\n")
     return ants
Exemple #9
0
 def mineQrRR(self, suppthr, confthr, forget=False):
     """
     compute the representative rules for the given confidence using
     our slight variant of the incomplete Krysz IDA 2001 heuristic
     (this version does find empty antecedents)

     thresholds expected in [0,1], rescaled here by self.scale
     forget is passed on to setcuts, which is called with the skip
     predicate and the rescaled confidence as its parameter
     returns a corr: closure passing the Prop 9 test -> list of
     antecedents taken from mingens, after ants.tighten is applied
     """
     sthr = int(self.scale * suppthr)
     cthr = int(self.scale * confthr)
     # caching of results (hist_KrRR) is currently disabled here
     self.v.zero(100)
     self.v.inimessg(
         "Computing representative rules at confidence " + str(confthr) +
         " using our slight variant of Kryszkiewicz's incomplete heuristic..."
     )
     nonants = self.setcuts(sthr, cthr, forget, skip, cthr)[1]
     ants = corr()
     self.v.messg("computing potential antecedents...")
     for nod in self.closeds:
         # see the comments at the same place in mineRR;
         # there used to be a test on the support of nod here
         self.v.tick()
         bound = cthr * nod.mns
         if not (self.scale * nod.supp >= bound and
                 self.scale * nod.mxs < bound):
             # fails the test of Prop 9 - might add here supp constraint
             continue
         ants[nod] = []
         for m in self._faces(nod, nonants[nod]).transv().hyedges:
             if not m < nod:
                 continue
             mm = self._findinmingens(nod, m)
             if mm is None:
                 # one parenthesised string prints the same text under
                 # Python 2 and 3; a failed lookup is reported and left
                 # out instead of storing None among the antecedents
                 print(str(m) + " not found at " + str(nod))
                 continue
             ants[nod].append(mm)
     self.v.zero(500)
     self.v.messg("...checking valid antecedents...")
     ants.tighten(self.v)
     self.v.messg("...done.\n")
     return ants
Exemple #10
0
    def add_eval(self, clatt):
        """
        Evaluate every rule an -> cn held in self.ants: clift, clev - SLOW.

        For each consequent cn with a non-empty antecedent list and each
        of its antecedents an, conf1 = supp(cn) / supp(an) is compared
        with mxconfsub, the largest confidence of a rule
        close(anr) -> close(cnr + anr), where cnr = cn - an and anr runs
        over the subsets of an whose closure differs from an:
          clift = conf1 / mxconfsub,  clev = conf1 - mxconfsub
        (both None when mxconfsub stays at 0).

        The result is stored in self.outcome and returned: a corr mapping
        cn to a list with one tuple (an, clift, clev) per antecedent.
        Consequents without antecedents get no entry.
        """
        self.outcome = corr()
        for cn in self.ants.keys():
            if not self.ants[cn]:
                continue
            self.outcome[cn] = []
            for an in self.ants[cn]:
                conf1 = float(cn.supp) / an.supp
                cnr = cn.difference(an)
                mxconfsub = 0
                for anr in allsubsets(set(an)):
                    # find max conf of a rule anr -> cnr - BRUTE FORCE
                    canr = clatt.close(anr)
                    if canr == an:
                        continue
                    ccc = clatt.close(cnr.union(anr))
                    conf2 = float(ccc.supp) / canr.supp
                    if conf2 > mxconfsub:
                        mxconfsub = conf2
                if mxconfsub > 0:
                    clift = conf1 / mxconfsub
                    clev = conf1 - mxconfsub
                else:
                    clift = None
                    clev = None
                # recorded inside the loop, so that every antecedent is
                # kept and not only the last one visited
                self.outcome[cn].append((an, clift, clev))
        return self.outcome
Exemple #11
0
 def findGDgens(self,suppthr=-1):
     """
     Fill self.GDgens with the GD antecedents of every closure - only
     proper antecedents are kept. Nothing is returned, and nothing is
     done when self.GDgens already holds something.

     Each minimal generator of a closure is saturated with those
     predecessor closures that have a minimal generator strictly inside
     the current set, until nothing changes; the saturated set is kept
     unless it contains the whole closure or contains a set already
     kept for that closure.

     ToDo, as in findmingens: suppthr (not read yet) should be an
     optional threshold in [0,1] imposing an extra level of iceberg,
     defaulting to the support found in the closures file; check
     self.hist_GD before computing and memorize computed supports.
     """
     if self.GDgens:
         return
     self.GDgens = corr()
     sthr = self.scale*self.minsupp/self.nrtr  # not read further on
     self.v.zero(250)
     self.v.inimessg("Filtering minimal generators to obtain the Guigues-Duquenne basis...")
     for closure in self.closeds:
         self.v.tick()
         self.GDgens[closure] = set([])
         for gen in self.mingens[closure]:
             grown = set(gen)
             while True:
                 changed = False
                 for pred in self.preds[closure]:
                     for predgen in self.mingens[pred]:
                         if predgen < grown and not pred <= grown:
                             grown.update(pred)
                             changed = True
                 if not changed:
                     break
             grown = gen.copy().revise(grown)
             if closure <= grown:
                 # equal to (or beyond) the closure: not a proper antecedent
                 continue
             if not any(kept <= grown for kept in self.GDgens[closure]):
                 # not subsumed by anything kept so far
                 self.GDgens[closure].add(grown)
     self.v.messg("...done.\n")
Exemple #12
0
import time

import numpy as np
import scipy as sp

import corr

def ccf(data):
    """Correlate every row of *data* with two concatenated copies of itself.

    Each row p is passed to ``np.correlate(p, np.concatenate((p, p)))``
    in the default 'valid' mode, which yields len(p) + 1 values per row.
    ``np.correlate`` is called directly because the SciPy top-level name
    was only a deprecated re-export of the same NumPy function.

    Returns a NumPy array with one row of correlations per input row.
    """
    return np.array([np.correlate(p, np.concatenate((p, p))) for p in data])

# Benchmark input: z[i, j] = i XOR j as floats, built with one vectorised
# call instead of a million-iteration Python loop.
z = np.bitwise_xor.outer(np.arange(1000), np.arange(1000)).astype(np.float64)

# Small input for checking results by hand:
# z = np.array([[2.0, 3.0, 1.0, 4.0, 1.3, 4.5, 6.4, 7.6]])

x = np.array(z, dtype=np.float32)

# Time the NumPy implementation.
start = time.time()
z = ccf(x)
numpy_time = time.time() - start

# Time the Cpp implementation from the corr module on the same input.
y = x
start = time.time()
corr.corr(y)
cpp_time = time.time() - start

# A single parenthesised argument prints the same under Python 2 and 3.
print("Numpy - %(numpy_time)f \nCpp - %(cpp_time)f" % locals())
Exemple #13
0
 def mineBstar(self, suppthr, confthr, forget=False, cboobd=0):
     """
     compute the Bstar basis for the given confidence and, possibly,
     conf boost; if present, use cboost bound to spare exploring some
     closures (but that might not work now);
     thresholds in [0,1], rescaled into [0,self.scale] inside

     results are looked up in / stored into self.hist_Bstar under the
     rescaled (supp, conf) pair, unless forget holds;
     with cboobd == 0 the tightened minimal antecedents are returned;
     with cboobd > 0 they are filtered against the basis obtained by a
     recursive call at confidence confthr / cboobd, and a new corr
     with the surviving antecedents is returned

     TODO: CHECK SUPPTHR COMPATIBLE WITH X.SUPPTHR
     NOW THIS IS BEING DONE ELSEWHERE BUT MAYBE SHOULD BE HERE
     """
     sthr = int(self.scale * suppthr)
     cthr = int(self.scale * confthr)
     yesants = None
     if (sthr, cthr) in self.hist_Bstar.keys():
         # cached antecedents: final answer only when no boost filtering
         yesants = self.hist_Bstar[sthr, cthr]
         if cboobd == 0: return yesants
     self.v.zero(100)
     self.v.inimessg("Computing B* basis at confidence " + str(confthr))
     if cboobd != 0:
         self.v.messg(" and confidence boost " + str(cboobd))
     if yesants is None:
         # positive border of the cuts, then validated by tighten
         yesants = self.setcuts(sthr, cthr, forget, skip, cboobd)[0]
         self.v.messg("validating minimal antecedents...")
         yesants.tighten(self.v)
     # NOTE(review): antecedents computed with a nonzero cboobd (where
     # setcuts may skip closures) are stored under the same key that a
     # later cboobd == 0 call reads - confirm this is intended.
     if not forget: self.hist_Bstar[sthr, cthr] = yesants
     # a negative cboobd passes the "!= 0" tests above but not this one,
     # and then falls through to the unfiltered result
     if cboobd > 0:
         "filter according to boost bound"
         # the recursive call uses the default cboobd=0: plain basis
         filt = self.mineBstar(suppthr, confthr / cboobd, forget)
         outcome = corr()
         for cn in yesants:
             "check if any antecedent leaves a rule to cn - BRUTE FORCE ALGORITHM"
             outcome[cn] = []
             for an in yesants[cn]:
                 # goodsofar turns False once a competing rule blocks an -> cn
                 goodsofar = True
                 conf1 = float(cn.supp) / an.supp
                 for cn2 in filt:
                     for an2 in filt[cn2]:
                         if cn.difference(an) <= cn2 and an2 <= an:
                             totry = allsubsets(set(an.difference(an2)))
                             for ss in totry:
                                 # closed sub-antecedents extending an2
                                 an3 = self.close(ss.union(an2))
                                 if an3 < an:
                                     cn3 = cn.difference(an).union(an3)
                                     conf2 = float(
                                         self.close(cn3).supp) / an3.supp
                                     if conf1 <= conf2 * cboobd:
                                         goodsofar = False
                                         break  # breaks for ss and skips else
                             else:
                                 # reached only if no subset blocked the rule:
                                 # try enlarging the consequent by one item
                                 for elem in cn2.difference(cn):
                                     cn3 = set([elem]).union(cn)
                                     conf2 = float(
                                         self.close(cn3).supp) / an.supp
                                     if conf1 <= conf2 * cboobd:
                                         goodsofar = False
                                         break  # breaks for elem
                         if not goodsofar: break  # breaks for an2
                     if not goodsofar: break  # breaks for cn2
                 if goodsofar:
                     outcome[cn].append(an)
     else:
         outcome = yesants
     return outcome
Exemple #14
0
def api_corr(atdatetime):
    """Render 'corr.html' with the result of corr() on a demo database.

    The database path is built from *atdatetime*: its first four
    characters and the remainder fill the two slots of
    '../data/demo_{}_{}.sqlite', e.g. '20200505' gives
    '../data/demo_2020_0505.sqlite'. The result is also printed.
    """
    # NOTE(review): atdatetime goes into a filesystem path unchecked -
    # confirm that callers only pass trusted, well-formed values.
    head = atdatetime[0:4]
    tail = atdatetime[4:]
    result = corr('../data/demo_{}_{}.sqlite'.format(head, tail))
    print(result)
    return render_template('corr.html', records=result)