def filt(self, boost, clatt, rrseconf):
    "return corr with surviving rules under the boost filtering - SLOW"
    # Brute-force confidence-boost filter: keep each rule an -> cn only when
    # no related rule derived from rrseconf reaches confidence conf1/boost.
    #   boost:    confidence boost bound (> 1 presumably - TODO confirm)
    #   clatt:    closure lattice, provides .close() on itemsets
    #   rrseconf: corr mapping consequent closures -> their antecedents
    self.outcome = corr()
    for cn in self.ants.keys():
        "check if any antecedent leaves a rule to cn - BRUTE FORCE"
        self.outcome[cn] = []
        for an in self.ants[cn]:
            goodsofar = True
            # confidence of the candidate rule an -> cn
            # (supp presumably a support count on lattice nodes - TODO confirm)
            conf1 = float(cn.supp) / an.supp
            for cn2 in rrseconf.keys():
                for an2 in rrseconf[cn2]:
                    if cn.difference(an) <= cn2 and an2 <= an:
                        # an2 -> cn2 is "related": try every intermediate
                        # antecedent an3 with an2 <= an3 < an
                        for ss in allsubsets(set(an.difference(an2))):
                            an3 = ss.union(an2)
                            if an3 < an:
                                cn3 = cn.difference(an).union(an3)
                                conf2 = float(clatt.close(cn3).supp) / clatt.close(an3).supp
                                if conf1 <= conf2 * boost:
                                    # boost bound violated: rule does not survive
                                    goodsofar = False
                                    break  # breaks for ss and skips else
                        else:
                            # for-else: no intermediate antecedent disqualified
                            # the rule; try enlarging the consequent instead
                            for elem in cn2.difference(cn):
                                cn3 = set([elem]).union(cn)
                                conf2 = float(clatt.close(cn3).supp) / an.supp
                                if conf1 <= conf2 * boost:
                                    goodsofar = False
                                    break  # breaks for elem
                    if not goodsofar:
                        break  # breaks for an2
                if not goodsofar:
                    break  # breaks for cn2
            if goodsofar:
                self.outcome[cn].append(an)
    return self.outcome
def mineKrRR(self, suppthr, confthr, forget=False):
    """
    compute the representative rules for the given confidence
    using Kryszkiewicz IDA 2001 heuristic;
    will provide the iteration-free basis if called with conf 1
    thresholds expected in [0,1] to rescale here
    """
    if confthr == 1:
        # at confidence 1 the minimal generators give the basis directly
        return self.findmingens(suppthr)
    sthr = int(self.scale * suppthr)  # scaled support threshold (unused below)
    cthr = int(self.scale * confthr)  # scaled confidence threshold
    self.v.zero(100)
    self.v.inimessg("Computing representative rules at confidence " + str(confthr) + " using Kryszkiewicz's incomplete heuristic...")
    ants = corr()  # closure -> list of valid antecedent generators
    self.v.messg("computing antecedents...")
    for nod in self.closeds:
        self.v.tick()
        c1 = self.scale * nod.mxs   # scaled mxs of the closure (semantics of mxs: see node class)
        c2 = self.scale * nod.supp  # scaled support of the closure
        if c2 >= cthr * nod.mns and c1 < cthr * nod.mns:
            "this is the test of Prop 9 in Kryszkiewicz's paper"
            ants[nod] = []
            "computing valid antecedents ..."
            # candidate generators come from the predecessors and nod itself
            for node in self.preds[nod] + [nod]:
                for m in self.mingens[node]:
                    if m < nod and c1 < cthr * m.supp and cthr * m.supp <= c2 and c2 < cthr * m.mns:
                        ants[nod].append(m)
    self.v.zero(500)
    self.v.messg("...done.\n")
    return ants
def __init__(self, supp, datasetfile="", v=None, xmlinput=False, externalminer=True):
    "get the closures, find minimal generators, set their mns"
    # Delegate closure mining / lattice construction to the clattice base class.
    clattice.__init__(self, supp, datasetfile, v, xmlinput=xmlinput, externalminer=externalminer)
    self.mingens = corr()   # closure -> minimal generators, filled by findmingens()
    self.GDgens = None      # upon computing it, will be a corr()
    self.hist_cuts = {}     # cache: (scaled supp, scaled conf) -> (pos, neg) cuts
    self.hist_trnsl = {}    # cache, presumably for transversal results - TODO confirm
    # order matters: generators must exist before their mns values are set
    self.findmingens()
    self.setmns()
def setcuts(self, scsthr, sccthr, forget=False, skip=None, skippar=0):
    """
    supp/conf already scaled thrs in [0,self.scale]
    computes all cuts for that supp/conf thresholds, if not computed yet;
    keeps them in hist_cuts to avoid duplicate computation (unless forget);
    the cut for each node consists of two corrs, pos and neg border:
    hist_cuts : supp/conf thr -> (pos,neg)
    pos : node -> min ants, neg : node -> max nonants
    wish to be able to use it for a support different from self.minsupp
    (unclear whether it works now in that case)
    Things that probably do not work now:
    Bstar may require a support improvement wrt larger closures
    signaled by skip not None AND skippar (improv) not zero
    Kr/BC heuristics may require a conf-based check on nodes,
    signaled by skip not None and skippar (conf) not zero
    """
    if (scsthr, sccthr) in self.hist_cuts.keys():
        "use cached version if it is there"
        return self.hist_cuts[scsthr, sccthr]
    if skip is not None and skippar != 0:
        "risk of not all closures traversed, don't cache the result"
        forget = True
    else:
        "skip is None or skippar is zero, then no skipping"
        # 'never' is presumably a module-level predicate that always
        # returns False - TODO confirm
        skip = never
    cpos = corr()  # positive border: node -> minimal antecedents
    cneg = corr()  # negative border: node -> maximal non-antecedents
    self.v.zero(500)
    self.v.messg("...computing (non-)antecedents...")
    for nod in self.closeds:
        "review carefully and document this loop"
        if skip(nod, skippar, self.scale):
            "we will not compute rules from this closure"
            continue
        self.v.tick()
        # only closures meeting the scaled support threshold get a cut
        if self.scale * nod.supp >= self.nrtr * scsthr:
            pos, neg = self._cut(nod, sccthr)
            cpos[nod] = pos
            cneg[nod] = neg
    if not forget:
        self.hist_cuts[scsthr, sccthr] = cpos, cneg
    self.v.messg("...done;")
    return cpos, cneg
def mineClosureRR(self, suppthr, confthr, forget=False):
    """
    compute the representative rules for the given confidence;
    will provide the iteration-free basis if called with conf 1
    thresholds expected in [0,1] to rescale here
    """
    if confthr == 1:
        # closure-aware test below assumes strict confidence < 1
        self.v.inimessg("This algorithm works only for confidence thresholds strictly smaller than 1")
        return corr()
    sthr = int(self.scale*suppthr)  # scaled support threshold (unused below)
    cthr = int(self.scale*confthr)  # scaled confidence threshold
    self.v.zero(100)
    self.v.inimessg("Computing representative rules at confidence "+str(confthr)+" using our closure-aware approach...")
    ants = corr()  # closure -> list of valid antecedent closures
    self.v.messg("computing antecedents...")
    for nod in self.closeds:
        self.v.tick()
        # mxgs: max support of a predecessor that still yields a rule of
        # sufficient confidence towards nod
        mxgs = 0
        foundyesants = False
        for node in self.preds[nod]:
            if mxgs < node.supp and cthr*node.supp <= self.scale*nod.supp:
                mxgs = node.supp
                foundyesants = True
        if not foundyesants:
            # no predecessor qualifies: inspect nod's own minimal generators
            # (computed as transversal of the faces wrt immediate predecessors)
            mingens = self._faces(nod, list(self.impreds[nod])).transv().hyedges
            if len(mingens) > 1:
                mxgs = nod.supp
            elif len(mingens[0]) < nod.card:
                # single generator strictly smaller than the closure itself
                mxgs = nod.supp
        c1 = self.scale*nod.mxs
        c2 = self.scale*nod.supp
        if cthr*mxgs > c1 and mxgs > nod.supp:
            "this is the test of Prop 3 in our IEEE Trans paper"
            ants[nod] = []
            "computing valid antecedents (Prop 4 in our IEEE Trans paper)"
            for node in self.preds[nod]:
                if c1 < cthr*node.supp and cthr*node.supp <= c2 and c2 < cthr*node.bmns:
                    ants[nod].append(node)
    self.v.zero(500)
    self.v.messg("...done.\n")
    return ants
def __init__(self, supp, datasetfile="", v=None, xmlinput=False, externalminer=True):
    "get the closures, find minimal generators, set their mns"
    # Delegate closure mining / lattice construction to the clattice base class.
    clattice.__init__(self, supp, datasetfile, v, xmlinput=xmlinput, externalminer=externalminer)
    self.mingens = corr()  # closure -> minimal generators, filled by findmingens()
    # order matters: generators must exist before their mns values are set
    self.findmingens()
    self.setmns()
def mineRR(self, suppthr, confthr, forget=False):
    """
    compute the representative rules for the given confidence;
    will provide the iteration-free basis if called with conf 1
    thresholds expected in [0,1] to rescale here
    """
    if confthr == 1:
        # at confidence 1 the minimal generators give the basis directly
        return self.findmingens(suppthr)
    sthr = int(self.scale * suppthr)  # scaled support threshold
    cthr = int(self.scale * confthr)  # scaled confidence threshold
    if (sthr, cthr) in self.hist_RR.keys():
        # reuse cached result for these scaled thresholds
        return self.hist_RR[sthr, cthr]
    self.v.zero(100)
    self.v.inimessg("Computing representative rules at confidence " + str(confthr) + "...")
    # negative border of the cuts: maximal non-antecedents per closure
    nonants = self.setcuts(sthr, cthr, forget)[1]
    ants = corr()
    self.v.messg("computing potential antecedents...")
    for nod in self.closeds:
        """
        careful, assuming nodes ordered by size here
        find all free noncl antecs as cut transv
        get associated data by search on mingens
        alternative algorithms exist to avoid the slow
        call to _findinmingens - must try them
        """
        self.v.tick()
        if True:
            "to add here the support constraint if convenient"
            ants[nod] = []
            for m in self._faces(nod, nonants[nod]).transv().hyedges:
                if m < nod:
                    mm = self._findinmingens(nod, m)
                    if mm is None:  # FIX: was "== None"; identity test is correct
                        self.v.errmessg(str(m) + " not found among mingens at " + str(nod))
                    ants[nod].append(mm)
    self.v.zero(500)
    self.v.messg("...checking valid antecedents...")
    ants.tighten(self.v)
    # BUG FIX: hist_RR was checked above but never populated, so the cache
    # lookup was dead code; store the result unless asked to forget,
    # mirroring setcuts() and mineBstar().
    if not forget:
        self.hist_RR[sthr, cthr] = ants
    self.v.messg("...done.\n")
    return ants
def mineRR(self, suppthr, confthr, forget=False):
    """
    compute the representative rules for the given confidence;
    will provide the iteration-free basis if called with conf 1
    thresholds expected in [0,1] to rescale here
    """
    if confthr == 1:
        # at confidence 1 the minimal generators give the basis directly
        return self.findmingens(suppthr)
    sthr = int(self.scale * suppthr)  # scaled support threshold (unused below)
    cthr = int(self.scale * confthr)  # scaled confidence threshold
    self.v.zero(100)
    self.v.inimessg("Computing representative rules at confidence " + str(confthr) + " using our heuristic...")
    ants = corr()  # closure -> list of valid antecedent generators
    self.v.messg("computing antecedents...")
    for nod in self.closeds:
        self.v.tick()
        mxgs = 0
        # a generator strictly inside nod already makes nod's own support count
        for m in self.mingens[nod]:
            if m < nod:
                mxgs = nod.supp
                break
        # otherwise take the largest predecessor support still giving a
        # confident-enough rule towards nod
        for node in self.preds[nod]:
            if mxgs < node.supp and cthr * node.supp <= self.scale * nod.supp:
                mxgs = node.supp
        c1 = self.scale * nod.mxs
        c2 = self.scale * nod.supp
        if cthr * mxgs > c1:
            "this is the test of Prop 5 in our EGC paper"
            ants[nod] = []
            "computing valid antecedents (Prop 6 in our EGC paper)"
            for node in self.preds[nod] + [nod]:
                for m in self.mingens[node]:
                    if m < nod and c1 < cthr * m.supp and cthr * m.supp <= c2 and c2 < cthr * m.mns:
                        ants[nod].append(m)
    self.v.zero(500)
    self.v.messg("...done.\n")
    return ants
def mineQrRR(self, suppthr, confthr, forget=False):
    """
    ditto, just that here we use our slight variant of the
    incomplete Krysz IDA 2001 heuristic
    check whether this version finds empty antecedents - yes it does
    """
    sthr = int(self.scale * suppthr)  # scaled support threshold
    cthr = int(self.scale * confthr)  # scaled confidence threshold
    ###### if (sthr,cthr) in self.hist_KrRR.keys():
    ######     return self.hist_KrRR[sthr,cthr]
    self.v.zero(100)
    self.v.inimessg(
        "Computing representative rules at confidence " + str(confthr) +
        " using our slight variant of Kryszkiewicz's incomplete heuristic..."
    )
    # negative border: maximal non-antecedents per closure, with the
    # conf-based skip heuristic enabled (skip is a module-level predicate)
    nonants = self.setcuts(sthr, cthr, forget, skip, cthr)[1]
    ants = corr()
    self.v.messg("computing potential antecedents...")
    for nod in self.closeds:
        """
        see comments same place in mineRR
        I had here a test self.scale*nod.supp >= sthr*self.cl.nrtr
        """
        self.v.tick()
        if self.scale*nod.supp >= cthr*nod.mns and \
           self.scale*nod.mxs < cthr*nod.mns:
            "that was the test of Prop 9 - might add here supp constraint"
            ants[nod] = []
            for m in self._faces(nod, nonants[nod]).transv().hyedges:
                if m < nod:
                    mm = self._findinmingens(nod, m)
                    if mm is None:  # FIX: was "== None"
                        # CONSISTENCY FIX: report through the verbosity object
                        # as mineRR does, instead of a bare Python 2 print
                        # statement (which also breaks under Python 3)
                        self.v.errmessg(str(m) + " not found among mingens at " + str(nod))
                    ants[nod].append(mm)
    self.v.zero(500)
    self.v.messg("...checking valid antecedents...")
    ants.tighten(self.v)
    self.v.messg("...done.\n")
    return ants
def add_eval(self, clatt):
    "clift, clev, maybe cboost... - SLOW"
    # For every rule an -> cn in self.ants, compute closure-based lift
    # (clift = conf1 / best competing confidence) and leverage
    # (clev = conf1 - best competing confidence) against the
    # best-confidence rule anr -> cnr over all proper sub-antecedents;
    # both are None when no competing rule exists.  Brute force.
    #   clatt: closure lattice providing .close() on itemsets
    # Returns self.outcome: corr mapping cn -> [(an, clift, clev), ...].
    self.outcome = corr()
    for cn in self.ants.keys():
        "check if any antecedent leaves a rule to cn - BRUTE FORCE"
        if self.ants[cn]:
            # only consequents with at least one antecedent get an entry
            self.outcome[cn] = []
        for an in self.ants[cn]:
            conf1 = float(cn.supp) / an.supp  # confidence of an -> cn
            cnr = cn.difference(an)
            mxconfsub = 0  # best confidence among competing sub-rules
            for anr in allsubsets(set(an)):
                "find max conf of a rule anr -> cnr"
                canr = clatt.close(anr)
                if canr == an:
                    # same closed antecedent: not a competing rule
                    continue
                cc = cnr.union(anr)
                ccc = clatt.close(cc)
                conf2 = float(ccc.supp) / canr.supp
                if conf2 > mxconfsub:
                    mxconfsub = conf2
            if mxconfsub > 0:
                clift = conf1 / mxconfsub
                clev = conf1 - mxconfsub
            else:
                # FIX: removed dead "clant = None" assignment - clant was never
                # read (the tuple below carries only clift and clev); also
                # dropped the stale commented-out code tracking mxconfant/clcnr
                clift = None
                clev = None
            self.outcome[cn].append((an, clift, clev))
    return self.outcome
def findGDgens(self, suppthr=-1):
    """
    compute the GD antecedents - only proper antecedents returned
    ToDo:
    as in findmingens, optional suppthr in [0,1] to impose an extra
    level of iceberg if not present, use the support found in closures file
    check sthr in self.hist_GD.keys() before computing it
    when other supports handled, remember to memorize computed ones
    """
    if self.GDgens:
        # already computed, nothing to do (result kept in self.GDgens)
        return
    self.GDgens = corr()
    if True:
        # placeholder for future suppthr handling (see ToDo above);
        # sthr is computed here but not used yet
        sthr = self.scale*self.minsupp/self.nrtr
    self.v.zero(250)
    self.v.inimessg("Filtering minimal generators to obtain the Guigues-Duquenne basis...")
    for c1 in self.closeds:
        self.v.tick()
        self.GDgens[c1] = set([])
        for g1 in self.mingens[c1]:
            # saturate g1: repeatedly absorb closures c2 whose generator g2
            # is already strictly contained in the growing set
            g1new = set(g1)
            changed = True
            while changed:
                changed = False
                for c2 in self.preds[c1]:
                    for g2 in self.mingens[c2]:
                        if g2 < g1new and not c2 <= g1new:
                            g1new.update(c2)
                            changed = True
            # revise() presumably rebuilds an object of g1's type from the
            # saturated set - TODO confirm its semantics
            g1new = g1.copy().revise(g1new)
            if not c1 <= g1new:
                "skip it if subsumed or if equal to closure"
                for g3 in self.GDgens[c1]:
                    if g3 <= g1new:
                        break
                else:
                    "else of for: not subsumed"
                    self.GDgens[c1].add(g1new)
    self.v.messg("...done.\n")
import scipy as sp import corr import time def ccf(data): return np.array([sp.correlate(p, np.concatenate((p,p))) for p in data]) z = np.zeros(1000000).reshape(1000,1000) for i in range(1000): for j in range(1000): z[i,j]=i^j #z = np.array([[2.0,3.0,1.0,4.0,1.3,4.5,6.4,7.6]]) #print 2,libcorr.square(2) # print z x = np.array(z,dtype=np.float32) start = time.time() #Numpy z = ccf(x) numpy_time = time.time()- start y=x #Cpp start = time.time() corr.corr(y) cpp_time = time.time()- start print "Numpy - %(numpy_time)f \nCpp - %(cpp_time)f" % locals() #print x # print z # print y
def mineBstar(self, suppthr, confthr, forget=False, cboobd=0):
    """
    compute the Bstar basis for the given confidence and, possibly,
    conf boost; if present, use cboost bound to spare exploring
    some closures (but that might not work now);
    thresholds in [0,1], rescaled into [0,self.x.scale] inside
    TODO: CHECK SUPPTHR COMPATIBLE WITH X.SUPPTHR
    NOW THIS IS BEING DONE ELSEWHERE BUT MAYBE SHOULD BE HERE
    """
    sthr = int(self.scale * suppthr)  # scaled support threshold
    cthr = int(self.scale * confthr)  # scaled confidence threshold
    yesants = None
    if (sthr, cthr) in self.hist_Bstar.keys():
        yesants = self.hist_Bstar[sthr, cthr]
        if cboobd == 0:
            # cached and no boost filtering requested: done
            return yesants
    self.v.zero(100)
    self.v.inimessg("Computing B* basis at confidence " + str(confthr))
    if cboobd != 0:
        self.v.messg(" and confidence boost " + str(cboobd))
    if yesants is None:
        # cache miss: positive border of the cuts gives the candidate basis
        yesants = self.setcuts(sthr, cthr, forget, skip, cboobd)[0]
        self.v.messg("validating minimal antecedents...")
        yesants.tighten(self.v)
        if not forget:
            self.hist_Bstar[sthr, cthr] = yesants
    if cboobd > 0:
        "filter according to boost bound"
        # recursive call at the lowered confidence provides the reference
        # rule set against which the boost condition is checked
        filt = self.mineBstar(suppthr, confthr / cboobd, forget)
        outcome = corr()
        for cn in yesants:
            "check if any antecedent leaves a rule to cn - BRUTE FORCE ALGORITHM"
            outcome[cn] = []
            for an in yesants[cn]:
                goodsofar = True
                conf1 = float(cn.supp) / an.supp  # confidence of an -> cn
                for cn2 in filt:
                    for an2 in filt[cn2]:
                        if cn.difference(an) <= cn2 and an2 <= an:
                            # an2 -> cn2 is "related": try every intermediate
                            # closed antecedent an3 with an2 <= an3 < an
                            totry = allsubsets(set(an.difference(an2)))
                            for ss in totry:
                                an3 = self.close(ss.union(an2))
                                if an3 < an:
                                    cn3 = cn.difference(an).union(an3)
                                    conf2 = float(self.close(cn3).supp) / an3.supp
                                    if conf1 <= conf2 * cboobd:
                                        # boost bound violated: drop an -> cn
                                        goodsofar = False
                                        break  # breaks for ss and skips else
                            else:
                                # for-else: no intermediate antecedent
                                # disqualified the rule; try enlarging
                                # the consequent one item at a time
                                for elem in cn2.difference(cn):
                                    cn3 = set([elem]).union(cn)
                                    conf2 = float(self.close(cn3).supp) / an.supp
                                    if conf1 <= conf2 * cboobd:
                                        goodsofar = False
                                        break  # breaks for elem
                        if not goodsofar:
                            break  # breaks for an2
                    if not goodsofar:
                        break  # breaks for cn2
                if goodsofar:
                    outcome[cn].append(an)
    else:
        outcome = yesants
    return outcome
def api_corr(atdatetime):
    """Render corr.html with the records mined from the demo database
    selected by *atdatetime* (first four chars = year, rest = day tag)."""
    year, day_tag = atdatetime[:4], atdatetime[4:]
    db_path = '../data/demo_%s_%s.sqlite' % (year, day_tag)
    records = corr(db_path)
    print(records)
    return render_template('corr.html', records=records)