def updateConstraintEvaluation(self, G, PD, id, condition=1):
    """
    Re-evaluate a possible candidate and refresh its stored measures.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of candidate to the updated
    condition : int, optional
        1 if codelength does not changes, else 2, by default 1
    """
    if condition == 1:
        # Codelength unchanged: only the description length (which depends on
        # the number of constraints) and hence I need refreshing.
        pat = self.Data[id]['Pat']
        if self.gtype == 'U':
            newDL = computeDescriptionLength(
                dlmode=8, excActionType=False, l=self.l, gtype=self.gtype,
                W=pat.NCount, kw=pat.ECount, C=len(PD.lprevUpdate),
                kws=pat.kws, isSimple=self.isSimple
            )
        else:
            # NOTE(review): l is hard-coded to 6 here while the undirected
            # branch uses self.l — preserved as-is, confirm intended.
            newDL = computeDescriptionLength(
                dlmode=8, excActionType=False, l=6, gtype=self.gtype,
                WI=pat.inNL, WO=pat.outNL, kw=pat.ECount,
                C=len(PD.lprevUpdate), kws=pat.kws, isSimple=self.isSimple
            )
        pat.setDL(newDL)
        pat.setI(computeInterestingness(pat.IC_dssg, newDL, mode=self.imode))
    elif condition == 2:
        # Codelength changed as well: redo the full pair evaluation
        # (id is a pair of constraint identifiers here).
        self.evaluateConstraintPair(G, PD, id[0], id[1])
    return
def computeParametersU(self, P, PD, k1, k2):
    """
    Utility function to compute parameters for a potential candidate
    constraint pair when the input graph is undirected.

    Parameters
    ----------
    P : Pattern
        Input pattern obtained by merging two constraints
    PD : PDClass
        Background Distribution
    k1 : int
        identifier of first constraint
    k2 : int
        identifier of second constraint
    """
    Params = dict()
    Params['Pat'] = P
    # Lambda the merged pattern would receive if constraints k1 and k2 were
    # dropped and replaced by this single merged constraint (case 3,
    # val_return='return' computes without committing to the distribution).
    nlambda = PD.updateDistribution(Params['Pat'].G, idx=None, val_return='return', case=3, dropLidx=[k1, k2])  #// TODO: handle this issue, code it !!!!!!!!
    # Codelength of the merged subgraph under the current distribution.
    Params['codeLengthC'] = getCodeLengthParallel(Params['Pat'].G, PD, gtype=self.gtype, case=2, NL=Params['Pat'].NL, isSimple=self.isSimple)
    # Codelength after dropping k1/k2 and applying the new merged lambda (case 5).
    Params['codeLengthCprime'] = getCodeLengthParallel(Params['Pat'].G, PD, gtype=self.gtype, case=5, NL=Params['Pat'].NL, isSimple=self.isSimple, dropLidx=[k1, k2], nlambda=nlambda)
    # Information content = saving in codelength from replacing the pair.
    Params['Pat'].setIC_dssg(Params['codeLengthC'] - Params['codeLengthCprime'])
    # NOTE(review): l is hard-coded to 6 here while other call sites use
    # self.l — confirm intended.
    Params['Pat'].setDL(computeDescriptionLength(dlmode=8, excActionType=False, l=6, gtype=self.gtype, W=Params['Pat'].NCount, kw=Params['Pat'].ECount, C=len(PD.lprevUpdate), kws=Params['Pat'].kws, isSimple=self.isSimple))
    Params['Pat'].setI(computeInterestingness(Params['Pat'].IC_dssg, Params['Pat'].DL, mode=self.imode))
    # Only candidates with positive interestingness are recorded.
    if Params['Pat'].I > 0:
        Params['Pat'].setPrevOrder((int(k1), int(k2)))
        Params['Pat'].setPatType('merge')
        Params['Pat'].setLambda(nlambda)
        # Sanity check: two patterns added in the current state are not
        # expected to be merge candidates together.
        if int(k1) in self.curAdds and int(k2) in self.curAdds:
            raise Exception('ADD ADD MERGE EVALUATE HUA')
        self.Data[(k1, k2)] = Params
    return
def updateConstraintEvaluation(self, G, PD, id, condition=1):
    """
    Re-evaluate a possible candidate (remove action) and refresh its measures.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of candidate to the updated
    condition : int, optional
        1 if codelength does not changes, else 2, by default 1

    *Things to note: P shall have the prev_order (identifier of the
    constraint) correct.
    """
    if condition == 1:
        # Only the description length depends on the constraint count,
        # so refresh DL and the interestingness derived from it.
        candidate = self.Data[id]['Pat']
        freshDL = computeDescriptionLength(dlmode=4, gtype=self.gtype,
                                           C=len(PD.lprevUpdate),
                                           l=self.l, excActionType=False)
        candidate.setDL(freshDL)
        candidate.setI(computeInterestingness(candidate.IC_dssg, freshDL, mode=2))
    elif condition == 2:
        # Codelength changed too: run the full constraint evaluation again.
        self.evaluateConstraint(G, PD, id)
    return
def updateConstraintEvaluation(self, G, PD, id, condition=1):
    """
    Re-evaluate a possible split candidate and refresh its stored measures.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of candidate to the updated
    condition : int, optional
        1 if codelength does not changes, else 2, by default 1

    #? Only patterns from previous states can be split, and any action
    #? results in connected pattern(s), so the potential candidate list is
    #? only rebuilt at the start of a state. For any other action there are
    #? two possibilities per candidate: either only the description length
    #? changes (condition == 1), or both codelength and description length
    #? change (condition == 2).
    """
    if condition == 1:
        entry = self.Data[id]
        pat = entry['Pat']
        # DL depends on the constraint count; IC_dssg is unchanged.
        if self.gtype == 'U':
            freshDL = computeDescriptionLength(
                dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                WS=pat.NCount, compos=entry['compos'],
                excActionType=False, l=self.l, isSimple=self.isSimple)
        else:
            freshDL = computeDescriptionLength(
                dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                WIS=pat.InNCount, WOS=pat.OutNCount,
                compos=entry['compos'],
                excActionType=False, l=self.l, isSimple=self.isSimple)
        pat.setDL(freshDL)
        pat.setI(computeInterestingness(pat.IC_dssg, freshDL, mode=2))
        # Every component pattern shares the parent's DL and I.
        for comp in entry['compos'].values():
            comp.setDL(pat.DL)
            comp.setI(pat.I)
    elif condition == 2:
        # Full re-evaluation, dispatched on graph type.
        if self.gtype == 'U':
            self.processAsU(G, PD, id)
        elif self.gtype == 'D':
            self.processAsD(G, PD, id)
    return
def getReducedComponentU(self, G, PD, FinalParams, Lid, k):
    """
    Utility function used when the input graph is undirected to remove
    nodes from each component of the candidate split.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    FinalParams : dict
        current value of parameters corresponding to the current split of
        the candidate constraint
    Lid : int
        identifier for the candidate constraint for split
    k : int
        identifier for the component number of the candidate after split

    Returns
    -------
    dict
        FinalParams: Updated after reducing a component
    """
    # Greedy hill-climb: repeatedly remove the single node of component k
    # whose removal yields the largest increase in interestingness, until
    # no removal improves on the current value.
    doshrink = True
    while doshrink:
        doshrink = False
        bestRNode = None
        bestCLprime = None
        bestI = FinalParams['Pat'].I
        for node in FinalParams['compos'][k].NL:
            # Candidate codelength if `node` were removed from the component.
            curCLprime = FinalParams['codeLengthCprime'] - self.computeCLgainRemoveNodeU(G, PD, list(set(FinalParams['compos'][k].NL) - set([node])), node, [Lid])
            curIC = FinalParams['codeLengthC'] - curCLprime
            # Corresponding description-length change.
            curDL = FinalParams['Pat'].DL - self.getDescriptionLengthChangeU(FinalParams['compos'][k], node, FinalParams['Pat'].NCount, FinalParams['NodesInc'])
            curI = computeInterestingness(curIC, curDL, mode=2)
            if curI > bestI:
                bestRNode = node
                bestCLprime = curCLprime
                bestI = curI
        if bestI > FinalParams['Pat'].I:
            # Commit the best removal and refresh all derived measures.
            FinalParams['codeLengthCprime'] = bestCLprime
            FinalParams['compos'][k].removeNode(bestRNode)
            FinalParams['Pat'].setIC_dssg(FinalParams['codeLengthC'] - FinalParams['codeLengthCprime'])
            FinalParams['Pat'].setDL(computeDescriptionLength(dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype, WS=FinalParams['Pat'].NCount, compos=FinalParams['compos'], excActionType=False, l=self.l, isSimple=self.isSimple))
            FinalParams['Pat'].setI(computeInterestingness(FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=2))
            FinalParams['NodesInc'] -= 1
            FinalParams['excludedNL'].append(bestRNode)
            doshrink = True
    return FinalParams
def getReducedComponentD(self, G, PD, FinalParams, Lid, k):
    """
    Utility function used when the input graph is directed to remove nodes
    from each component of the candidate split.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    FinalParams : dict
        current value of parameters corresponding to the current split of
        the candidate constraint
    Lid : int
        identifier for the candidate constraint for split
    k : int
        identifier for the component number of the candidate after split

    Returns
    -------
    dict
        FinalParams: Updated after reducing a component
    """
    # Greedy hill-climb over both node lists: each round picks the single
    # in- or out-node whose removal most increases interestingness.
    doshrink = True
    count_remove_nodes = 0
    while doshrink:
        doshrink = False
        bestRNode = None
        bestCLprime = None
        bestI = FinalParams['Pat'].I
        bestType = None
        for node in FinalParams['compos'][k].inNL:
            # Codelength if this in-node were removed (direction flag 1).
            curCLprime = FinalParams['codeLengthCprime'] - self.computeCLgainRemoveNodeD(G, PD, FinalParams['compos'][k].outNL, node, [Lid], 1)  #// Todo: check this code
            curIC = FinalParams['codeLengthC'] - curCLprime
            curDL = FinalParams['Pat'].DL - self.getDescriptionLengthChangeD(FinalParams['compos'][k], node, FinalParams['Pat'].InNCount, FinalParams['inNodesInc'], 1)  #// Todo: check this code
            curI = computeInterestingness(curIC, curDL, mode=2)
            if curI > bestI:
                bestRNode = node
                bestCLprime = curCLprime
                bestI = curI
                bestType = 'in'
        for node in FinalParams['compos'][k].outNL:
            # Codelength if this out-node were removed (direction flag 2).
            curCLprime = FinalParams['codeLengthCprime'] - self.computeCLgainRemoveNodeD(G, PD, FinalParams['compos'][k].inNL, node, [Lid], 2)  #// Todo: check this code
            curIC = FinalParams['codeLengthC'] - curCLprime
            curDL = FinalParams['Pat'].DL - self.getDescriptionLengthChangeD(FinalParams['compos'][k], node, FinalParams['Pat'].OutNCount, FinalParams['outNodesInc'], 2)  #// Todo: check this code
            curI = computeInterestingness(curIC, curDL, mode=2)
            if curI > bestI:
                bestRNode = node
                bestCLprime = curCLprime
                bestI = curI
                bestType = 'out'
        if bestI > FinalParams['Pat'].I:
            FinalParams['codeLengthCprime'] = bestCLprime
            # NOTE: substring test; bestType is exactly 'in' or 'out' here,
            # and 'in' is not a substring of 'out', so this is equivalent
            # to an equality check.
            if 'in' in bestType:
                FinalParams['compos'][k].removeInNode(bestRNode)
                FinalParams['inNodesInc'] -= 1
                FinalParams['excludedInNL'].append(bestRNode)
            else:
                FinalParams['compos'][k].removeOutNode(bestRNode)
                FinalParams['outNodesInc'] -= 1
                FinalParams['excludedOutNL'].append(bestRNode)
            # Refresh derived measures after committing the removal.
            FinalParams['Pat'].setIC_dssg(FinalParams['codeLengthC'] - FinalParams['codeLengthCprime'])
            FinalParams['Pat'].setDL(computeDescriptionLength(dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype, WIS=FinalParams['Pat'].InNCount, WOS=FinalParams['Pat'].OutNCount, compos=FinalParams['compos'], excActionType=False, l=self.l, isSimple=self.isSimple))
            FinalParams['Pat'].setI(computeInterestingness(FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=2))
            count_remove_nodes += 1
            doshrink = True
    return FinalParams
def processAsU(self, G, PD, id):
    """
    Utility function for shrink action when the input graph is undirected.
    This function identifies the final subgraph from a possible candidate
    shrink and computes the corresponding measures.

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of a constraint to be evaluated
    """
    NL = PD.lprevUpdate[id][1]
    H = G.subgraph(NL)
    components = nx.connected_component_subgraphs(H, copy=True)
    fcomponents = dict()
    it = 0
    for comp in components:
        if comp.number_of_nodes() > self.minsize:
            fcomponents[it] = comp
            # BUGFIX: the counter was never incremented, so multiple
            # qualifying components silently overwrote each other at key 0
            # and the len(fcomponents) == 1 guard below always passed
            # (cf. the split version of processAsU, which increments it).
            it += 1
    if len(fcomponents) == 1:  # * if valid components is more than 1 than split shall be performed
        baseParams = dict()
        baseParams['Pat'] = Pattern(H)
        # Codelength of the full (un-shrunk) constraint subgraph under the
        # current distribution; Cprime starts equal (nothing removed yet).
        baseParams['codeLengthC'] = getCodeLengthParallel(H, PD, gtype=self.gtype, case=2, isSimple=self.isSimple, NL=baseParams['Pat'].NL)
        baseParams['codeLengthCprime'] = baseParams['codeLengthC']
        baseParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - baseParams['codeLengthCprime'])
        baseParams['Pat'].setDL(computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount, W=baseParams['Pat'].NCount, kw=baseParams['Pat'].ECount, isSimple=self.isSimple, kws=baseParams['Pat'].kws))
        baseParams['Pat'].setI(computeInterestingness(baseParams['Pat'].IC_dssg, baseParams['Pat'].DL, mode=self.imode))
        curPat = fcomponents[0]
        bestParams = None
        if curPat.number_of_nodes() < baseParams['Pat'].NCount:
            # The single valid component is already smaller than the
            # constraint: start the shrink from that component.
            bestParams = dict()
            bestParams['Pat'] = Pattern(curPat)
            bestParams['codeLengthCprime'] = self.computeCodeLengthShrinkU(G, PD, 2, baseParams, bestParams, id)
            bestParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - bestParams['codeLengthCprime'])
            bestParams['Pat'].setDL(computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount, W=bestParams['Pat'].NCount, kw=bestParams['Pat'].ECount, isSimple=self.isSimple, kws=bestParams['Pat'].kws))
            bestParams['Pat'].setI(computeInterestingness(bestParams['Pat'].IC_dssg, bestParams['Pat'].DL, mode=self.imode))
        else:
            bestParams = baseParams
        # * Now reduce the only component in fcomponents
        FinalParams = self.getReducedSubgraphU(G, PD, baseParams, bestParams, id)
        # SPat holds the shrunk pattern; Pat is restored to the original
        # (un-shrunk) constraint pattern.
        FinalParams['SPat'] = FinalParams['Pat'].copy()
        FinalParams['Pat'] = baseParams['Pat'].copy()
        if bestParams['Pat'].I > FinalParams['SPat'].I:
            # NOTE(review): only 'Pat' is tagged in this branch while 'SPat'
            # is tagged unconditionally below — confirm this asymmetry.
            FinalParams['Pat'].setPrevOrder(id)
            FinalParams['Pat'].setPatType('shrink')
        FinalParams['SPat'].setPrevOrder(id)
        FinalParams['SPat'].setPatType('shrink')
        self.Data[id] = FinalParams
    return
def getReducedSubgraphD(self, G, PD, baseParams, bestParams, Lid): """ Utility function used when the input graph is directed to remove nodes from subgraph of the candidate shrink Parameters ---------- G : Networkx Graph Input Graph PD : PDClass Background Distribution baseParams : dict base value of prameters corresponding to the current shrink candidate, i.e., before shrink bestParams : dict current value of prameters corresponding to the current shrink candidate, i.e., after removing some disconnected nodes (if any) Lid : int identifier for the the candidate constraint for split Returns ------- dict FinalParams: Updated after reducing a subgraph """ doshrink = True count_remove_nodes = 0 FinalParams = bestParams while doshrink: #continue removing nodes one by one till no increase in IG doshrink = False for node in FinalParams['Pat'].inNL: curParams = dict() curParams['Pat'] = FinalParams['Pat'].copy() curParams['Pat'].removeInNode(node) curParams['codeLengthCprime'] = FinalParams[ 'codeLengthCprime'] - self.computeCLgainRemoveNodeD( G, PD, curParams['Pat'].outNL, node, [Lid], 1) curParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - curParams['codeLengthCprime']) curParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WIS=baseParams['Pat'].InNCount, WOS=baseParams['Pat'].OutNCount, WI=curParams['Pat'].InNL, WO=curParams['Pat'].OutNL, kw=curParams['Pat'].ECount, isSimple=self.isSimple, kws=curParams['Pat'].kws)) curParams['Pat'].setI( computeInterestingness(curParams['Pat'].IC_dssg, curParams['Pat'].DL, mode=self.imode)) if curParams['Pat'].I > bestParams['Pat'].I: bestParams = curParams for node in FinalParams['Pat'].outNL: curParams = dict() curParams['Pat'] = FinalParams['Pat'].copy() curParams['Pat'].removeOutNode(node) curParams['codeLengthCprime'] = FinalParams[ 'codeLengthCprime'] - self.computeCLgainRemoveNodeD( G, PD, curParams['Pat'].inNL, node, [Lid], 2) curParams['Pat'].setIC_dssg(baseParams['codeLengthC'] 
- curParams['codeLengthCprime']) curParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WIS=baseParams['Pat'].InNCount, WOS=baseParams['Pat'].OutNCount, WI=curParams['Pat'].InNL, WO=curParams['Pat'].OutNL, kw=curParams['Pat'].ECount, isSimple=self.isSimple, kws=curParams['Pat'].kws)) curParams['Pat'].setI( computeInterestingness(curParams['Pat'].IC_dssg, curParams['Pat'].DL, mode=self.imode)) if curParams['Pat'].I > bestParams['Pat'].I: bestParams = curParams if bestParams['Pat'].I > FinalParams['Pat'].I: FinalParams = bestParams count_remove_nodes += 1 doshrink = True if count_remove_nodes > 0 or ( FinalParams['Pat'].InNCount < baseParams['Pat'].InNCount and FinalParams['Pat'].OutNCount < baseParams['Pat'].OutNCount): FinalParams['codeLengthC'] = baseParams['codeLengthC'] FinalParams['Pat'].setLambda( PD.updateDistribution(FinalParams['Pat'].G, idx=None, val_return='return', case=3, dropLidx=[Lid ])) #// Todo: computeNewLambda
def evaluateConstraint(self, G, PD, id):
    """
    function to evaluate if a constraint is a feasible candidate for remove

    Parameters
    ----------
    G : Networkx Graph
        Input Graph
    PD : PDClass
        Background Distribution
    id : int
        identifier of a constraint to be evaluated
    """
    if self.gtype == 'U':
        Params = dict()
        # Node list stored for this constraint in the background distribution.
        NL = PD.lprevUpdate[id][1]
        H = G.subgraph(NL)
        Params['Pat'] = Pattern(H)
        Params['codeLengthC'] = getCodeLengthParallel(Params['Pat'].G, PD, NL=Params['Pat'].NL, case=2, isSimple=self.isSimple, gtype=self.gtype)  #now case is 1 as none of the lambdas shall be removed
        Params['codeLengthCprime'] = getCodeLengthParallel(G, PD, NL=NL, case=4, dropLidx=[id], isSimple=self.isSimple, gtype=self.gtype)  #now case is 4 as one lambda is to be dropped to compute new codelength
        # IC = saving in codelength achieved by dropping this constraint.
        Params['Pat'].setIC_dssg(Params['codeLengthC'] - Params['codeLengthCprime'])
        # NOTE(review): unlike the directed branch below, excActionType is
        # not passed here — confirm the asymmetry is intended.
        Params['Pat'].setDL(computeDescriptionLength(dlmode=4, gtype=self.gtype, C=len(PD.lprevUpdate), l=self.l))
        Params['Pat'].setI(computeInterestingness(Params['Pat'].IC_dssg, Params['Pat'].DL, mode=self.imode))
        # Only record the candidate when removal is interesting (I > 0).
        if Params['Pat'].I > 0:
            Params['Pat'].setPrevOrder(id)
            Params['Pat'].setPatType('remove')
            Params['Pat'].setLambda(PD.lprevUpdate[id][0])
            self.Data[id] = Params
    else:
        Params = dict()
        # Directed constraints store separate in/out node lists.
        inNL = PD.lprevUpdate[id][1]
        outNL = PD.lprevUpdate[id][2]
        HD = getDirectedSubgraph(G, inNL, outNL, self.isSimple)
        Params['Pat'] = Pattern(HD)
        Params['codeLengthC'] = getCodeLengthParallel(G, PD, inNL=inNL, outNL=outNL, case=1, isSimple=self.isSimple, gtype=self.gtype)  #now case is 1 as none of the lambdas shall be removed
        Params['codeLengthCprime'] = getCodeLengthParallel(G, PD, inNL=inNL, outNL=outNL, case=4, dropLidx=[id], isSimple=self.isSimple, gtype=self.gtype)  #now case is 4 as one lambda is to be dropped to compute new codelength
        Params['Pat'].setIC_dssg(Params['codeLengthC'] - Params['codeLengthCprime'])
        Params['Pat'].setDL(computeDescriptionLength(dlmode=4, gtype=self.gtype, C=len(PD.lprevUpdate), l=self.l, excActionType=False))
        Params['Pat'].setI(computeInterestingness(Params['Pat'].IC_dssg, Params['Pat'].DL, mode=self.imode))
        # Only record the candidate when removal is interesting (I > 0).
        if Params['Pat'].I > 0:
            Params['Pat'].setPrevOrder(id)
            Params['Pat'].setPatType('remove')
            Params['Pat'].setLambda(PD.lprevUpdate[id][0])
            self.Data[id] = Params
val_return='return', case=3, dropLidx=[Lid ])) #// Todo: computeNewLambda FinalParams['codeLengthCprime'] = self.computeCodeLengthShrinkD( G, PD, 3, baseParams, FinalParams, Lid, FinalParams['Pat'].la) #// Todo computeNewCodeLength FinalParams['Pat'].setIC_dssg(FinalParams['codeLengthC'] - FinalParams['codeLengthCprime']) FinalParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WIS=baseParams['Pat'].InNCount, WOS=baseParams['Pat'].OutNCount, WI=FinalParams['Pat'].InNL, WO=FinalParams['Pat'].OutNL, kw=FinalParams['Pat'].ECount, excActionType=False, l=self.l, isSimple=self.isSimple, kws=FinalParams['Pat'].kws)) FinalParams['Pat'].setI( computeInterestingness(FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=self.imode)) return FinalParams ################################################################################################################################################################### def computeCLgainRemoveNodeD(self, G, PD, nodes, node, dropLidx, dir): """ Utility function to compute the gain/change in codelength by removing a node from a pattern.
def getBestOption(self, G, PD):
    """
    function to return the best candidate to add

    Parameters
    ----------
    G : networkx graph
        input graph
    PD : PDClass
        Input background distribution

    Returns
    -------
    dict
        dictionary containing a Pattern to add and corresponding prior and
        posterior codelengths (None if there are no candidates)
    """
    if len(self.Data) > 0:
        # Pick the candidate with the highest interestingness.
        bestPattern = max(self.Data, key=lambda x: x.I)
        codeLengthC = None
        codeLengthCprime = None
        DL = None
        dlmode = 3
        if self.gtype == 'U':
            # Lambda this pattern would receive if added (case 2,
            # val_return='return' computes without committing).
            nlambda = PD.updateDistribution(bestPattern.G, None, 'return', 2, None)
            codeLengthC = getCodeLengthParallel(G, PD, NL=bestPattern.NL, case=2, gtype=self.gtype, isSimple=self.isSimple)
            codeLengthCprime = getCodeLengthParallel(G, PD, NL=bestPattern.NL, case=3, gtype=self.gtype, isSimple=self.isSimple, nlambda=nlambda)
            DL = computeDescriptionLength(dlmode=dlmode, V=G.number_of_nodes(), W=bestPattern.NCount, kw=bestPattern.ECount, q=self.q, isSimple=self.isSimple, kws=bestPattern.kws, excActionType=False, l=self.l)
        else:
            nlambda = PD.updateDistribution(bestPattern.G, None, 'return', 2, None)
            # NOTE(review): this call passes NL=bestPattern.NL although the
            # graph is directed (the Cprime call below uses inNL/outNL) —
            # confirm against getCodeLengthParallel's contract.
            codeLengthC = getCodeLengthParallel(G, PD, NL=bestPattern.NL, case=2, gtype=self.gtype, isSimple=self.isSimple)
            codeLengthCprime = getCodeLengthParallel(G, PD, inNL=bestPattern.inNL, outNL=bestPattern.outNL, case=3, gtype=self.gtype, isSimple=self.isSimple, nlambda=nlambda)
            DL = computeDescriptionLength(dlmode=dlmode, V=G.number_of_nodes(), WI=bestPattern.inNL, WO=bestPattern.outNL, kw=bestPattern.ECount, q=self.q, isSimple=self.isSimple, kws=bestPattern.kws, excActionType=False, l=self.l)
        # Information content = codelength saving obtained by the addition.
        IC_dssg = codeLengthC - codeLengthCprime
        bestPattern.setIC_dssg(IC_dssg)
        bestPattern.setDL(DL)
        bestPattern.setI(computeInterestingness(bestPattern.IC_dssg, bestPattern.DL, mode=self.imode))
        bestPattern.setPatType('add')
        Params = dict()
        Params['Pat'] = bestPattern
        Params['codeLengthC'] = codeLengthC
        Params['codeLengthCprime'] = codeLengthCprime
        return Params
    else:
        return None
def getSeeds(self):
    """Function to get seeds to run the hill climber

    Raises:
        Exception 1: if self.nKseed != mNumNodes: raise Exception("Number of seeds should be equal to number of nodes here.")
        Exception: raise Exception('no valid seed mode given')

    Returns:
        list: seed node's list
    """
    mNumNodes = self.G.number_of_nodes()
    seedNodes = [None] * self.nKseed
    if 'all' in self.seedMode:
        # Every node is a seed; requires nKseed to equal the node count.
        if self.nKseed != mNumNodes:
            raise Exception("Number of seeds should be equal to number of nodes here.")
        for r in range(self.nKseed):
            seedNodes[r] = r
    elif 'uniform' in self.seedMode:
        # Uniformly random sample of nodes (without replacement).
        randoml = list(self.G.nodes())
        np.random.shuffle(randoml)
        for r in range(self.nKseed):
            seedNodes[r] = randoml[r]
    elif 'degree' in self.seedMode:
        # Top-nKseed nodes by degree.
        degreeList = sorted(dict(self.G.degree()).items(), key=lambda kv: kv[1], reverse=True)
        for r in range(self.nKseed):
            seedNodes[r] = degreeList[r][0]
    elif 'interest' in self.seedMode:
        # Rank nodes by the interestingness of their closed neighborhood.
        ListNode = sorted(list(self.G.nodes()))
        interestList = []
        if self.gtype == 'U':
            for LNit in ListNode:
                print(LNit)  # debug progress output
                # Closed neighborhood of the node (neighbors + itself).
                curlist = list(set(self.G.neighbors(LNit)).union(set([LNit])))
                H = self.G.subgraph(curlist)
                if len(curlist) > 1:
                    ic = 0.0
                    dl = 0.0
                    # Information content, computed per the configured mode.
                    if self.mode == 1:
                        pw = computeSumOfEdgeProbablity(self.PD, gtype=self.gtype, NL=curlist, isSimple=self.isSimple)
                        ic = IC_SSG(3, pw=pw, W=H)
                    elif self.mode == 2:
                        mu_w = computeSumOfExpectations(self.PD, gtype=self.gtype, NL=curlist, isSimple=self.isSimple)
                        ic = AD(H.number_of_edges(), mu_w)
                    elif self.mode == 3:
                        mu_w, p0 = computePWparameters(self.PD, gtype=self.gtype, NL=curlist, isSimple=self.isSimple)
                        ic = IC_DSIMP(H.number_of_edges(), NW(len(curlist)), mu_w, p0)
                    dlmode = 1
                    if self.incEdge:
                        dlmode = 2
                    dl = computeDescriptionLength(dlmode=dlmode, V=self.G.number_of_nodes(), W=H.number_of_nodes(), kw=H.number_of_edges(), q=self.q)
                    interestValue = computeInterestingness(ic, dl)
                    interestList.append(tuple([LNit, interestValue]))
        else:
            for LNit in ListNode:
                print(LNit)  # debug progress output
                # Directed closed neighborhood: predecessors feed the
                # out-list, successors feed the in-list.
                curlistOut = list(set(self.G.predecessors(LNit)).union(set([LNit])))
                curlistIn = list(set(self.G.successors(LNit)).union(set([LNit])))
                H = getDirectedSubgraph(self.G, curlistIn, curlistOut, self.isSimple)
                if len(curlistIn) > 1 and len(curlistOut) > 1:
                    ic = 0.0
                    dl = 0.0
                    if self.mode == 1:
                        pw = computeSumOfEdgeProbablity(self.PD, gtype=self.gtype, inNL=curlistIn, outNL=curlistOut, isSimple=self.isSimple)
                        ic = IC_SSG(3, pw=pw, W=H)
                    elif self.mode == 2:
                        mu_w = computeSumOfExpectations(self.PD, gtype=self.gtype, inNL=curlistIn, outNL=curlistOut, isSimple=self.isSimple)
                        ic = AD(H.number_of_edges(), mu_w)
                    elif self.mode == 3:
                        mu_w, p0 = computePWparameters(self.PD, gtype=self.gtype, inNL=curlistIn, outNL=curlistOut, isSimple=self.isSimple)
                        ic = IC_DSIMP(H.number_of_edges(), NW_D(curlistIn, curlistOut), mu_w, p0)
                    dlmode = 1
                    if self.incEdge:
                        dlmode = 2
                    dl = computeDescriptionLength(dlmode=dlmode, V=self.G.number_of_nodes(), WI=curlistIn, WO=curlistOut, kw=H.number_of_edges(), q=self.q)
                    interestValue = computeInterestingness(ic, dl)
                    interestList.append(tuple([LNit, interestValue]))
        # Take the top-interest nodes; fewer than nKseed may qualify.
        interestList = sorted(interestList, key=lambda kv: kv[1], reverse=True)
        mRange = min([self.nKseed, len(interestList)])
        seedNodes = [0] * mRange
        for r in range(mRange):
            # print(r, interestList[r][0])
            if interestList[r][0] is None:
                print(r, interestList[r][0])  # debug: flags unexpected None seeds
            seedNodes[r] = interestList[r][0]
    else:
        raise Exception('no valid seed mode given')
    return seedNodes
def shrinkPatternUtilD(self, pattern, nodeToCheck, dir_mode):
    """Util function (directed graphs) to evaluate removing one node from a
    pattern: computes the resulting IC, DL and interestingness.

    Args:
        pattern (Pattern): input directed subgraph pattern
        nodeToCheck (int): node id of the vertex to check for removal
        dir_mode (int): 1 — treat nodeToCheck as an out-node (edges from the
            node to the in-list); 2 — treat it as an in-node (edges from the
            out-list to the node)

    Returns:
        dict: parameters computed for removing the input node
    """
    # remove node at a time, compute final interestingness and return the updated pattern
    # count edges from nodeToCheck to pattern
    ########### Check in-node or out-node removal ############
    kw_deficit = 0  # number of pattern edges lost by removing the node
    params = dict()
    ic = 0.0
    dl = 0.0
    # Work on copies so the pattern's own node lists are never mutated.
    curInNL = pattern.inNL[:]
    curOutNL = pattern.outNL[:]
    curFunc = None  # the opposite-side node list used for deficit helpers
    if dir_mode == 1:
        for p in pattern.inNL:
            if nodeToCheck != p:
                kw_deficit += int(self.G.number_of_edges(nodeToCheck, p))
        curOutNL.remove(nodeToCheck)
        curFunc = curInNL
    else:
        for p in pattern.outNL:
            if nodeToCheck != p:
                kw_deficit += self.G.number_of_edges(p, nodeToCheck)
        curInNL.remove(nodeToCheck)
        curFunc = curOutNL
    if self.mode == 1:
        # Mode 1: IC from summed edge probabilities.
        params['pw_deficit'] = computeSumOfEdgeProbablityBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        params['pw_new'] = pattern.sumPOS - params['pw_deficit']
        params['kw_new'] = pattern.ECount - kw_deficit
        params['nw_new'] = NW_D(curInNL, curOutNL)
        ic = IC_SSG(1, pw=params['pw_new'], kw=params['kw_new'], nw=params['nw_new'])
    elif self.mode == 2:
        # Mode 2: IC from expected edge counts.
        params['mu_w_deficit'] = computeSumOfExpectationsBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        params['mu_w_new'] = pattern.expectedEdges - params['mu_w_deficit']
        params['kw_new'] = pattern.ECount - kw_deficit
        ic = AD(params['kw_new'], params['mu_w_new'])
    else:
        # Mode 3 (DSIMP): needs the minimum edge probability p0 as well.
        params['kw_new'] = pattern.ECount - kw_deficit
        params['nw_new'] = NW_D(curInNL, curOutNL)
        params['mu_w_deficit'], params['p0_deficit'] = computePWparametersBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        if pattern.minPOS == params['p0_deficit']:
            # The removed node realised the minimum probability: recompute
            # the parameters from scratch on the reduced lists.
            params['mu_w_new'], params['p0_new'] = computePWparameters(self.PD, gtype=self.gtype, inNL=curInNL, outNL=curOutNL, isSimple=self.isSimple)
        else:
            params['p0_new'] = pattern.minPOS
            params['mu_w_new'] = pattern.expectedEdges - params['mu_w_deficit']
        ic = IC_DSIMP(params['kw_new'], params['nw_new'], params['mu_w_new'], params['p0_new'])
    # dlmode 2 additionally encodes edges (incEdge).
    dlmode = 1
    if self.incEdge:
        dlmode = 2
    dl = computeDescriptionLength(dlmode=dlmode, V=self.G.number_of_nodes(), WI=curInNL, WO=curOutNL, kw=params['kw_new'], q=self.q)
    I = computeInterestingness(ic, dl)
    params['ic'] = ic
    params['dl'] = dl
    params['I'] = I
    return params
def extendPatternUtilD(self, pattern, nodeToCheck, dir_mode):
    """Util function (directed graphs) to evaluate adding one node to a
    pattern: computes the resulting IC, DL and interestingness.

    Args:
        pattern (Pattern): input directed subgraph pattern
        nodeToCheck (int): node id of the vertex to check for addition
        dir_mode (int): required if gtype is 'D'; 1 - from node to list and
            2 - from list to node

    Returns:
        dict: parameters computed for adding the input node
    """
    # add one node a time and check for best gain
    # count edges from nodeToCheck to pattern
    ######### Check in-node and out-node addition ############
    kw_surplus = 0  # number of edges gained by adding the node
    params = dict()
    ic = 0.0
    dl = 0.0
    # Work on copies so the pattern's own node lists are never mutated.
    curInNL = pattern.inNL[:]
    curOutNL = pattern.outNL[:]
    curFunc = None  # the opposite-side node list used for surplus helpers
    if dir_mode == 1:
        for p in pattern.inNL:
            if nodeToCheck != p:
                kw_surplus += int(self.G.number_of_edges(nodeToCheck, p))
        curOutNL.append(nodeToCheck)
        curFunc = curInNL
    else:
        for p in pattern.outNL:
            if nodeToCheck != p:
                kw_surplus += self.G.number_of_edges(p, nodeToCheck)
        curInNL.append(nodeToCheck)
        curFunc = curOutNL
    if self.mode == 1:
        # Mode 1: IC from summed edge probabilities.
        params['pw_surplus'] = computeSumOfEdgeProbablityBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        params['pw_new'] = pattern.sumPOS + params['pw_surplus']
        params['kw_new'] = pattern.ECount + kw_surplus
        params['nw_new'] = NW_D(curInNL, curOutNL)
        ic = IC_SSG(1, pw=params['pw_new'], kw=params['kw_new'], nw=params['nw_new'])
    elif self.mode == 2:
        # Mode 2: IC from expected edge counts.
        params['mu_w_surplus'] = computeSumOfExpectationsBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        params['mu_w_new'] = pattern.expectedEdges + params['mu_w_surplus']
        params['kw_new'] = pattern.ECount + kw_surplus
        ic = AD(params['kw_new'], params['mu_w_new'])
    else:
        # Mode 3 (DSIMP): the new minimum probability is the smaller of the
        # pattern's current minimum and the added node's minimum.
        params['mu_w_surplus'], params['p0_surplus'] = computePWparametersBetweenNodeAndList(self.PD, nodeToCheck, curFunc, dir_mode=dir_mode, gtype=self.gtype, isSimple=self.isSimple)
        params['mu_w_new'] = pattern.expectedEdges + params['mu_w_surplus']
        params['p0_new'] = min(pattern.minPOS, params['p0_surplus'])
        params['kw_new'] = pattern.ECount + kw_surplus
        params['nw_new'] = NW_D(curInNL, curOutNL)
        ic = IC_DSIMP(params['kw_new'], params['nw_new'], params['mu_w_new'], params['p0_new'])
    # dlmode 2 additionally encodes edges (incEdge).
    dlmode = 1
    if self.incEdge:
        dlmode = 2
    dl = computeDescriptionLength(dlmode=dlmode, V=self.G.number_of_nodes(), WI=curInNL, WO=curOutNL, kw=params['kw_new'], q=self.q)
    I = computeInterestingness(ic, dl)
    params['ic'] = ic
    params['dl'] = dl
    params['I'] = I
    return params
def shrinkPatternUtil(self, pattern, nodeToCheck):
    """Util function to check for the best candidate node to remove

    Args:
        pattern (Pattern): input subgraph pattern
        nodeToCheck (int): node id of the vertex to check for removal

    Returns:
        dict: dictionary of parameters computed for the input node
    """
    # Number of pattern edges lost by removing nodeToCheck.
    kw_deficit = 0
    for p in pattern.NL:
        kw_deficit += self.G.number_of_edges(p, nodeToCheck)
    # BUGFIX: take a copy. The original aliased pattern.NL, and the mode-3
    # branch below then mutated the pattern's own node list — corrupting the
    # pattern for all later evaluations and making the description length
    # use len(curNL) - 1 on an already-shortened list. The directed sibling
    # (shrinkPatternUtilD) copies its lists too.
    curNL = pattern.NL[:]
    params = dict()
    ic = 0.0
    dl = 0.0
    if self.mode == 1:
        # Mode 1: IC from summed edge probabilities.
        params['pw_deficit'] = computeSumOfEdgeProbablityBetweenNodeAndList(self.PD, nodeToCheck, curNL, gtype=self.gtype, isSimple=self.isSimple)
        params['pw_new'] = pattern.sumPOS - params['pw_deficit']
        params['kw_new'] = pattern.ECount - kw_deficit
        params['nw_new'] = NW(len(curNL) - 1)
        ic = IC_SSG(1, pw=params['pw_new'], kw=params['kw_new'], nw=params['nw_new'])
    elif self.mode == 2:
        # Mode 2: IC from expected edge counts.
        params['mu_w_deficit'] = computeSumOfExpectationsBetweenNodeAndList(self.PD, nodeToCheck, curNL, gtype=self.gtype, isSimple=self.isSimple)
        params['mu_w_new'] = pattern.expectedEdges - params['mu_w_deficit']
        params['kw_new'] = pattern.ECount - kw_deficit
        ic = AD(params['kw_new'], params['mu_w_new'])
    else:
        # Mode 3 (DSIMP): needs the minimum edge probability p0 as well.
        params['kw_new'] = pattern.ECount - kw_deficit
        params['nw_new'] = NW(len(curNL) - 1)
        params['mu_w_deficit'], params['p0_deficit'] = computePWparametersBetweenNodeAndList(self.PD, nodeToCheck, curNL, gtype=self.gtype, isSimple=self.isSimple)
        if pattern.minPOS == params['p0_deficit']:
            # The removed node realised the minimum probability: recompute
            # parameters from scratch on a reduced copy (curNL and
            # pattern.NL stay intact).
            reducedNL = [u for u in curNL if u != nodeToCheck]
            params['mu_w_new'], params['p0_new'] = computePWparameters(self.PD, gtype=self.gtype, NL=reducedNL, isSimple=self.isSimple)
        else:
            params['p0_new'] = pattern.minPOS
            params['mu_w_new'] = pattern.expectedEdges - params['mu_w_deficit']
        ic = IC_DSIMP(params['kw_new'], params['nw_new'], params['mu_w_new'], params['p0_new'])
    # dlmode 2 additionally encodes edges (incEdge).
    dlmode = 1
    if self.incEdge:
        dlmode = 2
    dl = computeDescriptionLength(dlmode=dlmode, V=self.G.number_of_nodes(), W=len(curNL) - 1, kw=params['kw_new'], q=self.q)
    I = computeInterestingness(ic, dl)
    params['ic'] = ic
    params['dl'] = dl
    params['I'] = I
    return params
    def processAsU(self, G, PD, id):
        """ Utility function for split action when the input graph is undirected.
        This function idenfies the final components from each possible candidate split and compute the corresponding measures.

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        id : int
            identifier of a constraint to be evaluated
        """
        # Subgraph of G induced by the node list stored for this constraint.
        NL = PD.lprevUpdate[id][1]
        H = G.subgraph(NL)
        # NOTE(review): nx.connected_component_subgraphs was removed in
        # networkx >= 2.4 -- presumably this project pins an older networkx;
        # confirm before upgrading the dependency.
        components = nx.connected_component_subgraphs(H, copy=True)
        # Keep only connected components large enough to form a pattern.
        fcomponents = dict()
        it = 0
        for comp in components:
            if comp.number_of_nodes() > self.minsize:
                # print('Comp:{}\t #Nodes:{}'.format(it, comp.number_of_nodes()))
                fcomponents[it] = comp
                it += 1
        # print(fcomponents)
        if len(fcomponents) > 1: #* If components are more than one then only we can split this pattern
            # Base parameters describe the candidate before component
            # reduction; 'compos' maps component index -> Pattern.
            baseParams = dict()
            baseParams['Pat'] = Pattern(H)
            baseParams['NodesInc'] = 0  # total node count over kept components
            compPats = dict()
            nodes_union = set()
            for k,v in fcomponents.items():
                compPats[k] = Pattern(v)
                baseParams['NodesInc'] += v.number_of_nodes()
                nodes_union = nodes_union.union(set(compPats[k].NL))
            baseParams['compos'] = compPats
            # Nodes of the original pattern that belong to no kept component.
            baseParams['excludedNL'] = list(
                set(baseParams['Pat'].NL) - nodes_union
            )
            # Code length under the current model (case 2), and under the
            # split candidate via the helper.
            baseParams['codeLengthC'] = getCodeLengthParallel(
                H, PD,
                gtype=self.gtype, case=2, isSimple=self.isSimple,
                NL=baseParams['Pat'].NL
            )
            baseParams['codeLengthCprime'] = self.computeCodeLengthSplitU(G, PD, 2, baseParams, id) #// Todo : write code for this part
            # Information content is the code-length gain of the split.
            baseParams['Pat'].setIC_dssg(
                baseParams['codeLengthC'] - baseParams['codeLengthCprime']
            )
            baseParams['Pat'].setDL(
                computeDescriptionLength(
                    dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                    WS=baseParams['Pat'].NCount,
                    compos=baseParams['compos'], isSimple=self.isSimple
                )
            )
            baseParams['Pat'].setI(
                computeInterestingness(
                    baseParams['Pat'].IC_dssg, baseParams['Pat'].DL,
                    mode=2
                )
            )
            baseParams['Pat'].setPatType('split')
            baseParams['Pat'].setPrevOrder(id)
            # print(baseParams)
            #now try reducing each component
            FinalParams = baseParams
            for k in baseParams['compos'].keys():
                FinalParams = self.getReducedComponentU(G, PD, FinalParams, id, k)
            #compute new lambdas for each new pattern/component
            for k,v in FinalParams['compos'].items():
                v.setLambda(
                    PD.updateDistribution(
                        pat=v.G, idx=None, val_return='return', case=3, dropLidx=[id])
                )
            FinalParams['codeLengthCprime'] = self.computeCodeLengthSplitU(G, PD, 3, FinalParams, id) #// Todo : write code for this part
            # Re-evaluate IC / DL / I for the reduced components.
            FinalParams['Pat'].setIC_dssg(
                FinalParams['codeLengthC'] - FinalParams['codeLengthCprime']
            )
            FinalParams['Pat'].setDL(
                computeDescriptionLength(
                    dlmode=7, C=len(PD.lprevUpdate), gtype=self.gtype,
                    WS=FinalParams['Pat'].NCount,
                    compos=FinalParams['compos'],
                    excActionType=False, l=self.l, isSimple=self.isSimple
                )
            )
            FinalParams['Pat'].setI(
                computeInterestingness(
                    FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL,
                    mode=2
                )
            )
            FinalParams['Pat'].setPatType('split')
            FinalParams['Pat'].setPrevOrder(id)
            # Now set these values to all component patterns
            for k,v in FinalParams['compos'].items():
                v.setIC_dssg( FinalParams['Pat'].IC_dssg )
                v.setDL( FinalParams['Pat'].DL )
                v.setI( FinalParams['Pat'].I )
                v.setPrevOrder(id)
                v.setPatType('split')
            self.Data[id] = FinalParams
        return self.Data
class EvaluateShrink:
    """
    Container for all candidate shrink actions. Keys of ``self.Data`` are
    pattern/constraint identifiers; values hold the parameters computed for
    the corresponding candidate.
    """
    def __init__(self, gtype='U', isSimple=True, l=6, imode=2, minsize=2):
        """
        initialization function

        Parameters
        ----------
        gtype : str, optional
            Input Graph type, 'U': Undirected, 'D': Directed, by default 'U'
        isSimple : bool, optional
            if input graph is a simple graph then True else False if it is a multigraph, by default True
        l : int, optional
            Total number of unique action types that can be performed, by default 6
        imode : int, optional
            Interestingness mode--- 1: fraction, 2: Difference, by default 2
        minsize : int, optional
            Minimum size of pattern, by default 2
        """
        self.gtype = gtype
        self.isSimple = isSimple
        self.l = l  # number of possible action types, default is 6
        self.imode = imode
        self.minsize = minsize
        self.Data = dict()
        print('initialized EvaluateShrink')
###################################################################################################################################################################
    def evaluateAllConstraints(self, G, PD):
        """
        function to evaluate all constraints and make a list of candidate constraints which are feasible to shrink

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background distribution
        """
        # Start from a clean slate, then evaluate every known constraint.
        self.Data = dict()
        for cid in PD.lprevUpdate:
            self.evaluateConstraint(G, PD, cid)
        return
###################################################################################################################################################################
    def evaluateConstraint(self, G, PD, id):
        """
        function to evaluate if a constraint is a feasible candidate for shrink

        Parameters
        ----------
        G : Networkx Graph
            Input Graph
        PD : PDClass
            Background Distribution
        id : int
            identifier of a constraint to be evaluated
        """
        # Dispatch on graph type; anything other than 'U'/'D' is a no-op.
        if self.gtype == 'D':
            self.processAsD(G, PD, id)
        elif self.gtype == 'U':
            self.processAsU(G, PD, id)
        return
################################################################################################################################################################### def processAsU(self, G, PD, id): """ Utility function for shrink action when the input graph is undirected. This function idenfies the final subgraph from a possible candidate shrink and compute the corresponding measures. Parameters ---------- G : Networkx Graph Input Graph PD : PDClass Background Distribution id : int identifier of a constraint to be evaluated """ NL = PD.lprevUpdate[id][1] H = G.subgraph(NL) components = nx.connected_component_subgraphs(H, copy=True) fcomponents = dict() it = 0 for comp in components: if comp.number_of_nodes() > self.minsize: fcomponents[it] = comp if len( fcomponents ) == 1: # * if valid components is more than 1 than split shall be performed baseParams = dict() baseParams['Pat'] = Pattern(H) baseParams['codeLengthC'] = getCodeLengthParallel( H, PD, gtype=self.gtype, case=2, isSimple=self.isSimple, NL=baseParams['Pat'].NL) baseParams['codeLengthCprime'] = baseParams['codeLengthC'] baseParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - baseParams['codeLengthCprime']) baseParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount, W=baseParams['Pat'].NCount, kw=baseParams['Pat'].ECount, isSimple=self.isSimple, kws=baseParams['Pat'].kws)) baseParams['Pat'].setI( computeInterestingness(baseParams['Pat'].IC_dssg, baseParams['Pat'].DL, mode=self.imode)) curPat = fcomponents[0] bestParams = None if curPat.number_of_nodes() < baseParams['Pat'].NCount: bestParams = dict() bestParams['Pat'] = Pattern(curPat) bestParams['codeLengthCprime'] = self.computeCodeLengthShrinkU( G, PD, 2, baseParams, bestParams, id) bestParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - bestParams['codeLengthCprime']) bestParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, 
WS=baseParams['Pat'].NCount, W=bestParams['Pat'].NCount, kw=bestParams['Pat'].ECount, isSimple=self.isSimple, kws=bestParams['Pat'].kws)) bestParams['Pat'].setI( computeInterestingness(bestParams['Pat'].IC_dssg, bestParams['Pat'].DL, mode=self.imode)) else: bestParams = baseParams # * Now reduce the only component in fcomponents FinalParams = self.getReducedSubgraphU(G, PD, baseParams, bestParams, id) FinalParams['SPat'] = FinalParams['Pat'].copy() FinalParams['Pat'] = baseParams['Pat'].copy() if bestParams['Pat'].I > FinalParams['SPat'].I: FinalParams['Pat'].setPrevOrder(id) FinalParams['Pat'].setPatType('shrink') FinalParams['SPat'].setPrevOrder(id) FinalParams['SPat'].setPatType('shrink') self.Data[id] = FinalParams return ################################################################################################################################################################### def getReducedSubgraphU(self, G, PD, baseParams, bestParams, Lid): """ Utility function used when the input graph is undirected to remove nodes from subgrapht of the candidate sphrink Parameters ---------- G : Networkx Graph Input Graph PD : PDClass Background Distribution baseParams : dict base value of prameters corresponding to the current shrink candidate, i.e., before shrink bestParams : dict current value of prameters corresponding to the current shrink candidate, i.e., after removing some disconnected nodes (if any) Lid : int identifier for the the candidate constraint for split Returns ------- dict FinalParams: Updated after reducing a component """ doshrink = True count_remove_nodes = 0 FinalParams = bestParams while doshrink: #continue removing nodes one by one till no increase in IG doshrink = False for node in FinalParams['Pat'].NL: curParams = dict() curParams['Pat'] = FinalParams['Pat'].copy() curParams['Pat'].removeNode(node) curParams['codeLengthCprime'] = FinalParams[ 'codeLengthCprime'] - self.computeCLgainRemoveNodeU( G, PD, curParams['Pat'].NL, node, [Lid]) 
curParams['Pat'].setIC_dssg(baseParams['codeLengthC'] - curParams['codeLengthCprime']) curParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount, W=curParams['Pat'].NCount, kw=curParams['Pat'].ECount, isSimple=self.isSimple, kws=curParams['Pat'].kws)) curParams['Pat'].setI( computeInterestingness(curParams['Pat'].IC_dssg, curParams['Pat'].DL, mode=self.imode)) if curParams['Pat'].I > bestParams['Pat'].I: bestParams = curParams if bestParams['Pat'].I > FinalParams['Pat'].I: FinalParams = bestParams count_remove_nodes += 1 doshrink = True if count_remove_nodes > 0 or FinalParams['Pat'].NCount < baseParams[ 'Pat'].NCount: FinalParams['codeLengthC'] = baseParams['codeLengthC'] FinalParams['Pat'].setLambda( PD.updateDistribution(pat=FinalParams['Pat'].G, idx=None, val_return='return', case=3, dropLidx=[Lid ])) #// Todo: computeNewLambda FinalParams['codeLengthCprime'] = self.computeCodeLengthShrinkU( G, PD, 3, baseParams, FinalParams, Lid, FinalParams['Pat'].la) #// Todo computeNewCodeLength FinalParams['Pat'].setIC_dssg(FinalParams['codeLengthC'] - FinalParams['codeLengthCprime']) FinalParams['Pat'].setDL( computeDescriptionLength(dlmode=6, C=len(PD.lprevUpdate), gtype=self.gtype, WS=baseParams['Pat'].NCount, W=FinalParams['Pat'].NCount, kw=FinalParams['Pat'].ECount, excActionType=False, l=self.l, isSimple=self.isSimple, kws=FinalParams['Pat'].kws)) FinalParams['Pat'].setI( computeInterestingness(FinalParams['Pat'].IC_dssg, FinalParams['Pat'].DL, mode=self.imode))