def k2Validate(G, start, parents):
    """Validate a BFS tree against the Graph500 specification tests.

    Args:
        G: graph object providing ``isBfsTree`` and ``toParVec``.
        start: root vertex of the BFS.
        parents: parent vector produced by the BFS being validated.

    Returns:
        True when every test passes, False otherwise.  Diagnostics are
        printed on the master process only.
    """
    good = True

    # isBfsTree implements Graph500 tests 1 and 2.
    (valid, levels) = G.isBfsTree(start, parents)
    if not valid:
        if kdt.master():
            # The original formatted abs(ret) here, but no `ret` exists in
            # this function, so the failure path raised NameError.
            # NOTE(review): if isBfsTree exposes the failing test number,
            # report it here instead of the generic text.
            print("isBfsTree detected failure of Graph500 test 1 or 2")
        return False

    # Spec test #3:
    # every input edge has vertices whose levels differ by no more than 1
    # Note: don't actually have input edges, will use the edges in
    #       the resulting graph as a proxy
    [origI, origJ, ign] = G.toParVec()
    del ign
    li = levels[origI]
    lj = levels[origJ]
    del origI
    # A level of -1 is what the checks below treat as "not in the tree".
    if not ((abs(li - lj) <= 1) | ((li == -1) & (lj == -1))).all():
        if kdt.master():
            print("At least one graph edge has endpoints whose levels differ by more than one and is in the BFS tree")
            print("%s %s" % (li, lj))
        good = False

    # Spec test #4:
    # the BFS tree spans a connected component's vertices (== all edges
    # either have both endpoints in the tree or not in the tree, or
    # source is not in tree and destination is the root)
    neither_in = (li == -1) & (lj == -1)
    both_in = (li > -1) & (lj > -1)
    out2root = (li == -1) & (origJ == start)
    del origJ
    if not (neither_in | both_in | out2root).all():
        if kdt.master():
            print("The tree does not span exactly the connected component, root=%d" % start)
        good = False
    del both_in, out2root

    # Spec test #5:
    # a vertex and its parent are joined by an edge of the original graph
    respects = abs(li - lj) <= 1
    if not (neither_in | respects).all():
        if kdt.master():
            print("At least one vertex and its parent are not joined by an original edge")
        good = False

    return good
def _gabp_residual_norm(A, b, ha, m):
    """Return the 2-norm of (SpMV_PlusTimes of A._spm with ha) minus b."""
    product = kdt.SpParVec(m)
    product._spv = A._spm.SpMV_PlusTimes(ha.toSpParVec()._spv)
    return (product - b).toParVec().norm(2)


def gabp(A, b, maxround, epsilon):
    """Run up to ``maxround`` rounds of the iterative update the log calls GBP.

    Each round rebuilds the edge-valued graphs from the current ``h`` and
    ``J`` vectors, sums them back into new ``h`` and ``J`` and derives
    ``ha = h * (1.0 / J)``.  The loop stops early once, after round 2, the
    relative change of ``ha`` drops below ``epsilon``.

    Args:
        A: graph object (uses nvert, copy, ``_spm`` and arithmetic operators).
        b: vector combined with the per-round sums and used for the residual.
        maxround: maximum number of rounds.
        epsilon: threshold on the relative 2-norm change of ``ha``.

    Returns:
        None.  Progress, per-phase timings and the final norms are printed
        on the master process.
    """
    began = time.time()

    # Wall-clock time accumulated per phase across all rounds.
    spent = dict.fromkeys(
        ("copy", "scale", "transpose", "add", "mul_divide", "sum", "cmp"), 0)

    m = A.nvert()
    converged = False

    # Edge-valued state carried between rounds, built from empty vectors.
    empty = kdt.ParVec(0)
    edge_h = kdt.DiGraph(empty, empty, empty, m)
    edge_J = edge_h.copy()

    # Copy of A with ones() applied and self loops removed.
    pattern = A.copy()
    pattern.ones()
    pattern.removeSelfLoops()

    # A multiplied by a graph holding ones on (i, i); keep its edge values.
    diag_index = kdt.ParVec.range(0, m)
    identity = kdt.DiGraph(diag_index, diag_index, kdt.ParVec(m, 1), m)
    _, _, diag_vals = (A * identity).toParVec()

    h = b.copy()
    J = diag_vals.copy()
    ha = h / J
    r = 1

    init_time = time.time() - began
    rel_norm = 40000

    while r <= maxround:
        if kdt.master():
            print("starting GBP round %d, relnorm=%f" % (r, rel_norm))
        previous = ha

        mark = time.time()

        # COPY
        h_scaled = pattern.copy()
        J_scaled = pattern.copy()
        now = time.time()
        spent["copy"] += now - mark
        mark = now

        # SCALE (default direction is kdt.DiGraph.Out, which scales rows)
        h_scaled.scale(h)
        J_scaled.scale(J)
        now = time.time()
        spent["scale"] += now - mark
        mark = now

        # TRANSPOSE
        edge_h.reverseEdges()
        edge_J.reverseEdges()
        now = time.time()
        spent["transpose"] += now - mark
        mark = now

        # ADD
        h_diff = h_scaled + -edge_h
        J_diff = J_scaled + -edge_J
        now = time.time()
        spent["add"] += now - mark
        mark = now

        # MUL/DIVIDE
        ratio = -A / J_diff
        edge_h = ratio * h_diff
        edge_J = ratio * A
        now = time.time()
        spent["mul_divide"] += now - mark
        mark = now

        # SUM
        edge_h.removeSelfLoops()
        edge_J.removeSelfLoops()
        h = b + edge_h.sum(kdt.DiGraph.In)
        J = diag_vals + edge_J.sum(kdt.DiGraph.In)
        now = time.time()
        spent["sum"] += now - mark
        mark = now

        # Convergence measure: relative change of ha between rounds.
        Ja = 1.0 / J
        ha = h * Ja
        ha_norm = ha.norm(2)
        if ha_norm == 0.0:
            rel_norm = 0
        else:
            rel_norm = (ha - previous).norm(2) / ha_norm
        spent["cmp"] += time.time() - mark

        if r > 2 and rel_norm < epsilon:
            real_norm = _gabp_residual_norm(A, b, ha, m)
            if kdt.master():
                after = time.time()
                print("GBP Converged after %d rounds, reached rel_norm %f real_norm %f" % (r, rel_norm, real_norm))
                print("run time %fs" % (after - began))
            converged = True
            break

        r += 1

    after = time.time()
    if kdt.master():
        print("init time: %fs" % init_time)
        print("copy time: %fs" % spent["copy"])
        print("scale_time: %fs" % spent["scale"])
        print("t_time: %fs" % spent["transpose"])
        print("add_time: %fs" % spent["add"])
        print("m_d_time: %fs" % spent["mul_divide"])
        print("sum_time: %fs" % spent["sum"])
        print("cmp_time: %fs" % spent["cmp"])
        print("total_time: %fs" % (after - began))

    if not converged:
        real_norm = _gabp_residual_norm(A, b, ha, m)
        if kdt.master():
            print("GBP did not converge in %d rounds, reached rel_norm %f real_norm %f" % (r - 1, rel_norm, real_norm))
            print("run time %fs" % (after - began))

    # Disabled dump of the result vector; `nvert` is a module-level name.
    if False:
        if kdt.master():
            print("writing resulting vector x to x.mtx")
        X = kdt.DiGraph(kdt.ParVec.range(nvert), kdt.ParVec.ones(nvert)-1, ha, nvert)
        X.save("x.mtx")
    return
import getopt import kdt k = -1 if (len(sys.argv) > 1): outfile = sys.argv[1] for i in range(1, len(sys.argv)): a = sys.argv[i] if a == '-k': k = int(sys.argv[i+1]) if k > 2: import ModelProblemGen if (kdt.master()): print "Generating %d-by-%d model problem..."%(k,k) A, b = ModelProblemGen.getModelProbem(k) else: A = kdt.DiGraph.load('thermal2/thermal2.mtx'); b = kdt.ParVec.load('thermal2/thermal2_b.mtx'); nvert = A.nvert() def gabp(A, b, maxround, epsilon): copy_time=0 scale_time=0 t_time=0 add_time=0 mul_divide_time=0
def k2Validate(G, start, parents):
    """Validate a BFS tree on a hypergraph against the Graph500 spec tests.

    Args:
        G: graph object providing ``isBfsTree``, ``toParVec``, ``nvert``,
            ``nedge`` and the ``_spm`` sparse matrix.
        start: root vertex of the BFS.
        parents: parent vector produced by the BFS being validated.

    Returns:
        True when every test passes, False otherwise.  Diagnostics are
        printed on the master process only.
    """
    good = True

    # isBfsTree implements Graph500 tests 1 and 2.
    [valid, levels] = G.isBfsTree(start, parents)
    if not valid:
        if kdt.master():
            # The original formatted abs(ret) here, but no `ret` exists in
            # this function, so the failure path raised NameError.
            # NOTE(review): if isBfsTree exposes the failing test number,
            # report it here instead of the generic text.
            print("isBfsTree detected failure of Graph500 test 1 or 2")
        return False

    # Spec test #3:
    # every input edge has vertices whose levels differ by no more than 1
    edgeMax = kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax(levels.toSpParVecAll()._spv))
    edgeMin = -kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax((-levels).toSpParVecAll()._spv))
    if ((edgeMax - edgeMin) > 1).any():
        if kdt.master():
            print("At least one graph edge has endpoints whose levels differ by more than one")
        good = False

    # Spec test #4:
    # the BFS tree spans a connected component's vertices (== all edges
    # either have both endpoints in the tree or not in the tree, or
    # source is not in tree and destination is the root)

    # set not-in-tree vertices' levels to -2 (modifies `levels` in place)
    import pyCombBLAS as pcb
    levels._dpv.Apply(pcb.ifthenelse(pcb.bind2nd(pcb.equal_to(), -1), pcb.set(-2), pcb.identity()))
    edgeMax = kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax(levels.toSpParVecAll()._spv))
    edgeMin = -kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax((-levels).toSpParVecAll()._spv))
    if ((edgeMax - edgeMin) > 1).any():
        if kdt.master():
            # The original omitted the `% start` argument and printed a
            # literal "%d".
            print("The tree does not span exactly the connected component, root=%d" % start)
        good = False

    # Spec test #5:
    # a vertex and its parent are joined by an edge of the original graph,
    # except for the root, which has no parent in the tree
    Gnv = G.nvert()
    Gne = G.nedge()
    [Gi, Gj, ign] = G.toParVec()
    del ign
    # non-root tree vertices == NRT Vs
    NRTVs = (levels != -2) & (parents != kdt.ParVec.range(Gnv))
    nNRTVs = NRTVs.nnz()
    TGi = kdt.ParVec.range(nNRTVs)
    TGj1 = kdt.ParVec.range(Gnv)[NRTVs]
    TGj2 = parents[NRTVs]
    M = max(Gne, Gnv)
    #FIX: really should use SpParMats here, as don't need spm and spmT
    tmpG1 = kdt.HyGraph(TGi, TGj1, 1, M, Gnv)
    tmpG2 = kdt.HyGraph(TGi, TGj2, 1, M, Gnv)
    tmpG1._spm += tmpG2._spm
    tmpG1._spmT += tmpG2._spmT
    del tmpG2
    tmpG3 = kdt.HyGraph(Gi, Gj, 1, M, Gnv)
    tmpG4 = kdt.DiGraph()
    tmpG4._spm = tmpG1._spm.SpMM(tmpG3._spmT)  #!? not tmp3._spmT ?
    maxIncid = tmpG4.max(kdt.DiGraph.Out)[kdt.ParVec.range(Gnv) < nNRTVs]
    # Each of the first nNRTVs rows must reach a value of 2 to pass.
    if (maxIncid != 2).any():
        if kdt.master():
            print("At least one vertex and its parent are not joined by an original edge")
        good = False

    return good
del tmpG2 tmpG3 = kdt.HyGraph(Gi, Gj, 1, M, Gnv) tmpG4 = kdt.DiGraph() tmpG4._spm = tmpG1._spm.SpMM(tmpG3._spmT) #!? not tmp3._spmT ? maxIncid = tmpG4.max(kdt.DiGraph.Out)[kdt.ParVec.range(Gnv) < nNRTVs] if (maxIncid != 2).any(): if kdt.master(): print "At least one vertex and its parent are not joined by an original edge" good = False return good if len(file) == 0: raise SystemExit, "No generation of Graph500 HyGraph for now; must use file" if kdt.master(): print "Generating a Graph500 RMAT graph with 2^%d vertices..."%(scale) G = kdt.HyGraph() K1elapsed = G.genGraph500Edges(scale) #G.save("testgraph.mtx") if kdt.master(): print "Generation took %fs."%(K1elapsed) else: if kdt.master(): print 'Loading %s'%(file) G = kdt.HyGraph.load(file) K1elapsed = 0.0 if False:
def detect(self, input, epsilon, mu, debug=False):
    """Label the vertices of the graph stored in ``input`` and print a summary.

    The steps follow the timing labels printed below: load the matrix,
    make it symmetric, compute the epsilon matrix, mark cores, find
    connected components of the core matrix, then relabel solitaries,
    borders and hubs.  In the final labelling -1 is counted as an
    outlier, -3 as a hub and every other value as a cluster id.

    Args:
        input: file name passed to ``kdt.Mat.load``.
        epsilon: threshold; entries of the distance matrix >= epsilon are kept.
        mu: threshold; a row with at least ``mu`` kept entries is a core.
        debug: when true, intermediate objects are printed as well.

    Returns:
        None.  Timings and the cluster / hub / outlier counts are printed
        on the master process.
    """
    # NOTE(review): this method was reconstructed from a whitespace-collapsed
    # source; the nesting of the two bare `if debug:` blocks under
    # `if kdt.master():` is an assumption -- confirm against the original.
    start = time.time()

    # Load graph as a matrix
    matrix = kdt.Mat.load(fname=input, element=True, par_IO=False)

    if kdt.master():
        print '- Matrix Loading Time: ' + str(time.time() - start)
        if debug:
            print 'Input Matrix: '
            print matrix

    # Recover symmetricity
    t0 = time.time()
    temp = matrix.copy()
    temp.transpose()
    # Combine the matrix with its transpose: take e1 when it is >= 0,
    # otherwise e2; missing entries on either side are passed in as -1.
    distanceMatrix = matrix.eWiseApply(
        temp, op=(lambda e1, e2: e1 if e1 >= 0 else e2),
        allowANulls=True, allowBNulls=True,
        doOp=(lambda e1, e2: e1 >= 0 or e2 >= 0),
        inPlace=False, ANull=-1, BNull=-1)

    # `matrix` becomes a copy of the symmetric matrix with the constant-1
    # op applied to it.
    matrix = distanceMatrix.copy()
    matrix.apply(op=(lambda e: 1))
    degrees = matrix.count(dir=kdt.Mat.Column, pred=(lambda e: e >= 0))

    if debug and kdt.master():
        print 'Degrees: '
        print degrees

    # presumably an empty matrix of matching shape used as the "A" operand
    # of the filtering eWiseApply calls below -- TODO confirm
    blankMatrix = kdt.Mat.eye(n=matrix.nrow(), m=matrix.ncol())
    blankMatrix.removeMainDiagonal()

    # Two scale passes over a copy of `matrix`: the row pass stores
    # (degree + 1), the column pass stores sqrt(previous * (degree + 1)).
    temp = matrix.copy()
    temp.scale(degrees, op=(lambda e1, e2: e2 + 1), dir=kdt.Mat.Row)
    temp.scale(degrees, op=(lambda e1, e2: math.sqrt(e1 * (e2 + 1))), dir=kdt.Mat.Column)
    # Divide the symmetric matrix by that normaliser, in place.
    distanceMatrix.eWiseApply(temp, op=(lambda e1, e2: e1 / e2),
                              allowANulls=False, allowBNulls=True, inPlace=True)
    # Keep only entries whose value is >= epsilon.
    epsilonMatrix = blankMatrix.eWiseApply(
        distanceMatrix, op=(lambda e1, e2: e2),
        allowANulls=True, allowBNulls=False,
        doOp=(lambda e1, e2: e2 >= epsilon), inPlace=False)

    if kdt.master():
        print '- E-distance Computation Time: ' + str(time.time() - t0)
        if debug:
            print 'Epsilon Matrix: '
            print epsilonMatrix

    t0 = time.time()
    # cores[i] is 1 when the row count of the epsilon matrix is >= mu, else 0.
    cores = epsilonMatrix.count(dir=kdt.Mat.Row)
    cores.apply(op=(lambda e: 1 if e >= mu else 0))

    if kdt.master():
        print '- Computing Core Time: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Cores: '
        print cores

    t0 = time.time()
    # Scale `temp` by the core flags along both directions, keep entries
    # that are still > 0, then restrict them against the epsilon matrix.
    temp.scale(cores, dir=kdt.Mat.Row)
    temp.scale(cores, dir=kdt.Mat.Column)
    coreMatrix = temp.eWiseApply(temp, op=(lambda e1, e2: 1),
                                 allowANulls=False, allowBNulls=False,
                                 doOp=(lambda e1, e2: e1 > 0 and e2 > 0),
                                 allowIntersect=True)
    coreMatrix.eWiseApply(epsilonMatrix, op=(lambda e1, e2: e1),
                          allowANulls=False, allowBNulls=False, inPlace=True)

    if kdt.master():
        print '- Computing Core Matrix: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Core Matrix: '
        print coreMatrix

    t0 = time.time()
    # Connected components of the graph built from the core matrix.
    sourceIndex, targetIndex, valueIndex = coreMatrix.toVec()
    graph = kdt.DiGraph(sourceIndex, targetIndex, valueIndex, matrix.nrow())
    components = graph.connComp()

    if kdt.master():
        print '- Computing Component Time: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Components: '
        print components

    t0 = time.time()
    # Histogram of the component labels; positions where cores == 1 are
    # overwritten with 0.
    frequencies = components.hist()
    frequencies.eWiseApply(cores, op=(lambda e1, e2: 0),
                           allowANulls=False, allowBNulls=False,
                           doOp=(lambda e1, e2: e2 == 1), inPlace=True)

    if kdt.master():
        print '- Computing Frequency Time: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Frequencies: '
        print frequencies

    t0 = time.time()
    # The op writes 1 where the frequency is 1 and the core flag is not 1.
    solitaries = frequencies.eWiseApply(
        cores, op=(lambda e1, e2: 1),
        allowANulls=False, allowBNulls=False,
        doOp=(lambda e1, e2: e1 == 1 and e2 != 1), inPlace=False)

    if kdt.master():
        print '- Computing Solitary Time: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Solitaries: '
        print solitaries

    t0 = time.time()
    # Set all solitary nodes as outliers
    components.eWiseApply(solitaries, op=(lambda e1, e2: -1),
                          allowANulls=False, allowBNulls=False,
                          doOp=(lambda e1, e2: e2 == 1), inPlace=True)

    # Find borders
    temp = epsilonMatrix.copy()
    temp.apply(op=(lambda e: 1))
    temp.scale(cores, dir=kdt.Mat.Column)
    temp.scale(components, dir=kdt.Mat.Column)
    coreMatrix = blankMatrix.eWiseApply(temp, op=(lambda e1, e2: e2),
                                        allowANulls=True, allowBNulls=False,
                                        doOp=(lambda e1, e2: e2 > 0),
                                        inPlace=False)
    # Row-wise maximum of the remaining values, starting from -1.
    borders = coreMatrix.max(dir=kdt.Mat.Row, init=-1)

    if kdt.master():
        print '- Computing Border Time: ' + str(time.time() - t0)

    if debug and kdt.master():
        print 'Borders: '
        print borders

    t0 = time.time()
    # Assign maximum labels to borders
    components.eWiseApply(borders, op=(lambda e1, e2: e2),
                          allowANulls=False, allowBNulls=False,
                          doOp=(lambda e1, e2: e2 >= 0), inPlace=True)

    if kdt.master():
        print '- Assigning Border Label Time ' + str(time.time() - t0)

    t0 = time.time()
    # Find hubs
    # `solitaries` becomes 1 where solitary * border is negative, else 0.
    solitaries.eWiseApply(borders, op=(lambda e1, e2: 1 if e1 * e2 < 0 else 0),
                          allowANulls=False, allowBNulls=False, inPlace=True)
    matrix.scale(solitaries, op=(lambda e1, e2: e1 if e2 > 0 else 0), dir=kdt.Mat.Row)
    neighborMatrix = blankMatrix.eWiseApply(matrix, op=(lambda e1, e2: e2),
                                            allowANulls=True, allowBNulls=False,
                                            doOp=(lambda e1, e2: e2 > 0),
                                            inPlace=False)
    # Replace each remaining entry by the component label of its column.
    neighborMatrix.scale(components, op=(lambda e1, e2: e2 if e1 > 0 else 0), dir=kdt.Mat.Column)
    neighborMatrix = blankMatrix.eWiseApply(neighborMatrix, op=(lambda e1, e2: e2),
                                            allowANulls=True, allowBNulls=False,
                                            doOp=(lambda e1, e2: e2 >= 0),
                                            inPlace=False)

    def check_cluster(current, previous):
        # Reduction operator: -2 stands for "nothing seen yet", -1 for
        # "differing labels seen"; otherwise the common label is kept.
        if previous == -2:
            return current
        elif previous == -1:
            return -1
        else:
            if current == -2:
                return previous
            else:
                if previous != current:
                    return -1
                else:
                    return current

    neighbor_clusters = neighborMatrix.reduce(dir=kdt.Mat.Row, op=check_cluster, init=-2)
    # Rows whose reduction ended at -1 are relabelled -3 (counted as hubs
    # in the summary below).
    components.eWiseApply(neighbor_clusters, op=(lambda e1, e2: -3),
                          allowANulls=False, allowBNulls=False,
                          doOp=(lambda e1, e2: e2 == -1), inPlace=True)

    if kdt.master():
        print 'Finding Hub Time: ' + str(time.time() - t0)

    if kdt.master():
        print 'Community Detection Time: ' + str(time.time() - start)

    # Tally the final labels: -1 outlier, -3 hub, anything else a cluster id.
    clusters = set()
    outlier = 0
    hub = 0
    for cluster in components:
        if cluster == -1:
            outlier += 1
        elif cluster == -3:
            hub += 1
        else:
            clusters.add(cluster)

    if kdt.master():
        print 'Clusters: ' + str(len(clusters))
        print 'Hubs: ' + str(hub)
        print 'Outliers: ' + str(outlier)
'''
Created on Sep 30, 2017

@author: Seokyong Hong

Command-line driver: runs SCAN.detect on an input file and prints the
total elapsed time on the master process.
'''
import os
import sys
import kdt
import time
from community.SCAN import SCAN

if __name__ == '__main__':
    if len(sys.argv) != 4:
        print("Usage: mpirun -np <#processes> python Test.py <input> <mu> <epsilon>")
        # Exit non-zero so the calling shell can tell that the run was
        # rejected; the original exited with status 0.
        sys.exit(1)

    start = time.time()

    # Positional arguments: <input> <mu> <epsilon>
    input_path = sys.argv[1]
    mu = int(sys.argv[2])
    epsilon = float(sys.argv[3])

    scan = SCAN()
    scan.detect(input=input_path, epsilon=epsilon, mu=mu, debug=False)

    if kdt.master():
        # Parenthesised single-argument form, matching the usage print above.
        print('Time: ' + str(time.time() - start))
import kdt
import sys
import time
from stats import splitthousands

# Use the matrix file named on the command line, or generate a Graph500
# graph when none is given.  sys.argv always contains the script name, so
# "no argument" means len(sys.argv) < 2; the original tested "< 1", which
# is never true and made the no-argument run fail on sys.argv[1].
if len(sys.argv) < 2:
    B = kdt.DiGraph()
    B.genGraph500Edges(10)
    B._spm.Apply(kdt.pyCombBLAS.set(1))
else:
    inmatrixfile = sys.argv[1]
    if kdt.master():
        print("Loading matrix from %s" % inmatrixfile)
    B = kdt.DiGraph.load(inmatrixfile)

bedges = B._spm.getnee()

# Parameters passed to B._markov below.
expansion = 3
inflation = 3
prunelimit = 0.0000001
addSelfLoops = False

# nedges run
if kdt.master():
    print("Starting run to find number of edges...")
C, nedges = B._markov(addSelfLoops=addSelfLoops, expansion=expansion, inflation=inflation, prunelimit=prunelimit, retNEdges=True)

# timed run
if kdt.master():
    print("nedges=%d. Starting timed run..." % (nedges))
before = time.time()