Example #1
0
def k2Validate(G, start, parents):
	"""Check a BFS result against the Graph500 validation tests.

	G       -- graph object; must provide isBfsTree() and toParVec()
	start   -- root vertex the BFS was started from
	parents -- BFS parent vector, handed unchanged to G.isBfsTree()

	Returns True if every test passes and False otherwise.  Diagnostic
	messages are printed on the master process only.
	"""
	(valid, levels) = G.isBfsTree(start, parents)
	# isBfsTree implements Graph500 tests 1 and 2
	if not valid:
		if kdt.master():
			# The message used to format abs(ret), but no `ret` is ever bound
			# in this function, so reporting a failure raised NameError.
			print("isBfsTree detected failure of Graph500 test 1 or 2")
		return False

	good = True

	# The input edge list is not available here, so the edges of the
	# resulting graph are used as a proxy for it.
	[origI, origJ, ign] = G.toParVec()
	del ign
	li = levels[origI]
	lj = levels[origJ]
	del origI

	# A level of -1 marks an endpoint that is not in the BFS tree.
	neither_in = (li == -1) & (lj == -1)
	respects = abs(li-lj) <= 1

	# Spec test #3:
	# every input edge has vertices whose levels differ by no more than 1
	if not (respects | neither_in).all():
		if kdt.master():
			print("At least one graph edge has endpoints whose levels differ by more than one and is in the BFS tree")
			print("%s %s" % (li, lj))
		good = False

	# Spec test #4:
	# the BFS tree spans a connected component's vertices (== all edges
	# either have both endpoints in the tree or not in the tree, or
	# source is not in tree and destination is the root)
	both_in = (li > -1) & (lj > -1)
	out2root = (li == -1) & (origJ == start)
	del origJ
	if not (neither_in | both_in | out2root).all():
		if kdt.master():
			print("The tree does not span exactly the connected component, root=%d" % start)
		good = False
	del both_in, out2root

	# Spec test #5:
	# a vertex and its parent are joined by an edge of the original graph
	# NOTE(review): this evaluates the same predicate as test #3 and never
	# looks at `parents` -- confirm that this is the intended check.
	if not (neither_in | respects).all():
		if kdt.master():
			print("At least one vertex and its parent are not joined by an original edge")
		good = False

	return good
Example #2
0
def _gabpRealNorm(A, b, ha, m):
	"""Return the 2-norm of (A applied to ha) minus b.

	The product is formed with A._spm.SpMV_PlusTimes on the sparse form of
	ha and stored in a length-m SpParVec before b is subtracted.
	"""
	y = kdt.SpParVec(m)
	y._spv = A._spm.SpMV_PlusTimes(ha.toSpParVec()._spv)
	return (y-b).toParVec().norm(2)


def gabp(A, b, maxround, epsilon):
	"""Run GBP rounds on (A, b) and print convergence and timing information.

	A        -- kdt.DiGraph holding the matrix; its diagonal seeds J
	b        -- right-hand-side vector; seeds h
	maxround -- maximum number of rounds to run
	epsilon  -- the loop stops once r > 2 and the relative change of
	            ha = h / J between two rounds drops below this value

	Nothing is returned.  On the master process the function prints the
	per-phase timings and the final relative norm together with the norm
	of A*ha - b.
	"""
	copy_time = 0
	scale_time = 0
	t_time = 0
	add_time = 0
	mul_divide_time = 0
	sum_time = 0
	cmp_time = 0

	t1 = time.time()
	m = A.nvert()
	pv = kdt.ParVec(0)
	# Mh, MJ start out as m-by-m all-zero matrices
	Mh = kdt.DiGraph(pv, pv, pv, m)
	MJ = Mh.copy()

	conv = False

	# stencil: a copy of A with every value set to one and self loops removed
	stencil = A.copy()
	stencil.ones()
	stencil.removeSelfLoops()

	# create an m*m identity matrix
	pi = kdt.ParVec.range(0, m)
	pw = kdt.ParVec(m, 1)
	eye = kdt.DiGraph(pi, pi, pw, m)

	diagA = A*eye
	[piDiagA, pjDiagA, peDiagA] = diagA.toParVec()
	h = b.copy()
	J = peDiagA.copy()

	ha = h / J
	r = 1
	t2 = time.time()
	init_time = t2-t1
	rel_norm = 40000
	while r <= maxround:
		if kdt.master():
			print("starting GBP round %d, relnorm=%f"%(r, rel_norm))
		preRes = ha

		t3 = time.time()
		# COPY
		Mhtemp = stencil.copy()
		MJtemp = stencil.copy()
		t4 = time.time()
		copy_time += (t4-t3)

		# SCALE
		Mhtemp.scale(h)	# default direction: dir=kdt.DiGraph.Out, which scales rows
		MJtemp.scale(J)
		t5 = time.time()
		scale_time += t5-t4

		# TRANSPOSE
		Mh.reverseEdges()
		MJ.reverseEdges()
		t6 = time.time()
		t_time += t6-t5

		# ADD
		h_m = Mhtemp + -Mh
		J_m = MJtemp + -MJ
		t7 = time.time()
		add_time += t7-t6

		# MUL/DIVIDE
		val = -A / J_m
		Mh = val * h_m
		MJ = val * A
		t8 = time.time()
		mul_divide_time += t8-t7

		# SUM
		Mh.removeSelfLoops()
		MJ.removeSelfLoops()
		h = b + Mh.sum(kdt.DiGraph.In)
		J = peDiagA + MJ.sum(kdt.DiGraph.In)
		t9 = time.time()
		sum_time += t9-t8

		Ja = 1.0/J
		ha = h*Ja

		# Relative change of the solution estimate; guarded against a zero norm.
		ha_norm = ha.norm(2)
		if ha_norm == 0.0:
			rel_norm = 0
		else:
			rel_norm = (ha-preRes).norm(2)/ha_norm

		t10 = time.time()
		cmp_time += t10-t9

		if r > 2 and rel_norm < epsilon:
			# Computed on every process; only the master reports it.
			real_norm = _gabpRealNorm(A, b, ha, m)
			if kdt.master():
				after = time.time()
				print("GBP Converged after %d rounds, reached rel_norm %f real_norm %f"% (r,rel_norm,real_norm))
				print("run time %fs"%(after-t1))
			conv = True
			break
		r += 1

	after = time.time()
	if kdt.master():
		print("init time:   %fs"%init_time)
		print("copy time:   %fs"%copy_time)
		print("scale_time:  %fs"%scale_time)
		print("t_time:      %fs"%t_time)
		print("add_time:    %fs"%add_time)
		print("m_d_time:    %fs"%mul_divide_time)
		print("sum_time:    %fs"%sum_time)
		print("cmp_time:    %fs"%cmp_time)
		print("total_time:  %fs"%(after-t1))
	if not conv:
		real_norm = _gabpRealNorm(A, b, ha, m)
		if kdt.master():
			print("GBP did not converge in %d rounds, reached rel_norm %f real_norm %f"%(r-1,rel_norm,real_norm))
			print("run time %fs"%(after-t1))

	if False:
		# Disabled dump of the solution vector.  It used to reference `nvert`,
		# which is not bound in this function; the local vertex count `m` is
		# used instead so the block works if it is ever re-enabled.
		if kdt.master():
			print("writing resulting vector x to x.mtx")
		X = kdt.DiGraph(kdt.ParVec.range(m), kdt.ParVec.ones(m)-1, ha, m)
		X.save("x.mtx")
Example #3
0
import getopt
import sys

import kdt

# Problem size requested with "-k <n>"; -1 means "not given".
k = -1

if len(sys.argv) > 1:
	outfile = sys.argv[1]
	for i in range(1, len(sys.argv)):
		a = sys.argv[i]
		if a == '-k':
			# A trailing "-k" used to index past the end of sys.argv and die
			# with an IndexError traceback; report the mistake instead.
			if i + 1 >= len(sys.argv):
				raise SystemExit("-k requires an integer argument")
			k = int(sys.argv[i+1])

if k > 2:
	# Generate a k-by-k model problem.
	import ModelProblemGen

	if kdt.master():
		print("Generating %d-by-%d model problem..."%(k,k))

	# NOTE(review): "getModelProbem" is spelled as in the original call;
	# confirm it against ModelProblemGen before renaming.
	A, b = ModelProblemGen.getModelProbem(k)
else:
	# No usable -k given: fall back to the thermal2 matrix and vector files.
	A = kdt.DiGraph.load('thermal2/thermal2.mtx')
	b = kdt.ParVec.load('thermal2/thermal2_b.mtx')

nvert = A.nvert()

def gabp(A, b, maxround, epsilon):
	# NOTE(review): only the timing accumulators are initialised and nothing
	# is returned; the body looks truncated -- confirm against the full source.
	copy_time = scale_time = t_time = add_time = mul_divide_time = 0
def _k2EdgeLevelSpread(G, levels):
	"""Return edgeMax - edgeMin for `levels`.

	edgeMax is G._spm.SpMV_SelMax applied to `levels`; edgeMin is the
	negation of the same product applied to `-levels`.
	"""
	edgeMax = kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax(levels.toSpParVecAll()._spv))
	edgeMin = -kdt.SpParVec.toSpParVec(G._spm.SpMV_SelMax((-levels).toSpParVecAll()._spv))
	return edgeMax - edgeMin


def k2Validate(G, start, parents):
	"""Check a BFS result against the Graph500 validation tests.

	G       -- graph the BFS was run on
	start   -- root vertex the BFS was started from
	parents -- BFS parent vector

	Returns True if every test passes and False otherwise.  Diagnostic
	messages are printed on the master process only.  The level vector
	returned by G.isBfsTree() is modified in place (entries of -1 are
	replaced by -2) before test #4.
	"""
	[valid, levels] = G.isBfsTree(start, parents)
	# isBfsTree implements Graph500 tests 1 and 2
	if not valid:
		if kdt.master():
			# The message used to format abs(ret), but no `ret` is ever bound
			# in this function, so reporting a failure raised NameError.
			print("isBfsTree detected failure of Graph500 test 1 or 2")
		return False

	good = True

	# Spec test #3:
	# every input edge has vertices whose levels differ by no more than 1
	if (_k2EdgeLevelSpread(G, levels) > 1).any():
		if kdt.master():
			print("At least one graph edge has endpoints whose levels differ by more than one")
		good = False

	# Spec test #4:
	# the BFS tree spans a connected component's vertices (== all edges
	# either have both endpoints in the tree or not in the tree, or
	# source is not in tree and destination is the root)

	# set not-in-tree vertices' levels to -2
	import pyCombBLAS as pcb
	levels._dpv.Apply(pcb.ifthenelse(pcb.bind2nd(pcb.equal_to(),-1), pcb.set(-2), pcb.identity()))
	if (_k2EdgeLevelSpread(G, levels) > 1).any():
		if kdt.master():
			# The "%d" placeholder was never given its argument, so the
			# literal text "root=%d" was printed.
			print("The tree does not span exactly the connected component, root=%d" % start)
		good = False

	# Spec test #5:
	# a vertex and its parent are joined by an edge of the original graph,
	# except for the root, which has no parent in the tree
	Gnv = G.nvert(); Gne = G.nedge()
	[Gi, Gj, ign] = G.toParVec()
	del ign
	# non-root tree vertices == NRT Vs
	NRTVs = (levels!=-2) & (parents!=kdt.ParVec.range(Gnv))
	nNRTVs = NRTVs.nnz()
	TGi = kdt.ParVec.range(nNRTVs)
	TGj1 = kdt.ParVec.range(Gnv)[NRTVs]
	TGj2 = parents[NRTVs]
	M = max(Gne, Gnv)
	#FIX:  really should use SpParMats here, as don't need spm and spmT
	# tmpG1 ends up with one row per non-root tree vertex, holding that
	# vertex (TGj1) and its parent (TGj2).
	tmpG1 = kdt.HyGraph(TGi, TGj1, 1, M, Gnv)
	tmpG2 = kdt.HyGraph(TGi, TGj2, 1, M, Gnv)
	tmpG1._spm  += tmpG2._spm
	tmpG1._spmT += tmpG2._spmT
	del tmpG2
	tmpG3 = kdt.HyGraph(Gi, Gj, 1, M, Gnv)
	tmpG4 = kdt.DiGraph()
	tmpG4._spm = tmpG1._spm.SpMM(tmpG3._spmT)  #!?  not tmp3._spmT ?
	# The check below expects a maximum of exactly 2 in each of the first
	# nNRTVs rows of the product.
	maxIncid = tmpG4.max(kdt.DiGraph.Out)[kdt.ParVec.range(Gnv) < nNRTVs]
	if (maxIncid != 2).any():
		if kdt.master():
			print("At least one vertex and its parent are not joined by an original edge")
		good = False

	# An unreachable duplicate of the statements above used to follow this
	# return; it has been removed.
	return good


# Obtain the hypergraph G: it is loaded from `file` when a name was given.
if len(file) != 0:
	if kdt.master():
		print('Loading %s'%(file))
	G = kdt.HyGraph.load(file)
	K1elapsed = 0.0
else:
	raise SystemExit("No generation of Graph500 HyGraph for now; must use file")
	# Everything below is unreachable while the raise above is in place; it is
	# the generation path kept for when HyGraph generation is enabled.
	if kdt.master():
		print("Generating a Graph500 RMAT graph with 2^%d vertices..."%(scale))
	G = kdt.HyGraph()
	K1elapsed = G.genGraph500Edges(scale)
	if kdt.master():
		print("Generation took %fs."%(K1elapsed))


if False:
Example #6
0
    def detect(self, input, epsilon, mu, debug=False):
        """Run community detection on the matrix stored in `input` and print a summary.

        input   -- file name handed to kdt.Mat.load()
        epsilon -- threshold; only entries of the normalised matrix that are
                   >= epsilon are kept
        mu      -- threshold; a vertex is flagged as a core when its row of
                   the epsilon-filtered matrix holds at least mu entries
        debug   -- when true, the master process also prints the
                   intermediate matrices and vectors

        Nothing is returned.  Per-phase timings and the final numbers of
        clusters, hubs and outliers are printed on the master process.  In
        the final label vector -1 marks an outlier and -3 marks a hub.
        NOTE(review): presumably an implementation of the SCAN structural
        clustering algorithm -- confirm against the enclosing class.
        """
        start = time.time()
        # Load graph as a matrix
        matrix = kdt.Mat.load(fname=input, element=True, par_IO=False)
        if kdt.master():
            print '- Matrix Loading Time: ' + str(time.time() - start)
            if debug:
                print 'Input Matrix: '
                print matrix

        # Recover symmetry: combine the matrix with its transpose, taking the
        # entry from whichever side has a non-negative value (missing entries
        # are presented to the lambdas as -1).
        t0 = time.time()
        temp = matrix.copy()
        temp.transpose()
        distanceMatrix = matrix.eWiseApply(
            temp,
            op=(lambda e1, e2: e1 if e1 >= 0 else e2),
            allowANulls=True,
            allowBNulls=True,
            doOp=(lambda e1, e2: e1 >= 0 or e2 >= 0),
            inPlace=False,
            ANull=-1,
            BNull=-1)

        # `matrix` becomes the pattern of the symmetrised matrix (all ones);
        # `degrees` counts its entries along the Column direction.
        matrix = distanceMatrix.copy()
        matrix.apply(op=(lambda e: 1))
        degrees = matrix.count(dir=kdt.Mat.Column, pred=(lambda e: e >= 0))
        if debug and kdt.master():
            print 'Degrees: '
            print degrees

        # Identity with its main diagonal removed; used below as the left
        # operand of eWiseApply calls whose op only returns the right entry.
        # NOTE(review): presumably this leaves an empty matrix -- confirm.
        blankMatrix = kdt.Mat.eye(n=matrix.nrow(), m=matrix.ncol())
        blankMatrix.removeMainDiagonal()

        # Normalisation factors: the first scale stores degree + 1, the second
        # turns each entry into sqrt(previous * (degree + 1)).
        temp = matrix.copy()
        temp.scale(degrees, op=(lambda e1, e2: e2 + 1), dir=kdt.Mat.Row)
        temp.scale(degrees,
                   op=(lambda e1, e2: math.sqrt(e1 * (e2 + 1))),
                   dir=kdt.Mat.Column)
        # Divide every entry of distanceMatrix by its normalisation factor.
        distanceMatrix.eWiseApply(temp,
                                  op=(lambda e1, e2: e1 / e2),
                                  allowANulls=False,
                                  allowBNulls=True,
                                  inPlace=True)
        # Keep only the normalised entries that reach epsilon.
        epsilonMatrix = blankMatrix.eWiseApply(
            distanceMatrix,
            op=(lambda e1, e2: e2),
            allowANulls=True,
            allowBNulls=False,
            doOp=(lambda e1, e2: e2 >= epsilon),
            inPlace=False)
        if kdt.master():
            print '- E-distance Computation Time: ' + str(time.time() - t0)
            if debug:
                print 'Epsilon Matrix: '
                print epsilonMatrix

        # cores[v] is 1 when row v of epsilonMatrix has at least mu entries,
        # otherwise 0.
        t0 = time.time()
        cores = epsilonMatrix.count(dir=kdt.Mat.Row)
        cores.apply(op=(lambda e: 1 if e >= mu else 0))
        if kdt.master():
            print '- Computing Core Time: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Cores: '
            print cores

        # Scale the normalisation matrix by the core flags along both
        # directions, keep the entries that stay positive (set to 1), then
        # intersect that pattern with epsilonMatrix.
        t0 = time.time()
        temp.scale(cores, dir=kdt.Mat.Row)
        temp.scale(cores, dir=kdt.Mat.Column)
        coreMatrix = temp.eWiseApply(temp,
                                     op=(lambda e1, e2: 1),
                                     allowANulls=False,
                                     allowBNulls=False,
                                     doOp=(lambda e1, e2: e1 > 0 and e2 > 0),
                                     allowIntersect=True)
        coreMatrix.eWiseApply(epsilonMatrix,
                              op=(lambda e1, e2: e1),
                              allowANulls=False,
                              allowBNulls=False,
                              inPlace=True)
        if kdt.master():
            print '- Computing Core Matrix: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Core Matrix: '
            print coreMatrix

        # Connected components of the graph built from coreMatrix give the
        # initial cluster labels.
        t0 = time.time()
        sourceIndex, targetIndex, valueIndex = coreMatrix.toVec()
        graph = kdt.DiGraph(sourceIndex, targetIndex, valueIndex,
                            matrix.nrow())
        components = graph.connComp()
        if kdt.master():
            print '- Computing Component Time: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Components: '
            print components

        # Histogram of the component labels; positions flagged as core are
        # then overwritten with 0.
        t0 = time.time()
        frequencies = components.hist()
        frequencies.eWiseApply(cores,
                               op=(lambda e1, e2: 0),
                               allowANulls=False,
                               allowBNulls=False,
                               doOp=(lambda e1, e2: e2 == 1),
                               inPlace=True)
        if kdt.master():
            print '- Computing Frequency Time: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Frequencies: '
            print frequencies

        # solitaries: 1 where the frequency is exactly 1 and the position is
        # not flagged as a core.
        t0 = time.time()
        solitaries = frequencies.eWiseApply(
            cores,
            op=(lambda e1, e2: 1),
            allowANulls=False,
            allowBNulls=False,
            doOp=(lambda e1, e2: e1 == 1 and e2 != 1),
            inPlace=False)
        if kdt.master():
            print '- Computing Solitary Time: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Solitaries: '
            print solitaries

        t0 = time.time()
        # Set all solitary nodes as outliers
        components.eWiseApply(solitaries,
                              op=(lambda e1, e2: -1),
                              allowANulls=False,
                              allowBNulls=False,
                              doOp=(lambda e1, e2: e2 == 1),
                              inPlace=True)
        # Find borders: scale the epsilon pattern by the core flags and then
        # by the component labels, keep the positive entries and take the
        # maximum along the Row direction (-1 when there is none).
        temp = epsilonMatrix.copy()
        temp.apply(op=(lambda e: 1))
        temp.scale(cores, dir=kdt.Mat.Column)
        temp.scale(components, dir=kdt.Mat.Column)
        coreMatrix = blankMatrix.eWiseApply(temp,
                                            op=(lambda e1, e2: e2),
                                            allowANulls=True,
                                            allowBNulls=False,
                                            doOp=(lambda e1, e2: e2 > 0),
                                            inPlace=False)
        borders = coreMatrix.max(dir=kdt.Mat.Row, init=-1)
        if kdt.master():
            print '- Computing Border Time: ' + str(time.time() - t0)
        if debug and kdt.master():
            print 'Borders: '
            print borders

        t0 = time.time()
        # Assign maximum labels to borders
        components.eWiseApply(borders,
                              op=(lambda e1, e2: e2),
                              allowANulls=False,
                              allowBNulls=False,
                              doOp=(lambda e1, e2: e2 >= 0),
                              inPlace=True)
        if kdt.master():
            print '- Assigning Border Label Time ' + str(time.time() - t0)

        t0 = time.time()
        # Find hubs
        # After this step solitaries is 1 only where solitary * border < 0,
        # i.e. for solitary positions whose border label stayed negative.
        solitaries.eWiseApply(borders,
                              op=(lambda e1, e2: 1 if e1 * e2 < 0 else 0),
                              allowANulls=False,
                              allowBNulls=False,
                              inPlace=True)
        # Zero the pattern entries of every position not selected above, then
        # keep only the positive entries.
        matrix.scale(solitaries,
                     op=(lambda e1, e2: e1 if e2 > 0 else 0),
                     dir=kdt.Mat.Row)
        neighborMatrix = blankMatrix.eWiseApply(matrix,
                                                op=(lambda e1, e2: e2),
                                                allowANulls=True,
                                                allowBNulls=False,
                                                doOp=(lambda e1, e2: e2 > 0),
                                                inPlace=False)
        # Replace each remaining entry by a component label and drop the
        # negative ones.
        neighborMatrix.scale(components,
                             op=(lambda e1, e2: e2 if e1 > 0 else 0),
                             dir=kdt.Mat.Column)
        neighborMatrix = blankMatrix.eWiseApply(neighborMatrix,
                                                op=(lambda e1, e2: e2),
                                                allowANulls=True,
                                                allowBNulls=False,
                                                doOp=(lambda e1, e2: e2 >= 0),
                                                inPlace=False)

        # Reduction operator: -2 is the "nothing seen yet" start value and -1
        # means "two different labels were seen"; otherwise the single common
        # label is carried along.
        def check_cluster(current, previous):
            if previous == -2:
                return current
            elif previous == -1:
                return -1
            else:
                if current == -2:
                    return previous
                else:
                    if previous != current:
                        return -1
                    else:
                        return current

        neighbor_clusters = neighborMatrix.reduce(dir=kdt.Mat.Row,
                                                  op=check_cluster,
                                                  init=-2)
        # Positions whose reduction ended in -1 are labelled -3 (hub).
        components.eWiseApply(neighbor_clusters,
                              op=(lambda e1, e2: -3),
                              allowANulls=False,
                              allowBNulls=False,
                              doOp=(lambda e1, e2: e2 == -1),
                              inPlace=True)
        if kdt.master():
            print 'Finding Hub Time: ' + str(time.time() - t0)

        if kdt.master():
            print 'Community Detection Time: ' + str(time.time() - start)

        # Tally the final labels: -1 is an outlier, -3 is a hub, and every
        # other value is a cluster id.
        clusters = set()
        outlier = 0
        hub = 0
        for cluster in components:
            if cluster == -1:
                outlier += 1
            elif cluster == -3:
                hub += 1
            else:
                clusters.add(cluster)

        if kdt.master():
            print 'Clusters: ' + str(len(clusters))
            print 'Hubs: ' + str(hub)
            print 'Outliers: ' + str(outlier)
Example #7
0
'''
Created on Sep 30, 2017

@author: Seokyong Hong
'''
import os
import sys
import kdt
import time
from community.SCAN import SCAN

# Command-line driver: runs SCAN.detect on <input> with the given <mu> and
# <epsilon> and reports the total wall-clock time on the master process.
if __name__ == '__main__':
    if len(sys.argv) != 4:
        print(
            "Usage: mpirun -np <#processes> python Test.py <input> <mu> <epsilon>"
        )
        # A usage error used to exit with status 0, which callers and shell
        # scripts read as success; signal the failure instead.
        sys.exit(1)

    start = time.time()
    input_path = sys.argv[1]
    mu = int(sys.argv[2])
    epsilon = float(sys.argv[3])
    scan = SCAN()
    scan.detect(input=input_path, epsilon=epsilon, mu=mu, debug=False)
    if kdt.master():
        print('Time: ' + str(time.time() - start))
Example #8
0
import kdt
import sys
import time
from stats import splitthousands

# Pick the input graph B: generate one when no matrix file is given on the
# command line, otherwise load the named file.
# sys.argv always holds at least the script name, so the old test
# "len(sys.argv) < 1" was never true and running without arguments died
# with an IndexError on sys.argv[1].
if len(sys.argv) < 2:
	B = kdt.DiGraph()
	B.genGraph500Edges(10)
	B._spm.Apply(kdt.pyCombBLAS.set(1))
else:
	inmatrixfile = sys.argv[1]
	if kdt.master():
		print("Loading matrix from %s" % (inmatrixfile,))
	B = kdt.DiGraph.load(inmatrixfile)

bedges = B._spm.getnee()

# Parameters handed to B._markov() below.
expansion = 3
inflation = 3
prunelimit = 1e-7
addSelfLoops = False

# First, untimed run: also asks _markov for the edge count (retNEdges=True).
if kdt.master():
	print("Starting run to find number of edges...")
C, nedges = B._markov(
	addSelfLoops=addSelfLoops,
	expansion=expansion,
	inflation=inflation,
	prunelimit=prunelimit,
	retNEdges=True
)

# Timed run starts here.
if kdt.master():
	print("nedges=%d. Starting timed run..."%(nedges))
before = time.time()