def testSimplePhaseI(self):
    """Run phase I on a three-node path graph and check the resulting clusters.

    The graph is 0 - 1 - 2 with unit weights. After ``_phaseI`` the test
    expects all three nodes to sit in cluster 1 (clusters 0 and 2 empty)
    and the modularity ``_Q`` to be 0.5.
    """
    self.logger.info("BEGIN")

    # create a trivial graph.
    listOfEdges = [(0, 1), (1, 2)]
    listOfWeight = [1 for _ in listOfEdges]
    louvainLevel0 = Louvain.buildGraph("trival graph", listOfEdges,
                                       listOfWeight)
    louvainLevel0._calculateQ()

    numRows = 3  # number of nodes
    louvainLevel0._phaseI(numRows, isLouvainInit=True)

    for clusterId, cluster in louvainLevel0._clusters.items():
        print()
        self.logger.info("cluserId:{}".format(clusterId))
        self.logger.info(cluster)

    expected = {
        0: {'clusterId': 0, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        1: {'clusterId': 1, 'numNodes': 3, 'weightsInsideCluster': 4, 'totalWeight': 4},
        2: {'clusterId': 2, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
    }
    self.checkClusters(expected, louvainLevel0._clusters)

    # _Q is a computed float: compare with a tolerance rather than exact
    # equality, as the other modularity checks in this file do.
    self.assertAlmostEqual(louvainLevel0._Q, 0.5)
    self.logger.info("END\n")
def testIGraphModularity(self):
    """Log the library modularity next to our own Q for the same graph.

    The graph comes from ``self.createSimpleIGraph()``. Its ``modularity()``
    is evaluated for a membership that puts every node in its own cluster,
    then a Louvain graph is built from the same edges and weights and its
    ``_Q`` is logged. This test makes no assertions; it only logs.
    """
    self.logger.info("BEGIN")

    graph = self.createSimpleIGraph()

    # The membership list has one entry per node; entry i is the cluster
    # that node i belongs to. Here each node is its own cluster.
    membership = list(range(graph.vcount()))
    self.logger.info("membership:{}".format(membership))

    libraryModularity = graph.modularity(membership)
    self.logger.info("iGraph Modularity:{}".format(libraryModularity))
    self.logger.warning(
        "iGraph Modularity can not be used to test bootstrap")
    self.logger.warning(
        "the cluster only have a single node. no edge is inside how come modularity is not 0"
    )

    # Feed the same edges and weights to our implementation.
    edgeTuples = [edge.tuple for edge in graph.es]
    self.logger.info("listOfEdges:\n{}".format(edgeTuples))

    edgeWeights = list(graph.es['weight'])
    self.logger.info("listOfWeight:\n{}".format(edgeWeights))

    louvainLevel0 = Louvain.buildGraph("testBootStrapModularity",
                                       edgeTuples, edgeWeights)
    self.logger.info("Q:{}".format(louvainLevel0._Q))

    self.logger.info("END\n")
def testChangeInModulartiyTrivalGraph(self):
    """Check that modularityGainIfMove() matches the real change in Q.

    On the path graph 0 - 1 - 2 (unit weights) the test:

    1. records Q before any move (expected 0),
    2. asks for the predicted gain of moving node 1 from cluster 1 into
       cluster 2 (expected 0.25),
    3. performs the move, recalculates Q (expected 0.25), and
    4. verifies the predicted gain equals ``afterMoveQ - beforeMoveQ``.
    """
    self.logger.info("BEGIN")

    # create a trivial graph.
    listOfEdges = [(0, 1), (1, 2)]
    listOfWeight = [1 for _ in listOfEdges]
    louvainLevel0 = Louvain.buildGraph("trival graph", listOfEdges,
                                       listOfWeight)
    louvainLevel0._calculateQ()
    self.logger.info("louvainLevel0:{}".format(louvainLevel0))

    # make sure the graph is set up as expected
    for node in louvainLevel0._nodeLookup.values():
        for edge in node._edgesDict.values():
            self.logger.info(edge)
        print()

    # check modularity before the move
    beforeMoveQ = louvainLevel0._Q
    # All Q values are computed floats, so every comparison below uses a
    # tolerance instead of exact equality.
    self.assertAlmostEqual(beforeMoveQ, 0.0)

    n1 = louvainLevel0._nodeLookup[1]
    fromCluster = louvainLevel0._clusters[1]
    targetCluster = louvainLevel0._clusters[2]
    predictedChangeInQ = louvainLevel0.modularityGainIfMove(
        fromCluster, targetCluster, n1)
    self.logger.info("predicted changeInQ:{}".format(predictedChangeInQ))
    self.assertAlmostEqual(predictedChangeInQ, 0.25)

    # move
    fromCluster.moveNode(targetCluster, n1, louvainLevel0._nodeLookup,
                         isLouvainInit=True)

    # recalculate Q from scratch after the move
    louvainLevel0._calculateQ()
    afterMoveQ = louvainLevel0._Q
    self.assertAlmostEqual(afterMoveQ, 0.25)

    expectedChangeInQ = afterMoveQ - beforeMoveQ
    self.logger.info("expectedChangeInQ:{} afterMoveQ:{} before:{}"
                     .format(expectedChangeInQ, afterMoveQ, beforeMoveQ))
    self.logger.info("predicted changeInQ:{}".format(predictedChangeInQ))
    self.logger.info("modularityGainIfMove:{} expected:{}"
                     .format(predictedChangeInQ, expectedChangeInQ))

    # The prediction and the recomputed delta come from different
    # arithmetic paths; they may differ in the last bits.
    self.assertAlmostEqual(predictedChangeInQ, expectedChangeInQ)
    self.logger.info("END\n")
def testGetClusterAssigments(self):
    """Check getClusterAssigments() on a two-level Louvain hierarchy.

    Level 0 is built from a ten-node graph and run through phase I.
    Level 1 is built from level 0 and run through phase II, Q calculation
    and phase I. The assignments of both levels are then asserted; level 0
    is queried again after level 1 has run to detect side effects.
    """
    self.logger.info("BEGIN")

    # Edge list of the ten-node test graph; every edge has weight 1.
    edges = [(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (0, 3),
             (0, 6), (0, 7), (3, 4), (4, 3), (3, 5), (5, 3), (3, 0),
             (6, 7), (7, 6), (6, 8), (8, 6), (6, 9), (9, 6), (8, 9),
             (9, 8), (9, 7), (7, 9), (6, 0), (7, 0)]
    unitWeights = [1] * len(edges)
    nodeCount = 10

    # Level 0: initial graph, Q, then phase I (best cluster assignments).
    level0 = Louvain.buildGraph("level0", edges, unitWeights)
    level0._calculateQ()
    level0._phaseI(nodeCount, isLouvainInit=True)
    self.logger.info(
        "level0 cluster assignments:\n{}".format(
            level0.getClusterAssigments()))

    # Level 1: phase II consolidates the clusters found in level 0.
    level1 = Louvain.buildLouvain("level1", level0)
    level1._phaseII()
    level1._calculateQ()

    # Treat level 1 as the top level and run phase I on it.
    level1._phaseI(nodeCount)
    self.logger.info(
        "level1 cluster assignments:\n{}".format(
            level1.getClusterAssigments()))

    self.logger.info(
        "**************** check for side effects output should be same as above"
    )

    # The original author noted that
    # {5: [0, 1, 2, 3, 4, 5], 9: [9, 6, 7, 8]} was the anticipated split;
    # the values below look possible but were not what was expected.
    expectedLevel0 = {5: [9, 8], 9: [6, 7, 0, 1, 2, 3, 4, 5]}
    self.assertEqual(level0.getClusterAssigments(), expectedLevel0)

    expectedLevel1 = {9: [9, 8, 6, 7, 0, 1, 2, 3, 4, 5]}
    self.assertEqual(level1.getClusterAssigments(), expectedLevel1)

    self.logger.info("END\n")
def testPhaseI(self):
    """Run phase I on a five-node graph and verify cluster stats and members.

    The graph is a triangle 0-1-2 with a tail 0-3-4, all weights 1.
    After phase I the test expects nodes {0, 1, 2} in cluster 3 and
    nodes {3, 4} in cluster 4, with clusters 0-2 left empty.
    """
    self.logger.info("BEGIN")

    edges = [(0, 1), (1, 0), (0, 2), (2, 0), (0, 3), (3, 0),
             (1, 2), (2, 1), (3, 4), (4, 3)]
    unitWeights = [1] * len(edges)
    level0 = Louvain.buildGraph("testPhaseI graph", edges, unitWeights)
    level0._calculateQ()

    nodeCount = 5
    level0._phaseI(nodeCount, isLouvainInit=True)

    for clusterId, cluster in level0._clusters.items():
        print('')
        self.logger.info("cluserId:{}".format(clusterId))
        self.logger.info(cluster)

    # Summary statistics expected for every cluster id after phase I.
    expected = {
        0: {'clusterId': 0, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        1: {'clusterId': 1, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        2: {'clusterId': 2, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        3: {'clusterId': 3, 'numNodes': 3, 'weightsInsideCluster': 8, 'totalWeight': 7},
        4: {'clusterId': 4, 'numNodes': 2, 'weightsInsideCluster': 2, 'totalWeight': 3},
    }
    self.checkClusters(expected, level0._clusters)

    # Log the members of the two non-empty clusters, separated by a
    # blank line on stdout.
    for position, wantedId in enumerate((3, 4)):
        if position > 0:
            print('')
        populated = level0._clusters[wantedId]
        for member in populated._nodeList:
            self.logger.info("clusterId:{}, nodeId:{}".format(
                populated._clusterId, member._nodeId))

    # Membership is compared order-independently.
    expectedNodesInCluster = {3: [0, 1, 2], 4: [3, 4]}
    for clusterId, wantedNodeIds in expectedNodesInCluster.items():
        members = level0._clusters[clusterId]._nodeList
        actualNodeIds = sorted(member._nodeId for member in members)
        self.assertEqual(actualNodeIds, sorted(wantedNodeIds))

    self.logger.info("END\n")
def testDJGraph(self):
    """Exercise build, phase I and phase II on a disjoint graph.

    The graph has two unconnected pairs, 0-1 (weight 5) and 2-3
    (weight 10). Q is asserted to be ~0 right after the build and
    ~0.7222 after phase I. The level-1 graph and its phase II result are
    only logged, not asserted.
    """
    self.logger.info("BEGIN")

    edges = [(0, 1), (1, 0), (2, 3), (3, 2)]
    # Every other test uses weight 1; larger, distinct weights make
    # mistakes in the summary statistics easier to spot.
    weights = [5, 5, 10, 10]
    nodeCount = 4

    level0 = Louvain.buildGraph("level 0", edges, weights)

    # Q straight after initialisation.
    level0._calculateQ()
    self.logger.info("after buildGraph() louvainLevel0._Q:{}".format(
        level0._Q))
    self.assertAlmostEqual(level0._Q, 0)

    # Dump the initial clusters and nodes for manual inspection.
    print()
    for cluster in level0._clusters.values():
        self.logger.info("{}".format(cluster))

    print()
    for node in level0._nodeLookup.values():
        self.logger.info("\nnode {}".format(node))

    print()
    # NOTE(review): this dict is never referenced below, so nothing is
    # asserted against it. Kept unchanged pending a decision on whether it
    # should back an assertion or be removed.
    expectedNodeConfigDict = {
        0: "clusterId:0 nodeId:0 numEdges:1 adjEdgeWeights:10 _weightsInClusterDict{1: 10} _edgesDict{1: srcId:0 targetId:1 weight:10}",
        1: "clusterId:1 nodeId:1 numEdges:1 adjEdgeWeights:10 _weightsInClusterDict{0: 10} _edgesDict{0: srcId:1 targetId:0 weight:10}",
        2: "clusterId:2 nodeId:2 numEdges:1 adjEdgeWeights:10 _weightsInClusterDict{3: 10} _edgesDict{3: srcId:2 targetId:3 weight:10}",
        3: "nodeId:3 numEdges:1 adjEdgeWeights:10 _weightsInClusterDict{2: 10} _edgesDict{2: srcId:3 targetId:2 weight:10}"
    }

    # Log each node's per-cluster weight cache.
    for node in level0._nodeLookup.values():
        self.logger.info("nodeId:{} _weightsInClusterDict:{}".format(
            node._nodeId, node._weightsInClusterDict))

    # Phase I.  TODO: can probably get rid of isLouvainInit
    level0._phaseI(nodeCount, isLouvainInit=True)
    self.logger.info("after phase I() louvainLevel0:\n{}".format(level0))

    level0Assignments = level0.getClusterAssigments()
    self.logger.info("l0Assigments cluster assignments:\n{}".format(
        level0Assignments))

    # Q after phase I.
    level0._calculateQ()
    self.logger.info("after phase I louvainLevel0._Q:{}".format(
        level0._Q))
    self.assertAlmostEqual(level0._Q, 0.7222222222222221)

    # Build the next level on top of level 0.
    level1 = Louvain.buildLouvain("level 1 ", level0)
    self.logger.info("after buildLouvain louvainLevel1\n{}".format(level1))

    # Phase II.  TODO: can probably get rid of isLouvainInit
    level1._phaseII(isLouvainInit=False)
    self.logger.info(
        "after phaseII() louvainLevel1 this log line looks funnny:\n{}".
        format(level1))

    level1Assignments = level1.getClusterAssigments()
    self.logger.info("louvainLevel1 cluster assignments:\n{}".format(
        level1Assignments))

    self.logger.info("END\n")
def testPhaseII(self):
    """Verify cluster statistics after level-0 phase I and level-1 phase II.

    A ten-node unit-weight graph is run through phase I; the test expects
    two populated clusters (ids 5 and 9) and eight empty ones. A level-1
    graph is then built from level 0 and run through phase II, where
    clusters 5 and 9 are each expected to hold a single node. Finally Q is
    calculated and phase I is run on level 1; that last step is only
    logged, not asserted.
    """
    self.logger.info("BEGIN")

    listOfEdges = [(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (0, 3),
                   (0, 6), (0, 7), (3, 4), (4, 3), (3, 5), (5, 3), (3, 0),
                   (6, 7), (7, 6), (6, 8), (8, 6), (6, 9), (9, 6), (8, 9),
                   (9, 8), (9, 7), (7, 9), (6, 0), (7, 0)]
    listOfWeight = [1 for _ in listOfEdges]
    louvainLevel0 = Louvain.buildGraph("testPhaseII graph level0",
                                       listOfEdges, listOfWeight)
    louvainLevel0._calculateQ()

    numRows = 10  # the number of nodes
    louvainLevel0._phaseI(numRows, isLouvainInit=True)

    # Keys are spelled 'clusterId' to match the expected-cluster dicts
    # used by the other tests in this file.
    expectedAfterPhaseL0_I = {
        0: {'clusterId': 0, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        1: {'clusterId': 1, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        2: {'clusterId': 2, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        3: {'clusterId': 3, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        4: {'clusterId': 4, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        5: {'clusterId': 5, 'numNodes': 6, 'weightsInsideCluster': 14, 'totalWeight': 14},
        6: {'clusterId': 6, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        7: {'clusterId': 7, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        8: {'clusterId': 8, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
        9: {'clusterId': 9, 'numNodes': 4, 'weightsInsideCluster': 10, 'totalWeight': 12},
    }
    self.logger.info("TODO: empty clusters should be pruned, notice")
    self.checkClusters(expectedAfterPhaseL0_I, louvainLevel0._clusters)

    # Log the members of the two populated clusters.
    for cId in [5, 9]:
        nodeIdList = sorted(
            n._nodeId for n in louvainLevel0._clusters[cId]._nodeList)
        self.logger.info("clusterId:{} nodeList[{}]".format(cId, nodeIdList))

    # check phase II
    louvainLevel1 = Louvain.buildLouvain("testPhaseII graph level1",
                                         louvainLevel0)
    louvainLevel1._phaseII()

    print('')
    self.logger.info("************ check L1 phase II")
    for clusterId, cluster in louvainLevel1._clusters.items():
        self.logger.info("clusterId:{} cluster:{}".format(clusterId, cluster))

    expectedAfterPhaseL1_II = {
        5: {'clusterId': 5, 'numNodes': 1, 'weightsInsideCluster': 0, 'totalWeight': 2},
        9: {'clusterId': 9, 'numNodes': 1, 'weightsInsideCluster': 0, 'totalWeight': 2},
    }
    self.checkClusters(expectedAfterPhaseL1_II, louvainLevel1._clusters)

    # Log the per-node caches so their set-up can be inspected.
    for clusterId, cluster in louvainLevel1._clusters.items():
        for node in cluster._nodeList:
            self.logger.info("clusterId:{} nodeId:{} _weightsInClusterDict:{}"
                             .format(clusterId, node._nodeId, node._weightsInClusterDict))
            self.logger.info("clusterId:{} nodeId:{} _nodesInClusterDict:{}"
                             .format(clusterId, node._nodeId, node._nodesInClusterDict))

    # Run phase I on louvainLevel1; Q has to be calculated before phase I.
    louvainLevel1._calculateQ()
    louvainLevel1._phaseI(numRows)

    print('')
    self.logger.info("************ check L1 after phase I")
    for clusterId, cluster in louvainLevel1._clusters.items():
        self.logger.info("clusterId:{} cluster:{}".format(clusterId, cluster))

    self.logger.info("END\n")