예제 #1
0
    def testGetClusterAssigments(self):
        """Check the cluster assignments reported by two Louvain levels.

        Builds a 10-node graph with unit edge weights, runs phase I on
        level 0, derives level 1 from it (phase II followed by phase I), and
        then asserts the assignments returned by each level. The level-0
        assignments are fetched a second time after level 1 has run so that
        side effects of building the next level would show up.
        """
        log = self.logger.info
        log("BEGIN")

        # Edge list for the level-0 graph; every edge carries weight 1.
        edges = [
            (0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1),
            (0, 3), (0, 6), (0, 7),
            (3, 4), (4, 3), (3, 5), (5, 3), (3, 0),
            (6, 7), (7, 6), (6, 8), (8, 6), (6, 9), (9, 6),
            (8, 9), (9, 8), (9, 7), (7, 9), (6, 0), (7, 0),
        ]
        weights = [1] * len(edges)
        nodeCount = 10  # number of nodes

        # Level 0: build the initial graph, compute Q, then run phase I to
        # find the best cluster assignments.
        level0 = Louvain.buildGraph("level0", edges, weights)
        level0._calculateQ()
        level0._phaseI(nodeCount, isLouvainInit=True)

        level0Assignments = level0.getClusterAssigments()
        log("level0 cluster assignments:\n{}".format(level0Assignments))

        # Level 1: phase II consolidates the clusters found in the previous
        # level; Q is calculated before phase I is run.
        level1 = Louvain.buildLouvain("level1", level0)
        level1._phaseII()
        level1._calculateQ()

        # Treat this as the top level.
        level1._phaseI(nodeCount)

        level1Assignments = level1.getClusterAssigments()
        log("level1 cluster assignments:\n{}".format(level1Assignments))

        log("**************** check for side effects output should be same as above")

        # NOTE: the partition originally expected here was
        # {5: [0, 1, 2, 3, 4, 5], 9: [9, 6, 7, 8]}. The one asserted below
        # looks possible but is not what the author expected.
        level0Assignments = level0.getClusterAssigments()
        expectedLevel0 = {
            5: [9, 8],
            9: [6, 7, 0, 1, 2, 3, 4, 5],
        }
        self.assertEqual(level0Assignments, expectedLevel0)

        level1Assignments = level1.getClusterAssigments()
        expectedLevel1 = {9: [9, 8, 6, 7, 0, 1, 2, 3, 4, 5]}
        self.assertEqual(level1Assignments, expectedLevel1)

        log("END\n")
예제 #2
0
    def testDJGraph(self):
        """Exercise Louvain on a graph made of two disjoint node pairs.

        The graph has edges 0<->1 (weight 5) and 2<->3 (weight 10). The test
        asserts Q right after the graph is built and again after phase I,
        then builds the next level and runs phase II on it. Cluster and node
        state is only logged along the way.
        """
        self.logger.info("BEGIN")

        listOfEdges = [(0, 1), (1, 0), (2, 3), (3, 2)]

        # All other tests assume weights are 1. Use larger numbers here so
        # that bugs in the summary stats are easy to spot.
        listOfWeight = [5, 5, 10, 10]
        numRows = 4  # number of nodes

        louvainLevel0 = Louvain.buildGraph("level 0", listOfEdges,
                                           listOfWeight)
        # check Q
        louvainLevel0._calculateQ()
        self.logger.info("after  buildGraph() louvainLevel0._Q:{}".format(
            louvainLevel0._Q))
        self.assertAlmostEqual(louvainLevel0._Q, 0)

        # Log the initial clusters and nodes so the graph initialization can
        # be inspected.
        # TODO: the per-node state is only logged here, never asserted; add
        # real assertions once the expected node representation is settled.
        print()
        for cluster in louvainLevel0._clusters.values():
            self.logger.info("{}".format(cluster))

        print()
        for node in louvainLevel0._nodeLookup.values():
            self.logger.info("\nnode {}".format(node))
            print()

        # check kiin
        for node in louvainLevel0._nodeLookup.values():
            self.logger.info("nodeId:{} _weightsInClusterDict:{}".format(
                node._nodeId, node._weightsInClusterDict))

        # run phase I
        louvainLevel0._phaseI(
            numRows,
            isLouvainInit=True)  # TODO: can probably get rid of isLouvainInit
        self.logger.info(
            "after phase I() louvainLevel0:\n{}".format(louvainLevel0))
        l0Assignments = louvainLevel0.getClusterAssigments()
        self.logger.info(
            "l0Assigments cluster assignments:\n{}".format(l0Assignments))

        # check Q
        louvainLevel0._calculateQ()
        self.logger.info("after phase I   louvainLevel0._Q:{}".format(
            louvainLevel0._Q))
        self.assertAlmostEqual(louvainLevel0._Q, 0.7222222222222221)

        # build next level
        louvainLevel1 = Louvain.buildLouvain("level 1 ", louvainLevel0)
        self.logger.info(
            "after buildLouvain louvainLevel1\n{}".format(louvainLevel1))

        # phase II
        louvainLevel1._phaseII(
            isLouvainInit=False
        )  # TODO: can probably get rid of isLouvainInit
        self.logger.info(
            "after phaseII() louvainLevel1  this log line looks funnny:\n{}".
            format(louvainLevel1))
        l1Assignments = louvainLevel1.getClusterAssigments()
        self.logger.info(
            "louvainLevel1 cluster assignments:\n{}".format(l1Assignments))

        self.logger.info("END\n")
 def testPhaseII(self):
     """Check cluster summaries after level-0 phase I and level-1 phase II.

     Builds a 10-node graph with unit edge weights, runs phase I on level 0
     and compares the resulting clusters against an expected table via
     ``self.checkClusters``. It then builds level 1, runs phase II, checks
     its clusters the same way, logs the per-node caches, and finally runs
     phase I on level 1 and logs the outcome.
     """
     log = self.logger.info
     log("BEGIN")

     edges = [
         (0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1),
         (0, 3), (0, 6), (0, 7),
         (3, 4), (4, 3), (3, 5), (5, 3),
         (3, 0),
         (6, 7), (7, 6), (6, 8), (8, 6), (6, 9), (9, 6),
         (8, 9), (9, 8), (9, 7), (7, 9),
         (6, 0), (7, 0),
     ]
     weights = [1] * len(edges)
     nodeCount = 10  # the number of nodes

     level0 = Louvain.buildGraph("testPhaseII graph level0", edges, weights)
     level0._calculateQ()
     level0._phaseI(nodeCount, isLouvainInit=True)

     # Expected per-cluster summary after phase I on level 0. Only clusters
     # 5 and 9 hold nodes; the rest are expected to be empty.
     expectedLevel0 = {
         0: {'custerId': 0, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         1: {'cluserId': 1, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         2: {'cluserId': 2, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         3: {'cluserId': 3, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         4: {'cluserId': 4, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         5: {'cluserId': 5, 'numNodes': 6, 'weightsInsideCluster': 14, 'totalWeight': 14},
         6: {'cluserId': 6, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         7: {'cluserId': 7, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         8: {'cluserId': 8, 'numNodes': 0, 'weightsInsideCluster': 0, 'totalWeight': 0},
         9: {'cluserId': 9, 'numNodes': 4, 'weightsInsideCluster': 10, 'totalWeight': 12},
     }

     log("TODO: empty clusters should be pruned, notice")

     self.checkClusters(expectedLevel0, level0._clusters)

     # Log the sorted member ids of the two populated clusters.
     for populatedId in (5, 9):
         memberIds = sorted(
             member._nodeId
             for member in level0._clusters[populatedId]._nodeList)
         log("clusterId:{} nodeList[{}]".format(populatedId, memberIds))

     # check phase II
     level1 = Louvain.buildLouvain("testPhaseII graph level1", level0)
     level1._phaseII()

     print('')
     log("************ check L1 phase II")
     for clusterId, cluster in level1._clusters.items():
         log("clusterId:{} cluster:{}".format(clusterId, cluster))

     expectedLevel1 = {
         5: {'cluster': 5, 'numNodes': 1, 'weightsInsideCluster': 0, 'totalWeight': 2},
         9: {'cluster': 9, 'numNodes': 1, 'weightsInsideCluster': 0, 'totalWeight': 2},
     }
     self.checkClusters(expectedLevel1, level1._clusters)

     # Log the node caches so they can be checked for correct setup.
     for clusterId, cluster in level1._clusters.items():
         for node in cluster._nodeList:
             weightsLine = "clusterId:{} nodeId:{} _weightsInClusterDict:{}".format(
                 clusterId, node._nodeId, node._weightsInClusterDict)
             log(weightsLine)
             nodesLine = "clusterId:{} nodeId:{} _nodesInClusterDict:{}".format(
                 clusterId, node._nodeId, node._nodesInClusterDict)
             log(nodesLine)

     # The Louvain algorithm would run phase I next; Q has to be calculated
     # before phase I.
     level1._calculateQ()
     level1._phaseI(nodeCount)

     print('')
     log("************ check L1 after phase I")
     for clusterId, cluster in level1._clusters.items():
         log("clusterId:{} cluster:{}".format(clusterId, cluster))

     log("END\n")