def cluster(self):
        """Cluster the source API symbols of each argument independently.

        Iterates over ``self.contentProvider.getSourceAPISymbols()`` and
        treats the position of every yielded symbol collection as its
        argument number. The outcome for all arguments is collected in one
        ``ClusterResult``.

        Returns:
            The ``ClusterResult`` the symbols were registered with.
        """
        retval = ClusterResult()

        # enumerate() keeps the argument number in step with the iteration,
        # including for arguments that are skipped below.
        for argNum, symbolsForArg in enumerate(
                self.contentProvider.getSourceAPISymbols()):

            # An argument without symbols contributes nothing.
            if len(symbolsForArg) == 0:
                continue

            # Offset handed to the registration calls: the number of
            # clusters registered so far.
            curOffset = len(retval.clusterIdToDatapoint.keys())

            # A single symbol is registered directly; no linkage is needed.
            if len(symbolsForArg) == 1:
                retval.register(curOffset, symbolsForArg[0], argNum)
                continue

            # The distance matrix is only computed once it is known to be
            # needed, i.e. when there are at least two symbols.
            D = self._calculateDistanceMatrix(symbolsForArg)
            Z = linkage(D, method=self.linkageMethod)
            clustering = fcluster(Z, self.maxDistInCluster,
                                  criterion='distance')

            retval.registerSet(symbolsForArg, clustering, curOffset, argNum)

        return retval
    def cluster(self, models):
        """Cluster the invocations of every model, one argument at a time.

        For each model and each of its argument numbers, the model's members
        are embedded with a ``ConditionEmbedder``, the external
        ``joern-cluster`` tool is launched, and its tab-separated
        ``nodeId<TAB>clusterId`` output lines are registered with a shared
        ``ClusterResult``.

        Args:
            models: iterable of objects offering ``getNumberOfArguments()``
                and a ``members`` attribute.

        Returns:
            The ``ClusterResult`` holding the registrations of all models.
        """
        # Local import: only this method needs shutil.
        import shutil

        retval = ClusterResult()

        for model in models:
            for argNum in range(model.getNumberOfArguments()):

                curOffset = len(retval.clusterIdToDatapoint.keys())
                embedder = ConditionEmbedder(self.contentProvider)

                try:
                    embedder.embed(model.members, argNum)

                    # TODO: we need to be able to pass a distance parameter to joern-cluster
                    clusterLines = [x.rstrip() for x in launch('joern-cluster')]
                    clustering = []
                    datapoints = []

                    for line in clusterLines:
                        # Blank output lines carry no assignment and would
                        # break the tuple unpacking below.
                        if not line:
                            continue
                        nodeId, clusterId = line.split('\t')
                        clustering.append(int(clusterId))
                        datapoints.append(nodeId)

                    retval.registerSet(datapoints, clustering, curOffset, argNum)
                finally:
                    # Remove the 'embedding' directory even when embedding,
                    # clustering or parsing failed, so that it cannot leak
                    # into the next round. A missing directory is ignored.
                    shutil.rmtree('embedding', ignore_errors=True)

        return retval
Example #3
0
    def cluster(self, models):
        """Run ``joern-cluster`` for each argument of each model.

        The members of a model are embedded per argument number with a
        ``ConditionEmbedder``; the lines printed by ``joern-cluster`` are
        parsed as ``nodeId<TAB>clusterId`` pairs and registered with one
        ``ClusterResult`` shared by all models.

        Args:
            models: iterable of objects providing ``getNumberOfArguments()``
                and a ``members`` attribute.

        Returns:
            The ``ClusterResult`` that received all registrations.
        """
        # Local import: only this method needs shutil.
        import shutil

        retval = ClusterResult()

        for model in models:
            for argNum in range(model.getNumberOfArguments()):

                curOffset = len(retval.clusterIdToDatapoint.keys())
                embedder = ConditionEmbedder(self.contentProvider)

                try:
                    embedder.embed(model.members, argNum)

                    # TODO: we need to be able to pass a distance parameter to joern-cluster
                    clusterLines = [x.rstrip() for x in launch("joern-cluster")]
                    clustering = []
                    datapoints = []

                    for line in clusterLines:
                        # An empty line has no tab-separated pair to unpack.
                        if not line:
                            continue
                        nodeId, clusterId = line.split("\t")
                        clustering.append(int(clusterId))
                        datapoints.append(nodeId)

                    retval.registerSet(datapoints, clustering, curOffset, argNum)
                finally:
                    # Always drop the "embedding" directory, also on errors,
                    # so a failed round leaves nothing behind for the next
                    # one. A missing directory is not an error.
                    shutil.rmtree("embedding", ignore_errors=True)

        return retval
Example #4
0
    def cluster(self):
        """Cluster source API symbols separately for every argument.

        Each collection yielded by
        ``self.contentProvider.getSourceAPISymbols()`` is handled on its own;
        its position in the iteration is used as the argument number. All
        registrations go into a single ``ClusterResult``.

        Returns:
            The populated ``ClusterResult``.
        """
        retval = ClusterResult()

        # The argument number comes from enumerate(), so it also advances
        # for arguments that are skipped.
        for argNum, symbolsForArg in enumerate(
                self.contentProvider.getSourceAPISymbols()):

            # No symbols: nothing to register for this argument.
            if len(symbolsForArg) == 0:
                continue

            # Number of clusters registered so far, passed on as the offset.
            curOffset = len(retval.clusterIdToDatapoint.keys())

            # One symbol: register it directly, there is nothing to link.
            if len(symbolsForArg) == 1:
                retval.register(curOffset, symbolsForArg[0], argNum)
                continue

            # Compute the distance matrix only for the non-trivial case in
            # which it is actually consumed.
            D = self._calculateDistanceMatrix(symbolsForArg)
            Z = linkage(D, method=self.linkageMethod)
            clustering = fcluster(Z,
                                  self.maxDistInCluster,
                                  criterion='distance')

            retval.registerSet(symbolsForArg, clustering, curOffset, argNum)

        return retval
    def cluster(self, sourceClusters):
        """Cluster the rows of the transposed invocation data matrix.

        Stores the definition statements obtained from the content provider
        and the given source clusters on the instance, converts them into a
        data matrix with ``InvocationsToDataMatrix`` and performs
        distance-based flat clustering on the transposed matrix.

        Args:
            sourceClusters: clustering of the sources, handed unchanged to
                ``InvocationsToDataMatrix.convert``.

        Returns:
            A ``ClusterResult`` carrying the clustering, the data matrix and
            the invocation call site ids. An empty ``ClusterResult`` is
            returned when the transposed matrix has fewer than two rows.
        """
        self.defStmts = self.contentProvider.getAllDefStmtsPerArg()
        self.sClusters = sourceClusters

        converter = InvocationsToDataMatrix()
        dataMatrix = converter.convert(self.defStmts, self.sClusters)

        observations = dataMatrix.T

        # With fewer than two rows pdist() produces an empty distance vector,
        # which linkage() rejects. This also covers the (1, 1) matrix.
        if observations.shape[0] < 2:
            return ClusterResult()

        D = pdist(observations, METRIC)
        Z = linkage(D, method=LINKAGE_METHOD)
        clustering = fcluster(Z, self.maxDistInCluster, criterion='distance')

        result = ClusterResult()
        # The argument count is read from the first entry of defStmts,
        # or 0 when there are none.
        result.setNumberOfArguments(len(self.defStmts[0]) if len(self.defStmts) > 0 else 0)
        result.registerSet(range(len(self.defStmts)), clustering)
        result.dataMatrix = dataMatrix
        result.callSiteIds = self.contentProvider.getInvocationCallSiteIds()

        return result