Exemple #1
0
    def _filter(self, onlyForCallSite):
        """Replace ``self.models`` with the selection made by a ModelSelector.

        A fresh ModelSelector is stored on ``self.modelSelector`` and is
        configured with ``onlyForCallSite`` plus this instance's
        ``minNumMembersInInvocCluster`` and ``topnInvocClusters`` settings.
        Whatever its ``select`` call returns becomes the new ``self.models``.
        """
        selector = self.modelSelector = ModelSelector()

        # Configure the selector before handing it the current models.
        selector.setCallSite(onlyForCallSite)
        selector.setMinNumMembers(self.minNumMembersInInvocCluster)
        selector.setTopN(self.topnInvocClusters)

        self.models = selector.select(self.models)
 def _filter(self, onlyForCallSite):
     """Narrow ``self.models`` down using a newly created ModelSelector.

     The selector is kept on ``self.modelSelector``. It receives the
     requested call site, the minimum member count
     (``self.minNumMembersInInvocCluster``) and the top-N limit
     (``self.topnInvocClusters``); the result of ``select`` then replaces
     ``self.models``.
     """
     chooser = self.modelSelector = ModelSelector()

     chooser.setCallSite(onlyForCallSite)
     chooser.setMinNumMembers(self.minNumMembersInInvocCluster)
     chooser.setTopN(self.topnInvocClusters)

     currentModels = self.models
     self.models = chooser.select(currentModels)
class CheckOverlayCreator:
    """Adds check information on top of a list of existing models.

    Each incoming model is wrapped in a CheckModel. The content provider is
    asked to generate checks for all member invocations, conditions are
    retrieved and clustered, a pruned check histogram is generated for every
    model, and finally the models are filtered by a ModelSelector.
    """

    def __init__(self, contentProvider):
        """Remember the content provider and load the default tunables."""
        self.contentProvider = contentProvider

        self.minCondObserved = DEFAULT_MIN_COND_OBSERVED
        self.topnCheckHist = DEFAULT_TOP_N_CHECK_HIST
        self.minFracChecks = DEFAULT_MIN_FRAC_CHECKS

    def setMinCondObserved(self, val):
        """Set the value passed to ``pruneCheckHist`` as second argument."""
        self.minCondObserved = val

    def setTopnCheckHist(self, val):
        """Set the value passed to ``pruneCheckHist`` as first argument."""
        self.topnCheckHist = val

    def setMinFracChecks(self, val):
        """Set the fraction handed to ``extractCommonLabels``."""
        self.minFracChecks = val

    def createForModels(self, models, onlyForSubChecks = None):
        """Run the whole overlay pipeline for ``models``.

        The resulting (filtered) CheckModel list is stored on ``self.models``
        and can be read back with ``getModels``; nothing is returned.
        ``onlyForSubChecks`` is forwarded unchanged to the final filter step.
        """
        self.models = list(map(CheckModel, models))

        # Gather the global check/condition data first ...
        self._generateChecksForAllModels()
        self._retrieveConditions()
        self._distributeChecksPerArg()

        self._createConditionClusters()

        # ... then derive the per-model overlay from it.
        for checkModel in self.models:
            self.createOverlayForModel(checkModel)

        self._filter(onlyForSubChecks)

        # Label generation is currently switched off:
        # self._generateLabels()

    def _generateLabels(self):
        """Fetch all AST node labels and let every model extract common ones."""

        # TODO: Optimization: It's probably possible here to generate node-labels only
        # for those ASTs that are part of one of the models still left.

        labels = self.contentProvider.getAllASTNodeLabels()
        self.conditionLabels = labels
        for checkModel in self.models:
            checkModel.extractCommonLabels(
                self.nodeIdToConditionIndex, labels, self.minFracChecks)

    def _createConditionClusters(self):
        """Cluster conditions over the current models with a ConditionClusterer."""
        clusterer = ConditionClusterer(self.contentProvider)
        self.cndClusterTool = clusterer
        self.conditionClusters = clusterer.cluster(self.models)

    def createOverlayForModel(self, model):
        """Attach the shared condition data to ``model`` and build its histogram."""

        # The model only receives references to the global condition data.
        model.setConditionClusters(self.conditionClusters)
        model.conditionsCode = self.conditionsCode
        model.nodeIdToConditionIndex = self.nodeIdToConditionIndex

        # Build the check histogram, then cut it down using the tunables.
        model.generateCheckHist()
        model.pruneCheckHist(self.topnCheckHist, self.minCondObserved)

    def getModels(self):
        """Return the current list of models."""
        return self.models

    def _generateChecksForAllModels(self):
        """Ask the content provider to generate checks for all member invocations."""
        memberLists = [checkModel.members for checkModel in self.models]
        invocations = uniq(flatten(memberLists))

        self.contentProvider.generateChecksForInvocations(invocations)

    def _retrieveConditions(self):
        """Load all conditions and their code from the content provider.

        ``nodeIdToConditionIndex`` maps each condition to its position in the
        list as returned by the provider (the last position wins when a
        condition occurs more than once). Only afterwards is
        ``self.conditions`` reduced to its distinct entries.
        """
        rawConditions = self.contentProvider.getAllConditions()
        self.conditions = rawConditions
        self.nodeIdToConditionIndex = {
            condition: position
            for position, condition in enumerate(rawConditions)
        }

        self.conditions = list(set(rawConditions))
        self.conditionsCode = self.contentProvider.getAllConditionsCode()

    def _distributeChecksPerArg(self):
        """Append to every model the checks-per-arg entry of each of its members."""
        self.checksPerArg = self.contentProvider.getAllChecksPerArg()
        # list: index is invocation index

        for checkModel in self.models:
            for member in checkModel.members:
                # Look up by the member value itself, not by its position
                # inside the model's member list.
                checkModel.checks.append(self.checksPerArg[member])

    def _filter(self, onlyForSubChecks):
        """Replace ``self.models`` by what ModelSelector.selectForChecks returns."""
        selector = self.modelSelector = ModelSelector()
        selector.setSubChecks(onlyForSubChecks)
        self.models = selector.selectForChecks(self.models)
 def _filter(self, onlyForSubChecks):
     """Replace ``self.models`` by what ModelSelector.selectForChecks returns.

     A new ModelSelector is stored on ``self.modelSelector`` and is given
     ``onlyForSubChecks`` via ``setSubChecks`` before the selection runs.
     """
     selector = self.modelSelector = ModelSelector()
     selector.setSubChecks(onlyForSubChecks)
     self.models = selector.selectForChecks(self.models)
Exemple #5
0
 def _filter(self, onlyForSubChecks):
     """Filter ``self.models`` through a freshly built ModelSelector.

     The selector (kept on ``self.modelSelector``) receives
     ``onlyForSubChecks`` through ``setSubChecks``; the result of its
     ``selectForChecks`` call becomes the new ``self.models``.
     """
     chooser = self.modelSelector = ModelSelector()
     chooser.setSubChecks(onlyForSubChecks)

     currentModels = self.models
     self.models = chooser.selectForChecks(currentModels)
Exemple #6
0
class CheckOverlayCreator:
    """Computes a check overlay for a list of models.

    The pipeline wraps every model in a CheckModel, has the content provider
    generate checks for the member invocations, loads and clusters the
    conditions, builds and prunes a check histogram per model, and lastly
    filters the models through a ModelSelector.
    """

    def __init__(self, contentProvider):
        """Keep the content provider and start from the default tunables."""
        self.contentProvider = contentProvider

        self.minCondObserved = DEFAULT_MIN_COND_OBSERVED
        self.topnCheckHist = DEFAULT_TOP_N_CHECK_HIST
        self.minFracChecks = DEFAULT_MIN_FRAC_CHECKS

    def setMinCondObserved(self, val):
        """Override ``minCondObserved`` (second argument of ``pruneCheckHist``)."""
        self.minCondObserved = val

    def setTopnCheckHist(self, val):
        """Override ``topnCheckHist`` (first argument of ``pruneCheckHist``)."""
        self.topnCheckHist = val

    def setMinFracChecks(self, val):
        """Override ``minFracChecks`` (passed to ``extractCommonLabels``)."""
        self.minFracChecks = val

    def createForModels(self, models, onlyForSubChecks=None):
        """Build the overlay for ``models`` and store the result on ``self.models``.

        ``onlyForSubChecks`` is passed on untouched to the closing filter
        step. The method returns nothing; use ``getModels`` for the result.
        """
        self.models = list(map(CheckModel, models))

        # Steps that populate the shared check/condition data.
        self._generateChecksForAllModels()
        self._retrieveConditions()
        self._distributeChecksPerArg()

        self._createConditionClusters()

        # Per-model work that relies on the shared data above.
        for wrapped in self.models:
            self.createOverlayForModel(wrapped)

        self._filter(onlyForSubChecks)

        # Label generation is currently disabled:
        # self._generateLabels()

    def _generateLabels(self):
        """Load every AST node label and let each model extract common labels."""

        # TODO: Optimization: It's probably possible here to generate node-labels only
        # for those ASTs that are part of one of the models still left.

        nodeLabels = self.contentProvider.getAllASTNodeLabels()
        self.conditionLabels = nodeLabels
        for wrapped in self.models:
            wrapped.extractCommonLabels(self.nodeIdToConditionIndex,
                                        nodeLabels, self.minFracChecks)

    def _createConditionClusters(self):
        """Run a ConditionClusterer over the current models and keep its output."""
        tool = ConditionClusterer(self.contentProvider)
        self.cndClusterTool = tool
        self.conditionClusters = tool.cluster(self.models)

    def createOverlayForModel(self, model):
        """Give ``model`` the shared condition data, then build its histogram."""

        # Only references to the global condition data are handed over.
        model.setConditionClusters(self.conditionClusters)
        model.conditionsCode = self.conditionsCode
        model.nodeIdToConditionIndex = self.nodeIdToConditionIndex

        # Generate the histogram before pruning it with the tunables.
        model.generateCheckHist()
        model.pruneCheckHist(self.topnCheckHist, self.minCondObserved)

    def getModels(self):
        """Return the models currently held by this creator."""
        return self.models

    def _generateChecksForAllModels(self):
        """Have the content provider generate checks for all member invocations."""
        perModelMembers = [wrapped.members for wrapped in self.models]
        uniqueInvocations = uniq(flatten(perModelMembers))

        self.contentProvider.generateChecksForInvocations(uniqueInvocations)

    def _retrieveConditions(self):
        """Fetch conditions and condition code from the content provider.

        The condition-to-index map is built from the list exactly as the
        provider returned it, so a repeated condition keeps its last
        position. ``self.conditions`` is de-duplicated only after that.
        """
        fetched = self.contentProvider.getAllConditions()
        self.conditions = fetched
        self.nodeIdToConditionIndex = {
            cond: idx for idx, cond in enumerate(fetched)
        }

        self.conditions = list(set(fetched))
        self.conditionsCode = self.contentProvider.getAllConditionsCode()

    def _distributeChecksPerArg(self):
        """Copy each member's checks-per-arg entry into its model's ``checks``."""
        self.checksPerArg = self.contentProvider.getAllChecksPerArg()
        # list: index is invocation index

        for wrapped in self.models:
            for invocation in wrapped.members:
                # The member value is the index into checksPerArg; its
                # position within the model is not used.
                wrapped.checks.append(self.checksPerArg[invocation])

    def _filter(self, onlyForSubChecks):
        """Set ``self.models`` to the result of ModelSelector.selectForChecks."""
        chooser = self.modelSelector = ModelSelector()
        chooser.setSubChecks(onlyForSubChecks)
        self.models = chooser.selectForChecks(self.models)
class DataFlowModelCreator:
    """Creates DataFlowModel objects for the invocations of a sink symbol.

    The content provider is driven with a ``getCallsTo("<sink>")`` selector,
    sources and invocations are clustered, one model is built per invocation
    cluster, and the models are then filtered by a ModelSelector.
    """

    def __init__(self, contentProvider):
        """Store the content provider and load the default tunables."""
        self.contentProvider = contentProvider

        self.sourceDistInCluster = DEFAULT_SOURCE_DIST_IN_CLUSTER
        self.invocDistInCluster = DEFAULT_INVOC_DIST_IN_CLUSTER
        self.minNumMembersInInvocCluster = DEFAULT_MIN_NUM_MEMBERS_IN_INVOC_CLUSTER
        self.topnInvocClusters = DEFAULT_TOPN_INVOC_CLUSTERS

    def setSourceDistInCluster(self, val):
        """Set the max in-cluster distance given to the SourceClusterer."""
        self.sourceDistInCluster = val

    def setInvocDistInCluster(self, val):
        """Set the max in-cluster distance given to the InvocationClusterer."""
        self.invocDistInCluster = val

    def setMinNumMemersInInvocCluster(self, val):
        """Set the minimum member count handed to the ModelSelector.

        The method name is misspelled ("Memers"); it is kept so existing
        callers keep working.
        """
        self.minNumMembersInInvocCluster = val

    # Correctly spelled alias of the setter above.
    setMinNumMembersInInvocCluster = setMinNumMemersInInvocCluster

    def setTopnInvocClusters(self, val):
        """Set the top-N limit handed to the ModelSelector."""
        self.topnInvocClusters = val

    def createDataFlowModels(self, sinkSymbol, onlyForCallSite = None):
        """Build, filter and finalise the models for ``sinkSymbol``.

        Nothing is returned; the resulting models are stored on
        ``self.models`` (see ``getModels``). ``onlyForCallSite`` is forwarded
        unchanged to the ModelSelector in ``_filter``.
        """
        self.selector = self._sinkSymbolToSelector(sinkSymbol)

        self._createForSelector()
        self._filter(onlyForCallSite)

        # Sources per argument are only calculated for the surviving models.
        for model in self.models:
            model.calculateSourcesPerArg(self.invocClusterTool)

    def getModels(self):
        """Return the current list of models."""
        return self.models

    def _sinkSymbolToSelector(self, sinkSymbol):
        """Return the ``getCallsTo("...")`` selector string for ``sinkSymbol``."""
        return 'getCallsTo("%s")' % (sinkSymbol)

    def _createForSelector(self):
        """Generate content for ``self.selector`` and build one model per cluster."""
        self.contentProvider.generate(self.selector)

        # Cluster source API symbols using jaro distance.

        self.sourceClusterer = SourceClusterer(self.contentProvider)
        self.sourceClusterer.setMaxDistInCluster(self.sourceDistInCluster)
        sourceClusters = self.sourceClusterer.cluster()

        # Cluster invocations based on source-argument mappings.

        self.invocClusterTool = InvocationClusterer(self.contentProvider)
        self.invocClusterTool.setMaxDistInCluster(self.invocDistInCluster)
        self.invocClusters = self.invocClusterTool.cluster(sourceClusters)

        self._createModelsFromInvocClusters(self.invocClusters)

    def _createModelsFromInvocClusters(self, invocClusters):
        """Fill ``self.models`` with one DataFlowModel per invocation cluster.

        ``invocClusters`` must provide ``dataMatrix``, ``clusterIdToDatapoint``,
        ``callSiteIds`` and ``getNumberOfArguments()``. The models end up
        sorted by member count, largest first.
        """
        X = invocClusters.dataMatrix

        self.models = []
        # .items() instead of the Python-2-only .iteritems(), so the loop
        # runs on both Python 2 and Python 3 mappings.
        for (clusterId, invocIds) in invocClusters.clusterIdToDatapoint.items():
            newModel = DataFlowModel()
            newModel.clusterId = clusterId
            newModel.members = invocIds
            # We need to add 1 here for the 'other' group
            newModel.setNumberOfArguments(invocClusters.getNumberOfArguments() + 1)
            newModel.callSiteIds = [invocClusters.callSiteIds[x] for x in invocIds]
            # Row indices of X whose sum over the member columns exceeds
            # half the number of members.
            memberColumns = X[:, tuple(newModel.members)]
            newModel.sharedSourceClusters = np.nonzero(
                np.sum(memberColumns, axis=1) > 0.5 * len(invocIds))[0]
            newModel.selector = self.selector
            self.models.append(newModel)

        # Sort models by number of members
        self.models.sort(key=lambda x: len(x.members), reverse=True)

    def _filter(self, onlyForCallSite):
        """Replace ``self.models`` by what ModelSelector.select returns.

        The selector is configured with ``onlyForCallSite``, the minimum
        member count and the top-N limit before it is run.
        """
        self.modelSelector = ModelSelector()

        self.modelSelector.setCallSite(onlyForCallSite)
        self.modelSelector.setMinNumMembers(self.minNumMembersInInvocCluster)
        self.modelSelector.setTopN(self.topnInvocClusters)

        self.models = self.modelSelector.select(self.models)
Exemple #8
0
class DataFlowModelCreator:
    """Builds DataFlowModel objects for all calls to a given sink symbol.

    A ``getCallsTo("<sink>")`` selector is handed to the content provider,
    sources and invocations are clustered, each invocation cluster becomes
    one model, and a ModelSelector finally filters the model list.
    """

    def __init__(self, contentProvider):
        """Keep the content provider and start from the default tunables."""
        self.contentProvider = contentProvider

        self.sourceDistInCluster = DEFAULT_SOURCE_DIST_IN_CLUSTER
        self.invocDistInCluster = DEFAULT_INVOC_DIST_IN_CLUSTER
        self.minNumMembersInInvocCluster = DEFAULT_MIN_NUM_MEMBERS_IN_INVOC_CLUSTER
        self.topnInvocClusters = DEFAULT_TOPN_INVOC_CLUSTERS

    def setSourceDistInCluster(self, val):
        """Set the max in-cluster distance used by the SourceClusterer."""
        self.sourceDistInCluster = val

    def setInvocDistInCluster(self, val):
        """Set the max in-cluster distance used by the InvocationClusterer."""
        self.invocDistInCluster = val

    def setMinNumMemersInInvocCluster(self, val):
        """Set the minimum member count passed to the ModelSelector.

        The name carries a typo ("Memers") and is retained for backward
        compatibility with existing callers.
        """
        self.minNumMembersInInvocCluster = val

    # Correctly spelled alias of the setter above.
    setMinNumMembersInInvocCluster = setMinNumMemersInInvocCluster

    def setTopnInvocClusters(self, val):
        """Set the top-N limit passed to the ModelSelector."""
        self.topnInvocClusters = val

    def createDataFlowModels(self, sinkSymbol, onlyForCallSite=None):
        """Create, filter and finalise the models for ``sinkSymbol``.

        The method returns nothing; results are kept on ``self.models`` and
        are available through ``getModels``. ``onlyForCallSite`` is passed
        through to the ModelSelector used in ``_filter``.
        """
        self.selector = self._sinkSymbolToSelector(sinkSymbol)

        self._createForSelector()
        self._filter(onlyForCallSite)

        # Only models that passed the filter get their sources calculated.
        for model in self.models:
            model.calculateSourcesPerArg(self.invocClusterTool)

    def getModels(self):
        """Return the models currently held by this creator."""
        return self.models

    def _sinkSymbolToSelector(self, sinkSymbol):
        """Format ``sinkSymbol`` into a ``getCallsTo("...")`` selector string."""
        return 'getCallsTo("%s")' % (sinkSymbol)

    def _createForSelector(self):
        """Generate content for ``self.selector``, cluster it and build models."""
        self.contentProvider.generate(self.selector)

        # Cluster source API symbols using jaro distance.

        self.sourceClusterer = SourceClusterer(self.contentProvider)
        self.sourceClusterer.setMaxDistInCluster(self.sourceDistInCluster)
        sourceClusters = self.sourceClusterer.cluster()

        # Cluster invocations based on source-argument mappings.

        self.invocClusterTool = InvocationClusterer(self.contentProvider)
        self.invocClusterTool.setMaxDistInCluster(self.invocDistInCluster)
        self.invocClusters = self.invocClusterTool.cluster(sourceClusters)

        self._createModelsFromInvocClusters(self.invocClusters)

    def _createModelsFromInvocClusters(self, invocClusters):
        """Populate ``self.models`` with one DataFlowModel per invocation cluster.

        ``invocClusters`` is expected to expose ``dataMatrix``,
        ``clusterIdToDatapoint``, ``callSiteIds`` and
        ``getNumberOfArguments()``. Afterwards the models are sorted by
        member count in descending order.
        """
        X = invocClusters.dataMatrix

        self.models = []
        # .items() replaces the Python-2-only .iteritems(); it is available
        # on mappings in both Python 2 and Python 3.
        for (clusterId,
             invocIds) in invocClusters.clusterIdToDatapoint.items():
            newModel = DataFlowModel()
            newModel.clusterId = clusterId
            newModel.members = invocIds
            # We need to add 1 here for the 'other' group
            newModel.setNumberOfArguments(
                invocClusters.getNumberOfArguments() + 1)
            newModel.callSiteIds = [
                invocClusters.callSiteIds[x] for x in invocIds
            ]
            # Keep the row indices of X whose sum across the member columns
            # is greater than half the member count.
            memberColumns = X[:, tuple(newModel.members)]
            newModel.sharedSourceClusters = np.nonzero(
                np.sum(memberColumns, axis=1) > 0.5 * len(invocIds))[0]
            newModel.selector = self.selector
            self.models.append(newModel)

        # Sort models by number of members
        self.models.sort(key=lambda x: len(x.members), reverse=True)

    def _filter(self, onlyForCallSite):
        """Set ``self.models`` to the result of ModelSelector.select.

        Before selecting, the selector receives ``onlyForCallSite``, the
        minimum member count and the top-N limit.
        """
        self.modelSelector = ModelSelector()

        self.modelSelector.setCallSite(onlyForCallSite)
        self.modelSelector.setMinNumMembers(self.minNumMembersInInvocCluster)
        self.modelSelector.setTopN(self.topnInvocClusters)

        self.models = self.modelSelector.select(self.models)