Example #1
0
 def _getFunctionNamesFromSink(self, sinkName):
     """Return the project-root-prefixed names of the callers of a sink.

     The sink named ``sinkName`` is looked up through a SinkUserProvider
     rooted at ``self.projectRoot``.  The second element of every call
     record is prefixed with ``self.projectRoot`` and the resulting list
     is passed through ``self.unique`` before being returned.
     """
     provider = SinkUserProvider(self.projectRoot)
     # The sink unpacks into a pair; only its second element (the calls)
     # is needed here.
     _, sinkCalls = provider.getSinkByName(sinkName)
     prefixedNames = []
     for call in sinkCalls:
         prefixedNames.append(self.projectRoot + call[1])
     return self.unique(prefixedNames)
Example #2
0
class SinkMatrixCreator:
    """Create term-document matrices for the functions that call a sink.

    Callers of a sink are obtained from a SinkUserProvider; the per-function
    n-gram counts are read from the pickled ``func2SubtreesMap.pickl`` file
    located under ``projectRoot``.
    """

    def __init__(self, projectRoot):
        """Store the project paths and create the sink-user provider.

        ``projectRoot`` is used as a plain string prefix (file names and
        ``'../../'`` are appended to it directly), so it presumably ends
        with ``'/'`` -- TODO confirm against callers.
        """
        self.projectRoot = projectRoot
        # Drop the last three '/'-separated components of projectRoot and
        # re-append a trailing '/'.
        self.programDir = '/'.join(self.projectRoot.split('/')[:-3]) + '/'
        self.sinkUserProvider = SinkUserProvider(self.projectRoot + '../../')

    def createMatrixForSink(self, sinkName):
        """Return the term-document matrix of all functions calling ``sinkName``.

        The second element of every call record is prefixed with
        ``self.programDir`` and duplicates are removed (first occurrence
        wins) before the matrix is built.
        """
        (_, callsToSink) = self.sinkUserProvider.getSinkByName(sinkName)
        functionNames = self.uniq(
            ['%s%s' % (self.programDir, call[1]) for call in callsToSink])

        return self.createMatrixForFunctionNames(functionNames)

    def createMatrixForFunctionNames(self, functionNames):
        """Build a term-document matrix restricted to ``functionNames``.

        Each name is looked up in ``self.func2SubtreesMap.d``; every n-gram
        of a function is added to the name/dict map and to the symbol
        counter once per recorded occurrence.

        NOTE: This operation loses TF-IDF. I don't think that's the way
        to go.
        """
        self._loadFunc2SubtreesMap()

        self.nameToDictMap = NameToDictMap()
        self.allSymbolsDict = OccurrenceCounter()
        nameDictMapToMatrix = NameDictMapToMatrix()

        # Resolve every function up front so that an unknown name fails
        # before any counts are accumulated.
        functions = [(doc, self.func2SubtreesMap.d[doc])
                     for doc in functionNames]

        for (doc, func) in functions:
            for (ngram, nOccurrences) in func.iteritems():
                for _ in xrange(nOccurrences):
                    self.nameToDictMap.add(ngram, doc)
                    self.allSymbolsDict.add(ngram)

        nameDictMapToMatrix.convertFromDicts(self.nameToDictMap,
                                             self.allSymbolsDict)
        return nameDictMapToMatrix.termDocumentMatrix

    def _loadFunc2SubtreesMap(self):
        """Load ``func2SubtreesMap.pickl`` into ``self.func2SubtreesMap``."""
        filename = self.projectRoot + 'func2SubtreesMap.pickl'
        # Open in binary mode and close the handle deterministically; the
        # previous ``file(filename)`` call leaked the descriptor.
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # files produced by this tool chain.
        with open(filename, 'rb') as pickleFile:
            self.func2SubtreesMap = pickle.load(pickleFile)

    def uniq(self, seq, idfun=None):
        """Return the items of ``seq`` without duplicates, preserving order.

        ``idfun`` maps an item to the (hashable) key used to detect
        duplicates; by default the item itself is the key.  The first item
        seen for each key is kept.
        """
        if idfun is None:

            def idfun(x):
                return x

        seen = set()
        result = []
        for item in seq:
            marker = idfun(item)
            if marker in seen:
                continue
            seen.add(marker)
            result.append(item)
        return result
Example #3
0
def sinkSnippetEmbedder(projectRoot, sinkOfInterest, configuration):
    """Embed the users of one sink and return the term-document matrix.

    ``configuration`` must provide the keys ``'ngramN'`` and
    ``'smallerNgramsToo'``, which are forwarded to SinkSnippetEmbedder.
    The sink named ``sinkOfInterest`` is looked up through a
    SinkUserProvider rooted at ``projectRoot``.
    """
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and also parses as the Python 3 function.
    print('embed for sink: %s' % sinkOfInterest)

    sink = SinkUserProvider(projectRoot).getSinkByName(sinkOfInterest)
    sinkUserEmbedder = SinkSnippetEmbedder(projectRoot,
                                           configuration['ngramN'],
                                           configuration['smallerNgramsToo'])
    # embedSinkUsers yields a (name, matrix) pair; only the matrix is used.
    (_, termDocMatrix) = sinkUserEmbedder.embedSinkUsers(sink)
    return termDocMatrix
Example #4
0
def embedFunctionsUsingSink(projectRoot, filterName, ngramN, smallerNgramsToo,
                            sinkOfInterest):
    """Embed the users of ``sinkOfInterest`` and save the embedding.

    The output name is ``'<filterName>_<ngramN>.pickl'``.  Returns whatever
    the embedder's ``save`` call returns for that name and sink.
    """
    from tools.SinkSnippetEmbedder.SinkSnippetEmbedder import SinkSnippetEmbedder
    from tools.SinkSnippetEmbedder.SinkUserProvider import SinkUserProvider

    pickleName = '%s_%d.pickl' % (filterName, ngramN)
    snippetEmbedder = SinkSnippetEmbedder(projectRoot, ngramN,
                                          smallerNgramsToo)
    provider = SinkUserProvider(projectRoot)
    sinkUsers = provider.getSinkByName(sinkOfInterest)

    # The return value of embedSinkUsers is not needed here.
    snippetEmbedder.embedSinkUsers(sinkUsers)
    return snippetEmbedder.save(pickleName, sinkOfInterest)
Example #5
0
class SinkMatrixCreator:
    """Create term-document matrices for the functions that call a sink.

    Callers of a sink are obtained from a SinkUserProvider; the per-function
    n-gram counts are read from the pickled ``func2SubtreesMap.pickl`` file
    located under ``projectRoot``.
    """

    def __init__(self, projectRoot):
        """Store the project paths and create the sink-user provider.

        ``projectRoot`` is used as a plain string prefix (file names and
        ``'../../'`` are appended to it directly), so it presumably ends
        with ``'/'`` -- TODO confirm against callers.
        """
        self.projectRoot = projectRoot
        # Drop the last three '/'-separated components of projectRoot and
        # re-append a trailing '/'.
        self.programDir = '/'.join(self.projectRoot.split('/')[:-3]) + '/'
        self.sinkUserProvider = SinkUserProvider(self.projectRoot + '../../')

    def createMatrixForSink(self, sinkName):
        """Return the term-document matrix of all functions calling ``sinkName``.

        The second element of every call record is prefixed with
        ``self.programDir`` and duplicates are removed (first occurrence
        wins) before the matrix is built.
        """
        (_, callsToSink) = self.sinkUserProvider.getSinkByName(sinkName)
        functionNames = self.uniq(
            ['%s%s' % (self.programDir, call[1]) for call in callsToSink])

        return self.createMatrixForFunctionNames(functionNames)

    def createMatrixForFunctionNames(self, functionNames):
        """Build a term-document matrix restricted to ``functionNames``.

        Each name is looked up in ``self.func2SubtreesMap.d``; every n-gram
        of a function is added to the name/dict map and to the symbol
        counter once per recorded occurrence.

        NOTE: This operation loses TF-IDF. I don't think that's the way
        to go.
        """
        self._loadFunc2SubtreesMap()

        self.nameToDictMap = NameToDictMap()
        self.allSymbolsDict = OccurrenceCounter()
        nameDictMapToMatrix = NameDictMapToMatrix()

        # Resolve every function up front so that an unknown name fails
        # before any counts are accumulated.
        functions = [(doc, self.func2SubtreesMap.d[doc])
                     for doc in functionNames]

        for (doc, func) in functions:
            for (ngram, nOccurrences) in func.iteritems():
                for _ in xrange(nOccurrences):
                    self.nameToDictMap.add(ngram, doc)
                    self.allSymbolsDict.add(ngram)

        nameDictMapToMatrix.convertFromDicts(self.nameToDictMap,
                                             self.allSymbolsDict)
        return nameDictMapToMatrix.termDocumentMatrix

    def _loadFunc2SubtreesMap(self):
        """Load ``func2SubtreesMap.pickl`` into ``self.func2SubtreesMap``."""
        filename = self.projectRoot + 'func2SubtreesMap.pickl'
        # Open in binary mode and close the handle deterministically; the
        # previous ``file(filename)`` call leaked the descriptor.
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # files produced by this tool chain.
        with open(filename, 'rb') as pickleFile:
            self.func2SubtreesMap = pickle.load(pickleFile)

    def uniq(self, seq, idfun=None):
        """Return the items of ``seq`` without duplicates, preserving order.

        ``idfun`` maps an item to the (hashable) key used to detect
        duplicates; by default the item itself is the key.  The first item
        seen for each key is kept.
        """
        if idfun is None:

            def idfun(x):
                return x

        seen = set()
        result = []
        for item in seq:
            marker = idfun(item)
            if marker in seen:
                continue
            seen.add(marker)
            result.append(item)
        return result
Example #6
0
 def _getAvailableSinks(self):
     """Return the sinks reported by a SinkUserProvider for this project.

     ``self.args.min_calls_to_sink`` is forwarded unchanged to the
     provider's ``getSinks`` call.
     """
     return SinkUserProvider(self.projectRoot).getSinks(
         self.args.min_calls_to_sink)
Example #7
0
 def _getFunctionNamesFromSink(self, sinkName):
     """Collect the project-root-prefixed caller names of ``sinkName``.

     A SinkUserProvider rooted at ``self.projectRoot`` supplies the sink;
     the second element of each of its call records is prefixed with
     ``self.projectRoot`` and the list is handed to ``self.unique``.
     """
     provider = SinkUserProvider(self.projectRoot)
     # Only the second member of the sink pair (the calls) is used.
     _, sinkCalls = provider.getSinkByName(sinkName)
     prefixedNames = []
     for call in sinkCalls:
         prefixedNames.append(self.projectRoot + call[1])
     return self.unique(prefixedNames)
Example #8
0
 def __init__(self, projectRoot):
     """Store the project paths and create the sink-user provider.

     ``programDir`` is ``projectRoot`` with its last three '/'-separated
     components removed and a trailing '/' re-appended.  The provider is
     rooted at ``projectRoot + '../../'``.
     """
     self.projectRoot = projectRoot
     pathComponents = self.projectRoot.split('/')
     keptComponents = pathComponents[:-3]
     self.programDir = '/'.join(keptComponents) + '/'
     providerRoot = self.projectRoot + '../../'
     self.sinkUserProvider = SinkUserProvider(providerRoot)
Example #9
0
 def _getAvailableSinks(self):
     """Ask a SinkUserProvider rooted at the project for its sinks.

     The value of ``self.args.min_calls_to_sink`` is passed straight
     through as the only argument of ``getSinks``.
     """
     provider = SinkUserProvider(self.projectRoot)
     availableSinks = provider.getSinks(self.args.min_calls_to_sink)
     return availableSinks
Example #10
0
 def __init__(self, projectRoot):
     """Remember ``projectRoot`` and derive the paths that depend on it.

     ``programDir`` keeps all but the last three '/'-separated components
     of ``projectRoot`` and ends with '/'.  The sink-user provider is
     created for ``projectRoot + '../../'``.
     """
     self.projectRoot = projectRoot
     leadingParts = self.projectRoot.split('/')[:-3]
     self.programDir = '/'.join(leadingParts) + '/'
     self.sinkUserProvider = SinkUserProvider(
         self.projectRoot + '../../')
Example #11
0
 def _getAvailableSinks(self):
     """Return the sinks found for ``self.args.projectDirectory``.

     SinkUserProvider is imported locally; ``self.args.min_calls_to_sink``
     is forwarded unchanged to its ``getSinks`` call.
     """
     from tools.SinkSnippetEmbedder.SinkUserProvider import SinkUserProvider
     return SinkUserProvider(self.args.projectDirectory).getSinks(
         self.args.min_calls_to_sink)
 def _getAvailableSinks(self):
     """Query a SinkUserProvider for the sinks of the project directory.

     The provider is built from ``self.args.projectDirectory`` and is
     asked for its sinks with ``self.args.min_calls_to_sink``.
     """
     from tools.SinkSnippetEmbedder.SinkUserProvider import SinkUserProvider
     provider = SinkUserProvider(self.args.projectDirectory)
     availableSinks = provider.getSinks(self.args.min_calls_to_sink)
     return availableSinks