コード例 #1
0
class ExpressionTreeEmbedder:
    """Build a term-document matrix from the expressions found for a symbol.

    The expressions are obtained from ``TreeToExpressionConverter`` for the
    location of every neighbour of a context (and of the context's origin)
    and converted with ``NameDictMapToMatrix``.
    """

    def __init__(self):
        """Instantiate the helper objects held by this embedder."""
        self.exprTreeProvider = ExpressionTreeProvider()
        self.trackingInfoProvider = TrackingInfoProvider()
        self.treeToExprConverter = TreeToExpressionConverter()

    def embed(self, context, symbol):
        """Return the term-document matrix for ``symbol`` in ``context``.

        Returns ``None`` when ``_termDictsFromContext`` returns ``None``.
        """
        return self._termDocumentMatrixFromContext(context, symbol)

    def _termDocumentMatrixFromContext(self, context, symbol):
        """Convert the term dictionaries of ``context`` into a matrix.

        The ``NameDictMapToMatrix`` used for the conversion is stored in
        ``self.nameDictMapToMatrix``. Returns its ``termDocumentMatrix``, or
        ``None`` when no term dictionaries are available.
        """
        termDicts = self._termDictsFromContext(context, symbol)
        # Identity test: ``== None`` would be routed through ``__eq__``.
        if termDicts is None:
            return None
        (vecs, allNgrams) = termDicts

        self.nameDictMapToMatrix = NameDictMapToMatrix()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return self.nameDictMapToMatrix.termDocumentMatrix

    def _termDictsFromContext(self, context, symbol):
        """Collect the expressions for ``symbol`` at every location of ``context``.

        ``context.origin`` is processed together with ``context.neighbours``.
        Every processed entry also receives its expressions through
        ``setExpressions``.

        Returns a ``(vecs, allNgrams)`` tuple, or ``None`` when the ``d``
        attribute of either container is empty afterwards.
        """
        vecs = NameToDictMap()
        allNgrams = OccurrenceCounter()

        # The origin is appended temporarily so that it is handled exactly
        # like a neighbour. try/finally guarantees that the caller's list is
        # restored even when one of the calls in the loop raises.
        context.neighbours.append(context.origin)
        try:
            for neighbour in context.neighbours:
                location = neighbour.location
                expressions = self.treeToExprConverter.getExpressionsForSymbol(
                    location, symbol)

                neighbour.setExpressions(expressions)

                # add null-vector for function if it does not contain
                # expressions
                if len(expressions) == 0:
                    vecs.add(None, location)

                for expr in expressions:
                    # Every expression is stored with the constant value 1.0.
                    vecs.setItem(expr, location, 1.0)
                    allNgrams.add(expr)
        finally:
            context.neighbours.pop()

        if len(vecs.d) == 0 or len(allNgrams.d) == 0:
            return None

        return (vecs, allNgrams)

    def getAllConditionNodes(self):
        """Return ``self.tree.conditionalNodes``."""
        # NOTE(review): nothing in this class assigns ``self.tree``; confirm
        # that it is set from outside before this method is called.
        return self.tree.conditionalNodes
コード例 #2
0
class ExpressionTreeEmbedder:
    """Turn the expressions found for a symbol into a term-document matrix.

    For the location of each neighbour of a context, and of the context's
    origin, the expressions reported by ``TreeToExpressionConverter`` are
    gathered and then converted with ``NameDictMapToMatrix``.
    """

    def __init__(self):
        """Create the helper objects kept on this embedder."""
        self.exprTreeProvider = ExpressionTreeProvider()
        self.trackingInfoProvider = TrackingInfoProvider()
        self.treeToExprConverter = TreeToExpressionConverter()

    def embed(self, context, symbol):
        """Return the term-document matrix for ``symbol`` in ``context``.

        ``None`` is returned when ``_termDictsFromContext`` returns ``None``.
        """
        return self._termDocumentMatrixFromContext(context, symbol)

    def _termDocumentMatrixFromContext(self, context, symbol):
        """Build the matrix from the term dictionaries of ``context``.

        The converter instance is kept in ``self.nameDictMapToMatrix``. The
        return value is its ``termDocumentMatrix``, or ``None`` when there are
        no term dictionaries.
        """
        termDicts = self._termDictsFromContext(context, symbol)
        # ``is None`` checks identity instead of calling ``__eq__``.
        if termDicts is None:
            return None
        (vecs, allNgrams) = termDicts

        self.nameDictMapToMatrix = NameDictMapToMatrix()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return self.nameDictMapToMatrix.termDocumentMatrix

    def _termDictsFromContext(self, context, symbol):
        """Gather the expressions for ``symbol`` over all entries of ``context``.

        ``context.origin`` is handled in the same loop as
        ``context.neighbours``; each entry is also handed its expressions via
        ``setExpressions``.

        Returns ``(vecs, allNgrams)``, or ``None`` when ``vecs.d`` or
        ``allNgrams.d`` is empty after the loop.
        """
        vecs = NameToDictMap()
        allNgrams = OccurrenceCounter()

        # Temporarily extend the caller's list with the origin. The matching
        # pop() sits in a finally clause so the list is restored even if a
        # call inside the loop raises.
        context.neighbours.append(context.origin)
        try:
            for neighbour in context.neighbours:
                location = neighbour.location
                expressions = self.treeToExprConverter.getExpressionsForSymbol(
                    location, symbol)

                neighbour.setExpressions(expressions)

                # add null-vector for function if it does not contain
                # expressions
                if len(expressions) == 0:
                    vecs.add(None, location)

                for expr in expressions:
                    # The stored value is always 1.0.
                    vecs.setItem(expr, location, 1.0)
                    allNgrams.add(expr)
        finally:
            context.neighbours.pop()

        if len(vecs.d) == 0 or len(allNgrams.d) == 0:
            return None

        return (vecs, allNgrams)

    def getAllConditionNodes(self):
        """Return ``self.tree.conditionalNodes``."""
        # NOTE(review): ``self.tree`` is not assigned anywhere in this class;
        # verify that callers set it before using this method.
        return self.tree.conditionalNodes
コード例 #3
0
 def __init__(self, projectRoot, ngramN, smallerNgramsToo):
     """Store the project root and create the helper objects.

     ``projectRoot`` is kept on the instance and passed to ``Embedder``.
     ``ngramN`` and ``smallerNgramsToo`` are forwarded unchanged to
     ``Embedder.configureNgramCalculator``.
     """
     self.projectRoot = projectRoot
     self.nCalls = self._determineTotalNumberOfCalls()

     self.callAreaExtractor = SinkSnippetExtractor()

     # The embedder is stored first and configured afterwards.
     ngramEmbedder = self.embedder = Embedder(projectRoot)
     ngramEmbedder.configureNgramCalculator(ngramN, smallerNgramsToo)

     self.nameDictMapToMatrix = NameDictMapToMatrix()
コード例 #4
0
    def _termDocumentMatrixFromContext(self, context, symbol):
        """Return the term-document matrix built for ``symbol`` in ``context``.

        Returns ``None`` when ``_termDictsFromContext`` returns ``None``.
        Otherwise the ``NameDictMapToMatrix`` used for the conversion is
        stored in ``self.nameDictMapToMatrix`` and its ``termDocumentMatrix``
        is returned.
        """
        termDicts = self._termDictsFromContext(context, symbol)
        # Identity test: ``== None`` would be routed through ``__eq__``.
        if termDicts is None:
            return None
        (vecs, allNgrams) = termDicts

        self.nameDictMapToMatrix = NameDictMapToMatrix()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return self.nameDictMapToMatrix.termDocumentMatrix
コード例 #5
0
ファイル: create.py プロジェクト: MLDroid/chucky-old
def main(projectRoot, tfidf=True):
    """Convert the pickled maps below ``projectRoot`` and save the result.

    The input file names ``func2SubtreesMap.pickl`` and
    ``allSubtreesDict.pickl`` are appended to ``projectRoot`` directly; no
    path separator is inserted. When ``tfidf`` is true, ``tfidf()`` is called
    on the converter's term-document matrix before ``save(projectRoot)``.
    """
    inputFilenames = (projectRoot + 'func2SubtreesMap.pickl',
                      projectRoot + 'allSubtreesDict.pickl')

    matrixBuilder = NameDictMapToMatrix()
    matrixBuilder.convertFromFiles(*inputFilenames)

    if tfidf:
        matrixBuilder.termDocumentMatrix.tfidf()
    matrixBuilder.save(projectRoot)
コード例 #6
0
 def _termDocumentMatrixFromContext(self, context, symbol):
     """Build the term-document matrix for ``symbol`` in ``context``.

     ``None`` is returned when ``_termDictsFromContext`` returns ``None``.
     Otherwise a new ``NameDictMapToMatrix`` is stored in
     ``self.nameDictMapToMatrix`` and its ``termDocumentMatrix`` is returned.
     """
     termDicts = self._termDictsFromContext(context, symbol)
     # ``is None`` checks identity instead of calling ``__eq__``.
     if termDicts is None:
         return None
     (vecs, allNgrams) = termDicts

     self.nameDictMapToMatrix = NameDictMapToMatrix()
     self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
     return self.nameDictMapToMatrix.termDocumentMatrix
コード例 #7
0
 def createMatrixForFunctionNames(self, functionNames):
     """Return a term-document matrix built only from ``functionNames``.

     ``self.nameToDictMap`` and ``self.allSymbolsDict`` are replaced by fresh
     containers and filled from the entries of ``self.func2SubtreesMap.d``
     that belong to the given names.
     """
     self._loadFunc2SubtreesMap()

     self.nameToDictMap = NameToDictMap()
     self.allSymbolsDict = OccurrenceCounter()
     matrixBuilder = NameDictMapToMatrix()

     # All names are looked up before the first add() call is made.
     selected = [(name, self.func2SubtreesMap.d[name])
                 for name in functionNames]

     for name, ngramCounts in selected:
         for ngram, count in ngramCounts.iteritems():
             # Both containers receive one add() per counted occurrence.
             for _ in xrange(count):
                 self.nameToDictMap.add(ngram, name)
                 self.allSymbolsDict.add(ngram)

     matrixBuilder.convertFromDicts(self.nameToDictMap, self.allSymbolsDict)
     return matrixBuilder.termDocumentMatrix
コード例 #8
0
    def createMatrixForFunctionNames(self, functionNames):
        """Build the term-document matrix for a subset of functions.

        Fresh ``NameToDictMap`` and ``OccurrenceCounter`` objects are stored
        in ``self.nameToDictMap`` and ``self.allSymbolsDict``. They are then
        populated from ``self.func2SubtreesMap.d`` for each name in
        ``functionNames`` and converted with ``NameDictMapToMatrix``.
        """
        self._loadFunc2SubtreesMap()

        self.nameToDictMap = NameToDictMap()
        self.allSymbolsDict = OccurrenceCounter()
        converter = NameDictMapToMatrix()

        # The lookups for every requested name happen here, ahead of the
        # loop that fills the containers.
        docsWithCounts = [
            (docName, self.func2SubtreesMap.d[docName])
            for docName in functionNames
        ]

        for (docName, counts) in docsWithCounts:
            for (ngram, nOccurrences) in counts.iteritems():
                # add() is repeated once for every occurrence of the n-gram.
                for _ in xrange(nOccurrences):
                    self.nameToDictMap.add(ngram, docName)
                    self.allSymbolsDict.add(ngram)

        converter.convertFromDicts(self.nameToDictMap, self.allSymbolsDict)
        return converter.termDocumentMatrix
コード例 #9
0
ファイル: create.py プロジェクト: pombredanne/chucky-old
def main(projectRoot, tfidf=True):
    """Run the file-based conversion for ``projectRoot`` and save it.

    ``projectRoot`` is used as a plain string prefix for the two input files
    (``func2SubtreesMap.pickl`` and ``allSubtreesDict.pickl``). If ``tfidf``
    is true, ``tfidf()`` is invoked on the resulting term-document matrix
    before the converter's ``save(projectRoot)`` is called.
    """
    nameDictMapPath, allSymbolsPath = [
        projectRoot + basename
        for basename in ('func2SubtreesMap.pickl', 'allSubtreesDict.pickl')
    ]

    converter = NameDictMapToMatrix()
    converter.convertFromFiles(nameDictMapPath, allSymbolsPath)

    if tfidf:
        converter.termDocumentMatrix.tfidf()

    converter.save(projectRoot)
コード例 #10
0
class SinkSnippetEmbedder:
    """Embed the code areas around the calls to a sink.

    The area subtree of every call to a sink is passed to an ``Embedder``;
    the resulting maps are converted with ``NameDictMapToMatrix``.
    """

    def __init__(self, projectRoot, ngramN, smallerNgramsToo):
        """Store the project root and create the helper objects.

        ``projectRoot`` is used as a plain string prefix for file names.
        ``ngramN`` and ``smallerNgramsToo`` are forwarded unchanged to
        ``Embedder.configureNgramCalculator``.
        """
        self.projectRoot = projectRoot
        # Reads <projectRoot>callIndex.pickl, so projectRoot must be set first.
        self.nCalls = self._determineTotalNumberOfCalls()

        self.callAreaExtractor = SinkSnippetExtractor()
        self.embedder = Embedder(projectRoot)
        self.embedder.configureNgramCalculator(ngramN, smallerNgramsToo)
        self.nameDictMapToMatrix = NameDictMapToMatrix()

    def _isSinkCalledTooOften(self, callsToSink):
        """Return True when the sink's share of all calls exceeds the bound."""
        # If the calls to this sink make up a larger fraction of all calls
        # than UPPER_BOUND_FOR_NUMBER_OF_CALLS_AS_FRACTION, the sink is just
        # called too often to be interesting.
        fractionOfCalls = float(len(callsToSink)) / self.nCalls
        if fractionOfCalls > UPPER_BOUND_FOR_NUMBER_OF_CALLS_AS_FRACTION:
            print('Sink called too often')
            return True
        return False

    def _isSinkNotCalledOftenEnough(self, callsToSink):
        """Return True when there are fewer calls than the lower bound."""
        return len(callsToSink) < LOWER_BOUND_FOR_NUMBER_OF_CALLS

    def _determineTotalNumberOfCalls(self):
        """Return the summed length of all values in the pickled call index."""
        # The file is closed deterministically instead of being left to the
        # garbage collector. The read mode is left at its default because the
        # code that writes callIndex.pickl is not visible here.
        # NOTE: pickle.load must only be used on trusted files.
        with open(self.projectRoot + 'callIndex.pickl') as indexFile:
            callIndex = pickle.load(indexFile)
        return numpy.sum([len(v) for v in callIndex.d.itervalues()])

    def embedSinkUsers(self, sink):
        """Embed the area subtrees of all calls to ``sink``.

        ``sink[0]`` is returned unchanged as the first element of the
        result; ``sink[1]`` is the collection of call labels.

        Returns ``(None, None)`` when the sink is called too often or not
        often enough, otherwise ``(sink[0], termDocumentMatrix)``.
        """
        callsToSink = sink[1]

        if self._isSinkCalledTooOften(callsToSink):
            return (None, None)
        if self._isSinkNotCalledOftenEnough(callsToSink):
            print('Sink not called often enough')
            return (None, None)

        # Bound methods are looked up once, outside the loop.
        getSinkAreaSubtree = self.callAreaExtractor.getSinkAreaSubtree
        filterAndAddAST = self.embedder.filterAndAddAST

        for label in callsToSink:
            areaSubtree = getSinkAreaSubtree(self.projectRoot, label)
            filterAndAddAST(label, areaSubtree)

        (vecs, allNgrams) = self.embedder.getMaps()
        self.nameDictMapToMatrix.convertFromDicts(vecs, allNgrams)
        return (sink[0], self.nameDictMapToMatrix.termDocumentMatrix)

    def save(self, name, sinkName):
        """Pickle the converter's name/dict map and symbol dict for one sink.

        The files ``func2SubtreesMap.pickl`` and ``allSubtreesDict.pickl``
        are written to ``<projectRoot>embeddings/<name>/sinks/<sinkName>/``.
        Missing directories on that path are created.
        """
        import os

        embeddingsDir = self.projectRoot + 'embeddings'
        thisEmbeddingDir = embeddingsDir + '/' + name
        sinkEmbeddingDir = thisEmbeddingDir + '/' + 'sinks'
        thisSinkEmbeddingDir = sinkEmbeddingDir + '/' + sinkName

        # Ordered from outermost to innermost so that each os.mkdir() finds
        # its parent directory in place.
        for directory in (embeddingsDir, thisEmbeddingDir,
                          sinkEmbeddingDir, thisSinkEmbeddingDir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        # Binary mode is what the pickle documentation asks for, and the
        # with-blocks make sure the data is flushed and the files are closed.
        with open(thisSinkEmbeddingDir + '/func2SubtreesMap.pickl',
                  'wb') as outFile:
            pickle.dump(self.nameDictMapToMatrix.nameDictMap, outFile)
        with open(thisSinkEmbeddingDir + '/allSubtreesDict.pickl',
                  'wb') as outFile:
            pickle.dump(self.nameDictMapToMatrix.allSymbolsDict, outFile)