コード例 #1
0
    def _termDictsFromContext(self, context, symbol):
        """Collect the expressions for ``symbol`` over a context's locations.

        The origin of ``context`` is processed together with its neighbours.
        For each of them, the expressions returned by
        ``self.treeToExprConverter.getExpressionsForSymbol`` are stored on the
        entry via ``setExpressions`` and recorded in the result containers.

        Args:
            context: Object exposing ``neighbours`` (must support ``append``
                and ``pop``) and ``origin``. Every entry must provide
                ``location`` and ``setExpressions``.
            symbol: Symbol forwarded unchanged to the expression converter.

        Returns:
            A ``(vecs, allNgrams)`` tuple holding the populated
            ``NameToDictMap`` and ``OccurrenceCounter``, or ``None`` when the
            ``d`` attribute of either container is empty.
        """
        vecs = NameToDictMap()
        allNgrams = OccurrenceCounter()

        # The origin is appended temporarily so that it is handled exactly
        # like a neighbour by the loop below.
        context.neighbours.append(context.origin)
        try:
            for neighbour in context.neighbours:
                location = neighbour.location
                expressions = self.treeToExprConverter.getExpressionsForSymbol(
                    location, symbol)

                neighbour.setExpressions(expressions)

                # Add a null entry for a location without expressions so the
                # location is still present in ``vecs``.
                if len(expressions) == 0:
                    vecs.add(None, location)

                for expr in expressions:
                    # Every expression gets the fixed value 1.0 per location;
                    # it is not scaled by the entry's occurrence count.
                    vecs.setItem(expr, location, 1.0)
                    allNgrams.add(expr)
        finally:
            # Always undo the temporary append, even when a call above
            # raised, so the caller's context is left as it was passed in.
            context.neighbours.pop()

        if len(vecs.d) == 0 or len(allNgrams.d) == 0:
            return None

        return (vecs, allNgrams)
コード例 #2
0
    def createMatrixForFunctionNames(self, functionNames):
        """Build a term-document matrix restricted to the given functions.

        Calls ``self._loadFunc2SubtreesMap()``, then replaces
        ``self.nameToDictMap`` and ``self.allSymbolsDict`` with fresh
        containers and fills them from ``self.func2SubtreesMap.d``.

        Args:
            functionNames: Iterable of keys to look up in
                ``self.func2SubtreesMap.d``. A missing key raises whatever
                that lookup raises (presumably ``KeyError``; verify against
                the type of ``d``).

        Returns:
            The ``termDocumentMatrix`` attribute of the ``NameDictMapToMatrix``
            instance after ``convertFromDicts`` has been called on it.
        """
        self._loadFunc2SubtreesMap()

        self.nameToDictMap = termsByName = NameToDictMap()
        self.allSymbolsDict = symbolCounts = OccurrenceCounter()
        converter = NameDictMapToMatrix()

        # Resolve every name before counting anything, so a failing lookup
        # happens before any entry is added to the new containers.
        resolved = []
        for name in functionNames:
            resolved.append((name, self.func2SubtreesMap.d[name]))

        for name, ngramCounts in resolved:
            for ngram, count in ngramCounts.iteritems():
                # One add() per occurrence, for both containers.
                for _ in xrange(count):
                    termsByName.add(ngram, name)
                    symbolCounts.add(ngram)

        converter.convertFromDicts(termsByName, symbolCounts)
        return converter.termDocumentMatrix
コード例 #3
0
 def __init__(self):
     """Attach freshly constructed result containers to the instance."""
     # Name-to-dict map, exposed as ``vecs``.
     nameMap = NameToDictMap()
     self.vecs = nameMap
     # Occurrence counter, exposed as ``allSymbols``.
     symbolCounter = OccurrenceCounter()
     self.allSymbols = symbolCounter