def _termDictsFromContext(self, context, symbol):
    """Collect the expressions found for ``symbol`` across a context.

    Visits every entry of ``context.neighbours`` followed by
    ``context.origin``. For each entry, the expressions returned by
    ``self.treeToExprConverter.getExpressionsForSymbol(location, symbol)``
    are stored on the entry via ``setExpressions`` and then registered in
    a fresh ``NameToDictMap`` and a fresh ``OccurrenceCounter``.

    ``context.neighbours`` is extended with the origin only for the
    duration of this call; the extra element is removed again before the
    method exits, including when one of the collaborators raises.

    Returns:
        The tuple ``(vecs, allNgrams)``, or ``None`` when ``vecs.d`` or
        ``allNgrams.d`` is still empty after the pass.
    """
    vecs = NameToDictMap()
    allNgrams = OccurrenceCounter()

    # Treat the origin like one more neighbour for this pass.
    context.neighbours.append(context.origin)
    try:
        for neighbour in context.neighbours:
            location = neighbour.location
            expressions = self.treeToExprConverter.getExpressionsForSymbol(
                location, symbol)
            neighbour.setExpressions(expressions)

            # add null-vector for function if it does not contain expressions
            if len(expressions) == 0:
                vecs.add(None, location)

            for expr in expressions:
                # Every (expr, location) pair is stored with the constant
                # 1.0; neighbour.nOccurrences is not used for weighting.
                vecs.setItem(expr, location, 1.0)
                allNgrams.add(expr)
    finally:
        # Undo the temporary append even if a collaborator raised, so the
        # caller's context is never left with the origin in its neighbours.
        context.neighbours.pop()

    if len(vecs.d) == 0 or len(allNgrams.d) == 0:
        return None

    return (vecs, allNgrams)
def createMatrixForFunctionNames(self, functionNames):
    """Build a term-document matrix from the given function names.

    Calls ``self._loadFunc2SubtreesMap()``, resets ``self.nameToDictMap``
    and ``self.allSymbolsDict`` to fresh containers, and then, for every
    name in ``functionNames``, registers each n-gram found under
    ``self.func2SubtreesMap.d[name]`` once per recorded occurrence.

    Returns:
        The ``termDocumentMatrix`` attribute of a ``NameDictMapToMatrix``
        after ``convertFromDicts`` has been run on the two containers.
    """
    self._loadFunc2SubtreesMap()

    self.nameToDictMap = NameToDictMap()
    self.allSymbolsDict = OccurrenceCounter()
    converter = NameDictMapToMatrix()

    # Look up all requested names first, before anything is recorded.
    selected = []
    for name in functionNames:
        selected.append((name, self.func2SubtreesMap.d[name]))

    for name, ngramCounts in selected:
        for ngram, count in ngramCounts.iteritems():
            # Register the n-gram once per occurrence.
            for _ in xrange(count):
                self.nameToDictMap.add(ngram, name)
                self.allSymbolsDict.add(ngram)

    converter.convertFromDicts(self.nameToDictMap, self.allSymbolsDict)
    return converter.termDocumentMatrix
def __init__(self):
    """Start with an empty ``NameToDictMap`` and ``OccurrenceCounter``."""
    emptyVectors = NameToDictMap()
    self.vecs = emptyVectors

    emptyCounter = OccurrenceCounter()
    self.allSymbols = emptyCounter