class SourceCodeTagCloud(object):
    """Wrapper class for generating the data for a source code tag cloud.

    On construction the directory ``dirname`` is scanned (through
    ``DirFileLister``) for files selected by ``pattern`` / ``lang`` and every
    token produced by ``TagCloudTokenizer`` for those files is added to a
    ``TagCloud``.  In addition, the number of distinct files in which each
    token value appears is recorded in ``fileTagCount``.
    """

    def __init__(self, dirname, pattern='*.c', lang=None):
        """Store the scan parameters and build the tag cloud immediately.

        dirname -- directory handed to ``DirFileLister``.
        pattern -- file pattern forwarded to ``getFilesForPatternOrLang``.
        lang    -- language forwarded to ``getFilesForPatternOrLang``.
        """
        self.dirname = dirname
        self.pattern = pattern
        self.lang = lang
        self.tagcloud = None  # Stores frequency of tag cloud.
        # Store information about how many files that tag was found
        self.fileTagCount = dict()
        self.createTagCloud()

    def createTagCloud(self):
        """(Re)build the tag cloud and the per-file tag counts from scratch."""
        self.tagcloud = TagCloud()
        # Reset the per-file counts together with the tag cloud; otherwise a
        # second call would start from a fresh TagCloud but keep adding to the
        # previous file counts, double-counting every file.
        self.fileTagCount = dict()
        dirlister = DirFileLister(self.dirname)
        for fname in dirlister.getFilesForPatternOrLang(pattern=self.pattern,
                                                        lang=self.lang):
            self.__addFile(fname)

    def __addFile(self, srcfile):
        """Add every token of ``srcfile`` to the tag cloud.

        Each token is passed to ``tagcloud.addWord``; ``fileTagCount`` is
        incremented at most once per distinct token value for this file.
        """
        # Internal invariant: createTagCloud() must have created the cloud.
        assert self.tagcloud is not None
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Adding tags information of file: %s" % srcfile)
        tokenizer = TagCloudTokenizer(srcfile)
        fileTokenset = set()  # token values already counted for this file
        for srctoken in tokenizer:
            value = srctoken.value
            self.tagcloud.addWord(value, srctoken.ttype)
            if value not in fileTokenset:
                self.fileTagCount[value] = self.fileTagCount.get(value, 0) + 1
                fileTokenset.add(value)

    def getTags(self, numWords=100, filterFunc=None):
        """Return ``tagcloud.getSortedTagWordList(numWords, filterFunc)``."""
        return self.tagcloud.getSortedTagWordList(numWords, filterFunc)

    def getFileCount(self, tagWord):
        """Return the number of files ``tagWord`` was found in (0 if unseen)."""
        return self.fileTagCount.get(tagWord, 0)
def createTagCloud(self):
    ''' create a new TagCloud on self and add every matching file to it '''
    # NOTE(review): this definition repeats the createTagCloud method of
    # SourceCodeTagCloud. If it really lives outside the class body,
    # `self.__addFile` is not name-mangled here -- confirm whether this
    # duplicate is intentional or an artifact.
    self.tagcloud = TagCloud()
    matching_files = DirFileLister(self.dirname).getFilesForPatternOrLang(
        pattern=self.pattern, lang=self.lang)
    for source_path in matching_files:
        self.__addFile(source_path)