class SourceCodeTagCloud(object):

    '''
    Wrapper class for generating the data for a source code tag cloud.

    On construction the files selected under ``dirname`` (by ``pattern``
    and/or ``lang``) are tokenized and two pieces of information are
    collected:

    * ``tagcloud``     -- a ``TagCloud`` that receives every token value
                          together with its token type.
    * ``fileTagCount`` -- dict mapping a token value to the number of
                          files in which that value occurred at least once.

    NOTE(review): ``TagCloud``, ``DirFileLister`` and ``TagCloudTokenizer``
    are defined outside this class; their exact semantics are assumed from
    how they are called here -- confirm against their definitions.
    '''

    def __init__(self, dirname, pattern='*.c', lang=None):
        '''
        Store the selection criteria and build the tag cloud immediately.

        dirname -- directory handed to ``DirFileLister``.
        pattern -- pattern passed through to the file lister
                   (defaults to ``'*.c'``).
        lang    -- optional language passed through to the file lister.
        '''
        self.dirname = dirname
        self.pattern = pattern
        self.lang = lang
        self.tagcloud = None  # Stores frequency of tag cloud.
        # Stores, per tag, the number of files in which that tag was found.
        self.fileTagCount = {}
        self.createTagCloud()

    def createTagCloud(self):
        '''
        (Re)build the tag cloud data from the files selected by the lister.

        Both ``tagcloud`` and ``fileTagCount`` are started afresh so that
        calling this method more than once does not double count the
        per-file occurrences.
        '''
        self.tagcloud = TagCloud()
        self.fileTagCount = {}

        dirlister = DirFileLister(self.dirname)
        for fname in dirlister.getFilesForPatternOrLang(pattern=self.pattern, lang=self.lang):
            self.__addFile(fname)

    def __addFile(self, srcfile):
        '''
        Tokenize ``srcfile`` and merge its tokens into the collected data.

        Every token is forwarded to ``tagcloud.addWord``; ``fileTagCount``
        is incremented at most once per distinct token value for this file.
        '''
        # Identity check: '!= None' could dispatch to a custom comparison.
        assert self.tagcloud is not None
        # Parenthesised single argument prints identically on Python 2 and 3.
        print("Adding tags information of file: %s" % srcfile)
        tokenizer = TagCloudTokenizer(srcfile)
        seenInThisFile = set()  # token values already counted for srcfile
        for srctoken in tokenizer:
            value = srctoken.value
            self.tagcloud.addWord(value, srctoken.ttype)
            if value not in seenInThisFile:
                seenInThisFile.add(value)
                self.fileTagCount[value] = self.fileTagCount.get(value, 0) + 1

    def getTags(self, numWords=100, filterFunc=None):
        '''
        Return ``tagcloud.getSortedTagWordList(numWords, filterFunc)``.

        numWords   -- forwarded unchanged (defaults to 100).
        filterFunc -- forwarded unchanged (defaults to None).
        '''
        return self.tagcloud.getSortedTagWordList(numWords, filterFunc)

    def getFileCount(self, tagWord):
        '''
        Return the number of files in which ``tagWord`` was found,
        or 0 when the word was never seen.
        '''
        return self.fileTagCount.get(tagWord, 0)