def __init__(self):
    '''
    Initialise the classifier-state attributes on ``self``.

    NOTE(review): this is a module-level function, not a method. It sets the
    same attributes as ``Classifier.__init__``, but because it sits outside a
    class body the ``__featureExtractor`` attribute is stored under that
    literal name (no private-name mangling). Confirm whether this definition
    is still needed.
    '''
    self.savedCategories = self._classifierIndex = None
    self._corpus, self._features = "", []
    self._mins = {}
    self.__featureExtractor = FeatureExtractor()
class Classifier(object):
    
    def __init__(self):
        '''
        Set up an empty classifier with a default ``FeatureExtractor``.
        '''
        self.savedCategories = self._classifierIndex = None
        # Filled in by classify(): the last corpus and the features extracted from it.
        self._corpus, self._features = "", []
        # Minimum thresholds registered through setMinThreshold(),
        # shaped {categoryName: {yes: value}}.
        self._mins = {}
        # Default extractor; can be swapped with setFeatureExtractor().
        self.__featureExtractor = FeatureExtractor()
    
    def _getCorpusShort(self):
        return self._corpus[:50] if self._corpus else ""
    
    def _getGroupedCategories(self, categories):
        groupedCategories = {}
        for category in categories :
            if not groupedCategories.get(category.categoryName) :
                groupedCategories[category.categoryName] = {}
            groupedCategories[category.categoryName][category.yes] = category 
        return groupedCategories
    
    def setFeatureExtractor(self, featureExtractor):
        '''
        Replace the feature extractor used by classify().

        featureExtractor -- object whose ``getFeatures(corpus)`` method
                            classify() calls to obtain the corpus features.
        '''
        self.__featureExtractor = featureExtractor
    
    def setMinThreshold(self, categoryName, yes, value):
        if not self._mins.get(categoryName) :
            self._mins[categoryName] = {}
        self._mins[categoryName][yes] = value
        
    def getMinThreshold(self, categoryName, yes):
        return self._mins.get(categoryName, {}).get(yes, .6)
        
    '''
    Classification of text corpus
    '''
    def classify(self, corpus):
        '''
        Classify a text corpus and return its probable tags.

        Extracts features from ``corpus`` with the configured feature
        extractor, builds a ``ClassifierIndex`` over them, groups every
        category returned by ``ClassifierCategory.getAllCategories()`` by
        name and hands the grouping to ``self._getProbableTags``.

        Side effect: the corpus, its features and the index are stored on the
        instance as ``_corpus``, ``_features`` and ``_classifierIndex``.

        corpus -- text to classify; passed as-is to the feature extractor.

        Returns the result of ``_getProbableTags``, or an empty list when that
        call raises (the failure is logged and not propagated).
        '''
        logger = logging.getLogger("Classifier.classify")

        self._corpus = corpus
        self._features = self.__featureExtractor.getFeatures(corpus)
        self._classifierIndex = ClassifierIndex(self._features)

        categories = ClassifierCategory.getAllCategories()
        groupedCategories = self._getGroupedCategories(categories)

        # Find the probable tags. This step is best effort: any failure is
        # logged with its traceback and the caller gets an empty result.
        try:
            return self._getProbableTags(groupedCategories)
        except Exception as ex:
            # "as" is valid on Python 2.6+ and 3.x. The message is formatted
            # lazily by logging, so a non-ASCII exception text cannot raise
            # from inside this handler.
            logger.exception("classification failure:  %s", ex)
            return []