def __init__(self, stop_words_file, related_training_data_file,
             awareness_training_data_file, needs_training,
             related_classifier_dump_file, awareness_classifier_dump_file,
             feature_list_file, classifier_type='nb'):
    """Build the helper, stop words, and the related/awareness classifiers.

    Args:
        stop_words_file: Passed to ``self.init_stop_words``; the result is
            stored as ``self.stop_words``.
        related_training_data_file: Training data handed to
            ``self.train_classifier`` for the "related" classifier.
        awareness_training_data_file: Training data handed to
            ``self.train_classifier`` for the "awareness" classifier.
        needs_training: When truthy, both classifiers are trained via
            ``self.train_classifier``; otherwise they are unpickled from
            the dump files and the feature list is read from
            ``feature_list_file``.
        related_classifier_dump_file: Dump file for the "related"
            classifier (passed to training, or opened in binary mode and
            unpickled).
        awareness_classifier_dump_file: Same, for the "awareness"
            classifier.
        feature_list_file: Passed to training, or read line by line to
            fill ``self.feature_list``.
        classifier_type: Forwarded unchanged to ``self.train_classifier``.
            NOTE(review): 'nb' presumably selects naive Bayes -- confirm
            against ``train_classifier``.
    """
    self.helper = ClassifierHelper()
    self.stop_words = self.init_stop_words(stop_words_file)
    self.feature_list = []

    if needs_training:
        # Training path: "related" is trained before "awareness"; both
        # calls receive the same feature list file and classifier type.
        # NOTE(review): self.feature_list stays as the empty list created
        # above unless train_classifier fills it -- not visible from here.
        self.related_classifier = self.train_classifier(
            related_training_data_file,
            related_classifier_dump_file,
            feature_list_file,
            classifier_type)
        self.awareness_classifier = self.train_classifier(
            awareness_training_data_file,
            awareness_classifier_dump_file,
            feature_list_file,
            classifier_type)
        return

    # Loading path: restore previously dumped classifiers.
    # NOTE: pickle.load can execute arbitrary code; the dump files must
    # come from a trusted source.
    with open(related_classifier_dump_file, 'rb') as related_dump:
        self.related_classifier = pickle.load(related_dump)
    with open(awareness_classifier_dump_file, 'rb') as awareness_dump:
        self.awareness_classifier = pickle.load(awareness_dump)

    # One feature token per line; surrounding whitespace (including the
    # trailing newline) is stripped.
    with open(feature_list_file, 'r') as feature_lines:
        self.feature_list.extend(line.strip() for line in feature_lines)
def __init__(self, trainingDataFile, classifierDumpFile, datadir):
    """Create the classifier helper, record file locations, obtain the classifier.

    Args:
        trainingDataFile: Stored as ``self.trainingDataFile``.
        classifierDumpFile: Stored as ``self.classifierPickled``.
        datadir: Directory prefix joined with '/' to 'feature_list.txt'
            and 'stop_words.txt'; both resulting paths are passed to
            ``ClassifierHelper``.
    """
    # Build the two helper input paths under datadir.
    feature_list_path = '%s/%s' % (datadir, 'feature_list.txt')
    stop_words_path = '%s/%s' % (datadir, 'stop_words.txt')

    # Instantiate classifier helper
    self.helper = ClassifierHelper(feature_list_path, stop_words_path)

    self.trainingDataFile = trainingDataFile
    self.classifierPickled = classifierDumpFile
    self.last_trained = None

    # Must run last: _getClassifier presumably reads the attributes set
    # above -- confirm against its definition.
    self.classifier = self._getClassifier()