class QClassifierImpl:
    """Wrapper that trains a question classifier and names its predictions.

    Typical use is ``QClassifierImpl(path).train()`` followed by
    ``get_type(question)``.
    """

    def __init__(self, train_data_path, pred_qs=None):
        """Configure logging and create an untrained wrapper.

        Args:
            train_data_path: Location of the training data; handed to
                ``FeatureExtractor.load`` by ``train()``.
            pred_qs: Optional value stored as ``self.pred_qs``; it is not
                read by any method of this class.
        """
        # basicConfig only takes effect while the root logger has no
        # handlers, so creating several instances does not re-truncate the
        # log file.
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename='qclassifier.log',
            filemode='w')
        # Python 2 only: force the implicit str<->unicode codec to UTF-8.
        # ``reload`` is not a builtin on Python 3 (and the default encoding
        # is already UTF-8 there), so the hack is skipped instead of raising
        # NameError.
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        self.clf = None          # trained QClassifier, set by train()
        self.path = train_data_path
        self.pred_qs = pred_qs
        self.extractor = FeatureExtractor()
        self.features = None     # raw features from the extractor
        self.labels = None       # labels aligned one-to-one with features
        self.vectorizer = None   # FeatureHasher used for train and predict
        # Position i is the name reported when the model predicts label i.
        self.cate = ['Person', 'Number', 'Location', 'Other']

    def train(self):
        """Train the classifier on all of the data found at ``self.path``.

        The classifier and vectorizer are published on ``self`` only after
        training finishes, so a failure part-way through leaves the instance
        untrained rather than holding a half-built model.

        Raises:
            AssertionError: If the extractor returns a different number of
                labels and feature rows.
        """
        # Drop any earlier model so it can never be paired with new data.
        self.clf = None
        self.vectorizer = None

        self.extractor.load(path=self.path)
        self.features = self.extractor.extract_features()
        self.labels = self.extractor.get_labels()
        classifier = QClassifier(questions=self.extractor.questions)
        assert len(self.labels) == len(self.features)

        # NOTE(review): ``non_negative`` is only accepted by older
        # scikit-learn releases -- confirm the pinned version.
        hasher = FeatureHasher(input_type='string', non_negative=True)
        hashed = hasher.transform(self.features)
        targets = asarray(self.labels)

        logging.info('start training')
        classifier.train(hashed, targets)
        logging.info('done')

        self.vectorizer = hasher
        self.clf = classifier

    def _is_trained(self):
        """Return True when a model and non-empty training data are present."""
        if self.clf is None or self.vectorizer is None:
            return False
        if self.features is None or self.labels is None:
            return False
        # len() rather than truthiness so array-like containers work too.
        return len(self.features) > 0 and len(self.labels) > 0

    def get_type(self, question):
        """Return the category name predicted for ``question``.

        Args:
            question: The question to classify.

        Returns:
            One of the names in ``self.cate``, or ``None`` (after logging an
            error) when the model has not been trained or ``question`` is
            empty or ``None``.
        """
        if not self._is_trained():
            logging.error('You need to train model first!')
            return None
        if not question:
            logging.error('Question should not be None')
            return None
        row = [self.extractor.extract_features_aux(question)]
        hashed = self.vectorizer.transform(row)
        return self.cate[self.clf.predict(hashed)[0]]
class QClassifierImpl:
    """Wrapper that trains a question classifier and names its predictions.

    Typical use is ``QClassifierImpl(path).train()`` followed by
    ``get_type(question)``.
    """

    def __init__(self, train_data_path, pred_qs=None):
        """Configure logging and create an untrained wrapper.

        Args:
            train_data_path: Location of the training data; handed to
                ``FeatureExtractor.load`` by ``train()``.
            pred_qs: Optional value stored as ``self.pred_qs``; it is not
                read by any method of this class.
        """
        # basicConfig only takes effect while the root logger has no
        # handlers, so creating several instances does not re-truncate the
        # log file.
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename='qclassifier.log',
            filemode='w')
        # Python 2 only: force the implicit str<->unicode codec to UTF-8.
        # ``reload`` is not a builtin on Python 3 (and the default encoding
        # is already UTF-8 there), so the hack is skipped instead of raising
        # NameError.
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        self.clf = None          # trained QClassifier, set by train()
        self.path = train_data_path
        self.pred_qs = pred_qs
        self.extractor = FeatureExtractor()
        self.features = None     # raw features from the extractor
        self.labels = None       # labels aligned one-to-one with features
        self.vectorizer = None   # FeatureHasher used for train and predict
        # Position i is the name reported when the model predicts label i.
        self.cate = ['Person', 'Number', 'Location', 'Other']

    def train(self):
        """Train the classifier on all of the data found at ``self.path``.

        The classifier and vectorizer are published on ``self`` only after
        training finishes, so a failure part-way through leaves the instance
        untrained rather than holding a half-built model.

        Raises:
            AssertionError: If the extractor returns a different number of
                labels and feature rows.
        """
        # Drop any earlier model so it can never be paired with new data.
        self.clf = None
        self.vectorizer = None

        self.extractor.load(path=self.path)
        self.features = self.extractor.extract_features()
        self.labels = self.extractor.get_labels()
        classifier = QClassifier(questions=self.extractor.questions)
        assert len(self.labels) == len(self.features)

        # NOTE(review): ``non_negative`` is only accepted by older
        # scikit-learn releases -- confirm the pinned version.
        hasher = FeatureHasher(input_type='string', non_negative=True)
        hashed = hasher.transform(self.features)
        targets = asarray(self.labels)

        logging.info('start training')
        classifier.train(hashed, targets)
        logging.info('done')

        self.vectorizer = hasher
        self.clf = classifier

    def _is_trained(self):
        """Return True when a model and non-empty training data are present."""
        if self.clf is None or self.vectorizer is None:
            return False
        if self.features is None or self.labels is None:
            return False
        # len() rather than truthiness so array-like containers work too.
        return len(self.features) > 0 and len(self.labels) > 0

    def get_type(self, question):
        """Return the category name predicted for ``question``.

        Args:
            question: The question to classify.

        Returns:
            One of the names in ``self.cate``, or ``None`` (after logging an
            error) when the model has not been trained or ``question`` is
            empty or ``None``.
        """
        if not self._is_trained():
            logging.error('You need to train model first!')
            return None
        if not question:
            logging.error('Question should not be None')
            return None
        row = [self.extractor.extract_features_aux(question)]
        hashed = self.vectorizer.transform(row)
        return self.cate[self.clf.predict(hashed)[0]]