def predict(
        query,
        model_path,
        stopwords_path,
        person_name_path,
        place_name_path,
        common_char_path,
        segment_sep,
        domain_sample_path,
        ngram,
        pmi_path,
        entropy_path,
):
    """Extract features for ``query`` and classify them with a pickled model.

    A ``Feature`` extractor is built from the resource arguments, the query
    is turned into a feature representation (passed as not yet word
    segmented), and the model loaded from ``model_path`` predicts on it.
    The extracted terms and the prediction are logged at INFO level; the
    prediction is additionally printed to stdout.

    :param query: input handed to ``Feature.get_feature``.
    :param model_path: location of the pickled model, read with ``load_pkl``.
    :param stopwords_path: forwarded unchanged to ``Feature``.
    :param person_name_path: forwarded unchanged to ``Feature``.
    :param place_name_path: forwarded unchanged to ``Feature``.
    :param common_char_path: forwarded unchanged to ``Feature``.
    :param segment_sep: forwarded unchanged to ``Feature``.
    :param domain_sample_path: forwarded unchanged to ``Feature``.
    :param ngram: forwarded unchanged to ``Feature``.
    :param pmi_path: forwarded unchanged to ``Feature``.
    :param entropy_path: forwarded unchanged to ``Feature``.
    :return: whatever the loaded model's ``predict`` returns for the features.
    """
    logger.info('model predict')

    # Build the feature extractor from the resource settings.
    feature_options = dict(
        stopwords_path=stopwords_path,
        person_name_path=person_name_path,
        place_name_path=place_name_path,
        common_char_path=common_char_path,
        segment_sep=segment_sep,
        domain_sample_path=domain_sample_path,
        ngram=ngram,
        pmi_path=pmi_path,
        entropy_path=entropy_path,
    )
    extractor = Feature(**feature_options)
    features, terms = extractor.get_feature(query, is_word_segmented=False)

    # Run the classification model on the extracted features.
    classifier = load_pkl(model_path)
    logger.debug("model predict")
    label_pred = classifier.predict(features)

    logger.info("words: %s" % terms)
    # The same message goes to the log and to stdout.
    label_message = "predict label: %s" % label_pred
    logger.info(label_message)
    print(label_message)
    return label_pred
def tfidf_word_feature(data_set, is_infer=False, feature_vec_path='', word_vocab=None):
    """
    Build word-level TF-IDF features for ``data_set``.

    In training mode (``is_infer`` false) a new ``TfidfVectorizer`` is created
    with ``analyzer='word'``, ``vocabulary=word_vocab`` and
    ``sublinear_tf=True`` and fitted on ``data_set``. In inference mode the
    vectorizer is loaded from ``feature_vec_path`` and only used to transform
    ``data_set``. The vocabulary size and the feature shape are logged at
    DEBUG level.

    :param data_set: documents passed to the vectorizer.
    :param is_infer: load an existing vectorizer instead of fitting a new one.
    :param feature_vec_path: pickle path the vectorizer is read from and
        written to.
    :param word_vocab: vocabulary given to a newly created vectorizer.
    :return: the feature matrix produced by the vectorizer.
    """
    if not is_infer:
        tfidf = TfidfVectorizer(analyzer='word', vocabulary=word_vocab, sublinear_tf=True)
        feature_matrix = tfidf.fit_transform(data_set)
    else:
        tfidf = load_pkl(feature_vec_path)
        feature_matrix = tfidf.transform(data_set)

    logger.debug('vocab size: %d' % len(tfidf.vocabulary_))
    logger.debug(feature_matrix.shape)

    # The vectorizer is written back in both modes, so inference re-saves the
    # object it just loaded.
    # NOTE(review): confirm whether the save should be skipped when is_infer.
    save_pkl(tfidf, feature_vec_path, overwrite=True)
    return feature_matrix
def _init(self):
    """Load the pickled model from ``self.model_path`` and mark the instance ready.

    The ``inited`` flag is set only after ``load_pkl`` has returned, so it is
    not set if loading raises.
    """
    loaded_model = load_pkl(self.model_path)
    self.model = loaded_model
    self.inited = True
def check_inited(self):
    """Load the model on first use; do nothing when it is already loaded.

    When ``self.inited`` is false, the model is read from ``self.model_path``
    with ``load_pkl``, the path is logged at DEBUG level, and ``self.inited``
    is set to ``True``.
    """
    if self.inited:
        # Already initialised: keep the current model untouched.
        return

    path = self.model_path
    self.model = load_pkl(path)
    logger.debug('Loaded model: {}'.format(path))
    self.inited = True