Ejemplo n.º 1
0
def predict(
    query,
    model_path,
    stopwords_path,
    person_name_path,
    place_name_path,
    common_char_path,
    segment_sep,
    domain_sample_path,
    ngram,
    pmi_path,
    entropy_path,
):
    """Extract features from ``query`` and classify them with a pickled model.

    A ``Feature`` extractor is built from the resource arguments, the query
    is converted to features (``is_word_segmented=False``), and the model
    loaded from ``model_path`` is asked to predict on those features.

    Args:
        query: Input handed to ``Feature.get_feature``.
        model_path: Location passed to ``load_pkl`` to obtain the model.
        stopwords_path, person_name_path, place_name_path, common_char_path,
        segment_sep, domain_sample_path, ngram, pmi_path, entropy_path:
            Forwarded unchanged, by keyword, to the ``Feature`` constructor.

    Returns:
        Whatever ``model.predict(features)`` returns. The same value is also
        logged at INFO level and printed to stdout.
    """
    logger.info('model predict')

    # Collect the extractor configuration once, then expand it by keyword.
    feature_options = dict(
        stopwords_path=stopwords_path,
        person_name_path=person_name_path,
        place_name_path=place_name_path,
        common_char_path=common_char_path,
        segment_sep=segment_sep,
        domain_sample_path=domain_sample_path,
        ngram=ngram,
        pmi_path=pmi_path,
        entropy_path=entropy_path,
    )
    extractor = Feature(**feature_options)
    features, terms = extractor.get_feature(query, is_word_segmented=False)

    # Run the classification model on the extracted features.
    classifier = load_pkl(model_path)
    logger.debug("model predict")
    label_pred = classifier.predict(features)

    logger.info("words: %s" % terms)
    label_message = "predict label: %s" % label_pred
    logger.info(label_message)
    print(label_message)
    return label_pred
Ejemplo n.º 2
0
def tfidf_word_feature(data_set,
                       is_infer=False,
                       feature_vec_path='',
                       word_vocab=None):
    """Build word-level TF-IDF features for ``data_set``.

    Args:
        data_set: Documents passed to the vectorizer's ``transform`` /
            ``fit_transform``.
        is_infer: When truthy, a previously saved vectorizer is loaded from
            ``feature_vec_path`` and only ``transform`` is applied. Otherwise
            a new ``TfidfVectorizer`` is fitted on ``data_set``.
        feature_vec_path: Path the vectorizer is loaded from (inference) and
            saved to (every call).
        word_vocab: Optional vocabulary handed to ``TfidfVectorizer`` when
            fitting.

    Returns:
        The feature matrix produced by the vectorizer.
    """
    if not is_infer:
        # Training: fit a fresh word-level vectorizer on the data.
        vectorizer = TfidfVectorizer(
            analyzer='word',
            vocabulary=word_vocab,
            sublinear_tf=True,
        )
        data_feature = vectorizer.fit_transform(data_set)
    else:
        # Inference: reuse the vectorizer persisted by an earlier run.
        vectorizer = load_pkl(feature_vec_path)
        data_feature = vectorizer.transform(data_set)

    vocab_size = len(vectorizer.vocabulary_)
    logger.debug('vocab size: %d' % vocab_size)
    logger.debug(data_feature.shape)

    # NOTE: the vectorizer is written back on every call, including inference
    # (where it was just loaded from this same path).
    save_pkl(vectorizer, feature_vec_path, overwrite=True)
    return data_feature
Ejemplo n.º 3
0
 def _init(self):
     """Load the model from ``self.model_path`` and mark the instance as initialised."""
     loaded_model = load_pkl(self.model_path)
     self.model = loaded_model
     self.inited = True
Ejemplo n.º 4
0
 def check_inited(self):
     """Load the model from ``self.model_path`` unless ``self.inited`` is already truthy.

     After a load, the path is logged at DEBUG level and ``self.inited`` is
     set to ``True``.
     """
     if self.inited:
         # Nothing to do: the model was loaded by an earlier call.
         return
     self.model = load_pkl(self.model_path)
     logger.debug('Loaded model: {}'.format(self.model_path))
     self.inited = True