class Fasttext_clf(BaseEstimator, ClassifierMixin):
    """Estimator-style wrapper around a pre-trained FastText model.

    The model is loaded once in the constructor; ``fit`` is a no-op, and
    ``predict`` / ``predict_proba`` forward to the wrapped model.
    """

    # Default model file, resolved from the ``addr_detector.model`` package.
    data_path = pkg_resources.resource_filename('addr_detector.model', 'ft_ad.ftz')

    def __init__(self, path=data_path):
        """Load the FastText model stored at ``path``.

        :param path: location of the model file; defaults to ``data_path``.
        """
        # Keep the constructor argument available under its own name.
        # NOTE(review): presumably BaseEstimator is scikit-learn's, whose
        # get_params()/clone() read every __init__ parameter back with
        # getattr and fail when it is missing -- confirm.
        self.path = path
        self.model = FastText(path)
        # Value reported when the model yields nothing for a single string.
        self.default = '0'

    def fit(self, X, y):
        """Do nothing (the model is already trained) and return ``self``."""
        return self

    def predict(self, X):
        """Predict labels for a single string or a list of strings.

        :param X: a ``str`` (one sample) or a ``list`` (batch of samples).
        :return: for a ``str``, a one-element list holding the model's
            result, or ``self.default`` when that result is empty; for a
            ``list``, the model's batch result; for any other type, ``[]``.
        """
        if isinstance(X, str):
            labels = self.model.predict_single(X)
            return [labels if labels else self.default]
        if isinstance(X, list):
            # Run the batch through the model exactly once.
            # NOTE(review): unlike the single-string branch, empty per-item
            # results are not replaced by ``self.default`` here.
            return list(self.model.predict(X))
        return []

    def predict_proba(self, X):
        """Return the model's probability output for a string or a list.

        :param X: a ``str`` (one sample) or a ``list`` (batch of samples).
        :return: for a ``str``, a one-element list holding the model's
            single-sample result; for a ``list``, the model's batch result;
            for any other type, ``[]``.
        """
        if isinstance(X, str):
            return [self.model.predict_proba_single(X)]
        if isinstance(X, list):
            return list(self.model.predict_proba(X))
        return []
class FastTextClassifier:
    """Sentence classifier backed by a FastText model stored on disk.

    On construction the model at ``model_path`` is loaded; if that file
    does not exist, a new model is trained and saved there first.
    """

    # Returned by ``predict`` when no label can be produced.
    _FALLBACK = ('0', )

    def __init__(self):
        self.classifier = None
        self.model_path = str(model_dir / 'classifier.model')
        self.preprocessor = PreprocessData()
        self.load_model()

    def load_model(self, ):
        """Load the model from ``model_path``, training one if it is absent."""
        if os.path.exists(self.model_path):
            self.classifier = FastText(self.model_path)
        else:
            self.train_model()
            print("no such model, model now")

    def set_model_path(self, new_path):
        """Point ``model_path`` at ``new_path``; the model is not reloaded."""
        self.model_path = new_path

    def train_model(self):
        """Train a supervised model, save it, and print its test results.

        The trained model is written to ``model_path`` and becomes
        ``self.classifier``. It is then tested against both the training
        file and the testing file, and each result is printed.
        """
        classifier = FastText.supervised(input=train_data_path,
                                         output=model_dir,
                                         lr=0.25,
                                         ws=4)
        classifier.save_model(self.model_path)
        self.classifier = classifier

        print("test result in training data:")
        print(classifier.test(train_data_path))
        print("test result in testing data:")
        print(classifier.test(test_data_path))

    def predict(self, text):
        """
        :param text: a str query
        :return: predicted label of the input sentence, or ``('0', )`` when
            the query has at most two whitespace-separated tokens or the
            model returns nothing usable
        """
        # Very short queries are never classified; decide that before
        # spending any work on preprocessing.
        if len(text.split()) <= 2:
            return self._FALLBACK

        without_signs = self.preprocessor.remove_sign(text)
        cleaned = self.preprocessor.remove_stop_words(without_signs)
        label = self.classifier.predict_single(cleaned)
        try:
            return label[0]
        except (LookupError, TypeError):
            # Empty or non-subscriptable result (e.g. [] or None): no label.
            return self._FALLBACK