예제 #1
0
class Fasttext_clf(BaseEstimator, ClassifierMixin):
    """scikit-learn style classifier backed by a pre-trained FastText model.

    The model is loaded once at construction time; ``fit`` is a no-op so the
    estimator can be dropped into scikit-learn pipelines.
    """

    # Location of the model file bundled with the ``addr_detector`` package.
    data_path = pkg_resources.resource_filename('addr_detector.model', 'ft_ad.ftz')

    def __init__(self, path=data_path):
        """Load the FastText model stored at ``path``.

        :param path: path to the FastText model file; defaults to the
            bundled ``ft_ad.ftz``.
        """
        # Store the constructor argument under its own name so that
        # BaseEstimator.get_params() / clone() can read it back.
        self.path = path
        self.model = FastText(path)
        # Label substituted when the model returns nothing for a single text.
        self.default = '0'

    def fit(self, X, y):
        """Do nothing and return ``self``; the model is already trained."""
        return self

    def predict(self, X):
        """Predict labels for one text or a list of texts.

        :param X: a single ``str`` or a ``list`` of texts.
        :return: a list. For a ``str`` it holds one entry: the result of
            ``predict_single`` or ``self.default`` when that result is
            falsy. For a ``list`` it holds whatever ``model.predict``
            returns (no default substitution is applied). Any other input
            type yields an empty list.
        """
        if isinstance(X, str):
            res = self.model.predict_single(X)
            return [res if res else self.default]
        if isinstance(X, list):
            # Run the batch prediction exactly once.
            return list(self.model.predict(X))
        return []

    def predict_proba(self, X):
        """Predict label probabilities for one text or a list of texts.

        :param X: a single ``str`` or a ``list`` of texts.
        :return: a list. For a ``str`` it holds the single result of
            ``predict_proba_single``; for a ``list`` it holds the results
            of ``model.predict_proba``. Any other input type yields an
            empty list.
        """
        if isinstance(X, str):
            return [self.model.predict_proba_single(X)]
        if isinstance(X, list):
            return list(self.model.predict_proba(X))
        return []
class FastTextClassifier:
    """Wrapper around a FastText supervised model for classifying text queries.

    On construction the model file at ``model_path`` is loaded; when that
    file does not exist a new model is trained and saved there instead.
    """

    # Value returned by predict() when no label can be produced. It is kept
    # as a 1-tuple (not a bare string) for backward compatibility.
    _FALLBACK_LABEL = ('0',)
    # Queries with this many whitespace-separated tokens or fewer are not
    # sent to the model.
    _MAX_UNCLASSIFIED_TOKENS = 2

    def __init__(self):
        """Set up the preprocessor and load (or train) the model."""
        self.classifier = None
        self.model_path = str(model_dir / 'classifier.model')
        self.preprocessor = PreprocessData()
        self.load_model()

    def load_model(self, ):
        """Load the model from ``self.model_path``, training one if absent."""
        if os.path.exists(self.model_path):
            self.classifier = FastText(self.model_path)
        else:
            # Announce before training starts so the message explains the
            # delay instead of appearing after the work is already done.
            print("no such model, model now")
            self.train_model()

    def set_model_path(self, new_path):
        """Change the path used by later ``load_model``/``train_model`` calls.

        The currently loaded classifier is not reloaded.
        """
        self.model_path = new_path

    def train_model(self):
        """Train a supervised model, save it and print evaluation results.

        Training reads ``train_data_path``; the trained model is saved to
        ``self.model_path`` and then evaluated on both ``train_data_path``
        and ``test_data_path``, with the results printed to stdout.
        """
        classifier = FastText.supervised(input=train_data_path,
                                         output=model_dir,
                                         lr=0.25,
                                         ws=4)
        classifier.save_model(self.model_path)
        self.classifier = classifier
        print("test result in training data:")
        result = classifier.test(train_data_path)
        print(result)
        print("test result in testing data:")
        result = classifier.test(test_data_path)
        print(result)

    def predict(self, text):
        """
        Predict the label of a single query.

        :param text: a str query
        :return: the first label returned by the model for the
            preprocessed query, or the tuple ``('0',)`` when the raw query
            has two or fewer whitespace-separated tokens or the model
            returns no label
        """
        # Check the raw query first: short queries are never classified, so
        # there is no point in preprocessing them.
        if len(text.split()) <= self._MAX_UNCLASSIFIED_TOKENS:
            return self._FALLBACK_LABEL

        rmsign_text = self.preprocessor.remove_sign(text)
        pre_data = self.preprocessor.remove_stop_words(rmsign_text)
        label = self.classifier.predict_single(pre_data)

        try:
            return label[0]
        except (IndexError, TypeError):
            # Empty result (IndexError) or a non-subscriptable result such
            # as None (TypeError): fall back to the default label.
            return self._FALLBACK_LABEL