def train(spam_words, unlabeled_words):
    """Fit the positive-only Bayes model and persist it to disk.

    Every entry of ``spam_words`` and ``unlabeled_words`` is converted with
    ``features`` and the two resulting lists are handed to
    ``PositiveNaiveBayesClassifier.train`` together with the fixed third
    argument ``0.5``.  The trained model is written through
    ``PickleData('bayesmodel.pickle')`` and then returned.

    Args:
        spam_words: iterable of known-positive (spam) samples.
        unlabeled_words: iterable of samples with no label.

    Returns:
        The freshly trained classifier.
    """
    positive_sets = [features(word) for word in spam_words]
    unlabeled_sets = [features(word) for word in unlabeled_words]
    # NOTE(review): 0.5 is presumably the positive-class prior -- confirm
    # against the PositiveNaiveBayesClassifier in use.
    classifier = PositiveNaiveBayesClassifier.train(
        positive_sets, unlabeled_sets, 0.5
    )
    PickleData('bayesmodel.pickle').write(classifier)
    return classifier
def predictor():
    """Build a spam-check function backed by the stored Bayes model.

    The model is read from ``PickleData('bayesmodel.pickle')`` when that
    store reports ``exists``; otherwise ``retrain()`` is called to obtain
    one.

    Returns:
        A one-argument callable ``classify(word)`` that returns ``True``
        when the model assigns label ``1`` a probability strictly greater
        than 0.95 for ``features(word)``, and ``False`` otherwise.
    """
    store = PickleData('bayesmodel.pickle')
    spam_model = store.read() if store.exists else retrain()

    def classify(word):
        distribution = spam_model.prob_classify(features(word))
        # Only flag as positive when the model is very confident.
        return distribution.prob(1) > 0.95

    return classify
def sentiment_classifier():
    """Build a three-way sentiment function (1, 0 or -1).

    The Bayes model is read from ``PickleData('sentiments.pickle')`` when
    that store reports ``exists``; otherwise it is produced by
    ``train_sentiments_classifier()``.

    Returns:
        A one-argument callable ``classify(word)`` that returns ``0`` when
        ``estimate_neu`` on the SnowNLP tags exceeds 0.6, and otherwise
        returns ``1``, ``-1`` or ``0`` depending on where the combined
        score falls relative to +/-0.25.
    """
    store = PickleData('sentiments.pickle')
    sentiment_model = (
        store.read() if store.exists else train_sentiments_classifier()
    )
    # Imported here, as in the original, so snownlp is only loaded when a
    # sentiment classifier is actually requested.
    from snownlp import SnowNLP

    def classify(word):
        nlp = SnowNLP(word)
        is_neutral = estimate_neu(nlp.tags) > 0.6

        # Both scores are computed before the neutral check, in the same
        # order as the original code.
        snow_score = nlp.sentiments - 0.5
        distribution = sentiment_model.prob_classify(features(word))
        bayes_score = distribution.prob(1) - distribution.prob(-1)

        if is_neutral:
            return 0

        # NOTE(review): two terms are summed but the divisor is 3 --
        # confirm this weighting is intentional.
        combined = (bayes_score + snow_score) / 3
        if combined >= 0.25:
            return 1
        if combined <= -0.25:
            return -1
        return 0

    return classify
def train_sentiments_classifier():
    """Train the sentiment Bayes model and persist it to disk.

    Training data comes from ``PairData('sentiments.txt', 'utf8')`` and is
    passed unchanged to ``NaiveBayesClassifier.train``.  The resulting
    model is written through ``PickleData('sentiments.pickle')`` and then
    returned.

    Returns:
        The freshly trained classifier.
    """
    training_pairs = PairData('sentiments.txt', 'utf8')
    classifier = NaiveBayesClassifier.train(training_pairs)
    PickleData('sentiments.pickle').write(classifier)
    return classifier