class LogReg(object):
    """Logistic-regression text classifier producing scores in [0, 1].

    Texts are turned into feature vectors by a dictionary object (either the
    one supplied by the caller or a freshly built ``NLPDict``) and fed to an
    L2-penalised ``LogisticRegression`` model.
    """

    def __init__(self, texts, classes, nlpdict=None, scale=1, C=1.0):
        """Build the dictionary (if needed) and fit the model.

        Args:
            texts: training texts, passed to ``feature_vectors``.
            classes: training labels, passed to ``fit`` with the vectors.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy, a new ``NLPDict`` is
                built from ``texts``.
            scale: factor applied to the scores in ``classify`` before they
                are clipped to [0, 1].
            C: forwarded to ``LogisticRegression`` as its ``C`` argument.
        """
        self.scale = scale
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight='auto' with 'balanced' -- confirm the target version
        # before upgrading the dependency.
        self.l = LogisticRegression(penalty='l2', dual=True, C=C,
                                    class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        vectors = self.dictionary.feature_vectors(texts)
        self.l.fit(vectors, classes)

    def classify(self, texts):
        """Classify a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array with one score per row of ``predict_proba`` output,
            computed as ``((p[1] - p[0]) + 1) / 2 * scale`` and clipped to
            [0, 1]. Only the first two probability columns are used.
        """
        vectors = self.dictionary.feature_vectors(texts)
        pred_prob = np.asarray(self.l.predict_proba(vectors), dtype=float)
        # Difference of the two columns, shifted and halved; for a pair of
        # probabilities this maps the [-1, 1] difference onto [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        # Scaling may push values outside [0, 1], hence the final clip.
        return np.clip(predictions * self.scale, 0, 1)
class RFC(object):
    """Text classifier producing scores in [0, 1].

    Despite its name, this class wraps a ``MultinomialNB`` model. Texts are
    converted to feature vectors by an ``NLPDict`` built from the training
    texts.
    """

    def __init__(self, texts, classes):
        """Build the dictionary from ``texts`` and fit the model.

        Args:
            texts: training texts, used both to build the ``NLPDict`` and to
                produce the training feature vectors.
            classes: training labels, passed to ``fit`` with the vectors.
        """
        self.dictionary = NLPDict(texts=texts)
        vectors = self.dictionary.feature_vectors(texts)
        self.nb = MultinomialNB()
        self.nb.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array with one score per row of ``predict_proba`` output,
            computed as ``((p[1] - p[0]) + 1) / 2`` and clipped to [0, 1].
            Only the first two probability columns are used.
        """
        vectors = self.dictionary.feature_vectors(texts)
        pred_prob = np.asarray(self.nb.predict_proba(vectors), dtype=float)
        # Difference of the two columns, shifted and halved; for a pair of
        # probabilities this maps the [-1, 1] difference onto [0, 1].
        predictions = (pred_prob[:, 1] - pred_prob[:, 0] + 1) / 2
        return np.clip(predictions, 0, 1)
class SGDC(object):
    """Text classifier backed by ``SGDClassifier``, producing scores in [0, 1].

    Uses hinge loss with an L1 penalty. Texts are turned into feature vectors
    by a dictionary object (either supplied by the caller or a freshly built
    ``NLPDict``).
    """

    def __init__(self, texts, classes, nlpdict=None):
        """Build the dictionary (if needed) and train the model.

        Args:
            texts: training texts.
            classes: training labels.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy (including the default
                ``None``), a new ``NLPDict`` is built from ``texts``.
        """
        # TODO: add list of smileys to texts/classes
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight="auto" with "balanced" -- confirm the target version
        # before upgrading the dependency.
        self.s = SGDClassifier(loss="hinge", penalty="l1", shuffle=True,
                               class_weight="auto")
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Vectorize ``texts`` with the dictionary and fit the model."""
        vectors = self.dictionary.feature_vectors(texts)
        self.s.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array of ``decision_function`` values mapped through
            ``score / 20 + 0.5`` and clipped to [0, 1].
        """
        vectors = self.dictionary.feature_vectors(texts)
        scores = np.asarray(self.s.decision_function(vectors), dtype=float)
        # Scores in [-10, 10] map linearly onto [0, 1]; anything beyond that
        # range saturates at the nearest bound.
        return np.clip(scores / 20 + 0.5, 0, 1)
class SVM(object):
    """Text classifier backed by ``LinearSVC``, producing scores in [0, 1].

    Texts are turned into feature vectors by a dictionary object (either
    supplied by the caller or a freshly built ``NLPDict``).
    """

    def __init__(self, texts, classes, nlpdict=None):
        """Build the dictionary (if needed) and train the model.

        Args:
            texts: training texts.
            classes: training labels.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy, a new ``NLPDict`` is
                built from ``texts``.
        """
        # TODO: add list of smileys to texts/classes
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight='auto' with 'balanced' -- confirm the target version
        # before upgrading the dependency.
        self.svm = svm.LinearSVC(C=1000, class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Vectorize ``texts`` with the dictionary and fit the model."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            list of floats: ``decision_function`` values mapped through
            ``score / 2 + 0.5`` and clipped to [0, 1]. A concrete list is
            returned (rather than a lazy ``map`` object) so the result can be
            indexed and iterated more than once on Python 3 as well.
        """
        vectors = self.dictionary.feature_vectors(texts)
        scores = np.asarray(self.svm.decision_function(vectors), dtype=float)
        # decision_function may yield either a flat array or one column per
        # class; in the 2-D case keep the first column so that every text
        # gets exactly one score.
        if scores.ndim > 1:
            scores = scores[:, 0]
        # Scores in [-1, 1] map linearly onto [0, 1]; the rest saturates.
        return np.clip(scores / 2 + 0.5, 0, 1).tolist()
class SVM(object):
    """Text classifier backed by ``LinearSVC``, producing scores in [0, 1].

    Texts are turned into feature vectors by a dictionary object (either
    supplied by the caller or a freshly built ``NLPDict``).
    """

    def __init__(self, texts, classes, nlpdict=None):
        """Build the dictionary (if needed) and train the model.

        Args:
            texts: training texts.
            classes: training labels.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy, a new ``NLPDict`` is
                built from ``texts``.
        """
        # TODO: add list of smileys to texts/classes
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight='auto' with 'balanced' -- confirm the target version
        # before upgrading the dependency.
        self.svm = svm.LinearSVC(C=1000, class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Vectorize ``texts`` with the dictionary and fit the model."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array of ``decision_function`` values mapped through
            ``score / 2 + 0.5`` and clipped to [0, 1], one entry per text.
        """
        vectors = self.dictionary.feature_vectors(texts)
        scores = np.asarray(self.svm.decision_function(vectors), dtype=float)
        # decision_function may yield either a flat array or one column per
        # class. Blindly taking transpose(...)[0] collapses a flat array to a
        # single scalar, so only select the first column when the result is
        # actually 2-D.
        if scores.ndim > 1:
            scores = scores[:, 0]
        # Scores in [-1, 1] map linearly onto [0, 1]; the rest saturates.
        return np.clip(scores / 2 + 0.5, 0, 1)
class SVM(object):
    """Text classifier backed by ``LinearSVC``, producing scores in [0, 1].

    Texts are turned into feature vectors by a dictionary object (either
    supplied by the caller or a freshly built ``NLPDict``).
    """

    def __init__(self, texts, classes, nlpdict=None):
        """Build the dictionary (if needed) and train the model.

        Args:
            texts: training texts.
            classes: training labels.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy, a new ``NLPDict`` is
                built from ``texts``.
        """
        # TODO: add list of smileys to texts/classes
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight="auto" with "balanced" -- confirm the target version
        # before upgrading the dependency.
        self.svm = svm.LinearSVC(C=1000, class_weight="auto")
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Vectorize ``texts`` with the dictionary and fit the model."""
        vectors = self.dictionary.feature_vectors(texts)
        self.svm.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array of ``decision_function`` values mapped through
            ``score / 2 + 0.5`` and clipped to [0, 1], one entry per text.
        """
        vectors = self.dictionary.feature_vectors(texts)
        scores = np.asarray(self.svm.decision_function(vectors), dtype=float)
        # decision_function may yield either a flat array or one column per
        # class. Blindly taking transpose(...)[0] collapses a flat array to a
        # single scalar, so only select the first column when the result is
        # actually 2-D.
        if scores.ndim > 1:
            scores = scores[:, 0]
        # Scores in [-1, 1] map linearly onto [0, 1]; the rest saturates.
        return np.clip(scores / 2 + 0.5, 0, 1)
class SGDC(object):
    """Text classifier backed by ``SGDClassifier``, producing scores in [0, 1].

    Uses hinge loss with an L1 penalty. Texts are turned into feature vectors
    by a dictionary object (either supplied by the caller or a freshly built
    ``NLPDict``).
    """

    def __init__(self, texts, classes, nlpdict=None):
        """Build the dictionary (if needed) and train the model.

        Args:
            texts: training texts.
            classes: training labels.
            nlpdict: optional dictionary object exposing
                ``feature_vectors(texts)``; when falsy (including the default
                ``None``), a new ``NLPDict`` is built from ``texts``.
        """
        # TODO: add list of smileys to texts/classes
        # NOTE(review): newer scikit-learn releases replaced
        # class_weight='auto' with 'balanced' -- confirm the target version
        # before upgrading the dependency.
        self.s = SGDClassifier(loss='hinge', penalty='l1', shuffle=True,
                               class_weight='auto')
        if nlpdict:
            self.dictionary = nlpdict
        else:
            self.dictionary = NLPDict(texts=texts)
        self._train(texts, classes)

    def _train(self, texts, classes):
        """Vectorize ``texts`` with the dictionary and fit the model."""
        vectors = self.dictionary.feature_vectors(texts)
        self.s.fit(vectors, classes)

    def classify(self, texts):
        """Score a list of texts.

        Args:
            texts: texts to score, passed to ``feature_vectors``.

        Returns:
            numpy array of ``decision_function`` values mapped through
            ``score / 20 + 0.5`` and clipped to [0, 1].
        """
        vectors = self.dictionary.feature_vectors(texts)
        scores = np.asarray(self.s.decision_function(vectors), dtype=float)
        # Scores in [-10, 10] map linearly onto [0, 1]; anything beyond that
        # range saturates at the nearest bound.
        return np.clip(scores / 20 + 0.5, 0, 1)