Example #1
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
# CNN is a project-specific wrapper defined elsewhere; it is assumed to expose
# fit/predict/decision_function like the scikit-learn estimators above.


class Model:
    """Select one of several classifiers by name and expose a uniform
    fit/predict/predict_proba interface."""

    def __init__(self, name):
        self.name = name
        self.construct()

    def construct(self):
        if self.name == "linear":
            self.model = LinearSVC()
        elif self.name == "logistic":
            # Alternative: LogisticRegression(solver='lbfgs', multi_class='multinomial', n_jobs=-1)
            self.model = LogisticRegression()
        elif self.name == "cnn":
            self.model = CNN()
        elif self.name == "bayes":
            self.model = MultinomialNB()
        else:
            raise ValueError(f"unknown model name: {self.name!r}")

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        # LinearSVC and the CNN wrapper do not implement predict_proba,
        # so their decision_function scores are returned instead.
        if self.name in ("linear", "cnn"):
            return self.model.decision_function(X)
        return self.model.predict_proba(X)

    def fit(self, X, Y):
        self.model.fit(X, Y)
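A minimal usage sketch of the wrapper, assuming the scikit-learn imports above; the texts, labels, and CountVectorizer features are toy placeholders, not part of the original example.

# Usage sketch with toy data (hypothetical, for illustration only).
from sklearn.feature_extraction.text import CountVectorizer

texts = ["good movie", "bad movie", "great film", "terrible film"]
labels = [1, 0, 1, 0]
X = CountVectorizer().fit_transform(texts)

model = Model("bayes")          # one of "linear", "logistic", "cnn", "bayes"
model.fit(X, labels)
print(model.predict(X))         # predicted class labels
print(model.predict_proba(X))   # probabilities, or margins for "linear"/"cnn"

The uniform interface lets calling code swap classifiers by changing only the name string, at the cost of predict_proba returning uncalibrated margins in the SVM and CNN cases.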
Example #2
from scipy.stats import pearsonr
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
# `function.data2vec` and EMOTIONS_LIST are project-specific helpers assumed
# to be available from the surrounding code base.


def test(method, train_data, test_data):
    train_text, train_label, train_votes = function.data2vec(train_data)
    test_text, test_label, test_votes = function.data2vec(test_data)

    # Train: fit a bag-of-words vectorizer on the training texts only.
    vec_model = CountVectorizer()
    train_text_vec = vec_model.fit_transform(train_text)
    if method == 'naive_bayes':
        cls = MultinomialNB()
    else:
        cls = LinearSVC()
    cls.fit(train_text_vec, train_label)

    # Predict labels on the held-out test set and report per-class metrics.
    test_text_vec = vec_model.transform(test_text)
    predicted_label = cls.predict(test_text_vec)
    print(
        classification_report(test_label,
                              predicted_label,
                              digits=4,
                              target_names=EMOTIONS_LIST,
                              zero_division=0))

    # Per-sample scores for the correlation metric: Naive Bayes provides
    # class probabilities, LinearSVC only decision-function margins.
    if method == 'naive_bayes':
        predicted_votes = cls.predict_proba(test_text_vec)
    else:
        predicted_votes = cls.decision_function(test_text_vec)

    # Average Pearson correlation between predicted scores and the true
    # vote distribution of each test sample.
    coefficient = 0
    for p_vote, t_vote in zip(predicted_votes, test_votes):
        coefficient += pearsonr(p_vote, t_vote)[0]
    coefficient /= len(predicted_votes)
    print(f"coefficient = {coefficient:.4f}")
# -----------------------------------------------------------------------
# Plot the confusion matrix as a heat map
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

df_cm = pd.DataFrame(conf_matrix, index=class_names, columns=class_names)
plt.figure(figsize=(5, 5))
hm = sns.heatmap(df_cm, cbar=False, annot=True, square=True, fmt='d', annot_kws={'size': 20},
                 yticklabels=df_cm.columns, xticklabels=df_cm.columns)
hm.yaxis.set_ticklabels(hm.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=10)
hm.xaxis.set_ticklabels(hm.xaxis.get_ticklabels(), rotation=0, ha='right', fontsize=10)
plt.ylabel('True label', fontsize=20)
plt.xlabel('Predicted label', fontsize=20)
# Show heat map
plt.tight_layout()
plt.show()
# -----------------------------------------------------------------------
# Plot ROC Area Under Curve

# `clf`, `X_test` and `y_test` are assumed to come from training code not shown here.
from sklearn.metrics import roc_auc_score, roc_curve

y_predict_probability = clf.decision_function(X_test)
fpr, tpr, _ = roc_curve(y_test, y_predict_probability)
auc = roc_auc_score(y_test, y_predict_probability)

# print(fpr)
# print(tpr)
# print(auc)

plt.figure()
lw = 2
plt.plot(fpr, tpr, color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % auc)
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')