예제 #1
0
 def get_prediction(array):
     """Average LR-style probability estimates over 25 LinearSVC fits.

     Each round trains a fresh ``LinearSVC`` on the module-level
     ``data``/``feat`` pair, scores ``array`` with the private
     ``_predict_proba_lr`` helper, and keeps the first sample's per-class
     probabilities; the rounds are then averaged column-wise.
     """
     rounds = []
     for _ in range(25):
         clf = LinearSVC()
         clf.fit(data, feat)
         probs = clf._predict_proba_lr(array)
         rounds.append(list(probs[0]))
     # Column-wise mean across all 25 probability vectors.
     return [float(sum(column)) / len(column) for column in zip(*rounds)]
예제 #2
0
    def test_14_linearsvc(self):
        # End-to-end check: fit a LinearSVC pipeline, export it to PMML,
        # score it on a remote Zementis server, and require the server's
        # predictions/probabilities to match the local sklearn model.
        print("\ntest 14 (LinearSVC with preprocessing) [binary-class]\n")
        X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()

        clf = LinearSVC()
        pipeline_obj = Pipeline([("model", clf)])
        pipeline_obj.fit(X, y)

        file_name = 'test14sklearn.pmml'
        skl_to_pmml(pipeline_obj, features, target, file_name)

        model_name = self.adapa_utility.upload_to_zserver(file_name)
        z_predictions, z_probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)

        local_pred = pipeline_obj.predict(X_test)
        # LinearSVC has no predict_proba; the private LR squashing is used.
        local_prob = clf._predict_proba_lr(X_test)
        self.assertEqual(self.adapa_utility.compare_predictions(z_predictions, local_pred), True)
        self.assertEqual(self.adapa_utility.compare_probability(z_probabilities, local_prob), True)
예제 #3
0
def cross_eval():
    """Fit a LinearSVC on AAWD binary data and score it on the same split.

    NOTE(review): evaluation reuses the training data, so the printed
    accuracy measures fit rather than generalization.
    """
    aawd_train_x, aawd_train_y, _, _ = get_aawd_binary_train_dev()
    threshold = 0.8

    def to_label(prob):
        # Binarize the positive-class probability at the fixed threshold.
        return prob > threshold

    feature_extractor = svm.NGramFeature(False, 4)
    train_counts = feature_extractor.fit_transform(aawd_train_x)
    classifier = LinearSVC()
    classifier.fit(train_counts, aawd_train_y)

    eval_counts = feature_extractor.transform(aawd_train_x)
    # _predict_proba_lr: private sklearn helper that maps LinearSVC
    # decision margins to probability-like scores; column 1 is the
    # "disagree" (positive) class.
    probs = classifier._predict_proba_lr(eval_counts)
    predictions = lmap(to_label, probs[:, 1])
    print(accuracy(predictions, aawd_train_y))
예제 #4
0
##############################
##########   SVM   ###########
##############################
##############################
from sklearn.svm import LinearSVC, SVC
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd
# import matplotlib.pyplot as plt

# Iris demo: compare LinearSVC's LR-style probability estimates against
# its raw decision-function scores, each joined to the true labels.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

svc = LinearSVC()
svc.fit(X_train, y_train)
svc.score(X_test, y_test)

result = svc._predict_proba_lr(X_test)
result1 = svc.decision_function(X_test)
result_compare = pd.DataFrame(result).join(pd.DataFrame(y_test, columns=['actual']))
result_decision_compare = pd.DataFrame(result1).join(pd.DataFrame(y_test, columns=['actual']))

### CALCULATE RAW MODEL OUTPUT
# Manually reproduce the decision function for one test sample; the
# expression values are discarded (exploratory / REPL-style code).
test_1 = X_test[0]
svc.coef_ @ test_1 + svc.intercept_
svc._predict_proba_lr(X_test)[0]
y_test[0]


# Kernel SVC baseline with a linear kernel for comparison.
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)
                index=None,
                encoding='utf8')
print('MultinomialNB特征已保存\n')

############################ Linersvc(LinerSVC) ################################
print('LinerSVC stacking')
# Out-of-fold probability stacking: each fold writes its held-out rows'
# probabilities into stack_train, while stack_test accumulates (and is
# later averaged over) every fold model's test-set probabilities.
stack_train = np.zeros((len(train), number))
stack_test = np.zeros((len(test), number))
score_va = 0

folds = StratifiedKFold(score, n_folds=n_folds, random_state=1017)
for fold_idx, (train_idx, valid_idx) in enumerate(folds):
    print('stack:%d/%d' % ((fold_idx + 1), n_folds))
    model = LinearSVC(random_state=1017)
    model.fit(train_feature[train_idx], score[train_idx])
    # Private sklearn helper: logistic squashing of LinearSVC margins.
    score_va = model._predict_proba_lr(train_feature[valid_idx])
    score_te = model._predict_proba_lr(test_feature)
    print(score_va)
    print('得分' +
          str(mean_squared_error(score[valid_idx], model.predict(train_feature[valid_idx]))))
    stack_train[valid_idx] += score_va
    stack_test += score_te
stack_test /= n_folds
# Stack train rows on top of test rows and dump rounded features to CSV.
stack = np.vstack([stack_train, stack_test])
df_stack = pd.DataFrame()
for col in range(stack.shape[1]):
    df_stack['tfidf_lsvc_classfiy_{}'.format(col)] = np.around(stack[:, col], 6)
df_stack.to_csv('feature/tfidf_lsvc_2_error_single_classfiy.csv',
                index=None,
                encoding='utf8')
print('LSVC特征已保存\n')
                             strip_accents='unicode',
                             norm='l2')


# Vectorize the NYT train/test corpora with the vectorizer built above.
X_train = vectorizer.fit_transform(train_nyt_data)
X_test = vectorizer.transform(test_nyt_data1)

from sklearn.decomposition import TruncatedSVD

# Reduce the sparse feature matrices to 500 dense LSA components.
svd = TruncatedSVD(n_components=500)
X_train = svd.fit_transform(X_train)
X_test = svd.transform(X_test)

# Class weights rebalance the binary classes; _predict_proba_lr maps the
# LinearSVC margins to probability-like scores (private sklearn API).
svm_classifier = LinearSVC(class_weight={1: 8.3, 0: 10}).fit(X_train, ytrain)
proba = svm_classifier._predict_proba_lr(X_test)

print(proba)
for row in proba:
    print(row)

ytest = lb.transform(y_test)
y_svm_predicted = svm_classifier.predict(X_test)

# Per-class true-positive counters, tallied by the loop that follows.
TP0 = 0
TP1 = 0
for i in range(0, len(proba)) :
    if ytest[i] == 0 and  y_svm_predicted[i] == 0:
        TP0 =TP0 +1
    if ytest[i] == 1 and  y_svm_predicted[i] == 1:
    # show the plot
    plt.savefig("Logistic ROC.png")

    # SVC Model: fit a LinearSVC and report the same metric suite as the
    # logistic model above (confusion matrix, ROC AUC, report, accuracy).
    print("-------- Started SVC Modeling ----")
    svc = LinearSVC()
    svc.fit(Xtrain, ytrain)
    svc_predict = svc.predict(Xtest)
    # cnf_mtx presumably renders and saves a confusion-matrix figure —
    # defined elsewhere in this file; confirm its signature there.
    cnf_mtx(ytest, svc_predict, "SVC-Confusion Matrix.png")
  

    print("ROC AUC score: %.3f " % roc_auc_score(ytest, svc_predict)) #ROC
    print(classification_report(ytest, svc_predict))
    print("Accuracy: %.3f" % accuracy_score(ytest, svc_predict))
    # "No skill" baseline: predict probability 0 for every sample.
    ns_probs = [0 for _ in range(len(ytest))]
    # LinearSVC has no predict_proba; _predict_proba_lr squashes the
    # decision margins through a logistic function (private sklearn API).
    svc_probs = svc._predict_proba_lr(Xtest)
    # keep probabilities for the positive outcome only
    svc_probs = svc_probs[:, 1]
    # calculate scores
    ns_auc = roc_auc_score(ytest, ns_probs)
    svc_auc = roc_auc_score(ytest, svc_probs)
    # summarize scores
    print('No Skill: ROC AUC=%.3f' % (ns_auc))
    print('SVC: ROC AUC=%.3f' % (svc_auc))
    # calculate roc curves
    ns_fpr, ns_tpr, _ = roc_curve(ytest, ns_probs,)
    svc_fpr, svc_tpr, _ = roc_curve(ytest, svc_probs)
    # plot the roc curve for the model
    plt.clf() # clear and new graph
    plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill')
    plt.plot(svc_fpr, svc_tpr, marker='.', label='SVC')