def get_prediction(array, n_runs=25):
    """Average the probability estimates of `n_runs` freshly fitted LinearSVC models.

    Each model is fit on the module-level `data` / `feat` arrays, then scored
    on `array` via the private `_predict_proba_lr` helper (LinearSVC exposes
    no public `predict_proba`).  Averaging several runs smooths run-to-run
    variation from the solver.

    Args:
        array: feature matrix to score; only the first row's estimates are kept.
        n_runs: number of independently fitted models to average
            (default 25, preserving the original hard-coded behaviour).

    Returns:
        list[float]: per-class probability estimates for the first row of
        `array`, averaged column-wise over all runs.
    """
    per_run = []
    for _ in range(n_runs):
        clf = LinearSVC()
        # NOTE(review): relies on module globals `data` and `feat` — TODO confirm
        clf.fit(data, feat)
        per_run.append(list(clf._predict_proba_lr(array)[0]))
    # Column-wise mean across all runs.
    return [sum(col) / len(col) for col in zip(*per_run)]
def test_14_linearsvc(self):
    """Round-trip test: a LinearSVC pipeline exported to PMML must score the
    same on the remote zserver as the in-memory sklearn model.

    NOTE(review): probabilities come from the private `_predict_proba_lr`
    helper, since LinearSVC has no public `predict_proba` — presumably the
    PMML scorer applies the matching logistic transform; verify against the
    exporter.
    """
    print("\ntest 14 (LinearSVC with preprocessing) [binary-class]\n")
    # Project-local utility supplies a binary-classification dataset plus
    # the metadata the PMML exporter needs.
    X, X_test, y, features, target, test_file = self.data_utility.get_data_for_binary_classification()
    model = LinearSVC()
    pipeline_obj = Pipeline([("model", model)])
    pipeline_obj.fit(X, y)
    # Export the fitted pipeline to PMML and deploy it to the scoring server.
    file_name = 'test14sklearn.pmml'
    skl_to_pmml(pipeline_obj, features, target, file_name)
    model_name = self.adapa_utility.upload_to_zserver(file_name)
    # Remote predictions/probabilities vs. local sklearn output.
    predictions, probabilities = self.adapa_utility.score_in_zserver(model_name, test_file)
    model_pred = pipeline_obj.predict(X_test)
    model_prob = model._predict_proba_lr(X_test)
    self.assertEqual(self.adapa_utility.compare_predictions(predictions, model_pred), True)
    self.assertEqual(self.adapa_utility.compare_probability(probabilities, model_prob), True)
def cross_eval():
    """Fit a LinearSVC on AAWD n-gram features and print its accuracy.

    NOTE(review): despite the name, the model is evaluated on the *training*
    split itself (the original aliased eval_x/eval_y to the train arrays) —
    looks intentional, but worth confirming.
    """
    texts, labels, _, _ = get_aawd_binary_train_dev()
    cutoff = 0.8

    # Character/word n-gram features (project-local extractor).
    extractor = svm.NGramFeature(False, 4)
    train_matrix = extractor.fit_transform(texts)

    clf = LinearSVC()
    clf.fit(train_matrix, labels)

    # Score the same data the model was trained on; column 1 is the
    # "disagree" probability estimate from the private logistic helper.
    eval_matrix = extractor.transform(texts)
    disagree = clf._predict_proba_lr(eval_matrix)[:, 1]
    predictions = lmap(lambda prob: prob > cutoff, disagree)
    print(accuracy(predictions, labels))
############################## ########## SVM ########### ############################## ############################## from sklearn.svm import LinearSVC, SVC from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split import pandas as pd # import matplotlib.pyplot as plt iris = load_iris() X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target) svc = LinearSVC() svc.fit(X_train, y_train) svc.score(X_test, y_test) result = svc._predict_proba_lr(X_test) result1 = svc.decision_function(X_test) result_compare=pd.DataFrame(result).join(pd.DataFrame(y_test, columns=['actual'])) result_decision_compare=pd.DataFrame(result1).join(pd.DataFrame(y_test, columns=['actual'])) test_1 = X_test[0] ###CALCULATE RAW MODEL OUTPUT svc.coef_@test_1+svc.intercept_ svc._predict_proba_lr(X_test)[0] y_test[0] svm = SVC(kernel='linear') svm.fit(X_train, y_train) svm.score(X_test, y_test)
index=None, encoding='utf8')  # tail of the preceding to_csv(...) call (its head is above this chunk)
print('MultinomialNB特征已保存\n')  # "MultinomialNB features saved"

# ---------------- LinearSVC stacking ----------------
# Out-of-fold stacking: one LinearSVC per CV fold; collect its probability
# estimates for the held-out rows (stack_train) and the fold-averaged
# estimates for the test set (stack_test).
print('LinerSVC stacking')
# `number` is defined elsewhere in the file — presumably the class count; TODO confirm.
stack_train = np.zeros((len(train), number))
stack_test = np.zeros((len(test), number))
score_va = 0
# NOTE(review): `StratifiedKFold(y, n_folds=...)` is the pre-0.18 sklearn
# API (iterable of splits) — confirm the pinned sklearn version.
for i, (tr, va) in enumerate(StratifiedKFold(score, n_folds=n_folds, random_state=1017)):
    print('stack:%d/%d' % ((i + 1), n_folds))
    lsvc = LinearSVC(random_state=1017)
    lsvc.fit(train_feature[tr], score[tr])
    # `_predict_proba_lr` is private: LinearSVC has no public predict_proba.
    score_va = lsvc._predict_proba_lr(train_feature[va])
    score_te = lsvc._predict_proba_lr(test_feature)
    print(score_va)
    print('得分' + str(mean_squared_error(score[va], lsvc.predict(train_feature[va]))))  # "score" + MSE on this fold
    stack_train[va] += score_va   # held-out rows: each row filled exactly once
    stack_test += score_te        # test rows: summed now, averaged below
stack_test /= n_folds

# Stack train-fold and test estimates vertically and persist as features,
# one CSV column per class, rounded to 6 decimals.
stack = np.vstack([stack_train, stack_test])
df_stack = pd.DataFrame()
for i in range(stack.shape[1]):
    df_stack['tfidf_lsvc_classfiy_{}'.format(i)] = np.around(stack[:, i], 6)
df_stack.to_csv('feature/tfidf_lsvc_2_error_single_classfiy.csv', index=None, encoding='utf8')
print('LSVC特征已保存\n')  # "LSVC features saved"
strip_accents='unicode', norm='l2')  # tail of the vectorizer constructor (its head is above this chunk)
X_train = vectorizer.fit_transform(train_nyt_data)
X_test = vectorizer.transform(test_nyt_data1)

# Reduce the tf-idf matrix to 500 latent dimensions (truncated SVD / LSA).
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components = 500)
X_train = svd.fit_transform(X_train)
X_test = svd.transform(X_test)

# Class-weighted linear SVM — the weights presumably compensate for class
# imbalance; TODO confirm against the dataset's class distribution.
svm_classifier = LinearSVC(class_weight={1:8.3, 0:10}).fit(X_train, ytrain)
# `_predict_proba_lr` is private: LinearSVC has no public predict_proba.
proba = svm_classifier._predict_proba_lr(X_test)
print(proba)
for i in proba:
    print(i)

ytest = lb.transform(y_test)  # `lb` is presumably a fitted LabelBinarizer — confirm above
y_svm_predicted = svm_classifier.predict(X_test)

# Count true positives per class (chunk is cut off mid-loop below).
TP0 = 0
TP1 = 0
for i in range(0, len(proba)):
    if ytest[i] == 0 and y_svm_predicted[i] == 0:
        TP0 = TP0 + 1
    if ytest[i] == 1 and y_svm_predicted[i] == 1:
# show the plot plt.savefig("Logistic ROC.png") #SVC Model print("-------- Started SVC Modeling ----") svc = LinearSVC() svc.fit(Xtrain, ytrain) svc_predict = svc.predict(Xtest) cnf_mtx(ytest, svc_predict, "SVC-Confusion Matrix.png") print("ROC AUC score: %.3f " % roc_auc_score(ytest, svc_predict)) #ROC print(classification_report(ytest, svc_predict)) print("Accuracy: %.3f" % accuracy_score(ytest, svc_predict)) ns_probs = [0 for _ in range(len(ytest))] svc_probs = svc._predict_proba_lr(Xtest) # keep probabilities for the positive outcome only svc_probs = svc_probs[:, 1] # calculate scores ns_auc = roc_auc_score(ytest, ns_probs) svc_auc = roc_auc_score(ytest, svc_probs) # summarize scores print('No Skill: ROC AUC=%.3f' % (ns_auc)) print('SVC: ROC AUC=%.3f' % (svc_auc)) # calculate roc curves ns_fpr, ns_tpr, _ = roc_curve(ytest, ns_probs,) svc_fpr, svc_tpr, _ = roc_curve(ytest, svc_probs) # plot the roc curve for the model plt.clf() # clear and new graph plt.plot(ns_fpr, ns_tpr, linestyle='--', label='No Skill') plt.plot(svc_fpr, svc_tpr, marker='.', label='SVC')