def gnb_estimators_growing(clf, x_test, y_test, x_train, y_train):
    penalty = [2**i for i in range(-5, 15, 3)]
    gamma = [2**i for i in range(-15, 3, 2)]
    err_train = []
    err_test = []
    clf.C = 1.0
    for n_gamma in gamma:
        print('For n_gamma:', n_gamma)
        clf.gamma = n_gamma
        for n_penalty in penalty:
            print('For n_penalty:', n_penalty)
            clf.C = n_penalty
            clf.fit(x_train, y_train)
            err_train.append(hamming_loss(y_train, clf.predict(x_train)))
            err_test.append(hamming_loss(y_test, clf.predict(x_test)))
        plt.plot(penalty, err_train, color="blue", label="train")
        plt.plot(penalty, err_test, color="red", label="test")
        plt.xlabel("Penalty")
        plt.ylabel("Error")
        plt.legend(loc="upper right", fancybox=True)
        plt.show()
        err_train = []
        err_test = []
    print('Growing algorithm ended')
def compare_manual_vs_model():
    with open(DATA_FOLDER + "labels_int.p", "rb") as f:  # binary mode for pickle
        y_dict = pickle.load(f)
    print "Loading test data"
    X_test, y_test, filenames_test = dataset.load_test()
    y_pred = joblib.load("../models/pred_ml_improved.pkl")
    relevant = []
    for pred, correct, filename in zip(y_pred, y_test, filenames_test):
        if filename in FILES:
            relevant.append((pred, correct, filename, CLASSIFICATIONS[filename]))
    model_predictions, correct, filename, manual_predictions = zip(*relevant)
    manual_predictions = learn.multilabel_binary_y(manual_predictions)
    model_predictions = np.array(model_predictions)
    correct = learn.multilabel_binary_y(correct)
    rules = infer_topology.infer_topology_rules()
    improved_manual = infer_topology.apply_topology_rules(rules, manual_predictions)
    prediction_names = ["MODEL", "MANUAL", "IMPROVED_MANUAL"]
    predictions = [model_predictions, manual_predictions, improved_manual]
    for name, pred in zip(prediction_names, predictions):
        print "\n{}\n--".format(name)
        print "Zero-one classification loss", zero_one_loss(correct, pred)
        print "Hamming loss", hamming_loss(correct, pred)
        print "Precision:", precision_score(correct, pred, average="weighted", labels=label_list)
        print "Recall   :", recall_score(correct, pred, average="weighted", labels=label_list)
        print "F1 score :", f1_score(correct, pred, average="weighted", labels=label_list)
def report_dataset(X, y_true, title):
    y_proba = model.predict_proba(X, batch_size=batch_size)
    # multi-label classes with default threshold
    y_pred = y_proba >= 0.5
    print(title + ' accuracy (exact match):', accuracy_score(y_true, y_pred))
    print(title + ' hamming score (non-exact match):', 1 - hamming_loss(y_true, y_pred))
    print(title + ' AUC:', roc_auc_score(y_true.flatten(), y_proba.flatten()))
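# The report_dataset snippet above assumes an in-scope Keras `model` and
# `batch_size`. A self-contained sketch of the same thresholding pattern,
# using made-up arrays (all names below are illustrative, not from the source):
import numpy as np
from sklearn.metrics import accuracy_score, hamming_loss, roc_auc_score

y_true = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])  # multilabel ground truth
y_proba = np.array([[0.9, 0.2, 0.7], [0.1, 0.8, 0.4], [0.6, 0.3, 0.2]])

y_pred = y_proba >= 0.5                                     # default 0.5 threshold
print('exact match  :', accuracy_score(y_true, y_pred))    # every label must agree
print('hamming score:', 1 - hamming_loss(y_true, y_pred))  # per-label agreement rate
print('AUC          :', roc_auc_score(y_true.flatten(), y_proba.flatten()))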
def calculate_result(actual, pred):
    m_precision = metrics.precision_score(actual, pred)
    m_recall = metrics.recall_score(actual, pred)
    print 'Hamming_loss:{0:.3f}'.format(hamming_loss(actual, pred))
    print 'Precision:{0:.3f}'.format(m_precision)
    print 'Recall:{0:0.3f}'.format(m_recall)
    print 'F1-score:{0:.3f}'.format(metrics.f1_score(actual, pred, average='micro'))
def svmDesc(lab_pred, lab_test, title='Confusion matrix', cmap=plot.cm.Blues,
            taskLabels=taskLabels, normal=True):
    # build confusion matrix itself
    conM = confusion_matrix(lab_test, lab_pred)
    if normal:
        conM = conM.astype('float') / conM.sum(axis=1)[:, np.newaxis]

    # build heatmap graph of matrix
    plot.imshow(conM, interpolation='nearest', cmap=cmap)
    plot.title(title)
    plot.colorbar()
    tick_marks = np.arange(len(taskLabels))
    plot.xticks(tick_marks, taskLabels, rotation=45)
    plot.yticks(tick_marks, taskLabels)
    plot.tight_layout()
    plot.ylabel('True label')
    plot.xlabel('Predicted label')

    # classification report
    creport = classification_report(lab_test, lab_pred)
    print "CLASSIFICATION REPORT: "
    print creport

    # hamming distance
    hamming = hamming_loss(lab_test, lab_pred)
    print "HAMMING DISTANCE: %s" % str(hamming)

    # jaccard similarity score
    jaccard = jaccard_similarity_score(lab_test, lab_pred)
    print "JACCARD SIMILARITY SCORE: %s" % str(jaccard)

    # precision score
    pscore = precision_score(lab_test, lab_pred)
    print "PRECISION SCORE: %s" % str(pscore)
def train_and_eval(x_train, y_train, x_test, y_test, model, param_result):
    print("\nTraining and evaluating...")
    for result_list in param_result:
        print("Fitting: " + str(result_list[2]))
        opt_model = result_list[2]
        opt_model.fit(x_train, y_train)
        y_pred = opt_model.predict(x_test)
        print("\nClassification Report:")
        print(metrics.classification_report(y_test, y_pred))
        print("\nAccuracy Score:")
        print(metrics.accuracy_score(y_test, y_pred))
        print("\nConfusion Matrix:")
        print(metrics.confusion_matrix(y_test, y_pred))
        print("\nF1-Score:")
        print(metrics.f1_score(y_test, y_pred))
        print("\nHamming Loss:")
        print(metrics.hamming_loss(y_test, y_pred))
        print("\nJaccard Similarity:")
        print(metrics.jaccard_similarity_score(y_test, y_pred))
        # vvv Not supported due to ValueError: y_true and y_pred have different number of classes 3, 2
        # print('\nLog Loss:')
        # print(metrics.log_loss(y_test, y_pred))
        # vvv multiclass not supported
        # print('\nMatthews Correlation Coefficient:')
        # print(metrics.matthews_corrcoef(y_test, y_pred))
        print("\nPrecision:")
        print(metrics.precision_score(y_test, y_pred))
        # vvv Not supported due to ValueError: y_true and y_pred have different number of classes 3, 2
        # print('\nRecall:')
        # print(metrics.recall(y_test, y_pred))
        print()
def err(k):
    # tmp = Ysub[k, :].dot(proj1000T)
    tmp = Ysub[k, :].dot(proj)
    pred = (tmp > 0.5).astype(int)
    # return absolute number of incorrect labels per sample
    # (hamming loss is normalized by the number of columns)
    # return metrics.hamming_loss(Ytrunc[k, :].todense(), pred) * 42048
    return metrics.hamming_loss(y_testBin[k, :], pred) * 42048
def hamming_loss(self, classifier_name, context, information, pattern_kind):
    # local import: this method shadows sklearn's hamming_loss name
    from sklearn.metrics import hamming_loss
    self.measures[classifier_name]["hamming_loss"] = hamming_loss(
        context["patterns"].patterns[classifier_name][pattern_kind],
        information.info[classifier_name]["discretized_outputs"][pattern_kind])
def test_calc_hamming_loss(self):
    labels_true = [s['label'] for s in self.samples]
    data = [s['info'] for s in self.samples]
    labels_pred = self.model.predict(data)
    loss = hamming_loss(labels_true, labels_pred)
    self.assertLess(loss, self.BASELINE_LOSS)
def label_based_measures(y_true, y_pred):
    """
    Evaluation measures used to assess the predictive performance in
    multi-label label-based learning: hamming_loss, precision, recall and f1.
    """
    m = {}
    m['hamming_accuracy'] = 1 - hamming_loss(y_true, y_pred)
    m['precision'], m['recall'], m['f1'], _ = precision_recall_fscore_support(y_true, y_pred)
    return m
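# Hypothetical usage of label_based_measures above; note that
# precision_recall_fscore_support defaults to average=None, so 'precision',
# 'recall' and 'f1' each come back as one value per label:
import numpy as np

y_true = np.array([[1, 0, 1], [0, 1, 1]])
y_pred = np.array([[1, 0, 0], [0, 1, 1]])
m = label_based_measures(y_true, y_pred)
print(m['hamming_accuracy'])  # 5 of 6 entries agree -> 0.833...
print(m['precision'])         # array with one precision per label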
def randomClassifyClasses(self, ticketsToClasses):
    # conditionally classify something correctly as a class.
    # we need labeled data with the classes changed in that commit
    tickets = [(ticket, classes) for ticket, classes in ticketsToClasses.items()]
    random.shuffle(tickets)
    trainIndex = int(len(tickets) * .8)
    trainTickets = tickets[:trainIndex]
    testTickets = tickets[trainIndex:]
    print(len(tickets))
    print(trainIndex)
    trainText = np.array([ticket[0].summary for ticket in trainTickets])
    trainLabels = np.array([ticket[1] for ticket in trainTickets])
    testText = np.array([ticket[0].summary for ticket in testTickets])
    testLabels = np.array([ticket[1] for ticket in testTickets])
    ticketLabels = [ticket[1] for ticket in tickets]
    target_names = list(set([label for labelList in ticketLabels for label in labelList]))
    print("Total of %d labels, so %.5f accuracy is baseline"
          % (len(target_names), 1.0 / len(target_names)))
    lb = preprocessing.LabelBinarizer()
    Y = lb.fit_transform(trainLabels)
    # dv = DictVectorizer()
    classifier = Pipeline([
        ('hash', HashingVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', OneVsRestClassifier(LinearSVC()))])
    classifier.fit(trainText, Y)
    # predicted = classifier.predict(testText)
    predictedLabels = []
    numLabels = len(lb.classes_)
    for i in range(0, len(testTickets)):
        labelList = [lb.classes_[random.randrange(0, numLabels - 1)]
                     for j in range(0, random.randrange(0, numLabels))]
        predictedLabels.append(labelList)
    predictedLabels = np.array(predictedLabels)
    fpredictedLabels = [pred for pred in predictedLabels if len(pred) != 0]
    ftestLabels = [testLabels[i] for i in range(0, len(testLabels)) if len(predictedLabels[i]) != 0]
    ftestText = [testText[i] for i in range(0, len(testLabels)) if len(predictedLabels[i]) != 0]
    print("original: %d filtered %d" % (len(predictedLabels), len(fpredictedLabels)))
    for i in range(0, len(predictedLabels)):
        if len(predictedLabels[i]) == 0:
            print(i)
    for item, plabels, alabels in zip(ftestText, fpredictedLabels, ftestLabels):
        print('TICKET: \n%s PREDICTED => \n\t\t%s' % (item, ', '.join(plabels)))
        print('\n\t\tACTUAL => \n\t\t%s' % ', '.join(alabels))
    # classification_report(testLabels, predictedLabels)
    f1Score = f1_score(ftestLabels, fpredictedLabels)
    precision = precision_score(ftestLabels, fpredictedLabels)
    accuracy = accuracy_score(ftestLabels, fpredictedLabels)
    recall = recall_score(ftestLabels, fpredictedLabels)
    hamming = hamming_loss(ftestLabels, fpredictedLabels)
    self.classifier = classifier
    return (precision, recall, accuracy, f1Score, hamming)
def rf_estimators_growing(clf, x_test, y_test, x_train, y_train):
    estimators = [i for i in range(145, 150, 1)]
    # [1, 2, 3, 4, 5, 10, 20, 30, 40, 50]
    # + [i for i in range(100, 1000, 100)]
    # + [i for i in range(1000, 6000, 1000)]
    err_train = []
    err_test = []
    clf.max_features = 5
    for n_estimators in estimators:
        print('For n_estimators:', n_estimators)
        clf.n_estimators = n_estimators
        clf.fit(x_train, y_train)
        err_train.append(hamming_loss(y_train, clf.predict(x_train)))
        err_test.append(hamming_loss(y_test, clf.predict(x_test)))
    plt.plot(estimators, err_test, 'r-')
    plt.show()
    print('Growing algorithm ended')
def test_losses():
    """Test loss functions"""
    y_true, y_pred, _ = make_prediction(binary=True)
    n_samples = y_true.shape[0]
    n_classes = np.size(unique_labels(y_true))

    # Classification
    # --------------
    with warnings.catch_warnings(True):
        # Throw deprecated warning
        assert_equal(zero_one(y_true, y_pred), 13)
        assert_almost_equal(zero_one(y_true, y_pred, normalize=True),
                            13 / float(n_samples), 2)

    assert_almost_equal(zero_one_loss(y_true, y_pred),
                        13 / float(n_samples), 2)
    assert_equal(zero_one_loss(y_true, y_pred, normalize=False), 13)
    assert_almost_equal(zero_one_loss(y_true, y_true), 0.0, 2)
    assert_almost_equal(zero_one_loss(y_true, y_true, normalize=False), 0, 2)

    assert_almost_equal(hamming_loss(y_true, y_pred),
                        2 * 13. / (n_samples * n_classes), 2)

    assert_equal(accuracy_score(y_true, y_pred),
                 1 - zero_one_loss(y_true, y_pred))
    assert_equal(accuracy_score(y_true, y_pred, normalize=False),
                 n_samples - zero_one_loss(y_true, y_pred, normalize=False))

    with warnings.catch_warnings(True):
        # Throw deprecated warning
        assert_equal(zero_one_score(y_true, y_pred),
                     1 - zero_one_loss(y_true, y_pred))

    # Regression
    # ----------
    assert_almost_equal(mean_squared_error(y_true, y_pred),
                        12.999 / n_samples, 2)
    assert_almost_equal(mean_squared_error(y_true, y_true), 0.00, 2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    assert_almost_equal(mean_absolute_error(y_true, y_pred),
                        12.999 / n_samples, 2)
    assert_almost_equal(mean_absolute_error(y_true, y_true), 0.00, 2)

    assert_almost_equal(explained_variance_score(y_true, y_pred), -0.04, 2)
    assert_almost_equal(explained_variance_score(y_true, y_true), 1.00, 2)
    assert_equal(explained_variance_score([0, 0, 0], [0, 1, 1]), 0.0)

    assert_almost_equal(r2_score(y_true, y_pred), -0.04, 2)
    assert_almost_equal(r2_score(y_true, y_true), 1.00, 2)
    assert_equal(r2_score([0, 0, 0], [0, 0, 0]), 1.0)
    assert_equal(r2_score([0, 0, 0], [0, 1, 1]), 0.0)
def print_results(Y_test, Y_pred, classes_, f):
    print("Hamming score (acc)\t", 1 - hamming_loss(Y_test, Y_pred), file=f)
    print("F1 (micro-averaged)\t", f1_score(Y_test, Y_pred, average='micro'), file=f)
    print("F1 (macro-averaged)\t", f1_score(Y_test, Y_pred, average='macro'), file=f)
    print("\nLabel\tAccuracy\tPrecision\tRecall\tF1", file=f)
    for i, label in enumerate(classes_):
        print(label + "\t" +
              "%.4f" % accuracy_score(Y_test[:, i], Y_pred[:, i]) + "\t" +
              "%.4f" % precision_score(Y_test[:, i], Y_pred[:, i]) + "\t" +
              "%.4f" % recall_score(Y_test[:, i], Y_pred[:, i]) + "\t" +
              "%.4f" % f1_score(Y_test[:, i], Y_pred[:, i]), file=f)
def run(y_true, y_pred):
    perf = {}
    perf['accuracy'] = accuracy_score(y_true, y_pred)
    perf['precision'] = precision_score(y_true, y_pred, average='micro')
    perf['recall'] = recall_score(y_true, y_pred, average='micro')
    perf['fbeta_score'] = fbeta_score(y_true, y_pred, average='macro', beta=1.0)
    perf['hamming_loss'] = hamming_loss(y_true, y_pred)
    perf['cm'] = confusion_matrix(y_true, y_pred)
    return perf
def evalClassifier(vScore_test, thePredictedScores):
    target_names = ['Low_Risk', 'High_Risk']
    '''
    the way sklearn treats it: first index -> lower index -> 0 -> 'Low'
    the next index after the first -> 1 -> 'High'
    '''
    print "precision, recall, F-stat"
    print(classification_report(vScore_test, thePredictedScores, target_names=target_names))
    print "*********************"
    # preserve the order: first test (real values from dataset), then predicted (from the classifier)
    '''
    area under the curve values .... ref: http://gim.unmc.edu/dxtests/roc3.htm
    0.80~0.90 -> good, anything less than 0.70 is bad, 0.90~1.00 -> excellent
    '''
    area_roc_output = roc_auc_score(vScore_test, thePredictedScores)
    print "Area under the ROC curve is ", area_roc_output
    print "*********************"
    '''
    mean absolute error (MAE) .... ref:
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html
    the smaller the better, ideally expect 0.0
    '''
    mae_output = mean_absolute_error(vScore_test, thePredictedScores)
    print "Mean absolute error output is ", mae_output
    print "*********************"
    '''
    accuracy_score ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    fraction of correct predictions, ideally 1.0, the higher the better
    '''
    accuracy_score_output = accuracy_score(vScore_test, thePredictedScores)
    print "Accuracy output is ", accuracy_score_output
    print "*********************"
    '''
    hamming_loss ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    fraction of incorrect predictions, ideally 0.0, the lower the better
    '''
    hamming_loss_output = hamming_loss(vScore_test, thePredictedScores)
    print "Hamming loss output is ", hamming_loss_output
    print "*********************"
    '''
    Jaccard score ... ref: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
    fraction of correct predictions, ideally 1.0, the higher the better
    '''
    jaccardian_output = jaccard_similarity_score(vScore_test, thePredictedScores)
    print "Jaccardian output is ", jaccardian_output
    print "*********************"
def classify(XTrain, XTest, YTrain, YTest, c, cv=False):
    # print XTrain.shape, XTest.shape, YTrain.shape, YTest.shape
    # print classifier
    # YTrain = YTrain.todense()
    # XTrain = XTrain.todense()
    # print type(XTrain)
    # print type(YTrain)
    start_time = time.time()
    # print XTrain.shape
    classifier = OneVsRestClassifier(
        LinearSVC(penalty='l1', loss='l2', C=c, dual=False, multi_class='ovr'))  # , verbose=1)
    classifier.fit(XTrain, YTrain)
    predicted = classifier.predict(XTest)
    # if not a cross-validation instance, need to print results
    if not cv:
        qbGbl.weights = classifier.coef_
        # print report
        print metrics.classification_report(YTest, predicted, target_names=yTransformer.classes_)
        # Collective Statistics
        print 'accuracy score of the classifier: {0}'.format(1.0 - metrics.hamming_loss(YTest, predicted))
        print 'value of the C\t\t\t: {0}'.format(c)
        print 'Time taken to classify\t\t: {0} seconds'.format(time.time() - start_time)
        # print 'precision score of the classifier: {0}'.format(metrics.precision_score(YTest, predicted))
        # print 'recall score of the classifier: {0}'.format(metrics.recall_score(YTest, predicted))
        # print 'F1 score of the classifier: {0}'.format(metrics.f1_score(YTest, predicted))
    return float(1.0 - metrics.hamming_loss(YTest, predicted))
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
def printscore(true_matrix, max_matrix, voting_matrix, mean_matrix):
    print "\t\tvoting classification report"
    print classification_report(true_matrix, voting_matrix)
    print "\t\tmax classification report"
    print classification_report(true_matrix, max_matrix)
    print "\t\tmean classification report"
    print classification_report(true_matrix, mean_matrix)
    print "------------------------------------------"
    print "\tvoting accuracy :{}\n".format(accuracy_score(true_matrix, voting_matrix))
    print "max accuracy :{}\n".format(accuracy_score(true_matrix, max_matrix))
    print "mean accuracy :{}\n".format(accuracy_score(true_matrix, mean_matrix))
    print "------------------------------------------"
    print "\tvoting Hamming loss:{}\n".format(hamming_loss(true_matrix, voting_matrix))
    print "max Hamming loss:{}\n".format(hamming_loss(true_matrix, max_matrix))
    print "mean Hamming loss:{}\n".format(hamming_loss(true_matrix, mean_matrix))
    print "------------------------------------------"
    print "\tvoting f1 score:{}\n".format(f1_score(true_matrix, voting_matrix, average='macro'))
    print "max f1 score:{}\n".format(f1_score(true_matrix, max_matrix, average='macro'))
    print "mean f1 score:{}\n".format(f1_score(true_matrix, mean_matrix, average='macro'))
    print "------------------------------------------"
    fpr, tpr, thresholds = roc_curve(true_matrix, voting_matrix, pos_label=2)
    print "\tvoting auc:{}\n".format(metrics.auc(fpr, tpr))
    fpr, tpr, thresholds = roc_curve(true_matrix, max_matrix, pos_label=2)
    print "max auc:{}\n".format(metrics.auc(fpr, tpr))
    fpr, tpr, thresholds = roc_curve(true_matrix, mean_matrix, pos_label=2)
    print "mean auc:{}\n".format(metrics.auc(fpr, tpr))
    return
def performance(self, preds):
    accuracy = accuracy_score(self.y_test, preds)
    precision = precision_score(self.y_test, preds)
    recall = recall_score(self.y_test, preds)
    f1 = f1_score(self.y_test, preds)
    jss = jaccard_similarity_score(self.y_test, preds)
    hl = hamming_loss(self.y_test, preds)
    zol = zero_one_loss(self.y_test, preds)
    return {'accuracy_score': accuracy,
            'precision_score': precision,
            'recall_score': recall,
            'f1_score': f1,
            'jaccard_similarity_score': jss,
            'hamming_loss': hl,
            'zero_one_loss': zol}
def find_error_rate(shdphmm_seq, true_seq):
    shdphmm_seq = one_index(shdphmm_seq)
    all_permutations_and_dicts = generate_sequence_permutations(shdphmm_seq, true_seq)
    min_error_rate = 1.0
    best_seq = None
    best_matching = None
    for permuted_seq, match_dict in all_permutations_and_dicts:
        error_rate = hamming_loss(permuted_seq, true_seq)
        if error_rate <= min_error_rate:
            min_error_rate = error_rate
            best_seq = permuted_seq
            best_matching = match_dict
    return min_error_rate, best_seq, best_matching
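# For two equal-length label sequences, hamming_loss is just the fraction of
# positions that disagree, which is what makes it usable as an error rate in
# find_error_rate above. Illustrative sequences:
from sklearn.metrics import hamming_loss

true_seq = [1, 1, 2, 2, 3, 3]
perm_seq = [1, 1, 2, 3, 3, 3]
print(hamming_loss(true_seq, perm_seq))  # 1 mismatch / 6 positions = 0.1666...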
def getResult(self, predict, data_set):
    y_true, y_predict = control.calculate_entire_ds(predict, data_set)
    result = metrics.classification_report(y_true, y_predict)
    result += "\nAccuracy classification: %f\n" % metrics.accuracy_score(y_true, y_predict)
    result += "F1 score: %f\n" % metrics.f1_score(y_true, y_predict)
    result += "Fbeta score: %f\n" % metrics.fbeta_score(y_true, y_predict, beta=0.5)
    result += "Hamming loss: %f\n" % metrics.hamming_loss(y_true, y_predict)
    result += "Hinge loss: %f\n" % metrics.hinge_loss(y_true, y_predict)
    result += "Jaccard similarity: %f\n" % metrics.jaccard_similarity_score(y_true, y_predict)
    result += "Precision: %f\n" % metrics.precision_score(y_true, y_predict)
    result += "Recall: %f\n" % metrics.recall_score(y_true, y_predict)
    if self.is_binary():
        result += "Average precision: %f\n" % metrics.average_precision_score(y_true, y_predict)
        result += "Matthews correlation coefficient: %f\n" % metrics.matthews_corrcoef(y_true, y_predict)
        result += "Area Under the Curve: %f" % metrics.roc_auc_score(y_true, y_predict)
    return result
def evaluate_multilabel(y_test, label_list, predictions_file="../models/pred_ml.pkl"):
    y_test_mlb = multilabel_binary_y(y_test)
    y_pred = joblib.load(predictions_file)
    print "F1 score micro:", f1_score(y_test_mlb, y_pred, average="micro", labels=label_list)
    print "F1 score weighted:", f1_score(y_test_mlb, y_pred, average="weighted", labels=label_list)
    print "F1 score samples:", f1_score(y_test_mlb, y_pred, average="samples", labels=label_list)
    print "F1 score:", f1_score(y_test_mlb, y_pred, average=None, labels=label_list)
    print "Accuracy", accuracy_score(y_test_mlb, y_pred)
    print classification_report(y_test_mlb, y_pred, target_names=label_list, digits=5)
    print "Zero-one classification loss", zero_one_loss(y_test_mlb, y_pred)
    print "Hamming loss", hamming_loss(y_test_mlb, y_pred)
    im = y_test_mlb + y_pred * 2
    scipy.misc.imsave("predictions.png", im)
def run_experiment(model, train_X, train_y, test_X, test_y, label_names=None):
    import timeit
    start = timeit.default_timer()
    if label_names:
        model.fit(train_X, train_y, label_names=label_names)
    else:
        model.fit(train_X, train_y)
    end = timeit.default_timer()
    print "Training takes %.2f secs" % (end - start)
    pred_y = model.predict(test_X)
    print "Subset accuracy: %.2f\n" % (accuracy_score(test_y, pred_y) * 100)
    print "Hamming loss: %.2f\n" % (hamming_loss(test_y, pred_y))
    print "Accuracy(Jaccard): %.2f\n" % (jaccard_similarity_score(test_y, pred_y))
    p_ex, r_ex, f_ex, _ = precision_recall_fscore_support(test_y, pred_y, average="samples")
    print "Precision/Recall/F1(example) : %.2f %.2f %.2f\n" % (p_ex, r_ex, f_ex)
    p_mic, r_mic, f_mic, _ = precision_recall_fscore_support(test_y, pred_y, average="micro")
    print "Precision/Recall/F1(micro)   : %.2f %.2f %.2f\n" % (p_mic, r_mic, f_mic)
    p_mac, r_mac, f_mac, _ = precision_recall_fscore_support(test_y, pred_y, average="macro")
    print "Precision/Recall/F1(macro)   : %.2f %.2f %.2f\n" % (p_mac, r_mac, f_mac)
def print_summary(test_y, pred_y):
    print "Subset accuracy: %.2f\n" % (accuracy_score(test_y, pred_y) * 100)
    print "Hamming loss: %.2f\n" % (hamming_loss(test_y, pred_y))
    print "Accuracy(Jaccard): %.2f\n" % (jaccard_similarity_score(test_y, pred_y))
    p_ex, r_ex, f_ex, _ = precision_recall_fscore_support(test_y, pred_y, average="samples")
    print "Precision/Recall/F1(example) : %.2f %.2f %.2f\n" % (p_ex, r_ex, f_ex)
    p_mic, r_mic, f_mic, _ = precision_recall_fscore_support(test_y, pred_y, average="micro")
    print "Precision/Recall/F1(micro)   : %.2f %.2f %.2f\n" % (p_mic, r_mic, f_mic)
    p_mac, r_mac, f_mac, _ = precision_recall_fscore_support(test_y, pred_y, average="macro")
    print "Precision/Recall/F1(macro)   : %.2f %.2f %.2f\n" % (p_mac, r_mac, f_mac)
def compute_performances_for_multiclass(y_test, y_test_predicted, class_names, performances):
    # Compute the accuracy classification score: fraction of correctly classified samples
    performances.accuracy_score_fraction = accuracy_score(y_test, y_test_predicted, normalize=True)
    # Compute the accuracy classification score: number of correctly classified samples
    performances.accuracy_score_number = accuracy_score(y_test, y_test_predicted, normalize=False)
    print("\nAccuracy classification score : ")
    print("   Fraction of correctly classified samples : %.2f" % performances.accuracy_score_fraction)
    print("   Number of correctly classified samples : %.2f" % performances.accuracy_score_number)

    # Compute the Cohen's kappa score
    performances.cohen_kappa_score = cohen_kappa_score(y_test, y_test_predicted)
    print("\nCohen's kappa score : %.2f" % performances.cohen_kappa_score)

    # Compute the confusion matrix without and with normalization
    performances.confusion_matrix_without_normalization = confusion_matrix(y_test, y_test_predicted)
    performances.confusion_matrix_with_normalization = \
        performances.confusion_matrix_without_normalization.astype('float') \
        / performances.confusion_matrix_without_normalization.sum(axis=1)[:, np.newaxis]

    print("\nConfusion matrix : ")
    print("   Confusion matrix without normalization : ")
    square_matrix_size = len(performances.confusion_matrix_without_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('      [' + np.array2string(performances.confusion_matrix_without_normalization[i]))
        elif i == square_matrix_size - 1:
            print('       ' + np.array2string(performances.confusion_matrix_without_normalization[i]) + ']')
        else:
            print('       ' + np.array2string(performances.confusion_matrix_without_normalization[i]))
    print("   Confusion matrix with normalization : ")
    square_matrix_size = len(performances.confusion_matrix_with_normalization)
    for i in range(square_matrix_size):
        if i == 0:
            print('      [' + np.array2string(performances.confusion_matrix_with_normalization[i]))
        elif i == square_matrix_size - 1:
            print('       ' + np.array2string(performances.confusion_matrix_with_normalization[i]) + ']')
        else:
            print('       ' + np.array2string(performances.confusion_matrix_with_normalization[i]))

    # Compute the classification report
    performances.classification_report = classification_report(
        y_test, y_test_predicted, target_names=class_names, digits=4)
    print("\nclassification_report : ")
    print(performances.classification_report)

    # Compute the average Hamming loss
    performances.hamming_loss = hamming_loss(y_test, y_test_predicted)
    print("\nAverage Hamming loss : %.2f" % performances.hamming_loss)

    # Compute the Jaccard similarity coefficient score with and without normalization
    performances.jaccard_similarity_score_with_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=True)
    performances.jaccard_similarity_score_without_normalization = jaccard_similarity_score(
        y_test, y_test_predicted, normalize=False)
    print("\nJaccard similarity coefficient score : ")
    print("   Average of Jaccard similarity coefficient : %.2f"
          % performances.jaccard_similarity_score_with_normalization)
    print("   Sum of the Jaccard similarity coefficient over the sample set : %.2f"
          % performances.jaccard_similarity_score_without_normalization)

    # Compute the precision
    performances.micro_precision = precision_score(y_test, y_test_predicted, average='micro')
    performances.macro_precision = precision_score(y_test, y_test_predicted, average='macro')
    performances.weighted_precision = precision_score(y_test, y_test_predicted, average='weighted')
    performances.none_precision = precision_score(y_test, y_test_predicted, average=None)
    print("\nPrecision score : ")
    print("   micro    : %.2f" % performances.micro_precision)
    print("   macro    : %.2f" % performances.macro_precision)
    print("   weighted : %.2f" % performances.weighted_precision)
    print("   None     : " + np.array2string(performances.none_precision))
    print("   Classes  : " + np.array2string(class_names))

    # Compute the recall
    performances.micro_recall = recall_score(y_test, y_test_predicted, average='micro')
    performances.macro_recall = recall_score(y_test, y_test_predicted, average='macro')
    performances.weighted_recall = recall_score(y_test, y_test_predicted, average='weighted')
    performances.none_recall = recall_score(y_test, y_test_predicted, average=None)
    print("\nRecall score : ")
    print("   micro    : %.2f" % performances.micro_recall)
    print("   macro    : %.2f" % performances.macro_recall)
    print("   weighted : %.2f" % performances.weighted_recall)
    print("   None     : " + np.array2string(performances.none_recall))
    print("   Classes  : " + np.array2string(class_names))

    # Compute the F1 score
    performances.micro_f1_score = f1_score(y_test, y_test_predicted, average='micro')
    performances.macro_f1_score = f1_score(y_test, y_test_predicted, average='macro')
    performances.weighted_f1_score = f1_score(y_test, y_test_predicted, average='weighted')
    performances.none_f1_score = f1_score(y_test, y_test_predicted, average=None)
    print("\nF1-score : ")
    print("   micro    : %.2f" % performances.micro_f1_score)
    print("   macro    : %.2f" % performances.macro_f1_score)
    print("   weighted : %.2f" % performances.weighted_f1_score)
    print("   None     : " + np.array2string(performances.none_f1_score))
    print("   Classes  : " + np.array2string(class_names))

    # Compute the Matthews correlation coefficient
    performances.matthews_corrcoef = matthews_corrcoef(y_test, y_test_predicted)
    print("\nMatthews correlation coefficient : %.2f" % performances.matthews_corrcoef)

    return performances
X = np.array([[411, 500, 426], [100, -11, -96], [125, 900, .00], [.11, 60., 126],
              [211, 100, 16], [300, .60, 926], [11., .00, 26], [341, 700, 126]])
Y = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
              [1, 0, 0], [1, 1, 0], [1, 1, 1], [1, 1, 1]])

# Split after X and Y are defined (the split cannot precede their definition)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

lp = LabelPowersetClassifier(base_estimator=RandomForestClassifier(n_estimators=50))
lp.fit(X_train, Y_train)
# print(lp.predict(X))
Y_pred = lp.predict(X_test)

print("Zero One Loss {}".format(zero_one_loss(Y_test, Y_pred)))
print("Hamming Loss {}".format(hamming_loss(Y_test, Y_pred)))
print("F1 Score {}".format(f1_score(Y_test, Y_pred, average='macro')))
for C in Cs:
    for kernel in kernels:
        for gamma in gammas:
            print('C = ', C, ', kernel = ', kernel, ', gamma = ', gamma)
            inner_start_time = time.time()
            svc_ = SVC(C=C, kernel=kernel, gamma=gamma)
            svc_.fit(x_train, y_train)
            prediction = svc_.predict(x_val)
            inner_end_time = time.time()
            inner_time_passed = inner_end_time - inner_start_time

            hamm = metrics.hamming_loss(y_val, prediction)
            acc = metrics.accuracy_score(y_val, prediction)
            f1 = metrics.f1_score(y_val, prediction, average='micro')
            print('HM:', hamm)
            print('AS:', acc)
            print('F1:', f1)

            if acc > max_acc:
                print('New best accuracy (', acc, '>', max_acc, ') from the combination kernel = ',
                      kernel, ', C =', C, ', gamma = ', gamma)
                max_acc = acc
                best_kernel = kernel
                best_C = C
                best_gamma = gamma
            acc_tuple = (acc, hamm, f1, kernel, C, gamma,
def main(argv):
    # Default values
    PATHOLOGY_NAME = 'Cardiomegaly'
    DATASET_SIZE = 2000
    try:
        opts, args = getopt.getopt(argv, "hp:s:", ["pathology=", "size="])
    except getopt.GetoptError:
        print 'test.py -p <pathology> -s <size>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'test.py -p <pathology> -s <size>'
            sys.exit()
        elif opt in ("-p", "--pathology"):
            PATHOLOGY_NAME = arg
        elif opt in ("-s", "--size"):
            DATASET_SIZE = int(arg)
    print 'Pathology is ', PATHOLOGY_NAME
    print 'Dataset pathology size is ', DATASET_SIZE

    MODEL_NAME = "my" + PATHOLOGY_NAME + str(DATASET_SIZE) + ".h5"
    FILE_NAME = "Data_" + PATHOLOGY_NAME + str(DATASET_SIZE) + ".csv"
    PICTURE_NAME = PATHOLOGY_NAME + str(DATASET_SIZE) + ".png"

    # Load model
    model = loadModel(MODEL_NAME)

    # SGD > RMSprop > Adam
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    # sgd = SGD(lr=1e-4, decay=1e-6, momentum=0.9, nesterov=True)
    # opt = Adam(lr=lr_schedule(0))
    # optimizer = RMSprop(lr=lr_schedule(0), decay=1e-6)
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    # Load and build training set
    data = createDataSet(FILE_NAME)
    dataTrain = loadTrainingDataset(data, PATHOLOGY_NAME, DATASET_SIZE)
    X_train, Y_train = buildImageset(dataTrain, PATHOLOGY_NAME)

    # Add callbacks to monitor model quality
    filepath = OUTPUT_DIR + MODEL_NAME + "-{val_acc:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True)
    lr_scheduler = LearningRateScheduler(lr_schedule)
    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
    # callbacks_list = [checkpoint, lr_scheduler, lr_reducer]
    callbacks_list = [lr_scheduler, lr_reducer]

    # Train model
    # Y_train = to_categorical(Y_train, num_classes=2)
    start = time.time()
    history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epoch, shuffle=True,
                        callbacks=callbacks_list, validation_split=0.10, verbose=1)
    end = time.time()
    print("fit duration : " + str(end - start) + " sec")

    # Set trained flag to 1
    data.loc[data['currentTraining'] == 1, ['trained']] = 1
    print("Already trained images : " + str(data[data['trained'] == 1]['Image Index'].count()))

    # Save current step to training file
    data.to_csv(OUTPUT_DIR + FILE_NAME, index=False)

    # Plot history
    history_plot(history, PICTURE_NAME)

    if PASS_2 == True:
        # Relaunch training to fine-tune the last Inception layers
        for i, layer in enumerate(model.layers):
            print(i, layer.name)
        for layer in model.layers[:249]:
            layer.trainable = False
        for layer in model.layers[249:]:
            layer.trainable = True
        model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
        start = time.time()
        history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=30, shuffle=True,
                            callbacks=callbacks_list, validation_split=0.10, verbose=1)
        end = time.time()
        print("fit duration : " + str(end - start) + " sec")
        # Plot history
        history_plot(history, "1_" + PICTURE_NAME)

    # Save model
    model.save(OUTPUT_DIR + MODEL_NAME)

    # Check results
    dataTest = loadDataset(data, PATHOLOGY_NAME)
    X_test, y_test = buildImageset(dataTest, PATHOLOGY_NAME)
    score = model.evaluate(X_test, y_test, verbose=1, batch_size=batch_size)
    print("\n\n\n###########################")
    print("######## RESULTS ##########\n")
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    out = model.predict(X_test, batch_size=batch_size)
    out = np.array(out)
    np.seterr(divide='ignore', invalid='ignore')
    threshold = np.arange(0.01, 0.99, 0.01)
    best_threshold = bestthreshold(threshold, out, y_test)
    print("best_threshold : " + str(best_threshold))

    if best_threshold < 0.02:
        # Try to find a better one
        threshold = np.arange(0.001, 0.01, 0.001)
        best_threshold = bestthreshold(threshold, out, y_test)
        print("best_threshold : " + str(best_threshold))

    y_pred = np.array([1 if out[i, 0] >= best_threshold else 0 for i in range(len(y_test))])
    # The loss should be as low as possible; the range is from 0 to 1
    print("hamming loss : " + str(hamming_loss(y_test, y_pred)))
    # print("results :\n" + str(y_pred))

    total_correctly_predicted = len(
        [i for i in range(len(y_test)) if (y_test[i] == y_pred[i]).sum() == 1])
    print("total correct : " + str(total_correctly_predicted))
    print("ratio correct predict : " + str(total_correctly_predicted / float(len(y_test))))

    false_positive = np.array([1 if (y_test[i] == 0 and y_pred[i] == 1) else 0
                               for i in range(len(y_test))]).sum()
    print("false_positive = " + str(false_positive))
    false_negative = np.array([1 if (y_test[i] == 1 and y_pred[i] == 0) else 0
                               for i in range(len(y_test))]).sum()
    print("false_negative = " + str(false_negative))

    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average="weighted")
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1: ", f1)
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, np.logical_not(y2)), 1)
    assert_equal(hamming_loss(y1, np.logical_not(y1)), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)

    with ignore_warnings():  # sequence of sequences is deprecated
        # List of tuple of label
        y1 = [(1, 2,), (0, 2,)]
        y2 = [(2,), (0, 2,)]

        assert_equal(hamming_loss(y1, y2), 1 / 6)
        assert_equal(hamming_loss(y1, y1), 0)
        assert_equal(hamming_loss(y2, y2), 0)
        assert_equal(hamming_loss(y2, [(), ()]), 0.75)
        assert_equal(hamming_loss(y1, [tuple(), (10,)]), 0.625)
        assert_almost_equal(hamming_loss(y2, [tuple(), (10,)],
                                         classes=np.arange(11)), 0.1818, 2)
pred = np.array(pred)
indices = pred > 0
comp = np.zeros(pred.shape)
comp[indices] = 1

for x in range(1, len(data)):
    # index with the loop variable so each sample is translated, not data[0][0]
    pred = sess.run(prediction, feed_dict={
        text: [bv.sequenceTranslate(data[x][0].split(), wdfull)]
    })
    pred = np.array(pred)
    indices = pred > 0
    tmp = np.zeros(pred.shape)
    tmp[indices] = 1
    comp = np.concatenate((comp, tmp))

truths = np.array([bv.translate(b[1], labelDict) for b in data])

print('Hamming Loss:', hamming_loss(comp, truths))
print('Zero One Loss:', zero_one_loss(comp, truths))
print('Jaccard Score:', jaccard_score(comp, truths, average='samples'))
print('F1-Score Micro:', f1_score(comp, truths, average='micro'))
print('F1-Score Macro:', f1_score(comp, truths, average='macro'))
print('Accuracy :', accuracy_score(comp, truths))
# print(clf.best_params_)
# Perform a grid search to find the number of estimators
# classifier = RandomForestClassifier(n_estimators=3)
# classifier.fit(X_train, y_train)
# y_predicted = classifier.predict(X_test)
# y_predicted = y_predicted.astype(int)

start_time = time.process_time()
classifier = LabelPowerset(
    RandomForestClassifier(random_state=0, n_estimators=10, min_samples_leaf=10, n_jobs=-1))
total_time = time.process_time() - start_time
print("Total time taken is : " + str(total_time))

# classifier = RandomForestClassifier(random_state=0, n_estimators=10, min_samples_leaf=10)
# classifier = BinaryRelevance(classifier=LinearSVC(), require_dense=[False, True])
# classifier = LabelPowerset(SGDClassifier(penalty='l2', alpha=0.01))

classifier.fit(X_train, y_train)
y_predicted = classifier.predict(X_test)

print("Jaccard Similarity Score is : " + str(jaccard_similarity_score(y_test, y_predicted)))
print("Hamming Loss is : " + str(hamming_loss(y_test, y_predicted)))
# print("F1_Similarity score is : " + str(f1_score(y_test, y_predicted, average='macro')))

# model_filename = "final_model.sav"
# pickle.dump(classifier, open(model_filename, 'wb'))
X_train_embeds, X_val_embeds = [
    WE.get_sentence_vector(tokenized_sentence(x), vector_dict, stopwords=STOPWORDS)
    for x in raw_X_train
], [
    WE.get_sentence_vector(tokenized_sentence(x), vector_dict, stopwords=STOPWORDS)
    for x in raw_X_val
]

lr_embed_clf = MultiOutputClassifier(
    LogisticRegression(max_iter=300, multi_class="multinomial", penalty="none", solver="lbfgs")
).fit(X_train_embeds, y_train)

print(hamming_loss(y_val, lr_embed_clf.predict(X_val_embeds)))
print(classification_report(y_val, lr_embed_clf.predict(X_val_embeds)))

## Seeing where no prediction was made
null_predictions = len(
    [i for i in lr_embed_clf.predict(X_val_embeds) if not np.any(np.nonzero(i))]
)
print(f"{null_predictions} out of {len(y_val)} predictions were null.")

dub_ref_model = lr_embed_clf.estimators_[4]
vocab, id2tok, tok2id = get_vocab(train_dataset)
target_label = "dubious reference"

BATCH_SIZE = 1
pred = []
actual = []
vectors = []
for batch, targets, lengths, raw_data in create_dataset(
def score(y_pred, y):
    return {'accuracy': accuracy_score(y, y_pred),
            'f1-score': f1_score(y, y_pred, average='weighted'),
            'precision': precision_score(y, y_pred, average='weighted'),
            'recall': recall_score(y, y_pred, average='weighted'),
            'hamming_loss': hamming_loss(y, y_pred)}
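# Hypothetical usage of score() above (note the argument order: predictions
# first, ground truth second); arrays are illustrative:
y_true = [0, 1, 2, 1, 0]
y_hat = [0, 2, 2, 1, 0]
print(score(y_hat, y_true))  # for single-label input, hamming_loss is 1 - accuracy: 0.2 vs 0.8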
def main(argv):
    # Default values
    MODEL_NAME = 'myModel.h5'
    IMAGE_NAME = 'test'
    shape = 224
    third_dim = 1
    try:
        opts, args = getopt.getopt(argv, "hm:t:s:d:", ["model=", "test=", "shape=", "third_dim="])
    except getopt.GetoptError:
        print 'dataPredict.py -m <model> -t <test> -s <shape> -d <third_dim>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'dataPredict.py -m <model> -t <test> -s <shape> -d <third_dim>'
            sys.exit()
        elif opt in ("-m", "--model"):
            MODEL_NAME = arg
        elif opt in ("-t", "--test"):
            IMAGE_NAME = arg
        elif opt in ("-s", "--shape"):
            shape = int(arg)
        elif opt in ("-d", "--third_dim"):
            third_dim = int(arg)
    print 'Pathology model is ', MODEL_NAME
    print 'Test is ', IMAGE_NAME
    print 'Shape is ', str(shape)
    print 'Third dimension is ', str(third_dim)

    img_width, img_height = shape, shape
    model = loadModel(MODEL_NAME)
    dataTest = loadDataset(IMAGE_NAME)

    # Columns, one for each disease
    pathology_list = ['Cardiomegaly', 'Emphysema', 'Effusion', 'Hernia', 'Nodule',
                      'Pneumothorax', 'Atelectasis', 'Pleural_Thickening', 'Mass', 'Edema',
                      'Consolidation', 'Infiltration', 'Fibrosis', 'Pneumonia', 'No Finding']

    X_test, y_test = buildImageset(dataTest, img_width, img_height, third_dim)
    # y_test = to_categorical(y_test, num_classes=2)
    # print("categorical expected results :\n" + str(y_test))

    if IMAGE_NAME == 'test' or IMAGE_NAME == 'random':
        score = model.evaluate(X_test, y_test, verbose=1, batch_size=batch_size)
        print("\n\n\n###########################")
        print("######## RESULTS ##########\n")
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])

        out = model.predict(X_test, batch_size=batch_size)
        out = np.array(out)
        np.seterr(divide='ignore', invalid='ignore')
        threshold = np.arange(0.01, 0.99, 0.01)
        best_threshold = bestthreshold(threshold, out, y_test)
        print("best_threshold : " + str(best_threshold))

        for i in range(len(best_threshold)):
            if best_threshold[i] < 0.02:
                # Try to find a better one
                threshold = np.arange(0.001, 0.01, 0.001)
                acc = []
                accuracies = []
                y_prob = np.array(out[:, i])
                for j in threshold:
                    y_pred = [1 if prob >= j else 0 for prob in y_prob]
                    acc.append(matthews_corrcoef(y_test[:, i], y_pred))
                acc = np.array(acc)
                index = np.where(acc == acc.max())
                accuracies.append(acc.max())
                best_threshold[i] = threshold[index[0][0]]
        print("best_threshold : " + str(best_threshold) + "\n")

        for j in range(y_test.shape[1]):
            y_pred = np.array([1 if out[i, j] >= best_threshold[j] else 0
                               for i in range(len(y_test))])
            print("\nResults for " + str(pathology_list[j]) + " :")
            print("   Test size : " + str(len(y_test)))
            positive = np.array([1 if y_test[i, j] == 1 else 0
                                 for i in range(len(y_test))]).sum()
            print("   Total number of disease : " + str(positive))
            positive_predict = np.array([1 if y_pred[i] == 1 else 0
                                         for i in range(len(y_test))]).sum()
            print("   Total number of predicted disease : " + str(positive_predict))
            positive_match = np.array([1 if (y_test[i, j] == 1 and y_pred[i] == 1) else 0
                                       for i in range(len(y_test))]).sum()
            print("   Total number of matching disease : " + str(positive_match) +
                  "   Ratio : " + str(positive_match / (1. * positive)))
            negative_false = np.array([1 if (y_test[i, j] == 1 and y_pred[i] == 0) else 0
                                       for i in range(len(y_test))]).sum()
            positive_false = np.array([1 if (y_test[i, j] == 0 and y_pred[i] == 1) else 0
                                       for i in range(len(y_test))]).sum()
            print("   Total number of false negative : " + str(negative_false) +
                  "   False positive : " + str(positive_false))
            negative = np.array([1 if y_test[i, j] == 0 else 0
                                 for i in range(len(y_test))]).sum()
            negative_match = np.array([1 if (y_test[i, j] == 0 and y_pred[i] == 0) else 0
                                       for i in range(len(y_test))]).sum()
            print("   Total number of matching no disease : " + str(negative_match) +
                  "   Ratio : " + str(negative_match / (1. * negative)))
            total_correctly_predicted = len([i for i in range(len(y_test))
                                             if (y_test[i, j] == y_pred[i]).sum() == 1])
            print("   Total correct : " + str(total_correctly_predicted) +
                  "   Ratio : " + str(total_correctly_predicted / (1. * len(y_test))))
            print("Other metrics :")
            print("   Hamming loss : " + str(hamming_loss(y_test[:, j], y_pred)))
            precision = precision_score(y_test[:, j], y_pred, average='weighted')
            recall = recall_score(y_test[:, j], y_pred, average='weighted')
            f1 = f1_score(y_test[:, j], y_pred, average="weighted")
            roc = roc_auc_score(y_test[:, j], y_pred)
            print("   Precision: " + str(precision))
            print("   Recall: " + str(recall))
            print("   F1: " + str(f1))
            print("   AUC: " + str(roc))

        y_pred = np.array([[1 if out[i, j] >= best_threshold[j] else 0
                            for j in range(y_test.shape[1])]
                           for i in range(len(y_test))])
        total_correctly_predicted = len([i for i in range(len(y_test))
                                         if (y_test[i] == y_pred[i]).sum() == NUMBER_OF_DESEASES])
        print("\nGlobal total correct : " + str(total_correctly_predicted))
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average="weighted")
        roc = roc_auc_score(y_test, y_pred)
        print("Global Precision: " + str(precision))
        print("Global Recall: " + str(recall))
        print("Global F1: " + str(f1))
        print("Global AUC: " + str(roc))
    else:
        # Check model on image dataset
        scores = model.predict(X_test)
        print("result before threshold : " + str(scores))
scores[i - 1][1] = train_scores[1]
# print(train_scores)

print("Evaluation on Validation set")
val_scores = model.evaluate(val_x, val_y, batch_size=batch_size)
scores[i - 1][2] = val_scores[0]
scores[i - 1][3] = val_scores[1]

predictions = model.predict(val_x)
# Turn predictions into multilabel outputs
predictions[predictions >= 0.5] = 1
predictions[predictions < 0.5] = 0
# print(predictions[0])

# Add in hamming loss, jaccard score, recall, precision, f1 score,
# then save the multilabel confusion matrix
scikit_scores = []
scikit_scores.append(hamming_loss(val_y, predictions))
scikit_scores.append(jaccard_score(val_y, predictions, average=None))
scikit_scores.append(recall_score(val_y, predictions, average=None))
scikit_scores.append(precision_score(val_y, predictions, average=None))
scikit_scores.append(f1_score(val_y, predictions, average=None))
confusion = multilabel_confusion_matrix(val_y, predictions)
print(confusion)
print(f1_score(val_y, predictions, average=None))
# print(val_scores)
# print(scikit_scores)
# print(confusion)

# Save the scikit parameters for this fold
with open(
        dir_RNN_Stats_val + '/Scikit_Scores_For_Fold_{}_Weighted.pkl'.format(str(i)),
print("p@15:", precision_15) # nDCG @k nDCG_1 = [] nDCG_3 = [] nDCG_5 = [] Hamming_loss_5 = [] Hamming_loss_10 = [] Hamming_loss_15 = [] for i in range(pred.shape[0]): ndcg1 = ndcg_score(test_labels[i], pred[i], k=1, gains="linear") ndcg3 = ndcg_score(test_labels[i], pred[i], k=3, gains="linear") ndcg5 = ndcg_score(test_labels[i], pred[i], k=5, gains="linear") hl_5 = hamming_loss(test_labels[0], top_5_pred[0]) hl_10 = hamming_loss(test_labels[0], top_10_pred[0]) hl_15 = hamming_loss(test_labels[0], top_15_pred[0]) nDCG_1.append(ndcg1) nDCG_3.append(ndcg3) nDCG_5.append(ndcg5) Hamming_loss_5.append(hl_5) Hamming_loss_10.append(hl_10) Hamming_loss_15.append(hl_15) nDCG_1 = np.mean(nDCG_1) nDCG_3 = np.mean(nDCG_3) nDCG_5 = np.mean(nDCG_5) Hamming_loss_5 = np.mean(Hamming_loss_5)
# Classification metrics

# Accuracy score
y_pred = [0, 2, 1, 3]
y_true = [0, 1, 2, 3]
metrics.accuracy_score(y_true, y_pred)                   # fraction of correct samples
metrics.accuracy_score(y_true, y_pred, normalize=False)  # number of correct samples

# Classification report
y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 2, 0]
target_names = ['class 0', 'class 1', 'class 2']
metrics.classification_report(y_true, y_pred, target_names=target_names)

# Hamming loss
y_pred = [1, 2, 3, 4]
y_true = [2, 2, 3, 4]
metrics.hamming_loss(y_true, y_pred)  # for single-label input, Hamming loss = 1 - accuracy

# Precision, recall and F-measures
y_pred = [0, 1, 0, 0]
y_true = [0, 1, 0, 1]
metrics.precision_score(y_true, y_pred)
metrics.recall_score(y_true, y_pred)
metrics.f1_score(y_true, y_pred)
metrics.fbeta_score(y_true, y_pred, beta=0.5)
metrics.fbeta_score(y_true, y_pred, beta=1)
metrics.fbeta_score(y_true, y_pred, beta=2)
metrics.precision_recall_fscore_support(y_true, y_pred, beta=0.5)

# precision_recall_curve
y_true = np.array([0, 0, 1, 1])
y_scores = np.array([0.1, 0.4, 0.35, 0.8])
precision, recall, threshold = metrics.precision_recall_curve(y_true, y_scores)
metrics.average_precision_score(y_true, y_scores)
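# The "1 - accuracy" relationship above only holds for single-label
# (multiclass) input. With multilabel indicator matrices the two diverge,
# because accuracy_score demands an exact row match while hamming_loss
# averages over every label entry. A sketch with illustrative matrices:
import numpy as np
from sklearn import metrics

Y_true = np.array([[1, 0, 1], [0, 1, 0]])
Y_pred = np.array([[1, 0, 0], [0, 1, 0]])
print(metrics.hamming_loss(Y_true, Y_pred))        # 1 wrong entry / 6 = 0.1666...
print(1 - metrics.accuracy_score(Y_true, Y_pred))  # 1 wrong row / 2 = 0.5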
print('\nCohen Kappa Score')
print(cohen_kappa_score(actual_data, test_data))
print('\nConfusion Matrix')
print(confusion_matrix(actual_data, test_data))
print('\nHinge Loss')
print(hinge_loss(actual_data, test_data))
print('\nMatthews Correlation Coefficient')
print(matthews_corrcoef(actual_data, test_data))
print('\nAccuracy Score')
print(accuracy_score(actual_data, test_data))
print('\nClassification Report')
print(classification_report(actual_data, test_data))
print('\nF1 Score')
print(f1_score(actual_data, test_data))
print('\nHamming Loss')
print(hamming_loss(actual_data, test_data))
print('\nJaccard Similarity Score')
print(jaccard_similarity_score(actual_data, test_data))
# causes memory errors
# print('\nLog Loss')
# print(log_loss(actual_data, test_data))
print('\nPrecision Recall F-Score Support')
print(precision_recall_fscore_support(actual_data, test_data))
print('\nZero-One Loss')
print(zero_one_loss(actual_data, test_data))
print('\nAverage Precision Score')
print(average_precision_score(actual_data, test_data))
print('\nROC AUC Score')
print(roc_auc_score(actual_data, test_data))
print('\n')
# initialize a classifier-chains multi-label classifier,
# first with a logistic regression base classifier
classifier = ClassifierChain(LogisticRegression())

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)
# print(predictions)

print("Log reg Classifier chains: ", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))
# In the multilabel case with binary label indicators:
print("Hamming loss: ", hamming_loss(y_test, predictions))
print(" ")

# now with a Gaussian naive Bayes base classifier
classifier = ClassifierChain(GaussianNB())

# train
classifier.fit(X_train, y_train)

# predict
predictions = classifier.predict(X_test)
# print(predictions)

print("Naive B Classifier chains: ", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))
# In the multilabel case with binary label indicators:
print("Hamming loss: ", hamming_loss(y_test, predictions))
print(" ")
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0.0, 1.0, 1.0], [1.0, 0.0, 1.0]])
    y2 = np.array([[0.0, 0.0, 1.0], [1.0, 0.0, 1.0]])

    assert_equal(1 / 6., hamming_loss(y1, y2))
    assert_equal(0.0, hamming_loss(y1, y1))
    assert_equal(0.0, hamming_loss(y2, y2))
    assert_equal(1.0, hamming_loss(y2, np.logical_not(y2)))
    assert_equal(1.0, hamming_loss(y1, np.logical_not(y1)))
    assert_equal(4. / 6, hamming_loss(y1, np.zeros(y1.shape)))
    assert_equal(0.5, hamming_loss(y2, np.zeros(y1.shape)))

    # List of tuple of label
    y1 = [(1, 2,), (0, 2,)]
    y2 = [(2,), (0, 2,)]

    assert_equal(1 / 6., hamming_loss(y1, y2))
    assert_equal(0.0, hamming_loss(y1, y1))
    assert_equal(0.0, hamming_loss(y2, y2))
    assert_equal(0.75, hamming_loss(y2, [(), ()]))
    assert_equal(0.625, hamming_loss(y1, [tuple(), (10,)]))
    assert_almost_equal(0.1818, hamming_loss(y2, [tuple(), (10,)],
                                             classes=np.arange(11)), 2)
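# The `classes` keyword and the sequence-of-sequences input used in the last
# assertion above come from an old scikit-learn; both were deprecated and
# later removed. A sketch of the modern equivalent (assuming any recent
# scikit-learn): binarize the label tuples first, then compare matrices.
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import hamming_loss

mlb = MultiLabelBinarizer(classes=np.arange(11))
Y_a = mlb.fit_transform([(2,), (0, 2)])
Y_b = mlb.transform([(), (10,)])
print(hamming_loss(Y_a, Y_b))  # 4 mismatches / 22 entries = 0.1818...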
# model load
model = pickle.load(open('E:/nmb/nmb_data/cp/svc/svc_C001.data', 'rb'))  # rb : read binary

# evaluate
y_pred = model.predict(x_test)
# print(y_pred[:100])
# print(y_pred[100:])

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
hamm_loss = hamming_loss(y_test, y_pred)
# use distinct names so the hinge_loss/log_loss functions are not shadowed
hinge = hinge_loss(y_test, y_pred)
log = log_loss(y_test, y_pred)

print("hamming_loss : \t", hamm_loss)
print("hinge_loss : \t", hinge)
print("log_loss : \t", log)
print("accuracy : \t", accuracy)
print("recall : \t", recall)
print("precision : \t", precision)
print("f1 : \t", f1)

# prediction data
pred_pathAudio = 'E:/nmb/nmb_data/predict/F'
files = librosa.util.find_files(pred_pathAudio, ext=['wav'])
print('-' * 30)
# Compute the F1 score, also known as balanced F-score or F-measure
f1 = metrics.f1_score(y_true, y_pred)
f_eval.write('F1 score: ' + str(f1) + '\n')
print('F1 score: ', str(f1))
print('-' * 30)

# Compute the F-beta score
f_beta = metrics.fbeta_score(y_true, y_pred, beta=0.5)
f_eval.write('F-beta (b=0.5) score: ' + str(f_beta) + '\n')
print('F-beta (b=0.5) score: ', str(f_beta))
print('-' * 30)

# Compute the average Hamming loss
hamming = metrics.hamming_loss(y_true, y_pred)
f_eval.write('Hamming loss: ' + str(hamming) + '\n')
print('Hamming loss: ', str(hamming))
print('-' * 30)

# Compute Jaccard similarity coefficient score
jacc = metrics.jaccard_similarity_score(y_true, y_pred)
f_eval.write('Jaccard similarity coefficient score: ' + str(jacc) + '\n')
print('Jaccard similarity coefficient score: ', str(jacc))
print('-' * 30)

# Compute the Matthews correlation coefficient (MCC) for binary classes
matthews = metrics.matthews_corrcoef(y_true, y_pred)
f_eval.write('Matthews correlation coefficient (MCC): ' + str(matthews) + '\n')
print('Matthews correlation coefficient (MCC): ', str(matthews))
print('-' * 30)
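# jaccard_similarity_score, used above and in several other snippets here, was
# deprecated in scikit-learn 0.21 and removed in 0.23. Its replacement is
# jaccard_score, which computes the true Jaccard index on the positive class
# (the old function degenerated to plain accuracy for binary input, so values
# can legitimately differ). A sketch on current versions:
from sklearn.metrics import jaccard_score

jacc = jaccard_score(y_true, y_pred)  # pass average=... for multiclass/multilabel input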
def evaluate(y, pred):
    """Evaluates the performance of a model.

    Args:
        y: true values (as pd.Series)
        pred: predicted values of the model (as np.array)

    Returns:
        dictionary: dictionary with all calculated values
    """
    y = np.asarray(y.to_frame())
    classes = np.greater(pred, 0.5).astype(int)
    tp = np.count_nonzero(classes * y)
    tn = np.count_nonzero((classes - 1) * (y - 1))
    fp = np.count_nonzero(classes * (y - 1))
    fn = np.count_nonzero((classes - 1) * y)

    # Calculate accuracy, precision, recall and F1 score.
    accuracy = (tp + tn) / (tp + fp + fn + tn)
    try:
        precision = tp / (tp + fp)
    except ZeroDivisionError:
        precision = np.nan
    try:
        sensitivity = tp / (tp + fn)
    except ZeroDivisionError:
        sensitivity = np.nan
    try:
        specificity = tn / (tn + fp)
    except ZeroDivisionError:
        specificity = np.nan
    bal_acc = (sensitivity + specificity) / 2
    try:
        fpr = fp / (fp + tn)
    except ZeroDivisionError:
        fpr = np.nan
    try:
        fnr = fn / (tp + fn)
    except ZeroDivisionError:
        fnr = np.nan
    try:
        fmeasure = (2 * precision * sensitivity) / (precision + sensitivity)
    except ZeroDivisionError:
        fmeasure = np.nan

    mse = mean_squared_error(classes, y)
    mcc = matthews_corrcoef(y, classes)
    youden = sensitivity + specificity - 1
    try:
        AUC = roc_auc_score(y, pred)
    except ValueError:
        AUC = np.nan
    hamming = hamming_loss(y, classes)
    kappa = cohen_kappa_score(y, classes)
    gmean = sqrt(sensitivity * specificity)

    return {
        'tp': tp, 'tn': tn, 'fp': fp, 'fn': fn,
        'acc': accuracy, 'bal_acc': bal_acc,
        'sens': sensitivity, 'spec': specificity,
        'fnr': fnr, 'fpr': fpr, 'fmeas': fmeasure,
        'mse': mse, 'youden': youden, 'mcc': mcc,
        'auc': AUC, 'hamming': hamming,
        'cohen_kappa': kappa, 'gmean': gmean,
    }
print(product)
print("ratio: 1: {}".format(r + 1))
print("precision:", precision_score(Y_test, prediction, average=None))
print("recall:", recall_score(Y_test, prediction, average=None))
print("f1-score:", f1_score(Y_test, prediction, average=None))
print("micro f1-score:", f1_score(Y_test, prediction, average='micro'))
print("macro f1-score:", f1_score(Y_test, prediction, average='macro'))
print("weighted f1-score:", f1_score(Y_test, prediction, average='weighted'))
print("auc-score:", roc_auc_score(Y_test, scores, average=None))
print("micro auc-score:", roc_auc_score(Y_test, scores, average='micro'))
print("macro auc-score:", roc_auc_score(Y_test, scores, average='macro'))
print("weighted auc-score:", roc_auc_score(Y_test, scores, average='weighted'))
print("hamming_loss:", hamming_loss(Y_test, prediction))

idx = np.argsort(scores, axis=0)[::-1]
sorted_Y_test = np.array([Y_test[:, i][idx[:, i]] for i in range(5)]).T
print("precision@100:",
      precision_score(sorted_Y_test[:100, :], np.ones((100, 5)), average=None))
print("precision@1000:",
      precision_score(sorted_Y_test[:1000, :], np.ones((1000, 5)), average=None))
idx = np.sum(Y_test, 1) > 0
        key=lambda i: item[i])[-k:]
    classelements.reverse()
    precision = 0
    dcg = 0
    loop_var2 = 0
    for element in classelements:
        if Y_test[loop_var][element] == 1:
            precision += 1
            dcg += 1 / math.log(loop_var2 + 2)
        loop_var2 += 1
    Total_Precision += precision * 1.0 / k
    Total_DCG += dcg * 1.0 / norm
    loop_var += 1

print "Precision@", k, ": ", Total_Precision * 1.0 / loop_var
print "NDCG@", k, ": ", Total_DCG * 1.0 / loop_var
print "Coverage Error: ", coverage_error(Y_test, pred_proba)
print "Label Ranking Average precision score: ", label_ranking_average_precision_score(Y_test, pred_proba)
print "Label Ranking Loss: ", label_ranking_loss(Y_test, pred_proba)
print "Hamming Loss: ", hamming_loss(Y_test, pred)
print "Weighted F1score: ", f1_score(Y_test, pred, average='weighted')
# print "Total time taken: ", time.time() - start, "seconds."
endtime = time.time()
print "Total time taken: ", endtime - start, "seconds."
print "********************************************************"
def evaluate(self, threshold=0.5):
    """Evaluate performance against the persistent evaluation data set.

    Keyword arguments:
    threshold -- Value between 0 and 1. The cutoff where the numerical
                 prediction becomes boolean. Default: 0.5
    """
    # Prepare the data
    data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess_input)
    evaluation_generator = data_generator.flow_from_dataframe(
        self.evaluation_labels,
        directory=self.image_dir,
        x_col="filename",
        y_col=DATA_LABELS,
        target_size=(self.image_width, self.image_height),
        shuffle=False,
        batch_size=1,
        class_mode="other")

    # If a seed was selected, then also evaluate on the validation set for that seed
    if not self.random_seed:
        print("Using validation set...")
        # Data augmentation
        data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
            preprocessing_function=self.preprocess_training_function,
            samplewise_center=True,
            validation_split=0.2)
        evaluation_generator = data_generator.flow_from_dataframe(
            self.training_labels,
            directory=self.image_dir,
            x_col="filename",
            y_col=DATA_LABELS,
            target_size=(self.image_width, self.image_height),
            batch_size=1,
            subset='validation',
            shuffle=False,
            seed=self.seed,
            class_mode="other")
    else:
        print("Using evaluation set...")

    predictions = self.model.predict_generator(
        evaluation_generator, verbose=1, steps=len(evaluation_generator))
    self._save_prediction_histograms(predictions)
    predictions = predictions > threshold

    ground_truth = self.evaluation_labels[DATA_LABELS].to_numpy()
    filenames = self.evaluation_labels[["filename"]].to_numpy()

    stats = classification_report(ground_truth, predictions,
                                  target_names=DATA_LABELS, output_dict=True)
    stats["total_binary_accuracy"] = 1 - hamming_loss(ground_truth, predictions)
    stats["all_or_nothing_accuracy"] = accuracy_score(ground_truth, predictions)
    stats["top_10_best"] = self._top_images(filenames, ground_truth, predictions, best=True)
    stats["top_10_worst"] = self._top_images(filenames, ground_truth, predictions, best=False)
    stats["none_of_the_above_recall"] = self._none_of_the_above_recall(ground_truth, predictions)
    stats["none_of_the_above_precision"] = self._none_of_the_above_precision(ground_truth, predictions)
    return stats
# Train the classifier: a classifier chain over random forests
model = ClassifierChain(RandomForestClassifier(n_jobs=-1, verbose=1))
model.fit(train_data_vector, train_labels)

# Test the classifier
predicted_labels = model.predict(test_data_vector)
predicted_labels_train = model.predict(train_data_vector)
predicted_probabilities = model.predict_proba(test_data_vector)

# Test accuracy (exact-match / subset accuracy):
#   ~7% with random forest and binary relevance
#   ~7% with random forest and classifier chain
#   ~5% with random forest and label powerset
#   ~4% with multilabel kNN
test_acc = accuracy_score(test_labels, predicted_labels)
train_acc = accuracy_score(train_labels, predicted_labels_train)
test_hamm_loss = hamming_loss(test_labels, predicted_labels)
test_cov_err = coverage_error(test_labels, predicted_probabilities.toarray())
test_rank_loss = label_ranking_loss(test_labels, predicted_probabilities.toarray())
test_avr_prec = label_ranking_average_precision_score(
    test_labels, predicted_probabilities.toarray())

print("Train accuracy: ", train_acc)
print("Test accuracy: ", test_acc)
print("Hamming loss: ", test_hamm_loss)
print("Coverage error: ", test_cov_err)
print("Ranking loss: ", test_rank_loss)
print("Average precision: ", test_avr_prec)
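# A minimal sketch of how the strategies quoted in the accuracy comments above
# (binary relevance, classifier chain, label powerset) could be swapped in,
# assuming the scikit-multilearn transforms that the .toarray() calls suggest;
# variable names are taken from the snippet above.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from skmultilearn.problem_transform import (BinaryRelevance, ClassifierChain,
                                            LabelPowerset)

for Transform in (BinaryRelevance, ClassifierChain, LabelPowerset):
    clf = Transform(RandomForestClassifier(n_jobs=-1))
    clf.fit(train_data_vector, train_labels)
    acc = accuracy_score(test_labels, clf.predict(test_data_vector))
    print(Transform.__name__, "subset accuracy:", acc)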
def get_scores(self, silent=False):
    if self.args.is_hierarchical:
        eval_features = convert_examples_to_hierarchical_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer)
    else:
        eval_features = convert_examples_to_features(
            self.eval_examples, self.args.max_seq_length, self.tokenizer,
            use_guid=True, is_regression=self.args.is_regression)

    unpadded_input_ids = [f.input_ids for f in eval_features]
    unpadded_input_mask = [f.input_mask for f in eval_features]
    unpadded_segment_ids = [f.segment_ids for f in eval_features]

    if self.args.is_hierarchical:
        pad_input_matrix(unpadded_input_ids, self.args.max_doc_length)
        pad_input_matrix(unpadded_input_mask, self.args.max_doc_length)
        pad_input_matrix(unpadded_segment_ids, self.args.max_doc_length)

    padded_input_ids = torch.tensor(unpadded_input_ids, dtype=torch.long)
    padded_input_mask = torch.tensor(unpadded_input_mask, dtype=torch.long)
    padded_segment_ids = torch.tensor(unpadded_segment_ids, dtype=torch.long)
    if self.args.is_regression:
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.float)
    else:
        label_ids = torch.tensor([f.label_id for f in eval_features],
                                 dtype=torch.long)
    doc_ids = torch.tensor([f.guid for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(padded_input_ids, padded_input_mask,
                              padded_segment_ids, label_ids, doc_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=self.args.batch_size)

    self.model.eval()
    total_loss = 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predicted_labels, target_labels, target_doc_ids = list(), list(), list()

    for input_ids, input_mask, segment_ids, label_ids, doc_ids in tqdm(
            eval_dataloader, desc="Evaluating", disable=silent):
        input_ids = input_ids.to(self.args.device)
        input_mask = input_mask.to(self.args.device)
        segment_ids = segment_ids.to(self.args.device)
        label_ids = label_ids.to(self.args.device)
        target_doc_ids.extend(doc_ids.tolist())
        with torch.no_grad():
            logits = self.model(input_ids=input_ids,
                                attention_mask=input_mask,
                                token_type_ids=segment_ids)[0]

        if self.args.is_multilabel:
            # torch.sigmoid replaces the deprecated F.sigmoid
            predicted_labels.extend(
                torch.sigmoid(logits).round().long().cpu().detach().numpy())
            target_labels.extend(label_ids.cpu().detach().numpy())
            # if self.args.pos_weights:
            #     pos_weights = [float(w) for w in self.args.pos_weights.split(',')]
            #     pos_weight = torch.FloatTensor(pos_weights)
            # else:
            #     pos_weight = torch.ones([self.args.num_labels])
            if self.args.loss == 'cross-entropy':
                # reduction='sum' replaces the deprecated size_average=False
                criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')
                loss = criterion(logits.cpu(), label_ids.float().cpu())
            elif self.args.loss == 'mse':
                criterion = torch.nn.MSELoss(reduction='sum')
                m = torch.nn.Sigmoid()
                loss = criterion(m(logits.cpu()), label_ids.float().cpu())
        else:
            if self.args.num_labels > 2:
                predicted_labels.extend(
                    torch.argmax(logits, dim=1).cpu().detach().numpy())
                target_labels.extend(label_ids.cpu().detach().numpy())
                loss = F.cross_entropy(logits, torch.argmax(label_ids, dim=1))
            else:
                if self.args.is_regression:
                    predicted_labels.extend(
                        logits.view(-1).cpu().detach().numpy())
                    target_labels.extend(
                        label_ids.view(-1).cpu().detach().numpy())
                    criterion = torch.nn.MSELoss()
                    loss = criterion(logits.view(-1).cpu(),
                                     label_ids.view(-1).cpu())
                else:
                    predicted_labels.extend(
                        torch.argmax(logits, dim=1).cpu().detach().numpy())
                    target_labels.extend(label_ids.cpu().detach().numpy())
                    loss_fct = torch.nn.CrossEntropyLoss()
                    loss = loss_fct(logits.view(-1, self.args.num_labels),
                                    label_ids.view(-1))

        if self.args.n_gpu > 1:
            loss = loss.mean()
        if self.args.gradient_accumulation_steps > 1:
            loss = loss / self.args.gradient_accumulation_steps
        total_loss += loss.item()

        nb_eval_examples += input_ids.size(0)
        nb_eval_steps += 1

    avg_loss = total_loss / nb_eval_steps

    predicted_label_sets = [predicted_label.tolist()
                            for predicted_label in predicted_labels]
    target_label_sets = [target_label.tolist()
                         for target_label in target_labels]

    if self.args.is_regression:
        rmse, kendall, pearson, spearman, pearson_spearman = \
            evaluate_for_regression(target_labels, predicted_labels)
        score_values = [
            rmse.tolist(), kendall, pearson, spearman, pearson_spearman,
            avg_loss,
            list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
        ]
        score_names = [
            METRIC_RMSE, METRIC_KENDALL, METRIC_PEARSON, METRIC_SPEARMAN,
            METRIC_PEARSON_SPEARMAN, 'avg_loss',
            'label_set_info (id/gold/pred)'
        ]
    else:
        hamming_loss = metrics.hamming_loss(target_labels, predicted_labels)
        predicted_labels, target_labels = (np.array(predicted_labels),
                                           np.array(target_labels))
        cm = metrics.multilabel_confusion_matrix(target_labels, predicted_labels)
        accuracy = metrics.accuracy_score(target_labels, predicted_labels)

        if self.args.num_labels == 2:
            precision = metrics.precision_score(target_labels, predicted_labels,
                                                average='binary')
            recall = metrics.recall_score(target_labels, predicted_labels,
                                          average='binary')
            f1 = evaluate_with_metric(target_labels, predicted_labels,
                                      METRIC_F1_BINARY)
        else:
            precision_micro = metrics.precision_score(target_labels,
                                                      predicted_labels,
                                                      average='micro')
            recall_micro = metrics.recall_score(target_labels, predicted_labels,
                                                average='micro')
            f1_micro = metrics.f1_score(target_labels, predicted_labels,
                                        average='micro')
            f1_macro = evaluate_with_metric(target_labels, predicted_labels,
                                            METRIC_F1_MACRO)
            precision_macro = metrics.precision_score(target_labels,
                                                      predicted_labels,
                                                      average='macro')
            recall_macro = metrics.recall_score(target_labels, predicted_labels,
                                                average='macro')
            precision_class, recall_class, f1_class, support_class = \
                metrics.precision_recall_fscore_support(target_labels,
                                                        predicted_labels)

        if self.args.num_labels == 2:
            score_values = [
                precision, recall, f1, accuracy, avg_loss, hamming_loss,
                cm.tolist(),
                list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
            ]
            score_names = [
                'precision', 'recall', 'f1', 'accuracy', 'avg_loss',
                'hamming_loss', 'confusion_matrix',
                'label_set_info (id/gold/pred)'
            ]
        else:
            score_values = [
                precision_macro, recall_macro, f1_macro, accuracy, avg_loss,
                hamming_loss, precision_micro, recall_micro, f1_micro,
                precision_class.tolist(), recall_class.tolist(),
                f1_class.tolist(), support_class.tolist(), cm.tolist(),
                list(zip(target_doc_ids, target_label_sets, predicted_label_sets))
            ]
            score_names = [
                'precision_macro', 'recall_macro', METRIC_F1_MACRO, 'accuracy',
                'avg_loss', 'hamming_loss', 'precision_micro', 'recall_micro',
                'f1_micro', 'precision_class', 'recall_class', 'f1_class',
                'support_class', 'confusion_matrix',
                'label_set_info (id/gold/pred)'
            ]

    return score_values, score_names
# Grid search: 10-fold cross-validation over each combination of kernels and Cs
classifier1 = GridSearchCV(pipe, param_grid=params, n_jobs=2, cv=10,
                           scoring='accuracy')
#reg = pipe
classifier1.fit(X, first_lvl_targets)
print("Best parameters set found on development set:")
print(classifier1.best_params_)

predictions1 = classifier1.predict(X_test)
train_pred1 = classifier1.predict(X)
error1 = hamming_loss(first_lvl_targets, train_pred1)
print('Training error 1', error1)

# Distinguish 2 cases:
#   1. 2-class classification for young male vs. female
#   2. 4-class classification for old people
X21 = []  # young people
y21 = []
X22 = []  # old people
y22 = []
for i, lab in enumerate(first_lvl_targets):
    if lab == 0:  # young people
        X21.append(X[i, :])
        if y[i] == 2:
            y21.append(0)
# In[14]:

from sklearn.model_selection import cross_validate, KFold

cv_scores = cross_validate(estimator=text_clf, X=X_test, y=y_test,
                           cv=KFold(shuffle=True, n_splits=5))
cv_scores

# In[10]:

from sklearn.metrics import hamming_loss, accuracy_score, precision_score, recall_score, classification_report

# scikit-learn metrics take (y_true, y_pred); printed explicitly so every
# value is shown, not just the cell's last expression
print(hamming_loss(y_test, predicted))
print(accuracy_score(y_test, predicted))
print(precision_score(y_test, predicted, average='macro'))
print(precision_score(y_test, predicted, average='micro'))
print(recall_score(y_test, predicted, average='micro'))
print(recall_score(y_test, predicted, average='macro'))
print(classification_report(y_test, predicted))

# In[24]:

# Test if an SVM performs better
from sklearn.linear_model import SGDClassifier

text_clf_svm = Pipeline([('vect', CountVectorizer()),
                         ('tfidf', TfidfTransformer()),
def calc_hamming_loss(y_true, y_pred):
    '''Calculate the Hamming loss using the scikit-learn implementation.'''
    return hamming_loss(y_true, np.array(y_pred))
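# For reference, a minimal NumPy equivalent of the wrapper above for dense
# 0/1 label matrices: the Hamming loss is simply the fraction of label slots
# where prediction and truth disagree.
import numpy as np

def hamming_loss_manual(y_true, y_pred):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return float((y_true != y_pred).mean())

# e.g. one wrong slot out of four -> 0.25
print(hamming_loss_manual([[0, 1], [1, 1]], [[0, 0], [1, 1]]))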
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])
    w = np.array([1, 3])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, 1 - y2), 1)
    assert_equal(hamming_loss(y1, 1 - y1), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)
    assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12)
    assert_equal(hamming_loss(y1, 1 - y2, sample_weight=w), 11. / 12)
    assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3)
    # sp_hamming only works with 1-D arrays
    assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0]))
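# A small sketch verifying the sample-weighted assertions above by hand:
# sklearn takes the weighted mean of each sample's per-row disagreement rate,
# i.e. sum_i w_i * mean(y1_i != y2_i) / sum_i w_i.
import numpy as np

y1 = np.array([[0, 1, 1], [1, 0, 1]])
y2 = np.array([[0, 0, 1], [1, 0, 1]])
w = np.array([1, 3])

row_err = (y1 != y2).mean(axis=1)      # per-sample disagreement rates: [1/3, 0]
print(np.average(row_err, weights=w))  # (1*(1/3) + 3*0) / 4 = 1/12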
import sklearn.metrics as M

model = models[modelInd]
acc = []
pre = []
rec = []
f1 = []
ham = []
for iter in range(5):
    # Note: a fixed random_state yields the same split on every repeat.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                        random_state=35)
    model.fit(X_train, Y_train)
    Y_predict = np.rint(model.predict(X_test)).astype(np.int32)
    pre.append(M.precision_score(Y_test, Y_predict, average='micro'))
    rec.append(M.recall_score(Y_test, Y_predict, average='micro'))
    f1.append(M.f1_score(Y_test, Y_predict, average='micro'))
    ham.append(M.hamming_loss(Y_test, Y_predict))

fout.write("Precision: " + str(np.average(pre)) + " " + str(pre) + "\n")
fout.write("Recall: " + str(np.average(rec)) + " " + str(rec) + "\n")
fout.write("F1: " + str(np.average(f1)) + " " + str(f1) + "\n")
fout.write("HammingLoss: " + str(np.average(ham)) + " " + str(ham) + "\n")
fout.close()
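# Sketch: since the loop above reuses random_state=35, all five repeats see
# the same split and the averages hide any variance. Varying the seed per
# repeat (an assumption, not the original author's choice) gives real spread:
from sklearn.model_selection import train_test_split

for it in range(5):
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=35 + it)
    model.fit(X_train, Y_train)  # then score exactly as in the loop above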
def hamming_accuracy(y_true, y_pred):
    """hamming_accuracy = 1 - hamming_loss"""
    return 1 - hamming_loss(y_true, y_pred)
def test_multilabel_hamming_loss():
    # Dense label indicator matrix format
    y1 = np.array([[0, 1, 1], [1, 0, 1]])
    y2 = np.array([[0, 0, 1], [1, 0, 1]])

    assert_equal(hamming_loss(y1, y2), 1 / 6)
    assert_equal(hamming_loss(y1, y1), 0)
    assert_equal(hamming_loss(y2, y2), 0)
    assert_equal(hamming_loss(y2, np.logical_not(y2)), 1)
    assert_equal(hamming_loss(y1, np.logical_not(y1)), 1)
    assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6)
    assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5)

    with ignore_warnings():  # sequence of sequences is deprecated
        # List of tuples of labels
        y1 = [(1, 2), (0, 2)]
        y2 = [(2,), (0, 2)]

        assert_equal(hamming_loss(y1, y2), 1 / 6)
        assert_equal(hamming_loss(y1, y1), 0)
        assert_equal(hamming_loss(y2, y2), 0)
        assert_equal(hamming_loss(y2, [(), ()]), 0.75)
        assert_equal(hamming_loss(y1, [tuple(), (10,)]), 0.625)
        assert_almost_equal(
            hamming_loss(y2, [tuple(), (10,)], classes=np.arange(11)),
            0.1818, 2)
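# The tuple-of-labels format and the `classes` keyword exercised above were
# removed in later scikit-learn releases; a sketch of the modern equivalent
# binarizes the label sets first and reproduces the final assertion:
from sklearn.metrics import hamming_loss
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer(classes=list(range(11)))
y_true = mlb.fit_transform([(2,), (0, 2)])
y_pred = mlb.transform([(), (10,)])
print(hamming_loss(y_true, y_pred))  # 4 differing slots out of 22 ~= 0.1818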
def trainer(X, Y, algornum):
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2,
                                                        random_state=35)
    # Build an array of candidate algorithms (no SVM for now)
    algors = []
    #bayes = BernoulliNB()
    #svmc = svm.SVC(kernel='linear')
    dtc = DecisionTreeClassifier(random_state=32)
    rfc = RandomForestClassifier()
    #etc = ExtraTreesClassifier()
    #algors.append(bayes)
    #algors.append(svmc)
    algors.append(dtc)
    algors.append(rfc)
    #algors.append(etc)

    model = algors[algornum]
    model.fit(X_train, Y_train)
    Y_predict = model.predict(X_test)

    # Classifier evaluation
    scores = []
    scores.append(accuracy_score(Y_test, Y_predict))
    scores.append(average_precision_score(Y_test, Y_predict, average='micro'))
    #scores.append(average_precision_score(Y_test, Y_predict, average='samples'))
    scores.append(hamming_loss(Y_test, Y_predict))
    #scores.append(zero_one_loss(Y_test, Y_predict))
    #scores.append(jaccard_similarity_score(Y_test, Y_predict))
    scores.append(f1_score(Y_test, Y_predict, average='micro'))
    #scores.append(f1_score(Y_test, Y_predict, average='macro'))
    #scores.append(f1_score(Y_test, Y_predict, average='weighted'))
    #scores.append(f1_score(Y_test, Y_predict, average='samples'))
    #scores.append(label_ranking_loss(Y_test, Y_predict))
    scores.append(recall_score(Y_test, Y_predict, average='micro'))
    scoresNames = ["accuracy_score",
                   "average_precision_score micro",
                   #"average_precision_score samples",
                   "hamming_loss",
                   #"zero_one_loss",
                   #"jaccard_similarity_score",
                   "f1_score micro",
                   #"f1_score macro",
                   #"f1_score weighted",
                   #"f1_score samples",
                   #"label_ranking_loss",
                   "recall_micro"]

    # Write the scores to CSV
    '''
    evaluationDataFrame = pd.DataFrame(data={'scoreNames': scoresNames,
                                             everyFeatureNames[i]: scores})
    trans_evaluationDataFrame = evaluationDataFrame.T
    trans_evaluationDataFrame.to_csv("C:/Julia/nlp/newcode/ec.csv", mode='a')
    '''
    # Print the scores
    for j in range(len(scoresNames)):
        print(scoresNames[j], scores[j])
    return model
random_state=9000)

num_words = min(MAX_NUM_WORDS, len(word_index))

# Model
model = Sequential()
model.add(Embedding(input_dim=num_words + 1,
                    output_dim=EMBEDDING_DIM,
                    input_length=MAX_SEQUENCE_LENGTH,
                    embeddings_regularizer=regularizers.l2(0.01)))
model.add(Conv1D(128, 5, activation='tanh'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='tanh'))
model.add(MaxPooling1D(5))
model.add(Conv1D(128, 5, activation='tanh'))
model.add(MaxPooling1D(15))
model.add(Flatten())
model.add(Dense(128, activation='tanh'))
# Sigmoid output layer: one independent probability per label
model.add(Dense(len(labels_index), activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['categorical_accuracy'])

# Training ('epochs' replaces the deprecated 'nb_epoch' argument)
model.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Predict and binarize at a 0.4 threshold
predicted = model.predict(x_test, batch_size=None, verbose=0, steps=None)
predicted[predicted >= 0.4] = 1
predicted[predicted < 0.4] = 0

print("loss:" + str(metrics.hamming_loss(y_test, predicted)))
print("accuracy:" + str(metrics.accuracy_score(y_test, predicted)))
print('F1 score:' + str(f1_score(y_test, predicted, average='weighted')))
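# Sketch: instead of hard-coding the 0.4 cutoff above, the threshold could be
# tuned for micro-F1 on held-out data. x_val / y_val are assumed to exist;
# they are not part of the original snippet.
import numpy as np
from sklearn.metrics import f1_score

proba_val = model.predict(x_val)
thresholds = np.arange(0.1, 0.9, 0.05)
best = max(thresholds,
           key=lambda t: f1_score(y_val, (proba_val >= t).astype(int),
                                  average='micro'))
print("best threshold:", best)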
       for j in range(y_trainSub.shape[1])]  # (tail of the per-label comprehension that builds `ests`)

# Make a prediction for each label
preds = np.array([np.ravel(e.predict(X_test)) for e in ests])
# Project the prediction to the full label space
y_pred = proj.dot(preds)
# Round to either 0 or 1 for each entry (decision)
y_predRnd = (y_pred.transpose() > 0.5).astype(int)
# To get actual class labels in a list-of-lists (list for each row):
#lolPred = [all_classes[np.ravel(np.nonzero(y_predRnd[k, :]))].tolist()
#           for k in range(y_predRnd.shape[0])]

# Score our accuracy
stats['n_train'] += X_train.shape[0]
#stats['accuracy'] = accuracy_score(y_testBin, y_predRnd)
# Hamming loss rescaled by 42048 (presumably the size of the label space)
stats['accuracyHL'] = hamming_loss(y_testBin, y_predRnd) * 42048
# Note: newer scikit-learn requires an explicit average= for multilabel f1_score
stats['F1'] = f1_score(y_testBin, y_predRnd)
stats['accuracy_history'].append((stats['accuracyHL'], stats['F1'],
                                  stats['n_train']))
stats['runtime_history'].append((stats['accuracyHL'],
                                 time.time() - stats['t0']))
print(stats['n_train'], stats['accuracyHL'], stats['F1'])
if X_train.shape[0] < batchSizeTrain:
    break

# Now run to predict labels on the test set
testSet = csv.reader(open('Test.csv', newline=''), dialect=csv.excel)
next(testSet)  # skip the header (csv.reader objects have no pop())
batchSizeTest = 1e4
batch_iteratorTest = batchGenTestSet(size=batchSizeTest, doc_iter=testSet)
def mlc_hamming_loss(y_true, y_pred):
    return hamming_loss(y_true, y_pred)
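# Minimal usage check of the wrapper above: one disagreeing slot out of four.
import numpy as np

print(mlc_hamming_loss(np.array([[0, 1], [1, 1]]),
                       np.array([[0, 0], [1, 1]])))  # 0.25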