def create_report_binary(y_true, y_pred):
    """Summarise binary predictions as a table, a confusion matrix and an AUC.

    Args:
        y_true: ground truth, one-hot format, shape (n_batch, n_classes).
        y_pred: prediction, one-hot format, shape (n_batch, n_classes).

    Returns:
        res: pandas table with the class indices of the ground truth
            (``y_true``) and the prediction (``y_pred``), both obtained with
            ``argmax`` over axis 1, plus ``status = y_pred - y_true``
            (1: false positive, -1: false negative).
        conf: pdml confusion matrix built from the two index columns.
        auc: ``roc_auc_score`` evaluated on the same index columns, i.e. on
            hard labels rather than on class probabilities.

    Example:
        final_result, eval_metric_table, auc = create_report_binary(y_val, val_pred)
        eval_metric_table.stats()
    """
    true_idx = y_true.argmax(axis=1)
    pred_idx = y_pred.argmax(axis=1)

    res = pd.DataFrame({'y_true': true_idx, 'y_pred': pred_idx})
    # 1: fp, -1: fn
    res['status'] = res['y_pred'] - res['y_true']

    auc = roc_auc_score(y_true=res.y_true, y_score=res.y_pred)
    conf = pdml.ConfusionMatrix(y_true=res.y_true, y_pred=res.y_pred)
    return res, conf, auc
def test_pandas_confusion_cm_strings(self):
    """Build a confusion matrix from string labels and check its type.

    The fixture is written as (truth, prediction) pairs; the two label lists
    handed to ``pdml.ConfusionMatrix`` are derived from it. The resulting
    matrix must be a ``LabeledConfusionMatrix`` and is then passed to the
    shared ``asserts`` helper.
    """
    observations = [
        ('rabbit', 'cat'),
        ('cat', 'cat'),
        ('rabbit', 'rabbit'),
        ('rabbit', 'dog'),
        ('cat', 'cat'),
        ('dog', 'rabbit'),
        ('dog', 'dog'),
        ('rabbit', 'cat'),
        ('rabbit', 'rabbit'),
        ('cat', 'cat'),
        ('dog', 'rabbit'),
        ('rabbit', 'rabbit'),
    ]
    y_true = [truth for truth, _ in observations]
    y_pred = [guess for _, guess in observations]

    cm = pdml.ConfusionMatrix(y_true, y_pred)
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)
    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
def F_P_R(articles, classifier):
    """Print confusion-matrix statistics for a set of labelled articles.

    Every value in ``articles`` is unpacked as a two-item pair; the first
    item is used as the actual label and the second as the predicted label.
    Trailing whitespace is stripped from both before they are handed to
    ``pd.ConfusionMatrix`` (``pd`` must therefore expose ``ConfusionMatrix``
    -- presumably pandas_ml imported under that alias; confirm against the
    file's imports).

    Args:
        articles: mapping whose values are (actual, predicted) string pairs.
        classifier: object providing ``categories()``; the result is not used
            here, the call is only kept for parity with existing behaviour.

    Returns:
        None. The statistics are printed by ``print_stats()``.
    """
    categories = classifier.categories()

    label_pairs = [(actual, predicted) for actual, predicted in articles.values()]
    pred = [predicted.rstrip() for _, predicted in label_pairs]
    act = [actual.rstrip() for actual, _ in label_pairs]

    pd.ConfusionMatrix(act, pred).print_stats()
# Print one line per parameter combination of the grid search. ``means`` and
# ``stds`` are defined earlier in the script (outside this view).
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# NOTE: the imports below sit mid-script in the original and are kept in
# place because the top of the file is not visible from here.
import pandas as pd

# Predict the test images and write them as a two-column CSV
# (1-based ImageId, predicted Label).
pred_classes = grid.predict(test_images)
pred = pd.DataFrame({
    'ImageId': range(1, len(pred_classes) + 1),
    'Label': pred_classes
})
pred.to_csv('models/mnist-nodes512-lr001.csv', index=False)

import pandas_ml as pdml

# ConfusionMatrix expects (y_true, y_pred). The original passed the
# predictions first, which transposes the matrix and swaps the
# false-positive / false-negative based statistics.
cm = pdml.ConfusionMatrix(test_labels, pred_classes)
cm.print_stats()

# Sklearn How to Save a Model Created From a Pipeline and GridSearchCV Using Joblib or Pickle? (https://stackoverflow.com/questions/34143829/sklearn-how-to-save-a-model-created-from-a-pipeline-and-gridsearchcv-using-jobli)
#type(grid) # sklearn.model_selection._search.GridSearchCV
#len(dir(grid))
#grid.best_estimator_
#
#type(grid_result) # sklearn.model_selection._search.GridSearchCV
#len(dir(grid_result))
#grid_result.best_estimator_ # same memory address as grid
#
#
#from sklearn.externals import joblib
#joblib.dump(grid_result.best_estimator_, 'models/mnist-nodes512-lr001.pkl', compress=1) # TypeError: can't pickle _thread.RLock objects
def confusion_matrix(truth, pred):
    """Return a ``pml.ConfusionMatrix`` built from ``truth`` and ``pred``.

    Args:
        truth: ground-truth labels, passed as the first argument.
        pred: predicted labels, passed as the second argument.
    """
    return pml.ConfusionMatrix(truth, pred)
def ConfusionMatrixResults(y_pred, yn):
    """Print confusion-matrix statistics for predictions against labels.

    Both inputs are cast to ``int``; the labels are additionally flattened
    with ``ravel()`` before being used as the first (true) argument.

    Args:
        y_pred: predicted labels (needs ``astype``).
        yn: actual labels (needs ``astype`` and ``ravel``).

    Returns:
        None. The statistics are printed by ``print_stats()``.
    """
    actual = yn.astype(int).ravel()
    predicted = y_pred.astype(int)
    pdl.ConfusionMatrix(actual, predicted).print_stats()
dec = 4
box = []
fig_box = []
f1_score = []
sensitivity = []
specificity = []
accuracy = []

# count a number of images in the file in order to reshape the numpy array accordingly
image_num = 0
for m, p in zip(mask_list, pred_list):
    image_num = image_num + 1

    m_img = np.array(Image.open(m), dtype=np.float32)
    p_img = np.array(Image.open(p), dtype=np.float32)
    pdml_cm = pdml.ConfusionMatrix(m_img.ravel(), p_img.ravel())

    # print_stats() only writes to stdout, so stdout is pointed at a scratch
    # file for the duration of the call. The original never restored
    # sys.stdout and closed the file while it was still installed, which
    # breaks every later print. Its stray ``print >> ...`` (valid only on
    # Python 2, where it added a blank line) is dropped; the parser below
    # ignores blank lines.
    saved_stdout = sys.stdout
    with open("tmp.txt", 'w') as tmp_file:
        sys.stdout = tmp_file
        try:
            pdml_cm.print_stats()
        finally:
            sys.stdout = saved_stdout

    # Pull the numeric value that follows each recognised label out of the
    # captured report.
    data = dict(name=[], data=[])
    with open("tmp.txt", 'r') as f:
        for line in f:
            line = line.rstrip('\n')
            if "TP:" in line:
                data["name"].append("True Positive")
                data["data"].append(float(line.split(' ')[1]))
            elif "TN:" in line:
                data["name"].append("True Negative")
                data["data"].append(float(line.split(' ')[1]))
#x,y = ros.fit_sample(x, y.ravel())
#print len(x)
#print len(y)

# Rows [0, train_rows) are used for fitting, the remaining rows for
# evaluation. The original evaluation split removed only rows 0..2998, so
# row 2999 ended up in both sets; both splits now share one boundary.
train_rows = 3000
xo = np.delete(x, np.arange(train_rows, len(df1)), axis=0)
yo = np.delete(y, np.arange(train_rows, len(df1)), axis=0)
xn = np.delete(x, np.arange(0, train_rows), axis=0)
yn = np.delete(y, np.arange(0, train_rows), axis=0)

clf = RandomForestClassifier(n_estimators=10,
                             max_depth=None,
                             min_samples_split=2,
                             random_state=0)
clf.fit(xo, yo.astype(int).ravel())

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("The mean accuracy score for the training data set is ")
print(clf.score(xo, yo.astype(int)))

# Score the held-out rows and print the confusion-matrix statistics.
y_pred = clf.predict(xn)
cm = pdl.ConfusionMatrix(yn.astype(int).ravel(), y_pred)
cm.print_stats()
#print df2
# Plot both signals
# Both series share the same time axis. The index is assigned directly
# because Series.set_axis(..., inplace=True) no longer exists in pandas 2.x.
time_axis = np.linspace(0.0, 9.9, num=numpoints, endpoint=True)
for samples, line_style in ((SIG, dict(linewidth=3, color='red')),
                            (SIG_HAT, dict(linestyle='--', color='black'))):
    sigtoplot = pd.Series(samples)
    sigtoplot.index = time_axis
    sigtoplot = sigtoplot.interpolate(method='cubic')
    sigtoplot.plot(**line_style)

plt.title('Signal Comparison')
plt.ylabel('Signal Voltage')
plt.xlabel('Time (s)')
plt.legend(['Input', 'Output'], loc='upper left')
plt.show()

# Symbol error rate: number of positions where the decoded symbol differs
# from the transmitted one, divided by the signal length.
symbol_diff = sum(1 for n in range(sig_hat.size) if sig_hat[n] != sig[n])
SER = symbol_diff / siglength
print(SER)
print('\n\n'+str(100*(siglength-symbol_diff)/siglength)+'\n\n')

if confusion:
    confusion_matrix = pdml.ConfusionMatrix(sig, sig_hat)
    stats = confusion_matrix.stats()
    confusion_matrix.plot(normalized=True)
    plt.xticks([])
    plt.yticks([])
    plt.savefig('ForPub/Confusion_M'+str(M)+'_L'+str(L)+'_SNR'+str(SNR))
    plt.show()
def test_cnn(self,
             imageNumber,
             hdf5Number,
             patchNumber,
             imageHeight=360,
             imageWidth=480,
             use_trained_weights=False):
    """Run pixel-wise inference and log one confusion matrix per image.

    Every forward pass of the net contributes a single pixel: pixel (17, 17)
    of the input patch, the argmax of the ``prob`` blob and the value of the
    ``label`` blob. Pixels are collected row by row until
    ``imageHeight * imageWidth`` of them are available, then reshaped into
    images, cropped (17 pixels at the top/left, 16 at the bottom/right) and
    written to ``self.output_area`` as ``raw_<i>.tif``, ``prediction_<i>.png``
    and ``mask_<i>.png``. The confusion-matrix statistics of each image are
    appended to ``confusion_matrix.txt`` in the same directory.

    Args:
        imageNumber: number of images to reconstruct.
        hdf5Number: with ``patchNumber`` bounds the forward passes per image
            (at most ``hdf5Number * patchNumber``); that product must be at
            least ``imageHeight * imageWidth`` or the reshape fails.
        patchNumber: see ``hdf5Number``.
        imageHeight: height in pixels of a reconstructed image.
        imageWidth: width in pixels of a reconstructed image.
        use_trained_weights: when ``False`` the first entry returned by
            ``self.get_caffemodel()`` is used, otherwise
            ``self.namedtupleConfig.trained_weights``.

    Raises:
        SystemExit: if ``self.get_caffemodel()`` returns nothing.
    """
    if use_trained_weights is False:
        weights = self.get_caffemodel()
        if len(weights) == 0:
            log.info("Caffe model is not there!")
            sys.exit()
        log.info("Caffe model found: " + str(weights[0]) + "")
        cafee_model = weights[0]
    else:
        cafee_model = self.namedtupleConfig.trained_weights
        log.info("Caffe model used: " + str(cafee_model) + "")

    # The accuracy log will be saved to this location
    txtloc = self.output_area + "/confusion_matrix.txt"
    # set using GPU, if use CPU, call caffe.set_mode_cpu()
    caffe.set_mode_gpu()
    net = caffe.Net(self.test_prototxt, cafee_model, caffe.TEST)  # load net
    pixels_per_image = imageHeight * imageWidth

    # The context manager closes the accuracy log even if inference fails;
    # the original never closed it.
    with open(txtloc, 'w+') as fid:
        fid.write("pandas implementation of Confusion Matrix\n")
        for image_i in range(imageNumber):
            pInfer = []
            lInfer = []
            rawImg = np.zeros((8, imageHeight, imageWidth))
            pixels = 0
            # The product only bounds the number of forward passes; the loop
            # variables themselves are not needed.
            for _hdf5, _patch in product(range(hdf5Number),
                                         range(patchNumber)):
                if pixels >= pixels_per_image:
                    break
                # everytime net forward is called, one batch of output is
                # produced. In our case, batch=1 in inference model
                net.forward()
                image = net.blobs['data'].data
                label = net.blobs['label'].data
                # probability, number of channels is equal to number of
                # outputs in the model
                predicted = net.blobs['prob'].data

                # raw images
                rawImgPatch = np.squeeze(image[0, :, :, :])
                rawImgPixel = rawImgPatch[:, 17, 17]
                row = pixels // imageWidth
                col = pixels % imageWidth
                rawImg[:, row, col] = rawImgPixel
                # prediction images
                pInfer.append(np.argmax(np.array(predicted[0])))
                # mask images
                lInfer.append(np.array(label[0]))
                pixels = pixels + 1

            # raw images
            rawImg = rawImg.transpose((1, 2, 0))
            rawImg = rawImg / 255
            rawImg = rawImg[:, :, (2, 1, 0)]
            rawImg = rawImg[17:-16, 17:-16, :]
            # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2; this
            # call needs an older SciPy or a replacement writer.
            scipy.misc.imsave(
                self.output_area + "/raw_" + str(image_i) + '.tif', rawImg)

            # predicted images
            predImg = np.reshape(pInfer, (imageHeight, imageWidth))
            predImg = predImg[17:-16, 17:-16]
            predImg2 = Image.fromarray(np.uint8(255 * (np.array(predImg))))
            predImg2.save(self.output_area + "/prediction_" + str(image_i) +
                          ".png")

            # mask images
            labelImg = np.reshape(lInfer, (imageHeight, imageWidth))
            labelImg = labelImg[17:-16, 17:-16]
            labelImg2 = Image.fromarray(np.uint8(255 * (np.array(labelImg))))
            labelImg2.save(self.output_area + "/mask_" + str(image_i) +
                           ".png")

            pdml_cm = pdml.ConfusionMatrix(labelImg.ravel(), predImg.ravel())
            # print_stats() writes to stdout, so stdout is pointed at the log
            # only for this call and restored afterwards. The original left
            # sys.stdout redirected for good and, by printing the return
            # value of print_stats(), added a stray "None" line to the log.
            saved_stdout = sys.stdout
            sys.stdout = fid
            try:
                pdml_cm.print_stats()
            finally:
                sys.stdout = saved_stdout
def test(self,
         imageNumber,
         imageHeight=360,
         imageWidth=480,
         use_trained_weights=False):
    """Run the inference net on ``imageNumber`` images and log the results.

    Following batch normalization, a caffe neural network model can be used
    to make predictions on a set of unknown data. The net is built from
    ``self.test_prototxt`` (an 'inference' model such as
    segnet_inference_9chan.prototxt) together with the selected weights.
    Each forward pass yields one image; its per-pixel argmax over the
    ``prob`` blob is compared against the ``label`` blob, the
    confusion-matrix statistics are appended to ``confusion_matrix.txt`` in
    ``self.output_area``, and ``raw_<i>.tif``, ``prediction_<i>.tif`` and
    ``mask_<i>.tif`` are saved next to it.

    PLEASE NOTE: If an error occurs, such as a kernel shutdown, make sure the
    directory to the hdf5 database, '___.txt', is correctly identified in
    the inference prototxt file.

    Args:
        imageNumber: number of images in the hdf5 database that will be
            processed by this function.
        imageHeight: not used by this implementation; kept so existing
            callers keep working.
        imageWidth: not used by this implementation; kept so existing
            callers keep working.
        use_trained_weights: when ``False`` the first entry returned by
            ``self.get_caffemodel()`` is used, otherwise
            ``self.namedtupleConfig.trained_weights``.

    Raises:
        SystemExit: if ``self.get_caffemodel()`` returns nothing.
    """
    if use_trained_weights is False:
        weights = self.get_caffemodel()
        if len(weights) == 0:
            log.info("Caffe model is not there!")
            sys.exit()
        log.info("Caffe model found: " + str(weights[0]) + "")
        cafee_model = weights[0]
    else:
        cafee_model = self.namedtupleConfig.trained_weights
        log.info("Caffe model used: " + str(cafee_model) + "")

    # The accuracy log will be saved to this location
    txtloc = self.output_area + "/confusion_matrix.txt"
    # set using GPU, if use CPU, call caffe.set_mode_cpu()
    caffe.set_mode_gpu()
    net = caffe.Net(self.test_prototxt, cafee_model, caffe.TEST)  # load net

    # The context manager closes the accuracy log even if inference fails;
    # the original never closed it.
    with open(txtloc, 'w+') as fid:
        fid.write("pandas implementation of Confusion Matrix\n")
        for i in range(imageNumber):
            # everytime net forward is called, one batch of output is
            # produced. In our case, batch=1 in inference model
            net.forward()
            image = net.blobs['data'].data
            label = net.blobs['label'].data
            # probability, number of channels is equal to number of outputs
            # in the model
            predicted = net.blobs['prob'].data

            image = np.squeeze(image[0, :, :, :])
            # change data dimension from channel*H*W to H*W*channel
            image = image.transpose((1, 2, 0))
            image = image / 255
            image = image[:, :, (2, 1, 0)]  # switch RGB channel

            # display image
            #BM-no-plt plt.imshow(image)
            #BM-no-plt plt.figure(i + 1)
            output = np.squeeze(predicted[0, :, :, :])  # squeeze 4D to 3D
            ind = np.argmax(output, axis=0)
            label = np.squeeze(label)
            ind = ind.astype('uint8')
            label = label.astype('uint8')

            fid.write('=' * 30 + ("image %d:" % i) + '=' * 30 + '\n')
            pdml_cm = pdml.ConfusionMatrix(label.ravel(), ind.ravel())
            # print_stats() writes to stdout, so stdout is pointed at the log
            # only for this call and restored afterwards. The original left
            # sys.stdout redirected for good and, by printing the return
            # value of print_stats(), added a stray "None" line to the log.
            saved_stdout = sys.stdout
            sys.stdout = fid
            try:
                pdml_cm.print_stats()
            finally:
                sys.stdout = saved_stdout

            # save Result, Mask and Image
            # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2; these
            # calls need an older SciPy or a replacement writer.
            scipy.misc.imsave(self.output_area + "/raw_" + str(i) + '.tif',
                              image)
            # convert to float to save visible mask
            scipy.misc.imsave(
                self.output_area + "/prediction_" + str(i) + '.tif',
                ind.astype(float))
            scipy.misc.imsave(self.output_area + "/mask_" + str(i) + '.tif',
                              label.astype(float))
# NOTE(review): this fragment starts inside a method whose ``def`` line (and
# the enclosing class header) lie outside the visible source. The indentation
# depth of the two weight updates below is a best guess -- confirm against
# the full file.
        # Move both weight matrices along dw1 / dw0, scaled by the learning
        # rate self.lr.
        self.w1 = self.w1 + self.lr * dw1
        self.w0 = self.w0 + self.lr * dw0
        # Debug aid (disabled): print("Cost: " + str(self.cost(pred, y)))

    def predict(self, X):
        """Return the network output for the samples in ``X``.

        ``X`` is transposed before it is multiplied with ``self.w0``; the
        activation of that product is then multiplied with ``self.w1`` and
        passed through the activation again.
        """
        a0 = self.activation(self.w0 @ X.T)
        pred = self.activation(self.w1 @ a0)
        return pred

    def cost(self, pred, y):
        """Return half the squared error, summed over axis 0 and then averaged.

        ``y`` is transposed so that it lines up with ``pred``.
        """
        # SUM((y - pred)^2)
        s = (1 / 2) * (y.T - pred)**2
        return np.mean(np.sum(s, axis=0))


# Evaluation script: restore previously saved weights and score the test set.
model = NeuralNetwork(0.25)
# NOTE(review): pickle.load can execute arbitrary code; only load weight
# files that come from a trusted source.
with open("w0.p", "rb") as file:
    model.w0 = pickle.load(file)
with open("w1.p", "rb") as file:
    model.w1 = pickle.load(file)

# Inputs are divided by 255 before prediction (presumably 8-bit pixel
# values -- confirm). The predicted class is the argmax over axis 0 of the
# network output.
y_test_pred = model.predict(X_test / 255.)
y_test_pred = np.argmax(y_test_pred, axis=0)

import pandas_ml as pdml
cfm = pdml.ConfusionMatrix(y_test, y_test_pred)
print(cfm)
def pdml_confusion_matrix(self):
    """Return a pdml ConfusionMatrix of ``self.Y`` against ``self.y_pred``.

    ``self.Y`` is passed as the first argument and ``self.y_pred`` as the
    second.
    """
    actual = self.Y
    predicted = self.y_pred
    return pdml.ConfusionMatrix(actual, predicted)