def test_pandas_confusion_normalized():
    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)
    assert(df_norm.sum(axis=1).sum() == len(df))
def test_pandas_confusion_cm_stats_integers():
    y_true = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
    y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
    print("y_true: %s" % y_true)
    print("y_pred: %s" % y_pred)
    cm = ConfusionMatrix(y_true, y_pred)
    assert isinstance(cm.stats(), OrderedDict)
    cm.print_stats()
def test_pandas_confusion_max_min():
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % y_true)
    print("y_pred: %s" % y_pred)
    cm = ConfusionMatrix(y_true, y_pred)
    assert cm.max() == 3
    assert cm.min() == 0
def test_pandas_confusion_cm_int():
    y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    labels = ["ant", "bird", "cat"]
    cm = ConfusionMatrix(y_true, y_pred, labels=labels)
    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
    assert cm.len() == len(labels)
def test_pandas_confusion_cm_empty_row():
    y_true = [2, 0, 2, 2, 0, 0]
    y_pred = [0, 0, 2, 2, 1, 2]
    # cm = ConfusionMatrix(y_true, y_pred)
    cm = ConfusionMatrix(y_true, y_pred, labels=["ant", "bird", "cat"])
    cm = ConfusionMatrix(y_true, y_pred)
    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
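# NOTE: several tests in this collection call an `asserts(y_true, y_pred, cm)`
# helper that is defined elsewhere in the original test module. The function
# below is only a sketch of what such a helper could plausibly check; it is an
# assumption, not the project's actual implementation.
import numpy as np

def asserts(y_true, y_pred, cm):
    df = cm.to_dataframe()
    # Every observation must land in exactly one cell of the matrix.
    assert df.values.sum() == len(y_true) == len(y_pred)
    # The diagonal must count exactly the pairs where prediction equals truth.
    n_correct = sum(t == p for t, p in zip(y_true, y_pred))
    assert np.trace(df.values) == n_correct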
def test_value_counts():
    df = pd.DataFrame({
        'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
        'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
        'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
        'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
    })
    cm = ConfusionMatrix(df["Size"], df["SizePred"])
    assert (cm.true - df.Size.value_counts()).sum() == 0
    assert (cm.pred - df.SizePred.value_counts()).sum() == 0
    cm.print_stats()
def accuracy_info(self):
    answerListB = self.testing_answer.tolist()
    answerList = [answer.index(1) for answer in answerListB]
    cm = ConfusionMatrix(answerList,
                         self.sess.run(self.predict_op,
                                       feed_dict={self.data_placeholder: self.testing_data,
                                                  self.answer_placeholder: self.testing_answer}))
    cmData = cm.to_array('a')
    acc = [cmData[0][0], cmData[1][1], cmData[2][2]]
    print cm
    print acc
def train_test_and_evaluate(pipeline, X_train, y_train, X_test, y_test):
    pipeline.fit(X_train, y_train)
    y_pred_class = pipeline.predict(X_test)
    unique_label = np.unique(y_test)
    matrix = ConfusionMatrix(y_test, y_pred_class, labels=['True Value', 'Predicted Value'])
    print('-' * 75 + '\nConfusion Matrix\n')
    print(matrix)
    print('f1_score', f1_score(y_test, y_pred_class, average="macro"))
    print('precision', precision_score(y_test, y_pred_class, average="macro"))
    print('recall', recall_score(y_test, y_pred_class, average="macro"))
    return pipeline, matrix.to_dataframe(), y_pred_class
def test_pandas_confusion_cm_binarize():
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(y_true, y_pred)
    print("Confusion matrix:\n%s" % cm)
    select = ['cat', 'dog']
    print("Binarize with %s" % select)
    binary_cm = cm.binarize(select)
    print("Binary confusion matrix:\n%s" % binary_cm)
    assert cm.sum() == binary_cm.sum()
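# NOTE: a minimal, standalone illustration (using only pandas, not the
# library's internals) of the invariant the binarize test above relies on:
# collapsing the labels into "selected" / "not selected" merges cells but
# never changes the total number of observations. Names here are hypothetical.
import pandas as pd

y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
select = {'cat', 'dog'}

full = pd.crosstab(pd.Series(y_true, name='Actual'), pd.Series(y_pred, name='Predicted'))
binary = pd.crosstab(pd.Series(y_true, name='Actual').isin(select),
                     pd.Series(y_pred, name='Predicted').isin(select))
assert full.values.sum() == binary.values.sum() == len(y_true)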
def write_score_file(score_file, f1_mean, f1, model, Y_test_indices, y_prediction_classes):
    with open(score_file, 'w') as score_handler:
        score_handler.write("Micro-averaged F1: {}\n".format(f1_mean))
        score_handler.write("Individual scores: {}\n".format(f1))
        score_handler.write("Confusion matrix:\n")
        score_handler.write(str(ConfusionMatrix(Y_test_indices, y_prediction_classes)))
        score_handler.write("\n\n\nModel summary\n")
        model.summary(print_fn=lambda x: score_handler.write(x + '\n'))
def test_pandas_confusion_normalized_issue1():
    # should ensure issue 1 is fixed
    # see http://stackoverflow.com/questions/19233771/sklearn-plot-confusion-matrix-with-labels/31720054#31720054
    y_true = ['business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business']
    y_pred = ['health', 'business', 'business', 'business', 'business', 'business', 'health', 'health', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'business', 'health', 'health', 'business', 'health']
    cm = ConfusionMatrix(y_true, y_pred)
    df = cm.to_dataframe()
    df_norm = cm.to_dataframe(normalized=True)
    assert(df_norm.sum(axis=1, skipna=False).fillna(1).sum() == len(df))
def train_test_and_evaluate(pipeline, X_train, y_train, X_test, y_test):
    pipeline.fit(X_train, y_train)
    y_pred_class = pipeline.predict(X_test)
    confusion_matrix = ConfusionMatrix(list(y_test), list(y_pred_class))
    display_accuracy_difference(y_test, y_pred_class)
    classification_report = confusion_matrix.classification_report
    print '-' * 75 + '\nConfusion Matrix\n'
    print confusion_matrix
    print '-' * 75 + '\nClassification Report\n'
    print classification_report
    return pipeline, confusion_matrix
def evaluateModel(pp, preds, fold):
    # ************ PROCESSING THE PREDICTIONS
    preds[preds >= 0.5] = 1
    preds[preds < 0.5] = 0
    print("F1 SCORE:")
    print(f1_score(pp.test_y, preds, average=None))
    print("Hamming Loss:")
    print(hamming_loss(pp.test_y, preds))
    print("Zero-one loss:")
    print(zero_one_loss(pp.test_y, preds))
    # I reach here in plus_one_hot_encode, I want to transform it in one hot
    y_test_preds = utils.from_plus_to_one_hot(np.array(pp.test_y))
    preds_transf = utils.from_plus_to_one_hot(np.array(preds))
    cm = ConfusionMatrix(np.array(y_test_preds).argmax(1), np.array(preds_transf).argmax(1))
    print(cm)
    ax = cm.plot()
    ax.set_xticklabels(classes, rotation="vertical")
    ax.set_yticklabels(classes)
    plt.savefig("cmpre{0}.png".format(fold))
def number_pred(save, show):
    y_true = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
    y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
    cm = ConfusionMatrix(y_true, y_pred)
    # print(cm.binarize(100).P)
    # cm.enlarge(300)
    # cm.enlarge([300, 400])
    print(cm)
    cm.plot()
    filename = 'numbers.png'
    if save:
        plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
    if show:
        plt.show()
    # print("")
    # print(cm.classes)
    # print("")
    # cm.print_stats(None)
    cm.print_stats()
def test_pandas_confusion_get():
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % y_true)
    print("y_pred: %s" % y_pred)
    cm = ConfusionMatrix(y_true, y_pred)
    assert cm.get("cat") == cm.get("cat", "cat")
    assert cm.get("cat") == 3
    assert cm.get("dog") == 1
    assert cm.get("rabbit") == 3
    assert cm.get("dog", "rabbit") == 2
def get_confusion_matrix(_results_file):
    # read the parameter passed in (the original used an undefined `results_file` here)
    df = pd.read_csv(_results_file, sep='\t', header=None)
    true_lbls = df[1]
    pred_lbls = df[2]
    confusion_matrix = ConfusionMatrix(true_lbls, pred_lbls)
    confusion_matrix.plot()
    cm_file = _results_file.replace('.txt', '_cm.jpg')
    plt.savefig(cm_file)
    print()
    print(confusion_matrix)
    print()
    cm = confusion_matrix.to_dataframe()
    correct = 0
    for i in range(cm.shape[0]):
        correct += cm.iloc[i][i]
        recall = cm.iloc[i][i] * 100 / cm.sum(axis=0)[i]
        prec = cm.iloc[i][i] * 100 / cm.sum(axis=1)[i]
        print('Class %s recall = %.4f precision = %.4f' % (cm.columns[i], recall, prec))
    print('Overall accuracy = %.4f' % float(correct * 100 / sum(cm.sum(axis=0))))
def eval_fish_net(weights, test_iters=10):
    prediction = []
    test_net = caffe.Net(fish_net(train=False), weights, caffe.TEST)
    accuracy = 0
    for i in range(0, 30):
        rainbow = test_net.forward()['probs'][i][0]
        blue = test_net.forward()['probs'][i][1]
        chain = test_net.forward()['probs'][i][2]
        if (rainbow > blue) and (rainbow > chain):
            prediction.append("rainbow_trout")
        if (blue > rainbow) and (blue > chain):
            prediction.append("bluegill")
        if (chain > blue) and (chain > rainbow):
            prediction.append("chain_pickerel")
    for it in xrange(test_iters):
        accuracy += test_net.forward()['acc']
    accuracy /= test_iters
    # `actual` (the ground-truth labels) is expected to be defined in the enclosing scope.
    confusion_matrix = ConfusionMatrix(actual, prediction)
    print("Confusion matrix:\n%s" % confusion_matrix)
    return test_net, accuracy
def size_pred(save, show):
    df = pd.DataFrame({
        'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
        'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
        'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
        'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
    })
    cm = ConfusionMatrix(df["Size"], df["SizePred"])
    print(cm)
    cm.print_stats()
    cm.plot()
    filename = 'size.png'
    if save:
        plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
    if show:
        plt.show()
    'business', 'health', 'health', 'business', 'business', 'business',
    'business', 'business', 'business', 'business', 'business',
    'health', 'health', 'business', 'health']
"""

y_test = [2, 1, 1, 5, 5, 2, 7, 5, 3, 3, 1, 1, 5, 1, 3, 7, 5, 7, 3, 2, 1, 7, 1,
          3, 2, 5, 2, 7, 2, 1, 7, 3, 3, 3, 7, 5, 7, 5, 7, 1, 7, 2, 5, 1, 2, 2, 1, 7, 5,
          3, 7, 3, 7, 3, 3, 2, 7, 3, 1, 7, 2, 1, 7, 5, 7, 3, 2, 5, 1, 2, 3, 2, 7, 7, 3,
          7, 1, 3, 5, 1, 7, 1, 7, 1, 7, 7, 5, 3, 7, 2, 1, 5, 7, 1, 3, 7, 2, 5, 2, 1, 3,
          5, 2, 5, 2, 5, 3, 1, 7, 3, 1, 2, 3, 2, 5, 5, 7, 1, 1, 3, 5, 2, 3, 7, 7]
y_pred = [2, 5, 1, 5, 5, 2, 7, 5, 3, 3, 1, 1, 5, 1, 3, 7, 5, 7, 3, 2, 1, 7, 1,
          3, 2, 5, 2, 5, 2, 1, 7, 3, 3, 3, 7, 5, 7, 5, 3, 5, 7, 2, 5, 1, 2, 2, 1, 7, 5,
          3, 7, 3, 7, 3, 3, 2, 7, 3, 1, 7, 2, 1, 7, 5, 7, 3, 2, 5, 1, 2, 3, 2, 7, 7, 3,
          7, 1, 2, 5, 1, 7, 3, 7, 3, 7, 7, 5, 3, 7, 2, 1, 5, 5, 1, 3, 1, 2, 5, 2, 1, 3,
          5, 2, 5, 2, 1, 3, 2, 7, 3, 1, 2, 1, 2, 5, 5, 7, 1, 1, 3, 5, 2, 3, 7, 7]

"""
y_test = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200,
          200, 200, 200, 200, 200, 200]
y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100,
          100, 100, 100, 100, 500, 200]
"""

#print type(y_pred)
#test_str = map(str,y_test)
#pred_str = map(str,y_pred)

cm = ConfusionMatrix(y_test, y_pred)
#cm = ConfusionMatrix(test_str, pred_str)
print cm
cm.print_stats()
def benchmark(subtask, config_parameters):
    t0 = time.time()
    logger = logging.getLogger()
    np.random.seed(14021993)

    # Step 1) Load dataset
    train_path = 'data/THF/sentence/subtask{}_v3_train.json'.format(subtask)
    test_path = 'data/THF/sentence/subtask{}_v3_test.json'.format(subtask)
    number_of_classes = 2 if subtask == 'A' else 3
    # config_parameters['number_of_classes'] = number_of_classes
    embedding_cache = None
    if config_parameters['embeddings_cache_name']:
        embedding_cache_path = 'data/embedding_cache/{}'.format(config_parameters['embeddings_cache_name'])
        logger.info('Loading embedding cache: {}'.format(embedding_cache_path))
        embedding_cache = pickle.load(open(embedding_cache_path, "rb"))
        logger.info('Embedding cache loaded')
    logger.debug('Create mapping')
    word_to_index_mapping, index_to_embedding_mapping = vocabulary_builder.create_mappings(
        train_path=train_path, test_path=test_path, word_to_embedding_cache=embedding_cache)
    logger.info('The embedding layer has {} entries'.format(len(word_to_index_mapping)))
    logger.debug('Loading train and test set')
    X_train, Y_train, train_unique_ids, Y_train_indices = load_dataset(
        train_path, word_to_index_mapping, subtask, config_parameters['padding_length'])
    X_test, Y_test, test_unique_ids, Y_test_indices = load_dataset(
        test_path, word_to_index_mapping, subtask, config_parameters['padding_length'])

    # Step 2) Create model with parameters
    model_parameters = config_parameters['keras_model_parameters']
    logger.info(config_parameters)
    if config_parameters['embeddings_cache_name']:
        model_parameters['index_to_embedding_mapping'] = index_to_embedding_mapping
    model = model_selector.get_model(config_parameters['keras_model_name'], number_of_classes, model_parameters)

    # Step 3) Train the model
    logger.info('Train...')
    current_time = time.strftime('%Y%m%d_%H%M%S')
    model_save_path = 'results/sentence_deeplearning/temp/{}_{}_{}_{}'.format(
        subtask, config_parameters['keras_model_name'],
        '{:03}'.format(config_parameters['evaluation_ID']), current_time)
    checkpoint_save_path = model_save_path + "_best.hdf5"
    checkpoint = ModelCheckpoint(checkpoint_save_path, monitor='val_acc', verbose=0, save_best_only=True, mode='auto')
    model.fit(X_train, Y_train,
              batch_size=config_parameters['batch_size'],
              epochs=config_parameters['epochs'],
              verbose=1,
              callbacks=[checkpoint],
              validation_data=(X_test, Y_test))

    # Step 4) Save the last model
    model.save(model_save_path + "_last.hdf5")
    model.summary()

    # Calculate results for the best and the last model
    saved_models = [{'name': 'best', 'extension': '_best.hdf5'},
                    {'name': 'last', 'extension': '_last.hdf5'}]
    for saved_model in saved_models:
        model_load_path = model_save_path + saved_model['extension']
        logger.info('Loading model for prediction: {}'.format(model_load_path))
        model = load_model(model_load_path)

        # Step 5) Predict the test set
        score, acc = model.evaluate(X_test, Y_test, batch_size=config_parameters['batch_size'])
        y_prediction = model.predict(X_test, batch_size=config_parameters['batch_size'])
        y_prediction_classes = np.argmax(y_prediction, axis=1)

        # Step 6) Print results
        logger.info(y_prediction_classes)
        logger.info('Test score: {}'.format(score))
        logger.info('Test accuracy: {}'.format(acc))
        f1 = f1_score(Y_test_indices, y_prediction_classes, average=None)
        f1_mean = np.mean(f1)
        logger.info("Macro-averaged F1: {}".format(f1_mean))
        logger.info("Individual scores: {}".format(f1))
        logger.info("Confusion matrix:")
        logger.info(ConfusionMatrix(Y_test_indices, y_prediction_classes))
        output_path_base = 'results/sentence_deeplearning/temp/{}_{}_{}_{}_{}'.format(
            subtask, config_parameters['keras_model_name'],
            '{:03}'.format(config_parameters['evaluation_ID']),
            current_time, saved_model['name'])

        # Step 7) Print results to the file system
        utils.write_prediction_file(path=output_path_base + '.predictions',
                                    test_unique_ids=test_unique_ids,
                                    Y_test_indices=Y_test_indices,
                                    y_prediction_classes=y_prediction_classes)
        utils.write_score_file(score_file=output_path_base + '.score',
                               f1_mean=f1_mean, f1=f1, model=model,
                               Y_test_indices=Y_test_indices,
                               y_prediction_classes=y_prediction_classes)

    print("Total execution time in %0.3fs" % (time.time() - t0))
    print("*****************************************")
mean_fare = data_test_clean['Fare'].dropna().mean()
data_test_clean.iloc[:, 5][data_test_clean.iloc[:, 5].isnull()] = mean_fare
# data_test_clean = data_test_clean.dropna()
data_test_clean['Age'] = data_test_clean['Age'].astype('int64')
data_test_clean['Fare'] = data_test_clean['Fare'].astype('int64')
data_test_clean['Sex'] = lb.fit_transform(data_test_clean['Sex'])
data_test_clean['Embarked'] = lb2.fit_transform(data_test_clean['Embarked'])

# LOGISTIC REGRESSION
logr = LogisticRegression(class_weight='balanced')
logr = logr.fit(x_train, y_train)
logr_predictions_train = logr.predict(x_train)
accuracy_train = accuracy_score(y_train, logr_predictions_train)
print accuracy_train  # 0.78675
cm1 = ConfusionMatrix(y_train, logr_predictions_train)
# cm1.print_stats()

logr_predictions_test = logr.predict(data_test_clean)
data_test_id = data_test['PassengerId'].values
prediction_file = open("RESULTS_LR4.csv", "wb")
prediction_file_object = csv.writer(prediction_file)
prediction_file_object.writerow(["PassengerId", "Survived"])
print 'This needs to be 418: ', len(data_test_id)
print 'This needs to be 418: ', len(logr_predictions_test)
prediction_file_object.writerows(zip(data_test_id, logr_predictions_test))
prediction_file.close()
# Accuracy = 0.73206

# REGULARIZATION - search for the best parameter
def main():
    filename = "mushroom.csv"
    idtree = id3_tree.id3_tree()

    ''' split the data into data features, output classes and label names. '''
    data, classes, feature_names = idtree.read_data(filename)
    data, classes = shuffle(data, classes, random_state=0)
    target_label = dict(Counter(classes))
    print "target dataset contains Poisonous: %d and Edible: %d" % (target_label.values()[0], target_label.values()[1])

    ''' Split mushroom data into training and test data set:
        3/4 training and 1/4 test dataset '''
    ntrain = int(0.75 * np.shape(data)[0])
    train_data = data[:ntrain]
    test_data = data[ntrain:]
    train_target = classes[:ntrain]
    test_target = classes[ntrain:]
    print "\n\nThe data split into train data: (%s, %s)" % np.shape(train_data)
    print "and Test dataset: (%s, %s) \n\n" % np.shape(test_data)

    feature_info_gain = []
    feature = np.shape(feature_names)[0]
    feature_entropy = []
    for i in range(feature):
        feature_entropy = idtree.calc_information_gain(data, classes, i)
    for i in range(feature):
        feature_info_gain.append([feature_names[i], feature_entropy[i]])
    print "Information gain for the whole features in mushroom dataset\n"
    df = pd.DataFrame(feature_info_gain)
    print tabulate(feature_info_gain, headers=("Feature Name", "Information Gain"), tablefmt="orgtbl") + "\n \n \n "

    ''' train the dataset and creating tree from the training data '''
    mtree = idtree.make_tree(train_data, train_target, feature_names, maxlevel=4)
    idtree.printTree(mtree, '')
    predicted_output = idtree.classifyAll(mtree, test_data)
    confusion_matrix = ConfusionMatrix(test_target, predicted_output)
    print("\nConfusion matrix:\n\n%s" % confusion_matrix)
    accuracyScore = accuracy_score(test_target, predicted_output)
    print "\nAccuracy Score: %0.2f " % (accuracyScore * 100) + "%"

    ''' calculating the precision and recall from the confusion matrix '''
    precision, recall = calc_precision_recall(test_target, predicted_output)
    print "\nprecision and recall of the classifier is: \nPrecision %0.2f \nRecall %0.4f " % (precision, recall)
    print

    accuracyscores, perf_measure = early_stopping(train_data, train_target, test_data, test_target, feature_names)
    print "Performance of the classifier with early stopping"
    print tabulate(perf_measure[:6], headers=("TP", "FP", "TN", "FN"), tablefmt="orgtbl") + "\n \n \n "
def test_pandas_confusion_cm_strings():
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(y_true, y_pred)
    print("Confusion matrix:\n%s" % cm)
    asserts(y_true, y_pred, cm)
## Import Test data created on R
gname = "NewsDataTest.csv"
full_file_test = path + "/" + gname
data_test = np.loadtxt(full_file_test, delimiter=",", skiprows=1, usecols=tuple(cols[1:]))
X_test = data_test[:, 1:-2]
y_test = data_test[:, -1]

# I set up a model with prior weights
logistic = linear_model.LogisticRegression(class_weight='balanced')
logistic = logistic.fit(X, y)
y_predicted = logistic.predict(X_test)  # predicted class
cm = ConfusionMatrix(y_test, y_predicted)
cm.print_stats()
acc = accuracy_score(y_test, y_predicted)
# print(classification_report(y_test, y_predicted))
cmatrix = confusion_matrix(y_test, y_predicted)
ROI = cmatrix[1, 1] * 100 + cmatrix[0, 0] * 15 + cmatrix[0, 1] * (-15) + cmatrix[1, 0] * (-30)

# I then tried regularization (using grid search to find the optimal regularization parameter C)
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
clf = GridSearchCV(LogisticRegression(penalty='l2'), param_grid)
GridSearchCV(cv=None,
             estimator=LogisticRegression(C=1.0, intercept_scaling=1, dual=False, fit_intercept=True,
if __name__ == '__main__':
    flag = 2
    # save_feature()
    df = pd.read_csv(path.join(feature_data), header=0)
    if flag == 1:
        features = df[df.columns.difference(['label', 'app'])].values
        labels = df['label'].values
        X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
        clf = LinearSVC(C=1.0, loss='squared_hinge', penalty='l2', multi_class='ovr')
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        print("Accuracy: {0:0.1f}%".format(accuracy_score(y_test, y_pred) * 100))
        confusion_matrix = ConfusionMatrix(y_test, y_pred, display_sum=False)
        print(confusion_matrix)
    elif flag == 2:
        cates = ['other', 'video', 'live', 'audio', 'radio']
        for cate in cates:
            total_num = 0.0
            wright_num = 0
            accuracy_list = []
            apps = np.unique(df[df.label == cate]['app'].values)
            for i, app in enumerate(apps):
                df_train = df[df.app != app]
                df_test = df[df.app == app]
                x_train, y_train = df_train[df.columns.difference(['label', 'app'])].values, df_train['label'].values
                x_test, y_test = df_test[df.columns.difference(['label', 'app'])].values, df_test['label'].values
                clf = Pipeline([('std', StandardScaler()), ('clf', SVC())])
                clf.fit(x_train, y_train)
def main():
    path = sys.argv[1]
    with open(path) as f:
        config = json.load(f)
    batch_size = int(config['batch_size'])
    nb_classes = int(config['nb_classes'])
    weight_path = config['weights']

    ##################### First level of Classification ################################
    ##### load model
    model = None
    model = new.load_model(nb_classes, weight_path)

    ####### specify the loss function
    sgd = SGD(lr=0.00005, decay=1e-5, momentum=0.99, nesterov=True)
    # sgd = SGD(lr=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    # model.compile(loss='sparse_categorical_crossentropy', optimizer=sgd, metrics=[metrics.mae, metrics.sparse_categorical_accuracy])

    ######## load data
    test = {}
    with open(config['data_path'] + '/' + config['dataset_name'] + '.test', 'rb') as f:
        test = pickle.load(f)
    x_test, y_test, imgname = test['data'], test['labels'], test['imgname']
    x_ts = x_test.reshape((-1, 227, 227, 1))
    print(x_ts.shape, 'test samples')
    print(y_test.shape, 'test sample labels')

    ##### evaluation, prediction and confusion matrix formation
    scores = model.evaluate(x_ts, y_test, batch_size=batch_size, verbose=0)
    print("model %s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    prediction = model.predict_classes(x_ts, verbose=1)
    # print(prediction)
    np.save('prediction.npy', prediction)
    pre = np.array(prediction)
    pre = MultiLabelBinarizer().fit_transform(pre.reshape(-1, 1))
    orig = y_test
    print('')
    print('')
    print('score for first level classification: ', scores)
    '''
    count = 0
    for i in range(0, len(pre)):
        if not np.array_equal(orig[i], pre[i]):
            print(imgname[i], "_", orig[i], "_", pre[i], "_False")
            count = count + 1
    print(count)
    '''
    aa = [0, 1]
    aa = np.array(aa)
    print('')
    print('')
    print(MultiLabelBinarizer().fit_transform(aa.reshape(-1, 1)))
    print("0-Nontumor 1-Tumor")
    a = [0, 1]
    a = np.array(a)
    b = [1, 0]
    b = np.array(b)
    y_true = []
    y_pred = []
    print(range(len(prediction)))
    for i in range(len(prediction)):
        if np.array_equal(orig[i], a):
            y_true.append(1)
        elif np.array_equal(orig[i], b):
            y_true.append(0)
    for i in range(len(prediction)):
        if np.array_equal(pre[i], a):
            y_pred.append(1)
        elif np.array_equal(pre[i], b):
            y_pred.append(0)
    cm = ConfusionMatrix(y_true, y_pred)
    print('')
    print('')
    print('*****************************Confusion Matrix for first level Classification****************************')
    print(cm)
    print('')
    print('')

    ############################ Second level Classification ###############################
    '''
def test_pandas_confusion_cm_stats_animals():
    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    print("y_true: %s" % y_true)
    print("y_pred: %s" % y_pred)
    cm = ConfusionMatrix(y_true, y_pred)
    assert isinstance(cm.stats(), OrderedDict)
    assert cm.population == len(y_true)  # 12
    cm.print_stats()
    cm_stats = cm.stats()  # noqa
    assert cm.binarize("cat").TP == cm.get("cat")  # cm.get("cat", "cat")
    assert cm.binarize("cat").TP == 3
    assert cm.binarize("dog").TP == cm.get("dog")  # 1
    assert cm.binarize("rabbit").TP == cm.get("rabbit")  # 3
plt.plot(c, cv_scores, '-o')
plt.xscale('log')

# In[ ]:

predicted = clf.predict(X_test)
expected = y_test
print(accuracy_score(expected, predicted))

# In[ ]:

predicted_probs = clf.predict_proba(X_test)
print(log_loss(y_test, predicted_probs))

# In[ ]:

cm = ConfusionMatrix(expected, predicted)
cm_stats = cm.to_dataframe().apply(lambda x: x / sum(x), axis=1)
cm_stats.to_csv('data/confusion_matrix_stats.csv')

# In[ ]:

mpl.rcParams['figure.figsize'] = (10.0, 5.0)
cm.plot(normalized=True)
from pandas_confusion import ConfusionMatrix

cm = ConfusionMatrix(y_test, pred)
# `c` is expected to be a colormap provider (e.g. matplotlib.cm imported under that name) defined elsewhere
cm.plot(cmap=c.get_cmap('PuBu'))
votes = dict.fromkeys(alpha, 0)  # reset dictionary for next test case
y.append(count)  # add classification to array for confusion matrix
if count == tcf.index[i]:  # if the vote matches the known value of the target, increment correct
    t_correct += 1

# ***************************************************************************************************
# **************************** Creates and Displays Confusion Matrix *******************************
# ***************************************************************************************************
# uses pandas_confusion library to generate confusion matrix
print '\n\nConfusion Matrix:\n\n'
print '\tAccuracy is: ', m.ceil(float(t_correct) / 10000 * 100), '\n\n'
y_actul = pd.Series(y_true, name='Actual')
y_pred = pd.Series(y, name='Predicted')
confusion1 = ConfusionMatrix(y_actul, y_pred)
# confusion1.print_stats()
confusion2 = pd.crosstab(y_actul, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
print confusion2
print confusion_matrix(y_actul, y_pred)
from sklearn.metrics import f1_score
import numpy as np
from pandas_confusion import ConfusionMatrix

with open('C_baseline.predictions') as file:
    y_true = []
    y_pred = []
    next(file)
    for line in file:
        row = line.replace('\n', '').split('\t')
        y_true.append(row[1])
        y_pred.append(row[2])

f1 = f1_score(y_true, y_pred, average=None)
f1_mean = np.mean(f1)
print("Micro-averaged F1: {}".format(f1_mean))
print("Individual scores: {}".format(f1))
print("Confusion matrix:")
print(ConfusionMatrix(y_true, y_pred))
def main(save, show):
    basepath = os.path.dirname(__file__)

    # y_true = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    # y_pred = [0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2]
    # cm = ConfusionMatrix(y_true, y_pred)
    # cm = ConfusionMatrix(y_true, y_pred, labels=["ant", "bird", "cat"])

    # y_true = [2, 0, 2, 2, 0, 1]
    # y_pred = [0, 0, 2, 2, 0, 2]
    # cm = ConfusionMatrix(y_true, y_pred)
    # cm = ConfusionMatrix(y_true, y_pred, labels=["ant", "bird", "cat"])

    y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
    y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
    cm = ConfusionMatrix(y_true, y_pred)

    # y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
    # y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
    # >>> cm(y_true, y_pred, labels=["ant", "bird", "cat"])
    # array([[2, 0, 0],
    #        [0, 0, 1],
    #        [1, 0, 2]])
    # cm = ConfusionMatrix(y_true, y_pred)

    print("Confusion matrix:\n%s" % cm)
    df = cm.to_dataframe()
    print(df)
    print(df.dtypes)

    cm.plot()
    filename = 'cm.png'
    if save:
        plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
    if show:
        plt.show()

    cm.plot(normalized=True)
    filename = 'cm_norm.png'
    if save:
        plt.savefig(os.path.join(basepath, '..', 'screenshots', filename))
    if show:
        plt.show()

    cm.print_stats()
    print(cm.classification_report)

    print("sklearn confusion_matrix:\n%s" % confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))

    # stat = 'precision'
    # print(cm._avg_stat(stat))
    # print(cm.ACC)

    # import seaborn as sns
    # cm.plot(normalized=True, backend=Backend.Seaborn)
    # sns.plt.show()

    print("Binarize a confusion matrix")
    y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
    y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
    cm = ConfusionMatrix(y_true, y_pred)
    print(cm)
    binary_cm = cm.binarize(['ant', 'cat'])  # A bird is not a "land_animal"
    print(binary_cm)
neigh = neigh.fit(X, y)
y_predicted_train = neigh.predict_proba(X)  # predicted class for training set

# obtain optimal probability threshold for classification
maxrev = 0
final_threshold = 0.5
for x in xrange(1, 100):
    thresh = 0.01 * x
    predicted_y_train = np.array([1 if x > thresh else 0 for x in list(y_predicted_train[:, 1])])
    cmatrix = confusion_matrix(y, predicted_y_train)
    newROI = cmatrix[1, 1] * 100 + cmatrix[0, 0] * 15 + cmatrix[0, 1] * (-15) + cmatrix[1, 0] * (-30)
    if newROI > maxrev:
        maxrev = newROI
        final_threshold = thresh

y_predicted_test = neigh.predict_proba(X_test)  # predicted probability for test set
predicted_y_test = np.array([1 if x > final_threshold else 0
                             for x in list(y_predicted_test[:, 1])])  # apply threshold to classify the test set

# obtain relevant statistics
cm = ConfusionMatrix(y_test, predicted_y_test)
cm.print_stats()
acc = accuracy_score(y_test, predicted_y_test)
cmatrix = confusion_matrix(y_test, predicted_y_test)
ROI = cmatrix[1, 1] * 100 + cmatrix[0, 0] * 15 + cmatrix[0, 1] * (-15) + cmatrix[1, 0] * (-30)
# for every letter, run through every perceptron
# and record votes for which letter perceptron returns
for letter in letters_list_testing:
    # text = "\rTesting instance " + str((letter_increment) + 1) + "/" + str(len(letters_list_testing))
    # sys.stdout.write(text)

    # collect perceptron votes to build confusion matrix
    # collect_votes runs perceptron for instances of letters in the testing data set
    # returns the winning letter by vote to store into predicted
    predicted = collect_votes(letter)
    # print letter.value[0], predicted

    # append to confusion matrix using pandas
    y_pred = y_pred.append(pd.Series(predicted, index=[letter_increment]))
    y_actu = y_actu.append(pd.Series(letter.value[0], index=[letter_increment]))

    # append pandas_confusion
    y_pred_stats.append(predicted)
    y_actu_stats.append(letter.value[0])

    # increment counter for next letter
    letter_increment += 1

# make confusion matrix using pandas
df_confusion = pd.crosstab(y_actu, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)
print df_confusion

# make confusion matrix and print stats using pandas_confusion
cm = ConfusionMatrix(y_actu_stats, y_pred_stats)
# print("Confusion matrix:\n%s" % cm)
cm.print_stats()