예제 #1
0
def evaluate(model, name, test_set_x, test_set_y):
    """Plot confusion matrices for ``model`` on a test set and return its accuracy.

    The predictions for ``test_set_x`` are drawn against ``test_set_y`` twice
    with ``evaluation.plot_confusion_matrix``: once with that helper's default
    options and once with ``normalize=True``.

    Args:
        model: object exposing ``predict(test_set_x)``.
        name: forwarded as the third positional argument of
            ``evaluation.plot_confusion_matrix``.
        test_set_x: inputs handed to ``model.predict``.
        test_set_y: reference labels the predictions are compared with.

    Returns:
        The result of ``accuracy_score(test_set_y, predictions)``.
    """
    predicted = model.predict(test_set_x)

    # First pass uses the plotting defaults, second pass asks for normalization.
    for plot_options in ({}, {"normalize": True}):
        evaluation.plot_confusion_matrix(test_set_y, predicted, name,
                                         **plot_options)

    return accuracy_score(test_set_y, predicted)
예제 #2
0
def visualize_results(results, test_set_y):
    """Draw a raw and a normalized confusion matrix for every entry of ``results``.

    Args:
        results: iterable of mappings; each one provides its predictions under
            ``["results"]["predictions_valid"]`` and a label under ``["name"]``.
        test_set_y: reference labels shared by all entries.

    Returns:
        None. All output goes through ``evaluation.plot_confusion_matrix``.
    """
    for entry in results:
        predicted = entry["results"]["predictions_valid"]
        label = entry["name"]
        evaluation.plot_confusion_matrix(test_set_y, predicted, label)
        evaluation.plot_confusion_matrix(test_set_y,
                                         predicted,
                                         label,
                                         normalize=True)
예제 #3
0
def evaluate(model: Model, name: str, plot_title: str, test_set_x, test_set_y,
             batch_size: int, cnn: bool):
    """Score ``model`` on a test set and plot its confusion matrices.

    Args:
        model: model exposing ``evaluate`` and ``predict``.
        name: forwarded as the third positional argument of
            ``evaluation.plot_confusion_matrix``.
        plot_title: forwarded to the plots as ``title``.
        test_set_x: test inputs; when ``cnn`` is true an extra axis is
            inserted at position 2 before they reach the model.
        test_set_y: test labels; passed through ``to_categorical`` for
            ``model.evaluate`` and used unchanged for the plots.
        batch_size: batch size used by ``model.evaluate``.
        cnn: whether the inputs need the extra axis described above.

    Returns:
        The ``(loss, accuracy)`` pair produced by ``model.evaluate``.
    """
    inputs = np.expand_dims(test_set_x, axis=2) if cnn else test_set_x

    loss, accuracy = model.evaluate(inputs,
                                    to_categorical(test_set_y),
                                    batch_size=batch_size,
                                    verbose=2)

    # Convert the raw model output back to labels for plotting.
    y_pred = predicted_to_label(model.predict(inputs))

    # Same plot twice: plotting defaults first, then normalized.
    for plot_options in ({}, {"normalize": True}):
        evaluation.plot_confusion_matrix(test_set_y,
                                         y_pred,
                                         name,
                                         title=plot_title,
                                         **plot_options)

    return loss, accuracy
예제 #4
0
def train_neural_network(x_train, train_labels, x_test, orig_test):
    """Fit the ``neural_network_1`` model and write a submission file.

    Args:
        x_train: training features; converted with ``np.array``.
        train_labels: training labels; only the ``'Col2'`` entry is used.
        x_test: test features; converted with ``np.array``.
        orig_test: forwarded to ``evaluation.submission_nn`` as
            ``orig_test_df``.

    Returns:
        None. Results are produced by the ``evaluation`` helpers.
    """
    n_epochs = 6
    n_batch = 32

    features = np.array(x_train)
    holdout_features = np.array(x_test)
    labels = np.array(train_labels['Col2'])

    network = models.make_model(params=features,
                                model_name='neural_network_1')
    ckpt_callback, board_callback = models.callbacks(
        model_name='nn_submission03_s_1_m1_f_2165.ckpt')

    # No validation data is passed to fit, which is why the metric plot below
    # is requested with if_val=False.
    fit_history = network.fit(features,
                              labels,
                              batch_size=n_batch,
                              epochs=n_epochs,
                              callbacks=[ckpt_callback, board_callback])

    # Everything below is reported on the training data itself.
    evaluation.evaluation(network, features, labels)
    evaluation.plot_metrices(n_epochs, fit_history, if_val=False)
    evaluation.plot_confusion_matrix(network, features, labels)

    evaluation.submission_nn(
        model=network,
        test_features=holdout_features,
        orig_test_df=orig_test,
        submission_name='nn_submission03_s_1_m1_f_2165.csv')
                ]  #it should be normal and abnormal for linux machines

# Print the per-class classification report. Both arrays are reduced to class
# indices with an argmax over their last axis before being compared
# (presumably one-hot / per-class score arrays -- confirm against the code
# that builds Y_valid and y_pred).
print(
    classification_report(Y_valid.argmax(axis=-1),
                          y_pred.argmax(axis=-1),
                          target_names=target_names))

# Compute the confusion matrix from the same argmax-reduced labels.
cnf_matrix = confusion_matrix(Y_valid.argmax(axis=-1), y_pred.argmax(axis=-1))
# From here on NumPy prints floats with 4 digits of precision.
np.set_printoptions(precision=4)

# Plot the confusion matrix without normalization on a large,
# high-resolution figure.
plt.figure(figsize=(15, 10), dpi=300)
plot_confusion_matrix(cnf_matrix,
                      classes=target_names,
                      title='Confusion matrix, without normalization')

# Plot the normalized confusion matrix on a second, default-sized figure.
plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=target_names,
                      normalize=True,
                      title='Normalized confusion matrix')

# Display the figures created above.
plt.show()
###############################################################################
# Replace both arrays with their class indices (argmax over axis 1).
# After these two lines y_pred and Y_valid no longer hold per-class values.
y_pred = np.argmax(y_pred, axis=1)
Y_valid = np.argmax(Y_valid, axis=1)
print(y_pred)
예제 #6
0
# Show the predicted and the reference labels.
print(y_pred)
print(Y_test)

# Save both label arrays as integer-formatted, comma-delimited text files.
np.savetxt('custom_model_y_pred.csv',y_pred,fmt='%i',delimiter = ",")
np.savetxt('custom_model_Y_test.csv',Y_test,fmt='%i',delimiter = ",")
################# classification report and confusion matrix #################

# Display names for the two classes, used by the report and the plot below.
target_names = ['class 0(abnormal)', 'class 1(normal)']
print(classification_report(Y_test,y_pred,target_names=target_names))
print(confusion_matrix(Y_test,y_pred))
# Keep the matrix for plotting (it is computed a second time here).
cnf_matrix = (confusion_matrix(Y_test,y_pred))
# From here on NumPy prints floats with 4 digits of precision.
np.set_printoptions(precision=4)
plt.figure(figsize=(20,10), dpi=300)
# Plot the non-normalized confusion matrix.
plot_confusion_matrix(cnf_matrix, classes=target_names,
                  title='Confusion matrix')
plt.show()
###############################################################################
# Visualize losses and accuracy recorded during training.
# `hist` and `num_epoch` are defined outside this fragment; hist.history is
# read as a mapping with 'loss', 'val_loss', 'acc' and 'val_acc' series
# (presumably a Keras History object -- confirm).
train_loss=hist.history['loss']
val_loss=hist.history['val_loss']
train_acc=hist.history['acc']
val_acc=hist.history['val_acc']
# One x position per epoch; assumes each series has num_epoch entries.
xc=range(num_epoch)

# Figure 1: training loss against validation loss.
plt.figure(1,figsize=(20,10), dpi=300)
plt.plot(xc,train_loss)
plt.plot(xc,val_loss)
plt.xlabel('num of Epochs')
plt.ylabel('loss')
plt.title('train_loss vs val_loss')
예제 #7
0
파일: run.py 프로젝트: jason4521/malaria
def evaluate(hist, pred, truth):
    """Report classification metrics and plot ROC, confusion and training curves.

    Args:
        hist: object whose ``history`` mapping holds the per-epoch ``'loss'``
            and ``'val_loss'`` series plus the accuracy series, stored either
            as ``'acc'``/``'val_acc'`` or as ``'accuracy'``/``'val_accuracy'``.
        pred: 2-D array of per-class scores, indexed as ``pred[:, i]``.
        truth: 2-D array of per-class ground truth with the same layout
            (presumably one-hot encoded -- confirm against the caller).

    Returns:
        None. Metrics are printed and figures are displayed with
        ``plt.show()``.

    Raises:
        KeyError: if ``hist.history`` lacks the loss series or contains
            neither spelling of the accuracy series.
    """
    # Per-class ROC curve and area under it.
    fpr = {}
    tpr = {}
    roc_auc = {}
    for i in range(param['classes']):
        fpr[i], tpr[i], _ = roc_curve(truth[:, i], pred[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Micro-average ROC curve and area over all flattened entries.
    fpr["micro"], tpr["micro"], _ = roc_curve(truth.ravel(), pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Plot the ROC curve of class index 1 against the chance diagonal.
    plt.figure(figsize=(15, 10), dpi=300)
    lw = 1
    plt.plot(fpr[1],
             tpr[1],
             color='red',
             lw=lw,
             label='ROC curve (area = %0.4f)' % roc_auc[1])
    plt.plot([0, 1], [0, 1], color='black', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristics')
    plt.legend(loc="lower right")
    plt.show()

    # Cross-entropy loss.
    score = log_loss(truth, pred)
    print(score)

    # Average precision.
    prec_score = average_precision_score(truth, pred)
    print(prec_score)

    # Class indices, computed once and reused by every label-based metric.
    true_labels = truth.argmax(axis=-1)
    pred_labels = pred.argmax(axis=-1)

    test_accuracy = accuracy_score(true_labels, pred_labels)
    print("Test_Accuracy = ", test_accuracy)

    # Display names for the two classes.
    target_names = ['class 0(abnormal)', 'class 1(normal)'
                    ]  # it should be normal and abnormal for linux machines

    print(
        classification_report(true_labels,
                              pred_labels,
                              target_names=target_names))

    cnf_matrix = confusion_matrix(true_labels, pred_labels)
    np.set_printoptions(precision=4)

    # Non-normalized confusion matrix.
    plt.figure(figsize=(15, 10), dpi=300)
    plot_confusion_matrix(cnf_matrix,
                          classes=target_names,
                          title='Confusion matrix, without normalization')

    # Normalized confusion matrix.
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=target_names,
                          normalize=True,
                          title='Normalized confusion matrix')

    plt.show()

    # Show the predicted and the true class indices.
    print(pred_labels)
    print(true_labels)

    # Training history. The accuracy series is stored as 'acc' or as
    # 'accuracy' depending on the Keras version and the metric name passed to
    # compile(), so accept either spelling instead of failing with KeyError.
    history = hist.history
    train_loss = history['loss']
    val_loss = history['val_loss']
    train_acc = history['acc'] if 'acc' in history else history['accuracy']
    val_acc = (history['val_acc']
               if 'val_acc' in history else history['val_accuracy'])
    # Take the x axis from the recorded series, not the configured epoch
    # count, so a run that stopped early still plots without a length mismatch.
    xc = range(len(train_loss))

    plt.figure(1, figsize=(15, 10), dpi=300)
    plt.plot(xc, train_loss)
    plt.plot(xc, val_loss)
    plt.xlabel('num of Epochs')
    plt.ylabel('loss')
    plt.title('train_loss vs val_loss')
    plt.grid(True)
    plt.legend(['train', 'val'])
    # NOTE(review): the style is switched after this figure has been drawn,
    # so it presumably only influences figures created later -- confirm that
    # this placement is intended.
    plt.style.use('classic')

    plt.figure(2, figsize=(15, 10), dpi=300)
    plt.plot(xc, train_acc)
    plt.plot(xc, val_acc)
    plt.xlabel('num of Epochs')
    plt.ylabel('accuracy')
    plt.title('train_acc vs val_acc')
    plt.grid(True)
    plt.legend(['train', 'val'], loc=4)
    plt.style.use('classic')
    plt.show()
예제 #8
0
def main():
    """Train a digit-classification CNN from the command line and save a submission.

    Steps, in order: parse the CLI options, load and prepare the train and
    test datasets, split off a validation set, build and compile the network,
    train it on augmented data, plot the training curves and the validation
    confusion matrix, then predict the test set and save the submission.
    """
    # Command line interface: (flags, options) pairs in declaration order.
    option_specs = (
        (('-d', '--dirpath'),
         dict(default=DATASETS_DIRPATH, help="dataset directory path")),
        (('-n', '--n_train'),
         dict(default=None,
              type=int,
              help="number of rows to download on the train dataset")),
        (('-t', '--n_test'),
         dict(default=None,
              type=int,
              help="number of rows to download on the test dataset")),
        (('-e', "--epochs"),
         dict(default=3, type=int, help="set the number of epochs")),
        (('-b', "--batch_size"),
         dict(default=86, type=int, help="set batch size")),
    )
    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    cli = parser.parse_args()

    # Load and prepare the training data.
    train_set = DataSet(dirpath=cli.dirpath, filename="train.csv")
    train_set.download(nrows=cli.n_train)
    train_set.split_X_Y()
    train_set.normalize()
    train_set.reshape()
    train_set.convert_digits_to_one_hot_vectors()
    print(train_set)

    # Split a validation set off the training data.
    validation_set = train_set.extract_validation(size=0.1)
    print(validation_set)

    # Load and prepare the test data (inputs only).
    test_set = DataSet(dirpath=cli.dirpath, filename="test.csv")
    test_set.download(nrows=cli.n_test)
    test_set.set_X()
    test_set.normalize()
    test_set.reshape()
    print(test_set)

    # Convolutional network: two conv/conv/pool/dropout stages followed by a
    # dense classifier with 10 softmax outputs.
    network = Sequential([
        Conv2D(filters=32,
               kernel_size=(5, 5),
               padding='Same',
               activation='relu',
               input_shape=(28, 28, 1)),
        Conv2D(filters=32,
               kernel_size=(5, 5),
               padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2)),
        Dropout(0.25),
        Conv2D(filters=64,
               kernel_size=(3, 3),
               padding='Same',
               activation='relu'),
        Conv2D(filters=64,
               kernel_size=(3, 3),
               padding='Same',
               activation='relu'),
        MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(256, activation="relu"),
        Dropout(rate=0.5),
        Dense(10, activation="softmax"),
    ])

    network.compile(optimizer=RMSprop(lr=0.001,
                                      rho=0.9,
                                      epsilon=1e-08,
                                      decay=0.0),
                    loss="categorical_crossentropy",
                    metrics=["accuracy"])

    # Halve the learning rate when the monitored metric stops improving.
    lr_schedule = ReduceLROnPlateau(monitor='val_acc',
                                    patience=3,
                                    verbose=1,
                                    factor=0.5,
                                    min_lr=0.00001)

    # Synthetic augmentation: centering, std normalization, whitening and
    # flips are all disabled; only small rotations, zooms and shifts are used.
    augmenter = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=10,  # degrees
        zoom_range=0.1,
        width_shift_range=0.1,  # fraction of total width
        height_shift_range=0.1,  # fraction of total height
        horizontal_flip=False,
        vertical_flip=False)
    augmenter.fit(train_set.X)

    batches = augmenter.flow(train_set.X,
                             train_set.Y,
                             batch_size=cli.batch_size)
    steps = train_set.X.shape[0] // cli.batch_size
    fit_history = network.fit_generator(
        batches,
        epochs=cli.epochs,
        validation_data=(validation_set.X, validation_set.Y),
        verbose=2,
        steps_per_epoch=steps,
        callbacks=[lr_schedule])

    # Plot loss and accuracy over the epochs.
    evaluation.plot_loss_and_accuracy(fit_history)

    # Predict the validation set and compare digit against digit.
    predicted_set = DataSet()
    predicted_set.Y = network.predict(validation_set.X)
    predicted_set.X = validation_set.X

    predicted_set.convert_one_hot_vectors_to_digits()
    validation_set.convert_one_hot_vectors_to_digits()

    evaluation.plot_confusion_matrix(
        confusion_matrix(validation_set.Y, predicted_set.Y))

    # Predict the test set and turn the network output into digits.
    digits = convert_one_hot_vectors_to_digits(network.predict(test_set.X))
    print(digits)

    # Write the submission file.
    Submission(digits).save()
    def TestModel(self,
                  sess,
                  writer,
                  datapath='',
                  str_dataset='eval',
                  data_count=32,
                  out_report=False,
                  show_ratio=True,
                  step=0):
        '''
        Evaluate the model on a run of samples and log the resulting metrics.

        Starting at a random index, ``data_count`` consecutive samples
        (wrapping around the dataset) are classified one at a time. The
        confusion counts returned by ``Compare2`` are accumulated into
        sensitivity, specificity, their mean ("score") and accuracy, which are
        written to ``writer`` together with a confusion-matrix image.

        Args:
            sess: unused by this method; kept for interface compatibility.
            writer: summary writer; only its ``add_summary`` method is used.
            datapath: dataset location handed to ``DataSpeech``.
            str_dataset: dataset split name; also used in summary tags and in
                the report file name.
            data_count: number of samples to test; a value <= 0 or larger
                than the dataset means "use every sample".
            out_report: when true, also write a per-sample text report to
                ``Test_Report_<str_dataset>_<timestamp>.txt``.
            show_ratio: unused by this method; kept for interface
                compatibility.
            step: global step attached to the written summaries.

        Returns:
            A dict with the keys ``data_set``, ``sensitivity``,
            ``specificity``, ``score`` and ``accuracy``. A metric that cannot
            be computed because its denominator is zero is reported as the
            string ``'None'``. Returns ``None`` when a ``StopIteration`` is
            caught while testing.

        Raises:
            AssertionError: if a sample is longer than ``AUDIO_LENGTH`` while
                ``PRIOR_ART`` is false.
        '''
        data = DataSpeech(datapath, str_dataset)
        num_data = sum(data.DataNum)  # total number of available samples
        # A non-positive or too large data_count selects the whole dataset.
        if data_count <= 0 or data_count > num_data:
            data_count = num_data

        # Renamed from `map` so the builtin is no longer shadowed.
        label_names = {0: 'normal', 1: 'bowel sounds'}
        report_file = None

        try:
            ran_num = random.randint(0, num_data - 1)  # random start index
            overall_p = 0
            overall_n = 0
            overall_tp = 0
            overall_tn = 0

            if out_report:
                nowtime = time.strftime('%Y%m%d_%H%M%S',
                                        time.localtime(time.time()))
                report_file = open('Test_Report_' + str_dataset + '_' +
                                   nowtime + '.txt',
                                   'w',
                                   encoding='UTF-8')

            start = time.time()
            cm_pre = []
            cm_lab = []
            for i in tqdm(range(data_count)):
                # Take consecutive samples starting from the random index.
                data_input, data_labels = data.GetData(
                    (ran_num + i) % num_data, mode='non-repetitive')

                if not PRIOR_ART:
                    # Explicit raise instead of `assert (0)` so the check
                    # survives `python -O`; the exception type is unchanged.
                    if len(data_input) > AUDIO_LENGTH:
                        raise AssertionError(
                            'sample length %d exceeds AUDIO_LENGTH %d' %
                            (len(data_input), AUDIO_LENGTH))
                    # Zero-pad the sample to the fixed input length.
                    # np.float64 replaces the alias np.float, which was
                    # removed from NumPy.
                    data_in = np.zeros(
                        (1, AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, 1),
                        dtype=np.float64)
                    data_in[0, 0:len(data_input)] = data_input
                    data_pre = self.model.predict_on_batch(data_in)
                else:
                    data_pre = self.model.predict_on_batch(
                        np.expand_dims(data_input, axis=0))
                predictions = np.argmax(data_pre[0], axis=0)

                cm_pre.append(label_names[predictions])
                cm_lab.append(label_names[data_labels[0]])

                # Confusion counts for this sample.
                tp, fp, tn, fn = Compare2(predictions, data_labels[0])
                overall_p += tp + fn
                overall_n += tn + fp
                overall_tp += tp
                overall_tn += tn

                if report_file is not None:
                    report_file.write(
                        str(i) + '\n' +
                        'True:\t' + str(data_labels) + '\n' +
                        'Pred:\t' + str(data_pre) + '\n' +
                        '\n')

            # A metric with a zero denominator is reported as 'None'.
            if overall_p != 0:
                sensitivity = round(overall_tp / overall_p * 100, 2)
            else:
                sensitivity = 'None'
            if overall_n != 0:
                specificity = round(overall_tn / overall_n * 100, 2)
            else:
                specificity = 'None'
            if sensitivity != 'None' and specificity != 'None':
                score = round((sensitivity + specificity) / 2, 2)
            else:
                score = 'None'
            overall_total = overall_p + overall_n
            if overall_total != 0:
                accuracy = round(
                    (overall_tp + overall_tn) / overall_total * 100, 2)
            else:
                accuracy = 'None'

            dtime = round(time.time() - start, 2)
            strg = '*[测试结果] 片段识别 {0} 敏感度:{1}%, 特异度: {2}%, 得分: {3}, 准确度: {4}%, 用时: {5}s.'.format(
                str_dataset, sensitivity, specificity, score, accuracy, dtime)
            tqdm.write(strg)

            img_cm = plot_confusion_matrix(cm_lab,
                                           cm_pre,
                                           list(label_names.values()),
                                           tensor_name='MyFigure/cm',
                                           normalize=False)
            writer.add_summary(img_cm, global_step=step)

            summary = tf.Summary()
            for tag, value in (('sensitivity', sensitivity),
                               ('specificity', specificity),
                               ('score', score),
                               ('accuracy', accuracy)):
                # simple_value is a float field; skip the 'None' placeholder
                # of metrics that could not be computed.
                if isinstance(value, str):
                    continue
                summary.value.add(tag=str_dataset + '/' + tag,
                                  simple_value=value)
            writer.add_summary(summary, global_step=step)

            if report_file is not None:
                # Write the formatted line; concatenating the numeric metrics
                # to str, as before, raised TypeError.
                report_file.write(strg)

            metrics = {
                'data_set': str_dataset,
                'sensitivity': sensitivity,
                'specificity': specificity,
                'score': score,
                'accuracy': accuracy
            }
            return metrics

        except StopIteration:
            print('[Error] Model Test Error. please check data format.')
        finally:
            # Close the report even when testing fails part-way through.
            if report_file is not None:
                report_file.close()