Example #1
def test_sensitivity_specificity_unused_pos_label():
    # pos_label is ignored when average != 'binary', even if the data is binary
    msg = r"use labels=\[pos_label\] to specify a single"
    with pytest.warns(UserWarning, match=msg):
        sensitivity_specificity_support([1, 2, 1], [1, 2, 2],
                                        pos_label=2,
                                        average="macro")
def test_sensitivity_specificity_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad pos_label
    with pytest.raises(ValueError):
        sensitivity_specificity_support(
            y_true, y_pred, pos_label=2, average='binary')

    # Bad average option
    with pytest.raises(ValueError):
        sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega')
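# For context, a minimal self-contained sketch (the toy data is an assumption,
# not taken from any of the repos above) of what sensitivity_specificity_support
# returns with average=None:
from imblearn.metrics import sensitivity_specificity_support

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
sen, spe, sup = sensitivity_specificity_support(y_true, y_pred, average=None)
print(sen)  # per-class sensitivity (the recall of each class)
print(spe)  # per-class specificity (the true negative rate of each class)
print(sup)  # per-class support (the number of true instances of each class)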
Example #3
def test_sensitivity_specificity_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad pos_label
    with raises(ValueError):
        sensitivity_specificity_support(y_true, y_pred, pos_label=2,
                                        average='binary')

    # Bad average option
    with raises(ValueError):
        sensitivity_specificity_support([0, 1, 2], [1, 2, 0], average='mega')
Example #4
def bootstrapping_results(clf, n_size, labels, iterable, n_iterations):
    print("{} ITERATION OUT OF {}\r".format(n_iterations, iterable))
    sys.stdout.write("\r")
    X_boot, y_boot = resample(X_test, y_test, n_samples=n_size, replace=True)

    t0 = t.time()
    y_pred = clf.predict(X_boot)
    t1 = t.time()

    TP, FP, TN, FN = perf_measure(y_boot, y_pred)

    sens_specificity = sensitivity_specificity_support(y_boot, y_pred,
                                                       average="micro")
    report = classification_report(y_boot, y_pred, labels=labels, digits=2)

    # Keep only the averages row of the classification report.
    report = report.split("\n")[-2]
    averages = report.split()

    speed = t1 - t0
    if (FN + TN) != 0:
        NPV = TN / (FN + TN)
    else:
        NPV = 0
    specificity = float(sens_specificity[1])

    accuracy = round(accuracy_score(y_boot, y_pred, normalize=True), 2)
    precision = float(averages[3])
    recall = float(averages[4])
    f1 = float(averages[5])
    scores_row = np.array([accuracy, precision, recall, f1, specificity, NPV,
                           speed])
    return scores_row
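# A hypothetical driver for bootstrapping_results above; clf, labels, X_test
# and y_test are assumed to exist already, as in the snippet itself.
n_iterations = 100
scores = np.vstack([
    bootstrapping_results(clf, n_size=len(X_test), labels=labels,
                          iterable=n_iterations, n_iterations=i)
    for i in range(n_iterations)
])
# Column order follows scores_row: accuracy, precision, recall, f1,
# specificity, NPV, speed.
print(scores.mean(axis=0))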
def plot(disease, data):
    subSet = data[data.disease_lang_concat.str.startswith(disease)]
    labels = list(set(subSet['true_label']))
    labels.sort()
    sensitivity, specificity, support = sensitivity_specificity_support(
        subSet['true_label'],
        subSet['predicted_translated'],
        labels=labels,
        average=None)
    df = pd.DataFrame({
        'Factor': labels,
        'Sensitivity': sensitivity,
        'Specificity': specificity
    })
    tidy = (df.set_index('Factor').stack().reset_index().rename(columns={
        'level_1': 'Variable',
        0: 'Value'
    }))

    sns.set(font_scale=0.8)

    # plt.interactive(False)
    a4_dims = (15, 8)
    width = 0.35
    fig, ax = plt.subplots(figsize=a4_dims)
    y_pos = np.arange(len(labels))
    sen = ax.barh(y_pos, sensitivity, width, color='b')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels)
    ax.invert_yaxis()

    spe = ax.barh(y_pos + width, specificity, width, color='y')
    ax.set_xlabel('scores')
    ax.set_title(disease)
    ax.legend((sen, spe), ('Sensitivity', 'Specificity'))
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc=3,
               ncol=2,
               mode='expand',
               borderaxespad=0.)
    plt.tight_layout(h_pad=1)
    # Save before show(): with non-interactive backends, show() can destroy
    # the current figure, leaving savefig with a blank canvas.
    plt.savefig(disease + '.png')
    plt.show()

    # sns.barplot(ax=ax, data=tidy,  palette="BuGn_d", orient='h',y='Factor',x='Value')
    # sns.despine(fig)
    # # ax.legend(ncol=2, loc="upper left", frameon=True)
    # mng = plt.get_current_fig_manager()
    # # mng.window.showMaximized()
    # plt.title(disease, fontdict={'fontsize': 10})
    # plt.legend(bbox_to_anchor=(0.,1.02,1.,.102), loc=3, ncol=2, mode='expand',borderaxespad=0.)
    # plt.tight_layout(h_pad=1)

    # sns.barplot(ax=ax, data=pd.DataFrame.from_records([specificity], columns=labels), orient='h', palette="RdBu_r")
    #
    # plt.title(disease + ' specificity', fontdict={'fontsize': 10})
    # plt.tight_layout(h_pad=1)

Example #6
def sen_spe_sup(y_true, y_pred, labels, average=None):
    """Produce sensitivity, specificity, support."""
    sss = sensitivity_specificity_support(y_pred=y_pred,
                                          y_true=y_true,
                                          average=average)
    wht = ['Sensitivity', 'Specificity', 'Support']
    return pd.DataFrame(dict(zip(wht, sss)), index=labels).T
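# Hypothetical usage of sen_spe_sup; the toy labels and class names below are
# made up for illustration.
frame = sen_spe_sup(y_true=[0, 1, 1, 0], y_pred=[0, 1, 0, 0],
                    labels=['neg', 'pos'])
print(frame)  # rows: Sensitivity / Specificity / Support; columns: neg, pos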
Example #7
def forward(self, y_hat, y):
    y_shape = y.shape[0]
    predicted = torch.max(y_hat.data, 1)[1]
    y_train_changed = torch.argmax(y, dim=1)
    # Note: sensitivity_specificity_support expects (y_true, y_pred); the
    # predictions are passed first here, as in the original code.
    sensitivity, _, _ = sensitivity_specificity_support(
        np.array(predicted.cpu().detach().numpy()),
        np.array(y_train_changed.cpu().detach().numpy()))
    topk_loss = CategoricalCrossEntropyLoss()(y_hat, y) - sensitivity
    _, indexes = torch.topk(topk_loss, y_shape - self.topk)
    new_pred = torch.index_select(y_hat, 0, indexes)
    self.loss_logger.append(new_pred)
    new_train = torch.index_select(y, 0, indexes)
    print(new_train.shape)
    return nn.NLLLoss()(new_pred, torch.argmax(new_train,
                                               dim=1)), self.loss_logger
def scoreIteration(args):
    data = pd.read_csv(args.filepath, encoding='ISO-8859-1')
    data['predicted_translated'] = data[GF_MODULE] + DELIMIT + data[
        GF_INTERVENTION]
    data['true_label'] = data[CORRECT_MODULE] + DELIMIT + data[GF_INTERVENTION]
    labels = list(set(data['true_label']))

    sensitivity, specificity, support = sensitivity_specificity_support(
        data['true_label'],
        data['predicted_translated'],
        labels=labels,
        average='macro')
    print(args.filepath)
    print('sensitivity - ' + str(sensitivity))
    print('specificity - ' + str(specificity))
    print('')
Example #9
def forward(self, y_hat, y):
    y_shape = y.shape[0]
    predicted = torch.max(y_hat.data, 1)[1]
    y_train_changed = torch.argmax(y, dim=1)
    # As above, the predictions are passed in the y_true position; note that
    # sensitivity_specificity_support expects (y_true, y_pred).
    sensitivity, _, _ = sensitivity_specificity_support(
        np.array(predicted.cpu().detach().numpy()),
        np.array(y_train_changed.cpu().detach().numpy()))
    topk_loss = CategoricalCrossEntropyLoss()(
        y_hat,
        y) + self.lmbda * (1 - torch.sum(y_hat * y, axis=-1)) - sensitivity
    _, indexes = torch.topk(topk_loss, int(y_shape * self.ratio))
    new_pred = torch.index_select(y_hat, 0, indexes)
    self.loss_logger.append(topk_loss)
    new_train = torch.index_select(y, 0, indexes)
    return nn.NLLLoss(weight=self.weights)(
        torch.log(new_pred), torch.argmax(new_train,
                                          dim=1)), self.loss_logger
def test_sensitivity_specificity_score_binary():
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    sen, spe, sup = sensitivity_specificity_support(
        y_true, y_pred, average=None)
    assert_allclose(sen, [0.88, 0.68], rtol=R_TOL)
    assert_allclose(spe, [0.68, 0.88], rtol=R_TOL)
    assert_array_equal(sup, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs in ({}, {'average': 'binary'}):
        sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs)
        assert sen == pytest.approx(0.68, rel=R_TOL)

        spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs)
        assert spe == pytest.approx(0.88, rel=R_TOL)
def test_sensitivity_specificity_score_binary():
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    sen, spe, sup = sensitivity_specificity_support(y_true,
                                                    y_pred,
                                                    average=None)
    assert_allclose(sen, [0.88, 0.68], rtol=R_TOL)
    assert_allclose(spe, [0.68, 0.88], rtol=R_TOL)
    assert_array_equal(sup, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs in ({}, {"average": "binary"}):
        sen = assert_no_warnings(sensitivity_score, y_true, y_pred, **kwargs)
        assert sen == pytest.approx(0.68, rel=R_TOL)

        spe = assert_no_warnings(specificity_score, y_true, y_pred, **kwargs)
        assert spe == pytest.approx(0.88, rel=R_TOL)
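# Sanity check (the toy data is an assumption, not from the tests above): for
# binary labels with the default positive class, imblearn's sensitivity_score
# should match scikit-learn's recall_score.
from imblearn.metrics import sensitivity_score
from sklearn.metrics import recall_score

yt, yp = [0, 1, 1, 0, 1], [0, 1, 0, 0, 1]
assert abs(sensitivity_score(yt, yp) - recall_score(yt, yp)) < 1e-12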
Example #12
def test_sensitivity_specificity_score_binary():
    """Test Sensitivity Specificity for binary classification task"""
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    sen, spe, sup = sensitivity_specificity_support(y_true,
                                                    y_pred,
                                                    average=None)
    assert_array_almost_equal(sen, [0.88, 0.68], 2)
    assert_array_almost_equal(spe, [0.68, 0.88], 2)
    assert_array_equal(sup, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings)]:
        sen = my_assert(sensitivity_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(sen, 0.68, 2)

        spe = my_assert(specificity_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(spe, 0.88, 2)
Example #13
def csv_model():
    """Classify geometric features from a .csv file and evaluate the classifier."""

    dataset = pd.read_csv("data_set_geometric_features.csv",
                          sep=',',
                          encoding='latin1',
                          dayfirst=True,
                          index_col=None,
                          header=None)

    y = [1] * 96 + [0] * 96

    X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=0)

    print("Original data_set sizes: ", X_train.shape, X_test.shape)

    sc = StandardScaler(copy=True, with_mean=True, with_std=True)

    X_train = sc.fit_transform(X_train.astype(float))
    X_test = sc.transform(X_test.astype(float))

    classifier = RandomForestClassifier(
        n_estimators=196,
        criterion='gini',
        random_state=0,
        max_depth=10,
        min_samples_split=14,
        max_features=6,
    )
    classifier.fit(X_train, y_train)
    importances = classifier.feature_importances_
    std = np.std(
        [tree.feature_importances_ for tree in classifier.estimators_], axis=0)

    indices = np.argsort(importances)[::-1]

    print("Feature ranking:")

    for f in range(dataset.shape[1]):
        print("%d. feature %d (%f)" %
              (f + 1, indices[f], importances[indices[f]]))

    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(dataset.shape[1]),
            importances[indices],
            color="r",
            yerr=std[indices],
            align="center")
    plt.xticks(range(dataset.shape[1]), indices)
    plt.xlim([-1, dataset.shape[1]])

    y_pred = classifier.predict(X_test)
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    precision, recall, fscore, support = precision_recall_fscore_support(
        y_test, y_pred)
    _, specificity, _ = sensitivity_specificity_support(y_test, y_pred)

    print('Accuracy: ', accuracy_score(y_test, y_pred))
    print('Precision value: ', precision[0])
    print('Recall value: ', recall[0])
    print('F-score value: ', fscore[0])
    print('Specificity value: ', specificity[0])

    result = pd.DataFrame(y_pred)
    result.to_csv('result_RandomForest.csv', index=False, header=False)
Example #14
# In[6]:

print(X_train.shape)

# In[7]:

from sklearn.ensemble import RandomForestClassifier
from imblearn.metrics import sensitivity_specificity_support
clf = RandomForestClassifier(random_state=6)
clf.fit(X_train, y_train)
prediksi = clf.predict(X_test)
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, prediksi))
print(classification_report(y_test, prediksi))
print(sensitivity_specificity_support(y_test, prediksi, average='macro'))
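# Note: with average='macro' the returned support is None, mirroring
# scikit-learn's precision_recall_fscore_support convention.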

# In[8]:

from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
from sklearn.model_selection import cross_val_score
score = cross_val_score(clf, xList, yList, cv=10)
print(score)
print(score.mean())

# In[ ]:

clf1 = RandomForestClassifier(random_state=1)
clf2 = GradientBoostingClassifier()
clf3 = LinearDiscriminantAnalysis()
def test_sensitivity_specificity_unused_pos_label():
    # pos_label is ignored when average != 'binary', even if the data is binary
    with warns(UserWarning, r"use labels=\[pos_label\] to specify a single"):
        sensitivity_specificity_support([1, 2, 1], [1, 2, 2],
                                        pos_label=2,
                                        average='macro')
Example #16
        loss, logger = criterion(y_pred,
                                 torch.reshape(y_train.long(), (-1, class_length)))
        # The original compared criterion against a fresh instance, which is
        # always False without a custom __eq__; an isinstance check is likely
        # what was intended.
        if isinstance(criterion, CategoricalCrossEntropyLoss):
            loss = torch.mean(loss)
        print(loss)
        softmax_vals.append([y_pred,logger])
        
        predicted = torch.max(y_pred.data, 1)[1]
        batch_corr = (predicted == y_train_changed).sum()
        trn_corr += batch_corr
        
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print interim results
        sensitivity, specificity, support = sensitivity_specificity_support(
            np.array(predicted.cpu().detach().numpy()),
            np.array(y_train_changed.cpu().detach().numpy()),
            average="micro")
        specificity_corr += specificity
        sensitivity_corr += sensitivity
        metrics.append([
            specificity, sensitivity,
            specificity_corr * 100 * 100 / (total_size + (batch_size * (b - 1))),
            sensitivity_corr * 100 * 100 / (total_size + (batch_size * (b - 1)))
        ])
        del y_pred 
        del y_train
        torch.cuda.empty_cache()
        if (b%1==0 and b!=0):
            print(f'epoch: {i:2}  batch: {b:4} [{total_size+(batch_size*(b-1)):6}/{str(len(train))}]  loss: {loss.item():10.8f}  \
accuracy: {trn_corr.item()*100/(total_size+(batch_size*(b-1))):7.3f}%')
            accuracy.append(trn_corr.item()*100/(total_size+(batch_size*(b-1))))
            print(f'sensitivity: {sensitivity_corr*100*100/(total_size+(batch_size*(b-1)))}%        specificity: {specificity_corr*100*100/(total_size+(batch_size*(b-1)))}%')
            print(f'y_train: {y_train_changed}')
            print(f'predicted: {predicted}')
            epoch+=1
    scheduler.step()
def test_sensitivity_specificity_unused_pos_label():
    # pos_label is ignored when average != 'binary', even if the data is binary
    with warns(UserWarning, r"use labels=\[pos_label\] to specify a single"):
        sensitivity_specificity_support(
            [1, 2, 1], [1, 2, 2], pos_label=2, average='macro')
Example #18
def model():

    # data, hand = pre_processing.get_image("result_segment_input", "result_segment_output")
    # data, hand = pre_processing.get_image("test_input", "test_output")
    data, hand = pre_processing.get_image("scan/T2_scan", "scan/T2_scan_output")


    trainX, testX, trainY, testY = train_test_split(data, hand, test_size=0.25, random_state=100)

    model = keras.models.Sequential()

    model.add(Conv2D(32, (3, 3),  padding='same', input_shape=(64, 64, 1)))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (6, 6), activation='relu', strides=(1, 1), padding='same'))

    #model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (6, 6), activation='relu', strides=(1, 1), padding='same'))
    # model.add(Conv2D(64, (3, 3), activation='relu', strides=(1, 1), padding='same'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(128, (9, 9), activation='relu', strides=(1, 1), padding='same'))
    # model.add(Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same'))

    #model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(256, (9, 9), activation='relu', strides=(1, 1), padding='same'))
    # model.add(Conv2D(256, (6, 6), activation='relu', strides=(1, 1), padding='same'))
    #model.add(MaxPooling2D(pool_size=(2, 2)))

    # model.add(Conv2D(384, (6, 6), activation='relu', strides=(1, 1), padding='same'))
    # model.add(Conv2D(384, (6, 6), activation='relu', strides=(1, 1), padding='same'))

    #model.add(concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=1))
    # model.add(Conv2D(256, (6, 6), activation='relu', strides=(1, 1), padding='same'))
    model.add(Conv2D(256, (9, 9), activation='relu', strides=(1, 1), padding='same'))

    #up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=1)
    model.add(Conv2D(128, (9, 9), activation='relu', strides=(1, 1), padding='same'))
    # model.add(Conv2D(128, (3, 3), activation='relu', strides=(1, 1), padding='same'))

    #up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=1)
    # model.add(Conv2D(64, (3, 3), activation='relu', strides=(1, 1), padding='same'))
    model.add(Conv2D(64, (6, 6), activation='relu', strides=(1, 1), padding='same'))

    #up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=1)
    model.add(Conv2D(32, (3, 3), activation='relu', strides=(1, 1), padding='same'))
    model.add(Conv2D(32, (3, 3), activation='relu', strides=(1, 1), padding='same'))

    model.add(Conv2D(1, (1, 1), strides=(1, 1), activation='relu'))

    # model = Model(inputs=[inputs], outputs=[conv10])  # think about how the model is assembled
    model.summary()

    model.compile(optimizer=Adam(lr=1e-5), loss=keras.losses.binary_crossentropy, metrics=['accuracy'])

    EPOCHS = 5

    trainX = np.expand_dims(np.array(trainX), axis=3)
    testX = np.expand_dims(np.array(testX), axis=3)

    trainY = np.expand_dims(np.array(trainY), axis=3)
    testY = np.expand_dims(np.array(testY), axis=3)

    print(trainX.shape)
    print(trainY.shape)
    print(testX.shape)
    print(testY.shape)

    H = model.fit(trainX, trainY, validation_data=(testX, testY), epochs=EPOCHS, batch_size=32, verbose=2)
    print("[INFO] evaluating network...")

    predictions = model.predict(testX, batch_size=32)

    # from sklearn import preprocessing
    # scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
    # for i in range(predictions.shape[0]):
    #      cv2.imshow("frame", testY[i, :, :, 0])
    #      cv2.waitKey(0)
    #      cv2.imshow("frame", scaler.fit_transform(predictions[i, :, :, 0]))
    #      cv2.waitKey(0)

    plt.figure(figsize=(8, 6))
    fpr, tpr, thresholds = roc_curve(testY.flatten(), predictions.flatten())
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % ('CNN', roc_auc))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False positive rate (fallaciously defined neoplasms)')
    plt.ylabel('True positive rate (correctly defined neoplasms)')
    plt.legend(loc=0, fontsize='small')
    plt.title("ROC - curve")
    plt.show()

    #threshold = (np.min(predictions) + np.max(predictions)) / 2.0
    # optimal_idx = np.argmax(tpr - fpr)
    # optimal_threshold = thresholds[optimal_idx]
    testY = np.where(testY > 0.5, 1, 0)

    optimal_threshold = get_optimal_threshold(predictions.flatten(), testY.flatten())

    print('optimal_threshold', optimal_threshold)

    predictions = np.where(predictions > optimal_threshold, 1, 0)

    precision, recall, fscore, support = precision_recall_fscore_support(testY.flatten(), predictions.flatten())
    _, specificity, _ = sensitivity_specificity_support(testY.flatten(), predictions.flatten())
    print('Accuracy', accuracy_score(testY.flatten(), predictions.flatten()))
    print('binary precision value', precision[1])
    print('binary recall value', recall[1])
    print('binary fscore value', fscore[1])
    print('binary specificity value', specificity[1])

    print(classification_report(testY.flatten(), predictions.flatten()))

    N = np.arange(0, EPOCHS)
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, H.history["loss"], label="train_loss", linestyle='--')
    plt.plot(N, H.history["val_loss"], label="val_loss", linestyle='-.')
    plt.plot(N, H.history["acc"], label="train_acc", linestyle= ':')
    plt.plot(N, H.history["val_acc"], label="val_acc",  linestyle='-')
    plt.title("Training Loss and Accuracy (Simple NN)")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.savefig("Training Loss and Accuracy.pdf")
    plt.show()


    plt.figure(figsize=(8, 8))
    recall = metrics.recall_score(testY.flatten(), predictions.flatten(), average=None)

    precision, recall, thresholds = metrics.precision_recall_curve(testY.flatten(), predictions.flatten())
    plt.plot(recall, precision)
    plt.ylabel("Precision")
    plt.xlabel("Recall")
    plt.title("Curve dependent Precision и Recall of threshold")
    plt.legend(loc='best')
    plt.show()
Example #19
def test(batch_size, image_size, model_id):

    data_path = os.path.join(os.path.dirname(os.getcwd()), "v7/fold3/valid/")
    test_normal = get_image_names(os.path.join(data_path, "normal/"),
                                  label='normal/')
    test_covid = get_image_names(os.path.join(data_path, "covid/"),
                                 label='covid/')
    test_pneumonia = get_image_names(os.path.join(data_path, "pneumonia/"),
                                     label='pneumonia/')

    test_files = test_normal + test_covid + test_pneumonia
    test_labels = np.concatenate(
        (np.zeros(len(test_normal)), np.ones(len(test_covid)),
         2 * np.ones(len(test_pneumonia))))
    normal_id = [
        re.split(r'[_]', test_normal[i])[-5] for i in range(len(test_normal))
    ]
    covid_id = [
        re.split(r'[_]', test_covid[i])[-5] for i in range(len(test_covid))
    ]
    pneumonia_id = [
        re.split(r'[_]', test_pneumonia[i])[-5]
        for i in range(len(test_pneumonia))
    ]

    #num_norm, ind = np.unique(np.array(pneumo_id), return_counts=True)
    num_normal = len(set(normal_id))
    num_covid = len(set(covid_id))
    num_neumo = len(set(pneumonia_id))
    print(num_normal)
    print(num_covid)
    print(num_neumo)

    # Class order follows the label encoding above: 0=normal, 1=covid, 2=pneumonia.
    target_names = ['normal', 'covid', 'pneumonia']

    checkpoint_dir = "../models" + '/' + model_id
    if not exists(checkpoint_dir):
        raise IOError("model path, {}, could not be resolved".format(
            str(checkpoint_dir)))

    num_test_seqs = len(test_files)
    print("Number of test slices={}".format(num_test_seqs))

    with tf.Graph().as_default() as graph:

        model = DAN(image_size=image_size,
                    batch_size=batch_size,
                    is_train=False)
        C5_logits = graph.get_tensor_by_name('Classifier5GMP:0')
        C4_logits = graph.get_tensor_by_name('Classifier4GMP:0')
        C3_logits = graph.get_tensor_by_name('Classifier3GMP:0')

        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print("loading checkpoint %s,waiting......" %
              ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("load complete!")

    labels = []
    C_prob = []
    logits = []
    c5logits = []
    c4logits = []
    c3logits = []

    test_batches = get_minibatches_ind(len(test_files),
                                       batch_size,
                                       shuffle=True)

    with Parallel(n_jobs=batch_size) as parallel:
        shapes = np.repeat(np.array([image_size]), batch_size, axis=0)
        paths = np.repeat(data_path, batch_size, axis=0)
        images = []
        for _, batch_idx in test_batches:
            if len(batch_idx) == batch_size:
                labels.append(test_labels[batch_idx])
                test_seq = np.zeros(
                    (batch_size, image_size[0], image_size[1], 1),
                    dtype="float32")
                test_names = np.array(test_files)[batch_idx]

                test_output = parallel(
                    delayed(augment_data)(f, s, p)
                    for f, s, p in zip(test_names, shapes, paths))

                for i in range(batch_size):
                    test_seq[i] = test_output[i]
                images.append(test_seq)

                output = sess.run(
                    [
                        model.prob_out, model.logit_out, C5_logits, C4_logits,
                        C3_logits
                    ],
                    feed_dict={
                        model.x: test_seq,
                        model.target: test_labels[batch_idx],
                        model.dr_rate: 1.0
                    })

                C_prob.append(output[0])
                logits.append(output[1])
                c5logits.append(output[2])
                c4logits.append(output[3])
                c3logits.append(output[4])

        labels = np.concatenate(labels, axis=0).astype('int32')
        scores = np.concatenate(C_prob, axis=0)
        c5prob = special.softmax(
            np.squeeze(np.concatenate(c5logits, axis=0), axis=(1, 2)), -1)
        c4prob = special.softmax(
            np.squeeze(np.concatenate(c4logits, axis=0), axis=(1, 2)), -1)
        c3prob = special.softmax(
            np.squeeze(np.concatenate(c3logits, axis=0), axis=(1, 2)), -1)

    c5logit = np.argmax(c5prob, 1)
    c4logit = np.argmax(c4prob, 1)
    c3logit = np.argmax(c3prob, 1)

    results = [np.argmax(scores, 1), c5logit, c4logit, c3logit]

    precision = []
    recall = []
    accuracy = []
    f1score = []
    for i in range(4):
        precision.append(
            precision_score(labels, results[i], average='weighted'))
        recall.append(recall_score(labels, results[i], average='weighted'))
        accuracy.append(accuracy_score(labels, results[i]))
        f1score.append(f1_score(labels, results[i], average='weighted'))
    rep = sensitivity_specificity_support(labels,
                                          results[0],
                                          average='weighted')
    cm = confusion_matrix(labels, results[0])
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    print(cm.diagonal())
    print(
        classification_report_imbalanced(labels,
                                         results[0],
                                         target_names=target_names))
    print(num_normal)
    print(num_covid)
    print(rep)
    print(precision)
    print(recall)
    print(accuracy)
    print(f1score)
optimal_threshold = get_optimal_threshold(y_pred, testY)

print('optimal_threshold', optimal_threshold)

predictions = np.where(y_pred > optimal_threshold, 1, 0)

# r_sq = classifier.score(testY, y_pred)
# print('coefficient of determination:', r_sq)
# print('intercept:', classifier.intercept_)
# print('slope:', classifier.coef_)
# print('predicted response:', y_pred, sep='\n')

precision, recall, f_score, support = precision_recall_fscore_support(
    testY, predictions)
_, specificity, _ = sensitivity_specificity_support(testY, predictions)
print('Accuracy', accuracy_score(testY, predictions))
print('binary precision value', precision[1])
print('binary recall value', recall[1])
print('binary f_score value', f_score[1])
print('binary specificity value', specificity[1])

print(classification_report(testY, predictions))
print(confusion_matrix(testY, predictions))

plt.figure(figsize=(8, 6))
fpr, tpr, threshold = roc_curve(testY, predictions)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % ('CNN', roc_auc))
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
        import seaborn as sns

        matplotlib.interactive(True)
        data = pd.read_csv(args.filepath, encoding='ISO-8859-1')
        data['predicted_translated'] = data[GF_MODULE] + DELIMIT + data[
            GF_INTERVENTION]
        data['true_label'] = data[CORRECT_MODULE] + DELIMIT + data[
            GF_INTERVENTION]
        labels = list(set(data['true_label']))
        plot('malaria', data)
        plot('hiv', data)
        plot('tb', data)

        sensitivity, specificity, support = sensitivity_specificity_support(
            data['true_label'],
            data['predicted_translated'],
            labels=labels,
            average='macro')
        print('sensitivity - ' + str(sensitivity))
        print('specificity - ' + str(specificity))
        print(support)
        uniform_data = np.random.rand(10, 12)
        iris = sns.load_dataset("iris")
        sns.set(font_scale=0.5)

        # plt.interactive(False)
        a4_dims = (20, 8)
        fig, ax = plt.subplots(figsize=a4_dims)
        ax = sns.barplot(ax=ax,
                         data=pd.DataFrame.from_records([sensitivity],
                                                        columns=labels),
Example #22
def main():
    """Carry out multi-stage processing of the images."""

    mypath_in = 'part_start'  # diseased and healthy images, in grayscale
    mypath_out = 'part_finish_0'  # manually processed diseased and healthy images
    mypath_late = 'part_start_late'  # manually processed diseased and healthy images
    mypath_no = 'no_pathologies'  # images the code considers healthy

    onlyfiles = [f for f in listdir(mypath_in) if isfile(join(mypath_in, f))]
    onlyfiles_out = [
        f for f in listdir(mypath_out) if isfile(join(mypath_out, f))
    ]

    img = np.empty(len(onlyfiles), dtype=object)

    for n in range(0, len(onlyfiles)):

        img[n] = cv2.imread(join(mypath_in, onlyfiles[n]))
        gray = cv2.cvtColor(img[n], cv2.COLOR_BGR2GRAY)

        newImg = cv2.resize(gray, (512, 512))
        R = np.mean(newImg)
        std = np.std(newImg)
        standardized_images_out = ((newImg - R) / std) * 40 + 127
        blur = cv2.blur(standardized_images_out, (7, 7))

        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1, 1))
        opening = cv2.morphologyEx(blur, cv2.MORPH_OPEN, kernel, 1)
        thresh = cv2.adaptiveThreshold(opening.astype(np.uint8), 255,
                                       cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY, 11, 0)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (4, 4))
        dilation = cv2.dilate(thresh, kernel, 1)
        erode = cv2.erode(dilation, kernel, 3)
        closing = cv2.morphologyEx(erode, cv2.MORPH_CLOSE, kernel, 5)

        # OpenCV 3.x returns three values here; OpenCV 4.x returns only
        # (contours, hierarchy).
        _, contours, hierarchy = cv2.findContours(closing, cv2.RETR_TREE,
                                                  cv2.CHAIN_APPROX_SIMPLE)

        selection(contours, onlyfiles[n], closing.shape, mypath_late,
                  mypath_no)

    list_precision = []
    list_recall = []
    list_fscore = []
    list_specificity = []
    list_accuracy = []
    start_img = np.empty(len(onlyfiles_out), dtype=object)
    final_img = np.empty(len(onlyfiles_out), dtype=object)

    onlyfiles_late = [
        f for f in listdir(mypath_late) if isfile(join(mypath_late, f))
    ]

    for n in range(0, positive_number):

        start_img[n] = cv2.imread(
            join(mypath_late, onlyfiles_late[n]),
            cv2.IMREAD_GRAYSCALE)  # mismatch in the number of files
        final_img[n] = cv2.imread(join(mypath_out, onlyfiles_late[n]),
                                  cv2.IMREAD_GRAYSCALE)
        gray_s = cv2.resize(start_img[n], (512, 512))
        gray_f = cv2.resize(final_img[n], (512, 512))
        _, start = cv2.threshold(gray_s, 127, 255, cv2.THRESH_BINARY)
        _, final = cv2.threshold(gray_f, 127, 255, cv2.THRESH_BINARY)
        y_pred = start.ravel()
        y_true = final.ravel()

        #print(onlyfiles_out[n], classification_report(y_true, y_pred))

        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_true == 255, y_pred == 255, pos_label=True, average="binary")
        _, specificity, _ = sensitivity_specificity_support(y_true, y_pred)
        accuracy = accuracy_score(y_true, y_pred)
        list_precision.append(precision)
        list_recall.append(recall)
        list_fscore.append(fscore)
        list_specificity.append(specificity[1])
        list_accuracy.append(accuracy)

    print('Metrics for multiple stage processing images')

    print('#######################################')

    print('Precision value: ', np.mean(list_precision))
    print('Recall value: ', np.mean(list_recall))
    print('F-score value: ', np.mean(list_fscore))
    print('Specificity value: ', np.mean(list_specificity))
    print('Accuracy value: ', np.mean(list_accuracy))
def model(data):

    y = [0] * 14 + [1] * 11
    # y = [0] * 168 + [1] * 132

    trainX, testX, trainY, testY = train_test_split(data,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=100)

    model = keras.models.Sequential()

    model.add(Conv2D(32, (3, 3), padding='same', input_shape=(64, 64, 12)))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    model.add(
        Conv2D(filters=256, kernel_size=(9, 9), strides=(1, 1),
               padding='same'))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    model.add(
        Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
               padding='same'))
    model.add(Activation('relu'))

    model.add(
        Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1),
               padding='same'))
    model.add(Activation('relu'))

    model.add(
        Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1),
               padding='same'))
    model.add(Activation('relu'))

    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    model.add(Flatten())

    model.add(Dense(4096, input_shape=(4096, )))
    model.add(Activation('relu'))

    model.add(Dropout(0.4))

    model.add(Dense(4096))
    model.add(Activation('relu'))

    model.add(Dropout(0.4))

    model.add(Dense(100))
    model.add(Activation('relu'))

    model.add(Dropout(0.4))

    model.add(Dense(17))
    model.add(Activation('relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()

    model.compile(optimizer=Adam(lr=1e-4),
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])

    EPOCHS = 20

    trainX = np.expand_dims(np.array(trainX), axis=3)
    testX = np.expand_dims(np.array(testX), axis=3)

    print(trainX.shape)

    n_samples, num, chanel = trainX.shape
    trainX = trainX.reshape(
        (n_samples, num // (12 * 64), num // (12 * 64), num // (64**2)))

    n_samples, num, chanel = testX.shape
    testX = testX.reshape(
        (n_samples, num // (12 * 64), num // (12 * 64), num // (64**2)))

    print(testY)

    H = model.fit(trainX,
                  trainY,
                  validation_data=(testX, testY),
                  epochs=EPOCHS,
                  batch_size=32,
                  verbose=2)
    print("[INFO] evaluating network...")

    predictions = model.predict(testX, batch_size=32)

    # predictions = np.where(np.array(predictions) > 0.5, 1, 0)

    optimal_threshold = get_optimal_threshold(np.array(predictions), testY)

    print('optimal_threshold', optimal_threshold)

    predictions = np.where(np.array(predictions) > optimal_threshold, 1, 0)

    print(predictions)

    precision, recall, fscore, support = precision_recall_fscore_support(
        testY, predictions)
    _, specificity, _ = sensitivity_specificity_support(testY, predictions)
    print('Accuracy', accuracy_score(testY, predictions))
    print('binary precision value', precision[1])
    print('binary recall value', recall[1])
    print('binary fscore value', fscore[1])
    print('binary specificity value', specificity[1])

    print(classification_report(testY, predictions))

    draw_loss(EPOCHS, H)

    plt.figure(figsize=(5, 5))
    fpr, tpr, thresholds = roc_curve(testY, predictions)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % ('CNN', roc_auc))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False positive rate (fallaciously defined pixels)')
    plt.ylabel('True positive rate (correctly defined pixels)')
    plt.legend(loc=0, fontsize='small')
    plt.title("ROC - curve")
    plt.show()

    plt.figure(figsize=(8, 8))
    precision, recall, thresholds = metrics.precision_recall_curve(
        testY, predictions)
    plt.plot(recall, precision)
    plt.ylabel("Precision")
    plt.xlabel("Recall")
    plt.title("Curve dependent Precision и Recall of threshold")
    plt.legend(loc='best')
    plt.show()