def getOptimalT_helper(model, ValDataGen, valid_nums, num_classes=28):
    """Find the per-class probability threshold that maximises validation F1.

    Draws ``valid_nums`` batches from ``ValDataGen`` with ``next()``, scores
    each batch with ``model.predict`` and then sweeps the threshold grid
    ``np.arange(0, 1, 0.001)``.  A score strictly greater than the threshold
    counts as a positive prediction, and every class column is rated with
    ``off1(..., average='binary')``.

    :param model: object whose ``predict(images)`` returns a 2-D array with
        ``num_classes`` columns.
    :param ValDataGen: iterator yielding ``(images, labels)`` batches.
    :param valid_nums: number of batches to draw from ``ValDataGen``.
    :param num_classes: number of label columns; defaults to 28, the value
        that used to be hard-coded.
    :return: tuple ``(T, mean_best_f1)``.  ``T`` is a length-``num_classes``
        array holding, for each class, the lowest threshold that reaches
        that class's best F1; ``mean_best_f1`` is the mean over classes of
        those best F1 values.
    """
    # Collect batches in lists and concatenate once: calling np.append inside
    # the loop re-copies everything gathered so far on every iteration.
    # Seeding with an empty float64 array keeps the float64 result dtype and
    # the (0, num_classes) shape when no batch is drawn.
    pred_batches = [np.empty((0, num_classes))]
    label_batches = [np.empty((0, num_classes))]
    for _ in tqdm(range(valid_nums)):
        img, lbl = next(ValDataGen)
        pred_batches.append(model.predict(img))
        label_batches.append(lbl)
    lastFullValPred = np.concatenate(pred_batches, axis=0)
    lastFullValLabels = np.concatenate(label_batches, axis=0)
    print(lastFullValPred.shape, lastFullValLabels.shape)

    rng = np.arange(0, 1, 0.001)
    f1s = np.zeros((rng.shape[0], num_classes))
    for j, t in enumerate(tqdm(rng)):
        # Binarise all classes at once for this threshold.
        predicted = (lastFullValPred > t).astype(np.int8)
        for i in range(num_classes):
            f1s[j, i] = off1(lastFullValLabels[:, i],
                             predicted[:, i],
                             average='binary')

    best_f1 = np.max(f1s, axis=0)
    print(best_f1)
    print(np.mean(best_f1))

    # argmax returns the first (i.e. lowest-threshold) maximum of each column.
    T = rng[np.argmax(f1s, axis=0)]
    print(T)

    return T, np.mean(best_f1)
Example #2
0
    def getOptimalT(self, model, val_gen, num_classes=28):
        """Find the per-class probability threshold that maximises validation F1.

        Indexes ``val_gen`` from ``0`` to ``len(val_gen) - 1``, scores every
        batch with ``model.predict`` and then sweeps the threshold grid
        ``np.arange(0, 1, 0.001)``.  A score strictly greater than the
        threshold counts as a positive prediction, and every class column is
        rated with ``off1(..., average='binary')``.

        :param model: object whose ``predict(images)`` returns a 2-D array
            with ``num_classes`` columns.
        :param val_gen: sized, indexable batch source; ``val_gen[i]`` must
            return an ``(images, labels)`` pair.
        :param num_classes: number of label columns; defaults to 28, the
            value that used to be hard-coded.
        :return: tuple ``(T, mean_best_f1)``.  ``T`` is a
            length-``num_classes`` array holding, for each class, the lowest
            threshold that reaches that class's best F1; ``mean_best_f1`` is
            the mean over classes of those best F1 values.
        """
        # Collect batches in lists and concatenate once: calling np.append
        # inside the loop re-copies everything gathered so far each time.
        # Seeding with an empty float64 array keeps the float64 result dtype
        # and the (0, num_classes) shape for an empty generator.
        pred_batches = [np.empty((0, num_classes))]
        label_batches = [np.empty((0, num_classes))]
        for i in tqdm(range(len(val_gen))):
            data_im, data_label = val_gen[i]
            pred_batches.append(model.predict(data_im))
            label_batches.append(data_label)
        last_full_val_pred = np.concatenate(pred_batches, axis=0)
        last_full_val_labels = np.concatenate(label_batches, axis=0)
        print(last_full_val_pred.shape, last_full_val_labels.shape)

        rng = np.arange(0, 1, 0.001)
        f1s = np.zeros((rng.shape[0], num_classes))
        for j, t in enumerate(tqdm(rng)):
            # Binarise all classes at once for this threshold.
            predicted = (last_full_val_pred > t).astype(np.int8)
            for i in range(num_classes):
                f1s[j, i] = off1(last_full_val_labels[:, i],
                                 predicted[:, i],
                                 average='binary')

        best_f1 = np.max(f1s, axis=0)
        print(best_f1)
        print(np.mean(best_f1))

        # argmax returns the first (lowest-threshold) maximum of each column.
        T = rng[np.argmax(f1s, axis=0)]
        print(T)
        return T, np.mean(best_f1)
Example #3
0
    def f1_full_validation(self, y_true, y_pred, class_num, save_path=None):
        """Sweep probability thresholds and store the best one for each class.

        Both inputs are reshaped to ``(n_samples, -1, class_num)``.  For each
        threshold in ``np.arange(0, 1, 0.001)`` and each class, scores
        strictly above the threshold become positive predictions and are
        rated against the flattened labels with
        ``off1(..., average='binary')``.

        The results are stored on the instance rather than returned:
        ``self.final_t`` holds one threshold per class (the lowest one that
        reaches the class's best F1) and ``self.threshold`` holds that vector
        repeated once per entry of the middle (reshaped) axis.

        :param y_true: label array; must expose ``shape`` and be reshapeable
            to ``(y_true.shape[0], -1, class_num)``.
        :param y_pred: score array, reshaped the same way as ``y_true``.
        :param class_num: number of classes (size of the last axis after
            reshaping).
        :param save_path: accepted but not used by this method.
        """
        thresholds = np.arange(0, 1, 0.001)
        scores = np.zeros((thresholds.shape[0], class_num))

        # Bring both arrays to (samples, groups, classes); this also covers
        # the multi-label layout where several label groups are laid out
        # side by side.
        y_true = np.reshape(y_true, (y_true.shape[0], -1, class_num))
        y_pred = np.reshape(y_pred, (y_pred.shape[0], -1, class_num))

        progress = tqdm(thresholds, 'Updating the F1 threshold')
        for row, cut in enumerate(progress):
            for cls in range(class_num):
                hits = np.array(y_pred[..., cls] > cut, dtype=np.int8)
                flat_truth = y_true[..., cls].reshape(-1)
                scores[row, cls] = off1(flat_truth,
                                        hits.reshape(-1),
                                        average='binary')

        best_per_class = np.max(scores, axis=0)
        print('Individual F1-scores for each class:')
        print(best_per_class)
        print('Macro F1-score CV =', np.mean(best_per_class))

        # Lowest threshold that attains the best F1, one entry per class.
        F_T = np.array([
            thresholds[np.flatnonzero(scores[:, cls] == best_per_class[cls])[0]]
            for cls in range(class_num)
        ], dtype=np.float64)

        # Repeat the per-class thresholds once for every label group.
        T = np.tile(F_T, y_true.shape[1])

        print('Probability threshold maximizing CV F1-score for each class:')
        print(F_T)

        self.threshold = T
        self.final_t = F_T
def evaluate():
    """Tune per-class thresholds on validation data and write a submission.

    Steps:

    1. Load the most recently created ``*.h5`` file in the current working
       directory with ``load_model``.
    2. Predict on the module-level ``valid_gen`` and, for every class, pick
       the lowest threshold on ``np.arange(0, 1, 0.001)`` that maximises the
       binary F1 score.
    3. Predict on the test set returned by ``testDataset()`` and write
       ``model<unix-timestamp>.csv`` with the space-separated class indices
       whose score exceeds the tuned threshold.

    :raises ValueError: if no ``*.h5`` file exists in the working directory.
    """
    print("Loading weights ...")
    import glob
    import os

    from sklearn.metrics import f1_score as off1

    num_classes = 28

    list_of_files = glob.glob('*.h5')
    if not list_of_files:
        # max() on an empty list raises a ValueError with an unhelpful
        # message; keep the exception type but say what is actually missing.
        raise ValueError(
            "no '*.h5' weight file found in " + os.getcwd())
    latest_file = max(list_of_files, key=os.path.getctime)
    model = load_model(latest_file, custom_objects={'f1_measure': f1_measure})
    fullValGen = valid_gen

    # Collect batches in lists and concatenate once: calling np.append inside
    # the loop re-copies everything gathered so far on every iteration.
    # Seeding with an empty float64 array keeps the float64 result dtype.
    pred_batches = [np.empty((0, num_classes))]
    label_batches = [np.empty((0, num_classes))]
    for i in tqdm(range(len(fullValGen))):
        im, lbl = fullValGen[i]
        pred_batches.append(model.predict(im))
        label_batches.append(lbl)
    lastFullValPred = np.concatenate(pred_batches, axis=0)
    lastFullValLabels = np.concatenate(label_batches, axis=0)
    print(lastFullValPred.shape, lastFullValLabels.shape)

    rng = np.arange(0, 1, 0.001)
    f1s = np.zeros((rng.shape[0], num_classes))
    for j, t in enumerate(tqdm(rng)):
        # Binarise all classes at once for this threshold.
        predicted = (lastFullValPred > t).astype(np.int8)
        for i in range(num_classes):
            f1s[j, i] = off1(lastFullValLabels[:, i],
                             predicted[:, i],
                             average='binary')

    best_f1 = np.max(f1s, axis=0)
    print('Individual F1-scores for each class:')
    print(best_f1)
    print('Macro F1-score CV =', np.mean(best_f1))

    # argmax returns the first (lowest-threshold) maximum of each column.
    T = rng[np.argmax(f1s, axis=0)]
    print('Probability threshold maximizing CV F1-score for each class:')
    print(T)

    pathsTest, labelsTest = testDataset()
    testg = ProteinDataGenerator(pathsTest, labelsTest, BATCH_SIZE, SHAPE)
    submit = pd.read_csv(DATA_DIR + 'sample_submission.csv')
    P = np.zeros((pathsTest.shape[0], num_classes))

    for i in tqdm(range(len(testg))):
        images, _ = testg[i]
        score = model.predict(images)
        P[i * BATCH_SIZE:i * BATCH_SIZE + score.shape[0]] = score

    # Use the same strict ">" rule the thresholds were tuned with, so a score
    # exactly equal to its threshold is treated identically in both places.
    positive = P > T
    prediction = []
    for row in tqdm(range(submit.shape[0])):
        class_ids = np.flatnonzero(positive[row])
        prediction.append(' '.join(str(col) for col in class_ids))

    submit['Predicted'] = np.array(prediction)
    ts = str(int(time.time()))
    submit.to_csv('model' + ts + '.csv', index=False)
Example #5
0
# Script fragment: gather validation scores/labels, sweep thresholds and pick
# the best-F1 threshold for each of the 28 classes.
# NOTE(review): lastFullValPred, fullValGen and bestModel are not initialised
# in this fragment -- presumably defined earlier in the script; confirm.
lastFullValLabels = np.empty((0, 28))
for i in tqdm(range(len(fullValGen))):
    im, lbl = fullValGen[i]
    scores = bestModel.predict(im)
    # Append this batch's scores and labels to the running arrays.
    lastFullValPred = np.append(lastFullValPred, scores, axis=0)
    lastFullValLabels = np.append(lastFullValLabels, lbl, axis=0)
print(lastFullValPred.shape, lastFullValLabels.shape)



# Threshold grid 0.000 .. 0.999; f1s[j, i] is the F1 of class i at rng[j].
rng = np.arange(0, 1, 0.001)
f1s = np.zeros((rng.shape[0], 28))
for j, t in enumerate(tqdm(rng)):
    for i in range(28):
        # A score strictly above the threshold is a positive prediction.
        p = np.array(lastFullValPred[:, i] > t, dtype=np.int8)
        scoref1 = off1(lastFullValLabels[:, i], p, average='binary')
        f1s[j, i] = scoref1

print('Individual F1-scores for each class:')
print(np.max(f1s, axis=0))
print('Macro F1-score CV =', np.mean(np.max(f1s, axis=0)))

# Plot the F1 values in f1s against the threshold grid.
plt.plot(rng, f1s)
T = np.empty(28)
for i in range(28):
    # First (lowest) threshold at which class i reaches its maximum F1.
    T[i] = rng[np.where(f1s[:, i] == np.max(f1s[:, i]))[0][0]]
    # Never let a class threshold drop below 0.01.
    if T[i] < 0.01:
        T[i] = 0.01
print('Probability threshold maximizing CV F1-score for each class:')
print(T)
Example #6
0
def generateSubmitFile(model=None):
    """Predict the test set and write ``submit.csv`` with tuned thresholds.

    The last ``VAL_RATIO`` share of the training paths/labels is used as the
    validation split.  For every class the lowest threshold on
    ``np.arange(0, 1, 0.001)`` that maximises the binary F1 score on that
    split is selected, and each test row is labelled with the space-separated
    indices of the classes whose score exceeds their threshold.

    :param model: model exposing ``predict``; when ``None`` the model stored
        at ``./base.model`` is loaded with ``load_model``.
    """
    num_classes = 28

    pathsTest, labelsTest = getTestDataset()
    # Compare against None explicitly so only a missing argument triggers
    # loading from disk.
    if model is None:
        model = load_model('./base.model', custom_objects={'f1': f1})
    paths, labels = getTrainDataset()
    testg = ProteinDataGenerator(pathsTest, labelsTest, BATCH_SIZE, SHAPE)
    lastTrainIndex = int((1 - VAL_RATIO) * paths.shape[0])
    pathsVal = paths[lastTrainIndex:]
    labelsVal = labels[lastTrainIndex:]
    vg = ProteinDataGenerator(pathsVal,
                              labelsVal,
                              BATCH_SIZE,
                              SHAPE,
                              use_cache=True,
                              shuffle=False)
    submit = pd.read_csv(sample_dir)

    P = np.zeros((pathsTest.shape[0], num_classes))
    for i in tqdm(range(len(testg))):
        # The test generator's labels are not needed; discarding them avoids
        # overwriting the training ``labels`` loaded above.
        images, _ = testg[i]
        score = model.predict(images)
        P[i * BATCH_SIZE:i * BATCH_SIZE + score.shape[0]] = score

    # NOTE: the shuffled permutation is never used.  Seeding and shuffling
    # are kept only so the global NumPy RNG ends up in the same state as
    # before for whatever runs afterwards.  ``np.int`` was removed in
    # NumPy 1.24; the builtin ``int`` is the exact replacement.
    np.random.seed(SEED)
    keys = np.arange(paths.shape[0], dtype=int)
    np.random.shuffle(keys)

    fullValGen = vg
    # Collect batches in lists and concatenate once: calling np.append inside
    # the loop re-copies everything gathered so far on every iteration.
    # Seeding with an empty float64 array keeps the float64 result dtype.
    pred_batches = [np.empty((0, num_classes))]
    label_batches = [np.empty((0, num_classes))]
    for i in tqdm(range(len(fullValGen))):
        im, lbl = fullValGen[i]
        pred_batches.append(model.predict(im))
        label_batches.append(lbl)
    lastFullValPred = np.concatenate(pred_batches, axis=0)
    lastFullValLabels = np.concatenate(label_batches, axis=0)
    print(lastFullValPred.shape, lastFullValLabels.shape)

    rng = np.arange(0, 1, 0.001)
    f1s = np.zeros((rng.shape[0], num_classes))
    for j, t in enumerate(tqdm(rng)):
        # Binarise all classes at once for this threshold.
        predicted = (lastFullValPred > t).astype(np.int8)
        for i in range(num_classes):
            f1s[j, i] = off1(lastFullValLabels[:, i],
                             predicted[:, i],
                             average='binary')

    # argmax returns the first (lowest-threshold) maximum of each column.
    T = rng[np.argmax(f1s, axis=0)]
    print('Probability threshold maximizing CV F1-score for each class:')
    print(T)

    # Use the same strict ">" rule the thresholds were tuned with, so a score
    # exactly equal to its threshold is treated identically in both places.
    positive = P > T
    prediction = []
    for row in tqdm(range(submit.shape[0])):
        class_ids = np.flatnonzero(positive[row])
        prediction.append(' '.join(str(col) for col in class_ids))

    submit['Predicted'] = np.array(prediction)
    submit.to_csv('submit.csv', index=False)