Ejemplo n.º 1
0
 def get_flat_data(self, dataset):
     """Load a dataset split and return its flat (unpaired) arrays.

     When the model input size differs from the stored data size, images
     are adapted: sequence models are re-embedded via
     ``format_data_as_sequence``, otherwise each patch is center-cropped
     with its mask.
     """
     flat = prepare_data(dataset, rating_format='raw', verbose=True,
                         reshuffle=False)
     images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z = flat
     if self.model_size != self.data_size:
         if self.seq_model:
             images = format_data_as_sequence(images, embed_size=self.model_size)
         else:
             # crop_center returns (image, mask, ...); keep the image only
             cropped = [crop_center(image, mask, size=self.model_size)[0]
                        for image, mask in zip(images, masks)]
             images = np.array(cropped)
     return images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z
Ejemplo n.º 2
0
def prepare_data_direct(data,
                        objective='malignancy',
                        rating_format='w_mean',
                        rating_scale='none',
                        weighted_rating=False,
                        num_of_classes=2,
                        balanced=False,
                        verbose=0,
                        reshuffle=True):
    """Prepare a dataset for direct (non-siamese) training.

    Loads the raw arrays via ``prepare_data`` and derives ``labels``
    according to ``objective`` ('malignancy', 'rating', 'size',
    'rating_size', 'distance-matrix', 'rating_distance-matrix').
    Optionally balances the benign/malignant split.

    Returns (images, labels, classes, masks, meta, conf).

    Raises ValueError for an unknown objective.
    """
    # NOTE(review): `weighted_rating` is accepted but never used — confirm
    # whether it should be forwarded to prepare_data
    # distance-matrix objectives need the raw per-annotator ratings
    rating_format = 'raw' if 'distance-matrix' in objective else rating_format
    images, ratings, classes, masks, meta, conf, nod_size, rating_weights, z = \
        prepare_data(data, rating_format=rating_format, scaling=rating_scale, verbose=verbose, reshuffle=reshuffle)

    if objective == 'malignancy':
        from keras.utils.np_utils import to_categorical
        labels = to_categorical(classes, num_of_classes)
    elif objective == 'rating':
        labels = ratings
    elif objective == 'size':
        labels = nod_size
    elif objective == 'rating_size':
        labels = ratings, nod_size
    elif objective == 'distance-matrix':
        labels = np.array([(r, rw) for r, rw in zip(ratings, rating_weights)])
    elif objective == 'rating_distance-matrix':
        mean_rating = np.array([r.mean(axis=0) for r in ratings])
        rating_for_dm = [(a, b) for a, b in zip(ratings, rating_weights)]
        labels = mean_rating, rating_for_dm
    else:
        # was `assert False`: stripped under `python -O`, which would leave
        # `labels` undefined and fail later with a confusing NameError
        raise ValueError("Unknown objective: {}".format(objective))

    Nb = np.count_nonzero(0 == classes)
    Nm = np.count_nonzero(1 == classes)
    N = np.minimum(Nb, Nm)

    if balanced:
        # NOTE(review): the argmax over axis=1 assumes `classes` is one-hot,
        # while the counts above compare element-wise — confirm the format
        # prepare_data actually returns here
        new_order = np.random.permutation(2 * N)
        labels_ = np.argmax(classes, axis=1)
        images = select_balanced(images, labels_, N, new_order)
        labels = select_balanced(labels, labels_, N, new_order)
        classes = select_balanced(classes, labels_, N, new_order)
        masks = select_balanced(masks, labels_, N, new_order)
        if verbose:
            Nb = np.count_nonzero(1 - np.argmax(classes, axis=1))
            Nm = np.count_nonzero(np.argmax(classes, axis=1))
            print("Balanced - Benign: {}, Malignant: {}".format(Nb, Nm))

    return images, labels, classes, masks, meta, conf
Ejemplo n.º 3
0
    def prepare_data(self, data_subset_id, dataset_type='Clean', configuration=None):
        """Load one subset of the nodule dataset and cache its arrays.

        Images are center-cropped (using their masks) to the network input
        size and stored on the instance along with labels, masks and meta.
        The masks themselves are left at their original size.
        """
        dataset = load_nodule_dataset(size=self.data_size,
                                      res=self.data_res,
                                      sample=self.data_sample,
                                      dataset_type=dataset_type,
                                      configuration=configuration)[data_subset_id]
        images, labels, classes, masks, meta, conf = \
            prepare_data(dataset, reshuffle=False, return_meta=True, verbose=1)
        cropped = [crop_center(image, mask, size=self.net_in_size)[0]
                   for image, mask in zip(images, masks)]
        self.images = np.array(cropped)
        self.meta = meta
        self.labels = labels
        self.masks = masks
        print("Image size changed to {}".format(self.images.shape))
        print('Mask not updated')
Ejemplo n.º 4
0
def prepare_data_siamese(data,
                         objective="malignancy",
                         rating_distance='mean',
                         balanced=False,
                         verbose=0):
    """Build same/different pairs for siamese training.

    Benign (class 0) and malignant (class 1) samples are combined into
    within-class ("same") and cross-class ("different") pairs. Patches,
    masks and meta records are paired with identical ordering, which the
    size assertions below verify.

    Returns ((img1, img2), similarity_labels, (mask1, mask2), confidence,
    (meta1, meta2)), all shuffled with one shared permutation.

    Raises ValueError for an invalid objective or rating_distance, and
    NotImplementedError for rating_distance='clusters'.
    """
    if verbose:
        print('prepare_data_siamese:')
    images, ratings, classes, masks, meta, conf, nod_size, _, _ = \
        prepare_data(data, rating_format='raw', reshuffle=True, verbose=verbose)

    benign_filter = np.where(classes == 0)[0]
    malign_filter = np.where(classes == 1)[0]
    M = min(benign_filter.shape[0], malign_filter.shape[0])

    if balanced:
        malign_filter = malign_filter[:M]
        benign_filter = benign_filter[:M]

    #   Handle Patches
    # =========================

    imgs_benign = [images[x] for x in benign_filter]
    imgs_malign = [images[x] for x in malign_filter]
    different, d_size = select_different_pair(imgs_benign, imgs_malign, n=M)
    same, sb_size, sm_size = select_same_pairs(imgs_benign, imgs_malign)

    image_pairs = same + different
    image_sub1 = [pair[0] for pair in image_pairs]
    image_sub2 = [pair[1] for pair in image_pairs]

    if objective == "malignancy":
        # same-class pairs get label 0, cross-class pairs get label 1
        similarity_labels = np.concatenate(
            [np.repeat(0, len(same)),
             np.repeat(1, len(different))])
    elif objective == "rating":
        lbls_benign = ratings[benign_filter]
        lbls_malign = ratings[malign_filter]
        diff_lbls, d_size = select_different_pair(lbls_benign, lbls_malign, n=M)
        same_lbls, sb_size, sm_size = select_same_pairs(lbls_benign, lbls_malign)

        label_pairs = same_lbls + diff_lbls
        if rating_distance == 'mean':
            # euclidean distance between the paired rating vectors
            similarity_labels = np.array(
                [np.sqrt((a - b).dot(a - b)) for a, b in label_pairs])
        elif rating_distance == 'clusters':
            # was `assert False` (a no-op under `python -O`)
            raise NotImplementedError(
                "rating_distance='clusters' is not implemented")
        else:
            raise ValueError(
                "Unknown rating_distance: {}".format(rating_distance))
    else:
        print("ERR: {} is not a valid objective".format(objective))
        # was `assert (False)`: stripped under -O, leaving
        # similarity_labels undefined
        raise ValueError("{} is not a valid objective".format(objective))

    #   Handle Masks
    # =========================

    mask_benign = [masks[x] for x in benign_filter]
    mask_malign = [masks[x] for x in malign_filter]
    different_mask, d = select_different_pair(mask_benign, mask_malign, n=M)
    same_mask, sb, sm = select_same_pairs(mask_benign, mask_malign)
    # mask pairing must mirror the image pairing exactly
    assert (d == d_size)
    assert ((sb == sb_size) and (sm == sm_size))

    mask_pairs = same_mask + different_mask
    mask_sub1 = [pair[0] for pair in mask_pairs]
    mask_sub2 = [pair[1] for pair in mask_pairs]

    #   Handle Meta
    # =========================
    meta_benign = reorder(meta, benign_filter)
    meta_malign = reorder(meta, malign_filter)
    different_meta, d = select_different_pair(meta_benign, meta_malign, n=M)
    same_meta, sb, sm = select_same_pairs(meta_benign, meta_malign)
    assert (d == d_size)
    assert ((sb == sb_size) and (sm == sm_size))

    meta_pairs = same_meta + different_meta
    meta_sub1, meta_sub2 = zip(*meta_pairs)

    #   Final touch
    # =========================

    size = similarity_labels.shape[0]
    assert size == len(image_sub1)
    assert size == len(image_sub2)

    # assign confidence classes (weights are resolved online per batch)
    confidence = np.concatenate([
        np.repeat('SB', sb_size),
        np.repeat('SM', sm_size),
        np.repeat('D', d_size)
    ])

    if verbose:
        print(
            "{} pairs of same / {} pairs of different. {} total number of pairs"
            .format(len(same), len(different), size))

    new_order = np.random.permutation(size)

    return ((reorder(image_sub1, new_order), reorder(image_sub2, new_order)),
            similarity_labels[new_order],
            (reorder(mask_sub1, new_order), reorder(mask_sub2, new_order)),
            confidence[new_order],
            (reorder(meta_sub1, new_order), reorder(meta_sub2, new_order)))
Ejemplo n.º 5
0
def prepare_data_siamese_simple(data,
                                siamese_rating_factor,
                                objective="malignancy",
                                rating_distance='mean',
                                verbose=0):
    """Build random sample pairs for siamese training (no class balancing).

    Images, ratings, masks and meta are paired with one shared
    ``select_pairs`` ordering; similarity targets are derived per
    ``objective`` ('rating', 'size' or 'rating_size') and scaled by
    ``siamese_rating_factor``.

    Returns ((img1, img2), labels_tuple, (mask1, mask2), confidence,
    (meta1, meta2)), all shuffled with one shared permutation.

    Raises ValueError for an invalid objective or rating_distance.
    """
    if verbose:
        print('prepare_data_siamese_simple:')
    images, ratings, classes, masks, meta, conf, nod_size, rating_weights, z = \
        prepare_data(data, rating_format='raw', scaling="none", reshuffle=True, verbose=verbose)

    #   Handle Patches
    # =========================

    image_pairs = select_pairs(images)
    image_sub1 = [pair[0] for pair in image_pairs]
    image_sub2 = [pair[1] for pair in image_pairs]

    #   Handle Labels
    # =========================

    rating_pairs = select_pairs(ratings)
    rating_weight_pairs = select_pairs(rating_weights)

    confidence = np.ones(len(image_pairs))

    if objective in ["rating", "rating_size"]:

        if rating_distance == 'mean':
            # euclidean distance between paired rating vectors
            similarity_ratings = np.array(
                [np.sqrt((a - b).dot(a - b)) for a, b in rating_pairs])
        elif rating_distance == 'clusters':
            similarity_ratings = []
            confidence = []
            for r1, r2 in rating_pairs:
                distance, std = rating_clusters_distance_and_std(r1, r2)
                similarity_ratings += [distance]
                confidence += [std]
            similarity_ratings = np.array(similarity_ratings)
            confidence = np.array(confidence)
            # map the cluster std to a confidence in (0, 1]
            confidence = 1 - .5 * confidence / (confidence + .5)
        elif rating_distance == 'weighted_clusters':
            # FIX: was `elif 'weighted_clusters':` — an always-true literal
            # that made this branch swallow every unknown rating_distance
            # and the `else` below unreachable
            similarity_ratings = []
            confidence = []
            for r, w in zip(rating_pairs, rating_weight_pairs):
                distance, std = rating_clusters_distance_and_std(r[0],
                                                                 r[1],
                                                                 'euclidean',
                                                                 weight_a=w[0],
                                                                 weight_b=w[1])
                similarity_ratings += [distance]
                confidence += [std]
            similarity_ratings = np.array(similarity_ratings)
            confidence = np.array(confidence)
            confidence = 1 - .5 * confidence / (confidence + .5)
        else:
            raise ValueError(
                "Unknown rating_distance: {}".format(rating_distance))
        similarity_ratings *= siamese_rating_factor

    if objective in ['size', 'rating_size']:
        size_pairs = select_pairs(nod_size)
        similarity_size = np.array(
            [np.sqrt((a - b).dot(a - b)) for a, b in size_pairs])

        if similarity_size.ndim == 1:
            # FIX: this expansion was assigned to `similarity_ratings`,
            # clobbering the rating targets for the 'rating_size' objective
            similarity_size = np.expand_dims(similarity_size, axis=1)

    # FIX: guard on objective — the unconditional access raised NameError
    # for objectives that never define similarity_ratings
    if objective in ["rating", "rating_size"] and similarity_ratings.ndim == 1:
        similarity_ratings = np.expand_dims(similarity_ratings, axis=1)

    if objective == "rating":
        similarity_labels = (similarity_ratings,)
    elif objective == 'size':
        similarity_labels = (similarity_size,)
    elif objective == 'rating_size':
        similarity_labels = (similarity_ratings, similarity_size)
    else:
        print("ERR: {} is not a valid objective".format(objective))
        # was `assert False`: stripped under -O
        raise ValueError("{} is not a valid objective".format(objective))

    #   Handle Masks
    # =========================

    mask_pairs = select_pairs(masks)
    mask_sub1 = [pair[0] for pair in mask_pairs]
    mask_sub2 = [pair[1] for pair in mask_pairs]

    #   Handle Meta
    # =========================
    meta_pairs = select_pairs(meta)
    meta_sub1 = [pair[0] for pair in meta_pairs]
    meta_sub2 = [pair[1] for pair in meta_pairs]

    #   Final touch
    # =========================

    size = similarity_labels[0].shape[0]
    assert size == len(image_sub1)
    assert size == len(image_sub2)

    new_order = np.random.permutation(size)

    return ((reorder(image_sub1, new_order), reorder(image_sub2, new_order)),
            tuple([s[new_order] for s in similarity_labels]),
            (reorder(mask_sub1, new_order), reorder(mask_sub2, new_order)),
            confidence[new_order],
            (reorder(meta_sub1, new_order), reorder(meta_sub2, new_order)))
Ejemplo n.º 6
0
def prepare_data_triplet(data,
                         objective="malignancy",
                         rating_distance="mean",
                         balanced=False,
                         return_confidence=False,
                         return_meta=False,
                         verbose=0):
    """Build (reference, similar, different) triplets for triplet training.

    When ``balanced`` is set, triplets are assembled from split
    benign/malignant halves via ``make_balanced_trip``; otherwise
    ``select_triplets`` picks them and ``check_triplet_order`` (on the
    rating distances) decides which member is the "similar" one.

    Returns ((img1, img2, img3), labels, (mask1, mask2, mask3), confidence)
    and, when ``return_meta`` is set, a trailing (meta1, meta2, meta3),
    all shuffled with one shared permutation.
    """
    if verbose:
        print('prepare_data_triplet:')
    # NOTE(review): other call sites unpack 9 values from prepare_data
    # (trailing rating_weights, z) — confirm this 8-value unpack matches
    # the current prepare_data signature
    images, ratings, classes, masks, meta, conf, nod_size, _ \
        = prepare_data(data, rating_format="raw", scaling="none", reshuffle=True, verbose=verbose)

    if balanced:
        print('Create a balanced split')
        benign_filter = np.where(classes == 0)[0]
        malign_filter = np.where(classes == 1)[0]
        M = min(benign_filter.shape[0], malign_filter.shape[0])
        M12 = M // 2
        M = M12 * 2
        malign_filter_a = malign_filter[:M12]
        malign_filter_b = malign_filter[M12:]
        benign_filter_a = benign_filter[:M12]
        benign_filter_b = benign_filter[M12:]
    else:
        rating_trips = select_triplets(ratings)
        distance = l2_distance if rating_distance == 'mean' else cluster_distance
        trip_rank_status = check_triplet_order(rating_trips,
                                               rating_distance=distance)

    #   Handle Patches
    # =========================

    if balanced:
        image_trips = make_balanced_trip(images, benign_filter_a,
                                         benign_filter_b, malign_filter_a,
                                         malign_filter_b)
    else:
        image_trips = select_triplets(images)
        image_trips = arrange_triplet(image_trips, trip_rank_status)
    image_sub1 = np.array([trip[0] for trip in image_trips])
    image_sub2 = np.array([trip[1] for trip in image_trips])
    image_sub3 = np.array([trip[2] for trip in image_trips])

    #   Handle Masks
    # =========================

    if balanced:
        mask_trips = make_balanced_trip(masks, benign_filter_a,
                                        benign_filter_b, malign_filter_a,
                                        malign_filter_b)
    else:
        mask_trips = select_triplets(masks)
        mask_trips = arrange_triplet(mask_trips, trip_rank_status)
    mask_sub1 = np.array([trip[0] for trip in mask_trips])
    mask_sub2 = np.array([trip[1] for trip in mask_trips])
    mask_sub3 = np.array([trip[2] for trip in mask_trips])

    #   Handle Meta
    # =========================
    if return_meta:
        if balanced:
            meta_trips = make_balanced_trip(meta, benign_filter_a,
                                            benign_filter_b, malign_filter_a,
                                            malign_filter_b)
        else:
            meta_trips = select_triplets(meta)
            meta_trips = arrange_triplet(meta_trips, trip_rank_status)
        meta_sub1 = np.array([trip[0] for trip in meta_trips])
        meta_sub2 = np.array([trip[1] for trip in meta_trips])
        meta_sub3 = np.array([trip[2] for trip in meta_trips])

    #   Final touch
    # =========================

    size = image_sub1.shape[0]
    if balanced:
        # FIX: this assert used M unconditionally, which is only defined in
        # the balanced branch — previously a NameError for balanced=False
        assert M * 2 == size

    # FIX: labels were sized by the sample count N, but are indexed by a
    # permutation of the triplet count `size`; size them consistently
    similarity_labels = np.array([0] * size)

    confidence = np.repeat('SB', size)
    if objective == 'rating':
        if return_confidence == "rating":
            conf_trips = select_triplets(conf)
            conf_trips = arrange_triplet(conf_trips, trip_rank_status)
            confidence = get_triplet_confidence(conf_trips)
            confidence = np.array(confidence)
        elif return_confidence == "rating_distance":
            confidence = calc_rating_distance_confidence(trip_rank_status)
            confidence = np.array(confidence)

    new_order = np.random.permutation(size)

    if return_meta:
        # FIX: the third image/mask of each triplet was dropped in this
        # branch (copy-paste from the pair version); return full triples
        # as the non-meta branch does
        return ((image_sub1[new_order], image_sub2[new_order],
                 image_sub3[new_order]),
                similarity_labels[new_order],
                (mask_sub1[new_order], mask_sub2[new_order],
                 mask_sub3[new_order]),
                confidence[new_order],
                (reorder(meta_sub1, new_order),
                 reorder(meta_sub2, new_order),
                 reorder(meta_sub3, new_order)))
    else:
        return ((image_sub1[new_order], image_sub2[new_order],
                 image_sub3[new_order]), similarity_labels[new_order],
                (mask_sub1[new_order], mask_sub2[new_order],
                 mask_sub3[new_order]), confidence[new_order])