def get_flat_data(self, dataset):
    """Load *dataset* via ``prepare_data`` and adapt the patches to the model input size.

    Returns the same 9-tuple produced by ``prepare_data``:
    (images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z),
    where ``images`` is re-cropped (or sequence-formatted for sequence models)
    whenever ``self.model_size`` differs from ``self.data_size``.
    """
    (images, labels, classes, masks, meta, conf,
     nodule_size, rating_weights, z) = prepare_data(
        dataset, rating_format='raw', verbose=True, reshuffle=False)

    # Only touch the patches when the network expects a different input size.
    if self.model_size != self.data_size:
        if self.seq_model:
            images = format_data_as_sequence(images, embed_size=self.model_size)
        else:
            # crop_center returns (image, ...); keep only the cropped image.
            cropped = [crop_center(img, mask, size=self.model_size)[0]
                       for img, mask in zip(images, masks)]
            images = np.array(cropped)

    return images, labels, classes, masks, meta, conf, nodule_size, rating_weights, z
def prepare_data_direct(data, objective='malignancy', rating_format='w_mean', rating_scale='none',
                        weighted_rating=False, num_of_classes=2, balanced=False, verbose=0,
                        reshuffle=True):
    """Prepare (images, labels, classes, masks, meta, conf) for direct training.

    The label tensor depends on *objective*:
      - 'malignancy':             one-hot class labels (num_of_classes wide)
      - 'rating':                 the ratings themselves
      - 'size':                   nodule sizes
      - 'rating_size':            (ratings, sizes) pair
      - 'distance-matrix':        (rating, rating_weight) pairs
      - 'rating_distance-matrix': (per-nodule mean rating, (rating, weight) pairs)

    Raises ValueError for an unknown objective.  When *balanced* is set, the
    benign/malignant populations are subsampled to equal size.
    """
    # distance-matrix objectives need the raw per-rater ratings, not a summary.
    rating_format = 'raw' if 'distance-matrix' in objective else rating_format
    images, ratings, classes, masks, meta, conf, nod_size, rating_weights, z = \
        prepare_data(data, rating_format=rating_format, scaling=rating_scale,
                     verbose=verbose, reshuffle=reshuffle)

    if objective == 'malignancy':
        from keras.utils.np_utils import to_categorical
        labels = to_categorical(classes, num_of_classes)
    elif objective == 'rating':
        labels = ratings
    elif objective == 'size':
        labels = nod_size
    elif objective == 'rating_size':
        labels = ratings, nod_size
    elif objective == 'distance-matrix':
        labels = np.array([(r, rw) for r, rw in zip(ratings, rating_weights)])
    elif objective == 'rating_distance-matrix':
        # mean over raters per nodule; raw (rating, weight) pairs kept for the DM head.
        mean_rating = np.array([r.mean(axis=0) for r in ratings])
        rating_for_dm = [(a, b) for a, b in zip(ratings, rating_weights)]
        labels = mean_rating, rating_for_dm
    else:
        # was `assert False` — an assert is stripped under `python -O` and
        # gives no diagnostic; fail loudly instead.
        raise ValueError("'{}' is not a valid objective".format(objective))

    Nb = np.count_nonzero(0 == classes)
    Nm = np.count_nonzero(1 == classes)
    N = np.minimum(Nb, Nm)

    if balanced:
        new_order = np.random.permutation(2 * N)
        # NOTE(review): Nb/Nm above treat `classes` as a flat 0/1 vector, but
        # argmax here treats it as one-hot — confirm which layout prepare_data
        # actually returns.
        labels_ = np.argmax(classes, axis=1)
        images = select_balanced(images, labels_, N, new_order)
        labels = select_balanced(labels, labels_, N, new_order)
        classes = select_balanced(classes, labels_, N, new_order)
        masks = select_balanced(masks, labels_, N, new_order)
        if verbose:
            Nb = np.count_nonzero(1 - np.argmax(classes, axis=1))
            Nm = np.count_nonzero(np.argmax(classes, axis=1))
            print("Balanced - Benign: {}, Malignant: {}".format(Nb, Nm))

    return images, labels, classes, masks, meta, conf
def prepare_data(self, data_subset_id, dataset_type='Clean', configuration=None):
    """Load one subset of the nodule dataset, crop every patch to the network
    input size, and cache images/labels/masks/meta on the instance.

    Note: only the images are resized; the cached masks keep their original size.
    """
    dataset = load_nodule_dataset(size=self.data_size, res=self.data_res,
                                  sample=self.data_sample,
                                  dataset_type=dataset_type,
                                  configuration=configuration)
    images, labels, classes, masks, meta, conf = prepare_data(
        dataset[data_subset_id], reshuffle=False, return_meta=True, verbose=1)

    # crop_center returns (image, ...); keep only the cropped image.
    cropped = []
    for img, mask in zip(images, masks):
        cropped.append(crop_center(img, mask, size=self.net_in_size)[0])
    self.images = np.array(cropped)
    self.meta = meta
    self.labels = labels
    self.masks = masks
    print("Image size changed to {}".format(self.images.shape))
    print('Mask not updated')
def prepare_data_siamese(data, objective="malignancy", rating_distance='mean', balanced=False, verbose=0):
    """Build siamese training pairs (same-class and different-class) from *data*.

    Returns:
        ((image_sub1, image_sub2), similarity_labels,
         (mask_sub1, mask_sub2), confidence,
         (meta_sub1, meta_sub2))
    where each *_sub list holds one leg of every pair, shuffled with a shared
    permutation so the legs stay aligned.

    NOTE(review): the image, label, mask and meta passes each call
    select_different_pair / select_same_pairs independently; correctness relies
    on those helpers pairing elements deterministically in the same order every
    call — confirm against their implementation.
    """
    if verbose:
        print('prepare_data_siamese:')
    images, ratings, classes, masks, meta, conf, nod_size, _, _ = \
        prepare_data(data, rating_format='raw', reshuffle=True, verbose=verbose)

    N = len(images)
    benign_filter = np.where(classes == 0)[0]
    malign_filter = np.where(classes == 1)[0]
    M = min(benign_filter.shape[0], malign_filter.shape[0])

    # Balanced mode truncates both populations to the smaller one.
    if balanced:
        malign_filter = malign_filter[:M]
        benign_filter = benign_filter[:M]

    #   Handle Patches
    # =========================
    imgs_benign, imgs_malign = [images[x] for x in benign_filter], [images[x] for x in malign_filter]
    different, d_size = select_different_pair(imgs_benign, imgs_malign, n=M)
    same, sb_size, sm_size = select_same_pairs(imgs_benign, imgs_malign)

    # Pair order is same-pairs first, then different-pairs; the labels and
    # confidence arrays below depend on this ordering.
    image_pairs = same + different
    image_sub1 = [pair[0] for pair in image_pairs]
    image_sub2 = [pair[1] for pair in image_pairs]

    if objective == "malignancy":
        # 0 = same-class pair, 1 = different-class pair.
        similarity_labels = np.concatenate([np.repeat(0, len(same)),
                                            np.repeat(1, len(different))])
    elif objective == "rating":
        lbls_benign, lbls_malign = ratings[benign_filter], ratings[malign_filter]
        diff_lbls, d_size = select_different_pair(lbls_benign, lbls_malign, n=M)
        same_lbls, sb_size, sm_size = select_same_pairs(lbls_benign, lbls_malign)
        label_pairs = same_lbls + diff_lbls
        if rating_distance == 'mean':
            # Euclidean distance between the two ratings of each pair.
            similarity_labels = np.array([np.sqrt((a - b).dot(a - b)) for a, b in label_pairs])
        elif rating_distance == 'clusters':
            # Not implemented for the siamese pipeline.
            assert False
        else:
            assert False
    else:
        print("ERR: {} is not a valid objective".format(objective))
        assert (False)

    #   Handle Masks
    # =========================
    mask_benign, mask_malign = [masks[x] for x in benign_filter], [masks[x] for x in malign_filter]
    different_mask, d = select_different_pair(mask_benign, mask_malign, n=M)
    same_mask, sb, sm = select_same_pairs(mask_benign, mask_malign)
    # Sanity: mask pairing must mirror the image pairing exactly.
    assert (d == d_size)
    assert ((sb == sb_size) and (sm == sm_size))
    mask_pairs = same_mask + different_mask
    mask_sub1 = [pair[0] for pair in mask_pairs]
    mask_sub2 = [pair[1] for pair in mask_pairs]

    #   Handle Meta
    # =========================
    meta_benign, meta_malign = reorder(meta, benign_filter), reorder(meta, malign_filter)
    different_meta, d = select_different_pair(meta_benign, meta_malign, n=M)
    same_meta, sb, sm = select_same_pairs(meta_benign, meta_malign)
    # Sanity: meta pairing must mirror the image pairing exactly.
    assert (d == d_size)
    assert ((sb == sb_size) and (sm == sm_size))
    meta_pairs = same_meta + different_meta
    meta_sub1, meta_sub2 = zip(*meta_pairs)

    #   Final touch
    # =========================
    size = similarity_labels.shape[0]
    assert size == len(image_sub1)
    assert size == len(image_sub2)

    # assign confidence classes (weights are resolved online per batch):
    # 'SB' same-benign, 'SM' same-malignant, 'D' different — aligned with
    # the same-then-different pair ordering above.
    confidence = np.concatenate([np.repeat('SB', sb_size),
                                 np.repeat('SM', sm_size),
                                 np.repeat('D', d_size)])

    if verbose:
        print("{} pairs of same / {} pairs of different. {} total number of pairs".format(len(same), len(different), size))

    # One shared permutation keeps both legs (and labels/conf/meta) aligned.
    new_order = np.random.permutation(size)

    return ((reorder(image_sub1, new_order), reorder(image_sub2, new_order)),
            similarity_labels[new_order],
            (reorder(mask_sub1, new_order), reorder(mask_sub2, new_order)),
            confidence[new_order],
            (reorder(meta_sub1, new_order), reorder(meta_sub2, new_order)))
def prepare_data_siamese_simple(data, siamese_rating_factor, objective="rating",
                                rating_distance='mean', verbose=0):
    """Build simple siamese pairs with rating- and/or size-distance targets.

    Supported objectives: 'rating', 'size', 'rating_size'.
    Returns:
        ((image_sub1, image_sub2), similarity_labels_tuple,
         (mask_sub1, mask_sub2), confidence,
         (meta_sub1, meta_sub2))

    Fixes over the previous revision:
      * `elif 'weighted_clusters':` was always-true (a non-empty string literal),
        so any rating_distance other than 'mean'/'clusters' silently took the
        weighted path — now an explicit `rating_distance == 'weighted_clusters'`
        test with a ValueError fallback.
      * the size branch accidentally wrote its expanded distances into
        `similarity_ratings` instead of `similarity_size`.
      * objective == 'size' raised NameError because `similarity_ratings` was
        referenced without ever being assigned.
    """
    if verbose:
        print('prepare_data_siamese_simple:')
    images, ratings, classes, masks, meta, conf, nod_size, rating_weights, z = \
        prepare_data(data, rating_format='raw', scaling="none", reshuffle=True, verbose=verbose)
    N = len(images)

    #   Handle Patches
    # =========================
    image_pairs = select_pairs(images)
    image_sub1 = [pair[0] for pair in image_pairs]
    image_sub2 = [pair[1] for pair in image_pairs]

    #   Handle Labels
    # =========================
    rating_pairs = select_pairs(ratings)
    rating_weight_pairs = select_pairs(rating_weights)
    confidence = np.ones(len(image_pairs))
    similarity_ratings = None
    similarity_size = None

    if objective in ["rating", "rating_size"]:
        if rating_distance == 'mean':
            # Euclidean distance between the two ratings of each pair.
            similarity_ratings = np.array([np.sqrt((a - b).dot(a - b))
                                           for a, b in rating_pairs])
        elif rating_distance == 'clusters':
            similarity_ratings = []
            confidence = []
            for r1, r2 in rating_pairs:
                dist, std = rating_clusters_distance_and_std(r1, r2)
                similarity_ratings.append(dist)
                confidence.append(std)
            similarity_ratings = np.array(similarity_ratings)
            # Map the distance's std to a (0.5, 1] confidence weight.
            confidence = np.array(confidence)
            confidence = 1 - .5 * confidence / (confidence + .5)
        elif rating_distance == 'weighted_clusters':  # FIX: was `elif 'weighted_clusters':`
            similarity_ratings = []
            confidence = []
            for r, w in zip(rating_pairs, rating_weight_pairs):
                dist, std = rating_clusters_distance_and_std(
                    r[0], r[1], 'euclidean', weight_a=w[0], weight_b=w[1])
                similarity_ratings.append(dist)
                confidence.append(std)
            similarity_ratings = np.array(similarity_ratings)
            confidence = np.array(confidence)
            confidence = 1 - .5 * confidence / (confidence + .5)
        else:
            raise ValueError("'{}' is not a valid rating_distance".format(rating_distance))
        similarity_ratings *= siamese_rating_factor
        if similarity_ratings.ndim == 1:
            similarity_ratings = np.expand_dims(similarity_ratings, axis=1)

    if objective in ['size', 'rating_size']:
        size_pairs = select_pairs(nod_size)
        similarity_size = np.array([np.sqrt((a - b).dot(a - b))
                                    for a, b in size_pairs])
        if similarity_size.ndim == 1:
            # FIX: was overwriting similarity_ratings with the size distances.
            similarity_size = np.expand_dims(similarity_size, axis=1)

    if objective == "rating":
        similarity_labels = (similarity_ratings,)
    elif objective == 'size':
        similarity_labels = (similarity_size,)
    elif objective == 'rating_size':
        similarity_labels = (similarity_ratings, similarity_size)
    else:
        raise ValueError("{} is not a valid objective".format(objective))

    #   Handle Masks
    # =========================
    mask_pairs = select_pairs(masks)
    mask_sub1 = [pair[0] for pair in mask_pairs]
    mask_sub2 = [pair[1] for pair in mask_pairs]

    #   Handle Meta
    # =========================
    meta_pairs = select_pairs(meta)
    meta_sub1 = [pair[0] for pair in meta_pairs]
    meta_sub2 = [pair[1] for pair in meta_pairs]

    #   Final touch
    # =========================
    size = similarity_labels[0].shape[0]
    assert size == len(image_sub1)
    assert size == len(image_sub2)

    # One shared permutation keeps both legs (and labels/conf/meta) aligned.
    new_order = np.random.permutation(size)

    return ((reorder(image_sub1, new_order), reorder(image_sub2, new_order)),
            tuple([s[new_order] for s in similarity_labels]),
            (reorder(mask_sub1, new_order), reorder(mask_sub2, new_order)),
            confidence[new_order],
            (reorder(meta_sub1, new_order), reorder(meta_sub2, new_order)))
def prepare_data_triplet(data, objective="malignancy", rating_distance="mean", balanced=False,
                         return_confidence=False, return_meta=False, verbose=0):
    """Build (anchor, positive, negative) triplets for triplet-loss training.

    Returns:
        ((image_sub1, image_sub2, image_sub3), similarity_labels,
         (mask_sub1, mask_sub2, mask_sub3), confidence
         [, (meta_sub1, meta_sub2, meta_sub3)])   # meta only if return_meta

    Fixes over the previous revision:
      * prepare_data returns 9 values (as every other caller in this file
        unpacks); here only 8 were unpacked -> ValueError at runtime.
      * trip_rank_status was computed only on the non-balanced path but used
        unconditionally -> NameError when balanced=True.
      * `assert M * 2 == size` referenced M, defined only when balanced=True
        -> NameError when balanced=False; now guarded.
      * the return_meta branch dropped the third image/mask leg, returning
        pairs from a triplet builder; all legs are now returned consistently.
    """
    if verbose:
        print('prepare_data_triplet:')
    # FIX: unpack all 9 returned values (extra rating_weights, z are unused here).
    images, ratings, classes, masks, meta, conf, nod_size, _, _ = \
        prepare_data(data, rating_format="raw", scaling="none", reshuffle=True, verbose=verbose)
    N = images.shape[0]

    if balanced:
        print('Create a balanced split')
        benign_filter = np.where(classes == 0)[0]
        malign_filter = np.where(classes == 1)[0]
        M = min(benign_filter.shape[0], malign_filter.shape[0])
        M12 = M // 2
        M = M12 * 2
        malign_filter_a = malign_filter[:M12]
        malign_filter_b = malign_filter[M12:]
        benign_filter_a = benign_filter[:M12]
        benign_filter_b = benign_filter[M12:]
        # FIX: rank ratings on the balanced path too, so trip_rank_status exists.
        # NOTE(review): assumes make_balanced_trip is deterministic given the
        # filters, so rating trips align with the image/mask trips below — confirm.
        rating_trips = make_balanced_trip(ratings, benign_filter_a, benign_filter_b,
                                          malign_filter_a, malign_filter_b)
    else:
        rating_trips = select_triplets(ratings)
    distance = l2_distance if rating_distance == 'mean' else cluster_distance
    trip_rank_status = check_triplet_order(rating_trips, rating_distance=distance)

    #   Handle Patches
    # =========================
    if balanced:
        image_trips = make_balanced_trip(images, benign_filter_a, benign_filter_b,
                                         malign_filter_a, malign_filter_b)
    else:
        image_trips = select_triplets(images)
    image_trips = arrange_triplet(image_trips, trip_rank_status)
    image_sub1 = np.array([trip[0] for trip in image_trips])
    image_sub2 = np.array([trip[1] for trip in image_trips])
    image_sub3 = np.array([trip[2] for trip in image_trips])
    # Dummy labels — the triplet loss needs no target; size matches the trips
    # actually formed (FIX: was sized by N, which can differ from len(trips)).
    similarity_labels = np.zeros(len(image_trips), dtype=int)

    #   Handle Masks
    # =========================
    if balanced:
        mask_trips = make_balanced_trip(masks, benign_filter_a, benign_filter_b,
                                        malign_filter_a, malign_filter_b)
    else:
        mask_trips = select_triplets(masks)
    mask_trips = arrange_triplet(mask_trips, trip_rank_status)
    mask_sub1 = np.array([trip[0] for trip in mask_trips])
    mask_sub2 = np.array([trip[1] for trip in mask_trips])
    mask_sub3 = np.array([trip[2] for trip in mask_trips])

    #   Handle Meta
    # =========================
    if return_meta:
        if balanced:
            meta_trips = make_balanced_trip(meta, benign_filter_a, benign_filter_b,
                                            malign_filter_a, malign_filter_b)
        else:
            meta_trips = select_triplets(meta)
        meta_trips = arrange_triplet(meta_trips, trip_rank_status)
        meta_sub1 = np.array([trip[0] for trip in meta_trips])
        meta_sub2 = np.array([trip[1] for trip in meta_trips])
        meta_sub3 = np.array([trip[2] for trip in meta_trips])

    #   Final touch
    # =========================
    size = image_sub1.shape[0]
    if balanced:
        # FIX: M exists only on the balanced path.
        assert M * 2 == size

    confidence = np.repeat('SB', size)
    if objective == 'rating':
        if return_confidence == "rating":
            conf_trips = select_triplets(conf)
            conf_trips = arrange_triplet(conf_trips, trip_rank_status)
            confidence = np.array(get_triplet_confidence(conf_trips))
        elif return_confidence == "rating_distance":
            confidence = np.array(calc_rating_distance_confidence(trip_rank_status))

    # One shared permutation keeps all legs (and labels/conf/meta) aligned.
    new_order = np.random.permutation(size)

    if return_meta:
        # FIX: return all three legs (the old code dropped image_sub3/mask_sub3).
        return ((image_sub1[new_order], image_sub2[new_order], image_sub3[new_order]),
                similarity_labels[new_order],
                (mask_sub1[new_order], mask_sub2[new_order], mask_sub3[new_order]),
                confidence[new_order],
                (reorder(meta_sub1, new_order), reorder(meta_sub2, new_order),
                 reorder(meta_sub3, new_order)))
    else:
        return ((image_sub1[new_order], image_sub2[new_order], image_sub3[new_order]),
                similarity_labels[new_order],
                (mask_sub1[new_order], mask_sub2[new_order], mask_sub3[new_order]),
                confidence[new_order])