def fit(self, n=None, metric='l2', normalization='None', epoch=None, label=None):
    self.n = n
    if self.multi_epcch:
        # multi-epoch mode: select the embedding of the requested epoch
        assert epoch is not None
        assert self.epochs is not None
        epoch_idx = np.argwhere(epoch == self.epochs)[0][0]
        embedding = self.embedding[epoch_idx]
    else:
        embedding = self.embedding
    if label is not None:
        # restrict the search space to samples of the requested label
        embedding = embedding[self.labels == label]
    if self.n is None:
        self.n = embedding.shape[0] - 1
    nbrs = NearestNeighbors(n_neighbors=(self.n + 1), algorithm='auto', metric=metric).fit(
        rating_normalize(embedding, normalization))
    distances, indices = nbrs.kneighbors(rating_normalize(embedding, normalization))
    # drop the first column: each point is returned as its own nearest neighbor
    self.indices = indices[:, 1:]
    self.distances = distances[:, 1:]
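# --- usage sketch -----------------------------------------------------------
# A standalone illustration of the k-NN pattern used by fit() above, run on a
# synthetic embedding (NearestNeighbors is scikit-learn's, as in the method);
# the array `emb` is an arbitrary stand-in, not data from this project.
import numpy as np
from sklearn.neighbors import NearestNeighbors

emb = np.random.rand(100, 64)
k = 10
nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='auto', metric='l2').fit(emb)
distances, indices = nbrs.kneighbors(emb)
# column 0 of every row is the query point itself (distance 0), hence dropped
indices, distances = indices[:, 1:], distances[:, 1:]
print(indices.shape)  # (100, 10): the k nearest neighbors of each sample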
def evaluate_rating_space(self, norm='none', ignore_labels=False):
    if np.concatenate(self.labels).ndim == 1 or ignore_labels:
        # labels are scalar classes: derive the ratings from the raw metadata
        print('calc_from_meta')
        self.rating = [rating_normalize(calc_rating(meta, method='raw'), method=norm)
                       for meta in self.meta_data]
    else:
        # labels already hold the rating vectors
        print('calc_from_labels')
        self.rating = [rating_normalize(lbl, method=norm) for lbl in self.labels]
    self.rating_distance_matrix = None  # reset after recalculating the ratings
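# --- illustrative stand-in ---------------------------------------------------
# rating_normalize itself is not shown in this section; the sketch below is a
# plausible reading of the 'none' / 'Scale' / 'Norm' options it is called with,
# given only to make those calls concrete. The project's actual implementation
# may differ, and the 1..5 rating scale is an assumption.
import numpy as np

def rating_normalize_sketch(rating, method='none'):
    rating = np.asarray(rating, dtype='float64')
    if method.lower() == 'none':
        return rating
    if method == 'Scale':
        return rating / 5.0  # assumed 1..5 rating scale mapped into (0, 1]
    if method == 'Norm':
        return (rating - rating.mean()) / (rating.std() + 1e-9)
    raise ValueError("unknown normalization '{}'".format(method))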
def fit(self, n=None, metric='l1', normalization='None'):
    self.n = self.embedding.shape[0] - 1 if n is None else n
    nbrs = NearestNeighbors(n_neighbors=(self.n + 1), algorithm='auto', metric=metric).fit(
        rating_normalize(self.embedding, normalization))
    distances, indices = nbrs.kneighbors(rating_normalize(self.embedding, normalization))
    # drop the self-neighbor returned in the first column
    self.indices = indices[:, 1:]
    self.distances = distances[:, 1:]
def prepare_data(data, rating_format='raw', reshuffle=False, verbose=0, scaling="none"):
    # Entry layout:
    #   0 'patch'
    #   1 'mask'
    #   2 'class'
    #   3 'info'
    #   4 'size'
    #   5 'rating'
    #   6 'rating_weights'
    #   7 'z'
    N = len(data)
    old_size = data[0][0].shape

    # ============================
    # data: images and masks
    # ============================
    if data[0][0].ndim == 2:
        # add an explicit channel axis to 2-D patches
        images = [np.expand_dims(entry[0], axis=-1) for entry in data]
        masks = [np.expand_dims(entry[1], axis=-1) for entry in data]
    else:
        images = [np.array(entry[0]) for entry in data]
        masks = [np.array(entry[1]) for entry in data]
    if verbose:
        print('prepare_data:')
        print("\tImage size changed from {} to {}".format(old_size, images[0].shape))
        print("\tImage Range = [{:.1f}, {:.1f}]".format(np.min(images[0]), np.max(images[0])))
        print("\tMasks Range = [{}, {}]".format(np.min(masks[0]), np.max(masks[0])))

    # ============================
    # labels: classes and ratings
    # ============================
    classes = np.array([entry[2] for entry in data]).reshape(N, 1)
    rating_weights = None
    if rating_format == 'raw':
        ratings = np.array([rating_normalize(entry[5], scaling) for entry in data])
        rating_weights = np.array([entry[6] for entry in data])
    elif rating_format == 'mean':
        ratings = np.array([rating_normalize(np.mean(entry[5], axis=0), scaling)
                            for entry in data]).reshape(N, 9)
    elif rating_format == 'w_mean':
        # weighted mean over annotators: each row of R is weighted by W
        w_mean = lambda R, W: np.sum(np.diag(W).dot(R) / np.sum(W), axis=0)
        ratings = np.array([rating_normalize(w_mean(entry[5], entry[6]), scaling)
                            for entry in data]).reshape(N, 9)
    else:
        raise ValueError("Illegal rating_format given ({})".format(rating_format))
    if verbose:
        print("benign:{}, malignant: {}, unknown: {}".format(
            np.count_nonzero(classes == 0),
            np.count_nonzero(classes == 1),
            np.count_nonzero(classes == 2)))

    # ============================
    # meta: meta, nodule-size, slice confidence and z-value
    # ============================
    # for nodule-size use the rescaled mask area
    #   nodule_size = np.array([entry[4] for entry in data]).reshape(N, 1)
    #   sorted_size = np.sort(nodule_size, axis=0).flatten()
    #   L = len(sorted_size)
    #   thresh = sorted_size[range(0, L, L//5)]
    # mask area in mm^2 at 0.5 mm/pixel resolution, binned into size groups
    nodule_size = np.array([np.count_nonzero(q) for q in masks]).reshape(N, 1) * 0.5 * 0.5
    thresh = [0, 15, 30, 60, 120]
    nodule_size = np.digitize(nodule_size, thresh)
    z = np.array([entry[7] for entry in data]).reshape(N, 1)
    # confidence
    #   only relevant for the full dataset and should first be reconsidered
    #   conf = np.array([np.min(entry[6]) for entry in data])
    # mean-rating-based objective: low inter-annotator spread -> high confidence
    conf = 1 - .5 * np.array([rating_normalize(np.std(entry[5], axis=0).mean(), scaling)
                              for entry in data])
    meta = [entry[3] for entry in data]

    if reshuffle:
        new_order = np.random.permutation(N)
        # print('permutation: {}'.format(new_order[:20]))
        images = reorder(images, new_order)
        masks = reorder(masks, new_order)
        classes = classes[new_order]
        ratings = ratings[new_order]
        rating_weights = rating_weights[new_order] if rating_weights is not None else None
        meta = reorder(meta, new_order)
        nodule_size = nodule_size[new_order]
        z = z[new_order]
        conf = conf[new_order]

    return images, ratings, classes, masks, meta, conf, nodule_size, rating_weights, z
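# --- usage sketch -----------------------------------------------------------
# A hypothetical call on synthetic entries matching the tuple layout documented
# at the top of prepare_data; every value below is an arbitrary stand-in
# (4 annotators, 9 rating scores per annotation).
import numpy as np

rng = np.random.default_rng(0)
data = [(rng.random((128, 128)),                  # 0 patch
         rng.random((128, 128)) > 0.5,            # 1 mask
         int(rng.integers(0, 3)),                 # 2 class
         {'id': i},                               # 3 info
         50.0,                                    # 4 size
         rng.integers(1, 6, size=(4, 9)),         # 5 rating
         np.ones(4),                              # 6 rating_weights
         0.0)                                     # 7 z
        for i in range(8)]

images, ratings, classes, masks, meta, conf, nodule_size, rating_weights, z = \
    prepare_data(data, rating_format='w_mean', reshuffle=True, verbose=1)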
filename = 'LIDC/NodulePatches144-0.5-IByMalignancy.p'
M, B, U = pickle.load(open(filename, 'rb'))
raw_dataset = load_nodule_raw_dataset(size=144, res=0.5, sample='Normal')[0]
dataset = [(crop_center(entry['patch'] * (1.0 + 0.0 * entry['mask']), entry['mask'], 128)[0],
            np.mean(entry['rating'], axis=0),
            entry['label'], entry['info'], entry['size'])
           for entry in raw_dataset]
#dataset += [(normalize(entry['patch'], mean_, std_, [-1000, 400])*(1.0+0.0*entry['mask']), np.mean(entry['rating'], axis=0), -1, entry['info'], entry['size']) for entry in U[:len(U)//5]]

images = [scale(entry[0]) for entry in dataset]
rating = [entry[1] for entry in dataset]
n_rating = [rating_normalize(entry[1], 'Norm') for entry in dataset]
malig_map = [entry[2] for entry in dataset]
meta_data = [entry[3] for entry in dataset]
size_arr = np.array([entry[-1] for entry in dataset])

print(np.min(n_rating, axis=0))
print(np.max(n_rating, axis=0))

# 1) Select References
# ======================
size_map = {}
for mal in np.unique(malig_map):
    size_map[mal] = split_by_size(size_arr[np.array(malig_map) == mal])
    for s in np.unique(size_map[mal]):
        cnt = np.count_nonzero(size_map[mal] == s)
def evaluate_rating_space(self, norm='none'):
    self.rating = [rating_normalize(calc_rating(meta, method='mean'), method=norm)
                   for meta in self.meta_data]
    self.rating_distance_matrix = None  # reset after recalculating the ratings
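# --- illustrative stand-in ---------------------------------------------------
# calc_distance_matrix is used below but not defined in this section; a
# plausible sketch, assuming it returns the full pairwise distance matrix of
# the rating vectors (the project's actual implementation may differ).
import numpy as np
from scipy.spatial.distance import pdist, squareform

def calc_distance_matrix_sketch(X, method='l2'):
    # map the metric names used in this file onto scipy's
    scipy_name = {'l1': 'cityblock', 'l2': 'euclidean'}.get(method, method)
    return squareform(pdist(np.asarray(X), metric=scipy_name))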
idx_symmetry = np.zeros(len(metrics))
idx_symmetry_std = np.zeros(len(metrics))
idx_concentration = np.zeros(len(metrics))
idx_concentration_std = np.zeros(len(metrics))
idx_contrast = np.zeros(len(metrics))
idx_contrast_std = np.zeros(len(metrics))
idx_kummar = np.zeros(len(metrics))

# mean rating per nodule, averaged over annotators
rating = []
for entry in data:
    rating.append(np.mean(entry['rating'], axis=0))
rating = np.vstack(rating)

# pairwise L2 distance histograms under the two normalization schemes
plt.figure()
plt.subplot(211)
plt.hist(calc_distance_matrix(rating_normalize(rating, 'Scale'), method='l2').flatten(), bins=500)
plt.title('L2')
plt.ylabel('Scaled')
plt.subplot(212)
plt.hist(calc_distance_matrix(rating_normalize(rating, 'Norm'), method='l2').flatten(), bins=500)
#plt.title('l2-norm')
plt.ylabel('Normalized')

for m, metric in enumerate(metrics):
    plt.figure()
    norm = 'None'
    if len(metric) > 5 and metric[-4:] == 'Norm':
# metrics = ['euclidean',    # L2
#            'chebyshev',    # L-inf
#            'cosine',
#            'hamming',
#            'manhattan']
# metrics = ['cityblock', 'euclidean', 'seuclidean', 'minkowski3', 'chebyshev', 'cosine',
#            'correlation', 'hamming', 'mahalanobis', 'braycurtis', 'canberra', 'jaccard', 'emd']
metrics = ['cityblock', 'euclidean', 'chebyshev', 'cosine', 'canberra']
normalization = 'Scale'  # 'None', 'Scale', 'Normal'

dataset = pickle.load(open('LIDC/NodulePatches128-0.5.p', 'rb'))
print("Loaded {} entries".format(len(dataset)))

Ratings = np.concatenate([rating_normalize(entry['rating'], method=normalization)
                          for entry in dataset])
print("Ratings spread over {} annotations".format(Ratings.shape))

for metric in metrics:
    t1, t2 = test_rules(metric)
    print("Metric: {} -\t{}\t{}".format(metric, t1, t2))

for metric in metrics:
    # intra
    # =======
    intra = []
    for entry in dataset: