def config_filenames(self, net_type, use_core, keep_spatial_dim=False):
    # init file managers
    Weights = File.Weights(net_type, output_dir=input_dir)
    if use_core:
        if keep_spatial_dim:
            Embed = File.Embed('SP_' + net_type, output_dir=output_dir)
        else:
            Embed = File.Embed(net_type, output_dir=output_dir)
    else:
        if net_type == 'dir':
            Embed = File.Pred(type='malig', pre='dir', output_dir=output_dir)
        elif net_type == 'dirR':
            Embed = File.Pred(type='rating', pre='dirR', output_dir=output_dir)
        elif net_type == 'dirS':
            Embed = File.Pred(type='size', pre='dirS', output_dir=output_dir)
        elif net_type == 'dirRS':
            # save rating and size predictions in separate files
            Embed = {
                'R': File.Pred(type='rating', pre='dirRS', output_dir=output_dir),
                'S': File.Pred(type='size', pre='dirRS', output_dir=output_dir),
            }
        else:
            raise ValueError('{} not recognized'.format(net_type))
    return Weights, Embed
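# A minimal usage sketch (illustrative, not part of the original module): the
# `input_dir`/`output_dir` names are assumed to be module-level globals set elsewhere.
#
#   weights_file, embed_file = self.config_filenames('dirR', use_core=False)
#   # for 'dirRS', Embed is a dict with separate rating ('R') and size ('S') entries:
#   weights_file, embed_dict = self.config_filenames('dirRS', use_core=False)
#   rating_pred, size_pred = embed_dict['R'], embed_dict['S']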
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
    pear_corr = []
    kend_corr = []
    for e in epochs:
        # pred, labels_test, meta = pickle.load(open(loader.pred_filename(run, epoch=e, post=post), 'br'))
        file = FileManager.Embed(network_type)
        Reg = RatingCorrelator(file.name(run=run, epoch=e, dset=post))

        Reg.evaluate_embed_distance_matrix(
            method='euclidean', round=(rating_norm == 'Round'))
        Reg.evaluate_rating_space(norm=rating_norm)
        Reg.evaluate_rating_distance_matrix(method='euclidean')

        Reg.linear_regression()
        # Reg.scatter('embed', 'rating', xMethod='euclidean', yMethod='euclidean', sub=False)
        p, s, k = Reg.correlate_retrieval('embed', 'rating')
        pear_corr.append(p)
        kend_corr.append(k)

    epochs = np.array(epochs)
    pear_corr = np.array(pear_corr)
    kend_corr = np.array(kend_corr)

    plt.figure()
    plt.plot(epochs, pear_corr)
    plt.plot(epochs, kend_corr)
    plt.grid(which='major', axis='y')
    plt.title('embed_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
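# Example invocation (illustrative; run name, dataset tag, and epoch list are placeholders):
#   embed_correlate('siam', run='064X', post='Valid', epochs=[10, 20, 30])
#   plt.show()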
def eval_classification(run, net_type, metric, epochs, dset,
                        NN=(7, 11, 17), cross_validation=False, n_groups=5):
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        # Load
        Pred_L1O = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]
        embed_source = [Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding([source], multi_epcch=True)
            for E in epochs:
                # Calc
                pred_l1o = []
                try:
                    for N in NN:
                        pred_l1o.append(
                            Ret.classify_kfold(epoch=E, n=N, k_fold=10, metric=metric))
                    Pred_L1O[i].append(np.array(pred_l1o))
                    valid_epochs[i].append(E)
                except Exception:
                    print("Epoch {} - no calculated embedding".format(E))
            Pred_L1O[i] = np.array(Pred_L1O[i])
            valid_epochs[i] = np.array(valid_epochs[i])
        combined_epochs = merge_epochs(valid_epochs, min_element=max(n_groups - 1, 1))
        P, P_std = mean_cross_validated_index_with_std(Pred_L1O, valid_epochs, combined_epochs)
    else:
        Pred_L1O = []
        for E in epochs:
            # Load
            embed_source = Embed(run, E, dset)
            Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
            Ret.load_embedding(embed_source)
            # Calc
            pred_l1o = []
            for N in NN:
                pred_l1o.append(Ret.classify_leave1out(n=N, metric=metric)[1])
            Pred_L1O.append(np.array(pred_l1o))
        P, P_std = np.mean(Pred_L1O, axis=-1), np.std(Pred_L1O, axis=-1)
        combined_epochs = epochs  # all requested epochs were evaluated

    return P, P_std, combined_epochs
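# Example (illustrative arguments): cross-validated k-NN accuracy over an epoch sweep,
# averaged across neighborhood sizes 7/11/17.
#   P, P_std, epochs_out = eval_classification(
#       run='813', net_type='dirR', metric='l2',
#       epochs=list(range(10, 101, 10)), dset='Valid',
#       cross_validation=True, n_groups=5)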
def __init__(self, network='dir', pooling='max', categorize=False):
    self.network = network
    self.Weights = FileManager.Weights(network)
    self.Embed = FileManager.Embed(network)

    self.data_size = 144
    self.data_res = '0.5I'  # 'Legacy'
    self.data_sample = 'Normal'

    self.net_in_size = 128
    self.net_input_shape = (self.net_in_size, self.net_in_size, 1)
    self.net_out_size = 128
    self.net_normalize = True
    self.net_pool = pooling
    self.categorize = categorize

    self.model = None
if __name__ == "__main__": # # Current Metrics: # 'chebyshev' # 'euclidean' # 'cosine' # 'corrlation' # # To evaluate similarity of two Distance-Metrices: # Kendall tau distance # Spearman's rank correlation # Distance Correlation from Network import FileManager Embed = FileManager.Embed('siam') Reg = RatingCorrelator(Embed(run='064X',epoch=30,dset='Valid')) Reg.evaluate_embed_distance_matrix(method='euclidean') Reg.evaluate_rating_space() Reg.evaluate_rating_distance_matrix(method='euclidean') Reg.linear_regression() Reg.scatter('embed', 'rating', xMethod="euclidean", yMethod='euclidean', sub=True) #Reg.scatter('malig', 'rating', yMethod='euclidean', sub=True) #Reg.scatter('embed', 'malig', sub=True) #Reg.malig_regression(method='euclidean') Reg.correlate('malig', 'rating')
from experiments import CrossValidationManager
from Network import FileManager
import numpy as np

run = '888'

DataGroups = [FileManager.Dataset('Primary', i, './Dataset').load(size=160, res=0.5)
              for i in range(5)]
expected_dataset_size = [len(d) for d in DataGroups]
label_stats = [np.bincount([element['label'] for element in DataGroups[i]])
               for i in range(5)]
for i in range(5):
    print('group id {} => total:{}, benign:{}, malig:{}, unknown:{}'.format(
        i, expected_dataset_size[i],
        label_stats[i][0], label_stats[i][1], label_stats[i][2]))

cv = CrossValidationManager('RET')

for i in range(10):
    conf = cv.get_run_id(i)
    # dataset_size = len(FileManager.DatasetFromPredication().load(
    #     run='{}c{}'.format(run, conf), goal='Test', epoch=70))
    dataset_size = len(FileManager.Embed(pre='dirRD').load(
        run='{}c{}'.format(run, conf), dset='Valid'))
    group_id = cv.get_test(i)
    print('#{} ({}) - expected: {}, actual: {} (group id = {})'.format(
        i, conf, expected_dataset_size[group_id[0]], dataset_size, group_id))
        run=run, net_type=net_type, dset=dset, metric=metric,
        epochs=epochs, cross_validation=True)
    data[run_id, 0] = acc
    data[run_id, 1] = prec
    data[run_id, 2] = index
    dataStd[run_id, 0] = acc_std
    dataStd[run_id, 1] = prec_std
    dataStd[run_id, 2] = index_std

    Embed = FileManager.Embed(net_type)
    embed_source = [Embed(run + 'c{}'.format(c), dset) for c in configurations]
    pm, pm_std, km, km_std, pr, pr_std, kr, kr_std, _ = eval_correlation(
        embed_source, metric=metric, rating_metric='euclidean',
        rating_norm=rating_norm, epochs=epochs)
    data[run_id, 3] = pm
    data[run_id, 4] = pr
    # data[run_id, 5] = km
    # data[run_id, 6] = kr
wRuns = ['813c0']  # ['512cc0', '251c0'] # ['064X', '078X', '026'] # ['064X', '071' (is actually 071X), '078X', '081', '082']
wRunsNet = ['dirR']  # ['dirRS', 'dirR'] #, 'dir']
run_metrics = ['l2']

select = 0

rating_normalization = 'None'  # 'None', 'Normal', 'Scale'

doRatingRet = True
doPCA = False

if doRatingRet:
    Embed = FileManager.Embed(wRunsNet[select])
    # N = 5
    # testData, validData, trainData = load_nodule_raw_dataset(size=160, res=0.5, sample='Normal')
    # if dset == 'Train': data = trainData
    # if dset == 'Test':  data = testData
    # if dset == 'Valid': data = validData

    # Ret = Retriever(title='Ratings', dset=set)
    # Ret.load_rating(data)
    # Ret.fit(N)

    # info, nod_ids = Ret.show_ret(15)
    # info, nod_ids = Ret.show_ret(135)
    # anns = getAnnotation(info, nodule_ids=nod_ids, return_all=True)
    # pickle.dump(anns, open('tmp.p', 'bw'))
import numpy as np
import matplotlib.pyplot as plt

from Network import FileManager
from Analysis import Retriever
from Analysis.metric_space_indexes import k_occurrences

net_type = 'dirD'
config = 0
dset = 'Valid'
K = 2

res = {}
for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print(run + ': ' + label + '\n' + '*' * 20)
    embed_source = FileManager.Embed(net_type)(run + 'c{}'.format(config), dset)
    Ret = Retriever(title='', dset=dset)
    Ret.load_embedding(embed_source, multi_epcch=True)
    Ret.fit(metric='euclidean', epoch=60)
    indices, distances = Ret.ret_nbrs()
    # get hubs: the samples that appear most often among other samples' K nearest neighbors
    k_occ = k_occurrences(indices, K)
    hubs_indices = np.argsort(k_occ)[-3:]
    res[run] = hubs_indices, indices
    print([(a, b) for a, b in zip(hubs_indices, k_occ[hubs_indices])])

for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print(run + ': ' + label + '\n' + '*' * 20)
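# For reference, a k-occurrence implementation consistent with how k_occurrences
# is used above (an assumption about its interface, not the repo's actual code):
# the k-occurrence of sample j counts how often j appears among the K nearest
# neighbors of the other samples; samples with unusually large counts are hubs.
def k_occurrences_ref(indices, k):
    # indices: (n_samples, n_neighbors) neighbor array as returned by Ret.ret_nbrs()
    n = indices.shape[0]
    return np.bincount(indices[:, :k].ravel(), minlength=n)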
def eval_retrieval(run, net_type, metric, epochs, dset,
                   NN=(7, 11, 17), cross_validation=False, n_groups=5):
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        # Load
        Prec = [[] for _ in range(n_groups)]
        Prec_b = [[] for _ in range(n_groups)]
        Prec_m = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]
        embed_source = [Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding(source, multi_epcch=True)
            for E in epochs:
                # Calc
                prec, prec_b, prec_m = [], [], []
                try:
                    Ret.fit(np.max(NN), metric=metric, epoch=E)
                except Exception:
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                for N in NN:
                    p, pb, pm = Ret.evaluate_precision(n=N)
                    prec.append(p)
                    prec_b.append(pb)
                    prec_m.append(pm)
                Prec[i].append(np.array(prec))
                Prec_b[i].append(np.array(prec_b))
                Prec_m[i].append(np.array(prec_m))
                valid_epochs[i].append(E)
            Prec[i] = np.array(Prec[i])
            Prec_b[i] = np.array(Prec_b[i])
            Prec_m[i] = np.array(Prec_m[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = epochs  # merge_epochs(valid_epochs)
        P, P_std = mean_cross_validated_index_with_std(Prec, valid_epochs, combined_epochs)
        # P, P_std = np.mean(np.mean(Prec, axis=-1), axis=0), np.mean(np.std(Prec, axis=-1), axis=0)
        # harmonic mean (F1) of benign and malignant retrieval precision
        combined = 2 * np.array(Prec_b) * np.array(Prec_m) / (np.array(Prec_b) + np.array(Prec_m))
        # F1, F1_std = np.mean(np.mean(combined, axis=-1), axis=0), np.mean(np.std(combined, axis=-1), axis=0)
        F1, F1_std = mean_cross_validated_index_with_std(combined, valid_epochs, combined_epochs)
    else:
        Prec, Prec_b, Prec_m = [], [], []
        valid_epochs = epochs
        for E in epochs:
            Ret = Retriever(title='', dset='')
            embed_source = Embed(run, E, dset)
            Ret.load_embedding(embed_source)

            prec, prec_b, prec_m = [], [], []
            Ret.fit(np.max(NN), metric=metric)
            for N in NN:
                # unpack as (precision, benign, malignant) to match the cross-validation branch
                p, pb, pm = Ret.evaluate_precision(n=N)
                prec.append(p)
                prec_b.append(pb)
                prec_m.append(pm)
            Prec.append(np.array(prec))
            Prec_b.append(np.array(prec_b))
            Prec_m.append(np.array(prec_m))
        Prec = np.array(Prec)
        Prec_m = np.array(Prec_m)
        Prec_b = np.array(Prec_b)
        f1 = 2 * Prec_b * Prec_m / (Prec_b + Prec_m)
        P, P_std = np.mean(Prec, axis=-1), np.std(Prec, axis=-1)
        F1, F1_std = np.mean(f1, axis=-1), np.std(f1, axis=-1)

    return P, P_std, F1, F1_std, valid_epochs
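# The combined score in eval_retrieval is the harmonic mean (F1) of benign and
# malignant retrieval precision; e.g. Prec_b = 0.8, Prec_m = 0.6 gives
# 2 * 0.8 * 0.6 / (0.8 + 0.6) ~= 0.686, penalizing imbalance between the two classes.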
def eval_embed_space(run, net_type, metric, rating_metric, epochs, dset,
                     rating_norm='none', cross_validation=False, n_groups=5):
    # init
    Embed = FileManager.Embed(net_type)
    embed_source = [Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)]
    idx_hubness = [[] for _ in range(n_groups)]
    idx_symmetry = [[] for _ in range(n_groups)]
    idx_concentration = [[] for _ in range(n_groups)]
    idx_contrast = [[] for _ in range(n_groups)]
    idx_kummar = [[] for _ in range(n_groups)]
    idx_featCorr = [[] for _ in range(n_groups)]
    idx_sampCorr = [[] for _ in range(n_groups)]
    valid_epochs = [[] for _ in range(n_groups)]

    # calculate
    Ret = Retriever(title='{}'.format(run), dset=dset)
    for i, source in enumerate(embed_source):
        embd, epoch_mask = Ret.load_embedding(source, multi_epcch=True)
        for e in epochs:
            try:
                epoch_idx = np.argwhere(e == epoch_mask)[0][0]
                Ret.fit(metric=metric, epoch=e)
                indices, distances = Ret.ret_nbrs()
                # hubness
                idx_hubness[i].append(calc_hubness(indices))
                # symmetry
                idx_symmetry[i].append(calc_symmetry(indices))
                # kumar index
                tau, l_e = kumar(distances, res=0.01)
                idx_kummar[i].append(tau)
                # concentration & contrast
                idx_concentration[i].append(concentration(distances))
                idx_contrast[i].append(relative_contrast_imp(distances))
                valid_epochs[i].append(e)
                # correlation
                idx_featCorr[i].append(features_correlation(embd[epoch_idx]))
                idx_sampCorr[i].append(samples_correlation(embd[epoch_idx]))
            except Exception:
                print("Epoch {} - no calculated embedding".format(e))
        valid_epochs[i] = np.array(valid_epochs[i])
        idx_hubness[i] = np.array(list(zip(*idx_hubness[i])))
        idx_symmetry[i] = np.array(list(zip(*idx_symmetry[i])))
        idx_concentration[i] = np.array(list(zip(*idx_concentration[i])))
        idx_contrast[i] = np.array(list(zip(*idx_contrast[i])))
        idx_kummar[i] = np.array([idx_kummar[i]])
        idx_featCorr[i] = np.array([idx_featCorr[i]])
        idx_sampCorr[i] = np.array([idx_sampCorr[i]])

    # keep only epochs that were successfully evaluated in more than 3 folds
    combined_epochs = [i for i, c in enumerate(np.bincount(np.concatenate(valid_epochs)))
                       if c > 3]

    idx_hubness = mean_cross_validated_index(idx_hubness, valid_epochs, combined_epochs)
    idx_symmetry = mean_cross_validated_index(idx_symmetry, valid_epochs, combined_epochs)
    # the remaining indexes are currently disabled and returned as zeros
    idx_concentration = np.zeros_like(idx_hubness)  # mean_cross_validated_index(idx_concentration, valid_epochs, combined_epochs)
    idx_contrast = np.zeros_like(idx_hubness)  # mean_cross_validated_index(idx_contrast, valid_epochs, combined_epochs)
    idx_kummar = np.zeros_like(idx_hubness)  # mean_cross_validated_index(idx_kummar, valid_epochs, combined_epochs)
    idx_featCorr = np.zeros_like(idx_hubness)  # mean_cross_validated_index(idx_featCorr, valid_epochs, combined_epochs)
    idx_sampCorr = np.zeros_like(idx_hubness)  # mean_cross_validated_index(idx_sampCorr, valid_epochs, combined_epochs)

    return (combined_epochs, idx_hubness, idx_symmetry, idx_concentration,
            idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr)
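# Example (illustrative arguments): metric-space diagnostics for a cross-validated run.
#   epochs_out, hubness, symmetry, conc, contrast, kumar_idx, feat_corr, samp_corr = \
#       eval_embed_space(run='813', net_type='dirR', metric='euclidean',
#                        rating_metric='euclidean', epochs=list(range(10, 101, 10)),
#                        dset='Valid', cross_validation=True)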
def eval_correlation(run, net_type, metric, rating_metric, epochs, dset,
                     objective='rating', rating_norm='none',
                     cross_validation=False, n_groups=5, seq=False):
    if not cross_validation:
        raise NotImplementedError('eval_correlation currently supports only cross-validated runs')

    Embed = FileManager.Embed(net_type)

    # Load
    if n_groups > 1:
        embed_source = [Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)]
    else:
        embed_source = [Embed(run + 'c{}'.format(c), dset) for c in [1]]
    valid_epochs = [[] for _ in range(n_groups)]
    Pm = [[] for _ in range(n_groups)]
    Km = [[] for _ in range(n_groups)]
    Pr = [[] for _ in range(n_groups)]
    Kr = [[] for _ in range(n_groups)]
    PmStd = [[] for _ in range(n_groups)]
    KmStd = [[] for _ in range(n_groups)]
    PrStd = [[] for _ in range(n_groups)]
    KrStd = [[] for _ in range(n_groups)]

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq)

        # load rating data
        cache_filename = 'output/cached_{}_{}_{}.p'.format(
            objective, source.split('/')[-1][6:-2], c_idx)
        if not Reg.load_cached_rating_distance(cache_filename):
            print('evaluating rating distance matrix...')
            Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
            Reg.evaluate_rating_distance_matrix(
                method=rating_metric, clustered_rating_distance=True, weighted=True)
            Reg.dump_rating_distance_to_cache(cache_filename)

        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            # Calc
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # no calculated embedding for this epoch
                continue
            pm, _, km = Reg.correlate_retrieval(
                'embed', 'malig' if objective == 'rating' else 'size', verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed', 'rating', verbose=False)
            valid_epochs[c_idx].append(E)
            Pm[c_idx].append(pm[0])
            Km[c_idx].append(km[0])
            Pr[c_idx].append(pr[0])
            Kr[c_idx].append(kr[0])
            PmStd[c_idx].append(pm[1])
            KmStd[c_idx].append(km[1])
            PrStd[c_idx].append(pr[1])
            KrStd[c_idx].append(kr[1])

        Pm[c_idx] = np.expand_dims(Pm[c_idx], axis=0)
        Km[c_idx] = np.expand_dims(Km[c_idx], axis=0)
        Pr[c_idx] = np.expand_dims(Pr[c_idx], axis=0)
        Kr[c_idx] = np.expand_dims(Kr[c_idx], axis=0)
        PmStd[c_idx] = np.expand_dims(PmStd[c_idx], axis=0)
        KmStd[c_idx] = np.expand_dims(KmStd[c_idx], axis=0)
        PrStd[c_idx] = np.expand_dims(PrStd[c_idx], axis=0)
        KrStd[c_idx] = np.expand_dims(KrStd[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs, min_element=max(n_groups - 1, 1))
    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return (np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km), np.squeeze(KmStd),
            np.squeeze(Pr), np.squeeze(PrStd), np.squeeze(Kr), np.squeeze(KrStd),
            np.array(merged_epochs))
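# Example (illustrative arguments): cross-validated correlation of embedding distances
# with malignancy and rating distances.
#   pm, pm_std, km, km_std, pr, pr_std, kr, kr_std, epochs_out = eval_correlation(
#       run='813', net_type='dirR', metric='euclidean', rating_metric='euclidean',
#       epochs=list(range(10, 101, 10)), dset='Valid',
#       cross_validation=True, n_groups=5)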