def config_filenames(self, net_type, use_core, keep_spatial_dim=False):
    """Create the file managers used to read weights and write results.

    Args:
        net_type: network identifier ('dir', 'dirR', 'dirS' or 'dirRS' when
            ``use_core`` is false; any embedding prefix otherwise).
        use_core: when true an embedding manager is returned, otherwise a
            prediction manager matching ``net_type``.
        keep_spatial_dim: when true (and ``use_core`` is true) the embedding
            prefix becomes ``'SP_' + net_type``.

    Returns:
        ``(Weights, Embed)``. For ``net_type == 'dirRS'`` without the core,
        ``Embed`` is a dict with keys ``'R'`` (rating) and ``'S'`` (size).
    """
    # Weights are read from `input_dir`; everything else goes to `output_dir`
    # (both names come from the enclosing scope).
    Weights = File.Weights(net_type, output_dir=input_dir)

    if use_core:
        embed_prefix = 'SP_' + net_type if keep_spatial_dim else net_type
        return Weights, File.Embed(embed_prefix, output_dir=output_dir)

    # Networks with a single prediction target: net_type -> prediction type.
    single_target = {'dir': 'malig', 'dirR': 'rating', 'dirS': 'size'}
    if net_type in single_target:
        Embed = File.Pred(type=single_target[net_type],
                          pre=net_type,
                          output_dir=output_dir)
    elif net_type == 'dirRS':
        # Rating and size predictions are saved in separate files.
        Embed = {
            'R': File.Pred(type='rating', pre='dirRS', output_dir=output_dir),
            'S': File.Pred(type='size', pre='dirRS', output_dir=output_dir),
        }
    else:
        print('{} not recognized'.format(net_type))
        assert False
    return Weights, Embed
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
    """Plot the embedding-vs-rating retrieval correlation for each epoch.

    For every epoch a ``RatingCorrelator`` is built from the embedding file of
    ``run``/``post``, the embedding and rating distance matrices are
    evaluated (euclidean), and the first and third values returned by
    ``correlate_retrieval('embed', 'rating')`` are collected. The two curves
    are drawn on a new figure labelled 'pearson' and 'kendall'.

    Args:
        network_type: prefix handed to ``FileManager.Embed``.
        run: run identifier (string, used in the file name and plot title).
        post: dataset tag (string, used in the file name and plot title).
        epochs: iterable of epochs to evaluate.
        rating_norm: rating normalisation; ``'Round'`` additionally enables
            rounding in the embedding distance evaluation.
    """
    use_rounding = (rating_norm == 'Round')
    pearson_per_epoch, kendall_per_epoch = [], []

    for epoch in epochs:
        embed_file = FileManager.Embed(network_type)
        filename = embed_file.name(run=run, epoch=epoch, dset=post)

        Reg = RatingCorrelator(filename)
        Reg.evaluate_embed_distance_matrix(method='euclidean',
                                           round=use_rounding)
        Reg.evaluate_rating_space(norm=rating_norm)
        Reg.evaluate_rating_distance_matrix(method='euclidean')
        Reg.linear_regression()

        # The middle return value is not used here.
        pearson, _, kendall = Reg.correlate_retrieval('embed', 'rating')
        pearson_per_epoch.append(pearson)
        kendall_per_epoch.append(kendall)

    x_axis = np.array(epochs)

    plt.figure()
    for curve in (pearson_per_epoch, kendall_per_epoch):
        plt.plot(x_axis, np.array(curve))
    plt.grid(which='major', axis='y')
    plt.title('_'.join(('embed', run, post)))
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
def dir_rating_accuracy(run, post, net_type, epochs, n_groups=5):
    """Plot and return the rating accuracy per epoch, averaged over configurations.

    Predictions are loaded for the configurations ``run + 'c0'`` ..
    ``run + 'c{n_groups-1}'``. For each requested epoch the prediction of
    every rating property is scored against the label with ``accuracy`` and
    the scores are averaged over the configurations.

    Args:
        run: run identifier (string prefix of the configuration names).
        post: dataset tag passed to ``PredFile.load`` as ``dset``.
        net_type: prefix of the prediction file.
        epochs: sized sequence of epochs to evaluate.
        n_groups: number of configurations to average over.

    Returns:
        Array of shape ``(len(epochs), 9)``. An epoch missing from a
        configuration contributes zeros for that configuration.
    """
    rating_property = [
        'Subtlety', 'Internalstructure', 'Calcification', 'Sphericity',
        'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'
    ]
    PredFile = FileManager.Pred(type='rating', pre=net_type)
    acc = np.zeros([len(epochs), n_groups, len(rating_property)])

    for c in range(n_groups):
        run_config = run + 'c{}'.format(c)
        predict, valid_epochs, images, meta_data, classes, labels, masks = PredFile.load(
            run=run_config, dset=post)
        # Collapse each label entry along its first axis
        # (presumably the individual reader ratings - TODO confirm).
        labels = np.array([np.mean(t, axis=0) for t in labels])

        for i, e in enumerate(epochs):
            # Locate the epoch explicitly instead of relying on int() of an
            # array (deprecated for ndim > 0) and a catch-all except.
            matches = np.flatnonzero(valid_epochs == e)
            if matches.size != 1:
                print('skip epoch {}'.format(e))
                continue
            idx = int(matches[0])
            for ridx in range(len(rating_property)):
                acc[i, c, ridx] = accuracy(labels[:, ridx],
                                           predict[idx, :, ridx])

    acc = np.mean(acc, axis=1)

    plt.figure()
    plt.title('Rating Acc')
    plt.plot(epochs, acc)
    plt.legend(rating_property)
    return acc
def dir_rating_view(run, post, epochs, net_type='dirR', factor=1.0):
    """Show four fixed samples with their rounded label and predicted ratings.

    Loads the rating predictions of configuration ``run + 'c0'``, scales
    labels and the last stored prediction by ``factor``, rounds them to
    integers and draws samples 5, 23, 27 and 51 in a 2x2 grid. Each panel's
    y-label holds the ``l2`` distance of its label / prediction vector to
    those of the first selected sample.

    Args:
        run: run identifier (string).
        post: dataset tag (string).
        epochs: overwritten by the epochs returned from the prediction file.
        net_type: prefix of the prediction file.
        factor: scale applied to ratings before rounding.
    """
    PredFile = FileManager.Pred(type='rating', pre=net_type)
    # NOTE(review): `meta_data` and `images` are unpacked here in the opposite
    # order to the other PredFile.load call sites in this file - confirm.
    predict, epochs, meta_data, images, classes, labels, masks, _, _, _ = PredFile.load(
        run=run + 'c0', dset=post)

    # Prepare images and integer-valued ratings.
    images = np.squeeze(images)
    mean_labels = np.array([np.mean(entry, axis=0) for entry in labels])
    labels = np.round(factor * mean_labels).astype('int')
    predict = np.round(factor * predict[-1]).astype('int')

    # Plot
    select = [5, 23, 27, 51]
    anchor = select[0]
    plt.figure('view_' + run + '_' + post)
    for panel, sample in enumerate(select, start=1):
        plt.subplot(2, 2, panel)
        plt.imshow(images[sample])

        label_text = np.array2string(labels[sample], prefix='L')
        predict_text = np.array2string(predict[sample], prefix='P')
        plt.title(label_text + '\n' + predict_text)
        plt.xticks([])
        plt.yticks([])

        # Distance to the first selected sample, in label and prediction space.
        label_dist = l2(labels[sample], labels[anchor])
        predict_dist = l2(predict[sample], predict[anchor])
        plt.ylabel("{:.1f}\n{:.1f}".format(label_dist, predict_dist))
def __init__(self, network='dir', pooling='max', categorize=False):
    """Store the configuration; the model itself is left unset (``None``).

    Args:
        network: network prefix, also used for the weight/embedding managers.
        pooling: pooling mode stored as ``net_pool``.
        categorize: flag stored as ``categorize``.
    """
    # File managers keyed by the network prefix.
    self.network = network
    self.Weights = FileManager.Weights(network)
    self.Embed = FileManager.Embed(network)

    # Data selection ('Legacy' is the alternative value for data_res).
    self.data_size, self.data_res, self.data_sample = 144, '0.5I', 'Normal'

    # Network geometry: square single-channel input.
    side = 128
    self.net_in_size = side
    self.net_input_shape = (side, side, 1)
    self.net_out_size = 128
    self.net_normalize = True

    # Caller-selected options.
    self.net_pool = pooling
    self.categorize = categorize

    self.model = None
def eval_classification(run,
                        net_type,
                        metric,
                        epochs,
                        dset,
                        NN=[7, 11, 17],
                        cross_validation=False,
                        n_groups=5):
    """Evaluate nearest-neighbour classification of an embedding per epoch.

    With ``cross_validation`` the configurations ``run + 'c0'`` ..
    ``run + 'c{n_groups-1}'`` are evaluated with 10-fold classification and
    combined by ``mean_cross_validated_index_with_std`` over the epochs that
    ``merge_epochs`` keeps (``min_element = max(n_groups - 1, 1)``).
    Otherwise a single run is evaluated with leave-one-out classification.

    Args:
        run: run identifier.
        net_type: embedding prefix.
        metric: distance metric handed to the retriever.
        epochs: epochs to evaluate.
        dset: dataset tag.
        NN: neighbourhood sizes; only iterated, never modified.
        cross_validation: select the cross-validated path.
        n_groups: number of cross-validation configurations.

    Returns:
        ``(P, P_std, combined_epochs)``. Without cross-validation the
        statistics are taken over ``NN`` and ``combined_epochs`` is the list
        of requested epochs.
    """
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        Pred_L1O = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]

        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding([source], multi_epcch=True)
            for E in epochs:
                # Calc: an epoch counts only if every N could be evaluated.
                try:
                    pred_l1o = [
                        Ret.classify_kfold(epoch=E,
                                           n=N,
                                           k_fold=10,
                                           metric=metric) for N in NN
                    ]
                except Exception:
                    # Best effort: presumably the epoch has no stored
                    # embedding for this configuration.
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                Pred_L1O[i].append(np.array(pred_l1o))
                valid_epochs[i].append(E)
            Pred_L1O[i] = np.array(Pred_L1O[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = merge_epochs(valid_epochs,
                                       min_element=max(n_groups - 1, 1))
        P, P_std = mean_cross_validated_index_with_std(Pred_L1O, valid_epochs,
                                                       combined_epochs)
    else:
        # Materialise once so the epochs can be both iterated and returned.
        combined_epochs = list(epochs)
        # Start from an empty list: seeding it with per-group placeholders
        # (as the cross-validated path needs) makes the result ragged.
        Pred_L1O = []
        for E in combined_epochs:
            # Load
            embed_source = Embed(run, E, dset)
            Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
            Ret.load_embedding(embed_source)
            # Calc
            pred_l1o = [
                Ret.classify_leave1out(n=N, metric=metric)[1] for N in NN
            ]
            Pred_L1O.append(np.array(pred_l1o))
        P, P_std = np.mean(Pred_L1O, axis=-1), np.std(Pred_L1O, axis=-1)

    return P, P_std, combined_epochs
wRuns = [ '813c0' ] # ['512cc0', '251c0'] #['064X', '078X', '026'] #['064X', '071' (is actually 071X), '078X', '081', '082'] wRunsNet = ['dirR'] # ['dirRS', 'dirR'] #, 'dir'] run_metrics = ['l2'] select = 0 rating_normalizaion = 'None' # 'None', 'Normal', 'Scale' doRatingRet = True doPCA = False if doRatingRet: Embed = FileManager.Embed(wRunsNet[select]) #N = 5 #testData, validData, trainData = load_nodule_raw_dataset(size=160, res=0.5, sample='Normal') ##if dset is 'Train': data = trainData #if dset is 'Test': data = testData #if dset is 'Valid': data = validData #Ret = Retriever(title='Ratings', dset=set) #Ret.load_rating(data) #et.fit(N) #info, nod_ids = Ret.show_ret(15) #info, nod_ids = Ret.show_ret(135) #info, nod_ids = Ret.show_ret(135) #anns = getAnnotation(info, nodule_ids=nod_ids, return_all=True) #pickle.dump(anns, open('tmp.p', 'bw'))
in_size = 128 out_size = 128 normalize = True load = False evaluate = False force = False # 0 Test # 1 Validation # 2 Training DataSubSet = 2 run = '000' epoch = 5 WeightsFile = FileManager.Weights('siamR').name(run, epoch=epoch) pred_file_format = '.\output\embed\pred_siam{}_E{}_{}.p' def pred_filename(run, epoch, post): return pred_file_format.format(run, epoch, post) ## ========================= ## ## ======= Load Data ======= ## ## ========================= ## if DataSubSet == 0: post = "Test" elif DataSubSet == 1:
import numpy as np
import matplotlib.pyplot as plt
from Network import FileManager
from Analysis import Retriever
from Analysis.metric_space_indexes import k_occurrences

# Experiment settings.
net_type = 'dirD'
config = 0
dset = 'Valid'
K = 2

# run id -> (indices of the three largest k-occurrence values, neighbour indices)
res = {}

for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print('{}: {}\n{}'.format(run, label, '*' * 20))

    # Fit the retriever on the epoch-60 embedding of this run.
    embed_source = FileManager.Embed(net_type)('{}c{}'.format(run, config),
                                               dset)
    Ret = Retriever(title='', dset=dset)
    Ret.load_embedding(embed_source, multi_epcch=True)
    Ret.fit(metric='euclidean', epoch=60)
    indices, distances = Ret.ret_nbrs()

    # Hubs: the three samples with the largest k-occurrence.
    k_occ = k_occurrences(indices, K)
    hubs_indices = np.argsort(k_occ)[-3:]
    res[run] = (hubs_indices, indices)
    print(list(zip(hubs_indices, k_occ[hubs_indices])))

for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']):
    print('{}: {}\n{}'.format(run, label, '*' * 20))
def dir_rating_correlate(run,
                         post,
                         epochs,
                         rating_norm='none',
                         clustered_rating_distance=True,
                         n_groups=5):
    """Plot predicted-rating vs rating retrieval correlation over epochs.

    For every configuration ``run + 'c0'`` .. ``run + 'c{n_groups-1}'`` and
    every epoch, the 'dirR' rating predictions are correlated with the
    ratings through ``RatingCorrelator.correlate_retrieval``. The first
    (plotted as 'pearson') and third (plotted as 'kendall') return values are
    averaged over the configurations, smoothed, and drawn together with
    dashed lines at column 0 +/- column 1 of each result.

    Result caching is intentionally disabled: results are always recomputed
    and never written to
    ``./Plots/Data/rating_correlation_dirR<run>.p``. This is done with plain
    control flow rather than an ``assert`` inside ``try``, which would start
    loading a possibly stale cache under ``python -O``.

    Args:
        run: run identifier (string).
        post: dataset tag (string).
        epochs: epochs to evaluate.
        rating_norm: rating normalisation; ``'Round'`` also enables rounding.
        clustered_rating_distance: forwarded to
            ``evaluate_rating_distance_matrix``.
        n_groups: number of configurations to average over.
    """
    use_rounding = (rating_norm == 'Round')
    pear_corr = [[] for _ in range(n_groups)]
    kend_corr = [[] for _ in range(n_groups)]

    print('SKIPING')
    print("Evaluating Rating Correlation for {}".format(run))
    for c in range(n_groups):
        run_config = run + 'c{}'.format(c)
        PredFile = FileManager.Pred(type='rating', pre='dirR')
        Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                               multi_epoch=True)
        for e in epochs:
            Reg.evaluate_embed_distance_matrix(method='euclidean',
                                               epoch=e,
                                               round=use_rounding)
            Reg.evaluate_rating_space(norm=rating_norm)
            Reg.evaluate_rating_distance_matrix(
                method='euclidean',
                clustered_rating_distance=clustered_rating_distance)
            Reg.linear_regression()
            p, _, k = Reg.correlate_retrieval('embed',
                                              'rating',
                                              round=use_rounding,
                                              verbose=False)
            pear_corr[c].append(p)
            kend_corr[c].append(k)
        pear_corr[c] = np.array(pear_corr[c])
        kend_corr[c] = np.array(kend_corr[c])

    # Average over configurations.
    pear_corr = np.mean(pear_corr, axis=0)
    kend_corr = np.mean(kend_corr, axis=0)
    print('NO DUMP')

    # Column 0 is the plotted curve, column 1 the half-width of its band
    # (presumably mean and std - TODO confirm against correlate_retrieval).
    pear_corr = smooth(pear_corr[:, 0]), smooth(pear_corr[:, 1])
    kend_corr = smooth(kend_corr[:, 0]), smooth(kend_corr[:, 1])

    epochs = np.array(epochs)
    plt.figure('Rating2Rating:' + run + '-' + post)
    for center, spread in (pear_corr, kend_corr):
        q = plt.plot(epochs, center)
        color = q[0].get_color()
        # NOTE(review): `alpha` is not defined in this function; it is
        # expected to exist at module level.
        plt.plot(epochs, center + spread, color=color, ls='--', alpha=alpha)
        plt.plot(epochs, center - spread, color=color, ls='--', alpha=alpha)
    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', '', '', 'kendall', '', ''])
def dir_size_rmse(run,
                  post,
                  epochs,
                  net_type,
                  dist='RMSE',
                  weighted=False,
                  n_groups=5):
    """Plot and return the size-prediction RMSE per epoch.

    Size predictions of the configurations ``run + 'c0'`` ..
    ``run + 'c{n_groups-1}'`` are compared to the labels; the per-epoch RMSE
    is averaged over the configurations, written to
    ``./Plots/Data/size<dist>_<net_type><run>.p``, smoothed and plotted.

    The results are always recomputed (the stored file is never read back).

    Args:
        run: run identifier (string).
        post: dataset tag (string).
        epochs: sized sequence of epochs to evaluate.
        net_type: prefix of the prediction file.
        dist: only used for the file name and the figure/plot titles; the
            computed error is always the RMSE.
        weighted: currently unused.
        n_groups: number of configurations to average over.

    Returns:
        The smoothed per-epoch error. An epoch missing from a configuration
        contributes zero for that configuration.
    """
    plot_data_filename = './Plots/Data/size{}_{}{}.p'.format(
        dist, net_type, run)

    print("Evaluating Size RMSE for {}".format(run))
    PredFile = FileManager.Pred(type='size', pre=net_type)
    R = np.zeros([len(epochs), n_groups])
    for c in range(n_groups):
        run_config = run + 'c{}'.format(c)
        predict, valid_epochs, images, meta_data, classes, labels, masks = PredFile.load(
            run=run_config, dset=post)
        labels = np.array(labels)
        for i, e in enumerate(epochs):
            print(" Epoch {}:".format(e))
            # Locate the epoch explicitly instead of relying on int() of an
            # array (deprecated for ndim > 0) and a catch-all except.
            matches = np.flatnonzero(valid_epochs == e)
            if matches.size != 1:
                print('skip epoch {}'.format(e))
                continue
            pred = predict[int(matches[0])]
            R[i, c] = np.sqrt(np.mean(np.sum((pred - labels)**2, axis=1)))

    R = np.mean(R, axis=1)
    # Close the file deterministically so the dump is flushed to disk.
    with open(plot_data_filename, 'bw') as dump_file:
        pickle.dump(R, dump_file)

    # smooth
    R = smooth(R)

    plt.figure(dist + ' ' + net_type + run + '-' + post)
    plt.title('Size ' + dist)
    plt.plot(epochs, R)
    plt.grid(True, axis='y')
    return R
def eval_embed_space(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5):
    """Compute metric-space indexes of an embedding, averaged over configurations.

    For every configuration ``run + 'c0'`` .. ``run + 'c{n_groups-1}'`` and
    every epoch the retriever is fitted and the hubness, symmetry, Kumar,
    concentration, contrast and feature/sample correlation indexes are
    computed. An epoch is recorded for a configuration only when all of them
    succeed, which keeps every index list aligned with ``valid_epochs``.

    Only hubness and symmetry are currently averaged with
    ``mean_cross_validated_index``; the remaining indexes are returned as
    zero arrays shaped like the hubness result.

    Args:
        run: run identifier.
        net_type: embedding prefix.
        metric: distance metric used to fit the retriever.
        rating_metric: unused.
        epochs: epochs to evaluate (non-negative integers).
        dset: dataset tag.
        rating_norm: unused.
        cross_validation: unused.
        n_groups: number of configurations.

    Returns:
        ``(combined_epochs, idx_hubness, idx_symmetry, idx_concentration,
        idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr)`` where
        ``combined_epochs`` lists the epochs available in at least
        ``max(n_groups - 1, 1)`` configurations.
    """
    # init
    Embed = FileManager.Embed(net_type)
    embed_source = [
        Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
    ]

    idx_hubness = [[] for _ in range(n_groups)]
    idx_symmetry = [[] for _ in range(n_groups)]
    idx_concentration = [[] for _ in range(n_groups)]
    idx_contrast = [[] for _ in range(n_groups)]
    idx_kummar = [[] for _ in range(n_groups)]
    idx_featCorr = [[] for _ in range(n_groups)]
    idx_sampCorr = [[] for _ in range(n_groups)]
    valid_epochs = [[] for _ in range(n_groups)]

    # calculate
    Ret = Retriever(title='{}'.format(run), dset=dset)
    for i, source in enumerate(embed_source):
        embd, epoch_mask = Ret.load_embedding(source, multi_epcch=True)
        for e in epochs:
            # Compute everything first and store afterwards: a failure in the
            # middle must not leave some lists one entry longer than others.
            try:
                epoch_idx = np.argwhere(e == epoch_mask)[0][0]
                Ret.fit(metric=metric, epoch=e)
                indices, distances = Ret.ret_nbrs()

                hubness = calc_hubness(indices)
                symmetry = calc_symmetry(indices)
                tau, _ = kumar(distances, res=0.01)
                conc = concentration(distances)
                contrast = relative_contrast_imp(distances)
                feat_corr = features_correlation(embd[epoch_idx])
                samp_corr = samples_correlation(embd[epoch_idx])
            except Exception:
                # Best effort: presumably the epoch has no stored embedding
                # for this configuration.
                print("Epoch {} - no calculated embedding".format(e))
                continue

            idx_hubness[i].append(hubness)
            idx_symmetry[i].append(symmetry)
            idx_kummar[i].append(tau)
            idx_concentration[i].append(conc)
            idx_contrast[i].append(contrast)
            idx_featCorr[i].append(feat_corr)
            idx_sampCorr[i].append(samp_corr)
            valid_epochs[i].append(e)

        # Integer dtype even when empty: np.array([]) is float64, which makes
        # the concatenated epochs float and np.bincount below fail.
        valid_epochs[i] = np.array(valid_epochs[i], dtype=int)
        idx_hubness[i] = np.array(list(zip(*idx_hubness[i])))
        idx_symmetry[i] = np.array(list(zip(*idx_symmetry[i])))
        idx_concentration[i] = np.array(list(zip(*idx_concentration[i])))
        idx_contrast[i] = np.array(list(zip(*idx_contrast[i])))
        idx_kummar[i] = np.array([idx_kummar[i]])
        idx_featCorr[i] = np.array([idx_featCorr[i]])
        idx_sampCorr[i] = np.array([idx_sampCorr[i]])

    # Keep epochs present in all but at most one configuration
    # (identical to the former `count > 3` for the default n_groups=5).
    min_groups = max(n_groups - 1, 1)
    epoch_counts = np.bincount(np.concatenate(valid_epochs))
    combined_epochs = [
        epoch for epoch, count in enumerate(epoch_counts)
        if count >= min_groups
    ]

    idx_hubness = mean_cross_validated_index(idx_hubness, valid_epochs,
                                             combined_epochs)
    idx_symmetry = mean_cross_validated_index(idx_symmetry, valid_epochs,
                                              combined_epochs)
    # The remaining indexes are not averaged at the moment; zero placeholders
    # keep the return signature stable.
    idx_concentration = np.zeros_like(idx_hubness)
    idx_contrast = np.zeros_like(idx_hubness)
    idx_kummar = np.zeros_like(idx_hubness)
    idx_featCorr = np.zeros_like(idx_hubness)
    idx_sampCorr = np.zeros_like(idx_hubness)

    return combined_epochs, idx_hubness, idx_symmetry, idx_concentration, idx_contrast, idx_kummar, idx_featCorr, idx_sampCorr
from Network import FileManager
from experiments import CrossValidationManager
import numpy as np

# Sanity check: compare the size of each dataset group with the size of the
# validation embedding stored for each cross-validation configuration.
run = '888'

# Load the five 'Primary' dataset groups.
DataGroups = [
    FileManager.Dataset('Primary', i, './Dataset').load(size=160, res=0.5)
    for i in range(5)
]
expected_datast_size = [len(d) for d in DataGroups]
# Per-group label histogram; bins 0/1/2 are reported as benign/malig/unknown.
label_stats = [
    np.bincount([element['label'] for element in DataGroups[i]])
    for i in range(5)
]
# List comprehension used only for its printing side effect.
[
    print('group id {} => total:{}, benign:{}, malig:{}, unknown:{}'.format(
        i, expected_datast_size[i], label_stats[i][0], label_stats[i][1],
        label_stats[i][2])) for i in range(5)
]
cv = CrossValidationManager('RET')
for i in range(10):  # conf in conf_names:
    conf = cv.get_run_id(i)
    #dataset_size = len(FileManager.DatasetFromPredication().load(run='{}c{}'.format(run, conf), goal='Test', epoch=70))
    # Number of entries in the stored 'Valid' embedding of this configuration.
    dataset_size = len(
        FileManager.Embed(pre='dirRD').load(run='{}c{}'.format(run, conf),
                                            dset='Valid'))
    group_id = cv.get_test(i)
    print('#{} ({})- expected: {}, actual: {} (group id = {})'.format(
def dir_rating_params_correlate(run,
                                post,
                                epochs,
                                net_type,
                                rating_norm='none',
                                configurations=list(range(5)),
                                USE_CACHE=True,
                                DUMP=True):
    """Plot the per-property correlation between predicted and true ratings.

    For every configuration ``run + 'c<config>'`` and epoch,
    ``RatingCorrelator.correlate_to_ratings`` is evaluated and restricted by
    ``mask`` to the seven properties in ``rating_property``. The
    values are averaged over the configurations, printed per epoch, smoothed
    and plotted together with dashed reference lines.

    Results are cached in
    ``./Plots/Data/rating_params_correlation_<net_type><run>.p``.

    Args:
        run: run identifier (string).
        post: dataset tag (string).
        epochs: epochs to evaluate.
        net_type: prefix of the prediction file.
        rating_norm: rating normalisation; ``'Round'`` also enables rounding.
        configurations: configuration ids to average over (not modified).
        USE_CACHE: pass ``False`` to ignore an existing cache file.
        DUMP: write the freshly computed result to the cache file.
    """
    reference = [0.7567, 0.5945, 0.7394, 0.5777, 0.6155, 0.7445, 0.6481]
    rating_property = [
        'Subtlety', 'Sphericity', 'Margin', 'Lobulation', 'Spiculation',
        'Texture', 'Malignancy'
    ]
    # Drops entries 1 and 2 of the nine-element correlation vector
    # ('Internalstructure' and 'Calcification').
    mask = [True, False, False, True, True, True, True, True, True]
    plot_data_filename = './Plots/Data/rating_params_correlation_{}{}.p'.format(
        net_type, run)

    pear_corr = None
    loaded = False
    if USE_CACHE is False:
        print('SKIPPING')
    else:
        # Best-effort cache: any failure to read it falls back to
        # recomputation. The file is a local artefact written by this
        # function; never point it at untrusted pickle data.
        try:
            with open(plot_data_filename, 'br') as cache_file:
                pear_corr = pickle.load(cache_file)
            loaded = True
            print("Loaded results for {}".format(run))
        except Exception:
            loaded = False

    if not loaded:
        print("Evaluating Rating Correlation for {}".format(run))
        pear_corr = [[] for _ in configurations]
        for c, config in enumerate(configurations):
            run_config = run + 'c{}'.format(config)
            PredFile = FileManager.Pred(type='rating', pre=net_type)
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post),
                                   multi_epoch=True,
                                   conf=c)
            Reg.evaluate_rating_space(norm=rating_norm)
            for e in epochs:
                p = Reg.correlate_to_ratings(epoch=e,
                                             round=(rating_norm == 'Round'))
                selected = p[mask]
                if not np.all(np.isfinite(selected)):
                    print('nan at: conf={}, epoch={}'.format(c, e))
                pear_corr[c].append(selected)
            pear_corr[c] = np.array(pear_corr[c])
        # Average over configurations.
        pear_corr = np.mean(pear_corr, axis=0)
        if DUMP:
            with open(plot_data_filename, 'bw') as cache_file:
                pickle.dump(pear_corr, cache_file)
        else:
            print('NO DUMP')

    for i, e in enumerate(epochs):
        print("=" * 20)
        print(" Epoch {}:".format(e))
        print("-" * 20)
        for col, name in enumerate(rating_property):
            print("\t{}: \t{:.2f}".format(name, pear_corr[i, col]))

    for col in range(pear_corr.shape[1]):
        pear_corr[:, col] = smooth(pear_corr[:, col],
                                   window_length=5,
                                   polyorder=2)

    epochs = np.array(epochs)
    plt.figure('RatingParams2Rating:' + run + '-' + post)
    q = plt.plot(epochs, pear_corr, linewidth=2.5)
    # One dashed horizontal reference per property, in the curve's colour.
    for line, ref in zip(q, reference):
        plt.plot(epochs,
                 ref * np.ones_like(epochs),
                 color=line.get_color(),
                 ls='--',
                 linewidth=4,
                 alpha=0.6)
    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(rating_property)
def dir_rating_rmse(run,
                    post,
                    epochs,
                    net_type,
                    dist='RMSE',
                    weighted=False,
                    configurations=list(range(5)),
                    USE_CACHE=True,
                    DUMP=True):
    """Plot and return the rating prediction error per property and epoch.

    For every configuration ``run + 'c<config>'`` and epoch the error of each
    of the nine rating properties is computed (``dist`` = ``'RMSE'`` or
    ``'ABS'``), plus the overall RMSE over all properties in column 9. The
    errors are averaged over the configurations, printed per epoch, and the
    nine property columns are smoothed before plotting.

    Results are cached in ``./Plots/Data/<dist>_<net_type><run>.p``; the
    cache is written only after a recomputation.

    Args:
        run: run identifier (string).
        post: dataset tag (string).
        epochs: sized sequence of epochs to evaluate.
        net_type: prefix of the prediction file.
        dist: ``'RMSE'`` or ``'ABS'``.
        weighted: weight every sample by one minus the relative frequency of
            its (rounded) label value, normalised to sum to one.
        configurations: configuration ids to average over (not modified).
        USE_CACHE: pass ``False`` to ignore an existing cache file.
        DUMP: write the freshly computed result to the cache file.

    Returns:
        Array of shape ``(len(epochs), 10)``. An epoch missing from a
        configuration contributes zeros for that configuration.

    Raises:
        AssertionError: if ``dist`` is not recognised (only when the errors
            are actually computed).
    """
    rating_property = [
        'Subtlety', 'Internalstructure', 'Calcification', 'Sphericity',
        'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy'
    ]
    # Largest rating value per property; sets the histogram bins for weighting.
    rating_max = [5, 5, 6, 5, 5, 5, 5, 5, 5]
    plot_data_filename = './Plots/Data/{}_{}{}.p'.format(dist, net_type, run)

    R = None
    loaded = False
    if USE_CACHE is False:
        print("skipping...")
    else:
        # Best-effort cache: any failure to read it falls back to
        # recomputation. The file is a local artefact written by this
        # function; never point it at untrusted pickle data.
        try:
            with open(plot_data_filename, 'br') as cache_file:
                R = pickle.load(cache_file)
            loaded = True
            print("Loaded results for {}".format(run))
        except Exception:
            loaded = False

    if not loaded:
        print("Evaluating RMSE for {}".format(run))
        PredFile = FileManager.Pred(type='rating', pre=net_type)
        R = np.zeros([len(epochs), 10, len(configurations)])
        for c, config in enumerate(configurations):
            run_config = run + 'c{}'.format(config)
            predict, valid_epochs, images, meta_data, classes, labels, masks, conf, rating_weights, z = PredFile.load(
                run=run_config, dset=post)
            labels = np.array([np.mean(l, axis=0) for l in labels])
            for i, e in enumerate(epochs):
                # Locate the epoch explicitly instead of relying on int() of
                # an array (deprecated for ndim > 0) and a catch-all except.
                matches = np.flatnonzero(valid_epochs == e)
                if matches.size != 1:
                    print('skip epoch {}'.format(e))
                    continue
                pred = predict[int(matches[0])]

                for r, max_val in enumerate(rating_max):
                    W = np.ones(labels.shape[0])
                    if weighted:
                        # One bin per integer rating value 1..max_val.
                        w = np.histogram(labels[:, r],
                                         bins=np.array(range(max_val + 1)) +
                                         0.5)[0]
                        w = 1 - w / np.sum(w)
                        w /= (len(w) - 1)
                        assert np.abs(w.sum() - 1) < 1e-6
                        W = w[np.round(labels[:, r] - 1).astype('int')]
                    if dist == 'RMSE':
                        err = W.dot((pred[:, r] - labels[:, r])**2)
                        err = np.sqrt(err / np.sum(W))
                    elif dist == 'ABS':
                        err = W.dot(
                            np.abs(pred[:, r] - labels[:, r])) / np.sum(W)
                    else:
                        print('{} unrecognized distance'.format(dist))
                        # Explicit raise: a bare `assert False` is stripped
                        # under `python -O`.
                        raise AssertionError(
                            '{} unrecognized distance'.format(dist))
                    R[i, r, c] = err

                # Overall error over all properties.
                R[i, 9, c] = np.sqrt(
                    np.mean(np.sum((pred - labels)**2, axis=1)))
        # Average over configurations.
        R = np.mean(R, axis=2)

    for i, e in enumerate(epochs):
        print("=" * 20)
        print(" Epoch {}:".format(e))
        print("-" * 20)
        for col, name in enumerate(rating_property):
            print("\t{}: \t{:.2f}".format(name, R[i, col]))
        print("\t" + ("-" * 10))
        print("\toverall: \t{:.2f}".format(R[i, 9]))

    if not loaded:
        if DUMP:
            with open(plot_data_filename, 'bw') as cache_file:
                pickle.dump(R, cache_file)
        else:
            print("No Dump")

    # smooth (the overall column 9 is left as is)
    for r in range(9):
        R[:, r] = smooth(R[:, r])

    plt.figure(dist + ' ' + run + '-' + post)
    plt.title('Rating ' + dist)
    plt.plot(epochs, R)
    plt.legend(rating_property + ['Overall'])
    plt.grid(True, axis='y')
    return R
def eval_retrieval(run,
                   net_type,
                   metric,
                   epochs,
                   dset,
                   NN=[7, 11, 17],
                   cross_validation=False,
                   n_groups=5):
    """Evaluate retrieval precision and F1 of an embedding per epoch.

    With ``cross_validation`` the configurations ``run + 'c0'`` ..
    ``run + 'c{n_groups-1}'`` are evaluated and combined with
    ``mean_cross_validated_index_with_std`` over the requested epochs.
    Otherwise a single run is evaluated and the statistics are taken over
    ``NN``. F1 is the harmonic mean of the two class-wise precisions
    returned by ``Retriever.evaluate_precision``.

    Args:
        run: run identifier.
        net_type: embedding prefix.
        metric: distance metric used to fit the retriever.
        epochs: epochs to evaluate.
        dset: dataset tag.
        NN: neighbourhood sizes; only read, never modified.
        cross_validation: select the cross-validated path.
        n_groups: number of cross-validation configurations.

    Returns:
        ``(P, P_std, F1, F1_std, valid_epochs)``. With cross-validation
        ``valid_epochs`` is a list with one array of evaluated epochs per
        configuration; otherwise it is the list of requested epochs.
    """
    Embed = FileManager.Embed(net_type)

    if cross_validation:
        Prec = [[] for _ in range(n_groups)]
        Prec_b = [[] for _ in range(n_groups)]
        Prec_m = [[] for _ in range(n_groups)]
        valid_epochs = [[] for _ in range(n_groups)]

        # Load
        embed_source = [
            Embed(run + 'c{}'.format(c), dset) for c in range(n_groups)
        ]
        Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset)
        for i, source in enumerate(embed_source):
            Ret.load_embedding(source, multi_epcch=True)
            for E in epochs:
                try:
                    Ret.fit(np.max(NN), metric=metric, epoch=E)
                except Exception:
                    # Best effort: presumably the epoch has no stored
                    # embedding for this configuration.
                    print("Epoch {} - no calculated embedding".format(E))
                    continue
                # Calc
                prec, prec_b, prec_m = [], [], []
                for N in NN:
                    p, pb, pm = Ret.evaluate_precision(n=N)
                    prec.append(p)
                    prec_b.append(pb)
                    prec_m.append(pm)
                Prec[i].append(np.array(prec))
                Prec_b[i].append(np.array(prec_b))
                Prec_m[i].append(np.array(prec_m))
                valid_epochs[i].append(E)
            Prec[i] = np.array(Prec[i])
            Prec_b[i] = np.array(Prec_b[i])
            Prec_m[i] = np.array(Prec_m[i])
            valid_epochs[i] = np.array(valid_epochs[i])

        combined_epochs = epochs
        P, P_std = mean_cross_validated_index_with_std(Prec, valid_epochs,
                                                       combined_epochs)
        # F1 per configuration: groups may hold a different number of valid
        # epochs, so they cannot be stacked into one array.
        combined = [
            2 * group_b * group_m / (group_b + group_m)
            for group_b, group_m in zip(Prec_b, Prec_m)
        ]
        F1, F1_std = mean_cross_validated_index_with_std(
            combined, valid_epochs, combined_epochs)
    else:
        # Materialise once so the epochs can be both iterated and returned.
        valid_epochs = list(epochs)
        # Start from empty lists: seeding them with per-group placeholders
        # (as the cross-validated path needs) makes the arrays ragged.
        Prec, Prec_b, Prec_m = [], [], []
        for E in valid_epochs:
            Ret = Retriever(title='', dset='')
            Ret.load_embedding(Embed(run, E, dset))
            Ret.fit(np.max(NN), metric=metric)
            prec, prec_b, prec_m = [], [], []
            for N in NN:
                # Same unpacking order as the cross-validated path; F1 is
                # symmetric in the two class-wise precisions.
                p, pb, pm = Ret.evaluate_precision(n=N)
                prec.append(p)
                prec_b.append(pb)
                prec_m.append(pm)
            Prec.append(np.array(prec))
            Prec_b.append(np.array(prec_b))
            Prec_m.append(np.array(prec_m))

        Prec = np.array(Prec)
        Prec_m = np.array(Prec_m)
        Prec_b = np.array(Prec_b)
        f1 = 2 * Prec_b * Prec_m / (Prec_b + Prec_m)
        P, P_std = np.mean(Prec, axis=-1), np.std(Prec, axis=-1)
        F1, F1_std = np.mean(f1, axis=-1), np.std(f1, axis=-1)

    return P, P_std, F1, F1_std, valid_epochs
def run(choose_model="DIR", epochs=200, config=0, skip_validation=False, no_training=False, config_name='LEGACY', load_data_from_predications=False): np.random.seed(1337) random.seed(1337) tf.set_random_seed(1234) K.set_session(tf.Session(graph=tf.get_default_graph())) ## --------------------------------------- ## ## ------- General Setup ----------------- ## ## --------------------------------------- ## #data dataset_type = 'Primary' data_size = 160 if no_training: data_size = 160 res = 0.5 # 'Legacy' #0.7 #0.5 #'0.5I' sample = 'Normal' # 'UniformNC' #'Normal' #'Uniform' data_run = '813' data_epoch = 70 return_predicted_ratings = not no_training use_gen = True #model model_size = 128 input_shape = (model_size, model_size, 1) normalize = True out_size = 128 do_augment = True if no_training: do_augment = False preload_weight = None print("-" * 30) print("Running {} for --** {} **-- model, with #{} configuration".format( "training" if not no_training else "validation", choose_model, config)) if load_data_from_predications: print( "\tdata_run = {}, \n\tdata_epoch = {}, return_predicted_ratings = {}" .format(data_run, data_epoch, return_predicted_ratings)) else: print( "\tdata_size = {},\n\tmodel_size = {},\n\tres = {},\n\tdo_augment = {}" .format(data_size, model_size, res, do_augment)) print("\tdataset_type = {}".format(dataset_type)) print("-" * 30) model = None data_augment_params = { 'max_angle': 30, 'flip_ratio': 0.5, 'crop_stdev': 0.15, 'epoch': 0 } data_loader = build_loader( size=data_size, res=res, sample=sample, dataset_type=dataset_type, config_name=config_name, configuration=config, run=data_run, epoch=data_epoch, load_data_from_predictions=load_data_from_predications, return_predicted_ratings=return_predicted_ratings) ## --------------------------------------- ## ## ------- Prepare Direct Architecture ------- ## ## --------------------------------------- ## if choose_model is "DIR": # run = '300' # SPIE avg-pool (data-aug, 
balanced=False,class_weight=True) # run = '301' # SPIE max-pool (data-aug, balanced=False,class_weight=True) # run = '302' # SPIE rmac-pool (data-aug, balanced=False,class_weight=True) # run = 'zzz' model = DirectArch(miniXception_loader, input_shape, output_size=out_size, normalize=normalize, pooling='msrmac') model.model.summary() model.compile(learning_rate=1e-3, decay=0) if use_gen: generator = DataGeneratorDir( data_loader, val_factor=0 if skip_validation else 1, balanced=False, data_size=data_size, model_size=model_size, batch_size=32, do_augment=do_augment, augment=data_augment_params, use_class_weight=True, use_confidence=False) model.load_generator(generator) else: dataset = load_nodule_dataset(size=data_size, res=res, sample=sample) images_train, labels_train, class_train, masks_train, _ = prepare_data_direct( dataset[2], num_of_classes=2) images_valid, labels_valid, class_valid, masks_valid, _ = prepare_data_direct( dataset[1], num_of_classes=2) images_train = np.array([ crop_center(im, msk, size=model_size)[0] for im, msk in zip(images_train, masks_train) ]) images_valid = np.array([ crop_center(im, msk, size=model_size)[0] for im, msk in zip(images_valid, masks_valid) ]) model.load_data(images_train, labels_train, images_valid, labels_valid, batch_size=32) if choose_model is "DIR_RATING": ### CLEAN SET # run = '800' # rmac conf:size # run = '801' # rmac conf:none # run = '802' # rmac conf:rating-std # run = '803' # max conf:none ### PRIMARY SET # run = '810' # rmac conf:size # run = '811' # rmac conf:none # run = '812' # rmac conf:rating-std # run = '813' # max conf:none # run = '814' # max separated_prediction # run = '820' # dirD, max, logcoh-loss # run = '821' # dirD, max, pearson-loss # run = '822' # dirD, max, KL-rank-loss # run = '823' # dirD, max, poisson-rank-loss # run = '824' # dirD, max, categorical-cross-entropy-loss # run = '825' # dirD, max, ranked-pearson-loss # run = '826' # dirD, max, KL-normalized-rank-loss # run = '827' # dirD, max, 
KL-normalized-rank-loss (local-scaled) softmax # run = '828' # dirD, max, KL-normalized-rank-loss (local-scaled) l2 # run = '829' # dirD, max, ranked-pearson-loss (local-scaled) # run = '830' # dirD, rmac, logcoh-loss # run = '831' # dirD, rmac, pearson-loss # run = '832' # dirD, rmac, KL-rank-loss # run = '833' # dirD, rmac, poisson-rank-loss # run = '834' # dirD, rmac, categorical-cross-entropy-loss # run = '835' # dirD, rmac, ranked-pearson-loss # run = '836' # dirD, rmac, KL-normalized-rank-loss # run = '841' # dirD, max, pearson-loss pre:dirR813-50 # run = '842b' # dirD, max, KL-rank-loss pre:dirR813-50 (b:lr-4) # run = '846' # dirD, max, KL-norm-loss pre:dirR813-50 # run = '851' # dirD, rmac, pearson-loss pre:dirR813-50 # run = '852' # dirD, rmac, KL-rank-loss pre:dirR813-50 # run = '856' # dirD, rmac, KL-norm-loss pre:dirR813-50 # run = '860' # dirD, max, KL-loss pre:dirR813-50 (b:lr-4, freeze:7) # run = '861' # dirD, max, KL-loss pre:dirR813-50 (b:lr-4, freeze:17) # run = '862' # dirD, max, KL-loss pre:dirR813-50 (b:lr-4, freeze:28) # run = '863' # dirD, max, KL-loss pre:dirR813-50 (b:lr-4, freeze:39) # run = '870' # dirRD, max, KL-loss schd: 00 # run = '871' # dirRD, max, KL-loss schd: 01 # run = '872' # dirRD, max, KL-loss schd: 02 # run = '873' # dirRD, max, KL-loss schd: 03 # run = '874' # dirRD, max, KL-loss schd: 04 # run = '875' # dirRD, max, KL-loss schd: 05 # run = '876' # dirRD, max, KL-loss schd: 06 # run = '877b' # dirRD, max, KL-loss schd: 07b # run = '878' # dirRD, max, KL-loss schd: 08 # run = '879' # dirRD, max, KL-loss schd: 09 # run = '888' # dirRD, max, KL-loss schd: 08, on partial data SUP # run = '882' # dirRD, max, KL-loss schd: run = '898b' # dirRD, max, KL-loss schd: 08, on partial data UNSUP # run = '890b' # dirR # run = '892b' # dirRD, max, KL-loss # run = 'ccc' obj = 'rating_distance-matrix' # 'distance-matrix' 'rating' 'rating-size' rating_scale = 'none' reg_loss = None # {'SampleCorrelation': 0.0} # 'Dispersion', 'Std', 
'FeatureCorrelation', 'SampleCorrelation' batch_size = 32 epoch_pre = 50 preload_weight = None # FileManager.Weights('dirR', output_dir=input_dir).name(run='813c{}'.format(config), epoch=epoch_pre) # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=epoch_pre) model = DirectArch(miniXception_loader, input_shape, output_size=out_size, objective=obj, separated_prediction=False, normalize=normalize, pooling='max', l1_regularization=None, regularization_loss=reg_loss, batch_size=batch_size) if preload_weight is not None: model.load_core_weights(preload_weight, 39) # 7: freeze 1 blocks # 17: freeze 2 blocks # 28: freeze 3 blocks # 39: freeze 4 blocks model.model.summary() should_use_scheduale = (reg_loss is not None) or (obj in [ 'rating_size', 'rating_distance-matrix' ]) # scheduale 00: 870 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 40, 'weights': [0.5, 0.5]}, # {'epoch': 80, 'weights': [0.1, 0.9]}] \ # if should_use_scheduale else [] # scheduale 01: 871 # sched = [{'epoch': 00, 'weights': [1.0, 0.0]}, # {'epoch': 50, 'weights': [0.0, 1.0]}] \ # if should_use_scheduale else [] # scheduale 02: 872 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 50, 'weights': [0.1, 0.9]}] \ # if should_use_scheduale else [] # scheduale 03: 873 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 50, 'weights': [0.5, 0.5]}, # {'epoch': 100, 'weights': [0.1, 0.9]}] \ # if should_use_scheduale else [] # scheduale 04: 874 # sched = [{'epoch': 00, 'weights': [1.0, 0.0]}, # {'epoch': 50, 'weights': [0.0, 0.1]}] \ # if should_use_scheduale else [] # scheduale 05: 875 # sched = [{'epoch': 00, 'weights': [1.0, 0.0]}, # {'epoch': 50, 'weights': [0.0, 1.0]}, # {'epoch': 100, 'weights': [0.0, 0.1]}] \ # if should_use_scheduale else [] # scheduale 06: 876 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 40, 'weights': [0.5, 0.5]}, # {'epoch': 60, 'weights': [0.1, 0.1]}, # {'epoch': 80, 'weights': [0.0, 0.1]}, 
# {'epoch': 100, 'weights': [0.0, 0.05]}] \ # if should_use_scheduale else [] # scheduale 07b: 877b # sched = [{'epoch': 00, 'weights': [1.0, 0.0]}, # {'epoch': 50, 'weights': [0.0, 1.0]}, # {'epoch': 80, 'weights': [0.0, 0.1]}, # {'epoch': 100, 'weights': [0.0, 0.05]}] \ # if should_use_scheduale else [] # scheduale 08b: 878 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 40, 'weights': [0.5, 0.5]}, # {'epoch': 80, 'weights': [0.0, 0.1]}] \ # if should_use_scheduale else [] # scheduale 09: 879 # sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, # {'epoch': 20, 'weights': [0.7, 0.3]}, # {'epoch': 40, 'weights': [0.5, 0.5]}, # {'epoch': 60, 'weights': [0.3, 0.3]}, # {'epoch': 80, 'weights': [0.0, 0.1]}] \ # if should_use_scheduale else [] # scheduale 892/882 sched = [{'epoch': 00, 'weights': [0.9, 0.1]}, {'epoch': 80, 'weights': [0.5, 0.5]}, {'epoch': 120, 'weights': [0.0, 0.1]}] \ if should_use_scheduale else [] loss = dict() loss['predictions'] = 'logcosh' loss['predictions_size'] = 'logcosh' loss['distance_matrix'] = distance_matrix_rank_loss_adapter( K_losses.kullback_leibler_divergence, 'KL') # distance_matrix_logcosh # pearson_correlation # distance_matrix_rank_loss_adapter(K_losses.kullback_leibler_divergence, 'KL') # distance_matrix_rank_loss_adapter(K_losses.poisson, 'poisson') # distance_matrix_rank_loss_adapter(K_losses.categorical_crossentropy, 'entropy') model.compile( learning_rate=1e-3 if (preload_weight is None) else 1e-4, loss=loss, scheduale=sched ) # mean_squared_logarithmic_error, binary_crossentropy, logcosh if use_gen: generator = DataGeneratorDir( data_loader, val_factor=0 if skip_validation else 1, data_size=data_size, model_size=model_size, batch_size=batch_size, objective=obj, rating_scale=rating_scale, weighted_rating=('distance-matrix' in obj), balanced=False, do_augment=do_augment, augment=data_augment_params, use_class_weight=False, use_confidence=False) model.load_generator(generator) else: dataset = 
load_nodule_dataset(size=data_size, res=res, sample=sample, dataset_type=dataset_type) images_train, labels_train, masks_train = prepare_data_direct( dataset[2], objective='rating', rating_scale=rating_scale) images_valid, labels_valid, masks_valid = prepare_data_direct( dataset[1], objective='rating', rating_scale=rating_scale) images_train = np.array([ crop_center(im, msk, size=model_size)[0] for im, msk in zip(images_train, masks_train) ]) images_valid = np.array([ crop_center(im, msk, size=model_size)[0] for im, msk in zip(images_valid, masks_valid) ]) model.load_data(images_train, labels_train, images_valid, labels_valid, batch_size=batch_size) ## --------------------------------------- ## ## ------- Prepare Siamese Architecture ------ ## ## --------------------------------------- ## if choose_model is "SIAM": # run = '300' # l1, avg-pool (data-aug, balanced=True, class_weight=False) # run = '301' # l1, max-pool (data-aug, balanced=True, class_weight=False) # run = '302' # l1, rmac-pool (data-aug, balanced=True, class_weight=False) # run = '310' # l2, avg-pool (data-aug, balanced=True, class_weight=False) # run = '311' # l2, max-pool (data-aug, balanced=True, class_weight=False) # run = '312' # l2, rmac-pool (data-aug, balanced=True, class_weight=False) # run = '320' # cos, avg-pool (data-aug, balanced=True, class_weight=False) # run = '321' # cos, max-pool (data-aug, balanced=True, class_weight=False) # run = '322b' # cos, rmac-pool (data-aug, balanced=True, class_weight=False) # b/c - changed margin-loss params # run = '313c' # l2, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False) # run = '314c' # l2, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False) # run = '323c' # cos, max-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False) # run = '324c' # cos, rmac-pool MARGINAL-LOSS (data-aug, balanced=True, class_weight=False) # run = 'zzz' batch_size = 64 if local else 128 # model generator = 
DataGeneratorSiam(data_loader, data_size=data_size, model_size=model_size, batch_size=batch_size, val_factor=0 if skip_validation else 3, balanced=True, objective="malignancy", do_augment=do_augment, augment=data_augment_params, use_class_weight=False) model = SiamArch(miniXception_loader, input_shape, output_size=out_size, batch_size=batch_size, distance='l2', normalize=normalize, pooling='msrmac') model.model.summary() model.compile(learning_rate=1e-3, decay=0) if use_gen: model.load_generator(generator) else: imgs_trn, lbl_trn = generator.next_train().__next__() imgs_val, lbl_val = generator.next_val().__next__() model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val) if choose_model is "SIAM_RATING": ### clean set # run = '400' # l2-rmac no-conf # run = '401' # cosine-rmac no-conf # run = '402' # l2-rmac conf # run = '403' # cosine-rmac conf # run = '404' # l2-max no-conf # run = '405' # cosine-max no-conf ### primary set # run = '410' # l2-rmac no-conf # run = '411' # cosine-rmac no-conf # run = '412' # l2-rmac conf # run = '413' # cosine-rmac conf # run = '414' # l2-max no-conf # run = '415' # cosine-max no-conf run = 'zzz' obj = 'rating' # rating / size / rating_size batch_size = 16 if local else 64 reg_loss = None # {'SampleCorrating_clusters_distance_and_stdrelation': 0.1} # 'Dispersion', 'Std', 'FeatureCorrelation', 'SampleCorrelation' epoch_pre = 60 preload_weight = None # FileManager.Weights('dirR', output_dir=input_dir).name(run='251c{}'.format(config), epoch=70) should_use_scheduale = (reg_loss is not None) or (obj == 'rating_size') ''' sched = [{'epoch': 00, 'weights': [0.1, 0.9]}, {'epoch': 30, 'weights': [0.4, 0.6]}, {'epoch': 60, 'weights': [0.6, 0.4]}, {'epoch': 80, 'weights': [0.9, 0.1]}, {'epoch': 100, 'weights': [1.0, 0.0]}] \ if should_use_scheduale else [] ''' sched = [{'epoch': 00, 'weights': [0.1, 0.9]}, {'epoch': 20, 'weights': [0.4, 0.6]}, {'epoch': 30, 'weights': [0.6, 0.4]}, {'epoch': 50, 'weights': [0.9, 0.1]}, {'epoch': 80, 'weights': 
[1.0, 0.0]}] \ if should_use_scheduale else [] # model generator = DataGeneratorSiam(data_loader, data_size=data_size, model_size=model_size, batch_size=batch_size, train_facotr=2, val_factor=0 if skip_validation else 3, balanced=False, objective=obj, weighted_rating=True, do_augment=do_augment, augment=data_augment_params, use_class_weight=False, use_confidence=False) model = SiamArch(miniXception_loader, input_shape, output_size=out_size, objective=obj, batch_size=batch_size, distance='cosine', normalize=normalize, pooling='rmac', regularization_loss=reg_loss, l1_regularization=False) if preload_weight is not None: model.load_core_weights(preload_weight) model.model.summary() model.compile(learning_rate=1e-3, decay=0, loss='logcosh', scheduale=sched) # mean_squared_error, logcosh model.load_generator(generator) ## --------------------------------------- ## ## ------- Prepare Triplet Architecture ------ ## ## --------------------------------------- ## if choose_model is "TRIPLET": # run = '000' # rmac softplus, b16 # run = '001' # rmac hinge, b16, pre:dirR813-50 # run = '002' # rmac hinge, b32, pre:dirR813-50 # run = '003' # rmac hinge, b64, pre:dirR813-50 # run = '004' # rmac hinge, b128, pre:dirR813-50 # run = '005' # rmac hinge, b64, pre:dirR813-50 run = '006' # rmac rank, b64, pre:dirR813-50 # run = 'zzz' objective = 'rating' use_rank_loss = True batch_size = 16 if local else 64 gen = True epoch_pre = 50 preload_weight = FileManager.Weights( 'dirR', output_dir=input_dir).name(run='813c{}'.format(config), epoch=epoch_pre) # model model = TripArch(miniXception_loader, input_shape, objective=objective, output_size=out_size, distance='l2', normalize=True, pooling='msrmac', categorize=use_rank_loss) if preload_weight is not None: model.load_core_weights(preload_weight) model.model.summary() model.compile(learning_rate=1e-3, decay=0) generator = DataGeneratorTrip(data_loader, data_size=data_size, model_size=model_size, batch_size=batch_size, objective=objective, 
balanced=(objective == 'malignancy'), categorize=use_rank_loss, val_factor=0 if skip_validation else 1, train_factor=2, do_augment=do_augment, augment=data_augment_params, use_class_weight=False, use_confidence=False) if gen: model.load_generator(generator) else: imgs_trn, lbl_trn = generator.next_train().__next__() imgs_val, lbl_val = generator.next_val().__next__() model.load_data(imgs_trn, lbl_trn, imgs_val, lbl_val) ## --------------------------------------- ## ## ------- RUN ------ ## ## --------------------------------------- ## cnf_id = config if config_name == 'LEGACY' else CrossValidationManager( config_name).get_run_id(config) run_name = '{}{}c{}'.format('', run, cnf_id) print('Current Run: {}'.format(run_name)) if no_training: model.last_epoch = epochs model.run = run_name else: model.train(run=run_name, epoch=(0 if preload_weight is None else epoch_pre), n_epoch=epochs, gen=use_gen, do_graph=False) return model
def eval_correlation(run,
                     net_type,
                     metric,
                     rating_metric,
                     epochs,
                     dset,
                     objective='rating',
                     rating_norm='none',
                     cross_validation=False,
                     n_groups=5,
                     seq=False):
    """Correlate embedding distances with rating and malignancy/size distances.

    For every cross-validation configuration a ``RatingCorrelator`` is built
    on the embedding file of that configuration.  For every requested epoch
    the embedding distance matrix is evaluated and correlated (via
    ``correlate_retrieval``) against the rating space and against either
    malignancy (``objective == 'rating'``) or size (any other objective).
    Per-configuration results are then combined over the epochs returned by
    ``merge_epochs``.

    Args:
        run: Run identifier; configuration ``k`` is loaded from
            ``run + 'c<k>'``.
        net_type: Network type handed to ``FileManager.Embed``.
        metric: Distance method for the embedding distance matrix.
        rating_metric: Distance method for the rating distance matrix (only
            used when the cached rating distance cannot be loaded).
        epochs: Iterable of epochs to evaluate.  An epoch whose embedding
            distance matrix cannot be evaluated is skipped for that
            configuration.
        dset: Dataset identifier handed to the ``Embed`` file manager.
        objective: ``'rating'`` correlates the embedding with ``'malig'``;
            any other value correlates it with ``'size'``.  ``'size'``
            additionally evaluates the size distance matrix.  Also part of
            the cache filename.
        rating_norm: Normalisation passed to ``evaluate_rating_space``.
        cross_validation: Must be ``True``; the non-cross-validated path is
            not implemented.
        n_groups: Number of cross-validation configurations.  When it is not
            greater than 1, only configuration ``1`` is loaded.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        A 9-tuple ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd, epochs)``.
        The first eight entries are the ``np.squeeze``-d outputs of
        ``mean_cross_validated_index`` / ``std_cross_validated_index``; the
        last is ``np.array(merged_epochs)``.  ``P*``/``K*`` hold the first
        and third values returned by ``correlate_retrieval`` (presumably
        Pearson and Kendall -- confirm against ``RatingCorrelator``).

    Raises:
        AssertionError: If ``cross_validation`` is false.
    """
    Embed = FileManager.Embed(net_type)

    if not cross_validation:
        # Raised explicitly instead of `assert False` so the guard is not
        # stripped under `python -O`; the exception type is unchanged.
        raise AssertionError(
            'eval_correlation supports only cross_validation=True')

    # With a single group only configuration 1 is loaded.
    configurations = range(n_groups) if n_groups > 1 else [1]
    embed_source = [Embed(run + 'c{}'.format(c), dset) for c in configurations]

    # One independent result list per configuration and per measure.
    valid_epochs = [[] for _ in range(n_groups)]
    Pm, Km, Pr, Kr = ([[] for _ in range(n_groups)] for _ in range(4))
    PmStd, KmStd, PrStd, KrStd = ([[] for _ in range(n_groups)]
                                  for _ in range(4))

    retrieval_target = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq)

        # Load the rating distance from cache, or compute and cache it.
        cache_filename = 'output/cached_{}_{}_{}.p'.format(
            objective, source.split('/')[-1][6:-2], c_idx)
        if not Reg.load_cached_rating_distance(cache_filename):
            print('evaluating rating distance matrix...')
            Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
            Reg.evaluate_rating_distance_matrix(method=rating_metric,
                                                clustered_rating_distance=True,
                                                weighted=True)
            Reg.dump_rating_distance_to_cache(cache_filename)

        # NOTE(review): nesting reconstructed from whitespace-flattened
        # source; this is assumed to run regardless of the cache hit above.
        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Best effort: skip this epoch for this configuration.
                # Narrowed from a bare `except:` so that KeyboardInterrupt /
                # SystemExit are no longer swallowed.
                continue

            pm, _, km = Reg.correlate_retrieval('embed',
                                                retrieval_target,
                                                verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed',
                                                'rating',
                                                verbose=False)

            valid_epochs[c_idx].append(E)

            # Element 0 is stored as the value, element 1 as its std.
            Pm[c_idx].append(pm[0])
            Km[c_idx].append(km[0])
            Pr[c_idx].append(pr[0])
            Kr[c_idx].append(kr[0])
            PmStd[c_idx].append(pm[1])
            KmStd[c_idx].append(km[1])
            PrStd[c_idx].append(pr[1])
            KrStd[c_idx].append(kr[1])

        # Add a leading axis once all epochs of this configuration are in.
        Pm[c_idx] = np.expand_dims(Pm[c_idx], axis=0)
        Km[c_idx] = np.expand_dims(Km[c_idx], axis=0)
        Pr[c_idx] = np.expand_dims(Pr[c_idx], axis=0)
        Kr[c_idx] = np.expand_dims(Kr[c_idx], axis=0)
        PmStd[c_idx] = np.expand_dims(PmStd[c_idx], axis=0)
        KmStd[c_idx] = np.expand_dims(KmStd[c_idx], axis=0)
        PrStd[c_idx] = np.expand_dims(PrStd[c_idx], axis=0)
        KrStd[c_idx] = np.expand_dims(KrStd[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs,
                                 min_element=max(n_groups - 1, 1))

    Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs)
    Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs)
    Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs)
    Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs)
    PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs)
    KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs)
    PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs)
    KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs)

    return (np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km),
            np.squeeze(KmStd), np.squeeze(Pr), np.squeeze(PrStd),
            np.squeeze(Kr), np.squeeze(KrStd), np.array(merged_epochs))
run=run, net_type=net_type, dset=dset, metric=metric, epochs=epochs, cross_validation=True) data[run_id, 0] = acc data[run_id, 1] = prec data[run_id, 2] = index dataStd[run_id, 0] = acc_std dataStd[run_id, 1] = prec_std dataStd[run_id, 2] = index_std Embed = FileManager.Embed(net_type) embed_source = [ Embed(run + 'c{}'.format(c), dset) for c in configurations ] pm, pm_std, km, km_std, pr, pr_std, kr, kr_std, _ = eval_correlation( embed_source, metric=metric, rating_metric='euclidean', rating_norm=rating_norm, epochs=epochs) data[run_id, 3] = pm data[run_id, 4] = pr #data[run_id, 5] = km #data[run_id, 6] = kr
for conf in [1]: # range(n_groups): run = run_id + 'c{}'.format(conf) if True: print("Predicting Rating for " + run) PredRating = PredictRating(pooling=pooling) PredRating.load_dataset(data_subset_id=DataSubSet, full=full, include_unknown=include_unknown, size=128, rating_scale=rating_scale, configuration=conf) preds = [] valid_epochs = [] for e in epochs: WeightsFile = FileManager.Weights('dirR').name(run, epoch=e) PredFile = FileManager.Pred(type='rating', pre='dirR') out_file = PredFile(run=run, dset=post) data, out_filename = PredRating.predict_rating( WeightsFile, out_file) images_test, pred, meta_test, classes_test, labels_test, masks_test = data preds.append(np.expand_dims(pred, axis=0)) preds = np.concatenate(preds, axis=0) pickle.dump((preds, np.array(epochs), meta_test, images_test, classes_test, labels_test, masks_test), open(out_filename, 'bw')) else: print("Predicting Malignancy for " + run) PredMal = PredictMal(pooling=pooling) for e in epochs:
if __name__ == "__main__":
    # Demo: correlate the embedding of one run/epoch of the 'siam' network
    # with the rating space and plot the result.
    #
    # Current metrics:
    #   'chebyshev'
    #   'euclidean'
    #   'cosine'
    #   'correlation'
    #
    # To evaluate the similarity of two distance matrices:
    #   Kendall tau distance
    #   Spearman's rank correlation
    #   Distance correlation

    from Network import FileManager

    # Embedding file of run '064X', epoch 30, on the validation set.
    Embed = FileManager.Embed('siam')
    Reg = RatingCorrelator(Embed(run='064X',epoch=30,dset='Valid'))

    # Distance matrices must be evaluated before regression/plotting below.
    Reg.evaluate_embed_distance_matrix(method='euclidean')

    Reg.evaluate_rating_space()
    Reg.evaluate_rating_distance_matrix(method='euclidean')

    Reg.linear_regression()
    Reg.scatter('embed', 'rating', xMethod="euclidean", yMethod='euclidean', sub=True)

    # Alternative views, currently disabled:
    #Reg.scatter('malig', 'rating', yMethod='euclidean', sub=True)
    #Reg.scatter('embed', 'malig', sub=True)

    #Reg.malig_regression(method='euclidean')

    Reg.correlate('malig', 'rating')
# =================== inp_size = 144 net_size = 128 out_size = 128 input_shape = (net_size, net_size, 1) res = 'Legacy' sample = 'Normal' #'UniformNC' # 0 Test # 1 Validation # 2 Training DataSubSet = 1 dsets = ['Test', 'Valid', 'Train'] Weights = FileManager.Weights('siam') wRuns = ['078X'] wEpchs= [24] run = wRuns[0] epoch = wEpchs[0] # Load Data # ================= images, labels, masks, meta = \ prepare_data(load_nodule_dataset(size=inp_size, res=res, sample=sample)[DataSubSet], categorize=False, reshuffle=False, return_meta=True,