def dir_rating_correlate(run, post, epochs, rating_norm='none', clustered_rating_distance=True, n_groups=5,
                         USE_CACHE=False, DUMP=False):
    """Plot embedding-vs-rating retrieval correlation over epochs.

    For every configuration ``run + 'c<i>'`` (``i`` in ``range(n_groups)``) a
    ``RatingCorrelator`` is built on the ``'dirR'`` rating predictions, and for
    every epoch the Pearson and Kendall results of ``correlate_retrieval`` are
    collected. The per-configuration results are averaged, smoothed with
    ``smooth`` and drawn into a figure named ``'Rating2Rating:<run>-<post>'``.

    Args:
        run: Run identifier; configuration suffixes ``c0..c<n_groups-1>`` are
            appended to it.
        post: Dataset selector, forwarded as ``dset`` to the prediction file.
        epochs: Iterable of epochs to evaluate (also used as the x axis).
        rating_norm: Forwarded to ``evaluate_rating_space``; the value
            ``'Round'`` additionally turns on ``round=True`` in the embedding
            distance and retrieval calls.
        clustered_rating_distance: Forwarded to
            ``evaluate_rating_distance_matrix``.
        n_groups: Number of configurations to average over.
        USE_CACHE: When true, try to load previously dumped results from
            ``./Plots/Data/rating_correlation_dirR<run>.p`` and fall back to a
            fresh evaluation if that fails. Defaults to ``False``, which
            matches the previous hard-wired "always recompute" behaviour.
        DUMP: When true, pickle the freshly evaluated results to the same
            file. Defaults to ``False`` (previous behaviour: never dump).

    Returns:
        None. The result is a matplotlib figure.

    Notes:
        ``alpha``, ``smooth``, ``plt``, ``np``, ``pickle``, ``FileManager`` and
        ``RatingCorrelator`` are not defined here and are expected at module
        scope.
    """
    plot_data_filename = './Plots/Data/rating_correlation_{}{}.p'.format('dirR', run)

    loaded = False
    if USE_CACHE:
        # NOTE: pickle can execute arbitrary code on load; only point this at
        # cache files produced by this project.
        try:
            with open(plot_data_filename, 'br') as cache_file:
                pear_corr, kend_corr = pickle.load(cache_file)
            loaded = True
            print("Loaded results for {}".format(run))
        except Exception:
            # Best effort: a missing or unreadable cache simply triggers a
            # fresh evaluation. KeyboardInterrupt/SystemExit still propagate.
            loaded = False
    else:
        print('SKIPING')

    if not loaded:
        print("Evaluating Rating Correlation for {}".format(run))
        round_values = (rating_norm == 'Round')
        pear_by_config = []
        kend_by_config = []
        for config in range(n_groups):
            run_config = run + 'c{}'.format(config)
            PredFile = FileManager.Pred(type='rating', pre='dirR')
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post), multi_epoch=True)

            pear_by_epoch = []
            kend_by_epoch = []
            for e in epochs:
                Reg.evaluate_embed_distance_matrix(method='euclidean', epoch=e, round=round_values)
                Reg.evaluate_rating_space(norm=rating_norm)
                Reg.evaluate_rating_distance_matrix(method='euclidean',
                                                    clustered_rating_distance=clustered_rating_distance)
                Reg.linear_regression()
                # The middle return value is not used by this plot.
                p, _, k = Reg.correlate_retrieval('embed', 'rating', round=round_values, verbose=False)
                pear_by_epoch.append(p)
                kend_by_epoch.append(k)
            pear_by_config.append(np.array(pear_by_epoch))
            kend_by_config.append(np.array(kend_by_epoch))

        # Average over configurations; rows stay aligned with `epochs`.
        pear_corr = np.mean(pear_by_config, axis=0)
        kend_corr = np.mean(kend_by_config, axis=0)

        if DUMP:
            with open(plot_data_filename, 'bw') as cache_file:
                pickle.dump((pear_corr, kend_corr), cache_file)
        else:
            print('NO DUMP')

    # Column 0 is drawn as the curve and column 1 as a +/- band around it
    # (presumably mean and standard deviation -- confirm against
    # RatingCorrelator.correlate_retrieval).
    pear_corr = smooth(pear_corr[:, 0]), smooth(pear_corr[:, 1])
    kend_corr = smooth(kend_corr[:, 0]), smooth(kend_corr[:, 1])
    epochs = np.array(epochs)

    plt.figure('Rating2Rating:' + run + '-' + post)
    for center, spread in (pear_corr, kend_corr):
        curve = plt.plot(epochs, center)
        color = curve[0].get_color()
        plt.plot(epochs, center + spread, color=color, ls='--', alpha=alpha)
        plt.plot(epochs, center - spread, color=color, ls='--', alpha=alpha)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    # Empty labels keep the dashed band lines out of the legend text.
    plt.legend(['pearson', '', '', 'kendall', '', ''])
def embed_correlate(network_type, run, post, epochs, rating_norm='none'):
    """Plot Pearson and Kendall embedding-vs-rating correlation per epoch.

    For each epoch a ``RatingCorrelator`` is created from the embedding file
    of ``network_type`` / ``run`` / ``post``, Euclidean distance matrices are
    evaluated for the embedding and rating spaces, and the first and third
    values returned by ``correlate_retrieval('embed', 'rating')`` are
    collected. Both series are then drawn against ``epochs`` in a new figure.

    Args:
        network_type: Passed to ``FileManager.Embed``.
        run: Run identifier (also used in the plot title).
        post: Dataset selector, forwarded as ``dset`` (also used in the title).
        epochs: Iterable of epochs to evaluate.
        rating_norm: Forwarded to ``evaluate_rating_space``; ``'Round'`` also
            enables ``round=True`` for the embedding distance matrix.

    Returns:
        None. The result is a matplotlib figure.
    """
    round_embedding = (rating_norm == 'Round')

    scores = []
    for epoch in epochs:
        embed_files = FileManager.Embed(network_type)
        correlator = RatingCorrelator(embed_files.name(run=run, epoch=epoch, dset=post))
        correlator.evaluate_embed_distance_matrix(method='euclidean', round=round_embedding)
        correlator.evaluate_rating_space(norm=rating_norm)
        correlator.evaluate_rating_distance_matrix(method='euclidean')
        correlator.linear_regression()
        pearson, _, kendall = correlator.correlate_retrieval('embed', 'rating')
        scores.append((pearson, kendall))

    epoch_axis = np.array(epochs)
    pearson_curve = np.array([pair[0] for pair in scores])
    kendall_curve = np.array([pair[1] for pair in scores])

    plt.figure()
    for curve in (pearson_curve, kendall_curve):
        plt.plot(epoch_axis, curve)
    plt.grid(which='major', axis='y')
    plt.title('embed_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(['pearson', 'kendall'])
def eval_correlation(embed_source, metric, rating_metric, epochs, objective='rating', rating_norm='none',
                     local_scaling=False, seq=False):
    """Correlate embedding distances with rating/malignancy (or size) distances.

    One ``RatingCorrelator`` is built per entry of ``embed_source``. For every
    epoch whose embedding distance matrix can be evaluated, two
    ``correlate_retrieval`` calls are made (embedding vs. ``'malig'`` or
    ``'size'``, and embedding vs. ``'rating'``). Element 0 and element 1 of the
    Pearson and Kendall results are accumulated separately and finally
    combined across configurations with ``mean_cross_validated_index`` /
    ``std_cross_validated_index`` on the epochs chosen by ``merge_epochs``.

    NOTE(review): another function named ``eval_correlation`` is defined
    further down in this file; if both live in the same module the later one
    replaces this definition at import time.

    Args:
        embed_source: Sequence of embedding sources, one per configuration.
            Each is expected to be a ``'/'``-separated path string because the
            cache name is derived via ``source.split('/')[-1][6:-2]``.
        metric: Distance method for the embedding distance matrix.
        rating_metric: Distance method for the rating distance matrix.
        epochs: Iterable of epochs to try.
        objective: ``'rating'`` correlates against ``'malig'``; any other value
            correlates against ``'size'``. The size distance matrix is only
            evaluated when this equals ``'size'``.
        rating_norm: Forwarded to ``evaluate_rating_space``.
        local_scaling: Forwarded to ``evaluate_rating_distance_matrix``.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        Tuple ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd, merged_epochs)``
        where the first eight entries are ``np.squeeze``-d aggregates and the
        last is ``np.array(merged_epochs)``.
    """
    mean_keys = ('Pm', 'Km', 'Pr', 'Kr')
    std_keys = ('PmStd', 'KmStd', 'PrStd', 'KrStd')

    n_configs = len(embed_source)
    valid_epochs = [[] for _ in range(n_configs)]
    # One list per configuration for each tracked quantity.
    series = {key: [[] for _ in range(n_configs)] for key in mean_keys + std_keys}
    secondary_space = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq)

        cache_filename = 'output/cached_{}_{}_{}.p'.format(objective, source.split('/')[-1][6:-2], c_idx)
        # The cached rating distance is deliberately not consulted here: the
        # matrix is always recomputed and the cache file is rewritten.
        print('evaluating rating distance matrix...')
        Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
        Reg.evaluate_rating_distance_matrix(method=rating_metric, clustered_rating_distance=True,
                                            weighted=True, local_scaling=local_scaling)
        Reg.dump_rating_distance_to_cache(cache_filename)

        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Treat a failure as "no embedding for this epoch" and skip
                # it. Not a bare except, so Ctrl-C / SystemExit still abort.
                continue

            pm, _, km = Reg.correlate_retrieval('embed', secondary_space, verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed', 'rating', verbose=False)

            valid_epochs[c_idx].append(E)
            for name, stats in (('Pm', pm), ('Km', km), ('Pr', pr), ('Kr', kr)):
                series[name][c_idx].append(stats[0])
                series[name + 'Std'][c_idx].append(stats[1])

        # Give each configuration a leading axis of length 1 before merging.
        for per_config in series.values():
            per_config[c_idx] = np.expand_dims(per_config[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs, min_element=max(n_configs - 1, 1))

    means = {key: mean_cross_validated_index(series[key], valid_epochs, merged_epochs) for key in mean_keys}
    stds = {key: std_cross_validated_index(series[key], valid_epochs, merged_epochs) for key in std_keys}

    return (np.squeeze(means['Pm']), np.squeeze(stds['PmStd']),
            np.squeeze(means['Km']), np.squeeze(stds['KmStd']),
            np.squeeze(means['Pr']), np.squeeze(stds['PrStd']),
            np.squeeze(means['Kr']), np.squeeze(stds['KrStd']),
            np.array(merged_epochs))
def dir_rating_params_correlate(run, post, epochs, net_type, rating_norm='none', configurations=list(range(5)), USE_CACHE=True, DUMP=True):
    """Print and plot per-property rating correlation over epochs.

    For every configuration ``run + 'c<config>'`` a ``RatingCorrelator`` is
    built on the rating predictions of ``net_type`` and
    ``correlate_to_ratings`` is evaluated per epoch. Seven of the nine
    returned entries are kept (see ``mask``), averaged over configurations,
    printed per epoch, smoothed and plotted together with dashed reference
    levels.

    Args:
        run: Run identifier; ``'c<config>'`` is appended per configuration.
        post: Dataset selector, forwarded as ``dset`` to the prediction file.
        epochs: Sequence of epochs to evaluate (also the x axis).
        net_type: Prefix passed to ``FileManager.Pred`` and used in the cache
            file name.
        rating_norm: Forwarded to ``evaluate_rating_space``; ``'Round'`` also
            enables ``round=True`` in ``correlate_to_ratings``.
        configurations: Configuration ids to average over. The default list is
            shared between calls but is never mutated here.
        USE_CACHE: When true, try to load results from
            ``./Plots/Data/rating_params_correlation_<net_type><run>.p`` and
            fall back to a fresh evaluation if loading fails.
        DUMP: When true, pickle freshly evaluated results to that file.

    Returns:
        None. Output is printed text and a matplotlib figure.
    """
    # Dashed reference level per plotted property (same order as
    # `rating_property`); the two dropped properties have no reference.
    reference = [0.7567, 0.5945, 0.7394, 0.5777, 0.6155, 0.7445, 0.6481]
    rating_property = ['Subtlety', 'Sphericity', 'Margin', 'Lobulation', 'Spiculation', 'Texture', 'Malignancy']
    # Keeps 7 of 9 entries; positions 1 and 2 are dropped (presumably
    # 'Internalstructure' and 'Calcification' -- confirm against the
    # ordering used by correlate_to_ratings).
    mask = [True, False, False, True, True, True, True, True, True]

    plot_data_filename = './Plots/Data/rating_params_correlation_{}{}.p'.format(net_type, run)

    loaded = False
    if USE_CACHE:
        # NOTE: pickle can execute arbitrary code on load; only point this at
        # cache files produced by this project.
        try:
            with open(plot_data_filename, 'br') as cache_file:
                pear_corr = pickle.load(cache_file)
            loaded = True
            print("Loaded results for {}".format(run))
        except Exception:
            # Best effort: a missing or unreadable cache triggers a fresh
            # evaluation. KeyboardInterrupt/SystemExit still propagate.
            loaded = False
    else:
        print('SKIPPING')

    if not loaded:
        print("Evaluating Rating Correlation for {}".format(run))
        corr_by_config = []
        for c, config in enumerate(configurations):
            run_config = run + 'c{}'.format(config)
            PredFile = FileManager.Pred(type='rating', pre=net_type)
            Reg = RatingCorrelator(PredFile(run=run_config, dset=post), multi_epoch=True, conf=c)
            Reg.evaluate_rating_space(norm=rating_norm)

            corr_by_epoch = []
            for e in epochs:
                p = Reg.correlate_to_ratings(epoch=e, round=(rating_norm == 'Round'))
                selected = p[mask]
                if not np.all(np.isfinite(selected)):
                    # Report but keep the value; it still enters the mean.
                    print('nan at: conf={}, epoch={}'.format(c, e))
                corr_by_epoch.append(selected)
            corr_by_config.append(np.array(corr_by_epoch))

        pear_corr = np.mean(corr_by_config, axis=0)

        if DUMP:
            with open(plot_data_filename, 'bw') as cache_file:
                pickle.dump(pear_corr, cache_file)
        else:
            print('NO DUMP')

    for i, e in enumerate(epochs):
        print("=" * 20)
        print(" Epoch {}:".format(e))
        print("-" * 20)
        for p, prop_name in enumerate(rating_property):
            print("\t{}: \t{:.2f}".format(prop_name, pear_corr[i, p]))

    # Smoothing happens after printing/dumping, so only the plot is smoothed.
    for p in range(pear_corr.shape[1]):
        pear_corr[:, p] = smooth(pear_corr[:, p], window_length=5, polyorder=2)

    epochs = np.array(epochs)
    plt.figure('RatingParams2Rating:' + run + '-' + post)
    q = plt.plot(epochs, pear_corr, linewidth=2.5)
    for line, ref in zip(q, reference):
        # Horizontal reference level in the colour of the matching curve.
        plt.plot(epochs, ref * np.ones_like(epochs), color=line.get_color(), ls='--', linewidth=4, alpha=0.6)

    plt.grid(which='major', axis='y')
    plt.title('rating_' + run + '_' + post)
    plt.xlabel('epochs')
    plt.ylabel('correlation')
    plt.legend(rating_property)
def eval_correlation(run, net_type, metric, rating_metric, epochs, dset, objective='rating', rating_norm='none', cross_validation=False, n_groups=5, seq=False):
    """Cross-validated correlation of embedding distances with rating distances.

    Builds one embedding source per configuration (``run + 'c<i>'``), creates
    a ``RatingCorrelator`` for each, and for every epoch whose embedding
    distance matrix can be evaluated runs two ``correlate_retrieval`` calls
    (embedding vs. ``'malig'`` or ``'size'``, and embedding vs. ``'rating'``).
    Element 0 and element 1 of the Pearson and Kendall results are accumulated
    separately and combined across configurations with
    ``mean_cross_validated_index`` / ``std_cross_validated_index`` on the
    epochs chosen by ``merge_epochs``.

    Args:
        run: Run identifier; ``'c<i>'`` is appended per configuration.
        net_type: Passed to ``FileManager.Embed``.
        metric: Distance method for the embedding distance matrix.
        rating_metric: Distance method for the rating distance matrix.
        epochs: Iterable of epochs to try.
        dset: Dataset selector passed to the embedding file manager.
        objective: ``'rating'`` correlates against ``'malig'``; any other value
            correlates against ``'size'``. The size distance matrix is only
            evaluated when this equals ``'size'``.
        rating_norm: Forwarded to ``evaluate_rating_space``.
        cross_validation: Must be true; the non-cross-validated path is not
            implemented.
        n_groups: Number of configurations. Values above 1 use
            ``c0..c<n_groups-1>``; otherwise the single configuration ``c1``
            is used.
        seq: Forwarded to ``RatingCorrelator``.

    Returns:
        Tuple ``(Pm, PmStd, Km, KmStd, Pr, PrStd, Kr, KrStd, merged_epochs)``
        where the first eight entries are ``np.squeeze``-d aggregates and the
        last is ``np.array(merged_epochs)``.

    Raises:
        AssertionError: If ``cross_validation`` is false. Raised explicitly
            (rather than via ``assert``) so it also fires under ``python -O``.
    """
    if not cross_validation:
        # Fail before any project objects are built; there is no working
        # implementation of the single-run path.
        raise AssertionError('eval_correlation is only implemented for cross_validation=True')

    Embed = FileManager.Embed(net_type)
    config_ids = range(n_groups) if n_groups > 1 else [1]
    embed_source = [Embed(run + 'c{}'.format(c), dset) for c in config_ids]

    mean_keys = ('Pm', 'Km', 'Pr', 'Kr')
    std_keys = ('PmStd', 'KmStd', 'PrStd', 'KrStd')

    # Size the accumulators from the actual source list so they always match
    # what the loop below indexes.
    n_configs = len(embed_source)
    valid_epochs = [[] for _ in range(n_configs)]
    series = {key: [[] for _ in range(n_configs)] for key in mean_keys + std_keys}
    secondary_space = 'malig' if objective == 'rating' else 'size'

    for c_idx, source in enumerate(embed_source):
        Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq)

        cache_filename = 'output/cached_{}_{}_{}.p'.format(objective, source.split('/')[-1][6:-2], c_idx)
        if not Reg.load_cached_rating_distance(cache_filename):
            print('evaluating rating distance matrix...')
            Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False)
            Reg.evaluate_rating_distance_matrix(method=rating_metric, clustered_rating_distance=True,
                                                weighted=True)
            Reg.dump_rating_distance_to_cache(cache_filename)

        # NOTE(review): evaluated regardless of whether the rating cache was
        # hit -- confirm this matches the intended nesting.
        if objective == 'size':
            print('evaluating size distance matrix...')
            Reg.evaluate_size_distance_matrix()

        for E in epochs:
            try:
                Reg.evaluate_embed_distance_matrix(method=metric, epoch=E)
            except Exception:
                # Treat a failure as "no embedding for this epoch" and skip
                # it. Not a bare except, so Ctrl-C / SystemExit still abort.
                continue

            pm, _, km = Reg.correlate_retrieval('embed', secondary_space, verbose=False)
            pr, _, kr = Reg.correlate_retrieval('embed', 'rating', verbose=False)

            valid_epochs[c_idx].append(E)
            for name, stats in (('Pm', pm), ('Km', km), ('Pr', pr), ('Kr', kr)):
                series[name][c_idx].append(stats[0])
                series[name + 'Std'][c_idx].append(stats[1])

        # Give each configuration a leading axis of length 1 before merging.
        for per_config in series.values():
            per_config[c_idx] = np.expand_dims(per_config[c_idx], axis=0)

    merged_epochs = merge_epochs(valid_epochs, min_element=max(n_groups - 1, 1))

    means = {key: mean_cross_validated_index(series[key], valid_epochs, merged_epochs) for key in mean_keys}
    stds = {key: std_cross_validated_index(series[key], valid_epochs, merged_epochs) for key in std_keys}

    return (np.squeeze(means['Pm']), np.squeeze(stds['PmStd']),
            np.squeeze(means['Km']), np.squeeze(stds['KmStd']),
            np.squeeze(means['Pr']), np.squeeze(stds['PrStd']),
            np.squeeze(means['Kr']), np.squeeze(stds['KrStd']),
            np.array(merged_epochs))