def eval_classification(run, net_type, metric, epochs, dset, NN=[7, 11, 17], cross_validation=False, n_groups=5): Embed = FileManager.Embed(net_type) Pred_L1O = [[] for i in range(n_groups)] valid_epochs = [[] for i in range(n_groups)] if cross_validation: # Load embed_source = [ Embed(run + 'c{}'.format(c), dset) for c in range(n_groups) ] Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset) for i, source in enumerate(embed_source): Ret.load_embedding([source], multi_epcch=True) for E in epochs: # Calc pred_l1o = [] try: for N in NN: pred_l1o.append( Ret.classify_kfold(epoch=E, n=N, k_fold=10, metric=metric)) Pred_L1O[i].append(np.array(pred_l1o)) valid_epochs[i].append(E) except: print("Epoch {} - no calculated embedding".format(E)) Pred_L1O[i] = np.array(Pred_L1O[i]) valid_epochs[i] = np.array(valid_epochs[i]) combined_epochs = merge_epochs(valid_epochs, min_element=max(n_groups - 1, 1)) P, P_std = mean_cross_validated_index_with_std(Pred_L1O, valid_epochs, combined_epochs) else: for E in epochs: # Load embed_source = Embed(run, E, dset) Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset) Ret.load_embedding(embed_source) # Calc pred_l1o = [] for N in NN: pred_l1o.append(Ret.classify_leave1out(n=N, metric=metric)[1]) Pred_L1O.append(np.array(pred_l1o)) P, P_std = np.mean(Pred_L1O, axis=-1), np.std(Pred_L1O, axis=-1) return P, P_std, combined_epochs
from Network import FileManager from Analysis import Retriever from Analysis.metric_space_indexes import k_occurrences net_type = 'dirD' config = 0 dset = 'Valid' K = 2 res = {} for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']): print(run + ': ' + label + '\n' + '*' * 20) embed_source = FileManager.Embed(net_type)(run + 'c{}'.format(config), dset) Ret = Retriever(title='{}'.format(''), dset=dset) Ret.load_embedding(embed_source, multi_epcch=True) Ret.fit(metric='euclidean', epoch=60) indices, distances = Ret.ret_nbrs() # get Hubs k_occ = k_occurrences(indices, K) hubs_indices = np.argsort(k_occ)[-3:] res[run] = hubs_indices, indices print([(a, b) for a, b in zip(hubs_indices, k_occ[hubs_indices])]) for run, label in zip(['821', '822'], ['Pearson-loss', 'KL-loss']): print(run + ': ' + label + '\n' + '*' * 20) hubs_indices, indices = res[run] rev_nn = [(hub_id, np.where(np.any(hub_id == indices[:, :K], axis=1)))
def eval_retrieval(run, net_type, metric, epochs, dset, NN=[7, 11, 17], cross_validation=False, n_groups=5): Embed = FileManager.Embed(net_type) Prec, Prec_b, Prec_m = [[] for i in range(n_groups) ], [[] for i in range(n_groups) ], [[] for i in range(n_groups)] valid_epochs = [[] for i in range(n_groups)] if cross_validation: # Load embed_source = [ Embed(run + 'c{}'.format(c), dset) for c in range(n_groups) ] Ret = Retriever(title='{}-{}'.format(net_type, run), dset=dset) for i, source in enumerate(embed_source): Ret.load_embedding(source, multi_epcch=True) for E in epochs: # Calc prec, prec_b, prec_m = [], [], [] try: Ret.fit(np.max(NN), metric=metric, epoch=E) except: print("Epoch {} - no calculated embedding".format(E)) continue for N in NN: p, pb, pm = Ret.evaluate_precision(n=N) prec.append(p) prec_b.append(pb) prec_m.append(pm) Prec[i].append(np.array(prec)) Prec_b[i].append(np.array(prec_b)) Prec_m[i].append(np.array(prec_m)) valid_epochs[i].append(E) Prec[i] = np.array(Prec[i]) Prec_b[i] = np.array(Prec_b[i]) Prec_m[i] = np.array(Prec_m[i]) valid_epochs[i] = np.array(valid_epochs[i]) combined_epochs = epochs # merge_epochs(valid_epochs) P, P_std = mean_cross_validated_index_with_std(Prec, valid_epochs, combined_epochs) #P, P_std = np.mean(np.mean(Prec, axis=-1), axis=0), np.mean(np.std(Prec, axis=-1), axis=0) combined = 2 * np.array(Prec_b) * np.array(Prec_m) / ( np.array(Prec_b) + np.array(Prec_m)) #F1, F1_std = np.mean(np.mean(combined, axis=-1), axis=0), np.mean(np.std(combined, axis=-1), axis=0) F1, F1_std = mean_cross_validated_index_with_std( combined, valid_epochs, combined_epochs) else: for E in epochs: Ret = Retriever(title='', dset='') if cross_validation: embed_source = [ Embed(run + 'c{}'.format(c), E, dset) for c in range(n_groups) ] else: embed_source = Embed(run, E, dset) Ret.load_embedding(embed_source) prec, prec_b, prec_m = [], [], [] Ret.fit(np.max(NN), metric=metric) for N in NN: p, pm, pb = Ret.evaluate_precision(n=N) prec.append(p) prec_b.append(pb) prec_m.append(pm) Prec.append(np.array(prec)) Prec_b.append(np.array(prec_b)) Prec_m.append(np.array(prec_m)) Prec = np.array(Prec) Prec_m = np.array(Prec_m) Prec_b = np.array(Prec_b) f1 = 2 * Prec_b * Prec_m / (Prec_b + Prec_m) P, P_std = np.mean(Prec, axis=-1), np.std(Prec, axis=-1) F1, F1_std = np.mean(f1, axis=-1), np.std(f1, axis=-1) return P, P_std, F1, F1_std, valid_epochs
def eval_correlation(run, net_type, metric, rating_metric, epochs, dset, objective='rating', rating_norm='none', cross_validation=False, n_groups=5, seq=False): Embed = FileManager.Embed(net_type) if cross_validation: # Load if n_groups > 1: embed_source = [ Embed(run + 'c{}'.format(c), dset) for c in range(n_groups) ] else: embed_source = [Embed(run + 'c{}'.format(c), dset) for c in [1]] valid_epochs = [[] for i in range(n_groups)] Pm, Km, Pr, Kr = [[] for i in range(n_groups) ], [[] for i in range(n_groups) ], [[] for i in range(n_groups) ], [[] for i in range(n_groups)] PmStd, KmStd, PrStd, KrStd = [[] for i in range(n_groups) ], [[] for i in range(n_groups) ], [[] for i in range(n_groups) ], [[] for i in range(n_groups)] for c_idx, source in enumerate(embed_source): Reg = RatingCorrelator(source, conf=c_idx, multi_epoch=True, seq=seq) # load rating data cache_filename = 'output/cached_{}_{}_{}.p'.format( objective, source.split('/')[-1][6:-2], c_idx) if not Reg.load_cached_rating_distance(cache_filename): print('evaluating rating distance matrix...') Reg.evaluate_rating_space(norm=rating_norm, ignore_labels=False) Reg.evaluate_rating_distance_matrix( method=rating_metric, clustered_rating_distance=True, weighted=True) Reg.dump_rating_distance_to_cache(cache_filename) #print('\tno dump for rating distance matrix...') if objective == 'size': print('evaluating size distance matrix...') Reg.evaluate_size_distance_matrix() for E in epochs: # Calc try: Reg.evaluate_embed_distance_matrix(method=metric, epoch=E) except: #print("Epoch {} - no calculated embedding".format(E)) continue pm, _, km = Reg.correlate_retrieval( 'embed', 'malig' if objective == 'rating' else 'size', verbose=False) pr, _, kr = Reg.correlate_retrieval('embed', 'rating', verbose=False) valid_epochs[c_idx].append(E) Pm[c_idx].append(pm[0]) Km[c_idx].append(km[0]) Pr[c_idx].append(pr[0]) Kr[c_idx].append(kr[0]) PmStd[c_idx].append(pm[1]) KmStd[c_idx].append(km[1]) PrStd[c_idx].append(pr[1]) KrStd[c_idx].append(kr[1]) Pm[c_idx] = np.expand_dims(Pm[c_idx], axis=0) Km[c_idx] = np.expand_dims(Km[c_idx], axis=0) Pr[c_idx] = np.expand_dims(Pr[c_idx], axis=0) Kr[c_idx] = np.expand_dims(Kr[c_idx], axis=0) PmStd[c_idx] = np.expand_dims(PmStd[c_idx], axis=0) KmStd[c_idx] = np.expand_dims(KmStd[c_idx], axis=0) PrStd[c_idx] = np.expand_dims(PrStd[c_idx], axis=0) KrStd[c_idx] = np.expand_dims(KrStd[c_idx], axis=0) else: assert False for E in epochs: Ret = Retriever(title='', dset='') if cross_validation: embed_source = [ Embed(run + 'c{}'.format(c), E, dset) for c in range(n_groups) ] else: embed_source = Embed(run, E, dset) Ret.load_embedding(embed_source) prec, prec_b, prec_m = [], [], [] Ret.fit(np.max(NN), metric=metric) for N in NN: p, pm, pb = Ret.evaluate_precision(n=N) prec.append(p) prec_b.append(pb) prec_m.append(pm) Prec.append(np.array(prec)) Prec_b.append(np.array(prec_b)) Prec_m.append(np.array(prec_m)) merged_epochs = merge_epochs(valid_epochs, min_element=max(n_groups - 1, 1)) Pm = mean_cross_validated_index(Pm, valid_epochs, merged_epochs) Km = mean_cross_validated_index(Km, valid_epochs, merged_epochs) Pr = mean_cross_validated_index(Pr, valid_epochs, merged_epochs) Kr = mean_cross_validated_index(Kr, valid_epochs, merged_epochs) PmStd = std_cross_validated_index(PmStd, valid_epochs, merged_epochs) KmStd = std_cross_validated_index(KmStd, valid_epochs, merged_epochs) PrStd = std_cross_validated_index(PrStd, valid_epochs, merged_epochs) KrStd = std_cross_validated_index(KrStd, valid_epochs, merged_epochs) return np.squeeze(Pm), np.squeeze(PmStd), np.squeeze(Km), np.squeeze( KmStd), np.squeeze(Pr), np.squeeze(PrStd), np.squeeze(Kr), np.squeeze( KrStd), np.array(merged_epochs)