def visualize_glayout_without_training(layout="tsne", **kwargs):
    """Plot the raw layout of the first training graph, without any training.

    Args:
        layout: Layout name forwarded to ``plot_graph_layout``.
        **kwargs: Forwarded to ``get_args`` to build the run arguments.
    """
    parsed_args = get_args(**kwargs)
    pprint_args(parsed_args)

    # Only the training split is used; the other two are unpacked so that a
    # loader returning anything other than three items still fails loudly.
    train_split, _val_split, _test_split = get_dataset_or_loader(
        parsed_args.dataset_class,
        parsed_args.dataset_name,
        parsed_args.data_root,
        batch_size=parsed_args.batch_size,
        seed=parsed_args.seed,
    )

    graph = train_split[0]
    node_features = graph.x.numpy()
    node_labels = graph.y.numpy()
    edges = graph.edge_index.numpy()

    plot_graph_layout(
        node_features,
        node_labels,
        edges,
        args=parsed_args,
        edge_to_attention=None,  # no model yet, so no attention to draw
        key="raw",
        layout=layout,
    )
def get_model_and_preds(data, **kwargs):
    """Train a model via ``run`` and return it with its test-set predictions.

    Args:
        data: Graph object exposing ``x``, ``edge_index`` and ``test_mask``.
        **kwargs: Forwarded to ``get_args`` to build the run arguments.

    Returns:
        A ``(model, pred_labels)`` pair: the model moved to CPU in eval mode,
        and the argmax over axis 1 of its outputs on ``data.test_mask``.
    """
    run_args = get_args(**kwargs)
    # Fixed overrides applied on top of whatever the caller configured.
    run_args.verbose = 1
    run_args.save_model = False
    run_args.epochs = 300
    pprint_args(run_args)

    allocated = blind_other_gpus(
        num_gpus_total=run_args.num_gpus_total,
        num_gpus_to_use=run_args.num_gpus_to_use,
        gpu_deny_list=run_args.gpu_deny_list,
    )
    if allocated:
        cprint("Use GPU the ID of which is {}".format(allocated[0 - 0:] if False else allocated), "yellow")
        chosen_gpu = allocated[0]
    else:
        # NOTE(review): falls back to GPU id 1 when nothing was allocated;
        # confirm this is intended rather than 0 or None.
        chosen_gpu = 1

    model, ret = run(run_args, gpu_id=chosen_gpu, return_model=True)

    cpu_model = model.to("cpu")
    cpu_model.eval()
    with torch.no_grad():
        logits = cpu_model(data.x, data.edge_index)
        test_output = logits[data.test_mask].cpu().numpy()
    predicted = np.argmax(test_output, axis=1)
    return cpu_model, predicted
def visualize_attention_metric_for_multiple_models(
        name_prefix_and_kwargs: List[Tuple[str, Dict]],
        unit_width_per_name=3,
        extension="png"):
    """Train several models and plot their attention metrics side by side.

    For every ``(name_prefix, kwargs)`` pair a model is built with ``run`` and
    its per-layer metrics are collected with
    ``get_attention_metric_for_single_model_and_multiple_data``; the
    concatenated metrics are then handed to ``plot_kld_jsd_ent``.

    Args:
        name_prefix_and_kwargs: Pairs of a display prefix and the keyword
            arguments passed to ``get_args`` for that model.
        unit_width_per_name: Forwarded to ``plot_kld_jsd_ent``.
        extension: Figure file extension forwarded to ``plot_kld_jsd_ent``.

    Raises:
        ValueError: If ``name_prefix_and_kwargs`` is empty (there would be
            nothing to plot and no args to label the figure with).
    """
    if not name_prefix_and_kwargs:
        raise ValueError("name_prefix_and_kwargs must contain at least one model")

    res = None
    total_args, num_layers = None, None
    custom_key_list, name_prefix_list = [], []
    kld1_list, kld2_list, jsd_list, ent_list = [], [], [], []  # [L * M, N]

    for name_prefix, kwargs in name_prefix_and_kwargs:
        args = get_args(**kwargs)
        custom_key_list.append(args.custom_key)
        num_layers = args.num_layers

        train_d, val_d, test_d = get_dataset_or_loader(
            args.dataset_class, args.dataset_name, args.data_root,
            batch_size=args.batch_size, seed=args.seed,
        )
        if val_d is None and test_d is None:
            data_list = [train_d[0]]
        else:
            data_list = []
            for _data in chain(train_d, val_d, test_d):
                # When the distributions are wrapped in an extra outer
                # container (length differs from the node count), unwrap it.
                if _data.x.size(0) != len(_data.agreement_dist):
                    _data.agreement_dist = list(_data.agreement_dist[0])
                    _data.uniform_att_dist = list(_data.uniform_att_dist[0])
                data_list.append(_data)

        gpu_candidates = [g for g in range(args.num_gpus_total)
                          if g not in args.gpu_deny_list]
        if gpu_candidates:
            # Index the size-1 draw instead of int(array): converting an
            # ndim>0 array to a scalar is deprecated in recent NumPy.
            gpu_id = int(np.random.choice(gpu_candidates, 1)[0])
        else:
            # Every GPU is denied: fall back to CPU instead of crashing on an
            # empty choice. Assumes run() accepts gpu_id=None — TODO confirm.
            gpu_id = None

        if args.verbose >= 1:
            pprint_args(args)
            if gpu_id is None:
                cprint("Use CPU", "yellow")
            else:
                cprint("Use GPU the ID of which is {}".format(gpu_id), "yellow")

        device = "cpu" if gpu_id is None \
            else torch.device('cuda:{}'.format(gpu_id) if torch.cuda.is_available() else 'cpu')

        model, ret = run(args, gpu_id=gpu_id, return_model=True)

        # The helper returns four per-layer lists followed by extra values
        # that are forwarded positionally to the plot call below.
        kld1_layer, kld2_layer, jsd_layer, ent_layer, *res = \
            get_attention_metric_for_single_model_and_multiple_data(model, data_list, device)
        kld1_list += kld1_layer
        kld2_list += kld2_layer
        jsd_list += jsd_layer
        ent_list += ent_layer
        name_prefix_list.append(name_prefix)

        total_args = args  # the last model's args label the combined figure

        torch.cuda.empty_cache()

    total_args.custom_key = "-".join(sorted(custom_key_list))
    plot_kld_jsd_ent(kld1_list, kld2_list, jsd_list, ent_list, *res,
                     num_layers=num_layers, model_args=total_args, epoch=-1,
                     name_prefix_list=name_prefix_list,
                     unit_width_per_name=unit_width_per_name,
                     extension=extension,
                     flierprops={"marker": "x", "markersize": 12})
for rk, rv in ret.items(): results[rk].append(rv) return results if __name__ == '__main__': num_total_runs = 7 main_args = get_args( model_name="GAT", dataset_class="PygNodePropPredDataset", dataset_name="ogbn-arxiv", # ogbn-products, ogbn-arxiv custom_key="EV13NS", ) pprint_args(main_args) if len(main_args.gpu_deny_list) == main_args.num_gpus_total: alloc_gpu = [None] cprint("Use CPU", "yellow") else: alloc_gpu = blind_other_gpus(num_gpus_total=main_args.num_gpus_total, num_gpus_to_use=main_args.num_gpus_to_use, gpu_deny_list=main_args.gpu_deny_list) if not alloc_gpu: alloc_gpu = [ int( np.random.choice([ g for g in range(main_args.num_gpus_total) if g not in main_args.gpu_deny_list ], 1))
def analyze_rpg_by_degree_and_homophily(degree_list: List[float],
                                        homophily_list: List[float],
                                        legend_list: List[str],
                                        model_list: List[str],
                                        custom_key_list: List[str],
                                        att_lambda_list: List[float],
                                        l2_lambda_list: List[float],
                                        num_total_runs: int,
                                        num_nodes_per_class: int = 500,
                                        num_classes: int = 10,
                                        verbose=2,
                                        is_test=False,
                                        plot_part_by_part=False,
                                        draw_plot=True,
                                        draw_diff_between_first=False,
                                        extension="pdf"):
    """Sweep models over random-partition graphs and plot test performance.

    For every (degree, model, homophily) combination the function searches
    ``att_lambda_list`` x ``l2_lambda_list``, keeping the setting with the
    highest mean ``test_perf_at_best_val``. Each setting's multi-seed result
    is cached as a pickle inside the directory returned by
    ``_get_key_and_makedirs`` and is reused on later calls.

    Args:
        degree_list: Average degrees to evaluate.
        homophily_list: Homophily values to evaluate.
        legend_list: Display name per model (zipped with ``model_list`` and
            ``custom_key_list``).
        model_list: Model names passed to ``get_args``.
        custom_key_list: Custom keys passed to ``get_args``.
        att_lambda_list: Candidate values assigned to ``args.att_lambda``.
        l2_lambda_list: Candidate values assigned to ``args.l2_lambda``.
        num_total_runs: Number of runs given to
            ``run_with_many_seeds_with_gpu``.
        num_nodes_per_class: Used in the dataset name and to turn a degree
            into a ratio.
        num_classes: Used in the dataset name.
        verbose: Assigned to ``args.verbose``.
        is_test: If true, uses a ``_test`` output key and only 2 epochs.
        plot_part_by_part: If true, additionally draws four partial figures
            grouped by homophily.
        draw_plot: If false, return right after the sweep without plotting.
        draw_diff_between_first: In the partial figures, plot the difference
            (in percentage points) to the first legend instead of raw values.
        extension: Figure file extension.

    Returns:
        None.
    """

    def to_log10(v, eps=1e-5):
        return float(np.log10(v + eps))

    base_key = "analysis_rpg" + ("" if not is_test else "_test")
    base_path = os.path.join("../figs", base_key)

    best_meta_dict = defaultdict(dict)
    deg_and_legend_to_mean_over_hp_list, deg_and_legend_to_std_over_hp_list = {}, {}
    for deg in degree_list:
        avg_deg_ratio = deg / num_nodes_per_class

        for legend, model, key in zip(legend_list, model_list, custom_key_list):
            base_kwargs = {
                "model_name": model,
                "dataset_class": "RandomPartitionGraph",
                "dataset_name": f"rpg-{num_classes}-{num_nodes_per_class}-h-d",
                "custom_key": key,
            }
            args = get_args(**base_kwargs)
            args.verbose = verbose
            deg_and_legend = (deg, legend)

            if is_test:
                args.epochs = 2

            mean_over_hp_list, std_over_hp_list = [], []
            for hp in homophily_list:
                args.dataset_name = f"rpg-{num_classes}-{num_nodes_per_class}-{hp}-{avg_deg_ratio}"
                model_key, model_path = _get_key_and_makedirs(
                    args=args, base_path=base_path, args_prefix=legend)

                max_mean_perf = -1
                for att_lambda in att_lambda_list:
                    for l2_lambda in l2_lambda_list:
                        args.att_lambda = att_lambda
                        args.l2_lambda = l2_lambda
                        pprint_args(args)

                        result_key = (att_lambda, l2_lambda)
                        result_path = os.path.join(
                            model_path,
                            "ms_result_{}.pkl".format(s_join("-", result_key)))

                        try:
                            # Cache written by this function; only load
                            # pickles from a trusted figs directory.
                            with open(result_path, "rb") as f:
                                many_seeds_result = pickle.load(f)
                            cprint("Load: {}".format(result_path), "blue")
                        except FileNotFoundError:
                            many_seeds_result = run_with_many_seeds_with_gpu(
                                args, num_total_runs)
                            with open(result_path, "wb") as f:
                                pickle.dump(many_seeds_result, f)
                                cprint("Dump: {}".format(result_path), "green")
                            # NOTE(review): nesting reconstructed from
                            # collapsed source; assumed to run only after a
                            # fresh training run — confirm.
                            garbage_collection_cuda()
                            cprint("Garbage collected", "green")

                        cur_mean_perf = float(
                            np.mean(many_seeds_result["test_perf_at_best_val"]))
                        cur_std_perf = float(
                            np.std(many_seeds_result["test_perf_at_best_val"]))
                        if cur_mean_perf > max_mean_perf:
                            max_mean_perf = cur_mean_perf
                            best_meta_dict[model_key]["mean_perf"] = cur_mean_perf
                            best_meta_dict[model_key]["std_perf"] = cur_std_perf
                            best_meta_dict[model_key]["att_lambda"] = att_lambda
                            best_meta_dict[model_key]["l2_lambda"] = l2_lambda
                            best_meta_dict[model_key]["many_seeds_result"] = many_seeds_result

                    # Only the first att_lambda is tried for non-SuperGAT
                    # models (presumably att_lambda has no effect there).
                    if not args.is_super_gat:
                        break

                mean_over_hp_list.append(best_meta_dict[model_key]["mean_perf"])
                std_over_hp_list.append(best_meta_dict[model_key]["std_perf"])

            deg_and_legend_to_mean_over_hp_list[deg_and_legend] = mean_over_hp_list
            deg_and_legend_to_std_over_hp_list[deg_and_legend] = std_over_hp_list

    pprint(deg_and_legend_to_mean_over_hp_list)

    if not draw_plot:
        return

    plot_line_with_std(
        tuple_to_mean_list=deg_and_legend_to_mean_over_hp_list,  # (deg, legend) -> List[perf] by homophily
        tuple_to_std_list=deg_and_legend_to_std_over_hp_list,
        x_label="Homophily",
        y_label="Test Accuracy",
        name_label_list=["Avg. Degree", "Model"],
        x_list=homophily_list,
        hue="Model",
        style="Model",
        col="Avg. Degree",
        hue_order=legend_list,
        x_lim=(0, None),
        custom_key=base_key,
        extension=extension,
    )

    # Re-index the results as (homophily, legend) -> List[perf] by degree.
    hp_and_legend_to_mean_over_deg_list = defaultdict(list)
    hp_and_legend_to_std_over_deg_list = defaultdict(list)
    legend_to_mean_std_num_agreed_neighbors_list = defaultdict(list)
    for deg, legend in deg_and_legend_to_mean_over_hp_list.keys():
        mean_over_hp_list = deg_and_legend_to_mean_over_hp_list[(deg, legend)]
        std_over_hp_list = deg_and_legend_to_std_over_hp_list[(deg, legend)]
        for hp, mean_of_hp, std_of_hp in zip(homophily_list, mean_over_hp_list, std_over_hp_list):
            hp_and_legend = (hp, legend)
            hp_and_legend_to_mean_over_deg_list[hp_and_legend].append(mean_of_hp)
            hp_and_legend_to_std_over_deg_list[hp_and_legend].append(std_of_hp)
            # hp * deg is used as the average number of agreed neighbors.
            legend_to_mean_std_num_agreed_neighbors_list[legend].append(
                (mean_of_hp, std_of_hp, hp * deg))

    mean_perf_list = []
    num_agreed_neighbors_list = []
    model_legend_list = []
    for legend, mean_std_num_agr_neighbors_list in legend_to_mean_std_num_agreed_neighbors_list.items():
        for mean_perf, std_perf, num_agr_neighbors in sorted(
                mean_std_num_agr_neighbors_list, key=lambda t: t[2]):
            mean_perf_list.append(mean_perf)
            model_legend_list.append(legend)
            num_agreed_neighbors_list.append(num_agr_neighbors)

    plot_scatter(
        xs=num_agreed_neighbors_list,
        ys=mean_perf_list,
        hues=model_legend_list,
        xlabel="Avg. Number of Agreed Neighbors",
        ylabel="Test Performance (Acc.)",
        hue_name="Model",
        custom_key=base_key,
    )

    plot_line_with_std(
        tuple_to_mean_list=hp_and_legend_to_mean_over_deg_list,
        tuple_to_std_list=hp_and_legend_to_std_over_deg_list,
        x_label="Avg. Degree (Log10)",  # Log
        y_label="Test Accuracy",
        name_label_list=["Homophily", "Model"],
        x_list=[to_log10(d) for d in degree_list],  # Log
        hue="Model",
        style="Model",
        col="Homophily",
        aspect=0.75,
        hue_order=legend_list,
        x_lim=(None, None),
        custom_key=base_key,
        extension=extension,
    )

    if plot_part_by_part:  # manual.
        # deg: [2.5, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0]

        def filtered_by_hp(hp_list, num_deg=None):
            """Keep only entries whose homophily is in hp_list (mean, std)."""
            return (
                {(hp, legend): (mean_list if not num_deg else mean_list[:num_deg])
                 for (hp, legend), mean_list in hp_and_legend_to_mean_over_deg_list.items()
                 if hp in hp_list},
                {(hp, legend): (std_list if not num_deg else std_list[:num_deg])
                 for (hp, legend), std_list in hp_and_legend_to_std_over_deg_list.items()
                 if hp in hp_list},
            )

        def get_mean_diff(h_and_l_to_m_over_d_list, first_legend, x100=True):
            """Return each legend's mean minus the first legend's mean."""
            h_and_l_to_mean_diff_over_d_list = dict()
            for (hp, legend), mean_list in h_and_l_to_m_over_d_list.items():
                if legend == first_legend:
                    continue
                mean_list_of_first = h_and_l_to_m_over_d_list[(hp, first_legend)]
                mean_diff_list = np.asarray(mean_list) - np.asarray(mean_list_of_first)
                if x100:
                    mean_diff_list = 100 * mean_diff_list
                h_and_l_to_mean_diff_over_d_list[(hp, legend)] = mean_diff_list.tolist()
            return h_and_l_to_mean_diff_over_d_list

        # NOTE(review): the buckets below hold homophily values, yet the test
        # is against degree_list; this may have been meant to check
        # homophily_list. Left unchanged pending confirmation.
        if 0.1 in degree_list:
            b1, b2, b3, b4 = [0.1, 0.3, 0.5], [0.7], [0.9], [0.7, 0.9]
        else:
            b1, b2, b3, b4 = [0.2, 0.4], [0.6], [0.8], [0.6, 0.8]

        hp135_mean, hp135_std = filtered_by_hp(b1)
        hp7_mean, hp7_std = filtered_by_hp(b2)
        hp9_mean, hp9_std = filtered_by_hp(b3)
        hp79_mean, hp79_std = filtered_by_hp(b4)

        if draw_diff_between_first:
            lf = legend_list[0]
            hp135_mean = get_mean_diff(hp135_mean, lf)
            hp7_mean = get_mean_diff(hp7_mean, lf)
            hp79_mean = get_mean_diff(hp79_mean, lf)
            hp9_mean = get_mean_diff(hp9_mean, lf)
            # Differences carry no std band.
            hp135_std = None
            hp7_std = None
            hp79_std = None
            hp9_std = None
            # The first legend is the baseline and is no longer plotted.
            legend_list = legend_list[1:]
            y_lim = None
            y_label = "Diff. of Test Acc. vs. GO (%p)"
        else:
            y_lim = None
            # Plain string: a trailing comma here previously made this a
            # 1-tuple.
            y_label = "Test Accuracy"

        degree_list = np.log10(degree_list).tolist()
        palette = ["grey", "#1976D2", "#D32F2F"]

        plot_line_with_std(
            tuple_to_mean_list=hp135_mean,
            tuple_to_std_list=hp135_std,
            x_label="Avg. Degree (Log10)",
            y_label=y_label,
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            palette=palette,
            custom_key=base_key + "_part135",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp79_mean,
            tuple_to_std_list=hp79_std,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part79",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp7_mean,
            tuple_to_std_list=hp7_std,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part7",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp9_mean,
            tuple_to_std_list=hp9_std,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part9",
            extension=extension,
        )