Example 1
def visualize_glayout_without_training(layout="tsne", **kwargs):
    _args = get_args(**kwargs)
    pprint_args(_args)
    train_d, val_d, test_d = get_dataset_or_loader(
        _args.dataset_class,
        _args.dataset_name,
        _args.data_root,
        batch_size=_args.batch_size,
        seed=_args.seed,
    )
    data = train_d[0]
    plot_graph_layout(data.x.numpy(),
                      data.y.numpy(),
                      data.edge_index.numpy(),
                      args=_args,
                      edge_to_attention=None,
                      key="raw",
                      layout=layout)
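
A minimal invocation sketch for the function above, assuming it and its helpers (get_args, get_dataset_or_loader, plot_graph_layout) are importable from the same module; the keyword arguments are placeholder values that are simply forwarded to get_args.

if __name__ == '__main__':
    # Placeholder configuration; any kwargs accepted by get_args can be passed through.
    visualize_glayout_without_training(
        layout="tsne",
        model_name="GAT",
        dataset_class="Planetoid",
        dataset_name="Cora",
        custom_key="EV13NS",
    )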
Example 2
def get_model_and_preds(data, **kwargs):
    _args = get_args(**kwargs)
    _args.verbose = 1
    _args.save_model = False
    _args.epochs = 300
    pprint_args(_args)

    _alloc_gpu = blind_other_gpus(num_gpus_total=_args.num_gpus_total,
                                  num_gpus_to_use=_args.num_gpus_to_use,
                                  gpu_deny_list=_args.gpu_deny_list)
    if _alloc_gpu:
        cprint("Using GPU {}".format(_alloc_gpu), "yellow")
    _alloc_gpu_id = _alloc_gpu[0] if _alloc_gpu else 1  # fall back to GPU 1 if none was allocated

    model, ret = run(_args, gpu_id=_alloc_gpu_id, return_model=True)

    model = model.to("cpu")
    model.eval()
    with torch.no_grad():
        output = model(data.x, data.edge_index)[data.test_mask].cpu().numpy()
        pred_labels = np.argmax(output, axis=1)
    return model, pred_labels
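
A hedged usage sketch for get_model_and_preds: it needs a Data object, which can be obtained the same way the function does internally. All configuration values below are placeholders, not taken from the original.

# Placeholder configuration, forwarded to get_args both here and inside get_model_and_preds.
_kw = dict(model_name="GAT", dataset_class="Planetoid",
           dataset_name="Cora", custom_key="EV13NS")
_args = get_args(**_kw)
train_d, _, _ = get_dataset_or_loader(_args.dataset_class, _args.dataset_name, _args.data_root,
                                      batch_size=_args.batch_size, seed=_args.seed)
model, pred_labels = get_model_and_preds(train_d[0], **_kw)
print(pred_labels.shape)  # one predicted class per node in the test mask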
Example 3
def visualize_attention_metric_for_multiple_models(
        name_prefix_and_kwargs: List[Tuple[str, Dict]],
        unit_width_per_name=3,
        extension="png"):
    res = None
    total_args, num_layers, custom_key_list, name_prefix_list = None, None, [], []
    kld1_list, kld2_list, jsd_list, ent_list = [], [], [], []  # [L * M, N]
    for name_prefix, kwargs in name_prefix_and_kwargs:
        args = get_args(**kwargs)
        custom_key_list.append(args.custom_key)
        num_layers = args.num_layers

        train_d, val_d, test_d = get_dataset_or_loader(
            args.dataset_class,
            args.dataset_name,
            args.data_root,
            batch_size=args.batch_size,
            seed=args.seed,
        )
        if val_d is None and test_d is None:
            data_list = [train_d[0]]
        else:
            data_list = []
            for _data in chain(train_d, val_d, test_d):
                if _data.x.size(0) != len(_data.agreement_dist):
                    _data.agreement_dist = [
                        _ad for _ad in _data.agreement_dist[0]
                    ]
                    _data.uniform_att_dist = [
                        _uad for _uad in _data.uniform_att_dist[0]
                    ]
                data_list.append(_data)

        gpu_id = int(np.random.choice([
            g for g in range(args.num_gpus_total)
            if g not in args.gpu_deny_list
        ], 1))

        if args.verbose >= 1:
            pprint_args(args)
            cprint("Use GPU the ID of which is {}".format(gpu_id), "yellow")

        device = "cpu" if gpu_id is None \
            else torch.device('cuda:{}'.format(gpu_id) if torch.cuda.is_available() else 'cpu')

        model, ret = run(args, gpu_id=gpu_id, return_model=True)

        kld1_layer, kld2_layer, jsd_layer, ent_layer, *res = \
            get_attention_metric_for_single_model_and_multiple_data(model, data_list, device)
        kld1_list += kld1_layer
        kld2_list += kld2_layer
        jsd_list += jsd_layer
        ent_list += ent_layer
        name_prefix_list.append(name_prefix)
        total_args = args

        torch.cuda.empty_cache()

    total_args.custom_key = "-".join(sorted(custom_key_list))
    plot_kld_jsd_ent(kld1_list,
                     kld2_list,
                     jsd_list,
                     ent_list,
                     *res,
                     num_layers=num_layers,
                     model_args=total_args,
                     epoch=-1,
                     name_prefix_list=name_prefix_list,
                     unit_width_per_name=unit_width_per_name,
                     extension=extension,
                     flierprops={
                         "marker": "x",
                         "markersize": 12
                     })
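
A sketch of a possible call to the function above, comparing two configurations of the same dataset side by side; the name prefixes and custom_key values are placeholders.

visualize_attention_metric_for_multiple_models(
    name_prefix_and_kwargs=[
        ("model-A", dict(model_name="GAT", dataset_class="Planetoid",
                         dataset_name="Cora", custom_key="EV13NS")),
        ("model-B", dict(model_name="GAT", dataset_class="Planetoid",
                         dataset_name="Cora", custom_key="EV13NSO8")),
    ],
    unit_width_per_name=3,
    extension="png",
)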
Example 4
        for rk, rv in ret.items():
            results[rk].append(rv)
    return results


if __name__ == '__main__':

    num_total_runs = 7

    main_args = get_args(
        model_name="GAT",
        dataset_class="PygNodePropPredDataset",
        dataset_name="ogbn-arxiv",  # ogbn-products, ogbn-arxiv
        custom_key="EV13NS",
    )
    pprint_args(main_args)

    if len(main_args.gpu_deny_list) == main_args.num_gpus_total:
        alloc_gpu = [None]
        cprint("Use CPU", "yellow")
    else:
        alloc_gpu = blind_other_gpus(num_gpus_total=main_args.num_gpus_total,
                                     num_gpus_to_use=main_args.num_gpus_to_use,
                                     gpu_deny_list=main_args.gpu_deny_list)
        if not alloc_gpu:
            alloc_gpu = [int(np.random.choice([
                g for g in range(main_args.num_gpus_total)
                if g not in main_args.gpu_deny_list
            ], 1))]
Example 5
def analyze_rpg_by_degree_and_homophily(degree_list: List[float],
                                        homophily_list: List[float],
                                        legend_list: List[str],
                                        model_list: List[str],
                                        custom_key_list: List[str],
                                        att_lambda_list: List[float],
                                        l2_lambda_list: List[float],
                                        num_total_runs: int,
                                        num_nodes_per_class: int = 500,
                                        num_classes: int = 10,
                                        verbose=2,
                                        is_test=False,
                                        plot_part_by_part=False,
                                        draw_plot=True,
                                        draw_diff_between_first=False,
                                        extension="pdf"):
    def to_log10(v, eps=1e-5):
        return float(np.log10(v + eps))

    base_key = "analysis_rpg" + ("" if not is_test else "_test")
    base_path = os.path.join("../figs", base_key)

    best_meta_dict = defaultdict(dict)

    deg_and_legend_to_mean_over_hp_list, deg_and_legend_to_std_over_hp_list = {}, {}

    for deg in degree_list:

        avg_deg_ratio = deg / num_nodes_per_class

        for legend, model, key in zip(legend_list, model_list,
                                      custom_key_list):

            base_kwargs = {
                "model_name": model,
                "dataset_class": "RandomPartitionGraph",
                "dataset_name": f"rpg-{num_classes}-{num_nodes_per_class}-h-d",
                "custom_key": key,
            }
            args = get_args(**base_kwargs)
            args.verbose = verbose
            deg_and_legend = (deg, legend)

            if is_test:
                args.epochs = 2

            mean_over_hp_list, std_over_hp_list = [], []
            for hp in homophily_list:

                args.dataset_name = f"rpg-{num_classes}-{num_nodes_per_class}-{hp}-{avg_deg_ratio}"
                model_key, model_path = _get_key_and_makedirs(
                    args=args, base_path=base_path, args_prefix=legend)

                max_mean_perf = -1

                for att_lambda in att_lambda_list:
                    for l2_lambda in l2_lambda_list:
                        args.att_lambda = att_lambda
                        args.l2_lambda = l2_lambda
                        pprint_args(args)

                        result_key = (att_lambda, l2_lambda)
                        result_path = os.path.join(
                            model_path,
                            "ms_result_{}.pkl".format(s_join("-", result_key)))

                        try:
                            many_seeds_result = pickle.load(
                                open(result_path, "rb"))
                            cprint("Load: {}".format(result_path), "blue")

                        except FileNotFoundError:
                            many_seeds_result = run_with_many_seeds_with_gpu(
                                args, num_total_runs)
                            with open(result_path, "wb") as f:
                                pickle.dump(many_seeds_result, f)
                                cprint("Dump: {}".format(result_path), "green")
                                garbage_collection_cuda()
                                cprint("Garbage collected", "green")

                        cur_mean_perf = float(
                            np.mean(
                                many_seeds_result["test_perf_at_best_val"]))
                        cur_std_perf = float(
                            np.std(many_seeds_result["test_perf_at_best_val"]))
                        if cur_mean_perf > max_mean_perf:
                            max_mean_perf = cur_mean_perf
                            best_meta_dict[model_key]["mean_perf"] = cur_mean_perf
                            best_meta_dict[model_key]["std_perf"] = cur_std_perf
                            best_meta_dict[model_key]["att_lambda"] = att_lambda
                            best_meta_dict[model_key]["l2_lambda"] = l2_lambda
                            best_meta_dict[model_key]["many_seeds_result"] = many_seeds_result

                    if not args.is_super_gat:
                        break

                mean_over_hp_list.append(best_meta_dict[model_key]["mean_perf"])
                std_over_hp_list.append(best_meta_dict[model_key]["std_perf"])

            deg_and_legend_to_mean_over_hp_list[deg_and_legend] = mean_over_hp_list
            deg_and_legend_to_std_over_hp_list[deg_and_legend] = std_over_hp_list

    pprint(deg_and_legend_to_mean_over_hp_list)

    if not draw_plot:
        return

    plot_line_with_std(
        tuple_to_mean_list=deg_and_legend_to_mean_over_hp_list,  # (deg, legend) -> List[perf] by homophily
        tuple_to_std_list=deg_and_legend_to_std_over_hp_list,
        x_label="Homophily",
        y_label="Test Accuracy",
        name_label_list=["Avg. Degree", "Model"],
        x_list=homophily_list,
        hue="Model",
        style="Model",
        col="Avg. Degree",
        hue_order=legend_list,
        x_lim=(0, None),
        custom_key=base_key,
        extension=extension,
    )

    hp_and_legend_to_mean_over_deg_list = defaultdict(list)
    hp_and_legend_to_std_over_deg_list = defaultdict(list)
    legend_to_mean_std_num_agreed_neighbors_list = defaultdict(list)

    for deg, legend in deg_and_legend_to_mean_over_hp_list.keys():
        mean_over_hp_list = deg_and_legend_to_mean_over_hp_list[(deg, legend)]
        std_over_hp_list = deg_and_legend_to_std_over_hp_list[(deg, legend)]
        for hp, mean_of_hp, std_of_hp in zip(homophily_list, mean_over_hp_list,
                                             std_over_hp_list):
            hp_and_legend = (hp, legend)
            hp_and_legend_to_mean_over_deg_list[hp_and_legend].append(mean_of_hp)
            hp_and_legend_to_std_over_deg_list[hp_and_legend].append(std_of_hp)

            legend_to_mean_std_num_agreed_neighbors_list[legend].append(
                (mean_of_hp, std_of_hp, hp * deg))

    mean_perf_list = []
    num_agreed_neighbors_list = []
    model_legend_list = []
    for legend, mean_std_num_agr_neighbors_list in legend_to_mean_std_num_agreed_neighbors_list.items():
        for mean_perf, std_perf, num_agr_neighbors in sorted(
                mean_std_num_agr_neighbors_list, key=lambda t: t[2]):
            mean_perf_list.append(mean_perf)
            model_legend_list.append(legend)
            num_agreed_neighbors_list.append(num_agr_neighbors)

    plot_scatter(
        xs=num_agreed_neighbors_list,
        ys=mean_perf_list,
        hues=model_legend_list,
        xlabel="Avg. Number of Agreed Neighbors",
        ylabel="Test Performance (Acc.)",
        hue_name="Model",
        custom_key=base_key,
    )

    plot_line_with_std(
        tuple_to_mean_list=hp_and_legend_to_mean_over_deg_list,
        tuple_to_std_list=hp_and_legend_to_std_over_deg_list,
        x_label="Avg. Degree (Log10)",  # Log
        y_label="Test Accuracy",
        name_label_list=["Homophily", "Model"],
        x_list=[to_log10(d) for d in degree_list],  # Log
        hue="Model",
        style="Model",
        col="Homophily",
        aspect=0.75,
        hue_order=legend_list,
        x_lim=(None, None),
        custom_key=base_key,
        extension=extension,
    )

    if plot_part_by_part:  # manual.

        # deg: [2.5, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0]
        def filtered_by_hp(hp_list, num_deg=None):
            mean_filtered = {
                (hp, legend): (mean_list if not num_deg else mean_list[:num_deg])
                for (hp, legend), mean_list in hp_and_legend_to_mean_over_deg_list.items()
                if hp in hp_list
            }
            std_filtered = {
                (hp, legend): (std_list if not num_deg else std_list[:num_deg])
                for (hp, legend), std_list in hp_and_legend_to_std_over_deg_list.items()
                if hp in hp_list
            }
            return mean_filtered, std_filtered

        def get_mean_diff(h_and_l_to_m_over_d_list, first_legend, x100=True):
            h_and_l_to_mean_diff_over_d_list = dict()
            for (hp, legend), mean_list in h_and_l_to_m_over_d_list.items():
                if legend == first_legend:
                    continue
                mean_list_of_first = h_and_l_to_m_over_d_list[(hp, first_legend)]
                mean_diff_list = np.asarray(mean_list) - np.asarray(mean_list_of_first)
                if x100:
                    mean_diff_list = 100 * mean_diff_list
                mean_diff_list = mean_diff_list.tolist()
                h_and_l_to_mean_diff_over_d_list[(hp, legend)] = mean_diff_list
            return h_and_l_to_mean_diff_over_d_list

        if 0.1 in degree_list:
            b1, b2, b3, b4 = [0.1, 0.3, 0.5], [0.7], [0.9], [0.7, 0.9]
        else:
            b1, b2, b3, b4 = [0.2, 0.4], [0.6], [0.8], [0.6, 0.8]

        hp135_and_legend_to_mean_over_deg_list, hp135_and_legend_to_std_over_deg_list = filtered_by_hp(b1)
        hp7_and_legend_to_mean_over_deg_list, hp7_and_legend_to_std_over_deg_list = filtered_by_hp(b2)
        hp9_and_legend_to_mean_over_deg_list, hp9_and_legend_to_std_over_deg_list = filtered_by_hp(b3)
        hp79_and_legend_to_mean_over_deg_list, hp79_and_legend_to_std_over_deg_list = filtered_by_hp(b4)

        if draw_diff_between_first:
            lf = legend_list[0]
            hp135_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp135_and_legend_to_mean_over_deg_list, lf)
            hp7_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp7_and_legend_to_mean_over_deg_list, lf)
            hp79_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp79_and_legend_to_mean_over_deg_list, lf)
            hp9_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp9_and_legend_to_mean_over_deg_list, lf)
            hp135_and_legend_to_std_over_deg_list = None
            hp7_and_legend_to_std_over_deg_list = None
            hp79_and_legend_to_std_over_deg_list = None
            hp9_and_legend_to_std_over_deg_list = None
            legend_list = legend_list[1:]
            y_lim = None
            y_label = "Diff. of Test Acc. vs. GO (%p)"
        else:
            y_lim = None
            y_label = "Test Accuracy",

        degree_list = np.log10(degree_list).tolist()

        palette = ["grey", "#1976D2", "#D32F2F"]

        plot_line_with_std(
            tuple_to_mean_list=hp135_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp135_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label=y_label,
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            palette=palette,
            custom_key=base_key + "_part135",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp79_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp79_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part79",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp7_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp7_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part7",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp9_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp9_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part9",
            extension=extension,
        )
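
A hedged call sketch for analyze_rpg_by_degree_and_homophily; every list below is a placeholder grid, is_test=True limits each run to 2 epochs as handled inside the function, and draw_plot=False stops after printing the aggregated results.

analyze_rpg_by_degree_and_homophily(
    degree_list=[2.5, 5.0, 25.0, 100.0],
    homophily_list=[0.1, 0.3, 0.5, 0.7, 0.9],
    legend_list=["GO", "DP", "SD"],        # placeholder legend names
    model_list=["GAT", "GAT", "GAT"],
    custom_key_list=["NE", "EV1", "EV2"],  # placeholder custom keys
    att_lambda_list=[0.01, 0.1, 1.0],
    l2_lambda_list=[1e-5, 1e-4],
    num_total_runs=5,
    is_test=True,
    draw_plot=False,
)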