Example #1
0
def create_H(args):
    """Build a degree-based prior matrix H and save it as ``H2.npy``.

    For target node ``i`` and source node ``j``::

        H[i, j] = |deg_t(i) - deg_s(j)| / max(max(deg_s), deg_t(i))

    H is then rescaled so that all of its entries sum to 1.

    NOTE(review): larger entries mean *more dissimilar* degrees; confirm
    that consumers of ``H2.npy`` expect a dissimilarity, not a similarity.

    Args:
        args: Object exposing ``source_dataset`` and ``target_dataset``
            (each passed to ``Dataset``) and ``out_dir`` (directory that
            receives ``H2.npy``; it is created when missing).

    Returns:
        None. The matrix, of shape ``(n_target, n_source)``, is written
        to ``<out_dir>/H2.npy``.
    """
    source_dataset = Dataset(args.source_dataset)
    target_dataset = Dataset(args.target_dataset)
    # Assumes get_nodes_degrees() yields one numeric degree per node (1-D).
    src_degrees = np.array(source_dataset.get_nodes_degrees(), dtype=float)
    trg_degrees = np.array(target_dataset.get_nodes_degrees(), dtype=float)

    # Broadcasting builds every |deg_t(i) - deg_s(j)| in one shot:
    # rows follow target nodes, columns follow source nodes.
    H = np.abs(trg_degrees[:, None] - src_degrees[None, :])

    if H.size:
        # Per-row denominator: max(largest source degree, this target degree).
        row_scale = np.maximum(src_degrees.max(), trg_degrees)
        # A zero denominator implies every numerator in that row is zero as
        # well, so dividing by 1 keeps the row at 0 instead of producing NaN.
        row_scale[row_scale == 0] = 1.0
        H /= row_scale[:, None]

        # Normalise to unit total mass. If all degrees coincide the matrix is
        # identically zero; leave it as zeros rather than dividing by zero.
        total = H.sum()
        if total > 0:
            H /= total

    print("H shape: ", H.shape)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(args.out_dir, exist_ok=True)
    np.save(os.path.join(args.out_dir, "H2.npy"), H)
    print("H has been saved to ", args.out_dir)


if __name__ == "__main__":
    args = parse_args()
    source_dataset = Dataset(args.source_dataset)
    target_dataset = Dataset(args.target_dataset)
    groundtruth = graph_utils.load_gt(args.groundtruth, source_dataset.id2idx,
                                      target_dataset.id2idx, 'dict')
    source_groundtruth_nodes = list(groundtruth.keys())
    target_groundtruth_nodes = list(groundtruth.values())
    source_idx2id = {v: k for k, v in source_dataset.id2idx.items()}
    target_idx2id = {v: k for k, v in target_dataset.id2idx.items()}
    source_gt_id = [source_idx2id[node] for node in source_groundtruth_nodes]
    target_gt_id = [target_idx2id[node] for node in target_groundtruth_nodes]
    source_care_deg = source_dataset.get_nodes_degrees(
    )[source_groundtruth_nodes]
    target_care_deg = target_dataset.get_nodes_degrees(
    )[target_groundtruth_nodes]

    print("Number of nodes in groundtruth: {}".format(len(groundtruth)))
    print("Num source nodes: {}".format(len(source_dataset.G.nodes())))
    print("Num target nodes: {}".format(len(target_dataset.G.nodes())))
    # print("Source care deg: {}".format(source_care_deg))
    # print("Source care deg: {}".format(target_care_deg))
    G1 = filter_nodes(source_dataset.G, source_gt_id, 1)
    G2 = filter_nodes(target_dataset.G, target_gt_id, 1)
    # save G
    source_node_left = G1.nodes()
    target_node_left = G2.nodes()
    # save id2idx
    source_id2idx = {