def create_H(args):
    """Build a degree-based prior matrix ``H`` and save it as ``H2.npy``.

    Two ``Dataset`` objects are constructed from ``args.source_dataset`` and
    ``args.target_dataset`` and their node degrees are read with
    ``get_nodes_degrees()``.  With ``T`` target degrees and ``S`` source
    degrees, ``H`` has shape ``(T, S)`` and, before normalisation,

        H[i, j] = |trg_deg[i] - src_deg[j]| / max(max(src_deg), trg_deg[i])

    The whole matrix is then divided by the sum of its entries and written
    to ``<args.out_dir>/H2.npy`` (the directory is created when missing).

    NOTE(review): entries grow with the degree *difference*, so this is a
    normalised dissimilarity rather than a similarity -- confirm that the
    consumers of ``H2.npy`` expect that orientation.

    NOTE(review): if every entry is zero (e.g. all degrees are equal) the
    final normalisation divides by zero and the saved matrix contains NaN.
    That matches the previous behaviour and is left untouched here.

    Args:
        args: Object exposing the attributes ``source_dataset``,
            ``target_dataset`` (each passed unchanged to ``Dataset``) and
            ``out_dir`` (output directory).

    Returns:
        None.  The result is written to disk and progress is printed.
    """
    source_dataset = Dataset(args.source_dataset)
    target_dataset = Dataset(args.target_dataset)
    src_degrees = np.array(source_dataset.get_nodes_degrees())
    trg_degrees = np.array(target_dataset.get_nodes_degrees())

    H = np.zeros((len(trg_degrees), len(src_degrees)))
    if len(trg_degrees):
        # The largest source degree is identical for every row, so compute
        # it once instead of once per target node.  It is only evaluated
        # when there is at least one row to fill, because max() of an empty
        # array raises.
        src_max = src_degrees.max()
        # Filling row by row keeps peak memory at a single (T, S) matrix;
        # each row is still computed as one vectorised NumPy expression.
        for i, trg_deg in enumerate(trg_degrees):
            H[i, :] = np.abs(trg_deg - src_degrees) / max(src_max, trg_deg)

    # In-place division avoids allocating a second full-size matrix.
    H /= H.sum()

    print("H shape: ", H.shape)
    # exist_ok=True removes the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(args.out_dir, exist_ok=True)
    np.save(os.path.join(args.out_dir, "H2.npy"), H)
    print("H has been saved to ", args.out_dir)
file.write(json.dumps(res)) if __name__ == "__main__": args = parse_args() source_dataset = Dataset(args.source_dataset) target_dataset = Dataset(args.target_dataset) groundtruth = graph_utils.load_gt(args.groundtruth, source_dataset.id2idx, target_dataset.id2idx, 'dict') source_groundtruth_nodes = list(groundtruth.keys()) target_groundtruth_nodes = list(groundtruth.values()) source_idx2id = {v: k for k, v in source_dataset.id2idx.items()} target_idx2id = {v: k for k, v in target_dataset.id2idx.items()} source_gt_id = [source_idx2id[node] for node in source_groundtruth_nodes] target_gt_id = [target_idx2id[node] for node in target_groundtruth_nodes] source_care_deg = source_dataset.get_nodes_degrees( )[source_groundtruth_nodes] target_care_deg = target_dataset.get_nodes_degrees( )[target_groundtruth_nodes] print("Number of nodes in groundtruth: {}".format(len(groundtruth))) print("Num source nodes: {}".format(len(source_dataset.G.nodes()))) print("Num target nodes: {}".format(len(target_dataset.G.nodes()))) # print("Source care deg: {}".format(source_care_deg)) # print("Source care deg: {}".format(target_care_deg)) G1 = filter_nodes(source_dataset.G, source_gt_id, 1) G2 = filter_nodes(target_dataset.G, target_gt_id, 1) # save G source_node_left = G1.nodes() target_node_left = G2.nodes() # save id2idx source_id2idx = {