def process(self):
    data_list = []

    dp.get_dataset("ZINC_val", regression=True)
    node_labels = pre.get_all_node_labels("ZINC_full", True, True)
    targets = pre.read_targets("ZINC_val", list(range(0, 24445)))

    # The validation split covers graphs 225011..249455 of ZINC_full.
    node_labels_1 = node_labels[225011:249456]
    matrices = pre.get_all_matrices_wl("ZINC_val", list(range(0, 24445)))
    targets_1 = targets

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous()
        edge_index_2 = torch.tensor(m[1]).t().contiguous()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2
        data.x = torch.from_numpy(np.array(node_labels_1[i])).to(torch.float)
        data.y = torch.from_numpy(np.array([targets_1[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    dp.get_dataset("ZINC_test", regression=True)  # TODO Change this
    node_labels = pre.get_all_node_labels("ZINC_full", True, True)
    targets = pre.read_targets("ZINC_test", list(range(0, 5000)))

    # The test split covers graphs 220011..225010 of ZINC_full.
    node_labels_1 = node_labels[220011:225011]
    matrices = pre.get_all_matrices_wl("ZINC_test", list(range(0, 5000)))
    targets_1 = targets

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous()
        edge_index_2 = torch.tensor(m[1]).t().contiguous()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2
        # one_hot = np.eye(492)[node_labels[i]]
        data.x = torch.from_numpy(np.array(node_labels_1[i])).to(torch.float)
        data.y = torch.from_numpy(np.array([targets_1[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    # Read the train/val/test splits (one line of comma-separated indices each).
    with open("test_al_50.index", "r") as infile:
        indices_test = [int(i) for i in infile.read().split(",")]
    with open("val_al_50.index", "r") as infile:
        indices_val = [int(i) for i in infile.read().split(",")]
    with open("train_al_50.index", "r") as infile:
        indices_train = [int(i) for i in infile.read().split(",")]

    targets = dp.get_dataset("alchemy_full", multigregression=True)
    tmp_1 = targets[indices_train].tolist()
    tmp_2 = targets[indices_val].tolist()
    tmp_3 = targets[indices_test].tolist()
    targets = tmp_1 + tmp_2 + tmp_3

    node_labels = pre.get_all_node_labels_alchem_1(True, True, indices_train,
                                                   indices_val, indices_test)
    edge_labels = pre.get_all_edge_labels_alchem_1(True, True, indices_train,
                                                   indices_val, indices_test)

    matrices = pre.get_all_matrices_1("alchemy_full", indices_train)
    matrices.extend(pre.get_all_matrices_1("alchemy_full", indices_val))
    matrices.extend(pre.get_all_matrices_1("alchemy_full", indices_test))

    for i, m in enumerate(matrices):
        data = Data()
        data.edge_index = torch.tensor(m).t().contiguous()

        # One-hot encode node labels (6 classes) and edge labels (4 classes).
        one_hot = np.eye(6)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        one_hot = np.eye(4)[edge_labels[i]]
        data.edge_attr = torch.from_numpy(one_hot).to(torch.float)

        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    targets = dp.get_dataset("alchemy_full", multigregression=True).tolist()
    node_labels = pre.get_all_node_labels("alchemy_full", True, True)
    matrices = pre.get_all_matrices("alchemy_full", list(range(202579)))

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous()
        edge_index_2 = torch.tensor(m[1]).t().contiguous()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2

        # One-hot encode node labels (83 classes).
        one_hot = np.eye(83)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    targets = dp.get_dataset("ZINC_full", regression=True).tolist()
    node_labels = pre.get_all_node_labels_1("ZINC_full", True)
    edge_labels = pre.get_all_edge_labels_1("ZINC_full")
    matrices = pre.get_all_matrices_1("ZINC_full", list(range(0, 249456)))

    for i, m in enumerate(matrices):
        data = Data()
        data.edge_index = torch.tensor(m).t().contiguous()

        # One-hot encode node labels (28 classes) and edge labels (3 classes).
        one_hot = np.eye(28)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        one_hot = np.eye(3)[edge_labels[i]]
        data.edge_attr = torch.from_numpy(one_hot).to(torch.float)

        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    # Read the train/val/test splits (one line of comma-separated indices each).
    with open("train.index.txt", "r") as infile:
        indices_train = [int(i) for i in infile.read().split(",")]
    with open("val.index.txt", "r") as infile:
        indices_val = [int(i) for i in infile.read().split(",")]
    with open("test.index.txt", "r") as infile:
        indices_test = [int(i) for i in infile.read().split(",")]

    dp.get_dataset("ZINC_train", regression=True)
    dp.get_dataset("ZINC_test", regression=True)
    dp.get_dataset("ZINC_val", regression=True)

    node_labels = pre.get_all_node_labels_ZINC(True, True, indices_train,
                                               indices_val, indices_test)

    targets = pre.read_targets("ZINC_train", indices_train)
    targets.extend(pre.read_targets("ZINC_val", indices_val))
    targets.extend(pre.read_targets("ZINC_test", indices_test))

    matrices = pre.get_all_matrices_dwl("ZINC_train", indices_train)
    matrices.extend(pre.get_all_matrices_dwl("ZINC_val", indices_val))
    matrices.extend(pre.get_all_matrices_dwl("ZINC_test", indices_test))

    for i, m in enumerate(matrices):
        # Edge indices of the two tuple graphs (local and global variants).
        edge_index_1_l = torch.tensor(m[0]).t().contiguous()
        edge_index_1_g = torch.tensor(m[1]).t().contiguous()
        edge_index_2_l = torch.tensor(m[2]).t().contiguous()
        edge_index_2_g = torch.tensor(m[3]).t().contiguous()

        data = Data()
        data.edge_index_1_l = edge_index_1_l
        data.edge_index_1_g = edge_index_1_g
        data.edge_index_2_l = edge_index_2_l
        data.edge_index_2_g = edge_index_2_g

        # One-hot encode node labels (445 classes).
        one_hot = np.eye(445)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    # Read the train/val splits (one line of comma-separated indices each);
    # the test split is the first 5000 graphs.
    with open("train_50.index.txt", "r") as infile:
        indices_train = [int(i) for i in infile.read().split(",")]
    with open("val_50.index.txt", "r") as infile:
        indices_val = [int(i) for i in infile.read().split(",")]
    indices_test = list(range(0, 5000))

    dp.get_dataset("ZINC_train")
    dp.get_dataset("ZINC_test")
    dp.get_dataset("ZINC_val")

    node_labels = pre.get_all_node_labels_ZINC_connected(
        True, True, indices_train, indices_val, indices_test)

    targets = pre.read_targets("ZINC_train", indices_train)
    targets.extend(pre.read_targets("ZINC_val", indices_val))
    targets.extend(pre.read_targets("ZINC_test", indices_test))

    matrices = pre.get_all_matrices_connected("ZINC_train", indices_train)
    matrices.extend(pre.get_all_matrices_connected("ZINC_val", indices_val))
    matrices.extend(pre.get_all_matrices_connected("ZINC_test", indices_test))

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous().long()
        edge_index_2 = torch.tensor(m[1]).t().contiguous().long()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2

        # One-hot encode node labels (445 classes).
        one_hot = np.eye(445)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def run(with_install=True):
    if with_install:
        install_dependencies()

    base_path = os.path.join("kernels", "node_labels")
    ds_name = "ENZYMES"

    classes = dp.get_dataset(ds_name)
    G = tud_to_networkx(ds_name)
    print(f"Number of graphs in data set is {len(G)}")
    print(f"Number of classes {len(set(classes.tolist()))}")

    labels = get_labels(G)
    graph_dict = get_graph_dict(G, classes)
    print_graph_information(graph_dict)
    visualize(graph_dict[6][7])

    data = load_data()
    eval_wl(data, classes)

    # Per-graph degree histograms. Build in LIL format, which supports
    # efficient incremental assignment, then convert to CSR for fast products.
    max_nodes = max(map(lambda x: x.number_of_nodes(), G))
    histograms = lil_matrix((len(G), max_nodes))
    for i, g in enumerate(G):
        for n, d in g.degree():
            histograms[i, n] = d
    histograms = histograms.tocsr()
    histogram_gram = histograms @ histograms.T

    # Per-graph degree-centrality vectors.
    centrality = lil_matrix((len(G), max_nodes))
    for i, g in enumerate(G):
        for n, d in nx.degree_centrality(g).items():
            centrality[i, n] = d
    centrality = centrality.tocsr()
    centrality_gram = centrality @ centrality.T

    # Combine the 2-iteration WL feature vectors with the degree histograms.
    val = data["vectors"]["wl"][2].T.dot(histograms)
    print(val.shape)
    normalized = [aux.normalize_feature_vector(val)]
    print(normalized[0].shape)
    print(
        ke.linear_svm_evaluation(normalized,
                                 classes,
                                 num_repetitions=10,
                                 all_std=True))
def eval_all(data):
    """Evaluates all kernels on the given data.

    Args:
        data (dict): Maps a data type ('vectors' for feature vectors; any
            other key is treated as Gram matrices) to a dict mapping kernel
            names to their precomputed data.

    Returns:
        dict: Nested dict with the evaluation result per data type and kernel.
    """
    classes = dp.get_dataset('ENZYMES')
    result = {}
    for data_type in data.keys():
        # Feature vectors are evaluated with a linear SVM, Gram matrices
        # with a kernelized SVM.
        mode = 'LINEAR' if data_type == 'vectors' else 'KERNEL'
        result[data_type] = {}
        print('MODE:', mode)
        for kernel in data[data_type]:
            print(f'\nEvaluating {kernel} SVM...')
            result[data_type][kernel] = eval_kernel(data[data_type][kernel],
                                                    classes, mode)
            print(f'{data_type}-{kernel} : {result[data_type][kernel]}')
    return result
def process(self):
    data_list = []

    targets = dp.get_dataset("QM9", multigregression=True).tolist()
    attributes = pre.get_all_attributes("QM9")
    node_labels = pre.get_all_node_labels("QM9", False, False)
    matrices = pre.get_all_matrices("QM9", list(range(129433)))

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous()
        edge_index_2 = torch.tensor(m[1]).t().contiguous()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2

        one_hot = np.eye(3)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)

        # Continuous information: the first 13 columns are node features,
        # the remaining columns are 3D coordinates.
        data.first = torch.from_numpy(
            np.array(attributes[i][0])[:, 0:13]).to(torch.float)
        data.first_coord = torch.from_numpy(
            np.array(attributes[i][0])[:, 13:]).to(torch.float)
        data.second = torch.from_numpy(
            np.array(attributes[i][1])[:, 0:13]).to(torch.float)
        data.second_coord = torch.from_numpy(
            np.array(attributes[i][1])[:, 13:]).to(torch.float)
        data.dist = torch.norm(data.first_coord - data.second_coord,
                               p=2, dim=-1).view(-1, 1)
        data.edge_attr = torch.from_numpy(
            np.array(attributes[i][2])).to(torch.float)

        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def process(self):
    data_list = []

    indices_train = list(range(0, 220011))
    indices_val = list(range(0, 24445))
    indices_test = list(range(0, 5000))

    dp.get_dataset("ZINC_train", regression=True)
    dp.get_dataset("ZINC_test", regression=True)
    dp.get_dataset("ZINC_val", regression=True)

    node_labels = pre.get_all_node_labels_ZINC(True, True, indices_train,
                                               indices_val, indices_test)

    targets = pre.read_targets("ZINC_train", indices_train)
    targets.extend(pre.read_targets("ZINC_val", indices_val))
    targets.extend(pre.read_targets("ZINC_test", indices_test))

    # Restrict to the first 50000 training graphs.
    node_labels = node_labels[0:50000]
    matrices = pre.get_all_matrices_dwl("ZINC_train", list(range(0, 50000)))
    targets = targets[0:50000]

    for i, m in enumerate(matrices):
        # Edge indices of the two tuple graphs (local and global variants).
        edge_index_1_l = torch.tensor(m[0]).t().contiguous()
        edge_index_1_g = torch.tensor(m[1]).t().contiguous()
        edge_index_2_l = torch.tensor(m[2]).t().contiguous()
        edge_index_2_g = torch.tensor(m[3]).t().contiguous()

        data = Data()
        data.edge_index_1_l = edge_index_1_l
        data.edge_index_1_g = edge_index_1_g
        data.edge_index_2_l = edge_index_2_l
        data.edge_index_2_g = edge_index_2_g

        data.x = torch.from_numpy(np.array(node_labels[i])).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
import auxiliarymethods.auxiliary_methods as aux
import auxiliarymethods.datasets as dp
import kernel_baselines as kb
from auxiliarymethods.kernel_evaluation import linear_svm_evaluation

# Download dataset.
dataset = "MOLT-4"
classes = dp.get_dataset(dataset)
use_labels, use_edge_labels = True, True

all_matrices = []
# Compute 1-WL kernel for 1 to 5 iterations.
for i in range(1, 6):
    # Use node labels and edge labels.
    gm = kb.compute_wl_1_sparse(dataset, i, use_labels, use_edge_labels)

    # Apply \ell_2 normalization.
    gm_n = aux.normalize_feature_vector(gm)
    all_matrices.append(gm_n)

# Perform 10 repetitions of 10-CV using LIBLINEAR.
print(
    linear_svm_evaluation(all_matrices,
                          classes,
                          num_repetitions=10,
                          all_std=True))
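# For reference, a minimal sketch of what the \ell_2 normalization above
# plausibly does. This is an assumption for illustration, not the actual
# aux.normalize_feature_vector implementation: each graph's sparse feature
# vector (one row of the matrix) is scaled to unit Euclidean length.
import numpy as np
from scipy.sparse import csr_matrix, diags


def l2_normalize_rows(features: csr_matrix) -> csr_matrix:
    # Row-wise \ell_2 norms; guard empty rows against division by zero.
    norms = np.asarray(
        np.sqrt(features.multiply(features).sum(axis=1))).ravel()
    norms[norms == 0.0] = 1.0
    # Scale row i by 1 / ||row_i||_2.
    return diags(1.0 / norms) @ features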
import auxiliarymethods.datasets as dp
from auxiliarymethods.gnn_evaluation import gnn_evaluation
# Assumed module path for the TUDataset-to-networkx converter.
from auxiliarymethods.reader import tud_to_networkx

dataset = "PROTEINS"

# Download dataset.
dp.get_dataset(dataset)

# Output dataset as a list of graphs.
graph_db = tud_to_networkx(dataset)
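# A small usage sketch (not from the original script): iterate over the
# converted networkx graphs and print basic statistics of the dataset.
num_nodes = [g.number_of_nodes() for g in graph_db]
num_edges = [g.number_of_edges() for g in graph_db]
print(f"Graphs: {len(graph_db)}")
print(f"Avg. nodes: {sum(num_nodes) / len(num_nodes):.2f}")
print(f"Avg. edges: {sum(num_edges) / len(num_edges):.2f}")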
def main():
    num_reps = 10

    # Smaller datasets.
    datasets = [["IMDB-BINARY", False], ["IMDB-MULTI", False], ["NCI1", True],
                ["NCI109", True], ["PROTEINS", True], ["PTC_FM", True],
                ["REDDIT-BINARY", False], ["ENZYMES", True]]

    results = []
    for d, use_labels in datasets:
        dp.get_dataset(d)

        acc, s_1, s_2 = gnn_evaluation(GIN0, d, [1, 2, 3, 4, 5], [32, 64, 128],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GIN0 " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GIN0 " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

        acc, s_1, s_2 = gnn_evaluation(GIN, d, [1, 2, 3, 4, 5], [32, 64, 128],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

    num_reps = 3

    # Larger datasets with edge labels.
    datasets = [["YeastH", True], ["UACC257", True], ["UACC257H", True],
                ["OVCAR-8", True], ["OVCAR-8H", True]]
    for d, use_labels in datasets:
        dp.get_dataset(d)

        acc, s_1, s_2 = gnn_evaluation(GINE, d, [2], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINE " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GINE " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

        acc, s_1, s_2 = gnn_evaluation(GINE0, d, [2], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINE0 " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GINE0 " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))
import auxiliarymethods.auxiliary_methods as aux
import auxiliarymethods.datasets as dp
import kernel_baselines as kb
from auxiliarymethods.kernel_evaluation import kernel_svm_evaluation

# Download dataset.
classes = dp.get_dataset("ENZYMES")
use_labels, use_edge_labels = True, False

all_matrices = []
# Compute 1-WL kernel for 1 to 5 iterations.
for i in range(1, 6):
    # Use node labels and no edge labels.
    gm = kb.compute_wl_1_dense("ENZYMES", i, use_labels, use_edge_labels)

    # Apply cosine normalization.
    gm = aux.normalize_gram_matrix(gm)
    all_matrices.append(gm)

# Perform 10 repetitions of 10-CV using LIBSVM.
print(
    kernel_svm_evaluation(all_matrices,
                          classes,
                          num_repetitions=10,
                          all_std=True))
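# A minimal sketch of the cosine normalization above; an assumption about
# what aux.normalize_gram_matrix does, not its actual implementation.
# Entry (i, j) is divided by sqrt(K[i, i] * K[j, j]), so every
# self-similarity on the diagonal becomes 1.
import numpy as np


def cosine_normalize(gram: np.ndarray) -> np.ndarray:
    diag = np.sqrt(np.diag(gram))
    diag[diag == 0.0] = 1.0  # Guard against zero self-similarity.
    return gram / np.outer(diag, diag)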
def main():
    ### Smaller datasets using LIBSVM.
    datasets = [["ENZYMES", True], ["IMDB-BINARY", False],
                ["IMDB-MULTI", False], ["NCI1", True], ["PROTEINS", True],
                ["REDDIT-BINARY", False]]

    # Number of repetitions of 10-CV.
    num_reps = 10

    results = []
    for dataset, use_labels in datasets:
        classes = dp.get_dataset(dataset)

        # 1-WL kernel, number of iterations in [1:6].
        all_matrices = []
        for i in range(1, 6):
            gm = kb.compute_wl_1_dense(dataset, i, use_labels, False)
            gm_n = aux.normalize_gram_matrix(gm)
            all_matrices.append(gm_n)
        acc, s_1, s_2 = kernel_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "WL1 " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "WL1 " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

        # WLOA kernel, number of iterations in [1:6].
        all_matrices = []
        for i in range(1, 6):
            gm = kb.compute_wloa_dense(dataset, i, use_labels, False)
            gm_n = aux.normalize_gram_matrix(gm)
            all_matrices.append(gm_n)
        acc, s_1, s_2 = kernel_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "WLOA " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "WLOA " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

        # Graphlet kernel.
        all_matrices = []
        gm = kb.compute_graphlet_dense(dataset, use_labels, False)
        gm_n = aux.normalize_gram_matrix(gm)
        all_matrices.append(gm_n)
        acc, s_1, s_2 = kernel_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "GR " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "GR " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

        # Shortest-path kernel.
        all_matrices = []
        gm = kb.compute_shortestpath_dense(dataset, use_labels)
        gm_n = aux.normalize_gram_matrix(gm)
        all_matrices.append(gm_n)
        acc, s_1, s_2 = kernel_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "SP " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "SP " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

    # Number of repetitions of 10-CV.
    num_reps = 3

    ### Larger datasets using LIBLINEAR with edge labels.
    datasets = [["MOLT-4", True, True], ["Yeast", True, True],
                ["MCF-7", True, True], ["github_stargazers", False, False],
                ["reddit_threads", False, False]]
    for dataset, use_labels, use_edge_labels in datasets:
        classes = dp.get_dataset(dataset)

        # 1-WL kernel, number of iterations in [1:6].
        all_matrices = []
        for i in range(1, 6):
            gm = kb.compute_wl_1_sparse(dataset, i, use_labels,
                                        use_edge_labels)
            gm_n = aux.normalize_feature_vector(gm)
            all_matrices.append(gm_n)
        acc, s_1, s_2 = linear_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "WL1SP " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "WL1SP " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

        # Graphlet kernel.
        all_matrices = []
        gm = kb.compute_graphlet_sparse(dataset, use_labels, use_edge_labels)
        gm_n = aux.normalize_feature_vector(gm)
        all_matrices.append(gm_n)
        acc, s_1, s_2 = linear_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "GRSP " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "GRSP " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

        # Shortest-path kernel.
        all_matrices = []
        gm = kb.compute_shortestpath_sparse(dataset, use_labels)
        gm_n = aux.normalize_feature_vector(gm)
        all_matrices.append(gm_n)
        acc, s_1, s_2 = linear_svm_evaluation(all_matrices,
                                              classes,
                                              num_repetitions=num_reps,
                                              all_std=True)
        print(dataset + " " + "SPSP " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(dataset + " " + "SPSP " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

    for r in results:
        print(r)
def process(self):
    data_list = []

    # Read the train/val/test splits (one line of comma-separated indices each).
    with open("test_al_10.index", "r") as infile:
        indices_test = [int(i) for i in infile.read().split(",")]
    with open("val_al_10.index", "r") as infile:
        indices_val = [int(i) for i in infile.read().split(",")]
    with open("train_al_10.index", "r") as infile:
        indices_train = [int(i) for i in infile.read().split(",")]

    targets = dp.get_dataset("alchemy_full", multigregression=True)
    tmp_1 = targets[indices_train].tolist()
    tmp_2 = targets[indices_val].tolist()
    tmp_3 = targets[indices_test].tolist()
    targets = tmp_1 + tmp_2 + tmp_3

    node_labels = pre.get_all_node_labels_allchem_3(
        True, True, indices_train, indices_val, indices_test)

    matrices = pre.get_all_matrices_3_connected("alchemy_full", indices_train)
    matrices.extend(
        pre.get_all_matrices_3_connected("alchemy_full", indices_val))
    matrices.extend(
        pre.get_all_matrices_3_connected("alchemy_full", indices_test))

    for i, m in enumerate(matrices):
        edge_index_1 = torch.tensor(m[0]).t().contiguous()
        edge_index_2 = torch.tensor(m[1]).t().contiguous()
        edge_index_3 = torch.tensor(m[2]).t().contiguous()

        data = Data()
        data.edge_index_1 = edge_index_1
        data.edge_index_2 = edge_index_2
        data.edge_index_3 = edge_index_3

        # One-hot encode node labels (163 classes).
        one_hot = np.eye(163)[node_labels[i]]
        data.x = torch.from_numpy(one_hot).to(torch.float)
        data.y = torch.from_numpy(np.array([targets[i]])).to(torch.float)
        data_list.append(data)

    data, slices = self.collate(data_list)
    torch.save((data, slices), self.processed_paths[0])
def main():
    num_reps = 10

    ### Smaller datasets.
    datasets = [["IMDB-BINARY", False], ["IMDB-MULTI", False], ["NCI1", True],
                ["PROTEINS", True], ["REDDIT-BINARY", False],
                ["ENZYMES", True]]

    results = []
    for d, use_labels in datasets:
        # Download dataset.
        dp.get_dataset(d)

        # GIN, dataset d, layers in [1:6], hidden dimension in {32,64,128}.
        acc, s_1, s_2 = gnn_evaluation(GIN, d, [1, 2, 3, 4, 5], [32, 64, 128],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

        # GIN with jumping knowledge, dataset d, layers in [1:6],
        # hidden dimension in {32,64,128}.
        acc, s_1, s_2 = gnn_evaluation(GINWithJK, d, [1, 2, 3, 4, 5],
                                       [32, 64, 128],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINWithJK " + str(acc) + " " + str(s_1) + " " +
              str(s_2))
        results.append(d + " " + "GINWithJK " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

    num_reps = 3

    ### Midscale datasets with edge labels.
    datasets = [["MOLT-4", True, True], ["Yeast", True, True],
                ["MCF-7", True, True]]
    for d, use_labels, _ in datasets:
        print(d)
        dp.get_dataset(d)

        # GINE (GIN with edge labels), dataset d, 3 layers, hidden dimension 64.
        acc, s_1, s_2 = gnn_evaluation(GINE, d, [3], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINE " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GINE " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

        # GINE with jumping knowledge, dataset d, 3 layers, hidden dimension 64.
        acc, s_1, s_2 = gnn_evaluation(GINEWithJK, d, [3], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINEJK " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GINEJK " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

    ### Larger datasets without edge labels.
    datasets = [["reddit_threads", False, False],
                ["github_stargazers", False, False]]
    for d, use_labels, _ in datasets:
        print(d)
        dp.get_dataset(d)

        # GIN, dataset d, 3 layers, hidden dimension 64.
        acc, s_1, s_2 = gnn_evaluation(GIN, d, [3], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GIN " + str(acc) + " " + str(s_1) + " " +
                       str(s_2))

        # GIN with jumping knowledge, dataset d, 3 layers, hidden dimension 64.
        acc, s_1, s_2 = gnn_evaluation(GINWithJK, d, [3], [64],
                                       max_num_epochs=200,
                                       batch_size=64,
                                       start_lr=0.01,
                                       num_repetitions=num_reps,
                                       all_std=True)
        print(d + " " + "GINJK " + str(acc) + " " + str(s_1) + " " + str(s_2))
        results.append(d + " " + "GINJK " + str(acc) + " " + str(s_1) +
                       " " + str(s_2))

    for r in results:
        print(r)