def __init__(self, args=None):
    """Load the pickled ``jknet_cora`` graph and build a random 80/20 split.

    The dataset directory is ``<this file's dir>/../../data/jknet_cora``; it is
    created if missing and handed to the parent constructor. The pickle at
    ``self.processed_paths[0]`` (presumably provided by the parent class --
    confirm there) must contain the keys ``'node_num'``, ``'xs'``, ``'ys'``
    and ``'edges'``.

    Sets ``self.num_nodes``, ``self.data`` and ``self.num_classes``.

    Args:
        args: Unused by this constructor.
    """
    dataset = "jknet_cora"
    path = osp.join(
        osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
    # exist_ok=True avoids the check-then-create race of a separate
    # osp.exists() test followed by makedirs().
    os.makedirs(path, exist_ok=True)
    super(CoraDataset, self).__init__(path)

    # NOTE: pickle can execute arbitrary code while loading; only use this
    # with dataset files from a trusted source.
    with open(self.processed_paths[0], 'rb') as fin:
        load_data = pickle.load(fin)
    self.num_nodes = load_data['node_num']

    data = Data()
    data.x = load_data['xs']
    data.y = load_data['ys']

    # Random 80% of the nodes are used for training. The draw goes through
    # NumPy's global RNG, so the split differs between runs unless the
    # caller seeds np.random beforehand.
    train_size = int(self.num_nodes * 0.8)
    train_mask = np.zeros((self.num_nodes, ), dtype=bool)
    train_idx = np.random.choice(
        np.arange(self.num_nodes), size=train_size, replace=False)
    train_mask[train_idx] = True

    # Every node that is not a training node is a test node.
    test_mask = np.ones((self.num_nodes, ), dtype=bool)
    test_mask[train_idx] = False
    # Validation reuses the test mask: same array object, so the two
    # resulting tensors share storage.
    val_mask = test_mask

    # Stored as a list of pairs; transpose to shape (2, num_edges).
    edges = load_data['edges']
    edges = np.array(edges, dtype=int).transpose((1, 0))

    data.edge_index = torch.from_numpy(edges)
    data.train_mask = torch.from_numpy(train_mask)
    data.test_mask = torch.from_numpy(test_mask)
    data.val_mask = torch.from_numpy(val_mask)
    data.x = torch.Tensor(data.x)
    data.y = torch.LongTensor(data.y)

    self.data = data
    # Labels are treated as 0-based class ids, so the count is max + 1.
    self.num_classes = torch.max(self.data.y).item() + 1
def read_gtn_data(self, folder):
    """Read the pickled GTN graph files in ``folder`` into ``self.data``.

    Loads ``edges.pkl``, ``labels.pkl`` and ``node_features.pkl``.
    ``edges`` must hold exactly four relation matrices; each is assumed to
    expose ``.shape`` and a ``.nonzero()`` returning ``(rows, cols)``
    (e.g. scipy sparse matrices -- confirm against the data producer).
    ``labels`` is indexed as ``[train, valid, test]``, each convertible to
    a 2-D array whose column 0 is the node id and column 1 the target.

    The resulting ``Data`` object carries:
        x            -- float node features
        pos          -- per-node type id (0, 1 or 2) derived from the relations
        edge_index   -- all relations' edges concatenated, shape (2, E)
        adj          -- list of ``(index, value)`` pairs, one per relation
                        plus a final identity (self-loop) entry
        train_node / train_target, valid_node / valid_target,
        test_node / test_target -- labelled node ids and their classes
        y            -- dense label vector; unlabelled nodes stay 0

    Args:
        folder: Directory containing the three pickle files.
    """
    def _load_pickle(name):
        # The context manager closes the handle deterministically instead
        # of leaving it to garbage collection.
        # NOTE: pickle can execute arbitrary code while loading; only use
        # this with dataset files from a trusted source.
        with open(osp.join(folder, name), 'rb') as fin:
            return pickle.load(fin)

    edges = _load_pickle('edges.pkl')
    labels = _load_pickle('labels.pkl')
    node_features = _load_pickle('node_features.pkl')

    data = Data()
    data.x = torch.from_numpy(node_features).type(torch.FloatTensor)

    num_nodes = edges[0].shape[0]
    node_type = np.zeros((num_nodes), dtype=int)
    assert len(edges) == 4
    assert len(edges[0].nonzero()) == 2

    # Compute each relation's (rows, cols) once; everything below reuses it.
    endpoints = [edge.nonzero() for edge in edges]

    # (row-node type, col-node type) for each relation, in file order.
    # Assignments are applied in this order, so later relations override
    # earlier ones for nodes that appear in several of them.
    relation_types = ((0, 1), (1, 0), (0, 2), (2, 0))
    for nonzero, (row_type, col_type) in zip(endpoints, relation_types):
        node_type[nonzero[0]] = row_type
        node_type[nonzero[1]] = col_type
    print(node_type)
    data.pos = torch.from_numpy(node_type)

    # One (2, E_k) LongTensor of edge endpoints per relation.
    relation_index = [
        torch.from_numpy(
            np.vstack((nonzero[0], nonzero[1]))).type(torch.LongTensor)
        for nonzero in endpoints
    ]
    # torch.cat allocates a new tensor, so edge_index does not alias the
    # per-relation tensors stored in data.adj below.
    data.edge_index = torch.cat(relation_index, 1)

    # Per-relation adjacency as (index, all-ones value) pairs ...
    adj = [
        (index, torch.ones(index.shape[1]).type(torch.FloatTensor))
        for index in relation_index
    ]
    # ... followed by the identity relation (one self-loop per node).
    self_loop_index = torch.stack(
        (torch.arange(0, num_nodes),
         torch.arange(0, num_nodes))).type(torch.LongTensor)
    self_loop_value = torch.ones(num_nodes).type(torch.FloatTensor)
    adj.append((self_loop_index, self_loop_value))
    data.adj = adj

    # Column 0 holds node ids, column 1 the class targets.
    train_labels = np.array(labels[0])
    valid_labels = np.array(labels[1])
    test_labels = np.array(labels[2])
    data.train_node = torch.from_numpy(train_labels[:, 0]).type(
        torch.LongTensor)
    data.train_target = torch.from_numpy(train_labels[:, 1]).type(
        torch.LongTensor)
    data.valid_node = torch.from_numpy(valid_labels[:, 0]).type(
        torch.LongTensor)
    data.valid_target = torch.from_numpy(valid_labels[:, 1]).type(
        torch.LongTensor)
    data.test_node = torch.from_numpy(test_labels[:, 0]).type(
        torch.LongTensor)
    data.test_target = torch.from_numpy(test_labels[:, 1]).type(
        torch.LongTensor)

    # Scatter the known targets into a dense per-node label vector.
    y = np.zeros((num_nodes), dtype=int)
    x_index = torch.cat((data.train_node, data.valid_node, data.test_node))
    y_index = torch.cat(
        (data.train_target, data.valid_target, data.test_target))
    y[x_index.numpy()] = y_index.numpy()
    data.y = torch.from_numpy(y)

    self.data = data