Example #1
0
    def __init__(self, args=None):
        """Load the pickled ``jknet_cora`` graph and build a random node split.

        The processed file is read as a dict with the keys ``'node_num'``,
        ``'xs'``, ``'ys'`` and ``'edges'``.  80% of the nodes (drawn without
        replacement through ``np.random``) form the training set; every
        remaining node is used for both the validation and the test set.

        Args:
            args: Not used by this constructor.
        """
        dataset = "jknet_cora"
        path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data", dataset)
        # exist_ok avoids the check-then-create race when several processes
        # initialise the dataset at the same time.
        os.makedirs(path, exist_ok=True)
        super(CoraDataset, self).__init__(path)
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(self.processed_paths[0], 'rb') as fin:
            load_data = pickle.load(fin)
        self.num_nodes = load_data['node_num']

        data = Data()
        data.x = torch.Tensor(load_data['xs'])
        data.y = torch.LongTensor(load_data['ys'])

        # Random 80/20 split; the RNG call is kept unchanged so that seeded
        # runs reproduce the same partition.
        train_size = int(self.num_nodes * 0.8)
        train_idx = np.random.choice(np.arange(self.num_nodes), size=train_size, replace=False)
        train_mask = np.zeros((self.num_nodes, ), dtype=bool)
        train_mask[train_idx] = True
        test_mask = ~train_mask
        # Validation reuses the test nodes, but gets its own buffer so that an
        # in-place change to one mask tensor cannot silently alter the other.
        val_mask = test_mask.copy()

        # Stored edges are (num_edges, 2); transpose to the (2, num_edges) layout.
        edges = np.array(load_data['edges'], dtype=int).transpose((1, 0))

        data.edge_index = torch.from_numpy(edges)
        data.train_mask = torch.from_numpy(train_mask)
        data.test_mask = torch.from_numpy(test_mask)
        data.val_mask = torch.from_numpy(val_mask)

        self.data = data
        # Labels are treated as 0-based class ids.
        self.num_classes = torch.max(self.data.y).item() + 1
Example #2
0
    def read_gtn_data(self, folder):
        """Read ``data.mat`` from *folder* and store the graph in ``self.data``.

        Supported values of ``self.name`` are ``'han-acm'``, ``'han-dblp'`` and
        ``'han-imdb'``; each one selects its own feature key and set of
        relation matrices inside the ``.mat`` file.

        The resulting ``Data`` object carries:

        * ``adj`` - list of ``(edge_index, edge_value)`` pairs, one per
          relation, followed by an identity (self-loop) relation;
        * ``x`` - node features as a float tensor;
        * ``train_node`` / ``valid_node`` / ``test_node`` - node indices;
        * ``train_target`` / ``valid_target`` / ``test_target`` - class ids
          obtained by ``argmax`` over the label matrix.

        Args:
            folder: Directory that contains ``data.mat``.

        Raises:
            ValueError: If ``self.name`` is not one of the supported names.
        """
        # dataset name -> (feature key, relation-matrix keys)
        layout = {
            'han-acm': ('feature', ('PAP', 'PLP')),
            'han-dblp': ('features', ('net_APA', 'net_APCPA', 'net_APTPA')),
            'han-imdb': ('feature', ('MAM', 'MDM', 'MYM')),
        }
        if self.name not in layout:
            # Previously this surfaced later as an unbound-variable error.
            raise ValueError(
                "Unsupported dataset name: {!r}".format(self.name))
        feature_key, relation_keys = layout[self.name]

        mat = sio.loadmat(osp.join(folder, 'data.mat'))
        truelabels = mat['label']
        truefeatures = mat[feature_key].astype(float)
        num_nodes = truefeatures.shape[0]

        # Build the identity once instead of once per relation.  Subtracting
        # it presumably strips self-loops stored on the diagonal - TODO confirm.
        identity = np.eye(num_nodes)
        rownetworks = [mat[key] - identity for key in relation_keys]

        train_idx = mat['train_idx']
        val_idx = mat['val_idx']
        test_idx = mat['test_idx']

        num_labels = truelabels.shape[0]
        train_mask = sample_mask(train_idx, num_labels)
        val_mask = sample_mask(val_idx, num_labels)
        test_mask = sample_mask(test_idx, num_labels)

        # NOTE(review): targets follow mask (row) order while the *_node
        # tensors below follow the order stored in *_idx; these only line up
        # if the stored indices are sorted - verify against the data files.
        y_train = np.argmax(truelabels[train_mask, :], axis=1)
        y_val = np.argmax(truelabels[val_mask, :], axis=1)
        y_test = np.argmax(truelabels[test_mask, :], axis=1)

        data = Data()
        A = []
        for edge in rownetworks:
            nz = edge.nonzero()  # computed once per relation
            edge_tmp = torch.from_numpy(np.vstack((nz[0], nz[1]))).type(torch.LongTensor)
            value_tmp = torch.ones(edge_tmp.shape[1]).type(torch.FloatTensor)
            A.append((edge_tmp, value_tmp))
        # Extra identity relation: one self-loop per node.
        edge_tmp = torch.stack((torch.arange(0, num_nodes), torch.arange(0, num_nodes))).type(torch.LongTensor)
        value_tmp = torch.ones(num_nodes).type(torch.FloatTensor)
        A.append((edge_tmp, value_tmp))
        data.adj = A

        data.x = torch.from_numpy(truefeatures).type(torch.FloatTensor)

        # The index arrays are read from row 0 of the stored 2-D arrays.
        data.train_node = torch.from_numpy(train_idx[0]).type(torch.LongTensor)
        data.train_target = torch.from_numpy(y_train).type(torch.LongTensor)
        data.valid_node = torch.from_numpy(val_idx[0]).type(torch.LongTensor)
        data.valid_target = torch.from_numpy(y_val).type(torch.LongTensor)
        data.test_node = torch.from_numpy(test_idx[0]).type(torch.LongTensor)
        data.test_target = torch.from_numpy(y_test).type(torch.LongTensor)

        self.data = data
Example #3
0
    def read_gtn_data(self, folder):
        """Read the pickled GTN files in *folder* and store them in ``self.data``.

        Three files are loaded: ``edges.pkl`` (a sequence of 2-D matrices, one
        per relation), ``labels.pkl`` (three array-likes - train, valid, test -
        whose rows are ``(node, target)``) and ``node_features.pkl`` (a NumPy
        array of node features).

        The resulting ``Data`` object carries ``x``, ``adj`` (a list of
        ``(edge_index, edge_value)`` pairs, one per relation plus a trailing
        identity relation) and the ``*_node`` / ``*_target`` tensors for the
        three splits.

        Args:
            folder: Directory that contains the three pickle files.
        """
        def _load(filename):
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            # The context manager closes the handle, which the previous
            # ``pickle.load(open(...))`` form never did.
            with open(osp.join(folder, filename), 'rb') as fin:
                return pickle.load(fin)

        edges = _load('edges.pkl')
        labels = _load('labels.pkl')
        node_features = _load('node_features.pkl')

        data = Data()
        data.x = torch.from_numpy(node_features).type(torch.FloatTensor)

        num_nodes = edges[0].shape[0]

        A = []
        for edge in edges:
            nz = edge.nonzero()  # computed once per relation
            edge_tmp = torch.from_numpy(
                np.vstack((nz[0], nz[1]))).type(torch.LongTensor)
            value_tmp = torch.ones(edge_tmp.shape[1]).type(torch.FloatTensor)
            A.append((edge_tmp, value_tmp))
        # Extra identity relation: one self-loop per node.
        edge_tmp = torch.stack(
            (torch.arange(0, num_nodes),
             torch.arange(0, num_nodes))).type(torch.LongTensor)
        value_tmp = torch.ones(num_nodes).type(torch.FloatTensor)
        A.append((edge_tmp, value_tmp))
        data.adj = A

        # Column 0 holds node ids, column 1 the class id of each node.
        train = np.array(labels[0])
        valid = np.array(labels[1])
        test = np.array(labels[2])

        data.train_node = torch.from_numpy(train[:, 0]).type(torch.LongTensor)
        data.train_target = torch.from_numpy(train[:, 1]).type(
            torch.LongTensor)
        data.valid_node = torch.from_numpy(valid[:, 0]).type(torch.LongTensor)
        data.valid_target = torch.from_numpy(valid[:, 1]).type(
            torch.LongTensor)
        data.test_node = torch.from_numpy(test[:, 0]).type(torch.LongTensor)
        data.test_target = torch.from_numpy(test[:, 1]).type(torch.LongTensor)

        self.data = data
Example #4
0
    def read_gtn_data(self, folder):
        """Read the pickled GTN files in *folder* and store them in ``self.data``.

        Three files are loaded: ``edges.pkl`` (exactly four 2-D relation
        matrices), ``labels.pkl`` (three array-likes - train, valid, test -
        whose rows are ``(node, target)``) and ``node_features.pkl`` (a NumPy
        array of node features).

        The resulting ``Data`` object carries:

        * ``x`` - node features as a float tensor;
        * ``pos`` - a per-node type id in ``{0, 1, 2}`` derived from the side
          of each relation on which the node appears;
        * ``edge_index`` - all relations concatenated along dimension 1;
        * ``adj`` - list of ``(edge_index, edge_value)`` pairs, one per
          relation plus a trailing identity relation;
        * ``*_node`` / ``*_target`` tensors for the three splits;
        * ``y`` - per-node label, 0 for nodes outside the three splits.

        Args:
            folder: Directory that contains the three pickle files.
        """
        def _load(filename):
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            # The context manager closes the handle, which the previous
            # ``pickle.load(open(...))`` form never did.
            with open(osp.join(folder, filename), 'rb') as fin:
                return pickle.load(fin)

        edges = _load('edges.pkl')
        labels = _load('labels.pkl')
        node_features = _load('node_features.pkl')

        data = Data()
        data.x = torch.from_numpy(node_features).type(torch.FloatTensor)

        num_nodes = edges[0].shape[0]

        assert len(edges) == 4
        # Non-zero coordinates are needed several times below; compute them
        # once per relation instead of on every use.
        nonzeros = [edge.nonzero() for edge in edges]
        assert len(nonzeros[0]) == 2

        # (row type, column type) of each relation, applied in order so that
        # later relations overwrite earlier ones exactly as before.
        endpoint_types = ((0, 1), (1, 0), (0, 2), (2, 0))
        node_type = np.zeros((num_nodes), dtype=int)
        for nz, (row_type, col_type) in zip(nonzeros, endpoint_types):
            node_type[nz[0]] = row_type
            node_type[nz[1]] = col_type

        # NOTE(review): looks like leftover debug output; kept so that the
        # method's observable behaviour is unchanged.
        print(node_type)
        data.pos = torch.from_numpy(node_type)

        edge_tensors = [
            torch.from_numpy(np.vstack((nz[0], nz[1]))).type(torch.LongTensor)
            for nz in nonzeros
        ]
        data.edge_index = torch.cat(edge_tensors, 1)

        A = [
            (edge_tmp, torch.ones(edge_tmp.shape[1]).type(torch.FloatTensor))
            for edge_tmp in edge_tensors
        ]
        # Extra identity relation: one self-loop per node.
        edge_tmp = torch.stack(
            (torch.arange(0, num_nodes),
             torch.arange(0, num_nodes))).type(torch.LongTensor)
        value_tmp = torch.ones(num_nodes).type(torch.FloatTensor)
        A.append((edge_tmp, value_tmp))
        data.adj = A

        # Column 0 holds node ids, column 1 the class id of each node.
        train = np.array(labels[0])
        valid = np.array(labels[1])
        test = np.array(labels[2])

        data.train_node = torch.from_numpy(train[:, 0]).type(torch.LongTensor)
        data.train_target = torch.from_numpy(train[:, 1]).type(
            torch.LongTensor)
        data.valid_node = torch.from_numpy(valid[:, 0]).type(torch.LongTensor)
        data.valid_target = torch.from_numpy(valid[:, 1]).type(
            torch.LongTensor)
        data.test_node = torch.from_numpy(test[:, 0]).type(torch.LongTensor)
        data.test_target = torch.from_numpy(test[:, 1]).type(torch.LongTensor)

        # Dense label vector; nodes that belong to no split keep the value 0.
        y = np.zeros((num_nodes), dtype=int)
        x_index = torch.cat((data.train_node, data.valid_node, data.test_node))
        y_index = torch.cat(
            (data.train_target, data.valid_target, data.test_target))
        y[x_index.numpy()] = y_index.numpy()
        data.y = torch.from_numpy(y)
        self.data = data
Example #5
0
    def read_gtn_data(self, folder):
        """Read ``data.mat`` from *folder* and store the graph in ``self.data``.

        Supported values of ``self.name`` are ``"han-acm"``, ``"han-dblp"`` and
        ``"han-imdb"``; each one selects its own feature key and set of
        relation matrices inside the ``.mat`` file.

        The resulting ``Data`` object carries:

        * ``adj`` - list of ``(edge_index, edge_value)`` pairs, one per
          relation, followed by an identity (self-loop) relation;
        * ``x`` - node features as a float tensor;
        * ``train_node`` / ``valid_node`` / ``test_node`` - node indices;
        * ``train_target`` / ``valid_target`` / ``test_target`` - class ids
          obtained by ``argmax`` over the label matrix.

        Args:
            folder: Directory that contains ``data.mat``.

        Raises:
            ValueError: If ``self.name`` is not one of the supported names.
        """
        # dataset name -> (feature key, relation-matrix keys)
        layout = {
            "han-acm": ("feature", ("PAP", "PLP")),
            "han-dblp": ("features", ("net_APA", "net_APCPA", "net_APTPA")),
            "han-imdb": ("feature", ("MAM", "MDM", "MYM")),
        }
        if self.name not in layout:
            # Previously this surfaced later as an unbound-variable error.
            raise ValueError(
                "Unsupported dataset name: {!r}".format(self.name))
        feature_key, relation_keys = layout[self.name]

        mat = sio.loadmat(osp.join(folder, "data.mat"))
        truelabels = mat["label"]
        truefeatures = mat[feature_key].astype(float)
        num_nodes = truefeatures.shape[0]

        # Build the identity once instead of once per relation.  Subtracting
        # it presumably strips self-loops stored on the diagonal - TODO confirm.
        identity = np.eye(num_nodes)
        rownetworks = [mat[key] - identity for key in relation_keys]

        train_idx = mat["train_idx"]
        val_idx = mat["val_idx"]
        test_idx = mat["test_idx"]

        num_labels = truelabels.shape[0]
        train_mask = sample_mask(train_idx, num_labels)
        val_mask = sample_mask(val_idx, num_labels)
        test_mask = sample_mask(test_idx, num_labels)

        # NOTE(review): targets follow mask (row) order while the *_node
        # tensors below follow the order stored in *_idx; these only line up
        # if the stored indices are sorted - verify against the data files.
        y_train = np.argmax(truelabels[train_mask, :], axis=1)
        y_val = np.argmax(truelabels[val_mask, :], axis=1)
        y_test = np.argmax(truelabels[test_mask, :], axis=1)

        data = Data()
        A = []
        for edge in rownetworks:
            nz = edge.nonzero()  # computed once per relation
            edge_tmp = torch.from_numpy(
                np.vstack((nz[0], nz[1]))).type(torch.LongTensor)
            value_tmp = torch.ones(edge_tmp.shape[1]).type(torch.FloatTensor)
            A.append((edge_tmp, value_tmp))
        # Extra identity relation: one self-loop per node.
        edge_tmp = torch.stack(
            (torch.arange(0, num_nodes),
             torch.arange(0, num_nodes))).type(torch.LongTensor)
        value_tmp = torch.ones(num_nodes).type(torch.FloatTensor)
        A.append((edge_tmp, value_tmp))
        data.adj = A

        data.x = torch.from_numpy(truefeatures).type(torch.FloatTensor)

        # The index arrays are read from row 0 of the stored 2-D arrays.
        data.train_node = torch.from_numpy(train_idx[0]).type(torch.LongTensor)
        data.train_target = torch.from_numpy(y_train).type(torch.LongTensor)
        data.valid_node = torch.from_numpy(val_idx[0]).type(torch.LongTensor)
        data.valid_target = torch.from_numpy(y_val).type(torch.LongTensor)
        data.test_node = torch.from_numpy(test_idx[0]).type(torch.LongTensor)
        data.test_target = torch.from_numpy(y_test).type(torch.LongTensor)

        self.data = data