def load_npy_data(data_file, name):
    """Load a '<name>.content' / '<name>.cites' dataset pair.

    The content table is read as strings: column 0 holds the node id, the
    last column holds the class label, and everything in between is used
    as the feature vector. The cites table holds pairs of node ids, which
    are remapped to row positions of the content table.

    Args:
        data_file: Directory (passed through ``get_dir``) holding the files.
        name: Dataset name used as the file stem.

    Returns:
        A tuple ``(features, adj, labels)``: the dense feature matrix, the
        symmetrized sparse adjacency matrix, and the output of
        ``encode_onehot`` applied to the label column.
    """
    content_path = get_dir(osp.join(data_file, name + '.content'))
    raw_table = np.genfromtxt(content_path, dtype=np.dtype(str))

    features = sp.csr_matrix(raw_table[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(raw_table[:, -1])

    # Map every original node id to its row index in the content table.
    node_ids = np.array(raw_table[:, 0], dtype=np.int32)
    id_to_row = {node_id: row for row, node_id in enumerate(node_ids)}

    cites_path = get_dir(osp.join(data_file, name + '.cites'))
    raw_edges = np.genfromtxt(cites_path, dtype=np.int32)
    remapped = [id_to_row.get(node_id) for node_id in raw_edges.flatten()]
    edges = np.array(remapped, dtype=np.int32).reshape(raw_edges.shape)

    num_nodes = labels.shape[0]
    edge_weights = np.ones(edges.shape[0])
    adj = sp.coo_matrix(
        (edge_weights, (edges[:, 0], edges[:, 1])),
        shape=(num_nodes, num_nodes),
        dtype=np.float32,
    )

    # Symmetrize: wherever the transposed entry is larger, take it instead.
    transpose_larger = adj.T > adj
    adj = adj + adj.T.multiply(transpose_larger) - adj.multiply(transpose_larger)

    print('Dataset has {} nodes, {} edges, {} features.'.format(
        adj.shape[0], edges.shape[0], features.shape[1]))
    return features.todense(), adj, labels
def _read_int_rows(path):
    """Parse a whitespace-separated integer table, one list of ints per line.

    Blank lines and anything following a '#' are ignored, which mirrors the
    default comment/delimiter handling of ``nx.read_edgelist``.
    """
    rows = []
    with open(path) as handle:
        for raw_line in handle:
            text = raw_line.split('#', 1)[0].strip()
            if not text:
                continue
            rows.append([int(token) for token in text.split()])
    return rows


def read_edgelist_label_data(folder, prefix, save_path):
    """Convert '<prefix>.ungraph' and '<prefix>.cmty' into one '.npz' archive.

    Args:
        folder: Directory (passed through ``get_dir``) holding the raw files.
        prefix: File stem of the raw files and of the archive that is written.
        save_path: Directory the archive is saved into.

    The archive contains three arrays:
        nodes: Node ids in the order NetworkX reports them.
        edge_index: Float array of shape (2, num_edges); column i holds the
            two endpoints found on the i-th edge line.
        label: Float array of shape (num_node, num_classes) with
            ``label[m, k] == 1`` when node m is listed on line k of the
            community file. Node ids are used directly as row indices.

    Raises:
        ValueError: If a token is not an integer or an edge line does not
            hold exactly two values.
        IndexError: If a community member is not a valid row index.
    """
    graph_path = get_dir(osp.join(folder, '{}.ungraph'.format(prefix)))
    cmty_path = get_dir(osp.join(folder, '{}.cmty'.format(prefix)))

    G = nx.read_edgelist(graph_path, nodetype=int, create_using=nx.Graph())
    num_node = G.number_of_nodes()
    nodes = np.array(list(G.nodes()))
    print('node number: ', num_node)

    # Two rows; each column holds the two endpoints of one edge.
    edge_rows = _read_int_rows(graph_path)
    print('edge number: ', len(edge_rows))
    edge_index = np.zeros((2, len(edge_rows)))
    for column, endpoints in enumerate(edge_rows):
        edge_index[:, column] = endpoints

    # One community per line: every node listed on line k gets label[m, k] = 1.
    communities = _read_int_rows(cmty_path)
    print('class number: ', len(communities))
    label = np.zeros((num_node, len(communities)))
    for class_id, members in enumerate(communities):
        label[members, class_id] = 1

    # NOTE(review): the inputs are resolved through get_dir but this output
    # path is not - confirm that readers of the archive find it here.
    np.savez(osp.join(save_path, prefix),
             nodes=nodes, edge_index=edge_index, label=label)
def get_data(self, name):
    """Return the processed arrays for dataset ``name``, building them if needed.

    If '<name>.npz' is not found in the processed directory it is generated
    from the raw files by ``read_edgelist_label_data`` first.

    Args:
        name: Dataset name, used as the file stem of both raw and processed
            files.

    Returns:
        A tuple ``(nodes, edge_index, label)`` with the arrays stored under
        those keys in the archive.
    """
    if not osp.exists(get_dir(osp.join(self.processed, name + '.npz'))):
        # NOTE: downloading the raw files from self.url is disabled, so
        # '<name>.ungraph' and '<name>.cmty' must already be in self.raw_dir.
        read_edgelist_label_data(self.raw_dir, name, self.processed)

    # The path is resolved again rather than cached: get_dir is opaque here
    # and the archive may only just have been created.
    npz_path = get_dir(osp.join(self.processed, name + '.npz'))

    # NpzFile keeps the underlying file open until closed; the arrays are
    # fully read on item access, so they stay valid after the with-block.
    with np.load(npz_path) as npzfile:
        return npzfile['nodes'], npzfile['edge_index'], npzfile['label']
def __init__(self, root):
    """Set up the dataset directory layout under ``root``.

    Args:
        root: Base directory; raw files are expected in '<root>/raw' and
            processed archives are kept in '<root>/processed'.
    """
    super(Build_dataset, self).__init__()
    self.root = root
    self.raw_dir = osp.join(root, 'raw')
    self.processed = osp.join(root, 'processed')
    # Create the directory at the same path that is later read through
    # get_dir. exist_ok avoids checking one path and creating another, and
    # tolerates the directory already being there.
    os.makedirs(get_dir(self.processed), exist_ok=True)
    # Remote location of the ProNE data directory; nothing in this method
    # uses it.
    self.url = 'https://github.com/THUDM/ProNE/raw/master/data'
from cogdl.models import Build_model
from para_config import Config
from cogdl.tasks import Build_task
from cogdl.datasets import Build_dataset
from cogdl.data import get_dir


def main():
    """Train a GCN on Cora for node classification and save the result as JSON."""
    args = Config()
    if args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device_id
        # list_physical_devices returns an empty list when no GPU is visible,
        # so iterate instead of indexing [0]; this also keeps the memory-growth
        # setting identical across all visible GPUs.
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)

    # Seed both RNGs with the first configured seed.
    random.seed(args.seed[0])
    np.random.seed(args.seed[0])

    args.set_model('gcn')
    args.set_dataset('cora')
    args.set_task('node_classification')

    task = Build_task(args).build()
    result = task.train()
    print(result)

    # Results are written to '<save_dir>/<dataset>_<model>.json'.
    result_file = get_dir(
        osp.join(args.save_dir, args.dataset + '_' + args.model + '.json'))
    with open(result_file, 'w') as json_file:
        json.dump(result, json_file)


if __name__ == '__main__':
    main()