def load_npy_data(data_file, name):
    """Load a node-classification dataset from ``<name>.content`` / ``<name>.cites``.

    The ``.content`` table is read as strings: column 0 holds the node id,
    the last column is handed to ``encode_onehot`` as the label, and the
    columns in between are parsed as float32 features.  The ``.cites`` file
    is read as an int32 table of node-id pairs, one edge per row.

    Args:
        data_file: Directory (resolved through ``get_dir``) holding the files.
        name: Base name of the dataset files, without extension.

    Returns:
        A tuple ``(features, adj, labels)``: the dense feature matrix, the
        symmetrized sparse adjacency matrix, and the output of
        ``encode_onehot`` for the label column.
    """
    content_path = get_dir(osp.join(data_file, name + '.content'))
    table = np.genfromtxt(content_path, dtype=np.dtype(str))
    features = sp.csr_matrix(table[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(table[:, -1])

    # Map every raw node id to its row position in the content table.
    node_ids = np.array(table[:, 0], dtype=np.int32)
    row_of = {node_id: row for row, node_id in enumerate(node_ids)}

    # Re-express the edge list in terms of row positions.
    cites_path = get_dir(osp.join(data_file, name + '.cites'))
    raw_edges = np.genfromtxt(cites_path, dtype=np.int32)
    remapped = [row_of.get(node_id) for node_id in raw_edges.flatten()]
    edges = np.array(remapped, dtype=np.int32).reshape(raw_edges.shape)

    num_nodes = labels.shape[0]
    sources, targets = edges[:, 0], edges[:, 1]
    adj = sp.coo_matrix(
        (np.ones(edges.shape[0]), (sources, targets)),
        shape=(num_nodes, num_nodes),
        dtype=np.float32)

    # Symmetrize: wherever the transposed entry is larger, take it instead of
    # the original entry, so each pair ends up with the larger of the two.
    transposed = adj.T
    transposed_is_larger = transposed > adj
    adj = (adj
           + transposed.multiply(transposed_is_larger)
           - adj.multiply(transposed_is_larger))

    summary = 'Dataset has {} nodes, {} edges, {} features.'.format(
        adj.shape[0], edges.shape[0], features.shape[1])
    print(summary)

    return features.todense(), adj, labels
# Example #2
# 0
def read_edgelist_label_data(folder, prefix, save_path):
    """Convert ``<prefix>.ungraph`` / ``<prefix>.cmty`` into one ``.npz`` archive.

    The graph file is read twice: once through networkx to collect the node
    set, and once line by line to build the raw edge index.  The community
    file lists, per line, the nodes belonging to one class.

    Blank lines in either file are ignored and fields may be separated by any
    whitespace, which matches how ``nx.read_edgelist`` tokenizes the same
    graph file by default.

    Args:
        folder: Directory (resolved through ``get_dir``) with the raw files.
        prefix: Base name of the raw files and of the archive written.
        save_path: Directory in which ``<prefix>.npz`` is stored.

    The archive holds three arrays:
        nodes: node ids in the order networkx reports them.
        edge_index: float array of shape ``(2, num_edges)``, one column per
            edge line.
        label: float array of shape ``(num_nodes, num_classes)`` with
            ``label[m, k] == 1`` when node ``m`` is listed on line ``k`` of
            the community file.
    """
    graph_path = get_dir(osp.join(folder, '{}.ungraph'.format(prefix)))
    cmty_path = get_dir(osp.join(folder, '{}.cmty'.format(prefix)))

    G = nx.read_edgelist(graph_path, nodetype=int, create_using=nx.Graph())
    num_node = G.number_of_nodes()
    nodes = np.array(list(G.nodes()))
    print('node number: ', num_node)

    with open(graph_path) as f:
        # Drop blank lines (e.g. a trailing empty line) so int() never sees ''.
        edge_lines = [line for line in f if line.strip()]
    print('edge number: ', len(edge_lines))
    # Two rows, one column per edge: row 0 / row 1 hold the two endpoints.
    edge_index = np.zeros((2, len(edge_lines)))
    for i, line in enumerate(edge_lines):
        edge_index[:, i] = [int(token) for token in line.split()]

    with open(cmty_path) as f:
        cmty_lines = [line for line in f if line.strip()]
    print('class number: ', len(cmty_lines))
    # If node m belongs to class k, then label[m, k] = 1.
    # NOTE(review): node ids are used directly as row indices, so this assumes
    # ids lie in [0, num_node) -- confirm against the datasets in use.
    label = np.zeros((num_node, len(cmty_lines)))
    for i, line in enumerate(cmty_lines):
        for node in map(int, line.split()):
            label[node, i] = 1

    np.savez(osp.join(save_path, prefix),
             nodes=nodes, edge_index=edge_index, label=label)
# Example #3
# 0
    def get_data(self, name):
        """Return the cached arrays for dataset ``name``, building them if needed.

        When ``<processed>/<name>.npz`` does not exist yet, it is generated
        from the raw files in ``self.raw_dir`` by
        ``read_edgelist_label_data``.

        Args:
            name: Base name of the dataset (no extension).

        Returns:
            A tuple ``(nodes, edge_index, label)`` of arrays read from the
            archive.
        """
        npz_rel_path = osp.join(self.processed, name + '.npz')
        if not osp.exists(get_dir(npz_rel_path)):
            # Automatic download of the raw files is currently disabled:
            # raw_file_names = ['{}.{}'.format(s, f) for s, f in product([name], ['ungraph', 'cmty'])]
            # for file_name in raw_file_names:
            #     download_url('{}/{}'.format(self.url, file_name), self.raw_dir)
            read_edgelist_label_data(self.raw_dir, name, self.processed)

        # An .npz archive stays open until closed; the arrays are read when
        # indexed, so returning them from inside the ``with`` block is safe
        # and releases the file handle.
        with np.load(get_dir(npz_rel_path)) as npzfile:
            return npzfile['nodes'], npzfile['edge_index'], npzfile['label']
# Example #4
# 0
 def __init__(self, root):
     """Set up the dataset directory layout below ``root``.

     Stores ``<root>/raw`` and ``<root>/processed`` on the instance, creates
     the processed directory when it is missing, and records the base URL
     of the raw data files.

     Args:
         root: Base directory of the dataset.
     """
     super(Build_dataset, self).__init__()
     self.root = root
     self.raw_dir, self.processed = (
         osp.join(root, sub_dir) for sub_dir in ('raw', 'processed'))

     # NOTE(review): existence is tested on the plain path, while the
     # directory is created at the path returned by get_dir -- confirm that
     # both refer to the same location.
     processed_exists = osp.exists(self.processed)
     if not processed_exists:
         os.makedirs(get_dir(self.processed))

     self.url = 'https://github.com/THUDM/ProNE/raw/master/data'
from cogdl.models import Build_model
from para_config import Config
from cogdl.tasks import Build_task
from cogdl.datasets import Build_dataset
from cogdl.data import get_dir

if __name__ == '__main__':
    # Train the 'gcn' model on 'cora' for node classification and store the
    # result as JSON in args.save_dir.

    args = Config()

    if args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device_id
        # Enable memory growth on every visible GPU: indexing gpus[0] fails
        # when no GPU is visible, and TensorFlow requires the setting to be
        # the same on all GPUs.
        for gpu in tf.config.experimental.list_physical_devices('GPU'):
            tf.config.experimental.set_memory_growth(gpu, True)

    # Seed both RNGs with the first configured seed.
    random.seed(args.seed[0])
    np.random.seed(args.seed[0])
    args.set_model('gcn')
    args.set_dataset('cora')
    args.set_task('node_classification')

    task = Build_task(args).build()
    result = task.train()
    print(result)

    result_file = get_dir(
        osp.join(args.save_dir, args.dataset + '_' + args.model + '.json'))
    # Serialize before opening the file so a failure here does not leave a
    # truncated result file behind.
    json_str = json.dumps(result)
    with open(result_file, 'w') as json_file:
        json_file.write(json_str)