Ejemplo n.º 1
0
    def __init__(self,
                 G_list,
                 features='default',
                 normalize=True,
                 assign_feat='default',
                 max_num_nodes=0):
        '''Precompute adjacency, label and node-feature arrays for each graph.

        Args:
            G_list: non-empty sequence of networkx graphs. Every graph must
                carry ``G.graph['label']``. Modes that read the per-node
                ``'feat'`` attribute look nodes up through ``util.node_dict``.
            features: how the node-feature matrix of each graph is built.
                ``'default'``  node ``'feat'`` vectors;
                ``'id'``       identity matrix of size ``max_num_nodes``;
                ``'deg-num'``  single column holding the adjacency row sums;
                ``'deg'``      one-hot capped degree followed by ``'feat'``;
                ``'struct'``   one-hot capped degree, clustering coefficient
                               and, when node 0 has it, ``'feat'``.
            normalize: if True, each adjacency ``A`` is replaced by
                ``D^-1/2 A D^-1/2`` where ``D`` holds the column sums of ``A``.
            assign_feat: ``'id'`` prepends an identity matrix to the node
                features to form the assignment features; any other value
                reuses the node features unchanged.
            max_num_nodes: number of rows every feature matrix is padded to.
                ``0`` means "use the largest graph in ``G_list``".

        Raises:
            ValueError: if ``features`` is not one of the supported modes.
        '''
        supported = ('default', 'id', 'deg-num', 'deg', 'struct')
        if features not in supported:
            # Without this check an unknown mode only surfaced later as an
            # IndexError on the still-empty feature list.
            raise ValueError('Unknown features mode %r; expected one of: %s' %
                             (features, ', '.join(supported)))

        self.adj_all = []
        self.len_all = []
        self.feature_all = []
        self.label_all = []

        self.assign_feat_all = []

        if max_num_nodes == 0:
            self.max_num_nodes = max(G.number_of_nodes() for G in G_list)
        else:
            self.max_num_nodes = max_num_nodes

        # Only these two modes need the raw attribute width up front; reading
        # it unconditionally made attribute-free graphs unusable with the
        # other modes.
        if features in ('default', 'deg'):
            self.feat_dim = util.node_dict(G_list[0])[0]['feat'].shape[0]

        # Degrees above this cap share the last one-hot slot.
        if features in ('deg', 'struct'):
            self.max_deg = 10

        # to_numpy_matrix was removed in networkx 3.0; prefer to_numpy_array
        # and fall back only on releases that predate it.
        to_dense = getattr(nx, 'to_numpy_array', None) or nx.to_numpy_matrix

        for G in G_list:
            num_nodes = G.number_of_nodes()
            num_pad = self.max_num_nodes - num_nodes

            adj = np.array(to_dense(G))
            if normalize:
                deg = np.sum(adj, axis=0, dtype=float)
                # Leave isolated nodes at 0 instead of dividing by zero, which
                # would fill their row and column with NaN.
                inv_sqrt_deg = np.zeros_like(deg)
                connected = deg > 0
                inv_sqrt_deg[connected] = 1.0 / np.sqrt(deg[connected])
                scale = np.diag(inv_sqrt_deg)
                adj = np.matmul(np.matmul(scale, adj), scale)
            self.adj_all.append(adj)
            self.len_all.append(num_nodes)
            self.label_all.append(G.graph['label'])

            # Every feature matrix below has max_num_nodes rows.
            # NOTE(review): the degree-based modes take row sums of `adj`
            # *after* the optional normalisation above - confirm that this is
            # intended rather than the raw node degree.
            if features == 'default':
                node_attrs = util.node_dict(G)
                f = np.zeros((self.max_num_nodes, self.feat_dim), dtype=float)
                for i, u in enumerate(G.nodes()):
                    f[i, :] = node_attrs[u]['feat']
                self.feature_all.append(f)
            elif features == 'id':
                self.feature_all.append(np.identity(self.max_num_nodes))
            elif features == 'deg-num':
                # The pad mode must be the string 'constant'; the previous
                # positional 0 is not a valid mode.
                degs = np.pad(np.sum(adj, 1), [0, num_pad], 'constant')
                self.feature_all.append(np.expand_dims(degs, axis=1))
            else:  # 'deg' or 'struct': both start from a one-hot capped degree
                degs = np.sum(adj, 1).astype(int)
                degs[degs > self.max_deg] = self.max_deg
                one_hot = np.zeros((len(degs), self.max_deg + 1))
                one_hot[np.arange(len(degs)), degs] = 1
                one_hot = np.pad(one_hot, ((0, num_pad), (0, 0)),
                                 'constant',
                                 constant_values=0)

                if features == 'deg':
                    node_attrs = util.node_dict(G)
                    f = np.zeros((self.max_num_nodes, self.feat_dim),
                                 dtype=float)
                    for i, u in enumerate(util.node_iter(G)):
                        f[i, :] = node_attrs[u]['feat']
                    self.feature_all.append(
                        np.concatenate((one_hot, f), axis=1))
                else:
                    clusterings = np.array(list(nx.clustering(G).values()))
                    clusterings = np.expand_dims(
                        np.pad(clusterings, [0, num_pad], 'constant'), axis=1)
                    g_feat = np.hstack([one_hot, clusterings])

                    # This branch addresses nodes by the integers 0..n-1.
                    node_attrs = util.node_dict(G)
                    if 'feat' in node_attrs[0]:
                        node_feats = np.array(
                            [node_attrs[i]['feat'] for i in range(num_nodes)])
                        node_feats = np.pad(node_feats,
                                            ((0, num_pad), (0, 0)),
                                            'constant')
                        g_feat = np.hstack([g_feat, node_feats])

                    self.feature_all.append(g_feat)

            if assign_feat == 'id':
                self.assign_feat_all.append(
                    np.hstack((np.identity(self.max_num_nodes),
                               self.feature_all[-1])))
            else:
                self.assign_feat_all.append(self.feature_all[-1])

        # Final widths as actually produced by the chosen modes.
        self.feat_dim = self.feature_all[0].shape[1]
        self.assign_feat_dim = self.assign_feat_all[0].shape[1]
Ejemplo n.º 2
0
def read_graphfile(datadir, dataname, max_nodes=None):
    ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
        graph index starts with 1 in file

    Files are looked up as ``<datadir>/<dataname>/<dataname>_<suffix>.txt``.
    ``_graph_indicator``, ``_graph_labels`` and ``_A`` are required;
    ``_node_labels`` and ``_node_attributes`` are optional and a message is
    printed when one of them cannot be opened.

    Args:
        datadir: directory that contains one sub-directory per dataset.
        dataname: dataset name (sub-directory name and file prefix).
        max_nodes: if not None, graphs with more nodes than this are skipped.

    Returns:
        List of networkx objects with graph and node labels. Each graph has
        ``graph['label']`` (labels re-indexed to 0..k-1 in order of first
        appearance) and, when attributes exist, ``graph['feat_dim']``. Nodes
        carry a one-hot ``'label'`` and/or a ``'feat'`` array when the
        matching file was read, and are relabelled to 0..n-1.
    '''
    prefix = os.path.join(datadir, dataname, dataname)

    # graph_indic[node_id] -> graph_id, both 1-based (one line per node).
    with open(prefix + '_graph_indicator.txt') as f:
        graph_indic = {
            node_id: int(line) for node_id, line in enumerate(f, start=1)
        }

    node_labels = []
    num_unique_node_labels = 0
    try:
        with open(prefix + '_node_labels.txt') as f:
            raw_node_labels = [int(line) for line in f]
    except IOError:
        print('No node labels')
    else:
        if raw_node_labels:
            # Labels in the file are normally 1-based and are shifted to
            # 0-based. When the smallest label is below 1, shift by that
            # value instead so no label turns negative and wraps around in
            # the one-hot vector.
            shift = min(1, min(raw_node_labels))
            node_labels = [label - shift for label in raw_node_labels]
            num_unique_node_labels = max(node_labels) + 1

    node_attrs = []
    attr_sep = re.compile(r"[,\s]+")
    try:
        with open(prefix + '_node_attributes.txt') as f:
            for line in f:
                # strip() removes real whitespace; the previous
                # strip("\s\n") removed the characters '\\', 's' and '\n'.
                tokens = attr_sep.split(line.strip())
                node_attrs.append(
                    np.array([float(tok) for tok in tokens if tok != '']))
    except IOError:
        print('No node attributes')

    # assume that all graph labels appear in the dataset
    # (set of labels don't have to be consecutive)
    label_map_to_int = {}
    raw_graph_labels = []
    with open(prefix + '_graph_labels.txt') as f:
        for line in f:
            val = int(line)
            # First occurrence gets the next free index.
            label_map_to_int.setdefault(val, len(label_map_to_int))
            raw_graph_labels.append(val)
    graph_labels = np.array([label_map_to_int[v] for v in raw_graph_labels])

    # Edges grouped by the graph their source node belongs to.
    adj_list = {i: [] for i in range(1, len(graph_labels) + 1)}
    with open(prefix + '_A.txt') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at end of file
            fields = line.strip("\n").split(",")
            e0, e1 = int(fields[0]), int(fields[1])
            adj_list[graph_indic[e0]].append((e0, e1))

    graphs = []
    for i in range(1, 1 + len(adj_list)):
        # indexed from 1 here
        G = nx.from_edgelist(adj_list[i])
        if max_nodes is not None and G.number_of_nodes() > max_nodes:
            continue

        # add features and labels
        G.graph['label'] = graph_labels[i - 1]
        node_data = util.node_dict(G)
        for u in util.node_iter(G):
            if len(node_labels) > 0:
                node_label_one_hot = [0] * num_unique_node_labels
                node_label_one_hot[node_labels[u - 1]] = 1
                node_data[u]['label'] = node_label_one_hot
            if len(node_attrs) > 0:
                node_data[u]['feat'] = node_attrs[u - 1]
        if len(node_attrs) > 0:
            G.graph['feat_dim'] = node_attrs[0].shape[0]

        # relabeling: global 1-based node ids -> 0..n-1 in iteration order
        mapping = {n: idx for idx, n in enumerate(util.node_iter(G))}

        # indexed from 0
        graphs.append(nx.relabel_nodes(G, mapping))
    return graphs
Ejemplo n.º 3
0
def read_graphfile2(datadir, dataname, max_nodes=None, nodes_per_graph=2592):
    ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
        graph index starts with 1 in file

    Variant of the per-graph reader in which all samples share one graph
    structure: the edge list in ``_A.txt`` is read once, and each graph label
    is paired with its own slice of the node-attribute table.

    Only ``_node_attributes`` (optional), ``_graph_labels`` and ``_A`` are
    read; the graph-indicator and node-label files are not needed here.

    Args:
        datadir: directory that contains one sub-directory per dataset.
        dataname: dataset name (sub-directory name and file prefix).
        max_nodes: if not None, reading of ``_A.txt`` stops at the first edge
            whose target id exceeds it, and nothing is produced when the
            resulting graph still has more nodes than this.
        nodes_per_graph: number of attribute rows that belong to one sample;
            node ``u`` (1-based) of sample ``k`` (0-based) uses row
            ``(u - 1) + k * nodes_per_graph``. Defaults to the previously
            hard-coded 2592.

    Returns:
        Tuple ``(Hlist, New_G)``. ``Hlist`` has one ``[label, features]``
        entry per graph label, where ``label`` is re-indexed to 0..k-1 in
        order of first appearance and ``features`` is an array stacking the
        node attribute vectors (empty when there are no attributes).
        ``New_G`` is the shared graph relabelled to 0..n-1, carrying the label
        and attributes of the last sample, or None when no sample was
        produced.
    '''
    prefix = os.path.join(datadir, dataname, dataname)

    node_attrs = []
    attr_sep = re.compile(r"[,\s]+")
    try:
        with open(prefix + '_node_attributes.txt') as f:
            for line in f:
                # strip() removes real whitespace; the previous
                # strip("\s\n") removed the characters '\\', 's' and '\n'.
                tokens = attr_sep.split(line.strip())
                node_attrs.append(
                    np.array([float(tok) for tok in tokens if tok != '']))
    except IOError:
        print('No node attributes')

    # assume that all graph labels appear in the dataset
    # (set of labels don't have to be consecutive)
    label_map_to_int = {}
    raw_graph_labels = []
    with open(prefix + '_graph_labels.txt') as f:
        for line in f:
            val = int(line)
            # First occurrence gets the next free index.
            label_map_to_int.setdefault(val, len(label_map_to_int))
            raw_graph_labels.append(val)
    graph_labels = np.array([label_map_to_int[v] for v in raw_graph_labels])

    # Edge list shared by every sample.
    common_adj = []
    with open(prefix + '_A.txt') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. at end of file
            fields = line.strip("\n").split(",")
            e0, e1 = int(fields[0]), int(fields[1])
            # Guard against the default max_nodes=None, which cannot be
            # compared with an int.
            if max_nodes is not None and e1 > max_nodes:
                break
            common_adj.append((e0, e1))

    # The structure is identical for every sample, so build it once.
    G = nx.from_edgelist(common_adj)
    if max_nodes is not None and G.number_of_nodes() > max_nodes:
        return [], None

    has_attrs = len(node_attrs) > 0
    if has_attrs:
        G.graph['feat_dim'] = node_attrs[0].shape[0]

    node_data = util.node_dict(G)
    Hlist = []
    for sample_idx, label in enumerate(graph_labels):
        # Each entry stores the label and the attribute matrix of one sample.
        G.graph['label'] = label

        row_offset = sample_idx * nodes_per_graph
        sample_feats = []
        if has_attrs:
            for u in util.node_iter(G):
                node_data[u]['feat'] = node_attrs[(u - 1) + row_offset]
                sample_feats.append(node_data[u]['feat'])
        Hlist.append([label, np.array(sample_feats)])

    if not Hlist:
        # No graph labels: there is no sample to attach to the graph.
        return Hlist, None

    # relabeling: 1-based node ids -> 0..n-1 in iteration order. Done once,
    # after the loop, so the copy reflects the last sample.
    mapping = {n: idx for idx, n in enumerate(util.node_iter(G))}
    New_G = nx.relabel_nodes(G, mapping)
    return Hlist, New_G