Beispiel #1
0
def _wl_spkernel_do(Gn, node_label, edge_label, height):
    """Calculate Weisfeiler-Lehman shortest path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        node attribute used as label. Label values are concatenated as
        strings during relabeling.
    edge_label : string
        edge attribute used as label; it is passed to ``getSPGraph`` as the
        edge weight.
    height : int
        subtree height, i.e. the number of relabeling iterations performed
        after the initial (height 0) comparison.

    Return
    ------
    Kmatrix : Numpy array
        Symmetric ``len(Gn) x len(Gn)`` kernel matrix, each element of which
        is the Weisfeiler-Lehman kernel between 2 graphs.
    """
    from pygraph.utils.utils import getSPGraph

    # init.
    height = int(height)
    Kmatrix = np.zeros((len(Gn), len(Gn)))  # init kernel

    # get shortest path graphs of Gn
    Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]

    # initial for height = 0
    _wl_sp_accumulate(Kmatrix, Gn)

    # iterate each height
    for _ in range(1, height + 1):
        # maps multiset labels to compressed labels; shared by all graphs
        # within this iteration so equal multisets get equal new labels
        all_set_compressed = {}
        num_of_labels_occured = 0  # number of compressed labels issued so far
        for G in Gn:  # for each graph
            # Snapshot the current labels keyed by node, so that lookups do
            # not depend on node identifiers being 0..n-1 and do not rely on
            # the ``G.node`` attribute (removed in recent NetworkX releases).
            labels = {n: attrs[node_label] for n, attrs in G.nodes(data=True)}

            # Multiset-label determination: own label as prefix followed by
            # the sorted labels of all neighbors.
            multisets = {
                n: label + ''.join(sorted(labels[nb] for nb in G[n]))
                for n, label in labels.items()
            }

            # label compression: if a multiset occurred before, reuse its
            # compressed label, else assign the next unused number. Sorting
            # makes the numbering independent of set iteration order.
            for value in sorted(set(multisets.values())):
                if value not in all_set_compressed:
                    num_of_labels_occured += 1
                    all_set_compressed[value] = str(num_of_labels_occured)

            # relabel nodes
            for n, attrs in G.nodes(data=True):
                attrs[node_label] = all_set_compressed[multisets[n]]

        # calculate the kernel for this iteration and add it to the final kernel
        _wl_sp_accumulate(Kmatrix, Gn)

    return Kmatrix


def _wl_sp_accumulate(Kmatrix, Gn):
    """Add the shortest-path edge match count of every graph pair to Kmatrix in place.

    Two edges match when they have the same non-zero 'cost' and connect the
    same pair of node identifiers (in either direction).
    """
    # NOTE(review): edges are matched on node identifiers, not on node label
    # values, so the relabeling done between calls does not influence these
    # counts. Confirm whether a comparison of endpoint labels was intended.
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            for e1 in Gn[i].edges(data=True):
                cost = e1[2]['cost']
                if cost == 0:
                    continue
                for e2 in Gn[j].edges(data=True):
                    if cost == e2[2]['cost'] and (
                            (e1[0] == e2[0] and e1[1] == e2[1]) or
                            (e1[0] == e2[1] and e1[1] == e2[0])):
                        Kmatrix[i][j] += 1
            Kmatrix[j][i] = Kmatrix[i][j]  # keep the matrix symmetric
Beispiel #2
0
def wrapper_getSPGraph(weight, itr_item):
    """Call ``getSPGraph`` on one indexed graph and return the result with its index.

    Parameters
    ----------
    weight : passed to ``getSPGraph`` as ``edge_weight``.
    itr_item : sequence
        Its first element is the graph, its second the graph's index.

    Return
    ------
    tuple
        ``(index, result of getSPGraph)``.
    """
    # NOTE(review): presumably a worker for a parallel map, where the index
    # lets the caller restore ordering -- confirm against the call site.
    graph, index = itr_item[0], itr_item[1]
    sp_graph = getSPGraph(graph, edge_weight=weight)
    return index, sp_graph