Example 1
    def transform(self, graph_list):
        """
        Convert a list of graphs into their (approximated) feature
        representation and return it as a sparse instance-features matrix.
        """
        n = len(graph_list)  # number of graphs
        # phi maps (graph index, feature) pairs to values: each row of the
        # resulting matrix is a graph, each column a feature.
        phi = {}

        for i in range(n):  # for each graph
            total_feats = {}
            for r in range(1, self.k + 1):
                current_feats = {(i, key): v
                                 for (key, v) in self.getFeaturesApproximated(
                                     graph_list[i], r, self.h).items()}
                for key, value in current_feats.items():
                    total_feats[key] = total_feats.get(key, 0) + value

            phi.update(total_feats)

        ve = convert_to_sparse_matrix(phi)
        if self.normalization:
            ve = pp.normalize(ve, norm='l2', axis=1)
        return ve
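
All five examples rely on a convert_to_sparse_matrix helper that is not shown. Below is a minimal sketch of what such a helper might look like, assuming (as the code above implies) that keys are (graph_index, feature) pairs and values are feature counts; the name and signature are inferred from the call sites, not taken from a documented API:

from scipy.sparse import csr_matrix

def convert_to_sparse_matrix(feature_dict):
    """Hypothetical sketch: turn {(graph_index, feature): value} into a
    csr_matrix whose rows are graphs and whose columns are features."""
    feature_ids = {}  # assign each distinct feature a column index
    rows, cols, data = [], [], []
    for (graph_index, feature), value in feature_dict.items():
        rows.append(graph_index)
        cols.append(feature_ids.setdefault(feature, len(feature_ids)))
        data.append(value)
    return csr_matrix((data, (rows, cols)),
                      shape=(max(rows) + 1, len(feature_ids)))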
Example 2
    def __transform_serial(self,
                           G_list,
                           approximated=True,
                           keepdictionary=False):
        """
        Private method that converts a networkx graph list into a instance-features matrix
        @type G_list: networkx graph generator
        @param G_list: list of the graph to convert
        
        @type approximated: boolean
        @param approximated: true if use a hash function with probable collisions during feature decomposition. False no collision guaranteed
        
        @type keepdictionary: boolean
        @param keepdictionary: True if the instance-feature matrix is kept as a dictionary. Else is a csr_matrix
        
        @rtype: scipy.sparse.csr_matrix
        @return: the instance-features matrix
        """
        feature_dict = {}
        MapEncToId = None
        if not keepdictionary:
            MapEncToId = self.UniqueMap()
        for instance_id, G in enumerate(G_list):

            feature_dict.update(
                self.__transform(instance_id, G, approximated, MapEncToId))
        ve = convert_to_sparse_matrix(feature_dict)
        if self.normalization:
            ve = normalize(ve, norm='l2', axis=1)
        return ve
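
A hedged usage sketch for the method above; the class name WLGraphKernel, its constructor arguments, and a public transform wrapper are assumptions for illustration, as is the 'label' node attribute that the other examples read:

import networkx as nx

# Two toy labelled graphs: every node carries a 'label' attribute.
g1 = nx.path_graph(3)
g2 = nx.cycle_graph(4)
for g in (g1, g2):
    for v in g.nodes():
        g.nodes[v]['label'] = 'A'

kernel = WLGraphKernel(normalization=True)  # hypothetical constructor
X = kernel.transform([g1, g2])  # assumed to delegate to __transform_serial
print(X.shape)  # (2, number_of_features)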
Example 3
    def transform_serial_explicit(self, G_list, approximated=True):
        list_dict = {}
        for instance_id, G in enumerate(G_list):
            if self.show:
                drawGraph(G)
            list_dict.update(
                self.__transform_explicit(instance_id, G, approximated))

        return convert_to_sparse_matrix(list_dict)
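
drawGraph is not defined in the snippet. A plausible sketch using networkx's matplotlib drawing, labelling nodes with the 'label' attribute used throughout these examples; the helper is hypothetical:

import matplotlib.pyplot as plt
import networkx as nx

def drawGraph(G):
    """Hypothetical helper: render G with its node 'label' attributes."""
    pos = nx.spring_layout(G)
    nx.draw(G, pos, node_color='lightblue')
    nx.draw_networkx_labels(G, pos,
                            labels=nx.get_node_attributes(G, 'label'))
    plt.show()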
Example 4
    def transform(self, graph_list):
        """
        Compute, for each WL iteration, an orthogonalized feature
        representation of the input graphs and return one sparse
        instance-features matrix per iteration.
        """
        n = len(graph_list)  # number of graphs

        # List of the orthogonalized phi: phis[it] is the phi of the it-th
        # iteration of the WL test.
        phis = [{} for _ in range(self.h + 1)]

        # NodeIdToLabelId[i][j] is the label id of node j in graph i
        NodeIdToLabelId = [0] * n
        label_lookup = {}  # map from features to corresponding id
        label_counter = 0  # incremental value for label ids

        for i in range(n):  # for each graph
            NodeIdToLabelId[i] = {}

            for j in graph_list[i].nodes():  # for each node
                # Update label_lookup and label ids from the first iteration,
                # which considers the original node labels.
                label = graph_list[i].nodes[j]['label']
                if label not in label_lookup:
                    label_lookup[label] = label_counter
                    NodeIdToLabelId[i][j] = label_counter
                    label_counter += 1
                else:
                    NodeIdToLabelId[i][j] = label_lookup[label]

                if self.__version == 0:  # consider old FS features
                    feature = self.__fsfeatsymbol + str(label_lookup[label])
                    if (i, feature) not in phis[0]:
                        phis[0][(i, feature)] = 0.0
                    phis[0][(i, feature)] += 1.0

        ### MAIN LOOP
        it = 0
        # Label ids of the next iteration
        NewNodeIdToLabelId = copy.deepcopy(NodeIdToLabelId)

        # Each iteration computes the next labelling, which is the context of
        # the previous one.
        while it <= self.h:
            label_lookup = {}

            for i in range(n):  # for each graph
                for j in graph_list[i].nodes():  # consider each node's neighbourhood
                    neighbors = sorted(
                        NodeIdToLabelId[i][u]
                        for u in graph_list[i].neighbors(j))

                    # Compute the new label id from the node's current label
                    # and its sorted neighbour labels.
                    long_label_string = str(NodeIdToLabelId[i][j]) + self.__startsymbol
                    for u in neighbors:
                        long_label_string += str(u) + self.__conjsymbol
                    long_label_string = long_label_string[:-1] + self.__endsymbol

                    if long_label_string not in label_lookup:
                        label_lookup[long_label_string] = label_counter
                        NewNodeIdToLabelId[i][j] = label_counter
                        label_counter += 1
                    else:
                        NewNodeIdToLabelId[i][j] = label_lookup[long_label_string]

                    if self.__version == 0 and it < self.h:  # consider FS features
                        feature = self.__fsfeatsymbol + str(NewNodeIdToLabelId[i][j])
                        if (i, feature) not in phis[it]:
                            phis[it][(i, feature)] = 0.0
                        phis[it][(i, feature)] += 1.0

                    # Adding the feature with its context
                    if it < self.h:
                        feature = (str(NodeIdToLabelId[i][j]) +
                                   self.__contextsymbol +
                                   str(NewNodeIdToLabelId[i][j]))  # with context
                    else:
                        feature = str(NodeIdToLabelId[i][j])  # null context
                    if (i, feature) not in phis[it]:
                        phis[it][(i, feature)] = 0.0
                    phis[it][(i, feature)] += 1.0

            NodeIdToLabelId = copy.deepcopy(NewNodeIdToLabelId)  # update current label ids
            it += 1

        ves = [convert_to_sparse_matrix(phi) for phi in phis]
        if self.normalization:
            ves = [pp.normalize(ve, norm='l2', axis=1) for ve in ves]
        return ves
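
Example 4 interleaves the Weisfeiler-Lehman relabelling with feature counting. For clarity, here is the core compression step in isolation as a self-contained sketch; it uses a tuple signature instead of the class's private __startsymbol/__conjsymbol/__endsymbol string encoding, which is an equivalent but simplified choice:

def wl_relabel_once(graph, labels, label_lookup, label_counter):
    """One WL iteration: a node's new label id compresses its old label
    together with the sorted labels of its neighbours."""
    new_labels = {}
    for v in graph.nodes():
        signature = (labels[v],
                     tuple(sorted(labels[u] for u in graph.neighbors(v))))
        if signature not in label_lookup:
            label_lookup[signature] = label_counter
            label_counter += 1
        new_labels[v] = label_lookup[signature]
    return new_labels, label_counter

For example, on a path graph whose nodes all start with the same label, one iteration separates the endpoints (one neighbour) from the interior nodes (two neighbours):

import networkx as nx

g = nx.path_graph(4)
labels = {v: 0 for v in g.nodes()}
labels, counter = wl_relabel_once(g, labels, {}, 1)
print(labels)  # {0: 1, 1: 2, 2: 2, 3: 1}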
Example 5
    def transform(self, graph_list):
        """
        Compute the WL feature representation of the input graphs and
        return it as a sparse instance-features matrix.
        """
        n = len(graph_list)  # number of graphs

        # phi maps (graph index, feature) pairs to values: each row of the
        # resulting matrix is a graph, each column a feature.
        phi = {}

        # NodeIdToLabelId[i][j] is the label id of node j in graph i
        NodeIdToLabelId = [0] * n
        label_lookup = {}  # map from features to corresponding id
        label_counter = 0  # incremental value for label ids

        for i in range(n):  # for each graph
            NodeIdToLabelId[i] = {}

            for j in graph_list[i].nodes():  # for each node
                # Update label_lookup and label ids from the first iteration,
                # which considers the original node labels.
                label = graph_list[i].nodes[j]['label']
                if label not in label_lookup:
                    label_lookup[label] = label_counter
                    NodeIdToLabelId[i][j] = label_counter
                    label_counter += 1
                else:
                    NodeIdToLabelId[i][j] = label_lookup[label]

                feature = self.__fsfeatsymbol + str(label_lookup[label])
                if (i, feature) not in phi:
                    phi[(i, feature)] = 0.0
                phi[(i, feature)] += 1.0

        ### MAIN LOOP
        it = 0
        # Label ids of the next iteration
        NewNodeIdToLabelId = copy.deepcopy(NodeIdToLabelId)

        # Each iteration computes the next labelling, which is the context of
        # the previous one.
        while it < self.h:
            label_lookup = {}

            for i in range(n):  # for each graph
                for j in graph_list[i].nodes():  # consider each node's neighbourhood
                    neighbors = sorted(
                        NodeIdToLabelId[i][u]
                        for u in graph_list[i].neighbors(j))

                    # Compute the new label id from the node's current label
                    # and its sorted neighbour labels.
                    long_label_string = str(NodeIdToLabelId[i][j]) + self.__startsymbol
                    for u in neighbors:
                        long_label_string += str(u) + self.__conjsymbol
                    long_label_string = long_label_string[:-1] + self.__endsymbol

                    if long_label_string not in label_lookup:
                        label_lookup[long_label_string] = label_counter
                        NewNodeIdToLabelId[i][j] = label_counter
                        label_counter += 1
                    else:
                        NewNodeIdToLabelId[i][j] = label_lookup[long_label_string]

                    feature = self.__fsfeatsymbol + str(NewNodeIdToLabelId[i][j])
                    if (i, feature) not in phi:
                        phi[(i, feature)] = 0.0
                    phi[(i, feature)] += 1.0

            NodeIdToLabelId = copy.deepcopy(NewNodeIdToLabelId)  # update current label ids
            it += 1

        ve = convert_to_sparse_matrix(phi)
        if self.normalization:
            ve = pp.normalize(ve, norm='l2', axis=1)
        return ve
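
For reference, the final step in each example depends on scikit-learn's row-wise L2 normalization (imported as pp above, presumably sklearn.preprocessing). A minimal demonstration of what pp.normalize(ve, norm='l2', axis=1) does:

from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize

ve = csr_matrix([[3.0, 4.0],
                 [1.0, 0.0]])
ve_n = normalize(ve, norm='l2', axis=1)  # scale each row to unit Euclidean norm
print(ve_n.toarray())  # [[0.6 0.8]
                       #  [1.  0. ]]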