Beispiel #1
0
    def parse_input(self, X):
        """Parse and create features for the NSPD kernel.

        Parameters
        ----------
        X : iterable
            For the input to pass the test, we must have:
            Each element must be an iterable with at most three features and at
            least one. The first that is obligatory is a valid graph structure
            (adjacency matrix or edge_dictionary) while the second is
            node_labels and the third edge_labels (that correspond to the given
            graph format). A valid input also consists of graph type objects.

        Returns
        -------
        M : dict
            A dictionary with keys all the distances from 0 to self.d
            and values the the np.arrays with rows corresponding to the
            non-null input graphs and columns to the enumerations of tuples
            consisting of pairs of hash values and radius, from all the given
            graphs of the input (plus the fitted one's on transform).

        """
        if not isinstance(X, collections.Iterable):
            raise TypeError('input must be an iterable\n')
        else:
            # Hold the number of graphs
            ng = 0

            # Holds all the data for combinations of r, d
            data = collections.defaultdict(dict)

            # Index all keys for combinations of r, d
            all_keys = collections.defaultdict(dict)
            for (idx, x) in enumerate(iter(X)):
                is_iter = False
                if isinstance(x, collections.Iterable):
                    is_iter, x = True, list(x)
                if is_iter and len(x) in [0, 3]:
                    if len(x) == 0:
                        warnings.warn('Ignoring empty element' +
                                      ' on index: ' + str(idx))
                        continue
                    else:
                        g = Graph(x[0], x[1], x[2])
                        g.change_format("adjacency")
                elif type(x) is Graph:
                    g = Graph(
                        x.get_adjacency_matrix(),
                        x.get_labels(purpose="adjacency", label_type="vertex"),
                        x.get_labels(purpose="adjacency", label_type="edge"))
                else:
                    raise TypeError('each element of X must have either ' +
                                    'a graph with labels for node and edge ' +
                                    'or 3 elements consisting of a graph ' +
                                    'type object, labels for vertices and ' +
                                    'labels for edges.')

                # Bring to the desired format
                g.change_format(self._graph_format)

                # Take the vertices
                vertices = set(g.get_vertices(purpose=self._graph_format))

                # Extract the dicitionary
                ed = g.get_edge_dictionary()

                # Convert edges to tuples
                edges = {(j, k) for j in ed.keys() for k in ed[j].keys()}

                # Extract labels for nodes
                Lv = g.get_labels(purpose=self._graph_format)
                # and for edges
                Le = g.get_labels(purpose=self._graph_format,
                                  label_type="edge")

                # Produce all the neighborhoods and the distance pairs
                # up to the desired radius and maximum distance
                N, D, D_pair = g.produce_neighborhoods(self.r,
                                                       purpose="dictionary",
                                                       with_distances=True,
                                                       d=self.d)

                # Hash all the neighborhoods
                H = self._hash_neighborhoods(vertices, edges, Lv, Le, N,
                                             D_pair)

                if self._method_calling == 1:
                    for d in filterfalse(lambda x: x not in D,
                                         range(self.d + 1)):
                        for (A, B) in D[d]:
                            for r in range(self.r + 1):
                                key = (H[r, A], H[r, B])
                                keys = all_keys[r, d]
                                idx = keys.get(key, None)
                                if idx is None:
                                    idx = len(keys)
                                    keys[key] = idx
                                data[r, d][ng, idx] = data[r, d].get(
                                    (ng, idx), 0) + 1

                elif self._method_calling == 3:
                    for d in filterfalse(lambda x: x not in D,
                                         range(self.d + 1)):
                        for (A, B) in D[d]:
                            # Based on the edges of the bidirected graph
                            for r in range(self.r + 1):
                                keys = all_keys[r, d]
                                fit_keys = self._fit_keys[r, d]
                                key = (H[r, A], H[r, B])
                                idx = fit_keys.get(key, None)
                                if idx is None:
                                    idx = keys.get(key, None)
                                    if idx is None:
                                        idx = len(keys) + len(fit_keys)
                                        keys[key] = idx
                                data[r, d][ng, idx] = data[r, d].get(
                                    (ng, idx), 0) + 1
                ng += 1
            if ng == 0:
                raise ValueError('parsed input is empty')

            if self._method_calling == 1:
                # A feature matrix for all levels
                M = dict()

                for (key, d) in filterfalse(lambda a: len(a[1]) == 0,
                                            iteritems(data)):
                    indexes, data = zip(*iteritems(d))
                    rows, cols = zip(*indexes)
                    M[key] = csr_matrix((data, (rows, cols)),
                                        shape=(ng, len(all_keys[key])),
                                        dtype=np.int64)
                self._fit_keys = all_keys
                self._ngx = ng

            elif self._method_calling == 3:
                # A feature matrix for all levels
                M = dict()

                for (key, d) in filterfalse(lambda a: len(a[1]) == 0,
                                            iteritems(data)):
                    indexes, data = zip(*iteritems(d))
                    rows, cols = zip(*indexes)
                    M[key] = csr_matrix(
                        (data, (rows, cols)),
                        shape=(ng,
                               len(all_keys[key]) + len(self._fit_keys[key])),
                        dtype=np.int64)

                self._ngy = ng

            return M
    def parse_input(self, X):
        """Parse and create features for multiscale_laplacian kernel.

        Parameters
        ----------
        X : iterable
            For the input to pass the test, we must have:
            Each element must be an iterable with at most three features and at
            least one. The first that is obligatory is a valid graph structure
            (adjacency matrix or edge_dictionary) while the second is
            node_labels and the third edge_labels (that correspond to the given
            graph format). A valid input also consists of graph type objects.

        Returns
        -------
        out : list
            Tuples consisting of the Adjacency matrix, phi, phi_outer
            dictionary of neihborhood indexes and inverse laplacians
            up to level self.L and the inverse Laplacian of A.

        """
        if not isinstance(X, collections.Iterable):
            raise TypeError('input must be an iterable\n')
        else:
            ng = 0
            out = list()
            start = time.time()
            for (idx, x) in enumerate(iter(X)):
                is_iter = False
                if isinstance(x, collections.Iterable):
                    is_iter, x = True, list(x)
                if is_iter and len(x) in [0, 2, 3]:
                    if len(x) == 0:
                        warnings.warn('Ignoring empty element ' +
                                      'on index: ' + str(idx))
                        continue
                    else:
                        x = Graph(x[0], x[1], {}, self._graph_format)
                elif type(x) is not Graph:
                    x.desired_format(self._graph_format)
                else:
                    raise TypeError('each element of X must be either a ' +
                                    'graph or an iterable with at least 1 ' +
                                    'and at most 3 elements\n')
                ng += 1
                phi_d = x.get_labels()
                A = x.get_adjacency_matrix()
                N = x.produce_neighborhoods(r=self.L, sort_neighbors=False)
                try:
                    phi = np.array([list(phi_d[i]) for i in range(A.shape[0])])
                except TypeError:
                    raise TypeError('Features must be iterable and castable ' +
                                    'in total to a numpy array.')
                phi_outer = np.dot(phi, phi.T)

                Lap = laplacian(A).astype(float)
                _increment_diagonal_(Lap, self.heta)
                L = inv(Lap)

                Q = dict()
                for level in range(1, self.L + 1):
                    Q[level] = dict()
                    for (key, item) in iteritems(N[level]):
                        Q[level][key] = dict()
                        Q[level][key]["n"] = np.array(item)
                        if len(item) < A.shape[0]:
                            laplac = laplacian(A[item, :][:,
                                                          item]).astype(float)
                            _increment_diagonal_(laplac, self.heta)
                            laplac = inv(laplac)
                        else:
                            laplac = L
                        Q[level][key]["l"] = laplac

                out.append((A, phi, phi_outer, Q, L))

            if self.verbose:
                print("Preprocessing took:", time.time() - start, "s.")
            if ng == 0:
                raise ValueError('parsed input is empty')

            return out