Ejemplo n.º 1
0
    def fit(self, X, y=None):
        """Fit a dataset, for a transformer.

        Parameters
        ----------
        X : iterable
            Each element must be an iterable with at most three features and at
            least one. The first that is obligatory is a valid graph structure
            (adjacency matrix or edge_dictionary) while the second is
            node_labels and the third edge_labels (that fitting the given graph
            format). The train samples.

        y : None
            There is no need of a target in a transformer, yet the pipeline API
            requires this parameter.

        Returns
        -------
        self : object
        Returns self.

        """
        self._method_calling = 1
        self._is_transformed = False
        # Input validation and parsing
        self.initialize()
        if X is None:
            raise ValueError('`fit` input cannot be None')
        else:
            if not isinstance(X, collections.Iterable):
                raise TypeError('input must be an iterable\n')

            i = 0
            out = list()
            gs = list()
            self._labels_hash_dict, labels_hash_set = dict(), set()
            for (idx, x) in enumerate(iter(X)):
                is_iter = isinstance(x, collections.Iterable)
                if is_iter:
                    x = list(x)
                if is_iter and len(x) in [0, 1, 2, 3]:
                    if len(x) == 0:
                        warnings.warn('Ignoring empty element on index: '
                                      + str(idx))
                        continue
                    elif len(x) == 1:
                        warnings.warn(
                            'Ignoring empty element on index: '
                            + str(i) + '\nLabels must be provided.')
                    else:
                        x = Graph(x[0], x[1], {}, self._graph_format)
                        vertices = list(x.get_vertices(purpose="any"))
                        Labels = x.get_labels(purpose="any")
                elif type(x) is Graph:
                    vertices = list(x.get_vertices(purpose="any"))
                    Labels = x.get_labels(purpose="any")
                else:
                    raise TypeError('each element of X must be either '
                                    'a graph object or a list with at '
                                    'least a graph like object and '
                                    'node labels dict \n')

                g = (vertices, Labels,
                     {n: x.neighbors(n, purpose="any") for n in vertices})

                # collect all the labels
                labels_hash_set |= set(itervalues(Labels))
                gs.append(g)
                i += 1

            if i == 0:
                raise ValueError('parsed input is empty')

            # Hash labels
            if len(labels_hash_set) > self._max_number:
                warnings.warn('Number of labels is smaller than'
                              'the biggest possible.. '
                              'Collisions will appear on the '
                              'new labels.')

                # If labels exceed the biggest possible size
                nl, nrl = list(), len(labels_hash_set)
                while nrl > self._max_number:
                    nl += self.random_state_.choice(self._max_number,
                                                    self._max_number,
                                                    replace=False).tolist()
                    nrl -= self._max_number
                if nrl > 0:
                    nl += self.random_state_.choice(self._max_number,
                                                    nrl,
                                                    replace=False).tolist()
                # unify the collisions per element.

            else:
                # else draw n random numbers.
                nl = self.random_state_.choice(self._max_number, len(labels_hash_set),
                                               replace=False).tolist()

            self._labels_hash_dict = dict(zip(labels_hash_set, nl))

            # for all graphs
            for vertices, labels, neighbors in gs:
                new_labels = {v: self._labels_hash_dict[l]
                              for v, l in iteritems(labels)}
                g = (vertices, new_labels, neighbors,)
                gr = {0: self.NH_(g)}
                for r in range(1, self.R):
                    gr[r] = self.NH_(gr[r-1])

                # save the output for all levels
                out.append(gr)

        self.X = out

        # Return the transformer
        return self
Ejemplo n.º 2
0
    def transform(self, X):
        """Calculate the kernel matrix, between given and fitted dataset.

        Parameters
        ----------
        X : iterable
            Each element must be an iterable with at most three features and at
            least one. The first that is obligatory is a valid graph structure
            (adjacency matrix or edge_dictionary) while the second is
            node_labels and the third edge_labels (that fitting the given graph
            format). If None the kernel matrix is calculated upon fit data.
            The test samples.

        Returns
        -------
        K : numpy array, shape = [n_targets, n_input_graphs]
            corresponding to the kernel matrix, a calculation between
            all pairs of graphs between target an features

        """
        self._method_calling = 3
        # Check is fit had been called
        check_is_fitted(self, ['X'])

        # Input validation and parsing
        if X is None:
            raise ValueError('`transform` input cannot be None')
        else:
            if not isinstance(X, collections.Iterable):
                raise TypeError('input must be an iterable\n')

            i = 0
            out = list()
            for (idx, x) in enumerate(iter(X)):
                is_iter = isinstance(x, collections.Iterable)
                if is_iter:
                    x = list(x)
                if is_iter and len(x) in [0, 1, 2, 3]:
                    if len(x) == 0:
                        warnings.warn('Ignoring empty element on index: '
                                      + str(idx))
                        continue
                    elif len(x) == 1:
                        warnings.warn(
                            'Ignoring empty element on index: '
                            + str(i) + '\nLabels must be provided.')
                    else:
                        x = Graph(x[0], x[1], {}, self._graph_format)
                        vertices = list(x.get_vertices(purpose="any"))
                        Labels = x.get_labels(purpose="any")
                elif type(x) is Graph:
                    vertices = list(x.get_vertices(purpose="any"))
                    Labels = x.get_labels(purpose="any")
                else:
                    raise TypeError('each element of X must be either '
                                    'a graph object or a list with at '
                                    'least a graph like object and '
                                    'node labels dict \n')

                # Hash based on the labels of fit
                new_labels = {v: self._labels_hash_dict.get(l, None)
                              for v, l in iteritems(Labels)}

                # Radix sort the other
                g = ((vertices, new_labels) +
                     ({n: x.neighbors(n, purpose="any")
                       for n in vertices},))

                gr = {0: self.NH_(g)}
                for r in range(1, self.R):
                    gr[r] = self.NH_(gr[r-1])

                # save the output for all levels
                out.append(gr)
                i += 1

                if i == 0:
                    raise ValueError('parsed input is empty')

        # Transform - calculate kernel matrix
        # Output is always normalized
        km = self._calculate_kernel_matrix(out)
        self._is_transformed = True
        return km