Example #1
# assumed imports: numpy, pylab, and the PyMVPA helpers used below
# (get_samples_by_attr, squared_euclidean_distance), e.g. available after
# `from mvpa2.suite import *`
import numpy as np
import pylab as pl


def plot_samples_distance(dataset, sortbyattr=None):
    """Plot the Euclidean distances between all samples of a dataset.

    Parameters
    ----------
    dataset : Dataset
      Dataset providing the samples.
    sortbyattr : None or str
      If None, the sample distances are shown in the order in which the
      samples appear in the dataset. Alternatively, the name of a samples
      attribute can be given, which will then be used to sort/group the
      samples, e.g. to investigate the similarity of samples by label or
      by chunks.
    """
    if sortbyattr is not None:
        # reorder the samples so that all samples sharing an attribute value
        # end up next to each other
        slicer = []
        for attr in dataset.sa[sortbyattr].unique:
            slicer += \
                get_samples_by_attr(dataset, sortbyattr, attr).tolist()
        samples = dataset.samples[slicer]
    else:
        samples = dataset.samples

    # pairwise Euclidean distances between all (possibly reordered) samples
    ed = np.sqrt(squared_euclidean_distance(samples))

    # show the distance matrix as a heatmap with a colorbar
    pl.imshow(ed)
    pl.colorbar()
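To put the helper in context, here is a minimal usage sketch (not part of the original source): it builds a small random PyMVPA dataset and plots its sample-distance matrix grouped by target label. The shapes, attribute values, and the `dataset_wizard` call are illustrative assumptions.

# usage sketch, assuming PyMVPA is installed
import numpy as np
import pylab as pl
from mvpa2.suite import *   # assumed to provide dataset_wizard and the helpers above

samples = np.random.randn(20, 10)                  # 20 samples, 10 features
ds = dataset_wizard(samples,
                    targets=np.repeat(['a', 'b'], 10),
                    chunks=np.tile(np.arange(5), 4))
plot_samples_distance(ds, sortbyattr='targets')    # group samples by label
pl.show()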
Example #2
    def _train(self, ds):
        # local binding
        chunks_attr = self.__chunks_attr
        params = self.__params
        param_est = self.__param_est

        # populate a dictionary with tuples of (mean, std) for all chunks, or
        # a global value that is used for the whole data
        if params is not None:
            # we got mean and std already
            if not isinstance(params, dict):
                # a single parameter set was given: wrap it into a dict; a dict
                # is assumed to already hold per-chunk parameters
                params = {'__all__': params}
        else:
            # no parameters given, need to estimate them
            if param_est is not None:
                est_attr, est_attr_values = param_est
                # which samples to use for estimation
                est_ids = set(get_samples_by_attr(ds, est_attr,
                                                  est_attr_values))
            else:
                est_ids = slice(None)

            # now we can estimate either once for all samples, or per chunk
            if chunks_attr is not None:
                # per chunk estimate
                params = {}
                for c in ds.sa[chunks_attr].unique:
                    slicer = np.where(ds.sa[chunks_attr].value == c)[0]
                    if not isinstance(est_ids, slice):
                        slicer = list(est_ids.intersection(set(slicer)))
                    params[c] = self._compute_params(ds.samples[slicer])
            else:
                # global estimate
                params = {'__all__': self._compute_params(ds.samples[est_ids])}

        self.__params_dict = params
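The interesting part is how the trained state is stored: either one parameter tuple per unique chunk value, or a single tuple under the key `'__all__'`. Below is a standalone sketch of that bookkeeping (not from the original source), assuming the parameters are per-feature mean and standard deviation, as the comments in the code suggest `_compute_params` returns.

# standalone sketch of the per-chunk vs. global parameter dictionary
import numpy as np

def compute_params(samples):
    # column-wise mean/std, analogous to a (mean, std) estimator
    return samples.mean(axis=0), samples.std(axis=0)

samples = np.random.randn(12, 4)
chunks = np.repeat([0, 1, 2], 4)          # three chunks of four samples each

# per-chunk estimate: one (mean, std) tuple per unique chunk value
params = {c: compute_params(samples[chunks == c]) for c in np.unique(chunks)}

# global estimate: a single entry keyed '__all__', as in the code above
params_global = {'__all__': compute_params(samples)}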