Example #1
    def fade(self, dat, h):
        """
        Fades this dataset into the dataset dat via the mask
        h. The mask is a vector of the same dimension as one data
        sample in self or dat. For each example xi in this dataset and
        the corresponding example yi in the dataset dat, the operation

        :math:`z_{ij} = h_j \cdot x_{ij} + (1-h_j)\cdot y_{ij}`

        is carried out. For that reason h must have entries between
        zero and one.

        :param dat: other dataset which is faded into this one. It must have the same dimension as this dataset.
        :type dat: natter.DataModule.Data
        :param h: mask of the same dimension as a single vector in dat. All entries must be between zero and one.
        :type h: numpy.array
        """

        if dat.size() != self.size():
            raise Errors.DimensionalityError(
                'Dimensionalities of two datasets do not match!')
        if len(h.shape) < 2:
            h = reshape(h, (self.X.shape[0], 1))

        self.X = self.X * h + (1 - h) * dat.X
        self.addToHistory([
            'Faded with dataset %s with history' % (dat.name),
            list(dat.history)
        ])
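
A minimal numpy-only sketch of the masking formula above; the arrays X, Y and h below are hypothetical stand-ins for the data matrices of the two Data objects and the mask:

import numpy as np

X = np.random.randn(3, 5)        # this dataset: 3 dimensions x 5 examples
Y = np.random.randn(3, 5)        # dataset that is faded in
h = np.array([1.0, 0.5, 0.0])    # one weight per dimension, all in [0, 1]

# z_ij = h_j * x_ij + (1 - h_j) * y_ij, broadcast over the examples
Z = h[:, None] * X + (1 - h[:, None]) * Y

# dimension 0 stays X, dimension 2 becomes Y, dimension 1 is a 50/50 blend
assert np.allclose(Z[0], X[0]) and np.allclose(Z[2], Y[2])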
Example #2
    def size(self, dim=(0, 1)):
        """
        Returns the size of the data matrix *X*, analogous to numpy.shape.

        :param dim: Dimension for which the size is to be computed (=0 --> number of dimensions; =1 --> number of examples)
        :type dim: int or tuple of int
        :returns: The requested dimensionality
        :rtype: int or tuple of int
        """
        if not (type(dim) == int) and len(dim) == 2:
            sh = shape(self.X)
            if len(sh) < 2:
                return (1, sh[0])
            else:
                return sh
        elif type(dim) == int:
            sh = shape(self.X)
            if len(sh) < 2:
                if dim == 0:
                    return 1
                else:
                    return sh[0]
            else:
                return sh[dim]
        else:
            raise Errors.DimensionalityError(
                'Data matrices cannot have more than two dimensions!')
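
A short usage sketch of the three call patterns, assuming `from natter.DataModule import Data` and that the Data constructor takes the data matrix as its first argument (hypothetical values):

import numpy as np
from natter.DataModule import Data   # assumed import path

dat = Data(np.random.randn(3, 100))  # 3 dimensions x 100 examples

dat.size()    # -> (3, 100), the full shape of X
dat.size(0)   # -> 3, the number of dimensions (rows of X)
dat.size(1)   # -> 100, the number of examples (columns of X)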
Example #3
    def scale(self, s):
        """
        Scales *X* with the array *s*. If *s* has as many entries as
        there are dimensions, each dimension (row of *X*) is scaled
        with the corresponding entry of *s*. If *s* has as many
        entries as there are examples, each example (column of *X*)
        is scaled with *s*. If *s* has the same shape as *X*, *X* and
        *s* are multiplied element-wise.

        *s* can also be stored in a Data object.

        :param s: The scale factor
        :type s: numpy.array or natter.DataModule.Data
        """
        name = ''
        scaledwhat = ''

        if not (type(s) == ndarray):  # then we assume that s is a data object
            name = s.name
            s = s.X
        else:
            name = 'array'

        sh = s.shape
        if len(sh) == 1 or sh[0] == 1 or sh[1] == 1:
            if sh[0] == self.X.shape[0]:
                s = reshape(s, (self.X.shape[0], 1))
                scaledwhat = 'each dimension'
            elif sh[0] == self.X.shape[1]:
                s = reshape(s, (1, self.X.shape[1]))
                scaledwhat = 'each example'
            elif (sh[0] == 1 and sh[1] != self.X.shape[1]) or (
                    sh[1] == 1 and sh[0] != self.X.shape[0]):
                raise Errors.DimensionalityError(
                    'Dimensionality of s must either be equal to the number of examples or the number of dimensions'
                )
        elif sh[0] != self.X.shape[0] or sh[1] != self.X.shape[1]:
            raise Errors.DimensionalityError('Dimensions of s do not match!')
        else:
            scaledwhat = 'whole data'
        self.history.append('Scaled ' + scaledwhat + ' with ' + name)

        self.X = self.X * s
        return self
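
The three broadcasting cases reduce to the following plain-numpy operations (no Data object involved):

import numpy as np

X = np.arange(6, dtype=float).reshape(2, 3)   # 2 dimensions x 3 examples

s_dim = np.array([1.0, 10.0])       # one factor per dimension
X * s_dim.reshape(2, 1)             # scales each dimension (row) separately

s_ex = np.array([1.0, 2.0, 3.0])    # one factor per example
X * s_ex.reshape(1, 3)              # scales each example (column) separately

S = 2.0 * np.ones((2, 3))           # same shape as X
X * S                               # plain element-wise multiplication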
Example #4
def MarginalHistogramEqualization(psource, ptarget=None):
    """
    Creates a non-linear filter that changes the marginal distribution
    of each single data dimension independently. To do so, it
    takes two ISA models and performs a histogram equalization on each
    of the marginal distributions.

    *Important*: The ISA models must have one-dimensional subspaces!

    If ptarget is omitted, it will be set to a N(0,I) Gaussian by default.

    :param psource: Source distribution which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type psource: natter.Distributions.ISA
    :param ptarget: Target distribution which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type ptarget: natter.Distributions.ISA
    :returns: A non-linear filter that maps the marginal distributions of the data from the respective psource marginals to the respective ptarget marginals
    :rtype: natter.Transforms.NonlinearTransform

    """
    from natter.Distributions import ISA, Gaussian

    if not isinstance(psource, ISA):
        raise TypeError(
            'Transform.TransformFactory.MarginalHistogramEqualization: psource must be an ISA model'
        )
    else:
        psource = psource.copy()

    if ptarget is not None and not isinstance(ptarget, ISA):
        raise TypeError(
            'Transform.TransformFactory.MarginalHistogramEqualization: ptarget must be an ISA model'
        )

    for ss in psource['S']:
        if len(ss) != 1:
            raise Errors.DimensionalityError(
                'Transform.TransformFactory.MarginalHistogramEqualization: psource must have one-dimensional subspaces'
            )

    if ptarget is None:
        ptarget = ISA(S=[(k, ) for k in range(psource['n'])],
                      P=[Gaussian(n=1) for k in range(psource['n'])])
    else:
        ptarget = ptarget.copy()

    g = lambda dat: reduce(lambda x, y: x.stack(y), [
        ptarget['P'][k].ppf(psource['P'][k].cdf(dat[k, :]))
        for k in range(psource['n'])
    ])
    gdet = lambda y: psource.loglik(y) - ptarget.loglik(g(y))

    name = 'Marginal Histogram Equalization Transform: %s --> %s' % (
        psource['P'][0].name, ptarget['P'][0].name)
    return NonlinearTransform(g, name, logdetJ=gdet)
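
The per-dimension mapping is the classical change-of-variables trick x -> F_target^{-1}(F_source(x)). A scipy-only sketch for a single marginal, Gaussianizing Laplacian-distributed samples (the distribution choice is illustrative and not tied to natter):

import numpy as np
from scipy import stats

x = stats.laplace.rvs(size=10000)    # samples from the source marginal

# push x through the source cdf, then through the target inverse cdf
z = stats.norm.ppf(stats.laplace.cdf(x))

print(z.mean(), z.std())             # approximately 0 and 1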
Example #5
    def plot(self, ax=None, plottype='scatter', **kwargs):
        """
        Plots a scatter plot of the data points. This method works only for two-dimensional data.

        :param ax: If specified the data is plotted to this axes object.
        :type ax: matplotlib.pyplot.Axis
        :param plottype: plot type; possible choices are 'scatter' or 'loghist' (default 'scatter').
        :type plottype: string
        :param kwargs: arguments passed directly to matplotlib.pyplot.Axis.scatter function
        :type kwargs: dict
        :raises: natter.Auxiliary.Errors.DimensionalityError
        :returns: The axes object.
        :rtype: matplotlib.pyplot.Axis
        """
        if not len(self.X) == 2:
            raise Errors.DimensionalityError(
                'Data object must have dimension 2 for plotting!')
        else:
            if ax is None:
                fig = figure()
                ax = fig.add_axes([.1, .1, .8, .8])

            if plottype == 'scatter':
                ax.scatter(self.X[0], self.X[1], s=.1, **kwargs)
            else:
                ind = ~(any(isnan(self.X), axis=0)
                        | any(isinf(self.X), axis=0))

                mx = amax(abs(self.X[:, ind].ravel()))
                ex = linspace(-mx, mx, self.X.shape[1] // 4000)
                ey = linspace(-mx, mx, self.X.shape[1] // 4000)

                H, ex, ey = histogram2d(self.X[0, :],
                                        self.X[1, :],
                                        bins=(ex, ey))
                ax.contour(.5 * (ex[1:] + ex[:-1]), .5 * (ey[1:] + ey[:-1]),
                           log(H), **kwargs)

                if ('colors' in kwargs) and (type(kwargs['colors'])
                                             == str) and ('label' in kwargs):
                    ra = ax.axis()
                    ax.plot(ra[1] + 1,
                            ra[3] + 1,
                            color=kwargs['colors'],
                            label=kwargs['label'])
                    ax.axis(ra)

            return ax
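
A standalone matplotlib sketch of what the 'loghist' branch produces, with an explicit bin count instead of the shape[1]/4000 heuristic and log(1 + count) to avoid taking the log of empty bins:

import numpy as np
import matplotlib.pyplot as plt

X = np.random.randn(2, 10000)               # 2 x N data matrix
mx = np.abs(X).max()
edges = np.linspace(-mx, mx, 50)

H, ex, ey = np.histogram2d(X[0], X[1], bins=(edges, edges))
centers = 0.5 * (edges[1:] + edges[:-1])

fig, ax = plt.subplots()
ax.contour(centers, centers, np.log(1 + H).T)   # transpose so rows index y
plt.show()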
Example #6
    def __invert__(self):
        """
        Overloads the ~ operator. Returns a new LinearTransform object
        with the inverse of the linear transform matrix W.

        :returns: A new LinearTransform object representing the inverted matrix W.
        :rtype: natter.Transforms.LinearTransform
        """
        sh = shape(self.W)
        if sh[0] == sh[1]:
            tmp = list(self.history)
            tmp.append('inverted')
            return LinearTransform(inv(self.W), self.name, tmp)
        else:
            raise Errors.DimensionalityError(
                'Transform.__invert__(): Transform must be square!')
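
A pure-numpy sketch of the guarantee behind the operator: applying the inverted matrix undoes the original filter (no natter objects involved):

import numpy as np
from numpy.linalg import inv

W = np.array([[2.0, 1.0],
              [0.0, 1.0]])      # a square, invertible filter matrix
X = np.random.randn(2, 5)       # 2 dimensions x 5 examples

Y = W.dot(X)                    # forward transform
X_back = inv(W).dot(Y)          # what the inverted transform applies

assert np.allclose(X, X_back)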
Example #7
    def histogram(self, dat, cdf=False, ax=None, plotlegend=True, bins=None):
        """
        Plots a histogram of the data points in dat. This works only
        for 1-dimensional distributions. It also plots the pdf of the distribution.

        :param dat: data points that enter the histogram
        :type dat: natter.DataModule.Data
        :param cdf: boolean that indicates whether the cdf should be plotted or not (default: False)
        :param ax: axes object the histogram is plotted into if it is not None.
        :param plotlegend: boolean indicating whether a legend should be plotted (default: True)
        :param bins: number of bins to be used. If None (default), the bins are automatically determined.
        """

        sh = shape(dat.X)
        if len(sh) > 1 and sh[0] > 1:
            raise Errors.DimensionalityError(
                'Cannot plot data with more than one dimension!')

        if ax is None:
            fig = plt.figure()
            ax = fig.add_axes([.1, .1, .8, .8])
        x = squeeze(dat.X)
        if bins is None:
            bins = max(sh) // 200
        n, bins, patches = ax.hist(x,
                                   bins=bins,
                                   normed=1,
                                   facecolor='blue',
                                   alpha=0.8,
                                   lw=0.0)

        bincenters = 0.5 * (bins[1:] + bins[:-1])
        y = squeeze(self.pdf(Data(bincenters)))
        ax.plot(bincenters, y, 'k--', linewidth=2)

        if hasattr(self, 'cdf') and cdf:
            z = squeeze(self.cdf(Data(bincenters)))
            ax.plot(bincenters, z, 'k.-', linewidth=2)
            if plotlegend:
                plt.legend(('p.d.f.', 'c.d.f.', 'Histogram'), frameon=False)
        elif plotlegend:
            plt.legend(('p.d.f.', 'Histogram'), frameon=False)

        ax.set_xlabel('x')
        ax.set_ylabel('Probability')
        ax.set_xlim(min(x), max(x))
        ax.grid(True)
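
A self-contained sketch of the same idea with scipy and current matplotlib (density=True is the modern spelling of the normed=1 argument used above):

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

x = stats.norm.rvs(size=5000)

fig, ax = plt.subplots()
n, bins, patches = ax.hist(x, bins=50, density=True, alpha=0.8, lw=0.0)

centers = 0.5 * (bins[1:] + bins[:-1])
ax.plot(centers, stats.norm.pdf(centers), 'k--', linewidth=2)   # overlay the p.d.f.
ax.set_xlabel('x')
ax.set_ylabel('Probability')
plt.show()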
Example #8
    def stack(self, dat):
        """
        Stacks the current dataset with a copy of the dataset dat. Both must
        have the same number of examples.

        :param dat: Other data object with the same number of examples
        :type dat: natter.DataModule.Data
        """

        if dat.numex() != self.numex():
            raise Errors.DimensionalityError(
                'Number of examples of two datasets do not match!')

        self.X = vstack((self.X, dat.copy().X))

        self.addToHistory([
            'Stacked with dataset %s with history' % (dat.name),
            list(dat.history)
        ])

        return self
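
In plain numpy this is a row-wise vstack of two matrices that share the same number of columns (examples):

import numpy as np

A = np.random.randn(2, 100)   # 2 dimensions x 100 examples
B = np.random.randn(3, 100)   # 3 more dimensions for the same 100 examples

C = np.vstack((A, B))         # stacked data matrix
assert C.shape == (5, 100)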
Example #9
def SSA2D(linearfilter=None, data=None, *args, **kwargs):
    """
    Creates a nonlinear filter either from the given linear SSA filter
    or learns the linear filter on given data set using the
    LinearTransformFactory.SSA() method. The SSA2D filter computes the
    sum of the squared responses of the 1st and 2nd, 3rd and 4th, ...
    components and thus returns n/2 dimensions.

    :param linearfilter: the linear filter stage of the nonlinear filter
    :type linearfilter: natter.Transforms.LinearTransform
    :param data: Alternatively data on which the linear filter is learned
    :type data: natter.DataModule.Data

    :returns: the resulting non-linear transform
    :rtype: natter.Transforms.NonlinearTransform

    """
    if linearfilter is None and data is not None:
        U = SSA(data, *args, **kwargs)
    elif linearfilter is not None:
        U = linearfilter
    else:
        raise ValueError(
            'in NonlinearTransformFactory.SSA2D both linearfilter and data cannot be None'
        )

    if mod(U.W.shape[0], 2) == 1:
        raise Errors.DimensionalityError(
            'Transform must have even dimension number')

    g = ElementWise(lambda x: x**2)
    g.name = 'Elementwise squaring'
    M = LinearTransform(eye(U.W.shape[0]).reshape(U.W.shape[0] // 2, 2,
                                                  U.W.shape[0]).sum(1),
                        name='Summing over 2D subspaces')
    nonlinearfilter = M * g * U
    nonlinearfilter.name = '2D SSA filter'

    return nonlinearfilter
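
The pooling matrix M built from the reshaped identity sums over consecutive pairs of components; for n = 4 it looks like this (numpy only):

import numpy as np

n = 4
M = np.eye(n).reshape(n // 2, 2, n).sum(1)
print(M)
# [[1. 1. 0. 0.]
#  [0. 0. 1. 1.]]
# applied after element-wise squaring, row k yields y_{2k}**2 + y_{2k+1}**2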
Example #10
    def logDetJacobian(self, dat=None):
        """
        Computes the logarithm of the absolute value of the Jacobian
        determinant of the linear transformation (which in this case is
        just the log-determinant of W). If *dat* is specified, it returns
        as many copies of the log determinant as there are data points
        in *dat*.

        :param dat: Data for which the log-det-Jacobian is to be computed.
        :type dat: natter.DataModule.Data
        :returns: The log-det-Jacobian
        :rtype: float (if dat=None) or numpy.array (if dat!=None)

        """

        sh = shape(self.W)
        if sh[0] == sh[1]:
            if dat is None:
                return log(abs(det(self.W)))
            else:
                return array(dat.size(1) * [log(abs(det(self.W)))])
        else:
            raise Errors.DimensionalityError(
                'Can only compute log det of square filter matrix')
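
A small numpy sketch of the two return values, the scalar log|det W| and its replication over the number of data points:

import numpy as np
from numpy.linalg import det

W = np.array([[2.0, 0.0],
              [1.0, 3.0]])

logdet = np.log(np.abs(det(W)))   # scalar log|det W| (= log 6 here)

n_examples = 5
np.array(n_examples * [logdet])   # one copy per data point, as returned above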
Example #11
def fminboundnD(f, x0, LB, UB, tol=1e-3, *args):
    """

    Multidimensional gradient free optimization with box constraints on the variables.


    I ported this function from a Matlab function someone else posted
    on the internet. Unfortunately, I cannot find the source anymore,
    so I cannot credit the author properly. If you were the author,
    please contact me and you will be acknowledged ([email protected]).

    :param f: function to be minimized (takes a vector x and args)
    :type f: python function
    :param x0: starting value
    :type x0: numpy.ndarray
    :param LB: lower bounds
    :type LB: numpy.ndarray
    :param UB: upper bounds
    :type UB: numpy.ndarray
    :param tol: convergence tolerance
    :type tol: float
    :param args:  additional parameters for the function f
    :type args: list

    :returns: optimized x
    :rtype: numpy.ndarray
    """
    xsize = shape(x0)
    x0 = x0.flatten()
    n = len(x0)

    if (n != len(LB)) or (n != len(UB)):
        raise Errors.DimensionalityError(
            'x0 is incompatible in size with either LB or UB.')

    # 0 --> unconstrained variable
    # 1 --> lower bound only
    # 2 --> upper bound only
    # 3 --> dual finite bounds
    # 4 --> fixed variable
    BoundClass = [0] * n

    for i in xrange(n):
        k = isfinite(LB[i]) + 2 * isfinite(UB[i])
        BoundClass[i] = k
        if (k == 3) and (LB[i] == UB[i]):
            BoundClass[i] = 4

    # transform starting values into their unconstrained
    # surrogates. Check for infeasible starting guesses.
    x0u = x0.copy()
    k = 0

    for i in xrange(n):
        if BoundClass[i] == 1:
            # lower bound only
            if x0[i] <= LB[i]:
                # infeasible starting value. Use bound.
                x0u[k] = 0.0
            else:
                x0u[k] = abs(x0[i] - LB[i])
            k += 1
        elif BoundClass[i] == 2:
            # upper bound only
            if x0[i] >= UB[i]:
                # infeasible starting value. use bound.
                x0u[k] = 0.0
            else:
                x0u[k] = abs(UB[i] - x0[i])
            k += 1
        elif BoundClass[i] == 3:
            # lower and upper bounds
            if x0[i] <= LB[i]:
                # infeasible starting value
                x0u[k] = -pi / 2.0
            elif x0[i] >= UB[i]:
                # infeasible starting value
                x0u[k] = pi / 2
            else:
                x0u[k] = 2 * (x0[i] - LB[i]) / (UB[i] - LB[i]) - 1.0
                x0u[k] = 2.0 * pi + arcsin(
                    max(array([-1.0, min(array([1.0, x0u[k]]))])))
            k += 1
        elif BoundClass[i] == 0:
            x0u[k] = x0[i]
            k += 1

    if k <= n:
        x0u = x0u[:k]

    # were all the variables fixed?
    if len(x0u) == 0:
        # All variables were fixed. quit immediately, setting the
        # appropriate parameters, then return.

        # undo the variable transformations into the original space
        x = _xtransform(x0u, LB, UB, BoundClass, n)

        # final reshape
        x = reshape(x, xsize)
        return x

    # now we can call fmin
    f2 = lambda t: f(_xtransform(t, LB, UB, BoundClass, n), *args)

    xu = fmin(f2, x0u, xtol=tol)  # f2 already closes over *args
    # undo the variable transformations into the original space
    x = _xtransform(xu, LB, UB, BoundClass, n)

    # final reshape
    return reshape(x, xsize)
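
The key idea is the fminsearchbnd-style change of variables: every doubly bounded coordinate is represented by an unconstrained angle and mapped back through a sine, so the unconstrained fmin can be used. A minimal one-dimensional illustration of that dual-bound transform (the helper _xtransform above presumably implements this back-mapping for all bound classes; the code below is a sketch, not the natter implementation):

import numpy as np
from scipy.optimize import fmin

LB, UB = 0.0, 2.0                          # box constraints for one variable

def to_bounded(t):
    # map an unconstrained t into [LB, UB] via a sine transform
    return LB + (UB - LB) * (np.sin(t) + 1.0) / 2.0

def objective(t):
    x = to_bounded(t[0])
    return (x - 5.0) ** 2                  # unconstrained optimum (x = 5) lies outside the box

t_opt = fmin(objective, np.array([0.0]), xtol=1e-6, disp=0)
print(to_bounded(t_opt[0]))                # ~2.0: the solution sits on the upper bound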