Ejemplo n.º 1
0
    def __init__(self, input_dim, connections):
        """Create a generic switchboard.

        The input and output dimension as well as dtype have to be fixed
        at initialization time.

        Keyword arguments:
        input_dim -- Dimension of the input data (number of connections).
        connections -- 1d Array or sequence with an entry for each output
            connection, containing the corresponding index of the
            input connection.
        """
        # check connections for inconsistencies
        if len(connections) == 0:
            err = "Received empty connection list."
            raise SwitchboardException(err)
        if numx.nanmax(connections) >= input_dim:
            err = ("One or more switchboard connection "
                   "indices exceed the input dimension.")
            raise SwitchboardException(err)
        # checks passed
        self.connections = numx.array(connections)
        output_dim = len(connections)
        super(Switchboard, self).__init__(input_dim=input_dim,
                                          output_dim=output_dim)
        # try to invert connections
        if self.input_dim == self.output_dim and len(
                numx.unique(self.connections)) == self.input_dim:
            self.inverse_connections = numx.argsort(self.connections)
        else:
            self.inverse_connections = None
Ejemplo n.º 2
0
    def _execute(self, x):
        #----------------------------------------------------
        # similar algorithm to that within self.stop_training()
        #  refer there for notes & comments on code
        #----------------------------------------------------
        N = self.data.shape[0]
        Nx = x.shape[0]
        W = numx.zeros((Nx, N), dtype=self.dtype)

        k, r = self.k, self.r
        d_out = self.output_dim
        Q_diag_idx = numx.arange(k)

        for row in range(Nx):
            #find nearest neighbors of x in M
            M_xi = self.data - x[row]
            nbrs = numx.argsort((M_xi**2).sum(1))[:k]
            M_xi = M_xi[nbrs]

            #find corrected covariance matrix Q
            Q = mult(M_xi, M_xi.T)
            if r is None and k > d_out:
                sig2 = (svd(M_xi, compute_uv=0))**2
                r = numx.sum(sig2[d_out:])
                Q[Q_diag_idx, Q_diag_idx] += r
            if r is not None:
                Q[Q_diag_idx, Q_diag_idx] += r

            #solve for weights
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()
            W[row, nbrs] = w

        #multiply weights by result of SVD from training
        return numx.dot(W, self.training_projection)
Ejemplo n.º 3
0
    def _execute(self, x):
        #----------------------------------------------------
        # similar algorithm to that within self.stop_training()
        #  refer there for notes & comments on code
        #----------------------------------------------------
        N = self.data.shape[0]
        Nx = x.shape[0]
        W = numx.zeros((Nx, N), dtype=self.dtype)

        k, r = self.k, self.r
        d_out = self.output_dim
        Q_diag_idx = numx.arange(k)

        for row in range(Nx):
            #find nearest neighbors of x in M
            M_xi = self.data-x[row]
            nbrs = numx.argsort( (M_xi**2).sum(1) )[:k]
            M_xi = M_xi[nbrs]

            #find corrected covariance matrix Q
            Q = mult(M_xi, M_xi.T)
            if r is None and k > d_out:
                sig2 = (svd(M_xi, compute_uv=0))**2
                r = numx.sum(sig2[d_out:])
                Q[Q_diag_idx, Q_diag_idx] += r
            if r is not None:
                Q[Q_diag_idx, Q_diag_idx] += r

            #solve for weights
            w = self._refcast(numx_linalg.solve(Q , numx.ones(k)))
            w /= w.sum()
            W[row, nbrs] = w

        #multiply weights by result of SVD from training
        return numx.dot(W, self.training_projection)
Ejemplo n.º 4
0
    def __init__(self, input_dim, connections):
        """Create a generic switchboard.

        The input and output dimension as well as dtype have to be fixed
        at initialization time.

        Keyword arguments:
        input_dim -- Dimension of the input data (number of connections).
        connections -- 1d Array or sequence with an entry for each output
            connection, containing the corresponding index of the
            input connection.
        """
        # check connections for inconsistencies
        if len(connections) == 0:
            err = "Received empty connection list."
            raise SwitchboardException(err)
        if numx.nanmax(connections) >= input_dim:
            err = ("One or more switchboard connection "
                   "indices exceed the input dimension.")
            raise SwitchboardException(err)
        # checks passed
        self.connections = numx.array(connections)
        output_dim = len(connections)
        super(Switchboard, self).__init__(input_dim=input_dim,
                                          output_dim=output_dim)
        # try to invert connections
        if (self.input_dim == self.output_dim and
            len(numx.unique(self.connections)) == self.input_dim):
            self.inverse_connections = numx.argsort(self.connections)
        else:
            self.inverse_connections = None
Ejemplo n.º 5
0
 def _inverse(self, x):
     """Take the mean of overlapping values."""
     n_y_cons = numx.bincount(self.connections)  # n. connections to y_i
     y_cons = numx.argsort(self.connections)  # x indices for y_i
     y = numx.zeros((len(x), self.input_dim))
     i_x_counter = 0  # counter for processed x indices
     i_y = 0  # current y index
     while True:
         n_cons = n_y_cons[i_y]
         if n_cons > 0:
             y[:, i_y] = old_div(
                 numx.sum(x[:, y_cons[i_x_counter:i_x_counter + n_cons]],
                          axis=1), n_cons)
             i_x_counter += n_cons
             if i_x_counter >= self.output_dim:
                 break
         i_y += 1
     return y
Ejemplo n.º 6
0
 def _inverse(self, x):
     """Take the mean of overlapping values."""
     n_y_cons = numx.bincount(self.connections)  # n. connections to y_i
     y_cons = numx.argsort(self.connections)  # x indices for y_i
     y = numx.zeros((len(x), self.input_dim))
     i_x_counter = 0  # counter for processed x indices
     i_y = 0  # current y index
     while True:
         n_cons = n_y_cons[i_y]
         if n_cons > 0:
             y[:,i_y] = old_div(numx.sum(x[:,y_cons[i_x_counter:
                                            i_x_counter + n_cons]],
                                 axis=1), n_cons)
             i_x_counter += n_cons
             if i_x_counter >= self.output_dim:
                 break
         i_y += 1
     return y
Ejemplo n.º 7
0
    def _stop_training(self):
        Cumulator._stop_training(self)

        if self.verbose:
            msg = ('training LLE on %i points'
                   ' in %i dimensions...' %
                   (self.data.shape[0], self.data.shape[1]))
            print msg

        # some useful quantities
        M = self.data
        N = M.shape[0]
        k = self.k
        r = self.r

        # indices of diagonal elements
        W_diag_idx = numx.arange(N)
        Q_diag_idx = numx.arange(k)

        if k > N:
            err = ('k=%i must be less than or '
                   'equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # do we need to automatically determine the regularization term?
        auto_reg = r is None

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        # build the weight matrix
        #XXX future work:
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, N), dtype=self.dtype)

        if self.verbose:
            print ' - constructing [%i x %i] weight matrix...' % W.shape

        for row in range(N):
            if learn_outdim:
                Q = Qs[row, :, :]
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M - M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]
                M_Mi = M_Mi[nbrs]
                # compute covariance matrix of distances
                Q = mult(M_Mi, M_Mi.T)

            # -----------------------------------------------
            #  compute weight vector based on neighbors
            # -----------------------------------------------

            #Covariance matrix may be nearly singular:
            # add a diagonal correction to prevent numerical errors
            if auto_reg:
                # automatic mode: correction is equal to the sum of
                # the (d_in-d_out) unused variances (as in deRidder &
                # Duin)
                if learn_outdim:
                    sig2 = sig2s[row, :]
                else:
                    sig2 = svd(M_Mi, compute_uv=0)**2
                r = numx.sum(sig2[self.output_dim:])
                Q[Q_diag_idx, Q_diag_idx] += r
            else:
                # Roweis et al instead use "a correction that
                #   is small compared to the trace" e.g.:
                # r = 0.001 * float(Q.trace())
                # this is equivalent to assuming 0.1% of the variance is unused
                Q[Q_diag_idx, Q_diag_idx] += r * Q.trace()

            #solve for weight
            # weight is w such that sum(Q_ij * w_j) = 1 for all i
            # XXX refcast is due to numpy bug: floats become double
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()

            #update row of the weight matrix
            W[nbrs, row] = w

        if self.verbose:
            msg = (' - finding [%i x %i] null space of weight matrix\n'
                   '     (may take a while)...' % (self.output_dim, N))
            print msg

        self.W = W.copy()
        #to find the null space, we need the bottom d+1
        #  eigenvectors of (W-I).T*(W-I)
        #Compute this using the svd of (W-I):
        W[W_diag_idx, W_diag_idx] -= 1.

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step
        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, self.output_dim + 1))
        else:
            # the following code does the same computation, but uses
            # symeig, which computes only the required eigenvectors, and
            # is much faster. However, it could also be more unstable...
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            WW[W_diag_idx, W_diag_idx] += 0.1
            sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)

        self.training_projection = U
Ejemplo n.º 8
0
    def _stop_training(self):
        Cumulator._stop_training(self)

        k = self.k
        M = self.data
        N = M.shape[0]

        if k > N:
            err = ('k=%i must be less than'
                   ' or equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        if self.verbose:
            print 'performing HLLE on %i points in %i dimensions...' % M.shape

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        d_out = self.output_dim

        #dp = d_out + (d_out-1) + (d_out-2) + ...
        dp = d_out * (d_out + 1) / 2

        if min(k, N) <= d_out:
            err = ('k=%i and n=%i (number of input data points) must be'
                   ' larger than output_dim=%i' % (k, N, d_out))
            raise TrainingException(err)

        if k < 1 + d_out + dp:
            wrn = ('The number of neighbours, k=%i, is smaller than'
                   ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
                   ' which might result in unstable results.' %
                   (k, 1 + d_out + dp))
            _warnings.warn(wrn, MDPWarning)

        #build the weight matrix
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, dp * N), dtype=self.dtype)

        if self.verbose:
            print ' - constructing [%i x %i] weight matrix...' % W.shape

        for row in range(N):
            if learn_outdim:
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M - M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]

            #-----------------------------------------------
            #  center the neighborhood using the mean
            #-----------------------------------------------
            nbrhd = M[nbrs]  # this makes a copy
            nbrhd -= nbrhd.mean(0)

            #-----------------------------------------------
            #  compute local coordinates
            #   using a singular value decomposition
            #-----------------------------------------------
            U, sig, VT = svd(nbrhd)
            nbrhd = U.T[:d_out]
            del VT

            #-----------------------------------------------
            #  build Hessian estimator
            #-----------------------------------------------
            Yi = numx.zeros((dp, k), dtype=self.dtype)
            ct = 0
            for i in range(d_out):
                Yi[ct:ct + d_out - i, :] = nbrhd[i] * nbrhd[i:, :]
                ct += d_out - i
            Yi = numx.concatenate(
                [numx.ones((1, k), dtype=self.dtype), nbrhd, Yi], 0)

            #-----------------------------------------------
            #  orthogonalize linear and quadratic forms
            #   with QR factorization
            #  and make the weights sum to 1
            #-----------------------------------------------
            if k >= 1 + d_out + dp:
                Q, R = numx_linalg.qr(Yi.T)
                w = Q[:, d_out + 1:d_out + 1 + dp]
            else:
                q, r = _mgs(Yi.T)
                w = q[:, -dp:]

            S = w.sum(0)  #sum along columns
            #if S[i] is too small, set it equal to 1.0
            # this prevents weights from blowing up
            S[numx.where(numx.absolute(S) < 1E-4)] = 1.0
            #print w.shape, S.shape, (w/S).shape
            #print W[nbrs, row*dp:(row+1)*dp].shape
            W[nbrs, row * dp:(row + 1) * dp] = w / S

        #-----------------------------------------------
        # To find the null space, we want the
        #  first d+1 eigenvectors of W.T*W
        # Compute this using an svd of W
        #-----------------------------------------------

        if self.verbose:
            msg = (' - finding [%i x %i] '
                   'null space of weight matrix...' % (d_out, N))
            print msg

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step

        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, d_out + 1))
            Y = U * numx.sqrt(N)
        else:
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            W_diag_idx = numx.arange(N)
            WW[W_diag_idx, W_diag_idx] += 0.01
            sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)
            Y = U * numx.sqrt(N)
            del WW
        del W

        #-----------------------------------------------
        # Normalize Y
        #
        # Alternative way to do it:
        #  we need R = (Y.T*Y)^(-1/2)
        #   do this with an SVD of Y            del VT

        #      Y = U*sig*V.T
        #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
        #            = V * (sig*sig.T) * V.T
        #            = V * sig^2 V.T
        #   so
        #      R = V * sig^-1 * V.T
        # The code is:
        #    U, sig, VT = svd(Y)
        #    del U
        #    S = numx.diag(sig**-1)
        #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
        #-----------------------------------------------
        if self.verbose:
            print ' - normalizing null space...'

        C = sqrtm(mult(Y.T, Y))
        self.training_projection = mult(Y, C)
Ejemplo n.º 9
0
    def _adjust_output_dim(self):
        # this function is called if we need to compute the number of
        # output dimensions automatically; some quantities that are
        # useful later are pre-calculated to spare precious time

        if self.verbose:
            print ' - adjusting output dim:'

        #otherwise, we need to compute output_dim
        #                  from desired_variance
        M = self.data
        k = self.k
        N, d_in = M.shape

        m_est_array = []
        Qs = numx.zeros((N, k, k))
        sig2s = numx.zeros((N, d_in))
        nbrss = numx.zeros((N, k), dtype='i')

        for row in range(N):
            #-----------------------------------------------
            #  find k nearest neighbors
            #-----------------------------------------------
            M_Mi = M - M[row]
            nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]
            M_Mi = M_Mi[nbrs]
            # compute covariance matrix of distances
            Qs[row, :, :] = mult(M_Mi, M_Mi.T)
            nbrss[row, :] = nbrs

            #-----------------------------------------------
            # singular values of M_Mi give the variance:
            #   use this to compute intrinsic dimensionality
            #   at this point
            #-----------------------------------------------
            sig2 = (svd(M_Mi, compute_uv=0))**2
            sig2s[row, :sig2.shape[0]] = sig2

            #-----------------------------------------------
            # use sig2 to compute intrinsic dimensionality of the
            #   data at this neighborhood.  The dimensionality is the
            #   number of eigenvalues needed to sum to the total
            #   desired variance
            #-----------------------------------------------
            sig2 /= sig2.sum()
            S = sig2.cumsum()
            m_est = S.searchsorted(self.desired_variance)
            if m_est > 0:
                m_est += (self.desired_variance - S[m_est - 1]) / sig2[m_est]
            else:
                m_est = self.desired_variance / sig2[m_est]
            m_est_array.append(m_est)

        m_est_array = numx.asarray(m_est_array)
        self.output_dim = int(numx.ceil(numx.median(m_est_array)))
        if self.verbose:
            msg = ('      output_dim = %i'
                   ' for variance of %.2f' %
                   (self.output_dim, self.desired_variance))
            print msg

        return Qs, sig2s, nbrss
Ejemplo n.º 10
0
    def _stop_training(self):
        Cumulator._stop_training(self)

        if self.verbose:
            msg = ('training LLE on %i points'
                   ' in %i dimensions...' % (self.data.shape[0],
                                             self.data.shape[1]))
            print msg

        # some useful quantities
        M = self.data
        N = M.shape[0]
        k = self.k
        r = self.r

        # indices of diagonal elements
        W_diag_idx = numx.arange(N)
        Q_diag_idx = numx.arange(k)

        if k > N:
            err = ('k=%i must be less than or '
                   'equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # do we need to automatically determine the regularization term?
        auto_reg = r is None

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        # build the weight matrix
        #XXX future work:
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, N), dtype=self.dtype)

        if self.verbose:
            print ' - constructing [%i x %i] weight matrix...' % W.shape

        for row in range(N):
            if learn_outdim:
                Q = Qs[row, :, :]
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M-M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k+1]
                M_Mi = M_Mi[nbrs]
                # compute covariance matrix of distances
                Q = mult(M_Mi, M_Mi.T)

            # -----------------------------------------------
            #  compute weight vector based on neighbors
            # -----------------------------------------------

            #Covariance matrix may be nearly singular:
            # add a diagonal correction to prevent numerical errors
            if auto_reg:
                # automatic mode: correction is equal to the sum of
                # the (d_in-d_out) unused variances (as in deRidder &
                # Duin)
                if learn_outdim:
                    sig2 = sig2s[row, :]
                else:
                    sig2 = svd(M_Mi, compute_uv=0)**2
                r = numx.sum(sig2[self.output_dim:])
                Q[Q_diag_idx, Q_diag_idx] += r
            else:
                # Roweis et al instead use "a correction that
                #   is small compared to the trace" e.g.:
                # r = 0.001 * float(Q.trace())
                # this is equivalent to assuming 0.1% of the variance is unused
                Q[Q_diag_idx, Q_diag_idx] += r*Q.trace()

            #solve for weight
            # weight is w such that sum(Q_ij * w_j) = 1 for all i
            # XXX refcast is due to numpy bug: floats become double
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()

            #update row of the weight matrix
            W[nbrs, row] = w

        if self.verbose:
            msg = (' - finding [%i x %i] null space of weight matrix\n'
                   '     (may take a while)...' % (self.output_dim, N))
            print msg

        self.W = W.copy()
        #to find the null space, we need the bottom d+1
        #  eigenvectors of (W-I).T*(W-I)
        #Compute this using the svd of (W-I):
        W[W_diag_idx, W_diag_idx] -= 1.

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step
        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, self.output_dim+1))
        else:
            # the following code does the same computation, but uses
            # symeig, which computes only the required eigenvectors, and
            # is much faster. However, it could also be more unstable...
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            WW[W_diag_idx, W_diag_idx] += 0.1
            sig, U = symeig(WW, range=(2, self.output_dim+1), overwrite=True)

        self.training_projection = U
Ejemplo n.º 11
0
    def _stop_training(self):
        Cumulator._stop_training(self)

        k = self.k
        M = self.data
        N = M.shape[0]

        if k > N:
            err = ('k=%i must be less than'
                   ' or equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        if self.verbose:
            print 'performing HLLE on %i points in %i dimensions...' % M.shape

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        d_out = self.output_dim

        #dp = d_out + (d_out-1) + (d_out-2) + ...
        dp = d_out*(d_out+1)/2

        if min(k, N) <= d_out:
            err = ('k=%i and n=%i (number of input data points) must be'
                   ' larger than output_dim=%i' % (k, N, d_out))
            raise TrainingException(err)

        if k < 1+d_out+dp:
            wrn = ('The number of neighbours, k=%i, is smaller than'
                   ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
                   ' which might result in unstable results.'
                   % (k, 1+d_out+dp))
            _warnings.warn(wrn, MDPWarning)

        #build the weight matrix
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, dp*N), dtype=self.dtype)

        if self.verbose:
            print ' - constructing [%i x %i] weight matrix...' % W.shape

        for row in range(N):
            if learn_outdim:
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M-M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k+1]

            #-----------------------------------------------
            #  center the neighborhood using the mean
            #-----------------------------------------------
            nbrhd = M[nbrs] # this makes a copy
            nbrhd -= nbrhd.mean(0)

            #-----------------------------------------------
            #  compute local coordinates
            #   using a singular value decomposition
            #-----------------------------------------------
            U, sig, VT = svd(nbrhd)
            nbrhd = U.T[:d_out]
            del VT

            #-----------------------------------------------
            #  build Hessian estimator
            #-----------------------------------------------
            Yi = numx.zeros((dp, k), dtype=self.dtype)
            ct = 0
            for i in range(d_out):
                Yi[ct:ct+d_out-i, :] = nbrhd[i] * nbrhd[i:, :]
                ct += d_out-i
            Yi = numx.concatenate([numx.ones((1, k), dtype=self.dtype),
                                   nbrhd, Yi], 0)

            #-----------------------------------------------
            #  orthogonalize linear and quadratic forms
            #   with QR factorization
            #  and make the weights sum to 1
            #-----------------------------------------------
            if k >= 1+d_out+dp:
                Q, R = numx_linalg.qr(Yi.T)
                w = Q[:, d_out+1:d_out+1+dp]
            else:
                q, r = _mgs(Yi.T)
                w = q[:, -dp:]

            S = w.sum(0) #sum along columns
            #if S[i] is too small, set it equal to 1.0
            # this prevents weights from blowing up
            S[numx.where(numx.absolute(S)<1E-4)] = 1.0
            #print w.shape, S.shape, (w/S).shape
            #print W[nbrs, row*dp:(row+1)*dp].shape
            W[nbrs, row*dp:(row+1)*dp] = w / S

        #-----------------------------------------------
        # To find the null space, we want the
        #  first d+1 eigenvectors of W.T*W
        # Compute this using an svd of W
        #-----------------------------------------------

        if self.verbose:
            msg = (' - finding [%i x %i] '
                   'null space of weight matrix...' % (d_out, N))
            print msg

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step

        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, d_out+1))
            Y = U*numx.sqrt(N)
        else:
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            W_diag_idx = numx.arange(N)
            WW[W_diag_idx, W_diag_idx] += 0.01
            sig, U = symeig(WW, range=(2, self.output_dim+1), overwrite=True)
            Y = U*numx.sqrt(N)
            del WW
        del W

        #-----------------------------------------------
        # Normalize Y
        #
        # Alternative way to do it:
        #  we need R = (Y.T*Y)^(-1/2)
        #   do this with an SVD of Y            del VT

        #      Y = U*sig*V.T
        #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
        #            = V * (sig*sig.T) * V.T
        #            = V * sig^2 V.T
        #   so
        #      R = V * sig^-1 * V.T
        # The code is:
        #    U, sig, VT = svd(Y)
        #    del U
        #    S = numx.diag(sig**-1)
        #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
        #-----------------------------------------------
        if self.verbose:
            print ' - normalizing null space...'

        C = sqrtm(mult(Y.T, Y))
        self.training_projection = mult(Y, C)
Ejemplo n.º 12
0
    def _adjust_output_dim(self):
        # this function is called if we need to compute the number of
        # output dimensions automatically; some quantities that are
        # useful later are pre-calculated to spare precious time

        if self.verbose:
            print ' - adjusting output dim:'

        #otherwise, we need to compute output_dim
        #                  from desired_variance
        M = self.data
        k = self.k
        N, d_in = M.shape

        m_est_array = []
        Qs = numx.zeros((N, k, k))
        sig2s = numx.zeros((N, d_in))
        nbrss = numx.zeros((N, k), dtype='i')

        for row in range(N):
            #-----------------------------------------------
            #  find k nearest neighbors
            #-----------------------------------------------
            M_Mi = M-M[row]
            nbrs = numx.argsort((M_Mi**2).sum(1))[1:k+1]
            M_Mi = M_Mi[nbrs]
            # compute covariance matrix of distances
            Qs[row, :, :] = mult(M_Mi, M_Mi.T)
            nbrss[row, :] = nbrs

            #-----------------------------------------------
            # singular values of M_Mi give the variance:
            #   use this to compute intrinsic dimensionality
            #   at this point
            #-----------------------------------------------
            sig2 = (svd(M_Mi, compute_uv=0))**2
            sig2s[row, :sig2.shape[0]] = sig2

            #-----------------------------------------------
            # use sig2 to compute intrinsic dimensionality of the
            #   data at this neighborhood.  The dimensionality is the
            #   number of eigenvalues needed to sum to the total
            #   desired variance
            #-----------------------------------------------
            sig2 /= sig2.sum()
            S = sig2.cumsum()
            m_est = S.searchsorted(self.desired_variance)
            if m_est > 0:
                m_est += (self.desired_variance-S[m_est-1])/sig2[m_est]
            else:
                m_est = self.desired_variance/sig2[m_est]
            m_est_array.append(m_est)

        m_est_array = numx.asarray(m_est_array)
        self.output_dim = int( numx.ceil( numx.median(m_est_array) ) )
        if self.verbose:
            msg = ('      output_dim = %i'
                   ' for variance of %.2f' % (self.output_dim,
                                              self.desired_variance))
            print msg

        return Qs, sig2s, nbrss