Example #1
def test_circular_flow():
    flow = CircularOnlineFlow([
        BogusNode(input_dim=2, output_dim=2),
        BogusOnlineDiffDimNode(input_dim=2, output_dim=4),
        BogusNode(input_dim=4, output_dim=4),
        BogusOnlineDiffDimNode(input_dim=4, output_dim=2)
    ])

    for inp_node_idx in range(len(flow)):
        flow.set_input_node(inp_node_idx)
        for out_node_idx in range(len(flow)):
            flow.set_output_node(out_node_idx)
            inp = numx.ones((1, flow[0].input_dim))
            flow.train(inp)
            out = flow(inp)
            assert (out.shape[1] == flow[out_node_idx].output_dim)
            assert (flow.get_stored_input().shape[1] == flow[3].output_dim)

    flow = CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])

    flow.set_flow_iterations(3)
    inp = numx.ones((1, 2))
    flow.train(inp)
    out = flow(inp)
    assert_array_equal(out, inp * 140)
    assert (flow[1].get_current_train_iteration() == flow._flow_iterations)
Example #2
def test_online_flow_node():
    rng = mdp.numx_rand.RandomState(seed=1)
    flow1 = OnlineFlow([
        BogusNode(input_dim=2, output_dim=2),
        BogusOnlineNode(input_dim=2, output_dim=2),
        BogusNode(input_dim=2, output_dim=2),
        BogusOnlineDiffDimNode(input_dim=2, output_dim=4),
    ])
    node1 = mdp.hinet.OnlineFlowNode(flow1, numx_rng=rng)

    flow2 = OnlineFlow([
        BogusNode(input_dim=2, output_dim=2),
        BogusOnlineNode(input_dim=2, output_dim=2),
        BogusNode(input_dim=2, output_dim=2),
        BogusOnlineDiffDimNode(input_dim=2, output_dim=4),
    ])
    node2 = mdp.hinet.FlowNode(flow2)

    # number of training phases = number of trainable nodes + 1 (if the first node is not trainable)
    assert (node1.get_remaining_train_phase() == 3)

    inp = numx.ones((2, 2))
    out1 = node1(inp)
    out2 = node2(inp)
    assert_array_equal(out1, out2)
    assert (node1.is_training())
    assert (flow1[1].is_training())
    assert (flow1[3].is_training())
    assert (not node2.is_training())
    for _n in node2.flow:
        assert (not _n.is_training())

    assert (node1.numx_rng == rng)
    assert (node1.numx_rng == node1._flow[1].numx_rng)
    assert (node1.numx_rng == node1._flow[3].numx_rng)

    flow = mdp.OnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusOnlineNodeReturnSum(),
        BogusNode(input_dim=5, output_dim=5)
    ])

    node = mdp.hinet.OnlineFlowNode(flow)

    inp = numx.ones((1, 5))
    assert (flow[1].get_current_train_iteration() == 0)
    out = node(inp)
    out = node(inp)
    # check if all the node dimensions are fixed.
    for _n in flow:
        assert ((_n.input_dim, _n.output_dim) == (inp.shape[1], inp.shape[1]))
    assert ((node.input_dim, node.output_dim) == (inp.shape[1], inp.shape[1]))
    # check that training was carried out only once
    assert (flow[1].get_current_train_iteration() == 1)
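
The Bogus* helper nodes used throughout these snippets are defined elsewhere in the test suite. Below is a minimal sketch consistent with the assertions in these tests (execute adds 1, training accumulates the input, and online nodes receive one sample row at a time); the OnlineNode hook names are assumptions based on MDP's online-mode API, not the suite's actual definitions.

import mdp
from mdp import numx

class BogusNode(mdp.Node):
    """Non-trainable helper: adds 1 on execute, subtracts 1 on inverse."""
    @staticmethod
    def is_trainable():
        return False
    def _execute(self, x):
        return x + 1
    def _inverse(self, y):
        return y - 1

class BogusOnlineNode(mdp.OnlineNode):
    """Trainable helper: accumulates its input in ``self.sum``."""
    def _check_params(self, x):
        # lazily allocate the accumulator once input_dim is known
        # (assumed hook name)
        if not hasattr(self, 'sum'):
            self.sum = numx.zeros((1, self.input_dim))
    def _train(self, x):
        self.sum += x
    def _execute(self, x):
        return x + 1
    def _inverse(self, y):
        return y - 1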
Example #3
def test_flow_train_args():
    #check args len
    flow = _get_default_flow(node_class=BogusOnlineNodeWithTrainArgs)
    _args_needed = flow._train_args_needed_list
    _keys = flow._train_arg_keys_list
    assert (mdp.numx.all(_args_needed))
    for val in _keys:
        assert (len(val) == 2)
    flow = _get_default_flow()
    _args_needed = flow._train_args_needed_list
    _keys = flow._train_arg_keys_list
    assert (not mdp.numx.all(_args_needed))
    for val in _keys:
        assert (len(val) == 0)

    # train with args
    flow = _get_default_flow(node_class=BogusOnlineNodeWithTrainArgs)
    flow[-2] = BogusOnlineNode()

    inp = numx.ones((1, 3)) * 2
    x = [(inp, (1, 2), None, (3, 4))]
    flow.train(x)
    out = flow(inp)
    for i, f in enumerate(flow):
        assert_array_equal(f.sum, i + inp)
    assert_array_equal(out, len(flow) + inp)
    rec = flow.inverse(out)
    assert_array_equal(rec, inp)
Example #4
    def _execute(self, x):
        degree = self._degree
        dim = self.input_dim
        n = x.shape[1]

        # preallocate memory
        dexp = numx.zeros((self.output_dim, x.shape[0]), dtype=self.dtype)
        # copy monomials of degree 1
        dexp[0:n, :] = x.T

        k = n
        prec_end = 0
        next_lens = numx.ones((dim+1, ))
        next_lens[0] = 0
        for i in range(2, degree+1):
            prec_start = prec_end
            prec_end += nmonomials(i-1, dim)
            prec = dexp[prec_start:prec_end, :]

            lens = next_lens[:-1].cumsum(axis=0)
            next_lens = numx.zeros((dim+1, ))
            for j in range(dim):
                factor = prec[lens[j]:, :]
                len_ = factor.shape[0]
                dexp[k:k+len_, :] = x[:, j] * factor
                next_lens[j+1] = len_
                k = k+len_

        return dexp.T
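
The loop above relies on an nmonomials helper that is not shown. A plausible sketch, assuming the standard stars-and-bars count of monomials of exact degree `degree` in `nvariables` variables (an assumption, not the library's definition):

from math import comb

def nmonomials(degree, nvariables):
    # number of monomials of exact degree `degree` in `nvariables`
    # variables: C(degree + nvariables - 1, degree)
    return comb(degree + nvariables - 1, degree)

For example, nmonomials(2, 2) == 3, matching the degree-2 monomials x^2, x*y and y^2.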
Example #5
    def _train(self, x, labels):
        """Update the internal structures according to the input data 'x'.

        x -- a matrix having different variables on different columns
             and observations on the rows.
        labels -- can be a list, tuple or array of labels (one for each data point)
              or a single label, in which case all input data is assigned to
              the same class.
        """

        # if weights are not yet initialised, initialise them
        if not len(self.weights):
            self.weights = numx.ones(self.input_dim)

        for xi, labeli in mdp.utils.izip_stretched(x, labels):
            new_weights = self.weights
            new_offset = self.offset_weight

            rate = self.learning_rate * (labeli - self._label(xi))
            for j in range(self.input_dim):
                new_weights[j] = self.weights[j] + rate * xi[j]

            # the offset corresponds to a node with input 1 all the time
            new_offset = self.offset_weight + rate * 1

            self.weights = new_weights
            self.offset_weight = new_offset
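
The inner loop above is the classical perceptron update. Writing the learning rate as $\eta$ and the current prediction as $\hat{y}_i$ (the value of self._label(x_i)), it computes

$$w \leftarrow w + \eta\,(y_i - \hat{y}_i)\,x_i, \qquad b \leftarrow b + \eta\,(y_i - \hat{y}_i),$$

where $b$ is the offset weight; when the prediction is correct the update vanishes.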
Example #6
 def test_mixed_dict(self):
     """Test msg being a dict containing an array."""
     rescont = MessageResultContainer()
     msg1 = {
         "f": 2,
         "a": np.zeros((10, 3), 'int'),
         "b": "aaa",
         "c": 1,
     }
     msg2 = {
         "a": np.ones((15, 3), 'int'),
         "b": "bbb",
         "c": 3,
         "d": 1,
     }
     rescont.add_message(msg1)
     rescont.add_message(msg2)
     combined_msg = rescont.get_message()
     a = np.zeros((25, 3), 'int')
     a[10:] = 1
     reference_msg = {"a": a, "c": 4, "b": "aaabbb", "d": 1, "f": 2}
     assert np.all(combined_msg["a"] == reference_msg["a"])
     combined_msg.pop("a")
     reference_msg.pop("a")
     assert combined_msg == reference_msg
Example #8
def test_VartimeSFANode3():
    """Test that passing the same unit time steps in different ways
    (scalar, array of ones, or None) yields the same output - without
    time dependence.
    """
    numx.random.seed(seed=10)
    # sample
    x1 = numx.random.random((1500, 2))
    x2 = numx.random.random((1500, 2))
    x3 = numx.random.random((1500, 2))
    xlen = x1.shape[0]
    dt_const = 1.
    dt_ones = numx.ones((xlen - 1, ))
    dt_none = None
    # initialize the nodes
    varsfa1 = VartimeSFANode()
    varsfa2 = VartimeSFANode()
    varsfa3 = VartimeSFANode()

    # update the estimators
    varsfa1.train(x1, dt=dt_const, time_dep=False)
    varsfa1.train(x2, dt=dt_const, time_dep=False)
    varsfa1.train(x3, dt=dt_const, time_dep=False)
    varsfa2.train(x1, dt=dt_ones, time_dep=False)
    varsfa2.train(x2, dt=dt_ones, time_dep=False)
    varsfa2.train(x3, dt=dt_ones, time_dep=False)
    varsfa3.train(x1, dt=dt_none, time_dep=False)
    varsfa3.train(x2, dt=dt_none, time_dep=False)
    varsfa3.train(x3, dt=dt_none, time_dep=False)
    # quit
    varsfa1.stop_training()
    varsfa2.stop_training()
    varsfa3.stop_training()

    assert_array_almost_equal(abs(varsfa1.sf), abs(varsfa2.sf), decimal=10)
    assert_array_almost_equal(abs(varsfa1.sf), abs(varsfa3.sf), decimal=10)
Example #9
 def _add_constant(self, x):
     """Add a constant term to the vector 'x'.
     x -> [1 x]
     """
     return numx.concatenate((numx.ones((x.shape[0], 1),
                                         dtype=self.dtype), x), axis=1)
Example #11
def test_VartimeCovarianceMatrix6():
    """Test that passing the same unit time steps in different ways
    (scalar, array of ones, or None) yields the same output - with
    time dependence.
    """
    numx.random.seed(seed=10)
    # sample
    x1 = numx.random.random((1500, 2))
    x2 = numx.random.random((1500, 2))
    x3 = numx.random.random((1500, 2))
    xlen = x1.shape[0]
    dt_const = 1.
    dt_ones = numx.ones((xlen, ))
    dt_none = None

    varcov1 = VartimeCovarianceMatrix()
    varcov2 = VartimeCovarianceMatrix()
    varcov3 = VartimeCovarianceMatrix()

    varcov1.update(x1, dt_const, time_dep=True)
    varcov2.update(x1, dt_ones[1:], time_dep=True)
    varcov3.update(x1, dt_none, time_dep=True)
    varcov1.update(x2, dt_const, time_dep=True)
    varcov2.update(x2, dt_ones, time_dep=True)
    varcov3.update(x2, dt_none, time_dep=True)
    varcov1.update(x3, dt_const, time_dep=True)
    varcov2.update(x3, dt_ones, time_dep=True)
    varcov3.update(x3, dt_none, time_dep=True)

    varC1, varAvg1, varTlen1 = varcov1.fix(center=True)
    varC2, varAvg2, varTlen2 = varcov2.fix(center=True)
    varC3, varAvg3, varTlen3 = varcov3.fix(center=True)

    assert_array_almost_equal(varC1, varC2, decimal=10)
    assert_array_almost_equal(varC1, varC3, decimal=10)
Example #12
def test_circular_online_flow_node_external_with_iterations():

    #external training with iterations.
    flow1 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    flow_iters = 5
    flow1.set_flow_iterations(flow_iters)
    node1 = mdp.hinet.CircularOnlineFlowNode(
        flow1, numx_rng=mdp.numx_rand.RandomState(seed=1))
    flow2 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    node2 = mdp.hinet.OnlineFlowNode(
        flow2, numx_rng=mdp.numx_rand.RandomState(seed=1))

    # number of phases = flow_iters * (number of trainable nodes + 1 if the first node is not trainable)
    assert (node1.get_remaining_train_phase() == 3 * flow_iters)

    inp = numx.ones((1, 2))
    out1 = node1(inp)  # one train call (spanning flow_iters=5 internal iterations) and execute
    x = inp
    for _ in range(flow_iters):
        node2.train(x)
        x = node2.execute(x)
    assert_array_equal(out1, x)
    assert_array_equal(node1.get_stored_input(), x)
Example #13
    def _execute(self, x):
        #----------------------------------------------------
        # similar algorithm to that within self.stop_training()
        #  refer there for notes & comments on code
        #----------------------------------------------------
        N = self.data.shape[0]
        Nx = x.shape[0]
        W = numx.zeros((Nx, N), dtype=self.dtype)

        k, r = self.k, self.r
        d_out = self.output_dim
        Q_diag_idx = numx.arange(k)

        for row in range(Nx):
            #find nearest neighbors of x in M
            M_xi = self.data-x[row]
            nbrs = numx.argsort((M_xi**2).sum(1))[:k]
            M_xi = M_xi[nbrs]

            #find corrected covariance matrix Q
            Q = mult(M_xi, M_xi.T)
            if r is None and k > d_out:
                sig2 = (svd(M_xi, compute_uv=0))**2
                r = numx.sum(sig2[d_out:])
                Q[Q_diag_idx, Q_diag_idx] += r
            # elif (rather than a second if), so the correction is not
            # applied twice on the row where r was just estimated
            elif r is not None:
                Q[Q_diag_idx, Q_diag_idx] += r

            #solve for weights
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()
            W[row, nbrs] = w

        #multiply weights by result of SVD from training
        return numx.dot(W, self.training_projection)
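
The per-row solve above is the textbook LLE weight computation: with neighborhood differences $\eta_j = x - n_j$ and local Gram matrix $Q_{jl} = \eta_j \cdot \eta_l$, the reconstruction weights minimize

$$\min_w \Big\|x - \sum_j w_j\, n_j\Big\|^2 = \min_w\ w^\top Q\, w \quad \text{s.t.} \quad \sum_j w_j = 1,$$

and the Lagrangian solution is obtained by solving $Q\,w = \mathbf{1}$ and rescaling $w \leftarrow w / \sum_j w_j$, exactly as in the code.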
Example #14
def test_circular_online_flow_node_internal_stored_inputs():

    # internal training with stored inputs (check one-loop output against the default output)
    flow1 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    flow1.ignore_input(True)
    node1 = mdp.hinet.CircularOnlineFlowNode(
        flow1, numx_rng=mdp.numx_rand.RandomState(seed=1))
    flow2 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    node2 = mdp.hinet.OnlineFlowNode(
        flow2, numx_rng=mdp.numx_rand.RandomState(seed=1))

    inp = numx.ones((1, 2))
    node1.set_stored_input(inp)
    out1 = node1(inp)  # One train and execute
    out2 = node2(inp)  # One train and execute
    assert_array_equal(out1, out2)
    assert_array_equal(node1._stored_input, out2)
Example #15
def test_circular_online_flow_node_internal_training():

    # internal training (check errors without stored inputs)
    flow1 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    flow1.ignore_input(True)
    node1 = mdp.hinet.CircularOnlineFlowNode(
        flow1, numx_rng=mdp.numx_rand.RandomState(seed=1))
    flow2 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    node2 = mdp.hinet.OnlineFlowNode(
        flow2, numx_rng=mdp.numx_rand.RandomState(seed=1))

    assert (
        node1.get_remaining_train_phase() == node2.get_remaining_train_phase())
    assert (node1.get_stored_input() is None)
    inp = numx.ones((1, 2))
    try:
        node1.train(inp)
        raise Exception("node trained internally without any stored inputs.")
    except mdp.TrainingException:
        pass
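
The try/except block above hand-rolls an expected-exception check. Assuming pytest is available in the test environment, the same intent can be written more directly:

import pytest

with pytest.raises(mdp.TrainingException):
    node1.train(inp)  # no stored input yet, so internal training must fail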
Example #16
def test_circular_online_flow_node_different_output():

    # default setting with different output_node. Check stored_input
    flow1 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])
    flow1.set_output_node(2)
    node1 = mdp.hinet.CircularOnlineFlowNode(
        flow1, numx_rng=mdp.numx_rand.RandomState(seed=1))
    flow2 = mdp.CircularOnlineFlow([
        BogusNode(),
        BogusOnlineNodeReturnSum(),
        BogusNode(),
        BogusOnlineNodeReturnSum()
    ])

    node2 = mdp.hinet.OnlineFlowNode(
        flow2, numx_rng=mdp.numx_rand.RandomState(seed=1))

    assert (
        node1.get_remaining_train_phase() == node2.get_remaining_train_phase())
    assert (node1.get_stored_input() is None)
    inp = numx.ones((1, 2))
    out1 = node1(inp)  # One train and execute
    out2 = node2(inp)  # One train and execute
    assert_array_equal(node1.get_stored_input(), out2)
    assert (not (out1 != out2).all())
Example #18
def verify_ICANode(icanode, rand_func=uniform, vars=3, N=8000, prec=3):
    dim = (N, vars)
    mat, mix, inp = get_random_mix(rand_func=rand_func, mat_dim=dim)
    icanode.train(inp)
    act_mat = icanode.execute(inp)
    cov = mdp.utils.cov2((mat - mean(mat, axis=0)) / std(mat, axis=0), act_mat)
    maxima = numx.amax(abs(cov), axis=0)
    assert_array_almost_equal(maxima, numx.ones(vars), prec)
Example #20
    def _execute(self, x):
        """Expansion of the data.

        :param x: The data to be expanded. Observations/samples must
            be along the first axis, variables along the second.
        :type x: numpy.ndarray

        :returns: The expansion of x with observations/samples along the
            first axis and corresponding function values (expansion)
            along the second axis.
        :rtype: numpy.ndarray
        """

        num_vars = x.shape[1]
        num_samples = x.shape[0]
        deg = self.degree

        _with0 = hasattr(self, "with0") and self.with0
        dim = self.expanded_dim(num_vars)
        dim += 1 if not _with0 else 0
        result = np.empty(
            [num_samples, dim], dtype=self.dtype)

        if self.check:
            self.check_domain(x)

        result[:, 0] = 1.
        pos = 1

        if deg > 1:
            for cur_var in range(num_vars):
                # preset index for current variable
                pos, n, special = self.r_init(result, x, pos, cur_var)
                # single variable recursion
                while n <= deg:
                    # recursion step
                    result[:, pos] = self.recf(
                        result, x, special, n, cur_var, pos)
                    n += 1
                    pos += 1
        # in case input is unreasonable
        elif deg == 1:
            result[:, 0] = 1
            for i in range(num_vars):
                result[:, i+1] = x[:, i]
        elif self.with0:
            return np.ones((num_samples, num_vars))
        else:
            return None

        todoList = []
        for i in range(1, num_vars):
            todoList.append((i*deg, i, 1))
        # compute the rest of the "simplex" product
        while len(todoList) > 0:
            # pos = process(*todoList.pop(0), deg, pos, result, todoList)
            pos = process(*todoList.pop(0)+(deg, pos, result, todoList))
        return (result if _with0 else result[:, 1:])
Example #21
def test_flow():
    flow = _get_default_flow()
    inp = numx.ones((1, 3)) * 2
    flow.train(inp)
    out = flow(inp)
    for i, f in enumerate(flow):
        assert_array_equal(f.sum, i + inp)
    assert_array_equal(out, len(flow) + inp)
    rec = flow.inverse(out)
    assert_array_equal(rec, inp)
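
Assuming the helper behavior sketched after Example #2 (each node adds 1 on execute and accumulates its input during training), the assertions follow by simple bookkeeping; a standalone check of the arithmetic:

import numpy as np

inp = np.ones((1, 3)) * 2
n_nodes = 3  # length of the default flow (assumed)
x = inp.copy()
for i in range(n_nodes):
    # node i sees inp + i, which is what `f.sum == i + inp` asserts
    assert np.array_equal(x, inp + i)
    x = x + 1
assert np.array_equal(x, inp + n_nodes)  # matches `out == len(flow) + inp`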
Example #22
    def _add_constant(self, x):
        """Add a constant term to the vector 'x'.
        x -> [1 x]

        :param x: The vector a constant term is appended to.
        :type x: numpy.ndarray

        :return: The altered vector.
        :rtype: numpy.ndarray
        """
        return numx.concatenate((numx.ones((x.shape[0], 1),
                                           dtype=self.dtype), x), axis=1)
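
A quick standalone sketch of the effect of this transform on hypothetical data:

import numpy as np

x = np.arange(6.).reshape(3, 2)
xb = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1)
# xb == [[1., 0., 1.],
#        [1., 2., 3.],
#        [1., 4., 5.]]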
Example #23
 def _stop_training(self):
     """Organize the sample data."""
     ordered_samples = []
     for label in self._label_samples:
         ordered_samples.append(numx.concatenate(self._label_samples[label]))
         self.ordered_labels.append(label)
     del self._label_samples
     self.samples = numx.concatenate(ordered_samples)
     self.n_samples = len(self.samples)
     self.sample_label_indices = numx.concatenate(
         [numx.ones(len(ordered_samples[i]), dtype="int32") * i for i in range(len(self.ordered_labels))]
     )
Example #25
def test_clone_online_layer():
    nodes = BogusOnlineNode(input_dim=2, output_dim=2)
    layer = mdp.hinet.CloneOnlineLayer(
        nodes, n_nodes=2, numx_rng=mdp.numx_rand.RandomState(seed=1))
    assert (layer.input_dim == 4)
    assert (layer.output_dim == 4)
    assert (layer.numx_rng == nodes.numx_rng)

    inp = numx.ones((1, 4))
    layer.train(inp)
    out = layer(inp)
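    # the shared clone node is trained on both column blocks of `inp`,
    # so its accumulated sum equals inp[:, :2] + inp[:, 2:4] (= 2 * ones here)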
    assert_array_equal(nodes.sum, inp[:, :2] + inp[:, :2])
    assert_array_equal(out[:, :2], nodes(inp[:, :2]))
    assert_array_equal(out[:, 2:4], nodes(inp[:, :2]))
Example #26
    def get_minima(self):
        """
        Return the tuple (minima, indices).
        Minima are sorted in ascending order.

        If the training phase has not been completed yet, call
        stop_training.
        """
        self._if_training_stop_training()
        cols = self.input_dim
        n = self.n
        hit = self.hit
        im = numx.zeros((n, cols), dtype=self.itype)
        m = numx.ones((n, cols), dtype=self.dtype)
        for c in range(cols):
            m[:, c], im[:, c] = hit[c].get_minima()
        return m, im
Example #28
 def get_minima(self):
     """
     Return the tuple defining the minima fulfilling specified criteria.
     If the training phase has not been completed yet, call stop_training.
     
     :return: A tuple containing minima and their corresponding indices
         as numpy.ndarrays (see example in definition of the 
         ``OneDimensionalHitParade.update()`` function). The minima are sorted
         in descending order.
     :rtype: tuple
     """
     self._if_training_stop_training()
     cols = self.input_dim
     n = self.n
     hit = self.hit
     im = numx.zeros((n, cols), dtype=self.itype)
     m = numx.ones((n, cols), dtype=self.dtype)
     for c in range(cols):
         m[:, c], im[:, c] = hit[c].get_minima()
     return m, im
Example #30
def test_online_layer():
    nodes = [
        BogusOnlineNode(input_dim=2, output_dim=2),
        BogusOnlineDiffDimNode(input_dim=4, output_dim=8),
        BogusNode(input_dim=3, output_dim=3)
    ]
    layer = mdp.hinet.OnlineLayer(nodes,
                                  numx_rng=mdp.numx_rand.RandomState(seed=2))
    assert (layer.input_dim == 9)
    assert (layer.output_dim == 13)
    assert (layer.numx_rng == nodes[0].numx_rng)
    assert (layer.numx_rng == nodes[1].numx_rng)

    inp = numx.ones((1, 9))
    layer.train(inp)
    out = layer(inp)
    assert_array_equal(nodes[0].sum, inp[:, :2])
    assert_array_equal(nodes[1].sum, inp[:, :4])
    assert_array_equal(out[:, :2], nodes[0](inp[:, :2]))
    assert_array_equal(out[:, 2:-3], nodes[1](inp[:, :4]))
    assert_array_equal(out[:, -3:], nodes[2](inp[:, :3]))
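
OnlineLayer stacks its child nodes side by side, so the layer dimensions asserted above are just the sums of the child dimensions:

in_dims, out_dims = [2, 4, 3], [2, 8, 3]
assert sum(in_dims) == 9
assert sum(out_dims) == 13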
Example #31
def test_VartimeCovarianceMatrix2():
    """Test whether the trapezoidal integrator returns the expected result,
    derived by analytically adjusting the result of the regular one.
    """

    # sample
    x = numx.random.random((10000, 2))
    dt = numx.ones(x.shape[0] - 1, )
    # initialize the estimators
    cov = CovarianceMatrix(bias=True)
    uncov = VartimeCovarianceMatrix()
    # update the estimators
    cov.update(x)
    uncov.update(x, dt, time_dep=False)
    # quit estimating
    unC, unAvg, unTlen = uncov.fix(center=False)
    C, avg, tlen = cov.fix(center=False)

    assert_array_almost_equal(unC * unTlen,
                              C * tlen - numx.outer(x[0], x[0]) / 2. -
                              numx.outer(x[-1], x[-1]) / 2.,
                              decimal=10)
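
The final assertion checks the endpoint identity of the trapezoidal rule: with unit steps, averaging consecutive outer products changes the plain sum only at the two endpoints,

$$\sum_{t=0}^{N-2} \tfrac{1}{2}\big(x_t x_t^\top + x_{t+1} x_{t+1}^\top\big) = \sum_{t=0}^{N-1} x_t x_t^\top - \tfrac{1}{2} x_0 x_0^\top - \tfrac{1}{2} x_{N-1} x_{N-1}^\top,$$

which is exactly what unC * unTlen is compared against (both sides are the uncentered, unnormalized second moments).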
Example #32
def test_VartimeCovarianceMatrix1():
    """Test that the trapezoidal-rule integrator does not deviate from the
    true covariance substantially more than the regular one - with even
    and with random time increments.
    """

    # initialize the estimators
    cov = CovarianceMatrix()
    uncov = VartimeCovarianceMatrix()
    # set sample distribution parameters
    tC = [[2., 0.5], [0.5, 1]]
    tAvg = [0, 0]
    # generate some data
    x = numx.random.multivariate_normal(tAvg, tC, 300000)
    # update the estimators
    cov.update(x)
    uncov.update(x, numx.ones((x.shape[0] - 1, )), time_dep=False)
    # quit estimating
    unC, unAvg, unTlen = uncov.fix()
    C, avg, tlen = cov.fix()

    # same for uneven, random time increments
    uncov = VartimeCovarianceMatrix()
    inc = (numx.random.rand(x.shape[0] - 1) - .5) * .2 + 1.
    uncov.update(x, inc, time_dep=False)
    unC2, unAvg2, unTlen2 = uncov.fix()

    # precision of step function covariance estimator
    prec = numx.linalg.norm(tC - C)
    # precision of trapezoidal covariance matrix estimator
    # using non random step sizes
    unPrec = numx.linalg.norm(tC - unC)
    # precision of trapezoidal covariance matrix estimator
    # using random step sizes
    unPrec2 = numx.linalg.norm(tC - unC2)

    # allow deviation from standard by factor of .01 and .2 respectively
    assert_allclose(unPrec, prec, .01)
    assert_allclose(unPrec2, prec, .2)
Example #33
    def _stop_training(self):
        Cumulator._stop_training(self)

        if self.verbose:
            msg = ('training LLE on %i points'
                   ' in %i dimensions...' %
                   (self.data.shape[0], self.data.shape[1]))
            print(msg)

        # some useful quantities
        M = self.data
        N = M.shape[0]
        k = self.k
        r = self.r

        # indices of diagonal elements
        W_diag_idx = numx.arange(N)
        Q_diag_idx = numx.arange(k)

        if k > N:
            err = ('k=%i must be less than or '
                   'equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # do we need to automatically determine the regularization term?
        auto_reg = r is None

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        # build the weight matrix
        #XXX future work:
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, N), dtype=self.dtype)

        if self.verbose:
            print(' - constructing [%i x %i] weight matrix...' % W.shape)

        for row in range(N):
            if learn_outdim:
                Q = Qs[row, :, :]
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M - M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]
                M_Mi = M_Mi[nbrs]
                # compute covariance matrix of distances
                Q = mult(M_Mi, M_Mi.T)

            # -----------------------------------------------
            #  compute weight vector based on neighbors
            # -----------------------------------------------

            #Covariance matrix may be nearly singular:
            # add a diagonal correction to prevent numerical errors
            if auto_reg:
                # automatic mode: correction is equal to the sum of
                # the (d_in-d_out) unused variances (as in deRidder &
                # Duin)
                if learn_outdim:
                    sig2 = sig2s[row, :]
                else:
                    sig2 = svd(M_Mi, compute_uv=0)**2
                r = numx.sum(sig2[self.output_dim:])
                Q[Q_diag_idx, Q_diag_idx] += r
            else:
                # Roweis et al instead use "a correction that
                #   is small compared to the trace" e.g.:
                # r = 0.001 * float(Q.trace())
                # this is equivalent to assuming 0.1% of the variance is unused
                Q[Q_diag_idx, Q_diag_idx] += r * Q.trace()

            #solve for weight
            # weight is w such that sum(Q_ij * w_j) = 1 for all i
            # XXX refcast is due to numpy bug: floats become double
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()

            #update row of the weight matrix
            W[nbrs, row] = w

        if self.verbose:
            msg = (' - finding [%i x %i] null space of weight matrix\n'
                   '     (may take a while)...' % (self.output_dim, N))
            print(msg)

        self.W = W.copy()
        #to find the null space, we need the bottom d+1
        #  eigenvectors of (W-I).T*(W-I)
        #Compute this using the svd of (W-I):
        W[W_diag_idx, W_diag_idx] -= 1.

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step
        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, self.output_dim + 1))
        else:
            # the following code does the same computation, but uses
            # symeig, which computes only the required eigenvectors, and
            # is much faster. However, it could also be more unstable...
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            WW[W_diag_idx, W_diag_idx] += 0.1
            sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)

        self.training_projection = U
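
Once the weights are fixed, the embedding $Y$ minimizes the reconstruction cost

$$\Phi(Y) = \sum_i \Big\| y_i - \sum_j W_{ji}\, y_j \Big\|^2 = \big\|(W - I)^\top Y\big\|_F^2,$$

so its columns are the bottom eigenvectors of $(W - I)(W - I)^\top$; the constant eigenvector with eigenvalue zero is discarded, which is why both branches request the eigenvalue range (2, output_dim + 1).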
Example #34
 def _execute(self, x):
     return numx.dot(x, numx.ones((self.input_dim, self.output_dim)))
Example #35
 def _add_constant(self, x):
     """Add a constant term to the vector 'x'.
     x -> [1 x]
     """
     return numx.concatenate((numx.ones((x.shape[0], 1),
                                        dtype=self.dtype), x), axis=1)
Example #36
    def _stop_training(self):
        Cumulator._stop_training(self)

        k = self.k
        M = self.data
        N = M.shape[0]

        if k > N:
            err = ('k=%i must be less than'
                   ' or equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        if self.verbose:
            print('performing HLLE on %i points in %i dimensions...' % M.shape)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        d_out = self.output_dim

        #dp = d_out + (d_out-1) + (d_out-2) + ...
        dp = d_out * (d_out + 1) // 2  # integer division: dp is used as an array dimension

        if min(k, N) <= d_out:
            err = ('k=%i and n=%i (number of input data points) must be'
                   ' larger than output_dim=%i' % (k, N, d_out))
            raise TrainingException(err)

        if k < 1+d_out+dp:
            wrn = ('The number of neighbours, k=%i, is smaller than'
                   ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
                   ' which might result in unstable results.'
                   % (k, 1+d_out+dp))
            _warnings.warn(wrn, MDPWarning)

        #build the weight matrix
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, dp*N), dtype=self.dtype)

        if self.verbose:
            print(' - constructing [%i x %i] weight matrix...' % W.shape)

        for row in range(N):
            if learn_outdim:
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M-M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k+1]

            #-----------------------------------------------
            #  center the neighborhood using the mean
            #-----------------------------------------------
            nbrhd = M[nbrs] # this makes a copy
            nbrhd -= nbrhd.mean(0)

            #-----------------------------------------------
            #  compute local coordinates
            #   using a singular value decomposition
            #-----------------------------------------------
            U, sig, VT = svd(nbrhd)
            nbrhd = U.T[:d_out]
            del VT

            #-----------------------------------------------
            #  build Hessian estimator
            #-----------------------------------------------
            Yi = numx.zeros((dp, k), dtype=self.dtype)
            ct = 0
            for i in range(d_out):
                Yi[ct:ct+d_out-i, :] = nbrhd[i] * nbrhd[i:, :]
                ct += d_out-i
            Yi = numx.concatenate([numx.ones((1, k), dtype=self.dtype),
                                   nbrhd, Yi], 0)

            #-----------------------------------------------
            #  orthogonalize linear and quadratic forms
            #   with QR factorization
            #  and make the weights sum to 1
            #-----------------------------------------------
            if k >= 1+d_out+dp:
                Q, R = numx_linalg.qr(Yi.T)
                w = Q[:, d_out+1:d_out+1+dp]
            else:
                q, r = _mgs(Yi.T)
                w = q[:, -dp:]

            S = w.sum(0) #sum along columns
            #if S[i] is too small, set it equal to 1.0
            # this prevents weights from blowing up
            S[numx.where(numx.absolute(S)<1E-4)] = 1.0
            #print w.shape, S.shape, (w/S).shape
            #print W[nbrs, row*dp:(row+1)*dp].shape
            W[nbrs, row*dp:(row+1)*dp] = w / S

        #-----------------------------------------------
        # To find the null space, we want the
        #  first d+1 eigenvectors of W.T*W
        # Compute this using an svd of W
        #-----------------------------------------------

        if self.verbose:
            msg = (' - finding [%i x %i] '
                   'null space of weight matrix...' % (d_out, N))
            print(msg)

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step

        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, d_out+1))
            Y = U*numx.sqrt(N)
        else:
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            W_diag_idx = numx.arange(N)
            WW[W_diag_idx, W_diag_idx] += 0.01
            sig, U = symeig(WW, range=(2, self.output_dim+1), overwrite=True)
            Y = U*numx.sqrt(N)
            del WW
        del W

        #-----------------------------------------------
        # Normalize Y
        #
        # Alternative way to do it:
        #  we need R = (Y.T*Y)^(-1/2)
        #   do this with an SVD of Y

        #      Y = U*sig*V.T
        #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
        #            = V * (sig*sig.T) * V.T
        #            = V * sig^2 V.T
        #   so
        #      R = V * sig^-1 * V.T
        # The code is:
        #    U, sig, VT = svd(Y)
        #    del U
        #    S = numx.diag(sig**-1)
        #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
        #-----------------------------------------------
        if self.verbose:
            print(' - normalizing null space...')

        C = sqrtm(mult(Y.T, Y))
        self.training_projection = mult(Y, C)
Example #37
def test_flow_container_privmethods():
    mat, mix, inp = get_random_mix(mat_dim=(100, 3))
    flow = _get_default_flow()
    # test __len__
    assert_equal(len(flow), len(flow.flow))
    # test __?etitem__, integer key
    for i in range(len(flow)):
        assert flow[i]==flow.flow[i], \
               '__getitem__  returned wrong node %d' % i
        new_node = BogusOnlineNode()
        flow[i] = new_node
        assert flow[i] == new_node, '__setitem__ did not set node %d' % i
    # test __?etitem__, normal slice -> this fails for python < 2.2 and
    # if Flow is subclassed from the builtin 'list'
    flowslice = flow[0:2]
    assert isinstance(flowslice,mdp.OnlineFlow), \
           '__getitem__ slice is not an OnlineFlow instance'
    assert len(flowslice) == 2, '__getitem__ returned wrong slice size'
    new_nodes_list = [BogusOnlineNode(), BogusOnlineNode()]
    flow[:2] = new_nodes_list
    assert (flow[0] == new_nodes_list[0]) and \
           (flow[1] == new_nodes_list[1]), '__setitem__ did not set slice'
    # test __?etitem__, extended slice
    flowslice = flow[:2:1]
    assert isinstance(flowslice,mdp.OnlineFlow), \
           '__getitem__ slice is not a Flow instance'
    assert len(flowslice) == 2, '__getitem__ returned wrong slice size'
    new_nodes_list = [BogusOnlineNode(), BogusOnlineNode()]
    flow[:2:1] = new_nodes_list
    assert (flow[0] == new_nodes_list[0]) and \
           (flow[1] == new_nodes_list[1]), '__setitem__ did not set slice'
    # test __delitem__, integer key
    copy_flow = mdp.OnlineFlow(flow[:])
    del copy_flow[0]
    assert len(copy_flow) == len(flow) - 1, '__delitem__ did not del'
    for i in range(len(copy_flow)):
        assert copy_flow[i] == flow[i + 1], '__delitem__ deleted wrong node'
    # test __delitem__, normal slice
    copy_flow = mdp.OnlineFlow(flow[:])
    del copy_flow[:2]
    assert len(copy_flow) == len(flow)-2, \
           '__delitem__ did not del normal slice'
    assert copy_flow[0] == flow[2], \
           '__delitem__ deleted wrong normal slice'
    # test __delitem__, extended slice
    copy_flow = mdp.OnlineFlow(flow[:])
    del copy_flow[:2:1]
    assert len(copy_flow) == len(flow)-2, \
           '__delitem__ did not del extended slice'
    assert copy_flow[0] == flow[2], \
           '__delitem__ deleted wrong extended slice'
    # test __add__
    newflow = flow + flow
    assert len(newflow) == len(flow) * 2, '__add__ did not work'

    # test __add__ with Node
    flow = _get_default_flow()
    newflow = flow + BogusNode()
    assert len(newflow) == len(flow) + 1, '__add__ did not work'

    # test insert with Node
    flow[1] = BogusNode()
    inp = numx.ones((1, 3)) * 2
    flow.train(inp)
    out = flow(inp)
    rec = flow.inverse(out)
    assert_array_equal(rec, inp)

    # test __add__ with TrainableNode
    flow = _get_default_flow()
    newflow = flow + BogusNodeTrainable()
    assert len(newflow) == len(flow) + 1, '__add__ did not work'

    # test insert with TrainableNode
    try:
        flow[1] = BogusNodeTrainable()
    except TypeError:
        pass
    else:
        raise Exception('This is not supposed to work!')