Beispiel #1
0
    def output(self, x, index_selection_func=None):
        """Build the block-sparse output expression for ``x``.

        ``self.out_idxs`` supplies the output block indices; when
        ``index_selection_func`` is given it is applied to them first.
        If ``self.l_params`` is set, ``self.W`` and ``self.b`` are each
        multiplied by the matching ``l_params`` entry after it has been
        dimshuffled with the pattern stored in ``self.l_param_map``.
        The result goes through ``self.activation`` unless that is ``None``.
        """
        selected_idxs = (
            self.out_idxs if index_selection_func is None
            else index_selection_func(self.out_idxs)
        )

        # Choose the parameters once so the sparse product is written once.
        if self.l_params is not None:
            w_pattern, b_pattern = self.l_param_map[0], self.l_param_map[1]
            weights = self.l_params[0].dimshuffle(*w_pattern) * self.W
            biases = self.l_params[1].dimshuffle(*b_pattern) * self.b
        else:
            weights = self.W
            biases = self.b

        pre_activation = sparse_block_dot_SS(
            weights, x, self.in_idxs, biases, selected_idxs)

        if self.activation is None:
            return pre_activation
        return self.activation(pre_activation)
Beispiel #2
0
def h_softmax_gpu(W1, b1, W2, b2, x, n_outputs, n_classes,
                  n_outputs_per_class, batch_size, target=None):
    """
    GPU-only version of a two-layer hierarchical softmax.
    See hierarchical_softmax's docstring for the description of the arguments.

    When ``target`` is None, the result holds, for every example, the
    probabilities of the first ``n_outputs`` outputs.  Otherwise it holds one
    probability per example: that of the output named by the (flattened)
    ``target``.
    """
    W1 = as_cuda_ndarray_variable(W1)
    b1 = as_cuda_ndarray_variable(b1)
    W2 = as_cuda_ndarray_variable(W2)
    b2 = as_cuda_ndarray_variable(b2)
    x = as_cuda_ndarray_variable(x)

    # First softmax which computes the probabilities of belonging to each class
    class_probs = tensor.nnet.softmax(tensor.dot(x, W1) + b1)

    if target is None:
        # Computes the probabilities of all the outputs

        # Every example selects every class block: 0 .. n_classes - 1.
        class_ids = tensor.tile(
            tensor.arange(n_classes, dtype="int32")[None, :],
            (batch_size, 1))

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2, class_ids)

        # Softmax is taken within each class, then weighted by the class
        # probability and flattened class-major, so output ``o`` sits at
        # position ``o % n_outputs_per_class`` of class
        # ``o // n_outputs_per_class``.
        output_probs = tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        output_probs = class_probs[:, :, None] * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # Drop the padding outputs beyond n_outputs.
        output_probs = output_probs[:, :n_outputs]

    else:
        # Computes the probabilities of the outputs specified by the targets

        # Flattens the targets
        target = target.flatten()

        # Classes to which belong each target
        target_classes = target // n_outputs_per_class

        # Outputs to which belong each target inside a class.  This must be
        # the remainder of the same division used for target_classes;
        # reducing modulo n_classes picks the wrong output whenever
        # n_classes != n_outputs_per_class.
        target_outputs_in_class = target % n_outputs_per_class

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2,
            target_classes[:, None])

        batch_range = tensor.arange(batch_size)
        output_probs = tensor.nnet.softmax(activations[:, 0, :])
        target_class_probs = class_probs[batch_range, target_classes]
        output_probs = output_probs[batch_range, target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
Beispiel #3
0
    def output(self, x, index_selection_func=None):
        """Build the block-sparse output expression for ``x``.

        When ``self.n_out > 1`` the output block indices are recomputed and
        stored on ``self.out_idxs`` (a side effect of this call): ``x`` is
        reshaped to 2-D by merging its second and third axes and projected
        with ``self.rand_proj_mat``; the projection is passed to
        ``index_selection_func`` when one is given, otherwise argsorted; the
        last ``self.k`` columns of that result are kept and sorted.
        When ``self.n_out <= 1`` the existing ``self.out_idxs`` is used as is.

        The sparse product is passed through ``self.activation`` unless that
        is ``None``.
        """
        if self.n_out > 1:
            rnd_proj = T.dot(
                x.reshape((x.shape[0], x.shape[1]*x.shape[2])),
                self.rand_proj_mat
            )

            if index_selection_func is not None:
                self.out_idxs = index_selection_func(rnd_proj)
            else:
                self.out_idxs = T.argsort(rnd_proj)
            # Keep only the trailing k index columns, in sorted order.
            self.out_idxs = T.sort(self.out_idxs[:, -self.k:])

        sparse = sparse_block_dot_SS(
            self.W,
            x,
            self.in_idxs,
            self.b,
            self.out_idxs
        )

        return (sparse if self.activation is None
                else self.activation(sparse))
Beispiel #4
0
    def output(self, x, index_selection_func=None):
        """Return the (optionally activated) block-sparse product for ``x``.

        Output block indices come from ``self.out_idxs``, passed through
        ``index_selection_func`` when one is supplied.  With ``self.l_params``
        set, ``self.W`` is multiplied by entry 0 and ``self.b`` by entry 1,
        each dimshuffled with the corresponding ``self.l_param_map`` pattern.
        """
        def with_l_param(position, param):
            # Leave the parameter untouched when no l_params are configured.
            if self.l_params is None:
                return param
            pattern = self.l_param_map[position]
            return self.l_params[position].dimshuffle(*pattern) * param

        idxs = self.out_idxs
        if index_selection_func is not None:
            idxs = index_selection_func(idxs)

        result = sparse_block_dot_SS(
            with_l_param(0, self.W),
            x,
            self.in_idxs,
            with_l_param(1, self.b),
            idxs,
        )

        if self.activation is not None:
            result = self.activation(result)
        return result
Beispiel #5
0
def test_blocksparse():
    """Compare compiled sparse_block_dot_SS with the ``blocksparse`` reference."""
    bias = tensor.fmatrix()
    weights = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    # The compiled function takes its inputs in the same order as the op.
    symbolic_args = [weights, hidden, in_idx, bias, out_idx]
    compiled = theano.function(symbolic_args,
                               sparse_block_dot_SS(*symbolic_args))

    w_val, h_val, in_val, b_val, out_val = blocksparse_data()
    call_args = (w_val, h_val, in_val, b_val, out_val)

    actual = compiled(*call_args)
    expected = blocksparse(*call_args)

    utt.assert_allclose(expected, actual)
Beispiel #6
0
def test_blocksparse():
    """Compare sparse_block_dot_SS, compiled with ``mode_with_gpu``, against
    the ``blocksparse`` reference on the data from ``blocksparse_data``."""
    bias = tensor.fmatrix()
    weights = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    graph_inputs = [weights, hidden, in_idx, bias, out_idx]
    graph_output = sparse_block_dot_SS(weights, hidden, in_idx, bias, out_idx)
    gpu_fn = theano.function(graph_inputs, graph_output, mode=mode_with_gpu)

    w_val, h_val, in_val, b_val, out_val = blocksparse_data()

    actual = gpu_fn(w_val, h_val, in_val, b_val, out_val)
    expected = blocksparse(w_val, h_val, in_val, b_val, out_val)

    utt.assert_allclose(expected, actual)
Beispiel #7
0
def test_blocksparseF():
    """Check sparse_block_dot_SS when W reaches it through a GpuDimShuffle.

    The compiled function is fed W with axes 2 and 3 swapped; the graph
    swaps them back with the (0, 1, 3, 2) dimshuffle, so the result should
    match the ``blocksparse`` reference computed on the unswapped W.
    """
    bias = tensor.fmatrix()
    weights = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    swap_last_two = GpuDimShuffle((False,) * 4, (0, 1, 3, 2))
    weights_swapped = swap_last_two(as_cuda_ndarray_variable(weights))
    graph_output = sparse_block_dot_SS(
        weights_swapped, hidden, in_idx, bias, out_idx)

    compiled = theano.function(
        [weights, hidden, in_idx, bias, out_idx], graph_output)

    w_val, h_val, in_val, b_val, out_val = blocksparse_data()

    actual = compiled(
        numpy.swapaxes(w_val, 2, 3), h_val, in_val, b_val, out_val)
    expected = blocksparse(w_val, h_val, in_val, b_val, out_val)

    utt.assert_allclose(expected, actual)
Beispiel #8
0
def test_blocksparseF():
    """Check sparse_block_dot_SS on a dimshuffled W, compiled with
    ``mode_with_gpu``.

    W is passed in with axes 2 and 3 swapped and swapped back inside the
    graph by a (0, 1, 3, 2) GpuDimShuffle; the output is compared with the
    ``blocksparse`` reference evaluated on the original W.
    """
    bias = tensor.fmatrix()
    weights = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    no_broadcast = (False, False, False, False)
    axis_order = (0, 1, 3, 2)
    shuffled_weights = GpuDimShuffle(no_broadcast, axis_order)(
        as_cuda_ndarray_variable(weights))

    gpu_fn = theano.function(
        [weights, hidden, in_idx, bias, out_idx],
        sparse_block_dot_SS(shuffled_weights, hidden, in_idx, bias, out_idx),
        mode=mode_with_gpu)

    w_val, h_val, in_val, b_val, out_val = blocksparse_data()
    w_val_swapped = numpy.swapaxes(w_val, 2, 3)

    actual = gpu_fn(w_val_swapped, h_val, in_val, b_val, out_val)
    expected = blocksparse(w_val, h_val, in_val, b_val, out_val)

    utt.assert_allclose(expected, actual)
Beispiel #9
0
    def output(self, x, index_selection_func=None):
        """Return the (optionally activated) block-sparse product for ``x``.

        Side effect: when ``self.n_out > 1``, ``self.out_idxs`` is replaced.
        ``x`` is reshaped to 2-D (second and third axes merged) and multiplied
        by ``self.rand_proj_mat``.  That projection is given to
        ``index_selection_func`` if one is supplied, and argsorted otherwise.
        Only the last ``self.k`` columns of the outcome are kept, sorted.
        With ``self.n_out <= 1`` the current ``self.out_idxs`` is reused.

        ``self.activation`` is applied to the sparse product unless it is
        ``None``.
        """
        if self.n_out > 1:
            rnd_proj = T.dot(x.reshape((x.shape[0], x.shape[1] * x.shape[2])),
                             self.rand_proj_mat)

            if index_selection_func is not None:
                self.out_idxs = index_selection_func(rnd_proj)
            else:
                self.out_idxs = T.argsort(rnd_proj)
            # Keep only the trailing k index columns, in sorted order.
            self.out_idxs = T.sort(self.out_idxs[:, -self.k:])

        sparse = sparse_block_dot_SS(self.W, x, self.in_idxs, self.b,
                                     self.out_idxs)

        return (sparse if self.activation is None else self.activation(sparse))