Example #1
    def test_dot_infershape(self):
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        self._compile_and_check(
            [W, h, iIdx, b, oIdx], [sparse_block_dot(W, h, iIdx, b, oIdx)], self.gemv_data(), self.gemv_class
        )
Example #2
    def test_dot_infershape(self):
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        self._compile_and_check([W, h, iIdx, b, oIdx],
                                [sparse_block_dot(W, h, iIdx, b, oIdx)],
                                self.gemv_data(), self.gemv_class)
Example #3
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockGemv)
Example #4
def test_blocksparse_gpu_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)

    assert sum(1 for n in f.maker.fgraph.apply_nodes
               if isinstance(n.op, GpuSparseBlockGemv)) == 1
Example #5
def test_blocksparse_gpu_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], [o, tensor.grad(o.sum(),
                                                               wrt=W)],
                        mode=mode_with_gpu)

    assert isinstance(f.maker.fgraph.toposort()[-2].op, GpuSparseBlockOuter)
Example #6
def test_blocksparse_inplace_gemv_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], o)

    if theano.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
Example #7
def test_blocksparse_inplace_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    f = theano.function([W, h, iIdx, b, oIdx], [o, tensor.grad(o.sum(), wrt=W)])

    if theano.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer)
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
        assert check_stack_trace(f, ops_to_check=sparse_block_outer_inplace)
Example #8
def test_blocksparse_inplace_outer_opt():
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    o = sparse_block_dot(W, h, iIdx, b, oIdx)

    theano.printing.debugprint(tensor.grad(o.sum(), wrt=W))

    f = theano.function([W, h, iIdx, b, oIdx],
                        [o, tensor.grad(o.sum(), wrt=W)])

    if theano.config.mode == "FAST_COMPILE":
        assert not f.maker.fgraph.toposort()[-1].op.inplace
    else:
        assert f.maker.fgraph.toposort()[-1].op.inplace
Example #9
    def test_sparseblockdot(self):
        # Compares the numpy version of sparseblockgemv to sparse_block_dot.

        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        o = sparse_block_dot(W, h, iIdx, b, oIdx)

        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

        ref_out = self.gemv_numpy(b_val.take(oIdx_val, axis=0), W_val, h_val,
                                  iIdx_val, oIdx_val)

        utt.assert_allclose(ref_out, th_out)
Example #10
    def test_sparseblockdot(self):
        """
        Compares the numpy version of sparseblockgemv to sparse_block_dot.
        """
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()

        o = sparse_block_dot(W, h, iIdx, b, oIdx)

        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = BlockSparse_Gemv_and_Outer.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

        ref_out = BlockSparse_Gemv_and_Outer.gemv_numpy(b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)

        utt.assert_allclose(ref_out, th_out)
Example #11
def metaop(b, h, W):
    # iIdx and oIdx are free variables defined outside this excerpt.
    return sparse_block_dot(W, h, iIdx, b, oIdx)
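The excerpt above only compiles when iIdx and oIdx already exist in an enclosing scope. A minimal, self-contained sketch of such a context (the wrapper function name and structure below are assumptions for illustration, not the original test) could be:

import theano
import theano.tensor as tensor
from theano.tensor.nnet.blocksparse import sparse_block_dot


def build_blocksparse_dot():
    # Symbolic inputs; iIdx and oIdx are the free variables that the
    # inner closure uses.
    b = tensor.fmatrix()
    W = tensor.ftensor4()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    def metaop(b, h, W):
        return sparse_block_dot(W, h, iIdx, b, oIdx)

    o = metaop(b, h, W)
    return theano.function([W, h, iIdx, b, oIdx], o)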
Example #12
def h_softmax(x,
              batch_size,
              n_outputs,
              n_classes,
              n_outputs_per_class,
              W1,
              b1,
              W2,
              b2,
              target=None):
    """ Two-level hierarchical softmax.

    The architecture is composed of two softmax layers: the first predicts the
    class of the input x while the second predicts the output of the input x in
    the predicted class.
    More explanations can be found in the original paper [1]_.

    If target is specified, it will only compute the outputs of the
    corresponding targets. Otherwise, if target is None, it will compute all
    the outputs.

    The outputs are grouped in the same order as they are initially defined.

    .. versionadded:: 0.7.1

    Parameters
    ----------
    x: tensor of shape (batch_size, number of features)
        the minibatch input of the two-layer hierarchical softmax.
    batch_size: int
        the size of the minibatch input x.
    n_outputs: int
        the number of outputs.
    n_classes: int
        the number of classes of the two-layer hierarchical softmax. It
        corresponds to the number of outputs of the first softmax. See note at
        the end.
    n_outputs_per_class: int
        the number of outputs per class. See note at the end.
    W1: tensor of shape (number of features of the input x, n_classes)
        the weight matrix of the first softmax, which maps the input x to the
        probabilities of the classes.
    b1: tensor of shape (n_classes,)
        the bias vector of the first softmax layer.
    W2: tensor of shape (n_classes, number of features of the input x, n_outputs_per_class)
        the weight matrix of the second softmax, which maps the input x to
        the probabilities of the outputs.
    b2: tensor of shape (n_classes, n_outputs_per_class)
        the bias vector of the second softmax layer.
    target: tensor of shape either (batch_size,) or (batch_size, 1)
        (optional, default None)
        contains the indices of the targets for the minibatch
        input x. For each input, the function computes the output for its
        corresponding target. If target is None, then all the outputs are
        computed for each input.

    Returns
    -------
    output_probs: tensor of shape (batch_size, n_outputs) or (batch_size, 1)
        Output of the two-layer hierarchical softmax for input x. If target is
        not specified (None), then all the outputs are computed and the
        returned tensor has shape (batch_size, n_outputs). Otherwise, when
        target is specified, only the corresponding outputs are computed and
        the returned tensor has thus shape (batch_size, 1).

    Notes
    -----
    The product of n_outputs_per_class and n_classes has to be greater than
    or equal to n_outputs. If it is strictly greater, then the irrelevant
    outputs will be ignored.
    n_outputs_per_class and n_classes have to match the corresponding
    dimensions of the tensors W1, b1, W2 and b2.
    The most computationally efficient configuration is when
    n_outputs_per_class and n_classes are both equal to the square root of
    n_outputs.

    References
    ----------
    .. [1] J. Goodman, "Classes for Fast Maximum Entropy Training,"
        ICASSP, 2001, http://arxiv.org/abs/cs/0108006.
    """

    # First softmax that computes the probabilities of belonging to each class
    class_probs = theano.tensor.nnet.softmax(T.dot(x, W1) + b1)

    if target is None:  # Computes the probabilities of all the outputs

        # Second softmax that computes the output probabilities
        activations = T.tensordot(x, W2, (1, 1)) + b2
        output_probs = theano.tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        output_probs = class_probs.dimshuffle(0, 1, 'x') * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        # output_probs.shape[1] is n_classes * n_outputs_per_class, which might
        # be greater than n_outputs, so we ignore the potentially irrelevant
        # outputs with the next line:
        output_probs = output_probs[:, :n_outputs]

    else:  # Computes the probabilities of the outputs specified by the targets

        target = target.flatten()

        # Class to which each target belongs
        target_classes = target // n_outputs_per_class

        # Output within its class to which each target belongs
        target_outputs_in_class = target % n_outputs_per_class

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot(W2.dimshuffle('x', 0, 1, 2),
                                       x.dimshuffle(0, 'x', 1),
                                       T.zeros((batch_size, 1), dtype='int32'),
                                       b2, target_classes.dimshuffle(0, 'x'))

        output_probs = theano.tensor.nnet.softmax(activations.dimshuffle(0, 2))
        target_class_probs = class_probs[T.arange(batch_size), target_classes]
        output_probs = output_probs[T.arange(batch_size),
                                    target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
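Assuming the definition above is the h_softmax shipped in theano.tensor.nnet, a minimal usage sketch follows. Per the Notes, n_classes * n_outputs_per_class must be greater than or equal to n_outputs, and both are best taken near the square root of n_outputs; for instance, with n_outputs = 10000, choosing n_classes = n_outputs_per_class = 100 gives exactly 100 * 100 = 10000. All sizes and variable names below are illustrative assumptions, not part of the original listing.

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import h_softmax

# Illustrative sizes: n_classes * n_outputs_per_class >= n_outputs.
batch_size, n_features, n_outputs = 32, 100, 10000
n_classes = n_outputs_per_class = 100  # ~sqrt(n_outputs)

floatX = theano.config.floatX
W1 = theano.shared(np.zeros((n_features, n_classes), dtype=floatX))
b1 = theano.shared(np.zeros((n_classes,), dtype=floatX))
W2 = theano.shared(np.zeros((n_classes, n_features, n_outputs_per_class),
                            dtype=floatX))
b2 = theano.shared(np.zeros((n_classes, n_outputs_per_class), dtype=floatX))

x = T.matrix('x')
y = T.ivector('y')  # target indices for the minibatch, shape (batch_size,)

# Training path: only the probability of each target output is computed,
# so the result has shape (batch_size, 1).
p_target = h_softmax(x, batch_size, n_outputs, n_classes,
                     n_outputs_per_class, W1, b1, W2, b2, target=y)

# Inference path: all n_outputs probabilities, shape (batch_size, n_outputs).
p_all = h_softmax(x, batch_size, n_outputs, n_classes,
                  n_outputs_per_class, W1, b1, W2, b2)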