Code example #1
File: denet_sparse_op.py Project: zhjpqq/denet
    def make_node(self, fmap, bbox, dy):
        fmap = as_cuda_ndarray_variable(fmap)
        bbox = as_cuda_ndarray_variable(bbox)
        dy = as_cuda_ndarray_variable(dy)

        assert bbox.ndim == 4 and dy.ndim == 4
        return theano.Apply(self, [fmap, bbox, dy], [fmap.type()])
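Nearly every snippet on this page follows the same pattern: make_node converts each input with as_cuda_ndarray_variable, sanity-checks ndim/dtype, and returns a theano.Apply node whose outputs carry a CudaNdarrayType. As rough orientation only, a complete op built around that pattern might look like the minimal sketch below; it assumes the old theano.sandbox.cuda backend, and the class name and perform body are hypothetical, not taken from any of the projects listed here.

import theano
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable


class GpuIdentity4D(GpuOp):
    """Hypothetical op that passes a 4D CudaNdarray through unchanged."""

    def make_node(self, x):
        x = as_cuda_ndarray_variable(x)
        assert x.ndim == 4
        # The output reuses the input's type (same dtype and broadcastable pattern).
        return theano.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, output_storage):
        # A real op would allocate its own output and launch a CUDA kernel here;
        # this sketch simply forwards the input CudaNdarray.
        output_storage[0][0] = inputs[0]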
Code example #2
    def make_node(self, images, top_down):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)
        top_down = as_cuda_ndarray_variable(top_down)

        assert images.ndim == 4
        assert top_down.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                cols_broadcastable, batch_broadcastable)
        houtput_type = CudaNdarrayType(broadcastable=houtput_broadcastable)
        houtput = houtput_type()

        poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                cols_broadcastable, batch_broadcastable)
        poutput_type = CudaNdarrayType(broadcastable=poutput_broadcastable)
        poutput = poutput_type()

        return Apply(self, [images, top_down], [houtput, poutput])
Code example #3
File: cula.py Project: LEEKYOUNGHUN/Theano
    def make_node(self, inp1, inp2):
        inp1 = as_cuda_ndarray_variable(inp1)
        inp2 = as_cuda_ndarray_variable(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])
Code example #4
File: denet_sparse_op.py Project: zhjpqq/denet
    def make_node(self, fmap, bbox):
        fmap = as_cuda_ndarray_variable(fmap)
        bbox = as_cuda_ndarray_variable(bbox)

        assert fmap.ndim == 4
        assert bbox.ndim == 4
        return theano.Apply(self, [fmap, bbox], [fmap.type()])
Code example #5
    def make_node(self, output_spike, H_out, weights):
        if output_spike.type.ndim != 4:
            raise TypeError('output_spike must be 4D tensor')
        if H_out.type.ndim != 4:
            raise TypeError('H_out must be 4D tensor')
        if weights.type.ndim != 4:
            raise TypeError('weights must be 4D tensor')
        # if LR.type.ndim != 1:
        #     raise TypeError('LR must be 1D tensor')
        # if weight_update.type.ndim != 4:
        #     raise TypeError('weight_update must be 4D tensor')

        output_spike = as_cuda_ndarray_variable(output_spike)
        H_out = as_cuda_ndarray_variable(H_out)
        weights = as_cuda_ndarray_variable(weights)
        # LR= as_cuda_ndarray_variable(LR)
        #weight_update = as_cuda_ndarray_variable(weight_update)

        print 'MAKENODE: ', output_spike.shape, H_out.shape, weights.shape
        # broadcastable = [output_spike.type.broadcastable[0], H_out.type.broadcastable[0],weights.type.broadcastable[0],
        #                  weight_update,False, False, False, False]
        # otype = CudaNdarrayType(broadcastable=[False] * 4)
        broadcastable = [False, False, False, False, False]
        return Apply(self, [output_spike, H_out, weights],
                     [CudaNdarrayType(broadcastable)()])
Code example #6
    def make_node(self, inp1, inp2):
        inp1 = as_cuda_ndarray_variable(inp1)
        inp2 = as_cuda_ndarray_variable(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])
Code example #7
    def make_node(self, images, top_down):
        """
        .. todo::

            WRITEME
        """
        images = as_cuda_ndarray_variable(images)
        top_down = as_cuda_ndarray_variable(top_down)

        assert images.ndim == 4
        assert top_down.ndim == 4

        channels_broadcastable = images.type.broadcastable[0]
        batch_broadcastable = images.type.broadcastable[3]

        rows_broadcastable = False
        cols_broadcastable = False

        houtput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        houtput_type = CudaNdarrayType(broadcastable=houtput_broadcastable)
        houtput = houtput_type()

        poutput_broadcastable = (channels_broadcastable, rows_broadcastable,
                                 cols_broadcastable, batch_broadcastable)
        poutput_type = CudaNdarrayType(broadcastable=poutput_broadcastable)
        poutput = poutput_type()

        return Apply(self, [images, top_down], [houtput, poutput])
Code example #8
File: gruop.py Project: cjopengler/lmkit
    def make_node(self, initial_state, inp_state, inp_update, inp_reset,
                  state_to_state, state_to_update, state_to_reset):

        weights = [state_to_state, state_to_update, state_to_reset]

        batch_size = inp_state.shape[1]
        assert initial_state.dtype == "float32"
        assert initial_state.ndim == 1

        initial_state = as_cuda_ndarray_variable(
            tensor.repeat(initial_state[None, :], batch_size, 0))

        for i, w in enumerate(weights):
            weights[i] = as_cuda_ndarray_variable(w)

        inputs = [inp_state, inp_update, inp_reset]
        for i, b in enumerate(inputs):
            inputs[i] = as_cuda_ndarray_variable(b)

        for w in weights:
            assert w.dtype == "float32"
            assert w.ndim == 2

        for i in inputs:
            assert i.dtype == "float32"
            assert i.ndim == 3

        out_type = CudaNdarrayType((False, False))
        return theano.Apply(self, [initial_state] + inputs + weights,
                            [out_type()])
Code example #9
File: PoolHWBCOp.py Project: papar22/returnn-hmm
 def make_node(self, X, DY):
     X = gpu_contiguous(as_cuda_ndarray_variable(X))
     DY = gpu_contiguous(as_cuda_ndarray_variable(DY))
     assert X.dtype == "float32"
     assert DY.dtype == "float32"
     assert X.ndim == 4
     assert DY.ndim == 4
     return theano.Apply(self, [X, DY], [X.type()])
Code example #10
File: blocksparse.py Project: truell20/grammarVAE
 def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
     one = tensor.constant(numpy.asarray(1.0, dtype='float32'))
     o = basic_ops.as_cuda_ndarray_variable(o)
     x = basic_ops.as_cuda_ndarray_variable(x)
     y = basic_ops.as_cuda_ndarray_variable(y)
     if alpha is None:
         alpha = one
     return Apply(self, [o, x, y, xIdx, yIdx, alpha], [o.type()])
Code example #11
File: PoolHWBCOp.py Project: chagge/returnn
 def make_node(self, X, DY):
   X = gpu_contiguous(as_cuda_ndarray_variable(X))
   DY = gpu_contiguous(as_cuda_ndarray_variable(DY))
   assert X.dtype == "float32"
   assert DY.dtype == "float32"
   assert X.ndim == 4
   assert DY.ndim == 4
   return theano.Apply(self, [X, DY], [X.type()])
Code example #12
File: blocksparse.py Project: poolio/Theano
 def make_node(self, o, x, y, xIdx, yIdx, alpha=None):
     one = tensor.constant(numpy.asarray(1.0, dtype="float32"))
     o = basic_ops.as_cuda_ndarray_variable(o)
     x = basic_ops.as_cuda_ndarray_variable(x)
     y = basic_ops.as_cuda_ndarray_variable(y)
     if alpha is None:
         alpha = one
     return Apply(self, [o, x, y, xIdx, yIdx, alpha], [o.type()])
Code example #13
 def make_node(self, X, sizes):
   X = gpu_contiguous(as_cuda_ndarray_variable(X))
   sizes = gpu_contiguous(as_cuda_ndarray_variable(sizes))
   assert X.dtype == "float32"
   assert X.ndim == 4
   assert sizes.dtype == "float32"
   assert sizes.ndim == 2
   return theano.Apply(self, [X, sizes], [X.type()])
Code example #14
File: nnet.py Project: takuhironoda/Theano
 def make_node(self, x, b, y_idx):
     # N.B. won't work when we don't cast y_idx to float anymore
     x = as_cuda_ndarray_variable(x)
     b = as_cuda_ndarray_variable(b)
     y_idx = as_cuda_ndarray_variable(y_idx)
     nll = y_idx.type()
     sm = x.type()
     am = y_idx.type()
     return Apply(self, [x, b, y_idx], [nll, sm, am])
Code example #15
    def make_node(self, inp1, inp2):
        inp1 = as_cuda_ndarray_variable(inp1)
        inp2 = as_cuda_ndarray_variable(inp2)

        assert inp1.ndim == 2
        assert inp2.ndim == 2
        return theano.Apply(
            self, [inp1, inp2],
            [CudaNdarrayType(broadcastable=[False] * inp1.type.ndim)()])
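Unlike the snippets that reuse an input's type() for the output, this one constructs the output type explicitly with broadcastable=[False] * ndim, which drops any broadcastable flags the input may carry. A small sketch of the difference, assuming the old theano.sandbox.cuda backend (the variable names are made up):

from theano.sandbox.cuda import CudaNdarrayType

row_type = CudaNdarrayType(broadcastable=(True, False))  # e.g. a (1, n) row
reused = row_type()                                      # keeps (True, False)
fresh = CudaNdarrayType(broadcastable=[False] * 2)()     # always (False, False)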
Code example #16
File: nnet.py Project: Ambier/Theano
 def make_node(self, x, b, y_idx):
     # N.B. won't work when we don't cast y_idx to float anymore
     x = as_cuda_ndarray_variable(x)
     b = as_cuda_ndarray_variable(b)
     y_idx = as_cuda_ndarray_variable(y_idx)
     nll = y_idx.type()
     sm = x.type()
     am = y_idx.type()
     return Apply(self, [x, b, y_idx], [nll, sm, am])
Code example #17
def h_softmax_gpu(W1, b1, W2, b2, x, n_outputs, n_classes,
                  n_outputs_per_class, batch_size, target=None):
    """
    GPU-only version of a two-layer hierarchical softmax.
    See hierarchical_softmax's docstring for the description of the arguments.
    """
    W1 = as_cuda_ndarray_variable(W1)
    b1 = as_cuda_ndarray_variable(b1)
    W2 = as_cuda_ndarray_variable(W2)
    b2 = as_cuda_ndarray_variable(b2)
    x = as_cuda_ndarray_variable(x)

    # First softmax which computes the probabilities of belonging to each class
    class_probs = tensor.nnet.softmax(tensor.dot(x, W1) + b1)

    if target is None:
        # Computes the probabilities of all the outputs

        class_ids = tensor.tile(tensor.arange(n_classes, dtype="int32")[None, :], (batch_size, 1))

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2, class_ids)

        output_probs = tensor.nnet.softmax(activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))
        output_probs = class_probs[:, :, None] * output_probs
        output_probs = output_probs.reshape((batch_size, -1))
        output_probs = output_probs[:, :n_outputs]

    else:
        # Computes the probabilities of the outputs specified by the targets

        # Flattens the targets
        target = target.flatten()

        # Class to which each target belongs
        target_classes = target // n_outputs_per_class

        # Output index of each target within its class
        target_outputs_in_class = target % n_classes

        # Second softmax that computes the output probabilities
        activations = sparse_block_dot_SS(
            W2[None, :, :, :], x[:, None, :],
            tensor.zeros((batch_size, 1), dtype='int32'), b2,
            target_classes[:, None])

        output_probs = tensor.nnet.softmax(activations[:, 0, :])
        target_class_probs = class_probs[tensor.arange(batch_size), target_classes]
        output_probs = output_probs[tensor.arange(batch_size),
                                    target_outputs_in_class]
        output_probs = target_class_probs * output_probs

    return output_probs
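For context, a hedged usage sketch of the helper above. The sizes are arbitrary, and the (n_classes, n_in, n_outputs_per_class) weight layout is an assumption inferred from how W2 and b2 are fed to sparse_block_dot_SS in the body, not something this page documents.

import numpy
import theano
from theano import tensor

rng = numpy.random.RandomState(0)
batch_size, n_in = 32, 128
n_classes, n_outputs_per_class = 25, 40
n_outputs = n_classes * n_outputs_per_class

x = tensor.matrix('x')              # (batch_size, n_in) minibatch of features
target = tensor.ivector('target')   # one target index per example

W1 = theano.shared(rng.randn(n_in, n_classes).astype('float32'))
b1 = theano.shared(numpy.zeros(n_classes, dtype='float32'))
W2 = theano.shared(rng.randn(n_classes, n_in, n_outputs_per_class).astype('float32'))
b2 = theano.shared(numpy.zeros((n_classes, n_outputs_per_class), dtype='float32'))

# Probability assigned to each example's target output.
p_target = h_softmax_gpu(W1, b1, W2, b2, x, n_outputs, n_classes,
                         n_outputs_per_class, batch_size, target=target)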
Code example #18
File: GpuConv3D.py Project: 317070/Theano
def local_gpu_conv3d(node):
    if isinstance(node.op, Conv3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, W, b, d = node.inputs
                return [host_from_gpu(gpu_convd(as_cuda_ndarray_variable(V),
                                                as_cuda_ndarray_variable(W),
                                                as_cuda_ndarray_variable(b),
                                                d))]
Code example #19
File: fftconv.py Project: cfsmile/Theano
    def make_node(self, inp1, inp2):
        inp1 = basic_ops.gpu_contiguous(basic_ops.as_cuda_ndarray_variable(inp1))
        inp2 = basic_ops.gpu_contiguous(basic_ops.as_cuda_ndarray_variable(inp2))

        assert inp1.dtype == "float32"
        assert inp2.dtype == "float32"
        assert inp1.ndim == 4  # (batch, a, b, real/imag)
        assert inp2.ndim == 4

        return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])
Code example #20
File: CuDNNConvHWBCOp.py Project: chagge/returnn
 def make_node(self, X, W, b):
   X = gpu_contiguous(as_cuda_ndarray_variable(X))
   W = gpu_contiguous(as_cuda_ndarray_variable(W))
   b = gpu_contiguous(as_cuda_ndarray_variable(b))
   assert X.dtype == "float32"
   assert W.dtype == "float32"
   assert b.dtype == "float32"
   assert X.ndim == 4
   assert W.ndim == 4
   assert b.ndim == 1
   return theano.Apply(self, [X, W, b], [X.type()])
Code example #21
def local_gpu_conv_grad3d(node):
    if isinstance(node.op, ConvGrad3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, d, WShape, dCdH = node.inputs
                return [host_from_gpu(gpu_conv_grad3d(
                    as_cuda_ndarray_variable(V),
                    d,
                    WShape,
                    as_cuda_ndarray_variable(dCdH)))]
Code example #22
 def make_node(self, X, W, b):
     X = gpu_contiguous(as_cuda_ndarray_variable(X))
     W = gpu_contiguous(as_cuda_ndarray_variable(W))
     b = gpu_contiguous(as_cuda_ndarray_variable(b))
     assert X.dtype == "float32"
     assert W.dtype == "float32"
     assert b.dtype == "float32"
     assert X.ndim == 4
     assert W.ndim == 4
     assert b.ndim == 1
     return theano.Apply(self, [X, W, b], [X.type()])
Code example #23
File: GpuConvGrad3D.py Project: 5730279821-TA/Theano
def local_gpu_conv_grad3d(node):
    if isinstance(node.op, ConvGrad3D):
        if numpy.any([i.owner and isinstance(i.owner.op, HostFromGpu)
                      for i in node.inputs]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, d, WShape, dCdH = node.inputs
                return [host_from_gpu(gpu_conv_grad3d(
                    as_cuda_ndarray_variable(V),
                    d,
                    WShape,
                    as_cuda_ndarray_variable(dCdH)))]
Code example #24
File: myblas.py Project: tiwariHD/BinaryNet
    def make_node(self, img, kern):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        broadcastable = [img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False]
        return Apply(self, [img, kern], [CudaNdarrayType(broadcastable)()])
Code example #25
    def make_node(self, inp1, inp2):
        inp1 = basic_ops.gpu_contiguous(
            basic_ops.as_cuda_ndarray_variable(inp1))
        inp2 = basic_ops.gpu_contiguous(
            basic_ops.as_cuda_ndarray_variable(inp2))

        assert inp1.dtype == "float32"
        assert inp2.dtype == "float32"
        assert inp1.ndim == 4  # (batch, a, b, real/imag)
        assert inp2.ndim == 4

        return theano.Apply(self, [inp1, inp2], [self.output_type(inp1)()])
Code example #26
File: op.py Project: adbrebs/factored_output_layer
    def make_node(self, V, U, UinvT, Q, H, Y_indexes, Y_values, learning_rate,
                  use_qtilde=0, use_lower=1, invup_mode=1,
                  stabilize_period=10, unfactorize_period=100,debug_print=0):

        # The following are supposed to reside on the GPU
        V = as_cuda_ndarray_variable(V)
        U = as_cuda_ndarray_variable(U)
        UinvT = as_cuda_ndarray_variable(UinvT)
        Q = as_cuda_ndarray_variable(Q)
        H = as_cuda_ndarray_variable(H)

        # The following are on the CPU
        Y_indexes = as_tensor_variable(Y_indexes)
        Y_values = as_tensor_variable(Y_values)
        learning_rate = as_tensor_variable(learning_rate)
        use_qtilde = as_tensor_variable(use_qtilde)
        use_lower = as_tensor_variable(use_lower)
        invup_mode = as_tensor_variable(invup_mode)
        stabilize_period = as_tensor_variable(stabilize_period)
        unfactorize_period = as_tensor_variable(unfactorize_period)
        debug_print = as_tensor_variable(debug_print)

        # print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
        # for k,v in locals().items():
        #     print k,':',type(v)
        # print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"

        params = [V, U, UinvT, Q, H, Y_indexes, Y_values, learning_rate,
                  use_qtilde, use_lower, invup_mode, stabilize_period,
                  unfactorize_period, debug_print]

        # make sure parameters are either all of dtype float32 or all of dtype float64 (except for Y_indexes which are integers)
        elem_type = V.dtype
        if elem_type != "float32" and elem_type != "float64":
            raise TypeError("LargeSparseTargets parameter V must have dtype of float32 or float64")

        check_tensor_variables_ndim_and_dtype(0, elem_type, ["learning_rate"], locals() )
        check_tensor_variables_ndim_and_dtype(2, elem_type, ["V", "U", "UinvT", "Q", "H", "Y_values"], locals() )
        check_tensor_variables_ndim_and_dtype(2, "int32", ["Y_indexes"], locals() )

        # T.matrix(elem_type)
            
        # Now properly set up outputs to compute
        if self.what_to_output == 0:    # output scalar cost
            outputs = [T.scalar(elem_type)]
        elif self.what_to_output == 1:  # output grad_H
            outputs = [CudaNdarrayType(broadcastable=(False, False))()]
        elif self.what_to_output == 2:  # output cost and grad_H
            outputs = [T.scalar(elem_type),
                       CudaNdarrayType(broadcastable=(False, False))()]
        else:
            raise ValueError("Invalid value for what_to_output: must be 0, 1, or 2")
        
        return Apply(self, params, outputs)
Code example #27
File: GpuConvTransp3D.py Project: 317070/Theano
    def make_node(self, W, b, d, H, RShape=None):
        W_ = as_cuda_ndarray_variable(W)
        b_ = as_cuda_ndarray_variable(b)
        d_ = T.as_tensor_variable(d)
        H_ = as_cuda_ndarray_variable(H)
        if RShape:
            RShape_ = T.as_tensor_variable(RShape)
        else:
            RShape_ = T.as_tensor_variable([-1, -1, -1])

        return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_],
                            outputs=[CudaNdarrayType(dtype=H_.dtype,
                                                     broadcastable=(False,)*5)()])
Code example #28
    def make_node(self, W, b, d, H, RShape=None):
        W_ = as_cuda_ndarray_variable(W)
        b_ = as_cuda_ndarray_variable(b)
        d_ = T.as_tensor_variable(d)
        H_ = as_cuda_ndarray_variable(H)
        if RShape:
            RShape_ = T.as_tensor_variable(RShape)
        else:
            RShape_ = T.as_tensor_variable([-1, -1, -1])

        return theano.Apply(self, inputs=[W_, b_, d_, H_, RShape_],
                            outputs=[CudaNdarrayType(dtype=H_.dtype,
                                                     broadcastable=(False,)*5)()])
Code example #29
    def make_node(self, V, d, WShape, dCdH):
        """
        :param V: visible
        :param d: strides
        :param WShape: shape of the weights, which is also the shape of this op's output
        :param dCdH: the other input with which V will be convolved.
        """
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)

        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
                            outputs=[CudaNdarrayType(dtype=V_.dtype,
                                                     broadcastable=(False,) * 5)()])
Code example #30
File: blocksparse.py Project: truell20/grammarVAE
    def make_node(self, o, W, h, inputIdx, outputIdx):
        o = basic_ops.as_cuda_ndarray_variable(o)
        W = basic_ops.as_cuda_ndarray_variable(W)
        h = basic_ops.as_cuda_ndarray_variable(h)
        assert o.ndim == 3
        assert W.ndim == 4
        assert h.ndim == 3
        assert inputIdx.ndim == 2
        assert outputIdx.ndim == 2

        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes

        return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])
Code example #31
 def make_node(self, X, regions_y, regions_x, out_size):
   X = gpu_contiguous(as_cuda_ndarray_variable(X))
   assert X.dtype == "float32"
   assert X.ndim == 4
   regions_y = gpu_contiguous(as_cuda_ndarray_variable(regions_y))
   assert regions_y.dtype == "float32"
   assert regions_y.ndim == 2
   regions_x = gpu_contiguous(as_cuda_ndarray_variable(regions_x))
   assert regions_x.dtype == "float32"
   assert regions_x.ndim == 2, regions_x.ndim
   out_size = T.as_tensor_variable(out_size)
   assert out_size.dtype == "float32"
   assert out_size.ndim == 1
   return theano.Apply(self, [X, regions_y, regions_x, out_size], [X.type()])
Code example #32
File: GpuConv3D.py Project: Dimitris0mg/Theano
    def make_node(self, V, W, b, d):
        """
            :param V: Visible unit, input
            :param W: Weights, filter
            :param b: bias
            :param d: strides when moving the filter over the input
        """
        V_ = as_cuda_ndarray_variable(V)
        W_ = as_cuda_ndarray_variable(W)
        b_ = as_cuda_ndarray_variable(b)
        d_ = T.as_tensor_variable(d)

        return theano.Apply(
            self, inputs=[V_, W_, b_, d_],
            outputs=[CudaNdarrayType(dtype=V_.dtype,
                                     broadcastable=(V_.broadcastable[0],
                                                    W_.broadcastable[0],
                                                    False, False, False))()])
Code example #33
File: blocksparse.py Project: poolio/Theano
    def make_node(self, o, W, h, inputIdx, outputIdx):
        o = basic_ops.as_cuda_ndarray_variable(o)
        W = basic_ops.as_cuda_ndarray_variable(W)
        h = basic_ops.as_cuda_ndarray_variable(h)
        assert o.ndim == 3
        assert W.ndim == 4
        assert h.ndim == 3
        assert inputIdx.ndim == 2
        assert outputIdx.ndim == 2

        assert inputIdx.type.dtype in discrete_dtypes
        assert outputIdx.type.dtype in discrete_dtypes

        return Apply(self, [o, W, h, inputIdx, outputIdx], [o.type()])
Code example #34
File: GpuConvGrad3D.py Project: wycg1984/Theano
    def make_node(self, V, d, WShape, dCdH):
        """
        :param V: visible
        :param d: strides
        :param WShape: shape of the weights, which is also the shape of this op's output
        :param dCdH: the other input with which V will be convolved.
        """
        V_ = as_cuda_ndarray_variable(V)
        d_ = T.as_tensor_variable(d)
        WShape_ = T.as_tensor_variable(WShape)
        dCdH_ = as_cuda_ndarray_variable(dCdH)

        return theano.Apply(self, inputs=[V_, d_, WShape_, dCdH_],
                            outputs=[CudaNdarrayType(dtype=V_.dtype,
                                                     broadcastable=(False,) * 5)()])
Code example #35
 def make_node(self, X, DY, regions_y, regions_x):
   X = gpu_contiguous(as_cuda_ndarray_variable(X))
   assert X.dtype == "float32"
   assert X.ndim == 4
   DY = gpu_contiguous(as_cuda_ndarray_variable(DY))
   assert DY.dtype == "float32"
   assert DY.ndim == 4
   regions_y = gpu_contiguous(as_cuda_ndarray_variable(regions_y))
   assert regions_y.dtype == "float32"
   assert regions_y.ndim == 2
   regions_x = gpu_contiguous(as_cuda_ndarray_variable(regions_x))
   assert regions_x.dtype == "float32"
   assert regions_x.ndim == 2, regions_x.ndim
   return theano.Apply(self, [X, DY, regions_y, regions_x], [X.type()])
Code example #36
 def make_node(self, X, DY, regions_y, regions_x):
     X = gpu_contiguous(as_cuda_ndarray_variable(X))
     assert X.dtype == "float32"
     assert X.ndim == 4
     DY = gpu_contiguous(as_cuda_ndarray_variable(DY))
     assert DY.dtype == "float32"
     assert DY.ndim == 4
     regions_y = gpu_contiguous(as_cuda_ndarray_variable(regions_y))
     assert regions_y.dtype == "float32"
     assert regions_y.ndim == 2
     regions_x = gpu_contiguous(as_cuda_ndarray_variable(regions_x))
     assert regions_x.dtype == "float32"
     assert regions_x.ndim == 2, regions_x.ndim
     return theano.Apply(self, [X, DY, regions_y, regions_x], [X.type()])
Code example #37
File: GpuConv3D.py Project: yarikoptic/Theano
def local_gpu_conv3d(node):
    if isinstance(node.op, Conv3D):
        if numpy.any([
                i.owner and isinstance(i.owner.op, HostFromGpu)
                for i in node.inputs
        ]):
            if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
                V, W, b, d = node.inputs
                return [
                    host_from_gpu(
                        gpu_convd(as_cuda_ndarray_variable(V),
                                  as_cuda_ndarray_variable(W),
                                  as_cuda_ndarray_variable(b), d))
                ]
Code example #38
 def make_node(self, X, regions_y, regions_x, out_size):
     X = gpu_contiguous(as_cuda_ndarray_variable(X))
     assert X.dtype == "float32"
     assert X.ndim == 4
     regions_y = gpu_contiguous(as_cuda_ndarray_variable(regions_y))
     assert regions_y.dtype == "float32"
     assert regions_y.ndim == 2
     regions_x = gpu_contiguous(as_cuda_ndarray_variable(regions_x))
     assert regions_x.dtype == "float32"
     assert regions_x.ndim == 2, regions_x.ndim
     out_size = T.as_tensor_variable(out_size)
     assert out_size.dtype == "float32"
     assert out_size.ndim == 1
     return theano.Apply(self, [X, regions_y, regions_x, out_size],
                         [X.type()])
Code example #39
File: lstm_cell.py Project: papar22/returnn-hmm
  def make_node(self, Z, V_h, c, i):
    Z = gpu_contiguous(as_cuda_ndarray_variable(Z))
    V_h = gpu_contiguous(as_cuda_ndarray_variable(V_h))
    c = gpu_contiguous(as_cuda_ndarray_variable(c))
    i = gpu_contiguous(as_cuda_ndarray_variable(i))
    assert Z.dtype == "float32"
    assert V_h.dtype == "float32"
    assert c.dtype == 'float32'
    assert c.ndim == 2
    assert Z.ndim == 2
    assert i.ndim == 1
    assert V_h.ndim == 2

    #results: output Y, (gates and cell state) H
    return theano.Apply(self, [Z, V_h, c, i], [Z.type(), Z.type(), c.type()])
Code example #40
File: blstm.py Project: papar22/returnn-hmm
    def make_node(self, V_f, V_b, c_f, c_b, idx_f, idx_b, Dd_f, Dd_b, DY_f,
                  DY_b, Y_f, Y_b, H_f, H_b):
        V_f = gpu_contiguous(as_cuda_ndarray_variable(V_f))
        V_b = gpu_contiguous(as_cuda_ndarray_variable(V_b))
        c_f = gpu_contiguous(as_cuda_ndarray_variable(c_f))
        c_b = gpu_contiguous(as_cuda_ndarray_variable(c_b))
        DY_f = gpu_contiguous(as_cuda_ndarray_variable(DY_f))
        DY_b = gpu_contiguous(as_cuda_ndarray_variable(DY_b))
        idx_f = gpu_contiguous(
            as_cuda_ndarray_variable(T.cast(idx_f, 'float32')))
        idx_b = gpu_contiguous(
            as_cuda_ndarray_variable(T.cast(idx_b, 'float32')))
        Dd_f = gpu_contiguous(as_cuda_ndarray_variable(Dd_f))
        Dd_b = gpu_contiguous(as_cuda_ndarray_variable(Dd_b))
        assert V_f.dtype == "float32"
        assert V_b.dtype == "float32"
        assert DY_f.dtype == 'float32'
        assert DY_b.dtype == 'float32'
        assert Y_f.dtype == 'float32'
        assert Y_b.dtype == 'float32'
        assert H_f.dtype == 'float32'
        assert H_b.dtype == 'float32'
        assert c_f.dtype == 'float32'
        assert c_b.dtype == 'float32'
        assert V_f.ndim == 2
        assert V_b.ndim == 2
        assert DY_f.ndim == 3
        assert DY_b.ndim == 3
        assert Y_f.ndim == 3
        assert Y_b.ndim == 3
        assert H_f.ndim == 3
        assert H_b.ndim == 3
        assert c_f.ndim == 2
        assert c_b.ndim == 2
        assert idx_f.ndim == 2
        assert idx_b.ndim == 2

        return theano.Apply(self, [
            V_f, V_b, c_f, c_b, idx_f, idx_b, Dd_f, Dd_b, DY_f, DY_b, Y_f, Y_b,
            H_f, H_b
        ], [
            H_f.type(),
            H_b.type(),
            V_f.type(),
            V_b.type(),
            c_f.type(),
            c_b.type()
        ])
Code example #41
 def grad(self, inputs, dout):
     images, = inputs
     acts, denoms = self(images)
     dout, _ = dout  # Ignore the gradient on "denoms"
     dout = as_cuda_ndarray_variable(dout)
     grad_op = CrossMapNormUndo(self._size_f, self._add_scale, self._pow_scale, self._blocked, inplace=False)
     return [grad_op(images, acts, denoms, dout)[0]]
Code example #42
File: fftconv.py Project: Ambier/Theano
    def make_node(self, inp):
        inp = basic_ops.gpu_contiguous(
            basic_ops.as_cuda_ndarray_variable(inp))

        assert inp.dtype == "float32"

        return theano.Apply(self, [inp], [self.output_type(inp)()])
Code example #43
  def make_node(self, X, W1, W2, W3, W4, V_h1, V_h2, V_h3, V_h4, V_v1, V_v2, V_v3, V_v4, b1, b2, b3, b4, sizes):
    var_names = ["X", "W1", "W2", "W3", "W4", "V_h1", "V_h2", "V_h3", "V_h4",
                 "V_v1", "V_v2", "V_v3", "V_v4", "b1", "b2", "b3", "b4"]
    lcl = locals()
    for var_name in var_names:
      lcl[var_name] = gpu_contiguous(as_cuda_ndarray_variable(lcl[var_name]))
      assert lcl[var_name].dtype == "float32"
    #note: sizes lives on the CPU!
    sizes = T.as_tensor_variable(sizes)
    assert sizes.dtype == "float32"

    assert lcl["X"].ndim == 4
    assert lcl["W1"].ndim == 2
    assert lcl["W2"].ndim == 2
    assert lcl["W3"].ndim == 2
    assert lcl["W4"].ndim == 2
    assert lcl["V_h1"].ndim == 2
    assert lcl["V_h2"].ndim == 2
    assert lcl["V_h3"].ndim == 2
    assert lcl["V_h4"].ndim == 2
    assert lcl["V_v1"].ndim == 2
    assert lcl["V_v2"].ndim == 2
    assert lcl["V_v3"].ndim == 2
    assert lcl["V_v4"].ndim == 2
    assert lcl["b1"].ndim == 1
    assert lcl["b2"].ndim == 1
    assert lcl["b3"].ndim == 1
    assert lcl["b4"].ndim == 1
    assert sizes.ndim == 2

    all_vars = [lcl[var_name] for var_name in var_names] + [sizes]

    #results: outputs Y1, Y2, Y3, Y4, (gates and cell states) H1, H2, H3, H4
    return theano.Apply(self, all_vars, [lcl["X"].type() for _ in xrange(8)])
Code example #44
  def make_node(self, X, W1, W2, W3, W4, V_h1, V_h2, V_h3, V_h4, V_v1, V_v2, V_v3, V_v4,
                b1, b2, b3, b4, sizes, DY1, DY2, DY3, DY4, Y1, Y2, Y3, Y4, H1, H2, H3, H4):

    var_names = ["X", "W1", "W2", "W3", "W4", "V_h1", "V_h2", "V_h3", "V_h4",
                 "V_v1", "V_v2", "V_v3", "V_v4", "b1", "b2", "b3", "b4",
                 "DY1", "DY2", "DY3", "DY4", "Y1", "Y2", "Y3", "Y4",
                 "H1", "H2", "H3", "H4"]
    lcl = locals()
    for var_name in var_names:
      lcl[var_name] = gpu_contiguous(as_cuda_ndarray_variable(lcl[var_name]))
      assert lcl[var_name].dtype == "float32"
    #note: sizes lives on the CPU!
    sizes = T.as_tensor_variable(sizes)
    assert sizes.dtype == "float32"
    expected_ndims = [4] + ([2] * 12) + ([1] * 4) + ([4] * 12)
    assert len(var_names) == len(expected_ndims), (len(var_names), len(expected_ndims))
    for var_name, expected_ndim in zip(var_names, expected_ndims):
      assert lcl[var_name].ndim == expected_ndim, \
          (var_name, lcl[var_name].name, lcl[var_name].ndim, expected_ndim)
    assert sizes.ndim == 2
    all_vars_no_sizes = [lcl[var_name] for var_name in var_names]
    all_vars = all_vars_no_sizes[:17] + [sizes] + all_vars_no_sizes[17:]
    inputs_vars = all_vars[:17]

    return theano.Apply(self, all_vars, [v.type() for v in inputs_vars])
Code example #45
    def make_node(self, p, h, gp, gh):
        p = as_cuda_ndarray_variable(p)
        h = as_cuda_ndarray_variable(h)
        gp = as_cuda_ndarray_variable(gp)
        gh = as_cuda_ndarray_variable(gh)

        assert p.ndim == 4
        assert h.ndim == 4
        assert gp.ndim == 4
        assert gh.ndim == 4
        try:
            nb_channel = int(get_scalar_constant_value(h.shape[0]))
            assert nb_channel % 16 == 0
        except NotScalarConstantError:
            pass
        return Apply(self, [p, h, gp, gh], [p.type(), h.type()])
Code example #46
    def make_node(self, *inputs):
        _inputs = [gpu_contiguous(as_cuda_ndarray_variable(i)) for i in inputs]
        if self.nin > 0 and len(_inputs) != self.nin:
            raise TypeError('Wrong argument count', (self.nin, len(_inputs)))
        for i in _inputs[1:]:
            if i.type.ndim != inputs[0].type.ndim:
                raise TypeError('different ranks among inputs')

        if any([any(i.type.broadcastable) for i in inputs]):
            raise Exception("pycuda don't support broadcasted dimensions")
        assert len(inputs) == 2  # TODO remove

        otype = CudaNdarrayType(broadcastable=[False] * _inputs[0].type.ndim)
        assert self.nout == 1

        fct_name = "pycuda_elemwise_%s" % str(self.scalar_op)
        out_node = Apply(self, _inputs, [otype() for o in xrange(self.nout)])
        in_name = ["i" + str(id) for id in range(len(inputs))]
        out_name = ["o" + str(id) for id in range(self.nout)]
        c_code = self.scalar_op.c_code(out_node, "some_name",
                                       tuple(n + "[i]" for n in in_name),
                                       tuple(n + "[i]" for n in out_name), {})
        c_code_param = ", ".join(
            [var.type.dtype_specs()[1] + " *" + name
             for var, name in zip(inputs, in_name) + zip(out_node.outputs, out_name)]
            + ["int size"])
        mod = SourceModule("""
#include<Python.h>
#include <numpy/arrayobject.h>
  __global__ void %s(%s)
  {
    int i = (blockIdx.x+blockIdx.y*gridDim.x)*(blockDim.x*blockDim.y);
    i += threadIdx.x + threadIdx.y*blockDim.x;
    if(i<size){
        %s
    }
  }
  """%(fct_name,c_code_param,c_code))
        self.pycuda_fct = mod.get_function(fct_name)
        return out_node
Code example #47
    def make_node(self, images, maxout, gz):
        images = as_cuda_ndarray_variable(images)
        maxout = as_cuda_ndarray_variable(maxout)
        gz = as_cuda_ndarray_variable(gz)

        assert images.ndim == 4
        assert maxout.ndim == 4
        assert gz.ndim == 4
        try:
            # Note: `get_scalar_constant_value` returns an ndarray, not an int
            nb_channel = int(get_scalar_constant_value(images.shape[0]))
            assert nb_channel % 16 == 0
        except NotScalarConstantError:
            pass
        return Apply(self, [images, maxout, gz], [images.type()])
Code example #48
File: pool.py Project: gdesjardins/pylearn2_bio
    def make_node(self, images, maxout, gz):
        images = as_cuda_ndarray_variable(images)
        maxout = as_cuda_ndarray_variable(maxout)
        gz = as_cuda_ndarray_variable(gz)

        assert images.ndim == 4
        assert maxout.ndim == 4
        assert gz.ndim == 4
        try:
            # Note: `get_scalar_constant_value` returns an ndarray, not an int
            nb_channel = int(get_scalar_constant_value(images.shape[0]))
            assert nb_channel % 16 == 0
        except NotScalarConstantError:
            pass
        return Apply(self, [images, maxout, gz], [images.type()])
Code example #49
    def make_node(self, X, W1, W2, V_h1, V_h2, V_v1, V_v2, b1, b2, sizes):
        var_names = [
            "X", "W1", "W2", "V_h1", "V_h2", "V_v1", "V_v2", "b1", "b2"
        ]
        lcl = locals()
        for var_name in var_names:
            lcl[var_name] = gpu_contiguous(
                as_cuda_ndarray_variable(lcl[var_name]))
            assert lcl[var_name].dtype == "float32"
        #note: sizes lives on the CPU!
        sizes = T.as_tensor_variable(sizes)
        assert sizes.dtype == "float32"

        assert lcl["X"].ndim == 4
        assert lcl["W1"].ndim == 2
        assert lcl["W2"].ndim == 2
        assert lcl["V_h1"].ndim == 2
        assert lcl["V_h2"].ndim == 2
        assert lcl["V_v1"].ndim == 2
        assert lcl["V_v2"].ndim == 2
        assert lcl["b1"].ndim == 1
        assert lcl["b2"].ndim == 1
        assert sizes.ndim == 2

        all_vars = [lcl[var_name] for var_name in var_names] + [sizes]

        # results: outputs Y1, Y2, (gates and cell states) H1, H2
        return theano.Apply(self, all_vars,
                            [lcl["X"].type() for _ in range(4)])
Code example #50
File: dnn.py Project: wanasit/Theano
    def make_node(self, img, kern, desc):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        broadcastable = (img.type.broadcastable[0], kern.type.broadcastable[0],
                         False, False)
        return Apply(self, [img, kern, desc],
                     [CudaNdarrayType(broadcastable)()])
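This op expects a ready-made cudnnConvolutionDescriptor_t, so it is rarely instantiated by hand; in this backend the usual entry point is the dnn_conv helper, which builds the descriptor and wires up the node. A hedged usage sketch, assuming theano.sandbox.cuda.dnn is importable and cuDNN is installed:

from theano import tensor
from theano.sandbox.cuda.dnn import dnn_conv

img = tensor.tensor4('img')    # (batch, channels, rows, cols)
kern = tensor.tensor4('kern')  # (n_filters, channels, filter_rows, filter_cols)
out = dnn_conv(img, kern, border_mode='valid', subsample=(1, 1))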
Code example #51
 def make_node(self, V, W, b, d):
     """
         :param V: Visible unit, input
         :param W: Weights, filter
         :param b: bias
         :param d: strides when moving the filter over the input
     """
     V_ = as_cuda_ndarray_variable(V)
     W_ = as_cuda_ndarray_variable(W)
     b_ = as_cuda_ndarray_variable(b)
     d_ = T.as_tensor_variable(d)
     broad = (V_.broadcastable[0], W_.broadcastable[0], False, False, False)
     return theano.Apply(
         self,
         inputs=[V_, W_, b_, d_],
         outputs=[CudaNdarrayType(dtype=V_.dtype, broadcastable=broad)()])
Code example #52
File: img_acts.py Project: Rt0220/pylearn2
    def grad(self, inputs, g_outputs):
        """
        .. todo::

            WRITEME
        """
        hid_acts, filters, output_shape = inputs
        g_images, = g_outputs
        g_images = as_cuda_ndarray_variable(g_images)
        assert not isinstance(g_images, list)

        global FilterActs
        global WeightActs
        if FilterActs is None:
            from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
            from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

        g_filters = WeightActs(stride=self.stride,
                partial_sum=self.partial_sum, pad=self.pad)(
                        g_images, hid_acts, filters.shape[1:3])[0]
        assert not isinstance(g_filters, list)
        g_hid_acts = FilterActs(stride=self.stride, pad=self.pad,
                partial_sum=self.partial_sum)(g_images, filters)

        return [g_hid_acts, g_filters, DisconnectedType()()]
Code example #53
File: dnn.py Project: Stan-ST-SUN/Theano
 def local_softmax_dnn(node):
     raise_no_dnn()
     if isinstance(node.op, GpuSoftmax):
         ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
         out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
         out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
         return [out]
Code example #54
    def make_node(self, X, W1, W2, V_h1, V_h2, V_v1, V_v2, b1, b2, sizes, DY1,
                  DY2, Y1, Y2, H1, H2):

        var_names = [
            "X", "W1", "W2", "V_h1", "V_h2", "V_v1", "V_v2", "b1", "b2", "DY1",
            "DY2", "Y1", "Y2", "H1", "H2"
        ]
        lcl = locals()
        for var_name in var_names:
            lcl[var_name] = gpu_contiguous(
                as_cuda_ndarray_variable(lcl[var_name]))
            assert lcl[var_name].dtype == "float32"
        #note: sizes lives on the CPU!
        sizes = T.as_tensor_variable(sizes)
        assert sizes.dtype == "float32"
        expected_ndims = [4] + ([2] * 6) + ([1] * 2) + ([4] * 6)
        assert len(var_names) == len(expected_ndims), (len(var_names),
                                                       len(expected_ndims))
        for var_name, expected_ndim in zip(var_names, expected_ndims):
            assert lcl[var_name].ndim == expected_ndim, \
                (var_name, lcl[var_name].name, lcl[var_name].ndim, expected_ndim)
        assert sizes.ndim == 2
        all_vars_no_sizes = [lcl[var_name] for var_name in var_names]
        all_vars = all_vars_no_sizes[:9] + [sizes] + all_vars_no_sizes[9:]
        inputs_vars = all_vars[:9]

        return theano.Apply(self, all_vars, [v.type() for v in inputs_vars])
Code example #55
File: fftconv.py Project: corcra/uRNN
    def make_node(self, inp):
        inp = basic_ops.gpu_contiguous(
            basic_ops.as_cuda_ndarray_variable(inp))

        assert inp.dtype == "float32"

        return theano.Apply(self, [inp], [self.output_type(inp)()])
Code example #56
File: dnn.py Project: Cfetra/Theano
    def make_node(self, kern, topgrad, desc):
        kern = as_cuda_ndarray_variable(kern)
        topgrad = as_cuda_ndarray_variable(topgrad)
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        broadcastable = [topgrad.type.broadcastable[0],
                         kern.type.broadcastable[1],
                         False, False]
        return Apply(self, [kern, topgrad, desc],
                     [CudaNdarrayType(broadcastable)()])
Code example #57
File: dnn.py Project: c0g/Theano
    def make_node(self, img, kern, desc):
        img = as_cuda_ndarray_variable(img)
        kern = as_cuda_ndarray_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')

        if not isinstance(desc.type, CDataType) \
                or desc.type.ctype != 'cudnnConvolutionDescriptor_t':
            raise TypeError('desc must be cudnnConvolutionDescriptor_t')

        broadcastable = (img.type.broadcastable[0],
                         kern.type.broadcastable[0],
                         False, False)
        return Apply(self, [img, kern, desc],
                     [CudaNdarrayType(broadcastable)()])