def test_softmax(self):
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    t = T.ftensor4('t')
    rand_tensor = numpy.asarray(
        numpy.random.rand(5, 4, 3, 2),
        dtype='float32'
    )
    self._compile_and_check(
        [t],
        [dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t)],
        [rand_tensor],
        dnn.GpuDnnSoftmax
    )

    self._compile_and_check(
        [t],
        [
            T.grad(
                dnn.GpuDnnSoftmax(
                    'bc01',
                    'accurate',
                    'channel'
                )(t).mean(),
                t
            )
        ],
        [rand_tensor],
        dnn.GpuDnnSoftmaxGrad
    )
def test_log_softmax():
    # This is a test for an optimization that depends on CuDNN v3 or
    # more recent. Don't test if the CuDNN version is too old.
    if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()
    softmax_out = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(x)
    log_out = T.log(T.as_tensor_variable(softmax_out))

    f = theano.function([x], log_out, mode=mode_with_gpu)

    # Ensure that the optimization has been applied
    dnn_softmax_nodes = [
        n for n in f.maker.fgraph.toposort()
        if isinstance(n.op, cuda.dnn.GpuDnnSoftmax)
    ]
    assert len(dnn_softmax_nodes) == 1
    assert dnn_softmax_nodes[0].op.algo == "log"

    # Ensure that the output of the function is valid
    input_shapes = [(3, 4, 5, 6),
                    (1025, 2, 3, 4),
                    (2, 1025, 3, 4),
                    (2, 3, 1025, 4),
                    (2, 3, 4, 1025),
                    (66000, 2, 3, 4),
                    (2, 66000, 3, 4),
                    (2, 3, 66000, 4),
                    (2, 3, 4, 66000)]

    for inp_shape in input_shapes:
        input_val = numpy.random.normal(0, 1, inp_shape).astype("float32")
        out = f(input_val)
        expected_out = numpy.log(
            numpy.exp(input_val) /
            numpy.exp(input_val).sum(1)[:, None, :, :])
        utt.assert_allclose(out, expected_out)
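# For reference: the "log" algo selected by the optimization above computes
# log-softmax directly. A numerically stable NumPy equivalent uses the
# shift-by-max identity; this is a minimal sketch (the helper name is
# illustrative, not part of the test suite).
def _reference_log_softmax(x, axis=1):
    # log softmax(x) = (x - m) - log(sum(exp(x - m))), with m = max over `axis`
    m = x.max(axis=axis, keepdims=True)
    shifted = x - m
    return shifted - numpy.log(numpy.exp(shifted).sum(axis=axis, keepdims=True))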
def softmax(x, axis=1, force_builtin=False):
    """
    Calculate softmax (pseudo probabilities).

    Parameters
    ----------
    x: T.Tensor
        Input tensor.
    axis: int
        Axis on which to apply softmax.
    force_builtin: bool
        Force usage of ``theano.tensor.nnet.softmax`` (more stable).

    Returns
    -------
    T.Tensor
        ``x`` with softmax applied, same shape.
    """
    if dnn_avail and config.use_manual_cudnn_conv and not force_builtin:
        # Order must always be bc01, and x must always be 4d.
        if axis == 1:
            dnn_sm = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')
            if x.ndim == 4:
                logger.debug("Using cuDNN softmax")
                y = dnn_sm(x)
                return y
            elif x.ndim == 5:
                # Remap to 4d; this is memory-friendly.
                logger.debug("Using cuDNN softmax")
                sh = x.shape
                y = dnn_sm(x.flatten(4)).reshape(sh)
                return y

    # If axis != 1, use our own softmax (dimshuffles just to use dnn
    # are not worth it; it is not that much faster anyway).
    if x.ndim == 2 and axis == 1:
        return T.nnet.softmax(x)
    elif force_builtin:
        raise NotImplementedError()
    else:
        e_x = T.exp(x - x.max(axis, keepdims=True))
        y = e_x / e_x.sum(axis, keepdims=True)
        return y
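# Minimal usage sketch for softmax() above (illustrative only; assumes a
# working Theano setup and that `dnn_avail`/`config` are set up as in this
# module). With 5d input and axis=1, the cuDNN path flattens the trailing
# spatial axes to 4d and reshapes back, so the output shape matches the input.
x5 = T.TensorType('float32', (False,) * 5)('x5')
p5 = softmax(x5, axis=1)  # cuDNN path if available; same shape as x5
x2 = T.fmatrix('x2')
p2 = softmax(x2, axis=1)  # 2d input falls back to T.nnet.softmax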
def test_conv():
    if False:
        sig_shape = (100000, 20)
        fil_shape = (20, 30)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(np.float32)
        x = T.TensorType('float32', (False,) * 2, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = T.dot(x, W)
        y2 = dot(x, W, 1)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=10)
        f2 = utils.make_func([x], y2, profile_execution=10)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        sig_shape = (1, 5, 300, 200)
        fil_shape = (7, 5, 1, 1)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(np.float32)  # (nof, ch, xf, yf)
        x = T.TensorType('float32', (False,) * 4, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        y2 = conv2d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        sig_shape = (1, 100, 5, 300, 200)
        # x_shape = (None, 100, 5, 300, 200)
        fil_shape = (7, 3, 5, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(np.float32)  # (nof, z, ch, xf, yf)
        x = T.TensorType('float32', (False,) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, x_shape=sig_shape, w_shape=fil_shape)
        y2 = conv3d(x, W)
        g1 = theano.grad(T.log(y1).sum(), [x])
        g2 = theano.grad(T.log(y2).sum(), [x])
        f1 = utils.make_func([x], y1, profile_execution=5)
        f2 = utils.make_func([x], y2, profile_execution=5)
        r1 = f1(x_val)
        r2 = f2(x_val)
        assert np.allclose(r1, r2)

        sig_shape = (1, 1, 100)
        fil_shape = (1, 1, 20)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(np.float32)
        x = T.TensorType('float32', (False,) * 3, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)
        y1 = conv(x, W, w_shape=fil_shape)
        f1 = elektronn2.neuromancer.graphutils.make_func(
            [x], y1, profile_execution=10)
        r1 = f1(x_val)
        r2 = np.convolve(x_val[0, 0], W_val[0, 0], mode='valid')[None, None]
        assert np.allclose(r1, r2)

    if True:
        sig_shape = (1, 5, 100, 300, 200)
        fil_shape = (7, 5, 3, 3, 3)
        x_val = np.random.rand(*sig_shape).astype(np.float32)
        W_val = np.random.rand(*fil_shape).astype(np.float32)  # (nof, ch, zf, xf, yf)
        x = T.TensorType('float32', (False,) * 5, name='x_cnn_input')()
        W = elektronn2.neuromancer.variables.VariableParam(W_val)

        # Test conv: cuDNN vs. reshaped conv2d/conv3d
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4)).dimshuffle(0, 2, 1, 3, 4)
        f1 = elektronn2.neuromancer.graphutils.make_func(
            [x], y1, profile_execution=5)
        f2 = elektronn2.neuromancer.graphutils.make_func(
            [x], y2, profile_execution=5)
        r3 = np.array(f1(x_val))
        r4 = f2(x_val)
        # cuDNN and the reshaped conv2d/conv3d give the same result,
        # but cuDNN is faster!
        assert np.allclose(r3, r4)
        # Test conv + max pooling
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2), stride=(2, 2, 2), pad=(0, 0, 0),
                          mode='max')
        f1 = elektronn2.neuromancer.graphutils.make_func(
            [x], y1, profile_execution=5)
        r3 = np.array(f1(x_val))

        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        f2 = elektronn2.neuromancer.graphutils.make_func(
            [x], y2, profile_execution=5)
        r4 = f2(x_val)
        # Pooling also works, but it is not so much faster anymore...
        assert np.allclose(r3, r4.transpose(0, 2, 1, 3, 4))

        # Test conv + pooling + softmax
        y1 = dnn.dnn_conv3d(x, W, border_mode='valid')
        y1 = dnn.dnn_pool(y1, (2, 2, 2), stride=(2, 2, 2), pad=(0, 0, 0),
                          mode='max')
        sm = dnn.GpuDnnSoftmax('bc01', 'fast', 'channel')
        sh = y1.shape
        y1 = sm(y1.flatten(4)).reshape(sh)
        f1 = elektronn2.neuromancer.graphutils.make_func(
            [x], y1, profile_execution=5)
        r3 = np.array(f1(x_val))

        y2 = conv3d(x.dimshuffle(0, 2, 1, 3, 4),
                    W.dimshuffle(0, 2, 1, 3, 4))
        y2 = pooling(y2, (2, 2, 2))
        y2 = softmax(y2, axis=2)
        f2 = elektronn2.neuromancer.graphutils.make_func(
            [x], y2, profile_execution=5)
        r4 = f2(x_val)
        # Softmax also works, but the difference is ~1e-5.
        assert np.allclose(r3, r4.transpose(0, 2, 1, 3, 4), atol=1e-5)