Example #1
def test_dnn_conv_merge():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img = T.ftensor4()
    kern = T.ftensor4()
    out = T.ftensor4()

    b = 1
    c = 4
    f = 3
    ih = 5
    iw = 8
    kh = 2
    kw = 6
    img_val = numpy.random.random((b, c, ih, iw)).astype("float32")
    kern_val = numpy.random.random((f, c, kh, kw)).astype("float32")
    out_val = numpy.random.random((b, f, ih - kh + 1, iw - kw + 1)).astype("float32")

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype="float32")

    if cuda.dnn.version() == -1:
        # Can't merge alpha with cudnn v1
        fr = conv + out
        wr = kern + gw
        ir = img + gi
    else:
        fr = lr * (conv + out)
        wr = kern + lr * gw
        ir = img + lr * gi

    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
    assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op, dnn.GpuDnnConv)
    assert isinstance(f1.maker.fgraph.outputs[1].owner.inputs[0].owner.op, dnn.GpuDnnConvGradW)
    assert isinstance(f1.maker.fgraph.outputs[2].owner.inputs[0].owner.op, dnn.GpuDnnConvGradI)

    mode = mode_with_gpu
    mode = mode.excluding("local_dnn_conv_alpha_merge")
    mode = mode.excluding("local_dnn_convw_alpha_merge")
    mode = mode.excluding("local_dnn_convi_alpha_merge")
    mode = mode.excluding("local_dnn_conv_output_merge")
    mode = mode.excluding("local_dnn_convw_output_merge")
    mode = mode.excluding("local_dnn_convi_output_merge")

    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)

    assert not isinstance(f2.maker.fgraph.outputs[0].owner.inputs[0].owner.op, dnn.GpuDnnConv)
    assert not isinstance(f2.maker.fgraph.outputs[1].owner.inputs[0].owner.op, dnn.GpuDnnConvGradW)
    assert not isinstance(f2.maker.fgraph.outputs[2].owner.inputs[0].owner.op, dnn.GpuDnnConvGradI)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)

    for v1, v2 in zip(out_f1, out_f2):
        utt.assert_allclose(v1, v2)
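A note on what the first set of assertions exercises: cuDNN's convolution kernels can compute alpha * conv(img, kern) + beta * prev in a single call, so the alpha/output-merge rewrites fold lr * (conv + out) into one GpuDnnConv with alpha = beta = lr. A minimal numpy sketch of the algebraic identity involved (array names here are illustrative, not part of the test):

import numpy

conv_res = numpy.random.random((1, 3, 4, 3)).astype("float32")  # stand-in for the conv output
out_val = numpy.random.random((1, 3, 4, 3)).astype("float32")
lr = numpy.float32(0.05)

merged = lr * conv_res + lr * out_val   # what a single fused call with alpha = beta = lr computes
unmerged = lr * (conv_res + out_val)    # what the unoptimized graph computes
assert numpy.allclose(merged, unmerged)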
Example #2
    def test_conv_gradi(self):
        if not dnn.dnn_available():
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor4("img")
        kerns = T.ftensor4("kerns")
        out = T.ftensor4("out")
        img_val = numpy.asarray(numpy.random.rand(3, 4, 5, 6), dtype="float32")
        kern_vals = numpy.asarray(numpy.random.rand(3, 4, 5, 6), dtype="float32")

        for params in product(["valid"], [(1, 1)], ["conv", "cross"]):  # Should this work for 'full'?
            temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
            shape = (
                img_val.shape[0],
                kern_vals.shape[1],
                img_val.shape[2] + kern_vals.shape[2] - 1,
                img_val.shape[3] + kern_vals.shape[3] - 1,
            )
            out_vals = numpy.zeros(shape, dtype="float32")
            desc = dnn.GpuDnnConvDesc(border_mode=params[0], subsample=params[1], conv_mode=params[2])(
                out.shape, temp_kerns.shape
            )
            conv_grad_i = dnn.GpuDnnConvGradI()(temp_kerns, img, out, desc)
            self._compile_and_check(
                [temp_kerns, img, out], [conv_grad_i], [kern_vals, img_val, out_vals], dnn.GpuDnnConvGradI
            )
Example #3
def test_dnn_conv_merge():
    # This tests that we correctly merge multiple dnn_conv nodes.
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc = dnn.GpuDnnConvDesc(
        border_mode='valid')(kern.shape)

    # Test forward op
    o1 = dnn.dnn_conv(img, kern)
    o2 = dnn.dnn_conv(img, kern)
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
               numpy.random.rand(*kern_shp).astype('float32'))
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConv)]) == 1

    # Test grad w op
    o1 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
    o2 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
    f = theano.function([img, kern, out], [o1, o2], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradW)]) == 1

    # Test grad i op
    o1 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
    o2 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
    f = theano.function([img, kern, out], [o1, o2], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradI)]) == 1
Example #4
    def test_conv_gradw(self):
        if not dnn.dnn_available():
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor4("img")
        kerns = T.ftensor4("kerns")
        out = T.ftensor4("out")
        img_val = numpy.asarray(numpy.random.rand(2, 5, 6, 8), dtype="float32")
        kern_vals = numpy.asarray(numpy.random.rand(2, 1, 5, 6), dtype="float32")
        out_vals = numpy.zeros((3, 3, 1, 1), dtype="float32")

        for params in product(["valid", "full"], [(1, 1)], ["conv", "cross"]):  # strides besides (1, 1)
            temp_img = img.dimshuffle(1, 0, 2, 3)
            temp_kerns = kerns
            if params[2] == "conv":
                temp_kerns = temp_kerns[:, :, ::-1, ::-1]
            temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
            shape = (
                kern_vals.shape[1],
                img_val.shape[1],
                img_val.shape[2] - kern_vals.shape[2] + 1,
                img_val.shape[3] - kern_vals.shape[3] + 1,
            )
            out_vals = numpy.zeros(shape, dtype="float32")
            desc = dnn.GpuDnnConvDesc(border_mode=params[0], subsample=params[1], conv_mode=params[2])(
                temp_img.shape, out.shape
            )
            conv_grad_w = dnn.GpuDnnConvGradW()(temp_img, temp_kerns, out, desc)
            self._compile_and_check(
                [temp_img, temp_kerns, out], [conv_grad_w], [img_val, kern_vals, out_vals], dnn.GpuDnnConvGradW
            )
Example #5
    def test_conv(self):
        img = T.ftensor4('img')
        kerns = T.ftensor4('kerns')
        img_val = numpy.asarray(
            numpy.random.rand(3, 4, 5, 6),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
            numpy.random.rand(3, 4, 5, 6),
            dtype='float32'
        )

        for params in product(
            ['valid', 'full'],
            [(1, 1), (2, 2)],
            ['conv', 'cross']
        ):
            desc = dnn.GpuDnnConvDesc(
                border_mode=params[0],
                subsample=params[1],
                conv_mode=params[2]
            )(img.shape, kerns.shape)
            conv = dnn.GpuDnnConv()(img, kerns, desc)
            self._compile_and_check(
                [img, kerns],
                [conv],
                [img_val, kern_vals],
                dnn.GpuDnnConv
            )
Example #6
def test_upsample_pool():
    pool_size = (1,2)
    pool_stride = (1,2)
    out = T.ftensor4()
    inputs = T.ftensor4()
    
    actual_in = upsample_pool(out, inputs, pool_size, pool_stride)
    upsample_pool_fn = theano.function([out, inputs], actual_in)
    # expects pool size and stride of (1, 2)
    output = np.float32([[[[5,4,1]]]])
    inputs = np.float32([[[[3,0,8,4,5,6]]]])
    upsampled = upsample_pool_fn(output, inputs)
    assert np.allclose([[[[ 5.,  0.,  4.,  0.,  0.,  1.]]]],
                       upsampled)
    
    # Test for pooling across several channels
    pool_size = (1,2)
    pool_stride = (1,2)
    out = T.ftensor4()
    inputs = T.ftensor4()
    
    actual_in = upsample_pool(out, inputs, pool_size, pool_stride)
    upsample_pool_fn = theano.function([out, inputs], actual_in)
    # expects pool size and stride of (1, 2)
    output = np.float32([[[[-3,5,2]],[[5,4,1]]]])
    inputs = np.float32([[[[2,1,3,4,1,-7]], [[3,0,8,4,5,6]]]])
    upsampled = upsample_pool_fn(output, inputs)
    assert np.allclose([[[[ -3.,  0.,  0.,  5.,  2.,  0.]],
                         [[ 5.,  0.,  4.,  0.,  0.,  1.]]]],
                       upsampled)
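For reference, the first assertion can be reproduced by hand if one assumes upsample_pool routes each pooled value back to the argmax position of its (1, 2) max-pooling window; a small numpy sketch of that assumption (not the library implementation):

import numpy as np

inputs = np.float32([3, 0, 8, 4, 5, 6])   # one row; pooling windows are (3, 0), (8, 4), (5, 6)
output = np.float32([5, 4, 1])            # one pooled value per window
expected = np.zeros_like(inputs)
for win, val in enumerate(output):
    start = win * 2                       # stride (1, 2) along the last axis
    offset = np.argmax(inputs[start:start + 2])
    expected[start + offset] = val        # the value lands on the max position of its window
assert np.allclose(expected, [5., 0., 4., 0., 0., 1.])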
Example #7
def create_back_conv_z_b_fn(min_in, max_in):
    inputs = T.ftensor4()
    weights = T.ftensor4()
    out_relevances = T.ftensor4()
    in_relevances = relevance_conv_z_b(out_relevances, inputs, weights, min_in, max_in)
    back_relevance_conv_fn = theano.function([out_relevances, inputs, weights], in_relevances)
    return back_relevance_conv_fn
Example #8
def create_back_conv_z_plus_fn():
    inputs = T.ftensor4()
    weights = T.ftensor4()
    out_relevances = T.ftensor4()
    in_relevances = relevance_conv_z_plus(out_relevances, inputs, weights)
    back_relevance_conv_fn = theano.function([out_relevances, inputs, weights], in_relevances)
    return back_relevance_conv_fn
Example #9
    def test_logical_shapes(self):
        seed_rng()
        for stride in range(1, 4):
            kshp = (10, 2, 10, 10)
            featshp = (3, 10, 11, 11)

            a = tensor.ftensor4()
            A = tensor.ftensor4()

            # Need to transpose first two dimensions of kernel, and reverse
            # index kernel image dims (for correlation)
            kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])

            featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                               featshp[3] * stride)
            kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
            #print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
            image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                                border_mode='full',
                                                image_shape=featshp,
                                                filter_shape=kshp_rotated,
                                                imshp_logical=featshp_logical[1:],
                                                kshp_logical=kshp[2:])

            func = theano.function([a, A], image_estimate, mode=theano_mode)
            #theano.printing.debugprint(func,)
            assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                        for node in func.maker.fgraph.toposort()])

            a_in = numpy.random.randn(*featshp).astype("float32")
            A_in = numpy.random.randn(*kshp).astype("float32")

            func(a_in, A_in)
Example #10
    def test_grad_types(self):
        # This function simply tests the behaviour of the AbstractConv
        # Ops, not their optimizations
        cpu_input = tensor.ftensor4()
        cpu_filters = tensor.ftensor4()
        cpu_topgrad = tensor.ftensor4()
        gpu_input = gpu_ftensor4()
        gpu_filters = gpu_ftensor4()
        gpu_topgrad = gpu_ftensor4()

        out_shape = tensor.lvector()

        # Check the gradient of the forward conv2d
        for input, filters in itertools.product((cpu_input, gpu_input), (cpu_filters, gpu_filters)):
            output = conv.conv2d(input, filters)
            grad_input, grad_filters = theano.grad(output.sum(), wrt=(input, filters))
            assert grad_input.type == input.type, (grad_input, grad_input.type, input, input.type)
            assert grad_filters.type == filters.type, (grad_filters, grad_filters.type, filters, filters.type)

        # Check the gradient of gradweight
        for input, topgrad in itertools.product((cpu_input, gpu_input), (cpu_topgrad, gpu_topgrad)):
            grad_filters = conv.AbstractConv2d_gradWeights()(input, topgrad, out_shape)
            grad_input, grad_topgrad = theano.grad(grad_filters.sum(), wrt=(input, topgrad))

            assert grad_input.type == input.type, (grad_input, grad_input.type, input, input.type)
            assert grad_topgrad.type == topgrad.type, (grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)

        # Check the gradient of gradinputs
        for filters, topgrad in itertools.product((cpu_filters, gpu_filters), (cpu_topgrad, gpu_topgrad)):
            grad_input = conv.AbstractConv2d_gradInputs()(filters, topgrad, out_shape)
            grad_filters, grad_topgrad = theano.grad(grad_input.sum(), wrt=(filters, topgrad))

            assert grad_filters.type == filters.type, (grad_filters, grad_filters.type, filters, filters.type)
            assert grad_topgrad.type == topgrad.type, (grad_topgrad, grad_topgrad.type, topgrad, topgrad.type)
Example #11
    def test_infer_shape(self):
        shape = (100, 40, 6, 3)
        images = numpy.ones(shape).astype('float32')
        x = T.ftensor4()
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(2, 1),
                                        mode='valid')],
                                    [images],
                                    Images2Neibs
                                    )
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(2, 3),
                                        mode='valid')],
                                    [images],
                                    Images2Neibs
                                    )
        shape = (100, 40, 5, 4)
        images = numpy.ones(shape).astype('float32')
        x = T.ftensor4()
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(2, 1),
                                        mode='ignore_borders')],
                                    [images],
                                    Images2Neibs
                                    )
        shape = (100, 40, 5, 3)
        images = numpy.ones(shape).astype('float32')
        x = T.ftensor4()
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(2, 3),
                                        mode='ignore_borders')],
                                    [images],
                                    Images2Neibs
                                    )

        shape = (100, 40, 6, 7)
        images = numpy.ones(shape).astype('float32')
        x = T.ftensor4()
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(2, 2),
                                        mode='ignore_borders')],
                                    [images],
                                    Images2Neibs
                                    )
        shape = (100, 40, 5, 10)
        images = numpy.ones(shape).astype('float32')
        x = T.ftensor4()
        f = self._compile_and_check([x],
                                    [images2neibs(
                                        x, neib_shape=(3, 3),
                                        mode='wrap_centered')],
                                    [images],
                                    Images2Neibs
                                    )
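As a sanity check on the 'valid' cases above, the 2D shape that images2neibs should infer can be computed by hand, assuming each row of the result is one flattened (ph, pw) patch (a sketch, not the Theano implementation):

def neibs_shape_valid(img_shape, neib_shape):
    # 'valid' mode tiles every (h, w) map into non-overlapping (ph, pw) patches;
    # each patch becomes one row of a 2D matrix of width ph * pw.
    b, c, h, w = img_shape
    ph, pw = neib_shape
    assert h % ph == 0 and w % pw == 0
    return (b * c * (h // ph) * (w // pw), ph * pw)

assert neibs_shape_valid((100, 40, 6, 3), (2, 1)) == (100 * 40 * 3 * 3, 2)
assert neibs_shape_valid((100, 40, 6, 3), (2, 3)) == (100 * 40 * 3 * 1, 6)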
Example #12
    def test_conv_gradw(self, border_mode, conv_mode):
        self._test_conv_gradw(T.ftensor4('img'),
                              T.ftensor4('kerns'),
                              T.ftensor4('out'),
                              numpy.random.rand(2, 5, 6, 8),
                              numpy.random.rand(2, 1, 5, 6),
                              border_mode,
                              conv_mode,
                              (1, 1))
Example #13
def test_dnn_conv_border_mode():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img = T.ftensor4()
    kern = T.ftensor4()

    dnn.dnn_conv(img, kern, border_mode=1)
    dnn.dnn_conv(img, kern, border_mode=(2, 3))
    dnn.dnn_conv(img, kern, border_mode='full')
    dnn.dnn_conv(img, kern, border_mode='valid')
Example #14
def test_dnn_conv_border_mode():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    img = T.ftensor4()
    kern = T.ftensor4()

    dnn.dnn_conv(img, kern, border_mode=1)
    dnn.dnn_conv(img, kern, border_mode=(2, 3))
    dnn.dnn_conv(img, kern, border_mode="full")
    dnn.dnn_conv(img, kern, border_mode="valid")
Example #15
def test_local_lift_abstractconv_gpu_shape():
    prev = theano.config.on_opt_error
    try:
        theano.config.on_opt_error = "raise"
        s = tensor.ivector()
        a = tensor.ftensor4()
        b = tensor.ftensor4()
        c = tensor.nnet.abstract_conv.AbstractConv2d_gradWeights()(a, b, s)
        theano.function([s, a, b], c, mode=mode_with_gpu)
    finally:
        theano.config.on_opt_error = prev
Example #16
    def test_conv_no_bias(self):
        images = T.ftensor4('inputs')
        weights = T.ftensor4('weights')
        convOut = conv2d(images, weights, input_shape=(12, 3, 256, 256), filter_shape=(12, 3, 3, 3), filter_flip=False)

        theano.printing.pydotprint(convOut, outfile="Conv_before_opt.png", var_with_name_simple=True)
        fopt = theano.function(inputs=[images, weights], outputs=convOut, mode=mode_with_mkl)
        theano.printing.pydotprint(fopt, outfile="Conv_OPT_after_opt.png", var_with_name_simple=True)

        fori = theano.function(inputs=[images, weights], outputs=convOut, mode=mode_without_mkl)
        theano.printing.pydotprint(fori, outfile="Conv_Original_after_opt.png", var_with_name_simple=True)
Example #17
    def setup(self, bottom, top):
        if len(bottom) != 2:
            raise Exception("The layer needs two inputs!")

        probs = T.ftensor4()
        labels = T.ftensor4()

        count = T.sum(labels, axis=(1, 2, 3), keepdims=True)
        loss_balanced = -T.mean(T.sum(labels * T.log(probs), axis=(1, 2, 3), keepdims=True) / count)

        self.forward_theano = theano.function([probs, labels], loss_balanced)
        self.backward_theano = theano.function([probs, labels], T.grad(loss_balanced, probs))
Example #18
def test_tensor4_roc_auc_scores():
    true = np.random.binomial(n=1, p=.5, size=(20, 30, 40, 50)).astype('float32')
    predicted = np.random.random((20, 30, 40, 50)).astype('float32')
    yt, yp = T.ftensor4('yt'), T.ftensor4('yp')
    refscore = tmetrics.classification.last_axis_roc_auc_scores(true, predicted)
    roc_auc_scores = tmetrics.classification.roc_auc_scores(yt, yp)
    f = theano.function([yt, yp], roc_auc_scores)
    score = f(true, predicted)
    print('refscore')
    print(refscore)
    print('score')
    print(score)
    assert np.allclose(refscore, score, equal_nan=True)
Example #19
    def test_conv(self, algo, border_mode, conv_mode):
        if algo == 'winograd' and dnn.version(raises=False) < 5000:
            raise SkipTest(dnn.dnn_available.msg)

        self._test_conv(T.ftensor4('img'),
                        T.ftensor4('kerns'),
                        T.ftensor4('out'),
                        numpy.random.rand(7, 2, 8, 4),
                        numpy.random.rand(8, 2, 4, 3),
                        border_mode,
                        conv_mode,
                        [(1, 1), (2, 2)],
                        algo)
Example #20
def test_local_abstractconv_gemm():
    """ We test it here as this is the optimization only that we test.
    This test gh-4036"""
    image = tensor.ftensor4()
    W = tensor.ftensor4()
    conv = tensor.nnet.conv2d(image,
                         W,
                         input_shape=(1, 32, 32, 32),
                         filter_shape=(32, 32, 3, 3),
                         border_mode='half')
    f = theano.function([image, W], [conv], mode=mode_with_gpu)
    f(numpy.random.rand(1, 32, 32, 32).astype('float32'),
      numpy.random.rand(32, 32, 3, 3).astype('float32'))
Example #21
    def setup(self, bottom, top):
        if len(bottom) != 2:
            raise Exception("The layer needs two inputs!")

        probs = T.ftensor4()
        probs_smooth_log = T.ftensor4()

        probs_smooth = T.exp(probs_smooth_log)

        loss = T.mean(T.sum(probs_smooth * T.log(probs_smooth / probs), axis=1))

        self.forward_theano = theano.function([probs, probs_smooth_log], loss)
        self.backward_theano = theano.function([probs, probs_smooth_log], T.grad(loss, [probs, probs_smooth_log]))
Example #22
def test_conv_a_b():
    inputs = T.ftensor4()
    weights = T.ftensor4()
    relevances = T.ftensor4()
    bias = T.fvector()
    in_rel = relevance_conv_a_b(inputs, weights,
                                relevances, a=2, b=1, bias=bias)
    in_rel_fn = theano.function([inputs, weights, relevances, bias],
                                in_rel)
    in_relevance = in_rel_fn(np.array([[[[-1, -2, 3]]]], dtype=np.float32),
                             np.array([[[[1, -1]]]], dtype=np.float32)[:, :, ::-1, ::-1],
                             np.array([[[[4, 2]]]], dtype=np.float32),
                             np.array([0], dtype=np.float32))
    assert np.allclose([[[[-4, 2 * 4 - 4 / 5.0, -6 / 5.0]]]], in_relevance)
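The expected values in the assertion follow from the alpha-beta LRP rule with a=2, b=1: for each output unit, positive and negative input contributions are normalized separately and weighted by a and -b. A hand-worked numpy sketch that reproduces them (the effective correlation kernel is taken as [1, -1] because the test pre-flips the weights, assuming the function flips them back internally):

import numpy as np

x = np.float32([-1, -2, 3])     # the single 1x3 input map
w = np.float32([1, -1])         # assumed effective correlation kernel
R_out = np.float32([4, 2])      # relevances of the two valid conv outputs
a, b = 2.0, 1.0

R_in = np.zeros_like(x)
for j in range(len(R_out)):
    z = x[j:j + 2] * w                                  # per-input contributions to output j
    z_pos, z_neg = np.clip(z, 0, None), np.clip(z, None, 0)
    if z_pos.sum() != 0:
        R_in[j:j + 2] += a * z_pos / z_pos.sum() * R_out[j]
    if z_neg.sum() != 0:
        R_in[j:j + 2] += -b * z_neg / z_neg.sum() * R_out[j]

assert np.allclose(R_in, [-4, 2 * 4 - 4 / 5.0, -6 / 5.0])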
Example #23
    def test_conv_with_bias(self):
        images = T.ftensor4('inputs')
        weights = T.ftensor4('weights')
        bias = T.vector('bias')

        convOut = conv2d(images, weights, input_shape=(12, 3, 256, 256), filter_shape=(12, 3, 3, 3), filter_flip=False)
        convOutBias = convOut + bias.dimshuffle('x', 0, 'x', 'x')

        theano.printing.pydotprint(convOutBias, outfile="ConvBias_before_opt.png", var_with_name_simple=True)
        fopt = theano.function(inputs=[images, weights, bias], outputs=convOutBias, mode=mode_with_mkl)
        theano.printing.pydotprint(fopt, outfile="ConvBias_OPT_after_opt.png", var_with_name_simple=True)

        fori = theano.function(inputs=[images, weights, bias], outputs=convOutBias, mode=mode_without_mkl)
        theano.printing.pydotprint(fori, outfile="ConvBias_Original_after_opt.png", var_with_name_simple=True)
Example #24
def test_dnn_conv_inplace():
    """This test that we have inplace work correctly even when
    GpuAllocEmpty get merged together.

    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='conv')(
        kern.shape)
    desc2 = dnn.GpuDnnConvDesc(
        border_mode='valid', conv_mode='cross')(kern.shape)

    # Test forward op
    o1 = dnn.dnn_conv(img, kern, conv_mode='conv')
    o2 = dnn.dnn_conv(img, kern, conv_mode='cross')
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
               numpy.random.rand(*kern_shp).astype('float32'))
    topo = f.maker.fgraph.toposort()
    convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConv)]
    assert len(convs) == 2
    assert all([node.op.inplace for node in convs])
    assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2

    # Test grad w op
    out = GpuAllocEmpty(kern.dtype, test_ctx_name)(*kern.shape)
    o1 = dnn.GpuDnnConvGradW()(img, kern, out, desc1)
    o2 = dnn.GpuDnnConvGradW()(img, kern, out, desc2)
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradW)]
    assert len(convs) == 2
    assert all([node.op.inplace for node in convs])
    assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2

    # Test grad i op
    out = GpuAllocEmpty(img.dtype, test_ctx_name)(*img.shape)
    o1 = dnn.GpuDnnConvGradI()(img, kern, out, desc1)
    o2 = dnn.GpuDnnConvGradI()(img, kern, out, desc2)
    f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradI)]
    assert len(convs) == 2
    assert all([node.op.inplace for node in convs])
    assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2
Example #25
    def test_conv_gradw(self):
        if not dnn.dnn_available():
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor4('img')
        kerns = T.ftensor4('kerns')
        out = T.ftensor4('out')
        img_val = numpy.asarray(
            numpy.random.rand(2, 5, 6, 8),
            dtype='float32'
        )
        kern_vals = numpy.asarray(
            numpy.random.rand(2, 1, 5, 6),
            dtype='float32'
        )
        out_vals = numpy.zeros((3, 3, 1, 1), dtype='float32')

        for params in product(
            ['valid', 'full'],
            [(1, 1)],  # strides besides (1, 1)
            ['conv', 'cross']
        ):
            temp_img = img.dimshuffle(1, 0, 2, 3)
            temp_kerns = kerns
            if params[2] == 'conv':
                temp_kerns = temp_kerns[:, :, ::-1, ::-1]
            temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
            shape = (
                kern_vals.shape[1], img_val.shape[1],
                img_val.shape[2] - kern_vals.shape[2] + 1,
                img_val.shape[3] - kern_vals.shape[3] + 1
            )
            out_vals = numpy.zeros(shape, dtype='float32')
            desc = dnn.GpuDnnConvDesc(
                border_mode=params[0],
                subsample=params[1],
                conv_mode=params[2]
            )(temp_img.shape, out.shape)
            conv_grad_w = dnn.GpuDnnConvGradW()(
                temp_img,
                temp_kerns,
                out,
                desc,
            )
            self._compile_and_check(
                [temp_img, temp_kerns, out],
                [conv_grad_w],
                [img_val, kern_vals, out_vals],
                dnn.GpuDnnConvGradW
            )
Example #26
    def make_node(self, x, x2, x3, x4, x5):
        # check that the theano version has support for __props__.
        # This next line looks like it has a typo, but it's actually a way to
        # detect whether the theano version is sufficiently recent to support
        # the use of __props__.
        assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
        x = tensor.as_tensor_variable(x)
        x2 = tensor.as_tensor_variable(x2)
        x3 = tensor.as_tensor_variable(x3)
        x4 = tensor.as_tensor_variable(x4)
        x5 = tensor.as_tensor_variable(x5)

        if prm.att_doc:
            if prm.compute_emb:
                td = tensor.itensor4().type()
            else:
                td = tensor.ftensor4().type()
            tm = tensor.ftensor3().type()
        else:
            if prm.compute_emb:
                td = tensor.itensor3().type()
            else:
                td = tensor.ftensor3().type()
            tm = tensor.fmatrix().type()
        return theano.Apply(self, [x, x2, x3, x4, x5],
                            [td, tm, tensor.fmatrix().type(), tensor.ivector().type()])
Example #27
    def test_pool(self):
        if not dnn.dnn_available(test_ctx_name):
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor4('img')
        img_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5),
            dtype='float32'
        )

        # 'average_exc_pad' is disabled for versions < 4004
        if dnn.version(raises=False) < 4004:
            modes = ['max', 'average_inc_pad']
        else:
            modes = ['max', 'average_inc_pad', 'average_exc_pad']

        for params in product(
            [(1, 1), (2, 2), (3, 3)],
            [(1, 1), (2, 2), (3, 3)],
            modes
        ):
            self._compile_and_check(
                [img],
                [dnn.GpuDnnPool(mode=params[2])(img, params[0], params[1], (0, 0))],
                [img_val],
                dnn.GpuDnnPool
            )
Example #28
    def test_softmax(self):
        if not dnn.dnn_available(test_ctx_name):
            raise SkipTest(dnn.dnn_available.msg)
        t = T.ftensor4('t')
        rand_tensor = numpy.asarray(
            numpy.random.rand(5, 4, 3, 2),
            dtype='float32'
        )
        self._compile_and_check(
            [t],
            [dnn.GpuDnnSoftmax('accurate', 'channel')(t)],
            [rand_tensor],
            dnn.GpuDnnSoftmax
        )

        self._compile_and_check(
            [t],
            [
                T.grad(
                    dnn.GpuDnnSoftmax(
                        'accurate',
                        'channel'
                    )(t).mean(),
                    t
                )
            ],
            [rand_tensor],
            dnn.GpuDnnSoftmaxGrad
        )
Example #29
def test_dnn_tag():
    """
    Test that if cuDNN isn't available we crash, and that if it is available, we use it.
    """
    x = T.ftensor4()
    old = theano.config.on_opt_error
    theano.config.on_opt_error = "raise"

    sio = StringIO()
    handler = logging.StreamHandler(sio)
    logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
    # Silence original handler when intentionally generating warning messages
    logging.getLogger('theano').removeHandler(theano.logging_default_handler)
    raised = False
    try:
        f = theano.function(
            [x],
            pool_2d(x, ds=(2, 2), ignore_border=True),
            mode=mode_with_gpu.including("cudnn"))
    except (AssertionError, RuntimeError):
        assert not dnn.dnn_available(test_ctx_name)
        raised = True
    finally:
        theano.config.on_opt_error = old
        logging.getLogger(
            'theano.compile.tests.test_dnn').removeHandler(handler)
        logging.getLogger('theano').addHandler(theano.logging_default_handler)

    if not raised:
        assert dnn.dnn_available(test_ctx_name)
        assert any([isinstance(n.op, dnn.GpuDnnPool)
                    for n in f.maker.fgraph.toposort()])
Example #30
    def __init__(self, config=None, defaults=defaults, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 use_data_layer=None, rand_crop=None, batch_size=None):
        # combine everything by passing to Model's init
        super(AlexNet, self).__init__(**{arg: val for (arg, val) in locals().items() if arg != 'self'})
        # configs can now be accessed through self dictionary

        if self.inputs_hook or self.hiddens_hook or self.params_hook:
            log.error("Inputs_hook, hiddens_hook, and params_hook not implemented yet for AlexNet!")

        self.flag_datalayer = self.use_data_layer

        ####################
        # Theano variables #
        ####################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.rand = T.fvector('rand')

        ##########
        # params #
        ##########
        self.params = []

        # make the network!
        self.build_computation_graph()
Example #31
    def setup(self):
        """
        Set up the model to train.
        """

        # input_words: shape (n_batch, n_sentence, sentence_len)
        input_words = T.itensor3()
        n_batch, n_sentences, sentence_len = input_words.shape
        # query_words: shape (n_batch, query_len)
        query_words = T.imatrix()
        # correct_output: shape (n_batch, ?, num_output_words)
        correct_output = T.ftensor3()

        # graph_num_new_nodes: shape(n_batch, n_sentence)
        graph_num_new_nodes = T.imatrix()
        # graph_new_node_strengths: shape(n_batch, n_sentence, new_nodes_per_iter)
        graph_new_node_strengths = T.ftensor3()
        # graph_new_node_ids: shape(n_batch, n_sentence, new_nodes_per_iter, num_node_ids)
        graph_new_node_ids = T.ftensor4()
        # graph_new_edges: shape(n_batch, n_sentence, pad_graph_size, pad_graph_size, num_edge_types)
        graph_new_edges = T.TensorType('floatX', (False, ) * 5)()

        def _build(with_correct_graph, snap_to_best, using_dropout,
                   evaluate_accuracy):
            info = {}
            # Process each sentence, flattened to (?, sentence_len)
            flat_input_words = input_words.reshape([-1, sentence_len])
            flat_input_reprs, flat_ref_matrices = self.input_transformer.process(
                flat_input_words)
            # flat_input_reprs of shape (?, input_repr_size)
            # flat_ref_matrices of shape (?, num_node_ids, input_repr_size)
            input_reprs = flat_input_reprs.reshape(
                [n_batch, n_sentences, self.input_repr_size])
            ref_matrices = flat_ref_matrices.reshape([
                n_batch, n_sentences, self.num_node_ids, self.input_repr_size
            ])

            query_repr, query_ref_matrix = self.input_transformer.process(
                query_words)

            if using_dropout:
                iter_dropouts = []
                states_mask = util.make_dropout_mask(
                    (self.node_state_size, ), self.dropout_keep, self.srng)
                if self.nodes_mutable:
                    iter_dropouts.extend(
                        self.node_state_updater.dropout_masks(
                            self.srng, states_mask))
                if len(self.word_node_mapping) > 0:
                    iter_dropouts.extend(
                        self.direct_reference_updater.dropout_masks(
                            self.srng, states_mask))
                if self.intermediate_propagate != 0:
                    iter_dropouts.extend(
                        self.intermediate_propagator.dropout_masks(
                            self.srng, states_mask))
                if self.dynamic_nodes:
                    iter_dropouts.extend(
                        self.new_node_adder.dropout_masks(self.srng))
                iter_dropouts.extend(
                    self.edge_state_updater.dropout_masks(self.srng))
            else:
                iter_dropouts = []
                states_mask = None

            def _iter_fn(input_repr,
                         ref_matrix,
                         gstate,
                         correct_num_new_nodes=None,
                         correct_new_strengths=None,
                         correct_new_node_ids=None,
                         correct_edges=None,
                         dropout_masks=None):
                # If necessary, update node state
                if self.nodes_mutable:
                    gstate, dropout_masks = self.node_state_updater.process(
                        gstate, input_repr, dropout_masks)

                if len(self.word_node_mapping) > 0:
                    gstate, dropout_masks = self.direct_reference_updater.process(
                        gstate, ref_matrix, dropout_masks)

                # If necessary, propagate node state
                if self.intermediate_propagate != 0:
                    gstate, dropout_masks = self.intermediate_propagator.process_multiple(
                        gstate, self.intermediate_propagate, dropout_masks)

                node_loss = None
                node_accuracy = None
                # Propose and vote on new nodes
                if self.dynamic_nodes:
                    new_strengths, new_ids, dropout_masks = self.new_node_adder.get_candidates(
                        gstate, input_repr, self.new_nodes_per_iter,
                        dropout_masks)
                    # new_strengths and correct_new_strengths are of shape (n_batch, new_nodes_per_iter)
                    # new_ids and correct_new_node_ids are of shape (n_batch, new_nodes_per_iter, num_node_ids)
                    if with_correct_graph:
                        perm_idxs = np.array(
                            list(
                                itertools.permutations(
                                    range(self.new_nodes_per_iter))))
                        permuted_correct_str = correct_new_strengths[:,
                                                                     perm_idxs]
                        permuted_correct_ids = correct_new_node_ids[:,
                                                                    perm_idxs]
                        # due to advanced indexing, we should have shape (n_batch, permutation, new_nodes_per_iter, num_node_ids)
                        ext_new_str = T.shape_padaxis(new_strengths, 1)
                        ext_new_ids = T.shape_padaxis(new_ids, 1)
                        strength_ll = permuted_correct_str * T.log(
                            ext_new_str +
                            util.EPSILON) + (1 - permuted_correct_str) * T.log(
                                1 - ext_new_str + util.EPSILON)
                        ids_ll = permuted_correct_ids * T.log(ext_new_ids +
                                                              util.EPSILON)
                        reduced_perm_lls = T.sum(strength_ll, axis=2) + T.sum(
                            ids_ll, axis=[2, 3])
                        if self.best_node_match_only:
                            node_loss = -T.max(reduced_perm_lls, 1)
                        else:
                            full_ll = util.reduce_log_sum(reduced_perm_lls, 1)
                            # Note that some of these permutations are identical, since we likely did not add the maximum
                            # amount of nodes. Thus we will have added repeated elements here.
                            # We have log(x+x+...+x) = log(kx), where k is the repetition factor and x is the probability we want
                            # log(kx) = log(k) + log(x)
                            # Our repetition factor k is given by (new_nodes_per_iter - correct_num_new_nodes)!
                            # Recall that n! = gamma(n+1)
                            # so log(x) = log(kx) - log(gamma(k+1))
                            log_rep_factor = T.gammaln(
                                T.cast(
                                    self.new_nodes_per_iter -
                                    correct_num_new_nodes + 1, 'floatX'))
                            scaled_ll = full_ll - log_rep_factor
                            node_loss = -scaled_ll
                        if evaluate_accuracy:
                            best_match_idx = T.argmax(reduced_perm_lls, 1)
                            # should be of shape (n_batch), indexing the best permutation
                            best_correct_str = permuted_correct_str[
                                T.arange(n_batch), best_match_idx]
                            best_correct_ids = permuted_correct_ids[
                                T.arange(n_batch), best_match_idx]
                            snapped_strengths = util.independent_best(
                                new_strengths)
                            snapped_ids = util.categorical_best(
                                new_ids) * T.shape_padright(snapped_strengths)
                            close_strengths = T.all(
                                T.isclose(best_correct_str, snapped_strengths),
                                (1))
                            close_ids = T.all(
                                T.isclose(best_correct_ids, snapped_ids),
                                (1, 2))
                            node_accuracy = T.and_(close_strengths, close_ids)
                        # now substitute in the correct nodes
                        gstate = gstate.with_additional_nodes(
                            correct_new_strengths, correct_new_node_ids)
                    elif snap_to_best:
                        snapped_strengths = util.independent_best(
                            new_strengths)
                        snapped_ids = util.categorical_best(new_ids)
                        gstate = gstate.with_additional_nodes(
                            snapped_strengths, snapped_ids)
                    else:
                        gstate = gstate.with_additional_nodes(
                            new_strengths, new_ids)

                # Update edge state
                gstate, dropout_masks = self.edge_state_updater.process(
                    gstate, input_repr, dropout_masks)
                if with_correct_graph:
                    cropped_correct_edges = correct_edges[:, :gstate.n_nodes, :
                                                          gstate.n_nodes, :]
                    edge_lls = cropped_correct_edges * T.log(
                        gstate.edge_strengths +
                        util.EPSILON) + (1 - cropped_correct_edges) * T.log(
                            1 - gstate.edge_strengths + util.EPSILON)
                    # edge_lls currently penalizes for edges connected to nodes that do not exist
                    # we do not want it to do this, so we mask it with node strengths
                    mask_src = util.shape_padaxes(gstate.node_strengths,
                                                  [2, 3])
                    mask_dest = util.shape_padaxes(gstate.node_strengths,
                                                   [1, 3])
                    masked_edge_lls = edge_lls * mask_src * mask_dest
                    edge_loss = -T.sum(masked_edge_lls, axis=[1, 2, 3])
                    if evaluate_accuracy:
                        snapped_edges = util.independent_best(
                            gstate.edge_strengths)
                        close_edges = T.isclose(cropped_correct_edges,
                                                snapped_edges)
                        ok_mask = 1 - T.cast(
                            mask_src * mask_dest, 'int8'
                        )  # its OK for things not to match if node strengths are NOT both 1
                        edge_accuracy = T.all(T.or_(close_edges, ok_mask),
                                              (1, 2, 3))
                        overall_accuracy = edge_accuracy if node_accuracy is None else T.and_(
                            node_accuracy, edge_accuracy)
                    else:
                        overall_accuracy = None
                    gstate = gstate.with_updates(
                        edge_strengths=cropped_correct_edges)
                    return gstate, node_loss, edge_loss, overall_accuracy
                elif snap_to_best:
                    snapped_edges = util.independent_best(
                        gstate.edge_strengths)
                    gstate = gstate.with_updates(edge_strengths=snapped_edges)
                    return gstate
                else:
                    return gstate

            # Scan over each sentence
            def _scan_fn(
                input_repr, *stuff
            ):  # (input_repr, [ref_matrix?], [*correct_graph_stuff?], [dropout_masks?], *flat_graph_state, pad_graph_size)
                stuff = list(stuff)

                if len(self.word_node_mapping) > 0:
                    ref_matrix = stuff[0]
                    stuff = stuff[1:]
                else:
                    ref_matrix = None

                if with_correct_graph:
                    c_num_new_nodes, c_new_strengths, c_new_node_ids, c_edges = stuff[:
                                                                                      4]
                    stuff = stuff[4:]

                if using_dropout:
                    dropout_masks = stuff[:len(iter_dropouts)]
                    stuff = stuff[len(iter_dropouts):]
                else:
                    dropout_masks = None

                flat_graph_state = stuff[:-1]
                pad_graph_size = stuff[-1]
                gstate = GraphState.unflatten_from_const_size(flat_graph_state)

                if with_correct_graph:
                    gstate, node_loss, edge_loss, overall_accuracy = _iter_fn(
                        input_repr,
                        ref_matrix,
                        gstate,
                        c_num_new_nodes,
                        c_new_strengths,
                        c_new_node_ids,
                        c_edges,
                        dropout_masks=dropout_masks)
                else:
                    gstate = _iter_fn(input_repr,
                                      ref_matrix,
                                      gstate,
                                      dropout_masks=dropout_masks)

                retvals = gstate.flatten_to_const_size(pad_graph_size)
                if with_correct_graph:
                    if self.dynamic_nodes:
                        retvals.append(node_loss)
                    retvals.append(edge_loss)
                    if evaluate_accuracy:
                        retvals.append(overall_accuracy)
                return retvals

            if self.dynamic_nodes:
                initial_gstate = GraphState.create_empty(
                    n_batch, self.num_node_ids, self.node_state_size,
                    self.num_edge_types)
            else:
                initial_gstate = GraphState.create_full_unique(
                    n_batch, self.num_node_ids, self.node_state_size,
                    self.num_edge_types)

            # Account for all nodes, plus the extra padding node to prevent GPU unpleasantness
            if self.dynamic_nodes:
                pad_graph_size = n_sentences * self.new_nodes_per_iter + 1
            else:
                pad_graph_size = self.num_node_ids
            outputs_info = initial_gstate.flatten_to_const_size(pad_graph_size)
            prepped_input = input_reprs.dimshuffle([1, 0, 2])
            sequences = [prepped_input]
            if len(self.word_node_mapping) > 0:
                sequences.append(ref_matrices.dimshuffle([1, 0, 2, 3]))
            if with_correct_graph:
                sequences.append(graph_num_new_nodes.swapaxes(0, 1))
                sequences.append(graph_new_node_strengths.swapaxes(0, 1))
                sequences.append(graph_new_node_ids.swapaxes(0, 1))
                sequences.append(graph_new_edges.swapaxes(0, 1))

                if self.dynamic_nodes:
                    outputs_info.extend([None])
                if evaluate_accuracy:
                    outputs_info.extend([None])
                outputs_info.extend([None])
            if using_dropout:
                sequences.extend(iter_dropouts)
            all_scan_out, _ = theano.scan(_scan_fn,
                                          sequences=sequences,
                                          outputs_info=outputs_info,
                                          non_sequences=[pad_graph_size])
            graph_accurate_list = None
            if with_correct_graph:
                if evaluate_accuracy:
                    full_graph_accuracy = all_scan_out[-1]
                    all_scan_out = all_scan_out[:-1]
                    graph_accurate_list = T.all(full_graph_accuracy, 0)
                    info["graph_accuracy"] = T.sum(graph_accurate_list,
                                                   dtype='floatX') / T.cast(
                                                       n_batch, 'floatX')
                if self.dynamic_nodes:
                    all_flat_gstates = all_scan_out[:-2]
                    node_loss, edge_loss = all_scan_out[-2:]
                    reduced_node_loss = T.sum(node_loss) / T.cast(
                        n_batch, 'floatX')
                    reduced_edge_loss = T.sum(edge_loss) / T.cast(
                        n_batch, 'floatX')
                    avg_graph_loss = (reduced_node_loss +
                                      reduced_edge_loss) / T.cast(
                                          input_words.shape[1], 'floatX')
                    info["node_loss"] = reduced_node_loss
                    info["edge_loss"] = reduced_edge_loss
                else:
                    all_flat_gstates = all_scan_out[:-1]
                    edge_loss = all_scan_out[-1]
                    reduced_edge_loss = T.sum(edge_loss) / T.cast(
                        n_batch, 'floatX')
                    avg_graph_loss = reduced_edge_loss / T.cast(
                        input_words.shape[1], 'floatX')
                    info["edge_loss"] = reduced_edge_loss
            else:
                all_flat_gstates = all_scan_out

            if self.sequence_representation:
                # Each part of all_flat_gstates is of shape (n_sentences, n_batch, ...)
                # except for the last one, which we handle separately
                # Swap to (n_batch, n_sentences, ...)
                # Then flatten to (n_batch*n_sentences, ...) for further processing
                final_flat_gstate = [
                    x.swapaxes(0, 1).reshape(T.concatenate([[-1],
                                                            x.shape[2:]]),
                                             ndim=(x.ndim - 1))
                    for x in all_flat_gstates[:-1]
                ]
                # As for the last one, we need to get a single scalar value. The last one will be the biggest
                # so we will take that. Note that this will introduce a bunch of zero-nodes, but that's
                # OK and we can process that later. (We REQUIRE that padding in graph_state makes zero strength
                # nodes here!)
                final_flat_gstate.append(all_flat_gstates[-1][-1])
                # We also need to repeat query_repr and query_ref_matrix so that they broadcast together
                query_repr = T.extra_ops.repeat(query_repr, n_sentences, 0)
                query_ref_matrix = T.extra_ops.repeat(query_ref_matrix,
                                                      n_sentences, 0)
            else:
                # Extract last timestep
                final_flat_gstate = [x[-1] for x in all_flat_gstates]
            final_gstate = GraphState.unflatten_from_const_size(
                final_flat_gstate)

            if self.train_with_query:
                if self.wipe_node_state:
                    final_gstate = final_gstate.with_updates(
                        node_states=T.zeros_like(final_gstate.node_states))

                qnsu_dropout_masks = self.query_node_state_updater.dropout_masks(
                    self.srng, states_mask)
                query_gstate, _ = self.query_node_state_updater.process(
                    final_gstate, query_repr, qnsu_dropout_masks)

                if len(self.word_node_mapping) > 0:
                    qdru_dropout_masks = self.query_direct_reference_updater.dropout_masks(
                        self.srng, states_mask)
                    query_gstate, _ = self.query_direct_reference_updater.process(
                        query_gstate, query_ref_matrix, qdru_dropout_masks)

                fp_dropout_masks = self.final_propagator.dropout_masks(
                    self.srng, states_mask)
                propagated_gstate, _ = self.final_propagator.process_multiple(
                    query_gstate, self.final_propagate, fp_dropout_masks)

                agg_dropout_masks = self.aggregator.dropout_masks(self.srng)
                aggregated_repr, _ = self.aggregator.process(
                    propagated_gstate,
                    agg_dropout_masks)  # shape (n_batch, output_repr_size)

                if self.sequence_representation:
                    # aggregated_repr is of shape (n_batch*n_sentences, repr_width)
                    # We want to split back to timesteps: (n_batch, n_sentences, repr_width)
                    agg_repr_seq = aggregated_repr.reshape(
                        [n_batch, n_sentences, -1])
                    # Now collapse it to a summary representation
                    aggsum_dropout_masks = self.aggregate_summarizer.dropout_masks(
                        self.srng)
                    aggregated_repr, _ = self.aggregate_summarizer.process(
                        agg_repr_seq, aggsum_dropout_masks)
                    # At this point aggregated_repr is (n_batch, repr_width) as desired

                max_seq_len = correct_output.shape[1]
                if self.output_format == ModelOutputFormat.sequence:
                    final_output = self.output_processor.process(
                        aggregated_repr,
                        max_seq_len)  # shape (n_batch, ?, num_output_words)
                else:
                    final_output = self.output_processor.process(
                        aggregated_repr)

                if snap_to_best:
                    final_output = self.output_processor.snap_to_best(
                        final_output)

                if self.output_format == ModelOutputFormat.subset:
                    elemwise_loss = T.nnet.binary_crossentropy(
                        final_output, correct_output)
                    query_loss = T.sum(elemwise_loss)
                else:
                    flat_final_output = final_output.reshape(
                        [-1, self.num_output_words])
                    flat_correct_output = correct_output.reshape(
                        [-1, self.num_output_words])
                    timewise_loss = T.nnet.categorical_crossentropy(
                        flat_final_output, flat_correct_output)
                    query_loss = T.sum(timewise_loss)
                query_loss = query_loss / T.cast(n_batch, 'floatX')
                info["query_loss"] = query_loss
            else:
                final_output = T.zeros([])

            full_loss = np.array(0.0, np.float32)
            if with_correct_graph:
                full_loss = full_loss + avg_graph_loss
            if self.train_with_query:
                full_loss = full_loss + query_loss

            if self.train_with_query:
                adjusted_query_gstates = [
                    x.reshape(T.concatenate([[n_batch, n_sentences],
                                             x.shape[1:]]),
                              ndim=(x.ndim + 1))
                    if self.sequence_representation else T.shape_padaxis(x, 1)
                    for x in query_gstate.flatten()
                ]
                adjusted_prop_gstates = [
                    x.reshape(T.concatenate([[n_batch, n_sentences],
                                             x.shape[1:]]),
                              ndim=(x.ndim + 1))
                    if self.sequence_representation else T.shape_padaxis(x, 1)
                    for x in propagated_gstate.flatten()
                ]
                full_flat_gstates = [
                    T.concatenate([a.swapaxes(0, 1), b, c], 1) for a, b, c in
                    zip(all_flat_gstates[:-1], adjusted_query_gstates,
                        adjusted_prop_gstates)
                ]
            else:
                full_flat_gstates = [
                    a.swapaxes(0, 1) for a in all_flat_gstates[:-1]
                ]
                max_seq_len = T.iscalar()
            return full_loss, final_output, full_flat_gstates, graph_accurate_list, max_seq_len, info

        train_loss, _, _, _, _, train_info = _build(self.train_with_graph,
                                                    False, True, False)
        adam_updates = Adam(train_loss, self.params, lr=self.learning_rate_var)

        self.info_keys = list(train_info.keys())

        print("Compiling...")

        optimizer = theano.compile.predefined_optimizers[
            'fast_run' if self.check_mode ==
            'debug' else theano.config.optimizer]
        optimizer = optimizer.excluding(
            "scanOp_pushout_output", "remove_constants_and_unused_inputs_scan")
        if self.check_mode == 'nan':
            mode = NanGuardMode(optimizer=optimizer,
                                nan_is_error=True,
                                inf_is_error=True,
                                big_is_error=True)
        elif self.check_mode == 'debug':
            mode = DebugMode(optimizer=optimizer,
                             check_isfinite=False,
                             check_py_code=False,
                             stability_patience=1)
            theano.tensor.TensorType.filter_checks_isfinite = False
        else:
            mode = theano.Mode(optimizer=optimizer)
        self.train_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ], [train_loss] + list(train_info.values()),
                                        updates=adam_updates,
                                        allow_input_downcast=True,
                                        on_unused_input='ignore',
                                        mode=mode)

        eval_loss, _, full_flat_gstates, graph_accurate_list, _, eval_info = _build(
            self.train_with_graph, False, False, True)
        self.eval_info_keys = list(eval_info.keys())
        self.eval_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ], [eval_loss, graph_accurate_list] + list(eval_info.values()),
                                       allow_input_downcast=True,
                                       on_unused_input='ignore',
                                       mode=mode)

        self.debug_test_fn = theano.function([
            input_words, query_words, correct_output, graph_num_new_nodes,
            graph_new_node_strengths, graph_new_node_ids, graph_new_edges
        ],
                                             full_flat_gstates,
                                             allow_input_downcast=True,
                                             on_unused_input='ignore',
                                             mode=mode)

        test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
            False, False, False, False)
        self.fuzzy_test_fn = theano.function(
            [input_words, query_words] +
            ([max_seq_len] if self.output_format == ModelOutputFormat.sequence
             else []), [final_output] + full_flat_gstates,
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=mode)

        test_loss, final_output, full_flat_gstates, _, max_seq_len, _ = _build(
            False, True, False, False)
        self.snap_test_fn = theano.function(
            [input_words, query_words] +
            ([max_seq_len] if self.output_format == ModelOutputFormat.sequence
             else []), [final_output] + full_flat_gstates,
            allow_input_downcast=True,
            on_unused_input='ignore',
            mode=mode)
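
A note on the compilation modes used above: NanGuardMode, DebugMode, and a plain optimizer-only Mode trade compile and run time for graph checking. As a minimal, self-contained sketch (a toy graph, not part of the model above), NanGuardMode makes a compiled function raise as soon as a NaN or Inf value flows through it:

import numpy as np
import theano
import theano.tensor as T
from theano.compile.nanguardmode import NanGuardMode

# Toy graph: log() produces -inf / nan for non-positive inputs.
x = T.fvector('x')
f = theano.function([x], T.log(x),
                    mode=NanGuardMode(nan_is_error=True,
                                      inf_is_error=True,
                                      big_is_error=True))

print(f(np.asarray([1.0, 2.0], dtype='float32')))   # fine
# f(np.asarray([0.0], dtype='float32'))             # NanGuardMode would raise here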
Example n. 32
    def __init__(self, trainset, testset, testDataset2, num_user, num_item,
                 dim, reg, lr, prefix):
        self.trainset = trainset
        self.testset = testset
        self.testDataset2 = testDataset2
        self.reg = numpy.float32(reg)
        self.lr = numpy.float32(lr)
        self.num_item = num_item
        self.video_features = self.trainset.video_features

        theano.config.compute_test_value = 'warn'

        u = T.ivector('u')  #[num_sample,]
        iv = T.ivector('iv')  #[num_sample,]
        jv = T.ivector('jv')  #[num_sample,]
        mask_frame = T.itensor3(
            'mask_frame')  #[num_sample, num_video, num_frame]
        mask = T.imatrix('mask')  #[num_sample, num_video]

        feat = T.ftensor4('feat')

        u.tag.test_value = np.asarray([0, 1, 2], dtype='int32')
        iv.tag.test_value = np.asarray([4, 5, 2], dtype='int32')
        jv.tag.test_value = np.asarray([1, 3, 0], dtype='int32')
        mask.tag.test_value = np.asarray([[1, 1, 0], [1, 0, 0], [1, 1, 1]],
                                         dtype='int32')
        # feat_idx.tag.test_value = np.asarray([[3,4,-1],[5,-1,-1],[6,2,4]],dtype='int32')

        rng = np.random
        layers = []

        Uemb = UsrEmblayer(rng, num_user, dim, 'usremblayer', prefix)
        Vemb = VidEmblayer(rng, num_item, dim, 'videmblayer', prefix)

        layers.append(Uemb)
        layers.append(Vemb)
        uemb_vec = GetuEmbLayer(u, Uemb.output, 'uemb', prefix)
        iemb_vec = GetvEmbLayer(iv, Vemb.output, 'v1emb', prefix)
        jemb_vec = GetvEmbLayer(jv, Vemb.output, 'v2emb', prefix)

        layers.append(
            AttentionLayer_Feat(rng, 1000, uemb_vec.output, feat, dim, dim,
                                mask_frame, 'attentionlayer_feat', prefix))

        layers.append(
            AttentionLayer_Item(rng, uemb_vec.output, layers[-1].output, dim,
                                dim, mask, 'attentionlayer_item', prefix))

        u_vec = uemb_vec.output + layers[-1].output
        self.layers = layers
        y_ui = T.dot(u_vec, iemb_vec.output.T).diagonal()
        y_uj = T.dot(u_vec, jemb_vec.output.T).diagonal()
        self.params = []
        loss = -T.sum(T.log(T.nnet.sigmoid(y_ui - y_uj)))
        for layer in layers:
            self.params += layer.params  #[U,V,W_Tran,Wu,Wv,b,c]
        #regularizer = self.reg * ((uemb_vec.output ** 2).sum() + (iemb_vec.output ** 2).sum() + (jemb_vec.output ** 2).sum() +
        #                          (self.params[2] ** 2).sum() + (self.params[3] ** 2).sum() + (self.params[4] ** 2).sum() +
        #                            (self.params[5] ** 2).sum())

        regularizer = self.reg * ((uemb_vec.output**2).sum() +
                                  (iemb_vec.output**2).sum() +
                                  (jemb_vec.output**2).sum())

        for param in self.params[2:]:
            regularizer += self.reg * (param**2).sum()

        loss = regularizer + loss

        updates = [(param, param - self.lr * T.grad(loss, param))
                   for param in self.params]

        self.train_model = theano.function(
            inputs=[u, iv, jv, mask_frame, mask, feat],
            outputs=loss,
            updates=updates)

        self.test_model = theano.function(
            inputs=[u, mask_frame, mask, feat],
            outputs=[
                u_vec, Vemb.output, layers[-1].atten, layers[-2].atten
            ],  #for test: layers[-2].output,layers[-2].items_emb,layers[-2].atten
        )
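
Example n. 32 above trains user and item embeddings with a BPR-style pairwise objective, -T.sum(T.log(T.nnet.sigmoid(y_ui - y_uj))), plus L2 regularization and plain SGD updates. A self-contained sketch of just that objective, with toy sizes, no attention layers, and hypothetical embedding tables U and V:

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(0)
U = theano.shared(rng.randn(5, 8).astype('float32'))   # hypothetical user embeddings
V = theano.shared(rng.randn(7, 8).astype('float32'))   # hypothetical item embeddings

u = T.ivector('u')   # user indices
i = T.ivector('i')   # observed (positive) item indices
j = T.ivector('j')   # sampled (negative) item indices

y_ui = T.sum(U[u] * V[i], axis=1)
y_uj = T.sum(U[u] * V[j], axis=1)
loss = -T.sum(T.log(T.nnet.sigmoid(y_ui - y_uj)))
loss = loss + np.float32(0.01) * ((U[u] ** 2).sum() + (V[i] ** 2).sum() + (V[j] ** 2).sum())

updates = [(p, p - np.float32(0.05) * T.grad(loss, p)) for p in (U, V)]
train = theano.function([u, i, j], loss, updates=updates)

print(train(np.array([0, 1], dtype='int32'),
            np.array([2, 3], dtype='int32'),
            np.array([4, 5], dtype='int32')))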
Example n. 33
    def __init__(self, config, testMode):

        self.config = config

        batch_size = config['batch_size']
        lib_conv = config['lib_conv']
        useLayers = config['useLayers']
        #imgWidth = config['imgWidth']
        #imgHeight = config['imgHeight']
        initWeights = config['initWeights']  # whether to initialize AlexNet with pretrained weights; layers.py needs changes to accept initializing weights
        if initWeights:
            weightsDir = config['weightsDir']
            weightFileTag = config['weightFileTag']
        prob_drop = config['prob_drop']

        # ##################### BUILD NETWORK ##########################
        x = T.ftensor4('x')
        mean = T.ftensor4('mean')
        #y = T.lvector('y')

        print '... building the model'
        self.layers = []
        params = []
        weight_types = []

        if useLayers >= 1:
            convpool_layer1 = ConvPoolLayer(input=x-mean,
                                        image_shape=(3, None, None, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_0'+weightFileTag, 'b_0'+weightFileTag]
                                        )
            self.layers.append(convpool_layer1)
            params += convpool_layer1.params
            weight_types += convpool_layer1.weight_type

        if useLayers >= 2:
            convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, None, None, batch_size),    #change from 27 to the appropriate value based on conv1's output
                                        filter_shape=(96, 5, 5, 256), 
                                        convstride=1, padsize=2, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_1'+weightFileTag, 'W1_1'+weightFileTag, 'b0_1'+weightFileTag, 'b1_1'+weightFileTag]
                                        )
            self.layers.append(convpool_layer2)
            params += convpool_layer2.params
            weight_types += convpool_layer2.weight_type

        if useLayers >= 3:
            convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, None, None, batch_size),
                                        filter_shape=(256, 3, 3, 384), 
                                        convstride=1, padsize=1, group=1, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_2'+weightFileTag, 'b_2'+weightFileTag]
                                        )
            self.layers.append(convpool_layer3)
            params += convpool_layer3.params
            weight_types += convpool_layer3.weight_type

        if useLayers >= 4:
            convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 384), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=1, poolstride=0, 
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_3'+weightFileTag, 'W1_3'+weightFileTag, 'b0_3'+weightFileTag, 'b1_3'+weightFileTag]
                                        )
            self.layers.append(convpool_layer4)
            params += convpool_layer4.params
            weight_types += convpool_layer4.weight_type

        if useLayers >= 5:
            convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 256), 
                                        convstride=1, padsize=1, group=2, 
                                        poolsize=3, poolstride=2, 
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W0_4'+weightFileTag, 'W1_4'+weightFileTag, 'b0_4'+weightFileTag, 'b1_4'+weightFileTag]
                                        )
            self.layers.append(convpool_layer5)
            params += convpool_layer5.params
            weight_types += convpool_layer5.weight_type

        if useLayers >= 6:
            fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
            fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_5'+weightFileTag, 'b_5'+weightFileTag])
            self.layers.append(fc_layer6)
            params += fc_layer6.params
            weight_types += fc_layer6.weight_type
            if testMode:
                dropout_layer6 = fc_layer6
            else:
                dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

        if useLayers >= 7:
            fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_6'+weightFileTag, 'b_6'+weightFileTag])
            self.layers.append(fc_layer7)
            params += fc_layer7.params
            weight_types += fc_layer7.weight_type
            if testMode:
                dropout_layer7 = fc_layer7  # in test mode the FC output passes through without dropout
            else:
                dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096, prob_drop=prob_drop)

        if useLayers >= 8:
            softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000, initWeights=initWeights, weightsDir=weightsDir, weightFiles=['W_7'+weightFileTag, 'b_7'+weightFileTag])
            self.layers.append(softmax_layer8)
            params += softmax_layer8.params
            weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################

        self.output = self.layers[useLayers-1]
        self.params = params
        self.x = x
        self.mean = mean
        self.weight_types = weight_types
        self.batch_size = batch_size
        self.useLayers = useLayers
        self.outLayer = self.layers[useLayers-1]

        meanVal = np.load(config['mean_file'])
        meanVal = meanVal[:, :, :, np.newaxis].astype('float32')   # x is 4d with 'batch' images, but meanVal has only '1' in the 'batch' dimension, so the subtraction won't work; tile it below
        meanVal = np.tile(meanVal,(1,1,1,batch_size))
        self.meanVal = meanVal
        #meanVal = np.zeros([3,imgHeight,imgWidth,2], dtype='float32')

        if useLayers >= 8:  #if last layer is softmax, then its output is y_pred
            finalOut = self.outLayer.y_pred
        else:
            finalOut = self.outLayer.output
        self.forwardFunction = theano.function([self.x, In(self.mean, value=meanVal)], [finalOut])
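
Example n. 33 compiles its forward pass as theano.function([self.x, In(self.mean, value=meanVal)], ...), so callers may omit the mean image. A minimal sketch of theano.In with a default input value, using a hypothetical bias input b:

import numpy as np
import theano
import theano.tensor as T
from theano import In

x = T.fmatrix('x')
b = T.fvector('b')
default_b = np.zeros(3, dtype='float32')   # plays the role of meanVal above

f = theano.function([x, In(b, value=default_b)], x + b)

x_val = np.ones((2, 3), dtype='float32')
print(f(x_val))                                   # uses the default value for b
print(f(x_val, np.arange(3, dtype='float32')))    # overrides it for this call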
Example n. 34
    def __init__(self,
                 model_network=None,
                 gamma=0.99,
                 learning_method="rmsprop",
                 batch_size=32,
                 input_size=None,
                 learning_params=None,
                 dnn_type=True,
                 clip_delta=0,
                 scale=255.,
                 double_q=False,
                 prioritized_exp_replay=False,
                 heads_num=1,
                 action_num=0):

        x = T.ftensor4()
        next_x = T.ftensor4()
        a = T.ivector()
        r = T.fvector()
        terminal = T.ivector()

        self.heads_num = heads_num
        self.action_num = action_num

        self.x_shared = theano.shared(
            np.zeros(tuple([batch_size] + input_size[1:]), dtype='float32'))
        self.next_x_shared = theano.shared(
            np.zeros(tuple([batch_size] + input_size[1:]), dtype='float32'))
        self.a_shared = theano.shared(np.zeros((batch_size), dtype='int32'))
        self.terminal_shared = theano.shared(
            np.zeros((batch_size), dtype='int32'))
        self.r_shared = theano.shared(np.zeros((batch_size), dtype='float32'))

        self.Q_model = Model(model_network,
                             input_size=input_size,
                             dnn_type=dnn_type)
        self.Q_prime_model = Model(model_network,
                                   input_size=input_size,
                                   dnn_type=dnn_type)

        if double_q:
            alt_actions = T.argmax(self.Q_model.apply(next_x / scale), axis=1)
            alt_actions = theano.gradient.disconnected_grad(alt_actions)
            y = r + (T.ones_like(terminal)-terminal)*gamma*\
            self.Q_prime_model.apply(next_x/scale)[T.arange(alt_actions.shape[0]), alt_actions]
        else:
            q_stack = self.Q_prime_model.apply(next_x / scale)
            q_list = [
                q_stack[T.arange(a.shape[0]),
                        k * self.action_num:(k + 1) * self.action_num]
                for k in range(self.heads_num)
            ]
            y_list = [
                r + (T.ones_like(terminal) - terminal) * gamma *
                T.max(q_list[k], axis=1) for k in range(self.heads_num)
            ]

            y_concat = theano.tensor.concatenate(y_list, axis=0)

            y = r + (T.ones_like(terminal) - terminal) * gamma * T.max(
                self.Q_prime_model.apply(next_x / scale), axis=1)

        all_q_vals = self.Q_model.apply(x / scale)
        q_vals = all_q_vals[T.arange(a.shape[0]), a]

        # Head k's Q-values occupy columns [k * action_num, (k + 1) * action_num),
        # so the taken action's column within head k is a + k * action_num.
        q_vals_list = [
            all_q_vals[T.arange(a.shape[0]), a + k * self.action_num]
            for k in range(self.heads_num)
        ]
        q_vals_concat = theano.tensor.concatenate(q_vals_list, axis=0)

        # td_errors = y-q_vals

        td_errors = y_concat - q_vals_concat
        """
    if clip_delta > 0:
      td_errors = td_errors.clip(-clip_delta, clip_delta)
    cost = 0.5*td_errors**2
    """
        if clip_delta > 0:
            # Taken from the published DQN GitHub code; a numeric sketch of this loss follows after this example.

            # If we simply take the squared clipped diff as our loss,
            # then the gradient will be zero whenever the diff exceeds
            # the clip bounds. To avoid this, we extend the loss
            # linearly past the clip point to keep the gradient constant
            # in that regime.
            #
            # This is equivalent to declaring d loss/d q_vals to be
            # equal to the clipped diff, then backpropagating from
            # there, which is what the DeepMind implementation does.
            quadratic_part = T.minimum(abs(td_errors), clip_delta)
            linear_part = abs(td_errors) - quadratic_part
            cost = 0.5 * quadratic_part**2 + clip_delta * linear_part
        else:
            cost = 0.5 * td_errors**2
        #"""

        cost = T.sum(cost)

        print self.Q_model.params
        self.learning_method = self.Q_model.get_learning_method(
            learning_method, **learning_params)
        grads = T.grad(cost, self.Q_model.params)
        param_updates = self.learning_method.apply(self.Q_model.params, grads)

        target_updates = OrderedDict()
        for t, b in zip(self.Q_prime_model.params, self.Q_model.params):
            target_updates[t] = b

        givens = {
            x: self.x_shared,
            a: self.a_shared,
            r: self.r_shared,
            terminal: self.terminal_shared,
            next_x: self.next_x_shared
        }

        # print 'fast compile'
        # theano.config.mode = 'FAST_COMPILE'
        print "building"
        self.train_model = theano.function([],
                                           td_errors,
                                           updates=param_updates,
                                           givens=givens)
        print "compiled train_model (1/3)"
        self.pred_score = theano.function([],
                                          all_q_vals,
                                          givens={x: self.x_shared})
        print "compiled pred_score (2/3)"
        self.update_target_params = theano.function([], [],
                                                    updates=target_updates)
        print "compiled update_target_params (3/3)"
        self.update_target_params()
        print "updated target params"
Example n. 35
import theano
from theano import tensor, config
from blocks.bricks import BatchNormalization, Rectifier, Linear, Softmax, MLP, BatchNormalizedMLP, FeedforwardSequence, Rectifier
from blocks.bricks.conv import Convolutional, ConvolutionalSequence, Flattener, MaxPooling
from blocks.initialization import IsotropicGaussian, Uniform, Constant
from blocks.select import Selector
from blocks.graph import ComputationGraph, apply_dropout
from blocks.filter import VariableFilter
from blocks.roles import OUTPUT
import numpy

from elementary_blocks_simple import VGG, top_direction_block, StructuredCost

images = tensor.ftensor4('images')
labels = tensor.ftensor4('labels')

def build_model(images, labels):
    
    vgg = VGG(layer='conv4_4')
    vgg.push_initialization_config()
    vgg.initialize()

    tdb = top_direction_block()
    tdb.push_initialization_config()
    tdb.initialize()

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, tdb.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
Example n. 36
def test_dnn_conv_alpha_output_merge():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    img = T.ftensor4()
    kern = T.ftensor4()
    out = T.ftensor4()

    b = 1
    c = 4
    f = 3
    ih = 5
    iw = 8
    kh = 2
    kw = 6
    img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
    kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
    out_val = numpy.random.random((b, f, ih - kh + 1,
                                   iw - kw + 1)).astype('float32')

    conv = dnn.dnn_conv(img, kern)
    gw = theano.grad(conv.sum(), kern)
    gi = theano.grad(conv.sum(), img)

    lr = numpy.asarray(0.05, dtype='float32')

    fr = lr * (conv + out)
    wr = kern + lr * gw
    ir = img + lr * gi

    f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
    assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
                      dnn.GpuDnnConv)
    assert isinstance(f1.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
                      dnn.GpuDnnConvGradW)
    assert isinstance(f1.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
                      dnn.GpuDnnConvGradI)

    mode = mode_with_gpu
    mode = mode.excluding('local_dnn_conv_alpha_merge')
    mode = mode.excluding('local_dnn_convw_alpha_merge')
    mode = mode.excluding('local_dnn_convi_alpha_merge')
    mode = mode.excluding('local_dnn_conv_output_merge')
    mode = mode.excluding('local_dnn_convw_output_merge')
    mode = mode.excluding('local_dnn_convi_output_merge')

    f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)

    assert not isinstance(f2.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
                          dnn.GpuDnnConv)
    assert not isinstance(f2.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
                          dnn.GpuDnnConvGradW)
    assert not isinstance(f2.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
                          dnn.GpuDnnConvGradI)

    out_f1 = f1(img_val, kern_val, out_val)
    out_f2 = f2(img_val, kern_val, out_val)

    assert len(out_f1) == len(out_f2)

    for v1, v2 in zip(out_f1, out_f2):
        utt.assert_allclose(v1, v2)
Example n. 37
def random_epoch_train_begining(learning_rate=0.05,
                                weight_decay=0.001,
                                nkerns=[20, 50],
                                n_epochs=200,
                                batch_size=500,
                                dataset='mnist.pkl.gz',
                                name_given='test'):

    #name = 'FashionMnist_'+str(learning_rate)+'_'+str(weight_decay) + '_' + str(nkerns) + 'Rand_Trans_Relu2_Begin'
    name = name_given
    rng = numpy.random.RandomState(23455)
    datasets = loaddata_mnist(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train = train_set_x.get_value(borrow=True).shape[0]
    n_valid = valid_set_x.get_value(borrow=True).shape[0]
    n_test = test_set_x.get_value(borrow=True).shape[0]

    test_set_x = test_set_x.reshape((n_test, 1, 28, 28))
    valid_set_x = valid_set_x.reshape((n_valid, 1, 28, 28))
    train_set_x = train_set_x.reshape((n_train, 1, 28, 28))

    temp_train_set_x = theano.shared(numpy.zeros(train_set_x.shape.eval(),
                                                 dtype=theano.config.floatX),
                                     borrow=True)
    temp_train_set_xx = T.Rebroadcast((1, True))(temp_train_set_x)

    temp_valid_set_x = theano.shared(numpy.zeros(valid_set_x.shape.eval(),
                                                 dtype=theano.config.floatX),
                                     borrow=True)
    temp_valid_set_xx = T.Rebroadcast((1, True))(temp_valid_set_x)

    temp_test_set_x = theano.shared(numpy.zeros(test_set_x.shape.eval(),
                                                dtype=theano.config.floatX),
                                    borrow=True)
    temp_test_set_xx = T.Rebroadcast((1, True))(temp_test_set_x)

    n_train_batches = n_train // batch_size
    n_valid_batches = n_valid // batch_size
    n_test_batches = n_test // batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    index = T.lscalar()
    dummy = T.ftensor4('dummy')

    update_train = (temp_train_set_x, dummy)
    update_valid = (temp_valid_set_x, dummy)
    update_test = (temp_test_set_x, dummy)

    replace_train = theano.function([dummy],
                                    temp_train_set_x,
                                    updates=[update_train])
    replace_valid = theano.function([dummy],
                                    temp_valid_set_x,
                                    updates=[update_valid])
    replace_test = theano.function([dummy],
                                   temp_test_set_x,
                                   updates=[update_test])

    print('... loading the model')

    layer0_input = x.reshape((batch_size, 1, 28, 28))

    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * 4 * 4,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    cost = layer3.negative_log_likelihood(y)

    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)

    updates = [(param_i,
                param_i - learning_rate * (grad_i + weight_decay * param_i))
               for param_i, grad_i in zip(params, grads)]

    patience_increase = 2
    improvement_threshold = 0.995

    start_time = timeit.default_timer()

    rand_trans_x = numpy.random.random_integers(-10, 10, 200)
    rand_trans_y = numpy.random.random_integers(-10, 10, 200)
    numpy.save('rand_trans_x.npy', rand_trans_x)
    numpy.save('rand_trans_y.npy', rand_trans_y)
    error_line = numpy.zeros(n_epochs)

    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_test_set_xx[index * 500:(index + 1) * 500],
            y: test_set_y[index * 500:(index + 1) * 500]
        })

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            layer0.input: temp_valid_set_xx[index * 500:(index + 1) * 500],
            y: valid_set_y[index * 500:(index + 1) * 500]
        })

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            layer0.input: temp_train_set_xx[index * 500:(index + 1) * 500],
            y: train_set_y[index * 500:(index + 1) * 500]
        })

    print('... training')

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    patience = 20000
    validation_frequency = min(n_train_batches, patience // 2)
    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):

        horizontal = rand_trans_x[epoch]
        vertical = rand_trans_y[epoch]

        tran_test_set_x = theano_translation_updating(test_set_x, horizontal,
                                                      vertical).reshape(
                                                          (-1, 1, 28, 28))
        tran_valid_set_x = theano_translation_updating(valid_set_x, horizontal,
                                                       vertical).reshape(
                                                           (-1, 1, 28, 28))
        tran_train_set_x = theano_translation_updating(train_set_x, horizontal,
                                                       vertical).reshape(
                                                           (-1, 1, 28, 28))

        replace_test(tran_test_set_x)
        replace_valid(tran_valid_set_x)
        replace_train(tran_train_set_x)

        epoch = epoch + 1

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('Horizontal Shift:', horizontal, 'Vertical Shift:',
                      vertical)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                error_line[epoch - 1] = this_validation_loss

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    [t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3] = \
        [layer0, layer1, layer2_input, layer2, layer3]

    with open(name + '.pkl', 'wb') as f:
        pickle.dump([t_layer0, t_layer1, t_layer2_input, t_layer2, t_layer3],
                    f)

    error_line = error_line[0:epoch - 1] * 100
    scipy.io.savemat(name + '.mat', mdict={'Error_Spectrum': error_line})

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print('The code for file ran for %.2fm' % ((end_time - start_time) / 60.))
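
Example n. 37 keeps the translated datasets in shared variables and overwrites them every epoch through replace_train, replace_valid and replace_test, which are plain theano.functions whose only job is an updates assignment. A stripped-down sketch of that pattern, with a hypothetical buffer buf standing in for temp_train_set_x:

import numpy as np
import theano
import theano.tensor as T

buf = theano.shared(np.zeros((4, 1, 28, 28), dtype='float32'), borrow=True)
new_data = T.ftensor4('new_data')

# Compiling the copy once lets each epoch push freshly transformed images
# into the same shared storage instead of re-allocating it.
refresh = theano.function([new_data], [], updates=[(buf, new_data)])

refresh(np.random.rand(4, 1, 28, 28).astype('float32'))
print(buf.get_value().mean())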
Example n. 38
    def test_pseudo_grad(self):
        cnn = CNN()

        X = T.ftensor4('X')
        y = T.fmatrix('y')

        predictions = cnn(X)

        print cnn.description()

        loss = T.mean(objectives.categorical_accuracy(predictions, y))
        loss += 1.0e-5 * cnn.reg()

        upd = pseudograd(loss,
                         cnn.params(learnable=True),
                         temperature=1.0e+1,
                         learning_rate=1.0e-2)

        train = theano.function([X, y], loss, updates=upd)

        import subprocess as sb

        try:
            import mnist
        except:
            sb.check_call(
                'wget -q -nc https://raw.githubusercontent.com/amitgroup/amitgroup/master/amitgroup/io/mnist.py',
                shell=True)
        finally:
            import mnist

        try:
            X, y = mnist.load_mnist(dataset='training', path='mnist/')
            X = X.reshape(-1, 1, 28, 28).astype('float32')
            y = onehot(y, 10)

            X_test, y_test = mnist.load_mnist(dataset='testing', path='mnist/')
            X_test = X_test.reshape(-1, 1, 28, 28).astype('float32')
            y_test = onehot(y_test, 10)
        except:
            sb.check_call("""
        mkdir -p mnist && {
          cd mnist;
          wget -q -nc http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz &&
          wget -q -nc http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz &&
          wget -q -nc http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz &&
          wget -q -nc http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz &&
          gunzip *.gz
        }
        """,
                          shell=True)
        finally:
            X, y = mnist.load_mnist(dataset='training', path='mnist/')
            X = X.reshape(-1, 1, 28, 28).astype('float32')
            y = onehot(y, 10)

            X_test, y_test = mnist.load_mnist(dataset='testing', path='mnist/')
            X_test = X_test.reshape(-1, 1, 28, 28).astype('float32')
            y_test = onehot(y_test, 10)

        n_batches = 2**10
        losses = np.zeros(shape=(n_batches))
        for i, indx in enumerate(
                BatchStreams.random_batch_stream(X.shape[0],
                                                 batch_size=32,
                                                 n_batches=n_batches)):
            losses[i] = train(X[indx], y[indx])

        plt.figure()
        plt.plot(losses)
        plt.show()

        assert False
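
Example n. 38 converts the integer labels with an onehot(y, 10) helper that is not shown; since y is declared as T.fmatrix, the helper presumably returns a float one-hot matrix. A minimal sketch of such a helper (an assumption, not the original implementation):

import numpy as np

def onehot(y, n_classes):
    # Integer labels -> float32 one-hot rows, matching how the helper is used above.
    out = np.zeros((len(y), n_classes), dtype='float32')
    out[np.arange(len(y)), y] = 1.0
    return out

print(onehot(np.array([0, 2, 1]), 3))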
Example n. 39
def RelationStackMaker(chips,
                       params,
                       graph=False,
                       weighted=False,
                       batched=False):
    if batched:
        emb_input = T.itensor3('emb_input')
        entities_tv = [
            T.fmatrix('enidx_' + str(i)).astype(theano.config.floatX)
            for i in range(params['num_entity'])
        ]
        sample_weights = T.fvector('sample_weight')
        if graph:
            if weighted:
                masks = T.ftensor4('child_mask')
            else:
                masks = T.ftensor3('child_mask')
        else:
            masks = T.fmatrix('batch_mask')

    else:
        emb_input = T.imatrix('emb_input')
        entities_tv = [
            T.fvector('enidx_' + str(i)).astype(theano.config.floatX)
            for i in range(params['num_entity'])
        ]
        sample_weights = T.fvector('sample_weight')
        if graph:
            if weighted:
                masks = T.ftensor3('child_mask')
            else:
                masks = T.fmatrix('child_mask')
        else:
            masks = None
    #print masks, type(masks), masks.ndim
    current_chip = Start(params['voc_size'], emb_input)
    print '\n', 'Building Stack now', '\n', 'Start: ', params[
        'voc_size'], 'out_tv dim:', current_chip.output_tv.ndim
    instantiated_chips = stackLayers(chips,
                                     current_chip,
                                     params,
                                     entity_size=params['num_entity'])
    regularizable_params = computeLayers(instantiated_chips,
                                         current_chip,
                                         params,
                                         entities_input=entities_tv,
                                         mask=masks,
                                         sample_weights=sample_weights)
    ### Debug use: Get the attention co-efficiency and visualize. ###
    for c in instantiated_chips:
        if c[1].endswith('Entity_Att'):
            assert hasattr(c[0], 'att_wt_arry')
            assert hasattr(c[0], 'entity_tvs')
            attention_weights = c[0].att_wt_arry
            entity_tvs = c[0].entity_tvs

    current_chip = instantiated_chips[-1][0]
    if current_chip.output_tv.ndim == 2:
        pred_y = current_chip.output_tv  #T.argmax(current_chip.output_tv, axis=1)
    else:
        pred_y = current_chip.output_tv  #T.argmax(current_chip.output_tv) #, axis=1)
    gold_y = (current_chip.gold_y if hasattr(current_chip, 'gold_y') else None)
    # Show all parameters that would be needed in this system
    params_needed = calculate_params_needed(instantiated_chips)
    print "Parameters Needed", params_needed
    for k in params_needed:
        assert k in params, k
        print k, params[k]
    assert hasattr(current_chip, 'score')
    cost = current_chip.score  #/ params['nsentences']
    cost_arr = [cost]
    for layer in instantiated_chips[:-1]:
        if hasattr(layer[0], 'score'):
            print layer[1]
            cost += params['cost_coef'] * layer[0].score
            cost_arr.append(params['cost_coef'] * layer[0].score)

    grads = T.grad(cost, wrt=regularizable_params)
    #[params[k] for k in params if (hasattr(params[k], 'is_regularizable') and params[k].is_regularizable)])
    print 'Regularizable parameters:'
    for k, v in params.items():
        if hasattr(v, 'is_regularizable'):
            print k, v, v.is_regularizable
    if graph or batched:
        #return (emb_input, masks, entities_tv, attention_weights, entity_tvs, gold_y, pred_y, cost, grads, regularizable_params)
        return (emb_input, masks, entities_tv, sample_weights, gold_y, pred_y,
                cost, grads, regularizable_params)
    else:
        return (emb_input, entities_tv, sample_weights, gold_y, pred_y, cost,
                grads, regularizable_params, sample_weights)
Example n. 40
    def __init__(self,
                 class_size,
                 architecture,
                 n_hidden_neurons=30,
                 conv_type="class"):
        """
        Initialization of Classification neural network.
        :param class_size: Number of output classes for neural network.
        :param n_hidden_neurons: Number of hidden neurons in every hidden layer in neural network architecture.
        :param conv_type: "class" for classification and "reg" for regression.
        :param architecture: architecture of neural network (supported in classification problem).
        """

        self.class_size = class_size
        self.n_hidden_neurons = n_hidden_neurons

        self.n_kernels = 32
        self.k = 6
        self.final_image_size = 2816  # TODO: hard-coded quick fix; replace with a proper computation of the flattened conv output size

        X = T.ftensor4()
        Y = T.fmatrix()

        self.w_h = nnet.init_weights((self.n_kernels, 1, 1, self.k * 4))
        self.w_h2 = nnet.init_weights(
            (self.n_kernels * 2, self.n_kernels, 1, self.k))
        self.w_h3 = nnet.init_weights(
            (self.n_kernels * 4, self.n_kernels * 2, 1, self.k))
        self.w_h4 = nnet.init_weights(
            (self.final_image_size, self.n_hidden_neurons))
        self.w_h5 = nnet.init_weights(
            (self.n_hidden_neurons, self.n_hidden_neurons))
        self.w_o = nnet.init_weights((self.n_hidden_neurons, self.class_size))

        if conv_type == "reg":
            self.noise_py_x = nnet.conv_model_reg(X, self.w_h, self.w_h2,
                                                  self.w_h3, self.w_h4,
                                                  self.w_h5, self.w_o, 0., 0.)
            self.py_x = nnet.conv_model_reg(X, self.w_h, self.w_h2, self.w_h3,
                                            self.w_h4, self.w_h5, self.w_o, 0.,
                                            0.)
            self.cost = nnet.rmse(self.noise_py_x, Y)
            self.params = [
                self.w_h, self.w_h2, self.w_h3, self.w_h4, self.w_h5, self.w_o
            ]  # 3c2f

        elif conv_type == "class":

            if architecture == "3c2f":

                self.noise_py_x = nnet.conv_model(X, self.w_h, self.w_h2,
                                                  self.w_h3, self.w_h4,
                                                  self.w_h5, self.w_o, 0,
                                                  0)  #3c2f
                self.py_x = nnet.conv_model(X, self.w_h, self.w_h2, self.w_h3,
                                            self.w_h4, self.w_h5, self.w_o, 0.,
                                            0.)  #3c2f
                self.params = [
                    self.w_h, self.w_h2, self.w_h3, self.w_h4, self.w_h5,
                    self.w_o
                ]  #3c2f

            elif architecture == "2c1f":

                self.noise_py_x = nnet.conv_model2(X, self.w_h, self.w_h2,
                                                   self.w_h4, self.w_o, 0.0,
                                                   0.0)  #2c1f
                self.py_x = nnet.conv_model2(X, self.w_h, self.w_h2, self.w_h4,
                                             self.w_o, 0., 0.)  #2c1f
                self.params = [self.w_h, self.w_h2, self.w_h4, self.w_o]  #2c1f

            elif architecture == "1c2f":

                self.noise_py_x = nnet.conv_model3(X, self.w_h, self.w_h4,
                                                   self.w_h5, self.w_o, 0.0,
                                                   0.0)  #1c2f
                self.py_x = nnet.conv_model3(X, self.w_h, self.w_h4, self.w_h5,
                                             self.w_o, 0., 0.)  #1c2f
                self.params = [self.w_h, self.w_h4, self.w_o]  # 1c2f

            self.cost = T.mean(
                T.nnet.categorical_crossentropy(self.noise_py_x, Y))

        updates = nnet.RMSprop(self.cost, self.params, lr=0.001)

        self.train = theano.function(inputs=[X, Y],
                                     outputs=self.cost,
                                     updates=updates,
                                     allow_input_downcast=True)
        self.predict_ = theano.function(inputs=[X],
                                        outputs=self.py_x,
                                        allow_input_downcast=True)
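
The classifier above delegates its parameter updates to nnet.RMSprop(self.cost, self.params, lr=0.001), whose implementation is not shown. As a rough sketch, a typical RMSprop rule in Theano keeps a running average of squared gradients per parameter; this is an assumption about what such a helper does, not the actual nnet code:

import numpy as np
import theano
import theano.tensor as T

def rmsprop_updates(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    # One common RMSprop formulation; the real nnet.RMSprop may differ.
    updates = []
    for p in params:
        g = T.grad(cost, p)
        acc = theano.shared(np.zeros(p.get_value().shape, dtype=p.dtype))
        acc_new = T.cast(rho * acc + (1.0 - rho) * g ** 2, p.dtype)
        updates.append((acc, acc_new))
        updates.append((p, T.cast(p - lr * g / T.sqrt(acc_new + epsilon), p.dtype)))
    return updates

# usage sketch: updates = rmsprop_updates(self.cost, self.params, lr=0.001)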
Example n. 41
    def train_net(self,
                  train,
                  train_targets,
                  valid,
                  valid_targets,
                  init_learning_rate=3 * 1e-5,
                  batch_size=256,
                  n_units_1=128,
                  n_units_2=128,
                  n_units_3=128,
                  num_epochs=140):

        start_time = time.time()

        input_var = T.ftensor4('inputs')
        target_var = T.ivector('targets')

        # Build net
        network = lasagne.layers.InputLayer(shape=(None, train.shape[1],
                                                   train.shape[2],
                                                   train.shape[3]),
                                            input_var=input_var)

        network = lasagne.layers.batch_norm(
            lasagne.layers.Conv2DLayer(
                network,
                num_filters=n_units_1,
                filter_size=(5, 5),
                pad="same",
                stride=1,
                W=lasagne.init.HeNormal(),
                b=lasagne.init.Constant(val=0.0),
                nonlinearity=lasagne.nonlinearities.rectify))

        network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)

        network = lasagne.layers.batch_norm(
            lasagne.layers.Conv2DLayer(
                network,
                num_filters=n_units_2,
                filter_size=(5, 5),
                pad="same",
                stride=1,
                W=lasagne.init.HeNormal(),
                b=lasagne.init.Constant(val=0.0),
                nonlinearity=lasagne.nonlinearities.rectify))

        network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)

        network = lasagne.layers.batch_norm(
            lasagne.layers.Conv2DLayer(
                network,
                num_filters=n_units_3,
                filter_size=(5, 5),
                pad="same",
                stride=1,
                W=lasagne.init.HeNormal(),
                b=lasagne.init.Constant(val=0.0),
                nonlinearity=lasagne.nonlinearities.rectify))

        network = lasagne.layers.MaxPool2DLayer(network, pool_size=3, stride=2)

        network = lasagne.layers.DenseLayer(
            network,
            num_units=self.num_classes,
            nonlinearity=lasagne.nonlinearities.softmax)

        # Define Theano functions
        params = lasagne.layers.get_all_params(network, trainable=True)
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(
            prediction, target_var)
        loss = loss.mean()

        test_prediction = lasagne.layers.get_output(network,
                                                    deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(
            test_prediction, target_var)
        test_loss = test_loss.mean()

        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                          dtype=theano.config.floatX)

        learning_rate = theano.shared(np.float32(init_learning_rate))

        updates = lasagne.updates.adam(loss,
                                       params,
                                       learning_rate=learning_rate)

        train_fn = theano.function([input_var, target_var],
                                   loss,
                                   updates=updates)

        val_fn = theano.function([input_var, target_var],
                                 [test_loss, test_acc])

        print("Starting training...")

        learning_curve = np.zeros([num_epochs])
        cost = np.zeros([num_epochs])
        train_loss = np.zeros([num_epochs])
        valid_loss = np.zeros([num_epochs])

        for e in range(num_epochs):

            epoch_start_time = time.time()
            train_err = 0
            train_batches = 0

            for batch in self.iterate_minibatches(train,
                                                  train_targets,
                                                  batch_size,
                                                  shuffle=True):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in self.iterate_minibatches(valid,
                                                  valid_targets,
                                                  batch_size,
                                                  shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            print("Epoch {} of {} took {:.3f}s".format(
                e + 1, num_epochs,
                time.time() - epoch_start_time))
            print("  training loss:\t\t{:.6f}".format(train_err /
                                                      train_batches))
            print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print("  validation accuracy:\t\t{:.2f} %".format(
                val_acc / val_batches * 100))

            learning_curve[e] = 1 - val_acc / val_batches
            cost[e] = time.time() - start_time
            train_loss[e] = train_err / train_batches
            valid_loss[e] = val_err / val_batches

        return learning_curve, cost, train_loss, valid_loss
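
The training loop above relies on self.iterate_minibatches(...), which is not shown here. A minimal helper in the spirit of the Lasagne tutorial, assuming it yields (inputs, targets) pairs and drops any final partial batch:

import numpy as np

def iterate_minibatches(inputs, targets, batch_size, shuffle=False):
    # Yield aligned (inputs, targets) slices of size batch_size.
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batch_size + 1, batch_size):
        batch = indices[start:start + batch_size]
        yield inputs[batch], targets[batch]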
Example n. 42
import theano
from confusionmatrix import ConfusionMatrix
from lasagne.objectives import *
from lasagne.updates import *
import theano.tensor as T
from theano.tensor import *
from theano.tensor.signal import downsample
import lasagne
import numpy as np
import try_DP as DP
from theano.tensor import nnet
import lasagne.layers.dnn

dtensor5 = TensorType('float32', (False,)*5)
input_var = T.ftensor4('XY')
input2_var = T.ftensor4('XZ')
input3_var = T.ftensor4('YZ')
target_var = T.matrix('Y_train')
x1 = T.matrix('x1')
x2 = T.matrix('x2')
x3 = T.matrix('x3')
PS = 29

# Build Neural Network:
# Conv Net XY Plane
input = lasagne.layers.InputLayer((None, 1, PS, PS), input_var=input_var)

l_conv_1 = lasagne.layers.dnn.Conv2DDNNLayer(input, 20, (9,9))

l_maxpool_1 = lasagne.layers.dnn.Pool2DDNNLayer(l_conv_1, (3,3))
Example n. 43
def main5():
    docs = T.ftensor4("docs")
    dsnv = T.fvector("dsn")
    swnm = T.fmatrix("swn")
    dw = T.fmatrix("dw")
    sw = T.fmatrix("sw")

    def localConv(doc, dsn, swnv, dww, sww):
        #         t = T.arange(docSentenceSize)
        #         ccc = docs[t.nonzero()]

        t = T.arange(dsn).nonzero()

        t = (T.arange(10000) < dsn).nonzero()
        #         print t
        #         t=T.arange(dsn)
        docSub = doc[t]
        p = printing.Print('docSub')
        docSub = p(docSub)
        swnvSub = swnv[t]

        def sentenceConv(sen, wn, sww):
            t = (T.arange(10000) < wn).nonzero()
            senSub = sen[t]
            convRes = theano.tensor.signal.conv.conv2d(senSub, sww)
            sentence_pool = theano.tensor.signal.downsample.max_pool_2d(
                convRes, (100000, 1)).flatten(1)
            return sentence_pool

        sentenceLayer, _ = theano.scan(
            fn=lambda sen, wn, sww: sentenceConv(sen, wn, sww),
            non_sequences=[sww],
            sequences=[docSub, swnvSub])

        convRes = theano.tensor.signal.conv.conv2d(sentenceLayer, dww)

        sentence_pool = theano.tensor.signal.downsample.max_pool_2d(
            convRes, (100000, 1)).flatten(1)
        return sentence_pool

    res, _ = theano.scan(fn=lambda doc, dsn, swnv, dww, sww: localConv(
        doc, dsn, swnv, dww, sww),
                         non_sequences=[dw, sw],
                         sequences=[docs, dsnv, swnm])

    #     p = printing.Print('res')
    #     res = p(res)
    cost = res.sum()
    g = T.grad(cost, [dw, sw])

    f = theano.function([docs, dsnv, swnm, dw, sw], g)

    d = [[[[2, 2, 3, 4], [1, 2, 3, 4], [3, 1, 2, 3], [6, 4, 2, 1],
           [0, 0, 0, 0]],
          [[4, 3, 2, 1], [4, 6, 9, 2], [6, 6, 3, 1], [2, 5, 2, 9],
           [3, 2, 1, 7]]],
         [[[9, 8, 7, 6], [5, 4, 3, 2], [1, 9, 8, 7], [6, 5, 4, 3],
           [0, 0, 0, 0]],
          [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
           [0, 0, 0, 0]]]]

    docSentenceCount = [2, 1]
    sentenceWordCount = [[4, 5], [4, 0]]
    docF = [[1, 1]]
    senF = [[1, 2], [1, 2]]
    print f(d, docSentenceCount, sentenceWordCount, docF, senF)

    print "All finished!"
Example n. 44
def CCD(Number_Conv_Layer = 4 ,
        Number_Conv_feature = [32,32,64,128] ,
        Kernel_size = [3,3,5,5],
        Activation_Conv = [relu,relu,relu, relu],#[T.tanh,T.tanh,T.tanh, T.tanh],# 
        pooling_size = [3,3,3,3],
        stride = [2,2,2,2],
        Number_Hidden_Layer = 3 ,
        Number_Hidden_feature = [256, 324, 128] ,
        Activation_Hidden = [relu,relu,relu ],#[T.tanh,T.tanh,T.tanh ],#
        learning_rate =  0.01,
        momentum = 0.9,
        batch_size = 50,
        input_size = 256,
        Number_Classes = 2, 
        n_epochs  = 300,
        N_train_example = 22500,
        N_test_example = 2500 ,
        color = True,
        ):
		  
    print '... building the model'
    if color:
        c = 3
    else:
        c =1
    rng = np.random.RandomState( )
    x = T.ftensor4('x')
    layer_input_size = (batch_size, c, input_size, input_size) 
    Next_layer_input = x.reshape(layer_input_size)
    Filter_shape = (Number_Conv_feature[0], c,Kernel_size[0], Kernel_size[0] )
    
       
    Conv_layer =[]
    Params =[]
    for i in range(Number_Conv_Layer):
    
        Conv_layer.append([])
        Conv_layer[i] = LeNetConvPoolLayer(
            rng,
            input=Next_layer_input,
            image_shape=layer_input_size,
            filter_shape=Filter_shape,
            poolsize=(pooling_size[i], pooling_size[i]),
            activation=Activation_Conv[i],
            stride=(stride[i], stride[i]))
        if i == 0:
            zeros = T.zeros([batch_size, Number_Conv_feature[0], 127, 127], dtype='float32')
            Conv_layer[0].output = T.set_subtensor(zeros[:, :, :126, :126], Conv_layer[0].output)
        if i == 2:
            zeros = T.zeros([batch_size, Number_Conv_feature[i], 29, 29], dtype='float32')
            Conv_layer[i].output = T.set_subtensor(zeros[:, :, :28, :28], Conv_layer[i].output)
        if Number_Conv_Layer - 1 != i:
            Filter_shape = (Number_Conv_feature[i + 1], Filter_shape[0], Kernel_size[i + 1], Kernel_size[i + 1])

        layer_input_size = (batch_size, Number_Conv_feature[i],
                            1 + int(np.floor(((layer_input_size[2] - Kernel_size[i] + 1) - pooling_size[i] + 1) / (1.0 * stride[i]))),
                            1 + int(np.floor(((layer_input_size[2] - Kernel_size[i] + 1) - pooling_size[i] + 1) / (1.0 * stride[i]))))
        Next_layer_input = Conv_layer[i].output
        Params += Conv_layer[i].params
        

   
    
    Next_layer_input = Next_layer_input.flatten(2)
    layer_input_size = (batch_size, layer_input_size[1]*layer_input_size[2]*layer_input_size[3])
    
    Hidden_layer =[]
    for i in range(Number_Hidden_Layer):
        Hidden_layer.append([])
        Hidden_layer[i]= HiddenLayer(
            rng=rng,
            input=Next_layer_input,
            n_in= layer_input_size[1],
            n_out= Number_Hidden_feature[i],
            activation=Activation_Hidden[i]
                ) 
    
        layer_input_size = (batch_size,Number_Hidden_feature[i] )
  
        Next_layer_input  = Hidden_layer[i].output 
        Params += Hidden_layer[i].params
  
    X_t, y_t = load_color()			
    
 
    y = T.ivector('y') 
    logRegressionLayer = LogisticRegression(
            input=Next_layer_input,
            n_in=layer_input_size[1],
            n_out=Number_Classes 
        )
    Params += logRegressionLayer.params
    Reg = 0
    Reg_cov = 0
    ind_param = [8, 10, 12]
    for pp in ind_param:
        Reg += T.sum(abs(Params[pp]))
    
    cost = logRegressionLayer.negative_log_likelihood( y) + 0.001*Reg  
    #gparams = T.grad(cost,  Params )
    learning_r = T.fscalar()
    updates =gradient_updates_momentum(cost, Params , learning_r   , momentum )
 
    Train_function = theano.function(
        inputs=[x,y, learning_r ],
        outputs=cost,
        updates=updates
        )
      
    Get_Error = theano.function(
                [x,y],
                logRegressionLayer.errors(y) 
                )
    Get_NLL = theano.function(
                [x,y],
                cost
                )
 
 
    print '... training'
 
    n_train_batches =  N_train_example /batch_size
    n_test_batches =  N_test_example /batch_size
    Test_list_scores = []
    Train_list_scores = []
    layer_input_size = (batch_size, c, input_size, input_size) 
    for epoch in range( n_epochs):
        
        print '--- epoch: ', epoch
                 
        minibatch_avg_cost_total  = 0.0
        for minibatch_index in xrange(n_train_batches):
                srng = np.random.RandomState(rng.randint(999999))
             
                train_set_x=  np.array(X_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size], dtype='float32') /np.array(X_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size], dtype='float32').max()
          
                train_set_y=  np.array(y_t[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] , dtype='int32') 
         
              
 
                minibatch_avg_cost = Train_function(train_set_x,train_set_y, learning_rate)
                minibatch_avg_cost_total += minibatch_avg_cost
        
 
        # Helper: slice a normalized (x, y) evaluation batch starting at `offset`.
        def eval_batch(offset, i):
            xb = np.array(X_t[offset + i * batch_size:offset + (i + 1) * batch_size], dtype='float32')
            yb = np.array(y_t[offset + i * batch_size:offset + (i + 1) * batch_size], dtype='int32')
            return xb.reshape(layer_input_size) / xb.max(), yb

        test_losses = [Get_Error(*eval_batch(N_train_example, i))
                       for i in xrange(n_test_batches)]
        test_NLL = [Get_NLL(*eval_batch(N_train_example, i))
                    for i in xrange(n_test_batches)]

        this_test_loss = np.mean(test_losses)

        #learning_rate *= 0.995
        #if learning_rate > 0.0005:
        #    learning_rate *= 0.95
        #else:
        #    learning_rate = 0.0005
        Test_list_scores.append(float(this_test_loss))
        print '........................Test error:', this_test_loss

        train_loss = [Get_Error(*eval_batch(0, i))
                      for i in xrange(n_train_batches)]
        train_NLL = [Get_NLL(*eval_batch(0, i))
                     for i in xrange(n_train_batches)]

        this_train_loss = np.mean(train_loss)

        print '........................Train error:', this_train_loss
        print 'NLL ...... ', np.mean(train_NLL), np.mean(test_NLL)
        Train_list_scores.append(float(this_train_loss))
 



        if (epoch + 1) % 50 == 0 or epoch == 70:
            plt.figure(1)
            plt.plot(np.arange(len(Test_list_scores)), np.array(Test_list_scores), label='test error')
            plt.hold(True)
            plt.plot(np.arange(len(Train_list_scores)), np.array(Train_list_scores), label='train error')
            plt.legend(loc='upper left')
            plt.hold(False)
            plt.savefig('./MSE_test' + str(epoch) + '.png')
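The training script above calls gradient_updates_momentum(...) without defining it. A minimal sketch of such a helper, assuming the classical momentum-SGD formulation (an assumption, not the original implementation), could look like this:

# Hypothetical sketch of gradient_updates_momentum; not taken from the original source.
def gradient_updates_momentum(cost, params, learning_rate, momentum):
    updates = []
    for param in params:
        # one velocity buffer per parameter, initialised to zero
        velocity = theano.shared(param.get_value() * 0.,
                                 broadcastable=param.broadcastable)
        updates.append((param, param - learning_rate * velocity))
        updates.append((velocity, momentum * velocity
                        + (1. - momentum) * T.grad(cost, param)))
    return updates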
Esempio n. 45
0
    def build_model(self):

        print 'VGGNet_11 (shallow) 3/19'

        self.name = 'vggnet'

        # input shape in c01b
        self.channels = 3  # 'c'; per-channel mean (R, G, B) = (103.939, 116.779, 123.68)
        self.input_width = self.config[
            'input_width']  # '0' single scale training 224
        self.input_height = self.config[
            'input_height']  # '1' single scale training 224
        self.batch_size = self.config['batch_size']  # 'b'
        b = self.batch_size

        # output dimension
        self.n_softmax_out = self.config['n_softmax_out']

        # start graph construction from scratch
        self.x = T.ftensor4('x')

        self.y = T.lvector('y')

        x_shuffled = self.x.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        layers = []
        params = []
        weight_types = []  # for distinguishing w and b later

        # bc01 from now on

        conv_3x3 = Conv(
            input=x_shuffled,
            input_shape=(b, self.channels, self.input_width,
                         self.input_height),  # (b, 3, 224, 224)
            convstride=1,
            padsize=1,
            W=Normal((64, self.channels, 3, 3), std=0.3),  # bc01
            b=Constant((64, ), val=0.2),
            printinfo=self.verbose
            #output_shape = (b, 64, 224, 224)
        )

        pool_2x2 = Pool(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 64, 224, 224)
            poolsize=2,
            poolstride=2,
            poolpad=0,
            mode='max',
            printinfo=self.verbose
            #output_shape = (b, 64, 112, 112)
        )

        conv_3x3 = Conv(
            input=pool_2x2,
            #input_shape=pool_2x2.output_shape, # (b, 64, 112, 112)
            convstride=1,
            padsize=1,
            W=Normal((128, pool_2x2.output_shape[1], 3, 3), std=0.1),  # bc01
            b=Constant((128, ), val=0.02),
            printinfo=self.verbose
            #output_shape = (b, 128, 112, 112)
        )

        pool_2x2 = Pool(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 128, 112, 112)
            poolsize=2,
            poolstride=2,
            poolpad=0,
            mode='max',
            printinfo=self.verbose
            #output_shape = (b, 128, 56, 56)
        )

        conv_3x3 = Conv(
            input=pool_2x2,
            #input_shape=pool_2x2.output_shape, # (b, 128, 56, 56)
            convstride=1,
            padsize=1,
            W=Normal((256, pool_2x2.output_shape[1], 3, 3), std=0.05),  # bc01
            b=Constant((256, ), val=0.02),
            printinfo=self.verbose
            #output_shape = (b, 256, 56, 56)
        )
        conv_3x3 = Conv(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 256, 56, 56)
            convstride=1,
            padsize=1,
            W=Normal((256, conv_3x3.output_shape[1], 3, 3), std=0.05),  # bc01
            b=Constant((256, ), val=0.01),
            printinfo=self.verbose
            #output_shape = (b, 256, 56, 56)
        )

        pool_2x2 = Pool(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 256, 56, 56)
            poolsize=2,
            poolstride=2,
            poolpad=0,
            mode='max',
            printinfo=self.verbose
            #output_shape = (b, 256, 28, 28)
        )

        conv_3x3 = Conv(
            input=pool_2x2,
            #input_shape=pool_2x2.output_shape, # (b, 256, 28, 28)
            convstride=1,
            padsize=1,
            W=Normal((512, pool_2x2.output_shape[1], 3, 3), std=0.05),  # bc01
            b=Constant((512, ), val=0.02),
            printinfo=self.verbose
            #output_shape = (b, 512, 28, 28)
        )
        conv_3x3 = Conv(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 512, 28, 28)
            convstride=1,
            padsize=1,
            W=Normal((512, conv_3x3.output_shape[1], 3, 3), std=0.01),  # bc01
            b=Constant((512, ), val=0.01),
            printinfo=self.verbose
            #output_shape = (b, 512, 28, 28)
        )

        pool_2x2 = Pool(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 512, 28, 28)
            poolsize=2,
            poolstride=2,
            poolpad=0,
            mode='max',
            printinfo=self.verbose
            #output_shape = (b, 512, 14, 14)
        )

        conv_3x3 = Conv(
            input=pool_2x2,
            #input_shape=pool_2x2.output_shape, # (b, 512, 14, 14)
            convstride=1,
            padsize=1,
            W=Normal((512, pool_2x2.output_shape[1], 3, 3), std=0.005),  # bc01
            b=Constant((512, )),
            printinfo=self.verbose
            #output_shape = (b, 512, 14, 14)
        )
        conv_3x3 = Conv(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 512, 14, 14)
            convstride=1,
            padsize=1,
            W=Normal((512, conv_3x3.output_shape[1], 3, 3), std=0.005),  # bc01
            b=Constant((512, )),
            printinfo=self.verbose
            #output_shape = (b, 512, 14, 14)
        )

        pool_2x2 = Pool(
            input=conv_3x3,
            #input_shape=conv_3x3.output_shape, # (b, 512, 14, 14)
            poolsize=2,
            poolstride=2,
            poolpad=0,
            mode='max',
            printinfo=self.verbose
            #output_shape = (b, 512, 7, 7)
        )

        flatten = Flatten(
            input=pool_2x2,  #5
            #input_shape = pool_2x2.output_shape, # (b, 512, 7, 7)
            axis=2,  # expand dimensions after the first dimension
            printinfo=self.verbose
            #output_shape = (b, 25088)
        )
        fc_4096 = FC(input=flatten,
                     n_out=4096,
                     W=Normal((flatten.output_shape[1], 4096), std=0.001),
                     b=Constant((4096, ), val=0.01),
                     printinfo=self.verbose
                     #input_shape = flatten.output_shape # (b, 25088)
                     )
        dropout = Dropout(input=fc_4096,
                          n_out=fc_4096.output_shape[1],
                          prob_drop=0.5,
                          printinfo=self.verbose
                          #input_shape = fc_4096.output_shape # (b, 4096)
                          )
        fc_4096 = FC(input=dropout,
                     n_out=4096,
                     W=Normal((dropout.output_shape[1], 4096), std=0.005),
                     b=Constant((4096, ), val=0.01),
                     printinfo=self.verbose
                     #input_shape = dropout.output_shape # (b, 4096)
                     )
        dropout = Dropout(input=fc_4096,
                          n_out=fc_4096.output_shape[1],
                          prob_drop=0.5,
                          printinfo=self.verbose
                          #input_shape = fc_4096.output_shape # (b, 4096)
                          )
        softmax = Softmax(input=dropout,
                          n_out=self.n_softmax_out,
                          W=Normal(
                              (dropout.output_shape[1], self.n_softmax_out),
                              std=0.005),
                          b=Constant((self.n_softmax_out, ), val=0),
                          printinfo=self.verbose
                          #input_shape = dropout.output_shape # (b, 4096)
                          )

        self.output_layer = softmax

        self.output = self.output_layer.output

        self.layers = get_layers(lastlayer=self.output_layer)

        self.layers = [layer for layer in self.layers \
            if layer.name not in ['LRN\t','Pool\t','Flatten\t','Dropout'+ str(0.5)]]

        self.params, self.weight_types = get_params(self.layers)

        # training related
        self.base_lr = np.float32(self.config['learning_rate'])
        self.shared_lr = theano.shared(self.base_lr)
        self.step_idx = 0
        self.mu = self.config['momentum']  # def: 0.9 # momentum
        self.eta = self.config['weight_decay']  #0.0002 # weight decay

        self.shared_x = theano.shared(np.zeros(
            (3, self.input_width, self.input_height,
             self.config['file_batch_size']),
            dtype=theano.config.floatX),
                                      borrow=True)

        self.shared_y = theano.shared(np.zeros(
            (self.config['file_batch_size'], ), dtype=int),
                                      borrow=True)

        # shared variable for storing momentum before exchanging momentum(delta w)
        self.vels = [
            theano.shared(param_i.get_value() * 0.) for param_i in self.params
        ]

        # shared variable for accepting momentum during exchanging momentum(delta w)
        self.vels2 = [
            theano.shared(param_i.get_value() * 0.) for param_i in self.params
        ]

        self.train = None
        self.val = None
        self.inference = None
        self.get_vel = None
        self.descent_vel = None
Esempio n. 46
0
    def setUp(self):
        self.input = tensor.ftensor4()
        self.filters = tensor.ftensor4()
        self.topgrad = tensor.ftensor4()
Esempio n. 47
0
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        ####################################### SETTINGS ###################################
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name='input')
        self.df = T.fmatrix(name='differential')
        self.dim_z = dim_z
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        #self.y = T.matrix(name="y")
        self.in_filters = [64, 64, 64]
        self.filter_lengths = [10., 10., 10.]
        self.params = []
        #magic = 73888.
        self.magic = magic

        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))
        ####################################### LAYERS ######################################
        # LAYER 1 ##############################
        self.conv1 = one_d_conv_layer(self.inpt,
                                      self.in_filters[0],
                                      1,
                                      self.filter_lengths[0],
                                      param_names=["W1", 'b1'])
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)
        #self.layer1_out = self.maxpool1
        # LAYER2 ################################
        self.flattened = T.flatten(self.layer1_out, outdim=2)
        # Variational Layer #####################
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic,
                                                    dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output
        # Hidden Layer #########################
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1],
                 int(self.magic / self.in_filters[-1]), 1)),
            self.dropout_symbolic)
        # Deconvolutional 1 ######################
        self.deconv1 = one_d_deconv_layer(self.hid_out,
                                          1,
                                          self.in_filters[2],
                                          self.filter_lengths[2],
                                          pool=2.,
                                          param_names=["W3", 'b3'],
                                          distribution=False)
        self.params += self.deconv1.params
        #self.nl_deconv1 = dropout(self.activation(self.deconv1.output),self.dropout_symbolic)
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        if self.out_distribution:
            self.trunk_sigma = \
                self.last_layer.log_sigma[:, :, :self.inpt.shape[2], :]
        self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

        ################################### FUNCTIONS ######################################################
        self.get_latent_states = theano.function(
            [self.inpt],
            self.latent_out,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.prior_debug = theano.function([self.inpt],[self.latent_out,self.latent_layer.mu_encoder,self.latent_layer.log_sigma_encoder,self.latent_layer.prior])
        #self.get_prior = theano.function([self.inpt],self.latent_layer.prior)
        #self.convolve1 = theano.function([self.inpt],self.layer1_out)
        #self.convolve2 = theano.function([self.inpt],self.layer2_out)
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        self.get_flattened = theano.function(
            [self.inpt],
            self.flattened,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        #self.deconvolve1 = theano.function([self.inpt],self.deconv1.output)
        #self.deconvolve2 = theano.function([self.inpt],self.deconv2.output)
        #self.sig_out = theano.function([self.inpt],T.flatten(self.trunk_sigma,outdim=2))
        #self.generate_from_z = theano.function([self.inpt],self.trunc_output,givens = [[self.latent_out,self.generative_z]])
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob],
                    [self.latent_out, self.generative_z]])

        self.cost = self.MSE()
        self.mse = self.MSE()
        #self.likelihood = self.log_px_z()
        #self.get_cost = theano.function([self.inpt],[self.cost,self.mse])

        #self.get_likelihood = theano.function([self.layer1.inpt],[self.likelihood])
        self.derivatives = T.grad(self.cost, self.params)
        #self.get_gradients = theano.function([self.inpt],self.derivatives)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        #self.updates =momentum_update(self.params,self.derivatives,self.learning_rate,self.momentum)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
Esempio n. 48
0
def test_pooling():
    if not dnn.dnn_available():
        raise SkipTest(dnn.dnn_available.msg)

    x = T.ftensor4()
    for mode, pad in product(('max', 'average_inc_pad', 'average_exc_pad'),
                             ((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
        if mode == 'max':
            func = T.max
        else:
            func = T.mean

        if pad != (0, 0) and func is T.mean:
            continue

        for ws in (4, 2, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if pad[0] > stride or pad[1] > stride:
                    # Not implemented
                    continue
                # We will check that the opt introduced it.
                out1 = max_pool_2d(x, (ws, ws),
                                   st=(stride, stride),
                                   ignore_border=True,
                                   padding=pad, mode=mode)
                out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
                                   pad=pad,
                                   pool_function=func)
                mode_without_gpu2 = mode_without_gpu.including()
                mode_without_gpu2.check_isfinite = False
                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any([isinstance(node.op, dnn.GpuDnnPool)
                            for node in f1.maker.fgraph.apply_nodes])
                f2 = theano.function([x], out2, mode=mode_without_gpu2)
                assert not any([isinstance(node.op, dnn.GpuDnnPool)
                                for node in f2.maker.fgraph.apply_nodes])
                for shp in [(1, 10, 100, 100),
                            (1, 3, 99, 99),
                            (32, 1, 147, 197),
                            ]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    a = f1(data)
                    b = f2(data)

                    utt.assert_allclose(a, b)

        # Test the grad
        for shp in [(1, 1, 2, 2),
                    (1, 1, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10

            ws = 2
            stride = 2
            if pad[0] > stride or pad[1] > stride:
                # Not implemented
                continue

            # This tests the CPU grad + opt + GPU implementation
            def fn(x):
                return max_pool_2d(x, (ws, ws), ignore_border=True,
                                   padding=pad, mode=mode)
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])

            # Test the GPU grad + GPU implementation
            def fn(x):
                dnn_op = dnn.dnn_pool(
                    x, ws=(ws, ws),
                    stride=(stride, stride),
                    pad=pad,
                    mode=mode)
                return dnn_op
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that we get the expected op.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])
            g_out = fg(data)

            # Compare against the CPU result
            out = max_pool_2d(x, (ws, ws),
                              padding=pad,
                              ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
            if mode == 'max':
                assert any([isinstance(node.op, MaxPoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            else:
                assert any([isinstance(node.op, AveragePoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            c_out = fc(data)
            utt.assert_allclose(c_out, g_out)
Esempio n. 49
0
def build_model_L(in_channel=3, out_channel=3, kernel_size=(3, 3), stride=(1, 1),
                  pad='valid', dilation=(1, 1), num_groups=1):
    input_var = tensor.ftensor4('x')  # (B, C, H, W)
    input0 = InputLayer(shape=(None, in_channel, None, None),
                        input_var=input_var, name='input0')
    tconv0 = TransposedConv2DLayer(input0, num_filters=out_channel,
                                   filter_size=kernel_size, stride=stride,
                                   crop=pad, nonlinearity=LACT.linear,
                                   name='tconv0')
    return tconv0
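A hypothetical usage sketch (not part of the original snippet) that compiles the transposed-convolution model above with standard Lasagne/Theano calls and runs it on random data; the output shape in the comment follows from a 3x3 kernel with stride 1 and 'valid' crop:

import numpy as np
import theano
import lasagne

net = build_model_L(in_channel=3, out_channel=3, kernel_size=(3, 3))
in_var = lasagne.layers.get_all_layers(net)[0].input_var  # the InputLayer's variable
out = lasagne.layers.get_output(net)
fn = theano.function([in_var], out)

x = np.random.rand(2, 3, 8, 8).astype('float32')          # (B, C, H, W)
print fn(x).shape                                          # -> (2, 3, 10, 10)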
Esempio n. 50
0
# Load MNIST
# ntrain = # of samples in randomly chosen subset
# This is to reproduce Fig. 5 in the paper
#--------------------------

parser = argparse.ArgumentParser()
parser.add_argument('--ntrain', nargs=1, type=int)
parser.add_argument('--epochs', nargs=1, type=float)
args = parser.parse_args()

trX, teX, trY, teY = mnist(args.ntrain, onehot=True)

trX = trX.reshape(-1, 1, 28, 28)
teX = teX.reshape(-1, 1, 28, 28)

X = T.ftensor4()
Y = T.fmatrix()
lr = T.scalar()
epochs = T.scalar()

#-------------------------
# Init Basis and Alphas
#-------------------------

bases_L1 = 10
sigma_L1 = 1.5
bases_L2 = 6
sigma_L2 = 1
bases_L3 = 6
sigma_L3 = 1
Esempio n. 51
0
    def test_conv_gradw(self, border_mode, conv_mode):
        self._test_conv_gradw(T.ftensor4('img'), T.ftensor4('kerns'),
                              T.ftensor4('out'), numpy.random.rand(2, 5, 6, 8),
                              numpy.random.rand(2, 1, 5, 6), border_mode,
                              conv_mode, (1, 1))
Esempio n. 52
0
    def __init__(self,
                 K,
                 conv_layer_sizes,
                 hidden_layer_sizes,
                 gamma,
                 max_experiences=500000,
                 min_experiences=50000,
                 batch_sz=32):
        self.K = K
        lr = np.float32(2.5e-4)
        mu = np.float32(0)
        decay = np.float32(0.99)

        # inputs and targets
        X = T.ftensor4('X')
        G = T.fvector('G')
        actions = T.ivector('actions')

        # create the graph
        self.conv_layers = []
        num_input_filters = 4  # number of filters / color channels
        for num_output_filters, filtersz, stride in conv_layer_sizes:
            layer = ConvLayer(num_input_filters, num_output_filters, filtersz,
                              stride)
            self.conv_layers.append(layer)
            num_input_filters = num_output_filters

        # get conv output size
        Z = X / 255.0
        for layer in self.conv_layers:
            Z = layer.forward(Z)
        conv_out = Z.flatten(ndim=2)
        conv_out_op = theano.function(inputs=[X],
                                      outputs=conv_out,
                                      allow_input_downcast=True)
        test = conv_out_op(np.random.randn(1, 4, IM_HEIGHT, IM_WIDTH))
        flattened_output_size = test.shape[1]

        # print("test.shape:", test.shape)
        # print("flattened_output_size:", flattened_output_size)

        # build fully connected layers
        self.layers = []
        M1 = flattened_output_size
        for M2 in hidden_layer_sizes:
            layer = HiddenLayer(M1, M2)
            self.layers.append(layer)
            M1 = M2

        # final layer
        layer = HiddenLayer(M1, K, lambda x: x)
        self.layers.append(layer)

        # collect params for copy
        self.params = []
        for layer in (self.conv_layers + self.layers):
            self.params += layer.params
        caches = [
            theano.shared(np.ones_like(p.get_value()) * 0.1)
            for p in self.params
        ]
        velocities = [theano.shared(p.get_value() * 0) for p in self.params]

        # calculate final output and cost
        Z = conv_out
        for layer in self.layers:
            Z = layer.forward(Z)
        Y_hat = Z

        selected_action_values = Y_hat[T.arange(actions.shape[0]), actions]
        cost = T.mean((G - selected_action_values)**2)

        # create train function
        grads = T.grad(cost, self.params)
        g_update = [(p, p + v)
                    for p, v, g in zip(self.params, velocities, grads)]
        c_update = [(c, decay * c + (np.float32(1) - decay) * g * g)
                    for c, g in zip(caches, grads)]
        v_update = [(v, mu * v - lr * g / T.sqrt(c))
                    for v, c, g in zip(velocities, caches, grads)]
        # v_update = [(v, mu*v - lr*g) for v, g in zip(velocities, grads)]
        # c_update = []
        updates = c_update + g_update + v_update

        # compile functions
        self.train_op = theano.function(inputs=[X, G, actions],
                                        updates=updates,
                                        allow_input_downcast=True)
        self.predict_op = theano.function(inputs=[X],
                                          outputs=Y_hat,
                                          allow_input_downcast=True)

        # create replay memory
        self.experience = []
        self.max_experiences = max_experiences
        self.min_experiences = min_experiences
        self.batch_sz = batch_sz
        self.gamma = gamma
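The constructor above only builds the graph and the replay buffer; a hypothetical sketch of a learning step using self.train_op / self.predict_op, assuming each stored experience is a (state, action, reward, next_state, done) tuple and that a separate target network is passed in (neither is shown in the original snippet), might be:

    # Hypothetical sketch, not part of the original class.
    def learn(self, target_network):
        if len(self.experience) < self.min_experiences:
            return
        idx = np.random.choice(len(self.experience), size=self.batch_sz, replace=False)
        states = np.stack([self.experience[i][0] for i in idx])
        actions = np.array([self.experience[i][1] for i in idx], dtype=np.int32)
        rewards = np.array([self.experience[i][2] for i in idx], dtype=np.float32)
        next_states = np.stack([self.experience[i][3] for i in idx])
        dones = np.array([self.experience[i][4] for i in idx], dtype=bool)

        # bootstrapped targets: r + gamma * max_a' Q_target(s', a'), cut off at terminal states
        next_Q = np.max(target_network.predict_op(next_states), axis=1)
        G = rewards + self.gamma * next_Q * np.where(dones, 0.0, 1.0)
        self.train_op(states, G.astype(np.float32), actions)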
Esempio n. 53
0
test_Tstamp = 1

pat = readPatMS.new(1,1)
num_patches = np.shape(image.extract_patches_2d(pat.data[0,20:160,17:192,0],patch_size))[0]

train_patches = np.zeros([num_patches, num_channels, patch_size[0], patch_size[1]])
trpatches_truth = np.zeros([num_patches])

shared_data = theano.shared(numpy.asarray(train_patches,dtype = theano.config.floatX),borrow = True)
shared_truth = theano.shared(numpy.asarray(trpatches_truth,dtype = 'int32'),borrow = True)

rng = numpy.random.RandomState(23455)

#Define Theano Tensors
nz = T.lscalar()
x = T.ftensor4('x')  
y = T.ivector('y')  

######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'

layer0input = x.dimshuffle(1,0,2,3)
layer0 = ConvPoolLayer(
    rng,
    input=layer0input,
    image_shape=(num_patches, num_channels, 19, 19),
    filter_shape=(nkerns[0], num_channels, 5, 5),
    poolsize=(2, 2)
)
Esempio n. 54
0
def test_pooling():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    x = T.ftensor4()

    for func in (T.max, T.mean):
        for ws in (2, 4, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if ws == stride and func is T.max:
                    # We will check that the opt introduced it.
                    out1 = max_pool_2d(x, (ws, ws), ignore_border=True)
                else:
                    out1 = cuda.dnn.dnn_pool(
                        x,
                        ws=(ws, ws),
                        stride=(stride, stride),
                        mode='max' if func is T.max else "average")
                out2 = pool_2d_i2n(x,
                                   ds=(ws, ws),
                                   strides=(stride, stride),
                                   pool_function=func)

                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any([
                    isinstance(node.op, cuda.dnn.GpuDnnPool)
                    for node in f1.maker.fgraph.apply_nodes
                ])
                f2 = theano.function([x], out2, mode=mode_with_gpu)
                assert not any([
                    isinstance(node.op, cuda.dnn.GpuDnnPool)
                    for node in f2.maker.fgraph.apply_nodes
                ])
                for shp in [
                    (1, 10, 100, 100),
                    (1, 3, 99, 99),
                    (32, 1, 147, 197),
                ]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    a = f1(data).__array__()

                    b = f2(data).__array__()
                    assert numpy.allclose(a,
                                          b,
                                          atol=numpy.finfo(numpy.float32).eps)

        # Test the grad
        for shp in [(1, 1, 2, 2), (1, 1, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10

            ws = 2
            stride = 2

            # This tests the CPU grad + opt + GPU implementation
            def fn(x):
                return max_pool_2d(x, (ws, ws), ignore_border=True)

            theano.tests.unittest_tools.verify_grad(fn, [data],
                                                    cast_to_output_type=False,
                                                    mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            f = theano.function([x],
                                theano.grad(fn(x).sum(), x),
                                mode=mode_with_gpu)
            assert any([
                isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                for node in f.maker.fgraph.toposort()
            ])

            # Test the GPU grad + GPU implementation
            def fn(x):
                dnn_op = cuda.dnn.dnn_pool(
                    x,
                    ws=(ws, ws),
                    stride=(stride, stride),
                    mode='max' if func is T.max else "average")
                return dnn_op

            theano.tests.unittest_tools.verify_grad(fn, [data],
                                                    cast_to_output_type=False,
                                                    mode=mode_with_gpu)
            # Confirm that we get the expected op.
            f = theano.function([x],
                                theano.grad(fn(x).sum(), x),
                                mode=mode_with_gpu)
            assert any([
                isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                for node in f.maker.fgraph.toposort()
            ])
            g_out = f(data)

            if func is T.max:
                # Compare against the CPU result
                out = max_pool_2d(x, (ws, ws), ignore_border=True)
                f = theano.function([x],
                                    theano.grad(out.sum(), x),
                                    mode=mode_without_gpu)
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGrad)
                    for node in f.maker.fgraph.toposort()
                ])
                c_out = f(data)
                assert numpy.allclose(c_out, g_out)
Esempio n. 55
0
import theano
from confusionmatrix import ConfusionMatrix
from lasagne.objectives import *
from lasagne.updates import *
import theano.tensor as T
from theano.tensor import *
from theano.tensor.signal import pool
import lasagne
import numpy as np
import DP1 as DP
from theano.tensor import nnet
import lasagne.layers.dnn
dtensor5 = TensorType('float32', (False, ) * 5)

input_var = T.ftensor4('XY')
target_var = T.ivector('Y_train')
x1 = T.matrix('x1')
PS = 15
P2 = 3
# Build Neural Network:
# Conv Net XY Plane
input = lasagne.layers.InputLayer((None, 15, PS, PS), input_var=input_var)

l_conv_1 = lasagne.layers.dnn.Conv2DDNNLayer(input, 20, (3, 3))

l_maxpool_1 = lasagne.layers.dnn.Pool2DDNNLayer(l_conv_1, (2, 2))

l_conv_2 = lasagne.layers.dnn.Conv2DDNNLayer(l_maxpool_1, 20, (3, 3))

l_conv_3 = lasagne.layers.dnn.Conv2DDNNLayer(l_conv_2, 20, (3, 3))
Esempio n. 56
0
    def __init__(self, config):

        self.config = config
        self.verbose = self.config['verbose']
        self.name = 'alexnet'
        batch_size = config['batch_size']
        flag_datalayer = config['use_data_layer']
        lib_conv = config['lib_conv']
        n_softmax_out = config['n_softmax_out']
        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        x = T.ftensor4('x')
        y = T.lvector('y')
        rand = T.fvector('rand')
        lr = T.scalar('lr')

        if self.verbose: print 'AlexNet 2/16'
        self.layers = []
        params = []
        weight_types = []

        if flag_datalayer:
            data_layer = DataLayer(input=x, image_shape=(3, 256, 256,
                                                         batch_size),
                                   cropsize=227, rand=rand, mirror=True,
                                   flag_rand=config['rand_crop'])

            layer1_input = data_layer.output
        else:
            layer1_input = x

        convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                        image_shape=(3, 227, 227, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        verbose = self.verbose
                                        )
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, 27, 27, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1, padsize=2, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        verbose = self.verbose
                                        )
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, 13, 13, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1, padsize=1, group=1,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        verbose = self.verbose
                                        )
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        verbose = self.verbose
                                        )
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        verbose = self.verbose
                                        )
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input, 
                            n_in=9216,
                            n_out=4096,
                            verbose = self.verbose
                            )
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type

        dropout_layer6 = DropoutLayer(fc_layer6.output, 
                                      n_in=4096, 
                                      n_out=4096, 
                                      verbose = self.verbose)

        fc_layer7 = FCLayer(input=dropout_layer6.output, 
                            n_in=4096, 
                            n_out=4096,
                            verbose = self.verbose
                            )
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type

        dropout_layer7 = DropoutLayer(fc_layer7.output, 
                                      n_in=4096, 
                                      n_out=4096,
                                      verbose = self.verbose)

        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, 
                                      n_in=4096, 
                                      n_out=n_softmax_out,
                                      verbose = self.verbose)
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################
        self.p_y_given_x = softmax_layer8.p_y_given_x
        self.y_pred = softmax_layer8.y_pred
        
        self.output = self.p_y_given_x
        
        
        self.cost = softmax_layer8.negative_log_likelihood(y)
        self.error = softmax_layer8.errors(y)
        if n_softmax_out < 5:        
            self.error_top_5 = softmax_layer8.errors_top_x(y, n_softmax_out)
        else:        
            self.error_top_5 = softmax_layer8.errors_top_x(y, 5)       
        self.params = params
        
        # inputs
        self.x = x
        self.y = y
        self.rand = rand
        self.lr = lr
        self.shared_x = theano.shared(np.zeros((3, config['input_width'], 
                                                  config['input_height'], 
                                                  config['file_batch_size']), # for loading large batch
                                                  dtype=theano.config.floatX),  
                                                  borrow=True)
                                              
        self.shared_y = theano.shared(np.zeros((config['file_batch_size'],), 
                                          dtype=int),   borrow=True)
        self.shared_lr = theano.shared(np.float32(config['learning_rate']))
        
        # training related
        self.base_lr = np.float32(config['learning_rate'])
        self.step_idx = 0
        self.mu = config['momentum'] # def: 0.9 # momentum
        self.eta = config['weight_decay'] #0.0002 # weight decay
        self.weight_types = weight_types
        self.batch_size = batch_size

                                          
        self.grads = T.grad(self.cost,self.params)
        
        subb_ind = T.iscalar('subb')  # sub batch index
        #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind*self.batch_size:(subb_ind+1)*self.batch_size]
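The constructor above stops after defining self.grads and the sub-batch slices. A hypothetical sketch of compiling a training function from those attributes (classical momentum plus the weight decay stored in self.eta; the class name and the update rule are assumptions, since the original training code is not shown) could be:

# Hypothetical sketch, not part of the original class. Assumes the class is
# called AlexNet, `config` is the same dict passed to __init__, and
# config['use_data_layer'] is False so the graph does not need the 'rand' input.
model = AlexNet(config)
vels = [theano.shared(p.get_value() * 0.) for p in model.params]

updates = []
for p, g, v in zip(model.params, model.grads, vels):
    v_new = model.mu * v - model.shared_lr * (g + model.eta * p)
    updates += [(v, v_new), (p, p + v_new)]

# one sub-batch is read straight out of the shared buffers via subb_ind
train_fn = theano.function(
    [model.subb_ind], model.cost, updates=updates,
    givens=[(model.x, model.shared_x_slice), (model.y, model.shared_y_slice)])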
Esempio n. 57
0
def main():
    # load the training and validation data sets
    train_X, test_X, train_y, test_y = load_data_cv('data/train.csv')

    X = T.ftensor4()
    Y = T.fmatrix()

    # set up theano functions to generate output by feeding data through network
    output_layer = lasagne_model()
    output_train = lasagne.layers.get_output(output_layer, X)
    output_valid = lasagne.layers.get_output(output_layer,
                                             X,
                                             deterministic=True)

    # set up the loss that we aim to minimize
    loss_train = T.mean(T.nnet.categorical_crossentropy(output_train, Y))
    loss_valid = T.mean(T.nnet.categorical_crossentropy(output_valid, Y))

    # prediction functions for classifications
    pred = T.argmax(output_train, axis=1)
    pred_valid = T.argmax(output_valid, axis=1)

    # get parameters from network and set up sgd with nesterov momentum to update parameters
    params = lasagne.layers.get_all_params(output_layer)
    updates = nesterov_momentum(loss_train,
                                params,
                                learning_rate=0.003,
                                momentum=0.9)

    # set up training and prediction functions
    train = theano.function(inputs=[X, Y],
                            outputs=loss_train,
                            updates=updates,
                            allow_input_downcast=True)
    valid = theano.function(inputs=[X, Y],
                            outputs=loss_valid,
                            allow_input_downcast=True)
    predict_valid = theano.function(inputs=[X],
                                    outputs=pred_valid,
                                    allow_input_downcast=True)

    # loop over training functions for however many iterations, print information while training
    train_eval = []
    valid_eval = []
    valid_acc = []
    try:
        for i in range(45):
            train_loss = batch_iterator(train_X, train_y, BATCHSIZE, train)
            train_eval.append(train_loss)
            valid_loss = valid(test_X, test_y)
            valid_eval.append(valid_loss)
            acc = np.mean(np.argmax(test_y, axis=1) == predict_valid(test_X))
            valid_acc.append(acc)
            print 'iter:', i, '| Tloss:', train_loss, '| Vloss:', valid_loss, '| valid acc:', acc

    except KeyboardInterrupt:
        pass

    # save weights
    all_params = helper.get_all_param_values(output_layer)
    f = gzip.open('data/weights.pklz', 'wb')
    pickle.dump(all_params, f)
    f.close()

    # plot loss and accuracy
    train_eval = np.array(train_eval)
    valid_eval = np.array(valid_eval)
    valid_acc = np.array(valid_acc)
    sns.set_style("whitegrid")
    pyplot.plot(train_eval, linewidth=3, label='train loss')
    pyplot.plot(valid_eval, linewidth=3, label='valid loss')
    pyplot.legend(loc=2)
    pyplot.twinx()
    pyplot.plot(valid_acc, linewidth=3, label='valid accuracy', color='r')
    pyplot.grid()
    pyplot.ylim([.9, 1])
    pyplot.legend(loc=1)
    pyplot.savefig('data/training_plot.png')
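main() relies on a batch_iterator helper and a BATCHSIZE constant that this snippet does not show; a minimal sketch of such an iterator, inferred from how it is called above (an assumption, not the original code), could be:

# Hypothetical sketch of batch_iterator; not taken from the original source.
def batch_iterator(data, y, batchsize, train_fn):
    n_samples = data.shape[0]
    indices = np.random.permutation(n_samples)
    losses = []
    # iterate over shuffled minibatches and accumulate the training loss
    for start in range(0, n_samples - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        losses.append(train_fn(data[excerpt], y[excerpt]))
    return np.mean(losses)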
Esempio n. 58
0
    def setUp(self):
        self.input = tensor.ftensor4()
        self.filters = tensor.ftensor4()
        self.topgrad = tensor.ftensor4()

        self.constant_tensor = numpy.zeros((3, 5, 7, 11), dtype='float32')
Esempio n. 59
0
# gpu id
gpu_id = 1

# create the mem recorder object
mem_recorder = gpu_memory_recorder(gpu_id=gpu_id,
                                   process_id=current_process_id,
                                   log_dir=log_dir,
                                   log_filename=mem_usage_filename,
                                   recording_interval=interval)

# start recording
mem_recorder.start_recording()


# write some theano code
x = T.ftensor4()
y = T.ftensor4()

z = 2 * x + y

f = theano.function([x, y], z.mean())

# do some computation
for i in xrange(100):
    a = np.random.sample((i, 100, 100, 100)).astype('float32')
    b = np.random.sample((i, 100, 100, 100)).astype('float32')

    c = f(a, b)

    # we can generate the chart at any point after we started recording.
    # the 50 means it will use only the last 50 data points when
Esempio n. 60
0
    def initializeModel(self):
        '''
            define your deep learning model
        '''
        print 'defining model'

        X = T.ftensor4()
        Y = T.fmatrix()

        # initialize your weights / kernels
        # format: n kernels, n channels, kernel_w x kernel_h
        # 20 kernels over the 3-channel input with 5 x 5 sized kernels
        w1 = self.init_weights((20, 3, 5, 5),
                               weightType='Xavier',
                               caffeLayerName='conv1')

        # 50 20-channel 5 x 5 sized kernel
        w2 = self.init_weights((50, 20, 5, 5),
                               weightType='Xavier',
                               caffeLayerName='conv2')

        # flatten the inputs and pass to fully connected layer
        w4 = self.init_weights((7200, 1000), weightType='Xavier')

        # flatten the inputs and pass to fully connected layer
        w5 = self.init_weights((1000, 500), weightType='Xavier')

        # flatten the inputs and pass to fully connected layer
        w_output = self.init_weights((500, 2), weightType='Xavier')

        # define your deep model
        if self.dropout_params is None:
            # if there is no default dropout params mentioned, just set them manually
            self.dropout_params = {}
            self.dropout_params['conv'] = 0.1
            self.dropout_params['fc'] = 0.2
        print 'initializing with dropout_params: ', self.dropout_params[
            'conv'], self.dropout_params['fc']
        noise_l1, noise_l2, noise_l3, noise_l4, noise_l5, noise_py_x, convOut1 = self.model(
            X,
            w1,
            w2,
            w4,
            w5,
            w_output,
            p_drop_conv=self.dropout_params['conv'],
            p_drop_hidden=self.dropout_params['fc'])

        # get your label from the predicted probabilties
        y_x = T.argmax(noise_py_x, axis=1)
        # y_x = noise_py_x >= 0.5

        self.learning_rate = 0.0001

        self.params = [w1, w2, w4, w5, w_output]

        L1_norm = self.getL1Norm(self.params)
        L2_norm = self.getL2Norm(self.params)

        # pd = np.array(self.params)
        # mean cross entropy (note: L1_norm / L2_norm are computed above but not added to this cost)
        self.cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))

        self.paramUpdates = self.RMSprop(self.cost,
                                         self.params,
                                         lr=self.learning_rate)
        #self.paramUpdates = self.MomentumOptimizer(self.cost, self.params, lr = self.learning_rate)

        if self.modelToLoad is not None:
            self.loadThisModel(self.modelToLoad)

        # self.cost = T.mean((T.nnet.binary_crossentropy(noise_py_x, Y)))

        print 'compiling functions'
        print 'current learning rate: ', self.learning_rate
        start_compilation_time = time.clock()
        if self.mode == "Train":
            print 'compiling train function starting at ', strftime(
                "%Y-%m-%d %H:%M:%S")
            self.train = theano.function(inputs=[X, Y],
                                         outputs=self.cost,
                                         updates=self.paramUpdates,
                                         allow_input_downcast=True)
        print 'compiling predict function'
        self.predict = theano.function(inputs=[X],
                                       outputs=y_x,
                                       allow_input_downcast=True)
        print 'compiling predictProb function'
        self.predictProb = theano.function(inputs=[X],
                                           outputs=noise_py_x,
                                           allow_input_downcast=True)
        end_compilation_time = time.clock()
        self.getFirstLayerOutput = theano.function(inputs=[X],
                                                   outputs=convOut1)
        print 'compiled the functions, ended at ', strftime(
            "%Y-%m-%d %H:%M:%S")
        print 'time taken to compile the functions: ', end_compilation_time - start_compilation_time
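initializeModel calls self.RMSprop (as well as self.init_weights, self.getL1Norm/getL2Norm and self.model), which are defined elsewhere in the class; a hypothetical sketch of an RMSprop helper with the signature used above (an assumption, not the original implementation) could be:

    # Hypothetical sketch, not from the original class.
    def RMSprop(self, cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
        grads = T.grad(cost, params)
        updates = []
        for p, g in zip(params, grads):
            acc = theano.shared(p.get_value() * 0.)  # running mean of squared gradients
            acc_new = rho * acc + (1 - rho) * g ** 2
            updates.append((acc, acc_new))
            updates.append((p, p - lr * g / T.sqrt(acc_new + epsilon)))
        return updates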