Beispiel #1
0
def relevance_conv_z_plus(out_relevances, inputs, weights, bias=None):
    """Propagate relevance backwards through a conv layer using positive weights only.

    Args:
        out_relevances: relevance of the layer outputs.
        inputs: the layer inputs that produced those outputs.
        weights: convolution filters; only their positive entries are used.
        bias: ignored. A warning is logged when it is supplied.

    Returns:
        Symbolic relevance of the layer inputs.
    """
    if bias is not None:
        log.warning("Bias not respected for conv z_plus")

    # Zero out every non-positive weight.
    positive_w = weights * T.gt(weights, 0)

    # Forward pass with the positive weights yields the normalisers.
    raw_norms = conv2d(inputs, positive_w)
    # Where the normaliser is exactly 0, replace it by 1 to avoid dividing by 0.
    zero_mask = T.eq(raw_norms, 0)
    safe_norms = raw_norms + zero_mask * 1
    scaled = out_relevances / safe_norms

    # Backward ("up") convolution: swap in/out channels and flip the kernel.
    flipped_w = positive_w.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]
    redistributed = conv2d(scaled, flipped_w, border_mode="full")
    result = redistributed * inputs

    # Outputs whose normaliser was 0 would otherwise lose their relevance;
    # spread it equally over all positions covered by the filter.
    uniform = T.ones(positive_w.shape, dtype=np.float32)
    entries_per_filter = T.prod(positive_w.shape[1:]).astype(theano.config.floatX)
    fractions = uniform / entries_per_filter
    from_zero_norms = conv2d(
        out_relevances * zero_mask,
        fractions.dimshuffle(1, 0, 2, 3),
        subsample=(1, 1),
        border_mode="full",
    )

    result += from_zero_norms
    return result
Beispiel #2
0
    def lecun_lcn(self, X, kernel_size=7, threshold = 1e-4, use_divisor=False):
        """
        Local contrast normalization in the style of Yann LeCun.
        Based on original Theano code by Guillaume Desjardins.

        X is indexed as a 4D tensor. The filter has shape
        (1, 1, kernel_size, kernel_size), so a single channel is presumably
        expected -- TODO confirm with callers.

        Returns X minus its Gaussian-weighted neighbourhood average; when
        ``use_divisor`` is set, the result is also divided by a local norm
        that is bounded below by ``threshold``.
        """
        shape_4d = (1, 1, kernel_size, kernel_size)
        gauss = gaussian_filter(kernel_size).reshape(shape_4d)
        gauss = shared(_asarray(gauss, dtype=floatX), borrow=True)
        half = int(floor(kernel_size/2.))

        def _smooth_same_size(tensor):
            # 'full' convolution, then crop `half` pixels from every border
            full = conv2d(tensor, filters=gauss, filter_shape=shape_4d,
                          border_mode='full')
            return full[:, :, half:-half, half:-half]

        # Subtract the weighted neighbourhood mean from every pixel.
        centered = X - _smooth_same_size(X)
        if not use_divisor:
            return centered

        # Local norm of each kernel_size x kernel_size patch.
        local_norm = T.sqrt(_smooth_same_size(T.sqr(T.abs_(X))))
        mean_norm = local_norm.mean(axis=[2, 3])
        scale = T.largest(mean_norm.dimshuffle(0, 1, 'x', 'x'), local_norm)
        scale = T.maximum(scale, threshold)

        return centered / scale
Beispiel #3
0
def relevance_conv_a_b_abs(inputs, weights, out_relevances, a, b, bias=None):
    """Back-propagate relevance through a conv layer, weighting positive and negative parts.

    Positive and negative weights are handled separately on the absolute
    value of the inputs; the result is ``a * positive - b * negative``.

    Args:
        inputs: layer inputs (their absolute value is used).
        weights: convolution filters.
        out_relevances: relevance of the layer outputs.
        a: factor for the positive-weight contribution.
        b: factor for the negative-weight contribution; ``a - b`` must be 1.
        bias: accepted but not used by this function.

    Returns:
        Symbolic relevance of the layer inputs.
    """
    assert a is not None
    assert b is not None
    assert a - b == 1

    eps = 1e-4  # substituted where a normaliser is exactly 0

    def _flip(w):
        # swap in/out channel axes and mirror the kernel for the backward pass
        return w.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    magnitudes = T.abs_(inputs)
    w_pos = weights * T.gt(weights, 0)
    w_neg = weights * T.lt(weights, 0)

    # Positive-weight path.
    norm_pos = conv2d(magnitudes, w_pos)
    norm_pos = norm_pos + T.eq(norm_pos, 0) * eps
    rel_pos = conv2d(out_relevances / norm_pos, _flip(w_pos), border_mode="full")
    rel_pos = rel_pos * magnitudes

    # Negative-weight path; the sign is flipped so the quantities are
    # positive, because they are subtracted in the final combination.
    norm_neg = -conv2d(magnitudes, w_neg)
    norm_neg = norm_neg + T.eq(norm_neg, 0) * eps
    rel_neg = -conv2d(out_relevances / norm_neg, _flip(w_neg), border_mode="full")
    rel_neg = rel_neg * magnitudes

    return a * rel_pos - b * rel_neg
def convolutional_model(X, w_1, w_2, w_3, w_4, w_5, w_6, p_1, p_2, p_3, p_4, p_5):
    """Build a three-stage conv net followed by two rectified dense layers and a linear output.

    Each conv stage is conv -> rectify -> 2x2 max-pool -> add bias -> tanh -> dropout.
    NOTE: the biases ``b_1``, ``b_2`` and ``b_3`` are not parameters; they are
    read from the enclosing scope.

    Args:
        X: network input.
        w_1, w_2, w_3: filters of the conv stages.
        w_4, w_5, w_6: weight matrices of the dense layers.
        p_1 .. p_5: second argument handed to ``dropout`` after each layer.

    Returns:
        The symbolic output ``T.dot(l5, w_6)``.
    """
    def _conv_stage(signal, kernel, bias, **conv_kwargs):
        rectified = T.maximum(conv2d(signal, kernel, **conv_kwargs), 0.)
        pooled = max_pool_2d(rectified, (2, 2), ignore_border=True)
        return T.tanh(pooled + bias.dimshuffle('x', 0, 'x', 'x'))

    l1 = dropout(_conv_stage(X, w_1, b_1, border_mode='full'), p_1)
    l2 = dropout(_conv_stage(l1, w_2, b_2), p_2)
    # flatten to 2D so the dense layers can follow
    l3 = dropout(T.flatten(_conv_stage(l2, w_3, b_3), outdim=2), p_3)
    l4 = dropout(T.maximum(T.dot(l3, w_4), 0.), p_4)
    l5 = dropout(T.maximum(T.dot(l4, w_5), 0.), p_5)
    return T.dot(l5, w_6)
Beispiel #5
0
def _backward_negative_z(inputs, weights, normed_relevances, bias=None):
    """Send normalised relevances back through the negative contributions of a conv layer.

    The negative contributions are (positive weights x negative inputs) and
    (negative weights x positive inputs); a negative bias, when given, is
    spread equally over the filter's receptive field.

    Args:
        inputs: layer inputs.
        weights: convolution filters.
        normed_relevances: output relevances already divided by their normaliser.
        bias: optional per-output-channel bias; only its negative part is used.

    Returns:
        Symbolic tensor of the summed negative-part contributions.
    """
    def _flip(w):
        # swap in/out channel axes and mirror the kernel for the backward pass
        return w.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    pos_inputs = inputs * T.gt(inputs, 0)
    pos_weights = weights * T.gt(weights, 0)
    neg_inputs = inputs * T.lt(inputs, 0)
    neg_weights = weights * T.lt(weights, 0)

    # weights+ * inputs-
    via_pos_weights = conv2d(normed_relevances, _flip(pos_weights), border_mode="full")
    via_pos_weights = via_pos_weights * neg_inputs
    # weights- * inputs+
    via_neg_weights = conv2d(normed_relevances, _flip(neg_weights), border_mode="full")
    via_neg_weights = via_neg_weights * pos_inputs

    total = via_pos_weights + via_neg_weights
    if bias is None:
        return total

    neg_bias = bias * T.lt(bias, 0)
    bias_rel = neg_bias.dimshuffle("x", 0, "x", "x") * normed_relevances
    # Divide by the number of entries in one filter before convolving back,
    # so each covered input position receives an equal share.
    share = bias_rel / T.prod(weights.shape[1:]).astype(theano.config.floatX)
    bias_back = conv2d(share, _flip(T.ones_like(weights)), border_mode="full")
    return total + bias_back
Beispiel #6
0
def theano_kernel_derivative(imshp,kshp,featshp,stride=1):
    """Build a callable mapping (image, features, kernel) to ``kernel_grad``.

    The graph reconstructs an image estimate by 'full'-convolving the
    features with the transposed, flipped kernel, forms the error
    ``image - image_estimate`` and convolves that error with the features.
    The negated result, with its first two axes swapped back, is the output.
    NOTE(review): the name suggests this is the derivative of a
    reconstruction cost with respect to the kernel -- confirm against the
    cost definition.

    Args:
        imshp: 4-element image shape.
        kshp: 4-element kernel shape.
        featshp: 4-element feature shape.
        stride: factor applied to the two trailing feature dimensions when
            forming the "logical" shapes passed to ``conv2d``.

    Returns:
        The object returned by ``function(inputs=[image, features, kernel],
        outputs=kernel_grad)``.
    """

    # Symbolic 4D placeholders for the three inputs of the compiled function.
    features = T.tensor4(dtype=theano.config.floatX)
    kernel = T.tensor4(dtype=theano.config.floatX)
    image = T.tensor4(dtype=theano.config.floatX)

    # Need to transpose first two dimensions of kernel, and reverse index kernel image dims (for correlation)
    kernel_rotated = T.transpose(kernel[:,:,::-1,::-1],axes=[1,0,2,3])

    # "Logical" feature shape: trailing two dims scaled by the stride.
    featshp_logical = (featshp[0],featshp[1],featshp[2]*stride,featshp[3]*stride)
    # Kernel shape with the first two axes swapped, matching kernel_rotated.
    kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
    image_estimate = conv2d(features,kernel_rotated,border_mode='full',
                            image_shape=featshp,filter_shape=kshp_rotated,
                            imshp_logical=featshp_logical[1:],kshp_logical=kshp[2:])

    image_error = image - image_estimate

    # Swap the first two axes and flip spatially so the error can be
    # convolved with the (axis-swapped) features.
    image_error_rot = T.transpose(image_error,[1,0,2,3])[:,:,::-1,::-1]
    imshp_rot = (imshp[1],imshp[0],imshp[2],imshp[3])
    featshp_rot = (featshp[1],featshp[0],featshp[2],featshp[3])
    features_rot = T.transpose(features,[1,0,2,3])

    featshp_rot_logical = (featshp_rot[0],featshp_rot[1],featshp_rot[2]*stride,featshp_rot[3]*stride)
    kernel_grad_rot = -conv2d(image_error_rot,features_rot,
                              image_shape=imshp_rot,filter_shape=featshp_rot,
                              imshp_logical=imshp_rot[1:],kshp_logical=featshp_rot_logical[2:])
    # Undo the axis swap on the result.
    kernel_grad = T.transpose(kernel_grad_rot,[1,0,2,3])

    return function(inputs=[image,features,kernel],outputs=kernel_grad)
Beispiel #7
0
def model(X, params, featMaps, pieces, pDropConv, pDropHidden):
    """Build a maxout conv net: three conv stages, two rectified dense layers, softmax.

    Args:
        X: network input.
        params: indexable of (weights, bias) pairs; entries 0-2 are the conv
            stages, 3-4 the hidden dense layers and 5 the output layer.
        featMaps: per-conv-stage value passed to ``maxout``.
        pieces: per-conv-stage value passed to ``maxout``.
        pDropConv: dropout argument used after every conv stage.
        pDropHidden: dropout argument used after every hidden dense layer.

    Returns:
        Symbolic softmax output.
    """
    layer = X

    # Conv stages: 'half'-padded conv + bias, maxout, 2x2 max-pool, dropout.
    # The original notes give spatial sizes 32 -> 16 -> 8 -> 4 across the
    # stages (assumes 32x32 inputs -- TODO confirm).
    for stage in range(3):
        stage_w = params[stage][0]
        stage_b = params[stage][1]
        layer = conv2d(layer, stage_w, border_mode='half') + \
            stage_b.dimshuffle('x', 0, 'x', 'x')
        layer = maxout(layer, featMaps[stage], pieces[stage])
        layer = pool_2d(layer, (2, 2), st=(2, 2), ignore_border=False, mode='max')
        layer = basicUtils.dropout(layer, pDropConv)

    layer = T.flatten(layer, outdim=2)

    # Hidden dense layers: affine, rectify (alpha=0), dropout.
    for stage in (3, 4):
        layer = T.dot(layer, params[stage][0]) + params[stage][1].dimshuffle('x', 0)
        layer = relu(layer, alpha=0)
        layer = basicUtils.dropout(layer, pDropHidden)

    # Original author's note: training produces NaN if the softmax from nnet is used.
    logits = T.dot(layer, params[5][0]) + params[5][1].dimshuffle('x', 0)
    return softmax(logits)
Beispiel #8
0
def relevance_conv_z(out_relevances, inputs, weights, bias=None):
    """Propagate relevance backwards through a conv layer (z-rule with stabiliser).

    Args:
        out_relevances: relevance of the layer outputs.
        inputs: layer inputs.
        weights: convolution filters.
        bias: optional per-output-channel bias. When given it is part of the
            normaliser and receives its own share of relevance, which is
            spread equally over the filter's receptive field.

    Returns:
        Symbolic relevance of the layer inputs.
    """
    eps = 1e-4
    with_bias = bias is not None

    def _flip(w):
        # swap in/out channel axes and mirror the kernel for the backward pass
        return w.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1]

    # Forward pre-activations act as normalisers.
    z = conv2d(inputs, weights)
    if with_bias:
        z = z + bias.dimshuffle("x", 0, "x", "x")

    # Stabilise: push values away from 0 by eps in the direction of their
    # sign, then give entries that are still exactly 0 the value eps.
    z = z + T.sgn(z) * eps
    z = z + T.eq(z, 0) * eps

    scaled = out_relevances / z
    result = conv2d(scaled, _flip(weights), border_mode="full") * inputs

    if with_bias:
        bias_rel = bias.dimshuffle("x", 0, "x", "x") * scaled
        # Divide by the number of entries in one filter before convolving
        # back, so each covered input position receives an equal share.
        share = bias_rel / T.prod(weights.shape[1:]).astype(theano.config.floatX)
        result += conv2d(share, _flip(T.ones_like(weights)), border_mode="full")

    return result
def metaOp1(i, j, X, w1, w2, b1, b2):
    """Apply the (i, j) two-step conv unit: 'half' conv + relu, then 'valid' conv + relu.

    ``X`` is sliced at index ``j`` on axis 1; ``w1``, ``w2``, ``b1`` and
    ``b2`` are sliced at ``[i, j]``.
    """
    x_j = X[:, j, :, :, :]
    # per the original note: (n,1,r,c) ** (16,1,3,3) = (n,16,r,c)
    pre_hidden = conv2d(x_j, w1[i, j, :, :, :, :], border_mode='half')
    hidden = relu(pre_hidden + b1[i, j, :, :, :, :], alpha=0)
    # per the original note: (n,16,r,c) ** (1,16,1,1) = (n,1,r,c)
    pre_out = conv2d(hidden, w2[i, j, :, :, :, :], border_mode='valid')
    return relu(pre_out + b2[i, j, :, :, :, :], alpha=0)
Beispiel #10
0
def metaOp(i, j, X, w1, w2, b1, b2):
    """Apply the (i, j) two-step conv unit, with a relu after each convolution.

    ``X`` is sliced at index ``j`` on axis 1; ``w1``, ``w2``, ``b1`` and
    ``b2`` are sliced at ``[i, j]``. The relu activation is deliberately
    part of the meta-operation itself (per the original author's note).
    """
    x_j = X[:, j, :, :, :]
    # per the original note: (n,1,r,c) ** (16,1,3,3) = (n,16,r,c)
    pre_hidden = conv2d(x_j, w1[i, j, :, :, :, :], border_mode='half')
    hidden = relu(pre_hidden + b1[i, j, :, :, :, :], alpha=0)
    # per the original note: (n,16,r,c) ** (1,16,1,1) = (n,1,r,c)
    pre_out = conv2d(hidden, w2[i, j, :, :, :, :], border_mode='valid')
    return T.nnet.relu(pre_out + b2[i, j, :, :, :, :])
Beispiel #11
0
	def connect_through(self, alm1_in=None, Pl=None, Ll=None):
		"""Connect this layer's input to its output expression and return it.

		Kept separate from initialisation so the layer's units can still be
		changed before the layers are wired together.

		Args:
			alm1_in: optional new input; when given it replaces ``self.alm1``.
			Pl: pooling window passed as ``ws`` to ``pool.pool_2d``; ``None``
				skips pooling. Stored on ``self.Pl``.
			Ll: optional tuple appended to ``(None, C_lm1)`` to form the
				``input_shape`` hint for ``conv2d``; ``None`` omits the hint.

		Returns:
			``self.al``: the convolved (and optionally pooled) input plus
			bias, passed through ``self.psi`` when that is not ``None``.
		"""
		if alm1_in is not None:
			self.alm1 = alm1_in

		self.Pl = Pl
		alm1 = self.alm1
		C_lm1, C_l = self.C_ls
		Wl = self.Wl
		filter_shape = (C_l, C_lm1) + Wl
		assert len(filter_shape) == (2 + len(Wl))

		# Convolve the input feature maps with the filters. The shape hint is
		# only passed when the caller supplied Ll; the batch dimension is left
		# as None (unknown).
		conv_kwargs = dict(input=alm1, filters=self.c, filter_shape=filter_shape)
		if Ll is not None:
			conv_kwargs['input_shape'] = (None, C_lm1) + Ll
		conv_out = conv2d(**conv_kwargs)

		# Optionally pool each feature map individually.
		if Pl is not None:
			pre_activation = pool.pool_2d(
				input=conv_out,
				ws=Pl,
				ignore_border=True)
		else:
			pre_activation = conv_out

		# Add the bias, broadcast over every axis except the channel axis.
		pre_activation = pre_activation + self.b.dimshuffle('x', 0, 'x', 'x')

		if self.psi is None:
			self.al = pre_activation
		else:
			self.al = self.psi(pre_activation)

		return self.al
Beispiel #12
0
def CNN(x,c_l1,c_l2,f_l1,f_l2):
    """Build a two-stage conv net with one rectified dense layer and a sigmoid output.

    Args:
        x: network input.
        c_l1, c_l2: filters of the first and second conv stage.
        f_l1, f_l2: weight matrices of the dense layer and the output layer.

    Returns:
        Tuple ``(c_l1, c_l2, f_l1, f_l2, pyx)``: the four parameters passed
        in, unchanged, followed by the symbolic sigmoid output.
    """
    def _conv_pool(signal, kernel):
        # conv2d with its defaults, relu, then a 2x2 pool with stride 2
        activated = tensor.nnet.relu(conv2d(signal, kernel))
        return pool_2d(activated, (2, 2), st=(2, 2), ignore_border=True)

    stage1 = _conv_pool(x, c_l1)
    stage2 = _conv_pool(stage1, c_l2)
    flat = tensor.flatten(stage2, outdim=2)
    hidden = tensor.nnet.relu(tensor.dot(flat, f_l1))
    pyx = tensor.nnet.sigmoid(tensor.dot(hidden, f_l2))

    return c_l1, c_l2, f_l1, f_l2, pyx
def predict_custom_image(params, testImgFilename='own_0.png', activation= activation_convmlp, testImgFilenameDir = '../data/custom/'):
    """Classify one custom image with a trained two-conv-layer network and report the result.

    The expected label is taken from the digits contained in
    ``testImgFilename`` (e.g. ``'own_0.png'`` -> 0).

    Args:
        params: trained parameters, indexed as used below:
            [6], [7] first conv filters / bias; [4], [5] second conv
            filters / bias; [2], [3] hidden layer weights / bias;
            [0], [1] output layer weights / bias.
        testImgFilename: image file name; its digits form the expected label.
        activation: activation applied after both conv stages and the hidden layer.
        testImgFilenameDir: directory handed to ``fli.processImg``.

    Returns:
        Truth value of "prediction equals the label from the file name".

    Raises:
        ValueError: if the file name contains no digits.
    """
    # Join explicitly: on Python 3 filter() returns an iterator, which int()
    # cannot parse; on Python 2 the join is a no-op on the filtered string.
    test_img_value = ''.join(filter(str.isdigit, testImgFilename))

    test_img = fli.processImg(testImgFilenameDir, testImgFilename)

    nkerns = [20, 50]
    batch_size = 1
    poolsize = (2, 2)

    layer0_input = test_img.reshape((batch_size, 1, 28, 28)).astype(numpy.float32)

    # First conv stage: 28x28 input, 5x5 filters.
    conv_out_0 = conv2d(
        input=layer0_input,
        filters=params[6],
        input_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5)
    )

    # downsample each feature map individually, using maxpooling
    pooled_out_0 = downsample.max_pool_2d(
        input=conv_out_0,
        ds=poolsize,
        ignore_border=True
    )

    output_0 = activation(pooled_out_0 + params[7].dimshuffle('x', 0, 'x', 'x'))

    # Second conv stage: (28 - 5 + 1) / 2 = 12, hence 12x12 inputs.
    conv_out_1 = conv2d(
        input=output_0,
        filters=params[4],
        input_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
    )

    # downsample each feature map individually, using maxpooling
    pooled_out_1 = downsample.max_pool_2d(
        input=conv_out_1,
        ds=poolsize,
        ignore_border=True
    )

    output_1 = activation(pooled_out_1 + params[5].dimshuffle('x', 0, 'x', 'x'))
    output_2 = activation(T.dot(output_1.flatten(2), params[2]) + params[3])

    final_output = T.dot(output_2, params[0]) + params[1]
    p_y_given_x = T.nnet.softmax(final_output)
    y_pred = T.argmax(p_y_given_x, axis=1)

    # Compile and evaluate once; the result is reused for the message below.
    testfunc = theano.function([], [y_pred[0]])
    prediction = testfunc()[0]
    correct = (int(test_img_value) == prediction)
    print('The prediction ' + str(prediction) + ' for ' + testImgFilename + '  is ' + str(correct) + '.')
    return correct
Beispiel #14
0
def _forward_negative_z(inputs, weights, bias=None):
    """Compute the negative part of a conv layer's pre-activation.

    This is the sum of (positive inputs x negative weights) and
    (negative inputs x positive weights), plus the negative part of the
    bias when one is given.

    Args:
        inputs: layer inputs.
        weights: convolution filters.
        bias: optional per-output-channel bias.

    Returns:
        Symbolic tensor of the negative contributions.
    """
    pos_inputs = inputs * T.gt(inputs, 0)
    neg_inputs = inputs * T.lt(inputs, 0)
    pos_weights = weights * T.gt(weights, 0)
    neg_weights = weights * T.lt(weights, 0)

    total = conv2d(pos_inputs, neg_weights) + conv2d(neg_inputs, pos_weights)
    if bias is None:
        return total

    neg_bias = bias * T.lt(bias, 0)
    return total + neg_bias.dimshuffle("x", 0, "x", "x")
Beispiel #15
0
def conv2d_test_sets():
    """Yield conv2d test cases as ``[shared input, kernel, expected output, kwargs]``.

    Two cases are produced, with 1 and 3 input channels. Each uses a random
    (3, channels, 16, 16) input and a random (16, channels, 3, 3) kernel;
    the expected output is ``conv2d(input, kernel).eval()``.
    """
    def _pack(data, filt, expected, kwargs):
        return [theano.shared(floatX(data)), floatX(filt), expected, kwargs]

    for n_channels in (1, 3):
        data = np.random.random((3, n_channels, 16, 16))
        filt = np.random.random((16, n_channels, 3, 3))
        expected = conv2d(data, filt).eval()
        yield _pack(data, filt, expected, {})
Beispiel #16
0
def modelFlow(X, params):
    """Build three conv + relu + 2x2 pool stages and return every intermediate tensor.

    Args:
        X: network input.
        params: indexable of (filters, bias) pairs, one per conv stage.

    Returns:
        Tuple ``(X, lconv1, lds1, lconv2, lds2, lconv3, lds3)``.
    """
    def _conv_relu(signal, stage_params, **conv_kwargs):
        filters, bias = stage_params[0], stage_params[1]
        return relu(conv2d(signal, filters, **conv_kwargs) +
                    bias.dimshuffle('x', 0, 'x', 'x'))

    # Only the first stage uses 'full' border mode.
    lconv1 = _conv_relu(X, params[0], border_mode='full')
    lds1 = pool_2d(lconv1, (2, 2))

    lconv2 = _conv_relu(lds1, params[1])
    lds2 = pool_2d(lconv2, (2, 2))

    lconv3 = _conv_relu(lds2, params[2])
    lds3 = pool_2d(lconv3, (2, 2))

    return X, lconv1, lds1, lconv2, lds2, lconv3, lds3
    def __init__(self, input, filter_shape, image_shape, f_params_w, f_params_b, lrn=False, t_style=None, t_content=None, convstride=1, padsize =0, group=1, poolsize = 3, poolstride = 1):
        """Build a conv + relu (+ pool, + LRN) layer from weights stored on disk.

        Args:
            input: symbolic 4D input.
            filter_shape: filter shape passed to ``conv2d`` (halved on axes 0
                and 1 when ``group == 2``).
            image_shape: input shape; only used when ``group == 2`` to split
                the channel axis in half.
            f_params_w: weight file name under ``params_path`` (a pair of
                names when ``group == 2``). The loaded array is transposed
                with axes (3, 0, 1, 2).
            f_params_b: bias file name (a pair of names when ``group == 2``).
            lrn: when ``True``, apply ``CrossChannelNormalization`` to the output.
            t_style: optional path of an array stored on ``self.t_style``.
            t_content: optional path of an array stored on ``self.t_content``.
            convstride: convolution stride, used for both spatial axes.
            padsize: value passed as ``border_mode`` to ``conv2d``.
            group: 1 for a plain convolution, 2 for two convolutions over
                the two halves of the channel axis.
            poolsize: pooling window edge; 1 disables pooling.
            poolstride: pooling stride.

        Raises:
            AssertionError: if ``group`` is neither 1 nor 2.
        """

        self.input = input
        if t_style is not None:
            self.t_style = np.asarray(np.load(t_style),dtype=theano.config.floatX)

        if t_content is not None:
            self.t_content = np.asarray(np.load(t_content),dtype=theano.config.floatX)

        if lrn is True:
            self.lrn_func = CrossChannelNormalization()

        if group == 1:
            self.W = theano.shared(np.asarray(np.transpose(np.load(os.path.join(params_path,f_params_w)),(3,0,1,2)),dtype=theano.config.floatX), borrow=True)
            self.b = theano.shared(np.asarray(np.load(os.path.join(params_path,f_params_b)),dtype=theano.config.floatX), borrow=True)
            # The bias is added here just as in the group == 2 branch;
            # previously self.b was loaded but never applied.
            conv_out = conv2d(input=self.input,filters=self.W,filter_shape=filter_shape,border_mode = padsize,subsample=(convstride, convstride),filter_flip=True) + self.b.dimshuffle('x', 0, 'x', 'x')

        elif group == 2:
            # Each half of the input channels gets its own half-sized filter bank.
            self.filter_shape = np.asarray(filter_shape)
            self.image_shape = np.asarray(image_shape)
            # Floor division keeps the halved shapes integral on Python 3 too.
            self.filter_shape[0] = self.filter_shape[0] // 2
            self.filter_shape[1] = self.filter_shape[1] // 2
            self.image_shape[1] = self.image_shape[1] // 2
            self.W0 = theano.shared(np.asarray(np.transpose(np.load(os.path.join(params_path,f_params_w[0])),(3,0,1,2)),dtype=theano.config.floatX), borrow=True)
            self.W1 = theano.shared(np.asarray(np.transpose(np.load(os.path.join(params_path,f_params_w[1])),(3,0,1,2)),dtype=theano.config.floatX), borrow=True)
            self.b0 = theano.shared(np.asarray(np.load(os.path.join(params_path,f_params_b[0])),dtype=theano.config.floatX), borrow=True)
            self.b1 = theano.shared(np.asarray(np.load(os.path.join(params_path,f_params_b[1])),dtype=theano.config.floatX), borrow=True)
            conv_out0 = conv2d(input=self.input[:,:self.image_shape[1],:,:],filters=self.W0,filter_shape=tuple(self.filter_shape),border_mode = padsize,subsample=(convstride, convstride),filter_flip=True) + self.b0.dimshuffle('x', 0, 'x', 'x')
            conv_out1 = conv2d(input=self.input[:,self.image_shape[1]:,:,:],filters=self.W1,filter_shape=tuple(self.filter_shape),border_mode = padsize,subsample=(convstride, convstride),filter_flip=True) + self.b1.dimshuffle('x', 0, 'x', 'x')
            conv_out = T.concatenate([conv_out0, conv_out1],axis=1)

        else:
            raise AssertionError("group must be 1 or 2, got %r" % (group,))

        relu_out = T.maximum(conv_out, 0)
        if poolsize != 1:
            # Average pooling that excludes padding from the average.
            self.output = pool.pool_2d(input=relu_out,ds=(poolsize,poolsize),ignore_border=True, st=(poolstride,poolstride),mode='average_exc_pad')
        else:
            self.output = relu_out

        if lrn is True:
            self.output = self.lrn_func(self.output)
def nin2(X, param, shape):
    """Build a network-in-network style layer from per-(output, input) channel micro-convolutions.

    For every output index ``i`` and input channel ``j``, channel ``j`` of
    ``X`` is convolved with ``w1[i, j]``, rectified, and convolved again with
    ``w2[i, j]`` reshaped to a 1x1 kernel. The results are summed over ``j``,
    rectified, and concatenated over ``i`` along axis 1.

    Args:
        X: 4D input; axis 1 is indexed by ``j``.
        param: pair ``(w1, w2)``.
        shape: ``shape[0]`` is the number of output indices, ``shape[1]``
            the number of input channels iterated over.

    Returns:
        Symbolic concatenation of the per-output maps along axis 1.
    """
    w1, w2 = param
    map0 = []
    # range() instead of xrange(): identical iteration, but also defined on Python 3.
    for i in range(shape[0]):
        map1 = []
        for j in range(shape[1]):
            # Re-insert a singleton channel axis so conv2d sees 4D tensors.
            Xj = X[:, j, :, :].dimshuffle(0, 'x', 1, 2)
            w1ij = w1[i, j, :, :, :].dimshuffle(0, 'x', 1, 2)
            w2ij = w2[i, j, :].dimshuffle('x', 0, 'x', 'x')
            tmp = conv2d(Xj, w1ij, border_mode='valid')
            tmp = relu(tmp, alpha=0)
            map1.append(conv2d(tmp, w2ij, border_mode='valid'))
        map0.append(relu(T.sum(map1, axis=0), alpha=0))
    return T.concatenate(map0, axis=1)
Beispiel #19
0
    def lecun_lcn(self, X, kernel_size=9, threshold = 1e-4, use_divisor=True, border=False):
        """
        Yann LeCun's local contrast normalization.
        Original code in Theano by: Guillaume Desjardins

        X is indexed as a 4D tensor. The filter has shape
        (1, 1, kernel_size, kernel_size), so a single channel is presumably
        expected -- TODO confirm with callers.

        :param X: input tensor
        :param kernel_size: edge length of the Gaussian kernel
        :param threshold: lower bound for the squared-sum term and the divisor
        :param use_divisor: also divide by the local norm when True
        :param border: when True, pad X by replicating its edge rows and
            columns and use a 'valid' convolution for the centering step,
            instead of a cropped 'full' convolution
        :return: the centered (and optionally divided) tensor
        """

        filter_shape = (1, 1, kernel_size, kernel_size)
        filters = gaussian_filter(kernel_size).reshape(filter_shape)
        filters = shared(_asarray(filters, dtype=floatX), borrow=True)
        mid = int(floor(kernel_size/2.))

        if border:
            # Floor division: the repeat count must be an integer. True
            # division would make it a float on Python 3.
            r = (kernel_size-1)//2
            # Replicate the first/last row, then the first/last column.
            up = X[:,:,0:1,:].repeat(r,axis=2)
            down = X[:,:,-1:,:].repeat(r,axis=2)
            X_ = T.concatenate([up,X,down],axis=2)
            left = X_[:,:,:,0:1].repeat(r,axis=3)
            right = X_[:,:,:,-1:].repeat(r,axis=3)
            X_ = T.concatenate([left,X_,right],axis=3)

            convout = conv2d(X_, filters=filters, filter_shape=filter_shape,
                                border_mode='valid')
            centered_X = X - convout

        else:
            convout = conv2d(X, filters=filters, filter_shape=filter_shape,
                                border_mode='full')

            # For each pixel, remove mean of kernel_sizexkernel_size neighborhood
            centered_X = X - convout[:,:,mid:-mid,mid:-mid]

        if use_divisor:
            # Scale down norm of kernel_sizexkernel_size patch
            sum_sqr_XX = conv2d(T.sqr(X), filters=filters,
                                filter_shape=filter_shape, border_mode='full')

            sum_sqr_XX = sum_sqr_XX[:,:,mid:-mid,mid:-mid]
            # Clamp before the square root so the root is taken of a
            # strictly positive value.
            sum_sqr_XX = T.maximum(sum_sqr_XX, threshold)
            denom = T.sqrt(sum_sqr_XX)
            per_img_mean = denom.mean(axis=[2,3])
            divisor = T.largest(per_img_mean.dimshuffle(0,1,'x','x'), denom)
            divisor = T.maximum(divisor, threshold)

            new_X = centered_X / divisor
            return new_X

        else:
            return centered_X
def conv(X, w, b, activation):
    """Convolve ``X`` with shared filters ``w``, keeping the spatial size, then add bias and activate.

    A 'full' convolution is cropped by half the kernel width on every side.
    The crop width is derived from the last dimension of ``w`` only.

    Args:
        X: symbolic 4D input.
        w: shared variable holding the filters (read via ``get_value()``).
        b: optional per-output-channel bias, or ``None``.
        activation: callable applied to the result.

    Returns:
        ``activation`` of the cropped convolution plus bias.
    """
    s = int(np.floor(w.get_value().shape[-1]/2.))
    z = conv2d(X, w, border_mode='full')
    # With a 1x1 kernel s is 0 and the 'full' output already has the input's
    # size; slicing with [0:-0] would produce an empty tensor.
    if s > 0:
        z = z[:, :, s:-s, s:-s]
    if b is not None:
        z += b.dimshuffle('x', 0, 'x', 'x')
    return activation(z)
Beispiel #21
0
def T_l2_cost_conv_dA(x,a,A,imshp,kshp,featshp,stride=(1,1),mask=True):
    """Build the symbolic kernel gradient, reshaped to 2D and transposed.

    ``helper_T_l2_cost_conv`` supplies the image error, the kernel and the
    features. For unit stride the gradient is obtained by convolving the
    axis-swapped, flipped image error with the axis-swapped features; for
    any other stride the ``MyConv_view`` op is used instead.
    NOTE(review): by its name this is the derivative of the L2
    reconstruction cost with respect to ``A`` -- confirm against
    ``helper_T_l2_cost_conv``.

    Args:
        x, a, A, mask: forwarded unchanged to ``helper_T_l2_cost_conv``.
        imshp: 4-element image shape.
        kshp: 4-element kernel shape.
        featshp: 4-element feature shape.
        stride: 2-tuple; ``(1, 1)`` selects the conv2d path.

    Returns:
        The kernel gradient reshaped to
        ``(kshp[0], kshp[1]*kshp[2]*kshp[3])`` and then transposed.
    """
    # `kernel` is returned by the helper but not used in this function.
    image_error, kernel, features = helper_T_l2_cost_conv(x=x,a=a,A=A,imshp=imshp,kshp=kshp,featshp=featshp,stride=stride,mask=mask)

    if stride == (1,1):

        # Swap the first two axes and flip spatially so the error can be
        # convolved with the (axis-swapped) features.
        image_error_rot = T.transpose(image_error,[1,0,2,3])[:,:,::-1,::-1]
        imshp_rot = (imshp[1],imshp[0],imshp[2],imshp[3])
        featshp_rot = (featshp[1],featshp[0],featshp[2],featshp[3])
        features_rot = T.transpose(features,[1,0,2,3])

        # Logical spatial size of the features: that of a 'valid'
        # convolution of the image with the kernel.
        featshp_rot_logical = (featshp_rot[0],
                               featshp_rot[1],
                               imshp[2] - kshp[2] + 1,
                               imshp[3] - kshp[3] + 1)
        kernel_grad_rot = -1.*conv2d(image_error_rot,features_rot,
                                  image_shape=imshp_rot,filter_shape=featshp_rot,
                                  imshp_logical=imshp_rot[1:],kshp_logical=featshp_rot_logical[2:])
        # Undo the axis swap on the result.
        kernel_grad = T.transpose(kernel_grad_rot,[1,0,2,3])

        reshape_kernel_grad = T.transpose(T.reshape(kernel_grad,(kshp[0],kshp[1]*kshp[2]*kshp[3]),ndim=2))

        return reshape_kernel_grad

    else:
        # Non-unit stride: delegate to the project's MyConv_view op.
        my_conv = MyConv_view(strides=stride,kshp=kshp)
        kernel_grad = my_conv(image_error,features)

        reshape_kernel_grad = T.transpose(T.reshape(kernel_grad, (kshp[0], kshp[1] * kshp[2] * kshp[3]), ndim=2))

        return reshape_kernel_grad
def conv1d_mc0(input, filters, input_shape=None, filter_shape=None,
               border_mode='valid', subsample=(1,)):
    """
    1D convolution implemented with conv2d by inserting a singleton axis
    at position 2: (b, c, i0) is treated as (b, c, 1, i0).

    ``input_shape`` and ``filter_shape`` are optional 3-element shape hints
    that are expanded the same way. The singleton axis is removed again
    from the result.
    """
    def _expand(shape):
        # (d0, d1, d2) -> (d0, d1, 1, d2); None stays None
        if shape is None:
            return None
        return (shape[0], shape[1], 1, shape[2])

    signal_4d = input.dimshuffle(0, 1, 'x', 2)
    kernel_4d = filters.dimshuffle(0, 1, 'x', 2)

    out_4d = conv2d(
        signal_4d, kernel_4d,
        input_shape=_expand(input_shape),
        filter_shape=_expand(filter_shape),
        subsample=(1, subsample[0]),
        border_mode=border_mode)
    # drop the singleton axis again
    return out_4d[:, :, 0, :]
Beispiel #23
0
    def test_conv_with_bias(self):
        """Compare the MKL conv-with-bias path against plain conv2d + bias for several batch sizes."""
        images = T.dtensor4('inputs')
        weights = T.dtensor4('weights')
        bias = T.dvector('bias')

        # The same number is used as batch size and as number of filters.
        for n in (8, 16, 32, 64):
            ish = (n, 3, 256, 256)
            wsh = (n, 3, 3, 3)

            # MKL path: user layout -> internal layout -> conv -> user layout.
            internal_in = U2IConv(imshp=ish, kshp=wsh)(images)
            internal_out = Conv2D(imshp=ish, kshp=wsh, filter_flip=False)(internal_in, weights, bias)
            mkl_graph = I2U()(internal_out)

            ival = numpy.random.rand(*ish).astype(numpy.float64)
            wval = numpy.random.rand(*wsh).astype(numpy.float64)
            bval = numpy.random.rand(wsh[0]).astype(numpy.float64)

            fopt = theano.function(inputs=[images, weights, bias], outputs=mkl_graph, mode=mode_with_mkl)
            mkl_result = fopt(ival, wval, bval)

            # Reference path: regular conv2d followed by a broadcast bias add.
            reference_graph = conv2d(images, weights, input_shape=ish, filter_shape=wsh, filter_flip=False) \
                + bias.dimshuffle('x', 0, 'x', 'x')
            fori = theano.function(inputs=[images, weights, bias], outputs=reference_graph, mode=mode_without_mkl)
            reference_result = fori(ival, wval, bval)

            # The two compiled graphs must differ, yet agree numerically.
            assert str(fopt.maker.fgraph.toposort()) != str(fori.maker.fgraph.toposort())
            assert numpy.allclose(reference_result, mkl_result)
def conv1d_sc(input, filters, input_shape=None, filter_shape=None,
              border_mode='valid', subsample=(1,)):
    """
    1D convolution implemented with conv2d on a single input channel:
    (b, c, i0) is treated as (b, 1, c, i0), so the original channel axis
    becomes a spatial axis that is convolved over.

    Only ``border_mode='valid'`` is supported; any other value is logged
    and raises RuntimeError.
    """
    if border_mode != 'valid':
        message = "Unsupported border_mode for conv1d_sc: %s" % border_mode
        log.error(message)
        raise RuntimeError(message)

    def _expand(shape):
        # (d0, d1, d2) -> (d0, 1, d1, d2); None stays None
        if shape is None:
            return None
        return (shape[0], 1, shape[1], shape[2])

    signal_4d = input.dimshuffle(0, 'x', 1, 2)
    # The channel axis is convolved over, so it has to be flipped.
    kernel_4d = filters.dimshuffle(0, 'x', 1, 2)[:, :, ::-1, :]

    out_4d = conv2d(signal_4d, kernel_4d,
                    input_shape=_expand(input_shape),
                    filter_shape=_expand(filter_shape),
                    subsample=(1, subsample[0]))
    # drop the collapsed channel axis
    return out_4d[:, :, 0, :]
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), activation=T.tanh):
        """Create a convolution layer with uniformly initialised filters and zero bias.

        :param rng: random generator providing ``uniform(low, high, size)``
        :param input: symbolic input tensor
        :param filter_shape: (output maps, input maps, filter rows, filter cols)
        :param image_shape: (batch size, input maps, image rows, image cols)
        :param poolsize: only enters the ``fan_out`` estimate used for
            initialisation; no pooling is applied in this constructor
        :param activation: function applied to convolution output plus bias
        """
        assert image_shape[1] == filter_shape[1]
        self.input = input

        n_out_maps = filter_shape[0]
        # inputs feeding one unit: input maps * filter rows * filter cols
        fan_in = np.prod(filter_shape[1:])
        fan_out = (n_out_maps * np.prod(filter_shape[2:]) //
                   np.prod(poolsize))
        W_bound = np.sqrt(6. / (fan_in + fan_out))

        initial_W = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
        self.W = theano.shared(
            np.asarray(initial_W, dtype=theano.config.floatX),
            borrow=True
        )
        self.b = theano.shared(
            value=np.zeros((n_out_maps,), dtype=theano.config.floatX),
            borrow=True
        )

        self.conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            input_shape=image_shape
        )

        # bias is broadcast over batch and both spatial axes
        self.output = activation(self.conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]
        # shape of a 'valid' convolution result
        out_rows = image_shape[2]-filter_shape[2]+1
        out_cols = image_shape[3] - filter_shape[3] + 1
        self.outshape = (image_shape[0], n_out_maps, out_rows, out_cols)
Beispiel #26
0
    def __init__(self, rng, input, layer_shape, input_shape, pool_size = (2,2)):
        '''
        Build a convolution + pooling layer with tanh activation.

        :param rng: random number generator providing ``uniform``
        :param input: 4D tensor with the shape given by input_shape
        :param layer_shape: (output maps, input maps, kernel rows, kernel cols)
        :param input_shape: (batch size, input maps, image rows, image cols)
        :param pool_size: pooling window
        :return: Nothing
        '''
        assert input_shape[1] == layer_shape[1]
        self.input = input

        n_out_maps = layer_shape[0]
        fan_in = np.prod(layer_shape[1:])
        fan_out = (n_out_maps * np.prod(layer_shape[2:])) // np.prod(pool_size)
        W_bound = np.sqrt(6.0 / (fan_out + fan_in))

        # filters drawn uniformly from [-W_bound, W_bound]; bias starts at zero
        initial_W = rng.uniform(low = - W_bound, high= W_bound, size = layer_shape)
        self.W = theano.shared(np.array(initial_W, dtype = theano.config.floatX),
                               borrow=True)
        initial_b = np.zeros(shape = (n_out_maps, ), dtype = theano.config.floatX)
        self.b = theano.shared(initial_b, borrow = True)

        # the two shape arguments are passed on to conv2d alongside the tensors
        feature_maps = conv2d(input, self.W, filter_shape = layer_shape, input_shape = input_shape)
        pooled = downsample.pool_2d(feature_maps, pool_size, ignore_border = True)
        # bias is broadcast over batch and both spatial axes
        self.output = T.tanh(pooled + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.params = [self.W, self.b]
def conv1d_mc1(input, filters, input_shape=None, filter_shape=None,
               border_mode='valid', subsample=(1,)):
    """
    1D convolution implemented with conv2d by appending a singleton axis:
    (b, c, i0) is treated as (b, c, i0, 1).

    ``input_shape`` and ``filter_shape`` are optional 3-element shape hints
    that are expanded the same way. The singleton axis is removed again
    from the result.
    """
    def _expand(shape):
        # (d0, d1, d2) -> (d0, d1, d2, 1); None stays None
        if shape is None:
            return None
        return (shape[0], shape[1], shape[2], 1)

    signal_4d = input.dimshuffle(0, 1, 2, 'x')
    kernel_4d = filters.dimshuffle(0, 1, 2, 'x')

    out_4d = conv2d(
        signal_4d, kernel_4d,
        input_shape=_expand(input_shape),
        filter_shape=_expand(filter_shape),
        subsample=(subsample[0], 1),
        border_mode=border_mode)
    # drop the singleton axis again
    return out_4d[:, :, :, 0]
	def __init__(self,rng,input,filter_shape,image_shape,poolsize=(2,2)):
		"""Build a convolution + max-pooling layer with tanh activation.

		Args:
			rng: random generator providing ``uniform(low, high, size)``.
			input: symbolic 4D input tensor.
			filter_shape: (output maps, input maps, filter rows, filter cols).
			image_shape: (batch size, input maps, image rows, image cols).
			poolsize: pooling window.
		"""
		assert image_shape[1] == filter_shape[1]
		self.input = input
		# Inputs feeding one unit: input maps * filter rows * filter cols.
		# The number of output maps (filter_shape[0]) must not be included.
		fan_in = np.prod(filter_shape[1:])
		# Each unit in the lower layer receives gradient from
		# output maps * filter rows * filter cols / pooling size units.
		fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
			np.prod(poolsize))
		w_bound = np.sqrt(6./(fan_out + fan_in))
		self.W = shared(
			np.asarray(
				rng.uniform(low=-w_bound, high=w_bound, size=filter_shape),
				dtype=th.config.floatX),
			borrow=True)
		b_values = np.zeros((filter_shape[0],),dtype=th.config.floatX)
		self.b = th.shared(value = b_values, borrow=True)

		conv_out = conv2d(
			input=input,
			filters=self.W,
			filter_shape=filter_shape,
			input_shape=image_shape)
		pooled_out = downsample.max_pool_2d(
			input=conv_out,
			ds=poolsize,
			ignore_border=True)
		# Bias is broadcast over batch and both spatial axes.
		self.output = T.tanh(pooled_out + self.b.dimshuffle('x',0,'x','x'))
		self.params = [self.W,self.b]
Beispiel #29
0
    def test_conv_no_bias(self):
        """Check that the gradients of the MKL conv path (no bias) match those of plain conv2d."""
        images = T.dtensor4('input_conv')
        weights = T.dtensor4('weights')

        # MKL path: user layout -> internal layout -> conv -> user layout.
        images_internal = U2IConv(imshp=(12, 3, 256, 256), kshp=(12, 3, 3, 3))(images)

        convOut = Conv2D(imshp=(12, 3, 256, 256), kshp=(12, 3, 3, 3), filter_flip=False)(images_internal, weights)
        convOut_user = I2U()(convOut)
        # Scalar loss so gradients w.r.t. images and weights can be taken.
        convOutLoss = T.mean(convOut_user)
        conv_op_di = T.grad(convOutLoss, images)
        conv_op_dk = T.grad(convOutLoss, weights)
        convOutBack = [conv_op_di, conv_op_dk]

        ival = numpy.random.rand(12, 3, 256, 256).astype(numpy.float64)
        wval = numpy.random.rand(12, 3, 3, 3).astype(numpy.float64)

        fopt = theano.function(inputs=[images, weights], outputs=convOutBack, mode=mode_with_mkl)
        new_out = fopt(ival, wval)

        # Reference path: the same loss and gradients built on regular conv2d.
        convOut = conv2d(images, weights, input_shape=(12, 3, 256, 256), filter_shape=(12, 3, 3, 3), filter_flip=False)
        convOutLoss = T.mean(convOut)
        conv_op_di = T.grad(convOutLoss, images)
        conv_op_dk = T.grad(convOutLoss, weights)
        convOutBack = [conv_op_di, conv_op_dk]

        fori = theano.function(inputs=[images, weights], outputs=convOutBack, mode=mode_without_mkl)
        old_out = fori(ival, wval)

        # The compiled graphs must differ in size; only the gradient with
        # respect to the images (index 0) is compared numerically.
        assert len(fopt.maker.fgraph.toposort()) != len(fori.maker.fgraph.toposort())
        assert numpy.allclose(old_out[0], new_out[0])
        assert new_out[0].dtype == 'float64'
Beispiel #30
0
def T_l2_cost_conv(x,a,A,imshp,kshp,mask=True):
    """
    Half the summed squared error between an image batch and its
    reconstruction by a 'full' convolution of the feature maps.

    Flattened layouts of the inputs (each is transposed, then reshaped):
        xsz*ysz*nchannels, nimages = x.shape
        xsz*ysz*nfeat, nimages = a.shape
        xsz*ysz*nchannels, nfeat = A.shape

    imshp = (num images, channels, szy, szx)
    kshp  = (features, channels, szy, szx)

    With ``mask`` true, only the error inside rows
    kshp[2]-1 : imshp[2]-kshp[2]+1 and columns
    kshp[3]-1 : imshp[3]-kshp[3]+1 contributes; the error outside that
    window is replaced by zero.
    """
    k_rows, k_cols = kshp[2], kshp[3]

    # featshp = (num images, features, szy, szx) of a 'valid' convolution.
    featshp = (imshp[0], kshp[0], imshp[2] - k_rows + 1, imshp[3] - k_cols + 1)

    image = T.reshape(T.transpose(x), imshp)
    features = T.reshape(T.transpose(a), featshp)
    kernel = T.reshape(T.transpose(A), kshp)

    # Swap the first two kernel axes and reverse both spatial axes before
    # mapping the features back to image space.
    flipped = kernel[:, :, ::-1, ::-1]
    kernel_rotated = T.transpose(flipped, axes=[1, 0, 2, 3])

    residual = image - conv2d(features, kernel_rotated, border_mode='full')

    if mask:
        rows = slice(k_rows - 1, imshp[2] - k_rows + 1)
        cols = slice(k_cols - 1, imshp[3] - k_cols + 1)
        windowed = T.zeros_like(residual)
        residual = T.set_subtensor(windowed[:, :, rows, cols],
                                   residual[:, :, rows, cols])

    return .5 * T.sum(residual ** 2)
def convpool(X, W, b, poolsize=(2, 2)):
    """Convolve X with W, pool the result, add the bias b and apply tanh.

    ``b`` is broadcast along axis 1 of the pooled output (one entry per
    filter); pooling uses windows of ``poolsize`` and ignores partial borders.
    """
    feature_maps = conv2d(input=X, filters=W)
    downsampled = pool.pool_2d(input=feature_maps, ws=poolsize, ignore_border=True)
    bias = b.dimshuffle('x', 0, 'x', 'x')
    return T.tanh(downsampled + bias)
Beispiel #32
0
def conv(X, w, s = 2, b = None, activation = relu):
    """Apply a 'full' convolution cropped by ``s`` on every spatial border.

    X          : input passed to conv2d
    w          : filters passed to conv2d
    s          : number of rows/columns removed from each side of the
                 'full' convolution output; 0 keeps the full output
    b          : optional bias, broadcast along axis 1 of the output
    activation : callable applied to the final pre-activation

    Returns ``activation`` applied to the cropped (and biased) output.
    """
    z = conv2d(X, w, border_mode='full')
    # A slice written as 0:-0 is empty, so only crop when there is
    # actually something to remove.
    if s != 0:
        z = z[:, :, s:-s, s:-s]
    if b is not None:
        z = z + b.dimshuffle('x', 0, 'x', 'x')
    return activation(z)
Beispiel #33
0
 def _train_fprop(self, state_below):
     """Training forward pass: convolve ``state_below`` and add the bias.

     The convolution uses this layer's ``border_mode`` and ``stride``; the
     bias ``self.b`` is broadcast along axis 1 of the result.
     """
     feature_maps = conv2d(state_below, self.W,
                           border_mode=self.border_mode,
                           subsample=self.stride)
     bias = self.b.dimshuffle('x', 0, 'x', 'x')
     return feature_maps + bias
Beispiel #34
0
    def __init__(self,
                 l,
                 C_ls,
                 Wl,
                 Pl=None,
                 alm1=None,
                 c=None,
                 b=None,
                 activation=T.tanh,
                 rng=None):
        """ Initialize the parameters and the symbolic output of the layer

        @type l    : integer
        @param l   : layer number label; used here to name the shared
                     variables and the default input tensor

        @type C_ls  : pair (C_lm1, C_l)
        @param C_ls : number of input feature maps and of output feature maps

        @type Wl  : tuple of integers
        @param Wl : filter size; the filter tensor has size dims.
                    (C_l, C_lm1) + Wl

        @type Pl  : tuple of integers or None
        @param Pl : pooling window handed to pool_2d; None disables pooling

        @type alm1  : theano tensor or None
        @param alm1 : input of the layer; a fresh 4D tensor is created
                      when omitted

        @type c  : theano shared or None
        @param c : filters; drawn uniformly within
                   +-sqrt(6 / (fan_in + fan_out)) when omitted (scaled by 4
                   for a sigmoid activation, zeros on MemoryError)

        @type b  : theano shared or None
        @param b : intercepts, one per output feature map; zeros when omitted

        @type activation  : theano.Op, function or None
        @param activation : non linearity applied to the output; None
                            leaves the output linear

        @type rng  : numpy.random.RandomState or None
        @param rng : random number generator used to initialize the filters;
                     RandomState(1234) when omitted
        """
        C_lm1, C_l = C_ls

        if rng is None:
            rng = np.random.RandomState(1234)

        # fan_in  = num input feature maps * filter volume
        # fan_out = num output feature maps * filter volume / pooling size
        fan_in = C_lm1 * np.prod(Wl)
        fan_out = C_l * np.prod(Wl)
        if Pl is not None:
            fan_out = fan_out // np.prod(Pl)

        # make the filter size out of C_ls and Wl
        filter_size = (C_l, C_lm1) + Wl
        assert len(filter_size) == (2 + len(Wl))

        if c is None:
            bound = np.sqrt(6. / (fan_in + fan_out))
            try:
                c_values = np.asarray(
                    rng.uniform(low=-bound, high=bound, size=filter_size),
                    dtype=theano.config.floatX)
            except MemoryError:
                c_values = np.zeros(filter_size).astype(theano.config.floatX)

            if activation == T.nnet.sigmoid:
                c_values *= np.float32(4)

            c = theano.shared(c_values, name="c" + str(l), borrow=True)

        if b is None:
            b = theano.shared(
                value=np.zeros((C_l, )).astype(theano.config.floatX),
                name='b' + str(l),
                borrow=True)

        if alm1 is None:
            alm1 = T.tensor4(name='a' + str(l) + 'm1',
                             dtype=theano.config.floatX)

        self.c = c  # size dims. (C_l, C_lm1) + Wl
        self.b = b  # dims. C_l
        self.alm1 = alm1  # input of the layer
        self.C_ls = C_ls
        self.Wl = Wl
        self.Pl = Pl

        self.l = l
        self.psi = activation

        # convolve, optionally pool, add the bias, optionally activate
        conv_out = conv2d(self.alm1, self.c)
        if Pl is None:
            linear = conv_out
        else:
            linear = pool.pool_2d(conv_out, self.Pl, ignore_border=True)
        linear = linear + self.b.dimshuffle('x', 0, 'x', 'x')

        self.al = linear if self.psi is None else self.psi(linear)
    def __init__(self,
                 rng,
                 input,
                 filter_shape,
                 image_shape,
                 poolsize=(2, 2),
                 dropout_percent=0.5,
                 stride=None,
                 pool_method="max",
                 amp_value=None,
                 relu=False,
                 batch_norm=False,
                 precalculated_batchnorm_values=None,
                 batchnorm_slide_percent=0.):
        """
        Convolutional layer for 4D input with masked weights, optional batch
        normalization of the input, pooling and a tanh output.

        Args:
            rng: random generator providing uniform() and binomial()
            input: symbolic 4D input; axis 1 is the channel axis
            filter_shape: (#filters, #channels, filter dims...); the channel
                count must equal image_shape[1]
            image_shape: shape of the input, forwarded to conv2d
            poolsize: pooling window
            dropout_percent: probability that an individual weight is masked
                out; the binary mask is drawn once, at construction
            stride: pooling stride forwarded to max_pool_2d
            pool_method: pooling mode forwarded to max_pool_2d
            amp_value: optional factor multiplied into the masked weights
            relu: when true, the tanh output is additionally clipped at 0
            batch_norm: when true, the input is normalized per channel and
                rescaled by (1 + GAMMA) and shifted by BETA, both learned
            precalculated_batchnorm_values: optional pair
                (standard deviation, mean) used instead of batch statistics
            batchnorm_slide_percent: when nonzero (and no precalculated
                values are given), weight of the stored "old" statistics in
                the blend with the batch statistics.  NOTE: this constructor
                only creates the stored values (sd 1, mean 0); it defines no
                update for them.

        Raises:
            AssertionError: if the channel counts of image and filter differ
        """
        # Explicit check instead of `assert`, so it is not stripped under -O.
        if image_shape[1] != filter_shape[1]:
            print('Image shape is ' + str(image_shape[1]))
            print('Filter shape is ' + str(filter_shape[1]))
            print("Poolsize is " + str(poolsize))
            raise AssertionError
        self.input = input
        self.precalculated_batchnorm_values = precalculated_batchnorm_values
        self.batchnorm_slide_percent = batchnorm_slide_percent
        #this is the size of the number of inputs to each
        #convolution hidden unit
        fan_in = np.prod(filter_shape[1:])
        #this is the number of output weights per channel divided by the
        #pooling size
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]))/\
            np.prod(poolsize)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        #initializes weights with appropriate values and shape
        self.W = theano.shared(np.asarray(rng.uniform(low=-W_bound,
                                                      high=W_bound,
                                                      size=filter_shape),
                                          dtype=theano.config.floatX),
                               borrow=True)
        #binary mask over the weights: 1 keeps a weight, 0 disables it
        dropout_matrix = theano.shared(np.asarray(rng.binomial(
            1, 1 - dropout_percent, filter_shape),
                                                  dtype=theano.config.floatX),
                                       name='d',
                                       borrow=True)
        self.dropout_matrix = dropout_matrix
        #initializes biases to zero
        #(important to avoid presumptions about content of image)
        b_values = np.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        #creates a shared variable instance (instead of a deep copy)
        self.b = theano.shared(value=b_values, borrow=True)
        #not used inside this constructor; kept as an attribute
        self.dropout_weight = theano.shared(
            np.asarray(1., dtype=theano.config.floatX))
        print('conv image shape: ' + str(image_shape))
        self.params = [self.W, self.b]
        #track input (no longer redundant with batchnorm)
        self.raw_input = input
        if not batch_norm:
            self.input = input
        else:
            print('implementing batch normalization')
            rn_input = range(image_shape[1])
            #GAMMA is 0 because a constant of 1 is added to the transform
            #to avoid problems with the L2 norm
            self.GAMMA = theano.shared(
                np.float32([
                    0. + rng.uniform(low=-0.001, high=0.001) for _ in rn_input
                ]))
            self.BETA = theano.shared(np.float32([0 for _ in rn_input]))
            self.params += [self.GAMMA, self.BETA]
            #identity test: an elementwise comparison with None would be
            #ambiguous for array-like values
            if self.precalculated_batchnorm_values is not None:
                self.sd_input = self.precalculated_batchnorm_values[0]
                self.means = self.precalculated_batchnorm_values[1]
            elif self.batchnorm_slide_percent == 0:
                self.sd_input = T.sqrt(T.var(input, (0, 2, 3)) +
                                       0.00001).dimshuffle('x', 0, 'x', 'x')
                self.means = T.mean(input,
                                    (0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
            else:
                #set old values to initialized theano value
                self.sd_input_old = theano.shared(
                    np.float32(np.ones((1, image_shape[1], 1, 1))),
                    broadcastable=(True, False, True, True))
                self.means_old = theano.shared(
                    np.float32(np.zeros((1, image_shape[1], 1, 1))),
                    broadcastable=(True, False, True, True))
                sbsp = self.batchnorm_slide_percent
                batch_sd = T.sqrt(T.var(input, (0, 2, 3)) +
                                  0.00001).dimshuffle('x', 0, 'x', 'x')
                batch_mean = T.mean(input,
                                    (0, 2, 3)).dimshuffle('x', 0, 'x', 'x')
                self.sd_input = sbsp * self.sd_input_old + \
                    (1. - sbsp) * batch_sd
                self.means = sbsp * self.means_old + \
                    (1 - sbsp) * batch_mean
            self.input_normalized = (input - self.means) / self.sd_input
            self.input = self.input_normalized * (np.float32(1.) + \
                  self.GAMMA.dimshuffle('x',0,'x','x')) + \
                         self.BETA.dimshuffle('x',0,'x','x')
        #convolve with the masked (and optionally amplified) weights
        filters = self.W * dropout_matrix
        if amp_value is not None:
            filters = filters * amp_value
        conv_out = conv2d(
            input=self.input,
            filters=filters,
            filter_shape=filter_shape,
            input_shape=image_shape  #keyword changed from "image_shape"
        )
        #pool (mode selected by pool_method)
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize,
                                            ignore_border=True,
                                            st=stride,
                                            mode=pool_method)
        self.stride = stride
        self.pool_method = pool_method
        self.relu = relu
        self.output = T.tanh(pooled_out +
                             self.b.dimshuffle('x', 0, 'x', 'x'))
        if self.relu:
            #rectify symbolically: a compiled theano function only accepts
            #numeric values and cannot be applied to a symbolic expression
            self.output = T.maximum(self.output, 0)
        #store parameters
        #extra params to pass to other methods
        self.rng = rng
        self.filter_shape = filter_shape
        self.dropout_percent = dropout_percent
        print('conv filter shape: %s' % str(self.filter_shape))
Beispiel #36
0
    def __init__(self,
                 rng,
                 input,
                 image_shape,
                 filter_shape,
                 dropout,
                 pool=True,
                 poolsize=(2, 2),
                 border_mode='valid',
                 act='absTanh',
                 bch_norm=False,
                 param_seed=54621,
                 use_params=False,
                 conv_params=None):
        """
        Build a convolution layer followed by an activation, optional
        pooling and switchable multiplicative noise.

        :type rng: np.random.RandomState
        :param rng: random number generator used to initialize the weights

        :type input: symbolic 4D tensor
        :param input: symbolic image tensor, of shape image_shape

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :param dropout: symbolic or numeric switch; when greater than 0 the
                        output is multiplied by noise drawn from a normal
                        distribution with mean 1 and std 1

        :type pool: boolean
        :param pool: whether pooling is applied after the activation

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)

        :param border_mode: border mode forwarded to conv2d

        :param act: activation selector forwarded to apply_act

        :type bch_norm: boolean
        :param bch_norm: normalize the convolution output per feature map
                         and scale it with a learned gain ``g``

        :param param_seed: seed of the random stream producing the noise

        :type use_params: boolean
        :param use_params: take W and b from conv_params[0] and
                           conv_params[1] instead of creating them
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        if not use_params:
            # each hidden unit has "num input feature maps * filter height
            # * filter width" inputs
            fan_in = np.prod(filter_shape[1:])
            # each lower-layer unit receives gradients from "num output
            # feature maps * filter height * filter width" / pooling size
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) /
                       np.prod(poolsize))
            W_bound = np.sqrt(6. / (fan_in + fan_out))

            w_init = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
            self.W = theano.shared(np.asarray(w_init,
                                              dtype=theano.config.floatX),
                                   name='Conv_W',
                                   borrow=True)

            # one bias per output feature map
            self.b = theano.shared(value=np.zeros((filter_shape[0], ),
                                                  dtype=theano.config.floatX),
                                   name='Conv_b',
                                   borrow=True)
        else:
            # reuse previously defined parameters
            self.W, self.b = conv_params[0], conv_params[1]

        # convolve input feature maps with filters
        conv_out = conv2d(input=input,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape,
                          border_mode=border_mode)

        # the bias vector is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts across mini-batches and feature map width & height
        if bch_norm:
            self.g = theano.shared(
                np.ones(filter_shape[0]).astype(theano.config.floatX),
                name='conv_bn_g',
                borrow=True)
            stat_axes = (0, 2, 3)
            centered = conv_out - conv_out.mean(axis=stat_axes, keepdims=True)
            normed = centered / (
                conv_out.std(axis=stat_axes, keepdims=True) + 1E-6)
            pre_act = (self.g.dimshuffle('x', 0, 'x', 'x') * normed +
                       self.b.dimshuffle('x', 0, 'x', 'x'))
        else:
            pre_act = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        activated = apply_act(pre_act, act)

        # downsample each feature map individually when pooling is requested
        if pool:
            pooled_out = pool_2d(input=activated,
                                 ds=poolsize,
                                 ignore_border=True)
        else:
            pooled_out = activated

        # use dropout=1 in train and dropout=0 in test
        self.dropout = dropout
        srng = RandomStreams(seed=param_seed)
        noise = srng.normal(size=theano.tensor.shape(pooled_out),
                            avg=1.0,
                            std=1.0)
        self.output = T.switch(T.gt(self.dropout, 0),
                               pooled_out * noise,
                               pooled_out)

        # store parameters of this layer
        self.params = [self.W, self.b]
        if bch_norm:
            self.params = [self.W, self.b, self.g]
	def forward(self, X):
		"""Convolve X with self.W, pool with window self.poolsz, add the bias and apply relu."""
		feature_maps = conv2d(X, self.W)
		downsampled = pool.pool_2d(feature_maps, ws=self.poolsz, ignore_border=True)
		# the bias is broadcast along axis 1 (one entry per filter)
		biased = downsampled + self.b.dimshuffle('x', 0, 'x', 'x')
		return T.nnet.relu(biased)
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Build a convolution + max-pooling layer whose parameters each come
        with a zero-initialized companion "velocity" variable.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: symbolic 4D tensor
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        # keep track of the model input
        self.input = input

        floatX = theano.config.floatX
        n_filters = filter_shape[0]

        # inputs per hidden unit:
        #   num input feature maps * filter height * filter width
        fan_in = numpy.prod(filter_shape[1:])
        # gradients received per lower-layer unit:
        #   num output feature maps * filter height * filter width / poolsize
        fan_out = (n_filters * numpy.prod(filter_shape[2:]) //
                   numpy.prod(poolsize))

        # weights drawn uniformly in [-W_bound, W_bound]
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        w_init = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
        self.W = theano.shared(numpy.asarray(w_init, dtype=floatX),
                               borrow=True)
        self.Wv = theano.shared(numpy.zeros(filter_shape, dtype=floatX),
                                borrow=True)

        # bias: one entry per output feature map
        self.b = theano.shared(value=numpy.zeros((n_filters, ), dtype=floatX),
                               borrow=True)
        self.bv = theano.shared(numpy.zeros((n_filters, ), dtype=floatX),
                                borrow=True)

        # convolve the input feature maps with the filters
        conv_out = conv2d(input=input,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape)

        # downsample each feature map individually
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize,
                                            ignore_border=True)

        # the bias is reshaped to (1, n_filters, 1, 1) so it broadcasts across
        # mini-batches and feature map width & height
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled_out + bias)

        # parameters of this layer and their velocities, in matching order
        self.params = [self.W, self.b]
        self.velocity = [self.Wv, self.bv]
    else:
        high = numpy.sqrt(6. / (numpy.sum(shape[:2]) * numpy.prod(shape[2:])))
    shape = shape if n is None else (n, ) + shape
    return theano.shared(numpy.asarray(numpy.random.uniform(low=-high,
                                                            high=high,
                                                            size=shape),
                                       dtype=dtype),
                         name=name)


# Two stacked convolution + pooling stages built as symbolic expressions.
# Relies on `layer0_input`, `batch_size` and `shared_glorot_uniform`
# being defined earlier in the script.

# Stage 1: n_conv1 filters of size 5x5 over a single input channel.
n_conv1 = 20

W1 = shared_glorot_uniform((n_conv1, 1, 5, 5))

conv1_out = conv2d(input=layer0_input,
                   filters=W1,
                   filter_shape=(n_conv1, 1, 5, 5),
                   input_shape=(batch_size, 1, 28, 28))

from theano.tensor.signal import pool
# 2x2 pooling, dropping partial windows at the border.
pooled_out = pool.pool_2d(input=conv1_out, ws=(2, 2), ignore_border=True)

# Stage 2: n_conv2 filters of size 5x5 over the n_conv1 maps of stage 1.
n_conv2 = 50

W2 = shared_glorot_uniform((n_conv2, n_conv1, 5, 5))

# The 12x12 input size presumably follows from (28 - 5 + 1) / 2 = 12,
# assuming conv2d's default border mode yields a 24x24 map -- TODO confirm.
conv2_out = conv2d(input=pooled_out,
                   filters=W2,
                   filter_shape=(n_conv2, n_conv1, 5, 5),
                   input_shape=(batch_size, n_conv1, 12, 12))

pooled2_out = pool.pool_2d(input=conv2_out, ws=(2, 2), ignore_border=True)
# Extract features from `image_list` with fixed random convolution filters.
# Relies on `input`, `image_list`, `labels`, `PCA_dim`, `plot_filters`,
# `relu` and `PCA` being defined earlier in the script.

# initialize shared variable for weights.
# 24 random filters of size 9x9 over 3 input channels
rng = np.random.RandomState(23455)
w_shp = (24, 3, 9, 9)
# filter values are drawn uniformly in +-1/sqrt(3 * 9 * 9),
# i.e. scaled by the number of inputs per filter
w_bound = np.sqrt(3 * 9 * 9)
W_real = np.asarray(rng.uniform(low=-1.0 / w_bound,
                high=1.0 / w_bound,
                size=w_shp),
            dtype=input.dtype)

plot_filters(W_real, sz=(6,4), title='Random filters', show=True)        

W = theano.shared(W_real, name ='W')

# convolution, max-pooling and ReLU
# f1 and f2 are compiled separately and chained on numeric arrays;
# the ReLU is applied to the pooled result.
f1 = theano.function([input], conv2d(input, W))
f2 = theano.function([input], pool.pool_2d(input, (16, 16), ignore_border=True))
# the transpose moves axis 3 of the image array to axis 1
# (presumably channels-last -> channels-first; verify against image_list)
feat_maps = relu(f2(f1(np.asarray(image_list, dtype='float32').transpose(0,3,1,2))))
# flatten every sample's feature maps into one row vector
feat_maps = feat_maps.reshape(feat_maps.shape[0],np.prod(feat_maps.shape[1:]))

# the PCA object is only constructed here; it is not fitted or applied
# in this part of the script
if PCA_dim > 0:
    pca = PCA(n_components=PCA_dim) 

# 10-fold cross-validation
print('performing cross-validation using SVM regression')
n_folds = 10

# shuffle samples (features and labels with the same permutation)
ids = np.random.permutation(feat_maps.shape[0])
feat_maps = feat_maps[ids,:]
labels_ = labels[ids]
Beispiel #41
0
    def __init__(self, rng, filter_shape, image_shape, poolsize=2, xin=None):
        """
        Build a size-preserving convolution layer with optional pooling (or
        input upsampling) and a rectified output.

        :param rng: random number generator used to initialize the weights
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)
        :param image_shape: shape the (matrix) input is reshaped to; its
                            channel count must equal filter_shape[1]
        :param poolsize: integer; values > 1 pool the convolution output
                         with a (poolsize, poolsize) window, values < -1
                         repeat the input rows and columns -poolsize times
                         before the convolution, other values do neither
        :param xin: optional symbolic input; a new matrix variable named
                    'input' is created when omitted

        The rectified output is stored in ``self.hidden``.  ``self.W_prime``
        (W with both spatial axes reversed and the first two axes swapped)
        and ``self.b_prime`` are created here but not used in this
        constructor.
        """

        assert image_shape[1] == filter_shape[1]
        self.image_shape = theano.shared(value=np.asarray(image_shape,
                                                          dtype='int16'),
                                         borrow=True)
        self.poolsize = (poolsize, poolsize)
        # Identity test: the caller either supplies a variable or nothing.
        if xin is not None:
            self.x = xin
        else:
            self.x = T.matrix(name='input')
        self.x1 = self.x.reshape(self.image_shape, ndim=4)
        self.filter_shape = filter_shape

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = np.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) /
                   np.prod(self.poolsize))
        # initialize weights with random weights
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(np.asarray(rng.uniform(low=-W_bound,
                                                      high=W_bound,
                                                      size=filter_shape),
                                          dtype=theano.config.floatX),
                               borrow=True)
        self.W_prime = self.W[:, :, ::-1, ::-1]
        self.W_prime = self.W_prime.dimshuffle(1, 0, 2, 3)
        # the bias is a 1D tensor -- one bias per output feature map
        b_values = np.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        bp_values = np.zeros((filter_shape[1], ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
        self.b_prime = theano.shared(value=bp_values, borrow=True)

        if poolsize < -1:
            self.x1 = self.x1.repeat(int(-poolsize),
                                     axis=2).repeat(int(-poolsize), axis=3)

        # convolve input feature maps with filters
        conv_out = conv2d(
            input=self.x1,
            filters=self.W,
            filter_shape=filter_shape,
            border_mode='full')

        # Crop the 'full' output by half the filter size on each side.
        # Floor division keeps the border an integer (true division would
        # produce a float that cannot be used as a slice bound), and a zero
        # border is skipped because the slice 0:-0 would be empty.
        bp = (filter_shape[2] - 1) // 2
        if bp > 0:
            conv_out = conv_out[:, :, bp:-bp, bp:-bp]

        # downsample each feature map individually, using maxpooling
        if poolsize > 1:
            try:
                self.pooled_out = pool.pool_2d(input=conv_out,
                                               ws=self.poolsize,
                                               ignore_border=True)
            except TypeError:
                # fall back to the `ds` keyword when `ws` is not accepted
                self.pooled_out = pool.pool_2d(input=conv_out,
                                               ds=self.poolsize,
                                               ignore_border=True)
        else:
            self.pooled_out = conv_out

        self.hidden = T.maximum(
            0, (self.pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')))

        # store parameters of this layer
        self.params = [self.W, self.b]
    def __init__(self, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a NetConvPoolLayer with shared variable internal parameters.

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)

        The weights are drawn from the global ``np.random`` state.  The
        intermediate results are kept as ``self.conv_out`` and
        ``self.pooled_out``; the layer output is ``self.output``.
        """
        assert image_shape[1] == filter_shape[1]
        # keep track of model input
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = np.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
                   np.prod(poolsize))
        # initialize weights with random weights
        W_bound = np.sqrt(6. / (fan_in + fan_out))

        # The cast to floatX belongs to the array: theano.shared has no
        # `dtype` parameter, the shared variable takes the dtype of its value.
        W_values = np.asarray(np.random.uniform(low=-W_bound,
                                                high=W_bound,
                                                size=filter_shape),
                              dtype=theano.config.floatX)
        self.W = theano.shared(W_values, name='W')

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = np.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        self.conv_out = conv2d(input=input,
                               filters=self.W,
                               filter_shape=filter_shape,
                               image_shape=image_shape)

        # downsample each feature map individually, using maxpooling
        self.pooled_out = downsample.max_pool_2d(input=self.conv_out,
                                                 ds=poolsize,
                                                 ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(self.pooled_out +
                             self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]
Beispiel #43
0
 def forward(self, X):
     """Convolve X with self.W, max-pool with window self.poolsz, add the bias and apply relu."""
     feature_maps = conv2d(input=X, filters=self.W)
     downsampled = downsample.max_pool_2d(input=feature_maps,
                                          ds=self.poolsz,
                                          ignore_border=True)
     # the bias is broadcast along axis 1 (one entry per filter)
     bias = self.b.dimshuffle('x', 0, 'x', 'x')
     return T.nnet.relu(downsampled + bias)
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
                 pool_ignore_border=True, normal=False, std_normal=2,
                 binary=True, normalization=True, eps=1e-4):
        """
        Build a convolution + max-pooling layer that can convolve with
        binarized weights.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)

        :type pool_ignore_border: boolean
        :param pool_ignore_border: forwarded as ``ignore_border`` to the
                                   max-pooling op

        :type normal: boolean
        :param normal: if True, draw the initial weights from a normal
                       distribution N(0, std_normal); otherwise draw each
                       weight as -1 or +1 with equal probability

        :type std_normal: float
        :param std_normal: scale of the normal initializer (only used when
                           ``normal`` is True)

        :type binary: boolean
        :param binary: convolve with the binarized weights (``self.W_bin``)
                       instead of the real-valued ones (``self.W``)

        :type normalization: boolean
        :param normalization: standardize the convolution output before
                              pooling

        :type eps: float
        :param eps: constant added to the variance before the square root in
                    the normalization step
        """

        assert image_shape[1] == filter_shape[1]
        # keep track of model input
        self.input = input

        # Real-valued weights. The non-normal initializer maps Bernoulli(0.5)
        # draws {0, 1} onto {-1, +1}.
        if normal:
            w_values = rng.normal(loc=0, scale=std_normal, size=filter_shape)
        else:
            w_values = 2 * rng.binomial(n=1, p=.5, size=filter_shape) - 1
        self.W = theano.shared(
            numpy.asarray(w_values, dtype=theano.config.floatX),
            borrow=True
        )

        # Generate binarized weights (binarize() is defined elsewhere in the
        # class; it must run after self.W exists).
        self.W_bin = self.binarize()

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with the selected set of filters
        conv_out = conv2d(
            input=input,
            filters=self.W_bin if binary else self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        if normalization:
            # Mean and variance are taken over the whole tensor (no axis is
            # given), i.e. one scalar statistic per mini-batch.
            conv_out = ((conv_out - T.mean(conv_out)) /
                        T.sqrt(T.var(conv_out) + eps))

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=pool_ignore_border
        )

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer (real-valued and binarized variants)
        self.params = [self.W, self.b]
        self.params_bin = [self.W_bin, self.b]
    def __init__(self,
                 rng,
                 is_train,
                 input_data,
                 filter_shape,
                 image_shape,
                 ssample=(1, 1),
                 bordermode='valid',
                 p=0.5):
        """
        Build a layer computing conv -> bias -> batch normalization -> ReLU
        -> dropout.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :param is_train: symbolic flag; where it is non-zero the output is
                         ``drop(activations, p)``, otherwise it is
                         ``p * activations``

        :type input_data: theano.tensor.dtensor4
        :param input_data: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type ssample: tuple or list of length 2
        :param ssample: convolution stride, forwarded as ``subsample``

        :param bordermode: forwarded as ``border_mode`` to ``conv2d``

        :type p: float
        :param p: value handed to ``drop`` and used as the scale factor on
                  the non-training path
        """

        assert image_shape[1] == filter_shape[1]

        float_type = theano.config.floatX
        n_filters = filter_shape[0]

        # Uniform initialization bound sqrt(6 / (fan_in + fan_out)):
        #   fan_in  = input feature maps * filter height * filter width
        #   fan_out = output feature maps * filter height * filter width,
        #             divided by the stride area
        n_in = numpy.prod(filter_shape[1:])
        n_out = n_filters * numpy.prod(filter_shape[2:]) // numpy.prod(ssample)
        limit = numpy.sqrt(6. / (n_in + n_out))
        initial_w = rng.uniform(low=-limit, high=limit, size=filter_shape)
        self.W = theano.shared(numpy.asarray(initial_w, dtype=float_type),
                               borrow=True)

        def per_filter(fill):
            # One learnable scalar per output feature map.
            return theano.shared(value=fill((n_filters, ), dtype=float_type),
                                 borrow=True)

        self.b = per_filter(numpy.zeros)      # convolution bias
        self.gamma = per_filter(numpy.ones)   # batch-norm scale
        self.beta = per_filter(numpy.zeros)   # batch-norm shift

        def as_channel_row(vec):
            # (n_filters,) -> (1, n_filters, 1, 1) so it broadcasts across
            # mini-batches and feature map width & height.
            return vec.dimshuffle('x', 0, 'x', 'x')

        # convolve input feature maps with filters, then add the bias
        convolved = conv2d(input=input_data,
                           filters=self.W,
                           filter_shape=filter_shape,
                           input_shape=image_shape,
                           subsample=ssample,
                           border_mode=bordermode)
        pre_norm = convolved + as_channel_row(self.b)

        # Statistics are reduced over axis 0 only.
        normalized = batch_normalization(
            inputs=pre_norm,
            gamma=as_channel_row(self.gamma),
            beta=as_channel_row(self.beta),
            mean=pre_norm.mean((0, ), keepdims=True),
            std=pre_norm.std((0, ), keepdims=True),
            mode='low_mem')

        rectified = T.nnet.relu(normalized)
        with_dropout = drop(rectified, p)
        scaled = p * rectified

        self.output = T.switch(T.neq(is_train, 0), with_dropout, scaled)

        # store parameters of this layer
        self.params = [self.W, self.b, self.gamma, self.beta]

        # keep track of model input
        self.input = input_data
    def __init__(self,
                 emb,
                 pos,
                 nc=2,
                 de=100,
                 disc_h=250,
                 fs=[3, 4, 5],
                 nf=300,
                 emb_reg=False,
                 pos_reg=False,
                 longhist=True):
        '''
            Build the shared parameters, the symbolic graph and the compiled
            Theano functions of a source/target CNN pair trained adversarially
            against a discriminator.

            emb :: Word embedding matrix (array with .astype); copied into
                   separate source, target and averaged shared variables
            pos :: Position embedding matrix (array with .astype and .shape);
                   copied the same way. Its second dimension is added twice
                   to `de` to obtain the convolution input width
            nc :: Number of classes (not used in this constructor)
            de :: Dimensionality of word embeddings
            disc_h :: Width of the two discriminator hidden layers
            fs :: Filter widths; one convolution per entry.
                  NOTE(review): mutable default argument; it is not mutated
                  here, but cnn_weights() receives it -- confirm it does not
                  modify it.
            nf :: Number of filters per filter width
            emb_reg :: Add 0.5 * ||avg_emb - target_emb||^2 to the generator
                       loss
            pos_reg :: Add 0.5 * ||avg_pos - target_pos||^2 to the generator
                       loss
            longhist :: If True the averaged embeddings accumulate a running
                        sum (and num_updates is incremented); if False they
                        follow a 0.9/0.1 exponential moving average
        '''
        # Word embeddings: source copy, target copy and an averaged copy
        self.emb = theano.shared(name='Words', value=emb.astype('float32'))
        self.target_emb = theano.shared(name='Wordst',
                                        value=emb.astype('float32'))
        self.avg_emb = theano.shared(name='Wordsa',
                                     value=emb.astype('float32'))

        # Position embeddings: source copy, target copy and an averaged copy
        self.pos = theano.shared(name='Pos', value=pos.astype('float32'))
        self.target_pos = theano.shared(name='Post',
                                        value=pos.astype('float32'))
        self.avg_pos = theano.shared(name='Posta', value=pos.astype('float32'))
        # Output weights: map the concatenated CNN features (nf per filter
        # width) to a single logit. Applied to both source_h and target_h.
        self.w_o = theano.shared(name='w_o',
                                 value=he_normal(
                                     (nf * len(fs), 1)).astype('float32'))
        self.b_o = theano.shared(name='b_o',
                                 value=np.zeros((1, )).astype('float32'))

        # Discriminator weights: two hidden layers of width disc_h followed
        # by a single-logit output layer
        self.w_h_1 = theano.shared(name='w_h_1',
                                   value=he_normal((nf * len(fs),
                                                    disc_h)).astype('float32'))
        self.b_h_1 = theano.shared(name='b_h_1',
                                   value=np.zeros(
                                       (disc_h, )).astype('float32'))
        self.w_h_2 = theano.shared(name='w_h_2',
                                   value=he_normal(
                                       (disc_h, disc_h)).astype('float32'))
        self.b_h_2 = theano.shared(name='b_h_2',
                                   value=np.zeros(
                                       (disc_h, )).astype('float32'))
        self.w_adv = theano.shared(name='w_adv',
                                   value=he_normal(
                                       (disc_h, 1)).astype('float32'))
        self.b_adv = theano.shared(name='b_adv',
                                   value=np.zeros((1, )).astype('float32'))

        # Parameter groups, one per optimizer. The CNN weights are appended
        # to params_source / params_target further down.
        self.params_source = [self.w_o, self.b_o, self.emb, self.pos]
        self.params_target = [self.target_emb, self.target_pos]

        #self.params_discriminator = [self.w_h_1, self.b_h_1, self.w_h_3, self.b_h_3,
        self.params_discriminator = [
            self.w_h_1, self.b_h_1, self.w_h_2, self.b_h_2, self.w_adv,
            self.b_adv
        ]

        # Symbolic inputs. Index matrices are float matrices that get cast to
        # int32 before indexing.
        # assumes each index matrix is (batch, sequence) -- TODO confirm
        source_idxs = T.matrix()
        target_idxs = T.matrix()
        source_e1_pos_idxs = T.matrix()
        source_e2_pos_idxs = T.matrix()
        target_e1_pos_idxs = T.matrix()
        target_e2_pos_idxs = T.matrix()
        source_Y = T.ivector()

        # Source side: look up word and the two position embeddings, then
        # concatenate them along the last axis
        source_x_word = self.emb[T.cast(source_idxs, 'int32')]
        source_x_e1_pos = self.pos[T.cast(source_e1_pos_idxs, 'int32')]
        source_x_e2_pos = self.pos[T.cast(source_e2_pos_idxs, 'int32')]
        source_x_word = T.concatenate(
            [source_x_word, source_x_e1_pos, source_x_e2_pos], axis=2)
        # Zero out every position whose word index is 0
        # (presumably index 0 is padding -- verify against the data pipeline)
        mask = T.neq(source_idxs, 0) * 1
        source_x_word = source_x_word * mask.dimshuffle(0, 1, 'x')
        # Insert a singleton axis at position 1 to get the 4D conv2d input
        source_x_word = source_x_word.reshape(
            (source_x_word.shape[0], 1, source_x_word.shape[1],
             source_x_word.shape[2]))

        # Target side: same pipeline with the target embedding copies
        target_x_word = self.target_emb[T.cast(target_idxs, 'int32')]
        target_x_e1_pos = self.target_pos[T.cast(target_e1_pos_idxs, 'int32')]
        target_x_e2_pos = self.target_pos[T.cast(target_e2_pos_idxs, 'int32')]
        target_x_word = T.concatenate(
            [target_x_word, target_x_e1_pos, target_x_e2_pos], axis=2)
        mask2 = T.neq(target_idxs, 0) * 1
        target_x_word = target_x_word * mask2.dimshuffle(0, 1, 'x')
        target_x_word = target_x_word.reshape(
            (target_x_word.shape[0], 1, target_x_word.shape[1],
             target_x_word.shape[2]))

        # From here on `de` is the width of the concatenated
        # word + 2 * position feature vector
        de = de + 2 * pos.shape[1]

        # Three independent sets of CNN weights: source, target and averaged
        source_cnn_w, source_cnn_b = cnn_weights(de, fs, nf)
        target_cnn_w, target_cnn_b = cnn_weights(de, fs, nf)
        avg_cnn_w, avg_cnn_b = cnn_weights(de, fs, nf)

        self.params_source += source_cnn_w + source_cnn_b
        self.params_target += target_cnn_w + target_cnn_b
        self.params_avg = avg_cnn_w + avg_cnn_b

        dropout_switch = T.scalar()
        # NOTE(review): real_attention is not referenced again in this
        # constructor
        real_attention = T.scalar()

        # Source feature extractor: one convolution per filter width, each
        # followed by bias + rectify and a max over axis 2
        source_l1_w_all = []
        for w, b, width in zip(source_cnn_w, source_cnn_b, fs):
            l1_w = conv2d(source_x_word,
                          w,
                          image_shape=(None, 1, None, de),
                          filter_shape=(nf, 1, width, de))
            l1_w = rectify(l1_w + b.dimshuffle('x', 0, 'x', 'x'))
            l1_w = T.max(l1_w, axis=2).flatten(2)
            source_l1_w_all.append(l1_w)

        # Target feature extractor: identical structure, target weights
        target_l1_w_all = []
        for w, b, width in zip(target_cnn_w, target_cnn_b, fs):
            l1_w = conv2d(target_x_word,
                          w,
                          image_shape=(None, 1, None, de),
                          filter_shape=(nf, 1, width, de))
            l1_w = rectify(l1_w + b.dimshuffle('x', 0, 'x', 'x'))
            l1_w = T.max(l1_w, axis=2).flatten(2)
            target_l1_w_all.append(l1_w)

        # Concatenate the per-width features and pass them through dropout()
        source_h = T.concatenate(source_l1_w_all, axis=1)
        source_h = dropout(source_h, dropout_switch, 0.5)

        target_h = T.concatenate(target_l1_w_all, axis=1)
        target_h = dropout(target_h, dropout_switch, 0.5)

        # Source prediction, clipped away from 0 and 1
        pyx_source = T.nnet.nnet.sigmoid(
            T.dot(source_h, self.w_o) + self.b_o.dimshuffle('x', 0))
        pyx_source = T.clip(pyx_source, 1e-5, 1 - 1e-5)

        self.step = theano.shared(np.float32(0))

        # Discriminator hidden layers; the same weights are applied to the
        # source and the target features
        source_h2 = rectify(T.dot(source_h, self.w_h_1) + self.b_h_1)
        source_h2 = dropout(source_h2, dropout_switch, 0.5)
        source_h2 = rectify(T.dot(source_h2, self.w_h_2) + self.b_h_2)
        source_h2 = dropout(source_h2, dropout_switch, 0.5)

        target_h2 = rectify(T.dot(target_h, self.w_h_1) + self.b_h_1)
        target_h2 = dropout(target_h2, dropout_switch, 0.5)
        target_h2 = rectify(T.dot(target_h2, self.w_h_2) + self.b_h_2)
        target_h2 = dropout(target_h2, dropout_switch, 0.5)

        # Discriminator outputs for each domain, clipped away from 0 and 1
        pyx_adv_source = T.nnet.nnet.sigmoid(
            T.dot(source_h2, self.w_adv) + self.b_adv.dimshuffle('x', 0))
        pyx_adv_source = T.clip(pyx_adv_source, 1e-5, 1 - 1e-5)

        pyx_adv_target = T.nnet.nnet.sigmoid(
            T.dot(target_h2, self.w_adv) + self.b_adv.dimshuffle('x', 0))
        pyx_adv_target = T.clip(pyx_adv_target, 1e-5, 1 - 1e-5)

        # Prediction on target features using the source output weights
        # (not clipped)
        pyx_test = T.nnet.nnet.sigmoid(
            T.dot(target_h, self.w_o) + self.b_o.dimshuffle('x', 0))

        # Generator Loss: negative mean log of the discriminator output on
        # target features
        #L_adv_generator = -.9*T.log(pyx_adv_target).mean() - .1*T.log(1.-pyx_adv_target).mean()
        L_adv_generator = -T.log(pyx_adv_target).mean()
        num_updates = theano.shared(as_floatX(1.).astype("float32"))
        if emb_reg:
            L_adv_generator += .5 * ((self.avg_emb - self.target_emb)**2).sum()
        if pos_reg:
            L_adv_generator += .5 * ((self.avg_pos - self.target_pos)**2).sum()
        # Always-on penalty between the averaged and the target CNN weights;
        # params_target[2:] skips target_emb and target_pos
        if True:
            L_adv_generator += .5 * sum(
                [((s - t)**2).sum()
                 for s, t in zip(self.params_avg, self.params_target[2:])])

        updates_generator, _ = Adam(L_adv_generator,
                                    self.params_target,
                                    lr2=0.0002)
        if not longhist:
            # Exponential moving average of target embeddings and CNN weights
            updates_generator.append(
                (self.avg_emb, 0.9 * self.avg_emb + 0.1 * self.target_emb))
            updates_generator.append(
                (self.avg_pos, 0.9 * self.avg_pos + 0.1 * self.target_pos))
            for p, t in zip(self.params_avg, self.params_target[2:]):
                updates_generator.append((p, 0.9 * p + 0.1 * t))
        else:
            # Running sums plus an update counter.
            # NOTE(review): unlike the branch above, params_avg gets no update
            # here, and the sums are not divided by num_updates in this
            # constructor -- confirm this is intended.
            updates_generator.append(
                (self.avg_emb, self.avg_emb + self.target_emb))
            updates_generator.append(
                (self.avg_pos, self.avg_pos + self.target_pos))
            updates_generator.append((num_updates, num_updates + 1.))

        self.train_batch_generator = theano.function([target_idxs, target_e1_pos_idxs, target_e2_pos_idxs,\
            dropout_switch],
            L_adv_generator, updates=updates_generator, allow_input_downcast=True, on_unused_input='ignore')

        # Discriminator loss: the source term is weighted element-wise by
        # random factors drawn uniformly from [0.7, 1.0) via srng2
        L_adv_discriminator = -T.log(1 - pyx_adv_target).mean() - (
            (srng2.uniform(low=0.7, high=1., size=pyx_adv_target.shape)) *
            T.log(pyx_adv_source)).mean()
        #L_adv_discriminator = -0.9*T.log(1.-pyx_adv_target).mean() - .9*T.log(pyx_adv_source).mean() - .1*T.log(pyx_adv_target).mean() - .1*T.log(1.-pyx_adv_source).mean()
        #L_adv_discriminator = -T.log(1.-pyx_adv_target).mean() - T.log(pyx_adv_source).mean()
        #L_adv_discriminator += 1e-2*sum([(x**2).sum() for x in self.params_discriminator])

        # The second value returned by Adam is kept as self.disc_lr
        updates_discriminator, self.disc_lr = Adam(L_adv_discriminator,
                                                   self.params_discriminator,
                                                   lr2=0.0002)

        # Source loss: binary cross-entropy plus 1e-4 * L2 penalty on all
        # source parameters
        #L_source = T.nnet.binary_crossentropy(pyx_source.flatten(), source_Y).mean() + 1e-4 * sum([(x**2).sum() for x in self.params_source])
        L_source = T.nnet.binary_crossentropy(
            pyx_source.flatten(),
            source_Y).mean() + 1e-4 * sum([(x**2).sum()
                                           for x in self.params_source])
        updates_source, _ = Adam(L_source, self.params_source, lr2=0.001)

        # Compiled training functions; each returns its loss and applies its
        # own update list
        self.train_batch_source = theano.function([source_idxs, source_e1_pos_idxs, source_e2_pos_idxs, source_Y,\
            dropout_switch],
             L_source, updates=updates_source, allow_input_downcast=True, on_unused_input='ignore')

        self.train_batch_discriminator = theano.function([target_idxs, source_idxs, target_e1_pos_idxs, target_e2_pos_idxs,\
            source_e1_pos_idxs, source_e2_pos_idxs, dropout_switch],
            L_adv_discriminator, updates=updates_discriminator, allow_input_downcast=True, on_unused_input='ignore')

        # Compiled inference functions: target features, target predictions
        # and source predictions
        self.features = theano.function([target_idxs, target_e1_pos_idxs, target_e2_pos_idxs, dropout_switch],\
                target_h, allow_input_downcast=True, on_unused_input='ignore')

        self.predict_proba = theano.function([target_idxs, target_e1_pos_idxs, target_e2_pos_idxs, dropout_switch],\
                pyx_test.flatten(), allow_input_downcast=True, on_unused_input='ignore')
        self.predict_src_proba = theano.function([source_idxs, source_e1_pos_idxs, source_e2_pos_idxs, dropout_switch],\
                pyx_source.flatten(), allow_input_downcast=True, on_unused_input='ignore')
Beispiel #47
0
    def __init__(self,
                 rng,
                 inputVar,
                 cfgParams,
                 copyLayer=None,
                 layerNum=None):
        """
        Build a convolution layer followed by an optional pooling step, an
        optional bias and an optional activation.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type inputVar: theano.tensor.dtensor4
        :param inputVar: symbolic image tensor, of shape image_shape

        :type cfgParams: ConvPoolLayerParams
        :param cfgParams: layer configuration; this constructor reads
                          filter_shape, image_shape, stride, poolsize,
                          poolType, activation, activation_str, inputDim,
                          border_mode, hasBias and _init_method

        :param copyLayer: if given, its ``W`` (and ``b`` when the layer has a
                          bias) are reused instead of creating new shared
                          variables

        :param layerNum: index used in the names of the shared variables and
                         of the output variable
        """
        import theano
        from theano.tensor.signal.pool import pool_2d
        from theano.tensor.nnet import conv2d

        super(ConvPoolLayer, self).__init__(rng)

        assert isinstance(cfgParams, ConvPoolLayerParams)

        floatX = theano.config.floatX  # @UndefinedVariable

        filter_shape = cfgParams.filter_shape
        image_shape = cfgParams.image_shape
        filter_stride = cfgParams.stride
        poolsize = cfgParams.poolsize
        poolType = cfgParams.poolType
        activation = cfgParams.activation
        inputDim = cfgParams.inputDim
        border_mode = cfgParams.border_mode
        # Evaluate the bias flag once so that creating the bias, applying it
        # and listing it in self.params can never disagree.
        has_bias = bool(cfgParams.hasBias)

        self.cfgParams = cfgParams
        self.layerNum = layerNum

        assert image_shape[1] == filter_shape[1]
        self.inputVar = inputVar

        if copyLayer is not None:
            self.W = copyLayer.W
        else:
            wInitVals = self.getInitVals(filter_shape,
                                         'conv',
                                         act_fn=cfgParams.activation_str,
                                         orthogonal=False,
                                         method=cfgParams._init_method)
            self.W = theano.shared(wInitVals,
                                   borrow=True,
                                   name='convW{}'.format(layerNum))

        # the bias is a 1D tensor -- one bias per output feature map
        if has_bias:
            if copyLayer is not None:
                self.b = copyLayer.b
            else:
                b_values = numpy.zeros((filter_shape[0], ), dtype=floatX)
                self.b = theano.shared(value=b_values,
                                       borrow=True,
                                       name='convB{}'.format(layerNum))

        if border_mode == 'same':
            # perform full convolution and crop output of input size
            conv_out = conv2d(input=inputVar,
                              filters=self.W,
                              filter_shape=filter_shape,
                              input_shape=image_shape,
                              subsample=filter_stride,
                              border_mode='full')

            offset_2 = filter_shape[2] // 2
            offset_3 = filter_shape[3] // 2
            conv_out = conv_out[:, :, offset_2:offset_2 + image_shape[2],
                                offset_3:offset_3 + image_shape[3]]
        else:
            # convolve inputVar feature maps with filters
            conv_out = conv2d(input=inputVar,
                              filters=self.W,
                              filter_shape=filter_shape,
                              input_shape=image_shape,
                              subsample=filter_stride,
                              border_mode=border_mode)

        # downsample each feature map individually
        if poolType == 0:
            # use maxpooling
            pooled_out = pool_2d(input=conv_out,
                                 ds=poolsize,
                                 ignore_border=True,
                                 mode='max')
        elif poolType == 1:
            # use average pooling
            pooled_out = pool_2d(input=conv_out,
                                 ds=poolsize,
                                 ignore_border=True,
                                 mode='average_inc_pad')
        elif poolType == 3:
            # use subsampling and ignore border: crop to a multiple of the
            # pool size, then keep every second element.
            # NOTE(review): the step is hard-coded to 2 and the crop uses
            # inputDim rather than the convolution output size; this only
            # matches poolsize for (2, 2) -- confirm before using other sizes.
            pooled_out = conv_out[:, :, :(inputDim[2] // poolsize[0]) *
                                  poolsize[0], :(inputDim[3] // poolsize[1]) *
                                  poolsize[1]][:, :, ::2, ::2]
        elif poolType == -1:
            # no pooling at all
            pooled_out = conv_out
        else:
            raise NotImplementedError()

        # add the bias term. Since the bias is a vector (1D array), we first reshape it to a tensor of shape
        # (1,n_filters,1,1). Each bias will thus be broadcasted across mini-batches and feature map width & height
        if has_bias:
            lin_output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
        else:
            lin_output = pooled_out
        self.output_pre_act = lin_output
        self.output = (lin_output
                       if activation is None else activation(lin_output))

        self.output.name = 'output_layer_{}'.format(self.layerNum)

        # store parameters of this layer
        self.params = [self.W, self.b] if has_bias else [self.W]
        self.weights = [self.W]
Beispiel #48
0
    def __init__(self,
                 rng,
                 inputVar,
                 cfgParams,
                 copyLayer=None,
                 layerNum=None):
        """
        Build a convolution layer with an optional bias and an optional
        activation (no pooling).

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type inputVar: theano.tensor.dtensor4
        :param inputVar: symbolic image tensor, of shape image_shape

        :type cfgParams: ConvLayerParams
        :param cfgParams: layer configuration; this constructor reads
                          filter_shape, image_shape, stride, activation,
                          activation_str, border_mode, hasBias and
                          _init_method

        :param copyLayer: if given, its ``W`` (and ``b`` when the layer has a
                          bias) are reused instead of creating new shared
                          variables

        :param layerNum: index used in the names of the shared variables and
                         of the output variable
        """
        import theano
        from theano.tensor.nnet import conv2d

        super(ConvLayer, self).__init__(rng)

        assert isinstance(cfgParams, ConvLayerParams)

        floatX = theano.config.floatX  # @UndefinedVariable

        filter_shape = cfgParams.filter_shape
        image_shape = cfgParams.image_shape
        filter_stride = cfgParams.stride
        activation = cfgParams.activation
        border_mode = cfgParams.border_mode
        # Evaluate the bias flag once so that creating the bias, applying it
        # and listing it in self.params can never disagree.
        has_bias = bool(cfgParams.hasBias)

        self.cfgParams = cfgParams
        self.layerNum = layerNum

        assert image_shape[1] == filter_shape[1]
        self.inputVar = inputVar

        if copyLayer is not None:
            self.W = copyLayer.W
        else:
            wInitVals = self.getInitVals(filter_shape,
                                         'conv',
                                         act_fn=cfgParams.activation_str,
                                         orthogonal=False,
                                         method=cfgParams._init_method)
            self.W = theano.shared(wInitVals,
                                   borrow=True,
                                   name='convW{}'.format(layerNum))

        # the bias is a 1D tensor -- one bias per output feature map
        if has_bias:
            if copyLayer is not None:
                self.b = copyLayer.b
            else:
                b_values = numpy.zeros((filter_shape[0], ), dtype=floatX)
                self.b = theano.shared(value=b_values,
                                       borrow=True,
                                       name='convB{}'.format(layerNum))

        # convolve inputVar feature maps with filters
        conv_out = conv2d(input=inputVar,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape,
                          subsample=filter_stride,
                          border_mode=border_mode)

        # add the bias term. Since the bias is a vector (1D array), we first reshape it to a tensor of shape
        # (1,n_filters,1,1). Each bias will thus be broadcasted across mini-batches and feature map width & height
        if has_bias:
            lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        else:
            lin_output = conv_out
        self.output_pre_act = lin_output
        self.output = (lin_output
                       if activation is None else activation(lin_output))

        self.output.name = 'output_layer_{}'.format(self.layerNum)

        # store parameters of this layer
        self.params = [self.W, self.b] if has_bias else [self.W]
        self.weights = [self.W]
                                            size=w_shp),
                                dtype=input.dtype),
                  name='W')

# Initialize the shared variable for the bias (a 1D tensor) with random values.
# IMPORTANT: biases are usually initialized to zero. In this particular
# application, however, the convolutional layer is simply applied to an image
# without learning any parameters, so the biases are drawn at random to
# "simulate" a trained layer.
b_shp = (2, )
# Values are uniform in [-0.5, 0.5) and stored with the same dtype as `input`.
b = theano.shared(numpy.asarray(rng.uniform(low=-.5, high=.5, size=b_shp),
                                dtype=input.dtype),
                  name='b')

# Build the symbolic expression that convolves `input` with the filters in W.
conv_out = conv2d(input, W)

# build symbolic expression to add bias and apply activation function, i.e. produce neural net layer output
# A few words on ``dimshuffle`` :
#   ``dimshuffle`` is a powerful tool in reshaping a tensor;
#   what it allows you to do is to shuffle dimension around
#   but also to insert new ones along which the tensor will be
#   broadcastable;
#   dimshuffle('x', 2, 'x', 0, 1)
#   This will work on 3d tensors with no broadcastable
#   dimensions. The first dimension will be broadcastable,
#   then we will have the third dimension of the input tensor as
#   the second of the resulting tensor, etc. If the tensor has
#   shape (20, 30, 40), the resulting tensor will have dimensions
#   (1, 40, 1, 20, 30). (AxBxC tensor is mapped to 1xCx1xAxB tensor)
#   More examples:
Beispiel #50
0
    def __init__(self,
                 rng,
                 input,
                 filter_shape,
                 p1,
                 p2,
                 image_shape,
                 poolsize=(2, 2),
                 W=None,
                 b=None):
        """
        Build a padded convolution + max-pooling + ReLU layer, optionally
        starting from supplied weight and bias values.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type p1: int
        :param p1: first entry of the ``border_mode`` pair handed to conv2d
                   (per the Theano docs: rows of symmetric zero padding)

        :type p2: int
        :param p2: second entry of the ``border_mode`` pair handed to conv2d
                   (per the Theano docs: columns of symmetric zero padding)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)

        :param W: optional initial filter values, wrapped with theano.shared;
                  when None the filters are drawn uniformly at random

        :param b: optional initial bias values, wrapped with theano.shared;
                  when None the biases start at zero
        """
        assert image_shape[1] == filter_shape[1]

        # keep track of model input
        self.input = input

        float_type = theano.config.floatX
        n_filters = filter_shape[0]

        # fan_in:  "num input feature maps * filter height * filter width"
        #          inputs to each hidden unit
        # fan_out: "num output feature maps * filter height * filter width"
        #          / pooling size gradients reaching each lower-layer unit
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (n_filters * numpy.prod(filter_shape[2:]) //
                   numpy.prod(poolsize))

        if W is not None:
            self.W = theano.shared(W)
        else:
            # uniform initialization within +/- sqrt(6 / (fan_in + fan_out))
            limit = numpy.sqrt(6. / (fan_in + fan_out))
            drawn = rng.uniform(low=-limit, high=limit, size=filter_shape)
            self.W = theano.shared(numpy.asarray(drawn, dtype=float_type),
                                   borrow=True)

        if b is not None:
            self.b = theano.shared(b)
        else:
            # the bias is a 1D tensor -- one bias per output feature map
            zero_bias = numpy.zeros((n_filters, ), dtype=float_type)
            self.b = theano.shared(value=zero_bias, borrow=True)

        # convolve input feature maps with filters, padding by (p1, p2)
        feature_maps = conv2d(input=input,
                              filters=self.W,
                              filter_shape=filter_shape,
                              border_mode=(p1, p2),
                              input_shape=image_shape,
                              subsample=(1, 1))

        # pool each feature map individually, using maxpooling
        pooled = pool.pool_2d(input=feature_maps,
                              ws=poolsize,
                              ignore_border=True)

        # The bias vector is reshaped to (1, n_filters, 1, 1) so that it is
        # broadcast across mini-batches and feature map width & height.
        bias_row = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.nnet.relu(pooled + bias_row)

        # store parameters of this layer
        self.params = [self.W, self.b]
Beispiel #51
0
    def __init__(self,
                 input,
                 filter_shape,
                 image_shape,
                 f_params_w,
                 f_params_b,
                 lrn=False,
                 t_style=None,
                 t_content=None,
                 convstride=1,
                 padsize=0,
                 group=1,
                 poolsize=3,
                 poolstride=1):
        """Build a conv + ReLU (+ average pool, + LRN) layer from saved weights.

        Weights and biases are read with ``np.load`` from files located
        under the module-level ``params_path`` directory.

        :param input: input tensor; for ``group == 2`` it is indexed as
            ``[:, channels, :, :]`` and split in half along axis 1
        :param filter_shape: 4-element filter shape handed to ``conv2d``
        :param image_shape: 4-element input shape; only read when
            ``group == 2``, where entry 1 is halved to find the split point
        :param f_params_w: weight file name (``group == 1``) or an indexable
            pair of file names (``group == 2``); every loaded array is
            transposed with axes ``(3, 0, 1, 2)`` before use
        :param f_params_b: bias file name, or an indexable pair of names
            for ``group == 2``
        :param lrn: when ``True``, ``CrossChannelNormalization`` is applied
            to the layer output
        :param t_style: optional file path; the loaded array is stored as
            ``self.t_style``
        :param t_content: optional file path; the loaded array is stored as
            ``self.t_content``
        :param convstride: convolution stride, used for both spatial axes
        :param padsize: value passed to ``conv2d`` as ``border_mode``
        :param group: 1 for a single convolution, 2 for two convolutions
            over the two halves of the input channels
        :param poolsize: pooling window edge; a value of 1 skips pooling
        :param poolstride: pooling stride, used for both spatial axes
        :raises AssertionError: if ``group`` is neither 1 nor 2
        """

        def load_shared(fname, axes=None):
            # Load one parameter file and wrap it as a floatX shared variable.
            values = np.load(os.path.join(params_path, fname))
            if axes is not None:
                values = np.transpose(values, axes)
            return theano.shared(np.asarray(values,
                                            dtype=theano.config.floatX),
                                 borrow=True)

        self.input = input

        if t_style is not None:
            self.t_style = np.asarray(np.load(t_style),
                                      dtype=theano.config.floatX)

        if t_content is not None:
            self.t_content = np.asarray(np.load(t_content),
                                        dtype=theano.config.floatX)

        if lrn is True:
            self.lrn_func = CrossChannelNormalization()

        if group == 1:
            self.W = load_shared(f_params_w, axes=(3, 0, 1, 2))
            self.b = load_shared(f_params_b)
            # Add the bias here exactly as the grouped branch does; without
            # it self.b would be loaded but have no effect on the output.
            conv_out = conv2d(input=self.input,
                              filters=self.W,
                              filter_shape=filter_shape,
                              border_mode=padsize,
                              subsample=(convstride, convstride),
                              filter_flip=True) + self.b.dimshuffle(
                                  'x', 0, 'x', 'x')

        elif group == 2:
            # np.array copies, so halving the entries below never writes
            # through to an ndarray owned by the caller.
            self.filter_shape = np.array(filter_shape)
            self.image_shape = np.array(image_shape)
            # Each half sees half of the filters and half of the channels;
            # floor division keeps the counts integral.
            self.filter_shape[0] //= 2
            self.filter_shape[1] //= 2
            self.image_shape[1] //= 2

            self.W0 = load_shared(f_params_w[0], axes=(3, 0, 1, 2))
            self.W1 = load_shared(f_params_w[1], axes=(3, 0, 1, 2))
            self.b0 = load_shared(f_params_b[0])
            self.b1 = load_shared(f_params_b[1])

            split = self.image_shape[1]
            conv_out0 = conv2d(input=self.input[:, :split, :, :],
                               filters=self.W0,
                               filter_shape=tuple(self.filter_shape),
                               border_mode=padsize,
                               subsample=(convstride, convstride),
                               filter_flip=True) + self.b0.dimshuffle(
                                   'x', 0, 'x', 'x')
            conv_out1 = conv2d(input=self.input[:, split:, :, :],
                               filters=self.W1,
                               filter_shape=tuple(self.filter_shape),
                               border_mode=padsize,
                               subsample=(convstride, convstride),
                               filter_flip=True) + self.b1.dimshuffle(
                                   'x', 0, 'x', 'x')
            # Stitch the two halves back together along the channel axis.
            conv_out = T.concatenate([conv_out0, conv_out1], axis=1)

        else:
            raise AssertionError('group must be 1 or 2, got %r' % (group, ))

        relu_out = T.maximum(conv_out, 0)
        if poolsize != 1:
            self.output = pool.pool_2d(input=relu_out,
                                       ds=(poolsize, poolsize),
                                       ignore_border=True,
                                       st=(poolstride, poolstride),
                                       mode='average_exc_pad')
        else:
            self.output = relu_out

        if lrn is True:
            self.output = self.lrn_func(self.output)
Beispiel #52
0
def ResidualUnit(name,
                 left_b,
                 right_b,
                 load_params,
                 cur_dim,
                 down_dim,
                 up_dim,
                 stride=(1, 1),
                 left_convolve=False):
    """Build one residual unit from pre-trained parameters.

    The right branch is 1x1 conv (strided) -> BN -> ReLU -> 3x3 conv
    (border 1) -> BN -> ReLU -> 1x1 conv -> BN.  The left branch is either
    ``left_b`` unchanged or, when ``left_convolve`` is set, a strided 1x1
    conv followed by BN.  Both branches are summed and passed through ReLU.

    :param name: suffix inserted into the ``res%s_branch*`` and
        ``bn%s_branch*`` parameter keys
    :param left_b: input of the left (shortcut) branch
    :param right_b: input of the right (residual) branch
    :param load_params: mapping from parameter key to array
    :param cur_dim: input channel count used in the first filter shapes
    :param down_dim: channel count inside the right branch
    :param up_dim: output channel count of the last 1x1 convolutions
    :param stride: ``subsample`` of the first conv of each branch
    :param left_convolve: whether the left branch is convolved
    :return: ``(output, params, 4 * down_dim)`` where ``params`` lists the
        shared variables as W, gamma, beta, mean, std per conv stage
    """

    # NOTE: the loaded params do not have any bias vectors except for the
    #     final dense layer, so no bias additions here

    def conv_weights(res_key):
        # Stored filters are transposed with axes (3, 2, 0, 1) before use.
        return theano.shared(value=np.transpose(load_params[res_key + '/W'],
                                                (3, 2, 0, 1)),
                             borrow=True,
                             name=res_key + '_W')

    def bn_stats(bn_key):
        # Returns (gamma, beta, mean, std); std is sqrt of the stored variance.
        beta = theano.shared(load_params[bn_key + '/beta'],
                             borrow=True,
                             name=bn_key + '_beta')
        gamma = theano.shared(load_params[bn_key + '/gamma'],
                              borrow=True,
                              name=bn_key + '_gamma')
        mean = theano.shared(load_params[bn_key + '/mean/EMA'],
                             borrow=True,
                             name=bn_key + '_mean')
        std = theano.shared(np.sqrt(load_params[bn_key + '/variance/EMA']),
                            borrow=True,
                            name=bn_key + '_std-dev')
        return gamma, beta, mean, std

    def batch_norm(conv_out, stats):
        # BN is fed the tensor with axis 1 moved last, then the result is
        # moved back (presumably BN expects channels-last -- TODO confirm).
        gamma, beta, mean, std = stats
        normed = BN(inputs=conv_out.dimshuffle(0, 2, 3, 1),
                    gamma=gamma,
                    beta=beta,
                    mean=mean,
                    std=std)
        return normed.dimshuffle(0, 3, 1, 2)

    # param key names
    left_res = 'res%s_branch1' % name
    left_bn = 'bn%s_branch1' % name
    right_keys = [('res%s_branch2%s' % (name, tag),
                   'bn%s_branch2%s' % (name, tag)) for tag in 'abc']

    # init theano weights
    params = []
    if left_convolve:
        left_res_W = conv_weights(left_res)
        left_stats = bn_stats(left_bn)
        params += [left_res_W] + list(left_stats)

    right_stages = []
    for res_key, bn_key in right_keys:
        stage_W = conv_weights(res_key)
        stage_stats = bn_stats(bn_key)
        right_stages.append((stage_W, stage_stats))
        params += [stage_W] + list(stage_stats)

    (W_a, stats_a), (W_b, stats_b), (W_c, stats_c) = right_stages

    # make tensor graph
    if left_convolve:
        left_conv_out = conv2d(input=left_b,
                               filters=left_res_W,
                               filter_shape=(up_dim, cur_dim, 1, 1),
                               subsample=stride)
        left_out = batch_norm(left_conv_out, left_stats)
    else:
        left_out = left_b

    right_conv_a_out = conv2d(input=right_b,
                              filters=W_a,
                              filter_shape=(down_dim, cur_dim, 1, 1),
                              subsample=stride)
    right_a_out = T.nnet.relu(batch_norm(right_conv_a_out, stats_a))

    right_conv_b_out = conv2d(input=right_a_out,
                              filters=W_b,
                              filter_shape=(down_dim, down_dim, 3, 3),
                              border_mode=(1, 1))
    right_b_out = T.nnet.relu(batch_norm(right_conv_b_out, stats_b))

    right_c_conv_out = conv2d(input=right_b_out,
                              filters=W_c,
                              filter_shape=(up_dim, down_dim, 1, 1))
    right_out = batch_norm(right_c_conv_out, stats_c)

    output = T.nnet.relu(left_out + right_out)

    return output, params, 4 * down_dim
Beispiel #53
0
    def __init__(self,
                 rng,
                 input,
                 filter_shape,
                 image_shape,
                 poolsize=(2, 2),
                 needPool=True):
        """
        Initialise the convolution + max-pooling layer from the arguments.

        :type rng: numpy.random.RandomState
        :param rng: random state used to draw the initial weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic input tensor whose shape is image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, number of input feature maps,
                                filter height, filter width)
        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, number of input feature maps,
                            image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: (x, y) down-sampling factors

        :param needPool: accepted but not read by this constructor; pooling
                         is always applied
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        n_filters = filter_shape[0]
        kernel_area = numpy.prod(filter_shape[2:])

        # fan_in: input maps * filter height * filter width
        fan_in = numpy.prod(filter_shape[1:])
        # fan_out: filters * filter height * filter width // pooling area
        fan_out = n_filters * kernel_area // numpy.prod(poolsize)

        # Weights drawn uniformly from [-limit, limit]
        limit = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = rng.uniform(low=-limit, high=limit, size=filter_shape)
        self.W = theano.shared(numpy.asarray(initial_W,
                                             dtype=theano.config.floatX),
                               borrow=True)

        # All biases start at zero, one per filter
        self.b = theano.shared(value=numpy.zeros((n_filters, ),
                                                 dtype=theano.config.floatX),
                               borrow=True)

        # Convolve the input with the filter bank
        feature_maps = conv2d(input=input,
                              filters=self.W,
                              filter_shape=filter_shape,
                              input_shape=image_shape)

        # Down-sample each feature map with max pooling
        pooled = downsample.max_pool_2d(input=feature_maps,
                                        ds=poolsize,
                                        ignore_border=True)

        # Bias is broadcast over batch and spatial axes before the tanh
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled + bias)

        self.params = [self.W, self.b]
Beispiel #54
0
def deconv(X, w, s=2, b=None):
    """Full-mode convolution of ``X`` with ``w``, cropped by ``s`` per border.

    :param X: input passed to ``conv2d``
    :param w: filters passed to ``conv2d``
    :param s: number of rows/columns removed from each side of the last two
        axes of the full convolution; 0 keeps the result uncropped
    :param b: optional bias vector, broadcast as ``('x', 0, 'x', 'x')``
    :return: the cropped convolution, plus the bias when one is given
    """
    z = conv2d(X, w, border_mode='full')
    # A literal ``0:-0`` slice is ``0:0`` and would drop every element, so
    # only crop when there is something to remove.
    if s:
        z = z[:, :, s:-s, s:-s]
    if b is not None:
        z += b.dimshuffle('x', 0, 'x', 'x')
    return z
Beispiel #55
0
    def __init__(self,
                 rng,
                 trainmode,
                 running_average_factor,
                 input,
                 filter_shape,
                 image_shape,
                 poolsize=(2, 2),
                 name='convlayer'):
        """Conv -> max-pool -> per-channel normalisation -> ReLU layer.

        The pooled output is normalised over axes (0, 2, 3) with either the
        batch statistics or the stored running statistics, selected by
        ``trainmode``, then scaled by ``gamma`` and shifted by ``beta``.
        The convolution has no bias term.

        :param rng: random state used to draw ``W`` and ``gamma``
        :param trainmode: condition given to ``T.switch``; selects batch
            statistics when true, running statistics otherwise
        :param running_average_factor: weight of the old value in the
            running-statistics updates stored in ``self.raupdates``
        :param input: symbolic input tensor
        :param filter_shape: 4-element filter shape passed to ``conv2d``
        :param image_shape: input shape passed to ``conv2d``
        :param poolsize: pooling window passed to ``pool.pool_2d``
        :param name: suffix used in the shared variables' names
        """
        floatX = theano.config.floatX
        n_filters = filter_shape[0]

        def per_channel(vec):
            # Broadcast a per-filter vector over batch and spatial axes.
            return vec.dimshuffle('x', 0, 'x', 'x')

        self.input = input
        self.trainmode = trainmode
        self.running_average_factor = running_average_factor

        # initialize weights with random weights
        fan_in = np.prod(filter_shape[1:])
        fan_out = n_filters * np.prod(filter_shape[2:]) // np.prod(poolsize)
        w_limit = np.sqrt(6. / (fan_in + fan_out))
        w_init = rng.uniform(low=-w_limit, high=w_limit, size=filter_shape)
        self.W = theano.shared(np.asarray(w_init, dtype=floatX),
                               name='W_' + name,
                               borrow=True)

        self.conv_out = conv2d(input=input,
                               filters=self.W,
                               filter_shape=filter_shape,
                               input_shape=image_shape)
        # pool each feature map individually, using maxpooling
        self.pooled_out = pool.pool_2d(input=self.conv_out,
                                       ws=poolsize,
                                       ignore_border=True)

        # Scale is drawn uniformly, shift starts at zero.
        g_limit = np.sqrt(6. / n_filters)
        g_init = rng.uniform(low=-g_limit, high=g_limit, size=(n_filters, ))
        self.gamma = theano.shared(np.asarray(g_init, dtype=floatX),
                                   name='gamma_' + name,
                                   borrow=True)
        self.beta = theano.shared(np.zeros((n_filters, ), dtype=floatX),
                                  name='beta_' + name,
                                  borrow=True)

        # Running statistics both start at zero.
        self.new_running_mean = theano.shared(np.zeros(n_filters,
                                                       dtype=floatX),
                                              name='rmean_' + name,
                                              borrow=True)
        self.new_running_var = theano.shared(np.zeros(n_filters,
                                                      dtype=floatX),
                                             name='rvar_' + name,
                                             borrow=True)

        # Batch statistics, one value per filter.
        stat_axes = [0, 2, 3]
        mean = T.mean(self.pooled_out, axis=stat_axes)
        centered_sq = T.sqr(self.pooled_out - per_channel(mean))
        var = T.mean(centered_sq, axis=stat_axes)

        eps = 0.0001
        self.bnTr = (self.pooled_out - per_channel(mean)) / (
            T.sqrt(per_channel(var) + eps))
        self.bnTe = (self.pooled_out - per_channel(self.new_running_mean)) / (
            T.sqrt(per_channel(self.new_running_var) + eps))

        self.lbnOut = T.switch(self.trainmode, self.bnTr, self.bnTe)
        self.actInput = (per_channel(self.gamma) * self.lbnOut +
                         per_channel(self.beta))

        self.output = T.nnet.relu(self.actInput)
        self.params = [self.W, self.gamma, self.beta]

        # Update pairs: old * factor + batch statistic * (1 - factor).
        keep = self.running_average_factor
        self.raupdates = [
            (self.new_running_mean,
             self.new_running_mean * keep + mean * (1 - keep)),
            (self.new_running_var,
             self.new_running_var * keep + var * (1 - keep)),
        ]

        self.raparams = [self.new_running_mean, self.new_running_var]
Beispiel #56
0
    def __init__(self, rng, input, filter_shape):
        """
        Allocate a convolution + tanh layer with shared variable parameters.

        No pooling is applied: the output is the tanh of the biased
        convolution.

        :type rng: np.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)
        """
        floatX = theano.config.floatX

        # keep track of model input
        self.input = input

        # Weights are uniform in [-limit, limit] where limit is the inverse
        # of "num input feature maps * filter height * filter width".
        limit = 1.0 / np.prod(filter_shape[1:])
        initial_W = rng.uniform(low=-limit, high=limit, size=filter_shape)
        self.W = theano.shared(np.asarray(initial_W, dtype=floatX),
                               borrow=True)

        # one zero-initialised bias per output feature map
        self.b = theano.shared(value=np.zeros((filter_shape[0],),
                                              dtype=floatX),
                               borrow=True)

        # convolve input feature maps with filters
        feature_maps = conv2d(input=input, filters=self.W)

        # The bias vector is reshaped to (1, n_filters, 1, 1) so that it is
        # broadcast across mini-batches and feature map width & height.
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(feature_maps + bias)

        # store parameters of this layer
        self.params = [self.W, self.b]
Beispiel #57
0
# Flatten every entry of `out` with flatten(2) and join the results along
# axis 1 into a single matrix `vid_`.
out = [out[i].flatten(2) for i in range(len(out))]
vid_ = T.concatenate(out, axis=1)
# vid_ = var_norm(vid_,axis=1)

# trajectory convolution
# ------------------------------------------------------------------------------

if use.trajconv:
    # Reshape `t` to 4D: leading axis of batch.micro * prod(traj_shape[1:-1])
    # rows, two singleton axes, and traj_shape[-1] as the last axis.
    t_conv = t.reshape(
        (batch.micro * prod(traj_shape[1:-1]), 1, 1, traj_shape[-1]))
    # One 1 x filter_size filter, so the convolution only slides along the
    # last axis.
    t_filt_sh = (1, 1, 1, trajconv.filter_size)
    # n_out tracks the length of the last axis after each 'valid' convolution.
    n_out = traj_shape[-1]
    for i in xrange(trajconv.layers):
        # Filter weights drawn from a normal distribution (mean 0, std 0.01);
        # appended to t_W, which is defined outside this fragment.
        t_W.append(_shared(rng.normal(loc=0, scale=0.01, size=t_filt_sh)))
        t_conv = conv2d(t_conv,
                        filters=t_W[-1],
                        filter_shape=t_filt_sh,
                        border_mode='valid')
        # A 'valid' convolution shortens the axis by filter_size - 1.
        n_out -= trajconv.filter_size - 1
        # Bias vector of length n_out, initialised to 0.1 and appended to t_b.
        t_b.append(_shared(ones((n_out, ), dtype=floatX) * 0.1))
        # NOTE(review): the bias has one entry per position on the last axis
        # and is given a leading broadcast axis; confirm it broadcasts against
        # the 4D t_conv as intended.
        t_conv = t_conv + t_b[-1].dimshuffle('x', 0)
        t_conv = activation(t_conv)

    # NOTE(review): trajconv.res_shape is presumably the final n_out --
    # TODO confirm against where it is set.
    conv_length = prod(traj_shape[1:-1]) * trajconv.res_shape
    t_conv = t_conv.reshape((batch.micro, conv_length))
    if trajconv.append:
        # Keep the raw trajectory features and append the convolved ones.
        traj_ = T.concatenate([t.flatten(2), t_conv.flatten(2)], axis=1)
    else:
        # Convolved features replace the raw ones, so remove traj_size from
        # the MLP input size before adding conv_length below.
        traj_ = t_conv.flatten(2)
        n_in_MLP -= traj_size
    n_in_MLP += conv_length
Beispiel #58
0
    def __init__(self, input1, input2, input3, word_embeddings, batch_size,
                 sequence_len, embedding_size, filter_sizes, num_filters,
                 keep_prob):
        """Build a shared CNN encoder for three inputs and a cosine margin loss.

        Each input is embedded, convolved with every filter size, max-pooled
        over the whole sequence, passed through tanh and dropout, and the
        cosine similarities input1/input2 and input1/input3 feed a hinge
        loss with margin 0.05.

        :param input1: token ids of the question
        :param input2: token ids of the positive answer
        :param input3: token ids of the negative answer
        :param word_embeddings: initial value of the embedding lookup table
        :param batch_size: number of examples per mini-batch
        :param sequence_len: number of tokens per example
        :param embedding_size: length of one embedding vector
        :param filter_sizes: iterable of filter heights (tokens per window)
        :param num_filters: number of filters per filter size
        :param keep_prob: value forwarded to ``self._dropout``
        """
        rng = np.random.RandomState(23455)
        floatX = theano.config.floatX
        # The CNN input is 4D; this only adds a singleton channel axis.
        sentence_shape = (batch_size, 1, sequence_len, embedding_size)

        lookup_table = theano.shared(word_embeddings)
        self.params = [lookup_table]

        def embed(token_ids):
            # Replace every token id by its embedding vector.
            return lookup_table[T.cast(token_ids.flatten(), dtype="int32")]

        # input1 - question, input2 - positive answer, input3 - negative answer
        embedded = [embed(input1), embed(input2), embed(input3)]
        input_x1, input_x2, input_x3 = [
            vectors.reshape(sentence_shape) for vectors in embedded
        ]
        self.dbg_x1 = input_x1

        def conv_pool_tanh(sentence, W, b, filter_shape, pool_rows):
            # Convolution, max-pool over all window positions, bias, tanh.
            conv = conv2d(input=sentence,
                          filters=W,
                          filter_shape=filter_shape,
                          input_shape=sentence_shape)
            pooled = pool.pool_2d(input=conv,
                                  ds=(pool_rows, 1),
                                  ignore_border=True,
                                  mode='max')
            return conv, T.tanh(pooled + b.dimshuffle('x', 0, 'x', 'x'))

        outputs_1, outputs_2, outputs_3 = [], [], []
        # One filter bank per filter size, shared by all three inputs.
        for filter_size in filter_sizes:
            # num_filters filters for every filter size
            filter_shape = (num_filters, 1, filter_size, embedding_size)
            fan_in = np.prod(filter_shape[1:])
            fan_out = filter_shape[0] * np.prod(filter_shape[2:])
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            W_init = rng.uniform(low=-W_bound,
                                 high=W_bound,
                                 size=filter_shape)
            W = theano.shared(np.asarray(W_init, dtype=floatX), borrow=True)
            b = theano.shared(value=np.zeros((filter_shape[0], ),
                                             dtype=floatX),
                              borrow=True)

            # The convolved sequence has this many rows; pooling spans them all.
            pool_rows = sequence_len - filter_size + 1
            for sentence, collected in ((input_x1, outputs_1),
                                        (input_x2, outputs_2),
                                        (input_x3, outputs_3)):
                conv_out, pooled_active = conv_pool_tanh(
                    sentence, W, b, filter_shape, pool_rows)
                collected.append(pooled_active)

            self.params += [W, b]
            self.dbg_conv_out = conv_out.shape

        num_filters_total = num_filters * len(filter_sizes)
        self.dbg_outputs_1 = outputs_1[0].shape

        # Every sentence is represented by a vector of num_filters_total values.
        flat_shape = [batch_size, num_filters_total]
        flattened = [
            T.reshape(T.concatenate(outs, axis=1), flat_shape)
            for outs in (outputs_1, outputs_2, outputs_3)
        ]
        # dropout; keep_prob is forwarded unchanged to self._dropout
        output_drop1, output_drop2, output_drop3 = [
            self._dropout(rng, flat, keep_prob) for flat in flattened
        ]

        def norm(vectors):
            # Euclidean length of every row.
            return T.sqrt(T.sum(vectors * vectors, axis=1))

        len1 = norm(output_drop1)
        len2 = norm(output_drop2)
        len3 = norm(output_drop3)

        # Cosine between the question and each answer.
        cos12 = T.sum(output_drop1 * output_drop2, axis=1) / (len1 * len2)
        cos13 = T.sum(output_drop1 * output_drop3, axis=1) / (len1 * len3)
        self.cos12 = cos12
        self.cos13 = cos13

        zero = theano.shared(np.zeros(batch_size, dtype=floatX), borrow=True)
        margin = theano.shared(np.full(batch_size, 0.05, dtype=floatX),
                               borrow=True)

        # Hinge loss: max(0, margin - cos(q, positive) + cos(q, negative)).
        diff = T.cast(T.maximum(zero, margin - cos12 + cos13), dtype=floatX)
        self.cost = T.sum(diff, acc_dtype=floatX)

        # Mini-batch accuracy: an example counts as correct when its hinge
        # term is exactly zero.
        n_correct = T.sum(T.cast(T.eq(zero, diff), dtype='int32'))
        self.accuracy = n_correct / float(batch_size)
Beispiel #59
0
# Filter bank drawn uniformly from [-W_bound, W_bound].
W_init = rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
self.W = theano.shared(np.asarray(W_init, dtype=theano.config.floatX),
                       borrow=True)

# One zero-initialised bias per output feature map.
b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
self.b = theano.shared(value=b_values, borrow=True)

# Convolve the input feature maps with the filters.
conv_out = conv2d(input=input,
                  filters=self.W,
                  filter_shape=filter_shape,
                  input_shape=image_shape)

# Pool each feature map individually.
pooled_out = pool.pool_2d(input=conv_out, ds=poolsize, ignore_border=True)

# The bias vector is reshaped to (1, n_filters, 1, 1) so it is broadcast
# across mini-batches and feature map width & height before the tanh.
bias_4d = self.b.dimshuffle('x', 0, 'x', 'x')
self.output = T.tanh(pooled_out + bias_4d)
Beispiel #60
0
    def __init__(self, rng, input, filter_shape, image_shape, W=None, b=None):
        """
        Allocate a convolution + ReLU layer with shared variable parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights
                    when W is not supplied

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :param W: optional initial filter values; wrapped as-is in a shared
                  variable when given
        :param b: optional initial bias values; wrapped as-is in a shared
                  variable when given
        """

        assert image_shape[1] == filter_shape[1]

        # keep track of model input
        self.input = input

        # "num input feature maps * filter height * filter width" inputs
        # feed each hidden unit
        fan_in = numpy.prod(filter_shape[1:])

        # "num output feature maps * filter height * filter width"
        # (there is no pooling in this layer, so no pooling divisor)
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])

        if W is not None:
            self.W = theano.shared(W)
        else:
            # initialize weights uniformly in [-W_bound, W_bound]
            W_bound = numpy.sqrt(6. / (fan_in + fan_out))
            W_values = rng.uniform(low=-W_bound,
                                   high=W_bound,
                                   size=filter_shape)
            self.W = theano.shared(numpy.asarray(W_values,
                                                 dtype=theano.config.floatX),
                                   borrow=True)

        if b is not None:
            self.b = theano.shared(b)
        else:
            # the bias is a 1D tensor -- one bias per output feature map
            b_values = numpy.zeros((filter_shape[0], ),
                                   dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv2d(input=input,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape)

        # The bias vector is reshaped to (1, n_filters, 1, 1) so it is
        # broadcast across mini-batches and feature map width & height.
        bias_4d = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.nnet.relu(conv_out + bias_4d)

        # store parameters of this layer
        self.params = [self.W, self.b]

        # L2_sqr for L2 regularization
        self.L2_sqr = (self.b**2).sum() + (self.W**2).sum()