Exemple #1
0
def test_shape():
    """Demonstrate symbolic ``flatten`` and ``reshape`` and print the results.

    A 3-d symbolic input is flattened to a matrix and to a vector and the
    compiled function is evaluated on the module-level ``tensor3_val``.
    The resulting matrix is then reshaped to a (2, 2, 2) tensor and to an
    8-element vector.
    """
    # --- flatten -----------------------------------------------------------
    t3 = T.tensor3()
    as_matrix = T.flatten(t3, 2)
    as_vector = T.flatten(t3, 1)
    flatten_fn = theano.function([t3], [as_matrix, as_vector])
    flat_mat_val, flat_vec_val = flatten_fn(tensor3_val)
    for caption, value in (('flatten to 2-d array:', flat_mat_val),
                           ('flatten to 1-d array:', flat_vec_val)):
        print(caption)
        print(value)

    # --- reshape -----------------------------------------------------------
    # The target shapes are fixed tuples here.  An alternative is to feed the
    # shapes as symbolic ``T.lvector()`` inputs and pass the number of output
    # dimensions explicitly, e.g. ``T.reshape(mat, t3_shape, 3)``.
    mat = T.matrix()
    to_t3 = T.reshape(mat, (2, 2, 2))
    to_vec = T.reshape(mat, (8,))
    reshape_fn = theano.function([mat], [to_t3, to_vec])
    mat_2_t3_val, mat_2_vec_val = reshape_fn(flat_mat_val)
    for caption, value in (('reshape 2-d array to 3-d array:', mat_2_t3_val),
                           ('reshape 2-d array to 1-d array:', mat_2_vec_val)):
        print(caption)
        print(value)
        def loop(i, x, p, t):
            """Dot product of row ``i`` of d p[i, t[i]] / dx with row ``i`` of ``x``.

            Both operands are flattened to vectors before the dot product.
            """
            target_prob = p[i, t[i]]
            grad_row = T.grad(target_prob, x)[i]
            return T.dot(T.flatten(grad_row), T.flatten(x[i]))
Exemple #3
0
def build_model(tparams, options, Wemb):
    """Build the symbolic graph of a GRU-based binary classifier.

    Args:
        tparams: mapping of shared parameters.  ``'W_logistic'`` and
            ``'b_logistic'`` are read here; the mapping is also handed to
            ``gru_layer``.
        options: mapping of settings; ``'embDimSize'``, ``'use_dropout'`` and
            ``'L2_reg'`` are read here.
        Wemb: embedding table indexed by the integer entries of ``x``.

    Returns:
        The tuple ``(use_noise, x, t, mask, y, p_y_given_x, cost)``.
    """
    trng = RandomStreams(123)
    use_noise = theano.shared(numpy_floatX(0.))

    # Symbolic inputs.
    x = T.matrix('x', dtype='int32')
    t = T.matrix('t', dtype=config.floatX)
    mask = T.matrix('mask', dtype=config.floatX)
    y = T.vector('y', dtype='int32')

    n_timesteps, n_samples = x.shape[0], x.shape[1]
    seq_shape = [n_timesteps, n_samples]

    # Look up embeddings and prepend the time value as an extra feature.
    x_emb = Wemb[x.flatten()].reshape(seq_shape + [options['embDimSize']])
    t_feature = t.reshape(seq_shape + [1])
    x_t_emb = T.concatenate([t_feature, x_emb], axis=2)

    proj = gru_layer(tparams, x_t_emb, options, mask=mask)
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    # Logistic read-out and binary cross-entropy.
    pre_activation = T.dot(proj, tparams['W_logistic']) + tparams['b_logistic']
    p_y_given_x = T.nnet.sigmoid(pre_activation)
    log_p = T.flatten(T.log(p_y_given_x))
    log_not_p = T.flatten(T.log(1 - p_y_given_x))
    L = -(y * log_p + (1 - y) * log_not_p)
    cost = T.mean(L)

    # Optional L2 penalty on the read-out weights only.
    if options['L2_reg'] > 0.:
        cost += options['L2_reg'] * (tparams['W_logistic'] ** 2).sum()

    return use_noise, x, t, mask, y, p_y_given_x, cost
Exemple #4
0
def unet_crossentropy_loss_sampled(y_true, y_pred):
    """Binary cross-entropy on an equal-sized random sample of both classes.

    Both inputs are flattened.  Indices of non-zero and zero entries of
    ``y_true`` are shuffled with the module-level ``srng`` and truncated to the
    size of the smaller class.  The loss is the negated mean log-probability
    of the sampled positives minus the mean log of one minus the probability
    of the sampled negatives.  Predictions are clipped to
    ``[1e-4, 1 - 1e-4]`` first.
    """
    print('unet_crossentropy_loss_sampled')
    eps = 1.0e-4
    probs = T.flatten(T.clip(y_pred, eps, 1.0 - eps))
    labels = T.flatten(y_true)

    # nonzero() yields one index array per dimension; the input is 1-d here,
    # so only the first entry is needed.
    pos_idx = T.nonzero(labels)[0]
    neg_idx = T.nonzero(1 - labels)[0]

    # Shuffle each index set independently (positives first, then negatives).
    pos_idx = pos_idx[srng.permutation(n=pos_idx.shape[0])]
    neg_idx = neg_idx[srng.permutation(n=neg_idx.shape[0])]

    # Keep as many samples per class as the smaller class provides.
    class_sizes = [T.sum(labels), T.sum(1 - labels)]
    n_keep = T.cast(T.min(class_sizes), dtype='int64')
    pos_idx = pos_idx[:n_keep]
    neg_idx = neg_idx[:n_keep]

    pos_term = T.mean(T.log(probs[pos_idx]))
    neg_term = T.mean(T.log(1 - probs[neg_idx]))
    loss_vector = -pos_term - neg_term
    average_loss = T.mean(loss_vector)
    # This prints the symbolic variable, not a numeric value.
    print('average_loss: %s' % (average_loss,))
    return average_loss
    def apply(self, dataset, can_fit=True):
        """Locally normalise the images of ``dataset`` with a 9x9 window.

        The design matrix is reshaped to ``(len(x),) + self.img_shape``, the
        filtered neighbourhood response is subtracted from every pixel, and
        the result is divided by the larger of the local deviation and its
        per-image, per-channel mean.  The flattened result is written back
        with ``dataset.set_design_matrix``.

        ``can_fit`` is accepted but not used by this implementation.
        """
        x = dataset.get_design_matrix()

        denseX = T.matrix(dtype=x.dtype)
        image_shape = (len(x),) + self.img_shape
        X = denseX.reshape(image_shape)

        kernel_shape = (1, 1, 9, 9)
        kernel = gaussian_filter_9x9().reshape(kernel_shape)

        def filtered(images):
            # 'full' mode enlarges each spatial axis by 8 pixels; crop 4 from
            # every side to get back to the input size.
            response = conv.conv2d(input=images,
                                   filters=kernel,
                                   image_shape=image_shape,
                                   filter_shape=kernel_shape,
                                   border_mode='full')
            return response[:, :, 4:-4, 4:-4]

        # Remove the filtered 9x9 neighbourhood value from each pixel.
        centered_X = X - filtered(X)

        # Local deviation; never divide by less than its per-image mean.
        denom = T.sqrt(filtered(centered_X ** 2))
        per_img_mean = T.mean(T.flatten(denom, outdim=3), axis=2)
        floor = per_img_mean.dimshuffle((0, 1, 'x', 'x'))
        divisor = T.largest(floor, denom)

        normalised = T.flatten(centered_X / divisor, outdim=2)

        f = theano.function([denseX], normalised)
        dataset.set_design_matrix(f(x))
Exemple #6
0
    def unet_crossentropy_loss_sampled(y_true, y_pred):
        """Sampled, class-balanced loss with a symbolic NaN fallback.

        Both inputs are flattened.  Indices where ``y_true`` equals 1 and 0 are
        shuffled with ``UNET.srng`` and truncated to the size of the smaller
        class.  The loss is clipped to ``[1e-4, 1 - 1e-4]``.

        If the loss evaluates to NaN at run time, the mean clipped prediction
        over the sampled positives is returned instead.  The selection is done
        with ``T.switch`` because a Python ``if`` on a symbolic expression is
        decided once at graph-construction time rather than per evaluation.
        """
        epsilon = 1.0e-4
        y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
        y_true = T.flatten(y_true)

        # Indices of each class in the flattened target.
        classPos = 1
        classNeg = 0
        indPos = T.eq(y_true, classPos).nonzero()[0]
        indNeg = T.eq(y_true, classNeg).nonzero()[0]

        # Shuffle (positives first so the random stream is consumed in the
        # same order as before).
        indPos = indPos[UNET.srng.permutation(n=indPos.shape[0])]
        indNeg = indNeg[UNET.srng.permutation(n=indNeg.shape[0])]

        # Take an equal number of samples, limited by the smaller class.
        n_samples = T.cast(T.min([indPos.shape[0], indNeg.shape[0]]), dtype='int64')
        indPos = indPos[:n_samples]
        indNeg = indNeg[:n_samples]

        # NOTE(review): the negative-class term uses log(p) rather than the
        # usual log(1 - p); the original kept the log(1 - p) form commented
        # out, so this is left unchanged -- confirm which one is intended.
        loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(y_pred_clipped[indNeg]))
        loss_vector = T.clip(loss_vector, epsilon, 1.0-epsilon)
        average_loss = T.mean(loss_vector)

        # Run-time NaN guard.
        fallback = T.mean(y_pred_clipped[indPos])
        return T.switch(T.isnan(average_loss), fallback, average_loss)
Exemple #7
0
        def _recurrence(v_h_, x_h_, v_t_, x_t_, a_t_, is_aggressive):
            """One scan step: compute an action, advance the state, and score it.

            The previous state values are concatenated into one vector, passed
            through three ReLU layers and a linear read-out to obtain ``a``,
            and handed to ``_step_state`` together with ``is_aggressive``.

            Returns the new state values, the step cost and the transition
            cost, plus a scan stopping condition on ``x_h[0]``.
            """

            # Flat input vector for the controller network.
            state = tt.concatenate([v_h_, x_h_, tt.flatten(v_t_), tt.flatten(x_t_), tt.flatten(a_t_)])

            h0 = tt.dot(state, self.W_a_0) + self.b_a_0
            relu0 = tt.nnet.relu(h0)

            h1 = tt.dot(relu0, self.W_a_1) + self.b_a_1
            relu1 = tt.nnet.relu(h1)

            h2 = tt.dot(relu1, self.W_a_2) + self.b_a_2
            relu2 = tt.nnet.relu(h2)

            # Linear read-out (no bias term) producing the action.
            a = tt.dot(relu2, self.W_a_c)

            v_h, x_h, v_t, x_t, a_t, cost_transition = _step_state(v_h_, x_h_, v_t_, x_t_, a_t_, a, is_aggressive)

            # cost:

            # 0. smooth acceleration policy
            cost_accel = tt.abs_(a)

            # 1. forcing the host to move forward (until the top point of the roundabout)
            cost_progress = tt.nnet.relu(0.5*self.two_pi_r-x_h)

            # 2. keeping distance from close vehicles
            x_abs_diffs = tt.abs_(x_h - x_t)

            # Penalise gaps below require_distance; the comparison factor
            # switches the term off while x_h <= -0.5 * host_length.
            cost_accident =  tt.mean(3*tt.nnet.relu( self.require_distance-x_abs_diffs )) * (x_h > - 0.5*self.host_length) #tt.nnet.sigmoid(x_h + 0.5*self.host_length)

            # Weighted sum of the three cost terms.
            cost = self.alpha_accel * cost_accel + self.alpha_progress * cost_progress + self.alpha_accident * cost_accident

            # NOTE(review): the stopping condition is built via ``t`` while the
            # rest of this function uses ``tt`` -- presumably ``t`` is the
            # top-level theano module in the enclosing scope; confirm.
            return (v_h, x_h, v_t, x_t, a_t, cost, cost_transition), t.scan_module.until(x_h[0]>=0.45*self.two_pi_r)
 def __call__(self, x, leak):
     """Evaluate ``0.5 * (1 + leak) * x + 0.5 * (1 - leak) * abs(x)``.

     This equals ``x`` where ``x`` is positive and ``leak * x`` where it is
     negative.  When ``leak`` is one-dimensional only its first element is
     used for both coefficients.
     """
     pos_coef = 0.5 * (1 + leak)
     neg_coef = 0.5 * (1 - leak)
     if leak.ndim != 1:
         return pos_coef * x + neg_coef * abs(x)
     # 1-d leak: reduce each coefficient vector to its first entry.
     return T.flatten(pos_coef, 1)[0] * x + T.flatten(neg_coef, 1)[0] * abs(x)
Exemple #9
0
    def __create_node_set(self, n_features, n_output, data_in, note_set_name, weightsFunc = None,):
        """Build the hidden layers plus output layer and the MSE training graph.

        One ``Layer`` is created per entry of ``self.hidden_dimensions``,
        followed by an output layer with ``n_output`` units.  When
        ``weightsFunc`` is given it is called as
        ``weightsFunc(layer_index, state)`` and must return
        ``(weights, bias, state)``; the returned ``state`` is threaded into the
        next call.  Without it, ``None`` is passed for weights and bias.

        Returns:
            ``(layers, grads, normalized_mse, weights_list, n_weights,
            flat_weights)`` where ``grads`` and ``flat_weights`` are single
            concatenated vectors, ``normalized_mse`` is half the mean squared
            error against ``self.__y`` and ``n_weights`` counts weights and
            biases of all layers.
        """
        layers = []
        weights_list = []
        n_weights = 0
        state = None
        prev_out, prev_dim = data_in, n_features

        n_hidden_layers = len(self.hidden_dimensions)
        layer_sizes = list(self.hidden_dimensions) + [n_output]

        for layer_index, layer_width in enumerate(layer_sizes):
            is_output = layer_index == n_hidden_layers

            if weightsFunc is None:
                weights, bias = None, None
            else:
                weights, bias, state = weightsFunc(layer_index, state)

            if is_output:
                link = self.link_function_output
                suffix = " Output Layer"
            else:
                link = self.link_function_hidden
                suffix = " Hidden Layer"

            layer = Layer(data_in=prev_out,
                          n_input=prev_dim,
                          n_output=layer_width,
                          link_function=link,
                          weights=weights,
                          bias=bias,
                          name=note_set_name + suffix)

            weights_list.extend([layer.weights, layer.bias])
            layers.append(layer)
            # +1 accounts for the bias of every unit.
            n_weights += (prev_dim+1)*layer_width

            if not is_output:
                prev_out = layer.output
                prev_dim = layer_width

        output_layer = layers[-1]

        # All parameters as one long vector.
        flat_weights = T.concatenate([T.flatten(param) for param in weights_list])
        flat_weights.name = "Network " + note_set_name + " Weights"

        # Half mean squared error against the stored targets.
        residual = self.__y - output_layer.output
        normalized_mse = T.mean(T.sqr(residual)) / 2.0
        normalized_mse.name = note_set_name + " MSE"

        # Gradients in the same order and layout as flat_weights.
        per_param_grads = T.grad(normalized_mse, weights_list)
        grads = T.concatenate([T.flatten(g) for g in per_param_grads])
        grads.name = note_set_name + " Gradients"

        return layers,grads,normalized_mse,weights_list, n_weights, flat_weights
Exemple #10
0
def model(X1, X2, w1, w2, w3, p_drop_conv):
    """Two parallel conv branches concatenated and projected by ``w3``.

    Each branch applies a 'valid' convolution, a rectifier, a mean over
    axis 3, dropout with ``p_drop_conv`` and a flatten to two dimensions.
    The return value is the plain dot product with ``w3``.
    """
    def branch(inputs, kernels):
        activated = rectify(conv2d(inputs, kernels, border_mode='valid'))
        averaged = T.mean(activated, axis=3)
        return T.flatten(dropout(averaged, p_drop_conv), outdim=2)

    # First layer: both halves side by side along the feature axis.
    first_half = branch(X1, w1)
    second_half = branch(X2, w2)
    l1 = T.concatenate([first_half, second_half], axis=1)

    # Output projection.
    return T.dot(l1, w3)
 def lower_bound(self):
     """Return the mean over rows of the Gaussian term minus the latent prior.

     ``self.trunc_output`` and ``self.inpt`` are flattened to two dimensions.
     ``sigma`` is the mean of ``self.trunk_sigma`` when
     ``self.out_distribution == True`` and ``0`` otherwise.  Per element the
     term is ``0.5*log(2*pi) + 0.5*sigma + 0.5*((inp - mu)/exp(sigma))**2``,
     summed over axis 1.

     NOTE(review): ``trunk_sigma`` is spelled differently from
     ``trunc_output`` -- confirm the attribute name is intended.
     """
     mu = T.flatten(self.trunc_output, outdim=2)
     inp = T.flatten(self.inpt, outdim=2)

     # The explicit comparison with True is kept on purpose so that exactly
     # the same values enable the learned sigma as before.
     sigma = (T.mean(T.flatten(self.trunk_sigma, outdim=2))
              if self.out_distribution == True else 0)

     scaled_residual = (inp - mu) / T.exp(sigma)
     per_element = 0.5 * np.log(2 * np.pi) + 0.5 * sigma + 0.5 * scaled_residual ** 2.0
     log_gauss = T.sum(per_element, axis=1)
     return T.mean(log_gauss - self.latent_layer.prior)
Exemple #12
0
def t_unroll_ae(wts, bs, tied_wts=False):
    ''' Flattens matrices and concatenates to a vector - specifically for autoencoders

    With ``tied_wts`` the result consists of the single weight matrix
    ``wts[0]`` followed by the two bias vectors ``bs[0]`` and ``bs[1]``.
    Otherwise the work is delegated to ``t_unroll(wts, bs)``.
    '''
    if not tied_wts:
        return t_unroll(wts, bs)

    # Empty floatX prefix for the concatenation.  The keyword is ``dtype``;
    # ``numpy.array`` has no ``type`` argument and raised TypeError before.
    v = np.array([], dtype=theano.config.floatX)
    return T.concatenate(
        (v, T.flatten(wts[0]), T.flatten(bs[0]), T.flatten(bs[1])))
def model(X,
    h2_u, h3_u,
    h2_s, h3_s,
    w, w2, g2, b2, w3, g3, b3, wy
    ):
    """Three strided conv stages whose max-pooled outputs are concatenated.

    Stages two and three are batch-normalised with the supplied
    ``g``/``b``/``u``/``s`` values.  The stage outputs are pooled with windows
    (4, 4), (2, 2) and (1, 1), flattened to two dimensions and joined along
    axis 1.  The result is returned wrapped in a one-element list.

    ``wy`` is accepted but not used here.
    """
    conv_args = dict(subsample=(2, 2), border_mode=(2, 2))

    h = lrelu(dnn_conv(X, w, **conv_args))
    h2 = lrelu(batchnorm(dnn_conv(h, w2, **conv_args), g=g2, b=b2, u=h2_u, s=h2_s))
    h3 = lrelu(batchnorm(dnn_conv(h2, w3, **conv_args), g=g3, b=b3, u=h3_u, s=h3_s))

    # Pool window equals pool stride for every stage.
    pooled = [
        T.flatten(dnn_pool(activation, window, window, mode='max'), 2)
        for activation, window in ((h, (4, 4)), (h2, (2, 2)), (h3, (1, 1)))
    ]
    f = T.concatenate(pooled, axis=1)
    return [f]
Exemple #14
0
def model(X, w1, w2, w3, p_drop_conv, p_drop_hidden):
    """Two-stage convolutional classifier.

    Returns the first-stage feature maps ``l1``, the flattened second-stage
    features ``l2`` and the softmax output ``pyx``.

    NOTE(review): ``p_drop_hidden`` is accepted but never used in this body.
    """
    # Stage 1: 'full' convolution -> rectifier -> 2x2 max-pool -> dropout.
    l1a = rectify(conv2d(X, w1, border_mode='full'))
    l1 = max_pool_2d(l1a, (2, 2))
    l1 = dropout(l1, p_drop_conv)

    # NOTE(review): the result of this expression is discarded.  It is kept
    # because it may still draw from the random stream used by ``dropout``;
    # confirm whether it can be removed.
    dropout(T.flatten(max_pool_2d(rectify(conv2d(X, w2)), (2,2)), outdim=2), 0.3)
    
    # Stage 2: convolution -> rectifier -> 2x2 max-pool -> flatten -> dropout.
    l2a = rectify(conv2d(l1, w2))
    l2b = max_pool_2d(l2a, (2, 2))
    l2 = T.flatten(l2b, outdim=2)
    l2 = dropout(l2, p_drop_conv)

    # Softmax read-out.
    pyx = softmax(T.dot(l2, w3))
    return l1, l2, pyx
def set_sampling_function(decoder_feature_function,
                          decoder_red_function,
                          decoder_green_function,
                          decoder_blue_function):
    """Compile a function that decodes hidden vectors into index images.

    The second output of ``decoder_feature_function`` is fed to the three
    per-colour decoders.  For every pixel the arg-max over axis 1 of each
    decoder output is taken, and the three resulting single-channel maps are
    concatenated along axis 1.

    Returns:
        A compiled Theano function taking ``hidden_data`` and returning a
        one-element list with the decoded image.
    """
    hidden_data = T.matrix(name='hidden_data',
                           dtype=theano.config.floatX)

    # decoder: shared features, then one head per colour (red, green, blue)
    decoder_feature = decoder_feature_function(hidden_data)[1]
    colour_heads = (decoder_red_function,
                    decoder_green_function,
                    decoder_blue_function)
    channel_outputs = [head(decoder_feature) for head in colour_heads]

    # All sizes are taken from the red channel.
    red_output = channel_outputs[0]
    num_samples = red_output.shape[0]
    num_rows    = red_output.shape[2]
    num_cols    = red_output.shape[3]
    num_pixels  = num_rows*num_cols

    def most_likely_intensity(channel):
        # shape = (num_samples, num_intensity, num_pixels)
        channel = T.flatten(channel, 3)
        # shape = (num_samples, num_pixels, num_intensity)
        channel = T.swapaxes(channel, axis1=1, axis2=2)
        # shape = (num_samples*num_pixels, num_intensity)
        channel = channel.reshape((num_samples*num_pixels, -1))
        # softmax, then pick the most probable intensity per pixel
        winners = T.argmax(T.nnet.softmax(channel), axis=1)
        return winners.reshape((num_samples, 1, num_rows, num_cols))

    decoder_image = T.concatenate(
        [most_likely_intensity(channel) for channel in channel_outputs],
        axis=1)

    function = theano.function(inputs=[hidden_data,],
                               outputs=[decoder_image,],
                               on_unused_input='ignore')
    return function
Exemple #16
0
def gauss_style_loss(x_truth, x_guess, log_var=0., scale=1., use_huber=False):
    """Per-row pseudo-Gaussian loss between the gram matrices of two batches.

    The gram matrices of both inputs are flattened to two dimensions,
    compared entry-wise with ``log_prob_gaussian`` (``do_sum=False``, no
    mask) and summed over axis 1.

    ``scale`` is accepted but not used here.  The normalisation by feature
    map size that would use it is not applied.
    """
    gram_truth, gram_guess = [
        T.flatten(gram_matrix(features), 2) for features in (x_truth, x_guess)
    ]
    entry_loss = log_prob_gaussian(gram_truth, gram_guess,
                                   log_vars=log_var, do_sum=False,
                                   use_huber=use_huber, mask=None)
    # Sum over gram-matrix entries, leaving one value per row.
    return T.sum(entry_loss, axis=1, keepdims=False)
Exemple #17
0
    def L2SVMcost(self, y):
        """Return a hinge-style cost with a squared-norm penalty on W and b.

        The value is::

            0.5 * <W, W> + 0.5 * <b, b>
                + 0.6 * mean(sum(max(0, 1 - p_y_given_x * y), axis=1))

        where ``W`` and ``b`` are flattened to vectors before the dot
        products.

        :type y: theano.tensor.TensorType
        :param y: targets, multiplied element-wise with
                  ``self.p_y_given_x`` (presumably coded as +1/-1 per class
                  -- confirm against the caller)

        NOTE(review): despite the method name, the hinge term is not squared.
        """
        w_flat = T.flatten(self.W, outdim=1)
        b_flat = T.flatten(self.b, outdim=1)

        # Squared Euclidean norms of the parameters.
        w_penalty = 0.5 * T.dot(w_flat, w_flat)
        b_penalty = 0.5 * T.dot(b_flat, b_flat)

        # Hinge term: summed per row, then averaged.
        margin_violation = T.maximum(0, (1 - self.p_y_given_x * y))
        hinge = T.sum(margin_violation, axis=1).mean()

        z = w_penalty + b_penalty + 0.6 * hinge
        return z
def create_model():
    """Create the deep autoencoder model with Blocks.

    The network has layer sizes 784-1000-500-250-30-250-500-1000-784.  All
    layers are logistic except the 30-unit layer, which has no activation.

    Returns:
        ``(x, cost, squared_err)`` -- the input variable, the binary
        cross-entropy between input and reconstruction, and the squared
        error between them.
    """
    layer_sizes = [784, 1000, 500, 250, 30, 250, 500, 1000, 784]
    encoder_activations = [Logistic(), Logistic(), Logistic(), None]
    decoder_activations = [Logistic(), Logistic(), Logistic(), Logistic()]

    mlp = MLP(activations=encoder_activations + decoder_activations,
              dims=layer_sizes,
              weights_init=Sparse(15, IsotropicGaussian()),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')

    def flat_input():
        # A fresh flatten node per use, exactly as in the original graph.
        return tensor.flatten(x, outdim=2)

    x_hat = mlp.apply(flat_input())
    squared_err = SquaredError().apply(flat_input(), x_hat)
    cost = BinaryCrossEntropy().apply(flat_input(), x_hat)

    return x, cost, squared_err
def set_updater_function(feature_extractor,
                         sample_generator,
                         generator_parameters,
                         generator_optimizer):
    """Compile the moment-matching update step for the generator.

    The cost sums, for every feature level and for the flattened data itself,
    the mean squared difference of the batch means and of the batch means of
    squares between real and generated values.  The last output of
    ``sample_generator`` is treated as the generated data; the preceding
    outputs are its features.

    Returns:
        A compiled Theano function taking ``(input_data, hidden_data)``,
        returning ``[moment_match_cost, negative_data]`` and applying the
        updates produced by ``generator_optimizer``.
    """
    # symbolic inputs
    input_data  = T.tensor4(name='input_data',
                            dtype=theano.config.floatX)
    hidden_data = T.matrix(name='hidden_data',
                           dtype=theano.config.floatX)

    # features of the real data
    positive_features = feature_extractor(input_data)

    # generated sample: last entry is the data, the rest are its features
    generated = sample_generator(hidden_data)
    negative_data     = generated[-1]
    negative_features = generated[:-1]

    def moment_gaps(pos, neg):
        """Return the first- and second-moment mismatch between two batches."""
        first = T.mean(T.sqr(T.mean(pos, axis=0)-T.mean(neg, axis=0)))
        second = T.mean(T.sqr(T.mean(T.sqr(pos), axis=0)-T.mean(T.sqr(neg), axis=0)))
        return first, second

    # moment matching on every feature level
    moment_match_cost = 0
    for level in range(len(positive_features)):
        first, second = moment_gaps(positive_features[level],
                                    negative_features[level])
        moment_match_cost += first
        moment_match_cost += second

    # moment matching on the flattened data itself
    first, second = moment_gaps(T.flatten(input_data, 2),
                                T.flatten(negative_data, 2))
    moment_match_cost += first
    moment_match_cost += second

    generator_updates = generator_optimizer(generator_parameters,
                                            moment_match_cost)

    updater_function = theano.function(inputs=[input_data, hidden_data],
                                       outputs=[moment_match_cost, negative_data],
                                       updates=generator_updates,
                                       on_unused_input='ignore')
    return updater_function
Exemple #20
0
def jacobian_mul_vector_l_flat(y, x, W, v, x_val, W_val, v_val):
    """Evaluate ``v . J`` for ``J = dy/dx`` and return it shaped like ``x``.

    The Jacobian is flattened to ``J.ndim - 1`` dimensions before the
    product.  The graph is compiled with inputs ``[x, W, v]`` and evaluated
    immediately on the given values.
    """
    jac = theano.gradient.jacobian(y, x)
    # Jacobian with respect to x, with its trailing axes collapsed.
    jac_flat = T.flatten(jac, jac.ndim - 1)
    product = T.reshape(v.dot(jac_flat), T.shape(x))
    compiled = theano.function([x, W, v], product)
    return compiled(x_val, W_val, v_val)
Exemple #21
0
    def apply(self, dataset, can_fit=True):
        """Locally normalise the images of ``dataset`` with a 9x9 box window.

        The 9x9 neighbourhood mean is subtracted from every pixel.  Each pixel
        is then divided by the root of the 9x9 sum of squared centred values
        whenever that root exceeds 1.  The flattened result is written back
        with ``dataset.set_design_matrix``.

        ``can_fit`` is accepted but not used by this implementation.
        """
        x = dataset.get_design_matrix()

        denseX = T.matrix(dtype=x.dtype)
        image_shape = (len(x),) + self.img_shape
        X = denseX.reshape(image_shape)

        window_shape = (1, 1, 9, 9)
        ones_patch = T.ones(window_shape, dtype=x.dtype)

        def windowed(images, kernel):
            # 'full' mode enlarges each spatial axis by 8 pixels; crop 4 from
            # every side to get back to the input size.
            response = conv.conv2d(input=images,
                                   filters=kernel,
                                   image_shape=image_shape,
                                   filter_shape=window_shape,
                                   border_mode='full')
            return response[:, :, 4:-4, 4:-4]

        # For each pixel, remove the mean of its 9x9 neighbourhood.
        centered_X = X - windowed(X, ones_patch / (9.*9.))

        # Scale down where the norm of the 9x9 patch is bigger than 1.
        denom = T.sqrt(windowed(centered_X ** 2, ones_patch))
        xdenom = denom.reshape(X.shape)
        normalised = centered_X / T.largest(1.0, xdenom)
        normalised = T.flatten(normalised, outdim=2)

        f = theano.function([denseX], normalised)
        dataset.set_design_matrix(f(x))
Exemple #22
0
def model(X, w1, w2, w3, Max_Pooling_Shape, p_drop_conv, p_drop_hidden):
    """One conv stage, one rectified hidden layer and a softmax output.

    The conv stage is a 'valid' convolution, a rectifier, max-pooling with
    ``Max_Pooling_Shape`` and dropout with ``p_drop_conv``, flattened to two
    dimensions.  The hidden layer uses dropout with ``p_drop_hidden``.
    """
    feature_maps = rectify(conv2d(X, w1, border_mode="valid"))
    pooled = max_pool_2d(feature_maps, Max_Pooling_Shape)
    l1 = T.flatten(dropout(pooled, p_drop_conv), outdim=2)

    hidden = rectify(T.dot(l1, w2))
    l2 = dropout(hidden, p_drop_hidden)

    return softmax(T.dot(l2, w3))
Exemple #23
0
def model_conv(
        X,
        w_1,
        w_2,
        w_3,
        w_h2,
        w_o,
        p_use_input,
        p_use_hidden
        ):
    """Three conv/pool/dropout stages, one hidden layer and a softmax output.

    The input and the first stage use ``p_use_input`` for dropout; every
    later dropout uses ``p_use_hidden``.  Only the first convolution uses
    ``border_mode='full'``.

    Returns:
        ``(out_1, out_2, out_3, h2, py_x)`` where ``out_3`` is already
        flattened to two dimensions.
    """
    max_pool = T.signal.downsample.max_pool_2d

    def conv_stage(inputs, kernels, p_use, **conv_kwargs):
        # convolution -> rectifier -> 2x2 max-pool -> dropout
        activated = rectify(T.nnet.conv2d(inputs, kernels, **conv_kwargs))
        return dropout(max_pool(activated, (2, 2)), p_use)

    X = dropout(X, p_use_input)

    out_1 = conv_stage(X, w_1, p_use_input, border_mode='full')
    out_2 = conv_stage(out_1, w_2, p_use_hidden)
    out_3 = conv_stage(out_2, w_3, p_use_hidden)

    # Fully connected part.
    out_3 = T.flatten(out_3, outdim=2)
    h2 = dropout(rectify(T.dot(out_3, w_h2)), p_use_hidden)

    # Output layer, activation function = softmax.
    py_x = softmax(T.dot(h2, w_o))
    return out_1, out_2, out_3, h2, py_x
Exemple #24
0
    def test_log1msigm_to_softplus(self):
        """Check the compiled graph of ``log(1 - sigmoid(x))``.

        The compiled graph must consist of a softplus node followed by a
        negation.  The same is checked with a ``flatten`` in between, and a
        softplus node must still be present when a ``reshape`` is in between.
        """
        x = T.matrix()
        softplus_cls = theano.tensor.nnet.sigm.ScalarSoftplus

        def compile_sorted(expr):
            fn = theano.function([x], expr, mode=self.m)
            return fn, fn.maker.fgraph.toposort()

        def run(fn):
            fn(numpy.random.rand(54, 11).astype(config.floatX))

        # Plain expression: softplus followed by negation.
        f, nodes = compile_sorted(T.log(1 - sigmoid(x)))
        assert len(nodes) == 2
        assert isinstance(nodes[0].op.scalar_op, softplus_cls)
        assert isinstance(nodes[1].op.scalar_op, theano.scalar.Neg)
        run(f)

        # Same test with a flatten
        f, nodes = compile_sorted(T.log(1 - T.flatten(sigmoid(x))))
        assert len(nodes) == 3
        assert isinstance(nodes[0].op, T.Flatten)
        assert isinstance(nodes[1].op.scalar_op, softplus_cls)
        assert isinstance(nodes[2].op.scalar_op, theano.scalar.Neg)
        run(f)

        # Same test with a reshape; the exact node count is not checked here.
        f, nodes = compile_sorted(T.log(1 - sigmoid(x).reshape([x.size])))
        assert any(isinstance(node.op, T.Reshape) for node in nodes)
        assert any(isinstance(getattr(node.op, 'scalar_op', None), softplus_cls)
                   for node in nodes)
        run(f)
Exemple #25
0
def _flatten_1d_or_2d(v):
    """Return ``v`` with at most two dimensions.

    Variables with more than two dimensions are flattened to two with
    ``T.flatten(v, outdim=2)``; one- and two-dimensional variables are
    returned unchanged.

    Raises:
        ValueError: if ``v.ndim`` is neither greater than 2 nor within 1..2
            (for example a 0-d variable).
    """
    ndim = v.ndim
    if ndim > 2:
        return T.flatten(v, outdim=2)
    if 1 <= ndim <= 2:
        return v
    raise ValueError(
        "expected a variable with at least 1 dimension, got ndim=%r" % (ndim,))
Exemple #26
0
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Return the mean negative log-probability of the target classes.

    Predictions come from ``model.l_out`` and targets from ``model.l_target``
    (flattened and cast to int32).  The probability picked for each row is
    clipped to ``[epsilon, 1]`` before the logarithm.
    """
    predictions = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    targets = T.cast(T.flatten(raw_targets), 'int32')

    # One probability per row: the entry at that row's target index.
    row_index = T.arange(predictions.shape[0])
    p_target = T.clip(predictions[row_index, targets], epsilon, 1.)

    return -T.mean(T.log(p_target))
    def feature_extractor(input_data):
        """Run five conv stages and return the result flattened to 2-d.

        Every stage is a chain of biased convolutions with ``border_mode=(1, 1)``
        and a ReLU between consecutive convolutions, followed by a ReLU and a
        2x2 pooling with stride 2.  Weights and biases are read from the
        enclosing scope.
        """
        def biased_conv(inputs, weights, bias):
            return dnn_conv(inputs, weights, border_mode=(1, 1)) + bias.dimshuffle("x", 0, "x", "x")

        stages = (
            # conv stage 0 (64x64=>32x32)
            ((conv_w0_0, conv_b0_0), (conv_w0_1, conv_b0_1)),
            # conv stage 1 (32x32=>16x16)
            ((conv_w1_0, conv_b1_0), (conv_w1_1, conv_b1_1)),
            # conv stage 2 (16x16=>8x8)
            ((conv_w2_0, conv_b2_0), (conv_w2_1, conv_b2_1), (conv_w2_2, conv_b2_2)),
            # conv stage 3 (8x8=>4x4)
            ((conv_w3_0, conv_b3_0), (conv_w3_1, conv_b3_1), (conv_w3_2, conv_b3_2)),
            # conv stage 4 (4x4=>2x2)
            ((conv_w4_0, conv_b4_0), (conv_w4_1, conv_b4_1), (conv_w4_2, conv_b4_2)),
        )

        h = input_data
        for stage in stages:
            for position, (weights, bias) in enumerate(stage):
                # The first convolution of a stage takes its input as is;
                # later ones take the rectified previous output.
                if position > 0:
                    h = relu(h)
                h = biased_conv(h, weights, bias)
            h = dnn_pool(relu(h), ws=(2, 2), stride=(2, 2))

        return T.flatten(h, 2)
def convolutional_model(X, w_1, w_2, w_3, w_4, w_5, w_6, p_1, p_2, p_3, p_4, p_5):
    """Three conv blocks, two rectified dense layers and a linear output.

    Each conv block is convolution, rectification, 2x2 max-pooling with
    ``ignore_border=True``, bias addition and ``tanh``.  The biases ``b_1``,
    ``b_2`` and ``b_3`` are read from module scope.  Dropout with ``p_1`` to
    ``p_5`` follows each of the five layers.
    """
    def conv_block(inputs, kernels, bias, **conv_kwargs):
        rectified = T.maximum(conv2d(inputs, kernels, **conv_kwargs), 0.)
        pooled = max_pool_2d(rectified, (2, 2), ignore_border=True)
        return T.tanh(pooled + bias.dimshuffle('x', 0, 'x', 'x'))

    l1 = dropout(conv_block(X, w_1, b_1, border_mode='full'), p_1)
    l2 = dropout(conv_block(l1, w_2, b_2), p_2)
    # Flatten to switch to the dense layers.
    l3 = dropout(T.flatten(conv_block(l2, w_3, b_3), outdim=2), p_3)

    l4 = dropout(T.maximum(T.dot(l3, w_4), 0.), p_4)
    l5 = dropout(T.maximum(T.dot(l4, w_5), 0.), p_5)
    return T.dot(l5, w_6)
    def lp_norm(self, n, k, r, c, z):
        '''
        Write the Lp norm of one pooling window into ``z[n, k, r, c]``.

        Lp = ( 1/n * sum(|x_i|^p, 1..n))^(1/p) where p = 1 + ln(1+e^P)

        The window is taken from ``self.y[n, k]``.  Its rows start at
        ``r * stride[0]`` and its columns at ``c * stride[1]``, both clamped
        by the padding and image bounds.

        :param n: first index into ``self.y`` and ``z``
        :param k: second index into ``self.y`` and ``z``
        :param r: output row index
        :param c: output column index
        :param z: tensor receiving the result
        :return: ``z`` with entry ``[n, k, r, c]`` set to the window's Lp norm
        '''
        pool_h, pool_w = self.pool_size
        stride_h, stride_w = self.stride
        pad_h, pad_w = self.pad[0], self.pad[1]

        # Row extent of the window.
        row_origin = r * stride_h
        row_end = T.minimum(row_origin + pool_h, self.img_rows)
        row_end = T.minimum(row_end, self.x_m2d + pad_h)
        row_st = T.maximum(row_origin, pad_h)

        # Column extent of the window.
        col_origin = c * stride_w
        col_end = T.minimum(col_origin + pool_w, self.img_cols)
        col_end = T.minimum(col_end, self.x_m1d + pad_w)
        col_st = T.maximum(col_origin, pad_w)

        # p = 1 + softplus(P)
        exponent = 1 + T.log(1 + T.exp(self.P))

        window = T.flatten(self.y[n, k, row_st:row_end, col_st:col_end], 1)
        mean_power = T.mean(T.pow(T.abs_(window), exponent))
        Lp = T.pow(mean_power, 1 / exponent)

        return T.set_subtensor(z[n, k, r, c], Lp)
def model(X, params, featMaps, pieces, pDropConv, pDropHidden):
    """Build the symbolic forward pass of a maxout conv net with a softmax output.

    The network has three conv stages (conv with ``border_mode='half'`` ->
    add bias -> maxout -> 2x2 max-pool with stride 2 -> dropout), followed
    by a flatten, two dense stages (affine -> ``relu`` with ``alpha=0`` ->
    dropout) and a final affine layer fed to ``softmax``.

    :param X: network input
    :param params: indexable of six ``(weights, bias)``-like entries; entry
        ``i`` is read as ``params[i][0]`` and ``params[i][1]``
    :param featMaps: per-conv-stage second argument to ``maxout``
    :param pieces: per-conv-stage third argument to ``maxout``
    :param pDropConv: dropout argument used after every conv stage
    :param pDropHidden: dropout argument used after every dense stage
    :return: the ``softmax`` of the last affine layer
    """
    layer = X

    # Spatial sizes noted by the original author:
    #   stage 0: conv (32, 32) -> pool (16, 16)
    #   stage 1: conv (16, 16) -> pool (8, 8)
    #   stage 2: conv (8, 8)   -> pool (4, 4)
    for lnum in range(3):
        conv_bias = params[lnum][1].dimshuffle('x', 0, 'x', 'x')
        layer = conv2d(layer, params[lnum][0], border_mode='half') + conv_bias
        layer = maxout(layer, featMaps[lnum], pieces[lnum])
        layer = pool_2d(layer, (2, 2), st=(2, 2), ignore_border=False, mode='max')
        layer = basicUtils.dropout(layer, pDropConv)

    # Collapse everything but the leading axis before the dense stages.
    layer = T.flatten(layer, outdim=2)

    for lnum in (3, 4):
        dense_bias = params[lnum][1].dimshuffle('x', 0)
        layer = T.dot(layer, params[lnum][0]) + dense_bias
        layer = relu(layer, alpha=0)
        layer = basicUtils.dropout(layer, pDropHidden)

    # Original author's note: training with the softmax from nnet produced
    # NaN, hence the ``softmax`` that is in scope here is used instead.
    output_bias = params[5][1].dimshuffle('x', 0)
    return softmax(T.dot(layer, params[5][0]) + output_bias)
Exemple #31
0
# Symbolic input sequence; the test value lets Theano check shapes eagerly
# when test values are enabled.
input_logits = T.ivector('inputs')
input_logits.tag.test_value = bk.logits("abbabaabba", input_logits.dtype)

# One-hot encode the inputs over the full character set.
xs = T.extra_ops.to_one_hot(input_logits, len(bk.character_set))

# Symbolic target sequence, with a matching test value.
target_logits = T.ivector('targets')
target_logits.tag.test_value = bk.logits("bbabaabbab", target_logits.dtype)

# Run the partial BPTT graph; it yields the outputs plus the new read
# address and memory.
outputs, read_address, memory = (partial_bptt(xs, read_address_with_grads,
                                              memory_with_grads, w, output_w))

bptt_cost = softmax_log_likelihood(outputs, target_logits)
if l2_param > 0: bptt_cost += l2_param * T.sum((w**2) / 2)  # L2 regularization

# Jacobians of the carried state with respect to the weights `w`.  The memory
# is flattened for T.jacobian and the result reshaped back to
# (memory shape) + (w shape).
j_read_address_w = T.jacobian(read_address, w)
j_memory_w = T.reshape(T.jacobian(T.flatten(memory), w),
                       prev_memory.get_value().shape + w.get_value().shape)
# Reshape will make things broadcastable by default, but then updating fails
# because shared variables are not broadcastable by default and broadcastable
# has to match for a shared var update. We don't want to broadcast anyway.
# This is only a problem if the memory depth is 1, so make the depth not
# broadcastable.
j_memory_w = T.unbroadcast(j_memory_w, prev_memory.ndim - 1)

# (shared variable, new value) pairs.
# NOTE(review): presumably consumed as Theano updates further down -- the
# consumer is outside this excerpt.
update_a_grad = saved_a_grad, j_read_address_w
update_m_grad = saved_m_grad, j_memory_w
update_address = prev_read_address, read_address
update_memory = prev_memory, memory

# Adadelta updates for both weight matrices, as a list of (var, update) pairs.
weight_updates = list(
    lasagne.updates.adadelta(bptt_cost, [w, output_w]).items())
Exemple #32
0
        def _recurrence(time_step, x_h_, v_h_, angle_, speed_, t_h_, x_t_,
                        v_t_, a_t_, t_t_, exist, is_leader, x_goal, turn_vec_h,
                        turn_vec_t):
            '''
            One step of the driving rollout: policy network -> state
            transition -> per-step costs.

            Network input (concatenated in this order):
                host:    x_h_, [angle_, speed_], x_goal
                targets: flatten(x_t_), flatten(v_t_), flatten(a_t_), is_leader

            NOTE(review): an earlier layout also fed host velocity and
            waiting times and quoted a total input size of 26; that listing
            no longer matches what is concatenated here, so confirm the
            expected size against the shape of self.W_0.

            Names ending in an underscore are the previous step's values;
            the same names without it are returned as the new values.

            :return: a pair ``(outputs, stop_condition)`` where ``outputs`` is
                (x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t,
                 cost_steer, cost_accel, cost_progress, cost_accident,
                 cost_rail, action)
                and the stop condition fires once the mean squared distance
                to the goal drops below 0.001.
            '''
            # ---- policy network input ----
            host_features = tt.concatenate(
                [x_h_, tt.stack([angle_, speed_]), x_goal])
            target_features = tt.concatenate([
                tt.flatten(x_t_),
                tt.flatten(v_t_),
                tt.flatten(a_t_),
                is_leader,
            ])
            state = tt.concatenate([host_features, target_features])

            # ---- three ReLU layers, then a bias-free linear action head ----
            hidden = state
            for weights, bias in ((self.W_0, self.b_0),
                                  (self.W_1, self.b_1),
                                  (self.W_2, self.b_2)):
                hidden = tt.nnet.relu(tt.dot(hidden, weights) + bias)
            action = tt.dot(hidden, self.W_c)

            # ---- state transition ----
            (x_h, v_h, angle, speed, t_h,
             x_t, v_t, a_t, t_t) = _step_state(
                 x_h_, v_h_, angle_, speed_, t_h_, turn_vec_h, x_t_, v_t_,
                 t_t_, turn_vec_t, action, exist, time_step)

            # ---- costs ----
            discount = 0.99**time_step

            # 0. smooth driving policy: penalise both action components
            cost_steer = discount * action[0]**2
            cost_accel = discount * action[1]**2

            # 1. forcing the host to move forward (towards the goal)
            dist_from_goal = tt.mean((x_goal - x_h)**2)
            cost_progress = discount * dist_from_goal

            # 2. keeping distance from other vehicles: hinge on the squared
            #    host-target distance.  An orientation-weighted variant
            #    (sigmoid of the cosine between the heading and the
            #    host-target offset) was tried by the original author and
            #    left disabled.
            squared_dists = ((x_t - x_h)**2).sum(axis=1)
            cost_accident = tt.sum(
                tt.nnet.relu(self.require_distance - squared_dists))

            # 3. rail divergence: each term is weighted by its turn_vec_h entry
            cost_right_rail = _dist_from_rail(
                x_h, self.right_rail_center,
                self.right_rail_radius) * turn_vec_h[0]
            cost_front_rail = (x_h[0] - self.lw / 2)**2 * turn_vec_h[1]
            cost_left_rail = _dist_from_rail(
                x_h, self.left_rail_center,
                self.left_rail_radius) * turn_vec_h[2]
            cost_rail = cost_right_rail + cost_left_rail + cost_front_rail

            step_outputs = (x_h, v_h, angle, speed, t_h, x_t, v_t, a_t, t_t,
                            cost_steer, cost_accel, cost_progress,
                            cost_accident, cost_rail, action)
            return step_outputs, t.scan_module.until(dist_from_goal < 0.001)
def set_network_trainer(input_data,
                        input_mask,
                        target_data,
                        target_mask,
                        num_outputs,
                        network,
                        updater,
                        learning_rate,
                        grad_max_norm=10.,
                        l2_lambda=1e-5,
                        load_updater_params=None):
    """Compile the Theano training function for ``network``.

    The loss is a masked cross-entropy computed from a hand-written
    log-softmax over the network output reshaped to ``(-1, num_outputs)``,
    plus an L2 penalty on the network parameters scaled by ``l2_lambda``.

    :param input_data: symbolic input variable of the compiled function
    :param input_mask: symbolic input-mask variable of the compiled function
    :param target_data: symbolic targets; flattened and one-hot encoded
    :param target_mask: symbolic mask; flattened and multiplied into the
        per-entry cost
    :param num_outputs: number of classes
    :param network: output layer handed to ``get_output`` /
        ``get_all_params`` / ``regularize_network_params``
    :param updater: callable accepting ``loss_or_grads``, ``params``,
        ``learning_rate`` and ``load_params_dict`` and returning
        ``(updates, updater_params)``
    :param learning_rate: initial value of the shared learning rate
    :param grad_max_norm: when > 0, gradients are rescaled by
        ``total_norm_constraint`` to this norm; otherwise the norm is only
        computed for reporting
    :param l2_lambda: weight of the L2 penalty
    :param load_updater_params: forwarded to ``updater`` as
        ``load_params_dict``
    :return: ``(training_fn, trainer_params)``; ``training_fn`` returns
        ``[train_frame_cost, network_grads_norm]`` and applies the updates
    """
    # One-hot rows, one per flattened target entry.
    target_one_hot = T.extra_ops.to_one_hot(y=T.flatten(target_data, 1),
                                            nb_class=num_outputs,
                                            dtype=floatX)

    # Stochastic (non-deterministic) forward pass.
    network_output = get_output(network, deterministic=False)
    num_seqs = network_output.shape[0]

    # Log-softmax by hand: shift by the row maximum, then subtract the
    # log-sum-exp of the shifted scores.
    scores = T.reshape(x=network_output,
                       newshape=(-1, num_outputs),
                       ndim=2)
    shifted = scores - T.max(scores, axis=-1, keepdims=True)
    log_probs = shifted - T.log(T.sum(T.exp(shifted), axis=-1, keepdims=True))

    # Masked negative log-likelihood per flattened entry.
    entry_cost = -T.sum(T.mul(target_one_hot, log_probs), axis=-1)
    masked_cost = entry_cost*T.flatten(target_mask, 1)

    # Optimised cost is normalised per sequence; the reported cost is
    # normalised by the mask total.
    train_model_cost = masked_cost.sum()/num_seqs
    train_frame_cost = masked_cost.sum()/target_mask.sum()

    # L2 regularizer over the network parameters.
    train_regularizer_cost = regularize_network_params(network, penalty=l2)*l2_lambda

    # Gradients of (cost + regularizer) w.r.t. the trainable parameters.
    network_params = get_all_params(network, trainable=True)
    network_grads = theano.grad(cost=train_model_cost + train_regularizer_cost,
                                wrt=network_params)

    if grad_max_norm>0.:
        # Rescale to the requested total norm and keep the norm for reporting.
        network_grads, network_grads_norm = total_norm_constraint(
            tensor_vars=network_grads,
            max_norm=grad_max_norm,
            return_norm=True)
    else:
        # No clipping: only compute the global gradient norm.
        network_grads_norm = T.sqrt(sum(T.sum(g**2) for g in network_grads))

    # Shared learning rate, handed to the updater.
    train_lr = theano.shared(lasagne.utils.floatX(learning_rate))
    train_updates, trainer_params = updater(loss_or_grads=network_grads,
                                            params=network_params,
                                            learning_rate=train_lr,
                                            load_params_dict=load_updater_params)

    # Compile the training (update) function.
    fn_inputs = [input_data, input_mask, target_data, target_mask]
    fn_outputs = [train_frame_cost, network_grads_norm]
    training_fn = theano.function(inputs=fn_inputs,
                                  outputs=fn_outputs,
                                  updates=train_updates)
    return training_fn, trainer_params
Exemple #34
0
 def Hx_plain():
     """Return the flat gradient, w.r.t. ``params``, of sum_i <g_i, x_i>.

     ``constraint_grads``, ``xs`` and ``params`` come from the enclosing
     scope.  Inputs that are disconnected from the scalar only produce a
     warning.

     NOTE(review): presumably a Hessian-vector product (if
     ``constraint_grads`` are themselves gradients w.r.t. ``params``) --
     confirm with the caller.
     """
     inner_products = [TT.sum(g * x) for g, x in zip(constraint_grads, xs)]
     grad_dot_x = TT.sum(inner_products)
     per_param = TT.grad(grad_dot_x,
                         wrt=params,
                         disconnected_inputs='warn')
     flat_parts = [TT.flatten(part) for part in per_param]
     return TT.concatenate(flat_parts)
 def get_l2_regularization(self, extra_params=None):
     """Return the mean squared value over all layer weights.

     Every ``layer.W`` in ``self.layers`` is flattened, the pieces are
     concatenated together with ``extra_params`` and the mean of the
     squares is returned.

     :param extra_params: optional iterable of additional tensors to
         include.  They are concatenated as-is (not flattened), so they
         presumably need to be 1-D already -- confirm at the call site.
         ``None`` (the default) means no extras.  Any iterable is accepted;
         the previous ``[]`` default was a shared mutable object and only
         lists could be passed.
     :return: symbolic scalar ``mean(concat(...) ** 2)``
     """
     extras = [] if extra_params is None else list(extra_params)
     flat_weights = [T.flatten(layer.W) for layer in self.layers]
     return T.mean(T.concatenate(flat_weights + extras)**2.)
        def step(input_n, cell_previous, hid_previous, *args):
            """One LSTM step with word-by-word attention over ``encoder_hs``.

            ``encoder_hs``, ``W_in_stacked``, ``b_stacked``,
            ``W_hid_stacked`` and ``slice_w`` come from the enclosing scope.
            Shape remarks below are the original author's and are not
            checked here.

            :param input_n: input for this time step
            :param cell_previous: cell state of the previous step
            :param hid_previous: hidden state of the previous step
            :param args: extra arguments; accepted and ignored
            :return: ``[cell, hid]`` for this step
            """
            # ---- word-by-word attention ----
            # Query built from the current input and previous hidden state,
            # made broadcastable over time: (n_batch, 1, n_features).
            query = T.dot(input_n, self.W_h_attend) + T.dot(
                hid_previous, self.W_m_attend)
            query = query.dimshuffle(0, 'x', 1)

            # (n_batch, n_time_steps, n_features)
            attended = nonlinearities.tanh(
                T.dot(encoder_hs, self.W_y_attend) + query)

            # Scores are (n_batch, n_time_steps, 1); flatten to
            # (n_batch, n_time_steps) for the softmax.
            alpha = T.flatten(T.dot(attended, self.w_attend), 2)

            # NOTE: no encoder mask is applied on either side of the
            # softmax.  The original author observed that zeros before a
            # softmax do not stay zero, considered filling positions past
            # the encoder length with -inf (or masking alpha afterwards),
            # and left both disabled.
            alpha = T.nnet.softmax(alpha).dimshuffle(0, 1, 'x')

            # Attention-weighted encoder summary, (n_batch, n_features),
            # prepended to the step input.
            context = T.sum(encoder_hs * alpha, axis=1)
            gate_input = T.concatenate([context, input_n], axis=1)
            if not self.precompute_input:
                gate_input = T.dot(gate_input, W_in_stacked) + b_stacked

            # ---- gates ----
            # Pre-activations of all four gates in one stacked tensor.
            gates = gate_input + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Slice order: input gate, forget gate, cell input, output gate.
            ingate, forgetgate, cell_input, outgate = [
                slice_w(gates, index) for index in range(4)
            ]

            if self.peepholes:
                # Peephole connections from the previous cell state.
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # New cell value.
            cell = forgetgate * cell_previous + ingate * cell_input

            # The output gate peeks at the *new* cell value.
            if self.peepholes:
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # New hidden unit activation.
            hid = outgate * self.nonlinearity(cell)

            return [cell, hid]
Exemple #37
0
 def forward(self, inputtensor):
     """Flatten the first input to 2 dimensions.

     :param inputtensor: indexable of inputs; only element 0 is used
     :return: 1-tuple holding ``T.flatten(inputtensor[0], outdim=2)``
     """
     first_input = inputtensor[0]
     flattened = T.flatten(first_input, outdim=2)
     return (flattened,)
Exemple #38
0
 def MSE(self):
     """Return the training cost: weighted reconstruction error minus the prior.

     The squared difference between ``self.inpt`` and ``self.trunc_output``
     is flattened to 2 dimensions, multiplied element-wise by ``self.df``
     and summed along axis 1.  The result is scaled by 4,
     ``self.latent_layer.prior`` is subtracted, and the mean is returned.

     NOTE(review): despite the name this is not a plain mean squared error;
     the origin of the factor 4 is not visible here.
     """
     squared_error = (self.inpt - self.trunc_output)**2
     weighted_error = T.flatten(squared_error, outdim=2) * self.df
     m = T.sum(weighted_error, axis=1)
     return T.mean(4 * m - self.latent_layer.prior)
Exemple #39
0
    def __init__(self, dim_z, x_train, x_test, diff=None, magic=5000):
        """Build the convolutional variational auto-encoder graph and compile its functions.

        Pipeline: 1-D conv -> batchnorm -> activation -> max-pool -> dropout
        -> flatten -> variational Gaussian layer -> hidden layer -> reshape
        -> dropout -> 1-D deconv -> truncate to the input length.

        :param dim_z: size of the latent code
        :param x_train: training data; only stored on the instance here
        :param x_test: test data; only stored on the instance here
        :param diff: stored on the instance as ``self.diff``; not used here
        :param magic: size handed to ``variational_gauss_layer`` and
            ``hidden_layer``; the hidden output is reshaped to
            ``(batch, 64, int(magic / 64), 1)``.
            NOTE(review): that reshape truncates ``magic / 64`` -- confirm
            that ``magic`` is meant to be a multiple of 64 (an earlier
            hard-coded value was 73888).
        """
        # ---------------------------- SETTINGS ----------------------------
        self.x_train = x_train
        self.x_test = x_test
        self.diff = diff
        self.batch_size = 100.
        self.learning_rate = theano.shared(np.float32(0.0008))
        self.momentum = 0.3
        self.performance = {"train": [], "test": []}
        self.inpt = T.ftensor4(name='input')
        self.df = T.fmatrix(name='differential')
        self.dim_z = dim_z
        self.generative_z = theano.shared(np.float32(np.zeros([1, dim_z])))
        self.activation = relu
        self.generative = False
        self.out_distribution = False
        self.in_filters = [64, 64, 64]
        self.filter_lengths = [10., 10., 10.]
        self.params = []
        self.magic = magic

        # Dropout rate is a symbolic scalar that every compiled function
        # replaces (via ``givens``) with the shared ``dropout_prob``.
        self.dropout_symbolic = T.fscalar()
        self.dropout_prob = theano.shared(np.float32(0.0))

        # ----------------------------- LAYERS -----------------------------
        # Layer 1: convolution, batchnorm, activation, max-pool, dropout.
        self.conv1 = one_d_conv_layer(self.inpt,
                                      self.in_filters[0],
                                      1,
                                      self.filter_lengths[0],
                                      param_names=["W1", 'b1'])
        self.params += self.conv1.params
        self.bn1 = batchnorm(self.conv1.output)
        self.nl1 = self.activation(self.bn1.X)
        self.maxpool1 = ds.max_pool_2d(self.nl1, [3, 1],
                                       st=[2, 1],
                                       ignore_border=False).astype(
                                           theano.config.floatX)
        self.layer1_out = dropout(self.maxpool1, self.dropout_symbolic)

        # Layer 2: flatten to (batch, features) for the latent layer.
        self.flattened = T.flatten(self.layer1_out, outdim=2)

        # Variational layer.
        self.latent_layer = variational_gauss_layer(self.flattened, self.magic,
                                                    dim_z)
        self.params += self.latent_layer.params
        self.latent_out = self.latent_layer.output

        # Hidden layer: its activation is reshaped back to a 4-D tensor
        # for the deconvolution.
        self.hidden_layer = hidden_layer(self.latent_out, dim_z, self.magic)
        self.params += self.hidden_layer.params
        self.hid_out = dropout(
            self.activation(self.hidden_layer.output).reshape(
                (self.inpt.shape[0], self.in_filters[-1],
                 int(self.magic / self.in_filters[-1]), 1)),
            self.dropout_symbolic)

        # Deconvolution 1.
        self.deconv1 = one_d_deconv_layer(self.hid_out,
                                          1,
                                          self.in_filters[2],
                                          self.filter_lengths[2],
                                          pool=2.,
                                          param_names=["W3", 'b3'],
                                          distribution=False)
        self.params += self.deconv1.params
        # NOTE: despite the name, no tanh is applied here; this is the raw
        # deconvolution output.
        self.tanh_out = self.deconv1.output
        self.last_layer = self.deconv1

        # Truncate along axis 2 to the input's length on that axis.
        if self.out_distribution:
            self.trunk_sigma = self.last_layer.log_sigma[:, :, :self.inpt.
                                                         shape[2], :]
        self.trunc_output = self.tanh_out[:, :, :self.inpt.shape[2], :]

        # ---------------------------- FUNCTIONS ---------------------------
        self.get_latent_states = theano.function(
            [self.inpt],
            self.latent_out,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        self.get_flattened = theano.function(
            [self.inpt],
            self.flattened,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        # Compiled once; the original compiled this identical function
        # twice and discarded the first result.
        self.output = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
        # Decode from the shared ``generative_z`` instead of the encoder
        # output.
        self.generate_from_z = theano.function(
            [self.inpt],
            self.trunc_output,
            givens=[[self.dropout_symbolic, self.dropout_prob],
                    [self.latent_out, self.generative_z]])

        # ---------------------------- TRAINING ----------------------------
        # Both attributes hold the expression built by self.MSE().
        self.cost = self.MSE()
        self.mse = self.MSE()

        self.derivatives = T.grad(self.cost, self.params)
        self.updates = adam(self.params, self.derivatives, self.learning_rate)
        self.train_model = theano.function(
            inputs=[self.inpt, self.df],
            outputs=self.cost,
            updates=self.updates,
            givens=[[self.dropout_symbolic, self.dropout_prob]])
Exemple #40
0
 def get_parent_state(self, children_states, node_type, use_dropout: bool,
                      iteration_number) -> tuple:
     """Compute a parent's state from its children's states.

     The children states are flattened, multiplied by the weight matrix
     selected by ``node_type``, shifted by the matching bias and passed
     through tanh.

     :param children_states: tensor of child states (flattened before use)
     :param node_type: index selecting the weight matrix and bias
     :param use_dropout: pick the dropout variant of the weights when true
     :param iteration_number: not used by this implementation
     :return: ``(parent_state, 0)``
     """
     if use_dropout:
         weights = self.__w_with_dropout
     else:
         weights = self.__w
     pre_activation = (T.dot(weights[node_type], T.flatten(children_states)) +
                       self.__bias[node_type])
     return T.tanh(pre_activation), 0
Exemple #41
0
    def __init__(self, config, testMode):
        """Build the first ``useLayers`` layers of an AlexNet-style network.

        Up to five ConvPoolLayers are followed by two fully connected
        layers (each followed by dropout unless ``testMode`` is set) and a
        softmax layer.  A forward function taking the input batch and,
        optionally, the mean image is compiled at the end.

        :param config: mapping read for the keys ``'batch_size'``,
            ``'lib_conv'``, ``'useLayers'``, ``'initWeights'``,
            ``'prob_drop'`` and ``'mean_file'``; ``'weightsDir'`` and
            ``'weightFileTag'`` are read only when ``'initWeights'`` is
            truthy
        :param testMode: when truthy, the dropout layers are bypassed and
            the fully connected layers feed the next layer directly
        """
        self.config = config

        batch_size = config['batch_size']
        lib_conv = config['lib_conv']
        useLayers = config['useLayers']
        initWeights = config['initWeights']
        # Fix: these two names used to be bound only when initWeights was
        # truthy but were used unconditionally below, so building without
        # pretrained weights raised UnboundLocalError.
        # NOTE(review): assumes the layer classes ignore weightsDir /
        # weightFiles when initWeights is falsy -- confirm in layers.py.
        weightsDir = config['weightsDir'] if initWeights else None
        weightFileTag = config['weightFileTag'] if initWeights else ''
        prob_drop = config['prob_drop']

        # ##################### BUILD NETWORK ##########################
        x = T.ftensor4('x')
        mean = T.ftensor4('mean')

        print('... building the model')
        self.layers = []
        params = []
        weight_types = []

        def register(layer):
            # Record a parameterised layer and collect its parameters.
            self.layers.append(layer)
            params.extend(layer.params)
            weight_types.extend(layer.weight_type)
            return layer

        # One row per conv/pool layer:
        # (filter_shape, convstride, padsize, group, poolsize, poolstride,
        #  bias_init, lrn, weight-file stems)
        # The image shape of every layer is
        # (filter_shape[0], None, None, batch_size).
        conv_specs = [
            ((3, 11, 11, 96), 4, 0, 1, 3, 2, 0.0, True,
             ('W_0', 'b_0')),
            ((96, 5, 5, 256), 1, 2, 2, 3, 2, 0.1, True,
             ('W0_1', 'W1_1', 'b0_1', 'b1_1')),
            ((256, 3, 3, 384), 1, 1, 1, 1, 0, 0.0, False,
             ('W_2', 'b_2')),
            ((384, 3, 3, 384), 1, 1, 2, 1, 0, 0.1, False,
             ('W0_3', 'W1_3', 'b0_3', 'b1_3')),
            ((384, 3, 3, 256), 1, 1, 2, 3, 2, 0.0, False,
             ('W0_4', 'W1_4', 'b0_4', 'b1_4')),
        ]

        # The first layer sees the mean-subtracted input; every later
        # layer sees the previous layer's output.
        layer_input = x - mean
        for spec in conv_specs[:max(useLayers, 0)]:
            (filter_shape, convstride, padsize, group, poolsize, poolstride,
             bias_init, lrn, stems) = spec
            conv_layer = register(ConvPoolLayer(
                input=layer_input,
                image_shape=(filter_shape[0], None, None, batch_size),
                filter_shape=filter_shape,
                convstride=convstride, padsize=padsize, group=group,
                poolsize=poolsize, poolstride=poolstride,
                bias_init=bias_init, lrn=lrn,
                lib_conv=lib_conv,
                initWeights=initWeights, weightsDir=weightsDir,
                weightFiles=[stem + weightFileTag for stem in stems]))
            layer_input = conv_layer.output

        if useLayers >= 6:
            # Move the batch axis (last) to the front, then flatten the
            # rest into a matrix.
            fc_layer6_input = T.flatten(layer_input.dimshuffle(3, 0, 1, 2), 2)
            fc_layer6 = register(FCLayer(
                input=fc_layer6_input, n_in=9216, n_out=4096,
                initWeights=initWeights, weightsDir=weightsDir,
                weightFiles=['W_5' + weightFileTag, 'b_5' + weightFileTag]))
            if testMode:
                dropout_layer6 = fc_layer6
            else:
                dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096,
                                              n_out=4096, prob_drop=prob_drop)

        if useLayers >= 7:
            fc_layer7 = register(FCLayer(
                input=dropout_layer6.output, n_in=4096, n_out=4096,
                initWeights=initWeights, weightsDir=weightsDir,
                weightFiles=['W_6' + weightFileTag, 'b_6' + weightFileTag]))
            if testMode:
                # Fix: this used to rebind dropout_layer6, leaving
                # dropout_layer7 undefined for the softmax layer below.
                dropout_layer7 = fc_layer7
            else:
                dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096,
                                              n_out=4096, prob_drop=prob_drop)

        if useLayers >= 8:
            register(SoftmaxLayer(
                input=dropout_layer7.output, n_in=4096, n_out=1000,
                initWeights=initWeights, weightsDir=weightsDir,
                weightFiles=['W_7' + weightFileTag, 'b_7' + weightFileTag]))

        # #################### NETWORK BUILT #######################

        self.output = self.layers[useLayers-1]
        self.params = params
        self.x = x
        self.mean = mean
        self.weight_types = weight_types
        self.batch_size = batch_size
        self.useLayers = useLayers
        self.outLayer = self.layers[useLayers-1]

        # x is 4-D with the batch on the last axis, whereas the stored
        # mean has no batch axis; add one and tile it batch_size times so
        # the subtraction x - mean lines up.
        meanVal = np.load(config['mean_file'])
        meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
        meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
        self.meanVal = meanVal

        # If the last layer is the softmax, its output is y_pred.
        if useLayers >= 8:
            finalOut = self.outLayer.y_pred
        else:
            finalOut = self.outLayer.output
        # The mean is an optional input defaulting to the tiled mean image.
        self.forwardFunction = theano.function(
            [self.x, In(self.mean, value=meanVal)], [finalOut])
Exemple #42
0
def flatten(x):
    """Return ``T.flatten(x)`` called with its default arguments."""
    flattened = T.flatten(x)
    return flattened
Exemple #43
0
def convolve(kerns,
             kshp,
             nkern,
             images,
             imgshp,
             step=(1, 1),
             bias=None,
             mode='valid',
             flatten=True):
    """Convolution implemented as a sparse matrix multiplication.

    A sparse matrix built from ``convolution_indices.conv_eval`` turns the
    images into a stack of patches; the patches are then multiplied by the
    kernels with one dense dot product.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    ``images`` is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension holds each image in raster order.

    If ``flatten`` is False, the output feature map has shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If ``flatten`` is True, the output feature map has shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: this means that each feature map (the image generated
        by one kernel) is contiguous in memory. The layout is therefore
        [ <feature_map_0> <feature_map_1> ... <feature_map_n> ], where each
        <feature_map> is in raster order.

    :param kerns: 2D tensor of shape nkern x N.prod(kshp) holding the
                  kernels applied at every pixel
    :param kshp: tuple with the actual kernel dimensions (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing the images to convolve
    :param imgshp: tuple with the image dimensions, either (height, width)
                   or (nfeatures, height, width); the 2-entry form is
                   extended with nfeatures=1
    :param step: number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param bias: optional value added to the kernel responses before they
                 are rearranged into feature maps; skipped when None
    :param mode: 'full' or 'valid'; forwarded to
                 ``convolution_indices.conv_eval``
    :param flatten: when true, flatten the last 2 dimensions of the output
                    (batchsize x nkern x outsize becomes
                    batchsize x nkern*outsize)

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # Number of pixels covered by one kernel.
    kernel_area = np.int64(np.prod(kshp))

    # imgshp is (height, width) or (nfeatures, height, width); default
    # nfeatures to 1 in the first case.
    if np.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # Indices and index pointers of the sparse matrix which, multiplied
    # with the input images, yields the stack of image patches.
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, kshp, step, mode)

    batch_size = images.shape[0]
    out_size = np.prod(outshp)

    # Build the sparse matrix (all stored entries are 1), then extract
    # the patches.
    unit_values = np.ones(indices.size)
    patch_extractor = theano.sparse.CSM(sptype)(unit_values, indices, indptr,
                                                spmat_shape)
    patches = (sparse.structured_dot(patch_extractor, images.T)).T

    # One row per (image, output pixel); one column per input value under
    # the kernel.
    patch_shape = tensor.stack([
        batch_size * tensor.as_tensor(out_size),
        tensor.as_tensor(imgshp[0] * kernel_area),
    ])
    patch_stack = tensor.reshape(patches, patch_shape, ndim=2)

    # kerns is nkern x ksize*number_of_input_features, so the product is
    # bsize*outshp x nkern.
    output = tensor.dot(patch_stack, kerns.T)

    # Add the bias across each feature map (more efficient to do it now).
    if bias is not None:
        output += bias

    # Put the feature maps in raster order: go from
    # bsize*outshp x nkern to bsize x nkern x outshp.
    maps_shape = tensor.stack([
        batch_size,
        tensor.as_tensor(out_size),
        tensor.as_tensor(nkern),
    ])
    pixel_major = tensor.reshape(output, maps_shape, ndim=3)
    swap_last_two = tensor.DimShuffle((False, ) * pixel_major.ndim, (0, 2, 1))
    output = swap_last_two(pixel_major)

    if flatten:
        output = tensor.flatten(output, 2)

    logical_shape = np.hstack((nkern, outshp))
    return output, logical_shape
Exemple #44
0
def build_objective(model, deterministic=False, epsilon=1.e-7):
    """Build the mean binary cross-entropy objective for ``model``.

    The outputs of ``model.l_out`` and ``model.l_target`` are flattened, the
    predictions are clipped to ``[epsilon, 1 - epsilon]``, and the mean of
    the element-wise binary cross-entropy between them is returned.

    :param model: object exposing ``l_out`` and ``l_target`` layers
    :param deterministic: forwarded to ``get_output`` for ``model.l_out``
    :param epsilon: clipping margin applied to the flattened predictions
    :return: the mean of the binary cross-entropy expression
    """
    net_output = nn.layers.get_output(model.l_out, deterministic=deterministic)
    flat_predictions = T.flatten(net_output)
    flat_targets = T.flatten(nn.layers.get_output(model.l_target))
    # Keep predictions strictly inside (0, 1) before taking the loss.
    clipped = T.clip(flat_predictions, epsilon, 1. - epsilon)
    per_element_loss = nn.objectives.binary_crossentropy(clipped, flat_targets)
    return T.mean(per_element_loss)
Exemple #45
0
 def op(self, state):
     """Flatten the output of the incoming layer to ``self.axes`` dimensions.

     :param state: forwarded as ``state=`` to ``self.l_in.op``
     :return: ``T.flatten`` of the incoming output with ``outdim=self.axes``
     """
     return T.flatten(self.l_in.op(state=state), outdim=self.axes)
    def build_computation_graph(self):
        """
        Build the network graph, its cost/monitor expressions, and compile
        the prediction and monitoring functions.

        The network is five ``ConvPoolLayer`` stages followed by two fully
        connected ``BasicLayer`` stages and a ``SoftmaxLayer``. Layers 7 and 8
        are each instantiated twice: once on the plain activations and once on
        the dropout-corrupted activations, with the second copy reusing the
        first copy's parameters through ``params_hook``.

        Side effects: extends ``self.params`` and sets ``self.output``,
        ``self.train_cost``, ``self.monitors``, ``self.f_predict`` and
        ``self.f_monitors``.
        """
        ###################### BUILD NETWORK ##########################
        # whether or not to mirror the input images before feeding them into the network
        if self.flag_datalayer:
            layer_1_input = mirror_images(
                input=self.x,
                image_shape=(self.batch_size, 3, 256, 256),  # bc01 format
                cropsize=227,
                rand=self.rand,
                flag_rand=self.rand_crop)
        else:
            layer_1_input = self.x  # 4D tensor (going to be in c01b format)

        # Start with 5 convolutional pooling layers
        log.debug("convpool layer 1...")
        convpool_layer1 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 3, 227, 227), layer_1_input),
            filter_shape=(96, 3, 11, 11),
            convstride=4,
            padsize=0,
            group=1,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer1.get_params()

        log.debug("convpool layer 2...")
        convpool_layer2 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 96, 27, 27),
                         convpool_layer1.get_outputs()),
            filter_shape=(256, 96, 5, 5),
            convstride=1,
            padsize=2,
            group=2,
            poolsize=3,
            poolstride=2,
            bias_init=0.1,
            local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer2.get_params()

        log.debug("convpool layer 3...")
        convpool_layer3 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 256, 13, 13),
                         convpool_layer2.get_outputs()),
            filter_shape=(384, 256, 3, 3),
            convstride=1,
            padsize=1,
            group=1,
            poolsize=1,
            poolstride=0,
            bias_init=0.0,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer3.get_params()

        log.debug("convpool layer 4...")
        convpool_layer4 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 384, 13, 13),
                         convpool_layer3.get_outputs()),
            filter_shape=(384, 384, 3, 3),
            convstride=1,
            padsize=1,
            group=2,
            poolsize=1,
            poolstride=0,
            bias_init=0.1,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer4.get_params()

        log.debug("convpool layer 5...")
        convpool_layer5 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 384, 13, 13),
                         convpool_layer4.get_outputs()),
            filter_shape=(256, 384, 3, 3),
            convstride=1,
            padsize=1,
            group=2,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer5.get_params()

        # Now onto the fully-connected layers!
        fc_config = {
            'activation': 'rectifier',  # type of activation function to use for output
            'weights_init': 'gaussian',  # either 'gaussian' or 'uniform' - how to initialize weights
            'weights_mean': 0.0,  # mean for gaussian weights init
            'weights_std': 0.005,  # standard deviation for gaussian weights init
            'bias_init': 0.0  # how to initialize the bias parameter
        }
        log.debug("fully connected layer 1 (model layer 6)...")
        # we want to have dropout applied to the training version, but not the test version.
        fc_layer6_input = T.flatten(convpool_layer5.get_outputs(), 2)
        # NOTE(review): 9216 is presumably 256 * 6 * 6, the flattened size of
        # the layer-5 output -- confirm against ConvPoolLayer's output shape.
        fc_layer6 = BasicLayer(inputs_hook=(9216, fc_layer6_input),
                               output_size=4096,
                               config=fc_config)
        # Add this layer's parameters!
        self.params += fc_layer6.get_params()

        # now apply dropout to the output for training
        dropout_layer6 = dropout(fc_layer6.get_outputs(), corruption_level=0.5)

        log.debug("fully connected layer 2 (model layer 7)...")
        fc_layer7 = BasicLayer(inputs_hook=(4096, fc_layer6.get_outputs()),
                               output_size=4096,
                               config=fc_config)
        # Training copy: fed by the dropout output, reusing fc_layer7's params.
        fc_layer7_train = BasicLayer(inputs_hook=(4096, dropout_layer6),
                                     output_size=4096,
                                     params_hook=fc_layer7.get_params(),
                                     config=fc_config)
        # Add this layer's parameters!
        self.params += fc_layer7_train.get_params()

        # apply dropout again for training
        dropout_layer7 = dropout(fc_layer7_train.get_outputs(),
                                 corruption_level=0.5)

        # last layer is a softmax prediction output layer
        softmax_config = {
            'weights_init': 'gaussian',
            'weights_mean': 0.0,
            'weights_std': 0.005,
            'bias_init': 0.0
        }
        log.debug("softmax classification layer (model layer 8)...")
        softmax_layer8 = SoftmaxLayer(
            inputs_hook=(4096, fc_layer7.get_outputs()),
            output_size=1000,
            config=softmax_config)
        # Training copy: fed by the dropout output, reusing softmax_layer8's params.
        softmax_layer8_train = SoftmaxLayer(
            inputs_hook=(4096, dropout_layer7),
            output_size=1000,
            params_hook=softmax_layer8.get_params(),
            config=softmax_config)
        # Add this layer's parameters!
        self.params += softmax_layer8.get_params()

        # finally the softmax output from the whole thing!
        self.output = softmax_layer8.get_outputs()

        #####################
        # Cost and monitors #
        #####################
        self.train_cost = softmax_layer8_train.negative_log_likelihood(self.y)
        cost = softmax_layer8.negative_log_likelihood(self.y)
        errors = softmax_layer8.errors(self.y)
        train_errors = softmax_layer8_train.errors(self.y)

        self.monitors = OrderedDict([('cost', cost), ('errors', errors),
                                     ('dropout_errors', train_errors)])

        #########################
        # Compile the functions #
        #########################
        log.debug("Compiling functions!")
        t = time.time()
        log.debug("f_predict...")
        # use the actual argmax from the classification
        self.f_predict = function(
            inputs=[self.x], outputs=softmax_layer8.get_argmax_prediction())
        log.debug("f_monitors")
        # Materialize the values as a list: on Python 3 ``dict.values()`` is a
        # view object, which is not accepted as a list of outputs.
        self.f_monitors = function(inputs=[self.x, self.y],
                                   outputs=list(self.monitors.values()))
        log.debug("compilation took %s" %
                  make_time_units_string(time.time() - t))
Exemple #47
0
def gradient1(f, v):
    """Return the gradient of ``f`` with respect to ``v``, flattened.

    Disconnected inputs are reported as warnings
    (``disconnected_inputs="warn"``) rather than the default handling.
    """
    grad_wrt_v = tt.grad(f, v, disconnected_inputs="warn")
    return tt.flatten(grad_wrt_v)
Exemple #48
0
    def get_output_for(self, inputs, **kwargs):
        """
        Have to re-write LSTMLayer's output construction because we need
        cell_out, which is not stored in the original

        Parameters
        ----------
        inputs : list
            ``inputs[0]`` is the symbolic layer input. The mask, initial
            hidden state and initial cell state are read from
            ``inputs[self.mask_incoming_index]``,
            ``inputs[self.hid_init_incoming_index]`` and
            ``inputs[self.cell_init_incoming_index]`` respectively, each only
            when its index is positive.

        Returns
        -------
        The cell states and hidden states concatenated along their last
        axis (cell first, hidden second). When ``self.only_return_final`` is
        set only the last scan step of each is used.
        """
        # Retrieve the layer input
        input = inputs[0]
        # Retrieve the mask when it is supplied
        mask = None
        hid_init = None
        cell_init = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]
        if self.cell_init_incoming_index > 0:
            cell_init = inputs[self.cell_init_incoming_index]

        # Treat all dimensions after the second as flattened feature dimensions
        if input.ndim > 3:
            input = T.flatten(input, 3)

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, 2)
        num_batch = input.shape[1]

        # Stack input weight matrices into a (num_inputs, 4*num_units)
        # matrix, which speeds up computation
        W_in_stacked = T.concatenate([
            self.W_in_to_ingate, self.W_in_to_forgetgate, self.W_in_to_cell,
            self.W_in_to_outgate
        ], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = T.concatenate([
            self.W_hid_to_ingate, self.W_hid_to_forgetgate, self.W_hid_to_cell,
            self.W_hid_to_outgate
        ], axis=1)

        # Stack biases into a (4*num_units) vector
        b_stacked = T.concatenate(
            [self.b_ingate, self.b_forgetgate, self.b_cell, self.b_outgate],
            axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute_input the inputs dot weight matrices before scanning.
            # W_in_stacked is (n_features, 4*num_units). input is then
            # (n_time_steps, n_batch, 4*num_units).
            input = T.dot(input, W_in_stacked) + b_stacked

        # At each call to scan, input_n is one time step of the (time-major)
        # input, i.e. (n_batch, 4*num_units) once the input projection has
        # been applied.
        # We define a slicing function that extract the input to each LSTM gate
        def slice_w(x, n):
            return x[:, n * self.num_units:(n + 1) * self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, W_in_stacked) + b_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * self.nonlinearity(cell)
            return [cell, hid]

        def step_masked(input_n, mask_n, cell_previous, hid_previous, *args):
            cell, hid = step(input_n, cell_previous, hid_previous, *args)

            # Skip over any input with mask 0 by copying the previous
            # hidden state; proceed normally for any input with mask 1.
            not_mask = 1 - mask_n
            cell = cell * mask_n + cell_previous * not_mask
            hid = hid * mask_n + hid_previous * not_mask

            return [cell, hid]

        if mask is not None:
            # mask is given as (batch_size, seq_len). Because scan iterates
            # over first dimension, we dimshuffle to (seq_len, batch_size) and
            # add a broadcastable dimension
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked
        else:
            sequences = input
            step_fun = step

        ones = T.ones((num_batch, 1))
        if isinstance(self.cell_init, Layer):
            # cell_init was already taken from ``inputs`` above
            pass
        elif isinstance(self.cell_init, T.TensorVariable):
            cell_init = self.cell_init
        else:
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            cell_init = T.dot(ones, self.cell_init)

        if isinstance(self.hid_init, Layer):
            # hid_init was already taken from ``inputs`` above
            pass
        elif isinstance(self.hid_init, T.TensorVariable):
            hid_init = self.hid_init
        else:
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(ones, self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # The "peephole" weight matrices are only used when self.peepholes=True
        if self.peepholes:
            non_seqs += [
                self.W_cell_to_ingate, self.W_cell_to_forgetgate,
                self.W_cell_to_outgate
            ]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_seqs += [W_in_stacked, b_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            input_shape = self.input_shapes[0]
            # Explicitly unroll the recurrence instead of using scan
            cell_out, hid_out = unroll_scan(fn=step_fun,
                                            sequences=sequences,
                                            outputs_info=[cell_init, hid_init],
                                            go_backwards=self.backwards,
                                            non_sequences=non_seqs,
                                            n_steps=input_shape[1])
        else:
            # Scan op iterates over first dimension of input and repeatedly
            # applies the step function
            cell_out, hid_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.gradient_steps,
                non_sequences=non_seqs,
                strict=True)[0]

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            hid_out = hid_out[-1]
            cell_out = cell_out[-1]
        else:
            # dimshuffle back to (n_batch, n_time_steps, n_features))
            hid_out = hid_out.dimshuffle(1, 0, 2)
            cell_out = cell_out.dimshuffle(1, 0, 2)

            # if scan is backward reverse the output
            if self.backwards:
                hid_out = hid_out[:, ::-1]
                cell_out = cell_out[:, ::-1]

        # The outputs have one dimension fewer when only the final step is
        # kept, so join along the last axis instead of a hard-coded axis=2
        # (which is out of range in that case).
        return T.concatenate([cell_out, hid_out], axis=hid_out.ndim - 1)
Exemple #49
0
    def get_output_for(self, inputs, **kwargs):
        """
        Build this layer's symbolic output from its symbolic inputs.

        Parameters
        ----------
        inputs : list of theano.TensorType
            ``inputs[0]`` is always the symbolic input variable. The mask,
            the initial hidden state and the initial cell state are taken
            from ``inputs[self.mask_incoming_index]``,
            ``inputs[self.hid_init_incoming_index]`` and
            ``inputs[self.cell_init_incoming_index]`` respectively, each only
            when the corresponding index is positive. The mask is expected as
            a ``(n_batch, n_time_steps)`` matrix; it is dimshuffled to
            time-major order before being handed to scan.
        **kwargs
            Forwarded to ``self.bn.get_output_for`` when the precomputed
            input projection is batch-normalized.

        Returns
        -------
        layer_output : theano.TensorType
            Symbolic hidden-state output: the last scan step only when
            ``self.only_return_final`` is set, otherwise every step,
            dimshuffled back to batch-major order.
        """
        # Unpack the layer input and the optional extra inputs; a
        # non-positive index means that input was not supplied.
        input = inputs[0]
        mask = (inputs[self.mask_incoming_index]
                if self.mask_incoming_index > 0 else None)
        hid_init = (inputs[self.hid_init_incoming_index]
                    if self.hid_init_incoming_index > 0 else None)
        cell_init = (inputs[self.cell_init_incoming_index]
                     if self.cell_init_incoming_index > 0 else None)

        # Everything past the second dimension counts as features
        if input.ndim > 3:
            input = T.flatten(input, 3)

        # scan walks the leading axis, so go time-major:
        # (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, 2)
        _, num_batch, _ = input.shape

        # One (.., 4*num_units) matrix per weight family so that all four
        # gates are computed with a single dot product.
        hidden_weights = T.concatenate(
            [self.W_hid_to_ingate, self.W_hid_to_forgetgate,
             self.W_hid_to_cell, self.W_hid_to_outgate], axis=1)
        input_weights = T.concatenate(
            [self.W_in_to_ingate, self.W_in_to_forgetgate,
             self.W_in_to_cell, self.W_in_to_outgate], axis=1)

        # The stacked (4*num_units) bias is needed everywhere except when the
        # precomputed projection goes through batch normalization instead.
        if not (self.precompute_input and self.batch_norm):
            gate_bias = T.concatenate(
                [self.b_ingate, self.b_forgetgate,
                 self.b_cell, self.b_outgate], axis=0)

        if self.precompute_input:
            # The whole sequence is available up front, so project it once
            # outside of scan: (n_time_steps, n_batch, 4*num_units).
            projected = T.dot(input, input_weights)
            if self.batch_norm:
                input = self.bn.get_output_for(projected, mask, **kwargs)
            else:
                input = projected + gate_bias

        def gate_slice(stacked, k):
            # Columns belonging to the k-th gate of a stacked pre-activation
            start = k * self.num_units
            return stacked[:, start:start + self.num_units]

        # Single recurrence step; input_n is one time step of the input
        def step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, input_weights) + gate_bias

            # Pre-activations of all four gates at once
            gates = input_n + T.dot(hid_previous, hidden_weights)

            if self.grad_clipping:
                gates = theano.gradient.grad_clip(
                    gates, -self.grad_clipping, self.grad_clipping)

            ingate, forgetgate, cell_input, outgate = [
                gate_slice(gates, k) for k in range(4)]

            if self.peepholes:
                # Peephole connections from the previous cell state
                ingate += cell_previous*self.W_cell_to_ingate
                forgetgate += cell_previous*self.W_cell_to_forgetgate

            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # New cell state
            cell = forgetgate*cell_previous + ingate*cell_input

            if self.peepholes:
                outgate += cell*self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # New hidden state
            return [cell, outgate*self.nonlinearity(cell)]

        def step_masked(input_n, mask_n, cell_previous, hid_previous, *args):
            new_cell, new_hid = step(
                input_n, cell_previous, hid_previous, *args)
            # Where the mask is 0 carry the previous state forward unchanged
            return [T.switch(mask_n, new_cell, cell_previous),
                    T.switch(mask_n, new_hid, hid_previous)]

        if mask is None:
            sequences = input
            step_fun = step
        else:
            # (batch_size, seq_len) -> (seq_len, batch_size, 1) with the
            # trailing axis broadcastable, to line up with the input
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked

        # Initial states that are not Layers are repeated across the batch
        # by a dot with a column of ones: (num_batch, num_units)
        batch_ones = T.ones((num_batch, 1))
        if not isinstance(self.cell_init, Layer):
            cell_init = T.dot(batch_ones, self.cell_init)
        if not isinstance(self.hid_init, Layer):
            hid_init = T.dot(batch_ones, self.hid_init)

        # Everything the step function closes over must be listed for scan
        non_seqs = [hidden_weights]
        if self.peepholes:
            non_seqs.extend([self.W_cell_to_ingate,
                             self.W_cell_to_forgetgate,
                             self.W_cell_to_outgate])
        if not self.precompute_input:
            non_seqs.extend([input_weights, gate_bias])

        if self.unroll_scan:
            # Unroll explicitly; the step count comes from the declared
            # shape of the incoming layer
            cell_out, hid_out = unroll_scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                non_sequences=non_seqs,
                n_steps=self.input_shapes[0][1])
        else:
            cell_out, hid_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.gradient_steps,
                non_sequences=non_seqs,
                strict=True)[0]

        # Only the last step was requested: slice it out and stop here
        if self.only_return_final:
            return hid_out[-1]

        # Back to (n_batch, n_time_steps, n_features)
        hid_out = hid_out.dimshuffle(1, 0, 2)
        # A backwards scan yields reversed time order; undo that
        if self.backwards:
            hid_out = hid_out[:, ::-1]
        return hid_out
Exemple #50
0
    def get_output_for(self, inputs, deterministic=False, **kwargs):
        """
        Build the symbolic output of this recurrent layer, optionally using
        rounded (ternary / binary / quantized) copies of its parameters.

        While the graph is being built, the parameters selected by
        ``self.round_input_weights``, ``self.round_hid`` and
        ``self.round_bias`` are temporarily replaced on ``self`` by their
        rounded versions; the original attributes are put back before
        returning, including when graph construction raises.

        Parameters
        ----------
        inputs : list
            ``inputs[0]`` is the symbolic layer input. The mask and the
            initial hidden state are read from
            ``inputs[self.mask_incoming_index]`` and
            ``inputs[self.hid_init_incoming_index]``, each only when its
            index is positive.
        deterministic : bool
            Forwarded to the rounding helpers and to batch normalization.
            Forced to ``True`` when ``self.stochastic`` is false.
        **kwargs
            Forwarded to ``self.bn.get_output_for`` when ``self.batch_norm``
            is set.

        Returns
        -------
        The hidden states: the last scan step only when
        ``self.only_return_final`` is set, otherwise every step dimshuffled
        back to batch-major order.
        """
        if not self.stochastic and not deterministic:
            deterministic = True
        # %-formatting keeps the printed text identical on Python 2 and 3
        # (the original was a Python 2 print statement).
        print("deterministic mode:  %s" % (deterministic,))

        def apply_regularization(weights, hid=False):
            # NOTE(review): no call in this method passes hid=True, so
            # self.w0_hid is never selected -- confirm whether the
            # hidden-to-hidden weights were meant to use it.
            current_w0 = self.w0
            if hid:
                current_w0 = self.w0_hid

            if self.mean_substraction_rounding:
                return weights
            elif self.mode == 'ternary':

                return ternarize_weights(weights,
                                         w0=current_w0,
                                         deterministic=deterministic,
                                         srng=self.srng)

            elif self.mode == "binary":
                return binarize_weights(weights,
                                        1.,
                                        self.srng,
                                        deterministic=deterministic)
            elif self.mode == "dual-copy":
                return quantize_weights(weights,
                                        srng=self.srng,
                                        deterministic=deterministic)
            else:
                return weights

        # Original parameter attributes, keyed by attribute name, so they can
        # be restored no matter how graph construction ends.
        originals = {}
        try:
            # Swap in the rounded parameters for the duration of the build
            if self.round_input_weights:
                originals['W_in_to_hid'] = self.W_in_to_hid
                self.Wb_in_to_hid = apply_regularization(self.W_in_to_hid)
                self.W_in_to_hid = self.Wb_in_to_hid

            if self.round_hid:
                originals['W_hid_to_hid'] = self.W_hid_to_hid
                self.Wb_hid_to_hid = apply_regularization(self.W_hid_to_hid)
                self.W_hid_to_hid = self.Wb_hid_to_hid

            if self.round_bias:
                originals['b'] = self.b
                self.bb = apply_regularization(self.b)
                self.b = self.bb

            input = inputs[0]

            if self.batch_norm:
                input = self.bn.get_output_for(input,
                                               deterministic=deterministic,
                                               **kwargs)
                # Keep every trailing input (mask, hid_init, ...): dropping
                # anything past index 1 breaks the index lookups below when
                # both a mask and an initial hidden state are supplied.
                inputs = [input] + list(inputs[1:])

            mask = None
            hid_init = None
            if self.mask_incoming_index > 0:
                mask = inputs[self.mask_incoming_index]
            if self.hid_init_incoming_index > 0:
                hid_init = inputs[self.hid_init_incoming_index]

            # Treat all dimensions after the second as features
            if input.ndim > 3:
                input = T.flatten(input, 3)

            # scan iterates over the first axis, so move time to the front
            input = input.dimshuffle(1, 0, 2)
            num_batch = input.shape[1]

            W_in_stacked = T.concatenate([self.W_in_to_hid], axis=1)

            W_hid_stacked = T.concatenate([self.W_hid_to_hid], axis=1)

            b_stacked = T.concatenate([self.b], axis=0)

            if self.precompute_input:
                # Project the whole sequence once, outside of scan
                input = T.dot(input, W_in_stacked) + b_stacked

            def step(input_n, hid_previous, *args):

                hid_input = T.dot(hid_previous, W_hid_stacked)

                if self.grad_clipping:
                    input_n = theano.gradient.grad_clip(input_n,
                                                        -self.grad_clipping,
                                                        self.grad_clipping)
                    hid_input = theano.gradient.grad_clip(hid_input,
                                                          -self.grad_clipping,
                                                          self.grad_clipping)

                if not self.precompute_input:

                    input_n = T.dot(input_n, W_in_stacked) + b_stacked

                hid = self.nonlinearity(hid_input + input_n)

                return hid

            def step_masked(input_n, mask_n, hid_previous, *args):
                hid = step(input_n, hid_previous, *args)

                # Where the mask is 0, carry the previous hidden state over
                hid = T.switch(mask_n, hid, hid_previous)

                return hid

            if mask is not None:
                # Time-major mask with a trailing broadcastable axis
                mask = mask.dimshuffle(1, 0, 'x')
                sequences = [input, mask]
                step_fun = step_masked

            else:
                sequences = [input]
                step_fun = step

            if not isinstance(self.hid_init, lasagne.layers.Layer):
                # Dot against a column of ones to repeat across the batch
                hid_init = T.dot(T.ones((num_batch, 1)), self.hid_init)

            non_seqs = [W_hid_stacked]

            # The input projection happens inside step in this case, so its
            # parameters must be handed to scan as well
            if not self.precompute_input:
                non_seqs += [W_in_stacked, b_stacked]

            if self.unroll_scan:

                input_shape = self.input_shapes[0]

                hid_out = lasagne.utils.unroll_scan(
                    fn=step_fun,
                    sequences=sequences,
                    outputs_info=[hid_init],
                    go_backwards=self.backwards,
                    non_sequences=non_seqs,
                    n_steps=input_shape[1])[0]
            else:

                hid_out = theano.scan(fn=step_fun,
                                      sequences=sequences,
                                      go_backwards=self.backwards,
                                      outputs_info=[hid_init],
                                      non_sequences=non_seqs,
                                      truncate_gradient=self.gradient_steps,
                                      strict=True)[0]

            if self.only_return_final:
                hid_out = hid_out[-1]
            else:
                # Back to batch-major order
                hid_out = hid_out.dimshuffle(1, 0, 2)

                # A backwards scan yields reversed time order; undo that
                if self.backwards:
                    hid_out = hid_out[:, ::-1]

            return hid_out
        finally:
            # Put the original parameters back on the layer
            for attr_name, original in originals.items():
                setattr(self, attr_name, original)
Exemple #51
0
def conv2d(
        input,
        filters,
        image_shape=None,
        filter_shape=None,
        border_mode="valid",
        subsample=(1, 1),
        **kargs,
):
    """
    Basic 2D convolution of one image (or a stack of images) with one
    filter (or a stack of filters).

    Both operands are lifted to 4D, handed to ``conv.ConvOp`` and the result
    is squeezed back so that only the axes that were really present in the
    arguments survive.

    Supplying both shape hints lets ConvOp work with static shapes; if either
    one is missing, no static shape information is passed at all.

    Parameters
    ----------
    input : symbolic tensor, 2D or 3D
        ([num_images,] image height, image width).
    filters : symbolic tensor, 2D or 3D
        ([num_filters,] filter height, filter width).
    image_shape : tuple of length 2 or 3, optional
        ([num_images,] image height, image width).
    filter_shape : tuple of length 2 or 3, optional
        ([num_filters,] filter height, filter width).
    border_mode : {'valid', 'full'}
        Forwarded to ConvOp as ``output_mode``.
    subsample : pair
        Forwarded to ConvOp as ``dx`` (first entry) and ``dy`` (second).
    kargs
        Any further keyword arguments are forwarded to ConvOp unchanged.

    Returns
    -------
    symbolic 2D, 3D or 4D tensor
        Filtered images, with shape
        ([number images,] [number filters,] image height, image width).

    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    has_batch_axis = input.ndim == 3
    has_kernel_axis = filters.ndim == 3

    # Static shape hints: all of them, or none of them.
    if filter_shape and image_shape:
        bsize = image_shape[0] if has_batch_axis else 1
        imshp = (1, ) + tuple(image_shape[-2:])
        nkern = filter_shape[0] if has_kernel_axis else 1
        kshp = filter_shape[-2:]
    else:
        bsize = imshp = nkern = kshp = None

    # Symbolic size of the leading axis (1 when that axis is absent).
    sym_bsize = input.shape[0] if has_batch_axis else 1
    sym_nkern = filters.shape[0] if has_kernel_axis else 1

    # ConvOp wants 4D operands: (leading axis, 1, rows, cols).
    input_lead = tensor.stack([sym_bsize, 1])
    new_input_shape = tensor.join(0, input_lead, input.shape[-2:])
    input4D = tensor.reshape(input, new_input_shape, ndim=4)

    filter_lead = tensor.stack([sym_nkern, 1])
    new_filter_shape = tensor.join(0, filter_lead, filters.shape[-2:])
    filters4D = tensor.reshape(filters, new_filter_shape, ndim=4)

    # The convolution itself.
    output = conv.ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )(input4D, filters4D)

    # Drop the axes that were only added for ConvOp's benefit.
    single_image = input.ndim == 2
    single_filter = filters.ndim == 2

    if single_image and single_filter:
        if theano.config.warn.signal_conv2d_interface:
            warnings.warn(
                "theano.tensor.signal.conv2d() now outputs a 2d tensor when both"
                " inputs are 2d. To disable this warning, set the Theano flag"
                " warn.signal_conv2d_interface to False",
                stacklevel=3,
            )
        output = tensor.flatten(output.T, ndim=2).T
    elif single_image or single_filter:
        output = tensor.flatten(output.T, ndim=3).T

    return output
Exemple #52
0
import numpy as np
import keras.backend as K

# A test script to validate causal dilated convolutions: a 1-D signal is
# left-padded by `dilation` and convolved with a dilated 2-tap filter, twice.
dilation = 2
input = T.fvector()
filters = T.fvector()
input_reshaped = T.reshape(input, (1, -1, 1))
# Pad at the start of the time axis only, so no output depends on later input.
input_reshaped = K.temporal_pre_padding(input_reshaped, padding=dilation)
input_reshaped = T.reshape(input_reshaped, (1, 1, -1, 1))
# conv2d filter layout:
# (output channels, input channels, filter rows, filter columns).
filters_reshaped = T.reshape(filters, (1, 1, -1, 1))
out = T.nnet.conv2d(input_reshaped,
                    filters_reshaped,
                    border_mode='valid',
                    filter_dilation=(dilation, 1))
# Second layer: same padding and the same filter applied to the first output.
out = T.reshape(out, (1, -1, 1))
out = K.temporal_pre_padding(out, padding=dilation)
out = T.reshape(out, (1, 1, -1, 1))
out = T.nnet.conv2d(out,
                    filters_reshaped,
                    border_mode='valid',
                    filter_dilation=(dilation, 1))
out = T.flatten(out)

in_input = np.arange(8, dtype='float32')
in_filters = np.array([1, 1], dtype='float32')
f = theano.function([input, filters], out)
# Single-argument print(...) behaves the same on Python 2 and Python 3;
# the bare print statement used before is a syntax error on Python 3.
print("".join(["%3.0f" % i for i in in_input]))
print("".join(["%3.0f" % i for i in f(in_input, in_filters)]))
                     image_shape=x_shp,
                     filter_shape=w_fb.shape,
                     border_mode='valid')

    s_P_sum = theano.shared(w_fb.sum(3).sum(2).sum(1))
    Pmmm = p_mean * s_P_sum.dimshuffle(0, 'x', 'x')
    s_PM = theano.shared((w_means * w_fb).sum(3).sum(2).sum(1))
    z = p_scale * (Px - Pmmm) - s_PM.dimshuffle(0, 'x', 'x')

    assert z.dtype == x.dtype, (z.dtype, x.dtype)
    return z, (_shp[0], kN, _shp[2], _shp[3])


@pyll.scope.define
def slm_flatten(x_and_shp):
    """Flatten a (tensor, shape) pair down to two dimensions.

    Parameters
    ----------
    x_and_shp : pair
        ``(x, x_shp)``: a symbolic tensor and the shape tuple tracked for it.

    Returns
    -------
    pair
        ``(r, r_shp)`` where ``r`` is ``tensor.flatten(x, 2)`` and ``r_shp``
        is ``(x_shp[0], prod(x_shp[1:]))``.
    """
    # Tuple parameters in the signature (``def f((a, b))``) were removed in
    # Python 3 (PEP 3113); unpacking here works on both major versions and
    # still takes exactly one positional two-element argument.
    x, x_shp = x_and_shp
    r = tensor.flatten(x, 2)
    r_shp = x_shp[0], np.prod(x_shp[1:])
    return r, r_shp


@pyll.scope.define_info(o_len=2)
def slm_lpool_smallgrid((x, x_shp), grid_res=2, order=1):
    """
    Like lpool, but parametrized to produce a fixed size image as output.
    The image is not rescaled, but rather single giant box filters are
    defined for each output pixel, and stored in a matrix.
    """
    assert x.dtype == 'float32'
    order = float(order)

    if hasattr(order, '__iter__'):
    def get_output_for(self, inputs, **kwargs):
        """
        Compute this layer's symbolic output: run an LSTM recurrence over the
        input sequence and, when ``self.attention`` is set, combine the final
        hidden state with an attention summary of the encoder hidden states.

        Parameters
        ----------
        inputs : list of theano.TensorType
            `inputs[0]` is the symbolic input variable.  The other entries
            are looked up by index:

            * ``inputs[self.mask_incoming_index]`` (read only when that index
              is > 0) is the `mask`, expected as a matrix of shape
              ``(n_batch, n_time_steps)``.  At time steps where the mask is
              0 the previous cell state, hidden state and attention state
              are carried over unchanged.
            * ``inputs[self.hid_init_incoming_index]`` (read only when that
              index is > 0) is the initial hidden state.
            * ``inputs[self.encoder_mask_incoming_index]`` (read only when
              that index is > 0) is the encoder mask; it is cast to float32.
            * ``inputs[self.cell_init_incoming_index]`` is always read and is
              indexed as a pair: element 0 is taken as the encoder hidden
              states (only when ``self.attention`` is set) and element 1 as
              the initial cell state.

        Returns
        -------
        layer_output : theano.TensorType
            Without attention: the hidden state of the last scan step.
            With attention: ``tanh(r_N . W_p_attend + hid_N . W_x_attend)``,
            where ``hid_N`` is the last hidden state and ``r_N`` is either the
            last attention state of the scan (``self.word_by_word``) or an
            attention-weighted sum of the encoder hidden states computed
            once from ``hid_N``.
        """
        # Retrieve the layer input
        input = inputs[0]
        # Optional inputs stay None unless their index attribute is > 0
        mask = None
        hid_init = None
        cell_init = None
        encoder_hs = None
        encoder_mask = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]
        if self.encoder_mask_incoming_index > 0:
            # (n_batch, n_time_steps)
            encoder_mask = inputs[self.encoder_mask_incoming_index]
            encoder_mask = encoder_mask.astype('float32')
        # This entry is read unconditionally and indexed as a pair below.
        cell_init = inputs[self.cell_init_incoming_index]
        if self.attention:
            # (n_batch, n_time_steps, n_features)
            encoder_hs = cell_init[0]
            # Zero the encoder states at padded steps; the mask is broadcast
            # as (n_batch, n_time_steps, 1).
            # NOTE(review): encoder_mask is still None here when
            # encoder_mask_incoming_index <= 0 -- confirm attention is never
            # enabled without an encoder mask.
            encoder_hs = encoder_hs * encoder_mask.dimshuffle(0, 1, 'x')
        cell_init = cell_init[1]

        # Treat all dimensions after the second as flattened feature dimensions
        if input.ndim > 3:
            input = T.flatten(input, 3)

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, 2)
        seq_len, num_batch, _ = input.shape

        # Stack input weight matrices into a (num_inputs, 4*num_units)
        # matrix, which speeds up computation
        W_in_stacked = T.concatenate([
            self.W_in_to_ingate, self.W_in_to_forgetgate, self.W_in_to_cell,
            self.W_in_to_outgate
        ],
                                     axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = T.concatenate([
            self.W_hid_to_ingate, self.W_hid_to_forgetgate, self.W_hid_to_cell,
            self.W_hid_to_outgate
        ],
                                      axis=1)

        # Stack biases into a (4*num_units) vector
        b_stacked = T.concatenate(
            [self.b_ingate, self.b_forgetgate, self.b_cell, self.b_outgate],
            axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute the inputs dot weight matrices before scanning.
            # W_in_stacked is (n_features, 4*num_units). input is then
            # (n_time_steps, n_batch, 4*num_units).
            input = T.dot(input, W_in_stacked) + b_stacked

        # At each call to scan, input_n will be (n_batch, 4*num_units).
        # We define a slicing function that extracts the input to each LSTM
        # gate; the gate order matches the stacking order above.
        def slice_w(x, n):
            return x[:, n * self.num_units:(n + 1) * self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th time step of the input
        def step(input_n, cell_previous, hid_previous, previous_r, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, W_in_stacked) + b_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * self.W_cell_to_ingate
                forgetgate += cell_previous * self.W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                # The output-gate peephole uses the *new* cell value
                outgate += cell * self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate * self.nonlinearity(cell)
            # The attention state is passed through unchanged unless
            # word-by-word attention is enabled.
            r = previous_r
            if self.attention and self.word_by_word:
                mh = T.dot(hid, self.W_h_attend) + T.dot(
                    previous_r, self.W_r_attend)
                # mh is (n_batch, 1, n_features)
                mh = mh.dimshuffle(0, 'x', 1)
                M = T.dot(encoder_hs, self.W_y_attend) + mh
                # (n_batch, n_time_steps, n_features)
                M = nonlinearities.tanh(M)
                # alpha is (n_batch, n_time_steps, 1)
                alpha = T.dot(M, self.w_attend)
                # now is (n_batch, n_time_steps)
                alpha = T.flatten(alpha, 2)
                # NOTE: zeroing encoder_hs at padded steps does not give
                # those steps a zero weight after the softmax.  The two
                # masking alternatives below are left disabled:
                # when i > encoder_seq_len, fill alpha_i to -np.inf
                # alpha = T.switch(encoder_mask, alpha, -np.inf)
                alpha = T.nnet.softmax(alpha)
                # apply encoder_mask to alpha
                # encoder_mask is (n_batch, n_time_steps)
                # when i > encoder_seq_len, alpha_i should be 0.
                # Not strictly needed because encoder_hs is already zero at
                # those steps, so they add nothing to the weighted sum:
                # alpha = alpha * encoder_mask
                alpha = alpha.dimshuffle(0, 1, 'x')
                weighted_encoder = T.sum(encoder_hs * alpha, axis=1)
                r = weighted_encoder + nonlinearities.tanh(
                    T.dot(previous_r, self.W_t_attend))

            return [cell, hid, r]

        def step_masked(input_n, mask_n, cell_previous, hid_previous,
                        previous_r, *args):
            cell, hid, r = step(input_n, cell_previous, hid_previous,
                                previous_r, *args)

            # Skip over any input with mask 0 by copying the previous
            # states; proceed normally for any input with mask 1.
            cell = T.switch(mask_n, cell, cell_previous)
            hid = T.switch(mask_n, hid, hid_previous)
            r = T.switch(mask_n, r, previous_r)
            return [cell, hid, r]

        if mask is not None:
            # mask is given as (batch_size, seq_len). Because scan iterates
            # over first dimension, we dimshuffle to (seq_len, batch_size) and
            # add a broadcastable dimension
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked
        else:
            sequences = input
            step_fun = step

        ones = T.ones((num_batch, 1))
        if not isinstance(self.hid_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(ones, self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # The "peephole" weight matrices are only used when self.peepholes=True
        if self.peepholes:
            non_seqs += [
                self.W_cell_to_ingate, self.W_cell_to_forgetgate,
                self.W_cell_to_outgate
            ]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_seqs += [W_in_stacked, b_stacked]

        # Repeat the initial attention state for every batch entry
        r_init = T.dot(ones, self.r_init)
        # scan runs with strict=True, so every shared variable the step
        # function touches for word-by-word attention must be listed here.
        if self.attention and self.word_by_word:
            non_seqs += [
                self.W_y_attend,
                self.W_h_attend,
                self.W_r_attend,
                self.w_attend,
                self.W_t_attend,
                encoder_hs,
                # encoder_mask
            ]
        # Scan op iterates over first dimension of input and repeatedly
        # applies the step function
        cell_out, hid_out, r_out = theano.scan(
            fn=step_fun,
            sequences=sequences,
            outputs_info=[cell_init, hid_init, r_init],
            go_backwards=self.backwards,
            truncate_gradient=self.gradient_steps,
            non_sequences=non_seqs,
            strict=True)[0]
        # Hidden state of the last scan step, (n_batch, n_features)
        hid_N = hid_out[-1]
        out = hid_N
        if self.attention:
            if self.word_by_word:
                # Attention state was updated inside the scan; take its
                # last value.
                r_N = r_out[-1]
            else:
                # Attend once over the encoder states, using only the final
                # hidden state.
                mh = T.dot(hid_N, self.W_h_attend)
                mh = mh.dimshuffle(0, 'x', 1)
                M = T.dot(encoder_hs, self.W_y_attend) + mh
                # (n_batch, n_time_steps, n_features)
                M = nonlinearities.tanh(M)
                alpha = T.dot(M, self.w_attend)
                # (n_batch, n_time_steps)
                alpha = T.flatten(alpha, 2)
                # when i > encoder_seq_len, fill alpha_i to -np.inf
                # alpha = T.switch(encoder_mask, alpha, -np.inf)
                alpha = T.nnet.softmax(alpha)
                # apply encoder_mask to alpha
                # encoder_mask is (n_batch, n_time_steps)
                # when i > encoder_seq_len, alpha_i should be 0.
                # Not strictly needed because encoder_hs is already zero at
                # those steps:
                # alpha = alpha * encoder_mask
                alpha = alpha.dimshuffle(0, 1, 'x')
                # (n_batch, n_features)
                r_N = T.sum(encoder_hs * alpha, axis=1)
            out = nonlinearities.tanh(
                T.dot(r_N, self.W_p_attend) + T.dot(hid_N, self.W_x_attend))
        return out
Exemple #55
0
all_params = nn.layers.get_all_params(l_out)
if config.one_hot:
    # Leave the first parameter out of the trained set.
    # NOTE(review): presumably the fixed one-hot embedding matrix -- confirm.
    all_params = all_params[1:]
all_layers = nn.layers.get_all_layers(l_out)
num_params = nn.layers.count_params(l_out)
print('  number of parameters: %d' % num_params)
print('  layer output shapes:')
print('#params:')
print('output shape:')
for layer in all_layers:
    name = layer.__class__.__name__
    num_param = str(
        sum(np.prod(p.get_value().shape) for p in layer.get_params()))
    print('    %s %s %s' % (name, num_param, layer.output_shape))

# Targets are the inputs shifted by one step, flattened to line up with the
# rows of `predictions`.
y = T.cast(T.flatten(x[:, 1:]), 'int32')
# training loss: masked negative log-likelihood, summed over time and
# averaged over the batch
p1 = T.reshape(T.log(predictions[T.arange(y.shape[0]), y]), mask.shape)
loss = -1. * T.mean(T.sum(mask * p1, axis=1), axis=0)

# validation loss (with disabled dropout)
p1_det = T.reshape(T.log(predictions_det[T.arange(y.shape[0]), y]), mask.shape)
loss_det = -1. * T.mean(T.sum(mask * p1_det, axis=1), axis=0)

# The learning rate lives in a shared variable so that it can be changed with
# learning_rate.set_value(...) without recompiling. It has to be the value
# handed to the optimizer: passing the plain float config.learning_rate
# would bake in a constant and leave this variable unused.
learning_rate = theano.shared(np.float32(config.learning_rate))
grads = theano.grad(loss, all_params)
updates = nn.updates.rmsprop(grads, all_params, learning_rate)

train = theano.function([x, mask], loss, updates=updates)
validate = theano.function([x, mask], loss_det)
Exemple #56
0
def main(data_sets, W_embed):
    """Build, train and evaluate a GRU + topic ("theta") sequence model.

    The function loads the train/valid/test splits, builds a Lasagne graph
    whose per-time-step output is ``sigmoid(GRU score + context score)``
    multiplied by the mask, trains it with Adam on summed binary
    cross-entropy plus the theta layer's KL term, and at the end of every
    epoch evaluates on the test split and prints the metrics.

    Parameters
    ----------
    data_sets
        Object providing ``get_data_from_type(split)`` for the splits
        ``"train"``, ``"valid"`` and ``"test"``; each call returns a pair of
        raw inputs and raw labels.
    W_embed
        Not used by the active code; it is only referenced in a
        commented-out embedding layer below.

    Notes
    -----
    Relies on names defined elsewhere in the module: ``N_HIDDEN``,
    ``GRAD_CLIP``, ``num_epochs``, ``prepare_data``, ``MaskingLayer``,
    ``ThetaLayer``, ``iterate_minibatches_listinputs``, ``pr_auc`` and the
    scikit-learn metric functions.  A ``KeyboardInterrupt`` during training
    is swallowed, so Ctrl-C ends the run quietly.
    """
    # Optimization learning rate
    LEARNING_RATE = theano.shared(np.array(0.001, dtype=theano.config.floatX))
    # NOTE(review): eta_decay is never read in this function.
    eta_decay = np.array(0.5, dtype=theano.config.floatX)
    # Maximum sequence length requested from prepare_data; MAX_LENGTH is
    # rebound below from the shape of the prepared training array.
    MAX_LENGTH = 300
    X_raw_data, Y_raw_data = data_sets.get_data_from_type("train")
    trainingAdmiSeqs, trainingMask, trainingLabels, trainingLengths, ltr = prepare_data(
        X_raw_data, Y_raw_data, vocabsize=619, maxlen=MAX_LENGTH)
    Num_Samples, MAX_LENGTH, N_VOCAB = trainingAdmiSeqs.shape

    X_valid_data, Y_valid_data = data_sets.get_data_from_type("valid")
    validAdmiSeqs, validMask, validLabels, validLengths, lval = prepare_data(
        X_valid_data, Y_valid_data, vocabsize=619, maxlen=MAX_LENGTH)

    X_test_data, Y_test_data = data_sets.get_data_from_type("test")
    test_admiSeqs, test_mask, test_labels, testLengths, ltes = prepare_data(
        X_test_data, Y_test_data, vocabsize=619, maxlen=MAX_LENGTH)
    # Dataset statistics: sums over all three splits of the two per-sample
    # quantities returned by prepare_data.
    alllength = sum(trainingLengths) + sum(validLengths) + sum(testLengths)
    print(alllength)
    eventNum = sum(ltr) + sum(lval) + sum(ltes)
    print(eventNum)

    print("Building network ...")
    N_BATCH = 1
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, N_VOCAB))
    #l_label = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, 1))

    # The network also needs a way to provide a mask for each sequence.  We'll
    # use a separate input layer for that.  Since the mask only determines
    # which indices are part of the sequence for each batch entry, they are
    # supplied as matrices of dimensionality (N_BATCH, MAX_LENGTH)
    l_mask = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH))
    embedsize = 100
    n_topics = 50
    # Recurrent branch: bias-free linear embedding -> GRU -> masking.
    #l_embed = lasagne.layers.DenseLayer(l_in, num_units=embedsize, b=None, W = W_embed, num_leading_axes=2)
    l_embed = lasagne.layers.DenseLayer(l_in,
                                        num_units=embedsize,
                                        b=None,
                                        num_leading_axes=2)
    #l_embed.params[l_embed.W].remove("trainable")
    #l_drop = lasagne.layers.dropout(l_embed)
    l_forward0 = lasagne.layers.GRULayer(l_embed,
                                         N_HIDDEN,
                                         mask_input=l_mask,
                                         grad_clipping=GRAD_CLIP,
                                         only_return_final=False)

    l_forward = MaskingLayer([l_forward0, l_mask])

    # Topic branch: two ReLU layers applied per time step, followed by mu and
    # log_sigma heads that (with num_leading_axes=1) see the whole sequence
    # flattened into one feature vector per batch entry.
    l_1 = lasagne.layers.DenseLayer(
        l_in,
        num_units=N_HIDDEN,
        nonlinearity=lasagne.nonlinearities.rectify,
        num_leading_axes=2)
    l_2 = lasagne.layers.DenseLayer(
        l_1,
        num_units=N_HIDDEN,
        nonlinearity=lasagne.nonlinearities.rectify,
        num_leading_axes=2)
    mu = lasagne.layers.DenseLayer(l_2,
                                   num_units=n_topics,
                                   nonlinearity=None,
                                   num_leading_axes=1)  # batchsize * n_topic
    log_sigma = lasagne.layers.DenseLayer(
        l_2, num_units=n_topics, nonlinearity=None,
        num_leading_axes=1)  # batchsize * n_topic
    l_theta = ThetaLayer([mu, log_sigma],
                         maxlen=MAX_LENGTH)  #batchsize * maxlen * n_topic

    # Context score: per-step topic projection of the input multiplied
    # elementwise with theta and averaged over the topic axis.
    l_B = lasagne.layers.DenseLayer(l_in,
                                    b=None,
                                    num_units=n_topics,
                                    nonlinearity=None,
                                    num_leading_axes=2)
    l_context = lasagne.layers.ElemwiseMergeLayer([l_B, l_theta], T.mul)
    l_context = lasagne.layers.ExpressionLayer(l_context,
                                               lambda X: X.mean(-1),
                                               output_shape="auto")

    # GRU score: one linear unit per time step, reshaped to 2-D.
    l_dense0 = lasagne.layers.DenseLayer(l_forward,
                                         num_units=1,
                                         nonlinearity=None,
                                         num_leading_axes=2)
    l_dense1 = lasagne.layers.reshape(l_dense0,
                                      ([0], [1]))  #batchsize * maxlen
    l_dense = lasagne.layers.ElemwiseMergeLayer([l_dense1, l_context], T.add)
    l_out0 = lasagne.layers.NonlinearityLayer(
        l_dense, nonlinearity=lasagne.nonlinearities.sigmoid)
    # Zero the padded positions with the mask, then add a small constant so
    # that no output is exactly 0 when it reaches the cross-entropy.
    l_out = lasagne.layers.ExpressionLayer(
        lasagne.layers.ElemwiseMergeLayer([l_out0, l_mask], T.mul),
        lambda X: X + 0.000001)

    target_values = T.matrix('target_output')
    target_values_flat = T.flatten(target_values)

    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)
    # The network output will have shape (n_batch, maxlen); let's flatten to get a
    # 1-dimensional vector of predicted values
    predicted_values = network_output.flatten()
    # The cost is the binary cross-entropy summed over all positions, plus
    # the KL term exposed by the theta layer
    cost = lasagne.objectives.binary_crossentropy(predicted_values,
                                                  target_values_flat)
    kl_term = l_theta.klterm
    cost = cost.sum() + kl_term

    test_output = lasagne.layers.get_output(l_out, deterministic=True)

    #cost = T.mean((predicted_values - target_values)**2)
    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)

    # Compute Adam updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adam(cost, all_params, LEARNING_RATE)
    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values, l_mask.input_var],
                            cost,
                            updates=updates)
    compute_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var], cost)
    prd = theano.function([l_in.input_var, l_mask.input_var], test_output)
    # Returns theta together with the GRU output at the last time step,
    # for saving per-sample feature vectors.
    #rnn_out = T.concatenate(l_theta.theta, lasagne.layers.get_output(l_forward0)[:,-1,:].reshape((N_BATCH, N_HIDDEN)),axis=1)
    output_theta = theano.function([l_in.input_var, l_mask.input_var], [
        l_theta.theta,
        lasagne.layers.get_output(l_forward0)[:, -1, :].reshape(
            (N_BATCH, N_HIDDEN))
    ],
                                   on_unused_input='ignore')

    print("Training ...")
    try:
        for epoch in range(num_epochs):
            train_err = 0
            train_batches = 0
            start_time = time.time()
            thetas_train = []
            for batch in iterate_minibatches_listinputs(
                [trainingAdmiSeqs, trainingLabels, trainingMask],
                    N_BATCH,
                    shuffle=True):
                inputs = batch
                train_err += train(inputs[0], inputs[1], inputs[2])
                train_batches += 1
                # Feature vector for this batch, taken after the parameter
                # update that train() just applied.
                theta_train, rnnvec_train = output_theta(inputs[0], inputs[2])
                rnnout_train = np.concatenate([theta_train, rnnvec_train],
                                              axis=1)
                thetas_train.append(rnnout_train.flatten())
                if (train_batches + 1) % 1000 == 0:
                    print(train_batches)

            # One file of training feature vectors per epoch
            np.save("theta_with_rnnvec/thetas_train" + str(epoch),
                    thetas_train)

            # # And a full pass over the validation data:
            # val_err = 0
            # val_acc = 0
            # val_batches = 0
            # new_validlabels = []
            # pred_validlabels = []
            # for batch in iterate_minibatches_listinputs([validAdmiSeqs, validLabels, validMask, validLengths], 1, shuffle=False):
            #     inputs = batch
            #     err = compute_cost(inputs[0], inputs[1], inputs[2])
            #     val_err += err
            #     leng = inputs[3][0]
            #     new_validlabels.extend(inputs[1].flatten()[:leng])
            #     pred_validlabels.extend(prd(inputs[0], inputs[2]).flatten()[:leng])
            #     val_batches += 1
            # val_auc = roc_auc_score(new_validlabels, pred_validlabels)
            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs,
                time.time() - start_time))
            print("  training loss:\t\t{:.6f}".format(train_err /
                                                      train_batches))
            # print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
            # print("  validation auc:\t\t{:.6f}".format(val_auc))
            # print("  validation accuracy:\t\t{:.2f} %".format(
            #     val_acc / val_batches * 100))

            # Test-set evaluation. This still sits inside the epoch loop, so
            # the "Final results" below are printed after every epoch.
            test_err = 0

            test_batches = 0
            new_testlabels = []
            pred_testlabels = []
            thetas = []
            for batch in iterate_minibatches_listinputs(
                [test_admiSeqs, test_labels, test_mask, testLengths],
                    1,
                    shuffle=False):
                inputs = batch
                err = compute_cost(inputs[0], inputs[1], inputs[2])
                test_err += err
                # Keep only the first `leng` positions of this (single)
                # sequence so that padding does not enter the metrics.
                leng = inputs[3][0]
                new_testlabels.extend(inputs[1].flatten()[:leng])
                pred_testlabels.extend(
                    prd(inputs[0], inputs[2]).flatten()[:leng])
                theta, rnnvec = output_theta(inputs[0], inputs[2])
                rnnout = np.concatenate([theta, rnnvec], axis=1)
                thetas.append(rnnout.flatten())
                test_batches += 1
            test_auc = roc_auc_score(new_testlabels, pred_testlabels)
            test_pr_auc = pr_auc(new_testlabels, pred_testlabels)
            # np.save("CONTENT_results/testlabels_"+str(epoch),new_testlabels)
            # np.save("CONTENT_results/predlabels_"+str(epoch),pred_testlabels)
            # np.save("CONTENT_results/thetas"+str(epoch),thetas)

            # np.save("theta_with_rnnvec/testlabels_"+str(epoch),new_testlabels)
            # np.save("theta_with_rnnvec/predlabels_"+str(epoch),pred_testlabels)
            # np.save("theta_with_rnnvec/thetas"+str(epoch),thetas)

            # Predictions are thresholded at 0.5 for the label-based metrics
            test_pre_rec_f1 = precision_recall_fscore_support(
                np.array(new_testlabels),
                np.array(pred_testlabels) > 0.5,
                average='binary')
            test_acc = accuracy_score(np.array(new_testlabels),
                                      np.array(pred_testlabels) > 0.5)
            print("Final results:")
            print("  test loss:\t\t{:.6f}".format(test_err / test_batches))
            print("  test auc:\t\t{:.6f}".format(test_auc))
            print("  test pr_auc:\t\t{:.6f}".format(test_pr_auc))
            print("  test accuracy:\t\t{:.2f} %".format(test_acc * 100))
            print(
                "  test Precision, Recall and F1:\t\t{:.4f} %\t\t{:.4f}\t\t{:.4f}"
                .format(test_pre_rec_f1[0], test_pre_rec_f1[1],
                        test_pre_rec_f1[2]))

    except KeyboardInterrupt:
        # Deliberate: lets the user stop training with Ctrl-C without a
        # traceback.
        pass
Exemple #57
0
def sparse_categorical_crossentropy(output, target, from_logits=False):
    """Categorical cross-entropy for integer (class-index) targets.

    The targets are flattened, cast to int32, one-hot encoded over the size
    of the last axis of `output`, reshaped to the shape of `output`, and
    then passed to `categorical_crossentropy`.

    # Arguments
        output: tensor of predictions; its last axis indexes the classes.
        target: tensor of class indices.
        from_logits: forwarded unchanged to `categorical_crossentropy`.

    # Returns
        Whatever `categorical_crossentropy` returns for the one-hot targets.
    """
    class_ids = T.cast(T.flatten(target), 'int32')
    n_classes = output.shape[-1]
    one_hot = T.extra_ops.to_one_hot(class_ids, nb_class=n_classes)
    dense_target = reshape(one_hot, shape(output))
    return categorical_crossentropy(output, dense_target, from_logits)
Exemple #58
0
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean categorical cross-entropy between the model output and its targets.

    Parameters
    ----------
    model
        Object exposing the layers ``l_out`` (predictions) and ``l_target``
        (targets).
    deterministic : bool
        Forwarded to ``get_output`` for the prediction layer.
    epsilon : float
        Accepted for interface compatibility; this objective does not use it.

    Returns
    -------
    Symbolic scalar: the cross-entropy averaged over all entries.
    """
    probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    raw_targets = nn.layers.get_output(model.l_target)
    # Flattened and cast to int32 before being handed to the cross-entropy.
    class_ids = T.cast(T.flatten(raw_targets), 'int32')
    return T.mean(nn.objectives.categorical_crossentropy(probs, class_ids))
Exemple #59
0
def build_objective(model, deterministic=False, epsilon=1e-12):
    """Mean binary cross-entropy between the model output and its targets.

    Parameters
    ----------
    model
        Object exposing the layers ``l_out`` (predicted probabilities) and
        ``l_target`` (targets).
    deterministic : bool
        Forwarded to ``get_output`` for the prediction layer.
    epsilon : float
        Predictions are clipped to ``[epsilon, 1 - epsilon]`` before the
        cross-entropy is taken.

    Returns
    -------
    Symbolic scalar: the cross-entropy averaged over all entries.
    """
    raw_probs = nn.layers.get_output(model.l_out, deterministic=deterministic)
    flat_targets = T.flatten(nn.layers.get_output(model.l_target))
    # Clip the predictions on both sides before the cross-entropy.
    safe_probs = T.clip(raw_probs, epsilon, 1. - epsilon)
    return T.mean(T.nnet.binary_crossentropy(safe_probs, flat_targets))
Exemple #60
0
def max_pool(images, imgshp, maxpoolshp):
    """
    Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and performs
    max pooling. Max pooling downsamples by taking the max value in a given
    area, here defined by maxpoolshp. Outputs a 2D tensor of shape
    batch_size x output_size.

    Parameters
    ----------
    images : 2D tensor
        Tensor containing the images on which to apply max pooling.
        Assumed to be of shape `batch_size x img_size`.
    imgshp : tuple
        Image dimensions, either (height, width) or
        (nfeatures, height, width). With two entries, nfeatures defaults
        to 1. Any sequence of two entries is accepted and converted to a
        tuple.
    maxpoolshp : tuple
        Tuple containing shape of area to max pool over

    Returns
    -------
    out1 : symbolic 2D tensor
        The pooled result, flattened to two dimensions.
    out2 : shape
        Logical shape of the output, as reported by
        `convolution_indices.conv_eval`.
    """
    poolsize = numpy.int64(numpy.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1.
    # tuple() guards against lists (TypeError on concatenation) and NumPy
    # arrays (where `+` would silently add 1 to every entry instead).
    if numpy.size(imgshp) == 2:
        imgshp = (1, ) + tuple(imgshp)

    # construct indices and index pointers for sparse matrix, which, when
    # multiplied with input images will generate a stack of image patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, maxpoolshp, maxpoolshp,
                                      mode='valid')

    # Trace output. Each call passes a single pre-formatted string so the
    # emitted text is the same whether `print` is the Python 2 statement or
    # the Python 3 function.
    print('XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX')
    for label, value in (('imgshp = ', imgshp),
                         ('maxpoolshp = ', maxpoolshp),
                         ('outshp = ', outshp)):
        print('%s %s' % (label, value))

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(numpy.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    # One row of `poolsize` candidates per (image, output position, feature);
    # the max over the last axis is the pooled value.
    pshape = tensor.stack(images.shape[0] *
                          tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(imgshp[0]),
                          tensor.as_tensor(poolsize))
    patch_stack = tensor.reshape(patches, pshape, ndim=3)

    out1 = tensor.max(patch_stack, axis=2)

    # Regroup per image, then swap the last two axes before flattening back
    # to a 2D (batch, features) result.
    pshape = tensor.stack(images.shape[0],
                          tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(imgshp[0]))
    out2 = tensor.reshape(out1, pshape, ndim=3)

    out3 = tensor.DimShuffle((False, ) * 3, (0, 2, 1))(out2)

    return tensor.flatten(out3, 2), outshp