Exemple #1
0
def spatial_2d_padding(x, padding=(1, 1), dim_ordering='th'):
    '''Zero-pad the two spatial dimensions of a 4D tensor symmetrically.

    `padding[0]` zeros are added on both sides of the first spatial axis
    and `padding[1]` zeros on both sides of the second one.

    # Arguments
        x: 4D tensor exposing a `shape` attribute.
        padding: pair of per-side pad widths for the two spatial axes.
        dim_ordering: 'th' pads axes 2 and 3, 'tf' pads axes 1 and 2.

    # Returns
        A tensor of zeros with the enlarged shape in which the centre
        region has been set to `x`.

    # Raises
        ValueError: if `dim_ordering` is neither 'th' nor 'tf'.
    '''
    input_shape = x.shape
    if dim_ordering == 'th':
        spatial_axes = (2, 3)
    elif dim_ordering == 'tf':
        spatial_axes = (1, 2)
    else:
        # str() keeps the message usable when a non-string (e.g. None) is
        # passed; plain concatenation would raise an unrelated TypeError.
        raise ValueError('Invalid dim_ordering: ' + str(dim_ordering))

    output_shape = []
    indices = []
    for axis in range(4):
        size = input_shape[axis]
        if axis in spatial_axes:
            pad = padding[spatial_axes.index(axis)]
            output_shape.append(size + 2 * pad)
            indices.append(slice(pad, size + pad))
        else:
            # Non-spatial axes are copied through untouched.
            output_shape.append(size)
            indices.append(slice(None))

    output = T.zeros(tuple(output_shape))
    return T.set_subtensor(output[tuple(indices)], x)
Exemple #2
0
    def initial_states(self, batch_size, *args, **kwargs):
        r"""Build the initial states for an application call.

        The default implementation assumes the recurrent application
        method is named `apply`. The state names are read from
        `apply.states` and one zero tensor is returned per state.

        :class:`SimpleRecurrent`, :class:`LSTM` and :class:`GatedRecurrent`
        override this method with trainable initial states initialized
        with zeros.

        Parameters
        ----------
        batch_size : int
            The batch size.
        \*args
            The positional arguments of the application call (unused here).
        \*\*kwargs
            The keyword arguments of the application call (unused here).

        Returns
        -------
        list
            One zero tensor per state, in the order of `apply.states`:
            shape ``(batch_size,)`` when the state's dimension is 0 and
            ``(batch_size, dim)`` otherwise.

        """
        def zeros_for(state_name):
            width = self.get_dim(state_name)
            shape = (batch_size,) if width == 0 else (batch_size, width)
            return tensor.zeros(shape)

        return [zeros_for(state_name) for state_name in self.apply.states]
 def initial_glimpses(self, batch_size, attended):
     """Return the four initial glimpse tensors as a list.

     The list holds, in order: a ``(batch_size, self.attended_dim)`` zero
     matrix; the same ``(batch_size, attended.shape[0])`` matrix twice,
     whose first column is one and whose remaining columns are zero; and
     an int64 zero vector of length ``batch_size``.
     """
     glimpse_init = tensor.zeros((batch_size, self.attended_dim))
     # One-hot on position 0 along an axis as long as attended.shape[0].
     first_position = tensor.concatenate(
         [tensor.ones((batch_size, 1)),
          tensor.zeros((batch_size, attended.shape[0] - 1))],
         axis=1)
     step_init = tensor.zeros((batch_size,), dtype='int64')
     # The same variable is deliberately listed twice.
     return [glimpse_init, first_position, first_position, step_init]
Exemple #4
0
 def lllistool(i, inp, func):
     """Build layer `i`: a Linear projection optionally followed by `func`.

     Parameters
     ----------
     i : int
         Index into the module-level DIMS / NUMS tables; the layer maps
         DIMS[i] inputs to DIMS[i+1] * NUMS[i+1] outputs.
     inp : symbolic variable
         Input fed to the Linear brick.
     func : brick class or None
         SimpleRecurrent, LSTM, SequenceGenerator, None (projection only),
         or any other brick class constructible without arguments.

     Returns
     -------
     The symbolic output of the layer, or None for SequenceGenerator.
     """
     if func == LSTM:
         # NOTE(review): this rescales the module-level NUMS entry in place,
         # so calling this twice for the same `i` multiplies it by 16.
         NUMS[i+1] *= 4
     sdim = DIMS[i]
     if func == SimpleRecurrent or func == LSTM:
         sdim = DIMS[i] + DIMS[i+1]
     # Every initialiser below uses the same 1/sqrt(sdim) standard deviation.
     std = sdim ** (-0.5)
     l = Linear(input_dim=DIMS[i], output_dim=DIMS[i+1] * NUMS[i+1],
                weights_init=IsotropicGaussian(std=std),
                biases_init=IsotropicGaussian(std=std),
                name='Lin{}'.format(i))
     l.initialize()
     if func == SimpleRecurrent:
         gong = func(dim=DIMS[i+1], activation=Rectifier(),
                     weights_init=IsotropicGaussian(std=std))
         gong.initialize()
         ret = gong.apply(l.apply(inp))
     elif func == LSTM:
         gong = func(dim=DIMS[i+1], activation=Tanh(),
                     weights_init=IsotropicGaussian(std=std))
         gong.initialize()
         print(inp)
         # Initial hidden state and cells are zeros with inp.shape[1] rows.
         ret, _ = gong.apply(
             l.apply(inp),
             T.zeros((inp.shape[1], DIMS[i+1])),
             T.zeros((inp.shape[1], DIMS[i+1])),
         )
     elif func == SequenceGenerator:
         # NOTE(review): the generator is constructed but never applied, and
         # this branch returns None.
         gong = func(
             readout=None,
             transition=SimpleRecurrent(dim=100, activation=Rectifier(),
                                        weights_init=IsotropicGaussian(std=0.1)))
         ret = None
     elif func is None:
         ret = l.apply(inp)
     else:
         gong = func()
         ret = gong.apply(l.apply(inp))
     return ret
Exemple #5
0
def pad(inp, padding):
    """Zero-pad a 4D or 5D tensor.

    `padding` is a sequence of (before, after) pairs. For a 4D input the
    two pairs apply to axes 2 and 3. For a 5D input the three pairs apply
    to axes 3, 4 and 1, in that order. If every pad width is zero the
    input is returned unchanged.

    Raises NotImplementedError for any other number of dimensions.
    """
    if all(amount == 0 for amount in pyk.flatten(padding)):
        return inp

    def span(before_after):
        # Region of the padded tensor that receives the original data; an
        # `after` of 0 must become an open-ended slice, not `:-0`.
        after = before_after[1]
        return slice(before_after[0], -after if after != 0 else None)

    rank = inp.ndim
    if rank not in (4, 5):
        raise NotImplementedError("Padding is only implemented for 4 and 5 dimensional tensors.")

    if rank == 4:
        canvas = T.zeros(shape=(inp.shape[0],
                                inp.shape[1],
                                inp.shape[2] + sum(padding[0]),
                                inp.shape[3] + sum(padding[1])))
        rows, cols = [span(pair) for pair in padding]
        return T.set_subtensor(canvas[:, :, rows, cols], inp)

    # rank == 5: the third padding pair belongs to axis 1.
    canvas = T.zeros(shape=(inp.shape[0],
                            inp.shape[1] + sum(padding[2]),
                            inp.shape[2],
                            inp.shape[3] + sum(padding[0]),
                            inp.shape[4] + sum(padding[1])))
    rows, cols, depth = [span(pair) for pair in padding]
    return T.set_subtensor(canvas[:, depth, :, rows, cols], inp)
Exemple #6
0
    def plotUpdate(self,updates):
        '''
        Summarise the size of the pending update of every tracked parameter.

        For each of the `self.deep` layers the weight `w` and the bias `b`
        are tracked, followed by the classifier weight `w`. For each one the
        max, min and mean of `updates[param] - param` are recorded.

        :type updates: dict
        :param updates: maps each shared parameter to its updated expression
        :return: [max, min, mean], three symbolic vectors of length
                 2 * self.deep + 1; slot 2*i holds layer i's weight, slot
                 2*i + 1 its bias and the last slot the classifier weight
        '''
        n_slots = self.deep * 2 + 1
        reducers = (T.max, T.min, T.mean)
        stats = [T.zeros(shape=(n_slots,)) for _ in reducers]

        # (slot, parameter) pairs in the order they are written.
        # `range` instead of `xrange` so this also runs on Python 3.
        tracked = []
        for i in range(self.deep):
            layer = self.layers[i]
            tracked.append((2 * i, layer.w))
            tracked.append((2 * i + 1, layer.b))
        tracked.append((self.deep * 2, self.classifier.w))

        for slot, param in tracked:
            delta = updates[param] - param
            for k, reduce_fn in enumerate(reducers):
                stats[k] = T.set_subtensor(stats[k][slot], reduce_fn(delta))
        return stats
    def __init__(self, rng, input, mask, n_in, n_h):
        """Create the LSTM parameters and build the recurrence over `input`.

        For each gate suffix i, f, c, o this creates an input weight `W_*`
        (from gauss_weight(rng, n_in, n_h)), a recurrent weight `U_*` (from
        gauss_weight(rng, n_h)) and a zero bias `b_*` of length n_h, all as
        Theano shared variables. `self.params` lists them grouped as all W,
        then all U, then all b. `self._step` is scanned over (mask, input)
        starting from two zero states of shape (input.shape[1], n_h).
        """
        gates = ('i', 'f', 'c', 'o')

        # Creation order (all W, then all U, then all b) is preserved so
        # that `rng` is consumed in the same sequence.
        for gate in gates:
            label = 'W_' + gate
            setattr(self, label,
                    theano.shared(gauss_weight(rng, n_in, n_h), label, borrow=True))
        for gate in gates:
            label = 'U_' + gate
            setattr(self, label,
                    theano.shared(gauss_weight(rng, n_h), label, borrow=True))
        for gate in gates:
            label = 'b_' + gate
            setattr(self, label,
                    theano.shared(numpy.zeros((n_h,), dtype=config.floatX),
                                  label, borrow=True))

        self.params = [getattr(self, prefix + gate)
                       for prefix in ('W_', 'U_', 'b_')
                       for gate in gates]

        # Two recurrent states, both starting at zero.
        initial_states = [T.zeros((input.shape[1], n_h)) for _ in range(2)]

        rval, _ = theano.scan(self._step,
                              sequences=[mask, input],
                              outputs_info=initial_states)

        # self.output is in the format (length, batchsize, n_h)
        self.output = rval[0]
Exemple #8
0
    def get_output(self, train=False):
        """Scan `self._step` over the input and return the selected outputs.

        The layer input has its first two axes swapped before scanning
        (presumably batch-major to time-major -- confirm against the
        caller). Three recurrent states are carried, each starting at
        zero: one of width `input_dim`, one of width `output_dim` and one
        of width `causes_dim`.

        `self.return_mode` selects what is returned: 'states' (second scan
        output), 'causes' (third scan output) or 'both' (the two
        concatenated on the last axis). With 'states' or 'causes',
        `self.return_sequences` chooses between the whole sequence, with
        the first two axes swapped back, and the last step only.

        `train` is accepted but not used.

        Raises ValueError for any other `return_mode`.
        """
        X = self.get_input().dimshuffle(1, 0, 2)
        Vx = T.dot(X, self.V)
        x_init = T.zeros((X.shape[1], self.input_dim))
        s_init = T.zeros((X.shape[1], self.output_dim))
        u_init = T.zeros((X.shape[1], self.causes_dim))
        outputs, _ = scan(
            self._step,
            sequences=[X, Vx],
            outputs_info=[x_init, s_init, u_init],
            non_sequences=self.params,
            truncate_gradient=self.truncate_gradient)

        if self.return_mode == 'both':
            # NOTE(review): this branch returns the scan-ordered sequence
            # directly and ignores `return_sequences`, unlike the other two
            # modes. Left as is because callers may rely on it.
            return T.concatenate([outputs[1], outputs[2]],
                                 axis=-1)
        elif self.return_mode == 'states':
            out = outputs[1]
        elif self.return_mode == 'causes':
            out = outputs[2]
        else:
            # The attribute is called `return_mode`; the old message said
            # `return_model`.
            raise ValueError("return_mode {0} not valid. Choose "
                             "'both', 'states' or 'causes'".format(
                                 self.return_mode))

        if self.return_sequences:
            return out.dimshuffle(1, 0, 2)
        else:
            return out[-1]
Exemple #9
0
 def initial_glimpses(self, name, batch_size, sequence):
     """Return the zero-initialised glimpse called `name`.

     "glimpses" gives a (batch_size, self.sequence_dim) zero matrix and
     "weights" a (batch_size, sequence.shape[0]) zero matrix. Any other
     name raises ValueError.
     """
     if name not in ("glimpses", "weights"):
         raise ValueError("Unknown glimpse name {}".format(name))
     width = self.sequence_dim if name == "glimpses" else sequence.shape[0]
     return tensor.zeros((batch_size, width))
Exemple #10
0
    def function(self, input_tensor):
        """Run four LSTM scans over `input_tensor` and concatenate them.

        Each scan starts from zero hidden and cell states of shape
        (input_tensor.shape[1], self.output_neurons) and calls
        `self.__lstm_wrapper` with one of the parameter sets `d_forward`,
        `d_backward`, `u_forward`, `u_backward`. The two `u_*` scans iterate
        the sequence backwards and their outputs are reversed again before
        concatenation. The first output of every scan is concatenated
        along axis 2.
        """
        init_hs = T.zeros((input_tensor.shape[1], self.output_neurons))
        init_cs = T.zeros((input_tensor.shape[1], self.output_neurons))

        def run_pass(weights, go_forwards, **scan_options):
            # One scan with the shared initial state; `scan_options` carries
            # `go_backwards` only for the passes that originally set it.
            results, _ = theano.scan(
                fn=lambda a, b, c: self.__lstm_wrapper(
                    a, b, c, weights, go_forwards=go_forwards),
                outputs_info=[init_hs, init_cs],
                sequences=input_tensor,
                non_sequences=None,
                **scan_options)
            return results

        down_fwd = run_pass(self.d_forward, True)
        down_bwd = run_pass(self.d_backward, False)
        up_fwd = run_pass(self.u_forward, True, go_backwards=True)
        up_bwd = run_pass(self.u_backward, False, go_backwards=True)

        pieces = (down_fwd[0],
                  down_bwd[0],
                  up_fwd[0][::-1],
                  up_bwd[0][::-1])
        return T.concatenate(pieces, axis=2)
Exemple #11
0
 def __init__(self, n_in, n_out, layers, decoder=linear.Linear, itype='int32'
              , solver=None):
     """Build a stacked-LSTM next-step prediction model and compile it.

     Parameters
     ----------
     n_in : int
         Input size of the first LSTM layer.
     n_out : int
         Output size of the decoder.
     layers : iterable of int
         Hidden size of each stacked LSTM layer, in order.
     decoder : callable
         Called as ``decoder(m, n_out)`` with `m` the last hidden size; the
         result must expose `.weights` and be callable on the top layer.
     itype : str
         dtype of the symbolic data matrix.
     solver : optional
         Optimiser stored on `self.solver`. Defaults to a fresh
         ``solvers.RMSprop(0.01)`` per instance.

     The data matrix is split into inputs ``data[:-1]`` and targets
     ``data[1:]``; loss, correct count and token count are all weighted by
     ``mask[1:]``.
     """
     if solver is None:
         # Built here rather than in the signature: a default constructed at
         # definition time would be one object shared by every model.
         solver = solvers.RMSprop(0.01)
     self.data = T.matrix(dtype=itype)
     self.x = self.data[:-1] # T.matrix(dtype=itype)
     self.y = self.data[1:] # T.matrix(dtype=itype)
     self.mask = T.matrix(dtype='int32')
     self.weights = []
     # Only the second dimension of x is needed, to size the initial states.
     _, b = self.x.shape
     y_layer = self.x
     self.y_layers = []
     m = n_in
     for n in layers:
         layer = lstm.LSTM(m, n)
         self.weights.append(layer.weights)
         y0 = T.zeros((b, n))
         c0 = T.zeros((b, n))
         y_layer, _ = layer.scanl(y0, c0, y_layer)
         self.y_layers.append(y_layer)
         m = n
     decode = decoder(m, n_out)
     self.weights.append(decode.weights)
     yh = decode(y_layer)
     self.yh = softmax.softmax(yh)
     self.loss_t = T.sum(crossent.crossent(self.yh, self.y)*self.mask[1:])
     self.correct = T.sum(T.eq(T.argmax(self.yh, axis=2), self.y)*self.mask[1:])
     self.count = T.sum(self.mask[1:])
     self.solver = solver
     #compile theano functions
     self._loss = theano.function([self.data, self.mask], [self.loss_t, self.correct, self.count])
     # For activations the full data matrix is substituted for x, so every
     # row is fed through the network instead of data[:-1].
     self._activations = theano.function([self.data], self.y_layers+[self.yh], givens={self.x:self.data})
Exemple #12
0
    def best_path_decode(self, scorematrix, scorematrix_mask=None, blank_symbol=None):
        """
        Compute the best path by choosing the most likely label at each timestep.

        For every batch column the per-timestep argmax labels are truncated to
        the masked length and passed through ``self._remove_adjdup`` and then
        ``self._remove_value(..., blank_symbol)`` (presumably collapsing adjacent
        repeats and dropping blanks -- confirm against those helpers). The
        surviving labels are written to the top of the matching result column.

        :param scorematrix: (T, C+1, B)
        :param scorematrix_mask: (T, B), all ones when omitted
        :param blank_symbol: = C by default (the last index of axis 1)
        :return: resultseq (T, B), resultseq_mask(T, B); positions that were not
                 written keep the value -1 in resultseq and 0 in resultseq_mask
        Speed much slower than pure python version (normally ~40 times on HTR tasks)
        """
        bestlabels = tensor.argmax(scorematrix, axis=1)    # (T, B)
        # NOTE: `T` is a local symbolic dimension here, not a module alias.
        T, Cp, B = scorematrix.shape
        # resultseq starts filled with -1 (zeros - 1), resultseq_mask with 0.
        resultseq, resultseq_mask = tensor.zeros([T, B], dtype=scorematrix.dtype)-1, tensor.zeros([T, B], dtype=scorematrix.dtype)
        if blank_symbol is None:
            blank_symbol = Cp - 1
        if scorematrix_mask is None:
            scorematrix_mask = tensor.ones([T, B], dtype=scorematrix.dtype)

        # Scan step: argument order is sequences (labelseq, labelseq_mask),
        # then the carried outputs (idx, resultseq, resultseq_mask), then the
        # non-sequence blank_symbol.
        def step(labelseq, labelseq_mask, idx, resultseq, resultseq_mask, blank_symbol):
            # Number of valid timesteps for this batch column.
            seqlen = tensor.cast(labelseq_mask.sum(), 'int32')
            labelseq = self._remove_adjdup(labelseq[0:seqlen])
            labelseq = self._remove_value(labelseq, blank_symbol)
            seqlen2 = labelseq.size
            # Write the decoded labels into the first seqlen2 rows of column idx.
            resultseq = tensor.set_subtensor(resultseq[0:seqlen2, idx], labelseq)
            resultseq_mask = tensor.set_subtensor(resultseq_mask[0:seqlen2, idx], tensor.ones_like(labelseq))
            idx += 1
            return idx, resultseq, resultseq_mask

        # Transposing makes the scan iterate over batch columns, not timesteps.
        outputs, updates = theano.scan(fn = step,
                                       sequences=[bestlabels.T, scorematrix_mask.T],
                                       outputs_info=[0, resultseq, resultseq_mask],
                                       non_sequences=[blank_symbol],
                                       name='decode_scan')
        # The last scan step holds the matrices with every column filled in.
        resultseq, resultseq_mask = outputs[1][-1], outputs[2][-1]
        return resultseq, resultseq_mask
Exemple #13
0
    def calc_CER(self, resultseq, targetseq, resultseq_mask=None, targetseq_mask=None):
        """
        Character error rate (CER) of the decoding output 'resultseq' against the ground truth 'targetseq'.

        Both sequences are truncated per batch column to their masked length,
        the edit distance from `self._editdist` is summed over the batch and
        divided by the total masked target length.

        :param resultseq (T1,  B)
        :param resultseq_mask (T1, B), all ones when omitted
        :param targetseq (T2,  B)
        :param targetseq_mask (T2, B), all ones when omitted
        :return: (CER, TE, TG): the error rate, the total edit distance and the
                 total target length
        """
        if resultseq_mask is None:
            resultseq_mask = tensor.ones_like(resultseq)
        if targetseq_mask is None:
            targetseq_mask = tensor.ones_like(targetseq)

        def accumulate(hyp, ref, hyp_mask, ref_mask, errors_so_far, length_so_far):
            # One batch column per call; the last two arguments are the
            # running totals carried by scan.
            hyp_len = tensor.cast(hyp_mask.sum(), 'int32')
            ref_len = tensor.cast(ref_mask.sum(), 'int32')
            distance = self._editdist(hyp[0:hyp_len], ref[0:ref_len])
            return errors_so_far + distance, length_so_far + ref_mask.sum()

        # Transposed inputs make scan walk over the batch axis.
        per_column = [resultseq.T, targetseq.T, resultseq_mask.T, targetseq_mask.T]
        outputs, _ = theano.scan(fn=accumulate,
                                 sequences=per_column,
                                 outputs_info=[tensor.zeros(1), tensor.zeros(1)],
                                 name='calc_CER')
        TE = outputs[0][-1]
        TG = outputs[1][-1]
        return TE / TG, TE, TG
Exemple #14
0
    def get_coefficients(self):
        """Return the coefficients of the product of `term1` and `term2`.

        Each term's `coefficients` is read as six vectors
        ``(ar, cr, ac, bc, cc, dc)``: real amplitudes and rates, then complex
        cosine/sine amplitudes, rates and frequencies. Every pair of
        components, one from each term, contributes one component to the
        product:

        * real x real:       a = a1 * a2,  c = c1 + c2
        * real x complex:    amplitudes scaled by the real amplitude, rates
                             added, frequency taken from the complex term
        * complex x complex: two components, at the difference and at the
                             sum of the two frequencies, each with half the
                             combined amplitude

        Returns a list of six flattened, concatenated vectors in the same
        ``(ar, cr, ac, bc, cc, dc)`` order.
        """
        def outer_prod(u, v):
            return tt.flatten(u[:, None] * v[None, :])

        def outer_sum(u, v):
            return tt.flatten(u[:, None] + v[None, :])

        def outer_diff(u, v):
            return tt.flatten(u[:, None] - v[None, :])

        c1 = self.term1.coefficients
        c2 = self.term2.coefficients

        # First compute real terms
        ar = [outer_prod(c1[0], c2[0])]
        # exp(-c1 t) * exp(-c2 t) = exp(-(c1 + c2) t): rates add. They were
        # previously multiplied, unlike every other rate below.
        cr = [outer_sum(c1[1], c2[1])]

        # Then the complex terms
        ac = []
        bc = []
        cc = []
        dc = []

        # real * complex, in both directions
        for real, cplx in ((c1, c2), (c2, c1)):
            ac.append(outer_prod(real[0], cplx[2]))
            bc.append(outer_prod(real[0], cplx[3]))
            cc.append(outer_sum(real[1], cplx[4]))
            # The real term has no frequency; the zeros only broadcast the
            # complex frequency to the outer-product shape.
            dc.append(outer_sum(tt.zeros_like(real[1]), cplx[5]))

        # complex * complex
        aj, bj, cj, dj = c1[2:]
        ak, bk, ck, dk = c2[2:]

        # Component at the difference frequency dj - dk.
        ac.append(
            tt.flatten(
                0.5 * (aj[:, None] * ak[None, :] + bj[:, None] * bk[None, :])
            )
        )
        bc.append(
            tt.flatten(
                0.5 * (bj[:, None] * ak[None, :] - aj[:, None] * bk[None, :])
            )
        )
        cc.append(outer_sum(cj, ck))
        dc.append(outer_diff(dj, dk))

        # Component at the sum frequency dj + dk.
        ac.append(
            tt.flatten(
                0.5 * (aj[:, None] * ak[None, :] - bj[:, None] * bk[None, :])
            )
        )
        bc.append(
            tt.flatten(
                0.5 * (bj[:, None] * ak[None, :] + aj[:, None] * bk[None, :])
            )
        )
        cc.append(outer_sum(cj, ck))
        dc.append(outer_sum(dj, dk))

        return [
            tt.concatenate(vals, axis=0)
            if len(vals)
            else tt.zeros(0, dtype=self.dtype)
            for vals in (ar, cr, ac, bc, cc, dc)
        ]
Exemple #15
0
    return y1, y2, y3, y4, y5


# Symbolic inputs: a 4D tensor fed to the discriminator and a matrix fed to
# the generator. NOTE(review): presumably real images and latent noise --
# confirm against gen/discrim, which are defined outside this excerpt.
X = T.tensor4()
Z0 = T.matrix()

# draw samples from the generator
gX = gen(Z0, gwx)

# feed real data and generated data through discriminator
# (both results are iterated below, so discrim returns several outputs)
p_real = discrim(X)
p_gen = discrim(gX)

# compute costs based on discriminator output for real/generated data:
# each cost sums the mean `bce` over every discriminator output, with
# target 1 for real data, 0 for generated data (discriminator side) and
# 1 for generated data (generator side).
d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real])
d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen])
g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen])

# Disabled alternative: use only the last discriminator output.
# d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean()
# d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean()
# g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean()

# Total costs: add a 1e-5-weighted sum of squared parameters to each side.
d_cost = d_cost_real + d_cost_gen + (
    1e-5 * sum([T.sum(p**2.0) for p in discrim_params]))
g_cost = g_cost_d + (1e-5 * sum([T.sum(p**2.0) for p in gen_params]))

cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

# One shared learning-rate variable is used by both Adam updaters.
lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    def fit(self,
            trees,
            learning_rate=3 * 1e-3,
            mu=0.99,
            reg=1e-4,
            epochs=15,
            activation=T.nnet.relu,
            train_inner_nodes=False):
        """Train the recursive network on `trees` and plot the epoch costs.

        Parameters
        ----------
        trees : sequence
            Each item is a 4-tuple ``(words, parents, relations, labels)`` of
            equal-length integer sequences describing one tree node by node.
            A node with ``words[n] >= 0`` takes its hidden vector from the
            embedding row ``We[words[n]]``. A node with ``parents[n] >= 0``
            adds its hidden vector, transformed by ``Wh[relations[n]]``, to
            its parent's entry.
        learning_rate, mu : float
            Currently unused: updates come from ``adagrad`` with a fixed
            ``lr=1e-4``.
        reg : float
            Weight of the regularisation term (mean of the per-parameter sums
            of squares).
        epochs : int
            Number of passes over `trees`.
        activation : callable
            Non-linearity applied to nodes with a negative word index.
        train_inner_nodes : bool
            If True the cost and accuracy cover every node's label; otherwise
            only the last node of each tree.

        Side effects: creates the shared parameters and compiled functions on
        `self`, prints progress, and shows a matplotlib plot of the per-epoch
        cost.
        """
        D = self.D
        V = self.V
        K = self.K
        self.f = activation
        N = len(trees)

        We = init_weight(V, D)
        Wh = np.random.randn(2, D, D) / np.sqrt(2 + D + D)
        bh = np.zeros(D)
        Wo = init_weight(D, K)
        bo = np.zeros(K)

        self.We = theano.shared(We)
        self.Wh = theano.shared(Wh)
        self.bh = theano.shared(bh)
        self.Wo = theano.shared(Wo)
        self.bo = theano.shared(bo)
        self.params = [self.We, self.Wh, self.bh, self.Wo, self.bo]

        words = T.ivector('words')
        parents = T.ivector('parents')
        relations = T.ivector('relations')
        labels = T.ivector('labels')

        def recurrence(n, hiddens, words, parents, relations):
            # Finalise node n: embedding lookup when it has a word index,
            # otherwise activate what its children accumulated there.
            w = words[n]
            hiddens = T.switch(
                T.ge(w, 0), T.set_subtensor(hiddens[n], self.We[w]),
                T.set_subtensor(hiddens[n], self.f(hiddens[n] + self.bh)))

            # Push node n's contribution up to its parent, if it has one.
            r = relations[n]
            p = parents[n]
            hiddens = T.switch(
                T.ge(p, 0),
                T.set_subtensor(hiddens[p],
                                hiddens[p] + hiddens[n].dot(self.Wh[r])),
                hiddens)
            return hiddens

        hiddens = T.zeros((words.shape[0], D))

        h, _ = theano.scan(
            fn=recurrence,
            outputs_info=[hiddens],
            n_steps=words.shape[0],
            sequences=T.arange(words.shape[0]),
            non_sequences=[words, parents, relations],
        )

        # h[-1] is the hidden matrix after the final scan step.
        py_x = T.nnet.softmax(h[-1].dot(self.Wo) + self.bo)
        prediction = T.argmax(py_x, axis=1)

        rcost = reg * T.mean([(p * p).sum() for p in self.params])
        if train_inner_nodes:
            cost = -T.mean(T.log(py_x[T.arange(labels.shape[0]),
                                      labels])) + rcost
        else:
            cost = -T.mean(T.log(py_x[-1, labels[-1]])) + rcost

        # `learning_rate` and `mu` are not used by this optimiser.
        updates = adagrad(cost, self.params, lr=1e-4)

        self.cost_predict_op = theano.function(
            inputs=[words, parents, relations, labels],
            outputs=[cost, prediction],
            allow_input_downcast=True,
        )

        self.train_op = theano.function(
            inputs=[words, parents, relations, labels],
            outputs=[h, cost, prediction],
            updates=updates)

        costs = []
        sequence_indexes = range(N)
        if train_inner_nodes:
            n_total = sum(len(tree_words) for tree_words, _, _, _ in trees)
        else:
            n_total = N
        for i in range(epochs):
            t0 = datetime.now()
            sequence_indexes = shuffle(sequence_indexes)
            n_correct = 0
            epoch_cost = 0
            it = 0
            for j in sequence_indexes:
                tree_words, par, rel, lab = trees[j]
                _, c, p = self.train_op(tree_words, par, rel, lab)
                epoch_cost += c
                if train_inner_nodes:
                    n_correct += np.sum(p == lab)
                else:
                    n_correct += (p[-1] == lab[-1])
                it += 1
                if it % 1 == 0:
                    # "\r" rewinds to the start of the line so the progress
                    # message overwrites itself. float() is applied before
                    # dividing so the rate is not truncated on Python 2.
                    sys.stdout.write(
                        "j/N: %d/%d correct rate so far: %f, cost so far: %f\r"
                        % (it, N, float(n_correct) / n_total, epoch_cost))
                    sys.stdout.flush()
            # Per-epoch summary: emitted once per pass, after all trees.
            print("i:", i, "cost:", epoch_cost, "correct rate:",
                  (float(n_correct) / n_total), "time for epoch:",
                  (datetime.now() - t0))
            costs.append(epoch_cost)

        print('costs:', costs)
        plt.plot(costs)
        plt.show()
Exemple #17
0
def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
    """
    Inverse of :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`.

    Takes the output of
    :func:`images2neibs <theano.sandbox.neighbours.images2neibs>` and
    reconstructs its input.

    Parameters
    ----------
    neibs : 2d tensor
        Like the one obtained by
        :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`.
    neib_shape
        `neib_shape` that was used in
        :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`.
    original_shape
        Original shape of the 4d tensor given to
        :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`.
    mode : {'valid', 'ignore_borders'}
        With 'ignore_borders' the last two dimensions are first rounded
        down to a multiple of `neib_shape` and the result is then
        zero-padded at the end of those dimensions up to `original_shape`.

    Returns
    -------
    object
        Reconstructs the input of
        :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`,
        a 4d tensor of shape `original_shape`.

    Raises
    ------
    NotImplementedError
        For any other `mode`.

    Notes
    -----
    Currently, the function doesn't support tensors created with
    `neib_step` different from default value. This means that it may be
    impossible to compute the gradient of a variable gained by
    :func:`images2neibs <theano.sandbox.neighbours.images2neibs>` w.r.t.
    its inputs in this case, because it uses
    :func:`images2neibs <theano.sandbox.neighbours.images2neibs>` for
    gradient computation.

    Examples
    --------
    Example, which uses a tensor gained in example for
    :func:`images2neibs <theano.sandbox.neighbours.images2neibs>`:

    .. code-block:: python

        im_new = neibs2images(neibs, (5, 5), im_val.shape)
        # Theano function definition
        inv_window = theano.function([neibs], im_new)
        # Function application
        im_new_val = inv_window(neibs_val)

    .. note:: The code will output the initial image array.

    """
    neibs = T.as_tensor_variable(neibs)
    neib_shape = T.as_tensor_variable(neib_shape)
    original_shape = T.as_tensor_variable(original_shape)

    # Re-run images2neibs on the neighbourhood matrix itself, viewed as a
    # single 1x1 "image", to regroup its entries into image rows.
    row_shape = T.stack([original_shape[-1] // neib_shape[1],
                         neib_shape[1]])
    flat_rows = images2neibs(neibs.dimshuffle('x', 'x', 0, 1),
                             row_shape, mode=mode)

    if mode == 'valid':
        # TODO: we do not implement all mode with this code.
        # Add a check for the good cases.
        return flat_rows.reshape(original_shape, ndim=4)

    if mode != 'ignore_borders':
        raise NotImplementedError("neibs2images do not support mode=%s" % mode)

    # set_subtensor is used so that an original_shape whose value cannot be
    # inferred is still accepted, while a wrong shape still raises.
    cropped_shape = original_shape
    for axis, extent in ((2, neib_shape[0]), (3, neib_shape[1])):
        cropped_shape = T.set_subtensor(
            cropped_shape[axis],
            (cropped_shape[axis] // extent) * extent)
    image = flat_rows.reshape(cropped_shape, ndim=4)

    # Pad the dropped borders back with zeros, one spatial axis at a time.
    for axis in (2, 3):
        border_shape = list(image.shape)
        border_shape[axis] = original_shape[axis] - cropped_shape[axis]
        image = T.concatenate([image, T.zeros(border_shape)], axis=axis)

    return image
Exemple #18
0
    def theano_expr(self, targets, mode='stack', sparse=False):
        """
        Return the one-hot transformation as a symbolic expression.

        In the dense branches the selected entries are assigned the value
        1, so a label repeated within a row still yields 1 there.

        Parameters
        ----------
        targets : tensor_like, 1- or 2-dimensional, integer dtype
            A symbolic tensor representing labels as integers
            between 0 and `max_labels` - 1, `max_labels` supplied
            at formatter construction.
        mode : string
            The way in which to convert the labels to arrays. Takes
            three different options:

            - "concatenate" : concatenates the one-hot vectors from
                multiple labels
            - "stack" : returns a matrix where each row is the
                one-hot vector of a label
            - "merge" : merges the one-hot vectors together to
                form a vector where the elements are
                the result of an indicator function
                NB: As the result of an indicator function
                the result is the same in case a label
                is duplicated in the input.
        sparse : bool
            If true then the return value is sparse matrix. Note that
            if sparse is True, then mode cannot be 'stack' because
            sparse matrices need to be 2D

        Returns
        -------
        one_hot : TensorVariable, 1, 2 or 3-dimensional, sparse or dense
            A symbolic tensor representing a one-hot encoding of the \
            supplied labels.

        Raises
        ------
        ValueError
            If `mode` is not one of the three options, if `mode` is
            'stack' together with `sparse`, or if `targets` is neither
            1- nor 2-dimensional.
        TypeError
            If `targets` does not have an integer dtype.
        """
        if mode not in ('concatenate', 'stack', 'merge'):
            # Report the offending mode itself (the message used to print
            # the formatter's max_labels instead).
            raise ValueError("%s got bad mode argument '%s'" %
                             (self.__class__.__name__, str(mode)))
        elif mode == 'stack' and sparse:
            raise ValueError("Sparse matrices need to be 2D, hence they "
                             "cannot be stacked")
        squeeze_required = False
        if targets.ndim != 2:
            if targets.ndim == 1:
                # Treat a vector as a single row; the extra axis is removed
                # again at the end of the dense branch.
                squeeze_required = True
                targets = targets.dimshuffle('x', 0)
            else:
                raise ValueError("targets tensor must be 1 or 2-dimensional")
        if 'int' not in str(targets.dtype):
            raise TypeError("need an integer tensor for targets")
        if sparse:
            if mode == 'concatenate':
                # Column index = label + position offset within the row.
                one_hot = theano.sparse.CSR(
                    tensor.ones_like(targets, dtype=self._dtype).flatten(),
                    (targets.flatten() + tensor.arange(targets.size) *
                     self._max_labels) % (self._max_labels * targets.shape[1]),
                    tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                    tensor.stack(targets.shape[0],
                                 self._max_labels * targets.shape[1])
                )
            else:
                one_hot = theano.sparse.CSR(
                    tensor.ones_like(targets, dtype=self._dtype).flatten(),
                    targets.flatten(),
                    tensor.arange(targets.shape[0] + 1) * targets.shape[1],
                    tensor.stack(targets.shape[0], self._max_labels)
                )
        else:
            if mode == 'concatenate':
                one_hot = tensor.zeros((targets.shape[0] * targets.shape[1],
                                        self._max_labels))
                one_hot = tensor.set_subtensor(
                        one_hot[tensor.arange(targets.size),
                                targets.flatten()], 1)
                one_hot = one_hot.reshape((targets.shape[0],
                                           targets.shape[1] * self._max_labels))
            elif mode == 'merge':
                one_hot = tensor.zeros((targets.shape[0], self._max_labels))
                one_hot = tensor.set_subtensor(
                    one_hot[tensor.arange(targets.size) % targets.shape[0],
                            targets.T.flatten()], 1)
            else:
                one_hot = tensor.zeros((targets.shape[0], targets.shape[1],
                                        self._max_labels))
                one_hot = tensor.set_subtensor(one_hot[
                    tensor.arange(targets.shape[0]).reshape((targets.shape[0],
                                                             1)),
                    tensor.arange(targets.shape[1]),
                    targets
                ], 1)
            if squeeze_required:
                # Drop the leading axis added for 1-dimensional targets.
                if one_hot.ndim == 2:
                    one_hot = one_hot.reshape((one_hot.shape[1],))
                if one_hot.ndim == 3:
                    one_hot = one_hot.reshape((one_hot.shape[1],
                                               one_hot.shape[2]))
        return one_hot
Exemple #19
0
 def step(batch_idx, out_seq_b1):
     """Compact one batch column of `seq` and zero-pad it to `max_seq_len`.

     Keeps the entries of ``seq[:, batch_idx]`` whose matching
     ``idx[:, batch_idx]`` is non-negative, then appends zeros of
     ``seq.dtype`` up to length ``max_seq_len``. `out_seq_b1` is accepted
     but not used.
     """
     column = seq[:, batch_idx]
     keep = T.ge(idx[:, batch_idx], 0).nonzero()
     out_seq = column[keep]
     filler = T.zeros((max_seq_len - out_seq.shape[0], ), dtype=seq.dtype)
     return T.concatenate((out_seq, filler))
# Symbolic output of the character embedding layer for the generator input.
v_gen_embed = lasagne.layers.get_output(l_embed_char, v_gen_input)

# Freeze the hidden inputs of the decoder layers, which do not tap into the encoder.
for layer in dec_rnn_layers:
    GRULayer_freeze(layer, v_gen_input)

# Read out the last state from the encoder.
# The encoder mask is true wherever the input index is >= 0
# (presumably negative indices mark padding -- confirm against the data prep).
inputs = {l_encoder_embed: v_gen_embed, l_encoder_mask: tt.ge(v_gen_input, 0)}
outputs = [l.hid_init for l in dec_rnn_layers]

# deterministic=True is forwarded to every layer's get_output_for.
dec_hid_inits = lasagne.layers.get_output(outputs, inputs, deterministic=True)

# Prepare the initial values fed into the scan loop of the Generator.
# All decoder initial hidden states are joined along the last axis.
h_0 = tt.concatenate(dec_hid_inits, axis=-1)

# One int32 entry per batch row, each set to the vocabulary index of "\x02"
# (presumably the start-of-sequence character -- confirm), then embedded.
x_0 = tt.fill(tt.zeros((v_gen_input.shape[0], ), dtype="int32"),
              vocab.index("\x02"))
x_0 = lasagne.layers.get_output(l_embed_char, x_0)

# Initial per-row boolean flag vector, all ones.
m_0 = tt.ones((v_gen_input.shape[0], ), 'bool')

# Compile the Generator's scan op.
# Three recurrent outputs (seeded by x_0, h_0, m_0) plus two non-recurrent
# outputs (the None entries); tau and eps are passed to every step unchanged.
result, updates = theano.scan(generator_step_sm,
                              sequences=None,
                              n_steps=n_steps,
                              outputs_info=[x_0, h_0, m_0, None, None],
                              strict=False,
                              return_list=True,
                              non_sequences=[tau, eps],
                              go_backwards=False,
                              name="generator/scan")
    def get_output_for(self, inputs, **kwargs):
        """
        Compute this layer's output given its symbolic inputs.

        At every decode step an attention distribution over the encoder
        time steps is computed from the previous hidden state, the encoder
        sequence is averaged with those weights, and the result drives one
        LSTM update.

        Parameters
        ----------
        inputs : list of theano.TensorType
            ``inputs[0]`` is the sequence to attend over, indexed as
            ``(n_batch, n_time_steps, n_features)``; any dimensions after
            the second are flattened into one feature dimension.
            ``inputs[1]``, when present, is a mask of shape
            ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when
            ``j <= (length of sequence i)`` and ``mask[i, j] = 0`` when
            ``j > (length of sequence i)``.  When it is absent, all
            sequences are assumed to have the same length and an all-ones
            mask is used.

        Returns
        -------
        layer_output : theano.TensorType
            ``hid_out`` when ``self.return_decodehid`` is true, otherwise
            ``weighted_hidden_out``.  Both have the batch on axis 0 and the
            decode step on axis 1.

        Notes
        -----
        As a side effect the per-step hidden states, cell states, attention
        weighted inputs and attention weights are stored on ``self.hid_out``,
        ``self.cell_out``, ``self.weighted_hidden_out`` and ``self.alpha``.
        """
        input = inputs[0]
        # Retrieve the mask when it is supplied
        mask = inputs[1] if len(inputs) > 1 else None

        # Treat all dimensions after the second as flattened feature dimensions
        if input.ndim > 3:
            input = input.reshape(
                (input.shape[0], input.shape[1], T.prod(input.shape[2:])))
        num_batch = input.shape[0]
        encode_seqlen = input.shape[1]

        if mask is None:
            mask = T.ones((num_batch, encode_seqlen), dtype='float32')

        # The four gate pre-activations are stacked along axis 1; this
        # extracts the n'th block of num_units columns.
        def slice_w(x, n):
            return x[:, n * self.num_units:(n + 1) * self.num_units]

        # Single recurrent computation step.  The first four arguments are
        # the recurrent outputs of the previous step; the rest are the
        # non-sequences, in the order they are listed in non_seqs below.
        def step(cell_previous, hid_previous, alpha_prev, weighted_hidden_prev,
                 input, mask, hUa, W_align, v_align, W_hid_stacked,
                 W_weightedhid_stacked, W_cell_to_ingate, W_cell_to_forgetgate,
                 W_cell_to_outgate, b_stacked, *args):

            # Shapes, as annotated by the original author:
            # W_align:  (num_units, aln_num_units)
            # U_align:  (num_feats, aln_num_units)
            # v_align:  (aln_num_units, 1)
            # hUa:      (BS, Seqlen, aln_num_units)
            # hid:      (BS, num_units_dec)
            # input:    (BS, Seqlen, num_inputs)

            # Compute the (unnormalized) attention scores
            sWa = T.dot(hid_previous, W_align)  # (BS, aln_num_units)
            sWa = sWa.dimshuffle(0, 'x', 1)  # (BS, 1, aln_num_units)
            align_act = sWa + hUa
            # (BS, seqlen, num_units_aln)
            tanh_sWahUa = self.nonlinearity_align(align_act)

            # One score per encoder time step
            a = T.dot(tanh_sWahUa, v_align)  # (BS, Seqlen, 1)
            a = T.reshape(a, (a.shape[0], a.shape[1]))  # (BS, Seqlen)

            # Masked-out positions get a large negative score so that the
            # attention softmax assigns them (almost) no weight.
            a = a * mask - (1 - mask) * 10000

            alpha = self.attention_softmax_function(a)

            # Attention-weighted sum of the input over the sequence axis.
            # input: (BS, Seqlen, num_units)
            weighted_hidden = input * alpha.dimshuffle(0, 1, 'x')
            weighted_hidden = T.sum(weighted_hidden, axis=1)

            # Calculate gates pre-activations and slice

            # (BS, dec_hid) x (dec_hid, dec_hid)
            gates = T.dot(hid_previous, W_hid_stacked) + b_stacked
            # (BS, enc_hid) x (enc_hid, dec_hid)
            gates += T.dot(weighted_hidden, W_weightedhid_stacked)

            # Clip gradients
            if self.grad_clipping is not False:
                gates = theano.gradient.grad_clip(gates, -self.grad_clipping,
                                                  self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous * W_cell_to_ingate
                forgetgate += cell_previous * W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new cell value
            cell = forgetgate * cell_previous + ingate * cell_input

            if self.peepholes:
                outgate += cell * W_cell_to_outgate

            # Compute new hidden unit activation
            hid = outgate * self.nonlinearity_out(cell)

            return [cell, hid, alpha, weighted_hidden]

        # Nothing is iterated over: every step sees the whole input through
        # the non-sequences and the step count is given by n_steps.
        sequences = []
        step_fun = step

        ones = T.ones((num_batch, 1))
        if isinstance(self.cell_init, T.TensorVariable):
            cell_init = self.cell_init
        else:
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            cell_init = T.dot(ones, self.cell_init)

        if isinstance(self.hid_init, T.TensorVariable):
            hid_init = self.hid_init
        else:
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(ones, self.hid_init)

        alpha_init = T.zeros((num_batch, encode_seqlen))
        weighted_hidden_init = T.zeros((num_batch, self.num_inputs))

        # The input projection of the alignment model does not depend on
        # the decoder state, so it is computed once outside the loop.
        hUa = T.dot(input, self.U_align)  # (num_batch, seq_len, num_units_aln)

        non_seqs = [
            input, mask, hUa, self.W_align, self.v_align, self.W_hid_stacked,
            self.W_weightedhid_stacked
        ]
        # The "peephole" weight matrices are only used when self.peepholes=True
        if self.peepholes:
            non_seqs += [
                self.W_cell_to_ingate, self.W_cell_to_forgetgate,
                self.W_cell_to_outgate
            ]
        # theano.scan only allows for positional arguments, so when
        # self.peepholes is False, we need to supply fake placeholder arguments
        # for the three peephole matrices.
        else:
            non_seqs += [(), (), ()]

        non_seqs += [self.b_stacked]

        n_steps = self.n_decodesteps + self.decode_pre_steps
        outputs_info = [cell_init, hid_init, alpha_init, weighted_hidden_init]

        if self.unroll_scan:
            # Explicitly unroll the recurrence instead of using scan
            cell_out, hid_out, alpha_out, weighted_hidden_out = unroll_scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=outputs_info,
                go_backwards=self.backwards,
                non_sequences=non_seqs,
                n_steps=n_steps)
        else:
            # Scan op repeatedly applies the step function n_steps times
            cell_out, hid_out, alpha_out, weighted_hidden_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=outputs_info,
                go_backwards=self.backwards,
                truncate_gradient=self.gradient_steps,
                non_sequences=non_seqs,
                n_steps=n_steps,
                strict=True)[0]

        # The loop stacks its outputs along a leading step axis; move the
        # batch axis to the front:
        #   hid_out / cell_out: (BS, n_steps, num_units)
        #   alpha_out:          (BS, n_steps, encode_seqlen)
        cell_out = cell_out.dimshuffle(1, 0, 2)
        hid_out = hid_out.dimshuffle(1, 0, 2)
        alpha_out = alpha_out.dimshuffle(1, 0, 2)
        weighted_hidden_out = weighted_hidden_out.dimshuffle(1, 0, 2)

        # if scan is backward reverse the output
        if self.backwards:
            hid_out = hid_out[:, ::-1]
            cell_out = cell_out[:, ::-1]
            weighted_hidden_out = weighted_hidden_out[:, ::-1]
            alpha_out = alpha_out[:, ::-1]

        # Drop the warm-up steps.  Each output is trimmed from itself; the
        # previous code derived cell_out and alpha_out from the already
        # trimmed hid_out, overwriting them with doubly-trimmed hidden
        # states.
        if self.decode_pre_steps > 0:
            hid_out = hid_out[:, self.decode_pre_steps:]
            cell_out = cell_out[:, self.decode_pre_steps:]
            weighted_hidden_out = weighted_hidden_out[:,
                                                      self.decode_pre_steps:]
            alpha_out = alpha_out[:, self.decode_pre_steps:]

        self.hid_out = hid_out
        self.cell_out = cell_out
        self.weighted_hidden_out = weighted_hidden_out
        self.alpha = alpha_out

        if self.return_decodehid:
            return hid_out
        else:
            return weighted_hidden_out
Exemple #22
0
    def log_ctc(self, labels_len_const):
        """Build a symbolic CTC-style loss computed in the log domain.

        Reads ``self.x`` (3-d, asserted below), ``self.x_mask``, ``self.y``,
        ``self.y_clip`` and ``self.blank``.  Nothing is returned; the results
        are stored on the instance:

        * ``self.loss``  -- mean over samples of the negative log-probability
        * ``self.prob``  -- final-step log values at positions
          ``y_clip - 2`` and ``y_clip - 1`` of every sample
        * ``self.debug`` -- output of the forward scan for every step
        * ``self.pin0`` / ``self.pin1`` / ``self.pin2`` -- the scan inputs
          (log emissions, log initial state, log transition matrices)

        Parameters
        ----------
        labels_len_const
            Repeat count handed to ``T.tile`` inside the scan step.
            NOTE(review): presumably a Python constant equal to
            ``self.y.shape[1]`` -- confirm against the caller.
        """
        def _build_diag(_d):
            # Transition matrix for one sample: ones on the main diagonal and
            # on the first superdiagonal, plus the second superdiagonal in
            # the rows where _d is non-zero.
            extend_I = T.eye(labels_len + 2)
            return T.eye(labels_len) + extend_I[
                1:-1, :-2] + extend_I[2:, :-2] * _d[:, None]

        # prepare y
        n_samples, labels_len = self.y.shape
        # Pad every label row with two blanks so that position j can be
        # compared with position j + 2.
        y1 = T.concatenate(
            [self.y, T.ones((self.y.shape[0], 2)) * self.blank], axis=1)
        # Non-zero where label j differs from label j + 2 and label j + 2 is
        # not the blank symbol.
        diag = T.neq(y1[:, :-2], y1[:, 2:]) * T.neq(y1[:, 2:], self.blank)
        # stretch out, (labels_len, n_samples*labels_len)
        diags0, _ = theano.scan(fn=_build_diag,
                                sequences=[diag],
                                n_steps=n_samples)
        shape = diags0.shape
        diags = T.transpose(diags0, (1, 0, 2)).reshape(
            (shape[1], shape[0] * shape[2]))

        # prepare x
        assert self.x.ndim == 3
        # (n_steps, n_samples, softmax_output) to (n_steps, n_samples, labels_len)
        x1 = self.x[:, T.arange(n_samples)[:, None], self.y]
        dims = x1.shape
        # stretch out, (n_steps, n_samples * labels_len)
        x2 = x1.reshape((dims[0], dims[1] * dims[2]))

        def log_matrix_dot(x, y, z):
            # Log-domain counterpart of "x times transition matrices, then
            # multiplied element-wise by emissions": x is tiled and laid out
            # like y, the two are added, reduced over axis 0 with a
            # max-shifted log-sum-exp, and z is added to the result.
            v1 = x[:, :, None]
            v2 = T.tile(v1, (1, 1, labels_len_const))
            v2_shape = v2.shape
            v3 = T.transpose(v2, (1, 0, 2)).reshape(
                (v2_shape[1], v2_shape[0] * v2_shape[2]))
            v4 = v3 + y
            # Subtract the column maximum before exponentiating.
            m = T.max(v4, axis=0)

            v5 = v4 - m[None, :]
            # mask = T.nonzero(T.isnan(v5))
            # v6 = T.set_subtensor(v5[mask], -np.inf)
            # v7 = T.exp(v5)
            # safe_exp / safe_log are helpers defined outside this method
            # (presumably guarded versions of T.exp / T.log -- confirm).
            v7 = safe_exp(v5)
            v8 = T.sum(v7, axis=0)
            # v9 = T.log(v8)
            v9 = safe_log(v8)
            v10 = v9 + m
            v11 = v10 + z
            v12 = v11.reshape((n_samples, labels_len))
            return v12

        # each step
        def _step(m_, s_, h_, diags):
            # m_: mask entries for this step, s_: log emissions for this
            # step, h_: state from the previous step.
            v = log_matrix_dot(h_, diags, s_)
            m_extend = T.tile(m_[:, None], (1, labels_len_const))
            # Where the mask is zero the previous state is carried over.
            p = T.switch(m_extend, v, h_)
            return p

        # scan loop
        log_x2 = safe_log(x2)
        # Initial state: log of a one-hot row with the 1 at label position 0.
        log_outputs_info = safe_log(
            T.set_subtensor(
                T.zeros((n_samples, labels_len),
                        dtype=theano.config.floatX)[:, 0], 1))
        log_diags = safe_log(diags)

        # Kept on the instance so the scan inputs can be inspected.
        self.pin0 = log_x2
        self.pin1 = log_outputs_info
        self.pin2 = log_diags

        self.debug, _ = theano.scan(fn=_step,
                                    sequences=[self.x_mask.T, log_x2],
                                    outputs_info=[log_outputs_info],
                                    non_sequences=[log_diags])

        # prepare y_clip
        # Two end positions per sample: y_clip - 2 and y_clip - 1.
        y_clip1 = T.concatenate([(self.y_clip - 2)[:, None],
                                 (self.y_clip - 1)[:, None]],
                                axis=1)
        self.prob = self.debug[-1][T.arange(n_samples)[:, None], y_clip1]

        # compute loss
        # Max-shifted log-sum-exp over the two end positions of each sample.
        mx = T.max(self.prob, axis=1)
        l1 = self.prob - mx[:, None]
        # l2 = T.sum(T.exp(l1), axis=1)
        # l3 = T.log(l2) + mx
        l2 = T.sum(safe_exp(l1), axis=1)
        l3 = safe_log(l2) + mx
        self.loss = T.mean(-l3)
Exemple #23
0
 def t_initial_state(self):
     """Return ``self._t_state0`` followed by ``self._win_dim`` zeros.

     The two parts are joined along axis 0.
     """
     state0 = self._t_state0
     window_zeros = T.zeros(self._win_dim)
     return T.concatenate([state0, window_zeros], axis=0)
Exemple #24
0
 def get_output_for(self, input, deterministic=False, **kwargs):
     """Rearrange channel groups of ``input`` into an upscaled spatial grid.

     With ``r = self.upscale``, every ``(row, col)`` offset in an ``r x r``
     cell receives the channels ``r * row + col, r * row + col + r * r, ...``
     of ``input`` at the strided output positions ``[row::r, col::r]``.
     The output starts as zeros of shape
     ``self.get_output_shape_for(input.shape)``.  ``deterministic`` and
     ``kwargs`` are not used.
     """
     result = T.zeros(self.get_output_shape_for(input.shape))
     factor = self.upscale
     for row in range(factor):
         for col in range(factor):
             first_channel = factor * row + col
             source = input[:, first_channel::factor * factor, :, :]
             target = result[:, :, row::factor, col::factor]
             result = T.inc_subtensor(target, source)
     return result
def conv1d_sd(input,
              filters,
              image_shape,
              filter_shape,
              border_mode='valid',
              subsample=(1, )):
    """
    1D convolution computed with a single tensordot.

    The input is cut into filter-length windows at ``filter_length // stride``
    different shifts, the windows are stacked, and one tensor contraction
    with the flipped filters yields all output positions at once.

    Parameters
    ----------
    input
        Symbolic input, indexed as (batch, channel, length).
    filters
        Symbolic filters, indexed as (filter, channel, length).
    image_shape
        ``(batch_size, num_input_channels, input_length)``; must be given
        because the values are used in Python arithmetic.
    filter_shape
        ``(num_filters, num_input_channels, filter_length)``.
    border_mode
        Has to be 'valid' at the moment.
    subsample
        1-tuple holding the stride; the filter length must be a multiple
        of it.

    Returns
    -------
    Symbolic result indexed as (batch, filter, output_length).

    Raises
    ------
    RuntimeError
        If ``border_mode`` is not 'valid' or the filter length is not a
        multiple of the stride.
    """
    if border_mode != 'valid':
        log.error("Unsupported border_mode for conv1d_sd: " "%s" % border_mode)
        raise RuntimeError("Unsupported border_mode for conv1d_sd: "
                           "%s" % border_mode)

    batch_size, num_input_channels, input_length = image_shape
    num_filters, num_input_channels_, filter_length = filter_shape
    stride = subsample[0]

    if filter_length % stride > 0:
        raise RuntimeError("Filter length (%d) is not a multiple of the "
                           "stride (%d)" % (filter_length, stride))

    num_steps = filter_length // stride
    output_length = (input_length - filter_length + stride) // stride

    # pad the input so all the shifted dot products fit inside.
    # shape is (b, c, l)
    padded_length = ((input_length // filter_length) * filter_length +
                     (num_steps - 1) * stride)

    # at this point, it is possible that the padded_length is SMALLER than the
    # input size. so then we have to truncate first.
    truncated_length = min(input_length, padded_length)
    input_truncated = input[:, :, :truncated_length]

    input_padded_shape = (batch_size, num_input_channels, padded_length)
    input_padded = T.zeros(input_padded_shape)
    input_padded = T.set_subtensor(input_padded[:, :, :truncated_length],
                                   input_truncated)

    # One reshaped view of the padded input per shift: each view splits the
    # length axis into consecutive, non-overlapping filter-sized windows.
    inputs = []
    for num in range(num_steps):
        shift = num * stride
        length = (padded_length - shift) // filter_length

        r_input_shape = (batch_size, num_input_channels, length, filter_length)
        r_input = input_padded[:, :, shift:length * filter_length +
                               shift].reshape(r_input_shape)

        inputs.append(r_input)

    inputs_stacked = T.stack(*inputs)  # shape is (n, b, c, w, f)
    filters_flipped = filters[:, :, ::-1]

    # Contract the channel and in-window axes of the stacked input (2, 4)
    # with those of the filters (1, 2).  The axes are axis *indices*, so
    # they must be integers rather than a floatX array.
    r_conved = T.tensordot(inputs_stacked, filters_flipped,
                           numpy.asarray([[2, 4], [1, 2]]))
    # resulting shape is (n, b, w, n_filters)
    # output needs to be (b, n_filters, w * n)
    r_conved = r_conved.dimshuffle(1, 3, 2, 0)  # (b, n_filters, w, n)
    conved = r_conved.reshape((r_conved.shape[0], r_conved.shape[1],
                               r_conved.shape[2] * r_conved.shape[3]))
    # result is (b, n_f, l)

    # remove padding
    return conved[:, :, :output_length]
Exemple #26
0
    def do_preprocess_scan(self, deterministic_dropout=False, **kwargs):
        """
        Run a scan using this LSTM, preprocessing all inputs before the scan.

        Parameters:
            kwargs[k]: should be a theano tensor of shape (n_batch, n_time, ... )
                Note that "relative_position" should be a keyword argument given here if there are relative
                shifts.
            deterministic_dropout: If True, apply dropout deterministically, scaling everything. If false,
                sample dropout

        Returns:
            A theano tensor of shape (n_batch, n_time, output_size) of activations
        """

        assert len(kwargs) > 0, "Need at least one input argument!"
        n_batch, n_time = list(kwargs.values())[0].shape[:2]

        # Merge the batch and time axes so every input part can be generated
        # for all time steps in one call.
        squashed_kwargs = {
            k: v.reshape([n_batch * n_time] + [x for x in v.shape[2:]])
            for k, v in kwargs.items()
        }

        full_input = T.concatenate(
            [part.generate(**squashed_kwargs) for part in self.input_parts], 1)
        # Restore the two axes and put time first, since scan iterates over
        # the leading axis.
        adjusted_input = full_input.reshape([n_batch, n_time,
                                             self.input_size]).dimshuffle(
                                                 (1, 0, 2))

        if "relative_position" in kwargs:
            # Per-step shift = difference between consecutive positions,
            # with a zero shift prepended for the first step.
            relative_position = kwargs["relative_position"]
            diff_shifts = T.extra_ops.diff(relative_position, axis=1)
            cat_shifts = T.concatenate(
                [T.zeros((n_batch, 1), 'int32'), diff_shifts], 1)
            shifts = cat_shifts.dimshuffle((1, 0))
        else:
            # The shape has to be passed as one tuple; T.zeros takes
            # (shape, dtype), so passing the two sizes separately fails.
            shifts = T.zeros((n_time, n_batch), 'int32')

        def _scan_fn(in_data, shifts, *other):
            # `other` holds the recurrent states followed, when dropout is
            # sampled, by one dropout mask per entry of tot_layer_sizes.
            other = list(other)
            if self.dropout and not deterministic_dropout:
                split = -len(self.tot_layer_sizes)
                hiddens = other[:split]
                masks = [None] + other[split:]
            else:
                masks = []
                hiddens = other

            return self.perform_step(in_data,
                                     shifts,
                                     hiddens,
                                     dropout_masks=masks)

        if self.dropout and not deterministic_dropout:
            dropout_masks = UpscaleMultiDropout(
                [(n_batch, shape) for shape in self.tot_layer_sizes],
                self.dropout)
        else:
            dropout_masks = []

        outputs_info = [
            initial_state_with_taps(layer, n_batch)
            for layer in self.cells.layers
        ]
        result, _ = theano.scan(fn=_scan_fn,
                                sequences=[adjusted_input, shifts],
                                non_sequences=dropout_masks,
                                outputs_info=outputs_info)

        # Back to batch-first layout.
        final_out = get_last_layer(result).transpose((1, 0, 2))

        return final_out
Exemple #27
0
 def get_real_coefficients(self):
     """Return a pair of empty (length-0) vectors of dtype ``self.dtype``."""
     first = tt.zeros(0, dtype=self.dtype)
     second = tt.zeros(0, dtype=self.dtype)
     return (first, second)
Exemple #28
0
    def build_generator(self, version=1, encode=False):
        """Build the generator network as a list of stacked layers.

        Parameters
        ----------
        version : int
            Architecture selector; only ``1`` is implemented.
        encode : bool
            If True, the stack starts from the masked input image
            concatenated with the mask along the channel axis and is encoded
            by strided convolutions.  If False, it starts from a uniform
            noise vector of size 100 followed by a dense and a reshape layer.

        Returns
        -------
        list
            All layers in construction order; the last one is the output
            layer with target shape ``(batch_size, 3, 64, 64)``.

        Raises
        ------
        ValueError
            If ``version`` is not 1 (previously this surfaced as an
            unbound-local error on ``gen_layers``).

        Notes
        -----
        Side effects: on first use the module-global ``mask`` is created and
        also stored on ``self.mask``; ``GAN.mixing_coefs`` is set; the output
        shape of every layer is printed.
        """
        global mask

        if version != 1:
            raise ValueError('Unsupported generator version: %r' % (version, ))

        #from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer

        if mask is None:
            # Ones in the central 32 x 32 square of a 64 x 64 frame.
            mask = T.zeros(shape=(self.batch_size, 1, 64, 64),
                           dtype=theano.config.floatX)
            mask = T.set_subtensor(mask[:, :, 16:48, 16:48], 1.)
            self.mask = mask

        noise_dim = (self.batch_size, 100)
        theano_rng = MRG_RandomStreams(rng.randint(2**15))
        noise = theano_rng.uniform(size=noise_dim)
        # mask_color = T.cast(T.cast(theano_rng.uniform(size=(self.batch_size,), low=0., high=2.), 'int16').dimshuffle(0, 'x', 'x', 'x') * mask, dtype=theano.config.floatX)
        noise_layer = ll.InputLayer(shape=noise_dim, input_var=noise)

        # Input image with the masked region zeroed out.
        cropped_image = T.cast(T.zeros_like(self.input_) * mask +
                               (1. - mask) * self.input_,
                               dtype=theano.config.floatX)
        # Concatenate along the channel axis.
        encoder_input = T.concatenate([cropped_image, mask], axis=1)

        if encode:
            # Shape comments below assume ll.Conv2DLayer takes
            # (incoming, num_filters, filter_size, stride) -- TODO confirm.
            gen_layers = [
                ll.InputLayer(shape=(self.batch_size, 4, 64, 64),
                              input_var=encoder_input)
            ]  # 4 x 64 x 64

            gen_layers.append(
                nn.batch_norm(
                    ll.Conv2DLayer(gen_layers[-1],
                                   64,
                                   4,
                                   2,
                                   pad=1,
                                   nonlinearity=nn.lrelu))
            )  # expected 64 x 32 x 32

            gen_layers.append(
                nn.batch_norm(
                    ll.Conv2DLayer(gen_layers[-1],
                                   128,
                                   4,
                                   2,
                                   pad=1,
                                   nonlinearity=nn.lrelu))
            )  # expected 128 x 16 x 16

            gen_layers.append(
                nn.batch_norm(
                    ll.Conv2DLayer(gen_layers[-1],
                                   256,
                                   4,
                                   2,
                                   pad=1,
                                   nonlinearity=nn.lrelu))
            )  # expected 256 x 8 x 8

            gen_layers.append(
                nn.batch_norm(
                    ll.Conv2DLayer(gen_layers[-1],
                                   512,
                                   4,
                                   2,
                                   pad=1,
                                   nonlinearity=nn.lrelu))
            )  # expected 512 x 4 x 4

            gen_layers.append(
                nn.batch_norm(
                    ll.Conv2DLayer(gen_layers[-1],
                                   4000,
                                   4,
                                   4,
                                   pad=1,
                                   nonlinearity=nn.lrelu))
            )  # expected 4000 x 1 x 1 (bottleneck)

            #gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 2048, 4, 2, pad=1, nonlinearity=nn.lrelu)))
            # flatten this out
            #gen_layers.append(ll.FlattenLayer(gen_layers[-1]))

            # Back up to the 4 x 4 resolution shared with the noise branch.
            gen_layers.append(
                nn.batch_norm(
                    nn.Deconv2DLayer(gen_layers[-1],
                                     (self.batch_size, 128 * 4, 4, 4),
                                     (5, 5),
                                     stride=(4, 4))))

        else:
            gen_layers = [noise_layer]

            # TODO : put batchorm back on all layers, + g=None
            gen_layers.append(
                ll.DenseLayer(gen_layers[-1],
                              128 * 8 * 4 * 4,
                              W=Normal(0.02)))
            gen_layers.append(
                ll.ReshapeLayer(gen_layers[-1],
                                (self.batch_size, 128 * 8, 4, 4)))

        # creating array of mixing coefficients (shared Theano floats) that
        # will be used for mixing generated_output and image at each layer
        mixing_coefs = [
            theano.shared(lasagne.utils.floatX(0.05)) for i in range(2)
        ]

        # theano.shared(lasagne.utils.floatX(np.array([0.5])))  for i in range(3)]
        mixing_coefs.append(theano.shared(lasagne.utils.floatX(1)))
        # Only referenced by the disabled ResetDeconvLayer lines below.
        border = 2
        gen_layers.append(
            nn.batch_norm(nn.Deconv2DLayer(
                gen_layers[-1], (self.batch_size, 128 * 2, 8, 8), (5, 5),
                W=Normal(0.02),
                nonlinearity=nn.relu),
                          g=None))  # 4 -> 8
        #gen_layers.append(ll.DropoutLayer(gen_layers[-1],p=0.5))
        #gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[0], border=border))

        #layer_a = nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[0]) # all new
        #layer_concat_a = ll.ConcatLayer([layer_a, gen_layers[-1]], axis=1)
        #gen_layers.append(layer_concat_a)

        gen_layers.append(
            nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                           (self.batch_size, 128, 16, 16),
                                           (5, 5),
                                           W=Normal(0.02),
                                           nonlinearity=nn.relu),
                          g=None))  # 8 -> 16

        #gen_layers.append(ll.DropoutLayer(gen_layers[-1],p=0.5))
        #gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[1], border=border*2))
        #layer_b = nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[1]) # all new
        #layer_concat_b = ll.ConcatLayer([layer_b, gen_layers[-1]], axis=1)
        #gen_layers.append(layer_concat_b)

        gen_layers.append(
            nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                           (self.batch_size, 64, 32, 32),
                                           (5, 5),
                                           W=Normal(0.02),
                                           nonlinearity=nn.relu),
                          g=None))  # 16 -> 32

        #gen_layers.append(ll.DropoutLayer(gen_layers[-1],p=0.5))
        #gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[2], border=border*2*2))
        #layer_c = nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[1]) # all new
        #layer_concat_c = ll.ConcatLayer([layer_c, gen_layers[-1]], axis=1)
        #gen_layers.append(layer_concat_c)

        gen_layers.append(
            nn.Deconv2DLayer(
                gen_layers[-1], (self.batch_size, 3, 64, 64), (5, 5),
                W=Normal(0.02),
                nonlinearity=lasagne.nonlinearities.sigmoid))  # 32 -> 64

        #gen_layers.append(ll.DropoutLayer(gen_layers[-1],p=0.5))
        #gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[3], border=border*2*2*2, trainable=False))

        # Parenthesised single-argument prints behave the same under the
        # Python 2 print statement and the Python 3 print function.
        for layer in gen_layers:
            print(layer.output_shape)
        print('')

        GAN.mixing_coefs = mixing_coefs

        return gen_layers
Exemple #29
0
    def __theano_build__(self):
        """Build the symbolic single-layer GRU graph and compile its functions.

        Reads the parameters ``self.E, self.V, self.U, self.W, self.b,
        self.c``, the RMSProp caches ``self.mE ... self.mc`` and
        ``self.hidden_dim`` / ``self.bptt_truncate``.

        Sets the compiled functions ``self.predict``, ``self.predict_class``,
        ``self.ce_error``, ``self.bptt`` and ``self.sgd_step``.  Returns
        nothing.
        """
        E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c

        x = T.ivector('x')
        y = T.ivector('y')

        def forward_prop_step(x_t, s_prev):
            # Word embedding layer
            x_e = E[:, x_t]

            # GRU Layer 1
            z_t = T.nnet.hard_sigmoid(U[0].dot(x_e) + W[0].dot(s_prev) + b[0])
            r_t = T.nnet.hard_sigmoid(U[1].dot(x_e) + W[1].dot(s_prev) + b[1])
            # The candidate state must not be called `c`: that name is the
            # output bias used in the softmax below, and shadowing it both
            # replaced the bias with the candidate state and disconnected
            # the bias from the cost.
            c_t = T.tanh(U[2].dot(x_e) + W[2].dot(s_prev * r_t) + b[2])
            s_t = (T.ones_like(z_t) - z_t) * c_t + z_t * s_prev

            # Final output calculation
            # Theano's softmax returns a matrix with one row, we only need the row
            o_t = T.nnet.softmax(V.dot(s_t) + c)[0]

            return [o_t, s_t]

        # The output o is not fed back (None); the state starts at zeros.
        [o, s], updates = theano.scan(
            forward_prop_step,
            sequences=x,
            truncate_gradient=self.bptt_truncate,
            outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))])

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Total cost (could add regularization here)
        cost = o_error

        # Gradients
        dE = T.grad(cost, E)
        dU = T.grad(cost, U)
        dW = T.grad(cost, W)
        db = T.grad(cost, b)
        dV = T.grad(cost, V)
        dc = T.grad(cost, c)

        # Assign functions
        self.predict = theano.function([x], [o], allow_input_downcast=True)
        self.predict_class = theano.function([x],
                                             prediction,
                                             allow_input_downcast=True)
        self.ce_error = theano.function([x, y],
                                        cost,
                                        allow_input_downcast=True)
        self.bptt = theano.function([x, y], [dE, dU, dW, db, dV, dc],
                                    allow_input_downcast=True)

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        mE = decay * self.mE + (1 - decay) * dE**2
        mU = decay * self.mU + (1 - decay) * dU**2
        mW = decay * self.mW + (1 - decay) * dW**2
        mV = decay * self.mV + (1 - decay) * dV**2
        mb = decay * self.mb + (1 - decay) * db**2
        mc = decay * self.mc + (1 - decay) * dc**2

        # One call performs a parameter step and refreshes the caches;
        # decay defaults to 0.9 when the caller omits it.
        self.sgd_step = theano.function(
            [x, y, learning_rate,
             theano.In(decay, value=0.9)], [],
            updates=[(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                     (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                     (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                     (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                     (b, b - learning_rate * db / T.sqrt(mb + 1e-6)),
                     (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)),
                     (self.mE, mE), (self.mU, mU), (self.mW, mW),
                     (self.mV, mV), (self.mb, mb), (self.mc, mc)],
            allow_input_downcast=True)
Exemple #30
0
    def __theano_build__(self):
        """Build the symbolic bidirectional GRU graph and compile its functions.

        One GRU (parameter slices 0-2 of ``U``, ``W``, ``b``) reads the input
        left to right, a second one (slices 3-5) reads it right to left; the
        two state sequences are concatenated per time step and passed
        through a softmax output layer.

        Sets ``self.f_s`` and ``self.b_s`` (the symbolic state sequences, the
        backward one in reversed time order) and the compiled functions
        ``self.predict``, ``self.predict_class``, ``self.ce_error`` and
        ``self.sgd_step``.  Returns nothing.
        """
        E, V, U, W, b, c = self.E, self.V, self.U, self.W, self.b, self.c

        x = T.ivector('x')
        y = T.ivector('y')

        def gru_step(x_t, s_t_prev, offset):
            # One GRU update using parameter slices offset .. offset + 2.
            # Word embedding layer
            x_e = E[:, x_t]
            # The gate biases belong inside the sigmoid so the gates stay in
            # [0, 1]; they used to be added to the sigmoid's output.
            z_t = T.nnet.hard_sigmoid(U[offset].dot(x_e) +
                                      W[offset].dot(s_t_prev) + b[offset])
            r_t = T.nnet.hard_sigmoid(U[offset + 1].dot(x_e) +
                                      W[offset + 1].dot(s_t_prev) +
                                      b[offset + 1])
            c_t = T.tanh(U[offset + 2].dot(x_e) +
                         W[offset + 2].dot(s_t_prev * r_t) + b[offset + 2])
            return (T.ones_like(z_t) - z_t) * c_t + z_t * s_t_prev

        def forward_direction_prop_step(x_t, s_t_prev):
            # GRU layer 1; directly return the hidden state as
            # intermediate output
            return [gru_step(x_t, s_t_prev, 0)]

        def backward_direction_prop_step(x_t, s_t_prev):
            # GRU layer 2
            return [gru_step(x_t, s_t_prev, 3)]

        def o_step(combined_s_t):
            o_t = T.nnet.softmax(V.dot(combined_s_t) + c)[0]
            return o_t

        # forward direction states
        f_s, _ = theano.scan(forward_direction_prop_step,
                             sequences=x,
                             truncate_gradient=self.bptt_truncate,
                             outputs_info=T.zeros(self.hidden_dim))

        # backward direction states
        b_s, _ = theano.scan(
            backward_direction_prop_step,
            sequences=x[::-1],  # the reverse direction input
            truncate_gradient=self.bptt_truncate,
            outputs_info=T.zeros(self.hidden_dim))

        self.f_s = f_s
        self.b_s = b_s

        # combine the forward GRU state and backward GRU state together;
        # the backward states are flipped back so both sequences line up
        # on the same time step
        combined_s = T.concatenate([f_s, b_s[::-1]], axis=1)
        # concatenate the hidden state from 2 GRU layer to do the output
        o, _ = theano.scan(o_step,
                           sequences=combined_s,
                           truncate_gradient=self.bptt_truncate,
                           outputs_info=None)

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        cost = o_error

        # Gradients
        dE = T.grad(cost, E)
        dU = T.grad(cost, U)
        dW = T.grad(cost, W)
        db = T.grad(cost, b)
        dV = T.grad(cost, V)
        dc = T.grad(cost, c)

        # Assign functions
        self.predict = theano.function([x], o)
        self.predict_class = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], cost)
        # self.bptt = theano.function([x,y],[dE,dU,dW,db,dV,dc])

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        mE = decay * self.mE + (1 - decay) * dE**2
        mU = decay * self.mU + (1 - decay) * dU**2
        mW = decay * self.mW + (1 - decay) * dW**2
        mV = decay * self.mV + (1 - decay) * dV**2
        mb = decay * self.mb + (1 - decay) * db**2
        mc = decay * self.mc + (1 - decay) * dc**2

        updates = [(E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                   (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                   (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                   (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                   (b, b - learning_rate * db / T.sqrt(mb + 1e-6)),
                   (c, c - learning_rate * dc / T.sqrt(mc + 1e-6)),
                   (self.mE, mE), (self.mU, mU), (self.mW, mW), (self.mV, mV),
                   (self.mb, mb), (self.mc, mc)]

        # theano.In replaces the deprecated theano.Param; decay defaults to
        # 0.9 when the caller omits it.
        self.sgd_step = theano.function(
            [x, y, learning_rate,
             theano.In(decay, value=0.9)], [],
            updates=updates)
Exemple #31
0
def conv3d(signals,
           filters,
           signals_shape=None,
           filters_shape=None,
           border_mode='valid'):
    """
    Convolve spatio-temporal filters with a movie, using 2D convolutions.

    Every (signal frame, filter frame) pair is convolved in 2D with
    ``tensor.nnet.conv2d``; the temporal part of the convolution is then
    obtained by summing along the diagonals of the (Ts, Tf) plane.
    The filters are flipped.

    Parameters
    ----------
    signals
        Timeseries of images whose pixels have color channels.
        Shape: [Ns, Ts, C, Hs, Ws]
        i.e. (batch, time, in channel, row, column).
    filters
        Spatio-temporal filters.
        Shape: [Nf, Tf, C, Hf, Wf]
        i.e. (out channel, time, in channel, row, column).
    signals_shape
        None or a tuple/list with the shape of signals.  When None, the
        symbolic ``signals.shape`` is used and no static shape is handed
        to conv2d.
    filters_shape
        None or a tuple/list with the shape of filters.  When None, the
        symbolic ``filters.shape`` is used and no static shape is handed
        to conv2d.
    border_mode
        One of 'valid', 'full' or 'half'.  Either a single string applied
        to all three axes, or a (time, height, width) triple whose height
        and width entries must be equal.

    Raises
    ------
    NotImplementedError
        If the height and width border modes differ, or if 'same' is
        requested.
    ValueError
        If a border mode is not recognised.

    Notes
    -----
    For the GPU, use nnet.conv3d.

    See Also
    --------
    Someone made a script that shows how to swap the axes between
    both 3d convolution implementations in Theano. See the last
    `attachment <https://groups.google.com/d/msg/theano-users/1S9_bZgHxVw/0cQR9a4riFUJ>`_

    """
    if isinstance(border_mode, str):
        border_mode = (border_mode,) * 3

    sig_shape_5d = signals.shape if signals_shape is None else signals_shape
    flt_shape_5d = filters.shape if filters_shape is None else filters_shape

    # The channel count C is taken from the filters' shape; the channel
    # entry of the signals' shape is unpacked but not used.
    Ns, Ts, _, Hs, Ws = sig_shape_5d
    Nf, Tf, C, Hf, Wf = flt_shape_5d

    # Fold the time axis into the leading axis so conv2d can be applied.
    sig_shape_4d = (Ns * Ts, C, Hs, Ws)
    flt_shape_4d = (Nf * Tf, C, Hf, Wf)

    spatial_mode = border_mode[1]
    if spatial_mode != border_mode[2]:
        raise NotImplementedError('height and width bordermodes must match')

    # Static shape hints are only forwarded when the caller supplied them.
    out_4d = tensor.nnet.conv2d(
        signals.reshape(sig_shape_4d),
        filters.reshape(flt_shape_4d),
        input_shape=None if signals_shape is None else sig_shape_4d,
        filter_shape=None if filters_shape is None else flt_shape_4d,
        border_mode=spatial_mode)  # border_mode[2] is identical

    # Spatial size of the conv2d result for the chosen border mode.
    if spatial_mode == 'same':
        raise NotImplementedError()
    if spatial_mode == 'valid':
        Hout, Wout = Hs - Hf + 1, Ws - Wf + 1
    elif spatial_mode == 'full':
        Hout, Wout = Hs + Hf - 1, Ws + Wf - 1
    elif spatial_mode == 'half':
        Hout, Wout = Hs - (Hf % 2) + 1, Ws - (Wf % 2) + 1
    else:
        raise ValueError('invalid border mode', spatial_mode)

    # Unfold the two fused leading axes back into (Ns, Ts) and (Nf, Tf).
    out_tmp = out_4d.reshape((Ns, Ts, Nf, Tf, Hout, Wout))

    if Tf == 1:
        # A single filter frame: nothing to sum over, Ts axis is unchanged.
        return out_tmp.reshape((Ns, Ts, Nf, Hout, Wout))

    # Amount of zero padding needed along the time axis.
    time_mode = border_mode[0]
    if time_mode == 'same':
        raise NotImplementedError()
    if time_mode == 'valid':
        Tpad = 0
    elif time_mode == 'full':
        Tpad = Tf - 1
    elif time_mode == 'half':
        Tpad = Tf // 2
    else:
        raise ValueError('invalid border mode', time_mode)

    if Tpad == 0:
        diag_source = out_tmp
    else:
        # Embed out_tmp in a zero tensor that is Tpad frames longer on
        # both ends of the time axis before taking the diagonal sum.
        canvas = tensor.zeros(dtype=out_tmp.dtype,
                              shape=(Ns, Ts + 2 * Tpad, Nf, Tf,
                                     Hout, Wout))
        diag_source = tensor.set_subtensor(
            canvas[:, Tpad:(Ts + Tpad), :, :, :, :], out_tmp)

    # Sum along the diagonal through the (Ts, Tf) sub-matrix.
    return diagonal_subtensor(diag_source, 1, 3).sum(axis=3)
Exemple #32
0
    def compute_landmarks_helper(self, moms, init_landmarks):
        """Apply two successive kernel-weighted updates to rows 65..67.

        The first 136 entries of ``moms`` and ``init_landmarks`` are viewed
        as (68, 2) arrays and every row except 65, 66 and 67 is zeroed.
        For each of those three rows a displacement is computed as the sum
        of the masked ``moms`` rows, weighted by
        ``exp(-squared_distance / self.sigmaV2)`` to that row.  The shape is
        moved by ``displacement * self.tau``; the displacement is then
        recomputed on the moved shape and applied a second time.

        Returns the resulting (68, 2) array flattened to a vector.
        """
        grid = (68, 2)
        active_rows = (65, 66, 67)

        moms = T.reshape(moms[:136], grid)  # 68 * 2
        init_landmarks = T.reshape(init_landmarks[:136], grid)

        # 1 on rows 65..67, 0 everywhere else.
        mask = T.zeros(grid)
        mask = T.set_subtensor(mask[65:68, :], np.ones((3, 2)))

        start_shape = init_landmarks * mask
        masked_moms = moms * mask

        def displacement(row, shape, mask_anchor):
            # Broadcast the chosen row over the whole grid; the first pass
            # additionally masks the broadcast copy, the second does not.
            anchor = T.alloc(shape[row, :], 68, 2)
            if mask_anchor:
                anchor = anchor * mask
            kernel = T.exp(- T.sum((anchor - shape) ** 2, axis=1) / self.sigmaV2)
            # Same per-row weight in both coordinate columns.
            weights = T.zeros(grid)
            weights = T.set_subtensor(weights[:, 0], kernel)
            weights = T.set_subtensor(weights[:, 1], kernel)
            return T.sum(weights * masked_moms, axis=0)

        # First update, computed on the masked initial shape.
        first_move = T.zeros(grid)
        for row in active_rows:
            first_move = T.set_subtensor(
                first_move[row, :], displacement(row, start_shape, True))
        deformedShape = start_shape + (first_move * self.tau)

        # Second update, computed on the shape produced by the first one.
        second_move = T.zeros(grid)
        for row in active_rows:
            second_move = T.set_subtensor(
                second_move[row, :], displacement(row, deformedShape, False))

        return (deformedShape + second_move * self.tau).flatten()
Exemple #33
0
    def __init__(self, x, n_in, n_hidden, n_out, activation='tanh', order=1):
        """Build the symbolic graph of a (multiplicative) recurrent layer.

        Parameters
        ----------
        x
            Symbolic 3-D input.  It is dimshuffled with ``[1, 0, 2]`` before
            being scanned and ``x.shape[0]`` sizes the initial hidden state,
            so it is laid out as (batch, time, n_in).
        n_in, n_hidden, n_out : int
            Input, hidden and output sizes.
        activation : str
            One of 'tanh', 'relu', 'sigmoid' or 'linear' (case-insensitive).
        order : int
            ``0`` builds a plain additive recurrence.  Any other value builds
            ``order`` multiplicative pre-activation slices whose activations
            are multiplied together to form the hidden state.

        Raises
        ------
        ValueError
            If ``activation`` is not one of the supported names.

        Attributes set
        --------------
        Wx, Wh, bh, am, ax, ah, Wy, by, params, W, L1, L2, output, preact,
        pred.  For ``order == 0`` the gates ``am``, ``ax`` and ``ah`` are
        empty lists.
        """
        self.x = x
        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.order = order

        # Resolve the activation up front so that a typo fails here with a
        # clear message instead of as an unbound name inside theano.scan.
        act_name = activation.lower()
        if act_name == 'tanh':
            act = tanh
        elif act_name == 'relu':
            act = relu
        elif act_name == 'sigmoid':
            act = sigmoid
        elif act_name == 'linear':
            def act(v):
                return v
        else:
            raise ValueError(
                "Unknown activation %r; expected 'tanh', 'relu', 'sigmoid' "
                "or 'linear'" % (activation,))

        def _slice(x, n):
            # n-th block of n_hidden columns of a stacked pre-activation
            return x[:, n * self.n_hidden:(n + 1) * self.n_hidden]

        # initialize weights
        def ortho_weight(ndim, rng=rng):
            # left singular vectors of a random Gaussian matrix
            W = rng.randn(ndim, ndim)
            u, _, _ = numpy.linalg.svd(W)
            return u.astype(theano.config.floatX)

        def uniform_weight(n1, n2, rng=rng):
            # uniform in [-limit, limit] with limit = sqrt(6 / (n1 + n2))
            limit = numpy.sqrt(6. / (n1 + n2))
            return rng.uniform(low=-limit, high=limit,
                               size=(n1, n2)).astype(theano.config.floatX)

        def const_bias(n, value=0):
            return value * numpy.ones((n, ), dtype=theano.config.floatX)

        if self.order == 0:
            # no multiplicative terms
            self.Wx = theano.shared(uniform_weight(n_in, n_hidden),
                                    borrow=True)
            self.Wh = theano.shared(ortho_weight(n_hidden), borrow=True)
            self.bh = theano.shared(const_bias(n_hidden, 0), borrow=True)

            # The gates do not exist in this mode; they are still passed to
            # scan so that both forward functions share one signature.
            self.am = []
            self.ax = []
            self.ah = []

            recurrent_params = [self.Wx, self.Wh, self.bh]

            # forward function
            def forward(x_t, h_tm1, Wx, Wh, bh, am, ax, ah, Wy, by):
                preact = T.dot(x_t, Wx) + T.dot(h_tm1, Wh) + bh
                h_t = act(preact)
                y_t = softmax(T.dot(h_t, Wy) + by)
                return h_t, y_t, preact
        else:
            def stacked_bias(value):
                # one n_hidden-long constant block per multiplicative slice
                return numpy.concatenate(
                    [const_bias(n_hidden, value) for i in range(order)],
                    axis=0)

            # Weight draws happen in the same order as before (all Wx
            # slices, then all Wh slices) so a seeded rng gives the same
            # initial values.
            self.Wx = theano.shared(numpy.concatenate(
                [uniform_weight(n_in, n_hidden) for i in range(order)],
                axis=1),
                                    borrow=True)
            self.Wh = theano.shared(numpy.concatenate(
                [ortho_weight(n_hidden) for i in range(order)], axis=1),
                                    borrow=True)
            self.am = theano.shared(stacked_bias(2), borrow=True)
            self.ax = theano.shared(stacked_bias(0.5), borrow=True)
            self.ah = theano.shared(stacked_bias(0.5), borrow=True)
            self.bh = theano.shared(stacked_bias(0), borrow=True)

            recurrent_params = [
                self.Wx, self.Wh, self.am, self.ax, self.ah, self.bh
            ]

            # forward function
            def forward(x_t, h_tm1, Wx, Wh, bh, am, ax, ah, Wy, by):
                h_t = 1
                preact = am*T.dot(x_t,Wx)*T.dot(h_tm1,Wh) \
                        +ax*T.dot(x_t,Wx) \
                        +ah*T.dot(h_tm1,Wh) \
                        +bh
                # hidden state is the product of the activated slices
                for i in range(self.order):
                    h_t = h_t * act(_slice(preact, i))
                y_t = softmax(T.dot(h_t, Wy) + by)
                return h_t, y_t, preact

        # Readout layer and regularisers are identical for both modes.  Wy is
        # drawn after Wx and Wh, exactly as in each original branch.
        self.Wy = theano.shared(uniform_weight(n_hidden, n_out),
                                borrow=True)
        self.by = theano.shared(const_bias(n_out, 0), borrow=True)

        self.params = recurrent_params + [self.Wy, self.by]
        self.W = [self.Wx, self.Wh, self.Wy]
        self.L1 = numpy.sum([abs(w).sum() for w in self.W])
        self.L2 = numpy.sum([(w**2).sum() for w in self.W])

        # zero initial hidden state, one row per batch element
        h0 = T.alloc(T.zeros((self.n_hidden, ), dtype=theano.config.floatX),
                     x.shape[0], self.n_hidden)
        ([h, y, p], updates) = theano.scan(
            fn=forward,
            sequences=x.dimshuffle([1, 0, 2]),
            outputs_info=[dict(initial=h0, taps=[-1]), None, None],
            non_sequences=[
                self.Wx, self.Wh, self.bh, self.am, self.ax, self.ah, self.Wy,
                self.by
            ])
        self.output = y
        self.preact = p
        # NOTE(review): scan stacks y_t along a leading time axis, so axis=1
        # here looks like the batch axis rather than the class axis.
        # Behaviour left unchanged; confirm the intended axis.
        self.pred = T.argmax(self.output, axis=1)
def MultiOutput_Bayesian_Calibration(n_y,DataComp,DataField,DataPred,output_folder):
    """Run a multi-output Bayesian calibration and write its results to disk.

    Parameters
    ----------
    n_y : int
        Number of output columns.  In all three data arrays the first
        ``n_y`` columns are outputs.
    DataComp : 2-D array
        Simulation data: ``n_y`` outputs, then ``p`` inputs, then ``q``
        calibration parameters.
    DataField : 2-D array
        Measured data: ``n_y`` outputs, then ``p`` inputs.
    DataPred : 2-D array
        Prediction points: ``n_y`` true outputs, then ``p`` inputs.
    output_folder : str
        Existing directory that receives ``trace_summary.csv``,
        ``y_pred<i>.csv``, ``index<i>.csv``, ``cvrmse_dist.pdf`` and
        ``Prediction_Plot.pdf``.

    Notes
    -----
    The input and output columns are sliced as views and normalised in
    place, so ``DataComp``, ``DataField`` and the input columns of
    ``DataPred`` are modified for the caller as well.  Pass copies if the
    raw values are still needed.
    """
    # This is data preprocessing part
    n = np.shape(DataField)[0] # number of measured data
    m = np.shape(DataComp)[0] # number of simulation data

    p = np.shape(DataField)[1] - n_y # number of input x
    q = np.shape(DataComp)[1] - p - n_y # number of calibration parameters t

    xc = DataComp[:,n_y:] # simulation input x + calibration parameters t
    xf = DataField[:,n_y:] # observed input

    yc = DataComp[:,:n_y] # simulation output
    yf = DataField[:,:n_y] # observed output

    x_pred = DataPred[:,n_y:] # design points for predictions
    y_true = DataPred[:,:n_y] # true measured value for design points for predictions
    n_pred = np.shape(x_pred)[0] # number of predictions

    # Put points xc, xf, and x_pred on [0,1], using the joint range of the
    # simulation and field inputs
    for i in range(p):
        x_min = min(min(xc[:,i]),min(xf[:,i]))
        x_max = max(max(xc[:,i]),max(xf[:,i]))
        xc[:,i] = (xc[:,i]-x_min)/(x_max-x_min)
        xf[:,i] = (xf[:,i]-x_min)/(x_max-x_min)
        x_pred[:,i] = (x_pred[:,i]-x_min)/(x_max-x_min)

    # Put calibration parameters t on domain [0,1]
    for i in range(p,(p+q)):
        t_min = min(xc[:,i])
        t_max = max(xc[:,i])
        xc[:,i] = (xc[:,i]-t_min)/(t_max-t_min)

    # store mean and std of yc for future scale back use
    yc_mean = np.zeros(n_y)
    yc_sd = np.zeros(n_y)

    # standardization of output yf and yc (both with the statistics of yc)
    for i in range(n_y):
        yc_mean[i] = np.mean(yc[:,i])
        yc_sd[i] = np.std(yc[:,i])
        yc[:,i] = (yc[:,i]-yc_mean[i])/yc_sd[i]
        yf[:,i] = (yf[:,i]-yc_mean[i])/yc_sd[i]

    # This is modeling part
    with pm.Model() as model:
        # Claim prior part
        eta1 = pm.HalfCauchy("eta1", beta=5) # for eta of gaussian process
        lengthscale = pm.Gamma("lengthscale", alpha=2, beta=1, shape=(p+q)) # for lengthscale of gaussian process
        tf = pm.Beta("tf", alpha=2, beta=2, shape=q) # for calibration parameters
        sigma1 = pm.HalfCauchy('sigma1', beta=5) # for noise
        y_pred = pm.Normal('y_pred', 0, 1.5, shape=(n_pred,n_y)) # for y prediction

        # Setup prior of right cholesky matrix
        sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=n_y)
        colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=n_y, eta=2,sd_dist=sd_dist)
        colchol = pm.expand_packed_triangular(n_y, colchol_packed)

        # Concatenate data into a big matrix [[xf tf], [xc tc], [x_pred tf]]
        xf1 = tt.concatenate([xf, tt.fill(tt.zeros([n,q]), tf)], axis = 1)
        x_pred1 = tt.concatenate([x_pred, tt.fill(tt.zeros([n_pred,q]), tf)], axis = 1)
        X = tt.concatenate([xf1, xc, x_pred1], axis = 0)
        # Concatenate data into a big matrix [[yf], [yc], [y_pred]]
        y = tt.concatenate([yf, yc, y_pred], axis = 0)

        # Covariance function of gaussian process
        cov_z = eta1**2 * pm.gp.cov.ExpQuad((p+q), ls=lengthscale)
        # Gaussian process with covariance function of cov_z
        gp = MultiMarginal(cov_func = cov_z)

        # Bayesian inference
        matrix_shape = [n+m+n_pred,n_y]
        gp.marginal_likelihood("outcome", X=X, y=y, colchol=colchol, noise=sigma1, matrix_shape=matrix_shape)
        trace = pm.sample(250,cores=1)

    # This part is for data collection and visualization
    # The summary is computed once and reused for the CSV, the console
    # output and every plot below.
    summary = pm.summary(trace)
    summary.to_csv(output_folder + '/trace_summary.csv')
    print(summary)

    # One column per (prediction point, output) pair, output index fastest.
    name_columns = ['y'+str(j+1)+'_pred'+str(i+1)
                    for i in range(n_pred)
                    for j in range(n_y)]
    # The number of posterior draws is inferred (-1) instead of being
    # hard-coded to 500, which only matched one specific draws/chains setup.
    y_prediction = pd.DataFrame(np.array(trace['y_pred']).reshape(-1,n_pred*n_y),columns=name_columns)

    #Draw Picture of cvrmse_dist and calculate index
    for i in range(n_y):
        # columns belonging to output i
        index = list(range(0+i,n_pred*n_y+i,n_y))
        y_prediction1 = pd.DataFrame(y_prediction.iloc[:,index])
        y_prediction1 = y_prediction1*yc_sd[i]+yc_mean[i] # Scale y_prediction back
        y_prediction1.to_csv(output_folder + '/y_pred'+str(i+1)+'.csv') # Store y_prediction

        # Calculate the distribution of cvrmse (one value per posterior draw)
        cvrmse = 100*np.sqrt(np.sum(np.square(y_prediction1-y_true[:,i]),axis=1)/n_pred)/np.mean(y_true[:,i])
        # Calculate the index and store it into csv
        index_cal(y_prediction1,y_true[:,i]).to_csv(output_folder + '/index'+str(i+1)+'.csv')
        # Draw picture of cvrmse distribution of each y
        plt.subplot(n_y, 1, i+1)
        plt.hist(cvrmse)

    plt.savefig(output_folder + '/cvrmse_dist.pdf')
    plt.close()

    #Draw Picture of Prediction_Plot
    for i in range(n_y):
        # NOTE(review): these are positional row numbers into the summary
        # table, so this relies on the y_pred rows coming first in it.
        index = list(range(0+i,n_pred*n_y+i,n_y))

        y_prediction_mean = np.array(summary['mean'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_975 = np.array(summary['hpd_97.5'][index])*yc_sd[i]+yc_mean[i]
        y_prediction_025 = np.array(summary['hpd_2.5'][index])*yc_sd[i]+yc_mean[i]

        plt.subplot(n_y, 1, i+1)
        # estimated probability
        plt.scatter(x=range(n_pred), y=y_prediction_mean)
        # error bars on the estimate
        plt.vlines(range(n_pred), ymin=y_prediction_025, ymax=y_prediction_975)
        # actual outcomes
        plt.scatter(x=range(n_pred),
                   y=y_true[:,i], marker='x')

        plt.xlabel('predictor')
        plt.ylabel('outcome')

        # This is just to print original cvrmse to test whether outcome good
        if i == 0:
            residual = y_prediction_mean-y_true[:,0]
            cvrmse = 100*np.sqrt(np.sum(np.square(residual))/len(residual))/np.mean(y_true[:,0])
            print(cvrmse)

    plt.savefig(output_folder + '/Prediction_Plot.pdf')
    plt.close()
def test_convolutional_layer():
    """Build a small conv + MLP graph and print gradient statistics.

    A ``Convolutional`` brick (5x5 filters, 200 feature maps) followed by a
    rectifier and 2x2 max-pooling is applied to a 15-channel 10x10 input,
    flattened and fed to a 10-unit MLP.  The gradients of the cost with
    respect to a weight, a bias and the convolution output are built, the
    graph is compiled and run on a random batch of two images, and the mean
    of the reshaped output gradient is printed.

    Compared with the earlier version, the prints are written as function
    calls (valid on both Python 2 and 3) and the interactive
    ``pdb.set_trace()`` breakpoints are removed so the test runs unattended.
    """
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    # symbolic batch size, taken from the convnet output
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))
    mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    # last weight / bias variables returned by the filters
    W = W[-1]
    b = B[-1]

    print(W.shape.eval())
    print(b.shape.eval())

    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    var_input = inputs_conv[0]
    var_output = outputs_conv[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    w_shape = W.shape.eval()
    d_W = d_W.reshape((w_shape[0], w_shape[1]*w_shape[2]*w_shape[3]))

    # The bias gradient is replaced by zeros here (6*6 is the spatial size
    # of the convolution output for a 10x10 input and 5x5 filters).
    d_b = T.zeros((w_shape[0], 6*6))
    #d_b = d_b.reshape((w_shape[0], 8*8))
    d_p = T.concatenate([d_W, d_b], axis=1)
    d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape((w_shape[0], batch_size, 6*6)).reshape((w_shape[0], batch_size*6*6))
    #d_S = d_S.reshape((2,200, 64))
    #x_value=1e3*np.random.ranf((1,15,10,10))
    x_value = 1e3*np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_W], allow_input_downcast=True, on_unused_input='ignore')
    A, B, C = f(x_value, [5, 5])
    print(np.mean(B))
    return

    # NOTE(review): everything below is unreachable because of the early
    # return above.  It is kept as the disabled consistency check between
    # d_W and d_S times the expanded input; confirm whether to re-enable
    # or delete it.
    E_A = expansion_op(A, (2, 15, 10, 10), (5, 5))
    print(E_A.shape)
    E_A = E_A.reshape((2*36, C.shape[1]))
    print(E_A.shape)
    tmp = C - np.dot(B, E_A)
    print(lin.norm(tmp, 'fro'))
def embedder(x, all_embeddings):
    """Look up rows of ``all_embeddings`` after appending one zero row.

    A single all-zero row is concatenated below the table, so the index
    ``all_embeddings.shape[0]`` (and ``-1``) selects a zero vector.  The
    padded table is then indexed with ``x``.
    """
    zero_row = T.zeros((1, all_embeddings.shape[1]))
    padded_table = T.concatenate([all_embeddings, zero_row], axis=0)
    return padded_table[x]
Exemple #37
0
 def __init__(self, u, model=None):
     """Store ``u`` as a tensor and precompute the ``c`` coefficients.

     Citations are registered on ``model`` first.  ``u`` is converted to a
     tensor variable, a constant ``-1`` entry is prepended to it, and the
     result is passed through ``get_cl`` to obtain ``self.c``.
     ``self.c_norm`` is ``self.c`` divided by
     ``pi * (c[0] + 2 * c[1] / 3)``.
     """
     add_citations_to_model(self.__citations__, model=model)
     self.u = tt.as_tensor_variable(u)

     # one-element tensor holding -1, with the same dtype as u
     leading_term = -1 + tt.zeros(1, dtype=self.u.dtype)
     extended_u = tt.concatenate([leading_term, self.u])
     self.c = get_cl(extended_u)

     normalisation = np.pi * (self.c[0] + 2 * self.c[1] / 3)
     self.c_norm = self.c / normalisation
Exemple #38
0
 def jacobian_det(self, x):
     """Return a tensor of zeros with the same shape as ``x``."""
     result_shape = x.shape
     return tt.zeros(result_shape)
Exemple #39
0
    def cal_decoder_step(self, decoder_val):
        '''
            Calculate the weight ratios in the decoder for step ``self.idx``.

            Each ratio has the form
            ``contribution / (total + self.ep * sgn(total))``: one term of a
            weighted sum divided by the whole sum, with ``self.ep`` keeping
            the denominator away from zero.  Ratios of consecutive layers are
            chained with ``TT.dot`` or element-wise products.

            :type decoder_val: class
            :param decoder_val: the class which stores the intermediate variables in decoder;
                every attribute used here is indexed with ``self.idx``

            :returns: R_h_h, R_h_x, R_h_y, R_outenergy_2_h, R_outenergy_2_x, R_outenergy_2_y_before are theano variables, weight ratios in decoder.
        '''
        # --- ratios of the input embedding y to the state / reset / gate
        # input terms
        y = decoder_val.y[self.idx].dimshuffle(0, 'x')
        R_state_in_y = (
            y * self.dec_input_emb + self.dec_input_emb_offset[self.idx]) / (
                decoder_val.state_in[self.idx] + self.ep *
                TT.sgn(decoder_val.state_in[self.idx])).dimshuffle('x', 0)
        R_state_in_y = R_state_in_y.dimshuffle(1, 0)
        R_reset_in_y = y * self.dec_reset_emb / (
            decoder_val.reset_in[self.idx] + self.ep *
            TT.sgn(decoder_val.reset_in[self.idx])).dimshuffle('x', 0)
        R_reset_in_y = R_reset_in_y.dimshuffle(1, 0)
        R_gate_in_y = y * self.dec_gate_emb / (
            decoder_val.gate_in[self.idx] + self.ep *
            TT.sgn(decoder_val.gate_in[self.idx])).dimshuffle('x', 0)
        R_gate_in_y = R_gate_in_y.dimshuffle(1, 0)
        # --- ratios of the context c to the gate / reset / state context
        # terms, then chained through self.R_c_x
        c = decoder_val.c[self.idx].dimshuffle(0, 'x')
        R_gate_cin = c * self.dec_gate_context / (
            decoder_val.gate_cin[self.idx] + self.ep *
            TT.sgn(decoder_val.gate_cin[self.idx])).dimshuffle('x', 0)
        R_gate_cin = R_gate_cin.dimshuffle(1, 0)
        R_reset_cin = c * self.dec_reset_context / (
            decoder_val.reset_cin[self.idx] + self.ep *
            TT.sgn(decoder_val.reset_cin[self.idx])).dimshuffle('x', 0)
        R_reset_cin = R_reset_cin.dimshuffle(1, 0)
        R_state_cin = c * self.dec_input_context / (
            decoder_val.state_cin[self.idx] + self.ep *
            TT.sgn(decoder_val.state_cin[self.idx])).dimshuffle('x', 0)
        R_state_cin = R_state_cin.dimshuffle(1, 0)
        R_gate_cin_x = TT.dot(R_gate_cin, self.R_c_x).dimshuffle(1, 0, 2)
        R_reset_cin_x = TT.dot(R_reset_cin, self.R_c_x)
        R_reset_cin_x = R_reset_cin_x.dimshuffle(1, 0, 2)
        R_state_cin_x = TT.dot(R_state_cin, self.R_c_x)
        R_state_cin_x = R_state_cin_x.dimshuffle(1, 0, 2)
        # --- ratios of the previous hidden state to the gate and reset sums
        h_before = decoder_val.h_before[self.idx].dimshuffle(0, 'x')
        R_gate_h = h_before * self.dec_gate_hidden / (
            decoder_val.gate[self.idx] +
            self.ep * TT.sgn(decoder_val.gate[self.idx])).dimshuffle('x', 0)
        R_gate_h = R_gate_h.dimshuffle(1, 0)
        R_reset_h = h_before * self.dec_reset_hidden / (
            decoder_val.reset[self.idx] +
            self.ep * TT.sgn(decoder_val.reset[self.idx])).dimshuffle('x', 0)
        R_reset_h = R_reset_h.dimshuffle(1, 0)
        # --- rescale the y / x ratios by the share their input term has in
        # the full gate / reset sum
        R_gate_y = R_gate_in_y * (
            decoder_val.gate_in[self.idx] /
            (decoder_val.gate[self.idx] +
             self.ep * TT.sgn(decoder_val.gate[self.idx]))).dimshuffle(0, 'x')
        R_reset_y = R_reset_in_y * (decoder_val.reset_in[self.idx] / (
            decoder_val.reset[self.idx] +
            self.ep * TT.sgn(decoder_val.reset[self.idx]))).dimshuffle(0, 'x')
        R_gate = (decoder_val.gate_cin[self.idx] /
                  (decoder_val.gate[self.idx] +
                   self.ep * TT.sgn(decoder_val.gate[self.idx]))).dimshuffle(
                       'x', 0, 'x')
        R_gate_x = R_gate * R_gate_cin_x
        R_reset = (decoder_val.reset_cin[self.idx] /
                   (decoder_val.reset[self.idx] +
                    self.ep * TT.sgn(decoder_val.reset[self.idx]))).dimshuffle(
                        'x', 0, 'x')
        R_reset_x = R_reset * R_reset_cin_x
        # --- ratios for the reset hidden state, all scaled by self.weight
        R_reseted_h = R_reset_h * self.weight + TT.eye(self.dim,
                                                       self.dim) * self.weight
        R_reseted_y = R_reset_y * self.weight
        R_reseted_x = R_reset_x * self.weight
        # --- ratios for the candidate state
        R_state_x = R_state_cin_x * (
            decoder_val.state_cin[self.idx] /
            (decoder_val.state[self.idx] + self.ep *
             TT.sgn(decoder_val.state[self.idx]))).dimshuffle('x', 0, 'x')
        # NOTE(review): this R_state_y value is never read; it is replaced by
        # the TT.dot(...) assignment a few lines below, whereas R_state_x is
        # accumulated with "+=".  Confirm whether "+=" was intended here too.
        R_state_y = R_state_in_y * (decoder_val.state_in[self.idx] / (
            decoder_val.state[self.idx] +
            self.ep * TT.sgn(decoder_val.state[self.idx]))).dimshuffle(0, 'x')
        reseted = decoder_val.reseted[self.idx].dimshuffle(0, 'x')
        R_state_reseted = reseted * self.dec_input_hidden[self.idx] / (
            decoder_val.state[self.idx] +
            self.ep * TT.sgn(decoder_val.state[self.idx])).dimshuffle(0, 'x')
        R_state_reseted = R_state_reseted.dimshuffle(1, 0)
        R_state_h = TT.dot(R_state_reseted, R_reseted_h)
        R_state_x += TT.dot(R_state_reseted, R_reseted_x).dimshuffle(1, 0, 2)
        R_state_y = TT.dot(R_state_reseted, R_reseted_y)
        # --- combine gate and state ratios into ratios for the new hidden
        # state h
        R_h = (decoder_val.gate[self.idx] * decoder_val.state[self.idx] /
               (decoder_val.h[self.idx] +
                self.ep * TT.sgn(decoder_val.h[self.idx]))).dimshuffle(
                    0, 'x') * self.weight
        R_h_h = R_gate_h * R_h + R_state_h * R_h
        R_h2 = ((1 - decoder_val.gate[self.idx]) *
                decoder_val.h_before[self.idx] /
                (decoder_val.h[self.idx] +
                 self.ep * TT.sgn(decoder_val.h[self.idx]))).dimshuffle(
                     0, 'x')
        # the (1 - gate) * h_before share is added on the diagonal only
        R_h_h += TT.identity_like(R_h_h) * R_h2
        R_h_y = R_gate_y * R_h + R_state_y * R_h
        # same ratio as R_h above, re-broadcast for the 3-D x ratios
        R_h = (decoder_val.gate[self.idx] * decoder_val.state[self.idx] /
               (decoder_val.h[self.idx] +
                self.ep * TT.sgn(decoder_val.h[self.idx]))).dimshuffle(
                    'x', 0, 'x') * self.weight
        R_h_x = R_gate_x * R_h + R_state_x * R_h

        # --- ratios of context, previous hidden state and previous target
        # to the readout
        R_readout_c = c * self.dec_readout_context / (
            decoder_val.readout[self.idx] + self.ep *
            TT.sgn(decoder_val.readout[self.idx])).dimshuffle('x', 0)
        R_readout_c = R_readout_c.dimshuffle(1, 0)
        R_readout_x = TT.dot(R_readout_c, self.R_c_x).dimshuffle(1, 0, 2)
        R_readout_h = h_before * self.dec_readout_hidden / (
            decoder_val.readout[self.idx] + self.ep *
            TT.sgn(decoder_val.readout[self.idx])).dimshuffle('x', 0)
        R_readout_h = R_readout_h.dimshuffle(1, 0)
        y_before = decoder_val.y_before[self.idx].dimshuffle(0, 'x')
        R_readout_y_before = y_before * self.dec_readout_emb / (
            decoder_val.readout[self.idx] + self.ep *
            TT.sgn(decoder_val.readout[self.idx])).dimshuffle('x', 0)
        R_readout_y_before = R_readout_y_before.dimshuffle(1, 0)
        # --- maxout: build a 0/1 selection matrix with a single 1 per row,
        # at the position of the larger element of each consecutive pair
        # NOTE(review): "/" is used for sizes and indices below.  Under
        # Python 3 this is true division and yields floats; "//" is probably
        # required there.  Confirm the target Python version.
        dim1 = decoder_val.maxout[self.idx].shape[0]
        maxout = decoder_val.maxout[self.idx].reshape([dim1 / 2, 2])
        maxout = TT.argmax(maxout, axis=1)
        maxout = maxout.reshape([dim1 / 2])
        L = TT.arange(dim1 / 2)
        # flat index of the winner of pair L in a (dim1 / 2, dim1) matrix
        maxout = maxout + L * 2 + L * dim1
        R_maxout = TT.zeros((self.dim * self.dim / 2))
        R_maxout = TT.set_subtensor(R_maxout[maxout.flatten()], 1.0)
        R_maxout = R_maxout.reshape([self.dim / 2, self.dim])
        R_maxout_y_before = TT.dot(R_maxout, R_readout_y_before)
        R_maxout_h = TT.dot(R_maxout, R_readout_h)
        R_maxout_x = TT.dot(R_maxout, R_readout_x).dimshuffle(1, 0, 2)
        # --- ratios of the maxout output to the first output energy
        maxout = decoder_val.maxout[self.idx].dimshuffle(0, 'x')
        R_outenergy1_maxout = maxout * self.dec_probs_emb / (
            decoder_val.outenergy_1[self.idx] + self.ep *
            TT.sgn(decoder_val.outenergy_1[self.idx])).dimshuffle('x', 0)
        R_outenergy1_maxout = R_outenergy1_maxout.dimshuffle(1, 0)
        R_outenergy1_y_before = TT.dot(R_outenergy1_maxout, R_maxout_y_before)
        R_outenergy1_h = TT.dot(R_outenergy1_maxout, R_maxout_h)
        R_outenergy1_x = TT.dot(R_outenergy1_maxout,
                                R_maxout_x).dimshuffle(1, 0, 2)
        # --- ratios to the second output energy, restricted to the entry
        # selected by y_idx
        probs = self.dec_probs.dimshuffle(
            1, 0)[decoder_val.y_idx[self.idx]].dimshuffle(0, 'x')
        outenergy_1 = decoder_val.outenergy_1[self.idx].dimshuffle(0, 'x')
        idx = decoder_val.y_idx[self.idx]
        outenergy_2 = (decoder_val.outenergy_2[self.idx][idx])
        # NOTE(review): the stabiliser here is self.ep * outenergy_2, not
        # self.ep * TT.sgn(outenergy_2) as everywhere else in this method.
        # Confirm this is intended.
        R_outenergy_2 = outenergy_1 * probs / (outenergy_2 +
                                               self.ep * outenergy_2)
        R_outenergy_2 = R_outenergy_2.dimshuffle(1, 0)
        R_outenergy_2_y_before = TT.dot(R_outenergy_2, R_outenergy1_y_before)
        R_outenergy_2_h = TT.dot(R_outenergy_2, R_outenergy1_h)
        R_outenergy_2_x = TT.dot(R_outenergy_2,
                                 R_outenergy1_x).dimshuffle(1, 0, 2)
        return R_h_h, R_h_x, R_h_y, R_outenergy_2_h, R_outenergy_2_x, R_outenergy_2_y_before
Exemple #40
0
def conv3d(x,
           kernel,
           strides=(1, 1, 1),
           border_mode='valid',
           dim_ordering='th',
           volume_shape=None,
           filter_shape=None):
    '''
    3D convolution implemented through ``conv3d2d.conv3d``.

    x: 5D input, (samples, input_depth, dim1, dim2, dim3) for 'th' or
        (samples, dim1, dim2, dim3, input_depth) for 'tf'.
    kernel: 5D filters, (out_depth, input_depth, k1, k2, k3) for 'th' or
        (k1, k2, k3, input_depth, out_depth) for 'tf'.
    strides: applied by slicing the stride-1 result; must be (1, 1, 1)
        when border_mode is "same" (checked with an assert).
    border_mode: string, "same" or "valid". "same" zero-pads the input by
        (kernel size - 1) per spatial axis and then runs a "valid"
        convolution.
    dim_ordering: "th" or "tf"; the result uses the same ordering.
    volume_shape, filter_shape: permuted to 'th' order when given, but not
        forwarded to the underlying convolution.

    Raises Exception for an unknown dim_ordering or border_mode.
    '''
    known_orderings = {'th', 'tf'}
    known_modes = {'same', 'valid'}
    if dim_ordering not in known_orderings:
        raise Exception('Unknown dim_ordering ' + str(dim_ordering))
    if border_mode not in known_modes:
        raise Exception('Invalid border mode: ' + str(border_mode))

    channels_last = dim_ordering == 'tf'
    if channels_last:
        # Bring TF-ordered tensors (channels last) into TH order
        # (channels second) before convolving.
        to_th_input = (0, 4, 1, 2, 3)
        to_th_kernel = (4, 3, 0, 1, 2)
        x = x.dimshuffle(to_th_input)
        kernel = kernel.dimshuffle(to_th_kernel)
        if volume_shape:
            volume_shape = tuple(volume_shape[axis] for axis in to_th_input)
        if filter_shape:
            filter_shape = tuple(filter_shape[axis] for axis in to_th_kernel)

    if border_mode == 'same':
        assert (strides == (1, 1, 1))
        spatial_axes = (2, 3, 4)
        pads = [kernel.shape[axis] - 1 for axis in spatial_axes]
        sizes = [x.shape[axis] for axis in spatial_axes]

        # Zero canvas enlarged by the full padding on every spatial axis.
        canvas_shape = (x.shape[0], x.shape[1]) + tuple(
            size + pad for size, pad in zip(sizes, pads))
        canvas = T.zeros(canvas_shape)

        # The input goes pad // 2 entries in from the start of each axis.
        window = (slice(None), slice(None)) + tuple(
            slice(pad // 2, size + pad // 2)
            for size, pad in zip(sizes, pads))
        x = T.set_subtensor(canvas[window], x)
        border_mode = 'valid'

    # conv3d2d expects (batch, time, channel, row, column): swap axes 1
    # and 2 on the way in and again on the way out.
    swap = (0, 2, 1, 3, 4)
    conv_out = conv3d2d.conv3d(signals=x.dimshuffle(*swap),
                               filters=kernel.dimshuffle(*swap),
                               border_mode=(border_mode,) * 3)
    conv_out = conv_out.dimshuffle(*swap)

    # support strides by manually slicing the output
    if strides != (1, 1, 1):
        step1, step2, step3 = strides[0], strides[1], strides[2]
        conv_out = conv_out[:, :, ::step1, ::step2, ::step3]

    if channels_last:
        conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1))

    return conv_out
Exemple #41
0
def theano_one_hot(idx, n):
    """Return a symbolic one-hot encoding of `idx` with `n` columns.

    The result has one row per entry of `idx` (``idx.shape[0]`` rows); in
    row ``i`` the column ``idx[i]`` is set to 1 and every other entry is 0.
    """
    num_rows = idx.shape[0]
    blank = T.zeros((num_rows, n))
    # Pair every row index with its class index to address one cell per row.
    row_ids = T.arange(num_rows)
    return T.set_subtensor(blank[row_ids, idx], 1)
Exemple #42
0
    def construct_graph(self, args, x, length, popstats=None):
        """Build the scan graph for the batch-normalized LSTM-style recurrence.

        Parameters
        ----------
        args
            Configuration object; this method reads ``num_hidden``,
            ``baseline`` and ``use_population_statistics`` from it.
        x
            Symbolic input. Axis 0 is scanned over (one slice per step) and
            axis 1 is taken as the batch size.
        length
            Training sequence length. Step indices at or beyond it reuse the
            last population statistic. It is also the leading dimension of
            the population-statistic shared variables created here.
        popstats : dict, optional
            Maps ``"<key>_<stat>"`` (key in ``a``/``b``/``c``, stat in
            ``mean``/``var``) to per-step population statistics. It is
            indexed whenever ``args.use_population_statistics`` is set and
            ``args.baseline`` is not, so it must be supplied in that case.

        Returns
        -------
        outputs : dict
            Scan outputs keyed by ``h``, ``c``, ``atilde``, ``btilde`` and
            ``{a,b,c}_{mean,var}``.
        updates
            The updates returned by ``theano.scan``, extended with
            moving-average updates when population statistics are estimated.
        dummy_states : dict
            Zero tensors ``h`` and ``c`` that are added to the corresponding
            state at every step.
        popstats
            The argument as passed in, or the freshly created dict of shared
            variables when statistics are being estimated.
        """
        p = self.parameters

        # use `symlength` where we need to be able to adapt to longer sequences
        # than the ones we trained on
        symlength = x.shape[0]
        t = T.cast(T.arange(symlength), "int16")
        long_sequence_is_long = T.ge(
            T.cast(T.arange(symlength), theano.config.floatX), length)
        batch_size = x.shape[1]
        # NOTE(review): these zeros are only ever added to h and c; presumably
        # they exist as per-step handles on the states -- confirm with callers.
        dummy_states = dict(h=T.zeros(
            (symlength, batch_size, args.num_hidden)),
                            c=T.zeros(
                                (symlength, batch_size, args.num_hidden)))

        # Order matters: the first two names are the recurrent outputs and
        # must line up with `outputs_info` below.
        output_names = "h c atilde btilde".split()
        for key in "abc":
            for stat in "mean var".split():
                output_names.append("%s_%s" % (key, stat))

        def stepfn(t, long_sequence_is_long, x, dummy_h, dummy_c, h, c):
            # population statistics are sequences, but we use them
            # like a non-sequence and index it ourselves. this allows
            # us to generalize to longer sequences, in which case we
            # repeat the last element.
            popstats_by_key = dict()
            for key in "abc":
                popstats_by_key[key] = dict()
                for stat in "mean var".split():
                    if not args.baseline and args.use_population_statistics:
                        popstat = popstats["%s_%s" % (key, stat)]
                        # pluck the appropriate population statistic for this
                        # time step out of the sequence, or take the last
                        # element if we've gone beyond the training length.
                        # if `long_sequence_is_long` then `t` may be unreliable
                        # as it will overflow for looong sequences.
                        popstat = theano.ifelse.ifelse(long_sequence_is_long,
                                                       popstat[-1], popstat[t])
                    else:
                        popstat = None
                    popstats_by_key[key][stat] = popstat

            atilde, btilde = T.dot(h, p.Wa), T.dot(x, p.Wx)
            # Each bn_* call is unpacked as (normalized, mean, var).
            a_normal, a_mean, a_var = self.bn_a.construct_graph(
                atilde, baseline=args.baseline, **popstats_by_key["a"])
            b_normal, b_mean, b_var = self.bn_b.construct_graph(
                btilde, baseline=args.baseline, **popstats_by_key["b"])
            ab = a_normal + b_normal
            # Split the pre-activations into four num_hidden-wide slices:
            # the first goes through self.activation, the rest through sigmoid.
            g, f, i, o = [
                fn(ab[:, j * args.num_hidden:(j + 1) * args.num_hidden])
                for j, fn in enumerate([self.activation] +
                                       3 * [T.nnet.sigmoid])
            ]
            c = dummy_c + f * c + i * g
            c_normal, c_mean, c_var = self.bn_c.construct_graph(
                c, baseline=args.baseline, **popstats_by_key["c"])
            h = dummy_h + o * self.activation(c_normal)

            # Collect the outputs explicitly instead of reading `locals()`
            # inside a list comprehension: on Python 3.0-3.11 a comprehension
            # has its own scope, so that lookup would raise KeyError.
            step_values = {
                "h": h,
                "c": c,
                "atilde": atilde,
                "btilde": btilde,
                "a_mean": a_mean,
                "a_var": a_var,
                "b_mean": b_mean,
                "b_var": b_var,
                "c_mean": c_mean,
                "c_var": c_var,
            }
            return [step_values[name] for name in output_names]

        sequences = [
            t, long_sequence_is_long, x, dummy_states["h"], dummy_states["c"]
        ]
        # Initial h and c: the learned vectors repeated across the batch.
        outputs_info = [
            T.repeat(p.h0[None, :], batch_size, axis=0),
            T.repeat(p.c0[None, :], batch_size, axis=0),
        ]
        # The remaining outputs are not fed back into the next step.
        outputs_info.extend([None] * (len(output_names) - len(outputs_info)))

        outputs, updates = theano.scan(stepfn,
                                       sequences=sequences,
                                       outputs_info=outputs_info)
        outputs = dict(zip(output_names, outputs))

        if not args.baseline and not args.use_population_statistics:
            # prepare population statistic estimation
            popstats = dict()
            alpha = 0.05
            for key, size in zip(
                    "abc",
                [4 * args.num_hidden, 4 * args.num_hidden, args.num_hidden]):
                # means start at 0, variances at 1
                for stat, init in zip("mean var".split(), [0, 1]):
                    name = "%s_%s" % (key, stat)
                    popstats[name] = theano.shared(init + np.zeros(
                        (
                            length,
                            size,
                        ), dtype=theano.config.floatX),
                                                   name=name)
                    popstats[name].tag.estimand = outputs[name]
                    # exponential moving average of the per-step statistic
                    updates[popstats[name]] = (alpha * outputs[name] +
                                               (1 - alpha) * popstats[name])

        return outputs, updates, dummy_states, popstats
Exemple #43
0
 def jacobian_det(self, x):
     """Return zeros shaped like `x`, summed over the last axis.

     The result is therefore all zeros with the shape of `x` minus its
     final axis.
     NOTE(review): presumably the Jacobian term of a transform that does
     not change volume -- confirm against the enclosing class.
     """
     return tt.sum(tt.zeros(x.shape), axis=-1)