def GetClassifier(self):

    def PairwiseLoss(x, mutation):
      """
      This function takes two matrix and return a vector by first
      calculating the loss for each row and then take element-wise
      maximum for each row.
      """
      return T.maximum(0., 1. - self.F(x) + self.F(mutation)).sum()

    inputs = T.tensor3(name='input', dtype='int32')
    mutations = T.tensor3(name='mutations', dtype='int32')

    components, updates = theano.scan(fn=PairwiseLoss,
                                      outputs_info=None,
                                      sequences=[inputs, mutations])
    loss = components.sum()

    gparams = [T.grad(loss, param) for param in self.params]

    updates = [(param, param - self.learning_rate * gparam)
               for param, gparam in zip(self.params, gparams)]

    return theano.function(inputs=[inputs, mutations],
                           outputs=loss,
                           updates=updates)
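A rough plain-numpy illustration of the pairwise hinge loss above (the scores are made-up numbers, and F is assumed to produce one score per row):

import numpy as np

f_x = np.array([2.0, 0.3, 1.5])    # hypothetical scores F(x), one per row
f_mut = np.array([0.5, 0.9, 1.4])  # hypothetical scores F(mutation), one per row
loss = np.maximum(0.0, 1.0 - f_x + f_mut).sum()
print(loss)  # 0.0 + 1.6 + 0.9 = 2.5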
Example #2
    def init_exprs(self):
        inpt = T.tensor3('inpt')
        if self.pooling is None:
            target = T.tensor3('target')
        else:
            target = T.matrix('target')
        pars = self.parameters

        hidden_to_hiddens = [getattr(pars, 'hidden_to_hidden_%i' % i)
                             for i in range(len(self.n_hiddens) - 1)]
        hidden_biases = [getattr(pars, 'hidden_bias_%i' % i)
                         for i in range(len(self.n_hiddens))]
        recurrents = [getattr(pars, 'recurrent_%i' % i)
                      for i in range(len(self.n_hiddens))]
        ingate_peepholes = [getattr(pars, 'ingate_peephole_%i' % i)
                            for i in range(len(self.n_hiddens))]
        outgate_peepholes = [getattr(pars, 'outgate_peephole_%i' % i)
                             for i in range(len(self.n_hiddens))]
        forgetgate_peepholes = [getattr(pars, 'forgetgate_peephole_%i' % i)
                                for i in range(len(self.n_hiddens))]

        self.exprs = self.make_exprs(
            inpt, target,
            pars.in_to_hidden, hidden_to_hiddens, pars.hidden_to_out,
            hidden_biases, recurrents, pars.out_bias,
            ingate_peepholes, outgate_peepholes, forgetgate_peepholes,
            self.hidden_transfers, self.out_transfer, self.loss, self.pooling,
            self.leaky_coeffs)
Example #3
    def _setup_vars(self, sparse_input):
        '''Setup Theano variables for our network.

        Parameters
        ----------
        sparse_input : bool
            Not used -- sparse inputs are not supported for recurrent networks.

        Returns
        -------
        vars : list of theano variables
            A list of the variables that this network requires as inputs.
        '''
        _warn_dimshuffle()

        assert not sparse_input, 'Theanets does not support sparse recurrent models!'

        # the first dimension indexes time, the second indexes the elements of
        # each minibatch, and the third indexes the variables in a given frame.
        self.x = TT.tensor3('x')

        # for a regressor, this specifies the correct outputs for a given input.
        self.targets = TT.tensor3('targets')

        # the weights are the same shape as the output and specify the strength
        # of each entry in the error computation.
        self.weights = TT.tensor3('weights')

        if self.weighted:
            return [self.x, self.targets, self.weights]
        return [self.x, self.targets]
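The comments above fix the (time, batch, features) layout of the tensor3 inputs. A minimal sketch of numpy arrays matching that layout (the sizes are illustrative assumptions):

import numpy as np

time_steps, batch_size, n_features = 20, 8, 3            # illustrative sizes
x_batch = np.zeros((time_steps, batch_size, n_features), dtype='float32')
targets_batch = np.zeros_like(x_batch)                   # correct outputs per frame
weights_batch = np.ones_like(x_batch)                    # per-entry error weights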
Example #4
    def __init__(self, rng, dim_proj, W=None, U=None, b=None):
        self._init_params(rng, dim_proj, W, U, b, 5)
        word_matrix = T.tensor3('Word matrix', dtype=config.floatX) 

        c_mask = T.matrix('Child mask', dtype=config.floatX)
        node_mask = T.matrix('Node mask', dtype=config.floatX)
        children = T.tensor3('Children', dtype='int64')
        
        self.X = word_matrix
        self.mask = node_mask
        self.c_mask = c_mask

        self.input = [word_matrix, children, c_mask, node_mask]
        n_samples = word_matrix.shape[1]

        self.h, self.c_memory = self.project(word_matrix, children, c_mask)
        all_samples = T.arange(n_samples)

        self.max_pooled_h = (self.h * node_mask[:, :, None]).max(axis=0) 
        self.sum_pooled_h = (self.h * node_mask[:, :, None]).sum(axis=0) 

        self.mean_pooled_h = self.sum_pooled_h /\
                T.maximum(c_mask.sum(axis=0)[:, None], 1)
        num_inner_nodes = c_mask.sum(axis=0).astype('int64')
        num_nodes = num_inner_nodes * 2 + 1
        self.top_h = self.h[num_nodes - 1, all_samples, :]
Example #5
    def __init__(self, inpShape, outputNum, clip):
        num_units = 256
        # By setting the first two dimensions as None, we are allowing them to vary
        # They correspond to batch size and sequence length, so we will be able to
        # feed in batches of varying size with sequences of varying length.
        self.l_inp = InputLayer(inpShape)
        # We can retrieve symbolic references to the input variable's shape, which
        # we will later use in reshape layers.
        batchsize, seqlen, _ = self.l_inp.input_var.shape
        self.l_lstm = LSTMLayer(self.l_inp, num_units=num_units)
        # In order to connect a recurrent layer to a dense layer, we need to
        # flatten the first two dimensions (our "sample dimensions"); this will
        # cause each time step of each sequence to be processed independently
        l_shp = ReshapeLayer(self.l_lstm, (-1, num_units))
        self.l_dense = DenseLayer(l_shp, num_units=outputNum)
        # To reshape back to our original shape, we can use the symbolic shape
        # variables we retrieved above.
        self.l_out = ReshapeLayer(self.l_dense, (batchsize, seqlen, outputNum))

        net_output = lasagne.layers.get_output(self.l_out)
        truth = T.tensor3()
        mask = T.tensor3()
        loss = T.mean(mask*(net_output-truth)**2)

        params = lasagne.layers.get_all_params(self.l_out)
        grads = lasagne.updates.total_norm_constraint(T.grad(loss, params), clip)
        update = lasagne.updates.rmsprop(grads, params, 0.002)

        self.train = theano.function([self.l_inp.input_var, truth, mask], loss, updates=update)
        self.get_output = theano.function([self.l_inp.input_var], outputs=net_output)
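The reshape trick used in the constructor above (flatten batch and time before the dense layer, then restore the original shape) can be checked with plain numpy; all sizes here are illustrative assumptions:

import numpy as np

batchsize, seqlen, num_units, output_num = 4, 7, 256, 10
lstm_out = np.random.randn(batchsize, seqlen, num_units)
flat = lstm_out.reshape(-1, num_units)                   # (batchsize * seqlen, num_units)
dense_out = flat.dot(np.random.randn(num_units, output_num))
restored = dense_out.reshape(batchsize, seqlen, output_num)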
def main():
    pars = "model/4GRAM_BI/76.69"
    print("Loading data...")
    (feats_in,feats_out) = iodata.iodata_forPre()
    feats_in = np.array(feats_in).astype(theano.config.floatX)
    print("{}".format((QUESTION_SIZE*(NGRAMS+1)*NUM_CHOICES,1,WORD_2_VEC_FEATURES)))
    feats_out = np.array(feats_out).astype(theano.config.floatX).reshape((QUESTION_SIZE*(NGRAMS+1)*NUM_CHOICES,1,WORD_2_VEC_FEATURES))
    #print(feats_out.shape)
    #print(feats_in)
    #print(lenfeats_out)
    output_layer = build_model(bi_directional = True)
    network.layers.set_all_param_values(output_layer, pickle.load(open(pars, "r")))
    x = T.tensor3('x', dtype=theano.config.floatX)
    y = T.tensor3('y', dtype=theano.config.floatX)
    cos_distance_ls = np.zeros((QUESTION_SIZE,NUM_CHOICES))
    predict = theano.function([x,y],calculate_cos_dis(output_layer.get_output(x,deterministic=True),y),on_unused_input='ignore')

    for index in range(QUESTION_SIZE):
        try:
            print(feats_in[(index)*NUM_CHOICES:(index+1)*NUM_CHOICES].shape)
            print(feats_out[(index)*NUM_CHOICES:(index+1)*NUM_CHOICES].shape)
            pred  = predict(feats_in[(index)*NUM_CHOICES:(index+1)*NUM_CHOICES],feats_out[(index)*NUM_CHOICES:(index+1)*NUM_CHOICES])
            print("OHOHOH")
        except RuntimeError:
            # skip this question if prediction fails; otherwise pred would be undefined below
            continue
        cos_distance_ls[index,:] = cos_distance_ls[index,:] + pred
Example #7
    def init_model(self):
        print('Initializing model...')
        ra_input_var = T.tensor3('raw_audio_input')
        mc_input_var = T.tensor3('melody_contour_input')
        target_var = T.imatrix('targets')
        network = self.build_network(ra_input_var, mc_input_var)
        prediction = layers.get_output(network)
        prediction = T.clip(prediction, 1e-7, 1.0 - 1e-7)
        loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        loss = loss.mean()
        params = layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.sgd(loss, params, learning_rate=0.02)

        test_prediction = layers.get_output(network, deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                                target_var)
        test_loss = test_loss.mean()
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), T.argmax(target_var, axis=1)),
                          dtype=theano.config.floatX)

        print('Building functions...')
        self.train_fn = theano.function([ra_input_var, mc_input_var, target_var], 
                                        [loss, prediction], 
                                        updates=updates, 
                                        on_unused_input='ignore')
        self.val_fn = theano.function([ra_input_var, mc_input_var, target_var], 
                                        [test_loss, test_acc, test_prediction], 
                                        on_unused_input='ignore')
        self.run_fn = theano.function([ra_input_var, mc_input_var],
                                        [prediction],
                                        on_unused_input='ignore')
def build_decoder(tparams, options):
    """
    build a decoder (a text encoder followed by a hypothesis decoder), given pre-computed word embeddings
    """

    # description string: #words x #samples
    # text: text sentence
    # hypothesis: hypothesis sentence
    text_embedding = tensor.tensor3('text_embedding', dtype='float32')
    # text = tensor.matrix('text', dtype='int64')
    text_mask = tensor.matrix('text_mask', dtype='float32')
    hypothesis_embedding = tensor.tensor3('hypothesis_embedding', dtype='float32')
    # hypothesis = tensor.matrix('hypothesis', dtype='int64')
    hypothesis_mask = tensor.matrix('hypothesis_mask', dtype='float32')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, text_embedding, None, options,
                                            prefix='encoder',
                                            mask=text_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx

    # decoder (hypothesis)
    proj_hypo = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx, options,
                                             prefix='decoder_f',
                                             mask=hypothesis_mask)

    hypo_ctx = proj_hypo[0][-1]

    return text_embedding, text_mask, hypothesis_embedding, hypothesis_mask, hypo_ctx
Example #9
def build_model_EvoMN(options, tparams):
    trng = RandomStreams(SEED)
    use_noise = theano.shared(numpy_floatX(0.))
    use_linear = theano.shared(numpy_floatX(0.))
    
    x = tensor.tensor3('x', dtype='int64') # x is n_sent * n_word * n_samples
    xmask = tensor.tensor3('xmask', dtype=config.floatX) # same as x
    q = tensor.matrix('q', dtype='int64') # q is nword * n_samples
    qmask = tensor.matrix('qmask', dtype=config.floatX)
    y = tensor.vector('y',dtype='int64') # nsamples * 1
    nhops = tensor.scalar('nhops',dtype='int64') # nhops, used to loop.
    wmat = tensor.matrix('wmat',dtype=config.floatX) # dim_word * (maxSentLen+1)
    
    aEmbSeq, bEmbSeq, qSeq = memLayers(tparams, options, x, xmask, q, qmask, nhops, wmat, use_linear)
    proj = qSeq[-1] # nsamples * dim_hidden
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)
    
    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['Wemb_B_' + str(options['nhops'])].T)) # nsamples * vocab_size
    pred_ans = pred.argmax(axis=1) # nsamples vector
    off = 1e-7
    cost = -tensor.log(pred[tensor.arange(y.shape[0]), y] + off).sum()
    
    f_debug = theano.function([x,xmask,q,qmask,y,nhops,wmat], [x,xmask,q,qmask,y,nhops,wmat,proj,pred,pred_ans,aEmbSeq,bEmbSeq,qSeq],name='f_debug')
    print 'f_debug complete~'
    f_pred = theano.function([x,xmask,q,qmask,nhops,wmat], pred, name='f_pred')
    print 'f_pred complete~'
    f_ans = theano.function([x,xmask,q,qmask,nhops,wmat], pred_ans, name='f_ans')
    print 'f_ans complete~'
    
    return use_noise, use_linear, x, xmask, q, qmask, y, nhops, wmat, proj, pred, pred_ans, cost, f_debug, f_pred, f_ans
Example #10
def main():

    """
    a = tensor.tensor3('a')
    b = tensor.tensor3('b')
    c = tensor.tensor3('c')
    d = tensor.concatenate([a,b,c], axis=0)
    f = theano.function([a,b,c],d)
    aval = np.array([[[2,2,2,2]],[[2,2,2,2]], [[2,2,2,2]]])
    bval = 2*aval
    cval = 3*aval
    ans = f(a=aval, b=bval, c = None)
    print(ans)
    print(ans.shape)
    """

    a = tensor.tensor3("a")
    b = tensor.tensor3("b")
    c = tensor.tensor3("c")
    d = con(rep0=a, rep1=b, rep2=c)
    f = theano.function([a, b, c], d)
    aval = np.array([[[2, 2, 2, 2]], [[2, 2, 2, 2]], [[2, 2, 2, 2]]])
    tval = np.zeros(aval.shape)
    print(tval)
    print(tval.shape)
    bval = 2 * aval
    cval = 3 * aval
    ans = f(a=aval, b=bval, c=cval)
    print(ans)
    print(ans.shape)
    print(ans.ndim)

    """
Example #11
def build_model(args):
    x = tensor.tensor3('features', dtype=floatX)
    y = tensor.tensor3('targets', dtype=floatX)

    linear = Linear(input_dim=1, output_dim=4 * args.units)
    rnn = LSTM(dim=args.units, activation=Tanh())
    linear2 = Linear(input_dim=args.units, output_dim=1)

    prediction = Tanh().apply(linear2.apply(rnn.apply(linear.apply(x))))

    prediction = prediction[:-1, :, :]

    # SquaredError does not work on 3D tensor
    y = y.reshape((y.shape[0] * y.shape[1], y.shape[2]))
    prediction = prediction.reshape((prediction.shape[0] * prediction.shape[1],
                                     prediction.shape[2]))

    cost = SquaredError().apply(y, prediction)

    # Initialization
    linear.weights_init = IsotropicGaussian(0.1)
    linear2.weights_init = IsotropicGaussian(0.1)
    linear.biases_init = Constant(0)
    linear2.biases_init = Constant(0)
    rnn.weights_init = Orthogonal()

    return cost
Example #12
 def step_fun(self):
     if self._step_fun is None:
         inputs = T.matrix('inputs')
         states_tm1 = [T.matrix('state_%d_%d_tm1' % (layer, state))
                       for layer in range(self.n_layers)
                       for state in range(self.gate0.n_states)]
         if self.gates[-1].use_attention:
             raise NotImplementedError('Stacked RNN with attention')
             attended=T.tensor3('attended')
             attended_dot_u=T.tensor3('attended_dot_u')
             attention_mask=T.matrix('attention_mask')
             self._step_fun = function(
                     [inputs] + states_tm1 + [
                         attended, attended_dot_u, attention_mask],
                     self.step(*([inputs, T.ones(inputs.shape[:-1])] +
                                 states_tm1 + [T.ones_like(states_tm1[0]),
                                 attended, attended_dot_u,
                                 attention_mask])),
                     name='%s_step_fun'%self.name)
         else:
             self._step_fun = function(
                     [inputs] + states_tm1,
                     self.step(*([inputs, T.ones(inputs.shape[:-1])] +
                               states_tm1 + [T.ones_like(states_tm1[0])])),
                     name='%s_step_fun'%self.name)
     return self._step_fun
Example #13
    def mulclassfunc(self, layerid, wdecay):
        # mult-label loss
        x = []
        for j in range(self.emb_num):
            x.append(T.tensor3(dtype = 'int32'))
        y = T.tensor3(dtype = 'int8')
        label = T.matrix(dtype = 'int8')

        iin = []
        iin.extend(x)
        iin.append(y)
        iin.append(label)


        wikiloss = self.mulclassloss(layerid,x,y,label)

        loss = wikiloss + self.l2reg(self.unsuperw, wdecay)

        w = self.unsuperw
        witems = w.values()
        if not self.fix_emb:
            witems += self.dicw.values()

        g = T.grad(loss, witems)
        up = self.upda(g,witems,self.lrate, self.mweight,self.opt,self.fix_emb)

        mulclassfunc = theano.function(iin, loss, updates = up)
        return mulclassfunc
Example #14
def initialize_data_nodes(loss_function, input_type, out_every_t):
    x = T.tensor3() if input_type == 'real' else T.matrix(dtype=INT_STR)
    if loss_function == 'CE':
        y = T.matrix(dtype=INT_STR) if out_every_t else T.vector(dtype=INT_STR)
    else:
        y = T.tensor3() if out_every_t else T.matrix()
    return x, y
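A hedged usage sketch for the helper above; the argument values are illustrative, and INT_STR is assumed to be an integer dtype string (e.g. 'int32') defined elsewhere in the module:

# real-valued inputs with per-step cross-entropy targets
x, y = initialize_data_nodes(loss_function='CE', input_type='real', out_every_t=True)
# here x is a float tensor3 and y an integer matrix of per-step labels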
Example #15
    def test_reset_only_many_steps(self):
        x = tensor.tensor3('x')
        ri = tensor.tensor3('ri')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, reset_inputs=ri, mask=mask)
        calc_h = theano.function(inputs=[x, ri, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=floatX)
        x_val = numpy.ones((24, 4, 3), dtype=floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        mask_val = numpy.ones((24, 4), dtype=floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=floatX)
        W = self.reset_only.state_to_state.get_value()
        U = self.reset_only.state_to_reset.get_value()

        for i in range(1, 25):
            r_val = numpy.tanh(h_val[i - 1].dot(U) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W) +
                                  x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(h_val, calc_h(x_val, ri_val,  mask_val)[0], 1e-03)
    def __init__(self,n_in,n_hidden,n_out):
        self.n_in=int(n_in)
        self.n_hidden=int(n_hidden)
        self.n_out=int(n_out)
        self.input= T.tensor3()
        self.output= T.tensor3()
        self.x_mask=T.matrix()
        #self.y_mask=T.matrix()

        
        self.W_z = glorot_normal((n_out,n_hidden))
        self.U_z = glorot_normal((n_hidden,n_hidden))
        self.b_z = zero((n_hidden,))

        self.W_r = glorot_normal((n_out,n_hidden))
        self.U_r = glorot_normal((n_hidden,n_hidden))
        self.b_r = zero((n_hidden,))

        self.W_h = glorot_normal((n_out,n_hidden)) 
        self.U_h = glorot_normal((n_hidden,n_hidden))
        self.b_h = zero((n_hidden,))
        
        self.U_att= glorot_normal((self.n_in,1)) 
        self.b_att= zero((1,))

        self.W_yc=glorot_normal((self.n_out,))
        

        self.W_cy = glorot_normal((self.n_in,self.n_hidden))
        self.W_cs= glorot_normal((self.n_in,self.n_hidden))

        
        self.W_ha = glorot_normal((self.n_in,self.n_in))
        self.W_sa= glorot_normal((self.n_hidden,self.n_in))
        

        
        self.W_cl= glorot_normal((self.n_in,self.n_out))
        self.W_yl= glorot_normal((self.n_out,self.n_out))
        self.W_hl= glorot_normal((self.n_hidden,self.n_out))
        
        self.params=[self.W_z,self.U_z,self.b_z,self.W_r,self.U_r,self.b_r,
                   self.W_h,self.U_h,self.b_h,self.W_cy,self.W_cs,self.W_ha,self.W_sa
                     ,self.W_cl,self.W_yl,self.W_hl,self.U_att,self.b_att]
        

        self.L1 = T.sum(abs(self.W_z))+T.sum(abs(self.U_z))+\
                  T.sum(abs(self.W_r))+T.sum(abs(self.U_r))+\
                  T.sum(abs(self.W_h))+T.sum(abs(self.U_h))+\
                  T.sum(abs(self.W_cy))+T.sum(abs(self.W_cs))+\
                  T.sum(abs(self.W_ha))+T.sum(abs(self.W_sa))+\
                  T.sum(abs(self.W_cl))+T.sum(abs(self.W_yl))+\
                  T.sum(abs(self.W_hl))+T.sum(abs(self.U_att))
        self.L2_sqr = T.sum(self.W_z**2) + T.sum(self.U_z**2)+\
                      T.sum(self.W_r**2) + T.sum(self.U_r**2)+\
                      T.sum(self.W_h**2) + T.sum(self.U_h**2)+\
                      T.sum(self.W_cy**2) + T.sum(self.W_cs**2)+\
                      T.sum(self.W_ha**2) + T.sum(self.W_sa**2)+\
                      T.sum(self.W_cl**2) + T.sum(self.W_yl**2)+\
                      T.sum(self.W_hl**2) + T.sum(self.U_att**2)
Example #17
def test7():
    morph = T.tensor3("morph")
    morph_mask = T.tensor3("mask")
    rel = T.matrix("rel")
    morphStruct = MorphStruct()
    morph_out = morphStruct.apply(morph , morph_mask , rel)
    fn = theano.function(inputs = [morph , morph_mask , rel] ,outputs = [morph_out] , on_unused_input='ignore')
    #rel : batch * sentence
    #state_below_morph : batch * sentence * n_emb_morph
    i = [
            [
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4]
            ],
            [
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4]
            ],
            [
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4] ,
                [1,1,1,1,1] , [2,2,2,2,2] , [3,3,3,3,3] , [4,4,4,4,4]
            ]
        ]
    m = i
    r = [[1,1,2,1,3,1,3,0,0,0] , [1,2,1,2,1,1,1,1,1,1],[3,1,1,5,1,1,0,0,0,0]]
    #res = fn(i , m , r)
    mat = np.array(i)
    print mat.shape
    print mat.sum(2)
    print mat.sum(1).shape
def set_evaluation_function(generator_rnn_model,
                            generator_output_model):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                    dtype=floatX)
    # set generator input data list
    generator_input_data_list = [input_sequence,]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)
    generator_hidden = generator_output[0]
    generator_cell   = generator_output[1]

    generator_sample = get_tensor_output(generator_hidden, generator_output_model, is_training=True)

    # get square error
    square_error = tensor.sqr(target_sequence-generator_sample).sum(axis=2)

    # set evaluation inputs
    evaluation_inputs  = [input_sequence,
                          target_sequence]

    # set evaluation outputs
    evaluation_outputs = [square_error,]

    # set evaluation function
    evaluation_function = theano.function(inputs=evaluation_inputs,
                                          outputs=evaluation_outputs,
                                          on_unused_input='ignore')

    return evaluation_function
Example #19
def generate_subpop_input(r_E, r_I, n_pairs):
    
    c = T.scalar("c", dtype='float32')
    h = T.matrix("h", dtype='float32')
    W_EE = T.tensor3("W_EE", dtype='float32')
    W_EI = T.tensor3("W_EI", dtype='float32')
    W_IE = T.tensor3("W_IE", dtype='float32')
    W_II = T.tensor3("W_II", dtype='float32')

    r_e = T.matrix("r_e", dtype='float32')
    r_i = T.matrix("r_i", dtype='float32')

    I_E = T.matrix('I_E', dtype='float32')
    I_I = T.matrix('I_I', dtype='float32')

    I_thresh_E = T.matrix('I_thresh_E', dtype='float32')
    I_thresh_I = T.matrix('I_thresh_I', dtype='float32')

    # Compile functions:
    I_E = c*h + T.sum(T.sum(W_EE*r_e,1),1).reshape((n_pairs, n_pairs)).T - T.sum(T.sum(W_EI*r_i,1),1).reshape((n_pairs, n_pairs)).T
    I_I = c*h + T.sum(T.sum(W_IE*r_e,1),1).reshape((n_pairs, n_pairs)).T - T.sum(T.sum(W_II*r_i,1),1).reshape((n_pairs, n_pairs)).T

    I_thresh_E = T.switch(T.lt(I_E,0), 0, I_E)
    I_thresh_I = T.switch(T.lt(I_I,0), 0, I_I)

    inputs = theano.function(inputs=[c,h,W_EE,W_EI,W_IE,W_II],
                                outputs=[I_thresh_E, I_thresh_I],
                                givens={r_e:r_E, r_i:r_I},
                                allow_input_downcast=True)
    return inputs
Example #20
    def multaskfunc(self,layerid,wdecay, LMweight):
        #language model + multiple label classification
        # multi-label loss
        x = []
        for j in range(self.emb_num):
            x.append(T.tensor3(dtype = 'int32'))
        y = T.tensor3(dtype = 'int8')
        label = T.matrix(dtype = 'int8')
        nextwords = T.imatrix()

        iin = []
        iin.extend(x)
        iin.append(y)
        iin.append(label)
        iin.append(nextwords)

        mulloss, posLMloss, negLMloss = self.LMmulcloss(layerid,x,y,label,nextwords)

        loss = mulloss + LMweight*(posLMloss+negLMloss)+self.l2reg(self.unsuperw, wdecay)
        w = self.unsuperw
        witems = w.values()
        if not self.fix_emb:
            witems += self.dicw.values()

        g = T.grad(loss, witems)
        up = self.upda(g,witems,self.lrate, self.mweight,self.opt,self.fix_emb)

        mtaskfunc = theano.function(iin, loss, updates = up)
        return mtaskfunc
Example #21
    def _get_net(self):
        net = OrderedDict()

        net['l_in_x'] = InputLayer(shape=(None, None, TOKEN_REPRESENTATION_SIZE),
                                   input_var=T.tensor3(name="enc_ix"),
                                   name="encoder_seq_ix")

        net['l_in_y'] = InputLayer(shape=(None, None, TOKEN_REPRESENTATION_SIZE),
                                   input_var=T.tensor3(name="dec_ix"),
                                   name="decoder_seq_ix")

        # encoder ###############################################
        net['l_enc'] = LSTMLayer(
            incoming=net['l_in_x'],
            num_units=HIDDEN_LAYER_DIMENSION,
            grad_clipping=GRAD_CLIP,
            only_return_final=True,
            name='lstm_encoder'
        )

        # decoder ###############################################

        net['l_dec'] = LSTMLayer(
            incoming=net['l_in_y'],
            num_units=HIDDEN_LAYER_DIMENSION,
            hid_init=net['l_enc'],
            grad_clipping=GRAD_CLIP,
            name='lstm_decoder'
        )

        # decoder returns the batch of sequences of thought vectors, each of which corresponds to a decoded token
        # reshape this 3d tensor to a 2d matrix so that the next Dense layer can convert each thought vector to a
        # probability distribution vector

        # output ###############################################
        # cut off the last prob vectors for every prob sequence:
        # they correspond to the tokens that go after EOS_TOKEN and we are not interested in them
        net['l_slice'] = SliceLayer(
            incoming=net['l_dec'],
            indices=slice(0, -1),  # keep all but the last token
            axis=1,  # sequences axis
            name='slice_layer'
        )

        net['l_dec_long'] = ReshapeLayer(
            incoming=net['l_slice'],
            shape=(-1, HIDDEN_LAYER_DIMENSION),
            name='reshape_layer'
        )

        net['l_dist'] = DenseLayer(
            incoming=net['l_dec_long'],
            num_units=self.vocab_size,
            nonlinearity=lasagne.nonlinearities.softmax,
            name="dense_output_probas"
        )

        # don't need to reshape back, can compare this "long" output with true one-hot vectors

        return net
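As a hedged illustration of the reshaping described in the comments above, the decoder output flows roughly through these shapes (symbolic sizes, not values taken from the snippet):

# decoder output (l_dec):   (batch, seq_len, HIDDEN_LAYER_DIMENSION)
# after l_slice:            (batch, seq_len - 1, HIDDEN_LAYER_DIMENSION)
# after l_dec_long reshape: (batch * (seq_len - 1), HIDDEN_LAYER_DIMENSION)
# after l_dist softmax:     (batch * (seq_len - 1), vocab_size) probability rows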
Example #22
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        x = tensor.tensor3('x', dtype=floatX)
        y = tensor.tensor3('y', dtype=floatX)

        x_to_lstm = Linear(name="x_to_lstm", input_dim=input_size, output_dim=4 * hidden_size,
                           weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm = LSTM(dim=hidden_size, name="lstm", weights_init=IsotropicGaussian(), biases_init=Constant(0))
        lstm_to_output = Linear(name="lstm_to_output", input_dim=hidden_size, output_dim=output_size,
                                weights_init=IsotropicGaussian(), biases_init=Constant(0))

        x_transform = x_to_lstm.apply(x)
        h, c = lstm.apply(x_transform)

        y_hat = lstm_to_output.apply(h)
        y_hat = Logistic(name="y_hat").apply(y_hat)

        self.cost = BinaryCrossEntropy(name="cost").apply(y, y_hat)

        x_to_lstm.initialize()
        lstm.initialize()
        lstm_to_output.initialize()

        self.computation_graph = ComputationGraph(self.cost)
def set_tf_update_function(input_emb_param,
                           generator_rnn_model,
                           generator_output_model,
                           generator_optimizer,
                           generator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                     dtype=floatX)

    # embedding sequence
    input_emb_sequence  = tensor.dot(input_sequence, input_emb_param)
    target_emb_sequence = tensor.dot(target_sequence, input_emb_param)

    # set generator input data list
    generator_input_data_list = [input_emb_sequence,]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)
    generator_hidden = generator_output[0]
    generator_cell   = generator_output[1]

    generator_emb_sequence = get_tensor_output(generator_hidden, generator_output_model, is_training=True)
    generator_sequence     = tensor.dot(generator_emb_sequence, tensor.transpose(input_emb_param))

    # get square error
    square_error = tensor.sqr(target_sequence-generator_sequence).sum(axis=2)

    # set generator update
    tf_updates_cost = square_error.mean()
    tf_updates_dict = get_model_and_params_updates(layers=generator_rnn_model+generator_output_model,
                                                   params=[input_emb_param,],
                                                   cost=tf_updates_cost,
                                                   optimizer=generator_optimizer)

    generator_gradient_dict  = get_model_and_params_gradients(layers=generator_rnn_model+generator_output_model,
                                                              params=[input_emb_param,],
                                                              cost=tf_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set tf update inputs
    tf_updates_inputs  = [input_sequence,
                          target_sequence]

    # set tf update outputs
    tf_updates_outputs = [square_error,
                          generator_gradient_norm,]

    # set tf update function
    tf_updates_function = theano.function(inputs=tf_updates_inputs,
                                          outputs=tf_updates_outputs,
                                          updates=tf_updates_dict,
                                          on_unused_input='ignore')

    return tf_updates_function
    def __init__(self):
        print("Initialising network...")
        import theano
        import theano.tensor as T
        import lasagne
        from lasagne.layers import (InputLayer, LSTMLayer, ReshapeLayer, 
                                    ConcatLayer, DenseLayer)
        theano.config.compute_test_value = 'raise'

        # Construct LSTM RNN: One LSTM layer and one dense output layer
        l_in = InputLayer(shape=input_shape)

        # setup fwd and bck LSTM layer.
        l_fwd = LSTMLayer(
            l_in, N_HIDDEN, backwards=False, learn_init=True, peepholes=True)
        l_bck = LSTMLayer(
            l_in, N_HIDDEN, backwards=True, learn_init=True, peepholes=True)

        # concatenate forward and backward LSTM layers
        concat_shape = (N_SEQ_PER_BATCH * SEQ_LENGTH, N_HIDDEN)
        l_fwd_reshape = ReshapeLayer(l_fwd, concat_shape)
        l_bck_reshape = ReshapeLayer(l_bck, concat_shape)
        l_concat = ConcatLayer([l_fwd_reshape, l_bck_reshape], axis=1)

        l_recurrent_out = DenseLayer(l_concat, num_units=N_OUTPUTS, 
                                     nonlinearity=None)
        l_out = ReshapeLayer(l_recurrent_out, output_shape)

        input = T.tensor3('input')
        target_output = T.tensor3('target_output')

        # add test values
        input.tag.test_value = rand(
            *input_shape).astype(theano.config.floatX)
        target_output.tag.test_value = rand(
            *output_shape).astype(theano.config.floatX)

        print("Compiling Theano functions...")
        # Cost = mean squared error
        cost = T.mean((l_out.get_output(input) - target_output)**2)

        # Use NAG for training
        all_params = lasagne.layers.get_all_params(l_out)
        updates = lasagne.updates.nesterov_momentum(cost, all_params, LEARNING_RATE)

        # Theano functions for training, getting output, and computing cost
        self.train = theano.function(
            [input, target_output],
            cost, updates=updates, on_unused_input='warn',
            allow_input_downcast=True)

        self.y_pred = theano.function(
            [input], l_out.get_output(input), on_unused_input='warn',
            allow_input_downcast=True)

        self.compute_cost = theano.function(
            [input, target_output], cost, on_unused_input='warn',
            allow_input_downcast=True)

        print("Done initialising network.")
Example #25
    def init_exprs(self):
        inpt_mean = T.tensor3('inpt_mean')
        inpt_var = T.tensor3('inpt_var')
        target = T.tensor3('target')
        pars = self.parameters

        hidden_to_hiddens = [getattr(pars, 'hidden_to_hidden_%i' % i)
                             for i in range(len(self.n_hiddens) - 1)]
        hidden_biases = [getattr(pars, 'hidden_bias_%i' % i)
                         for i in range(len(self.n_hiddens))]
        hidden_var_biases_sqrt = [1 if i else 0 for i in self.use_varprop_at]
        recurrents = [getattr(pars, 'recurrent_%i' % i)
                      for i in range(len(self.n_hiddens))]
        initial_hiddens = [getattr(pars, 'initial_hidden_%i' % i)
                           for i in range(len(self.n_hiddens))]

        self.exprs = self.make_exprs(
            inpt_mean, inpt_var, target,
            pars.in_to_hidden, hidden_to_hiddens, pars.hidden_to_out,
            hidden_biases, hidden_var_biases_sqrt,
            initial_hiddens, recurrents, pars.out_bias,
            self.hidden_transfers, self.out_transfer, self.loss,
            self.pooling, self.leaky_coeffs,
            [self.p_dropout_inpt] + [self.p_dropout_hidden] * len(recurrents),
            self.hotk_inpt)
Example #26
def set_evaluation_function(generator_model):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                    dtype=floatX)
    # set generator input data list
    generator_input_data_list = [input_sequence,]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    generator_random = generator_output[-1]

    # get square error
    sample_cost = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    # set evaluation inputs
    evaluation_inputs  = [input_sequence,
                          target_sequence]

    # set evaluation outputs
    evaluation_outputs = [sample_cost,
                          output_sequence]

    # set evaluation function
    evaluation_function = theano.function(inputs=evaluation_inputs,
                                          outputs=evaluation_outputs,
                                          updates=generator_random,
                                          on_unused_input='ignore')

    return evaluation_function
def build_model(tparams, options):

    opt_ret = dict()

    trng = RandomStreams(1234)
    p = 0.5
    retain_prob = 1. - p
    print('dropout: {0}'.format(p))

    # description string: #words x #samples
    # text: text sentence
    # hypothesis: hypothesis sentence
    text_embedding = tensor.tensor3('text_embedding', dtype='float32')
    # text = tensor.matrix('text', dtype='int64')
    text_mask = tensor.matrix('text_mask', dtype='float32')
    hypothesis_embedding = tensor.tensor3('hypothesis_embedding', dtype='float32')
    # hypothesis = tensor.matrix('hypothesis', dtype='int64')
    hypothesis_mask = tensor.matrix('hypothesis_mask', dtype='float32')

    label = tensor.vector('label', dtype='int64')

    # encoder
    proj = get_layer(options['encoder'])[1](tparams, text_embedding, None, options,
                                            prefix='encoder',
                                            mask=text_mask)
    ctx = proj[0][-1]
    dec_ctx = ctx
    # dropout
    dec_ctx_dropped = dec_ctx
    dec_ctx_dropped *= trng.binomial(dec_ctx_dropped.shape, p=retain_prob, dtype=dec_ctx_dropped.dtype)
    dec_ctx_dropped /= retain_prob

    # decoder (hypothesis)
    proj_hypo = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx, options,
                                             prefix='h_decode_t',
                                             mask=hypothesis_mask)
    proj_hypo_dropped = get_layer(options['decoder'])[1](tparams, hypothesis_embedding, dec_ctx_dropped, options,
                                             prefix='h_decode_t',
                                             mask=hypothesis_mask)
    hypo_ctx = proj_hypo[0][-1]
    hypo_ctx_dropped = proj_hypo_dropped[0][-1]
    # dropout
    hypo_ctx_dropped *= trng.binomial(hypo_ctx_dropped.shape, p=retain_prob, dtype=hypo_ctx_dropped.dtype)
    hypo_ctx_dropped /= retain_prob


    # cost (cross entropy)

    logit = get_layer('ff')[1](tparams, hypo_ctx, options, prefix='ff_logit', activ='tensor.nnet.sigmoid')
    logit_dropped = get_layer('ff')[1](tparams, hypo_ctx_dropped, options, prefix='ff_logit', activ='tensor.nnet.sigmoid')

    # flatten logit
    logit = logit.flatten()
    logit_dropped = logit_dropped.flatten()
    cost = binary_crossentropy(logit_dropped, label)
    cost = tensor.mean(cost)
    acc = tensor.mean(tensor.eq(tensor.round(logit), label))

    return text_embedding, text_mask, hypothesis_embedding, hypothesis_mask, label, cost, acc
Example #28
 def __init_symb(self):
     """
     Initialize the symbolic variables of the model (e.g. input and output)
     :return:
     """
     self.input = TT.tensor3('input')
     self.target_output = TT.tensor3('target_output')
     self.mask = TT.matrix("mask")
    def build(self):
        x_range=T.tensor4()
        x_label=T.tensor3()
        x_action=T.tensor3()
        x_reward=T.vector()
        x_memory=T.tensor4()

        self.x_range_shared=theano.shared(np.zeros((self.batch_size,self.path_length,self.x_dim[0],self.x_dim[1]),dtype=theano.config.floatX),borrow=True)
        self.x_range_label=theano.shared(np.zeros((self.batch_size,self.path_length,self.n_classes),dtype=theano.config.floatX),borrow=True)
        self.x_range_action=theano.shared(np.zeros((self.batch_size,self.path_length,self.n_classes),dtype=theano.config.floatX),borrow=True)
        self.x_range_reward=theano.shared(np.zeros(self.batch_size,dtype=theano.config.floatX),borrow=True)
        self.x_range_memory=theano.shared(np.zeros((self.batch_size,self.path_length,self.n_classes,self.h_dim),dtype=theano.config.floatX),borrow=True)

        '''Early-stage framework model: mainly maps x to h and builds the memory'''
        D1, D2, D3 = lasagne.init.Normal(std=self.std,mean=0), lasagne.init.Normal(std=self.std,mean=0), lasagne.init.Normal(std=self.std,mean=0)
        # D1, D2, D3 = lasagne.init.Uniform(-1,1), lasagne.init.Uniform(-1,1), lasagne.init.Uniform(-1,1)

        l_range_in = lasagne.layers.InputLayer(shape=(self.batch_size,self.path_length,self.x_dim[0],self.x_dim[1]))
        # l_range_flatten = lasagne.layers.ReshapeLayer(l_range_in, [self.batch_size * self.path_length, 1, self.x_dim[0],self.x_dim[1]])
        # l_range_dense2 = lasagne.layers.DenseLayer(l_range_flatten,self.tmp_h_dim,W=D1,nonlinearity=lasagne.nonlinearities.rectify) #[bs*path_length,dimension]
        # l_range_dense2 = lasagne.layers.DenseLayer(l_range_dense2,self.tmp_h_dim,W=D1,nonlinearity=lasagne.nonlinearities.rectify) #[bs*path_length,dimension]
        #
        l_range_label = lasagne.layers.InputLayer(shape=(self.batch_size,self.path_length,self.n_classes))

        l_range_hidden=lasagne.layers.ReshapeLayer(l_range_in,[self.batch_size*self.path_length,1,self.tmp_h_dim])
        l_range_dense2_origin=lasagne.layers.ReshapeLayer(l_range_in,[self.batch_size,self.path_length,self.tmp_h_dim])


        '''Policy gradient model: obtains the action probabilities from the memory state'''
        l_range_memory_in = lasagne.layers.InputLayer(shape=(self.batch_size,self.path_length,self.n_classes,self.h_dim))
        l_range_memory = lasagne.layers.ReshapeLayer(l_range_memory_in,[self.batch_size*self.path_length,self.n_classes,self.h_dim])
        if 1:
            l_range_status=ChoiceLayer((l_range_memory,l_range_hidden),D3,D3,D3,nonlinearity=lasagne.nonlinearities.tanh) #[bs*pl,(n_class+1),dim]
            l_range_mu = lasagne.layers.ReshapeLayer(l_range_status,[self.batch_size,self.path_length,self.n_classes])

        '''Overall model parameters, update strategy, etc.'''
        hidden = lasagne.layers.helper.get_output(l_range_dense2_origin, {l_range_in: x_range,l_range_label:x_label})
        probas_range = lasagne.layers.helper.get_output(l_range_mu, {l_range_in: x_range,l_range_memory_in:x_memory,l_range_label:x_label})
        params=lasagne.layers.helper.get_all_params(l_range_mu,trainable=True)
        params=[]  # equivalent to updating only the last parameter; the others do not take part in updates
        givens = {
            x_range: self.x_range_shared,
            x_label:self.x_range_label,
            x_action: self.x_range_action,
            x_reward: self.x_range_reward,
            x_memory: self.x_range_memory
        }
        cost=-T.mean(T.sum(T.sum(T.log(probas_range)*x_action,axis=2),axis=1)*x_reward)
        grads=T.grad(cost,params)
        scaled_grads = lasagne.updates.total_norm_constraint(grads, self.max_norm)
        updates = self.update_method(scaled_grads, params, learning_rate=self.lr)

        self.output_model_range = theano.function([],[probas_range,cost,hidden],givens=givens,on_unused_input='ignore',allow_input_downcast=True)
        self.output_model_range_updates = theano.function([],[probas_range,cost,hidden],updates=updates,givens=givens,on_unused_input='ignore',allow_input_downcast=True)
        self.output_hidden = theano.function([x_range,x_label],[hidden[:,0]],on_unused_input='ignore',allow_input_downcast=True)


        self.network=l_range_mu
Example #30
    def __init__(self, n_in, n_hid, n_out, lr=0.05, batch_size=64, single_output=True, output_activation=T.nnet.softmax, cost_function='nll'):   
        self.n_in = n_in
        self.n_hid = n_hid
        self.n_out = n_out
        self.W_in = init_weight((self.n_in, self.n_hid),'W_in')
        self.W_out = init_weight((self.n_hid, self.n_out),'W_out')
        self.W_rec = init_weight((self.n_hid, self.n_hid),'W_rec', 'svd')
        self.b_hid = shared(np.zeros(shape = n_hid, dtype=dtype))
        self.b_out = shared(np.zeros(shape = n_out, dtype=dtype))

        self.params = [self.W_in,self.W_out,self.W_rec,self.b_out,self.b_hid]

        self.activation = output_activation

        def step(x_t, h_tm1):
            h_t = T.tanh(T.dot(x_t, self.W_in) + T.dot(h_tm1, self.W_rec) + self.b_hid)
            y_t = T.nnet.softmax(T.dot(h_t, self.W_out) + self.b_out)
            return [h_t, y_t]

        X = T.tensor3() # batch of sequences of vectors
        Y = T.tensor3() # batch of sequences of vectors (should be 0 when X is not null)
        if single_output:
            Y = T.matrix() 
        else:
            Y = T.tensor3()
        h0 = shared(np.zeros(shape=(batch_size,self.n_hid), dtype=dtype)) # initial hidden state                 
        lr = shared(np.cast[dtype](lr))
        
        [h_vals, y_vals], _ = theano.scan(fn=step,        
                                          sequences=X.dimshuffle(1,0,2),
                                          outputs_info=[h0, None])

        if single_output:
            self.output = y_vals[-1]            
        else:
            self.output = y_vals.dimshuffle(1,0,2)
        
        cxe = T.mean(T.nnet.binary_crossentropy(self.output, Y))
        nll = -T.mean(Y * T.log(self.output)+ (1.- Y) * T.log(1. - self.output))     
        mse = T.mean((self.output - Y) ** 2)

        cost = 0
        if cost_function == 'mse':
            cost = mse
        elif cost_function == 'cxe':
            cost = cxe
        else:
            cost = nll        

        gparams = T.grad(cost, self.params)
        updates = OrderedDict()
        for param, gparam in zip(self.params, gparams):
            updates[param] = param - gparam * lr
        
        self.loss = theano.function(inputs = [X, Y], outputs = cost)
        self.train = theano.function(inputs = [X, Y], outputs = cost, updates=updates)
        self.predictions = theano.function(inputs = [X], outputs = self.output)
        self.debug = theano.function(inputs = [X, Y], outputs = [X.shape, Y.shape, y_vals.shape, self.output.shape])
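For reference, a plain-numpy mirror of the scan step above (a sketch only; the parameter arrays are assumed to have the shapes set up in the constructor):

import numpy as np

def numpy_step(x_t, h_tm1, W_in, W_rec, b_hid, W_out, b_out):
    # h_t = tanh(x_t . W_in + h_{t-1} . W_rec + b_hid), as in the Theano step
    h_t = np.tanh(x_t.dot(W_in) + h_tm1.dot(W_rec) + b_hid)
    scores = h_t.dot(W_out) + b_out
    # row-wise softmax, matching T.nnet.softmax
    e = np.exp(scores - scores.max(axis=-1, keepdims=True))
    y_t = e / e.sum(axis=-1, keepdims=True)
    return h_t, y_t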
Example #31
    def print_layer(self):
        v = '--------------------\n'
        v += 'Read Layer ' + self.name + '\n'
        v += 'Input Shape: ' + str((self.width, self.height)) + '\n'
        return v + 'Output Shape: ' + str((self.N, self.N)) + '\n'

if __name__ == '__main__':
    # testing
    theano.config.optimizer = 'fast_compile'
    attn = TemporalAttentionLayer(batch_size=10,
                                  N=5,
                                  channels=6,
                                  use_gpu=False)

    time_mask = T.imatrix('time_mask')
    features = T.tensor3('features')

    res, (g, s2, d) = attn.run(features, time_mask)

    f = theano.function([features, time_mask], [res, g, s2, d],
                        on_unused_input='warn')

    fts = np.random.random((10, 6, 12))
    tm = np.ones((10, 12))
    tm[0, 6:] = 0
    tm[1, 4:] = 0
    tm[2, 2:] = 0
    tm[3, 8:] = 0
    tm[4, 9:] = 0
    tm[5, 1:] = 0
    tm[6, 3:] = 0
Example #32
 def __init__(self, mask_value=0.):
     super(Masking, self).__init__()
     self.mask_value = mask_value
     self.input = T.tensor3()
Example #33
def compile_theano_func_build_G_mtx():
    tau_inter_x, tau_inter_y = TT.scalar('tau_inter_x'), TT.scalar(
        'tau_inter_y')
    M, N = TT.scalar('M'), TT.scalar('N')
    m_grid, n_grid = TT.vector('m_grid'), TT.vector('n_grid')
    cross_beamShape_r, cross_beamShape_i = \
        TT.tensor3('cross_beamShape_r'), TT.tensor3('cross_beamShape_i')
    baseline_x, baseline_y = TT.tensor3('baseline_x'), TT.tensor3('baseline_y')
    pi = TT.constant(np.pi)

    def theano_periodic_sinc(in_sig, bandwidth):
        eps = TT.constant(1e-10)
        denominator = TT.mul(TT.sin(TT.true_div(in_sig, bandwidth)), bandwidth)
        idx_modi = TT.lt(TT.abs_(denominator), eps)
        numerator = TT.switch(idx_modi, TT.cos(in_sig), TT.sin(in_sig))
        denominator = TT.switch(idx_modi,
                                TT.cos(TT.true_div(in_sig,
                                                   bandwidth)), denominator)
        return TT.true_div(numerator, denominator)

    # def theano_periodic_sinc(in_sig, bandwidth):
    #     eps = TT.constant(1e-10)
    #     numerator = TT.sin(in_sig)
    #     denominator = TT.mul(TT.sin(TT.true_div(in_sig, bandwidth)), bandwidth)
    #     out0 = TT.true_div(numerator, denominator)
    #     out1 = TT.true_div(TT.cos(in_sig), TT.cos(TT.true_div(in_sig, bandwidth)))
    #     idx_modi = TT.lt(TT.abs_(denominator), eps)
    #     out = TT.switch(idx_modi, out1, out0)
    #     return out

    # define the function
    def f_inner(cross_beamShape_r, cross_beamShape_i, baseline_x, baseline_y,
                tau_inter_x, tau_inter_y, m_grid, n_grid, M, N):
        periodic_sinc_2d = \
            TT.mul(
                theano_periodic_sinc(
                    0.5 * (TT.shape_padright(tau_inter_x * baseline_x, n_ones=1) -
                           2 * pi * TT.shape_padleft(m_grid, n_ones=2)),
                    M * tau_inter_x
                ),
                theano_periodic_sinc(
                    0.5 * (TT.shape_padright(tau_inter_y * baseline_y, n_ones=1) -
                           2 * pi * TT.shape_padleft(n_grid, n_ones=2)),
                    N * tau_inter_y
                )
            )
        G_mtx_r = TT.tensordot(cross_beamShape_r,
                               periodic_sinc_2d,
                               axes=[[0, 1], [0, 1]])
        G_mtx_i = TT.tensordot(cross_beamShape_i,
                               periodic_sinc_2d,
                               axes=[[0, 1], [0, 1]])

        return G_mtx_r, G_mtx_i

    G_mtx_r, G_mtx_i = theano.map(fn=f_inner,
                                  sequences=(cross_beamShape_r,
                                             cross_beamShape_i, baseline_x,
                                             baseline_y),
                                  non_sequences=(tau_inter_x, tau_inter_y,
                                                 m_grid, n_grid, M, N))[0]

    # compile the function
    func = theano.function([
        tau_inter_x, tau_inter_y, M, N, m_grid, n_grid, baseline_x, baseline_y,
        cross_beamShape_r, cross_beamShape_i
    ], [G_mtx_r, G_mtx_i],
                           allow_input_downcast=True)
    return func
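The periodic sinc helper above corresponds, apart from the guard for near-zero denominators, to sin(x) / (bandwidth * sin(x / bandwidth)). A hedged numpy mirror:

import numpy as np

def periodic_sinc(x, bandwidth, eps=1e-10):
    # numpy counterpart of theano_periodic_sinc, including the small-denominator guard
    denom = np.sin(x / bandwidth) * bandwidth
    small = np.abs(denom) < eps
    num = np.where(small, np.cos(x), np.sin(x))
    denom = np.where(small, np.cos(x / bandwidth), denom)
    return num / denom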
Example #34
    def setup(self, model, dataset):

        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [
            param for param in model.get_params()
            if np.any(np.isinf(param.get_value()))
        ]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: " + str(inf_params))
        if any([
                np.any(np.isnan(param.get_value()))
                for param in model.get_params()
        ]):
            nan_params = [
                param for param in model.get_params()
                if np.any(np.isnan(param.get_value()))
            ]
            raise ValueError("These params are NaN: " + str(nan_params))
        self.model = model

        batch_size = self.batch_size
        if hasattr(model, "force_batch_size"):
            if model.force_batch_size > 0:
                if batch_size is not None:
                    if batch_size != model.force_batch_size:
                        if self.set_batch_size:
                            model.set_batch_size(batch_size)
                        else:
                            raise ValueError(
                                "batch_size argument to SGD conflicts with model's force_batch_size attribute"
                            )
                else:
                    self.batch_size = model.force_batch_size
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        # TODO: come up with some standard scheme for associating training runs
        # with monitors / pushing the monitor automatically, instead of just
        # enforcing that people have called push_monitor
        assert self.monitor.get_examples_seen() == 0
        self.monitor._sanity_check()

        X = model.get_input_space().make_theano_batch(name="%s[X]" %
                                                      self.__class__.__name__)
        self.topo = not X.ndim == 2

        if config.compute_test_value == 'raise':
            if self.topo:
                X.tag.test_value = dataset.get_batch_topo(self.batch_size)
            else:
                X.tag.test_value = dataset.get_batch_design(self.batch_size)

        Y = T.tensor3(name="%s[Y]" % self.__class__.__name__)

        if self.cost.supervised:
            if config.compute_test_value == 'raise':
                _, Y.tag.test_value = dataset.get_batch_design(
                    self.batch_size, True)

            self.supervised = True
            cost_value = self.cost(model, X, Y)

        else:
            self.supervised = False
            cost_value = self.cost(model, X)
        if cost_value is not None and cost_value.name is None:
            if self.supervised:
                cost_value.name = 'objective(' + X.name + ', ' + Y.name + ')'
            else:
                cost_value.name = 'objective(' + X.name + ')'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs)
            if self.supervised:
                ipt = (X, Y)
            else:
                ipt = X

            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=ipt,
                                     val=learning_rate,
                                     dataset=monitoring_dataset)
            if self.momentum:
                self.monitor.add_channel(name='momentum',
                                         ipt=ipt,
                                         val=self.momentum,
                                         dataset=monitoring_dataset)
            '''
            Ypred = model.fprop(X)
            Y_ = (T.arange(0,96).dimshuffle('x','x',0)*Ypred).sum(axis = 2)
            y = monitoring_dataset.y
            the_y = T.matrix('targetsss')
            mse = Print('MSE')(T.mean(T.square(Y_-the_y)))
            funct = function(inputs=[X], outputs=mse)
            real_funct = function(inputs=[X,the_y], outputs=funct(), givens=[y=monitoring_dataset.y])
            self.monitor.add_channel(name='MSE', ipt=(y, X), val = 2, dataset=monitoring_dataset, prereqs=(funct))
            '''

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        if self.cost.supervised:
            grads, updates = self.cost.get_gradients(model, X, Y)
        else:
            grads, updates = self.cost.get_gradients(model, X)

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {
                    'costname': cost_value.name,
                    'paramname': param.name
                })

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param, 1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.momentum is None:
            updates.update(dict(safe_zip(params,
                [param - learning_rate * lr_scalers.get(param, 1.) * grads[param]
                 for param in params])))
        else:
            for param in params:
                inc = sharedX(param.get_value() * 0.)
                if param.name is not None:
                    inc.name = 'inc_' + param.name
                updated_inc = self.momentum * inc - learning_rate * lr_scalers.get(
                    param, 1.) * grads[param]
                updates[inc] = updated_inc
                updates[param] = param + updated_inc

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                                     update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                                     update.name)

        with log_timing(log, 'Compiling sgd_update'):
            if self.supervised:
                fn_inputs = [X, Y]
            else:
                fn_inputs = [X]
            self.sgd_update = function(fn_inputs,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
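        # A plain-numpy sketch (an illustration, not part of pylearn2) of the
        # classical momentum rule applied above:
        #   inc   <- momentum * inc - lr * lr_scale * grad
        #   param <- param + inc
        #
        #   import numpy as np
        #   param, inc = np.array([1.0, 2.0]), np.zeros(2)
        #   grad, lr, momentum, lr_scale = np.array([0.5, -0.5]), 0.1, 0.9, 1.0
        #   inc = momentum * inc - lr * lr_scale * grad      # -> [-0.05, 0.05]
        #   param = param + inc                              # -> [0.95, 2.05]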
Beispiel #35
0
    #Get in pro_data.mask_train_input()
    # Random shuffle.
    indices = np.arange(len(training_word_pos_vec3D))
    np.random.shuffle(indices)
    training_word_pos_vec3D = training_word_pos_vec3D[indices]
    training_sen_length = training_sen_length[indices]
    training_label = training_label[indices]
    #    training_label_1hot=training_label_1hot[indices]
    train_left_sdp_length = train_left_sdp_length[indices]
    """
    new model
    """
    model = Network()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor3('inputs')
    target_var = T.ivector('targets')
    mask_var = T.imatrix('mask_layer')
    #Pi model variables:
    if model.network_type == "pi":
        input_b_var = T.tensor3('inputs_b')
        mask_train = T.vector('mask_train')
        unsup_weight_var = T.scalar('unsup_weight')
    elif model.network_type == "tempens":
        #tempens model variables:
        z_target_var = T.matrix('z_targets')
        mask_train = T.vector('mask_train')
        unsup_weight_var = T.scalar('unsup_weight')

    learning_rate_var = T.scalar('learning_rate')
    adam_beta1_var = T.scalar('adam_beta1')
Beispiel #36
0
    def _get_compiled_forward_backward_theano_func(self):
        """Returns a compiled theano function that perform forward-backward and either updates log posterior
        probabilities or returns it.

        Note:
            The returned theano function takes 6 inputs:

                num_states (integer scalar),
                temperature (float scalar),
                log_prior_c (float vector),
                log_trans_tcc (float tensor3),
                log_emission_tc (float matrix)
                prev_log_posterior_tc (float matrix)


            If a `log_posterior_output` shared tensor is given to the class initializer,
            the return tuple will be:

                update_norm_t, log_data_likelihood,
                (+ alpha_tc, beta_tc if self.include_alpha_beta_output == True)

            and the posterior will be directly written to `self.log_posterior_output`. Otherwise,
            the return tuple will be:

                admixed_log_posterior_tc, update_norm_t, log_data_likelihood,
                (+ alpha_tc, beta_tc if self.include_alpha_beta_output == True)

        Returns:
            A compiled theano function
        """
        num_states = tt.iscalar('num_states')
        temperature = tt.scalar('temperature')
        log_prior_c = tt.vector('log_prior_c')
        log_trans_tcc = tt.tensor3('log_trans_tcc')
        log_emission_tc = tt.matrix('log_emission_tc')
        prev_log_posterior_tc = tt.matrix('prev_log_posterior_tc')

        new_log_posterior_tc, log_data_likelihood_t, alpha_tc, beta_tc = self._get_symbolic_log_posterior(
            num_states, temperature, log_prior_c, log_trans_tcc,
            log_emission_tc, self.resolve_nans)

        admixed_log_posterior_tc = commons.safe_logaddexp(
            new_log_posterior_tc + np.log(self.admixing_rate),
            prev_log_posterior_tc + np.log(1.0 - self.admixing_rate))

        # in theory, all entries of log_data_likelihood_t are the same
        log_data_likelihood = log_data_likelihood_t[-1]
        update_norm_t = commons.get_jensen_shannon_divergence(
            admixed_log_posterior_tc, prev_log_posterior_tc)

        ext_output = [alpha_tc, beta_tc] if self.include_alpha_beta_output else []
        inputs = [
            num_states, temperature, log_prior_c, log_trans_tcc,
            log_emission_tc, prev_log_posterior_tc
        ]
        if self.log_posterior_output is not None:
            return th.function(
                inputs=inputs,
                outputs=[update_norm_t, log_data_likelihood] + ext_output,
                updates=[(self.log_posterior_output, admixed_log_posterior_tc)
                         ])
        else:
            return th.function(inputs=inputs,
                               outputs=[
                                   admixed_log_posterior_tc, update_norm_t,
                                   log_data_likelihood
                               ] + ext_output)
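        # Usage sketch (an illustration; the instance name `hmm` and the input
        # shapes are assumptions based on the docstring above):
        #
        #   import numpy as np
        #   t, c = 10, 3                                   # positions x states
        #   uniform = np.float32(-np.log(c))
        #   log_prior_c = np.full(c, uniform, dtype=np.float32)
        #   log_trans_tcc = np.full((t - 1, c, c), uniform, dtype=np.float32)
        #   log_emission_tc = np.full((t, c), uniform, dtype=np.float32)
        #   prev_log_posterior_tc = np.full((t, c), uniform, dtype=np.float32)
        #   fb = hmm._get_compiled_forward_backward_theano_func()
        #   posterior, update_norm, log_lik = fb(c, 1.0, log_prior_c, log_trans_tcc,
        #                                        log_emission_tc, prev_log_posterior_tc)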
Beispiel #37
0
def retrain(trainX,
            trainY,
            testX,
            testY,
            theta_mat,
            lambda_mat,
            learning_rate=5e-4,
            rate_decay=1.0,
            init_scale=0.2,
            scale_decay=0.998,
            momentum=0.0,
            minibatch_size=64,
            num_epochs=70,
            rng_seed=2017,
            model_path=None,
            model_to_save=None):

    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)

    index = T.lscalar()  # Minibatch index
    x = T.tensor3('x')  # Inputs
    y = T.fmatrix('y')  # Target

    #define and initialize RNN network
    network_0 = build_rnn_net(input_var=x,
                              input_width=time_step,
                              input_dim=feature_dim,
                              nin_units=12,
                              h_num_units=[16, 16],
                              h_grad_clip=5.0,
                              output_width=time_step)
    if not os.path.isfile(model_path):
        print("Model file does not exist!")
        return None
    init_model = np.load(model_path)
    init_params = init_model[init_model.files[0]]
    LL.set_all_param_values([network_0], init_params)

    train_set_y = theano.shared(np.zeros((1, time_step),
                                         dtype=theano.config.floatX),
                                borrow=True)
    train_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                         dtype=theano.config.floatX),
                                borrow=True)

    valid_set_y = theano.shared(np.zeros((1, time_step),
                                         dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                         dtype=theano.config.floatX),
                                borrow=True)
    test_set_x = theano.shared(np.zeros((1, time_step, feature_dim),
                                        dtype=theano.config.floatX),
                               borrow=True)

    theta = theano.shared(
        np.zeros((time_step, time_step), dtype=theano.config.floatX))
    lamda = theano.shared(
        np.zeros((time_step, time_step), dtype=theano.config.floatX))

    out_x = LL.BatchNormLayer(network_0)

    #define updates
    params = LL.get_all_params(out_x, trainable=True)
    r = lasagne.regularization.regularize_network_params(out_x, l2)
    semi_x = LL.get_output(out_x, deterministic=True)

    #define SGCRF in theano expressions
    S_yy = T.dot(y.T, y) / minibatch_size
    S_yx = T.dot(y.T, semi_x) / minibatch_size
    S_xx = T.dot(semi_x.T, semi_x) / minibatch_size

    ilamda = T.nlinalg.matrix_inverse(lamda)
    t1 = T.dot(S_yy, lamda)
    t2 = 2 * T.dot(S_yx, theta)
    t3 = T.dot(T.dot(T.dot(ilamda, theta.T), S_xx), theta)

    det_lamda = T.nlinalg.det(lamda)
    loss = -T.log(det_lamda) + T.nlinalg.trace(t1 + t2 + t3)

    eigen_lamda, _ = T.nlinalg.eig(lamda)
    train_loss = -T.sum(T.log(eigen_lamda)) + T.nlinalg.trace(t1 + t2 + t3)

    # keep only the off-diagonal part of lamda for the l1 penalty below
    lamda_diag = T.nlinalg.diag(T.nlinalg.diag(lamda))
    regularized_loss = loss + 1e-4 * r + 1e-3 * l1(theta) + 1e-3 * l1(
        lamda - lamda_diag)
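    # Background note (a descriptive addition): for a sparse Gaussian CRF with
    # p(y|x) = N(-Lambda^{-1} Theta^T x, Lambda^{-1}), the scaled negative
    # log-likelihood is
    #   -log|Lambda| + tr(S_yy Lambda + 2 S_yx Theta + Lambda^{-1} Theta^T S_xx Theta),
    # which is what `loss` computes above; `regularized_loss` adds an l2 penalty on
    # the network weights and l1 penalties on theta and the off-diagonal of lamda.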

    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)
    momentum = T.scalar('momentum', dtype=theano.config.floatX)
    scale_rate = T.scalar('scale_rate', dtype=theano.config.floatX)

    # scale the grads of theta, lamda
    new_params = [theta, lamda]
    new_grads = T.grad(regularized_loss, new_params)
    for i in range(len(new_grads)):
        new_grads[i] *= scale_rate
    grads = T.grad(regularized_loss, params)
    params += new_params
    grads += new_grads
    clipped_grads = lasagne.updates.total_norm_constraint(grads, 5.0)
    updates = lasagne.updates.nesterov_momentum(clipped_grads,
                                                params,
                                                learning_rate=learn_rate,
                                                momentum=momentum)

    pred_x = LL.get_output(out_x, deterministic=True)
    valid_predictions = -T.dot(T.dot(ilamda, theta.T), pred_x.T).T
    valid_loss = T.mean(T.abs_(pred_x - y))

    train_model = theano.function(
        [index, learn_rate, momentum, scale_rate],
        train_loss,
        updates=updates,
        givens={
            x: train_set_x[(index * minibatch_size):((index + 1) *
                                                     minibatch_size)],
            y: train_set_y[(index * minibatch_size):((index + 1) *
                                                     minibatch_size)]
        })

    validate_model = theano.function(
        [index],
        valid_loss,
        givens={
            x:
            valid_set_x[index * minibatch_size:(index + 1) * minibatch_size],
            y: valid_set_y[index * minibatch_size:(index + 1) * minibatch_size]
        })

    test_model = theano.function(
        [index],
        valid_predictions,
        givens={
            x: test_set_x[(index * minibatch_size):((index + 1) *
                                                    minibatch_size)],
        })

    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_train_loss = np.inf
    best_test_loss = np.inf

    eval_starts = 0
    near_convergence = 1500  # to be set
    eval_multiple = 10
    eval_num = 1000
    train_eval_scores = np.ones(eval_num)
    valid_eval_scores = np.ones(eval_num)
    test_eval_scores = np.ones(eval_num)
    cum_iterations = 0
    eval_index = 0

    theta.set_value(theta_mat.astype(np.float32))
    lamda.set_value(lambda_mat.astype(np.float32))

    batch_num = trainX.shape[0] // minibatch_size
    near_convergence = batch_num * (num_epochs - 10)

    for i in range(num_epochs):
        x_train, y_train, x_cv, y_cv = shuffle_data(trainX, trainY, testX,
                                                    testY)
        train_batch_num = x_train.shape[0] // minibatch_size  # discard last small batch
        valid_batch_num = x_cv.shape[0] // minibatch_size + 1
        start_time = time.time()

        train_set_y.set_value(y_train[:])
        train_set_x.set_value(x_train)
        valid_set_y.set_value(y_cv[:])
        valid_set_x.set_value(x_cv)
        test_set_x.set_value(x_cv)

        #        if(num_epochs % 10 == 0):
        #            learning_rate *= 0.7

        # Iterate over minibatches in each batch
        for mini_index in xrange(train_batch_num):
            this_rate = np.float32(learning_rate *
                                   (rate_decay**cum_iterations))
            this_scale_rate = np.float32(init_scale *
                                         (scale_decay**cum_iterations))
            # adaptive momentum
            this_momentum = 0.99

            if cum_iterations > near_convergence:
                this_momentum = 0.90

            this_train_loss += train_model(mini_index, this_rate,
                                           this_momentum, this_scale_rate)
            cum_iterations += 1
            if np.isnan(this_train_loss):
                print "Training Error!!!!!!!!!"
                return
                # begin evaluation and report loss
            if (cum_iterations % eval_multiple == 0
                    and cum_iterations > eval_starts):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean(
                    [validate_model(k) for k in xrange(valid_batch_num)])
                predictions = np.concatenate(
                    [test_model(k) for k in xrange(valid_batch_num)])
                this_test_loss = np.mean(np.abs(predictions - y_cv))
                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss
                test_eval_scores[eval_index] = this_test_loss

                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):
                    best_valid_loss = this_valid_loss

                if (this_test_loss < best_test_loss):
                    best_test_loss = this_test_loss
                    #np.savez(model_to_save, LL.get_all_param_values(network_0))

                print("Training Loss:", this_train_loss)
                print("Validation Loss:", this_valid_loss)
                print("Test Loss:", this_test_loss)
                print("Current scale rate:", this_scale_rate)
                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0

        end_time = time.time()
        print("Computing time for epoch %d: %f" % (i, end_time - start_time))
        cur_train_loss = np.min(train_eval_scores)
        cur_valid_loss = np.min(valid_eval_scores)
        cur_test_loss = np.min(test_eval_scores)
        print("Best training loss in this epoch: %f" % cur_train_loss)
        print("Best validation loss in this epoch: %f" % cur_valid_loss)
        print("Best test loss in this epoch: %f" % cur_test_loss)

    print("Best loss in training: %f" % best_train_loss)
    print("Best loss in cross-validation: %f" % best_valid_loss)
    print("Best loss in testing: %f" % best_test_loss)
    del train_set_x, train_set_y, valid_set_x, valid_set_y, trainX, trainY
    gc.collect()
Beispiel #38
0
                                         numUnits,
                                         True,
                                         nonlinearity=None)

transLayerMatricies = transLayer.W.get_value()
transLayerSharedMatrix = transLayer.W_Shared.get_value()

transLayerMatricies[0] = np.zeros((fShape[0] * fShape[1], numUnits))
transLayerMatricies[1] = -1 * np.eye(fShape[0] * fShape[1])
transLayerMatricies[2] = 2 * np.eye(fShape[0] * fShape[1])
transLayerSharedMatrix = 1 * np.eye(fShape[0] * fShape[1])

transLayer.W.set_value(transLayerMatricies)
transLayer.W_Shared.set_value(transLayerSharedMatrix)

x = T.tensor3()
output = lasagne.layers.get_output(transLayer, x)

f = theano.function([x], output)

tasks = [0, 1, 2, 3]
transLayer.setTaskIndices(tasks)

testValues = np.random.randn(batch_size, fShape[0], fShape[1])

print "Task matrix indicies: "
print tasks
print "Task matricies:"
print transLayer.W.get_value()
print "ShareMatrix:"
print transLayer.W_Shared.get_value()
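
# A small completion sketch (an addition, not in the original snippet): run the
# compiled layer on the random test batch and inspect the transformed output.
print "Layer output on the random test values:"
print f(testValues.astype(theano.config.floatX))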
Beispiel #39
0
    scale * numpy.random.uniform(-1.0, 1.0,
                                 (nClasses, 1)).astype(theano.config.floatX),
    'Sb')

#eps  = theano.shared(scale * numpy.ones(1).astype(theano.config.floatX), 'eps') * 0.0001

# bundle
params = [h0, Wr, Ur, br, Wz, Uz, bz, W, U, b, S, Sb]

# Adagrad shared variables
hists = {}
for param in params:
    hists[param.name + 'Hist'] = theano.shared(
        numpy.zeros_like(param.get_value()))

x = T.tensor3('x')
expected = T.matrix('expected')


def recurrence(x_t, h_tm1):
    # reset gate
    r_t = T.nnet.sigmoid(T.dot(Wr, x_t) + T.dot(Ur, h_tm1) + br)
    # update gate
    z_t = T.nnet.sigmoid(T.dot(Wz, x_t) + T.dot(Uz, h_tm1) + bz)
    # proposed hidden state
    _h_t = T.tanh(T.dot(W, x_t) + T.dot(U, r_t * h_tm1) + b)
    # actual hidden state
    h_t = z_t * h_tm1 + (1 - z_t) * _h_t
    return h_t
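
# A wiring sketch (an assumption; the rest of the original snippet is not shown):
# scan `recurrence` over time, read out class scores through S / Sb, and apply
# the Adagrad histories prepared above.
#
#   h_seq, _ = theano.scan(fn=recurrence, sequences=x, outputs_info=h0)
#   scores = T.dot(S, h_seq[-1]) + Sb            # assumes S maps hidden -> classes
#   cost = T.mean((scores - expected) ** 2)      # placeholder loss for illustration
#   lr = 0.1                                     # illustrative learning rate
#   updates = []
#   for param in params:
#       grad = T.grad(cost, param)
#       hist = hists[param.name + 'Hist']
#       updates.append((hist, hist + grad ** 2))
#       updates.append((param, param - lr * grad / T.sqrt(hist + grad ** 2 + 1e-8)))
#   train_step = theano.function([x, expected], cost, updates=updates)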

Beispiel #40
0
        test_m /= mmm
        test_f /= mmm
        test_x /= mmm
    #######
    n_features = train_m.shape[1]  # this time they are 512
    if tpe in (0, 2):
        nonlin = sigmoid
    if tpe == 1:
        nonlin = sigmoid

    max_len = 50

    NUM_UNITS_ENC = 1000
    NUM_UNITS_DEC = 1000

    x_sym = T.tensor3()
    mask_x_sym = T.matrix()
    m_sym = T.tensor3()
    f_sym = T.tensor3()
    mask_m_sym = T.tensor3()
    mask_f_sym = T.tensor3()
    n_sym = T.tensor3()
    mask_n_sym = T.tensor3()

    l_in = lasagne.layers.InputLayer(shape=(None, max_len, n_features))

    l_dec_fwd = lasagne.layers.GRULayer(l_in,
                                        num_units=NUM_UNITS_DEC,
                                        name='GRUDecoder',
                                        backwards=False)
    l_dec_bwd = lasagne.layers.GRULayer(l_in,
Beispiel #41
0
from scipy.spatial.distance import cdist

rng = np.random.RandomState(42)
d = 20  # dimension
nX = 10
nY = 30

x = np.random.rand(1, 121, 52).astype('float32')
y = np.random.rand(1, 200, 52).astype('float32')

#x = np.asarray([[ [1,2,3], [1,2,1] ]]).astype('float32')
#y = np.asarray([[ [1,2,3], [1,2,3], [1,2,3] ]]).astype('float32')

print cdist(x[0], y[0])

X = T.tensor3('X', dtype='float32')
Y = T.tensor3('Y', dtype='float32')

x_square = K.square(X)
y_square = K.square(Y)

x_sq_sum = K.repeat(K.sum(x_square, axis=-1), n=y_square.shape[1])
y_sq_sum = K.repeat(K.sum(y_square, axis=-1), n=x_square.shape[1])

dot = K.batch_dot(X, K.permute_dimensions(Y, (0, 2, 1)), axes=(2, 1))

squared_euclidean_distances = K.sqrt(
    K.permute_dimensions(x_sq_sum, (0, 2, 1)) + y_sq_sum - 2 * dot)

f_x = theano.function([X, Y], x_sq_sum)
f_y = theano.function([Y, X], y_sq_sum)
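
# A small check sketch (an addition to the original snippet): compile the full
# pairwise-distance expression and compare it against the scipy cdist result
# printed above.
f_dist = theano.function([X, Y], squared_euclidean_distances)
print np.allclose(f_dist(x, y)[0], cdist(x[0], y[0]), atol=1e-3)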
Beispiel #42
0
    updates = []

    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2

        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling

        updates.append((acc, acc_new))
        updates.append((p, p - T.clip(lr * g, -0.01, 0.01)))

    return updates

inp = T.matrix()   # batchsize x imgsize ** 2
randn = T.tensor3() # timestep x batchsize x latent_vector_size

enc = LSTM(784 + 784 + 784, 256)
dec = LSTM(256, 784)

enc_to_mean = init_weights([256, 256])
enc_to_variance = init_weights([256, 256])

dec_to_write = init_weights([784, 784])

def encoder(canvas, decoder_hidden_1, encoder_hidden_1, encoder_cell_1):
    error = inp - T.nnet.sigmoid(canvas)

    read_vec = T.concatenate([inp, error, decoder_hidden_1], axis = 1)

    enc_hidden, enc_cell = enc.recurrence(read_vec, encoder_hidden_1, encoder_cell_1)
Beispiel #43
0
    def test_machine_translation(self):
        """
        This test case comes from https://github.com/rizar/scan-grad-speed and
        is an example of actual computation done with scan in the context of
        machine translation

        'dim' has been reduced from 1000 to 5 to make the test run faster
        """

        # Parameters from an actual machine translation run
        batch_size = 80
        seq_len = 50
        n_words = 80 * 50
        dim = 5

        # Weight matrices
        U = theano.shared(
            numpy.random.normal(size=(dim, dim),
                                scale=0.0001).astype(config.floatX))
        U.name = 'U'
        V = theano.shared(U.get_value())
        V.name = 'V'
        W = theano.shared(U.get_value())
        W.name = 'W'

        # Variables and their values
        x = T.tensor3('x')
        x_value = numpy.random.normal(size=(seq_len, batch_size, dim),
                                      scale=0.0001).astype(config.floatX)

        ri = T.tensor3('ri')
        ri_value = x_value

        zi = T.tensor3('zi')
        zi_value = x_value

        init = T.alloc(numpy.cast[config.floatX](0), batch_size, dim)

        def rnn_step1(
                # sequences
                x,
                ri,
                zi,
                # outputs_info
                h):
            pre_r = ri + h.dot(U)
            pre_z = zi + h.dot(V)
            r = T.nnet.sigmoid(pre_r)
            z = T.nnet.sigmoid(pre_z)

            after_r = r * h
            pre_h = x + after_r.dot(W)
            new_h = T.tanh(pre_h)

            res_h = z * new_h + (1 - z) * h
            return res_h

        # Compile the function twice, once with the optimization and once
        # without
        opt_mode = mode.including("scan")
        h, _ = theano.scan(rnn_step1,
                           sequences=[x, ri, zi],
                           n_steps=seq_len,
                           outputs_info=init,
                           name='fpass1',
                           mode=opt_mode)
        cost = h[-1].sum()
        grad1 = T.grad(cost, [U, V, W])
        f_opt = theano.function(inputs=[x, ri, zi],
                                outputs=grad1,
                                mode=opt_mode)

        no_opt_mode = mode.excluding("scanOp_pushout_output")
        h, _ = theano.scan(rnn_step1,
                           sequences=[x, ri, zi],
                           n_steps=seq_len,
                           outputs_info=init,
                           name='fpass1',
                           mode=no_opt_mode)
        cost = h[-1].sum()
        grad1 = T.grad(cost, [U, V, W])
        f_no_opt = theano.function(inputs=[x, ri, zi],
                                   outputs=grad1,
                                   mode=no_opt_mode)

        # Validate that the optimization has been applied
        scan_node_grad = [
            node for node in f_opt.maker.fgraph.toposort()
            if isinstance(node.op, Scan)
        ][1]

        for output in scan_node_grad.op.outputs:
            assert not (
                isinstance(output.owner.op, T.elemwise.Elemwise)
                and any([isinstance(i, T.Dot) for i in output.owner.inputs]))

        # Compare the outputs of the two functions on the same input data.
        f_opt_output = f_opt(x_value, ri_value, zi_value)
        f_no_opt_output = f_no_opt(x_value, ri_value, zi_value)
        utt.assert_allclose(f_opt_output, f_no_opt_output)
Beispiel #44
0
def main():
    x1 = tensor.tensor3("x1", dtype=THEANOTYPE)
    x2 = tensor.tensor3("x2", dtype=THEANOTYPE)
    x3 = tensor.tensor3("x3", dtype=THEANOTYPE)
    x1_indices = tensor.ivector("x1_indices")
    x2_indices = tensor.ivector("x2_indices")
    x3_indices = tensor.ivector("x3_indices")
    m1 = tensor.matrix("m1", dtype=THEANOTYPE)
    m2 = tensor.matrix("m2", dtype=THEANOTYPE)
    m3 = tensor.matrix("m3", dtype=THEANOTYPE)
    rng = numpy.random.RandomState(0)
    n_data = 1000
    max_sequence_length = 50
    n_dim = 5
    n_hiddens = [10, 10]
    model = SiameseTripletBatchLSTM(rng,
                                    x1,
                                    x2,
                                    x3,
                                    m1,
                                    m2,
                                    m3,
                                    n_in=n_dim,
                                    n_hiddens=n_hiddens)

    xs = theano.shared(
        rng.randn(n_data, max_sequence_length, n_dim).astype(THEANOTYPE))
    masks = theano.shared(
        rng.randn(n_data, max_sequence_length).astype(THEANOTYPE))

    xs_numpy = xs.get_value()
    masks_numpy = masks.get_value()

    x1_lstms = lstm.BatchMultiLayerLSTM(rng,
                                        x1,
                                        m1,
                                        n_dim,
                                        n_hiddens=n_hiddens,
                                        output_type="last",
                                        prefix="lstms_x1")

    f1 = theano.function(inputs=[x1, m1], outputs=x1_lstms.output)

    small_n_data = 10
    sequence_lengths = [5, 10, 15, 20]
    xs0 = [
        rng.randn(small_n_data, n_dim).astype(THEANOTYPE)
        for n_data in sequence_lengths
    ]
    xs_arr0, mask = lstm.batchify(xs0)

    f1_ind = theano.function(inputs=[x1_indices],
                             outputs=x1_lstms.output,
                             givens={
                                 x1: xs[x1_indices].swapaxes(0, 1),
                                 m1: masks[x1_indices]
                             })

    fbatch = theano.function(inputs=[x1_indices, x2_indices, x3_indices],
                             outputs=[
                                 model.x1_lstms.output, model.x2_lstms.output,
                                 model.x3_lstms.output
                             ],
                             givens={
                                 x1: xs[x1_indices].swapaxes(0, 1),
                                 m1: masks[x1_indices],
                                 x2: xs[x2_indices].swapaxes(0, 1),
                                 m2: masks[x2_indices],
                                 x3: xs[x3_indices].swapaxes(0, 1),
                                 m3: masks[x3_indices],
                             })
    ind1 = numpy.asarray([1, 2], dtype=numpy.int32)
    ind2 = numpy.asarray([1, 2], dtype=numpy.int32)
    ind3 = numpy.asarray([1, 2], dtype=numpy.int32)

    import pdb
    pdb.set_trace()
Beispiel #45
0
    def _run(self, num_features, num_timesteps, batch_size, mode):
        # determine shapes of inputs and targets depending on the batch size
        if batch_size == 1:
            inputs_size = (num_timesteps, num_features)
            targets_size = (num_timesteps, 1)
        else:
            inputs_size = (num_timesteps, batch_size, num_features)
            targets_size = (num_timesteps, batch_size, 1)

        # make inputs and targets shared variables
        inputs = theano.shared(self.rng.uniform(size=inputs_size).astype(
            config.floatX),
                               borrow=True)
        targets = theano.shared(self.rng.uniform(size=targets_size).astype(
            config.floatX),
                                borrow=True)

        # create symbolic inputs and targets variables
        if batch_size == 1:
            x = T.matrix('inputs')
            t = T.matrix('targets')
        else:
            x = T.tensor3('inputs')
            t = T.tensor3('targets')
        x.tag.test_value = inputs.get_value(borrow=True)
        t.tag.test_value = targets.get_value(borrow=True)

        # create a set of parameters for a simple RNN
        W_xh = theano.shared(
            (0.01 * self.rng.uniform(size=(num_features, 10))).astype(
                config.floatX),
            borrow=True)
        W_hh = theano.shared(
            (0.01 * self.rng.uniform(size=(10, 10))).astype(config.floatX),
            borrow=True)
        W_hy = theano.shared(
            (0.01 * self.rng.uniform(size=(10, 1))).astype(config.floatX),
            borrow=True)
        b_h = theano.shared(numpy.zeros(10).astype(config.floatX), borrow=True)
        b_y = theano.shared(numpy.zeros(1).astype(config.floatX), borrow=True)

        params = [W_xh, W_hh, W_hy, b_h, b_y]

        # recurrent function
        def step(x_t, h_tm1):
            h = T.tanh(T.dot(h_tm1, W_hh) + T.dot(x_t, W_xh) + b_h)
            return h

        # build recurrent graph
        if batch_size == 1:
            h_0 = T.alloc(0.0, 10).astype(config.floatX)
        else:
            h_0 = T.alloc(0.0, batch_size, 10).astype(config.floatX)
        h, updates = theano.scan(step, sequences=[x], outputs_info=[h_0])
        # network output
        y = T.dot(h, W_hy) + b_y

        # Create Gauss-Newton-Matrix object. Not really of any use here, but I
        # need it for Hessian-Free optimization.
        gn = GaussNewtonMatrix(y)

        # compute MSE
        cost = ((t - y)**2).sum(axis=1).mean()

        # Compute the cost at some other point in the parameter
        # space. Not really of any use here, but this is how I do it
        # during certain iterations of CG in the HF algorithm. There,
        # it's in fact `pi + current update proposal`.  For simplicity,
        # I just multiply by 2 here.
        cost_ = theano.clone(cost,
                             replace=dict([(pi, 2 * pi) for pi in params]))

        # Compute Gauss-Newton-Matrix times some vector `v` which is `p` in CG,
        # but for simplicity, I just take the parameters vector because it's
        # already there.
        Gv = gn(v=params, cost=cost, parameters=params, damp=T.constant(1.0))

        # compile Theano function
        f = theano.function([], [cost_] + Gv,
                            givens={
                                x: inputs,
                                t: targets
                            },
                            mode=mode)
        # execute
        f()
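
        # Context note (a descriptive addition): in Hessian-Free optimization the
        # Gauss-Newton product Gv computed above is exactly what conjugate gradient
        # consumes; CG solves (G + damp * I) d = -gradient for the update direction
        # d using one such matrix-vector product per iteration, so G is never
        # formed explicitly.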
Beispiel #46
0
    def __init__(self,
                 rng,
                 input_x1,
                 input_x2,
                 input_x3,
                 input_m1,
                 input_m2,
                 input_m3,
                 n_in,
                 n_hiddens,
                 output_type="last",
                 srng=None,
                 dropout=0.0):
        """
        Initialize symbolic parameters and expressions.

        Many of the parameters are identical to those of `cnn.build_cnn_layers`.
        Some of the other parameters are described below.

        Parameters
        ----------
        input_x1 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the first side of the Siamese network.
        input_x2 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the second side of the Siamese network, forming a
            same-pair with `input_x1`.
        input_x3 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the third side of the Siamese network, forming a
            different-pair with `input_x1`.
        """

        # Build common layers to which the Siamese layers are tied
        input = T.tensor3("x", dtype=THEANOTYPE)
        mask = T.matrix("m", dtype=THEANOTYPE)
        self.input = input
        self.mask = mask
        self.n_in = n_in
        self.n_hiddens = n_hiddens
        self.n_layers = len(self.n_hiddens)
        self.lstms = lstm.BatchMultiLayerLSTM(rng,
                                              input,
                                              mask,
                                              n_in,
                                              n_hiddens,
                                              output_type=output_type,
                                              prefix="lstms",
                                              srng=srng,
                                              dropout=dropout)

        self.dropout = dropout

        self.x1_lstms = lstm.BatchMultiLayerLSTM(
            rng,
            input_x1,
            input_m1,
            n_in,
            n_hiddens,
            parameters=self.lstms.parameters,
            output_type=output_type,
            prefix="lstms_x1",
            srng=srng,
            dropout=dropout)
        self.x2_lstms = lstm.BatchMultiLayerLSTM(
            rng,
            input_x2,
            input_m2,
            n_in,
            n_hiddens,
            parameters=self.lstms.parameters,
            output_type=output_type,
            prefix="lstms_x2",
            srng=srng,
            dropout=dropout)
        self.x3_lstms = lstm.BatchMultiLayerLSTM(
            rng,
            input_x3,
            input_m3,
            n_in,
            n_hiddens,
            parameters=self.lstms.parameters,
            output_type=output_type,
            prefix="lstms_x3",
            srng=srng,
            dropout=dropout)

        self.parameters = self.lstms.parameters
        self.l2 = self.lstms.l2
        self.output = self.lstms.output
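
        # Possible objective sketch (an assumption; no loss is defined in this
        # class): given an instance `net`, a margin-based triplet loss would pull
        # the same-pair (x1, x2) together and push the different-pair (x1, x3)
        # apart, e.g.
        #   d_same = T.sum((net.x1_lstms.output - net.x2_lstms.output) ** 2, axis=-1)
        #   d_diff = T.sum((net.x1_lstms.output - net.x3_lstms.output) ** 2, axis=-1)
        #   triplet_loss = T.mean(T.maximum(0., margin + d_same - d_diff))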
Beispiel #47
0
def defineGCN(params, nodeNames, nodeList, edgeList, edgeListComplete,
              edgeFeatures, nodeFeatureLength, nodeToEdgeConnections, new_idx,
              featureRange, adjacency):

    gradient_method = Momentum(momentum=params.momentum)

    if (params.gcnType == 0):
        print("-------")
        from neuralmodels.layers.GraphConvolution import GraphConvolution
    elif (params.gcnType == 1):
        print("=======")
        from neuralmodels.layers.GraphConvolution_temporal import GraphConvolution_t as GraphConvolution
    elif (params.gcnType == 2):
        print("########")
        from neuralmodels.layers.GraphConvolution_temporal_pairwise import GraphConvolution_tp as GraphConvolution

    edgeRNNs = {}
    nodeRNNs = {}
    finalLayer = {}
    nodeLabels = {}
    edgeListComplete = []

    for nm in nodeNames:
        num_classes = nodeList[nm]
        if (params.test == 1):

            nodeRNNs[nm] = [FCLayer('linear', params.fc_init, size=1, rng=rng)]

            et = nm + '_temporal'
            edgeListComplete.append(et)
            edgeRNNs[et] = [
                TemporalInputFeatures(edgeFeatures[et]),
                FCLayer('rectify',
                        params.fc_init,
                        size=params.fc_size,
                        rng=rng)
            ]

            et = nm + '_normal'
            edgeListComplete.append(et)
            edgeRNNs[et] = [
                TemporalInputFeatures(edgeFeatures[et]),
                FCLayer('rectify',
                        params.fc_init,
                        size=params.fc_size,
                        rng=rng)
            ]

            finalLayer[nm] = [
                FCLayer_out('rectify',
                            params.fc_init,
                            size=params.fc_size,
                            rng=rng,
                            flag=1),
                FCLayer('linear', params.fc_init, size=num_classes, rng=rng),
            ]

        else:
            LSTMs = [
                LSTM('tanh',
                     'sigmoid',
                     params.lstm_init,
                     truncate_gradient=params.truncate_gradient,
                     size=params.node_lstm_size,
                     rng=rng,
                     g_low=-params.g_clip,
                     g_high=params.g_clip)
            ]
            nodeRNNs[nm] = [
                #multilayerLSTM(LSTMs, skip_input=True,skip_output=True, input_output_fused=True),
                FCLayer('rectify',
                        params.fc_init,
                        size=params.fc_size,
                        rng=rng),
                FCLayer('linear', params.fc_init, size=params.fc_size,
                        rng=rng),
            ]

            et = nm + '_temporal'
            edgeListComplete.append(et)

            edgeRNNs[et] = [
                TemporalInputFeatures(edgeFeatures[et]),
                FCLayer('rectify',
                        params.fc_init,
                        size=params.fc_size,
                        rng=rng),
                FCLayer('linear', params.fc_init, size=params.fc_size, rng=rng)
            ]

            et = nm + '_normal'
            edgeListComplete.append(et)
            edgeRNNs[et] = [
                TemporalInputFeatures(edgeFeatures[et]),
                FCLayer('rectify',
                        params.fc_init,
                        size=params.fc_size,
                        rng=rng),
                FCLayer('linear', params.fc_init, size=params.fc_size, rng=rng)
            ]

            nodeLabels[nm] = T.tensor3(dtype=theano.config.floatX)

            finalLayer[nm] = [
                FCLayer_out('rectify',
                            params.fc_init,
                            size=params.fc_size,
                            rng=rng,
                            flag=1),
                FCLayer('rectify', params.fc_init, size=100, rng=rng),
                FCLayer('linear', params.fc_init, size=num_classes, rng=rng),
            ]

    if (params.test == 1):
        graphLayers = [
            GraphConvolution(params.fc_size, adjacency),
            AddNoiseToInput(rng=rng, dropout_noise=True),
            AddNoiseToInput(rng=rng, dropout=True),
        ]
    else:
        graphLayers = [
            GraphConvolution(params.fc_size, adjacency),
            GraphConvolution(params.fc_size, adjacency),
            # AddNoiseToInput(rng=rng, dropout_noise=True),
            GraphConvolution(params.fc_size, adjacency),
            # AddNoiseToInput(rng=rng, dropout=True),
            GraphConvolution(params.fc_size, adjacency),
            # AddNoiseToInput(rng=rng, dropout=True),
            GraphConvolution(params.fc_size, adjacency),
            # AddNoiseToInput(rng=rng, dropout=True),
            GraphConvolution(params.fc_size, adjacency),
            # AddNoiseToInput(rng=rng, dropout=True),
            GraphConvolution(params.fc_size,
                             adjacency,
                             activation_str='linear'),
        ]
# ---------------------------------------------------------------------------------------------

    learning_rate = T.scalar(dtype=theano.config.floatX)
    learning_rate.tag.test_value = 1.0
    gcnn = GCNN(params,
                graphLayers,
                finalLayer,
                nodeNames,
                edgeRNNs,
                nodeRNNs,
                nodeToEdgeConnections,
                edgeListComplete,
                euclidean_loss,
                nodeLabels,
                learning_rate,
                new_idx,
                featureRange,
                clipnorm=params.clipnorm,
                update_type=gradient_method,
                weight_decay=params.weight_decay)

    return gcnn
Beispiel #48
0
    def __init__(self,
                 rng,
                 input_x1,
                 input_x2,
                 input_x3,
                 input_m1,
                 input_m2,
                 input_m3,
                 input_shape,
                 filter_shape,
                 n_lstm_hiddens,
                 n_outputs,
                 prefix="triplet_convlstm",
                 output_type="max",
                 truncate_gradient=-1,
                 srng=None,
                 dropout=0.0,
                 use_dropout_regularization=False,
                 stabilize_activations=None):
        """
        Initialize symbolic parameters and expressions.

        Many of the parameters are identical to those of `cnn.build_cnn_layers`.
        Some of the other parameters are described below.

        Parameters
        ----------
        input_x1 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the first side of the Siamese network.
        input_x2 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the second side of the Siamese network, forming a
            same-pair with `input_x1`.
        input_x3 : symbolic matrix
            The matrix is reshaped according to `input_shape` and then treated
            as the input of the third side of the Siamese network, forming a
            different-pair with `input_x1`.
        """

        # Build common layers to which the Siamese layers are tied
        input = T.tensor3("x", dtype=THEANOTYPE)
        mask = T.matrix("m", dtype=THEANOTYPE)
        self.use_dropout_regularization = use_dropout_regularization
        self.input = input
        self.mask = mask
        self.output_type = output_type
        self.input_shape = input_shape
        self.filter_shape = filter_shape
        self.n_lstm_hiddens = n_lstm_hiddens
        self.prefix = prefix
        self.srng = srng
        self.dropout = dropout
        self.truncate_gradient = truncate_gradient
        self.stabilize_activations = stabilize_activations
        self.model = lstm.BatchMultiLayerConvLSTM(
            rng,
            input,
            mask,
            input_shape,
            filter_shape,
            n_lstm_hiddens,
            n_outputs=n_outputs,
            output_type=self.output_type,
            prefix="%s_lstm" % self.prefix,
            truncate_gradient=self.truncate_gradient,
            srng=self.srng,
            dropout=self.dropout,
            use_dropout_regularization=self.use_dropout_regularization,
            stabilize_activations=self.stabilize_activations)
        self.n_outputs = self.model.n_outputs

        self.x1_model = lstm.BatchMultiLayerConvLSTM(
            rng,
            input_x1,
            input_m1,
            input_shape,
            filter_shape,
            n_lstm_hiddens,
            n_outputs=self.n_outputs,
            V=self.model.V,
            parameters=self.model.parameters[1:],
            output_type=self.output_type,
            prefix="%s_lstm1" % self.prefix,
            truncate_gradient=self.truncate_gradient,
            srng=self.srng,
            dropout=self.dropout,
            out_W=self.model.out_W,
            out_b=self.model.out_b,
            use_dropout_regularization=self.use_dropout_regularization,
            stabilize_activations=self.stabilize_activations)
        self.x2_model = lstm.BatchMultiLayerConvLSTM(
            rng,
            input_x2,
            input_m2,
            input_shape,
            filter_shape,
            n_lstm_hiddens,
            n_outputs=self.n_outputs,
            V=self.model.V,
            parameters=self.model.parameters[1:],
            output_type=self.output_type,
            prefix="%s_lstm2" % self.prefix,
            truncate_gradient=self.truncate_gradient,
            srng=self.srng,
            dropout=self.dropout,
            out_W=self.model.out_W,
            out_b=self.model.out_b,
            use_dropout_regularization=self.use_dropout_regularization,
            stabilize_activations=self.stabilize_activations)
        self.x3_model = lstm.BatchMultiLayerConvLSTM(
            rng,
            input_x3,
            input_m3,
            input_shape,
            filter_shape,
            n_lstm_hiddens,
            n_outputs=self.n_outputs,
            V=self.model.V,
            parameters=self.model.parameters[1:],
            output_type=self.output_type,
            prefix="%s_lstm3" % self.prefix,
            truncate_gradient=self.truncate_gradient,
            srng=self.srng,
            dropout=self.dropout,
            out_W=self.model.out_W,
            out_b=self.model.out_b,
            use_dropout_regularization=self.use_dropout_regularization,
            stabilize_activations=self.stabilize_activations)

        self.parameters = self.model.parameters
        self.l2 = self.model.l2
        self.output = self.model.output
    def build_model_core(self):

        # gradient clipping function
        self.clipg = lambda x: grad_clip(
            x, -self.conf['GRAD_CLIP_SIZE'], self.conf['GRAD_CLIP_SIZE'])

        shared_layers = {}

        if self.conf['BATCH_NORM']:
            if not hasattr(self, 'gamma_h'):
                gamma_h_val = np.ones(
                    (self.conf['lstm_hidden_size'] * 2,), dtype=theano.config.floatX)
                shared_layers['gamma_h'] = gamma_h_val
            if not hasattr(self, 'beta_h'):
                beta_h_val = np.zeros(
                    (self.conf['lstm_hidden_size'] * 2,), dtype=theano.config.floatX)
                shared_layers['beta_h'] = beta_h_val

        # set the default network weights
        if not hasattr(self, 'wemb'):
            wemb_val = init_layer_k(
                self.conf['vocab_size'], self.conf['emb_size'])
            shared_layers['wemb'] = wemb_val

        if not hasattr(self, 'h0_hidden'):
            h0_hidden_val = np.zeros(
                (self.conf['lstm_hidden_size'], ), dtype=theano.config.floatX)
            shared_layers['h0_hidden'] = h0_hidden_val

        if not hasattr(self, 'h0_cell'):
            h0_cell_val = np.zeros(
                (self.conf['lstm_hidden_size'], ), dtype=theano.config.floatX)
            shared_layers['h0_cell'] = h0_cell_val

        # mapping from visual space to word space
        if not hasattr(self, 'wvm'):
            wvm_val = init_layer_k(
                self.conf['visual_size'], self.conf['emb_size'])
            shared_layers['wvm'] = wvm_val

        if not hasattr(self, 'bmv'):
            bmv_val = np.zeros(
                (self.conf['emb_size'],), dtype=theano.config.floatX)
            shared_layers['bmv'] = bmv_val

        # LSTM layer parameters
        if not hasattr(self, 'w_lstm'):
            w_lstm_val = init_layer_k(
                self.conf['lstm_hidden_size']*2, self.conf['lstm_hidden_size']*4)
            shared_layers['w_lstm'] = w_lstm_val

        # mapping from RNN hidden output to vocabulary
        if not hasattr(self, 'w'):
            w_val = init_layer_k(
                self.conf['lstm_hidden_size'], self.conf['output_size'])
            shared_layers['w'] = w_val

        if not hasattr(self, 'b'):
            b_val = np.zeros(
                (self.conf['output_size'],), dtype=theano.config.floatX)
            if self.conf["INIT_OUTPUT_BIAS"]:
                # set the bias on the last layer to be the log prob of each of the words in the vocab
                wcount = 0
                w2i = self.dp.w2i
                w2c = self.dp.get_word_counts(RNNDataProvider.TRAIN)
                for w in w2i:
                    if w in w2c:
                        wcount += w2c[w]
                wcount += self.X_train.shape[0]
                b_val[w2i[RNNDataProvider.STOP_TOKEN]] = np.log(
                    self.X_train.shape[0]/float(wcount))
                for w in w2i:
                    if w in w2c:
                        b_val[w2i[w]] = np.log(w2c[w]/float(wcount))
                b_val -= np.max(b_val[1:])
            shared_layers['b'] = b_val

        self.build_shared_layers(shared_layers)

        # input variables for training
        self.x = T.imatrix("x")
        self.v = T.matrix("v")
        self.xlen = T.matrix("xlen")

        # input variables for generation
        self.v_single = T.vector("v")
        self.nstep = T.iscalar("nstep")

        # the dropout masks
        self.x_drop = T.tensor3("x_drop")  # drop the input
        self.y_drop = T.tensor3("y_drop")  # drop the output

        self.forced_word = T.imatrix("forced_word")

        h_tm1 = T.vector("h_tm1")  # hidden layer ouput
        word_t = T.ivector("word_t")  # word indexes
        v_i = T.vector("v")  # visual information

        # Generates the next word based on the: previous true word, hidden state & visual features
        # inputs: hiddent_layer, last_predicted word, visual features
        def recurrance(word_t, x_drop_slice, hh_drop_slice, use_v, h_tm1_hidden, h_tm1_cell, v_i):

            # word_t = theano.printing.Print("word_t")(word_t)

            # get the word embedding matrix or the context information
            if self.conf['DECODER']:
                x_t = ifelse(T.eq(use_v, 1), T.dot(
                    v_i, self.wvm) + self.bmv, self.wemb[word_t])
            else:
                x_t = ifelse(T.eq(use_v, 1), T.zeros_like(
                    self.wemb[word_t]), self.wemb[word_t])

            # if we are not doing minibatch training
            if word_t.ndim == 0:
                x_t = x_t.reshape((1, x_t.shape[0]))
                h_tm1_hidden = h_tm1_hidden.reshape((1, h_tm1_hidden.shape[0]))
                h_tm1_cell = h_tm1_cell.reshape((1, h_tm1_cell.shape[0]))

            # dropout on the input embddings
            if self.conf['DROP_INPUT']:
                x_t *= x_drop_slice

            # clip the gradients so they dont get too large
            h_tm1_hidden_clip = self.clipg(h_tm1_hidden)

            in_state = T.concatenate([x_t, h_tm1_hidden_clip], axis=1)

            if self.conf['BATCH_NORM']:
                mu = T.mean(in_state, axis=0, keepdims=True)
                var = T.var(in_state, axis=0, keepdims=True)
                normed_is = (in_state - mu) / T.sqrt(var +
                                                     T.constant(1e-10, dtype=theano.config.floatX))
                in_state = self.gamma_h * normed_is + self.beta_h

            # calculate 8 dot products in one go
            dot_out = T.dot(in_state, self.w_lstm)

            lstm_hidden_size = self.conf['lstm_hidden_size']
            # input gate
            ig = T.nnet.sigmoid(dot_out[:, :lstm_hidden_size])
            # forget gate
            fg = T.nnet.sigmoid(
                dot_out[:, lstm_hidden_size:lstm_hidden_size*2])
            # output gate
            og = T.nnet.sigmoid(
                dot_out[:, lstm_hidden_size*2:lstm_hidden_size*3])

            # cell memory
            cc = fg * h_tm1_cell + ig * T.tanh(dot_out[:, lstm_hidden_size*3:])

            # hidden state
            hh = og * cc

            # drop the output state
            if self.conf['DROP_OUTPUT']:
                hh_d = hh * hh_drop_slice
            else:
                hh_d = hh

            # the distribution over output words
            if self.conf['SOFTMAX_OUT']:
                s_t = T.nnet.softmax(T.dot(hh_d, self.w) + self.b)
            else:
                s_t = T.nnet.sigmoid(T.dot(hh_d, self.w) + self.b)

            #hh = ifelse(T.eq(word_t, 0) and T.eq(use_v, 0), h_tm1_hidden, hh)
            #cc = ifelse(T.eq(word_t, 0) and T.eq(use_v, 0), h_tm1_cell, cc)

            if not self.conf['DECODER']:
                keep_idx = T.and_(T.eq(word_t, 0), T.eq(use_v, 0))
                #keep_idx = theano.printing.Print("keep_idx")(keep_idx)
                if word_t.ndim != 0:
                    keep_idx = keep_idx.dimshuffle((0, 'x'))
                #hh_ret = hh
                #hh_ret[keep_idx, :] = h_tm1_hidden[keep_idx, :]
                hh_ret = keep_idx * h_tm1_hidden + (1-keep_idx) * hh
                cc_ret = keep_idx * h_tm1_cell + (1-keep_idx) * cc
            else:
                hh_ret = hh
                cc_ret = cc

            # if we are not doing minibatch training
            if word_t.ndim == 0:
                hh_ret = hh_ret[0]
                cc_ret = cc_ret[0]

            return [hh_ret, cc_ret, s_t]

        # Generates the next word by feeding the old word as input
        # inputs: hiddent_layer, last_predicted word, visual features
        def recurrance_word_feedback(h_tm1_hidden, h_tm1_cell, word_t, use_visual, v_i):
            x_drop_val = T.ones(
                (self.conf['emb_size'],), dtype=theano.config.floatX)
            y_drop_val = T.ones(
                (self.conf['lstm_hidden_size'],), dtype=theano.config.floatX)
            [hh, cc, s_t] = recurrance(
                word_t, x_drop_val, y_drop_val, use_visual, h_tm1_hidden, h_tm1_cell, v_i)

            # the predicted word
            w_idx = T.cast(T.argmax(s_t, axis=1), dtype='int32')[0]

            return [hh, cc, s_t[0], w_idx, T.zeros((0,), dtype='int32')[0]]

        def recurrance_partial_word_feedback(word_t_real, x_drop_val, y_drop_val, use_visual, forced_word, h_tm1_hidden, h_tm1_cell, word_t_pred, v_i):
            word_last = T.switch(forced_word, word_t_real, word_t_pred)
            [hh, cc, s_t] = recurrance(
                word_last, x_drop_val, y_drop_val, use_visual, h_tm1_hidden, h_tm1_cell, v_i)

            # the predicted word
            w_idx = T.cast(T.argmax(s_t, axis=1), dtype='int32')

            return [hh, cc, s_t, w_idx]

        # build the teacher forcing loop
        use_visual_info = T.concatenate([T.ones((1,), dtype=np.int32), T.zeros(
            (self.conf['MAX_SENTENCE_LEN'],), dtype=np.int32)])
        if self.conf['DECODER']:
            #h0_hidden_matrix = self.encoder.hh_out[self.encoder.conf['MAX_SENTENCE_LEN']]

            h0_hidden_matrix = self.h0_hidden * \
                T.ones((self.x.shape[0], self.h0_hidden.shape[0]))
            v_input = T.concatenate(
                [self.encoder.hh_out[self.encoder.conf['MAX_SENTENCE_LEN']], self.v], axis=1)
            #v_input = T.printing.Print("v_input")(v_input)
        else:
            h0_hidden_matrix = self.h0_hidden * \
                T.ones((self.x.shape[0], self.h0_hidden.shape[0]))
            v_input = self.v
            #v_input = T.printing.Print("v_input_v")(v_input)

        h0_cell_matrix = self.h0_cell * \
            T.ones((self.x.shape[0], self.h0_cell.shape[0]))
        x_adj = T.concatenate(
            [T.zeros((1, self.x.T[0].shape[0]), dtype=self.x.dtype), self.x.T])
        y_adj = T.concatenate(
            [self.x.T, T.zeros((1, self.x.T[0].shape[0]), dtype=self.x.dtype)])
        [self.hh_out, self.cc_out, s], _ = theano.scan(fn=recurrance,
                                                       sequences=[x_adj, self.x_drop.dimshuffle(
                                                           (1, 0, 2)), self.y_drop.dimshuffle((1, 0, 2)), use_visual_info],
                                                       n_steps=self.conf['MAX_SENTENCE_LEN']+1,
                                                       non_sequences=v_input,
                                                       outputs_info=[h0_hidden_matrix, h0_cell_matrix, None])

        # build the semi-forced loop
        [_, _, s_semi, _], _ = theano.scan(fn=recurrance_partial_word_feedback,
                                           sequences=[x_adj, self.x_drop.dimshuffle((1, 0, 2)), self.y_drop.dimshuffle((1, 0, 2)),
                                                      use_visual_info, self.forced_word[:, :self.x.shape[0]]],
                                           n_steps=self.conf['MAX_SENTENCE_LEN']+1,
                                           non_sequences=self.v,
                                           outputs_info=[h0_hidden_matrix, h0_cell_matrix, None, T.zeros((self.x.shape[0],), dtype=np.int32)])

        # build the un-forced loop
        [_, _, _, self.wout_fb, _], _ = theano.scan(fn=recurrance_word_feedback,
                                                    non_sequences=self.v_single,
                                                    outputs_info=[self.h0_hidden, self.h0_cell, None, np.array(
                                                        0, dtype=np.int32), T.ones((1,), dtype=np.int32)[0]],
                                                    n_steps=self.nstep)

        if self.conf['SEMI_FORCED'] < 1:
            s = s_semi

        self.new_s = s.reshape((s.shape[0] * s.shape[1], s.shape[2]))
        softmax_out = self.build_loss_function(self.new_s, y_adj)
        self.softmax_out = softmax_out

        # calculate the perplexity
        ff_small = T.constant(1e-20, dtype=theano.config.floatX)
        ppl_idx = softmax_out.shape[1] * \
            T.arange(softmax_out.shape[0]) + T.flatten(y_adj)
        hsum = -T.log2(T.flatten(softmax_out)[ppl_idx] + ff_small)
        hsum_new = hsum.reshape((s.shape[0], s.shape[1])).T
        self.perplexity_sentence = 2 ** (T.sum(hsum_new,
                                               axis=1) / T.sum(self.xlen, axis=1))
        self.perplexity_batch = 2 ** (T.sum(hsum *
                                            T.flatten(self.xlen.T)) / T.sum(self.xlen))
        self.perplexity_batch_v = T.sum(hsum * T.flatten(self.xlen.T))
        self.perplexity_batch_n = T.sum(self.xlen)
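        # That is, perplexity = 2 ** (-(1/N) * sum(log2 p(w_t))), where N is the
        # number of unmasked tokens given by `xlen`; perplexity_batch_v and
        # perplexity_batch_n expose the numerator and denominator separately so
        # results can be aggregated across batches.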

        # build the single step code
        h_hid = T.vector("h_hid")
        h_cell = T.vector("h_cell")
        x_drop_val = T.ones(
            (self.conf['emb_size'],), dtype=theano.config.floatX)
        y_drop_val = T.ones(
            (self.conf['lstm_hidden_size'],), dtype=theano.config.floatX)
        use_v = T.iscalar("use_v")
        word_t_s = T.iscalar("word_t_s")
        one_step_theano = recurrance(
            word_t_s, x_drop_val, y_drop_val, use_v, h_hid, h_cell, v_i)

        if self.conf['DECODER']:
            self.one_step = theano.function(
                [word_t_s, use_v, h_hid, h_cell, v_i], outputs=one_step_theano)
        else:
            tmp_x = T.imatrix("tmp_x")
            tmp_v = T.matrix("tmp_v")
            x_d_tmp = T.ones(
                (1, self.conf['MAX_SENTENCE_LEN'], self.conf['emb_size']), dtype=theano.config.floatX)
            y_d_tmp = T.ones(
                (1, self.conf['MAX_SENTENCE_LEN'], self.conf['lstm_hidden_size']), dtype=theano.config.floatX)
            x_d_tmp.type.broadcastable = (False, False, False)
            y_d_tmp.type.broadcastable = (False, False, False)
            self.start_step = theano.function([tmp_x, tmp_v],
                                              outputs=self.hh_out[self.conf['MAX_SENTENCE_LEN']],
                                              givens={self.x_drop: x_d_tmp,
                                                      self.y_drop: y_d_tmp,
                                                      self.x: tmp_x,
                                                      self.v: tmp_v})
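        # Decoding sketch (not part of the original code; argument order and return values
        # are assumed from the scan above, where recurrance returns (h_hid, h_cell, softmax)):
        #   h = np.zeros(conf['lstm_hidden_size'], dtype='float32'); c = h.copy()
        #   word = 0  # the zero prepended in x_adj acts as the start token
        #   for _ in range(conf['MAX_SENTENCE_LEN']):
        #       h, c, probs = model.one_step(word, 1, h, c, v_single)
        #       word = int(probs.argmax())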
Example #50
    # input layer for (batch, time, features) observations
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, N_TIME_STEPS, N_INPUT_FEATURES))

    # followed by a dense layer to produce the action
    l_action = lasagne.layers.DenseLayer(incoming=l_in,
                                         W=lasagne.init.Uniform([-0.1, 0.1]),
                                         num_units=N_ACTIONS,
                                         nonlinearity=None,
                                         b=None)

    l_action_formed = lasagne.layers.ReshapeLayer(l_action,
                                                  shape=(N_BATCH, N_TIME_STEPS, N_ACTIONS))


    # Cost function is mean squared error
    input = T.tensor3('input')
    target_output = T.tensor3('target_output')
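
    # A minimal sketch of the mean-squared-error cost mentioned above (an assumption; the
    # original snippet does not show the cost, and this reuses the old-style
    # layer.get_output API used further down):
    predicted_output = l_action_formed.get_output(input)
    cost = T.mean((predicted_output - target_output) ** 2)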

    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)

    # compile a function mapping input sequences to predicted actions
    action_prediction = theano.function([input], l_action_formed.get_output(input))


    all_params = lasagne.layers.get_all_params(l_action_formed)

    records = []
    for time in xrange(50):
Example #51
def build_text_only_network(d_word, d_hidden, lr, eps=1e-6):

    # input theano vars
    in_context_fc7 = T.tensor3(name='context_images')
    in_context_bb = T.tensor4(name='context_bb')
    in_bbmask = T.tensor3(name='bounding_box_mask')
    in_context = T.itensor4(name='context')
    in_cmask = T.tensor4(name='context_mask')
    in_answer_fc7 = T.matrix(name='answer_images')
    in_answer_bb = T.matrix(name='answer_bb')
    in_answers = T.itensor3(name='answers')
    in_amask = T.tensor3(name='answer_mask')
    in_labels = T.imatrix(name='labels')

    # define network
    l_context_fc7 = lasagne.layers.InputLayer(shape=(None, 3, 4096),
                                              input_var=in_context_fc7)
    l_answer_fc7 = lasagne.layers.InputLayer(shape=(None, 4096),
                                             input_var=in_answer_fc7)

    l_context = lasagne.layers.InputLayer(shape=(None, max_panels, max_boxes,
                                                 max_words),
                                          input_var=in_context)
    l_answers = lasagne.layers.InputLayer(shape=(None, 3, max_words),
                                          input_var=in_answers)

    l_cmask = lasagne.layers.InputLayer(shape=l_context.shape,
                                        input_var=in_cmask)
    l_amask = lasagne.layers.InputLayer(shape=l_answers.shape,
                                        input_var=in_amask)
    l_bbmask = lasagne.layers.InputLayer(shape=(None, 3, max_boxes),
                                         input_var=in_bbmask)

    # contexts and answers should share embeddings
    l_context_emb = lasagne.layers.EmbeddingLayer(l_context,
                                                  len_voc,
                                                  d_word,
                                                  name='word_emb')
    l_answer_emb = lasagne.layers.EmbeddingLayer(l_answers,
                                                 len_voc,
                                                 d_word,
                                                 W=l_context_emb.W)

    l_context_box_reps = SumAverageLayer([l_context_emb, l_cmask],
                                         compute_sum=True,
                                         num_dims=4)
    l_box_reshape = lasagne.layers.ReshapeLayer(l_context_box_reps,
                                                (-1, max_boxes, d_word))
    l_bbmask_reshape = lasagne.layers.ReshapeLayer(l_bbmask, (-1, max_boxes))
    l_box_lstm = lasagne.layers.LSTMLayer(l_box_reshape,
                                          num_units=d_word,
                                          mask_input=l_bbmask_reshape,
                                          only_return_final=True)
    l_context_panel_reps = lasagne.layers.ReshapeLayer(l_box_lstm,
                                                       (-1, 3, d_word))
    l_context_final_reps = lasagne.layers.LSTMLayer(l_context_panel_reps,
                                                    num_units=d_word,
                                                    only_return_final=True)
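    # Hierarchical context encoding (summary of the layers above): word embeddings are
    # mask-summed into box vectors, an LSTM over each panel's boxes gives panel vectors,
    # and a second LSTM over the 3 context panels gives the final context representation.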

    l_ans_reps = SumAverageLayer([l_answer_emb, l_amask],
                                 compute_sum=True,
                                 num_dims=3)
    l_scores = InnerProductLayer([l_context_final_reps, l_ans_reps])

    preds = lasagne.layers.get_output(l_scores)
    loss = T.mean(lasagne.objectives.categorical_crossentropy(
        preds, in_labels))

    all_params = lasagne.layers.get_all_params(l_scores, trainable=True)
    updates = lasagne.updates.adam(loss, all_params, learning_rate=lr)
    train_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask, in_labels
    ],
                               loss,
                               updates=updates,
                               on_unused_input='warn')
    pred_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask
    ],
                              preds,
                              on_unused_input='warn')
    return train_fn, pred_fn, l_scores
Example #52
def _compile_ll_F_Y():
    Y = tensor.matrix('Y')
    Wf = tensor.tensor3('Wf')
    sigma_inv = tensor.matrix('sigma_inv')
    c = 1.0 / 2 * (theano.dot((Y - Wf), sigma_inv) * (Y - Wf)).sum(axis=2)
    return theano.function([Y, Wf, sigma_inv], c)
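
# Usage sketch (shapes are assumptions, not stated in the original): with Y of shape
# (T, D), Wf of shape (K, T, D) and sigma_inv of shape (D, D), broadcasting yields a
# (K, T) array of 0.5 * (Y - Wf) Sigma^{-1} (Y - Wf)^T terms, e.g.
#   ll_fn = _compile_ll_F_Y()
#   c = ll_fn(np.zeros((3, 2)), np.zeros((4, 3, 2)), np.eye(2))  # -> shape (4, 3)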
Example #53
def create_iter_functions(dataset,
                          output_layer,
                          batch_size=BATCH_SIZE,
                          learning_rate=LEARNING_RATE,
                          momentum=MOMENTUM):
    """
    Create functions for training, validation and testing to iterate one epoch.
    """
    batch_index = T.iscalar('batch_index')
    X_batch = T.tensor3('input')
    y_batch = T.matrix('target_output')
    batch_slice = slice(batch_index * batch_size,
                        (batch_index + 1) * batch_size)

    prediction = T.argmax(lasagne.layers.get_output(output_layer,
                                                    X_batch,
                                                    deterministic=True),
                          axis=-1)
    accuracy = T.mean(T.eq(prediction, T.argmax(y_batch, axis=-1)),
                      dtype=theano.config.floatX)

    loss_train = cross_ent_cost(
        lasagne.layers.get_output(output_layer, X_batch, deterministic=False),
        y_batch)

    loss_eval = cross_ent_cost(
        lasagne.layers.get_output(output_layer, X_batch, deterministic=True),
        y_batch)

    all_params = lasagne.layers.get_all_params(output_layer)
    updates = lasagne.updates.adam(loss_train,
                                   all_params,
                                   learning_rate=learning_rate)

    iter_train = theano.function(
        [batch_index],
        loss_train,
        updates=updates,
        givens={
            X_batch: dataset['X_train'][batch_slice],
            y_batch: dataset['y_train'][batch_slice],
        },
    )

    iter_valid = theano.function(
        [batch_index],
        [loss_eval, accuracy],
        givens={
            X_batch: dataset['X_valid'][batch_slice],
            y_batch: dataset['y_valid'][batch_slice],
        },
    )

    iter_test = theano.function(
        [batch_index],
        [loss_eval, accuracy],
        givens={
            X_batch: dataset['X_test'][batch_slice],
            y_batch: dataset['y_test'][batch_slice],
        },
    )

    return dict(
        train=iter_train,
        valid=iter_valid,
        test=iter_test,
    )
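
# Usage sketch (batch counts are assumptions; this snippet does not define them):
# iter_funcs = create_iter_functions(dataset, output_layer)
# for epoch in range(num_epochs):
#     train_losses = [iter_funcs['train'](b) for b in range(num_batches_train)]
#     valid_loss, valid_acc = iter_funcs['valid'](0)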
Example #54
def test_gpu_rowwise_switch():
    assert theano.config.device.startswith("gpu"), "Need to test on GPU!"

    data = [
        # 4 x 2
        (np.array([[0.22323515, 0.36703175], [0.82260513, 0.3461504],
                   [0.82362652, 0.81626087], [0.95270008, 0.2226797]]),
         np.array([[0.36341551, 0.20102882], [0.24144639, 0.45237923],
                   [0.39951822, 0.7348066],
                   [0.16649647, 0.60306537]]), np.array([1, 0, 1, 1]),
         np.array([[0.22323515, 0.36703175], [0.24144639, 0.45237923],
                   [0.82362652, 0.81626087], [0.95270008, 0.2226797]])),

        # 2 x 3 x 4
        (np.array([[[0.48769062, 0.82649632, 0.2047115, 0.41437615],
                    [0.25290664, 0.87164914, 0.80968588, 0.49295084],
                    [0.71438099, 0.97913502, 0.37598001, 0.76958707]],
                   [[0.37605973, 0.538358, 0.74304674, 0.84346291],
                    [0.95310617, 0.61540292, 0.49881143, 0.1028554],
                    [0.83481996, 0.90969569, 0.40410424, 0.34419989]]]),
         np.array([[[0.7289117, 0.97323253, 0.19070121, 0.64164653],
                    [0.26816493, 0.76093069, 0.95284825, 0.77350426],
                    [0.55415519, 0.39431256, 0.86588665, 0.50031027]],
                   [[0.1980869, 0.7753601, 0.26810868, 0.3628802],
                    [0.2488143, 0.21278388, 0.09724567, 0.58457886],
                    [0.12295105, 0.75321368, 0.37258797,
                     0.27756972]]]), np.array([1, 0]),
         np.array([[[0.48769062, 0.82649632, 0.2047115, 0.41437615],
                    [0.25290664, 0.87164914, 0.80968588, 0.49295084],
                    [0.71438099, 0.97913502, 0.37598001, 0.76958707]],
                   [[0.1980869, 0.7753601, 0.26810868, 0.3628802],
                    [0.2488143, 0.21278388, 0.09724567, 0.58457886],
                    [0.12295105, 0.75321368, 0.37258797, 0.27756972]]]))
    ]

    A2, B2 = T.matrices("AB")
    A3, B3 = T.tensor3("A"), T.tensor3("B")
    mask = T.ivector("mask")

    switch2 = T.switch(mask.dimshuffle(0, "x"), A2, B2)
    switch3 = T.switch(mask.dimshuffle(0, "x", "x"), A3, B3)
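
    # Row-wise selection: entry i of `mask` chooses row/slice i from A (mask != 0) or from
    # B (mask == 0); the dimshuffles add broadcastable axes so the 1-D mask lines up with
    # the 2-D and 3-D operands.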

    f2 = theano.function([A2, B2, mask], switch2)
    f3 = theano.function([A3, B3, mask], switch3)

    print "Graph of 2dim switch:"
    theano.printing.debugprint(f2.maker.fgraph.outputs[0])
    print "Graph of 3dim switch:"
    theano.printing.debugprint(f3.maker.fgraph.outputs[0])

    for instance in data:
        # Retrieve appropriate function
        func = f2 if instance[0].ndim == 2 else f3

        # Cast to float-friendly types
        instance = [
            x.astype(np.float32) if x.dtype.kind == 'f' else x.astype(np.int32)
            for x in instance
        ]

        yield tuple([_test_gpu_rowwise_switch_inner, func] + instance)
Example #55
    def theano_vars(self):
        return [T.tensor3('x', dtype=theano.config.floatX),
                T.tensor3('y', dtype=theano.config.floatX)]
Example #56
    grus = [gru0]
    for i in xrange(1, N_GRUS):
        gru = lib.ops.LowMemGRU('Recurrence.GRU' + str(i),
                                DIM,
                                DIM,
                                grus[-1],
                                h0=h0[:, i])
        grus.append(gru)

    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (grus[-1], last_hidden)


sequences = T.imatrix('sequences')
h0 = T.tensor3('h0')
reset = T.iscalar('reset')

frames = sequences.reshape((sequences.shape[0], -1, FRAME_SIZE))
processed_frames = FrameProcessor(frames)

contexts, new_h0 = Recurrence(processed_frames[:, :-1], h0, reset)

mu_prior, log_sigma_prior = Prior(contexts)
mu_post, log_sigma_post = Encoder(processed_frames[:, 1:], contexts)

# log_sigma_prior = T.log(T.nnet.softplus(log_sigma_prior))
# log_sigma_post = T.log(T.nnet.softplus(log_sigma_post))

eps = theano_srng.normal(mu_post.shape).astype('float32')
latents = mu_post
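# NOTE (added for clarity): eps is sampled but not used within this excerpt; the usual
# reparameterisation would instead be latents = mu_post + T.exp(log_sigma_post) * eps.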
Example #57
def test_lstm_hid_init_layer_eval():
    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
    # a network with a `Layer` as input to `hid_init` to a network with a
    # `np.array` as input to `hid_init`
    n_units = 7
    n_test_cases = 2
    in_shp = (n_test_cases, 2, 3)
    in_h_shp = (1, n_units)
    in_cell_shp = (1, n_units)

    # dummy inputs
    X_test = np.ones(in_shp, dtype=theano.config.floatX)
    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
    Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX)
    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
    Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1))

    # network with `Layer` initializer for hid_init
    l_inp = InputLayer(in_shp)
    l_inp_h = InputLayer(in_h_shp)
    l_inp_cell = InputLayer(in_cell_shp)
    l_rec_inp_layer = LSTMLayer(l_inp,
                                n_units,
                                hid_init=l_inp_h,
                                cell_init=l_inp_cell,
                                nonlinearity=None)

    # network with `np.array` initializer for hid_init
    l_rec_nparray = LSTMLayer(l_inp,
                              n_units,
                              hid_init=Xh_test,
                              cell_init=Xc_test,
                              nonlinearity=None)

    # copy network parameters from l_rec_inp_layer to l_rec_nparray
    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
    for k, v in l_rn_param.items():
        if k in l_il_param:
            v.set_value(l_il_param[k].get_value())

    # build the theano functions
    X = T.tensor3()
    Xh = T.matrix()
    Xc = T.matrix()
    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer, {
        l_inp: X,
        l_inp_h: Xh,
        l_inp_cell: Xc
    })
    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})

    # test both nets with dummy input
    output_val_inp_layer = output_inp_layer.eval({
        X: X_test,
        Xh: Xh_test_batch,
        Xc: Xc_test_batch
    })
    output_val_nparray = output_nparray.eval({X: X_test})

    # check output given `Layer` is the same as with `np.array`
    assert np.allclose(output_val_inp_layer, output_val_nparray)
Example #58
    rating_freq = np.zeros((6040, 5))
    init_b = np.zeros((6040, 5))
    for batch in valid_monitor_stream.get_epoch_iterator():
        inp_r, out_r, inp_m, out_m = batch
        rating_freq += inp_r.sum(axis=0)

    log_rating_freq = np.log(rating_freq + 1e-8)
    log_rating_freq_diff = np.diff(log_rating_freq, axis=1)
    init_b[:, 1:] = log_rating_freq_diff
    init_b[:, 0] = log_rating_freq[:, 0]
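    # Equivalently (added for clarity): init_b holds log_rating_freq in differenced form,
    # so np.cumsum(init_b, axis=1) recovers log_rating_freq row by row.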
    #     init_b = np.log(rating_freq / (rating_freq.sum(axis=1)[:, None] + 1e-8) +1e-8)  * (rating_freq>0)

    new_items = np.where(rating_freq.sum(axis=1) == 0)[0]

    input_ratings = T.tensor3(name='input_ratings', dtype=theano.config.floatX)
    output_ratings = T.tensor3(name='output_ratings',
                               dtype=theano.config.floatX)
    input_masks = T.matrix(name='input_masks', dtype=theano.config.floatX)
    output_masks = T.matrix(name='output_masks', dtype=theano.config.floatX)

    input_ratings_cum = T.extra_ops.cumsum(input_ratings[:, :, ::-1],
                                           axis=2)[:, :, ::-1]
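    # The reversed cumulative sum turns a one-hot rating over the last axis (e.g.
    # [0, 0, 1, 0, 0] for 3 stars) into an "at least r stars" encoding ([1, 1, 1, 0, 0]).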

    #     hidden_size = [256]
    if activation_function == 'reclin':
        act = Rectifier
    elif activation_function == 'tanh':
        act = Tanh
    elif activation_function == 'sigmoid':
        act = Logistic
Example #59
import pickle

import lasagne
from lasagne.layers import helper
import theano
import theano.tensor as T

from permutationlayer import PermutationalLayer
from simulate import doSimulation

SITES = 8
VARS = 4
HIDDEN = 128

invar = T.tensor3()
targ = T.tensor3()

input = lasagne.layers.InputLayer((None, VARS, SITES), input_var=invar)

# Define subnetwork for 1st layer
dinp_1 = lasagne.layers.InputLayer((None, 2 * VARS, SITES, SITES))
dense1_1 = lasagne.layers.NINLayer(dinp_1, num_units=HIDDEN)
dense2_1 = lasagne.layers.NINLayer(dense1_1, num_units=HIDDEN)
dense3_1 = lasagne.layers.NINLayer(dense2_1, num_units=HIDDEN)
dense4_1 = lasagne.layers.NINLayer(dense3_1, num_units=HIDDEN)

# Define subnetwork for 2nd layer
dinp2 = lasagne.layers.InputLayer((None, 2 * HIDDEN, SITES, SITES))
dense1_2 = lasagne.layers.NINLayer(dinp2, num_units=HIDDEN)
dense2_2 = lasagne.layers.NINLayer(dense1_2, num_units=HIDDEN)
Example #60
def test_gru_grad_clipping():
    # test that you can set the grad_clipping argument
    x = T.tensor3()
    l_rec = GRULayer(InputLayer((2, 2, 3)), 5, grad_clipping=1)
    output = lasagne.layers.get_output(l_rec, x)