Example #1
    def __init__(self, num_hidden, num_features, seq_length, mb_size, tf_states, rf_states):
        
        tf_states = T.specify_shape(tf_states, (seq_length, mb_size, num_features))
        rf_states = T.specify_shape(rf_states, (seq_length, mb_size, num_features))

        hidden_state_features = T.specify_shape(T.concatenate([tf_states, rf_states], axis = 1), (seq_length, mb_size * 2, num_features))

        gru_params_1 = init_tparams(param_init_gru(None, {}, prefix = "gru1", dim = num_hidden, nin = num_features))
        #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix = "gru2", dim = num_hidden, nin = num_hidden + num_features))
        #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix = "gru3", dim = num_hidden, nin = num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1, hidden_state_features, None, prefix = 'gru1')[0]
        #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis = 2), None, prefix = 'gru2', backwards = True)[0]
        #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis = 2), None, prefix = 'gru3')[0]

        final_out_recc = T.specify_shape(T.mean(gru_1_out, axis = 0), (mb_size * 2, num_hidden))

        h_out_1 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden), num_units = 1, nonlinearity=None)

        h_out_1_value = h_out_1.get_output_for(final_out_recc)
        h_out_4_value = h_out_4.get_output_for(h_out_1_value)

        raw_y = h_out_4_value
        #raw_y = T.clip(h_out_4_value, -10.0, 10.0)
        classification = T.nnet.sigmoid(raw_y)

        #tf comes before rf.  
        p_real =  classification[:mb_size]
        p_gen  = classification[mb_size:]

        #bce = lambda r,t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))

        self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
        self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()
        self.d_cost = self.d_cost_real + self.d_cost_gen
        self.g_cost = self.g_cost_d


        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4,trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_3,trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_2,trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1,trainable=True)

        self.params += gru_params_1.values()
        #self.params += gru_params_2.values()
        #self.params += gru_params_3.values()

        self.accuracy = T.mean(T.eq(T.ones(p_real.shape).flatten(), T.gt(p_real, 0.5).flatten())) + T.mean(T.eq(T.ones(p_gen.shape).flatten(), T.lt(p_gen, 0.5).flatten()))
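A minimal usage sketch for the discriminator above, assuming the class is named Discriminator (the snippet omits the class line) and that bce, init_tparams, param_init_gru, gru_layer and DenseLayer come from the surrounding project; the sizes and the names disc/train_d below are placeholders:

import theano
import theano.tensor as T
import lasagne

# symbolic teacher-forced and free-running states: (seq_length, mb_size, num_features)
tf_states = T.tensor3('tf_states')
rf_states = T.tensor3('rf_states')

disc = Discriminator(num_hidden=512, num_features=256, seq_length=20, mb_size=32,
                     tf_states=tf_states, rf_states=rf_states)

# Adam on the discriminator cost; a generator step would use disc.g_cost instead
d_updates = lasagne.updates.adam(disc.d_cost, disc.params, learning_rate=1e-4)
train_d = theano.function([tf_states, rf_states],
                          [disc.d_cost, disc.accuracy],
                          updates=d_updates)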
Example #2
File: rnn.py  Project: alexmlamb/IFT6266H16
def rnn_one_step(config, params, observed_sequence_last, observed_sequence_current, use_samples, last_states, last_outputs, last_loss):

    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    last_states = T.specify_shape(last_states, (config['mb_size'],2 * config['num_hidden']))
    last_outputs = T.specify_shape(last_outputs, (config['mb_size'],))

    obs_last = T.specify_shape(observed_sequence_last, (mb_size,)).reshape((mb_size,1))
    obs_curr = T.specify_shape(observed_sequence_current, (mb_size,))

    obs_use = theano.ifelse.ifelse(use_samples, last_outputs.reshape((mb_size,1)), obs_last)

    # note: the slice boundaries below assume num_hidden == 1024
    last_states_1 = last_states[:,0:1024]
    last_states_2 = last_states[:,1024:2048]

    next_states_1 = T.specify_shape(gru_layer(params,state_below = obs_use, options = None, prefix='gru1', mask=None, one_step=True, init_state=last_states_1, backwards=False)[0], (mb_size, num_hidden))

    next_states_2 = T.specify_shape(gru_layer(params,state_below = next_states_1, options = None, prefix='gru2', mask=None, one_step=True, init_state=last_states_2, backwards=False)[0], (mb_size, num_hidden))

    h1 = T.specify_shape(fflayer(params,next_states_2,options=None,prefix='ff_h1',activ='lambda x: tensor.maximum(x,0.0)'), (mb_size, num_hidden))

    h2 = T.specify_shape(fflayer(params,h1,options=None,prefix='ff_h2',activ='lambda x: tensor.maximum(x,0.0)'), (mb_size, num_hidden))

    y = T.specify_shape(fflayer(params,h2,options = None,prefix='ff_1',activ='lambda x: x').flatten(), (mb_size,))
    #y = T.specify_shape(T.sum(next_states, axis = 1), (mb_size,))

    loss = T.sqr(y - obs_curr)

    obs_curr = T.specify_shape(observed_sequence_current, (mb_size,))

    next_outputs = y

    next_states = T.specify_shape(T.concatenate([next_states_1, next_states_2], axis = 1), (mb_size, num_hidden * 2))

    return next_states, next_outputs, loss
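A hedged sketch of driving rnn_one_step with theano.scan, assuming a params dict built elsewhere with the gru/ff initialisers this function expects, and num_hidden = 1024 so the hard-coded state slices above line up; seq, step and the config values are illustrative:

import theano
import theano.tensor as T

# params: shared-variable dict from the project's param_init_gru / fflayer initialisers (assumed)
config = {'mb_size': 64, 'num_hidden': 1024}
seq = T.matrix('seq')                    # (seq_length, mb_size)
use_samples = T.iscalar('use_samples')   # 0: teacher forcing, 1: feed back own outputs

init_states = T.zeros((config['mb_size'], 2 * config['num_hidden']))
init_outputs = T.zeros((config['mb_size'],))
init_loss = T.zeros((config['mb_size'],))

def step(obs_last, obs_curr, states_tm1, outputs_tm1, loss_tm1):
    return rnn_one_step(config, params, obs_last, obs_curr,
                        use_samples, states_tm1, outputs_tm1, loss_tm1)

# taps=[-1, 0] hands the step each adjacent pair (x_{t-1}, x_t) of the sequence
(states, outputs, losses), _ = theano.scan(
    step,
    sequences=[dict(input=seq, taps=[-1, 0])],
    outputs_info=[init_states, init_outputs, init_loss])

total_loss = losses.mean()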
Example #3
    def __init__(self, num_hidden, num_features, mb_size,
                 hidden_state_features, target):
        self.mb_size = mb_size
        #self.seq_length = seq_length

        #dropout: 0.8 was used at one point; 1.0 is passed here
        hidden_state_features = dropout(hidden_state_features, 1.0)

        gru_params_1 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru1",
                           dim=num_hidden,
                           nin=num_features))
        gru_params_2 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru2",
                           dim=num_hidden,
                           nin=num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1,
                              hidden_state_features,
                              None,
                              prefix='gru1',
                              gradient_steps=100)[0]
        gru_2_out = gru_layer(gru_params_2,
                              T.concatenate([gru_1_out, hidden_state_features],
                                            axis=2),
                              None,
                              prefix='gru2',
                              backwards=True,
                              gradient_steps=100)[0]

        self.gru_1_out = gru_1_out

        final_out_recc = T.mean(gru_2_out, axis=0)

        h_out_1 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        h_out_2 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=1,
                             nonlinearity=None)

        h_out_1_value = dropout(h_out_1.get_output_for(final_out_recc), 1.0)
        h_out_2_value = dropout(h_out_2.get_output_for(h_out_1_value), 1.0)
        h_out_4_value = h_out_4.get_output_for(h_out_2_value)

        raw_y = T.clip(h_out_4_value, -10.0, 10.0)

        classification = T.nnet.sigmoid(raw_y)

        self.accuracy = T.mean(
            T.eq(target,
                 T.gt(classification, 0.5).flatten()))

        p_real = classification[0:mb_size]
        p_gen = classification[mb_size:mb_size * 2]

        self.d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()

        self.g_cost_real = bce(p_real, T.zeros(p_gen.shape)).mean()
        self.g_cost_gen = bce(p_gen, T.ones(p_real.shape)).mean()

        #self.g_cost = self.g_cost_gen
        self.g_cost = self.g_cost_real + self.g_cost_gen

        print "pulling both TF and PF togeher"

        self.d_cost = self.d_cost_real + self.d_cost_gen
        #if d_cost < 1.0, use g cost.

        self.d_cost = T.switch(
            T.gt(self.accuracy, 0.95) * T.gt(p_real.mean(), 0.99) *
            T.lt(p_gen.mean(), 0.01), 0.0, self.d_cost)
        '''
        gX = gen(Z, *gen_params)

        p_real = discrim(X, *discrim_params)
        p_gen = discrim(gX, *discrim_params)

        d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
        d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
        g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

        d_cost = d_cost_real + d_cost_gen
        g_cost = g_cost_d

        cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
        d_updates = d_updater(discrim_params, d_cost)
        g_updates = g_updater(gen_params, g_cost)

        '''

        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)

        #self.params += h_out_1.getParams() + h_out_2.getParams() + h_out_3.getParams()

        #        self.params += lasagne.layers.get_all_params(h_initial_1,trainable=True)
        #        self.params += lasagne.layers.get_all_params(h_initial_2,trainable=True)

        self.params += gru_params_1.values()
        self.params += gru_params_2.values()
        '''
        layerParams = c1.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]
        layerParams = c2.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]
        layerParams = c3.getParams()
        for paramKey in layerParams:
            self.params += [layerParams[paramKey]]

        '''

        #all_grads = T.grad(self.loss, self.params)
        #for j in range(0, len(all_grads)):
        #    all_grads[j] = T.switch(T.isnan(all_grads[j]), T.zeros_like(all_grads[j]), all_grads[j])
        #self.updates = lasagne.updates.adam(all_grads, self.params, learning_rate = 0.0001, beta1 = 0.5)
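The commented-out lines just above sketch NaN-guarded gradients fed into Adam; a hedged reconstruction of that step applied to the discriminator cost, where disc stands for an instance of this class (a name introduced here, not from the source):

import theano.tensor as T
import lasagne

grads = T.grad(disc.d_cost, disc.params)
# zero out any NaN gradient entries before the update, as the comments suggest
grads = [T.switch(T.isnan(g), T.zeros_like(g), g) for g in grads]
d_updates = lasagne.updates.adam(grads, disc.params, learning_rate=0.0001, beta1=0.5)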
Example #4
    def __init__(self, num_hidden, num_features, seq_length, mb_size,
                 tf_states, rf_states):

        tf_states = T.specify_shape(tf_states,
                                    (seq_length, mb_size, num_features))
        rf_states = T.specify_shape(rf_states,
                                    (seq_length, mb_size, num_features))

        hidden_state_features = T.specify_shape(
            T.concatenate([tf_states, rf_states], axis=1),
            (seq_length, mb_size * 2, num_features))

        gru_params_1 = init_tparams(
            param_init_gru(None, {},
                           prefix="gru1",
                           dim=num_hidden,
                           nin=num_features))
        #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix = "gru2", dim = num_hidden, nin = num_hidden + num_features))
        #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix = "gru3", dim = num_hidden, nin = num_hidden + num_features))

        gru_1_out = gru_layer(gru_params_1,
                              hidden_state_features,
                              None,
                              prefix='gru1')[0]
        #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis = 2), None, prefix = 'gru2', backwards = True)[0]
        #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis = 2), None, prefix = 'gru3')[0]

        final_out_recc = T.specify_shape(T.mean(gru_1_out, axis=0),
                                         (mb_size * 2, num_hidden))

        h_out_1 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=num_hidden,
                             nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units = num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
        h_out_4 = DenseLayer((mb_size * 2, num_hidden),
                             num_units=1,
                             nonlinearity=None)

        h_out_1_value = h_out_1.get_output_for(final_out_recc)
        h_out_4_value = h_out_4.get_output_for(h_out_1_value)

        raw_y = h_out_4_value
        #raw_y = T.clip(h_out_4_value, -10.0, 10.0)
        classification = T.nnet.sigmoid(raw_y)

        #tf comes before rf.
        p_real = classification[:mb_size]
        p_gen = classification[mb_size:]

        #bce = lambda r,t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))

        self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
        self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
        self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()
        self.d_cost = self.d_cost_real + self.d_cost_gen
        self.g_cost = self.g_cost_d

        self.classification = classification

        self.params = []
        self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_3,trainable=True)
        #self.params += lasagne.layers.get_all_params(h_out_2,trainable=True)
        self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)

        self.params += gru_params_1.values()
        #self.params += gru_params_2.values()
        #self.params += gru_params_3.values()

        self.accuracy = T.mean(
            T.eq(T.ones(p_real.shape).flatten(),
                 T.gt(p_real, 0.5).flatten())) + T.mean(
                     T.eq(
                         T.ones(p_gen.shape).flatten(),
                         T.lt(p_gen, 0.5).flatten()))
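The 0.9 / 0.1 targets above smooth the real and generated labels rather than using hard 1 / 0; a small numeric sketch, assuming bce is T.nnet.binary_crossentropy applied to the sigmoid outputs (f_bce and the sample probabilities are illustrative):

import numpy
import theano
import theano.tensor as T

p = T.vector('p')
t = T.vector('t')
f_bce = theano.function([p, t], T.nnet.binary_crossentropy(p, t).mean())

probs = numpy.asarray([0.9, 0.5, 0.1], dtype=theano.config.floatX)
print(f_bce(probs, 0.9 * numpy.ones_like(probs)))    # smoothed "real" target
print(f_bce(probs, 0.1 + numpy.zeros_like(probs)))   # smoothed "generated" target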
Example #5
def build_encoder(tparams,options,trng,use_noise,x_mask=None,sampling=False):
    
    x = tensor.matrix('x',dtype='int64')
    x.tag.test_value = (numpy.random.rand(5,10)*100).astype('int64')
    
    #for the backward rnn, we just need to invert x
    xr = x[::-1]   # differs here from the other version: xr = x[:,::-1]
    if x_mask is None:  # at sampling/test time
        xr_mask = None
    else:
        xr_mask = x_mask[::-1]
    
    # number of timesteps and number of samples
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]
    
    # whether to use dropout
    if options['use_dropout']:
        retain_probability_emb = 1-options['dropout_embedding']
        retain_probability_hidden = 1-options['dropout_hidden']
        retain_probability_source = 1-options['dropout_source']
        if sampling:
            if options['model_version'] < 0.1:
                rec_dropout = theano.shared(numpy.array([retain_probability_hidden]*2, dtype='float32'))
                rec_dropout_r = theano.shared(numpy.array([retain_probability_hidden]*2, dtype='float32'))
                emb_dropout = theano.shared(numpy.array([retain_probability_emb]*2, dtype='float32'))
                emb_dropout_r = theano.shared(numpy.array([retain_probability_emb]*2, dtype='float32'))
                source_dropout = theano.shared(numpy.float32(retain_probability_source))
            else:
                rec_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
                rec_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
                emb_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
                emb_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
                source_dropout = theano.shared(numpy.float32(1.))
        else:
            if options['model_version'] < 0.1:
                scaled = False
            else:
                scaled = True
            rec_dropout = shared_dropout_layer((2, n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
            rec_dropout_r = shared_dropout_layer((2, n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
            emb_dropout = shared_dropout_layer((2, n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
            emb_dropout_r = shared_dropout_layer((2, n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
            source_dropout = shared_dropout_layer((n_timesteps, n_samples, 1), use_noise, trng, retain_probability_source, scaled)
            source_dropout = tensor.tile(source_dropout, (1,1,options['dim_word']))
    else:
        rec_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
        rec_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
        emb_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
        emb_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
    
    # word embedding for forward rnn (source)
    emb = tparams['Wemb'][x.flatten()]     # differs here from the other version
    emb = emb.reshape([n_timesteps,n_samples,options['dim_word']])
    if options['use_dropout']:
        emb *= source_dropout
    
    proj = gru_layer(tparams,emb,options,
                     prefix='encoder',
                     mask=x_mask,
                     emb_dropout=emb_dropout,
                     rec_dropout=rec_dropout,
                     profile=profile)
    
    # word embedding for backward rnn (source)
    embr = tparams['Wemb'][xr.flatten()]
    embr = embr.reshape([n_timesteps,n_samples,options['dim_word']])
    if options['use_dropout']:
        if sampling:
            embr *= source_dropout
        else:
            embr *= source_dropout[::-1]
    
    projr = gru_layer(tparams,embr,options,
                      prefix='encoder_r',
                      mask=xr_mask,
                      emb_dropout=emb_dropout_r,
                      rec_dropout=rec_dropout,
                      profile=profile)
    
    #context will be the concatenation of forward and backward rnns
    ctx = concatenate([proj[0],projr[0][::-1]],axis=proj[0].ndim-1)
    
    return x,ctx
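A minimal sketch of compiling the encoder above, assuming tparams, options, trng, use_noise and the module-level profile flag have been set up elsewhere in the project (the names x_mask and f_encode here are ours):

import theano
import theano.tensor as tensor

x_mask = tensor.matrix('x_mask', dtype='float32')
x, ctx = build_encoder(tparams, options, trng, use_noise, x_mask=x_mask)

# ctx concatenates forward states with time-reversed backward states along the
# last axis, so its shape is (n_timesteps, n_samples, 2 * options['dim'])
f_encode = theano.function([x, x_mask], ctx)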
Example #6
def rnn_one_step(config, params, observed_sequence_last,
                 observed_sequence_current, use_samples, last_states,
                 last_outputs, last_loss):

    mb_size = config['mb_size']
    num_hidden = config['num_hidden']

    last_states = T.specify_shape(
        last_states, (config['mb_size'], 2 * config['num_hidden']))
    last_outputs = T.specify_shape(last_outputs, (config['mb_size'], ))

    obs_last = T.specify_shape(observed_sequence_last, (mb_size, )).reshape(
        (mb_size, 1))
    obs_curr = T.specify_shape(observed_sequence_current, (mb_size, ))

    obs_use = theano.ifelse.ifelse(use_samples,
                                   last_outputs.reshape((mb_size, 1)),
                                   obs_last)

    last_states_1 = last_states[:, 0:1024]
    last_states_2 = last_states[:, 1024:2048]

    next_states_1 = T.specify_shape(
        gru_layer(params,
                  state_below=obs_use,
                  options=None,
                  prefix='gru1',
                  mask=None,
                  one_step=True,
                  init_state=last_states_1,
                  backwards=False)[0], (mb_size, num_hidden))

    next_states_2 = T.specify_shape(
        gru_layer(params,
                  state_below=next_states_1,
                  options=None,
                  prefix='gru2',
                  mask=None,
                  one_step=True,
                  init_state=last_states_2,
                  backwards=False)[0], (mb_size, num_hidden))

    h1 = T.specify_shape(
        fflayer(params,
                next_states_2,
                options=None,
                prefix='ff_h1',
                activ='lambda x: tensor.maximum(x,0.0)'),
        (mb_size, num_hidden))

    h2 = T.specify_shape(
        fflayer(params,
                h1,
                options=None,
                prefix='ff_h2',
                activ='lambda x: tensor.maximum(x,0.0)'),
        (mb_size, num_hidden))

    y = T.specify_shape(
        fflayer(params, h2, options=None, prefix='ff_1',
                activ='lambda x: x').flatten(), (mb_size, ))
    #y = T.specify_shape(T.sum(next_states, axis = 1), (mb_size,))

    loss = T.sqr(y - obs_curr)

    obs_curr = T.specify_shape(observed_sequence_current, (mb_size, ))

    next_outputs = y

    next_states = T.specify_shape(
        T.concatenate([next_states_1, next_states_2], axis=1),
        (mb_size, num_hidden * 2))

    return next_states, next_outputs, loss