from collections import OrderedDict

import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DenseLayer

# The remaining helpers used below (norm_weight, param_init_gru,
# param_init_gru_cond, param_init_fflayer, gru_layer, init_tparams,
# dropout, bce) come from the project's own layer utilities.


def init_params(options):
    params = OrderedDict()

    # embedding
    params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
    params['Wemb_dec'] = norm_weight(options['n_words_tgt'], options['dim_word'])

    # encoder: bidirectional RNN
    params = param_init_gru(options, params, prefix='encoder',
                            nin=options['dim_word'], dim=options['dim'])
    params = param_init_gru(options, params, prefix='encoder_r',
                            nin=options['dim_word'], dim=options['dim'])
    ctxdim = 2 * options['dim']

    # init state, init cell
    params = param_init_fflayer(options, params, prefix='ff_state',
                                nin=ctxdim, nout=options['dim'])

    # decoder
    params = param_init_gru_cond(options, params, prefix='decoder',
                                 nin=options['dim_word'], dim=options['dim'],
                                 dimctx=ctxdim)

    # readout
    params = param_init_fflayer(options, params, prefix='ff_logit_lstm',
                                nin=options['dim'], nout=options['dim_word'],
                                ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit_prev',
                                nin=options['dim_word'],
                                nout=options['dim_word'], ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit_ctx',
                                nin=ctxdim, nout=options['dim_word'],
                                ortho=False)
    params = param_init_fflayer(options, params, prefix='ff_logit',
                                nin=options['dim_word'],
                                nout=options['n_words_tgt'])

    return params
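# Usage sketch (the sizes here are hypothetical, not from the original
# source): init_params reads exactly these four keys from `options` and
# returns an OrderedDict of numpy weight matrices keyed by layer prefix.
options = {
    'n_words_src': 30000,  # source vocabulary size
    'n_words_tgt': 30000,  # target vocabulary size
    'dim_word': 512,       # word-embedding dimensionality
    'dim': 1024,           # GRU hidden-state dimensionality
}
params = init_params(options)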
def get_generator_params(config):
    params = {}
    params = param_init_gru(options=None, param=params, prefix='gru1',
                            nin=1, dim=config['num_hidden'])
    params = param_init_gru(options=None, param=params, prefix='gru2',
                            nin=config['num_hidden'], dim=config['num_hidden'])
    params = param_init_fflayer(options=None, param=params, prefix='ff_1',
                                nin=config['num_hidden'], nout=1, ortho=False)
    params = param_init_fflayer(options=None, param=params, prefix='ff_h1',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'], ortho=False)
    params = param_init_fflayer(options=None, param=params, prefix='ff_h2',
                                nin=config['num_hidden'],
                                nout=config['num_hidden'], ortho=False)

    # Wrap every parameter in a theano.shared variable so it can be
    # updated in-place during training.
    for paramKey in params:
        params[paramKey] = theano.shared(params[paramKey])

    return params
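# Usage sketch (hypothetical size): 'num_hidden' is the only key the
# function reads; every returned value is a theano.shared variable,
# ready to be used in gradient updates.
config = {'num_hidden': 512}
gen_params = get_generator_params(config)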
def __init__(self, num_hidden, num_features, mb_size, hidden_state_features,
             target):
    self.mb_size = mb_size
    #self.seq_length = seq_length

    # A second argument of 1.0 presumably disables dropout here
    # (the original note says a rate of 0.8 was used at some point).
    hidden_state_features = dropout(hidden_state_features, 1.0)

    gru_params_1 = init_tparams(
        param_init_gru(None, {}, prefix="gru1", dim=num_hidden,
                       nin=num_features))
    gru_params_2 = init_tparams(
        param_init_gru(None, {}, prefix="gru2", dim=num_hidden,
                       nin=num_hidden + num_features))

    # Forward GRU over the hidden-state features, then a backward GRU over
    # the concatenation of its outputs with the original features.
    gru_1_out = gru_layer(gru_params_1, hidden_state_features, None,
                          prefix='gru1', gradient_steps=100)[0]
    gru_2_out = gru_layer(gru_params_2,
                          T.concatenate([gru_1_out, hidden_state_features],
                                        axis=2),
                          None, prefix='gru2', backwards=True,
                          gradient_steps=100)[0]

    self.gru_1_out = gru_1_out

    # Average the recurrent features over time.
    final_out_recc = T.mean(gru_2_out, axis=0)

    h_out_1 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden,
                         nonlinearity=lasagne.nonlinearities.rectify)
    h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden,
                         nonlinearity=lasagne.nonlinearities.rectify)
    h_out_4 = DenseLayer((mb_size * 2, num_hidden), num_units=1,
                         nonlinearity=None)

    h_out_1_value = dropout(h_out_1.get_output_for(final_out_recc), 1.0)
    h_out_2_value = dropout(h_out_2.get_output_for(h_out_1_value), 1.0)
    h_out_4_value = h_out_4.get_output_for(h_out_2_value)

    # Clip the logits before the sigmoid for numerical stability.
    raw_y = T.clip(h_out_4_value, -10.0, 10.0)
    classification = T.nnet.sigmoid(raw_y)

    self.accuracy = T.mean(T.eq(target, T.gt(classification, 0.5).flatten()))

    # Teacher-forced samples occupy the first half of the minibatch,
    # free-running samples the second half.
    p_real = classification[0:mb_size]
    p_gen = classification[mb_size:mb_size * 2]

    self.d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    self.d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    self.g_cost_real = bce(p_real, T.zeros(p_real.shape)).mean()
    self.g_cost_gen = bce(p_gen, T.ones(p_gen.shape)).mean()

    # The generator is trained to pull both distributions together.
    #self.g_cost = self.g_cost_gen
    self.g_cost = self.g_cost_real + self.g_cost_gen
    print("pulling both TF and PF together")
    self.d_cost = self.d_cost_real + self.d_cost_gen
    # Freeze the discriminator once it is clearly winning: high accuracy
    # and near-saturated confidence on both real and generated samples.
    self.d_cost = T.switch(
        T.gt(self.accuracy, 0.95) * T.gt(p_real.mean(), 0.99) *
        T.lt(p_gen.mean(), 0.01),
        0.0, self.d_cost)

    '''
    Reference GAN cost structure:
    gX = gen(Z, *gen_params)
    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)
    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()
    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d
    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)
    '''

    self.classification = classification

    self.params = []
    self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
    self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)
    self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)
    #self.params += h_out_1.getParams() + h_out_2.getParams() + h_out_3.getParams()
    #self.params += lasagne.layers.get_all_params(h_initial_1, trainable=True)
    #self.params += lasagne.layers.get_all_params(h_initial_2, trainable=True)
    self.params += gru_params_1.values()
    self.params += gru_params_2.values()

    '''
    layerParams = c1.getParams()
    for paramKey in layerParams:
        self.params += [layerParams[paramKey]]
    layerParams = c2.getParams()
    for paramKey in layerParams:
        self.params += [layerParams[paramKey]]
    layerParams = c3.getParams()
    for paramKey in layerParams:
        self.params += [layerParams[paramKey]]
    '''

    #all_grads = T.grad(self.loss, self.params)
    #for j in range(0, len(all_grads)):
    #    all_grads[j] = T.switch(T.isnan(all_grads[j]),
    #                            T.zeros_like(all_grads[j]), all_grads[j])
    #self.updates = lasagne.updates.adam(all_grads, self.params,
    #                                    learning_rate=0.0001, beta1=0.5)
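# Usage sketch: the class name `Discriminator` and all sizes below are
# assumptions for illustration; the original only shows __init__.
hidden_state_features = T.tensor3('hsf')  # (seq_length, mb_size * 2, num_features)
target = T.vector('target')  # 1.0 for teacher-forced rows, 0.0 otherwise
disc = Discriminator(num_hidden=512, num_features=1024, mb_size=32,
                     hidden_state_features=hidden_state_features,
                     target=target)
# disc.d_cost and disc.g_cost can then be differentiated w.r.t. disc.params.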
def __init__(self, num_hidden, num_features, seq_length, mb_size, tf_states,
             rf_states):
    # tf_states: teacher-forced hidden states; rf_states: free-running states.
    tf_states = T.specify_shape(tf_states, (seq_length, mb_size, num_features))
    rf_states = T.specify_shape(rf_states, (seq_length, mb_size, num_features))

    # Stack both batches along the minibatch axis: TF first, then RF.
    hidden_state_features = T.specify_shape(
        T.concatenate([tf_states, rf_states], axis=1),
        (seq_length, mb_size * 2, num_features))

    gru_params_1 = init_tparams(
        param_init_gru(None, {}, prefix="gru1", dim=num_hidden,
                       nin=num_features))
    #gru_params_2 = init_tparams(param_init_gru(None, {}, prefix="gru2", dim=num_hidden, nin=num_hidden + num_features))
    #gru_params_3 = init_tparams(param_init_gru(None, {}, prefix="gru3", dim=num_hidden, nin=num_hidden + num_features))

    gru_1_out = gru_layer(gru_params_1, hidden_state_features, None,
                          prefix='gru1')[0]
    #gru_2_out = gru_layer(gru_params_2, T.concatenate([gru_1_out, hidden_state_features], axis=2), None, prefix='gru2', backwards=True)[0]
    #gru_3_out = gru_layer(gru_params_3, T.concatenate([gru_2_out, hidden_state_features], axis=2), None, prefix='gru3')[0]

    # Average the recurrent features over time.
    final_out_recc = T.specify_shape(T.mean(gru_1_out, axis=0),
                                     (mb_size * 2, num_hidden))

    h_out_1 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden,
                         nonlinearity=lasagne.nonlinearities.rectify)
    #h_out_2 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
    #h_out_3 = DenseLayer((mb_size * 2, num_hidden), num_units=num_hidden, nonlinearity=lasagne.nonlinearities.rectify)
    h_out_4 = DenseLayer((mb_size * 2, num_hidden), num_units=1,
                         nonlinearity=None)

    h_out_1_value = h_out_1.get_output_for(final_out_recc)
    h_out_4_value = h_out_4.get_output_for(h_out_1_value)

    raw_y = h_out_4_value
    #raw_y = T.clip(h_out_4_value, -10.0, 10.0)
    classification = T.nnet.sigmoid(raw_y)

    # TF samples come before RF samples in the concatenated batch.
    p_real = classification[:mb_size]
    p_gen = classification[mb_size:]

    #bce = lambda r, t: t * T.nnet.softplus(-r) + (1 - t) * (r + T.nnet.softplus(-r))

    # One-sided label smoothing: targets of 0.9 / 0.1 instead of 1 / 0.
    self.d_cost_real = bce(p_real, 0.9 * T.ones(p_real.shape)).mean()
    self.d_cost_gen = bce(p_gen, 0.1 + T.zeros(p_gen.shape)).mean()
    self.g_cost_d = bce(p_gen, 0.9 * T.ones(p_gen.shape)).mean()

    self.d_cost = self.d_cost_real + self.d_cost_gen
    self.g_cost = self.g_cost_d

    self.classification = classification

    self.params = []
    self.params += lasagne.layers.get_all_params(h_out_4, trainable=True)
    #self.params += lasagne.layers.get_all_params(h_out_3, trainable=True)
    #self.params += lasagne.layers.get_all_params(h_out_2, trainable=True)
    self.params += lasagne.layers.get_all_params(h_out_1, trainable=True)
    self.params += gru_params_1.values()
    #self.params += gru_params_2.values()
    #self.params += gru_params_3.values()

    # Fraction of real samples classified real plus fraction of generated
    # samples classified fake (ranges from 0 to 2).
    self.accuracy = T.mean(
        T.eq(T.ones(p_real.shape).flatten(),
             T.gt(p_real, 0.5).flatten())) + T.mean(
        T.eq(T.ones(p_gen.shape).flatten(),
             T.lt(p_gen, 0.5).flatten()))
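# Note: `bce` is never defined in these snippets. Since it is applied to
# sigmoid outputs throughout, the standard Theano binary cross-entropy is
# the most plausible definition (an assumption, not confirmed by the source):
bce = T.nnet.binary_crossentropy  # bce(prediction, target), elementwise
# The commented-out lambda above is the numerically stable equivalent
# computed from raw logits rather than probabilities.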