def set_update_function(recurrent_model,
                        output_model,
                        recurrent_optimizer,
                        output_optimizer):
    # set input data (time_step * num_samples * features)
    input_seq = tensor.tensor3(name='input_seq', dtype=floatX)
    # set target data (time_step * num_samples * output_size)
    target_seq = tensor.tensor3(name='target_seq', dtype=floatX)
    # number of steps to truncate gradient back-propagation
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')

    # get hidden data
    hidden_seq = get_tensor_output(input=[input_seq, None, None, truncate_grad_step],
                                   layers=recurrent_model,
                                   is_training=True)

    # get prediction data
    output_seq = get_tensor_output(input=hidden_seq,
                                   layers=output_model,
                                   is_training=True)

    # get cost (squared error summed over time and features; one cost per sample)
    sequence_cost = tensor.sqr(output_seq - target_seq)
    sample_cost   = tensor.sum(sequence_cost, axis=(0, 2))

    # get model updates
    recurrent_cost = sample_cost.mean()
    recurrent_updates_dict = get_model_updates(layers=recurrent_model,
                                               cost=recurrent_cost,
                                               optimizer=recurrent_optimizer,
                                               use_grad_clip=1.0)

    output_cost = sample_cost.mean()
    output_updates_dict = get_model_updates(layers=output_model,
                                            cost=output_cost,
                                            optimizer=output_optimizer,
                                            use_grad_clip=1.0)

    update_function_inputs  = [input_seq,
                               target_seq,
                               truncate_grad_step]
    update_function_outputs = [hidden_seq,
                               output_seq,
                               sample_cost]

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=merge_dicts([recurrent_updates_dict,
                                                           output_updates_dict]),
                                      on_unused_input='ignore')
    return update_function
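# Hypothetical usage sketch (not part of this file): how the compiled update
# function might be driven. The model/optimizer objects and the batch shapes
# (time_step, num_samples, dims) are assumptions.
#
# update_fn = set_update_function(recurrent_model, output_model,
#                                 recurrent_optimizer, output_optimizer)
# hidden, prediction, per_sample_cost = update_fn(input_batch,   # (T, N, D_in)
#                                                 target_batch,  # (T, N, D_out)
#                                                 20)            # truncate_grad_step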
def set_generator_update_function(generator_rnn_model,
                                  generator_mean_model,
                                  generator_std_model,
                                  generator_optimizer,
                                  grad_clipping):
    # input data (time_length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data', dtype=floatX)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list,
                                                 is_training=True)[0]
    hidden_data = hidden_data.dimshuffle(0, 2, 1, 3).flatten(3)

    # get generator output data (std is fixed; the std model is disabled below)
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    # output_std_data = get_tensor_output(input=hidden_data,
    #                                     layers=generator_std_model,
    #                                     is_training=True)
    output_std_data = 0.22

    # get generator cost: Gaussian negative log-likelihood per feature,
    # nll = (mean - target)^2 / (2 * std^2) + log(std) + 0.5 * log(2 * pi),
    # summed over features (time_length x num_samples)
    generator_cost  = tensor.sqr(output_mean_data - target_data) * tensor.inv(2.0 * tensor.sqr(output_std_data))
    generator_cost += tensor.log(output_std_data) + 0.5 * tensor.log(2.0 * numpy.pi)
    generator_cost  = tensor.sum(generator_cost, axis=2)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model + generator_mean_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # get gradient norm for monitoring
    gradient_dict = get_model_gradients(generator_rnn_model + generator_mean_model,
                                        generator_updates_cost)
    gradient_norm = 0.
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad ** 2)
    gradient_norm = tensor.sqrt(gradient_norm)

    # set generator update inputs/outputs
    generator_updates_inputs  = [source_data, target_data]
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')
    return generator_updates_function
def set_generator_update_function(generator_rnn_model,
                                  generator_mean_model,
                                  generator_std_model,
                                  generator_optimizer,
                                  grad_clipping):
    # input data (time_length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data', dtype=floatX)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list,
                                                 is_training=True)[0]

    # get generator output data (predicted mean and std per feature)
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # get generator cost: Gaussian negative log-likelihood per feature,
    # nll = (mean - target)^2 / (2 * std^2) + 0.5 * log(2 * pi * std^2)
    generator_cost  = tensor.sqr(output_mean_data - target_data) * tensor.inv(2.0 * tensor.sqr(output_std_data))
    generator_cost += 0.5 * tensor.log(2.0 * tensor.sqr(output_std_data) * numpy.pi)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model + generator_mean_model + generator_std_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # get gradient norm for monitoring
    gradient_dict = get_model_gradients(generator_rnn_model + generator_mean_model + generator_std_model,
                                        generator_updates_cost)
    gradient_norm = 0.
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad ** 2)
    gradient_norm = tensor.sqrt(gradient_norm)

    # set generator update inputs/outputs
    generator_updates_inputs  = [source_data, target_data]
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')
    return generator_updates_function
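# Standalone sanity check (a sketch, not repo code; scipy assumed available)
# that the cost above matches the Gaussian negative log-density.
def _gaussian_nll_check():
    import numpy
    from scipy.stats import norm
    mean, std, target = 0.3, 0.22, 0.5
    nll = ((mean - target) ** 2 / (2.0 * std ** 2)
           + 0.5 * numpy.log(2.0 * numpy.pi * std ** 2))
    assert numpy.isclose(nll, -norm.logpdf(target, loc=mean, scale=std))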
def set_updater_function(generator_model,
                         generator_optimizer,
                         generator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)
    lambda_regularizer = tensor.scalar(name='lambda_regularizer', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    data_hidden      = generator_output[1]
    data_cell        = generator_output[2]
    model_hidden     = generator_output[3]
    model_cell       = generator_output[4]
    generator_random = generator_output[-1]

    # get square error (sum over features)
    sample_cost = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # get positive phase hidden
    positive_hid = data_hidden[1:]

    # get negative phase hidden
    negative_hid = model_hidden[1:]

    # get phase diff cost
    regularizer_cost = tensor.sqr(positive_hid - negative_hid).sum(axis=2)

    # set generator update
    updater_cost = sample_cost.mean() + regularizer_cost.mean() * lambda_regularizer
    updater_dict = get_model_updates(layers=generator_model,
                                     cost=updater_cost,
                                     optimizer=generator_optimizer)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, updater_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set updater inputs/outputs
    updater_inputs  = [input_sequence, target_sequence, lambda_regularizer]
    updater_outputs = [sample_cost, regularizer_cost, generator_gradient_norm]

    # set updater function
    updater_function = theano.function(inputs=updater_inputs,
                                       outputs=updater_outputs,
                                       updates=merge_dicts([updater_dict, generator_random]),
                                       on_unused_input='ignore')
    return updater_function
def set_update_function(recurrent_model,
                        output_model,
                        optimizer,
                        grad_clip=1.0):
    # set input data (time_length * num_samples * input_dims)
    input_data = tensor.tensor3(name='input_data', dtype=floatX)
    # set input mask (time_length * num_samples)
    input_mask = tensor.matrix(name='input_mask', dtype=floatX)
    # set init hidden/cell data (num_samples * hidden_dims)
    init_hidden = tensor.matrix(name='init_hidden', dtype=floatX)
    init_cell   = tensor.matrix(name='init_cell', dtype=floatX)
    # number of steps to truncate gradient back-propagation
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')
    # set target data (time_length * num_samples * output_dims)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # get hidden data
    input_list  = [input_data, None, None, None, truncate_grad_step]
    hidden_data = get_tensor_output(input=input_list,
                                    layers=recurrent_model,
                                    is_training=True)[0]

    # get prediction data
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # get cost (squared error summed over time and features; input_mask,
    # init_hidden, and init_cell are accepted as inputs but unused here)
    sample_cost = tensor.sqr(output_data - target_data)
    sample_cost = tensor.sum(sample_cost, axis=(0, 2))

    # get model updates
    model_cost = sample_cost.mean()
    model_updates_dict = get_model_updates(layers=recurrent_model + output_model,
                                           cost=model_cost,
                                           optimizer=optimizer,
                                           use_grad_clip=grad_clip)

    update_function_inputs  = [input_data,
                               input_mask,
                               init_hidden,
                               init_cell,
                               target_data,
                               truncate_grad_step]
    update_function_outputs = [hidden_data, output_data, sample_cost]

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=model_updates_dict,
                                      on_unused_input='ignore')
    return update_function
def set_updater_function(generator_rnn_model,
                         generator_emb_matrix,
                         generator_optimizer,
                         generator_grad_clipping):
    # input/target sequence data (time_length * batch_size, integer indices)
    input_sequence  = tensor.imatrix(name='input_sequence')
    target_sequence = tensor.imatrix(name='target_sequence')

    # look up embeddings; generator_emb_matrix.shape = (num_idx, feature_size)
    input_emb_sequence  = generator_emb_matrix[input_sequence]
    target_emb_sequence = generator_emb_matrix[target_sequence]

    # set generator input data list
    generator_input_data_list = [input_emb_sequence]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list,
                                                      is_training=True)
    generator_sample = generator_output[0]
    generator_random = generator_output[-1]

    # get square error in embedding space (sum over features)
    square_error = tensor.sqr(target_emb_sequence - generator_sample).sum(axis=2)

    # set generator update
    tf_updates_cost = square_error.mean()
    tf_updates_dict = get_model_updates(layers=generator_rnn_model,
                                        cost=tf_updates_cost,
                                        optimizer=generator_optimizer)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(layers=generator_rnn_model,
                                                  cost=tf_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set tf update inputs/outputs
    tf_updates_inputs  = [input_sequence, target_sequence]
    tf_updates_outputs = [square_error, generator_gradient_norm]

    # set tf update function
    tf_updates_function = theano.function(inputs=tf_updates_inputs,
                                          outputs=tf_updates_outputs,
                                          updates=merge_dicts([tf_updates_dict, generator_random]),
                                          on_unused_input='ignore')
    return tf_updates_function
def set_tf_update_function(generator_model,
                           generator_optimizer,
                           generator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    generator_random = generator_output[-1]

    # get square error (sum over features)
    square_error = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # set generator update
    tf_updates_cost = square_error.mean()
    tf_updates_dict = get_model_updates(layers=generator_model,
                                        cost=tf_updates_cost,
                                        optimizer=generator_optimizer)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, tf_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set tf update inputs/outputs
    tf_updates_inputs  = [input_sequence, target_sequence]
    tf_updates_outputs = [square_error, generator_gradient_norm]

    # set tf update function
    tf_updates_function = theano.function(inputs=tf_updates_inputs,
                                          outputs=tf_updates_outputs,
                                          updates=merge_dicts([tf_updates_dict, generator_random]),
                                          on_unused_input='ignore')
    return tf_updates_function
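# The global-gradient-norm monitor above is repeated verbatim in nearly every
# updater in this file; a helper like the sketch below could replace those
# loops (the name get_gradient_norm is hypothetical, not an existing utility
# of this repo; it reuses the module's existing `tensor` alias).
def get_gradient_norm(gradient_list):
    # L2 norm over a list of Theano gradient tensors
    norm = 0.
    for grad in gradient_list:
        norm += tensor.sum(grad ** 2)
    return tensor.sqrt(norm)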
def set_gan_update_function(generator_model,
                            discriminator_feature_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence, 1]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    data_hidden      = generator_output[1]
    data_cell        = generator_output[2]
    model_hidden     = generator_output[3]
    model_cell       = generator_output[4]
    generator_random = generator_output[-1]

    # get conditional hidden (no gradient flows into the condition)
    condition_hid = data_hidden[:-1]
    condition_hid = theano.gradient.disconnected_grad(condition_hid)
    condition_feature = get_tensor_output(condition_hid,
                                          discriminator_feature_model,
                                          is_training=True)

    # get positive phase hidden
    positive_hid = data_hidden[1:]
    positive_feature = get_tensor_output(positive_hid,
                                         discriminator_feature_model,
                                         is_training=True)

    # get negative phase hidden
    negative_hid = model_hidden[1:]
    negative_feature = get_tensor_output(negative_hid,
                                         discriminator_feature_model,
                                         is_training=True)

    # get positive/negative phase pairs
    positive_pair = tensor.concatenate([condition_feature, positive_feature], axis=2)
    negative_pair = tensor.concatenate([condition_feature, negative_feature], axis=2)

    # get positive/negative pair scores
    positive_score = get_tensor_output(positive_pair,
                                       discriminator_output_model,
                                       is_training=True)
    negative_score = get_tensor_output(negative_pair,
                                       discriminator_output_model,
                                       is_training=True)

    # get generator cost (increase negative score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # get discriminator cost (increase positive score, decrease negative score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_feature_model + discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # get discriminator gradient norm2
    discriminator_gradient_dict = get_model_gradients(discriminator_feature_model + discriminator_output_model,
                                                      discriminator_updates_cost)
    discriminator_gradient_norm = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad ** 2)
    discriminator_gradient_norm = tensor.sqrt(discriminator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # set gan update inputs/outputs
    gan_updates_inputs  = [input_sequence, target_sequence]
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict,
                                                                generator_random]),
                                           on_unused_input='ignore')
    return gan_updates_function
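# Hypothetical usage sketch (not part of this file): one adversarial training
# step. The model/optimizer objects and batch shapes (T, N, D) are assumptions.
#
# gan_update_fn = set_gan_update_function(generator_model,
#                                         discriminator_feature_model,
#                                         discriminator_output_model,
#                                         generator_optimizer,
#                                         discriminator_optimizer,
#                                         generator_grad_clipping=1.0,
#                                         discriminator_grad_clipping=1.0)
# (gen_cost, disc_cost, pos_score, neg_score,
#  sq_err, gen_grad_norm, disc_grad_norm) = gan_update_fn(input_batch, target_batch)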
def set_gan_update_function(generator_model,
                            discriminator_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence]

    # get generator output data
    output_data_set = generator_model[0].forward(generator_input_data_list,
                                                 is_training=True)
    output_sequence = output_data_set[0]
    data_hidden     = output_data_set[1]
    data_cell       = output_data_set[2]
    model_hidden    = output_data_set[3]
    model_cell      = output_data_set[4]

    # condition states (no gradient flows into the condition)
    condition_hidden = data_hidden[:-1]
    condition_cell   = data_cell[:-1]
    condition_hidden = theano.gradient.disconnected_grad(condition_hidden)
    condition_cell   = theano.gradient.disconnected_grad(condition_cell)

    # true (teacher-forced) and false (free-running) states
    true_hidden  = data_hidden[1:]
    true_cell    = data_cell[1:]
    false_hidden = model_hidden[1:]
    false_cell   = model_cell[1:]

    # pair condition with true/false states (the cell pairs are currently unused)
    true_pair_hidden  = tensor.concatenate([condition_hidden, true_hidden], axis=2)
    true_pair_cell    = tensor.concatenate([condition_cell, true_cell], axis=2)
    false_pair_hidden = tensor.concatenate([condition_hidden, false_hidden], axis=2)
    false_pair_cell   = tensor.concatenate([condition_cell, false_cell], axis=2)

    # score pairs with the discriminator
    discriminator_true_score  = get_tensor_output(true_pair_hidden,
                                                  discriminator_model,
                                                  is_training=True)
    discriminator_false_score = get_tensor_output(false_pair_hidden,
                                                  discriminator_model,
                                                  is_training=True)

    # get generator cost (increase false score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                         target=tensor.ones_like(discriminator_false_score))

    # get discriminator cost (increase true score, decrease false score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=discriminator_true_score,
                                                              target=tensor.ones_like(discriminator_true_score)) +
                              tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                              target=tensor.zeros_like(discriminator_false_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # get discriminator gradient norm2
    discriminator_gradient_dict = get_model_gradients(discriminator_model,
                                                      discriminator_updates_cost)
    discriminator_gradient_norm = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad ** 2)
    discriminator_gradient_norm = tensor.sqrt(discriminator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # set gan update inputs/outputs
    gan_updates_inputs  = [input_sequence, target_sequence]
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           discriminator_true_score,
                           discriminator_false_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict]),
                                           on_unused_input='ignore')
    return gan_updates_function
def set_gan_update_function(generator_rnn_model,
                            generator_output_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list,
                                                      is_training=True)
    generator_hidden = generator_output[0]
    generator_cell   = generator_output[1]
    generator_sample = get_tensor_output(generator_hidden,
                                         generator_output_model,
                                         is_training=True)

    # condition on the generator hidden states without back-propagating into them
    condition_generator_hidden = theano.gradient.disconnected_grad(generator_hidden)

    # pair the condition with real and generated sequences
    positive_pair = tensor.concatenate([condition_generator_hidden, target_sequence], axis=2)
    negative_pair = tensor.concatenate([condition_generator_hidden, generator_sample], axis=2)

    # score the positive pair
    discriminator_input_data_list = [positive_pair]
    discriminator_output = discriminator_rnn_model[0].forward(discriminator_input_data_list,
                                                              is_training=True)
    positive_hidden = discriminator_output[0]
    positive_cell   = discriminator_output[1]
    positive_score  = get_tensor_output(positive_hidden,
                                        discriminator_output_model,
                                        is_training=True)

    # score the negative pair
    discriminator_input_data_list = [negative_pair]
    discriminator_output = discriminator_rnn_model[0].forward(discriminator_input_data_list,
                                                              is_training=True)
    negative_hidden = discriminator_output[0]
    negative_cell   = discriminator_output[1]
    negative_score  = get_tensor_output(negative_hidden,
                                        discriminator_output_model,
                                        is_training=True)

    # get generator cost (increase negative score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # get discriminator cost (increase positive score, decrease negative score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model + generator_output_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_rnn_model + generator_output_model,
                                                  generator_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_rnn_model + discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # get discriminator gradient norm2
    discriminator_gradient_dict = get_model_gradients(discriminator_rnn_model + discriminator_output_model,
                                                      discriminator_updates_cost)
    discriminator_gradient_norm = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad ** 2)
    discriminator_gradient_norm = tensor.sqrt(discriminator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence - generator_sample).sum(axis=2)

    # set gan update inputs/outputs
    gan_updates_inputs  = [input_sequence, target_sequence]
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict]),
                                           on_unused_input='ignore')
    return gan_updates_function
def set_reg_update_function(generator_model,
                            generator_optimizer,
                            generator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence, 1]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    data_hidden      = generator_output[1]
    data_cell        = generator_output[2]
    model_hidden     = generator_output[3]
    model_cell       = generator_output[4]
    generator_random = generator_output[-1]

    # get positive phase hidden (fixed target, no gradient)
    positive_hid = data_hidden[1:]
    positive_hid = theano.gradient.disconnected_grad(positive_hid)

    # get negative phase hidden
    negative_hid = model_hidden[1:]

    # get phase diff cost
    phase_diff = tensor.sqr(positive_hid - negative_hid).sum(axis=2)

    # set generator update
    generator_updates_cost = phase_diff.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # set reg update inputs/outputs
    reg_updates_inputs  = [input_sequence, target_sequence]
    reg_updates_outputs = [phase_diff, square_error, generator_gradient_norm]

    # set reg update function
    reg_updates_function = theano.function(inputs=reg_updates_inputs,
                                           outputs=reg_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                generator_random]),
                                           on_unused_input='ignore')
    return reg_updates_function
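# Minimal standalone sketch (not repo code) of the stop-gradient trick used
# above: theano.gradient.disconnected_grad blocks back-propagation, so only
# the free-running (negative phase) states receive gradient.
def _disconnected_grad_demo():
    import theano
    import theano.tensor as T
    x = T.dscalar('x')
    # gradient of stop_grad(x**2) + x w.r.t. x is 1: the squared term is frozen
    y = theano.gradient.disconnected_grad(x ** 2) + x
    grad_fn = theano.function([x], T.grad(y, x))
    assert abs(grad_fn(3.0) - 1.0) < 1e-6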
def set_generator_update_function(generator_rnn_model,
                                  generator_output_models,
                                  generator_optimizer,
                                  grad_clipping):
    # set source data (time_length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data', dtype=floatX)
    # set target data (num_samples * num_bits); bit 0 is the sign bit
    target_data = tensor.bmatrix(name='target_data')

    # set generator input data list
    generator_input_data_list = [source_data]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list,
                                                 is_training=True)[0]

    # get output bits from each rnn layer's output model
    output_data_list = []
    for l, output_model in enumerate(generator_output_models):
        output_data = get_tensor_output(input=hidden_data[l],
                                        layers=output_model,
                                        is_training=True)
        output_data_list.append(output_data)
    output_data = tensor.concatenate(output_data_list[::-1], axis=1)

    # decode sign-magnitude binary code: value = sign * sum_i bit_i * 2^i
    output_sign_data  = output_data[:, 0]
    output_sign_data  = 2.0 * output_sign_data - tensor.ones_like(output_sign_data)
    output_value_data = output_data[:, 1:]
    output_value_data = output_value_data * tensor.pow(2.0, tensor.arange(output_value_data.shape[1]))
    output_value_data = output_sign_data * output_value_data.sum(axis=1)

    target_sign_data  = target_data[:, 0]
    target_sign_data  = 2.0 * target_sign_data - tensor.ones_like(target_sign_data)
    target_value_data = target_data[:, 1:]
    target_value_data = target_value_data * tensor.pow(2.0, tensor.arange(target_value_data.shape[1]))
    target_value_data = target_sign_data * target_value_data.sum(axis=1)

    # mse over decoded values (for monitoring), bce over raw bits (for training)
    mse_cost = tensor.sqr(output_value_data - target_value_data)
    bce_cost = tensor.nnet.binary_crossentropy(output_data, target_data).sum(axis=1)

    # set generator update
    generator_cost         = bce_cost
    generator_updates_cost = generator_cost.mean()
    generator_layers       = generator_rnn_model + sum(generator_output_models, [])
    generator_updates_dict = get_model_updates(layers=generator_layers,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # gradient_dict = get_model_gradients(generator_layers, generator_updates_cost)
    # gradient_norm = 0.
    # for grad in gradient_dict:
    #     gradient_norm += tensor.sum(grad**2)
    # gradient_norm = tensor.sqrt(gradient_norm)

    # set generator update inputs/outputs
    generator_updates_inputs  = [source_data, target_data]
    generator_updates_outputs = [mse_cost, generator_cost]  # , gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')
    return generator_updates_function
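# Standalone sketch (not repo code) of the sign-magnitude decoding above:
# bit 0 maps {0,1} -> {-1,+1}, the remaining bits are a base-2 magnitude.
def _sign_magnitude_demo():
    import numpy
    bits = numpy.array([1, 1, 0, 1])  # [sign, b0, b1, b2]
    sign = 2.0 * bits[0] - 1.0
    value = sign * (bits[1:] * 2.0 ** numpy.arange(3)).sum()
    assert value == 5.0  # +(1*1 + 0*2 + 1*4)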
def set_update_function(recurrent_model,
                        output_model,
                        controller_optimizer,
                        model_optimizer,
                        grad_clip=1.0):
    # set input data (time_length * num_samples * input_dims)
    input_data = tensor.tensor3(name='input_data', dtype=floatX)
    # set target data (time_length * num_samples * output_dims)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    time_length = input_data.shape[0]
    num_samples = input_data.shape[1]

    # cost control parameter (used only by the commented-out cost weighting below)
    controller = theano.shared(value=1.0, name='controller')

    # get hidden data
    input_list  = [input_data]
    hidden_data = get_lstm_outputs(input_list=input_list,
                                   layers=recurrent_model,
                                   is_training=True)[-1]

    # get prediction data
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # get cost (squared error summed over features: time_length * num_samples)
    sample_cost = tensor.sqr(output_data - target_data)
    sample_cost = tensor.sum(input=sample_cost, axis=2).reshape((time_length, num_samples))

    # time_step = tensor.arange(start=0, stop=time_length, dtype=floatX).reshape((time_length, 1))
    # time_step = tensor.repeat(time_step, num_samples, axis=1)
    #
    # cost_weight (time_length * num_samples)
    # cost_weight = tensor.transpose(-controller*time_step)
    # cost_weight = tensor.nnet.softmax(cost_weight)
    # cost_weight = tensor.transpose(cost_weight).reshape((time_length, num_samples))
    #
    # weighted_sample_cost = cost_weight*sample_cost

    # get model updates (train on the worst time step per sample)
    # model_cost = weighted_sample_cost.sum(axis=0).mean()
    model_cost = sample_cost.max(axis=0).mean()
    model_updates_dict = get_model_updates(layers=recurrent_model + output_model,
                                           cost=model_cost,
                                           optimizer=model_optimizer,
                                           use_grad_clip=grad_clip)

    # controller_cost = weighted_sample_cost.var(axis=0).mean()
    #
    # controller_updates_dict = OrderedDict()
    # controller_grad = tensor.grad(cost=controller_cost, wrt=controller)
    # for param, update in controller_optimizer(controller, controller_grad).iteritems():
    #     controller_updates_dict[param] = update

    update_function_inputs  = [input_data, target_data]
    update_function_outputs = [hidden_data, output_data, sample_cost]
    # update_function_updates = merge_dicts([model_updates_dict, controller_updates_dict])
    update_function_updates = model_updates_dict

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=update_function_updates,
                                      on_unused_input='ignore')
    return update_function
def set_generator_update_function(generator_rnn_model,
                                  discriminator_rnn_model,
                                  discriminator_output_model,
                                  generator_optimizer,
                                  grad_clipping):
    # init input data (num_samples * input_dims)
    init_input_data = tensor.matrix(name='init_input_data', dtype=floatX)
    # init hidden data (num_layers * num_samples * hidden_dims)
    init_hidden_data = tensor.tensor3(name='init_hidden_data', dtype=floatX)
    # init cell data (num_layers * num_samples * hidden_dims)
    init_cell_data = tensor.tensor3(name='init_cell_data', dtype=floatX)
    # sampling length
    sampling_length = tensor.scalar(name='sampling_length', dtype='int32')

    # set generator input data list
    generator_input_data_list = [init_input_data,
                                 init_hidden_data,
                                 init_cell_data,
                                 sampling_length]

    # get generator output data
    output_data = generator_rnn_model[0].forward(generator_input_data_list,
                                                 is_training=True)[0]

    # set discriminator input data list
    discriminator_input_data_list = [output_data]

    # get discriminator hidden data (last layer)
    discriminator_hidden_data = get_lstm_outputs(input_list=discriminator_input_data_list,
                                                 layers=discriminator_rnn_model,
                                                 is_training=True)[-1]

    # get discriminator output data (last-step score)
    sample_cost_data = get_tensor_output(input=discriminator_hidden_data,
                                         layers=discriminator_output_model,
                                         is_training=True)[-1]

    # get generator cost based on the discriminator: binary cross-entropy
    # against the "real" label, summed over output_dims (one cost per sample)
    generator_cost = tensor.nnet.binary_crossentropy(output=sample_cost_data,
                                                     target=tensor.ones_like(sample_cost_data)).sum(axis=1)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # set generator update inputs/outputs
    generator_updates_inputs  = [init_input_data,
                                 init_hidden_data,
                                 init_cell_data,
                                 sampling_length]
    generator_updates_outputs = [sample_cost_data, generator_cost]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')
    return generator_updates_function
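# Hypothetical usage sketch (not part of this file): sampling starts from
# zero-initialized states; num_samples/num_layers/dims are assumptions.
#
# gen_update_fn = set_generator_update_function(generator_rnn_model,
#                                               discriminator_rnn_model,
#                                               discriminator_output_model,
#                                               generator_optimizer,
#                                               grad_clipping=1.0)
# init_input  = numpy.zeros((num_samples, input_dims), dtype=floatX)
# init_hidden = numpy.zeros((num_layers, num_samples, hidden_dims), dtype=floatX)
# init_cell   = numpy.zeros((num_layers, num_samples, hidden_dims), dtype=floatX)
# scores, cost = gen_update_fn(init_input, init_hidden, init_cell, 100)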
def set_gan_update_function(generator_rnn_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    # input for loop forward
    input_sequence = tensor.tensor3(name='input_sequence', dtype=floatX)
    time_length    = tensor.scalar(name='time_length', dtype='int32')

    # sample a sequence from the generator, starting from the first input frame
    generator_output      = generator_rnn_model[0].loop_forward([input_sequence[0], time_length])
    generator_sequence    = generator_output[0]
    generator_rand_update = generator_output[-1]

    # score the real sequence
    discriminator_output = discriminator_rnn_model[0].forward([input_sequence],
                                                              is_training=True)
    positive_hidden = discriminator_output[0]
    positive_score  = get_tensor_output(positive_hidden,
                                        discriminator_output_model,
                                        is_training=True)

    # score the generated sequence
    discriminator_output = discriminator_rnn_model[0].forward([generator_sequence],
                                                              is_training=True)
    negative_hidden = discriminator_output[0]
    negative_score  = get_tensor_output(negative_hidden,
                                        discriminator_output_model,
                                        is_training=True)

    # get generator cost (increase negative score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # get discriminator cost (increase positive score, decrease negative score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(layers=generator_rnn_model,
                                                  cost=generator_updates_cost)
    generator_gradient_norm = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_rnn_model + discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # get discriminator gradient norm2
    discriminator_gradient_dict = get_model_gradients(layers=discriminator_rnn_model + discriminator_output_model,
                                                      cost=discriminator_updates_cost)
    discriminator_gradient_norm = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad ** 2)
    discriminator_gradient_norm = tensor.sqrt(discriminator_gradient_norm)

    # set gan update inputs/outputs
    gan_updates_inputs  = [input_sequence, time_length]
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           generator_gradient_norm,
                           discriminator_gradient_norm]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict,
                                                                generator_rand_update]),
                                           on_unused_input='ignore')
    return gan_updates_function