Ejemplo n.º 1
0
def set_update_function(recurrent_model,
                        output_model,
                        recurrent_optimizer,
                        output_optimizer):
    # set input data (time_step*num_samples*features)
    input_seq   = tensor.tensor3(name='input_seq', dtype=floatX)
    # set target data (time_step*num_samples*output_size)
    target_seq  = tensor.tensor3(name='target_seq', dtype=floatX)

    # truncate grad
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')

    # get hidden data
    hidden_seq = get_tensor_output(input=[input_seq, None, None, truncate_grad_step], layers=recurrent_model, is_training=True)
    # get prediction data
    output_seq = get_tensor_output(input=hidden_seq, layers=output_model, is_training=True)

    # get cost (here mask_seq is like weight, sum over feature)
    sequence_cost = tensor.sqr(output_seq-target_seq)
    sample_cost   = tensor.sum(sequence_cost, axis=(0, 2))

    # get model updates
    recurrent_cost         = sample_cost.mean()
    recurrent_updates_dict = get_model_updates(layers=recurrent_model,
                                               cost=recurrent_cost,
                                               optimizer=recurrent_optimizer,
                                               use_grad_clip=1.0)

    output_cost         = sample_cost.mean()
    output_updates_dict = get_model_updates(layers=output_model,
                                            cost=output_cost,
                                            optimizer=output_optimizer,
                                            use_grad_clip=1.0)

    update_function_inputs  = [input_seq,
                               target_seq,
                               truncate_grad_step]
    update_function_outputs = [hidden_seq,
                               output_seq,
                               sample_cost]

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=merge_dicts([recurrent_updates_dict, output_updates_dict]),
                                      on_unused_input='ignore')

    return update_function
Ejemplo n.º 2
0
def set_generator_update_function(
    generator_rnn_model, generator_mean_model, generator_std_model, generator_optimizer, grad_clipping
):

    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name="source_data", dtype=floatX)

    target_data = tensor.tensor3(name="target_data", dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]
    hidden_data = hidden_data.dimshuffle(0, 2, 1, 3).flatten(3)

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data, layers=generator_mean_model, is_training=True)
    # output_std_data = get_tensor_output(input=hidden_data,
    #                                     layers=generator_std_model,
    #                                     is_training=True)
    output_std_data = 0.22
    # get generator cost (time_length x num_samples x hidden_size)
    generator_cost = 0.5 * tensor.inv(2.0 * tensor.sqr(output_std_data)) * tensor.sqr(output_mean_data - target_data)
    generator_cost += tensor.log(output_std_data) + 0.5 * tensor.log(2.0 * numpy.pi)
    generator_cost = tensor.sum(generator_cost, axis=2)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(
        layers=generator_rnn_model + generator_mean_model,
        cost=generator_updates_cost,
        optimizer=generator_optimizer,
        use_grad_clip=grad_clipping,
    )

    gradient_dict = get_model_gradients(generator_rnn_model + generator_mean_model, generator_updates_cost)
    gradient_norm = 0.0
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad ** 2)
    gradient_norm = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs = [source_data, target_data]

    # set generator update outputs
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(
        inputs=generator_updates_inputs,
        outputs=generator_updates_outputs,
        updates=generator_updates_dict,
        on_unused_input="ignore",
    )

    return generator_updates_function
Ejemplo n.º 3
0
def set_generator_update_function(generator_rnn_model,
                                  generator_mean_model,
                                  generator_std_model,
                                  generator_optimizer,
                                  grad_clipping):

    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)

    target_data = tensor.tensor3(name='target_data',
                                 dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    generator_cost  = -0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    generator_cost += -0.5*tensor.log(2.0*tensor.sqr(output_std_data)*numpy.pi)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model+generator_mean_model+generator_std_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    gradient_dict  = get_model_gradients(generator_rnn_model+generator_mean_model+generator_std_model, generator_updates_cost)
    gradient_norm  = 0.
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad**2)
        gradient_norm  = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs  = [source_data,
                                 target_data,]

    # set generator update outputs
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')

    return generator_updates_function
Ejemplo n.º 4
0
def set_updater_function(generator_model, generator_optimizer, generator_grad_clipping):
    # input sequence data (time_length * num_samples * input_dims)
    input_sequence = tensor.tensor3(name="input_sequence", dtype=floatX)
    target_sequence = tensor.tensor3(name="target_sequence", dtype=floatX)
    lambda_regularizer = tensor.scalar(name="lambda_regularizer", dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list, is_training=True)
    output_sequence = generator_output[0]
    data_hidden = generator_output[1]
    data_cell = generator_output[2]
    model_hidden = generator_output[3]
    model_cell = generator_output[4]
    generator_random = generator_output[-1]

    # get square error
    sample_cost = tensor.sqr(target_sequence - output_sequence).sum(axis=2)

    # get positive phase hidden
    positive_hid = data_hidden[1:]

    # get negative phase hidden
    negative_hid = model_hidden[1:]

    # get phase diff cost
    regularizer_cost = tensor.sqr(positive_hid - negative_hid).sum(axis=2)

    # set generator update
    updater_cost = sample_cost.mean() + regularizer_cost.mean() * lambda_regularizer
    updater_dict = get_model_updates(layers=generator_model, cost=updater_cost, optimizer=generator_optimizer)

    # get generator gradient norm2
    generator_gradient_dict = get_model_gradients(generator_model, updater_cost)
    generator_gradient_norm = 0.0
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad ** 2)
    generator_gradient_norm = tensor.sqrt(generator_gradient_norm)

    # set updater inputs
    updater_inputs = [input_sequence, target_sequence, lambda_regularizer]

    # set updater outputs
    updater_outputs = [sample_cost, regularizer_cost, generator_gradient_norm]

    # set updater function
    updater_function = theano.function(
        inputs=updater_inputs,
        outputs=updater_outputs,
        updates=merge_dicts([updater_dict, generator_random]),
        on_unused_input="ignore",
    )

    return updater_function
Ejemplo n.º 5
0
def set_update_function(recurrent_model,
                        output_model,
                        optimizer,
                        grad_clip=1.0):
    # set input data (time_length * num_samples * input_dims)
    input_data  = tensor.tensor3(name='input_data', dtype=floatX)
    # set input mask (time_length * num_samples)
    input_mask  = tensor.matrix(name='input_mask', dtype=floatX)
    # set init hidden/cell data (num_samples * hidden_dims)
    init_hidden = tensor.matrix(name='init_hidden', dtype=floatX)
    init_cell   = tensor.matrix(name='init_cell', dtype=floatX)

    # truncate grad
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')
    # set target data (time_length * num_samples * output_dims)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # get hidden data
    input_list  = [input_data, None, None, None, truncate_grad_step]
    hidden_data = get_tensor_output(input=input_list,
                                    layers=recurrent_model,
                                    is_training=True)[0]
    # get prediction data
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # get cost (here mask_seq is like weight, sum over feature, and time)
    sample_cost = tensor.sqr(output_data-target_data)
    sample_cost = tensor.sum(sample_cost, axis=(0, 2))

    # get model updates
    model_cost         = sample_cost.mean()
    model_updates_dict = get_model_updates(layers=recurrent_model+output_model,
                                           cost=model_cost,
                                           optimizer=optimizer,
                                           use_grad_clip=grad_clip)

    update_function_inputs  = [input_data,
                               input_mask,
                               init_hidden,
                               init_cell,
                               target_data,
                               truncate_grad_step]
    update_function_outputs = [hidden_data,
                               output_data,
                               sample_cost]

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=model_updates_dict,
                                      on_unused_input='ignore')

    return update_function
Ejemplo n.º 6
0
def set_updater_function(generator_rnn_model,
                         generator_emb_matrix,
                         generator_optimizer,
                         generator_grad_clipping):

    # input/target sequence data (time_length * batch_size, list of idx)
    input_sequence  = tensor.matrix(name='input_sequence',
                                     dtype=floatX)
    target_sequence = tensor.matrix(name='target_sequence',
                                     dtype=floatX)

    # generator_emb_matrix.shape = (num_idx, feature_size)
    input_emb_sequence  = generator_emb_matrix[input_sequence]
    target_emb_sequence = generator_emb_matrix[target_sequence]

    # set generator input data list
    generator_input_data_list = [input_emb_sequence,]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)
    generator_sample = generator_output[0]
    generator_random = generator_output[-1]

    # get square error
    square_error = tensor.sqr(target_sequence-generator_sample).sum(axis=2)

    # set generator update
    tf_updates_cost = square_error.mean()
    tf_updates_dict = get_model_updates(layers=generator_rnn_model,
                                        cost=tf_updates_cost,
                                        optimizer=generator_optimizer)

    generator_gradient_dict  = get_model_gradients(layers=generator_rnn_model,
                                                   cost=tf_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set tf update inputs
    tf_updates_inputs  = [input_sequence,
                          target_sequence]

    # set tf update outputs
    tf_updates_outputs = [square_error,
                          generator_gradient_norm,]

    # set tf update function
    tf_updates_function = theano.function(inputs=tf_updates_inputs,
                                          outputs=tf_updates_outputs,
                                          updates=merge_dicts([tf_updates_dict, generator_random]),
                                          on_unused_input='ignore')

    return tf_updates_function
Ejemplo n.º 7
0
def set_tf_update_function(generator_model,
                           generator_optimizer,
                           generator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                      dtype=floatX)
    # set generator input data list
    generator_input_data_list = [input_sequence,]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list, is_training=True)
    output_sequence  = generator_output[0]
    generator_random = generator_output[-1]

    # get square error
    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    # set generator update
    tf_updates_cost = square_error.mean()
    tf_updates_dict = get_model_updates(layers=generator_model,
                                        cost=tf_updates_cost,
                                        optimizer=generator_optimizer)

    generator_gradient_dict  = get_model_gradients(generator_model, tf_updates_cost)

    # get generator gradient norm2
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set tf update inputs
    tf_updates_inputs  = [input_sequence,
                          target_sequence]

    # set tf update outputs
    tf_updates_outputs = [square_error,
                          generator_gradient_norm,]

    # set tf update function
    tf_updates_function = theano.function(inputs=tf_updates_inputs,
                                          outputs=tf_updates_outputs,
                                          updates=merge_dicts([tf_updates_dict,
                                                               generator_random]),
                                          on_unused_input='ignore')

    return tf_updates_function
Ejemplo n.º 8
0
def set_gan_update_function(generator_model,
                            discriminator_feature_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                      dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence,
                                 1]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    data_hidden      = generator_output[1]
    data_cell        = generator_output[2]
    model_hidden     = generator_output[3]
    model_cell       = generator_output[4]
    generator_random = generator_output[-1]

    # get conditional hidden
    condition_hid    = data_hidden[:-1]
    condition_hid    = theano.gradient.disconnected_grad(condition_hid)
    condition_feature = get_tensor_output(condition_hid,
                                          discriminator_feature_model,
                                          is_training=True)

    # get positive phase hidden
    positive_hid     = data_hidden[1:]
    positive_feature = get_tensor_output(positive_hid,
                                         discriminator_feature_model,
                                         is_training=True)
    # get negative phase hidden
    negative_hid     = model_hidden[1:]
    negative_feature = get_tensor_output(negative_hid,
                                         discriminator_feature_model,
                                         is_training=True)

    # get positive/negative phase pairs
    positive_pair = tensor.concatenate([condition_feature, positive_feature], axis=2)
    negative_pair = tensor.concatenate([condition_feature, negative_feature], axis=2)

    # get positive pair score
    positive_score = get_tensor_output(positive_pair,
                                       discriminator_output_model,
                                       is_training=True)
    # get negative pair score
    negative_score = get_tensor_output(negative_pair,
                                       discriminator_output_model,
                                       is_training=True)

    # get generator cost (increase negative score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # get discriminator cost (increase positive score, decrease negative score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict  = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_feature_model+discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    discriminator_gradient_dict  = get_model_gradients(discriminator_feature_model+discriminator_output_model,
                                                       discriminator_updates_cost)

    # get discriminator gradient norm2
    discriminator_gradient_norm  = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad**2)
    discriminator_gradient_norm  = tensor.sqrt(discriminator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    # set gan update inputs
    gan_updates_inputs  = [input_sequence,
                           target_sequence]

    # set gan update outputs
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm,]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict,
                                                                generator_random]),
                                           on_unused_input='ignore')

    return gan_updates_function
Ejemplo n.º 9
0
def set_gan_update_function(generator_model,
                            discriminator_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                    dtype=floatX)
    # set generator input data list
    generator_input_data_list = [input_sequence,]

    # get generator output data
    output_data_set = generator_model[0].forward(generator_input_data_list, is_training=True)
    output_sequence = output_data_set[0]
    data_hidden     = output_data_set[1]
    data_cell       = output_data_set[2]
    model_hidden    = output_data_set[3]
    model_cell      = output_data_set[4]

    condition_hidden = data_hidden[:-1]
    condition_cell   = data_cell[:-1]

    condition_hidden = theano.gradient.disconnected_grad(condition_hidden)
    condition_cell   = theano.gradient.disconnected_grad(condition_cell)

    true_hidden = data_hidden[1:]
    true_cell   = data_cell[1:]

    false_hidden = model_hidden[1:]
    false_cell   = model_cell[1:]

    true_pair_hidden = tensor.concatenate([condition_hidden, true_hidden], axis=2)
    true_pair_cell   = tensor.concatenate([condition_cell, true_cell], axis=2)

    false_pair_hidden = tensor.concatenate([condition_hidden, false_hidden], axis=2)
    false_pair_cell   = tensor.concatenate([condition_cell, false_cell], axis=2)

    discriminator_true_score  = get_tensor_output(true_pair_hidden, discriminator_model, is_training=True)
    discriminator_false_score = get_tensor_output(false_pair_hidden, discriminator_model, is_training=True)


    generator_gan_cost = tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                         target=tensor.ones_like(discriminator_false_score))

    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=discriminator_true_score,
                                                              target=tensor.ones_like(discriminator_true_score)) +
                              tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                              target=tensor.zeros_like(discriminator_false_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    generator_gradient_dict  = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    discriminator_gradient_dict  = get_model_gradients(discriminator_model, discriminator_updates_cost)
    discriminator_gradient_norm  = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad**2)
    discriminator_gradient_norm  = tensor.sqrt(discriminator_gradient_norm)

    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    # set gan update inputs
    gan_updates_inputs  = [input_sequence,
                           target_sequence]

    # set gan update outputs
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           discriminator_true_score,
                           discriminator_false_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm,]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict, discriminator_updates_dict]),
                                           on_unused_input='ignore')

    return gan_updates_function
Ejemplo n.º 10
0
def set_gan_update_function(generator_rnn_model,
                            generator_output_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                      dtype=floatX)
    # set generator input data list
    generator_input_data_list = [input_sequence,]

    # get generator output data
    generator_output = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)
    generator_hidden = generator_output[0]
    generator_cell   = generator_output[1]

    generator_sample = get_tensor_output(generator_hidden, generator_output_model, is_training=True)

    condition_generator_hidden = theano.gradient.disconnected_grad(generator_hidden)

    positive_pair = tensor.concatenate([condition_generator_hidden, target_sequence], axis=2)
    negative_pair = tensor.concatenate([condition_generator_hidden, generator_sample], axis=2)

    # set generator input data list
    discriminator_input_data_list = [positive_pair,]
    discriminator_output = discriminator_rnn_model[0].forward(discriminator_input_data_list, is_training=True)
    positive_hidden = discriminator_output[0]
    positive_cell   = discriminator_output[1]
    positive_score  = get_tensor_output(positive_hidden, discriminator_output_model, is_training=True)

    discriminator_input_data_list = [negative_pair,]
    discriminator_output = discriminator_rnn_model[0].forward(discriminator_input_data_list, is_training=True)
    negative_hidden = discriminator_output[0]
    negative_cell   = discriminator_output[1]
    negative_score  = get_tensor_output(negative_hidden, discriminator_output_model, is_training=True)


    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model+generator_output_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    generator_gradient_dict  = get_model_gradients(generator_rnn_model+generator_output_model, generator_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_rnn_model+discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    discriminator_gradient_dict  = get_model_gradients(discriminator_rnn_model+discriminator_output_model, discriminator_updates_cost)
    discriminator_gradient_norm  = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad**2)
    discriminator_gradient_norm  = tensor.sqrt(discriminator_gradient_norm)

    square_error = tensor.sqr(target_sequence-generator_sample).sum(axis=2)

    # set gan update inputs
    gan_updates_inputs  = [input_sequence,
                           target_sequence]

    # set gan update outputs
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm,]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict, discriminator_updates_dict]),
                                           on_unused_input='ignore')

    return gan_updates_function
Ejemplo n.º 11
0
def set_reg_update_function(generator_model,
                            generator_optimizer,
                            generator_grad_clipping):

    # input sequence data (time_length * num_samples * input_dims)
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)
    target_sequence  = tensor.tensor3(name='target_sequence',
                                      dtype=floatX)

    # set generator input data list
    generator_input_data_list = [input_sequence,
                                 1]

    # get generator output data
    generator_output = generator_model[0].forward(generator_input_data_list,
                                                  is_training=True)
    output_sequence  = generator_output[0]
    data_hidden      = generator_output[1]
    data_cell        = generator_output[2]
    model_hidden     = generator_output[3]
    model_cell       = generator_output[4]
    generator_random = generator_output[-1]

    # get positive phase hidden
    positive_hid     = data_hidden[1:]
    positive_hid     = theano.gradient.disconnected_grad(positive_hid)

    # get negative phase hidden
    negative_hid     = model_hidden[1:]

    # get phase diff cost
    phase_diff = tensor.sqr(positive_hid-negative_hid).sum(axis=2)

    # set generator update
    generator_updates_cost = phase_diff.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # get generator gradient norm2
    generator_gradient_dict  = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # get mean square error
    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    # set reg update inputs
    reg_updates_inputs  = [input_sequence,
                           target_sequence]

    # set reg update outputs
    reg_updates_outputs = [phase_diff,
                           square_error,
                           generator_gradient_norm,]

    # set reg update function
    reg_updates_function = theano.function(inputs=reg_updates_inputs,
                                           outputs=reg_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                generator_random]),
                                           on_unused_input='ignore')

    return reg_updates_function
Ejemplo n.º 12
0
def set_generator_update_function(generator_rnn_model,
                                  generator_output_models,
                                  generator_optimizer,
                                  grad_clipping):
    # set source data (time_length * num_samples * input_dims)
    source_data  = tensor.tensor3(name='source_data',
                                  dtype=floatX)

    # set target data (time_length * num_samples * input_dims)
    target_data  = tensor.bmatrix(name='target_data')

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator output data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # for each rnn layer
    output_data_list = []
    for l, output_model in enumerate(generator_output_models):
        output_data = get_tensor_output(input=hidden_data[l],
                                        layers=output_model,
                                        is_training=True)
        output_data_list.append(output_data)

    output_data = tensor.concatenate(output_data_list[::-1], axis=1)

    output_sign_data  = output_data[:,0]
    output_sign_data  = 2.0*output_sign_data-tensor.ones_like(output_sign_data)
    output_value_data = output_data[:,1:]
    output_value_data = output_value_data*tensor.pow(2.0, tensor.arange(output_value_data.shape[1]))
    output_value_data = output_sign_data*output_value_data.sum(axis=1)

    target_sign_data  = target_data[:,0]
    target_sign_data  = 2.0*target_sign_data-tensor.ones_like(target_sign_data)
    target_value_data = target_data[:,1:]
    target_value_data = target_value_data*tensor.pow(2.0, tensor.arange(target_value_data.shape[1]))
    target_value_data = target_sign_data*target_value_data.sum(axis=1)

    mse_cost = tensor.sqr(output_value_data, target_value_data)
    bce_cost = tensor.nnet.binary_crossentropy(output_data, target_data).sum(axis=1)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model+,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # gradient_dict  = get_model_gradients(generator_rnn_model, generator_updates_cost)
    # gradient_norm  = 0.
    # for grad in gradient_dict:
    #     gradient_norm += tensor.sum(grad**2)
    #     gradient_norm  = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs  = [init_input_data,
                                 init_hidden_data,
                                 init_cell_data,
                                 sampling_length]

    # set generator update outputs
    generator_updates_outputs = [sample_cost_data, generator_cost, ]#gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=merge_dicts([generator_updates_dict, update_data]),
                                                 on_unused_input='ignore')

    return generator_updates_function
Ejemplo n.º 13
0
def set_update_function(recurrent_model,
                        output_model,
                        controller_optimizer,
                        model_optimizer,
                        grad_clip=1.0):

    # set input data (time_length * num_samples * input_dims)
    input_data  = tensor.tensor3(name='input_data', dtype=floatX)

    # set target data (time_length * num_samples * output_dims)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    time_length = input_data.shape[0]
    num_samples = input_data.shape[1]

    # cost control parameter
    controller = theano.shared(value=1.0,
                               name='controller')

    # get hidden data
    input_list  = [input_data, ]
    hidden_data = get_lstm_outputs(input_list=input_list,
                                   layers=recurrent_model,
                                   is_training=True)[-1]
    # get prediction data
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # get cost (here mask_seq is like weight, sum over feature, and time)
    sample_cost = tensor.sqr(output_data-target_data)
    sample_cost = tensor.sum(input=sample_cost, axis=2).reshape((time_length, num_samples))

    # time_step = tensor.arange(start=0, stop=time_length, dtype=floatX).reshape((time_length, 1))
    # time_step = tensor.repeat(time_step, num_samples, axis=1)

    # cost_weight (time_length * num_samples)
    # cost_weight = tensor.transpose(-controller*time_step)
    # cost_weight = tensor.nnet.softmax(cost_weight)
    # cost_weight = tensor.transpose(cost_weight).reshape((time_length, num_samples))

    # weighted_sample_cost = cost_weight*sample_cost

    # get model updates
    # model_cost         = weighted_sample_cost.sum(axis=0).mean()
    model_cost         = sample_cost.max(axis=0).mean()
    model_updates_dict = get_model_updates(layers=recurrent_model+output_model,
                                           cost=model_cost,
                                           optimizer=model_optimizer,
                                           use_grad_clip=grad_clip)

    # controller_cost = weighted_sample_cost.var(axis=0).mean()
    #
    # controller_updates_dict = OrderedDict()
    # controller_grad = tensor.grad(cost=controller_cost, wrt=controller)
    # for param, update in controller_optimizer(controller, controller_grad).iteritems():
    #     controller_updates_dict[param] = update


    update_function_inputs  = [input_data,
                               target_data]
    update_function_outputs = [hidden_data,
                               output_data,
                               sample_cost]

    # update_function_updates = merge_dicts([model_updates_dict, controller_updates_dict])
    update_function_updates = model_updates_dict

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=update_function_updates,
                                      on_unused_input='ignore')

    return update_function
Ejemplo n.º 14
0
def set_generator_update_function(generator_rnn_model,
                                  discriminator_rnn_model,
                                  discriminator_output_model,
                                  generator_optimizer,
                                  grad_clipping):
    # init input data (num_samples *input_dims)
    init_input_data = tensor.matrix(name='init_input_data',
                                    dtype=floatX)

    # init hidden data (num_layers * num_samples *input_dims)
    init_hidden_data = tensor.tensor3(name='init_hidden_data',
                                      dtype=floatX)

    # init cell data (num_layers * num_samples *input_dims)
    init_cell_data = tensor.tensor3(name='init_cell_data',
                                    dtype=floatX)

    # sampling length
    sampling_length = tensor.scalar(name='sampling_length',
                                    dtype='int32')
    # set generator input data list
    generator_input_data_list = [init_input_data,
                                 init_hidden_data,
                                 init_cell_data,
                                 sampling_length]

    # get generator output data
    output_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # set discriminator input data list
    discriminator_input_data_list = [output_data,]

    # get discriminator hidden data
    discriminator_hidden_data = get_lstm_outputs(input_list=discriminator_input_data_list,
                                                 layers=discriminator_rnn_model,
                                                 is_training=True)[-1]

    # get discriminator output data
    sample_cost_data = get_tensor_output(input=discriminator_hidden_data,
                                         layers=discriminator_output_model,
                                         is_training=True)[-1]

    # get cost based on discriminator (binary cross-entropy over all data)
    # sum over generator cost over time_length and output_dims, then mean over samples
    generator_cost = tensor.nnet.binary_crossentropy(output=sample_cost_data,
                                                     target=tensor.ones_like(sample_cost_data)).sum(axis=1)

    # set generator update
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # set generator update inputs
    generator_updates_inputs  = [init_input_data,
                                 init_hidden_data,
                                 init_cell_data,
                                 sampling_length]

    # set generator update outputs
    generator_updates_outputs = [sample_cost_data, generator_cost]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')

    return generator_updates_function
Ejemplo n.º 15
0
def set_gan_update_function(generator_rnn_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):

    # input for loop forward
    input_sequence  = tensor.tensor3(name='input_sequence',
                                     dtype=floatX)

    time_length = tensor.scalar(name='time_length',
                                dtype='int32')

    # get init data for looping
    generator_output = generator_rnn_model[0].loop_forward([input_sequence[0], time_length])
    generator_sequence    = generator_output[0]
    generator_rand_update = generator_output[-1]


    discriminator_output = discriminator_rnn_model[0].forward([input_sequence, ], is_training=True)
    positive_hidden = discriminator_output[0]
    positive_score  = get_tensor_output(positive_hidden, discriminator_output_model, is_training=True)

    discriminator_output = discriminator_rnn_model[0].forward([generator_sequence, ], is_training=True)
    negative_hidden = discriminator_output[0]
    negative_score  = get_tensor_output(negative_hidden, discriminator_output_model, is_training=True)

    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # set generator update
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    generator_gradient_dict  = get_model_gradients(layers=generator_rnn_model,
                                                   cost=generator_updates_cost)
    generator_gradient_norm  = 0.
    for grad in generator_gradient_dict:
        generator_gradient_norm += tensor.sum(grad**2)
    generator_gradient_norm  = tensor.sqrt(generator_gradient_norm)

    # set discriminator update
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_rnn_model+discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    discriminator_gradient_dict  = get_model_gradients(layers=discriminator_rnn_model+discriminator_output_model,
                                                       cost=discriminator_updates_cost)
    discriminator_gradient_norm  = 0.
    for grad in discriminator_gradient_dict:
        discriminator_gradient_norm += tensor.sum(grad**2)
    discriminator_gradient_norm  = tensor.sqrt(discriminator_gradient_norm)

    # set gan update inputs
    gan_updates_inputs  = [input_sequence,
                           time_length]

    # set gan update outputs
    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           generator_gradient_norm,
                           discriminator_gradient_norm,]

    # set gan update function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict,
                                                                generator_rand_update]),
                                           on_unused_input='ignore')

    return gan_updates_function