Example #1
0
def set_generation_function(recurrent_model, output_model):
    """Compile a one-step generation function for a recurrent model.

    The compiled function takes ``(input, prev_hidden, prev_cell)``, all
    2-D matrices, and returns ``[cur_hidden, cur_cell, output]``.

    Args:
        recurrent_model: layers handed to ``get_tensor_output`` to advance
            the recurrent state by one step.
        output_model: layers handed to ``get_tensor_output`` to map the new
            hidden state to a prediction.

    Returns:
        The compiled ``theano.function``.
    """
    # symbolic step input and previous recurrent state
    step_input  = tensor.matrix(name='input_seq', dtype=floatX)
    hidden_prev = tensor.matrix(name='prev_hidden_data', dtype=floatX)
    cell_prev   = tensor.matrix(name='prev_cell_data', dtype=floatX)

    # advance the recurrent layers (inference mode)
    recurrent_out = get_tensor_output(input=[step_input, hidden_prev, cell_prev],
                                      layers=recurrent_model,
                                      is_training=False)
    hidden_next = recurrent_out[0]
    cell_next   = recurrent_out[1]

    # prediction from the new hidden state
    prediction = get_tensor_output(input=hidden_next,
                                   layers=output_model,
                                   is_training=False)

    return theano.function(inputs=[step_input, hidden_prev, cell_prev],
                           outputs=[hidden_next, cell_next, prediction],
                           on_unused_input='ignore')
Example #2
0
def set_generator_update_function(generator_rnn_model,
                                  generator_mean_model,
                                  generator_std_model,
                                  generator_optimizer,
                                  grad_clipping):
    """Compile the training step of a generator with mean and std heads.

    The compiled function takes ``(source_data, target_data)`` (both 3-D
    tensors), applies the optimizer updates and returns
    ``[generator_cost, gradient_norm]`` where ``generator_cost`` is the
    element-wise cost and ``gradient_norm`` is the global L2 norm of the
    gradients of the mean cost.

    Args:
        generator_rnn_model: list of layers; only ``generator_rnn_model[0]``
            is run forward, but all of them are passed to the updater.
        generator_mean_model: layers producing the predicted mean.
        generator_std_model: layers producing the predicted std.
        generator_optimizer: optimizer forwarded to ``get_model_updates``.
        grad_clipping: forwarded to ``get_model_updates`` as ``use_grad_clip``.

    Returns:
        The compiled ``theano.function``.
    """
    # input data (time length * num_samples * input_dims)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)

    target_data = tensor.tensor3(name='target_data',
                                 dtype=floatX)

    # set generator input data list
    generator_input_data_list = [source_data,]

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward(generator_input_data_list, is_training=True)[0]

    # get generator output data
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # NOTE(review): this expression has the sign of a Gaussian log-likelihood
    # (not its negative) and carries an extra 0.5 on the quadratic term.
    # Left unchanged because the direction in which get_model_updates moves
    # the cost is not visible here -- confirm.
    generator_cost  = -0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    generator_cost += -0.5*tensor.log(2.0*tensor.sqr(output_std_data)*numpy.pi)

    # set generator update
    all_layers = generator_rnn_model+generator_mean_model+generator_std_model
    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=all_layers,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # global L2 norm: accumulate every squared gradient first, then take a
    # single square root (the root used to be taken inside the loop, which
    # produced a nested-sqrt value instead of the norm)
    gradient_dict  = get_model_gradients(all_layers, generator_updates_cost)
    gradient_norm  = 0.
    for grad in gradient_dict:
        gradient_norm += tensor.sum(grad**2)
    gradient_norm  = tensor.sqrt(gradient_norm)

    # set generator update inputs
    generator_updates_inputs  = [source_data,
                                 target_data,]

    # set generator update outputs
    generator_updates_outputs = [generator_cost, gradient_norm]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')

    return generator_updates_function
Example #3
0
def set_update_function(recurrent_model,
                        output_model,
                        optimizer,
                        grad_clip=1.0):
    """Compile a squared-error training step for a recurrent model.

    The compiled function takes ``(input_data, input_mask, init_hidden,
    init_cell, target_data, truncate_grad_step)`` and returns
    ``[hidden_data, output_data, sample_cost]`` while applying the optimizer
    updates. ``input_mask``, ``init_hidden`` and ``init_cell`` are accepted
    but not wired into the graph built here.

    Args:
        recurrent_model: recurrent layers (list).
        output_model: output layers (list).
        optimizer: optimizer forwarded to ``get_model_updates``.
        grad_clip: forwarded to ``get_model_updates`` as ``use_grad_clip``.

    Returns:
        The compiled ``theano.function``.
    """
    # symbolic inputs
    input_data  = tensor.tensor3(name='input_data', dtype=floatX)
    input_mask  = tensor.matrix(name='input_mask', dtype=floatX)
    init_hidden = tensor.matrix(name='init_hidden', dtype=floatX)
    init_cell   = tensor.matrix(name='init_cell', dtype=floatX)
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # recurrent pass; the three None slots are left to the layer's defaults
    recurrent_out = get_tensor_output(input=[input_data, None, None, None, truncate_grad_step],
                                      layers=recurrent_model,
                                      is_training=True)
    hidden_data = recurrent_out[0]

    # prediction
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # squared error summed over axes 0 and 2 -> one cost per sample
    squared_error = tensor.sqr(output_data-target_data)
    sample_cost   = tensor.sum(squared_error, axis=(0, 2))

    # parameter updates driven by the mean sample cost
    model_updates_dict = get_model_updates(layers=recurrent_model+output_model,
                                           cost=sample_cost.mean(),
                                           optimizer=optimizer,
                                           use_grad_clip=grad_clip)

    return theano.function(inputs=[input_data,
                                   input_mask,
                                   init_hidden,
                                   init_cell,
                                   target_data,
                                   truncate_grad_step],
                           outputs=[hidden_data,
                                    output_data,
                                    sample_cost],
                           updates=model_updates_dict,
                           on_unused_input='ignore')
def set_tf_update_function(input_emb_param,
                           generator_rnn_model,
                           generator_output_model,
                           generator_optimizer,
                           generator_grad_clipping):
    """Compile a training step that also updates the embedding matrix.

    Inputs are projected with ``input_emb_param``, run through the first
    generator RNN layer and the output model, then projected back with the
    transposed embedding. The compiled function takes
    ``(input_sequence, target_sequence)`` and returns
    ``[square_error, generator_gradient_norm]``.

    Args:
        input_emb_param: embedding parameter, trained together with the layers.
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_output_model: layers applied to the hidden sequence.
        generator_optimizer: optimizer forwarded to
            ``get_model_and_params_updates``.
        generator_grad_clipping: accepted but not used in this function.

    Returns:
        The compiled ``theano.function``.
    """
    # symbolic sequences (3-D tensors)
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # embed both sequences; the target embedding is built but not consumed below
    input_emb_sequence  = tensor.dot(input_sequence, input_emb_param)
    target_emb_sequence = tensor.dot(target_sequence, input_emb_param)

    # recurrent pass over the embedded input
    rnn_outputs = generator_rnn_model[0].forward([input_emb_sequence,], is_training=True)
    generator_hidden, generator_cell = rnn_outputs[0], rnn_outputs[1]

    # decode, then map back through the transposed embedding
    generator_emb_sequence = get_tensor_output(generator_hidden, generator_output_model, is_training=True)
    generator_sequence     = tensor.dot(generator_emb_sequence, tensor.transpose(input_emb_param))

    # squared error summed over the last axis
    square_error = tensor.sqr(target_sequence-generator_sequence).sum(axis=2)
    mean_cost    = square_error.mean()

    trainable_layers = generator_rnn_model+generator_output_model
    tf_updates_dict = get_model_and_params_updates(layers=trainable_layers,
                                                   params=[input_emb_param,],
                                                   cost=mean_cost,
                                                   optimizer=generator_optimizer)

    # global L2 norm of all gradients
    gradients = get_model_and_params_gradients(layers=generator_rnn_model+generator_output_model,
                                               params=[input_emb_param,],
                                               cost=mean_cost)
    squared_total = 0.
    for g in gradients:
        squared_total = squared_total + tensor.sum(g**2)
    generator_gradient_norm = tensor.sqrt(squared_total)

    return theano.function(inputs=[input_sequence, target_sequence],
                           outputs=[square_error, generator_gradient_norm,],
                           updates=tf_updates_dict,
                           on_unused_input='ignore')
def set_evaluation_function(generator_rnn_model,
                            generator_output_model):
    """Compile a function that reports the generator's squared error.

    The compiled function takes ``(input_sequence, target_sequence)`` (3-D
    tensors) and returns ``[square_error]``, the squared difference summed
    over the last axis.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_output_model: layers applied to the hidden sequence.

    Returns:
        The compiled ``theano.function``.
    """
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # NOTE(review): layers run with is_training=True although this is an
    # evaluation function -- confirm that is intended.
    rnn_outputs = generator_rnn_model[0].forward([input_sequence,], is_training=True)
    generator_hidden, generator_cell = rnn_outputs[0], rnn_outputs[1]

    generator_sample = get_tensor_output(generator_hidden, generator_output_model, is_training=True)

    # squared error summed over the last axis
    residual     = target_sequence-generator_sample
    square_error = tensor.sqr(residual).sum(axis=2)

    return theano.function(inputs=[input_sequence, target_sequence],
                           outputs=[square_error,],
                           on_unused_input='ignore')
Example #6
0
def set_generation_function(recurrent_model, output_model):
    """Compile a one-step generation function for a stacked recurrent model.

    The compiled function takes ``input``, then one previous hidden matrix
    per layer, then one previous cell matrix per layer. It returns the new
    hidden states (one per layer), the new cell states (one per layer) and
    finally the prediction computed from the top layer's hidden state.

    Args:
        recurrent_model: sequence of layers, each exposing
            ``forward(input_data_list=..., is_training=...)`` and returning
            ``(hidden, cell, ...)``.
        output_model: layers handed to ``get_tensor_output``.

    Returns:
        The compiled ``theano.function``.
    """
    num_layers = len(recurrent_model)

    # step input (2-D matrix)
    input_data  = tensor.matrix(name='input_seq', dtype=floatX)

    # one previous hidden/cell matrix per layer
    prev_hidden_data_list = [tensor.matrix(name='prev_hidden_data{}'.format(i), dtype=floatX) for i in range(num_layers)]
    prev_cell_data_list   = [tensor.matrix(name='prev_cell_data{}'.format(i), dtype=floatX) for i in range(num_layers)]

    cur_hidden_data_list = []
    cur_cell_data_list   = []

    # Feed each layer the output of the layer below together with ITS OWN
    # previous state. The earlier code paired layer l+1 with the state of
    # layer l, so every layer above the first ran on the wrong state.
    layer_input = input_data
    for layer, prev_hidden, prev_cell in zip(recurrent_model,
                                             prev_hidden_data_list,
                                             prev_cell_data_list):
        recurrent_data = layer.forward(input_data_list=[layer_input, prev_hidden, prev_cell],
                                       is_training=False)
        cur_hidden_data_list.append(recurrent_data[0])
        cur_cell_data_list.append(recurrent_data[1])
        layer_input = recurrent_data[0]

    # get prediction data from the top layer
    output_data = get_tensor_output(input=cur_hidden_data_list[-1], layers=output_model, is_training=False)

    generation_function_inputs  = [input_data,] + prev_hidden_data_list + prev_cell_data_list
    generation_function_outputs = cur_hidden_data_list + cur_cell_data_list + [output_data,]

    generation_function = theano.function(inputs=generation_function_inputs,
                                          outputs=generation_function_outputs,
                                          on_unused_input='ignore')
    return generation_function
def set_generator_sampling_function(generator_rnn_model, generator_mean_model, generator_std_model):
    """Compile a one-step sampler that emits the predicted mean.

    The compiled function takes ``(cur_input_data, prev_hidden_data)`` (a
    matrix and a 3-D tensor) and returns ``[output, cur_hidden_data]``.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_mean_model: layers producing the output mean.
        generator_std_model: accepted but not used in this function.

    Returns:
        The compiled ``theano.function``.
    """
    cur_input_data   = tensor.matrix(name="cur_input_data", dtype=floatX)
    prev_hidden_data = tensor.tensor3(name="prev_hidden_data", dtype=floatX)

    # one recurrent step in inference mode
    rnn_outputs = generator_rnn_model[0].forward([cur_input_data, prev_hidden_data], is_training=False)
    cur_hidden_data = rnn_outputs[0]

    # swap the first two axes, then collapse everything after the first axis
    # into one, so the mean head receives a matrix
    flat_hidden = cur_hidden_data.dimshuffle(1, 0, 2).flatten(2)
    output_data = get_tensor_output(input=flat_hidden, layers=generator_mean_model, is_training=False)

    return theano.function(inputs=[cur_input_data, prev_hidden_data],
                           outputs=[output_data, cur_hidden_data],
                           on_unused_input="ignore")
def set_update_function(recurrent_model,
                        output_model,
                        recurrent_optimizer,
                        output_optimizer):
    """Compile a squared-error training step with one optimizer per sub-model.

    The compiled function takes ``(input_seq, target_seq,
    truncate_grad_step)`` and returns ``[hidden_seq, output_seq,
    sample_cost]`` while applying the merged updates of both optimizers.

    Args:
        recurrent_model: recurrent layers.
        output_model: output layers.
        recurrent_optimizer: optimizer for ``recurrent_model``.
        output_optimizer: optimizer for ``output_model``.

    Returns:
        The compiled ``theano.function``.
    """
    # symbolic inputs
    input_seq  = tensor.tensor3(name='input_seq', dtype=floatX)
    target_seq = tensor.tensor3(name='target_seq', dtype=floatX)
    truncate_grad_step = tensor.scalar(name='truncate_grad_step', dtype='int32')

    # recurrent pass followed by the output layers
    hidden_seq = get_tensor_output(input=[input_seq, None, None, truncate_grad_step],
                                   layers=recurrent_model,
                                   is_training=True)
    output_seq = get_tensor_output(input=hidden_seq,
                                   layers=output_model,
                                   is_training=True)

    # squared error summed over axes 0 and 2 -> one cost per sample
    sample_cost = tensor.sum(tensor.sqr(output_seq-target_seq), axis=(0, 2))

    # both sub-models are trained on the same mean cost, with clipping at 1.0
    recurrent_updates_dict = get_model_updates(layers=recurrent_model,
                                               cost=sample_cost.mean(),
                                               optimizer=recurrent_optimizer,
                                               use_grad_clip=1.0)
    output_updates_dict = get_model_updates(layers=output_model,
                                            cost=sample_cost.mean(),
                                            optimizer=output_optimizer,
                                            use_grad_clip=1.0)
    all_updates = merge_dicts([recurrent_updates_dict, output_updates_dict])

    return theano.function(inputs=[input_seq, target_seq, truncate_grad_step],
                           outputs=[hidden_seq, output_seq, sample_cost],
                           updates=all_updates,
                           on_unused_input='ignore')
Example #9
0
def set_generator_evaluation_function(generator_rnn_model,
                                      generator_mean_model,
                                      generator_std_model):
    """Compile a function that evaluates the generator's Gaussian-style cost.

    The compiled function takes ``(source_data, target_data)`` (3-D tensors)
    and returns ``[generator_cost]``, the per-element cost summed over the
    last axis.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_mean_model: layers producing the predicted mean.
        generator_std_model: layers producing the predicted std.

    Returns:
        The compiled ``theano.function``.
    """
    source_data = tensor.tensor3(name='source_data', dtype=floatX)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # recurrent pass, then swap axes 1 and 2 and keep the last slice of the
    # (new) third axis; flatten(3) keeps the result 3-D
    rnn_hidden  = generator_rnn_model[0].forward([source_data,], is_training=True)[0]
    swapped     = rnn_hidden.dimshuffle(0, 2, 1, 3)
    hidden_data = swapped[:,:,-1,:].flatten(3)

    # mean / std heads
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # NOTE(review): the quadratic term is diff^2 / (4*std^2); a Gaussian
    # negative log-likelihood would use diff^2 / (2*std^2) -- confirm.
    quadratic_term = 0.5*tensor.inv(2.0*tensor.sqr(output_std_data))*tensor.sqr(output_mean_data-target_data)
    log_term       = tensor.log(output_std_data) + 0.5*tensor.log(2.0*numpy.pi)
    generator_cost = tensor.sum(quadratic_term + log_term, axis=2)

    return theano.function(inputs=[source_data, target_data,],
                           outputs=[generator_cost,],
                           on_unused_input='ignore')
Example #10
0
def set_generator_update_function(
    generator_rnn_model, generator_mean_model, generator_std_model, generator_optimizer, grad_clipping
):
    """Compile the generator training step using a fixed output std of 0.22.

    The compiled function takes ``(source_data, target_data)`` (3-D tensors),
    applies the optimizer updates to the RNN and mean layers and returns
    ``[generator_cost, gradient_norm]``.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_mean_model: layers producing the predicted mean.
        generator_std_model: accepted but not used; the std is a constant.
        generator_optimizer: optimizer forwarded to ``get_model_updates``.
        grad_clipping: forwarded to ``get_model_updates`` as ``use_grad_clip``.

    Returns:
        The compiled ``theano.function``.
    """
    source_data = tensor.tensor3(name="source_data", dtype=floatX)
    target_data = tensor.tensor3(name="target_data", dtype=floatX)

    # recurrent pass; swap axes 1 and 2 and merge the trailing axes into one
    rnn_hidden = generator_rnn_model[0].forward([source_data], is_training=True)[0]
    hidden_data = rnn_hidden.dimshuffle(0, 2, 1, 3).flatten(3)

    output_mean_data = get_tensor_output(input=hidden_data, layers=generator_mean_model, is_training=True)
    # the std head is bypassed in favour of a constant
    output_std_data = 0.22

    # NOTE(review): the quadratic term is diff^2 / (4*std^2); a Gaussian
    # negative log-likelihood would use diff^2 / (2*std^2) -- confirm.
    quadratic_term = 0.5 * tensor.inv(2.0 * tensor.sqr(output_std_data)) * tensor.sqr(output_mean_data - target_data)
    log_term = tensor.log(output_std_data) + 0.5 * tensor.log(2.0 * numpy.pi)
    generator_cost = tensor.sum(quadratic_term + log_term, axis=2)

    # parameter updates for the RNN and mean layers only
    mean_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(
        layers=generator_rnn_model + generator_mean_model,
        cost=mean_cost,
        optimizer=generator_optimizer,
        use_grad_clip=grad_clipping,
    )

    # global L2 norm of the gradients
    gradients = get_model_gradients(generator_rnn_model + generator_mean_model, mean_cost)
    squared_total = 0.0
    for g in gradients:
        squared_total = squared_total + tensor.sum(g ** 2)
    gradient_norm = tensor.sqrt(squared_total)

    return theano.function(
        inputs=[source_data, target_data],
        outputs=[generator_cost, gradient_norm],
        updates=generator_updates_dict,
        on_unused_input="ignore",
    )
Example #11
0
def set_generator_evaluation_function(generator_rnn_model,
                                      generator_mean_model,
                                      generator_std_model):
    """Compile a function that evaluates the generator's element-wise cost.

    The compiled function takes ``(source_data, target_data)`` (3-D tensors)
    and returns ``[generator_cost]`` without any reduction.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_mean_model: layers producing the predicted mean.
        generator_std_model: layers producing the predicted std.

    Returns:
        The compiled ``theano.function``.
    """
    source_data = tensor.tensor3(name='source_data', dtype=floatX)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    # recurrent pass
    hidden_data = generator_rnn_model[0].forward([source_data,], is_training=True)[0]

    # mean / std heads
    output_mean_data = get_tensor_output(input=hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # NOTE(review): both terms are negated, so this has the sign of a
    # log-likelihood rather than a loss -- confirm how callers read it.
    variance       = tensor.sqr(output_std_data)
    quadratic_term = -0.5*tensor.inv(2.0*variance)*tensor.sqr(output_mean_data-target_data)
    log_term       = -0.5*tensor.log(2.0*tensor.sqr(output_std_data)*numpy.pi)
    generator_cost = quadratic_term + log_term

    return theano.function(inputs=[source_data, target_data,],
                           outputs=[generator_cost, ],
                           on_unused_input='ignore')
Example #12
0
def set_generator_sampling_function(generator_rnn_model,
                                    generator_mean_model,
                                    generator_std_model):
    """Compile a one-step sampler that draws from the predicted Gaussian.

    The compiled function takes ``(cur_input_data, prev_hidden_data)`` (both
    matrices) and returns ``[output_data, cur_hidden_data]`` where
    ``output_data`` is ``mean + std * noise`` clipped to ``[-1, 1]``.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_mean_model: layers producing the predicted mean.
        generator_std_model: layers producing the predicted std.

    Returns:
        The compiled ``theano.function``.
    """
    cur_input_data   = tensor.matrix(name='cur_input_data', dtype=floatX)
    prev_hidden_data = tensor.matrix(name='prev_hidden_data', dtype=floatX)

    # one recurrent step in inference mode
    rnn_outputs = generator_rnn_model[0].forward([cur_input_data, prev_hidden_data], is_training=False)
    cur_hidden_data = rnn_outputs[0]

    # NOTE(review): the output heads run with is_training=True while the
    # recurrent step uses is_training=False -- confirm this is intended.
    output_mean_data = get_tensor_output(input=cur_hidden_data,
                                         layers=generator_mean_model,
                                         is_training=True)
    output_std_data = get_tensor_output(input=cur_hidden_data,
                                        layers=generator_std_model,
                                        is_training=True)

    # scale normal noise by the std, shift by the mean, then clip
    noise       = theano_rng.normal(size=output_std_data.shape, dtype=floatX)
    output_data = tensor.clip(output_mean_data + output_std_data*noise, -1., 1.)

    return theano.function(inputs=[cur_input_data, prev_hidden_data],
                           outputs=[output_data, cur_hidden_data],
                           on_unused_input='ignore')
def set_sample_function(input_emb_param,
                        generator_rnn_model,
                        generator_output_model):
    """Compile a sampling function that works through an embedding matrix.

    The input is projected with ``input_emb_param``, passed through the first
    generator RNN layer and the output model, and projected back with the
    transposed embedding. The compiled function takes ``(init_input_data,
    init_hidden_data, init_cell_data)`` (all matrices) and returns
    ``[generator_sequence, generator_hidden, generator_cell]``.

    Args:
        input_emb_param: embedding parameter.
        generator_rnn_model: list of layers; ``[0]`` is run forward.
        generator_output_model: layers applied to the hidden output.

    Returns:
        The compiled ``theano.function``.
    """
    init_input_data  = tensor.matrix(name='init_input_data', dtype=floatX)
    init_hidden_data = tensor.matrix(name='init_hidden_data', dtype=floatX)
    init_cell_data   = tensor.matrix(name='init_cell_data', dtype=floatX)

    # embed the raw input before it enters the recurrent layer
    embedded_input = tensor.dot(init_input_data, input_emb_param)

    # recurrent pass in inference mode
    rnn_outputs = generator_rnn_model[0].forward([embedded_input,
                                                  init_hidden_data,
                                                  init_cell_data],
                                                 is_training=False)
    generator_hidden, generator_cell = rnn_outputs[0], rnn_outputs[1]

    # decode, then map back through the transposed embedding
    decoded_emb        = get_tensor_output(generator_hidden, generator_output_model, is_training=False)
    generator_sequence = tensor.dot(decoded_emb, tensor.transpose(input_emb_param))

    return theano.function(inputs=[init_input_data,
                                   init_hidden_data,
                                   init_cell_data],
                           outputs=[generator_sequence,
                                    generator_hidden,
                                    generator_cell],
                           on_unused_input='ignore')
Example #14
0
def _decode_signed_bits(bit_data):
    """Decode rows of ``[sign, b0, b1, ...]`` into ``sign * sum(b_i * 2**i)``.

    The sign column is mapped from ``{0, 1}`` to ``{-1, +1}``.
    """
    sign_data   = 2.0*bit_data[:,0]-tensor.ones_like(bit_data[:,0])
    value_bits  = bit_data[:,1:]
    bit_weights = tensor.pow(2.0, tensor.arange(value_bits.shape[1]))
    return sign_data*(value_bits*bit_weights).sum(axis=1)


def set_generator_update_function(generator_rnn_model,
                                  generator_output_models,
                                  generator_optimizer,
                                  grad_clipping):
    """Compile the training step of a generator with one bit-output head per layer.

    The previous body did not parse (``generator_rnn_model+,``), called
    ``tensor.sqr`` with two arguments and referenced several undefined names.
    This version wires up only the variables the function actually defines.

    The compiled function takes ``(source_data, target_data)`` and returns
    ``[mse_cost, generator_cost]`` while applying the optimizer updates.

    Args:
        generator_rnn_model: list of layers; ``[0]`` is run forward and its
            first output is indexed once per output head.
        generator_output_models: one output model per head; each is assumed
            to be a list of layers, as ``get_tensor_output`` is given
            elsewhere -- TODO confirm.
        generator_optimizer: optimizer forwarded to ``get_model_updates``.
        grad_clipping: forwarded to ``get_model_updates`` as ``use_grad_clip``.

    Returns:
        The compiled ``theano.function``.
    """
    # source sequence (3-D float tensor) and target bits (int8 matrix)
    source_data = tensor.tensor3(name='source_data',
                                 dtype=floatX)
    target_data = tensor.bmatrix(name='target_data')

    # get generator hidden data
    hidden_data = generator_rnn_model[0].forward([source_data,], is_training=True)[0]

    # one output head per entry of hidden_data
    output_data_list = []
    for l, output_model in enumerate(generator_output_models):
        output_data_list.append(get_tensor_output(input=hidden_data[l],
                                                  layers=output_model,
                                                  is_training=True))

    # heads are concatenated in reverse order along axis 1
    output_data = tensor.concatenate(output_data_list[::-1], axis=1)

    # decode predicted and target bit patterns into signed values
    output_value_data = _decode_signed_bits(output_data)
    target_value_data = _decode_signed_bits(target_data)

    # squared error of the decoded values and cross-entropy of the raw bits
    mse_cost = tensor.sqr(output_value_data-target_value_data)
    bce_cost = tensor.nnet.binary_crossentropy(output_data, target_data).sum(axis=1)

    # NOTE(review): the original never defined generator_cost; the bit-wise
    # cross-entropy is used as the training cost here -- confirm.
    generator_cost = bce_cost

    # train the recurrent layers together with every output head
    trainable_layers = list(generator_rnn_model)
    for output_model in generator_output_models:
        trainable_layers += list(output_model)

    generator_updates_cost = generator_cost.mean()
    generator_updates_dict = get_model_updates(layers=trainable_layers,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=grad_clipping)

    # set generator update inputs
    generator_updates_inputs  = [source_data,
                                 target_data]

    # set generator update outputs
    generator_updates_outputs = [mse_cost, generator_cost]

    # set generator update function
    generator_updates_function = theano.function(inputs=generator_updates_inputs,
                                                 outputs=generator_updates_outputs,
                                                 updates=generator_updates_dict,
                                                 on_unused_input='ignore')

    return generator_updates_function
Example #15
0
def set_gan_update_function(generator_model,
                            discriminator_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    """Compile a joint GAN training step over recurrent hidden-state pairs.

    The first generator layer returns the output sequence plus "data" and
    "model" hidden/cell sequences. The discriminator scores concatenations
    of a conditioning state (gradient-disconnected) with the following data
    state (true pair) or model state (false pair).

    The compiled function takes ``(input_sequence, target_sequence)`` and
    returns ``[generator_gan_cost, discriminator_gan_cost,
    discriminator_true_score, discriminator_false_score, square_error,
    generator_gradient_norm, discriminator_gradient_norm]`` while applying
    both sets of updates.

    Args:
        generator_model: list of layers; ``[0]`` is run forward.
        discriminator_model: layers handed to ``get_tensor_output``.
        generator_optimizer: optimizer for the generator.
        discriminator_optimizer: optimizer for the discriminator.
        generator_grad_clipping: ``use_grad_clip`` for the generator.
        discriminator_grad_clipping: ``use_grad_clip`` for the discriminator.

    Returns:
        The compiled ``theano.function``.
    """
    input_sequence  = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # generator forward pass
    generator_outputs = generator_model[0].forward([input_sequence,], is_training=True)
    output_sequence = generator_outputs[0]
    data_hidden     = generator_outputs[1]
    data_cell       = generator_outputs[2]
    model_hidden    = generator_outputs[3]
    model_cell      = generator_outputs[4]

    # conditioning states: everything but the last step, with gradients blocked
    condition_hidden = theano.gradient.disconnected_grad(data_hidden[:-1])
    condition_cell   = theano.gradient.disconnected_grad(data_cell[:-1])

    # pair the condition with the following data state (true) or model state
    # (false); the cell pairs are built but not consumed below
    true_pair_hidden  = tensor.concatenate([condition_hidden, data_hidden[1:]], axis=2)
    true_pair_cell    = tensor.concatenate([condition_cell, data_cell[1:]], axis=2)
    false_pair_hidden = tensor.concatenate([condition_hidden, model_hidden[1:]], axis=2)
    false_pair_cell   = tensor.concatenate([condition_cell, model_cell[1:]], axis=2)

    # discriminator scores
    discriminator_true_score  = get_tensor_output(true_pair_hidden, discriminator_model, is_training=True)
    discriminator_false_score = get_tensor_output(false_pair_hidden, discriminator_model, is_training=True)

    # the generator wants false pairs scored as 1
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                         target=tensor.ones_like(discriminator_false_score))

    # the discriminator wants true pairs scored as 1 and false pairs as 0
    true_pair_cost  = tensor.nnet.binary_crossentropy(output=discriminator_true_score,
                                                      target=tensor.ones_like(discriminator_true_score))
    false_pair_cost = tensor.nnet.binary_crossentropy(output=discriminator_false_score,
                                                      target=tensor.zeros_like(discriminator_false_score))
    discriminator_gan_cost = true_pair_cost + false_pair_cost

    # generator updates and gradient norm
    generator_mean_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_mean_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)
    generator_squared_total = 0.
    for g in get_model_gradients(generator_model, generator_mean_cost):
        generator_squared_total = generator_squared_total + tensor.sum(g**2)
    generator_gradient_norm = tensor.sqrt(generator_squared_total)

    # discriminator updates and gradient norm
    discriminator_mean_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_model,
                                                   cost=discriminator_mean_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)
    discriminator_squared_total = 0.
    for g in get_model_gradients(discriminator_model, discriminator_mean_cost):
        discriminator_squared_total = discriminator_squared_total + tensor.sum(g**2)
    discriminator_gradient_norm = tensor.sqrt(discriminator_squared_total)

    # reconstruction error, reported only
    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    return theano.function(inputs=[input_sequence,
                                   target_sequence],
                           outputs=[generator_gan_cost,
                                    discriminator_gan_cost,
                                    discriminator_true_score,
                                    discriminator_false_score,
                                    square_error,
                                    generator_gradient_norm,
                                    discriminator_gradient_norm,],
                           updates=merge_dicts([generator_updates_dict, discriminator_updates_dict]),
                           on_unused_input='ignore')
def set_gan_update_function(generator_rnn_model,
                            generator_output_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    """Compile the Theano function that performs one joint GAN update.

    The first generator layer turns ``input_sequence`` into hidden states and
    ``generator_output_model`` turns those into a sample.  The discriminator
    scores two tensors built by concatenating, along axis 2, the generator
    hidden states (with the gradient disconnected) with either the target
    sequence (positive pair) or the generated sample (negative pair).

    Args:
        generator_rnn_model: layer list; ``[0].forward`` is called on
            ``[input_sequence]`` and element 0 of its result is used.
        generator_output_model: layers applied to the generator hidden states.
        discriminator_rnn_model: layer list; ``[0].forward`` is called once
            per pair and element 0 of its result is used.
        discriminator_output_model: layers applied to the discriminator
            hidden states to obtain the scores.
        generator_optimizer: forwarded to ``get_model_updates``.
        discriminator_optimizer: forwarded to ``get_model_updates``.
        generator_grad_clipping: forwarded as ``use_grad_clip``.
        discriminator_grad_clipping: forwarded as ``use_grad_clip``.

    Returns:
        A compiled Theano function taking ``[input_sequence, target_sequence]``
        and returning, in order: generator cost, discriminator cost, positive
        score, negative score, squared error summed over axis 2, generator
        gradient norm and discriminator gradient norm.  Calling it applies
        the merged generator and discriminator updates.
    """
    # symbolic inputs (time_length * num_samples * input_dims)
    input_sequence = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # generator: hidden states first, then the sample produced from them
    generator_hidden = generator_rnn_model[0].forward([input_sequence, ], is_training=True)[0]
    generator_sample = get_tensor_output(generator_hidden, generator_output_model, is_training=True)

    # the discriminator is conditioned on the generator hidden states, but
    # no gradient is allowed to flow back through that conditioning path
    condition_generator_hidden = theano.gradient.disconnected_grad(generator_hidden)
    positive_pair = tensor.concatenate([condition_generator_hidden, target_sequence], axis=2)
    negative_pair = tensor.concatenate([condition_generator_hidden, generator_sample], axis=2)

    # discriminator score for the (condition, target) pair
    positive_hidden = discriminator_rnn_model[0].forward([positive_pair, ], is_training=True)[0]
    positive_score = get_tensor_output(positive_hidden, discriminator_output_model, is_training=True)

    # discriminator score for the (condition, sample) pair
    negative_hidden = discriminator_rnn_model[0].forward([negative_pair, ], is_training=True)[0]
    negative_score = get_tensor_output(negative_hidden, discriminator_output_model, is_training=True)

    # generator cost: push the negative score towards one
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # discriminator cost: positive score towards one, negative score towards zero
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # generator parameter updates
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_rnn_model+generator_output_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # generator gradient L2 norm (monitoring output only)
    # NOTE(review): the return value is iterated directly; if it is a mapping
    # this walks its keys rather than the gradients -- confirm.
    generator_gradients = get_model_gradients(generator_rnn_model+generator_output_model, generator_updates_cost)
    generator_gradient_norm = tensor.sqrt(sum((tensor.sum(grad**2) for grad in generator_gradients), 0.))

    # discriminator parameter updates
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_rnn_model+discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # discriminator gradient L2 norm (monitoring output only)
    discriminator_gradients = get_model_gradients(discriminator_rnn_model+discriminator_output_model, discriminator_updates_cost)
    discriminator_gradient_norm = tensor.sqrt(sum((tensor.sum(grad**2) for grad in discriminator_gradients), 0.))

    # squared error between target and sample, summed over axis 2
    square_error = tensor.sqr(target_sequence-generator_sample).sum(axis=2)

    gan_updates_inputs = [input_sequence,
                          target_sequence]

    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm, ]

    # both models are updated by the same compiled function
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict, discriminator_updates_dict]),
                                           on_unused_input='ignore')

    return gan_updates_function
Example #17
0
def set_generator_update_function(generator_rnn_model,
                                  discriminator_rnn_model,
                                  discriminator_output_model,
                                  generator_optimizer,
                                  grad_clipping):
    """Compile the Theano function that updates only the generator.

    The first generator layer is run on the four symbolic inputs, its first
    output is passed through the discriminator layers, and the generator is
    trained to make the discriminator output match an all-ones target.

    Args:
        generator_rnn_model: layer list; ``[0].forward`` produces the
            generated data and these are the layers that get updated.
        discriminator_rnn_model: layers handed to ``get_lstm_outputs``.
        discriminator_output_model: layers handed to ``get_tensor_output``.
        generator_optimizer: forwarded to ``get_model_updates``.
        grad_clipping: forwarded as ``use_grad_clip``.

    Returns:
        A compiled Theano function taking ``[init_input_data,
        init_hidden_data, init_cell_data, sampling_length]`` and returning
        ``[sample_cost_data, generator_cost]``; calling it applies the
        generator updates.
    """
    # symbolic inputs, created in the order the compiled function expects:
    #   seed input  (num_samples * input_dims)
    #   seed hidden (num_layers * num_samples * input_dims)
    #   seed cell   (num_layers * num_samples * input_dims)
    #   number of steps to sample (int32 scalar)
    seed_input = tensor.matrix(name='init_input_data', dtype=floatX)
    seed_hidden = tensor.tensor3(name='init_hidden_data', dtype=floatX)
    seed_cell = tensor.tensor3(name='init_cell_data', dtype=floatX)
    num_steps = tensor.scalar(name='sampling_length', dtype='int32')
    symbolic_inputs = (seed_input, seed_hidden, seed_cell, num_steps)

    # generated data: first element of the generator forward pass
    generated = generator_rnn_model[0].forward(list(symbolic_inputs),
                                               is_training=True)[0]

    # run the discriminator on the generated data; keep the last element
    # returned by get_lstm_outputs
    judged_hidden = get_lstm_outputs(input_list=[generated, ],
                                     layers=discriminator_rnn_model,
                                     is_training=True)[-1]

    # discriminator output; only the last index along the first axis is kept
    sample_cost_data = get_tensor_output(input=judged_hidden,
                                         layers=discriminator_output_model,
                                         is_training=True)[-1]

    # binary cross-entropy against an all-ones target, summed over axis 1
    all_ones = tensor.ones_like(sample_cost_data)
    generator_cost = tensor.nnet.binary_crossentropy(output=sample_cost_data,
                                                     target=all_ones).sum(axis=1)

    # the scalar that is actually optimised is the mean of that cost
    parameter_updates = get_model_updates(layers=generator_rnn_model,
                                          cost=generator_cost.mean(),
                                          optimizer=generator_optimizer,
                                          use_grad_clip=grad_clipping)

    return theano.function(inputs=list(symbolic_inputs),
                           outputs=[sample_cost_data, generator_cost],
                           updates=parameter_updates,
                           on_unused_input='ignore')
Example #18
0
def set_update_function(recurrent_model,
                        output_model,
                        controller_optimizer,
                        model_optimizer,
                        grad_clip=1.0):
    """Compile the Theano function that trains the recurrent + output models.

    The per-sample cost is the squared error between prediction and target
    summed over axis 2.  The optimised scalar is the maximum of that cost
    over axis 0, averaged over the remaining axis.

    Args:
        recurrent_model: layers handed to ``get_lstm_outputs``.
        output_model: layers handed to ``get_tensor_output``.
        controller_optimizer: currently unused; kept so existing callers keep
            working (an earlier controller-weighted cost is not implemented).
        model_optimizer: forwarded to ``get_model_updates``.
        grad_clip: forwarded as ``use_grad_clip``.

    Returns:
        A compiled Theano function taking ``[input_data, target_data]`` and
        returning ``[hidden_data, output_data, sample_cost]``; calling it
        applies the model updates.
    """
    # input data (time_length * num_samples * input_dims)
    input_data = tensor.tensor3(name='input_data', dtype=floatX)

    # target data (time_length * num_samples * output_dims)
    target_data = tensor.tensor3(name='target_data', dtype=floatX)

    time_length = input_data.shape[0]
    num_samples = input_data.shape[1]

    # hidden data: last element returned by get_lstm_outputs
    hidden_data = get_lstm_outputs(input_list=[input_data, ],
                                   layers=recurrent_model,
                                   is_training=True)[-1]

    # prediction data
    output_data = get_tensor_output(input=hidden_data,
                                    layers=output_model,
                                    is_training=True)

    # squared error summed over the feature axis -> (time_length, num_samples)
    sample_cost = tensor.sqr(output_data-target_data)
    sample_cost = tensor.sum(input=sample_cost, axis=2).reshape((time_length, num_samples))

    # optimise the worst time step of each sample, averaged over samples
    model_cost = sample_cost.max(axis=0).mean()
    model_updates_dict = get_model_updates(layers=recurrent_model+output_model,
                                           cost=model_cost,
                                           optimizer=model_optimizer,
                                           use_grad_clip=grad_clip)

    update_function_inputs = [input_data,
                              target_data]
    update_function_outputs = [hidden_data,
                               output_data,
                               sample_cost]

    update_function = theano.function(inputs=update_function_inputs,
                                      outputs=update_function_outputs,
                                      updates=model_updates_dict,
                                      on_unused_input='ignore')

    return update_function
def set_gan_update_function(generator_model,
                            discriminator_feature_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    """Compile the Theano function that performs one joint GAN update.

    The first generator layer returns an output sequence, data-driven and
    model-driven hidden states, and a trailing element that is merged into
    the function updates.  The discriminator compares each hidden state with
    its successor: data hidden states ``[1:]`` form the positive phase and
    model hidden states ``[1:]`` the negative phase, both conditioned on the
    gradient-disconnected data hidden states ``[:-1]``.

    Args:
        generator_model: layer list; ``[0].forward`` is called on
            ``[input_sequence, 1]`` and elements 0, 1, 3 and -1 of its
            result are used.
        discriminator_feature_model: layers applied to each hidden-state
            slice before pairing.
        discriminator_output_model: layers applied to the concatenated
            feature pairs to obtain the scores.
        generator_optimizer: forwarded to ``get_model_updates``.
        discriminator_optimizer: forwarded to ``get_model_updates``.
        generator_grad_clipping: forwarded as ``use_grad_clip``.
        discriminator_grad_clipping: forwarded as ``use_grad_clip``.

    Returns:
        A compiled Theano function taking ``[input_sequence, target_sequence]``
        and returning, in order: generator cost, discriminator cost, positive
        score, negative score, squared error summed over axis 2, generator
        gradient norm and discriminator gradient norm.  Calling it applies
        the merged generator, discriminator and generator-provided updates.
    """
    # symbolic inputs (time_length * num_samples * input_dims)
    input_sequence = tensor.tensor3(name='input_sequence', dtype=floatX)
    target_sequence = tensor.tensor3(name='target_sequence', dtype=floatX)

    # generator forward pass
    # NOTE(review): the meaning of the literal 1 in the input list is defined
    # by the layer's forward(); confirm against that implementation.
    generator_output = generator_model[0].forward([input_sequence, 1],
                                                  is_training=True)
    output_sequence = generator_output[0]
    data_hidden = generator_output[1]
    model_hidden = generator_output[3]
    generator_random = generator_output[-1]

    # conditional features: data hidden states without the last step, with
    # the gradient disconnected
    condition_hid = theano.gradient.disconnected_grad(data_hidden[:-1])
    condition_feature = get_tensor_output(condition_hid,
                                          discriminator_feature_model,
                                          is_training=True)

    # positive phase features: data hidden states without the first step
    positive_feature = get_tensor_output(data_hidden[1:],
                                         discriminator_feature_model,
                                         is_training=True)

    # negative phase features: model hidden states without the first step
    negative_feature = get_tensor_output(model_hidden[1:],
                                         discriminator_feature_model,
                                         is_training=True)

    # pair every conditional feature with its positive / negative successor
    positive_pair = tensor.concatenate([condition_feature, positive_feature], axis=2)
    negative_pair = tensor.concatenate([condition_feature, negative_feature], axis=2)

    # discriminator scores for both pairs
    positive_score = get_tensor_output(positive_pair,
                                       discriminator_output_model,
                                       is_training=True)
    negative_score = get_tensor_output(negative_pair,
                                       discriminator_output_model,
                                       is_training=True)

    # generator cost (increase negative score)
    generator_gan_cost = tensor.nnet.binary_crossentropy(output=negative_score,
                                                         target=tensor.ones_like(negative_score))

    # discriminator cost (increase positive score, decrease negative score)
    discriminator_gan_cost = (tensor.nnet.binary_crossentropy(output=positive_score,
                                                              target=tensor.ones_like(positive_score)) +
                              tensor.nnet.binary_crossentropy(output=negative_score,
                                                              target=tensor.zeros_like(negative_score)))

    # generator parameter updates
    generator_updates_cost = generator_gan_cost.mean()
    generator_updates_dict = get_model_updates(layers=generator_model,
                                               cost=generator_updates_cost,
                                               optimizer=generator_optimizer,
                                               use_grad_clip=generator_grad_clipping)

    # generator gradient L2 norm (monitoring output only)
    # NOTE(review): the return value is iterated directly; if it is a mapping
    # this walks its keys rather than the gradients -- confirm.
    generator_gradients = get_model_gradients(generator_model, generator_updates_cost)
    generator_gradient_norm = tensor.sqrt(sum((tensor.sum(grad**2) for grad in generator_gradients), 0.))

    # discriminator parameter updates
    discriminator_updates_cost = discriminator_gan_cost.mean()
    discriminator_updates_dict = get_model_updates(layers=discriminator_feature_model+discriminator_output_model,
                                                   cost=discriminator_updates_cost,
                                                   optimizer=discriminator_optimizer,
                                                   use_grad_clip=discriminator_grad_clipping)

    # discriminator gradient L2 norm (monitoring output only)
    discriminator_gradients = get_model_gradients(discriminator_feature_model+discriminator_output_model,
                                                  discriminator_updates_cost)
    discriminator_gradient_norm = tensor.sqrt(sum((tensor.sum(grad**2) for grad in discriminator_gradients), 0.))

    # squared error between target and generated sequence, summed over axis 2
    square_error = tensor.sqr(target_sequence-output_sequence).sum(axis=2)

    gan_updates_inputs = [input_sequence,
                          target_sequence]

    gan_updates_outputs = [generator_gan_cost,
                           discriminator_gan_cost,
                           positive_score,
                           negative_score,
                           square_error,
                           generator_gradient_norm,
                           discriminator_gradient_norm, ]

    # the generator's trailing output is merged in as additional updates
    gan_updates_function = theano.function(inputs=gan_updates_inputs,
                                           outputs=gan_updates_outputs,
                                           updates=merge_dicts([generator_updates_dict,
                                                                discriminator_updates_dict,
                                                                generator_random]),
                                           on_unused_input='ignore')

    return gan_updates_function
def set_gan_update_function(generator_rnn_model,
                            discriminator_rnn_model,
                            discriminator_output_model,
                            generator_optimizer,
                            discriminator_optimizer,
                            generator_grad_clipping,
                            discriminator_grad_clipping):
    """Compile the Theano function that performs one joint GAN update.

    The generator is unrolled with ``loop_forward`` starting from the first
    step of ``input_sequence`` for ``time_length`` steps.  The discriminator
    scores the real input sequence (positive) and the generated sequence
    (negative).

    Args:
        generator_rnn_model: layer list; ``[0].loop_forward`` produces the
            generated sequence (element 0) and extra updates (element -1).
        discriminator_rnn_model: layer list; ``[0].forward`` is called once
            per sequence and element 0 of its result is used.
        discriminator_output_model: layers turning discriminator hidden
            states into scores.
        generator_optimizer: forwarded to ``get_model_updates``.
        discriminator_optimizer: forwarded to ``get_model_updates``.
        generator_grad_clipping: forwarded as ``use_grad_clip``.
        discriminator_grad_clipping: forwarded as ``use_grad_clip``.

    Returns:
        A compiled Theano function taking ``[input_sequence, time_length]``
        and returning, in order: generator cost, discriminator cost, positive
        score, negative score, generator gradient norm and discriminator
        gradient norm.  Calling it applies the merged generator,
        discriminator and generator-provided updates.
    """
    # symbolic inputs: the real sequence and the number of steps to unroll
    input_sequence = tensor.tensor3(name='input_sequence', dtype=floatX)
    time_length = tensor.scalar(name='time_length', dtype='int32')

    # unroll the generator from the first step of the real sequence
    rollout = generator_rnn_model[0].loop_forward([input_sequence[0], time_length])
    fake_sequence = rollout[0]
    rollout_updates = rollout[-1]

    # score the real sequence
    real_hidden = discriminator_rnn_model[0].forward([input_sequence, ], is_training=True)[0]
    positive_score = get_tensor_output(real_hidden, discriminator_output_model, is_training=True)

    # score the generated sequence
    fake_hidden = discriminator_rnn_model[0].forward([fake_sequence, ], is_training=True)[0]
    negative_score = get_tensor_output(fake_hidden, discriminator_output_model, is_training=True)

    # generator wants the negative score to look like a one
    generator_gan_cost = tensor.nnet.binary_crossentropy(
        output=negative_score,
        target=tensor.ones_like(negative_score))

    # discriminator wants ones for real data and zeros for generated data
    real_term = tensor.nnet.binary_crossentropy(
        output=positive_score,
        target=tensor.ones_like(positive_score))
    fake_term = tensor.nnet.binary_crossentropy(
        output=negative_score,
        target=tensor.zeros_like(negative_score))
    discriminator_gan_cost = (real_term + fake_term)

    # generator: parameter updates and gradient L2 norm
    generator_scalar_cost = generator_gan_cost.mean()
    generator_step = get_model_updates(layers=generator_rnn_model,
                                       cost=generator_scalar_cost,
                                       optimizer=generator_optimizer,
                                       use_grad_clip=generator_grad_clipping)
    generator_grads = get_model_gradients(layers=generator_rnn_model,
                                          cost=generator_scalar_cost)
    generator_gradient_norm = tensor.sqrt(
        sum((tensor.sum(g**2) for g in generator_grads), 0.))

    # discriminator: parameter updates and gradient L2 norm
    discriminator_scalar_cost = discriminator_gan_cost.mean()
    discriminator_step = get_model_updates(layers=discriminator_rnn_model+discriminator_output_model,
                                           cost=discriminator_scalar_cost,
                                           optimizer=discriminator_optimizer,
                                           use_grad_clip=discriminator_grad_clipping)
    discriminator_grads = get_model_gradients(layers=discriminator_rnn_model+discriminator_output_model,
                                              cost=discriminator_scalar_cost)
    discriminator_gradient_norm = tensor.sqrt(
        sum((tensor.sum(g**2) for g in discriminator_grads), 0.))

    # compile: both models plus the updates returned by the generator rollout
    return theano.function(inputs=[input_sequence, time_length],
                           outputs=[generator_gan_cost,
                                    discriminator_gan_cost,
                                    positive_score,
                                    negative_score,
                                    generator_gradient_norm,
                                    discriminator_gradient_norm, ],
                           updates=merge_dicts([generator_step,
                                                discriminator_step,
                                                rollout_updates]),
                           on_unused_input='ignore')