Example #1
 def get_dec_cell(self, cell_size):
   # GRU decoder cell; dropout is applied only during training.
   cell = core_rnn_cell.GRUCell(cell_size)
   if self.phase_train:
     cell = core_rnn_cell.DropoutWrapper(
         cell, input_keep_prob=0.5, output_keep_prob=0.5)
   # Project the incoming features to cell_size before they reach the GRU.
   cell = core_rnn_cell.InputProjectionWrapper(cell, cell_size)
   return cell
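For orientation, here is a minimal sketch of how a cell built this way might be driven with tf.nn.dynamic_rnn (TF 1.x). The cell size, the placeholder shape, and the model object are assumptions for illustration, not part of the original snippet.

# Hypothetical usage of get_dec_cell (cell size, shapes, and the model object are assumed).
import tensorflow as tf  # TF 1.x, matching the contrib-era API above

dec_cell = model.get_dec_cell(cell_size=256)
dec_inputs = tf.placeholder(tf.float32, [None, None, 256])  # [batch, time, features]
dec_outputs, dec_state = tf.nn.dynamic_rnn(dec_cell, dec_inputs, dtype=tf.float32)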
Example #2
        def do_reconstruction(enc_inputs, enc_outputs, enc_last_state,
                              input_weights, seq_lengths):
            num_units = 100
            # attn_mech = attention_wrapper.LuongAttention(
            #     num_units=num_units,
            #     memory=enc_outputs,
            #     memory_sequence_length=seq_lengths,
            #     scale=True)
            attn_mech = tf.contrib.seq2seq.BahdanauAttention(
                num_units=num_units,
                memory=enc_outputs,
                memory_sequence_length=seq_lengths,
                normalize=True,
                name='attention_mechanism')
            cell = gru_ops.GRUBlockCell(1024)
            cell = core_rnn_cell.DropoutWrapper(
                cell, input_keep_prob=0.5, output_keep_prob=0.5)
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell=cell,
                attention_mechanism=attn_mech,
                attention_layer_size=1024,
                output_attention=False,
                initial_cell_state=enc_last_state,
                name="attention_wrapper")

            decoder_target = tf.reverse_sequence(enc_inputs,
                                                 seq_lengths,
                                                 seq_dim=1,
                                                 batch_dim=0)
            decoder_inputs = tf.pad(decoder_target[:, :-1, :],
                                    [[0, 0], [1, 0], [0, 0]])

            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_inputs,  # decoder inputs
                sequence_length=seq_lengths,  # decoder input length
                name="decoder_training_helper")

            # Decoder setup
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=attn_cell,
                helper=helper,
                initial_state=attn_cell.zero_state(tf.shape(enc_inputs)[0],
                                                   dtype=tf.float32),
                output_layer=Dense(1024 + 128))
            # Perform dynamic decoding with decoder object
            dec_outputs, final_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
                decoder,
                swap_memory=True,
            )
            loss = reconstruct_loss(logit=dec_outputs.rnn_output,
                                    target=decoder_target)
            # input_weights = tf.cast(input_weights, tf.float32)
            loss = tf.reduce_sum(loss * input_weights, axis=1) / tf.cast(
                seq_lengths, tf.float32)
            loss = tf.reduce_mean(loss)
            # loss = tf.contrib.seq2seq.sequence_loss(
            # dec_outputs.rnn_output, decoder_target, input_weights,
            # softmax_loss_function=reconstruct_loss)
            predictions = tf.no_op()
            return predictions, loss
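The reconstruct_loss used above is user-defined and not shown in this snippet. Below is a minimal sketch of one plausible definition, assuming the decoder's projected outputs and the reversed targets share the same feature dimensionality and that a per-timestep squared error is intended, so the result is a [batch, time] tensor that can be weighted by input_weights as above.

# Hypothetical reconstruct_loss (an assumption, not from the original code):
# per-timestep squared error over the feature axis, giving a [batch, time] tensor.
def reconstruct_loss(logit, target):
    return tf.reduce_sum(tf.square(logit - target), axis=-1)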
Example #3
    def get_pretrain_enc_cell(self):
        # GRU block cell with dropout (training only) and input/output projections,
        # pinned to the first GPU.
        cell = gru_ops.GRUBlockCell(1024)
        if self.is_training:
            cell = core_rnn_cell.DropoutWrapper(
                cell, input_keep_prob=0.5, output_keep_prob=0.5)
        cell = core_rnn_cell.InputProjectionWrapper(cell, 1024)
        cell = core_rnn_cell.OutputProjectionWrapper(cell, 1024)

        cell = core_rnn_cell.DeviceWrapper(cell, device='/gpu:0')
        return cell
Example #4
 def build_input_sequence(self, gpu_id=0):
     # embedding layer
     self.__build_embedding_layer__()
     with get_new_variable_scope('rnn_lstm') as rnn_scope:
         # Peephole LSTM with input/output dropout, stacked self.num_layers deep.
         single_cell = rnn_cell.LSTMCell(self.hidden_size,
                                         use_peepholes=True,
                                         state_is_tuple=True)
         single_cell = rnn_cell.DropoutWrapper(
             single_cell,
             input_keep_prob=self.keep_prob,
             output_keep_prob=self.keep_prob)
         cell = rnn_cell.MultiRNNCell([single_cell] * self.num_layers,
                                      state_is_tuple=True)
         # dynamic_rnn returns (outputs, final_state), so assign in that order.
         self.output_list[gpu_id], self.state_list[gpu_id] = dynamic_rnn(
             cell,
             self.input_embedding,
             sequence_length=self.split_seqLengths[gpu_id],
             dtype=tf.float32)
     if self.input_params is None:
         # Skip the first trainable variable (created by the embedding layer above).
         self.input_params = tf.trainable_variables()[1:]
Example #5
 def get_dec_cell(self, cell_size):
     cell = core_rnn_cell.GRUCell(cell_size)
     # TODO
     if True:
         num_layers = 2
         # if self.phase_train:
         #     cell = core_rnn_cell.DropoutWrapper(
         #         cell, input_keep_prob=0.5)
         cell = core_rnn_cell.MultiRNNCell([cell] * num_layers)
         # if self.phase_train:
         #     cell = core_rnn_cell.DropoutWrapper(
         #         cell, output_keep_prob=0.5)
     else:
         if self.phase_train:
             cell = core_rnn_cell.DropoutWrapper(cell,
                                                 input_keep_prob=0.5,
                                                 output_keep_prob=0.5)
     return cell
Example #6
 def get_dec_cell(self, cell_size):
     cell = core_rnn_cell.GRUCell(cell_size)
     cell = core_rnn_cell.DropoutWrapper(
         cell, input_keep_prob=0.5, output_keep_prob=0.5)
     # num_layers = 1
     # cell = core_rnn_cell.MultiRNNCell([cell] * num_layers)
     return cell
Example #7
    def __init__(self, args, infer=False):
        if infer:
            args.batch_size = 1
            args.seq_length = 1
        self.args = args

        self.unitcell_state_is_tuple = False

        if args.model == 'gru':
            cell_fn = supercell.GRUCell  #tf.contrib.rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = supercell.LSTMCell  #tf.nn.rnn_cell.BasicLSTMCell #(state_is_tuple=True)
            self.unitcell_state_is_tuple = True
        elif args.model == 'hyperlstm':
            cell_fn = supercell.HyperLSTMCell  #HyperLnLSTMCell # HyperLSTMCell
            self.unitcell_state_is_tuple = True
        else:
            raise Exception("model type not supported: {}".format(args.model))

        self.state_is_tuple = True  # should not be False
        cell = cell_fn(args.rnn_size)

        # we may use stacked RNN with skip or residual connections
        if args.skip_conn and args.resid_conn:
            cell = supercell.MultiRNNCellWithAdditionalConn(
                [cell] * args.num_layers,
                state_is_tuple=self.state_is_tuple,
                add_skip_conn=True,
                add_resid_conn=True)
        elif args.skip_conn:
            cell = supercell.MultiRNNCellWithAdditionalConn(
                [cell] * args.num_layers,
                state_is_tuple=self.state_is_tuple,
                add_skip_conn=True,
                add_resid_conn=False)
        elif args.resid_conn:
            cell = supercell.MultiRNNCellWithAdditionalConn(
                [cell] * args.num_layers,
                state_is_tuple=self.state_is_tuple,
                add_skip_conn=False,
                add_resid_conn=True)
        else:
            cell = core_rnn_cell_impl.MultiRNNCell(
                [cell] * args.num_layers, state_is_tuple=self.state_is_tuple)

        if not infer and args.keep_prob < 1:  # training mode
            cell = core_rnn_cell.DropoutWrapper(
                cell, output_keep_prob=args.keep_prob)

        self.cell = cell

        self.input_data = tf.placeholder(
            dtype=tf.float32, shape=[args.batch_size, args.seq_length, 5])
        self.target_data = tf.placeholder(
            dtype=tf.float32, shape=[args.batch_size, args.seq_length, 5])

        ###
        self.initial_state = cell.zero_state(batch_size=args.batch_size,
                                             dtype=tf.float32)

        #print('## initial state: {}\n'.format(self.initial_state))

        self.num_mixture = args.num_mixture
        NOUT = 3 + self.num_mixture * 6  # [end_of_stroke + end_of_char, continue_with_stroke] + prob + 2*(mu + sig) + corr

        with tf.variable_scope('rnn_mdn'):
            if args.skip_conn:  # adding state-to-output skip connection
                #output_w = [ tf.get_variable("output_w{}".format(i), [args.rnn_size, NOUT]) for i in xrange(args.num_layers) ]
                output_w = tf.get_variable(
                    "output_w", [args.rnn_size * args.num_layers, NOUT])
            else:
                output_w = tf.get_variable("output_w", [args.rnn_size, NOUT])
            output_b = tf.get_variable("output_b", [NOUT])

        inputs = tf.split(self.input_data, args.seq_length, 1)
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        self.initial_input = np.zeros((args.batch_size, 5), dtype=np.float32)
        self.initial_input[:, 4] = 1.0  # initially, the pen is down.
        self.initial_input = tf.constant(self.initial_input)

        def tfrepeat(a, repeats):
            num_row = a.get_shape()[0].value
            num_col = a.get_shape()[1].value
            assert (num_col == 1)
            result = [a for i in range(repeats)]
            result = tf.concat(result, 0)
            result = tf.reshape(result, [repeats, num_row])
            result = tf.transpose(result)
            return result

        def custom_rnn_autodecoder(decoder_inputs,
                                   initial_input,
                                   initial_state,
                                   cell,
                                   scope=None):
            # customized rnn_decoder for the task of dealing with the end of character
            with tf.variable_scope(scope or "rnn_decoder"):
                states = [initial_state]
                outputs = []
                prev = None

                for i in xrange(len(
                        decoder_inputs)):  # for each time step in mini-batch
                    inp = decoder_inputs[i]
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    #output, new_state = cell(inp, states[-1])  # this line is for a single RNN cell
                    # For MultiRNNCell the first return value (the cell output) is discarded;
                    # the per-layer states are used below instead.
                    _, new_states = cell(inp, states[-1])
                    #print('## new_states: {}, \n new_states[0]: {}\n'.format(new_states, new_states[0]))

                    if self.state_is_tuple:
                        if self.unitcell_state_is_tuple:
                            num_state = new_states[0][0].get_shape()[1].value
                            if args.skip_conn:
                                output = new_states[0][1]
                                for i in xrange(1, self.args.num_layers):
                                    output = tf.concat(
                                        [output, new_states[i][1]], 1)
                            else:
                                output = new_states[-1][1]
                        else:
                            num_state = new_states[0].get_shape()[1].value
                            if args.skip_conn:
                                output = new_states[0]
                                for i in xrange(1, self.args.num_layers):
                                    output = tf.concat([output, new_states[i]],
                                                       1)
                            else:
                                output = new_states[
                                    -1]  # get the top hidden states as the output
                    else:  # should not be reached
                        num_state = int(new_states.get_shape()[1].value /
                                        self.args.rnn_size)
                        if self.unitcell_state_is_tuple:
                            output = new_states[-self.args.rnn_size:]  # ??
                        else:
                            output = new_states[-self.args.rnn_size:]
                    #print('## output: {}\n'.format(output))
                    #print('## n_states: {}'.format(num_state))

                    # if the input has an end-of-character signal, the state has to be zeroed out
                    # TODO (hardmaru): test this code.
                    num_batches = self.args.batch_size
                    eoc_detection = inp[:, 3]
                    #eoc_detection = tf.reshape(eoc_detection, [num_batches, 1])
                    #eoc_detection_state = tfrepeat(eoc_detection, num_state)
                    #eoc_detection_state = tf.greater(eoc_detection_state, tf.zeros_like(eoc_detection_state, dtype=tf.float32)) # make it a binary tensor

                    # if the eoc detected, new state should be reset to zeros (initial state)
                    #new_state = tf.select(eoc_detection_state, initial_state, new_state)   # tf.select(condition, t, e, name=None). Selects elements from t or e , depending on condition
                    #new_states = tf.where(eoc_detection_state, initial_state, new_states)

                    for i in xrange(num_batches):
                        if eoc_detection[i] == 1:
                            for j in range(self.args.num_layers):
                                if args.model == 'gru':
                                    new_states[j][i] = initial_state[j][i]
                                elif args.model == 'lstm':
                                    new_states[j][0][i] = initial_state[j][0][
                                        i]
                                    new_states[j][1][i] = initial_state[j][1][
                                        i]
                                else:
                                    pass  #TODO

                    outputs.append(output)
                    states.append(new_states)

            return outputs, states

        outputs, states = custom_rnn_autodecoder(inputs,
                                                 self.initial_input,
                                                 self.initial_state,
                                                 cell,
                                                 scope='rnn_mdn')

        if args.skip_conn:
            output = tf.reshape(tf.concat(outputs, 1),
                                [-1, args.rnn_size * args.num_layers])
        else:
            output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        #output = tf.nn.xw_plus_b(output, output_w[-1], output_b)
        output = tf.matmul(output, output_w) + output_b

        self.final_state = states[-1]

        # reshape target data so that it is compatible with prediction shape
        flat_target_data = tf.reshape(self.target_data, [-1, 5])
        [x1_data, x2_data, eos_data, eoc_data,
         cont_data] = tf.split(flat_target_data, 5, 1)
        pen_data = tf.concat([eos_data, eoc_data, cont_data], 1)

        # long method:
        #flat_target_data = tf.split(1, args.seq_length, self.target_data)
        #flat_target_data = [tf.squeeze(flat_target_data_, [1]) for flat_target_data_ in flat_target_data]
        #flat_target_data = tf.reshape(tf.concat(1, flat_target_data), [-1, 3])

        def tf_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
            # eq # 24 and 25 of http://arxiv.org/abs/1308.0850
            norm1 = tf.subtract(x1, mu1)
            norm2 = tf.subtract(x2, mu2)
            s1s2 = tf.multiply(s1, s2)
            z = tf.square(tf.divide(norm1, s1)) + tf.square(
                tf.divide(norm2, s2)) - 2 * tf.divide(
                    tf.multiply(rho, tf.multiply(norm1, norm2)), s1s2)
            negRho = 1 - tf.square(rho)
            result = tf.exp(tf.divide(-z, 2 * negRho))
            denom = 2 * np.pi * tf.multiply(s1s2, tf.sqrt(negRho))
            result = tf.divide(result, denom)
            return result

        def get_lossfunc(z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen,
                         x1_data, x2_data, pen_data):
            result0 = tf_2d_normal(x1_data, x2_data, z_mu1, z_mu2, z_sigma1,
                                   z_sigma2, z_corr)
            # implementing eq # 26 of http://arxiv.org/abs/1308.0850
            epsilon = 1e-20
            result1 = tf.multiply(result0, z_pi)
            result1 = tf.reduce_sum(result1, 1, keep_dims=True)
            # at the start of training the summed mixture likelihood can be exactly zero
            result1 = -tf.log(tf.maximum(result1, epsilon))
            result_shape = tf.reduce_mean(result1)

            result2 = tf.nn.softmax_cross_entropy_with_logits(labels=pen_data,
                                                              logits=z_pen)
            #pen_data_weighting = pen_data[:, 2]+np.sqrt(self.args.stroke_importance_factor)*pen_data[:, 0]+self.args.stroke_importance_factor*pen_data[:, 1]
            pen_data_weighting = pen_data[:, 2] + \
                        np.sqrt(self.args.stroke_importance_factor)*pen_data[:, 0] + \
                        self.args.stroke_importance_factor*pen_data[:, 1]
            result2 = tf.multiply(result2, pen_data_weighting)
            result_pen = tf.reduce_mean(result2)

            result = result_shape + result_pen
            return result, result_shape, result_pen,

        # below is where we need to do MDN splitting of distribution params
        def get_mixture_coef(output):
            # returns the tf slices containing mdn dist params
            # ie, eq 18 -> 23 of http://arxiv.org/abs/1308.0850
            z = output
            z_pen = z[:, 0:3]  # end of stroke, end of character/content, continue w/ stroke
            z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr = tf.split(
                z[:, 3:], 6, 1)

            # process output z's into MDN parameters

            # softmax all the pi's:
            max_pi = tf.reduce_max(z_pi, 1, keep_dims=True)
            z_pi = tf.subtract(z_pi, max_pi)
            z_pi = tf.exp(z_pi)
            # inv (API 0.10) was renamed to reciprocal (API 0.12)
            normalize_pi = tf.reciprocal(tf.reduce_sum(z_pi, 1, keep_dims=True))
            z_pi = tf.multiply(normalize_pi, z_pi)

            # exponentiate the sigmas and also make corr between -1 and 1.
            z_sigma1 = tf.exp(z_sigma1)
            z_sigma2 = tf.exp(z_sigma2)
            z_corr = tf.tanh(z_corr)

            return [z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen]

        [o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr,
         o_pen] = get_mixture_coef(output)

        self.pi = o_pi
        self.mu1 = o_mu1
        self.mu2 = o_mu2
        self.sigma1 = o_sigma1
        self.sigma2 = o_sigma2
        self.corr = o_corr
        self.pen = o_pen  # state of the pen

        [lossfunc, loss_shape,
         loss_pen] = get_lossfunc(o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2,
                                  o_corr, o_pen, x1_data, x2_data, pen_data)
        self.cost = lossfunc
        self.cost_shape = loss_shape
        self.cost_pen = loss_pen

        self.lr = tf.Variable(
            0.0001, trainable=False)  # tf.Variable(0.01, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr, epsilon=0.001)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
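A minimal sketch of how a model defined by this __init__ might be trained. The class name Model, the next_batch loader, and num_steps are assumed names for illustration; only input_data, target_data, cost, and train_op come from the code above.

# Hypothetical training loop (Model, next_batch, and num_steps are assumed names).
model = Model(args)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):
        # x and y each have shape [batch_size, seq_length, 5], matching the placeholders above.
        x, y = next_batch(args.batch_size, args.seq_length)
        feed = {model.input_data: x, model.target_data: y}
        train_cost, _ = sess.run([model.cost, model.train_op], feed_dict=feed)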