def get_dec_cell(self, cell_size):
    """Build the decoder cell: a GRU preceded by an input projection.

    Args:
        cell_size: Number of GRU units; the same value is used as the
            size of the input projection.

    Returns:
        A ``GRUCell`` wrapped in a ``DropoutWrapper`` (keep probability
        0.5 on both inputs and outputs) when ``self.phase_train`` is
        truthy, and in every case wrapped in an ``InputProjectionWrapper``.
    """
    dec_cell = core_rnn_cell.GRUCell(cell_size)
    if self.phase_train:
        # Dropout is inserted only when the phase_train flag is set.
        dropout_kwargs = {'input_keep_prob': 0.5, 'output_keep_prob': 0.5}
        dec_cell = core_rnn_cell.DropoutWrapper(dec_cell, **dropout_kwargs)
    return core_rnn_cell.InputProjectionWrapper(dec_cell, cell_size)
def do_reconstruction(enc_inputs, enc_outputs, enc_last_state, input_weights,
                      seq_lengths):
    """Build an attention decoder that reconstructs the reversed input.

    The decoder target is ``enc_inputs`` reversed along axis 1 (per-example,
    using ``seq_lengths``).  The decoder input is that target shifted one
    step to the right, with an all-zero first step (teacher forcing).

    Args:
        enc_inputs: Rank-3 tensor indexed as [batch, time, feature].
        enc_outputs: Encoder outputs, used as the attention memory.
        enc_last_state: Encoder state handed to the attention wrapper as
            ``initial_cell_state``.
        input_weights: Weights multiplied into the per-step loss before it
            is summed over axis 1 (presumably a [batch, time] float mask --
            TODO confirm against the caller).
        seq_lengths: Per-example sequence lengths.

    Returns:
        ``(predictions, loss)`` where ``predictions`` is a ``tf.no_op()``
        placeholder and ``loss`` is the mean over the batch of the
        length-normalised, weighted reconstruction loss.
    """
    attention_units = 100
    # Bahdanau attention over the encoder outputs.  (A Luong mechanism with
    # scale=True was tried previously and is not used.)
    attention = tf.contrib.seq2seq.BahdanauAttention(
        num_units=attention_units,
        memory=enc_outputs,
        memory_sequence_length=seq_lengths,
        normalize=True,
        name='attention_mechanism')

    # GRU decoder cell; dropout is applied unconditionally with the
    # positional arguments 0.5, 0.5.
    gru = gru_ops.GRUBlockCell(1024)
    gru = core_rnn_cell.DropoutWrapper(gru, 0.5, 0.5)
    attn_cell = tf.contrib.seq2seq.AttentionWrapper(
        cell=gru,
        attention_mechanism=attention,
        attention_layer_size=1024,
        output_attention=False,
        initial_cell_state=enc_last_state,
        name="attention_wrapper")

    # Target: the input sequence reversed in time.
    decoder_target = tf.reverse_sequence(
        enc_inputs, seq_lengths, seq_dim=1, batch_dim=0)
    # Input: the target without its last step, zero-padded by one step at
    # the start of the time axis.
    shifted_target = decoder_target[:, :-1, :]
    decoder_inputs = tf.pad(shifted_target, [[0, 0], [1, 0], [0, 0]])

    training_helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=decoder_inputs,
        sequence_length=seq_lengths,
        name="decoder_training_helper")

    # Decoder setup
    batch_size = tf.shape(enc_inputs)[0]
    decoder_initial_state = attn_cell.zero_state(batch_size, dtype=tf.float32)
    output_projection = Dense(1024 + 128)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=attn_cell,
        helper=training_helper,
        initial_state=decoder_initial_state,
        output_layer=output_projection)

    # Perform dynamic decoding; only the decoder outputs are used.
    dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder, swap_memory=True)

    step_loss = reconstruct_loss(
        logit=dec_outputs.rnn_output, target=decoder_target)
    # Weighted sum over axis 1, normalised by the true sequence length.
    weighted_loss = step_loss * input_weights
    summed_loss = tf.reduce_sum(weighted_loss, axis=1)
    lengths = tf.cast(seq_lengths, tf.float32)
    per_example_loss = summed_loss / lengths
    loss = tf.reduce_mean(per_example_loss)

    predictions = tf.no_op()
    return predictions, loss
def get_pretrain_enc_cell(self):
    """Build the pre-training encoder cell.

    Returns:
        A 1024-unit ``GRUBlockCell`` wrapped, from the inside out, in:
        a ``DropoutWrapper`` (positional arguments 0.5, 0.5; only when
        ``self.is_training`` is truthy), an ``InputProjectionWrapper`` and
        an ``OutputProjectionWrapper`` (both of size 1024), and finally a
        ``DeviceWrapper`` pinned to ``'/gpu:0'``.
    """
    hidden_size = 1024
    enc_cell = gru_ops.GRUBlockCell(hidden_size)
    if self.is_training:
        enc_cell = core_rnn_cell.DropoutWrapper(enc_cell, 0.5, 0.5)
    enc_cell = core_rnn_cell.InputProjectionWrapper(enc_cell, hidden_size)
    enc_cell = core_rnn_cell.OutputProjectionWrapper(enc_cell, hidden_size)
    return core_rnn_cell.DeviceWrapper(enc_cell, device='/gpu:0')
def build_input_sequence(self, gpu_id=0):
    """Build the embedding layer and the stacked LSTM for one GPU slot.

    Calls ``self.__build_embedding_layer__()``, then runs ``dynamic_rnn``
    over ``self.input_embedding`` with a ``self.num_layers``-deep stack of
    peephole LSTM cells (each wrapped in dropout with ``self.keep_prob``
    on inputs and outputs), inside a variable scope obtained from
    ``get_new_variable_scope('rnn_lstm')``.

    Args:
        gpu_id: Index into ``self.state_list``, ``self.output_list`` and
            ``self.split_seqLengths`` selecting the slot to fill.

    Side effects:
        Stores the two values returned by ``dynamic_rnn`` in
        ``self.state_list[gpu_id]`` and ``self.output_list[gpu_id]``, and
        initialises ``self.input_params`` on the first call.
    """
    # embedding layer
    self.__build_embedding_layer__()

    def make_layer():
        # Every layer gets its OWN cell object.  Reusing a single instance
        # via ``[cell] * n`` raises in TF 1.1 and silently shares weights
        # across layers in later releases.
        lstm = rnn_cell.LSTMCell(
            self.hidden_size, use_peepholes=True, state_is_tuple=True)
        return rnn_cell.DropoutWrapper(
            lstm,
            input_keep_prob=self.keep_prob,
            output_keep_prob=self.keep_prob)

    with get_new_variable_scope('rnn_lstm'):
        cell = rnn_cell.MultiRNNCell(
            [make_layer() for _ in range(self.num_layers)],
            state_is_tuple=True)
        # NOTE(review): if ``dynamic_rnn`` is ``tf.nn.dynamic_rnn`` it
        # returns (outputs, state), so ``state_list`` would receive the
        # outputs and ``output_list`` the final state.  Left unchanged
        # because callers may rely on the current assignment -- confirm.
        self.state_list[gpu_id], self.output_list[gpu_id] = dynamic_rnn(
            cell,
            self.input_embedding,
            self.split_seqLengths[gpu_id],
            dtype=tf.float32)

    if self.input_params is None:
        # Every trainable variable except the first one (presumably the
        # embedding created above -- verify).
        self.input_params = tf.trainable_variables()[1:]
def get_dec_cell(self, cell_size):
    """Build the decoder cell: a two-layer stack of GRU cells.

    Args:
        cell_size: Number of units in each GRU layer.

    Returns:
        A ``MultiRNNCell`` holding two independent ``GRUCell`` layers.
        No dropout is applied.
    """
    # TODO: decide on dropout / depth.  Earlier experiments (input or
    # output dropout of 0.5 when ``self.phase_train``, and a single-layer
    # GRU with dropout) were disabled behind a hard-coded ``if True:`` and
    # never executed, so they are not reproduced here.
    num_layers = 2
    # Each layer gets its own GRUCell.  Reusing one instance through
    # ``[cell] * num_layers`` raises in TF 1.1 and silently shares weights
    # across layers in later releases.
    layers = [core_rnn_cell.GRUCell(cell_size) for _ in range(num_layers)]
    return core_rnn_cell.MultiRNNCell(layers)
def get_dec_cell(self, cell_size):
    """Build the decoder cell: a single GRU layer wrapped in dropout.

    Args:
        cell_size: Number of GRU units.

    Returns:
        A ``GRUCell`` wrapped in a ``DropoutWrapper`` called with the
        positional arguments 0.5, 0.5.  The wrapper is added
        unconditionally; no training flag is consulted.
    """
    gru = core_rnn_cell.GRUCell(cell_size)
    # Single layer only; stacking through MultiRNNCell is not enabled.
    return core_rnn_cell.DropoutWrapper(gru, 0.5, 0.5)
def __init__(self, args, infer=False):
    """Build the graph of the stroke RNN with a mixture-density output.

    The network unrolls a stacked RNN over ``args.seq_length`` steps, maps
    the per-step hidden state through a linear layer to pen-state logits
    plus 2-D Gaussian mixture parameters, and defines the loss, the
    gradient-clipped Adam update and the tensors used for sampling.

    Args:
        args: Hyper-parameter namespace.  Attributes read here: ``model``
            ('gru', 'lstm' or 'hyperlstm'), ``rnn_size``, ``num_layers``,
            ``skip_conn``, ``resid_conn``, ``keep_prob``, ``batch_size``,
            ``seq_length``, ``num_mixture``, ``stroke_importance_factor``
            and ``grad_clip``.  An optional boolean attribute
            ``reset_state_on_eoc`` (default ``False``) enables resetting
            the recurrent state after an end-of-character input.
            NOTE: when ``infer`` is true this object is modified in place
            (``batch_size`` and ``seq_length`` are set to 1).
        infer: When true, build the graph for one point at a time and do
            not add dropout.

    Raises:
        ValueError: If ``args.model`` is not one of the supported names.
    """
    if infer:
        args.batch_size = 1
        args.seq_length = 1
    self.args = args

    # Whether a single layer's state is itself a tuple.  For the LSTM
    # variants the layer output is read from ``state[1]`` below; for the
    # GRU the state tensor is the output.
    self.unitcell_state_is_tuple = False
    if args.model == 'gru':
        cell_fn = supercell.GRUCell
    elif args.model == 'lstm':
        cell_fn = supercell.LSTMCell
        self.unitcell_state_is_tuple = True
    elif args.model == 'hyperlstm':
        cell_fn = supercell.HyperLSTMCell
        self.unitcell_state_is_tuple = True
    else:
        raise ValueError(
            "model type not supported: {}".format(args.model))

    # Must stay True: the decoder below only understands per-layer
    # (tuple) states.
    self.state_is_tuple = True

    cell = cell_fn(args.rnn_size)
    # NOTE(review): the same cell object is repeated for every layer.
    # With stock TensorFlow cells this raises (TF 1.1) or shares weights
    # (later releases); whether the project-local ``supercell`` cells
    # have that problem cannot be told from here -- confirm.
    layers = [cell] * args.num_layers

    # we may use stacked RNN with skip or residual connections
    if args.skip_conn or args.resid_conn:
        cell = supercell.MultiRNNCellWithAdditionalConn(
            layers,
            state_is_tuple=self.state_is_tuple,
            add_skip_conn=bool(args.skip_conn),
            add_resid_conn=bool(args.resid_conn))
    else:
        cell = core_rnn_cell_impl.MultiRNNCell(
            layers, state_is_tuple=self.state_is_tuple)

    if not infer and args.keep_prob < 1:  # training mode
        # NOTE(review): the decoder below discards the cell's returned
        # output and reads the hidden state instead, so this output
        # dropout presumably never reaches the MDN layer -- confirm.
        cell = core_rnn_cell.DropoutWrapper(
            cell, output_keep_prob=args.keep_prob)

    self.cell = cell

    # Last axis holds 5 values per point; the target is split below as
    # [x1, x2, eos, eoc, cont].
    self.input_data = tf.placeholder(
        dtype=tf.float32, shape=[args.batch_size, args.seq_length, 5])
    self.target_data = tf.placeholder(
        dtype=tf.float32, shape=[args.batch_size, args.seq_length, 5])
    self.initial_state = cell.zero_state(
        batch_size=args.batch_size, dtype=tf.float32)

    self.num_mixture = args.num_mixture
    # 3 pen-state logits
    # [end_of_stroke, end_of_char, continue_with_stroke]
    # + per mixture component: prob + 2*(mu + sig) + corr
    NOUT = 3 + self.num_mixture * 6

    # With the state-to-output skip connection the output layer sees the
    # concatenated hidden states of every layer.
    if args.skip_conn:
        output_in_size = args.rnn_size * args.num_layers
    else:
        output_in_size = args.rnn_size

    with tf.variable_scope('rnn_mdn'):
        output_w = tf.get_variable("output_w", [output_in_size, NOUT])
        output_b = tf.get_variable("output_b", [NOUT])

    # One [batch_size, 5] tensor per time step.
    inputs = tf.split(self.input_data, args.seq_length, 1)
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    initial_input = np.zeros((args.batch_size, 5), dtype=np.float32)
    initial_input[:, 4] = 1.0  # initially, the pen is down.
    self.initial_input = tf.constant(initial_input)

    # Opt-in so that graphs built from existing configurations keep
    # their current numerics.
    reset_state_on_eoc = bool(getattr(args, 'reset_state_on_eoc', False))

    def reset_rows(mask, reset_value, value):
        """Row-wise select ``reset_value`` where ``mask`` is true.

        ``value`` and ``reset_value`` are assumed to share the same nested
        tuple structure (verify for custom cells); the structure,
        including namedtuple types, is preserved in the result.
        """
        if isinstance(value, (list, tuple)):
            parts = [reset_rows(mask, r, v)
                     for r, v in zip(reset_value, value)]
            if hasattr(value, '_fields'):  # namedtuple
                return type(value)(*parts)
            return type(value)(parts)
        return tf.where(mask, reset_value, value)

    def layer_output(layer_state):
        """Return the tensor used as a layer's output from its state."""
        if self.unitcell_state_is_tuple:
            return layer_state[1]
        return layer_state

    def custom_rnn_autodecoder(decoder_inputs, initial_input,
                               initial_state, cell, scope=None):
        """Unroll ``cell`` over ``decoder_inputs``.

        Customized rnn_decoder for the task of dealing with the end of
        character.  ``initial_input`` is accepted but not used.

        Returns:
            ``(outputs, states)``: the per-step output tensors and the
            list of states, starting with ``initial_state``.
        """
        with tf.variable_scope(scope or "rnn_decoder"):
            states = [initial_state]
            outputs = []
            for step, inp in enumerate(decoder_inputs):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                # Only the new state is used; the cell's own output is
                # discarded.
                _, new_states = cell(inp, states[-1])

                if args.skip_conn:
                    # Concatenate the hidden states of all layers.
                    output = layer_output(new_states[0])
                    for layer in range(1, self.args.num_layers):
                        output = tf.concat(
                            [output, layer_output(new_states[layer])], 1)
                else:
                    # get the top hidden states as the output
                    output = layer_output(new_states[-1])

                if reset_state_on_eoc:
                    # If the input carries an end-of-character signal
                    # (index 3), the state handed to the next step is
                    # replaced, per batch row, by the initial state.
                    # This has to be a graph op: the previous Python-level
                    # ``if tensor == 1`` loop never fired at graph-build
                    # time and could not assign into tensors anyway.
                    eoc_mask = tf.equal(inp[:, 3], 1.0)
                    new_states = reset_rows(
                        eoc_mask, initial_state, new_states)

                outputs.append(output)
                states.append(new_states)
        return outputs, states

    outputs, states = custom_rnn_autodecoder(
        inputs, self.initial_input, self.initial_state, cell,
        scope='rnn_mdn')
    output = tf.reshape(tf.concat(outputs, 1), [-1, output_in_size])
    output = tf.matmul(output, output_w) + output_b
    self.final_state = states[-1]

    # reshape target data so that it is compatible with prediction shape
    flat_target_data = tf.reshape(self.target_data, [-1, 5])
    [x1_data, x2_data, eos_data, eoc_data, cont_data] = tf.split(
        flat_target_data, 5, 1)
    pen_data = tf.concat([eos_data, eoc_data, cont_data], 1)

    def tf_2d_normal(x1, x2, mu1, mu2, s1, s2, rho):
        """Bivariate normal density.

        eq # 24 and 25 of http://arxiv.org/abs/1308.0850
        """
        norm1 = tf.subtract(x1, mu1)
        norm2 = tf.subtract(x2, mu2)
        s1s2 = tf.multiply(s1, s2)
        z = tf.square(tf.divide(norm1, s1)) + tf.square(
            tf.divide(norm2, s2)) - 2 * tf.divide(
                tf.multiply(rho, tf.multiply(norm1, norm2)), s1s2)
        negRho = 1 - tf.square(rho)
        result = tf.exp(tf.divide(-z, 2 * negRho))
        denom = 2 * np.pi * tf.multiply(s1s2, tf.sqrt(negRho))
        result = tf.divide(result, denom)
        return result

    def get_lossfunc(z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr,
                     z_pen, x1_data, x2_data, pen_data):
        """Return ``(total, shape_loss, pen_loss)``.

        ``shape_loss`` is the mean negative log-likelihood of the offsets
        under the mixture; ``pen_loss`` is the mean weighted softmax
        cross-entropy of the pen state.
        """
        result0 = tf_2d_normal(x1_data, x2_data, z_mu1, z_mu2, z_sigma1,
                               z_sigma2, z_corr)
        # implementing eq # 26 of http://arxiv.org/abs/1308.0850
        epsilon = 1e-20
        result1 = tf.multiply(result0, z_pi)
        result1 = tf.reduce_sum(result1, 1, keep_dims=True)
        # at the beginning, some errors are exactly zero.
        result1 = -tf.log(tf.maximum(result1, epsilon))
        result_shape = tf.reduce_mean(result1)

        result2 = tf.nn.softmax_cross_entropy_with_logits(
            labels=pen_data, logits=z_pen)
        # Weight per target pen state: column 2 counts 1, column 0
        # sqrt(factor) and column 1 the full factor.
        factor = self.args.stroke_importance_factor
        pen_data_weighting = pen_data[:, 2] + \
            np.sqrt(factor) * pen_data[:, 0] + \
            factor * pen_data[:, 1]
        result2 = tf.multiply(result2, pen_data_weighting)
        result_pen = tf.reduce_mean(result2)

        result = result_shape + result_pen
        return result, result_shape, result_pen

    # below is where we need to do MDN splitting of distribution params
    def get_mixture_coef(output):
        """Split the linear output into pen logits and MDN parameters.

        ie, eq 18 -> 23 of http://arxiv.org/abs/1308.0850
        """
        z = output
        # end of stroke, end of character/content, continue w/ stroke
        z_pen = z[:, 0:3]
        z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr = tf.split(
            z[:, 3:], 6, 1)

        # process output z's into MDN parameters
        # softmax all the pi's (max subtracted before exponentiating):
        max_pi = tf.reduce_max(z_pi, 1, keep_dims=True)
        z_pi = tf.subtract(z_pi, max_pi)
        z_pi = tf.exp(z_pi)
        normalize_pi = tf.reciprocal(
            tf.reduce_sum(z_pi, 1, keep_dims=True))
        z_pi = tf.multiply(normalize_pi, z_pi)

        # exponentiate the sigmas and also make corr between -1 and 1.
        z_sigma1 = tf.exp(z_sigma1)
        z_sigma2 = tf.exp(z_sigma2)
        z_corr = tf.tanh(z_corr)

        return [z_pi, z_mu1, z_mu2, z_sigma1, z_sigma2, z_corr, z_pen]

    [o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr,
     o_pen] = get_mixture_coef(output)

    self.pi = o_pi
    self.mu1 = o_mu1
    self.mu2 = o_mu2
    self.sigma1 = o_sigma1
    self.sigma2 = o_sigma2
    self.corr = o_corr
    self.pen = o_pen  # state of the pen

    [lossfunc, loss_shape, loss_pen] = get_lossfunc(
        o_pi, o_mu1, o_mu2, o_sigma1, o_sigma2, o_corr, o_pen,
        x1_data, x2_data, pen_data)
    self.cost = lossfunc
    self.cost_shape = loss_shape
    self.cost_pen = loss_pen

    # Learning rate lives in a non-trainable variable.
    self.lr = tf.Variable(0.0001, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr, epsilon=0.001)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))