def _output_module(self, rnn_outputs, output_matrices, output_biases):
    # Applies the output (projection) layers to the RNN outputs and collects
    # per-layer 'o' (layer inputs) and 's' (pre-bias linear results) for the optimizer.
    optimizer_ins = dict()
    with tf.name_scope('output_module'):
        rnn_output_ndim = len(rnn_outputs[0].get_shape().as_list())
        if rnn_output_ndim == 3:
            concat_dim = 1
        else:
            concat_dim = 0
        num_split = len(rnn_outputs)
        o = rnn_outputs
        for layer_idx, (matr, bias) in enumerate(zip(output_matrices, output_biases)):
            with tf.name_scope('layer_%s' % layer_idx):
                if layer_idx > 0:
                    # from the second layer on, split the previous layer's output
                    # back into per-unrolling pieces for the optimizer
                    o = tf.split(hs, num_split, axis=concat_dim, name='split_o_for_optimizer')
                hs = tf.concat(o, concat_dim, name='united_o')
                s = custom_matmul(hs, matr, name_scope='first_s')
                s = tf.split(s, num_split, axis=concat_dim, name='split_s_for_optimizer')
                optimizer_ins['output_layer_%s' % layer_idx] = dict(
                    o=o,
                    s=s,
                )
                s = tf.concat(s, concat_dim, name='united_s')
                hs = custom_add(s, bias, name='res_of_%s_output_layer' % layer_idx)
                if layer_idx < self._num_output_layers - 1:
                    hs = tf.nn.relu(hs)
    return hs, optimizer_ins

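# custom_matmul and custom_add are repo helpers defined elsewhere; the stand-ins
# below are only a hedged sketch of the assumed behaviour (a matmul / add that
# broadcasts over extra leading batch dimensions), kept near the call sites above
# for readability. The keyword arguments (name_scope, base_ndims) mirror those
# call sites but are not taken from the real implementation; `tf` refers to the
# module-level `import tensorflow as tf`.
def _custom_matmul_sketch(a, b, base_ndims=(2, 2), name_scope='custom_matmul'):
    # contract the last axis of `a` with the first axis of `b`,
    # keeping any extra leading dimensions of `a` as batch dimensions
    with tf.name_scope(name_scope):
        return tf.tensordot(a, b, axes=[[-1], [0]])


def _custom_add_sketch(s, bias, name='custom_add'):
    # plain broadcasting add of a bias over the last dimension
    return tf.add(s, bias, name=name)
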
def _apply_lstm_layer(self, inp, state, matrix, bias, scope='lstm'):
    with tf.name_scope(scope):
        x = tf.concat(
            [tf.nn.dropout(inp, self._optimizer_dropout_keep_prob), state[0]],
            -1,
            name='X')
        s = custom_matmul(x, matrix)
        linear_res = custom_add(s, bias, name='linear_res')
        state_dim = tf.shape(state[0])[-1:]
        split_dims = tf.concat([3 * state_dim, state_dim], 0)
        [sigm_arg, tanh_arg] = tf.split(linear_res, split_dims, axis=-1, name='split_to_act_func_args')
        sigm_res = tf.sigmoid(sigm_arg, name='sigm_res')
        transform_vec = tf.tanh(tanh_arg, name='transformation_vector')
        [forget_gate, input_gate, output_gate] = tf.split(sigm_res, 3, axis=-1, name='gates')
        new_cell_state = tf.add(forget_gate * state[1], input_gate * transform_vec, name='new_cell_state')
        new_hidden_state = tf.multiply(output_gate, tf.tanh(new_cell_state), name='new_hidden_state')
    return new_hidden_state, new_cell_state

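# Minimal shape demo for the fused LSTM parameters consumed by _apply_lstm_layer
# above (illustrative names only, not part of the repo). The layer concatenates
# [input, hidden] and multiplies by a single matrix whose last dimension is
# 4 * state_dim: three sigmoid gate arguments followed by one tanh candidate.
import tensorflow as tf

def _fused_lstm_weights_demo(input_dim=8, state_dim=16, batch=4):
    matrix = tf.zeros([input_dim + state_dim, 4 * state_dim])
    bias = tf.zeros([4 * state_dim])
    x = tf.concat([tf.zeros([batch, input_dim]), tf.zeros([batch, state_dim])], -1)
    linear_res = tf.matmul(x, matrix) + bias  # [batch, 4 * state_dim]
    sigm_arg, tanh_arg = tf.split(linear_res, [3 * state_dim, state_dim], axis=-1)
    forget_gate, input_gate, output_gate = tf.split(tf.sigmoid(sigm_arg), 3, axis=-1)
    return forget_gate, input_gate, output_gate, tf.tanh(tanh_arg)
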
def _apply_res_core(self, vars, opt_ins, rnn_part, target, scope, target_dims):
    # One residual core of ResNet4Mlp: an MLP over the concatenated optimizer
    # inputs and recurrent state, with a residual connection to the targets.
    if self._res_core_activation_func == 'relu':
        a_func = tf.nn.relu
    elif self._res_core_activation_func == 'tanh':
        a_func = tf.tanh
    else:
        a_func = None
    with tf.name_scope(scope):
        opt_ins_united = tf.concat(opt_ins, -1, name='opt_ins_united')
        # tile rnn_part along the second-to-last dimension of opt_ins_united
        # so it can be concatenated with every optimizer input row
        rnn_stack_num = tf.concat(
            [tf.ones([1], dtype=tf.int32),
             tf.shape(opt_ins_united)[-2:-1],
             tf.ones([1], dtype=tf.int32)],
            0
        )
        rnn_part = tf.expand_dims(rnn_part, 1)
        rnn_part = tf.tile(rnn_part, rnn_stack_num, name='stacked_rnn_part')
        hs = tf.concat([opt_ins_united, rnn_part], -1, name='opt_ins_with_rnn_part')
        matrices = vars[0]
        biases = vars[1]
        for idx, (m, b) in enumerate(zip(matrices, biases)):
            matmul_res = custom_matmul(hs, m)
            if idx == 0:
                self._debug_tensors.append(matmul_res)
            hs = a_func(custom_add(matmul_res, b))
        # residual connection: add the targets back (zeros for the rnn_part slice)
        hs = tf.add(
            hs,
            tf.concat(target + [tf.zeros(tf.shape(rnn_part))], -1, name='res_tensor'),
            name='after_res_conn'
        )
        rnn_part_dim = hs.get_shape().as_list()[-1] - sum(target_dims)
        o, sigma, rnn_part = tf.split(
            hs, list(target_dims) + [rnn_part_dim], axis=-1, name='o_sigma_and_rnn_part')
    return o, sigma, rnn_part

def _apply_core(self, vars, opt_ins, scope, target_dims):
    with tf.name_scope(scope):
        opt_ins_united = tf.concat(opt_ins, -1, name='opt_ins_united')
        hs = opt_ins_united
        matrix = vars[0]
        bias = vars[1]
        matmul_res = custom_matmul(hs, matrix)
        self._debug_tensors.append(matmul_res)
        hs = custom_add(matmul_res, bias)
        o, sigma = tf.split(hs, list(target_dims), axis=-1, name='o_sigma')
    return o, sigma

def _embed(inputs, matrix):
    with tf.name_scope('embeddings'):
        inputs_ndims = len(inputs.get_shape().as_list())
        if inputs_ndims == 4:
            unstack_dim = 1
        else:
            unstack_dim = 0
        o = tf.unstack(inputs, axis=unstack_dim, name='o_embedding_layer')
        inputs = tf.stack(o, axis=unstack_dim)
        embeddings = custom_matmul(inputs, matrix, base_ndims=[3, 2])
        unstacked_embeddings = tf.unstack(embeddings, axis=unstack_dim, name='embeddings')
    optimizer_ins = {'embedding_layer': {'o': o, 's': unstacked_embeddings}}
    return unstacked_embeddings, optimizer_ins

def _apply_core(self, vars, opt_ins, scope, target_dims):
    if self._activation_func == 'relu':
        a_func = tf.nn.relu
    elif self._activation_func == 'tanh':
        a_func = tf.tanh
    else:
        a_func = None
    with tf.name_scope(scope):
        opt_ins_united = tf.concat(opt_ins, -1, name='opt_ins_united')
        hs = opt_ins_united
        matrix = vars[0]
        bias = vars[1]
        matmul_res = custom_matmul(hs, matrix)
        self._debug_tensors.append(matmul_res)
        hs = a_func(custom_add(matmul_res, bias))
        o, sigma = tf.split(hs, list(target_dims), axis=-1, name='o_sigma')
    return o, sigma

def _lstm_layer(self, inp, state, layer_idx, matr, bias):
    with tf.name_scope('lstm_layer_%s' % layer_idx):
        nn = self._num_nodes[layer_idx]
        x = tf.concat(
            [tf.nn.dropout(inp, self._dropout_keep_prob), state[0]],
            -1,
            name='X')
        s = custom_matmul(x, matr)
        linear_res = custom_add(s, bias, name='linear_res')
        [sigm_arg, tanh_arg] = tf.split(linear_res, [3 * nn, nn], axis=-1, name='split_to_act_func_args')
        sigm_res = tf.sigmoid(sigm_arg, name='sigm_res')
        transform_vec = tf.tanh(tanh_arg, name='transformation_vector')
        [forget_gate, input_gate, output_gate] = tf.split(sigm_res, 3, axis=-1, name='gates')
        new_cell_state = tf.add(forget_gate * state[1], input_gate * transform_vec, name='new_cell_state')
        new_hidden_state = tf.multiply(output_gate, tf.tanh(new_cell_state), name='new_hidden_state')
    optimizer_ins = {'lstm_layer_%s' % layer_idx: {'o': x, 's': s}}
    return new_hidden_state, [new_hidden_state, new_cell_state], optimizer_ins

def _mlp(self, inputs, matrices, biases):
    opt_ins = dict()
    inp_shape = inputs.get_shape().as_list()
    inp_shape = [-1 if a is None else a for a in inp_shape]
    ndim = len(inp_shape)
    inputs = tf.reshape(
        inputs,
        inp_shape[:ndim - self._input_ndim] + [self._input_size],
        name='inp_reshaped')
    hs = inputs
    with tf.name_scope('mlp'):
        for idx, (m, b) in enumerate(zip(matrices, biases)):
            layer_name = 'layer_%s' % idx
            opt_ins[layer_name] = dict(o=hs)
            with tf.name_scope(layer_name):
                preactivate = custom_add(custom_matmul(hs, m), b)
                if idx < len(matrices) - 1:
                    hs = tf.nn.relu(preactivate)
                else:
                    hs = preactivate
                opt_ins[layer_name]['s'] = preactivate
    return hs, opt_ins

def _apply_res_core(self, vars, opt_ins, scope, target_dims):
    if self._res_core_activation_func == 'relu':
        a_func = tf.nn.relu
    elif self._res_core_activation_func == 'tanh':
        a_func = tf.tanh
    else:
        a_func = None
    with tf.name_scope(scope):
        opt_ins_united = tf.concat(opt_ins, -1, name='opt_ins_united')
        hs = opt_ins_united
        matrices = vars[0]
        biases = vars[1]
        for idx, (m, b) in enumerate(zip(matrices, biases)):
            matmul_res = custom_matmul(hs, m)
            if idx == 0:
                self._debug_tensors.append(matmul_res)
            hs = a_func(custom_add(matmul_res, b))
        hs = tf.add(hs, tf.concat(opt_ins, -1, name='res_tensor'), name='after_res_conn')
        o, sigma = tf.split(hs, list(target_dims), axis=-1, name='o_sigma')
    return o, sigma

def _apply_net(self, inp, state, vars):
    # Preprocess the optimizer input, run it through the LSTM stack, and apply
    # the final linear projection scaled by self._scale.
    inp, old_shape, stack = self._opt_in_reshaping(inp)
    inp = log_and_sign(inp, P)
    new_state = list()
    for layer_idx, (layer_state, matrix, bias) in enumerate(
            zip(state, vars['lstm_matrices'], vars['lstm_biases'])):
        new_h, new_c = self._apply_lstm_layer(
            inp, layer_state, matrix, bias,
            scope='lstm_layer_%s' % layer_idx,
        )
        new_state.append((new_h, new_c))
        inp = new_h
    with tf.name_scope('linear'):
        linear_res = self._scale * custom_matmul(inp, vars['linear'])
    res = self._reshape_back(linear_res, old_shape, stack)
    return res, new_state

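# log_and_sign and the constant P are defined elsewhere in the repo; the sketch
# below only assumes they implement the gradient preprocessing of Andrychowicz
# et al. (2016): x is mapped to (log(|x|)/p, sign(x)) when |x| >= exp(-p) and to
# (-1, exp(p) * x) otherwise, doubling the last dimension. This is an assumption,
# not the repo's actual implementation.
import math

def _log_and_sign_sketch(x, p):
    # max(log|x|/p, -1) equals log|x|/p when |x| >= exp(-p) and -1 otherwise;
    # the 1e-20 guard only protects log(0)
    clamped_log = tf.maximum(tf.log(tf.abs(x) + 1e-20) / p, -1.0)
    # clip(x * exp(p), -1, 1) equals sign(x) when |x| >= exp(-p) and exp(p) * x otherwise
    sign_part = tf.clip_by_value(x * math.exp(p), -1.0, 1.0)
    return tf.concat([clamped_log, sign_part], axis=-1)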