def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    assert not time_major

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(
        flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs,
                                   sequence_length=flat_len,
                                   initial_state_fw=initial_state_fw,
                                   initial_state_bw=initial_state_bw,
                                   dtype=dtype,
                                   parallel_iterations=parallel_iterations,
                                   swap_memory=swap_memory,
                                   time_major=time_major,
                                   scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
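# Note: `flatten` and `reconstruct` are not defined in these snippets. The sketch below
# shows what they are assumed to do, inferred from how the wrappers call them:
# `flatten(tensor, keep)` collapses all but the last `keep` dimensions into one, and
# `reconstruct(tensor, ref, keep)` restores the leading dimensions of a reference tensor.
# This is a minimal sketch, not a verified implementation of the original helpers.
from functools import reduce
from operator import mul

import tensorflow as tf


def flatten(tensor, keep):
    # Collapse all leading dimensions into one, keeping the last `keep` dims.
    fixed_shape = tensor.get_shape().as_list()
    start = len(fixed_shape) - keep
    left = reduce(mul, [fixed_shape[i] or tf.shape(tensor)[i] for i in range(start)])
    out_shape = [left] + [fixed_shape[i] or tf.shape(tensor)[i]
                          for i in range(start, len(fixed_shape))]
    return tf.reshape(tensor, out_shape)


def reconstruct(tensor, ref, keep):
    # Reshape `tensor` so its leading dims match `ref`, keeping its last `keep` dims.
    ref_shape = ref.get_shape().as_list()
    tensor_shape = tensor.get_shape().as_list()
    ref_stop = len(ref_shape) - keep
    tensor_start = len(tensor_shape) - keep
    pre_shape = [ref_shape[i] or tf.shape(ref)[i] for i in range(ref_stop)]
    keep_shape = [tensor_shape[i] or tf.shape(tensor)[i]
                  for i in range(tensor_start, len(tensor_shape))]
    return tf.reshape(tensor, pre_shape + keep_shape)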
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    assert not time_major

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs,
                                   sequence_length=flat_len,
                                   initial_state_fw=initial_state_fw,
                                   initial_state_bw=initial_state_bw,
                                   dtype=dtype,
                                   parallel_iterations=parallel_iterations,
                                   swap_memory=swap_memory,
                                   time_major=time_major,
                                   scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)

    # Unlike the variant above, this version also reshapes the final hidden states
    # back to the leading dimensions of `inputs` (assuming LSTMStateTuple (c, h) states).
    (_, flat_final_hidden_fw), (_, flat_final_hidden_bw) = final_state

    def my_reconstruct(tensor, ref):
        tensor_shape = tensor.get_shape().as_list()
        ref_shape = ref.get_shape().as_list()
        pre_shape = [ref_shape[i] or tf.shape(ref)[i] for i in range(len(ref_shape) - 2)]
        keep_shape = tensor_shape[-1:]
        target_shape = pre_shape + keep_shape
        out = tf.reshape(tensor, target_shape)
        return out

    final_hidden_fw = my_reconstruct(flat_final_hidden_fw, inputs)
    final_hidden_bw = my_reconstruct(flat_final_hidden_bw, inputs)
    return (fw_outputs, bw_outputs), (final_hidden_fw, final_hidden_bw)
def my_new_bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                                     initial_state_fw=None, initial_state_bw=None,
                                     dtype=None, parallel_iterations=None,
                                     swap_memory=False, time_major=False, scope=None):
    assert not time_major

    # flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_inputs = my_flatten(inputs)
    # flat_inputs = tf.reshape(inputs, [tf.shape(inputs)[0], -1, tf.shape(inputs)[3]])
    flat_len = sequence_length

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs,
                                   sequence_length=flat_len,
                                   initial_state_fw=initial_state_fw,
                                   initial_state_bw=initial_state_bw,
                                   dtype=dtype,
                                   parallel_iterations=parallel_iterations,
                                   swap_memory=swap_memory,
                                   time_major=time_major,
                                   scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 1)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 1)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
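# `my_flatten` above is also undefined in this snippet. Judging from the commented-out
# reshape it replaces, it presumably keeps the batch dimension, merges the middle
# dimensions into a single time axis, and keeps the feature dimension. A minimal sketch
# under that assumption (the helper name and exact shapes are guesses):
def my_flatten(inputs):
    # [N, M, J, d] -> [N, M * J, d]: keep batch and feature dims, merge the rest.
    d = inputs.get_shape().as_list()[-1] or tf.shape(inputs)[-1]
    return tf.reshape(inputs, [tf.shape(inputs)[0], -1, d])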
def bidirectional_rnn(cell_fw, cell_bw, inputs, initial_state_fw=None,
                      initial_state_bw=None, dtype=None, sequence_length=None,
                      scope=None):
    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_rnn(cell_fw, cell_bw, flat_inputs,
                           sequence_length=flat_len,
                           initial_state_fw=initial_state_fw,
                           initial_state_bw=initial_state_bw,
                           dtype=dtype,
                           scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False,
           wd=0.0, input_keep_prob=1.0, is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
            for arg in flat_args
        ]

    with tf.variable_scope(scope or 'Linear'):
        flat_out = _linear(
            flat_args, output_size, bias,
            bias_initializer=tf.constant_initializer(bias_start))

    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)
    return out
def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    assert not time_major  # TODO : to be implemented later!

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(
        flatten(sequence_length, 0), 'int64')

    flat_outputs, final_state = _dynamic_rnn(
        cell, flat_inputs,
        sequence_length=flat_len,
        initial_state=initial_state,
        dtype=dtype,
        parallel_iterations=parallel_iterations,
        swap_memory=swap_memory,
        time_major=time_major,
        scope=scope)

    outputs = reconstruct(flat_outputs, inputs, 2)
    return outputs, final_state
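# A hypothetical usage sketch of the `dynamic_rnn` wrapper above: the point of the
# wrapper is that a rank-4 input [N, M, J, d] is flattened to [N * M, J, d] before the
# underlying dynamic_rnn call, and the outputs are reshaped back afterwards. Shapes and
# names below are illustrative only; the cell class may live under tf.contrib.rnn
# depending on the TF 1.x version.
cell = tf.nn.rnn_cell.BasicLSTMCell(100)
x = tf.placeholder(tf.float32, [None, 4, 30, 50])   # [N, M, J, d]
x_len = tf.placeholder(tf.int32, [None, 4])         # one length per (N, M) sequence
outputs, state = dynamic_rnn(cell, x, sequence_length=x_len, dtype=tf.float32, scope='example')
# `outputs` has shape [N, 4, 30, 100], matching the leading dims of `x`.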
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False,
           wd=0.0, input_keep_prob=1.0, is_train=None):
    """Apply a linear (fully connected) layer over the last dimension of the inputs.

    The output keeps the same leading shape as the inputs.
    :param args: input tensor(s); here a tensor of shape [60, ?, ?, 200]
    :param output_size: size of the output's last dimension; here 200, same as the input
    :param bias: whether to add a bias term; True here
    :param bias_start: initial value of the bias; 0 here
    :param scope: variable scope name; 'trans' here
    :param squeeze: if True, squeeze out the last dimension of the output; False here
    :param wd: weight-decay coefficient; 0 here
    :param input_keep_prob: dropout keep probability for the inputs; 1 here
    :param is_train: boolean placeholder indicating training mode
    :return: the projected tensor, reshaped back to the input's leading dimensions
    """
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]  # [60, ?, ?, 200] -> [?, 200]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
                     for arg in flat_args]

    with tf.variable_scope(scope or 'Linear'):
        flat_out = _linear(flat_args, output_size, bias,
                           bias_initializer=tf.constant_initializer(bias_start))

    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)
    return out
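# A hypothetical usage sketch for `linear` with the shapes from the docstring above
# ([60, ?, ?, 200] projected to 200 units under scope 'trans'). The placeholder names
# and the keep probability are illustrative only.
inputs_4d = tf.placeholder(tf.float32, [60, None, None, 200])
is_train_ph = tf.placeholder(tf.bool, [])
projected = linear([inputs_4d], 200, True, bias_start=0.0, scope='trans',
                   input_keep_prob=0.8, is_train=is_train_ph)
# `projected` has shape [60, ?, ?, 200]: the last dimension is re-projected while all
# leading dimensions are preserved by flatten/reconstruct.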
def softmax(logits, mask=None, scope=None):
    with tf.name_scope(scope or "Softmax"):
        if mask is not None:
            logits = exp_mask(logits, mask)
        flat_logits = flatten(logits, 1)
        flat_out = tf.nn.softmax(flat_logits)
        out = reconstruct(flat_out, logits, 1)
        return out
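# `exp_mask` used in `softmax` above is not shown either. The usual trick it implements
# is to add a very large negative number to the masked-out logits so they contribute
# roughly zero probability after the softmax. A minimal sketch under that assumption:
VERY_NEGATIVE_NUMBER = -1e30

def exp_mask(val, mask, name=None):
    # Positions where mask == 0 are pushed toward -1e30 and vanish after softmax.
    return tf.add(val, (1.0 - tf.cast(mask, 'float')) * VERY_NEGATIVE_NUMBER,
                  name=name or "exp_mask")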
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    assert not time_major

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs,
                                   sequence_length=flat_len,
                                   initial_state_fw=initial_state_fw,
                                   initial_state_bw=initial_state_bw,
                                   dtype=dtype,
                                   parallel_iterations=parallel_iterations,
                                   swap_memory=swap_memory,
                                   time_major=time_major,
                                   scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
                              initial_state_fw=None, initial_state_bw=None,
                              dtype=None, parallel_iterations=None,
                              swap_memory=False, time_major=False, scope=None):
    """
    :param cell_fw: forward RNN cell
    :param cell_bw: backward RNN cell
    :param inputs: shape [60, ?, 200]
    :param sequence_length: shape [60]
    :param initial_state_fw:
    :param initial_state_bw:
    :param dtype: float
    :param parallel_iterations:
    :param swap_memory: False
    :param time_major: False
    :param scope: 'u1'
    :return: the per-step hidden outputs in both directions, and the final state for both directions
    """
    assert not time_major

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]; [60, ?, 200] here
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    (flat_fw_outputs, flat_bw_outputs), final_state = \
        _bidirectional_dynamic_rnn(cell_fw, cell_bw, flat_inputs,   # last dim is 200
                                   sequence_length=flat_len,        # dynamic length per batch element
                                   initial_state_fw=initial_state_fw,
                                   initial_state_bw=initial_state_bw,
                                   dtype=dtype,
                                   parallel_iterations=parallel_iterations,
                                   swap_memory=swap_memory,
                                   time_major=time_major,
                                   scope=scope)

    fw_outputs = reconstruct(flat_fw_outputs, inputs, 2)
    bw_outputs = reconstruct(flat_bw_outputs, inputs, 2)
    # FIXME : final state is not reshaped!
    return (fw_outputs, bw_outputs), final_state
def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
                dtype=None, parallel_iterations=None, swap_memory=False,
                time_major=False, scope=None):
    assert not time_major  # TODO : to be implemented later!

    flat_inputs = flatten(inputs, 2)  # [-1, J, d]
    flat_len = None if sequence_length is None else tf.cast(flatten(sequence_length, 0), 'int64')

    flat_outputs, final_state = _dynamic_rnn(cell, flat_inputs,
                                             sequence_length=flat_len,
                                             initial_state=initial_state,
                                             dtype=dtype,
                                             parallel_iterations=parallel_iterations,
                                             swap_memory=swap_memory,
                                             time_major=time_major,
                                             scope=scope)

    outputs = reconstruct(flat_outputs, inputs, 2)
    return outputs, final_state
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False,
           wd=0.0, input_keep_prob=1.0, is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
                     for arg in flat_args]

    flat_out = _linear(flat_args, output_size, bias, bias_start=bias_start, scope=scope)
    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)
    return out
def linear(args, output_size, bias, bias_start=0.0, scope=None, squeeze=False,
           wd=0.0, input_keep_prob=1.0, is_train=None):
    if args is None or (nest.is_sequence(args) and not args):
        raise ValueError("`args` must be specified")
    if not nest.is_sequence(args):
        args = [args]

    flat_args = [flatten(arg, 1) for arg in args]
    if input_keep_prob < 1.0:
        assert is_train is not None
        flat_args = [
            tf.cond(is_train, lambda: tf.nn.dropout(arg, input_keep_prob), lambda: arg)
            for arg in flat_args
        ]

    print(flat_args)
    print(output_size)
    print(bias)
    print(bias_start)
    print(scope)

    # `fully_connected` expects a single tensor, so concatenate the flattened args along
    # the feature axis; this mirrors how the replaced `_linear` call handles a list of args.
    flat_out = tf.contrib.layers.fully_connected(
        inputs=tf.concat(flat_args, 1),
        num_outputs=output_size,
        biases_initializer=tf.constant_initializer(bias_start),
        activation_fn=None)
    # flat_out = _linear(flat_args, output_size, bias, bias_start=bias_start, scope=scope)

    out = reconstruct(flat_out, args[0], 1)
    if squeeze:
        out = tf.squeeze(out, [len(args[0].get_shape().as_list()) - 1])
    if wd:
        add_wd(wd)
    return out
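# `add_wd` called by the `linear` variants above is likewise undefined here. Presumably
# it adds an L2 weight-decay term for the trainable variables of the current scope to a
# loss collection. A minimal sketch under that assumption (the collection name 'losses'
# is a guess):
def add_wd(wd, scope=None):
    scope = scope or tf.get_variable_scope().name
    variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
    with tf.name_scope("weight_decay"):
        for var in variables:
            decay = tf.multiply(tf.nn.l2_loss(var), wd, name="{}/wd".format(var.op.name))
            tf.add_to_collection('losses', decay)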