Example 1
    def blstm_layer(self, input):
        """

        :return:
        """
        with tf.variable_scope('rnn_layer'):
            cell_fw = [
                get_rnn_cell(self.rnn_size, self.dropout_rate)
                for _ in range(self.num_layers)
            ]
            cell_bw = [
                get_rnn_cell(self.rnn_size, self.dropout_rate)
                for _ in range(self.num_layers)
            ]
            # if self.num_layers > 1:
            #     cell_fw = rnn.MultiRNNCell([cell_fw] * self.num_layers, state_is_tuple=True)
            #     cell_bw = rnn.MultiRNNCell([cell_bw] * self.num_layers, state_is_tuple=True)

            rnn_output, _, _ = stack_bidirectional_dynamic_rnn(
                cell_fw, cell_bw, input,
                sequence_length=self.lengths, dtype=tf.float32)
            outputs = tf.concat(rnn_output, axis=2)
            outputs = tf.layers.dropout(outputs,
                                        1. - self.dropout_rate,
                                        training=self.is_training)
        return outputs
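Example 1 calls a get_rnn_cell helper that is not shown. Below is a minimal hypothetical sketch of such a helper, assuming dropout_rate is a keep probability (consistent with the tf.layers.dropout call above, which passes 1. - dropout_rate as the drop rate); the project's real helper may differ.

import tensorflow as tf

def get_rnn_cell(rnn_size, dropout_rate):
    # Hypothetical helper: one LSTM cell with output dropout.
    # Assumes dropout_rate is a keep probability.
    cell = tf.nn.rnn_cell.LSTMCell(rnn_size)
    return tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout_rate)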
Example 2
def rnn_layers(x, seq_length, training, hidden_num=100, layer_num=3, class_n=5):
    cells_fw = list()
    cells_bw = list()
    for i in range(layer_num):
        #cell_fw = BNLSTMCell(hidden_num,training = training)#,training)
        #cell_bw = BNLSTMCell(hidden_num,training = training)#,training)
        cell_fw = LSTMCell(hidden_num)
        cell_bw = LSTMCell(hidden_num)
        cells_fw.append(cell_fw)
        cells_bw.append(cell_bw)
    with tf.variable_scope('BDLSTM_rnn') as scope:
        lasth, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw, cells_bw=cells_bw, inputs=x,
            sequence_length=seq_length, dtype=tf.float32, scope=scope)
    #shape of lasth [batch_size,max_time,hidden_num*2]
    batch_size = lasth.get_shape().as_list()[0]
    max_time = lasth.get_shape().as_list()[1]
    with tf.variable_scope('rnn_fnn_layer'):
        weight_out = tf.Variable(tf.truncated_normal([2,hidden_num],stddev=np.sqrt(2.0 / (2*hidden_num))),name='weights')
        biases_out = tf.Variable(tf.zeros([hidden_num]),name = 'bias')
        weight_class = tf.Variable(tf.truncated_normal([hidden_num,class_n],stddev=np.sqrt(2.0 / hidden_num)),name = 'weights_class')
        bias_class = tf.Variable(tf.zeros([class_n]),name = 'bias_class')
        lasth_rs = tf.reshape(lasth,[batch_size,max_time,2,hidden_num],name = 'lasth_rs')
        lasth_output = tf.nn.bias_add(tf.reduce_sum(tf.multiply(lasth_rs,weight_out),axis = 2),biases_out,name = 'lasth_bias_add')
        lasth_output_rs = tf.reshape(lasth_output,[batch_size*max_time,hidden_num],name = 'lasto_rs')
        logits = tf.reshape(tf.nn.bias_add(tf.matmul(lasth_output_rs,weight_class),bias_class),[batch_size,max_time,class_n],name = "rnn_logits_rs")
        variable_summaries(weight_class)
        variable_summaries(biases_out)
    return logits
Example 3
    def _createStackBidirectionalDynamicRNN(self,
                                            use_gpu,
                                            use_shape,
                                            use_state_tuple,
                                            initial_states_fw=None,
                                            initial_states_bw=None,
                                            scope=None):
        self.layers = [2, 3]
        input_size = 5
        batch_size = 2
        max_length = 8

        initializer = init_ops.random_uniform_initializer(-0.01,
                                                          0.01,
                                                          seed=self._seed)
        sequence_length = array_ops.placeholder(dtypes.int64)

        self.cells_fw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]
        self.cells_bw = [
            rnn_cell.LSTMCell(num_units,
                              input_size,
                              initializer=initializer,
                              state_is_tuple=False)
            for num_units in self.layers
        ]

        inputs = max_length * [
            array_ops.placeholder(
                dtypes.float32,
                shape=(batch_size, input_size) if use_shape else
                (None, input_size))
        ]
        inputs_c = array_ops.stack(inputs)
        inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
        outputs, st_fw, st_bw = contrib_rnn.stack_bidirectional_dynamic_rnn(
            self.cells_fw,
            self.cells_bw,
            inputs_c,
            initial_states_fw=initial_states_fw,
            initial_states_bw=initial_states_bw,
            dtype=dtypes.float32,
            sequence_length=sequence_length,
            scope=scope)

        # Outputs has shape (batch_size, max_length, 2 * layers[-1]).
        output_shape = [None, max_length, 2 * self.layers[-1]]
        if use_shape:
            output_shape[0] = batch_size

        self.assertAllEqual(outputs.get_shape().as_list(), output_shape)

        input_value = np.random.randn(batch_size, input_size)

        return input_value, inputs, outputs, st_fw, st_bw, sequence_length
Example 4
    def _build_rnn_op(self):
        with tf.variable_scope("bi_directional_rnn"):
            cell_fw = self._create_rnn_cell()
            cell_bw = self._create_rnn_cell()

            if self.cfg["use_stack_rnn"]:
                rnn_outs, *_ = stack_bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    self.word_emb,
                    dtype=tf.float32,
                    sequence_length=self.seq_len)
            else:
                rnn_outs, *_ = bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    self.word_emb,
                    dtype=tf.float32,
                    sequence_length=self.seq_len)
            rnn_outs = tf.concat(rnn_outs, axis=-1)
            rnn_outs = tf.layers.dropout(rnn_outs,
                                         rate=self.drop_rate,
                                         training=self.is_train)
            self.rnn_outs = rnn_outs
            print("rnn output shape: {}".format(
                rnn_outs.get_shape().as_list()))
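Examples 4 and 15 rely on a _create_rnn_cell helper that is not shown. Note that stack_bidirectional_dynamic_rnn expects one cell per layer (a list), while bidirectional_dynamic_rnn expects a single cell per direction. A hypothetical sketch consistent with that, using assumed cfg keys "num_units" and "num_layers"; the real helper may differ:

    def _create_rnn_cell(self):
        # Hypothetical helper: a list of per-layer cells for the stacked API,
        # otherwise a single (possibly multi-layer) cell.
        cells = [tf.nn.rnn_cell.LSTMCell(self.cfg["num_units"])
                 for _ in range(self.cfg["num_layers"])]
        if self.cfg["use_stack_rnn"]:
            return cells  # stack_bidirectional_dynamic_rnn wants one cell per layer
        return tf.nn.rnn_cell.MultiRNNCell(cells) if len(cells) > 1 else cells[0]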
Example 5
 def __call__(self,
              inputs,
              seq_len,
              return_last_state=False,
              time_major=False):
     assert not time_major, "StackBiRNN class cannot support time_major currently"
     with tf.variable_scope(self.scope):
         flat_inputs = flatten(inputs,
                               keep=2)  # reshape to [-1, max_time, dim]
         seq_len = flatten(
             seq_len, keep=0)  # reshape to [x] (one dimension sequence)
         outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn(
             self.cells_fw,
             self.cells_bw,
             flat_inputs,
             sequence_length=seq_len,
             dtype=tf.float32)
         if return_last_state:  # return last states
             # states_fw / states_bw hold the final state of each layer of the forward /
             # backward RNN; take the top layer's forward and backward states as the last state.
             h_fw, h_bw = states_fw[self.num_layers -
                                    1].h, states_bw[self.num_layers - 1].h
             output = tf.concat([h_fw, h_bw],
                                axis=-1)  # shape = [-1, 2 * num_units]
             output = reconstruct(
                 output, ref=inputs, keep=2,
                 remove_shape=1)  # remove the max_time shape
         else:
             output = tf.concat(
                 outputs, axis=-1)  # shape = [-1, max_time, 2 * num_units]
             output = reconstruct(
                 output, ref=inputs, keep=2
             )  # reshape to same as inputs, except the last two dim
         return output
Example 6
  def encode(self, inputs, sequence_length, **kwargs):
    scope = tf.get_variable_scope()
    scope.set_initializer(tf.random_uniform_initializer(
        -self.params["init_scale"],
        self.params["init_scale"]))

    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

    cells_fw = _unpack_cell(cell_fw)
    cells_bw = _unpack_cell(cell_bw)

    result = rnn.stack_bidirectional_dynamic_rnn(
        cells_fw=cells_fw,
        cells_bw=cells_bw,
        inputs=inputs,
        dtype=tf.float32,
        sequence_length=sequence_length,
        **kwargs)
    outputs_concat, _output_state_fw, _output_state_bw = result
    final_state = (_output_state_fw, _output_state_bw)
    return EncoderOutput(
        outputs=outputs_concat,
        final_state=final_state,
        attention_values=outputs_concat,
        attention_values_length=sequence_length)
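Examples 6, 7, 13 and 16 depend on an _unpack_cell helper. In seq2seq-style encoder code this typically just unwraps a MultiRNNCell into the per-layer cell list that stack_bidirectional_dynamic_rnn expects; a sketch under that assumption:

import tensorflow as tf

def _unpack_cell(cell):
    # Return the per-layer cells of a MultiRNNCell, or wrap a single cell in a list.
    if isinstance(cell, tf.contrib.rnn.MultiRNNCell):
        return cell._cells  # pylint: disable=protected-access
    return [cell]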
Example 7
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        cells_fw = _unpack_cell(cell_fw)
        cells_bw = _unpack_cell(cell_bw)

        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=inputs,
            dtype=tf.float32,
            sequence_length=sequence_length,
            **kwargs)
        outputs_concat, _output_state_fw, _output_state_bw = result
        final_state = (_output_state_fw, _output_state_bw)
        return EncoderOutput(outputs=outputs_concat,
                             final_state=final_state,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 8
  def _createStackBidirectionalDynamicRNN(self,
                                          use_gpu,
                                          use_shape,
                                          use_state_tuple,
                                          initial_states_fw=None,
                                          initial_states_bw=None,
                                          scope=None):
    self.layers = [2, 3]
    input_size = 5
    batch_size = 2
    max_length = 8

    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=self._seed)
    sequence_length = array_ops.placeholder(dtypes.int64)

    self.cells_fw = [
        core_rnn_cell_impl.LSTMCell(
            num_units,
            input_size,
            initializer=initializer,
            state_is_tuple=False) for num_units in self.layers
    ]
    self.cells_bw = [
        core_rnn_cell_impl.LSTMCell(
            num_units,
            input_size,
            initializer=initializer,
            state_is_tuple=False) for num_units in self.layers
    ]

    inputs = max_length * [
        array_ops.placeholder(
            dtypes.float32,
            shape=(batch_size, input_size) if use_shape else (None, input_size))
    ]
    inputs_c = array_ops.stack(inputs)
    inputs_c = array_ops.transpose(inputs_c, [1, 0, 2])
    outputs, st_fw, st_bw = rnn.stack_bidirectional_dynamic_rnn(
        self.cells_fw,
        self.cells_bw,
        inputs_c,
        initial_states_fw=initial_states_fw,
        initial_states_bw=initial_states_bw,
        dtype=dtypes.float32,
        sequence_length=sequence_length,
        scope=scope)

    # Outputs has shape (batch_size, max_length, 2 * layers[-1]).
    output_shape = [None, max_length, 2 * self.layers[-1]]
    if use_shape:
      output_shape[0] = batch_size

    self.assertAllEqual(outputs.get_shape().as_list(), output_shape)

    input_value = np.random.randn(batch_size, input_size)

    return input_value, inputs, outputs, st_fw, st_bw, sequence_length
Example 9
 def __call__(self, inputs, seq_len):
     with tf.variable_scope(self.scope):
         output, *_ = stack_bidirectional_dynamic_rnn(
             self.cells_fw,
             self.cells_bw,
             inputs,
             sequence_length=seq_len,
             dtype=tf.float32)
     return output
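Example 9 is the bare call pattern that most of the wrappers above share. A minimal, self-contained sketch of that pattern (TF 1.x; shapes and sizes are illustrative only):

import tensorflow as tf
from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn

inputs = tf.placeholder(tf.float32, [None, 50, 128])  # [batch, max_time, dim]
seq_len = tf.placeholder(tf.int32, [None])            # true length of each sequence

cells_fw = [tf.nn.rnn_cell.LSTMCell(64) for _ in range(2)]
cells_bw = [tf.nn.rnn_cell.LSTMCell(64) for _ in range(2)]

# outputs: [batch, max_time, 2 * 64]; states_fw / states_bw: one final state per layer.
outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn(
    cells_fw, cells_bw, inputs, sequence_length=seq_len, dtype=tf.float32)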
Example 10
    def _build(self, inputs, lengths):
        outputs, final_fw_state, final_bw_state = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=self.cell_fw._cells,
            cells_bw=self.cell_bw._cells,
            inputs=inputs,
            sequence_length=lengths,
            dtype=tf.float32)

        # Concatenate states of the forward and backward RNNs
        final_state = final_fw_state, final_bw_state

        return outputs, final_state
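Example 10 reaches into the private _cells attribute, which assumes cell_fw and cell_bw were built as MultiRNNCell instances. A hypothetical construction that would make that access valid (num_units and num_layers are assumed hyperparameters):

import tensorflow as tf

num_units, num_layers = 128, 2  # assumed hyperparameters
cell_fw = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_units) for _ in range(num_layers)])
cell_bw = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(num_units) for _ in range(num_layers)])
# cell_fw._cells / cell_bw._cells now give the per-layer lists that
# Example 10 passes to stack_bidirectional_dynamic_rnn.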
Example 11
    def build_net_aux(self, inputs, lengths):
        outputs = tf.reshape(
            inputs, [self._config.batch_size, -1, self._config.input_size])
        # BLSTM layer
        with tf.variable_scope('blstm_aux'):

            def lstm_cell():
                if not self._infer and self._config.keep_prob < 1.0:
                    return tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(
                            self._config.aux_hidden_size),
                        output_keep_prob=self._config.keep_prob)
                else:
                    return tf.contrib.rnn.BasicLSTMCell(
                        self._config.aux_hidden_size)

            # tf.nn.rnn_cell.MultiRNNCell in r1.12
            lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_fw_cell = self._unpack_cell(lstm_fw_cell)
            lstm_bw_cell = self._unpack_cell(lstm_bw_cell)
            outputs, fw_final_states, bw_final_states = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=lstm_fw_cell,
                cells_bw=lstm_bw_cell,
                inputs=outputs,
                dtype=tf.float32,
                sequence_length=lengths)
            outputs = tf.reshape(outputs, [-1, 2 * self._config.aux_hidden_size])
            # transform blstm outputs into right output size

        with tf.variable_scope('layer2_aux'):
            weights2, biases2 = self._weight_and_bias(
                2 * self._config.aux_hidden_size, self._config.aux_hidden_size)
            outputs = tf.nn.relu(tf.matmul(outputs, weights2) + biases2)

        with tf.variable_scope('layer3_aux'):
            weights3, biases3 = self._weight_and_bias(
                self._config.aux_hidden_size, self._config.aux_output_size)
            outputs = tf.matmul(outputs, weights3) + biases3
            outputs = tf.reshape(
                outputs,
                [self._config.batch_size, -1, self._config.aux_output_size])
            # average over the frames to get the speaker embedding
            spk_embed = tf.reduce_sum(outputs, 1) / tf.reshape(
                tf.to_float(self._lengths_aux), (-1, 1))

        return spk_embed
Example 12
def my_rnn_layers(x,
                  seq_length,
                  training,
                  hidden_num=200,
                  layer_num=5,
                  class_n=5,
                  cell='BNLSTM',
                  dtype=tf.float32):
    """Generate RNN layers.

    Args:
        x (Float): A 3D-Tensor of shape [batch_size,max_time,channel]
        seq_length (Int): A 1D-Tensor of shape [batch_size], real length of each sequence.
        training (Boolean): A 0D-Tenosr indicate if it's in training.
        hidden_num (int, optional): Defaults to 100. Size of the hidden state,
            hidden unit will be deep concatenated, so the final hidden state will be size of 200.
        layer_num (int, optional): Defaults to 3. Number of layers in RNN.
        class_n (int, optional): Defaults to 5. Number of output class.
        cell(str): A String from 'LSTM','GRU','BNLSTM', the RNN Cell used.
            BNLSTM stand for Batch normalization LSTM Cell.

    Returns:
         logits: A 3D Tensor of shape [batch_size, max_time, class_n]
    """

    cells_fw = list()
    cells_bw = list()
    for i in range(layer_num):
        if cell == 'LSTM':
            cell_fw = LSTMCell(hidden_num)
            cell_bw = LSTMCell(hidden_num)
        elif cell == 'GRU':
            cell_fw = GRUCell(hidden_num)
            cell_bw = GRUCell(hidden_num)
        elif cell == 'BNLSTM':
            cell_fw = BNLSTMCell(hidden_num, training=training)
            cell_bw = BNLSTMCell(hidden_num, training=training)
        else:
            raise ValueError("Cell type unrecognized.")
        cells_fw.append(cell_fw)
        cells_bw.append(cell_bw)
    #multi_cells_fw = tf.nn.rnn_cell.MultiRNNCell(cells_fw)
    #multi_cells_bw = tf.nn.rnn_cell.MultiRNNCell(cells_bw)
    with tf.variable_scope('BDLSTM_rnn') as scope:
        lasth, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=x,
            sequence_length=seq_length,
            dtype=dtype,
            scope=scope)
    return lasth
Example 13
  def encode(self, inputs, sequence_length, **kwargs):
    cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])
    cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

    cells_fw = _unpack_cell(cell_fw)
    cells_bw = _unpack_cell(cell_bw)

    result = rnn.stack_bidirectional_dynamic_rnn(
        cells_fw=cells_fw,
        cells_bw=cells_bw,
        inputs=inputs,
        dtype=tf.float32,
        sequence_length=sequence_length,
        **kwargs)
    outputs_concat, _output_state_fw, _output_state_bw = result
    final_state = (_output_state_fw, _output_state_bw)
    return EncoderOutput(
        outputs=outputs_concat,
        final_state=final_state,
        attention_values=outputs_concat,
        attention_values_length=sequence_length)
Example 14
def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None):
    mode = rnn.rnn_mode
    num_units = rnn.num_units
    num_layers = rnn.num_layers

    # To reuse cuDNN-trained models, must use cudnn compatible rnn cells.
    if mode == CUDNN_LSTM:
        single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units)
    elif mode == CUDNN_GRU:
        single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units)
    elif mode == CUDNN_RNN_TANH:
        single_cell = (
            lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh))
    elif mode == CUDNN_RNN_RELU:
        single_cell = (
            lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu))
    else:
        raise ValueError("%s is not supported!" % mode)

    if not is_bidi:
        cell = rnn_cell_impl.MultiRNNCell(
            [single_cell() for _ in range(num_layers)])
        return rnn_lib.dynamic_rnn(cell,
                                   inputs,
                                   dtype=dtypes.float32,
                                   time_major=True,
                                   scope=scope)
    else:
        cells_fw = [single_cell() for _ in range(num_layers)]
        cells_bw = [single_cell() for _ in range(num_layers)]

        (outputs, output_state_fw,
         output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn(
             cells_fw,
             cells_bw,
             inputs,
             dtype=dtypes.float32,
             time_major=True,
             scope=scope)
        return outputs, (output_state_fw, output_state_bw)
Example 15
	def _build_model_op(self):
		with tf.variable_scope("bi_directional_rnn"):
			cell_fw = self._create_rnn_cell()
			cell_bw = self._create_rnn_cell()
			if self.cfg["use_stack_rnn"]:
				rnn_outs, *_ = stack_bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, dtype=tf.float32,
															   sequence_length=self.seq_len)
			else:
				rnn_outs, *_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, self.word_emb, sequence_length=self.seq_len,
														 dtype=tf.float32)
			rnn_outs = tf.concat(rnn_outs, axis=-1)
			rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
			if self.cfg["use_residual"]:
				word_project = tf.layers.dense(self.word_emb, units=2 * self.cfg["num_units"], use_bias=False)
				rnn_outs = rnn_outs + word_project
			outputs = layer_normalize(rnn_outs) if self.cfg["use_layer_norm"] else rnn_outs
			# print("rnn output shape: {}".format(outputs.get_shape().as_list()))

		if self.cfg["use_attention"] == "self_attention":
			with tf.variable_scope("self_attention"):
				attn_outs = multi_head_attention(outputs, outputs, self.cfg["num_heads"], self.cfg["attention_size"],
												 drop_rate=self.drop_rate, is_train=self.is_train)
				if self.cfg["use_residual"]:
					attn_outs = attn_outs + outputs
				outputs = layer_normalize(attn_outs) if self.cfg["use_layer_norm"] else attn_outs
				print("self-attention output shape: {}".format(outputs.get_shape().as_list()))

		elif self.cfg["use_attention"] == "normal_attention":
			with tf.variable_scope("normal_attention"):
				context = tf.transpose(outputs, [1, 0, 2])
				p_context = tf.layers.dense(outputs, units=2 * self.cfg["num_units"], use_bias=False)
				p_context = tf.transpose(p_context, [1, 0, 2])
				attn_cell = AttentionCell(self.cfg["num_units"], context, p_context)  # time major based
				attn_outs, _ = dynamic_rnn(attn_cell, context, sequence_length=self.seq_len, time_major=True,
										   dtype=tf.float32)
				outputs = tf.transpose(attn_outs, [1, 0, 2])
				print("attention output shape: {}".format(outputs.get_shape().as_list()))

		with tf.variable_scope("project"):
			self.logits = tf.layers.dense(outputs, units=self.tag_vocab_size, use_bias=True)
Example 16
    def encode(self, inputs, sequence_length, **kwargs):
        scope = tf.get_variable_scope()
        scope.set_initializer(
            tf.random_uniform_initializer(-self.params["init_scale"],
                                          self.params["init_scale"]))

        self.params["rnn_cell"]["distributed"] = False
        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            0)
        cell_fw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        self.params["rnn_cell"]["device_name"] = training_utils.getDeviceName(
            self.params["rnn_cell"]["num_layers"])
        if self.params["rnn_cell"][
                "device_name"] == training_utils.getDeviceName(0):
            self.params["rnn_cell"][
                "device_name"] = training_utils.getDeviceName(
                    1
                )  # to ensure the backward cell is working on aniother GPU
        cell_bw = training_utils.get_rnn_cell(**self.params["rnn_cell"])

        cells_fw = _unpack_cell(cell_fw)
        cells_bw = _unpack_cell(cell_bw)

        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=inputs,
            dtype=tf.float32,
            sequence_length=sequence_length,
            **kwargs)
        outputs_concat, _output_state_fw, _output_state_bw = result
        final_state = (_output_state_fw, _output_state_bw)
        return EncoderOutput(outputs=outputs_concat,
                             final_state=final_state,
                             attention_values=outputs_concat,
                             attention_values_length=sequence_length)
Example 17
def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None):
  mode = rnn.rnn_mode
  num_units = rnn.num_units
  num_layers = rnn.num_layers

  # To reuse cuDNN-trained models, must use cudnn compatible rnn cells.
  if mode == CUDNN_LSTM:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units)
  elif mode == CUDNN_GRU:
    single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units)
  elif mode == CUDNN_RNN_TANH:
    single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh))
  elif mode == CUDNN_RNN_RELU:
    single_cell = (
        lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu))
  else:
    raise ValueError("%s is not supported!" % mode)

  if not is_bidi:
    cell = rnn_cell_impl.MultiRNNCell(
        [single_cell() for _ in range(num_layers)])
    return rnn_lib.dynamic_rnn(
        cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope)
  else:
    cells_fw = [single_cell() for _ in range(num_layers)]
    cells_bw = [single_cell() for _ in range(num_layers)]

    (outputs, output_state_fw,
     output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn(
         cells_fw,
         cells_bw,
         inputs,
         dtype=dtypes.float32,
         time_major=True,
         scope=scope)
    return outputs, (output_state_fw, output_state_bw)
Example 18
 def __call__(self, inputs, seq_len):
     with tf.variable_scope(self.scope):
         output, *_ = stack_bidirectional_dynamic_rnn(self.cells_fw, self.cells_bw, inputs, sequence_length=seq_len,
                                                      dtype=tf.float32)
     return output
Example 19
def rnn_layers(x,
               seq_length,
               training,
               hidden_num=100,
               layer_num=3,
               class_n=5,
               cell='LSTM',
               dtype=tf.float32):
    """Generate RNN layers.

    Args:
        x (Float): A 3D-Tensor of shape [batch_size,max_time,channel]
        seq_length (Int): A 1D-Tensor of shape [batch_size], real length of each sequence.
        training (Boolean): A 0D-Tenosr indicate if it's in training.
        hidden_num (int, optional): Defaults to 100. Size of the hidden state, 
            hidden unit will be deep concatenated, so the final hidden state will be size of 200.
        layer_num (int, optional): Defaults to 3. Number of layers in RNN.
        class_n (int, optional): Defaults to 5. Number of output class.
        cell(str): A String from 'LSTM','GRU','BNLSTM', the RNN Cell used. 
            BNLSTM stand for Batch normalization LSTM Cell.

    Returns:
         logits: A 3D Tensor of shape [batch_size, max_time, class_n]
    """

    cells_fw = list()
    cells_bw = list()
    for i in range(layer_num):
        if cell == 'LSTM':
            cell_fw = LSTMCell(hidden_num)
            cell_bw = LSTMCell(hidden_num)
        elif cell == 'GRU':
            cell_fw = GRUCell(hidden_num)
            cell_bw = GRUCell(hidden_num)
        elif cell == 'BNLSTM':
            cell_fw = BNLSTMCell(hidden_num, training=training)
            cell_bw = BNLSTMCell(hidden_num, training=training)
        else:
            raise ValueError("Cell type unrecognized.")
        cells_fw.append(cell_fw)
        cells_bw.append(cell_bw)
    #multi_cells_fw = tf.nn.rnn_cell.MultiRNNCell(cells_fw)
    #multi_cells_bw = tf.nn.rnn_cell.MultiRNNCell(cells_bw)
    with tf.variable_scope('BDLSTM_rnn') as scope:
        lasth, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=x,
            sequence_length=seq_length,
            dtype=dtype,
            scope=scope)
        #lasth = tf.concat(outputs, 2, name='birnn_output_concat')
    # shape of lasth [batch_size,max_time,hidden_num*2]
    # Difference between bidirectional_dynamic_rnn and stack_bidirectional_dynamic_rnn:
    # https://stackoverflow.com/questions/49242266/difference-between-multirnncell-and-stack-bidirectional-dynamic-rnn-in-tensorflo
    batch_size = tf.shape(lasth)[0]
    max_time = lasth.get_shape().as_list()[1]
    with tf.variable_scope('rnn_fnn_layer'):
        weight_out = _variable_on_cpu(
            name='weights',
            shape=[2, hidden_num],
            initializer=tf.truncated_normal_initializer(
                stddev=np.sqrt(2.0 / (2 * hidden_num))),
            dtype=dtype)
        biases_out = _variable_on_cpu(name='bias',
                                      shape=[hidden_num],
                                      initializer=tf.zeros_initializer(),
                                      dtype=dtype)
        weight_class = _variable_on_cpu(
            name='weights_class',
            shape=[hidden_num, class_n],
            initializer=tf.truncated_normal_initializer(
                stddev=np.sqrt(2.0 / hidden_num)),
            dtype=dtype)
        bias_class = _variable_on_cpu(name='bias_class',
                                      shape=[class_n],
                                      initializer=tf.zeros_initializer(),
                                      dtype=dtype)
        lasth_rs = tf.reshape(lasth, [batch_size, max_time, 2, hidden_num],
                              name='lasth_rs')
        lasth_output = tf.nn.bias_add(tf.reduce_sum(tf.multiply(
            lasth_rs, weight_out),
                                                    axis=2),
                                      biases_out,
                                      name='lasth_bias_add')
        lasth_output_rs = tf.reshape(lasth_output,
                                     [batch_size * max_time, hidden_num],
                                     name='lasto_rs')
        logits = tf.reshape(tf.nn.bias_add(
            tf.matmul(lasth_output_rs, weight_class), bias_class),
                            [batch_size, max_time, class_n],
                            name="rnn_logits_rs")
    return logits
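A hypothetical call into rnn_layers, assuming LSTMCell resolves to tf.contrib.rnn.LSTMCell and that _variable_on_cpu and variable_summaries come from the surrounding project; the max_time dimension must be static because the reshapes above use it:

x = tf.placeholder(tf.float32, [None, 300, 1], name='signal')     # [batch, max_time, channel]
seq_length = tf.placeholder(tf.int32, [None], name='seq_length')  # true length of each sequence
training = tf.placeholder(tf.bool, [], name='training')

logits = rnn_layers(x, seq_length, training,
                    hidden_num=100, layer_num=3, class_n=5, cell='LSTM')
# logits: [batch, 300, 5]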
Example 20
    def build_net(self):
        outputs = self._inputs
        # feed-forward layer, not used (set to false) when grid lstm is applied
        if self._config.dense_layer.lower() == 'true':
            with tf.variable_scope('forward1'):
                outputs = tf.reshape(outputs, [-1, self._config.input_size])
                outputs = tf.layers.dense(outputs, units=self._config.rnn_size,
                              activation=tf.nn.tanh, reuse=tf.get_variable_scope().reuse)
                outputs = tf.reshape(outputs, [self._config.batch_size, -1, self._config.rnn_size])
        
        # grid lstm layer and a linear reduction layer
        if self._config.tflstm_size > 0:
            with tf.variable_scope('tflstm'):
                def tflstm_cell():
                    return tf.contrib.rnn.GridLSTMCell(self._config.tflstm_size, use_peepholes=True, share_time_frequency_weights=True, 
                               cell_clip=5.0, feature_size=self._config.tffeature_size, frequency_skip=self._config.tffrequency_skip, 
                               num_frequency_blocks=[int((self._config.input_size-self._config.tffeature_size)/self._config.tffrequency_skip+1)])
                    
                cell = tf.contrib.rnn.MultiRNNCell([tflstm_cell() for _ in range(self._config.tflstm_layers)], state_is_tuple=True)
                initial_state = cell.zero_state(self._config.batch_size, tf.float32)
                outputs, final_state = tf.nn.dynamic_rnn(cell, outputs, dtype=tf.float32, sequence_length=self._lengths, initial_state=initial_state)

                tflstm_output_size = 2*self._config.tflstm_size*int((self._config.input_size-self._config.tffeature_size)/self._config.tffrequency_skip+1)
                outputs = tf.reshape(outputs, [-1, tflstm_output_size])
                weights, biases = self._weight_and_bias('linear', tflstm_output_size, self._config.rnn_size)
                outputs = tf.matmul(outputs, weights) + biases
                outputs = tf.reshape(outputs, [self._config.batch_size, -1, self._config.rnn_size])

        # BLSTM layer
        with tf.variable_scope('blstm'):
            def lstm_cell():
                return tf.contrib.rnn.BasicLSTMCell(self._config.rnn_size) #tf.nn.rnn_cell.BasicLSTMCell in r1.12
            attn_cell = lstm_cell
            if not self._infer and self._config.keep_prob < 1.0:
                def attn_cell():
                    return tf.contrib.rnn.DropoutWrapper(lstm_cell(), output_keep_prob=self._config.keep_prob)

            # tf.nn.rnn_cell.MultiRNNCell in r1.12
            lstm_fw_cell = tf.contrib.rnn.MultiRNNCell([attn_cell() for _ in range(self._config.rnn_num_layers)], state_is_tuple=True)
            lstm_bw_cell = tf.contrib.rnn.MultiRNNCell([attn_cell() for _ in range(self._config.rnn_num_layers)], state_is_tuple=True)
            lstm_fw_cell = self._unpack_cell(lstm_fw_cell)
            lstm_bw_cell = self._unpack_cell(lstm_bw_cell)
            outputs, fw_final_states, bw_final_states = rnn.stack_bidirectional_dynamic_rnn(cells_fw=lstm_fw_cell,
                cells_bw=lstm_bw_cell, inputs=outputs, dtype=tf.float32, sequence_length=self._lengths)

        # Mask estimation layer
        with tf.variable_scope('forward2'):
            blstm_output_size = 2*self._config.rnn_size
            outputs = tf.reshape(outputs, [-1, blstm_output_size])
            
            weights1, biases1 = self._weight_and_bias('mask1', blstm_output_size, self._config.output_size)
            weights2, biases2 = self._weight_and_bias('mask2', blstm_output_size, self._config.output_size)
            if self._config.mask_type.lower() == 'relu':
                mask1 = tf.nn.relu(tf.matmul(outputs, weights1) + biases1)
                mask2 = tf.nn.relu(tf.matmul(outputs, weights2) + biases2)
            else:
                mask1 = tf.nn.sigmoid(tf.matmul(outputs, weights1) + biases1)
                mask2 = tf.nn.sigmoid(tf.matmul(outputs, weights2) + biases2)
            
            self._mask1 = tf.reshape(mask1, [self._config.batch_size, -1, self._config.output_size])
            self._mask2 = tf.reshape(mask2, [self._config.batch_size, -1, self._config.output_size])

            self._sep1 = self._mask1 * self._mixed
            self._sep2 = self._mask2 * self._mixed

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
    def __init__(self,
                 x_mag_spec_batch,
                 lengths_batch,
                 y_mag_spec_batch=None,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 behavior='train'):
        '''
        behavior = 'train/validation/infer'
        '''
        if behavior != self.infer:
            assert (y_mag_spec_batch is not None)
            assert (theta_x_batch is not None)
            assert (theta_y_batch is not None)
        self._log_bias = tf.get_variable(
            'logbias', [1],
            trainable=FLAGS.PARAM.LOG_BIAS_TRAINABLE,
            initializer=tf.constant_initializer(FLAGS.PARAM.INIT_LOG_BIAS))
        self._real_logbias = self._log_bias + FLAGS.PARAM.MIN_LOG_BIAS
        self._x_mag_spec = x_mag_spec_batch
        self._norm_x_mag_spec = norm_mag_spec(self._x_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)
        self._norm_x_logmag_spec = norm_logmag_spec(self._x_mag_spec,
                                                    FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias,
                                                    FLAGS.PARAM.MIN_LOG_BIAS)

        self._y_mag_spec = y_mag_spec_batch
        self._norm_y_mag_spec = norm_mag_spec(self._y_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)
        self._norm_y_logmag_spec = norm_logmag_spec(self._y_mag_spec,
                                                    FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias,
                                                    FLAGS.PARAM.MIN_LOG_BIAS)

        self._lengths = lengths_batch
        self._batch_size = tf.shape(self._lengths)[0]

        self._x_theta = theta_x_batch
        self._y_theta = theta_y_batch
        self._model_type = FLAGS.PARAM.MODEL_TYPE

        if FLAGS.PARAM.INPUT_TYPE == 'mag':
            self.net_input = self._norm_x_mag_spec
        elif FLAGS.PARAM.INPUT_TYPE == 'logmag':
            self.net_input = self._norm_x_logmag_spec
        if FLAGS.PARAM.LABEL_TYPE == 'mag':
            self._y_labels = self._norm_y_mag_spec
        elif FLAGS.PARAM.LABEL_TYPE == 'logmag':
            self._y_labels = self._norm_y_logmag_spec

        outputs = self.net_input
        if FLAGS.PARAM.INPUT_BN:
            with tf.variable_scope('Batch_Norm_Layer'):
                if_BRN = (FLAGS.PARAM.MVN_TYPE == 'BRN')
                if FLAGS.PARAM.SELF_BN:
                    outputs = tf.layers.batch_normalization(outputs,
                                                            training=True,
                                                            renorm=if_BRN)
                else:
                    outputs = tf.layers.batch_normalization(
                        outputs,
                        training=(behavior == self.train
                                  or behavior == self.validation),
                        renorm=if_BRN)

        lstm_attn_cell = lstm_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell(n_units, n_proj, act):
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(n_units, n_proj, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        GRU_attn_cell = GRU_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell(n_units, act):
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(n_units, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
            with tf.variable_scope('BLSTM'):

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell([
                    lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                   FLAGS.PARAM.LSTM_num_proj,
                                   FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                           state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell([
                    lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                   FLAGS.PARAM.LSTM_num_proj,
                                   FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                           state_is_tuple=True)

                fw_cell = lstm_fw_cell._cells
                bw_cell = lstm_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=fw_cell,
                    cells_bw=bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
            with tf.variable_scope('BGRU'):
                gru_fw_cell = tf.contrib.rnn.MultiRNNCell([
                    GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                  FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                          state_is_tuple=True)
                gru_bw_cell = tf.contrib.rnn.MultiRNNCell([
                    GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                  FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                          state_is_tuple=True)

                fw_cell = gru_fw_cell._cells
                bw_cell = gru_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=fw_cell,
                    cells_bw=bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        self.fw_final_state = fw_final_states
        self.bw_final_state = bw_final_states
        # print(fw_final_states[0][0].get_shape().as_list())

        # print(np.shape(fw_final_states),np.shape(bw_final_states))

        # region full connection get mask
        # calculate rnn output size
        in_size = FLAGS.PARAM.RNN_SIZE
        mask = None
        if self._model_type.upper()[0] == 'B':  # bidirectional
            rnn_output_num = FLAGS.PARAM.RNN_SIZE * 2
            if FLAGS.PARAM.MODEL_TYPE == 'BLSTM' and (
                    not (FLAGS.PARAM.LSTM_num_proj is None)):
                rnn_output_num = 2 * FLAGS.PARAM.LSTM_num_proj
            in_size = rnn_output_num
        outputs = tf.reshape(outputs, [-1, in_size])
        out_size = FLAGS.PARAM.OUTPUT_SIZE
        with tf.variable_scope('fullconnectOut'):
            weights = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases = tf.get_variable('biases1', [out_size],
                                     initializer=tf.constant_initializer(
                                         FLAGS.PARAM.INIT_MASK_VAL))
        if FLAGS.PARAM.TIME_NOSOFTMAX_ATTENTION:
            with tf.variable_scope('fullconnectCoef'):
                weights_coef = tf.get_variable(
                    'weights_coef', [in_size, 1],
                    initializer=tf.random_normal_initializer(mean=1.0,
                                                             stddev=0.01))
                biases_coef = tf.get_variable(
                    'biases_coef', [1],
                    initializer=tf.constant_initializer(0.0))
            raw_mask = tf.reshape(
                tf.matmul(outputs, weights) + biases,
                [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE
                 ])  # [batch,time,fre]
            batch_coef_vec = tf.nn.relu(
                tf.reshape(
                    tf.matmul(outputs, weights_coef) + biases_coef,
                    [self._batch_size, -1]))  # [batch, time]
            mask = tf.multiply(
                raw_mask, tf.reshape(batch_coef_vec,
                                     [self._batch_size, -1, 1]))
        else:
            if FLAGS.PARAM.POST_BN:
                linear_out = tf.matmul(outputs, weights)
                with tf.variable_scope('POST_Batch_Norm_Layer'):
                    if_BRN = (FLAGS.PARAM.MVN_TYPE == 'BRN')
                    if FLAGS.PARAM.SELF_BN:
                        linear_out = tf.layers.batch_normalization(
                            linear_out, training=True, renorm=if_BRN)
                    else:
                        linear_out = tf.layers.batch_normalization(
                            linear_out,
                            training=(behavior == self.train
                                      or behavior == self.validation),
                            renorm=if_BRN)
                    weights2 = tf.get_variable(
                        'weights1', [out_size, out_size],
                        initializer=tf.random_normal_initializer(stddev=0.01))
                    biases2 = tf.get_variable(
                        'biases1', [out_size],
                        initializer=tf.constant_initializer(
                            FLAGS.PARAM.INIT_MASK_VAL))
                    linear_out = tf.matmul(linear_out, weights2) + biases2
            else:
                linear_out = tf.matmul(outputs, weights) + biases
            mask = linear_out
            if FLAGS.PARAM.ReLU_MASK:
                mask = tf.nn.relu(linear_out)

        # endregion

        self._mask = tf.reshape(
            mask, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE])

        if FLAGS.PARAM.TRAINING_MASK_POSITION == 'mag':
            self._y_estimation = self._mask * (self._norm_x_mag_spec +
                                               FLAGS.PARAM.SPEC_EST_BIAS)
        elif FLAGS.PARAM.TRAINING_MASK_POSITION == 'logmag':
            self._y_estimation = self._mask * (self._norm_x_logmag_spec +
                                               FLAGS.PARAM.SPEC_EST_BIAS)

        # region get infer spec
        if FLAGS.PARAM.DECODING_MASK_POSITION == 'mag':
            self._y_mag_estimation = rm_norm_mag_spec(
                self._mask *
                (self._norm_x_mag_spec + FLAGS.PARAM.SPEC_EST_BIAS),
                FLAGS.PARAM.MAG_NORM_MAX)
        elif FLAGS.PARAM.DECODING_MASK_POSITION == 'logmag':
            self._y_mag_estimation = rm_norm_logmag_spec(
                self._mask *
                (self._norm_x_logmag_spec + FLAGS.PARAM.SPEC_EST_BIAS),
                FLAGS.PARAM.MAG_NORM_MAX, self._log_bias,
                FLAGS.PARAM.MIN_LOG_BIAS)
        '''
        _y_mag_estimation is the estimated mag_spec;
        _y_estimation is the loss target, mag_spec or logmag_spec.
        '''
        # endregion

        # region prepare y_estimation
        if FLAGS.PARAM.TRAINING_MASK_POSITION != FLAGS.PARAM.LABEL_TYPE:
            if FLAGS.PARAM.LABEL_TYPE == 'mag':
                self._y_estimation = normedLogmag2normedMag(
                    self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                    self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
            elif FLAGS.PARAM.LABEL_TYPE == 'logmag':
                self._y_estimation = normedMag2normedLogmag(
                    self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                    self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
        # endregion

        # region CBHG
        if FLAGS.PARAM.USE_CBHG_POST_PROCESSING:
            cbhg_kernels = 8  # All kernel sizes from 1 to cbhg_kernels will be used in the convolution bank of CBHG to act as "K-grams"
            cbhg_conv_channels = 128  # Channels of the convolution bank
            cbhg_pool_size = 2  # pooling size of the CBHG
            cbhg_projection = 256  # projection channels of the CBHG (1st projection, 2nd is automatically set to num_mels)
            cbhg_projection_kernel_size = 3  # kernel_size of the CBHG projections
            cbhg_highwaynet_layers = 4  # Number of HighwayNet layers
            cbhg_highway_units = 128  # Number of units used in HighwayNet fully connected layers
            cbhg_rnn_units = 128  # Number of GRU units used in bidirectional RNN of CBHG block. CBHG output is 2x rnn_units in shape
            batch_norm_position = 'before'
            # is_training = True
            is_training = bool(behavior == self.train)
            post_cbhg = CBHG(cbhg_kernels,
                             cbhg_conv_channels,
                             cbhg_pool_size,
                             [cbhg_projection, FLAGS.PARAM.OUTPUT_SIZE],
                             cbhg_projection_kernel_size,
                             cbhg_highwaynet_layers,
                             cbhg_highway_units,
                             cbhg_rnn_units,
                             batch_norm_position,
                             is_training,
                             name='CBHG_postnet')

            #[batch_size, decoder_steps(mel_frames), cbhg_channels]
            self._cbhg_inputs_y_est = self._y_estimation
            cbhg_outputs = post_cbhg(self._y_estimation, None)

            frame_projector = FrameProjection(FLAGS.PARAM.OUTPUT_SIZE,
                                              scope='CBHG_proj_to_spec')
            self._y_estimation = frame_projector(cbhg_outputs)

            if FLAGS.PARAM.DECODING_MASK_POSITION != FLAGS.PARAM.TRAINING_MASK_POSITION:
                print(
                    'DECODING_MASK_POSITION must be equal to TRAINING_MASK_POSITION when use CBHG post processing.'
                )
                exit(-1)
            if FLAGS.PARAM.DECODING_MASK_POSITION == 'mag':
                self._y_mag_estimation = rm_norm_mag_spec(
                    self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX)
            elif FLAGS.PARAM.DECODING_MASK_POSITION == 'logmag':
                self._y_mag_estimation = rm_norm_logmag_spec(
                    self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                    self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
        # endregion

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if behavior == self.infer:
            return

        # region get labels LOSS
        # Labels
        if FLAGS.PARAM.MASK_TYPE == 'PSM':
            self._y_labels *= tf.cos(self._x_theta - self._y_theta)
        elif FLAGS.PARAM.MASK_TYPE == 'fixPSM':
            self._y_labels *= (1.0 +
                               tf.cos(self._x_theta - self._y_theta)) * 0.5
        elif FLAGS.PARAM.MASK_TYPE == 'AcutePM':
            self._y_labels *= tf.nn.relu(tf.cos(self._x_theta - self._y_theta))
        elif FLAGS.PARAM.MASK_TYPE == 'PowFixPSM':
            self._y_labels *= tf.pow(
                tf.abs((1.0 + tf.cos(self._x_theta - self._y_theta)) * 0.5),
                FLAGS.PARAM.POW_FIX_PSM_COEF)
        elif FLAGS.PARAM.MASK_TYPE == 'IRM':
            pass
        else:
            tf.logging.error('Mask type error.')
            exit(-1)

        # LOSS
        if FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'SPEC_MSE':  # log_mag and mag MSE
            self._loss = loss.reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
            if FLAGS.PARAM.USE_CBHG_POST_PROCESSING:
                if FLAGS.PARAM.DOUBLE_LOSS:
                    self._loss = FLAGS.PARAM.CBHG_LOSS_COEF1 * loss.reduce_sum_frame_batchsize_MSE(
                        self._cbhg_inputs_y_est, self._y_labels
                    ) + FLAGS.PARAM.CBHG_LOSS_COEF2 * self._loss
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'MFCC_SPEC_MSE':
            self._loss1, self._loss2 = loss.balanced_MFCC_AND_SPEC_MSE(
                self._y_estimation, self._y_labels, self._y_mag_estimation,
                self._y_mag_spec)
            self._loss = FLAGS.PARAM.SPEC_LOSS_COEF * self._loss1 + FLAGS.PARAM.MFCC_LOSS_COEF * self._loss2
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'MEL_MAG_MSE':
            self._loss1, self._loss2 = loss.balanced_MEL_AND_SPEC_MSE(
                self._y_estimation, self._y_labels, self._y_mag_estimation,
                self._y_mag_spec)
            self._loss = FLAGS.PARAM.SPEC_LOSS_COEF * self._loss1 + FLAGS.PARAM.MEL_LOSS_COEF * self._loss2
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_LOWF_EN":
            self._loss = loss.reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "FAIR_SPEC_MSE":
            self._loss = loss.fair_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_FLEXIBLE_POW_C":
            self._loss = loss.reduce_sum_frame_batchsize_MSE_EmphasizeLowerValue(
                self._y_estimation, self._y_labels, FLAGS.PARAM.POW_COEF)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "RELATED_MSE":
            self._loss = loss.relative_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.RELATED_MSE_IGNORE_TH)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE2":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v2(
                self._y_estimation,
                self._y_labels,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG,
                FLAGS.PARAM.LINEAR_BROKER,
            )
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE3":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v3(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS3_A,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS3_B,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS3_C1,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS3_C2)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE4":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v4(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE5":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v5(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE6":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v6(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS6_A,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS6_B,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS6_C1,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS6_C2)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE7":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v7(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS7_A1,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS7_A2,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS7_B,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS7_C1,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS7_C2)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE8":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE_v8(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS8_A,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS8_B,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS8_C1,
                FLAGS.PARAM.AUTO_RELATIVE_LOSS8_C2)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE_USE_COS":
            self._loss = loss.cos_auto_ingore_relative_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.COS_AUTO_RELATED_MSE_W)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'MEL_AUTO_RELATED_MSE':
            # type(y_estimation) = FLAGS.PARAM.LABEL_TYPE
            self._loss = loss.MEL_AUTO_RELATIVE_MSE(
                self._y_estimation, self._norm_y_mag_spec, FLAGS.PARAM.MEL_NUM,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG)
        else:
            print('Loss type error.')
            exit(-1)
        # endregion

        if behavior == self.validation:
            '''
            The validation model cannot be trained.
            '''
            return
        self._lr = tf.Variable(0.0, trainable=False)  #TODO
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          FLAGS.PARAM.CLIP_NORM)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
Example 22
    def __init__(self, config, inputs, labels, lengths, infer=False):
        self._inputs = inputs
        self._labels = labels
        self._lengths = lengths
        self._model_type = config.model_type
        if infer:  # if infer, we prefer to run one utterance one time.
            config.batch_size = 1
        outputs = self._inputs
        ## The first layer is a feed-forward layer that transforms the
        ## input to the right size before feeding it into the RNN.

        with tf.variable_scope('forward1'):
            outputs = tf.reshape(outputs, [-1, config.input_size])
            outputs = tf.layers.dense(outputs,
                                      units=config.rnn_size,
                                      activation=tf.nn.tanh,
                                      reuse=tf.get_variable_scope().reuse)
            outputs = tf.reshape(outputs,
                                 [config.batch_size, -1, config.rnn_size])

        ## Configure the LSTM or BLSTM model
        ## For BLSTM, we use BasicLSTMCell. For LSTM, we use LSTMCell.
        ## You can change them and test the performance...

        if config.model_type.lower() == 'blstm':
            with tf.variable_scope('blstm'):
                cell = tf.contrib.rnn.BasicLSTMCell(config.rnn_size)
                if not infer and config.keep_prob < 1.0:
                    cell = tf.contrib.rnn.DropoutWrapper(
                        cell, output_keep_prob=config.keep_prob)

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [cell] * config.rnn_num_layers)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [cell] * config.rnn_num_layers)
                lstm_fw_cell = _unpack_cell(lstm_fw_cell)
                lstm_bw_cell = _unpack_cell(lstm_bw_cell)
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result
        if config.model_type.lower() == 'lstm':
            with tf.variable_scope('lstm'):

                def lstm_cell():
                    return tf.contrib.rnn.LSTMCell(
                        config.rnn_size,
                        forget_bias=1.0,
                        use_peepholes=True,
                        initializer=tf.contrib.layers.xavier_initializer(),
                        state_is_tuple=True,
                        activation=tf.tanh)

                attn_cell = lstm_cell
                if not infer and config.keep_prob < 1.0:

                    def attn_cell():
                        return tf.contrib.rnn.DropoutWrapper(
                            lstm_cell(), output_keep_prob=config.keep_prob)

                cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                self._initial_state = cell.zero_state(config.batch_size,
                                                      tf.float32)
                state = self.initial_state
                outputs, state = tf.nn.dynamic_rnn(
                    cell,
                    outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths,
                    initial_state=self.initial_state)
                self._final_state = state

        ## Feed forward layer. Transform the RNN output to the right output size

        with tf.variable_scope('forward2'):
            if config.embedding_option == 0:  # no embedding, frame-by-frame outputs
                if self._model_type.lower() == 'blstm':
                    outputs = tf.reshape(outputs, [-1, 2 * config.rnn_size])
                    in_size = 2 * config.rnn_size
                else:
                    outputs = tf.reshape(outputs, [-1, config.rnn_size])
                    in_size = config.rnn_size

            else:
                if self._model_type.lower() == 'blstm':
                    outputs = tf.reshape(
                        outputs, [config.batch_size, -1, 2 * config.rnn_size])
                    in_size = 2 * config.rnn_size
                else:
                    outputs = tf.reshape(
                        outputs, [config.batch_size, -1, config.rnn_size])
                    in_size = config.rnn_size

                if config.embedding_option == 1:  # last-frame embedding (see the gather_nd sketch after this example)
                    # http://sqrtf.com/fetch-rnn-encoder-last-output-using-tf-gather_nd/
                    ind = tf.subtract(self._lengths, tf.constant(1))
                    batch_range = tf.range(config.batch_size)
                    indices = tf.stack([batch_range, ind], axis=1)

                    outputs = tf.gather_nd(outputs, indices)
                    self._labels = tf.reduce_mean(self._labels, 1)
                elif config.embedding_option == 2:  # mean pooling
                    outputs = tf.reduce_mean(outputs, 1)
                    self._labels = tf.reduce_mean(self._labels, 1)
            out_size = config.output_size
            weights1 = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases1 = tf.get_variable('biases1', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            outputs = tf.matmul(outputs, weights1) + biases1
            if config.embedding_option == 0:
                outputs = tf.reshape(outputs,
                                     [config.batch_size, -1, out_size])
            self._outputs = tf.nn.sigmoid(outputs)
        # Ability to save the model
        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)

        if infer: return

        # Compute loss(CE)
        self._loss = tf.losses.sigmoid_cross_entropy(self._labels, outputs)
        if tf.get_variable_scope().reuse: return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
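
The last-frame embedding branch above selects the RNN output at each utterance's final valid step with tf.gather_nd. A minimal standalone sketch of that indexing trick (TF 1.x; shapes and names are made up for illustration):

import tensorflow as tf

outputs = tf.random_normal([4, 10, 8])           # [batch, time, dim]
lengths = tf.constant([10, 7, 3, 9], tf.int32)   # valid frames per utterance

batch_range = tf.range(tf.shape(outputs)[0])     # [0, 1, 2, 3]
last_index = lengths - 1                         # index of the last valid frame
indices = tf.stack([batch_range, last_index], axis=1)   # [[0, 9], [1, 6], ...]
last_frames = tf.gather_nd(outputs, indices)     # [batch, dim]

with tf.Session() as sess:
    print(sess.run(last_frames).shape)           # (4, 8)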
Example n. 23
0
    def __init__(self, config, inputs, labels, lengths, genders, infer=False):
        self._inputs = inputs
        self._mixed = inputs
        self._labels1 = tf.slice(labels, [0, 0, 0],
                                 [-1, -1, config.output_size])
        self._labels2 = tf.slice(labels, [0, 0, config.output_size],
                                 [-1, -1, -1])
        self._lengths = lengths
        self._genders = genders
        self._model_type = config.model_type

        outputs = self._inputs
        ## The first layer is a feed-forward layer that
        ## transforms the input to the right size before feeding it into the RNN.

        with tf.variable_scope('forward1'):
            outputs = tf.reshape(outputs, [-1, config.input_size])
            outputs = tf.layers.dense(outputs,
                                      units=config.rnn_size,
                                      activation=tf.nn.tanh,
                                      reuse=tf.get_variable_scope().reuse)
            outputs = tf.reshape(outputs,
                                 [config.batch_size, -1, config.rnn_size])

        def lstm_cell():
            return tf.contrib.rnn.LSTMCell(
                config.rnn_size,
                forget_bias=1.0,
                use_peepholes=True,
                initializer=tf.contrib.layers.xavier_initializer(),
                state_is_tuple=True,
                activation=tf.tanh)

        attn_cell = lstm_cell
        if not infer and config.keep_prob < 1.0:

            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)

        if config.model_type.lower() == 'blstm':
            with tf.variable_scope('blstm'):
                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                lstm_fw_cell = _unpack_cell(lstm_fw_cell)
                lstm_bw_cell = _unpack_cell(lstm_bw_cell)
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        if config.model_type.lower() == 'lstm':
            with tf.variable_scope('lstm'):
                cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                self._initial_state = cell.zero_state(config.batch_size,
                                                      tf.float32)
                state = self.initial_state
                outputs, state = tf.nn.dynamic_rnn(
                    cell,
                    outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths,
                    initial_state=self.initial_state)
                self._final_state = state

        ## Feed forward layer. Transform the RNN output to the right output size
        with tf.variable_scope('forward2'):
            if self._model_type.lower() == 'blstm':
                outputs = tf.reshape(outputs, [-1, 2 * config.rnn_size])
                in_size = 2 * config.rnn_size
            else:
                outputs = tf.reshape(outputs, [-1, config.rnn_size])
                in_size = config.rnn_size
            # w1,b1 =self. _weight_and_bias("L_1",in_size,256)
            # outputs1 = tf.nn.relu(tf.matmul(outputs,w1)+b1)
            # w2,b2 = self._weight_and_bias("L_2",256,256)
            # outputs2 = tf.nn.relu(tf.matmul(outputs1,w2)+b2+outputs1)
            out_size = config.output_size
            # in_size=256
            weights1 = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases1 = tf.get_variable('biases1', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            weights2 = tf.get_variable(
                'weights2', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases2 = tf.get_variable('biases2', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            mask1 = tf.nn.relu(tf.matmul(outputs, weights1) + biases1)
            mask2 = tf.nn.relu(tf.matmul(outputs, weights2) + biases2)
            self._activations1 = tf.reshape(
                mask1, [config.batch_size, -1, config.output_size])
            self._activations2 = tf.reshape(
                mask2, [config.batch_size, -1, config.output_size])

            # In general, config.czt_dim == 0. However, we found that concatenating
            # 128-dim chirp-z transform feats to the FFT feats gives better SDR
            # performance for the same-gender case.
            # So, if you don't use the czt feats (just the FFT feats), set config.czt_dim = 0.
            self._cleaned1 = self._activations1 * self._mixed
            self._cleaned2 = self._activations2 * self._mixed

        # Ability to save the model
        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if infer: return

        cost1 = tf.reduce_mean(
            tf.reduce_sum(tf.pow(self._cleaned1 - self._labels1, 2), 1) +
            tf.reduce_sum(tf.pow(self._cleaned2 - self._labels2, 2), 1), 1)
        cost2 = tf.reduce_mean(
            tf.reduce_sum(tf.pow(self._cleaned2 - self._labels1, 2), 1) +
            tf.reduce_sum(tf.pow(self._cleaned1 - self._labels2, 2), 1), 1)

        idx = tf.cast(cost1 > cost2, tf.float32)
        self._loss = tf.reduce_sum(idx * cost2 + (1 - idx) * cost1)
        if tf.get_variable_scope().reuse: return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        # optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
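
The two costs above implement two-speaker permutation-invariant training (PIT): both output/label assignments are scored and only the cheaper one is back-propagated. A simplified standalone sketch of the idea (TF 1.x; the reductions are simplified and the tensors are random stand-ins):

import tensorflow as tf

est1 = tf.random_normal([2, 5, 3])   # [batch, time, freq] estimate, speaker 1
est2 = tf.random_normal([2, 5, 3])   # estimate, speaker 2
lab1 = tf.random_normal([2, 5, 3])   # label, speaker 1
lab2 = tf.random_normal([2, 5, 3])   # label, speaker 2

def pair_cost(a, la, b, lb):
    # summed squared error of one assignment, one scalar per batch item
    return (tf.reduce_sum(tf.square(a - la), axis=[1, 2]) +
            tf.reduce_sum(tf.square(b - lb), axis=[1, 2]))

cost_keep = pair_cost(est1, lab1, est2, lab2)    # assignment (1->1, 2->2)
cost_swap = pair_cost(est1, lab2, est2, lab1)    # assignment (1->2, 2->1)
loss = tf.reduce_sum(tf.minimum(cost_keep, cost_swap))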
Example n. 24
0
    def __init__(self,
                 x_mag_spec_batch,
                 lengths_batch,
                 y_mag_spec_batch=None,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 behavior='train'):
        '''
    behavior = 'train/validation/infer'
    '''
        if behavior != self.infer:
            assert (y_mag_spec_batch is not None)
            assert (theta_x_batch is not None)
            assert (theta_y_batch is not None)
        self._x_mag_spec = x_mag_spec_batch
        self._norm_x_mag_spec = norm_mag_spec(self._x_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)

        self._y_mag_spec = y_mag_spec_batch
        self._norm_y_mag_spec = norm_mag_spec(self._y_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)

        self._lengths = lengths_batch
        self._batch_size = tf.shape(self._lengths)[0]

        self._x_theta = theta_x_batch
        self._y_theta = theta_y_batch
        self._model_type = FLAGS.PARAM.MODEL_TYPE

        if FLAGS.PARAM.INPUT_TYPE == 'mag':
            self.logbias_net_input = self._norm_x_mag_spec
        elif FLAGS.PARAM.INPUT_TYPE == 'logmag':
            tf.logging.error(
                "Training_In_Turn_Model: NNET input must be magnitude spectrum."
            )
            exit(-1)

        # region training dropout
        lstm_attn_cell = lstm_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell(n_units, n_proj, act):
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(n_units, n_proj, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        GRU_attn_cell = GRU_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell(n_units, act):
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(n_units, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        # endregion

        # region logbias net
        with tf.variable_scope('logbias_net'):
            logbias_net_outputs = self.logbias_net_input
            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
                with tf.variable_scope('BLSTM_logbias'):

                    lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE_LOGBIAS,
                                           FLAGS.PARAM.LSTM_num_proj_LOGBIAS,
                                           FLAGS.PARAM.LSTM_ACTIVATION_LOGBIAS)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_LOGBIAS)
                        ],
                        state_is_tuple=True)
                    lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE_LOGBIAS,
                                           FLAGS.PARAM.LSTM_num_proj_LOGBIAS,
                                           FLAGS.PARAM.LSTM_ACTIVATION_LOGBIAS)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_LOGBIAS)
                        ],
                        state_is_tuple=True)

                    fw_cell_logbiasnet = lstm_fw_cell._cells
                    bw_cell_logbiasnet = lstm_bw_cell._cells

            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
                with tf.variable_scope('BGRU_logbias'):

                    gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE_LOGBIAS,
                                          FLAGS.PARAM.LSTM_ACTIVATION_LOGBIAS)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_LOGBIAS)
                        ],
                        state_is_tuple=True)
                    gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE_LOGBIAS,
                                          FLAGS.PARAM.LSTM_ACTIVATION_LOGBIAS)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_LOGBIAS)
                        ],
                        state_is_tuple=True)

                    fw_cell_logbiasnet = gru_fw_cell._cells
                    bw_cell_logbiasnet = gru_bw_cell._cells

            # dynamic rnn
            result = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=fw_cell_logbiasnet,
                cells_bw=bw_cell_logbiasnet,
                inputs=logbias_net_outputs,
                dtype=tf.float32,
                sequence_length=self._lengths)
            logbias_net_outputs, fw_final_states, bw_final_states = result

            logbias_biRnn_out_size = FLAGS.PARAM.RNN_SIZE_LOGBIAS * 2
            # attend_fea = sum_attention_v2(logbias_net_outputs,self._batch_size,logbias_biRnn_out_size)
            # print(np.shape(fw_final_states),np.shape(bw_final_states),np.shape(logbias_net_outputs))
            # attend_fea = sum_attention_with_final_state(logbias_net_outputs,
            #                                             tf.concat(-1, [fw_final_states,
            #                                                            bw_final_states]),
            #                                             logbias_biRnn_out_size, 1024)
            attend_fea = sum_attention(logbias_net_outputs,
                                       logbias_biRnn_out_size, 1024)

            with tf.variable_scope('fullconnectSuitableLogbias'):
                weights_logbias_fc = tf.get_variable(
                    'weights_logbias_fc', [logbias_biRnn_out_size, 1],
                    initializer=tf.random_normal_initializer(stddev=0.01))
                biases_logbias_fc = tf.get_variable(
                    'biases_logbias_fc', [1],
                    initializer=tf.constant_initializer(0.0))
                logbias_net_out = tf.expand_dims(
                    tf.matmul(attend_fea, weights_logbias_fc) +
                    biases_logbias_fc,
                    axis=-1)  # [batch,1,1]
                self._log_bias = tf.nn.relu(logbias_net_out +
                                            FLAGS.PARAM.INIT_LOG_BIAS)

            self._real_logbias = tf.add(self._log_bias,
                                        FLAGS.PARAM.MIN_LOG_BIAS)
        # endregion

        self._norm_x_logmag_spec = norm_logmag_spec(self._x_mag_spec,
                                                    FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias,
                                                    FLAGS.PARAM.MIN_LOG_BIAS)
        self._norm_y_logmag_spec = norm_logmag_spec(self._y_mag_spec,
                                                    FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias,
                                                    FLAGS.PARAM.MIN_LOG_BIAS)

        # region mask net
        with tf.variable_scope('mask_net'):
            mask_net_outputs = self._norm_x_logmag_spec
            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
                with tf.variable_scope('BLSTM_mask'):

                    lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE_MASK,
                                           FLAGS.PARAM.LSTM_num_proj_MASK,
                                           FLAGS.PARAM.LSTM_ACTIVATION_MASK)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_MASK)
                        ],
                        state_is_tuple=True)
                    lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE_MASK,
                                           FLAGS.PARAM.LSTM_num_proj_MASK,
                                           FLAGS.PARAM.LSTM_ACTIVATION_MASK)
                            for _ in range(FLAGS.PARAM.RNN_LAYER_MASK)
                        ],
                        state_is_tuple=True)

                    fw_cell_masknet = lstm_fw_cell._cells
                    bw_cell_masknet = lstm_bw_cell._cells

            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
                with tf.variable_scope('BGRU_mask'):

                    gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                          FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)
                    gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                          FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)

                    fw_cell_masknet = gru_fw_cell._cells
                    bw_cell_masknet = gru_bw_cell._cells

            # dynamic rnn
            result = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=fw_cell_masknet,
                cells_bw=bw_cell_masknet,
                inputs=mask_net_outputs,
                dtype=tf.float32,
                sequence_length=self._lengths)

            mask_net_outputs, fw_final_states, bw_final_states = result
            mask_biRnn_output_size = FLAGS.PARAM.RNN_SIZE_MASK * 2
            flatten_outputs = tf.reshape(mask_net_outputs,
                                         [-1, mask_biRnn_output_size])
            out_size = FLAGS.PARAM.OUTPUT_SIZE
            with tf.variable_scope('fullconnectMask'):
                weights = tf.get_variable(
                    'weights1', [mask_biRnn_output_size, out_size],
                    initializer=tf.random_normal_initializer(stddev=0.01))
                biases = tf.get_variable(
                    'biases1', [out_size],
                    initializer=tf.constant_initializer(0.0))
            mask = tf.nn.relu(tf.matmul(flatten_outputs, weights) + biases)
            self._mask = tf.reshape(
                mask, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE])
        # endregion

        # region prepare y_estimation and y_labels
        self._y_mag_labels = self._norm_y_mag_spec
        self._y_logmag_labels = self._norm_y_logmag_spec
        if FLAGS.PARAM.TRAINING_MASK_POSITION == 'mag':
            self._y_normed_mag_estimation = self._mask * self._norm_x_mag_spec
            self._y_normed_logmag_estimation = normedMag2normedLogmag(
                self._y_normed_mag_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
        elif FLAGS.PARAM.TRAINING_MASK_POSITION == 'logmag':
            self._y_normed_logmag_estimation = self._mask * self._norm_x_logmag_spec
            self._y_normed_mag_estimation = normedLogmag2normedMag(
                self._y_normed_logmag_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
        if FLAGS.PARAM.MASK_TYPE == 'PSM':
            self._y_mag_labels *= tf.cos(self._x_theta - self._y_theta)
            self._y_logmag_labels *= tf.cos(self._x_theta - self._y_theta)
        elif FLAGS.PARAM.MASK_TYPE == 'IRM':
            pass
        else:
            tf.logging.error('Mask type error.')
            exit(-1)

        # region get infer spec
        if FLAGS.PARAM.DECODING_MASK_POSITION != FLAGS.PARAM.TRAINING_MASK_POSITION:
            print(
                'Error: DECODING_MASK_POSITION should be equal to TRAINING_MASK_POSITION when using the training_in_turn_model.'
            )
        if FLAGS.PARAM.DECODING_MASK_POSITION == 'mag':
            self._y_mag_estimation = rm_norm_mag_spec(
                self._y_normed_mag_estimation, FLAGS.PARAM.MAG_NORM_MAX)
        elif FLAGS.PARAM.DECODING_MASK_POSITION == 'logmag':
            self._y_mag_estimation = rm_norm_logmag_spec(
                self._y_normed_logmag_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
        '''
        _y_mag_estimation is the estimated mag_spec.
        '''
        # endregion

        # endregion

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)

        if behavior == self.infer:
            return

        # region get LOSS
        if FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'SPEC_MSE':  # log_mag and mag MSE
            self._logbiasnet_loss = loss.relative_reduce_sum_frame_batchsize_MSE(
                self._y_normed_mag_estimation, self._y_mag_labels, 1e-6)
            self._masknet_loss = loss.reduce_sum_frame_batchsize_MSE(
                self._y_normed_logmag_estimation, self._y_logmag_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_FLEXIBLE_POW_C":
            self._logbiasnet_loss = loss.reduce_sum_frame_batchsize_MSE_EmphasizeLowerValue(
                self._y_normed_mag_estimation, self._y_mag_labels,
                FLAGS.PARAM.POW_COEF)
            self._masknet_loss = loss.reduce_sum_frame_batchsize_MSE_EmphasizeLowerValue(
                self._y_normed_logmag_estimation, self._y_logmag_labels,
                FLAGS.PARAM.POW_COEF)
        else:
            print('Loss type error.')
            exit(-1)
        # endregion

        if behavior == self.validation:
            '''
            The validation model is not trained.
            '''
            return
        self._lr_logbiasnet = tf.Variable(0.0, trainable=False)
        self._lr_masknet = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        logbias_vars = [var for var in tvars if 'logbias_net' in var.name]
        mask_vars = [var for var in tvars if 'mask_net' in var.name]
        logbiasnet_grads, _ = tf.clip_by_global_norm(
            tf.gradients(self._logbiasnet_loss, logbias_vars),
            FLAGS.PARAM.CLIP_NORM)
        masknet_grads, _ = tf.clip_by_global_norm(
            tf.gradients(self._masknet_loss, mask_vars), FLAGS.PARAM.CLIP_NORM)
        optimizer_logbiasnet = tf.train.AdamOptimizer(self.lr_logbiasnet)
        optimizer_masknet = tf.train.AdamOptimizer(self.lr_masknet)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        # all_grads = [grad for grad in logbiasnet_grads]
        # for grad in masknet_grads:
        #   all_grads.append(grad)
        # all_vars = [var for var in logbias_vars]
        # for var in mask_vars:
        #   all_vars.append(var)
        train_logbiasnet = optimizer_logbiasnet.apply_gradients(
            zip(logbiasnet_grads, logbias_vars))
        train_masknet = optimizer_masknet.apply_gradients(
            zip(masknet_grads, mask_vars))
        if FLAGS.PARAM.TRAIN_TYPE == 'BOTH':
            self._train_op = [train_logbiasnet, train_masknet]
        elif FLAGS.PARAM.TRAIN_TYPE == 'LOGBIASNET':
            self._train_op = train_logbiasnet
        elif FLAGS.PARAM.TRAIN_TYPE == 'MASKNET':
            self._train_op = train_masknet

        self._new_lr_logbiasnet = tf.placeholder(tf.float32,
                                                 shape=[],
                                                 name='new_learning_rate1')
        self._new_lr_masknet = tf.placeholder(tf.float32,
                                              shape=[],
                                              name='new_learning_rate2')
        self._lr_update = [
            tf.assign(self._lr_logbiasnet, self._new_lr_logbiasnet),
            tf.assign(self._lr_masknet, self._new_lr_masknet)
        ]
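
The training block above partitions tf.trainable_variables() by scope name so the logbias net and the mask net each get their own clipped gradients, optimizer, and learning rate. A minimal sketch of that pattern (TF 1.x; the scope names, losses, and hyper-parameters below are illustrative only):

import tensorflow as tf

with tf.variable_scope('net_a'):
    w_a = tf.get_variable('w', [3, 1])
with tf.variable_scope('net_b'):
    w_b = tf.get_variable('w', [3, 1])

x = tf.random_normal([8, 3])
loss_a = tf.reduce_mean(tf.square(tf.matmul(x, w_a)))
loss_b = tf.reduce_mean(tf.square(tf.matmul(x, w_b)))

tvars = tf.trainable_variables()
vars_a = [v for v in tvars if 'net_a' in v.name]
vars_b = [v for v in tvars if 'net_b' in v.name]

grads_a, _ = tf.clip_by_global_norm(tf.gradients(loss_a, vars_a), clip_norm=5.0)
grads_b, _ = tf.clip_by_global_norm(tf.gradients(loss_b, vars_b), clip_norm=5.0)

train_a = tf.train.AdamOptimizer(1e-3).apply_gradients(zip(grads_a, vars_a))
train_b = tf.train.AdamOptimizer(1e-4).apply_gradients(zip(grads_b, vars_b))
train_op = [train_a, train_b]   # run both, or just one of them, per step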
Example n. 25
0
    def __init__(self,
                 x_mag_spec_batch,
                 lengths_batch,
                 y_mag_spec_batch=None,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 behavior='train'):
        '''
    behavior = 'train/validation/infer'
    '''
        if behavior != self.infer:
            assert (y_mag_spec_batch is not None)
            assert (theta_x_batch is not None)
            assert (theta_y_batch is not None)
        self._log_bias = tf.get_variable(
            'logbias', [1],
            trainable=FLAGS.PARAM.LOG_BIAS_TRAINABLE,
            initializer=tf.constant_initializer(FLAGS.PARAM.INIT_LOG_BIAS))
        self._real_logbias = self._log_bias + FLAGS.PARAM.MIN_LOG_BIAS
        self._x_mag_spec = x_mag_spec_batch
        self.indi_mean_x, self.indi_var_x = tf.nn.moments(
            self._x_mag_spec, axes=FLAGS.PARAM.BN_KEEP_DIMS, keep_dims=True)
        self._norm_x_mag_spec = indi_norm_mag_spec(self._x_mag_spec,
                                                   self.indi_mean_x,
                                                   self.indi_var_x)

        self._y_mag_spec = y_mag_spec_batch
        self.indi_mean_y, self.indi_var_y = tf.nn.moments(
            self._y_mag_spec, axes=FLAGS.PARAM.BN_KEEP_DIMS, keep_dims=True)
        self._norm_y_mag_spec = indi_norm_mag_spec(self._y_mag_spec,
                                                   self.indi_mean_y,
                                                   self.indi_var_y)

        self._lengths = lengths_batch
        self._batch_size = tf.shape(self._lengths)[0]

        self._x_theta = theta_x_batch
        self._y_theta = theta_y_batch
        self._model_type = FLAGS.PARAM.MODEL_TYPE

        self.net_input = self._norm_x_mag_spec
        self._y_labels = self._norm_y_mag_spec

        outputs = self.net_input

        lstm_attn_cell = lstm_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell(n_units, n_proj, act):
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(n_units, n_proj, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        GRU_attn_cell = GRU_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell(n_units, act):
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(n_units, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
            with tf.variable_scope('BLSTM'):

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell([
                    lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                   FLAGS.PARAM.LSTM_num_proj,
                                   FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                           state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell([
                    lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                   FLAGS.PARAM.LSTM_num_proj,
                                   FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                           state_is_tuple=True)

                fw_cell = lstm_fw_cell._cells
                bw_cell = lstm_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=fw_cell,
                    cells_bw=bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
            with tf.variable_scope('BGRU'):

                gru_fw_cell = tf.contrib.rnn.MultiRNNCell([
                    GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                  FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                          state_is_tuple=True)
                gru_bw_cell = tf.contrib.rnn.MultiRNNCell([
                    GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                  FLAGS.PARAM.LSTM_ACTIVATION)
                    for _ in range(FLAGS.PARAM.RNN_LAYER)
                ],
                                                          state_is_tuple=True)

                fw_cell = gru_fw_cell._cells
                bw_cell = gru_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=fw_cell,
                    cells_bw=bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        # region full connection get mask
        # calcu rnn output size
        in_size = FLAGS.PARAM.RNN_SIZE
        mask = None
        if self._model_type.upper()[0] == 'B':  # bidirection
            rnn_output_num = FLAGS.PARAM.RNN_SIZE * 2
            if (FLAGS.PARAM.MODEL_TYPE == 'BLSTM'
                    and FLAGS.PARAM.LSTM_num_proj is not None):
                rnn_output_num = 2 * FLAGS.PARAM.LSTM_num_proj
            in_size = rnn_output_num
        outputs = tf.reshape(outputs, [-1, in_size])
        out_size = FLAGS.PARAM.OUTPUT_SIZE
        with tf.variable_scope('fullconnectOut'):
            weights = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases = tf.get_variable('biases1', [out_size],
                                     initializer=tf.constant_initializer(
                                         FLAGS.PARAM.INIT_MASK_VAL))
        if FLAGS.PARAM.TIME_NOSOFTMAX_ATTENTION:
            with tf.variable_scope('fullconnectCoef'):
                weights_coef = tf.get_variable(
                    'weights_coef', [in_size, 1],
                    initializer=tf.random_normal_initializer(mean=1.0,
                                                             stddev=0.01))
                biases_coef = tf.get_variable(
                    'biases_coef', [1],
                    initializer=tf.constant_initializer(0.0))
            raw_mask = tf.reshape(
                tf.matmul(outputs, weights) + biases,
                [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE
                 ])  # [batch,time,fre]
            batch_coef_vec = tf.nn.relu(
                tf.reshape(
                    tf.matmul(outputs, weights_coef) + biases_coef,
                    [self._batch_size, -1]))  # [batch, time]
            mask = tf.multiply(
                raw_mask, tf.reshape(batch_coef_vec,
                                     [self._batch_size, -1, 1]))
        else:
            mask = tf.nn.relu(tf.matmul(outputs, weights) + biases)
        # endregion

        self._mask = tf.reshape(
            mask, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE])

        # region get infer spec
        if not FLAGS.PARAM.USE_ESTIMATED_MEAN_VAR:
            tmp_mean, tmp_var = self.indi_mean_x, self.indi_var_x
        else:
            tmp_mean, tmp_var = tf.nn.moments(self._mask *
                                              self._norm_x_mag_spec,
                                              axes=FLAGS.PARAM.BN_KEEP_DIMS,
                                              keep_dims=True)
        self._y_mag_estimation = rm_indi_norm_mag_spec(
            self._mask * self._norm_x_mag_spec, tmp_mean, tmp_var)
        '''
        _y_mag_estimation is the estimated mag_spec.
        _y_estimation is the loss target (mag_spec).
        '''
        # endregion

        # region prepare y_estimation and y_labels
        self._y_estimation = self._mask * self._norm_x_mag_spec
        if FLAGS.PARAM.MASK_TYPE == 'PSM':
            self._y_labels *= tf.cos(self._x_theta - self._y_theta)
        elif FLAGS.PARAM.MASK_TYPE == 'IRM':
            pass
        else:
            tf.logging.error('Mask type error.')
            exit(-1)

        if FLAGS.PARAM.TRAINING_MASK_POSITION != FLAGS.PARAM.LABEL_TYPE:
            print(
                "error, FLAGS.PARAM.TRAINING_MASK_POSITION != FLAGS.PARAM.LABEL_TYPE."
            )
            exit(-1)
        # endregion

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if behavior == self.infer:
            return

        # region get LOSS
        if FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'SPEC_MSE':  # log_mag and mag MSE
            self._loss = loss.reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_LOWF_EN":
            self._loss = loss.reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "FAIR_SPEC_MSE":
            self._loss = loss.fair_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_FLEXIBLE_POW_C":
            self._loss = loss.reduce_sum_frame_batchsize_MSE_EmphasizeLowerValue(
                self._y_estimation, self._y_labels, FLAGS.PARAM.POW_COEF)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "RELATED_MSE":
            self._loss = loss.relative_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.RELATED_MSE_IGNORE_TH)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE":
            self._loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE(
                self._y_estimation, self._y_labels,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG)
        else:
            print('Loss type error.')
            exit(-1)
        # endregion

        if behavior == self.validation:
            '''
            The validation model is not trained.
            '''
            return
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          FLAGS.PARAM.CLIP_NORM)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
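
The example above normalizes each utterance with its own mean and variance from tf.nn.moments and later undoes the normalization. The helpers indi_norm_mag_spec / rm_indi_norm_mag_spec are not shown on this page; the following is only a plausible sketch of such a pair (an assumption, not the original implementation):

import tensorflow as tf

def indi_norm(x, mean, var, eps=1e-8):
    # per-utterance zero-mean / unit-variance normalization
    return (x - mean) / tf.sqrt(var + eps)

def rm_indi_norm(x, mean, var, eps=1e-8):
    # inverse of indi_norm
    return x * tf.sqrt(var + eps) + mean

mag = tf.random_uniform([2, 5, 4])                          # [batch, time, freq]
mean, var = tf.nn.moments(mag, axes=[1, 2], keep_dims=True)
normed = indi_norm(mag, mean, var)
restored = rm_indi_norm(normed, mean, var)                  # approximately equals mag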
Example n. 26
0
    def __init__(self, config, inputs_cmvn, inputs, labels1, labels2, lengths, infer=False):  # EPOCH
        self._inputs = inputs_cmvn
        self._mixed = inputs
        self._labels1 = labels1
        self._labels2 = labels2
        self._lengths = lengths
        self._model_type = config.model_type
        if infer:  # if infer, we prefer to run one utterance one time.
            config.batch_size = 1

        outputs = self._inputs
        # The first layer is a feed-forward layer that
        # transforms the input to the right size before feeding it into the RNN.
        with tf.variable_scope('forward1'):
            outputs = tf.reshape(outputs, [-1, config.input_size])
            outputs = tf.layers.dense(outputs, units=config.rnn_size,
                                      activation=tf.nn.tanh, kernel_initializer=glorot_uniform_initializer(),
                                      reuse=tf.get_variable_scope().reuse)
            outputs = tf.reshape(
                outputs, [config.batch_size, -1, config.rnn_size])

        # Configure the LSTM or BLSTM model
        # For BLSTM, we use BasicLSTMCell. For LSTM, we use LSTMCell.
        # You can change them and test the performance...
        if config.model_type.lower() == 'blstm':
            with tf.variable_scope('blstm'):
                cell = tf.contrib.rnn.BasicLSTMCell(config.rnn_size)
                if not infer and config.keep_prob < 1.0:
                    cell = tf.contrib.rnn.DropoutWrapper(
                        cell, output_keep_prob=config.keep_prob)

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [cell] * config.rnn_num_layers)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [cell] * config.rnn_num_layers)
                lstm_fw_cell = _unpack_cell(lstm_fw_cell)
                lstm_bw_cell = _unpack_cell(lstm_bw_cell)
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result
        if config.model_type.lower() == 'lstm':
            with tf.variable_scope('lstm'):
                def lstm_cell():
                    return tf.contrib.rnn.LSTMCell(
                        config.rnn_size, forget_bias=1.0, use_peepholes=True,
                        initializer=tf.contrib.layers.xavier_initializer(),
                        state_is_tuple=True, activation=tf.tanh)
                attn_cell = lstm_cell
                if not infer and config.keep_prob < 1.0:
                    def attn_cell():
                        return tf.contrib.rnn.DropoutWrapper(
                            lstm_cell(), output_keep_prob=config.keep_prob)
                cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                self._initial_state = cell.zero_state(
                    config.batch_size, tf.float32)
                state = self.initial_state
                outputs, state = tf.nn.dynamic_rnn(
                    cell, outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths,
                    initial_state=self.initial_state)
                self._final_state = state

        # Feed forward layer. Transform the RNN output to the right output size
        with tf.variable_scope('forward2'):
            if self._model_type.lower() == 'blstm':
                outputs = tf.reshape(outputs, [-1, 2*config.rnn_size])
                in_size = 2*config.rnn_size
            else:
                outputs = tf.reshape(outputs, [-1, config.rnn_size])
                in_size = config.rnn_size
            out_size = config.output_size
            weights1 = tf.get_variable('weights1', [in_size, out_size],
                                       initializer=tf.random_normal_initializer(stddev=0.01))
            biases1 = tf.get_variable('biases1', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            weights2 = tf.get_variable('weights2', [in_size, out_size],
                                       initializer=tf.random_normal_initializer(stddev=0.01))
            biases2 = tf.get_variable('biases2', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            mask1 = tf.nn.sigmoid(tf.matmul(outputs, weights1) + biases1)
            mask2 = tf.nn.sigmoid(tf.matmul(outputs, weights2) + biases2)
            self._activations1 = tf.reshape(
                mask1, [config.batch_size, -1, config.output_size])
            self._activations2 = tf.reshape(
                mask2, [config.batch_size, -1, config.output_size])
            self._cleaned1 = self._activations1 * \
                self._mixed[:, :, config.czt_dim:]
            self._cleaned2 = self._activations2 * \
                self._mixed[:, :, config.czt_dim:]
        # Ability to save the model
        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if infer:
            return
        # Compute loss (MSE)
        cost1 = tf.reduce_mean(tf.reduce_sum(tf.pow(self._cleaned1-self._labels1, 2), 1)
                               + tf.reduce_sum(tf.pow(self._cleaned2-self._labels2, 2), 1), 1)
        cost2 = tf.reduce_mean(tf.reduce_sum(tf.pow(self._cleaned2-self._labels1, 2), 1)
                               + tf.reduce_sum(tf.pow(self._cleaned1-self._labels2, 2), 1), 1)
        idx = tf.cast(cost1 > cost2, tf.float32)
        min_cost = idx*cost2+(1-idx)*cost1
        max_cost = idx*cost1+(1-idx)*cost2
        ## Prob-PIT cost: soft minimum over the two permutation costs (see the sketch after this example)
        ##########################################################################################
        self.gamma = tf.Variable(0.00000000000000001, trainable=False)
        const = tf.constant(0.00000000001)
        def f1(): return tf.reduce_sum(min_cost)
        def f2(): return tf.reduce_sum(min_cost - self.gamma *
                                       tf.log(tf.exp((min_cost-max_cost)/self.gamma)+1))
        self._loss = tf.cond(tf.less(self.gamma, const), f1, f2)
        ########################################################################
        if tf.get_variable_scope().reuse:
            return
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        # optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
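
The Prob-PIT branch above replaces the hard minimum over the two permutation costs with min_cost - gamma * log(1 + exp((min_cost - max_cost) / gamma)), which is algebraically the soft minimum -gamma * log(exp(-cost1 / gamma) + exp(-cost2 / gamma)); as gamma approaches zero it falls back to the plain min_cost of branch f1. A tiny NumPy sketch with made-up numbers:

import numpy as np

cost1, cost2 = 5.0, 2.0
min_cost, max_cost = min(cost1, cost2), max(cost1, cost2)

for gamma in (1e-6, 0.5, 2.0):
    soft = min_cost - gamma * np.log1p(np.exp((min_cost - max_cost) / gamma))
    print('gamma=%g  soft-min=%.4f' % (gamma, soft))
    # gamma=1e-6 gives ~2.0000 (the hard min); larger gamma pulls the value lower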
Example n. 27
0
    def __call__(self, inputs, seq_len, keep_prob=None, is_train=None):
        with tf.variable_scope(self.scope):
            output, *_ = stack_bidirectional_dynamic_rnn(
                self.cells_fw, self.cells_bw, inputs,
                sequence_length=seq_len, dtype=tf.float32)
            output = dropout(output, keep_prob, is_train)
        return output
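
For reference, a minimal self-contained call of stack_bidirectional_dynamic_rnn in the same spirit as the wrapper above (TF 1.x; all sizes below are made up):

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, stack_bidirectional_dynamic_rnn

batch, time, feat, hidden, layers = 2, 7, 5, 16, 2
inputs = tf.random_normal([batch, time, feat])
lengths = tf.constant([7, 4], tf.int32)           # valid steps per sequence

cells_fw = [LSTMCell(hidden) for _ in range(layers)]
cells_bw = [LSTMCell(hidden) for _ in range(layers)]

outputs, fw_states, bw_states = stack_bidirectional_dynamic_rnn(
    cells_fw, cells_bw, inputs, sequence_length=lengths, dtype=tf.float32)
# outputs: [batch, time, 2 * hidden], forward and backward outputs concatenated

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(outputs).shape)                # (2, 7, 32)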
Example n. 28
0
    def build_net(self):

        # build auxiliary network to get the speaker embedding used for speaker extraction network
        with tf.variable_scope('spk_embed') as scope:
            spk_embed_aux = self.build_net_aux(self._inputs_aux,
                                               self._lengths_aux)

        outputs = tf.reshape(
            self._inputs,
            [self._config.batch_size, -1, self._config.input_size])
        # BLSTM layer
        with tf.variable_scope('blstm'):

            def lstm_cell():
                if not self._infer and self._config.keep_prob < 1.0:
                    return tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(self._config.rnn_size),
                        output_keep_prob=self._config.keep_prob)
                else:
                    return tf.contrib.rnn.BasicLSTMCell(self._config.rnn_size)

            # tf.nn.rnn_cell.MultiRNNCell in r1.12
            lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_fw_cell = self._unpack_cell(lstm_fw_cell)
            lstm_bw_cell = self._unpack_cell(lstm_bw_cell)
            outputs, fw_final_states, bw_final_states = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=lstm_fw_cell,
                cells_bw=lstm_bw_cell,
                inputs=outputs,
                dtype=tf.float32,
                sequence_length=self._lengths)

        # speaker adaptation layer by concat the output from auxiliary network
        with tf.variable_scope('adapt_concat'):
            outputs = tf.reshape(
                outputs,
                [self._config.batch_size, -1, 2 * self._config.rnn_size])
            frame_num = tf.shape(outputs)[1]
            spk_embed = tf.transpose(tf.reshape(
                tf.tile(tf.reshape(spk_embed_aux, (-1, 1)), (frame_num, 1)),
                (frame_num, self._config.batch_size,
                 self._config.aux_output_size)),
                                     perm=[1, 0, 2])

            outputs = tf.concat([outputs, spk_embed], 2)

            # zero out the frames beyond each utterance's length after concatenating the speaker embeddings
            outputs = tf.multiply(
                tf.expand_dims(
                    tf.sequence_mask(self._lengths, dtype=tf.float32), -1),
                outputs)
            concat_dim = 2 * self._config.rnn_size + self._config.aux_output_size

        outputs = tf.reshape(outputs, [-1, concat_dim])

        # one more fully connected layer
        with tf.variable_scope('fc1'):
            weights1, biases1 = self._weight_and_bias(concat_dim,
                                                      self._config.rnn_size)
            outputs = tf.nn.relu(tf.matmul(outputs, weights1) + biases1)

            outputs = tf.reshape(
                outputs, [self._config.batch_size, -1, self._config.rnn_size])

        # BLSTM layer
        with tf.variable_scope('blstm2'):

            def lstm_cell():
                if not self._infer and self._config.keep_prob < 1.0:
                    return tf.contrib.rnn.DropoutWrapper(
                        tf.contrib.rnn.BasicLSTMCell(self._config.rnn_size),
                        output_keep_prob=self._config.keep_prob)
                else:
                    return tf.contrib.rnn.BasicLSTMCell(self._config.rnn_size)

            # tf.nn.rnn_cell.MultiRNNCell in r1.12
            lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                [lstm_cell() for _ in range(self._config.rnn_num_layers)],
                state_is_tuple=True)
            lstm_fw_cell = self._unpack_cell(lstm_fw_cell)
            lstm_bw_cell = self._unpack_cell(lstm_bw_cell)
            outputs, fw_final_states, bw_final_states = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=lstm_fw_cell,
                cells_bw=lstm_bw_cell,
                inputs=outputs,
                dtype=tf.float32,
                sequence_length=self._lengths)
            outputs = tf.reshape(outputs, [-1, 2 * self._config.rnn_size])

        # one more fully connected layer
        with tf.variable_scope('fc2'):
            weights2, biases2 = self._weight_and_bias(
                2 * self._config.rnn_size, self._config.rnn_size)
            outputs = tf.nn.relu(tf.matmul(outputs, weights2) + biases2)

        # Mask estimation layer
        with tf.variable_scope('mask'):
            weights_m, biases_m = self._weight_and_bias(
                self._config.rnn_size, self._config.output_size)
            if self._config.mask_type.lower() == 'relu':
                mask = tf.nn.relu(tf.matmul(outputs, weights_m) + biases_m)
            else:
                mask = tf.nn.sigmoid(tf.matmul(outputs, weights_m) + biases_m)

            self._mask = tf.reshape(
                mask, [self._config.batch_size, -1, self._config.output_size])

            self._sep = self._mask * self._mixed

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=50)
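
Several snippets on this page, including the one above, call a helper _unpack_cell to turn a MultiRNNCell back into the list of per-layer cells that stack_bidirectional_dynamic_rnn expects; the helper itself is never shown. The following is only a plausible sketch, mirroring the ._cells access used elsewhere on this page, not the original code:

import tensorflow as tf

def _unpack_cell(cell):
    """If `cell` is a MultiRNNCell, return its list of sub-cells; otherwise wrap it in a list."""
    if isinstance(cell, tf.contrib.rnn.MultiRNNCell):
        return cell._cells   # the private list the other examples read directly
    return [cell]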
Example n. 29
0
    def __init__(self,
                 x_mag_spec_batch,
                 lengths_batch,
                 y_mag_spec_batch=None,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 behavior='train'):
        '''
    behavior = 'train/validation/infer'
    '''
        assert (theta_x_batch is not None)
        if behavior != self.infer:
            assert (y_mag_spec_batch is not None)
            assert (theta_y_batch is not None)
        self._x_mag_spec = x_mag_spec_batch
        self._norm_x_mag_spec = norm_mag_spec(self._x_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)

        self._y_mag_spec = y_mag_spec_batch
        self._norm_y_mag_spec = norm_mag_spec(self._y_mag_spec,
                                              FLAGS.PARAM.MAG_NORM_MAX)

        self._lengths = lengths_batch
        self._batch_size = tf.shape(self._lengths)[0]

        self._x_theta = theta_x_batch
        self._y_theta = theta_y_batch
        # self._norm_x_theta = self._x_theta/(2.0*FLAGS.PARAM.PI)+0.5
        # self._norm_y_theta = self._y_theta/(2.0*FLAGS.PARAM.PI)+0.5
        self._model_type = FLAGS.PARAM.MODEL_TYPE

        self.net_input = tf.concat([self._norm_x_mag_spec, self._x_theta],
                                   axis=-1)
        self._y_mag_labels = self._norm_y_mag_spec
        # self._y_theta_labels = self._norm_y_theta
        self._y_theta_labels = self._y_theta

        outputs = self.net_input

        lstm_attn_cell = lstm_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell(n_units, n_proj, act):
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(n_units, n_proj, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        GRU_attn_cell = GRU_cell
        if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell(n_units, act):
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(n_units, act),
                    output_keep_prob=FLAGS.PARAM.KEEP_PROB)

        with tf.variable_scope("BiRNN"):
            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
                with tf.variable_scope('BLSTM'):

                    lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                           FLAGS.PARAM.LSTM_num_proj,
                                           FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)
                    lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                           FLAGS.PARAM.LSTM_num_proj,
                                           FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)
                    fw_cell = lstm_fw_cell._cells
                    bw_cell = lstm_bw_cell._cells

            if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
                with tf.variable_scope('BGRU'):

                    gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                          FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)
                    gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
                        [
                            GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                                          FLAGS.PARAM.LSTM_ACTIVATION)
                            for _ in range(FLAGS.PARAM.RNN_LAYER)
                        ],
                        state_is_tuple=True)
                    fw_cell = gru_fw_cell._cells
                    bw_cell = gru_bw_cell._cells

            result = rnn.stack_bidirectional_dynamic_rnn(
                cells_fw=fw_cell,
                cells_bw=bw_cell,
                inputs=outputs,
                dtype=tf.float32,
                sequence_length=self._lengths)
            outputs, fw_final_states, bw_final_states = result

        # region full connection get mask
        # calcu rnn output size
        in_size = FLAGS.PARAM.RNN_SIZE
        if self._model_type.upper()[0] == 'B':  # bidirection
            rnn_output_num = FLAGS.PARAM.RNN_SIZE * 2
            if (FLAGS.PARAM.MODEL_TYPE == 'BLSTM'
                    and FLAGS.PARAM.LSTM_num_proj is not None):
                rnn_output_num = 2 * FLAGS.PARAM.LSTM_num_proj
            in_size = rnn_output_num
        outputs = tf.reshape(outputs, [-1, in_size])
        out_size = FLAGS.PARAM.OUTPUT_SIZE
        with tf.variable_scope('fullconnectOut1'):
            out1_dense1 = tf.layers.Dense(out_size, activation='tanh')
            out1_dense2 = tf.layers.Dense(
                out_size // 2,
                activation='relu' if FLAGS.PARAM.ReLU_MASK else None,
                bias_initializer=tf.constant_initializer(
                    FLAGS.PARAM.INIT_MASK_VAL))
            self._mask1 = out1_dense2(out1_dense1(outputs))

        with tf.variable_scope('fullconnectOut2'):
            out2_dense1 = tf.layers.Dense(out_size, activation='tanh')
            out2_dense2 = tf.layers.Dense(
                out_size // 2,
                activation='relu' if FLAGS.PARAM.ReLU_MASK else None,
                bias_initializer=tf.constant_initializer(
                    FLAGS.PARAM.INIT_MASK_VAL))
            self._mask2 = out2_dense2(out2_dense1(outputs))

        self._mask1 = tf.reshape(
            self._mask1, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE // 2])
        self._mask2 = tf.reshape(
            self._mask2, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE // 2])

        self._mask = tf.concat([self._mask1, self._mask2], axis=-1)
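        # mask1 is later applied to the normalized magnitude spectrum and
        # mask2 to the phase (see the "get infer spec" region below).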
        # endregion

        # mask type
        if FLAGS.PARAM.MASK_TYPE == 'PSM':
            self._y_mag_labels *= tf.cos(self._x_theta - self._y_theta)
        elif FLAGS.PARAM.MASK_TYPE == 'fixPSM':
            self._y_mag_labels *= (1.0 +
                                   tf.cos(self._x_theta - self._y_theta)) * 0.5
        elif FLAGS.PARAM.MASK_TYPE == 'AcutePM':
            self._y_mag_labels *= tf.nn.relu(
                tf.cos(self._x_theta - self._y_theta))
        elif FLAGS.PARAM.MASK_TYPE == 'IRM':
            pass
        else:
            tf.logging.error('Mask type error.')
            exit(-1)

        # region get infer spec
        # self._y_est = self._mask*self.net_input # est->estimation
        # self._norm_y_mag_est = tf.slice(self._y_est,[0,0,0],[-1,-1,FLAGS.PARAM.FFT_DOT])
        # self._norm_y_theta_est = tf.slice(self._y_est,[0,0,FLAGS.PARAM.FFT_DOT],[-1,-1,-1])
        self._norm_y_mag_est = self._mask1 * self._norm_x_mag_spec
        self._norm_y_theta_est = self._mask2 * self._x_theta
        self._y_mag_est = rm_norm_mag_spec(self._norm_y_mag_est,
                                           FLAGS.PARAM.MAG_NORM_MAX)
        # self._y_theta_est = (self._norm_y_theta_est-0.5)*2.0*FLAGS.PARAM.PI
        self._y_theta_est = self._norm_y_theta_est
        # endregion

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if behavior == self.infer:
            return

        # region get LOSS
        if FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'SPEC_MSE':  # log_mag and mag MSE
            self._mag_loss = loss.reduce_sum_frame_batchsize_MSE(
                self._norm_y_mag_est, self._y_mag_labels)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "RELATED_MSE":
            self._mag_loss = loss.relative_reduce_sum_frame_batchsize_MSE(
                self._norm_y_mag_est, self._y_mag_labels,
                FLAGS.PARAM.RELATED_MSE_IGNORE_TH)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE":
            self._mag_loss = loss.auto_ingore_relative_reduce_sum_frame_batchsize_MSE(
                self._norm_y_mag_est, self._y_mag_labels,
                FLAGS.PARAM.AUTO_RELATED_MSE_AXIS_FIT_DEG)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "AUTO_RELATED_MSE_USE_COS":
            self._mag_loss = loss.cos_auto_ingore_relative_reduce_sum_frame_batchsize_MSE(
                self._norm_y_mag_est, self._y_mag_labels,
                FLAGS.PARAM.COS_AUTO_RELATED_MSE_W)
        else:
            tf.logging.error('Magnitude_Loss type error.')
            exit(-1)

        if FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'COS':
            self._phase_loss = tf.reduce_sum(
                tf.reduce_mean(
                    tf.pow(
                        tf.abs(1.0 - tf.cos(self._y_theta_est -
                                            self._y_theta_labels)),
                        FLAGS.PARAM.PHASE_LOSS_INDEX), 1))
        elif FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'MAG_WEIGHTED_COS':
            self._phase_loss = loss.magnitude_weighted_cos_deltaTheta(
                self._y_theta_est,
                self._y_theta_labels,
                self._norm_y_mag_spec,
                index_=FLAGS.PARAM.PHASE_LOSS_INDEX)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'MIXMAG_WEIGHTED_COS':
            self._phase_loss = loss.magnitude_weighted_cos_deltaTheta(
                self._y_theta_est,
                self._y_theta_labels,
                self._norm_x_mag_spec,
                index_=FLAGS.PARAM.PHASE_LOSS_INDEX)
        elif FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'ABSOLUTE':
            self._phase_loss = tf.reduce_sum(
                tf.reduce_mean(
                    tf.pow(tf.abs(self._y_theta_est - self._y_theta_labels),
                           FLAGS.PARAM.PHASE_LOSS_INDEX), 1))
        elif FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'MAG_WEIGHTED_ABSOLUTE':
            self._phase_loss = tf.reduce_sum(
                tf.reduce_mean(
                    tf.pow(
                        tf.abs(self._y_theta_est - self._y_theta_labels) *
                        self._norm_y_mag_spec * 10.0,
                        FLAGS.PARAM.PHASE_LOSS_INDEX), 1))
        elif FLAGS.PARAM.LOSS_FUNC_FOR_PHASE_SPEC == 'MIXMAG_WEIGHTED_ABSOLUTE':
            self._phase_loss = tf.reduce_sum(
                tf.reduce_mean(
                    tf.pow(
                        tf.abs(self._y_theta_est - self._y_theta_labels) *
                        self._norm_x_mag_spec * 10.0,
                        FLAGS.PARAM.PHASE_LOSS_INDEX), 1))
        else:
            tf.logging.error('Phase_Loss type error.')
            exit(-1)

        self._loss = self._mag_loss + self._phase_loss
        # endregion

        if behavior == self.validation:
            '''
            the validation model cannot be trained.
            '''
            return
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          FLAGS.PARAM.CLIP_NORM)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
    def build(self):
        with tf.variable_scope('Neural_Network') as vs:

            def lstm_cell():
                return tf.contrib.rnn.LSTMCell(
                    self.rnn_size,
                    forget_bias=1.0,
                    use_peepholes=True,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    state_is_tuple=True,
                    activation=tf.tanh)

            attn_cell = lstm_cell
            if self.training and self.dropouts < 1.0:

                def attn_cell():
                    return tf.contrib.rnn.DropoutWrapper(
                        lstm_cell(), output_keep_prob=self.dropouts)

            with tf.variable_scope('Inputs'):
                self.x_noisy = tf.placeholder(
                    tf.float32,
                    shape=[None, self.dim_in[0], self.dim_in[1]],
                    name='x')

            with tf.variable_scope('Outputs'):
                self.y_clean = tf.placeholder(tf.float32,
                                              shape=[None, self.dim_out],
                                              name='y_clean')
                # self.y_clean = tf.reshape(self.y_clean, (-1, self.dim_out))

            with tf.variable_scope('DNN'):
                inputs = tf.reshape(self.x_noisy,
                                    (-1, self.dim_in[0] * self.dim_in[1]))
                layer1 = tf.layers.dense(inputs=inputs,
                                         units=1024,
                                         activation=tf.nn.relu)
                # layer1 = tf.layers.dropout(layer1, rate=self.dropouts, training=self.training)
                layer1 = tf.reshape(layer1, [-1, 1, 1024])

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(self.rnn_num_layers)],
                    state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(self.rnn_num_layers)],
                    state_is_tuple=True)

                lstm_fw_cell = _unpack_cell(lstm_fw_cell)
                lstm_bw_cell = _unpack_cell(lstm_bw_cell)
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=layer1,
                    dtype=tf.float32)
                # sequence_length=self.batch_size)
                layer2, fw_final_states, bw_final_states = result
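                # layer2: [batch_size, max_time, 2 * rnn_size]; forward and
                # backward outputs are concatenated on the last axis.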

                layer2 = tf.reshape(layer2, [-1, 2 * self.rnn_size])
                in_size = 2 * self.rnn_size

                self.enhanced_outputs = tf.layers.dense(inputs=layer2,
                                                        units=self.dim_out,
                                                        activation=None)

            with tf.name_scope('loss'):
                self.loss = tf.losses.mean_squared_error(
                    self.y_clean, self.enhanced_outputs)
                tf.summary.scalar('Loss', self.loss)

            with tf.name_scope("exp_learning_rate"):
                self.global_step = tf.Variable(0, trainable=False)
                self.exp_learning_rate = tf.train.exponential_decay(
                    self.lr,
                    global_step=self.global_step,
                    decay_steps=50000,
                    decay_rate=0.8,
                    staircase=False)
                tf.summary.scalar('Learning rate', self.exp_learning_rate)

            optimizer = tf.train.AdamOptimizer(self.lr)
            gradients, v = zip(*optimizer.compute_gradients(self.loss))
            gradients, _ = tf.clip_by_global_norm(gradients, 0.5)
            self.optimizer = optimizer.apply_gradients(
                zip(gradients, v), global_step=self.global_step)
            self.saver = tf.train.Saver(tf.trainable_variables(),
                                        max_to_keep=30)
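
The helper _unpack_cell used above is not defined in this snippet. A plausible minimal sketch, assuming it simply returns the list of sub-cells wrapped by a MultiRNNCell (the name and behavior are inferred, not taken from the original source):

import tensorflow as tf

def _unpack_cell(cell):
    """Return the list of sub-cells wrapped by a MultiRNNCell (sketch)."""
    # stack_bidirectional_dynamic_rnn wants one RNNCell per layer and per
    # direction, so a MultiRNNCell built for convenience is unpacked here.
    if isinstance(cell, tf.contrib.rnn.MultiRNNCell):
        return cell._cells  # pylint: disable=protected-access
    return [cell]
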
Example n. 31
0
    def __init__(self,
                 inputs_batch,
                 label_batch,
                 lengths_batch,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 infer=False):
        self._inputs = inputs_batch
        self._mixed = self._inputs
        self._labels = label_batch
        self._lengths = lengths_batch

        self.batch_size = tf.shape(self._lengths)[0]
        self._model_type = NNET_PARAM.MODEL_TYPE

        outputs = self._inputs

        def lstm_cell():
            return tf.contrib.rnn.LSTMCell(
                NNET_PARAM.RNN_SIZE,
                forget_bias=1.0,
                use_peepholes=True,
                num_proj=NNET_PARAM.LSTM_num_proj,
                initializer=tf.contrib.layers.xavier_initializer(),
                state_is_tuple=True,
                activation=NNET_PARAM.LSTM_ACTIVATION)

        lstm_attn_cell = lstm_cell
        if not infer and NNET_PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=NNET_PARAM.KEEP_PROB)

        def GRU_cell():
            return tf.contrib.rnn.GRUCell(
                NNET_PARAM.RNN_SIZE,
                # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=NNET_PARAM.LSTM_ACTIVATION)

        GRU_attn_cell = GRU_cell
        if not infer and NNET_PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(), output_keep_prob=NNET_PARAM.KEEP_PROB)

        if NNET_PARAM.MODEL_TYPE.upper() == 'BLSTM':
            with tf.variable_scope('BLSTM'):

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [lstm_attn_cell() for _ in range(NNET_PARAM.RNN_LAYER)],
                    state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [lstm_attn_cell() for _ in range(NNET_PARAM.RNN_LAYER)],
                    state_is_tuple=True)

                lstm_fw_cell = lstm_fw_cell._cells
                lstm_bw_cell = lstm_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result
        if NNET_PARAM.MODEL_TYPE.upper() == 'BGRU':
            with tf.variable_scope('BGRU'):

                gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [GRU_attn_cell() for _ in range(NNET_PARAM.RNN_LAYER)],
                    state_is_tuple=True)
                gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [GRU_attn_cell() for _ in range(NNET_PARAM.RNN_LAYER)],
                    state_is_tuple=True)

                gru_fw_cell = gru_fw_cell._cells
                gru_bw_cell = gru_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=gru_fw_cell,
                    cells_bw=gru_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        with tf.variable_scope('fullconnectOut'):
            if self._model_type.upper()[0] == 'B':  # bidirectional
                outputs = tf.reshape(outputs,
                                     [-1, 2 * NNET_PARAM.LSTM_num_proj])
                in_size = 2 * NNET_PARAM.LSTM_num_proj
            out_size = NNET_PARAM.OUTPUT_SIZE
            weights = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases = tf.get_variable('biases1', [out_size],
                                     initializer=tf.constant_initializer(0.0))
            mask = tf.nn.relu(tf.matmul(outputs, weights) + biases)
            self._activations_t = tf.reshape(
                mask, [self.batch_size, -1, NNET_PARAM.OUTPUT_SIZE])

            # mask clip
            self._activations = self._activations_t
            # self._activations = tf.clip_by_value(self._activations_t,-1,1.5)

            masked_mag = None
            if DATA_PARAM.FEATURE_TYPE == 'LOG_MAG' and DATA_PARAM.MASK_ON_MAG_EVEN_LOGMAG:
                mag = data_tool.rmNormalization(self._mixed, eager=False)

                # Normalize to (0, 1): multiplying a large number by a small one
                # loses precision, and the mask is small, so shrink mag first.
                mag = tf.clip_by_value(mag, DATA_PARAM.MAG_NORM_MIN,
                                       DATA_PARAM.MAG_NORM_MAX)
                mag -= DATA_PARAM.MAG_NORM_MIN
                mag /= (DATA_PARAM.MAG_NORM_MAX - DATA_PARAM.MAG_NORM_MIN)

                # add mask on magnitude spectrum
                masked_mag = self._activations * mag

                # rm mag norm
                masked_mag = masked_mag * (
                    DATA_PARAM.MAG_NORM_MAX -
                    DATA_PARAM.MAG_NORM_MIN) + DATA_PARAM.MAG_NORM_MIN

                # change to log_mag feature
                log_masked_mag = tf.log(masked_mag +
                                        DATA_PARAM.LOG_BIAS) / tf.log(10.0)
                log_masked_mag = tf.clip_by_value(log_masked_mag,
                                                  DATA_PARAM.LOG_NORM_MIN,
                                                  DATA_PARAM.LOG_NORM_MAX)
                log_masked_mag -= DATA_PARAM.LOG_NORM_MIN
                log_masked_mag /= (DATA_PARAM.LOG_NORM_MAX -
                                   DATA_PARAM.LOG_NORM_MIN)
                self._cleaned = log_masked_mag
            else:
                self._cleaned = self._activations * self._mixed

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if infer:
            if DATA_PARAM.FEATURE_TYPE == 'LOG_MAG' and DATA_PARAM.MASK_ON_MAG_EVEN_LOGMAG:
                self._cleaned = masked_mag
            return

        if NNET_PARAM.MASK_TYPE == 'PSIRM':
            self._labels *= tf.cos(theta_x_batch - theta_y_batch)

        self._loss = NNET_PARAM.LOSS_FUNC(self.cleaned, self.labels)
        if tf.get_variable_scope().reuse:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          NNET_PARAM.CLIP_NORM)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
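
Each of these models drives its learning-rate schedule through a non-trainable variable, a placeholder and an assign op. A minimal, self-contained sketch of that pattern (TF 1.x; variable names are illustrative):

import tensorflow as tf

lr = tf.Variable(0.0, trainable=False)
new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
lr_update = tf.assign(lr, new_lr)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The training loop lowers the learning rate, e.g. once per epoch.
    sess.run(lr_update, feed_dict={new_lr: 1e-3})
    print(sess.run(lr))  # -> 0.001
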
Example n. 32
0
    def __init__(self, config, infer=False):
        # self._inputs = inputs
        # self._mixed = inputs
        # self._labels1 = tf.slice(labels, [0, 0, 0], [-1, -1, config.output_size])
        # self._labels2 = tf.slice(labels, [0, 0, config.output_size], [-1, -1, -1])
        # self._lengths = lengths
        with tf.name_scope('placeholder'):
            self._inputs = tf.placeholder(tf.float32,
                                          [None, None, config.input_size],
                                          name='inputs')
            self._mixed = self._inputs

            self._labels = tf.placeholder(tf.float32,
                                          [None, None, config.output_size * 2],
                                          name='labels')
            self._labels1 = tf.slice(self._labels, [0, 0, 0],
                                     [-1, -1, config.output_size])
            self._labels2 = tf.slice(self._labels, [0, 0, config.output_size],
                                     [-1, -1, -1])

            self._lengths = tf.placeholder(tf.float32, [None], name='lengths')

            self.batch_size = tf.shape(self._inputs)[0]

            self._model_type = config.model_type

            outputs = self._inputs
        # The first feed-forward layer transforms the input to the right size
        # before it is fed into the RNN.

        with tf.variable_scope('forward1'):
            outputs = tf.reshape(outputs, [-1, config.input_size])
            outputs = tf.layers.dense(outputs,
                                      units=config.rnn_size,
                                      activation=tf.nn.tanh,
                                      reuse=tf.get_variable_scope().reuse)
            # print(outputs.name,'__________________________________________________')
            # print([x.name for x in tf.global_variables()],'_______________________________________')
            outputs = tf.reshape(outputs,
                                 [self.batch_size, -1, config.rnn_size])

        def lstm_cell():
            return tf.contrib.rnn.LSTMCell(
                config.rnn_size,
                forget_bias=1.0,
                use_peepholes=True,
                initializer=tf.contrib.layers.xavier_initializer(),
                state_is_tuple=True,
                activation=tf.tanh)

        attn_cell = lstm_cell
        if not infer and config.keep_prob < 1.0:

            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)

        if config.model_type.lower() == 'blstm':
            with tf.variable_scope('blstm'):

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)

                lstm_fw_cell = _unpack_cell(lstm_fw_cell)
                lstm_bw_cell = _unpack_cell(lstm_bw_cell)
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=tf.cast(self._lengths, tf.int32))
                outputs, fw_final_states, bw_final_states = result
        if config.model_type.lower() == 'lstm':
            with tf.variable_scope('lstm'):
                cell = tf.contrib.rnn.MultiRNNCell(
                    [attn_cell() for _ in range(config.rnn_num_layers)],
                    state_is_tuple=True)
                self._initial_state = cell.zero_state(self.batch_size,
                                                      tf.float32)
                state = self.initial_state
                outputs, state = tf.nn.dynamic_rnn(
                    cell,
                    outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths,
                    initial_state=self.initial_state)
                self._final_state = state

        with tf.variable_scope('forward2'):
            if self._model_type.lower() == 'blstm':
                outputs = tf.reshape(outputs, [-1, 2 * config.rnn_size])
                in_size = 2 * config.rnn_size
            else:
                outputs = tf.reshape(outputs, [-1, config.rnn_size])
                in_size = config.rnn_size
            out_size = config.output_size
            # in_size=256
            weights1 = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases1 = tf.get_variable('biases1', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            weights2 = tf.get_variable(
                'weights2', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases2 = tf.get_variable('biases2', [out_size],
                                      initializer=tf.constant_initializer(0.0))
            mask1 = tf.nn.relu(tf.matmul(outputs, weights1) + biases1)
            mask2 = tf.nn.relu(tf.matmul(outputs, weights2) + biases2)
            self._activations1 = tf.reshape(
                mask1, [self.batch_size, -1, config.output_size])
            self._activations2 = tf.reshape(
                mask2, [self.batch_size, -1, config.output_size])

            self._cleaned1 = self._activations1 * self._mixed
            self._cleaned2 = self._activations2 * self._mixed
        # Ability to save the model
        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if infer:
            return

        self._loss = utt_PIT_MSE_for_LSTM(self._cleaned1, self._cleaned2,
                                          self._labels1, self._labels2)
        if tf.get_variable_scope().reuse:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)
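
The PIT loss utt_PIT_MSE_for_LSTM is defined elsewhere; a generic two-speaker, utterance-level PIT MSE (an illustrative sketch, not necessarily the author's implementation) looks like this:

import tensorflow as tf

def pit_mse_two_speakers(est1, est2, ref1, ref2):
    # Compute the MSE of both speaker assignments and keep the cheaper one,
    # which makes the loss invariant to the order of the two outputs.
    loss_a = tf.reduce_mean(tf.square(est1 - ref1) + tf.square(est2 - ref2))
    loss_b = tf.reduce_mean(tf.square(est1 - ref2) + tf.square(est2 - ref1))
    return tf.minimum(loss_a, loss_b)
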
Example n. 33
0
  def __init__(self,
               x_mag_spec_batch,
               lengths_batch,
               y_mag_spec_batch=None,
               theta_x_batch=None,
               theta_y_batch=None,
               behavior='train'):
    '''
    behavior = 'train/validation/infer'
    '''
    if behavior != self.infer:
      assert(y_mag_spec_batch is not None)
      assert(theta_x_batch is not None)
      assert(theta_y_batch is not None)
    self._log_bias = tf.get_variable('logbias', [1], trainable=FLAGS.PARAM.LOG_BIAS_TRAINABLE,
                                     initializer=tf.constant_initializer(FLAGS.PARAM.INIT_LOG_BIAS))
    self._real_logbias = self._log_bias + FLAGS.PARAM.MIN_LOG_BIAS
    self._x_mag_spec = x_mag_spec_batch
    self._norm_x_mag_spec = norm_mag_spec(self._x_mag_spec, FLAGS.PARAM.MAG_NORM_MAX)
    self._norm_x_logmag_spec = norm_logmag_spec(self._x_mag_spec, FLAGS.PARAM.MAG_NORM_MAX, self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)

    self._y_mag_spec = y_mag_spec_batch
    self._norm_y_mag_spec = norm_mag_spec(self._y_mag_spec, FLAGS.PARAM.MAG_NORM_MAX)
    self._norm_y_logmag_spec = norm_logmag_spec(self._y_mag_spec, FLAGS.PARAM.MAG_NORM_MAX, self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)

    self._lengths = lengths_batch
    self._batch_size = tf.shape(self._lengths)[0]

    self._x_theta = theta_x_batch
    self._y_theta = theta_y_batch
    self._model_type = FLAGS.PARAM.MODEL_TYPE

    if FLAGS.PARAM.INPUT_TYPE == 'mag':
      self.net_input = self._norm_x_mag_spec
    elif FLAGS.PARAM.INPUT_TYPE == 'logmag':
      self.net_input = self._norm_x_logmag_spec
    if FLAGS.PARAM.LABEL_TYPE == 'mag':
      self._y_labels = self._norm_y_mag_spec
    elif FLAGS.PARAM.LABEL_TYPE == 'logmag':
      self._y_labels = self._norm_y_logmag_spec

    outputs = self.net_input

    lstm_attn_cell = lstm_cell
    if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:
      def lstm_attn_cell(n_units, n_proj, act):
        return tf.contrib.rnn.DropoutWrapper(lstm_cell(n_units, n_proj, act),
                                             output_keep_prob=FLAGS.PARAM.KEEP_PROB)

    GRU_attn_cell = GRU_cell
    if behavior != self.infer and FLAGS.PARAM.KEEP_PROB < 1.0:
      def GRU_attn_cell(n_units, act):
        return tf.contrib.rnn.DropoutWrapper(GRU_cell(n_units, act),
                                             output_keep_prob=FLAGS.PARAM.KEEP_PROB)

    if FLAGS.PARAM.MODEL_TYPE.upper() == 'BLSTM':
      with tf.variable_scope('BLSTM'):

        lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
            [lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                            FLAGS.PARAM.LSTM_num_proj,
                            FLAGS.PARAM.LSTM_ACTIVATION) for _ in range(FLAGS.PARAM.RNN_LAYER)], state_is_tuple=True)
        lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
            [lstm_attn_cell(FLAGS.PARAM.RNN_SIZE,
                            FLAGS.PARAM.LSTM_num_proj,
                            FLAGS.PARAM.LSTM_ACTIVATION) for _ in range(FLAGS.PARAM.RNN_LAYER)], state_is_tuple=True)

        fw_cell = lstm_fw_cell._cells
        bw_cell = lstm_bw_cell._cells
        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=fw_cell,
            cells_bw=bw_cell,
            inputs=outputs,
            dtype=tf.float32,
            sequence_length=self._lengths)
        outputs, fw_final_states, bw_final_states = result

    if FLAGS.PARAM.MODEL_TYPE.upper() == 'BGRU':
      with tf.variable_scope('BGRU'):

        gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
            [GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                           FLAGS.PARAM.LSTM_ACTIVATION) for _ in range(FLAGS.PARAM.RNN_LAYER)], state_is_tuple=True)
        gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
            [GRU_attn_cell(FLAGS.PARAM.RNN_SIZE,
                           FLAGS.PARAM.LSTM_ACTIVATION) for _ in range(FLAGS.PARAM.RNN_LAYER)], state_is_tuple=True)

        fw_cell = gru_fw_cell._cells
        bw_cell = gru_bw_cell._cells
        result = rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=fw_cell,
            cells_bw=bw_cell,
            inputs=outputs,
            dtype=tf.float32,
            sequence_length=self._lengths)
        outputs, fw_final_states, bw_final_states = result

    # region fully connected layer to get mask
    # calculate rnn output size
    in_size = FLAGS.PARAM.RNN_SIZE
    mask = None
    if self._model_type.upper()[0] == 'B':  # bidirectional
      rnn_output_num = FLAGS.PARAM.RNN_SIZE*2
      if FLAGS.PARAM.MODEL_TYPE == 'BLSTM' and (not (FLAGS.PARAM.LSTM_num_proj is None)):
        rnn_output_num = 2*FLAGS.PARAM.LSTM_num_proj
      in_size = rnn_output_num
    outputs = tf.reshape(outputs, [-1, in_size])
    out_size = FLAGS.PARAM.OUTPUT_SIZE
    with tf.variable_scope('fullconnectOut'):
      weights = tf.get_variable('weights1', [in_size, out_size],
                                initializer=tf.random_normal_initializer(stddev=0.01))
      biases = tf.get_variable('biases1', [out_size],
                               initializer=tf.constant_initializer(0.0))

    mask = tf.nn.relu(tf.matmul(outputs, weights) + biases)
    self._mask = tf.reshape(
        mask, [self._batch_size, -1, FLAGS.PARAM.OUTPUT_SIZE])
    # endregion
    outputs = tf.reshape(outputs, [self._batch_size, -1, in_size])

    # region Apply Noise Threshold Function on Mask
    if FLAGS.PARAM.THRESHOLD_FUNC is not None:
      # use noise threshold
      if FLAGS.PARAM.THRESHOLD_POS == FLAGS.PARAM.THRESHOLD_ON_MASK:
        self._mask, self._threshold = threshold_feature(self._mask, outputs,
                                                        self._batch_size, in_size)
      elif FLAGS.PARAM.THRESHOLD_POS == FLAGS.PARAM.THRESHOLD_ON_SPEC:
        pass
      else:
        print('Threshold position error!')
        exit(-1)
    # endregion

    # region prepare y_estimation and y_labels
    if FLAGS.PARAM.TRAINING_MASK_POSITION == 'mag':
      self._y_estimation = self._mask*self._norm_x_mag_spec
    elif FLAGS.PARAM.TRAINING_MASK_POSITION == 'logmag':
      self._y_estimation = self._mask*self._norm_x_logmag_spec
    if FLAGS.PARAM.MASK_TYPE == 'PSM':
      self._y_labels *= tf.cos(self._x_theta-self._y_theta)
    elif FLAGS.PARAM.MASK_TYPE == 'IRM':
      pass
    else:
      tf.logging.error('Mask type error.')
      exit(-1)

    # region Apply Noise Threshold Function on Spec(log or mag)
    if FLAGS.PARAM.THRESHOLD_FUNC is not None:
      # use noise threshold
      if FLAGS.PARAM.THRESHOLD_POS == FLAGS.PARAM.THRESHOLD_ON_MASK:
        pass
      elif FLAGS.PARAM.THRESHOLD_POS == FLAGS.PARAM.THRESHOLD_ON_SPEC:
        self._y_estimation, self._threshold = threshold_feature(self._y_estimation, outputs,
                                                                self._batch_size, in_size)
    # endregion

    # region get infer spec
    if FLAGS.PARAM.DECODING_MASK_POSITION != FLAGS.PARAM.TRAINING_MASK_POSITION:
      print('Error: DECODING_MASK_POSITION must equal TRAINING_MASK_POSITION when using the threshold model.')
    if FLAGS.PARAM.DECODING_MASK_POSITION == 'mag':
      self._y_mag_estimation = rm_norm_mag_spec(self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX)
    elif FLAGS.PARAM.DECODING_MASK_POSITION == 'logmag':
      self._y_mag_estimation = rm_norm_logmag_spec(self._y_estimation,
                                                   FLAGS.PARAM.MAG_NORM_MAX,
                                                   self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
    '''
    _y_mag_estimation is the estimated mag_spec.
    _y_estimation is the loss target, either mag_spec or logmag_spec.
    '''
    # endregion

    if FLAGS.PARAM.TRAINING_MASK_POSITION != FLAGS.PARAM.LABEL_TYPE:
      if FLAGS.PARAM.LABEL_TYPE == 'mag':
        self._y_estimation = normedLogmag2normedMag(self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
      elif FLAGS.PARAM.LABEL_TYPE == 'logmag':
        self._y_estimation = normedMag2normedLogmag(self._y_estimation, FLAGS.PARAM.MAG_NORM_MAX,
                                                    self._log_bias, FLAGS.PARAM.MIN_LOG_BIAS)
    # endregion

    self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)

    if behavior == self.infer:
      return

    # region get LOSS
    if FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'SPEC_MSE': # log_mag and mag MSE
      self._loss = loss.reduce_sum_frame_batchsize_MSE(self._y_estimation,self._y_labels)
    elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'MFCC_SPEC_MSE':
      self._loss1, self._loss2 = loss.balanced_MFCC_AND_SPEC_MSE(self._y_estimation, self._y_labels,
                                                                 self._y_mag_estimation, self._y_mag_spec)
      self._loss = FLAGS.PARAM.SPEC_LOSS_COEF*self._loss1 + FLAGS.PARAM.MFCC_LOSS_COEF*self._loss2
    elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == 'MEL_MAG_MSE':
      self._loss1, self._loss2 = loss.balanced_MEL_AND_SPEC_MSE(self._y_estimation, self._y_labels,
                                                                self._y_mag_estimation, self._y_mag_spec)
      self._loss = FLAGS.PARAM.SPEC_LOSS_COEF*self._loss1 + FLAGS.PARAM.MEL_LOSS_COEF*self._loss2
    elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_LOWF_EN":
      self._loss = loss.reduce_sum_frame_batchsize_MSE(self._y_estimation, self._y_labels)
    elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "FAIR_SPEC_MSE":
      self._loss = loss.fair_reduce_sum_frame_batchsize_MSE(self._y_estimation, self._y_labels)
    elif FLAGS.PARAM.LOSS_FUNC_FOR_MAG_SPEC == "SPEC_MSE_FLEXIBLE_POW_C":
      self._loss = loss.reduce_sum_frame_batchsize_MSE_EmphasizeLowerValue(self._y_estimation,
                                                                           self._y_labels,
                                                                           FLAGS.PARAM.POW_COEF)
    else:
      print('Loss type error.')
      exit(-1)
    # endregion

    if behavior == self.validation:
      '''
      the validation model cannot be trained.
      '''
      return
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                      FLAGS.PARAM.CLIP_NORM)
    optimizer = tf.train.AdamOptimizer(self.lr)
    #optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    self._new_lr = tf.placeholder(
        tf.float32, shape=[], name='new_learning_rate')
    self._lr_update = tf.assign(self._lr, self._new_lr)
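
norm_mag_spec and rm_norm_mag_spec are defined elsewhere in the repository; a plausible minimal pair, under the assumption that magnitudes are simply clipped to [0, MAG_NORM_MAX] and scaled into [0, 1] (mirroring the min/max normalization used in the LOG_MAG branch above):

import tensorflow as tf

def norm_mag_spec(mag_spec, mag_norm_max):
    # Clip to [0, mag_norm_max] and scale into [0, 1] (sketch).
    mag_spec = tf.clip_by_value(mag_spec, 0.0, mag_norm_max)
    return mag_spec / mag_norm_max

def rm_norm_mag_spec(normed_mag_spec, mag_norm_max):
    # Inverse of norm_mag_spec: map [0, 1] back to [0, mag_norm_max].
    return normed_mag_spec * mag_norm_max
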
Example n. 34
0
    def __init__(self,
                 x_mag_spec_batch,
                 lengths_batch,
                 y_mag_spec_batch=None,
                 theta_x_batch=None,
                 theta_y_batch=None,
                 infer=False):
        self._log_bias = tf.get_variable('logbias', [1],
                                         trainable=PARAM.LOG_BIAS_TRAINABEL,
                                         initializer=tf.constant_initializer(
                                             PARAM.INIT_LOG_BIAS))
        self._real_logbias = self._log_bias + DEFAULT_LOG_BIAS
        self._inputs = x_mag_spec_batch
        self._x_mag_spec = self.inputs
        self._norm_x_mag_spec = norm_mag_spec(self._x_mag_spec)
        self._norm_x_logmag_spec = norm_logmag_spec(self._x_mag_spec,
                                                    self._log_bias)

        if not infer:
            self._y_mag_spec = y_mag_spec_batch
            self._norm_y_mag_spec = norm_mag_spec(self._y_mag_spec)
            self._norm_y_logmag_spec = norm_logmag_spec(
                self._y_mag_spec, self._log_bias)

        self._lengths = lengths_batch

        self.batch_size = tf.shape(self._lengths)[0]
        self._model_type = PARAM.MODEL_TYPE

        if PARAM.INPUT_TYPE == 'mag':
            self.net_input = self._norm_x_mag_spec
        elif PARAM.INPUT_TYPE == 'logmag':
            self.net_input = self._norm_x_logmag_spec

        if not infer:
            if PARAM.LABEL_TYPE == 'mag':
                self._labels = self._norm_y_mag_spec
            elif PARAM.LABEL_TYPE == 'logmag':
                self._labels = self._norm_y_logmag_spec

        outputs = self.net_input

        def lstm_cell():
            return tf.contrib.rnn.LSTMCell(
                PARAM.RNN_SIZE,
                forget_bias=1.0,
                use_peepholes=True,
                num_proj=PARAM.LSTM_num_proj,
                initializer=tf.contrib.layers.xavier_initializer(),
                state_is_tuple=True,
                activation=PARAM.LSTM_ACTIVATION)

        lstm_attn_cell = lstm_cell
        if not infer and PARAM.KEEP_PROB < 1.0:

            def lstm_attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=PARAM.KEEP_PROB)

        def GRU_cell():
            return tf.contrib.rnn.GRUCell(
                PARAM.RNN_SIZE,
                # kernel_initializer=tf.contrib.layers.xavier_initializer(),
                activation=PARAM.LSTM_ACTIVATION)

        GRU_attn_cell = GRU_cell
        if not infer and PARAM.KEEP_PROB < 1.0:

            def GRU_attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    GRU_cell(), output_keep_prob=PARAM.KEEP_PROB)

        if PARAM.MODEL_TYPE.upper() == 'BLSTM':
            with tf.variable_scope('BLSTM'):

                lstm_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [lstm_attn_cell() for _ in range(PARAM.RNN_LAYER)],
                    state_is_tuple=True)
                lstm_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [lstm_attn_cell() for _ in range(PARAM.RNN_LAYER)],
                    state_is_tuple=True)

                lstm_fw_cell = lstm_fw_cell._cells
                lstm_bw_cell = lstm_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=lstm_fw_cell,
                    cells_bw=lstm_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result
        if PARAM.MODEL_TYPE.upper() == 'BGRU':
            with tf.variable_scope('BGRU'):

                gru_fw_cell = tf.contrib.rnn.MultiRNNCell(
                    [GRU_attn_cell() for _ in range(PARAM.RNN_LAYER)],
                    state_is_tuple=True)
                gru_bw_cell = tf.contrib.rnn.MultiRNNCell(
                    [GRU_attn_cell() for _ in range(PARAM.RNN_LAYER)],
                    state_is_tuple=True)

                gru_fw_cell = gru_fw_cell._cells
                gru_bw_cell = gru_bw_cell._cells
                result = rnn.stack_bidirectional_dynamic_rnn(
                    cells_fw=gru_fw_cell,
                    cells_bw=gru_bw_cell,
                    inputs=outputs,
                    dtype=tf.float32,
                    sequence_length=self._lengths)
                outputs, fw_final_states, bw_final_states = result

        with tf.variable_scope('fullconnectOut'):
            if self._model_type.upper()[0] == 'B':  # bidirectional
                outputs = tf.reshape(outputs, [-1, 2 * PARAM.LSTM_num_proj])
                in_size = 2 * PARAM.LSTM_num_proj
            out_size = PARAM.OUTPUT_SIZE
            weights = tf.get_variable(
                'weights1', [in_size, out_size],
                initializer=tf.random_normal_initializer(stddev=0.01))
            biases = tf.get_variable('biases1', [out_size],
                                     initializer=tf.constant_initializer(0.0))
            mask = tf.nn.relu(tf.matmul(outputs, weights) + biases)
            self._mask = tf.reshape(mask,
                                    [self.batch_size, -1, PARAM.OUTPUT_SIZE])

        self.saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=30)
        if infer:
            if PARAM.DECODING_MASK_POSITION == 'mag':
                self._cleaned = rm_norm_mag_spec(self._mask *
                                                 self._norm_x_mag_spec)
            elif PARAM.DECODING_MASK_POSITION == 'logmag':
                self._cleaned = rm_norm_logmag_spec(
                    self._mask * self._norm_x_logmag_spec, self._log_bias)
            return

        if PARAM.TRAINING_MASK_POSITION == 'mag':
            self._cleaned = self._mask * self._norm_x_mag_spec
        elif PARAM.TRAINING_MASK_POSITION == 'logmag':
            self._cleaned = self._mask * self._norm_x_logmag_spec
        if PARAM.MASK_TYPE == 'PSM':
            self._labels *= tf.cos(theta_x_batch - theta_y_batch)
        elif PARAM.MASK_TYPE == 'IRM':
            pass
        else:
            tf.logging.error('Mask type error.')
            exit(-1)

        if PARAM.TRAINING_MASK_POSITION != PARAM.LABEL_TYPE:
            if PARAM.LABEL_TYPE == 'mag':
                self._cleaned = normedLogmag2normedMag(self._cleaned,
                                                       self._log_bias)
            elif PARAM.LABEL_TYPE == 'logmag':
                self._cleaned = normedMag2normedLogmag(self._cleaned,
                                                       self._log_bias)
        self._loss = PARAM.LOSS_FUNC(self._cleaned, self._labels)
        if tf.get_variable_scope().reuse:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          PARAM.CLIP_NORM)
        optimizer = tf.train.AdamOptimizer(self.lr)
        #optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name='new_learning_rate')
        self._lr_update = tf.assign(self._lr, self._new_lr)