Ejemplo n.º 1
0
def load_cmvn(path):
    """Load CMVN statistics from a numpy file and return them as tensors.

    The object returned by ``np.load(path)`` is unpacked into exactly two
    arrays, mean first and variance second. Each one gets a new leading axis
    and is then converted to a ``tf.float32`` tensor.

    Args:
        path: location of the saved statistics, passed straight to ``np.load``.

    Returns:
        A ``(mean, variance)`` tuple of float32 tensors, named ``cmvn_mean``
        and ``cmvn_variance`` respectively.
    """
    # The original author notes each array as [1, nbins, nchannels].
    mean_np, variance_np = np.load(path)

    # Prepend an axis: [1, nbins, nchannels] -> [1, 1, nbins, nchannels].
    mean_np = np.expand_dims(mean_np, axis=0)
    variance_np = np.expand_dims(variance_np, axis=0)

    mean_tensor = tf.convert_to_tensor(
        mean_np, dtype=tf.float32, name='cmvn_mean')
    variance_tensor = tf.convert_to_tensor(
        variance_np, dtype=tf.float32, name='cmvn_variance')
    return mean_tensor, variance_tensor
Ejemplo n.º 2
0
    def call(self, audio_data, sample_rate=None):
        """Compute pitch features for an audio signal.

        Args:
            audio_data: the audio signal to analyse. The original
                documentation describes it as a (1, N) tensor.
            sample_rate: sample rate of the signal. When ``None``, the
                configured ``sample_rate`` is used as an int32 constant.
                A non-tensor value is converted with
                ``tf.convert_to_tensor``; a tensor is passed through as is.

        Returns:
            The result of ``py_x_ops.pitch``. The original documentation
            describes it as a float tensor of size (num_frames, 2) holding
            the pitch and POV features of every frame.
        """
        cfg = self.config

        # Config attributes forwarded verbatim as keyword arguments to the
        # pitch op; each keyword has the same name as the attribute.
        option_names = (
            'window_length',
            'frame_length',
            'snip_edges',
            'preemph_coeff',
            'min_f0',
            'max_f0',
            'soft_min_f0',
            'penalty_factor',
            'lowpass_cutoff',
            'resample_freq',
            'delta_pitch',
            'nccf_ballast',
            'lowpass_filter_width',
            'upsample_filter_width',
            'max_frames_latency',
            'frames_per_chunk',
            'simulate_first_pass_online',
            'recompute_frame',
            'nccf_ballast_online',
            'pitch_scale',
            'pov_scale',
            'pov_offset',
            'delta_pitch_scale',
            'delta_pitch_noise_stddev',
            'normalization_left_context',
            'normalization_right_context',
            'delta_window',
            'delay',
            'add_pov_feature',
            'add_normalized_log_pitch',
            'add_delta_pitch',
            'add_raw_log_pitch',
        )

        with tf.name_scope('pitch'):
            if sample_rate is None:
                # Fall back to the configured rate.
                sample_rate = tf.constant(cfg.sample_rate, dtype=tf.int32)
            elif not tf.is_tensor(sample_rate):
                sample_rate = tf.convert_to_tensor(sample_rate)

            options = {name: getattr(cfg, name) for name in option_names}
            return py_x_ops.pitch(audio_data, sample_rate, **options)
Ejemplo n.º 3
0
  def __init__(self, config):
    """Read label-smoothing and distillation settings from the config.

    Reads ``solver.optimizer.label_smoothing``,
    ``solver.distilling.temperature`` and ``solver.distilling.alpha`` from
    ``self.config``, checks that alpha is within [0, 1] and temperature is
    at least 1, and stores the temperature as a float32 tensor in ``self.T``.

    Args:
      config: configuration object forwarded to the parent constructor.
    """
    super().__init__(config)
    solver_conf = self.config['solver']
    self.smoothing = solver_conf['optimizer']['label_smoothing']

    distill_conf = solver_conf['distilling']
    self.temperature = distill_conf['temperature']
    self.alpha = distill_conf['alpha']

    # Sanity-check the distillation hyper-parameters.
    assert 0.0 <= self.alpha <= 1.0, "alpha : {}".format(self.alpha)
    assert self.temperature >= 1, "temperature : {}".format(self.temperature)

    #pylint: disable=invalid-name
    self.T = tf.convert_to_tensor(self.temperature, dtype=tf.float32)
Ejemplo n.º 4
0
def char_cut_tf(input_str):
    """Split text into space-separated characters with TensorFlow string ops.

    Args:
        input_str: a scalar string or a rank-1 batch of strings; anything
            accepted by ``tf.convert_to_tensor``.

    Returns:
        A string tensor of the same rank as the input in which the UTF-8
        characters of each string are joined by single spaces, with leading
        and trailing whitespace stripped.

    Raises:
        Exception: if the input rank is neither 0 nor 1.
    """
    text = tf.convert_to_tensor(input_str)
    ndims = len(text.get_shape())

    if ndims not in (0, 1):
        logging.error("Please check the shape of input_str!")
        raise Exception("Error input shape for input_str.")

    chars = tf.strings.unicode_split(text, "UTF-8")
    if ndims == 1:
        # Batched input yields a ragged result; pad it to a dense tensor with
        # empty strings. The padding only adds trailing separators, which the
        # final strip removes.
        chars = chars.to_tensor(default_value="")

    # The character axis is the one appended by the split, i.e. axis == ndims
    # (0 for a scalar input, 1 for a batch).
    joined = tf.strings.reduce_join(chars, axis=ndims, separator=" ")
    return tf.strings.strip(joined)
Ejemplo n.º 5
0
def shape_list(tensor):
    """Return the dimensions of `tensor`, using static values when known.

    Args:
        tensor: a tensor or any value accepted by ``tf.convert_to_tensor``.

    Returns:
        The dynamic shape tensor ``tf.shape(tensor)`` when the rank is
        unknown. Otherwise a Python list with one entry per axis: the static
        size when it is known, else the matching element of the dynamic
        shape.
    """
    tensor = tf.convert_to_tensor(tensor)
    static_shape = tensor.get_shape()
    dynamic_shape = tf.shape(tensor)

    # With an unknown rank there is nothing to enumerate statically.
    if static_shape.dims is None:
        return dynamic_shape

    return [
        dynamic_shape[axis] if size is None else size
        for axis, size in enumerate(static_shape.as_list())
    ]
Ejemplo n.º 6
0
def shape_list(x):
    """Return the dimensions of `x`, preferring statically known sizes.

    Args:
        x: a tensor or any value accepted by ``tf.convert_to_tensor``.

    Returns:
        ``tf.shape(x)`` when the rank of `x` is unknown. Otherwise a list
        with one entry per axis, holding the static size where it is known
        and a slice of the dynamic shape where it is not.
    """
    x = tf.convert_to_tensor(x)
    dynamic = tf.shape(x)

    # Unknown rank: only the dynamic shape is available.
    if x.get_shape().dims is None:
        return dynamic

    # Start from the static sizes and fill the unknown ones from the
    # dynamic shape.
    dims = x.get_shape().as_list()
    for axis, size in enumerate(dims):
        if size is None:
            dims[axis] = dynamic[axis]
    return dims
Ejemplo n.º 7
0
    def build(self, input_shape):
        """Create the sub-layers and run one dummy forward pass.

        Args:
            input_shape: mapping from input name to a shape object exposing
                ``as_list()``. The ``'inputs'`` entry must have four
                dimensions, unpacked here as (batch, time, feature, channel).
        """
        logging.info(f"{self.__class__.__name__} input_shape : {input_shape}")
        _, num_frames, num_feats, num_channels = input_shape['inputs'].as_list()

        # Merge the feature and channel axes so the LSTMs see a 2-D sequence.
        self.reshape = layers.Reshape(
            (num_frames, num_feats * num_channels),
            input_shape=(num_frames, num_feats, num_channels))
        self.lstm1 = layers.LSTM(512, return_sequences=True)
        self.lstm2 = layers.LSTM(256, return_sequences=False)
        self.dense1 = layers.Dense(512, activation='relu')
        self.drop1 = layers.Dropout(rate=0.2)
        self.dense2 = layers.Dense(4)

        # Push a random batch of size 1 through `call`. Background on doing
        # this for subclassed models:
        # https://stackoverflow.com/questions/55684949/subclass-of-tf-keras-model-can-not-get-summay-result
        # https://stackoverflow.com/questions/52826134/keras-model-subclassing-examples
        float_type = tf.keras.backend.floatx()
        dummy_inputs = {
            key: tf.convert_to_tensor(
                np.random.normal(size=[1] + shape.as_list()[1:]),
                dtype=float_type)
            for key, shape in input_shape.items()
        }
        self.call(dummy_inputs)

        # The parent `build` is not invoked here; the built flag is set
        # manually instead.
        self.built = True