def load_cmvn(path):
    """Load CMVN statistics from a ``.npy`` file as float32 TF tensors.

    The array stored at ``path`` is unpacked along its first axis into
    exactly two entries: the mean and the variance. Per the original
    author's notes each entry is shaped ``[1, nbins, nchannels]`` and
    becomes ``[1, 1, nbins, nchannels]`` after the leading axis is added
    (shapes are taken from those notes, not checked here).

    Args:
        path: location of the ``cmvn.npy`` file, as accepted by ``np.load``.

    Returns:
        A ``(mean, variance)`` tuple of ``tf.float32`` tensors named
        ``cmvn_mean`` and ``cmvn_variance``.
    """
    # Prepend a new leading axis to each of the two stored statistics.
    mean, variance = (np.expand_dims(stat, axis=0) for stat in np.load(path))

    mean = tf.convert_to_tensor(mean, dtype=tf.float32, name='cmvn_mean')
    variance = tf.convert_to_tensor(
        variance, dtype=tf.float32, name='cmvn_variance')
    return mean, variance
def call(self, audio_data, sample_rate=None):
    """Calculate pitch features of audio data.

    :param audio_data: the audio signal from which to compute the features.
        Should be a (1, N) tensor.
    :param sample_rate: the sample rate of the signal we are working with.
        When ``None``, ``self.config.sample_rate`` is used as an int32
        constant; a non-tensor value is converted to a tensor.
    :return: the output of ``py_x_ops.pitch``; documented by the original
        author as a float tensor of size (num_frames, 2) containing pitch
        and POV features of every frame in speech.
    """
    p = self.config

    # Config fields forwarded verbatim as same-named keyword arguments of
    # the pitch op. The order matches the original explicit call.
    op_attr_names = (
        'window_length',
        'frame_length',
        'snip_edges',
        'preemph_coeff',
        'min_f0',
        'max_f0',
        'soft_min_f0',
        'penalty_factor',
        'lowpass_cutoff',
        'resample_freq',
        'delta_pitch',
        'nccf_ballast',
        'lowpass_filter_width',
        'upsample_filter_width',
        'max_frames_latency',
        'frames_per_chunk',
        'simulate_first_pass_online',
        'recompute_frame',
        'nccf_ballast_online',
        'pitch_scale',
        'pov_scale',
        'pov_offset',
        'delta_pitch_scale',
        'delta_pitch_noise_stddev',
        'normalization_left_context',
        'normalization_right_context',
        'delta_window',
        'delay',
        'add_pov_feature',
        'add_normalized_log_pitch',
        'add_delta_pitch',
        'add_raw_log_pitch',
    )

    with tf.name_scope('pitch'):
        if sample_rate is None:
            # Fall back to the configured sample rate.
            sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)
        elif not tf.is_tensor(sample_rate):
            sample_rate = tf.convert_to_tensor(sample_rate)

        op_attrs = {name: getattr(p, name) for name in op_attr_names}
        return py_x_ops.pitch(audio_data, sample_rate, **op_attrs)
def __init__(self, config):
    """Read label-smoothing and distillation settings from the config.

    After the parent initializer runs, the following entries of
    ``self.config['solver']`` are cached on the instance:

    * ``optimizer.label_smoothing`` -> ``self.smoothing``
    * ``distilling.temperature``    -> ``self.temperature`` (must be >= 1)
    * ``distilling.alpha``          -> ``self.alpha`` (must lie in [0, 1])

    ``self.T`` holds the temperature as a float32 tensor.

    Args:
        config: configuration passed to the parent initializer.

    Raises:
        AssertionError: if ``alpha`` or ``temperature`` is out of range.
    """
    super().__init__(config)

    solver_conf = self.config['solver']
    self.smoothing = solver_conf['optimizer']['label_smoothing']

    distill_conf = solver_conf['distilling']
    self.temperature = distill_conf['temperature']
    self.alpha = distill_conf['alpha']

    assert 0.0 <= self.alpha <= 1.0, "alpha : {}".format(self.alpha)
    assert self.temperature >= 1, "temperature : {}".format(
        self.temperature)

    self.T = tf.convert_to_tensor(  #pylint: disable=invalid-name
        self.temperature, dtype=tf.float32)
def char_cut_tf(input_str):
    """Split text into space-separated characters with TensorFlow ops.

    Args:
        input_str: a scalar string or a 1-D batch of strings (anything
            ``tf.convert_to_tensor`` accepts).

    Returns:
        A string tensor of the same rank as the input in which the UTF-8
        characters of each string are joined by single spaces and
        leading/trailing whitespace is stripped.

    Raises:
        Exception: if the input has a rank other than 0 or 1.
    """
    text = tf.convert_to_tensor(input_str)
    ndims = len(text.get_shape())

    if ndims not in (0, 1):
        logging.error("Please check the shape of input_str!")
        raise Exception("Error input shape for input_str.")

    chars = tf.strings.unicode_split(text, "UTF-8")
    if ndims == 1:
        # Ragged per-sentence characters -> dense, padded with "" so the
        # join can run along the character axis.
        chars = chars.to_tensor(default_value="")
        joined = tf.strings.reduce_join(chars, axis=1, separator=" ")
    else:
        joined = tf.strings.reduce_join(chars, axis=0, separator=" ")

    # Strip surrounding whitespace from the joined result.
    return tf.strings.strip(joined)
def shape_list(tensor):
    """Return the dims of ``tensor``, statically where possible.

    Args:
        tensor: a tensor or any value ``tf.convert_to_tensor`` accepts.

    Returns:
        If the rank is unknown, the dynamic shape tensor ``tf.shape(tensor)``.
        Otherwise a Python list with one entry per axis: the static size
        when it is known, or the matching element of the dynamic shape
        when it is not.
    """
    tensor = tf.convert_to_tensor(tensor)
    static_shape = tensor.get_shape()

    if static_shape.dims is None:
        # Rank itself is unknown; only the dynamic shape is available.
        return tf.shape(tensor)

    dynamic_shape = tf.shape(tensor)
    return [
        dynamic_shape[axis] if size is None else size
        for axis, size in enumerate(static_shape.as_list())
    ]
def shape_list(x):
    """Return the shape of ``x`` as a list, preferring static sizes.

    Args:
        x: a tensor or any value ``tf.convert_to_tensor`` accepts.

    Returns:
        ``tf.shape(x)`` when the rank of ``x`` is unknown; otherwise a list
        whose entries are Python ints for statically known axes and scalar
        slices of ``tf.shape(x)`` for the remaining axes.
    """
    x = tf.convert_to_tensor(x)

    # Unknown rank: there is no per-axis static information to mix in.
    if x.get_shape().dims is None:
        return tf.shape(x)

    known_sizes = x.get_shape().as_list()
    runtime_shape = tf.shape(x)

    dims = []
    for axis, size in enumerate(known_sizes):
        dims.append(size if size is not None else runtime_shape[axis])
    return dims
def build(self, input_shape):
    """Create the sub-layers and run one dummy forward pass.

    Args:
        input_shape: mapping from input name to a shape object exposing
            ``as_list()``. The ``'inputs'`` entry must be 4-D and is read
            as ``(batch, time, feat, channels)``.

    The layers created are a reshape to ``(time, feat * channels)``, two
    LSTMs (512 units returning sequences, then 256 units returning the last
    step), a 512-unit ReLU dense layer, dropout at rate 0.2 and a final
    4-unit dense layer.
    """
    logging.info(f"{self.__class__.__name__} input_shape : {input_shape}")

    time, feat, channels = input_shape['inputs'].as_list()[1:]

    # Keep the creation order: Keras auto-generated layer names depend on it.
    self.reshape = layers.Reshape(
        (time, feat * channels), input_shape=(time, feat, channels))
    self.lstm1 = layers.LSTM(512, return_sequences=True)
    self.lstm2 = layers.LSTM(256, return_sequences=False)
    self.dense1 = layers.Dense(512, activation='relu')
    self.drop1 = layers.Dropout(rate=0.2)
    self.dense2 = layers.Dense(4)

    # Call the model once on random batch-size-1 inputs; presumably so a
    # subclassed model can produce a summary, per these references:
    # https://stackoverflow.com/questions/55684949/subclass-of-tf-keras-model-can-not-get-summay-result
    # https://stackoverflow.com/questions/52826134/keras-model-subclassing-examples
    dummy_inputs = {
        key: tf.convert_to_tensor(
            np.random.normal(size=[1] + shape.as_list()[1:]),
            dtype=tf.keras.backend.floatx())
        for key, shape in input_shape.items()
    }
    self.call(dummy_inputs)

    # NOTE: super().build(...) is intentionally not called here (the
    # original left that call commented out); the layer is marked built
    # manually instead.
    self.built = True