def synthesis_from_prediction(self, source_sound_features):
    """Synthesize audio from the network's prediction for a whole utterance.

    The per-frame source features are packed into one array of shape
    ``[1, n_time_steps, 2 * n_triangle_function + 1]``, normalized with the
    source statistics, passed through ``self.output_layer`` in a single
    session run, de-normalized with the target statistics and handed to
    ``synthesize_voice``.

    Args:
        source_sound_features: mapping with the keys ``"period_list"``,
            ``"spectral_envelope_coeffs_harmonic_list"`` and
            ``"spectral_envelope_coeffs_noise_list"``. Each value is indexed
            by time step; all three are read for every index below
            ``len(source_sound_features["period_list"])``.

    Returns:
        The value returned by ``synthesize_voice`` for the predicted
        features (presumably the audio signal -- confirm against
        ``synthesize_voice``).
    """
    period_list = source_sound_features["period_list"]
    harmonic_list = source_sound_features[
        "spectral_envelope_coeffs_harmonic_list"]
    noise_list = source_sound_features["spectral_envelope_coeffs_noise_list"]

    n_time_steps_source = len(period_list)
    n_triangle_function = self.params["n_triangle_function"]
    print("n_time_steps_source ", n_time_steps_source)

    # Column layout of one frame: [period | harmonic coeffs | noise coeffs].
    harmonic_end = 1 + n_triangle_function
    noise_end = 1 + 2 * n_triangle_function

    feature_source_array = np.zeros([1, n_time_steps_source, noise_end])
    for i in range(n_time_steps_source):
        feature_source_array[0, i, 0] = period_list[i]
        feature_source_array[0, i, 1:harmonic_end] = harmonic_list[i]
        feature_source_array[0, i, harmonic_end:noise_end] = noise_list[i]

    mu_source, sigma2_source = self.vector_normalizer_source.get_mu_sigma2()
    mu_target, sigma2_target = self.vector_normalizer_target.get_mu_sigma2()

    # Inputs are scaled as (x - mu) / (2 * sqrt(sigma2)); the prediction is
    # mapped back with the inverse transform using the target statistics.
    normalized_source = (
        (feature_source_array - mu_source) / (2 * np.sqrt(sigma2_source)))

    predicted_vectors = self.sess.run(
        self.output_layer,
        feed_dict={self.input_placeholder: normalized_source})
    predicted_vectors = predicted_vectors * 2 * np.sqrt(sigma2_target) + mu_target

    feature_dict = feature_vector_array_to_feature_dict(
        predicted_vectors[0, :, :])

    # Use the instance's own parameters (the same ones read above) instead of
    # depending on a module-level ``params`` global.
    return synthesize_voice(feature_list_dict=feature_dict,
                            params=self.params,
                            normalize=True)
# Fragment of a training step (the enclosing loop/function and the end of the
# final statement are not visible here).
#
# What the statements below do, in order:
#   1. Add noise drawn from np.random.randn (same shape as the source batch),
#      divided by 20, to the source batch; the target batch is taken as-is.
#   2. Normalize both batches as (x - mu) / (2 * sqrt(sigma2)) using
#      mu_source/sigma2_source and mu_target/sigma2_target respectively.
#   3. When listen_to_batch is set, it appears that each of the batch_size
#      items of both batches is converted to a feature dict, synthesized with
#      synthesize_voice(normalize=True) and written as a 44100 Hz WAV file
#      named test_<i>_target.wav / test_<i>_source.wav.
#   4. Start a `sess.run(` call whose arguments continue past this line.
#
# NOTE(review): the listening check reads source_batch/target_batch *after*
# they were normalized in step 2 -- confirm that synthesizing normalized
# features (rather than the raw ones) is intended.
# NOTE(review): the output directory is a hard-coded absolute user path.
source_batch = batch["source_batch"] + np.random.randn( *(batch["source_batch"].shape)) / 20 target_batch = batch["target_batch"] source_batch = (source_batch - mu_source) / (2 * np.sqrt(sigma2_source)) target_batch = (target_batch - mu_target) / (2 * np.sqrt(sigma2_target)) # testing batch correctness if listen_to_batch: for i_batch in range(batch_size): feature_dict = feature_vector_array_to_feature_dict( target_batch[i_batch, :, :]) sound = synthesize_voice(feature_list_dict=feature_dict, params=params, normalize=True) write( "/Users/pierresendorek/temp/test/test_" + str(i_batch) + "_target.wav", 44100, sound) feature_dict = feature_vector_array_to_feature_dict( source_batch[i_batch, :, :]) sound = synthesize_voice(feature_list_dict=feature_dict, params=params, normalize=True) write( "/Users/pierresendorek/temp/test/test_" + str(i_batch) + "_source.wav", 44100, sound) _, loss = sess.run(
def synthesis_from_prediction(self, source_sound_features):
    """Predict target features one frame at a time and synthesize the result.

    For every source time step ``i`` a window of shape
    ``[2 * n_triangle_function + 1,
    fw_range_source + bw_range_source + bw_range_target]`` is built:

    * the first ``bw_range_source + fw_range_source`` columns hold the source
      features at steps ``i - bw_range_source`` .. ``i + fw_range_source - 1``
      (out-of-range indices are resolved by
      ``get_element_from_list_constant_outside``);
    * the last ``bw_range_target`` columns hold previously predicted target
      features, most recent first, read back from a
      ``PiecewiseLinearFunction`` at times ``i_target - 1`` ..
      ``i_target - bw_range_target``.

    The window is flattened to ``[1, -1]`` and fed to ``self.output_layer``;
    entries ``[0, 1:]`` of the prediction are stored as the target features at
    time ``i_target``. Afterwards the stored features are read back at integer
    times ``0 .. n_time_steps - 1``, synthesized with ``synthesize_voice`` and
    written to a WAV file.

    Args:
        source_sound_features: mapping with the keys ``"period_list"``,
            ``"spectral_envelope_coeffs_harmonic_list"`` and
            ``"spectral_envelope_coeffs_noise_list"``, each indexed by time
            step.

    Returns:
        The value returned by ``synthesize_voice`` (the same object that is
        written to disk). Earlier versions returned ``None``.
    """
    n_time_steps_source = len(source_sound_features["period_list"])
    n_triangle_function = self.params["n_triangle_function"]
    fw_range_source = self.params["fw_range_source"]
    bw_range_source = self.params["bw_range_source"]
    bw_range_target = self.params["bw_range_target"]

    n_features = n_triangle_function * 2 + 1
    # Index of the first column that holds target-history features.
    target_offset = fw_range_source + bw_range_source

    i_target = 0.0

    # Use the instance's own parameters rather than a module-level global.
    piecewise_linear_function = PiecewiseLinearFunction(params=self.params)
    # Seed the target history with a random non-negative vector at time -1
    # (presumably so the first look-back has a value to read -- confirm
    # against PiecewiseLinearFunction.get_value).
    piecewise_linear_function.add_point(
        time=-1, value=np.abs(np.random.randn(n_features)))

    for i in range(n_time_steps_source):
        feature_source_array = np.zeros(
            [n_features, target_offset + bw_range_target])

        # Source context around step i.
        for k in range(-bw_range_source, fw_range_source):
            column = k + bw_range_source
            feature_source_array[0, column] = \
                get_element_from_list_constant_outside(
                    i + k, source_sound_features["period_list"])
            feature_source_array[1:1 + n_triangle_function, column] = \
                get_element_from_list_constant_outside(
                    i + k,
                    source_sound_features[
                        "spectral_envelope_coeffs_harmonic_list"])
            feature_source_array[
                1 + n_triangle_function:1 + 2 * n_triangle_function,
                column] = \
                get_element_from_list_constant_outside(
                    i + k,
                    source_sound_features[
                        "spectral_envelope_coeffs_noise_list"])

        # Target features predicted at previous time steps.
        for k in range(-bw_range_target, 0):
            feature_source_array[:, -k - 1 + target_offset] = \
                piecewise_linear_function.get_value(time=i_target + k)

        predicted_vector = self.sess.run(
            self.output_layer,
            feed_dict={
                self.input_placeholder: np.reshape(feature_source_array,
                                                   [1, -1])
            })

        piecewise_linear_function.add_point(time=i_target,
                                            value=predicted_vector[0, 1:])
        # predicted_vector[0, 0] is intentionally unused: the target clock
        # advances by a fixed step. (A variable step of
        # relu(predicted_vector[0, 0]) + 1E-6 was left disabled in the
        # original code.)
        i_target += 1

    # Read the predicted trajectory back on the source time grid.
    target_period_list = []
    target_spectral_envelope_coeffs_harmonic_list = []
    target_spectral_envelope_coeffs_noise_list = []
    for i in range(n_time_steps_source):
        feature_vector = deepcopy(piecewise_linear_function.get_value(time=i))
        target_period_list.append(feature_vector[0])
        target_spectral_envelope_coeffs_harmonic_list.append(
            feature_vector[1:n_triangle_function + 1])
        target_spectral_envelope_coeffs_noise_list.append(
            feature_vector[n_triangle_function + 1:])

    feature_dict = {
        "period_list": target_period_list,
        "spectral_envelope_coeffs_harmonic_list":
            target_spectral_envelope_coeffs_harmonic_list,
        # Fixed: this entry used to be given the harmonic list, so the
        # predicted noise envelope was never synthesized.
        "spectral_envelope_coeffs_noise_list":
            target_spectral_envelope_coeffs_noise_list,
    }

    reconstruction = synthesize_voice(feature_list_dict=feature_dict,
                                      params=self.params,
                                      normalize=True)

    # NOTE(review): hard-coded absolute output location.
    base_path = "/Users/pierresendorek/"
    write(base_path + "temp/willis_reconstruction.wav", 44100, reconstruction)

    return reconstruction
# Fragment: the leading `deepcopy(...))` closes a call opened on an earlier
# line that is not visible here; it copies the slice of v_target starting at
# index 2 + params["n_triangle_function"].
#
# The remaining statements:
#   * group the source_* and target_* lists into two feature dicts with the
#     keys "period_list", "spectral_envelope_coeffs_harmonic_list" and
#     "spectral_envelope_coeffs_noise_list";
#   * synthesize each dict with synthesize_voice(normalize=True);
#   * write both results as 44100 Hz files, temp/source_sound.wav and
#     temp/target_sound.wav, under base_path.
#
# NOTE(review): base_path is a hard-coded absolute user directory.
deepcopy(v_target[2 + params["n_triangle_function"]:])) source_feature_dict = { "period_list": source_period_list, "spectral_envelope_coeffs_harmonic_list": source_spectral_envelope_coeffs_harmonic_list, "spectral_envelope_coeffs_noise_list": source_spectral_envelope_coeffs_noise_list } target_feature_dict = { "period_list": target_period_list, "spectral_envelope_coeffs_harmonic_list": target_spectral_envelope_coeffs_harmonic_list, "spectral_envelope_coeffs_noise_list": target_spectral_envelope_coeffs_noise_list } source_sound = synthesize_voice(feature_list_dict=source_feature_dict, params=params, normalize=True) target_sound = synthesize_voice(feature_list_dict=target_feature_dict, params=params, normalize=True) base_path = "/Users/pierresendorek/" write(base_path + "temp/source_sound.wav", 44100, source_sound) write(base_path + "temp/target_sound.wav", 44100, target_sound)
# For every source frame, look up the matching position on the target time
# axis (corresponding_segment) and sample the target features there, so that
# aligned_target ends up with one entry per source frame.
for it_source in range(len(source_features["period_list"])):
    it_target = corresponding_segment(it_source)
    v = target_as_piecewise_linear_function.get_value(time=it_target)

    # Layout of v: [period | harmonic coeffs | noise coeffs].
    period = v[0]
    harmonic = v[1:1 + params["n_triangle_function"]]
    noise = v[1 + params["n_triangle_function"]:]

    aligned_target["period_list"].append(period)
    aligned_target["spectral_envelope_coeffs_harmonic_list"].append(harmonic)
    aligned_target["spectral_envelope_coeffs_noise_list"].append(noise)

target_sound_aligned = synthesize_voice(feature_list_dict=aligned_target,
                                        params=params,
                                        normalize=True)

# Two-column output: source in column 0, aligned target in column 1.
out_sound = np.zeros([source_sound.shape[0], 2])

# Peak-normalize the source with a vectorized max; skip the division for an
# all-zero signal, which would otherwise fill the column with NaNs.
source_peak = np.max(np.abs(source_sound))
if source_peak > 0:
    out_sound[:, 0] = source_sound / source_peak
else:
    out_sound[:, 0] = source_sound
out_sound[:, 1] = target_sound_aligned

# The output name is built from the last two components of the source path.
# NOTE(review): the destination directory is a hard-coded absolute user path.
filepath_array = source_filepath.split(sep="/")
wavfile.write("/Users/pierresendorek/temp/parallel/" + filepath_array[-2] +
              "_" + filepath_array[-1],
              rate=44100,
              data=out_sound)