def tf_phase_vocode(s, frame_step_in, sampling_rate=16000):
    """Rebuild the phase of every frame after the first from a running phase.

    Author's original note: this step is unnecessary, and even seemed to hurt
    results, for reasons that were never tracked down.

    Args:
        s: Complex spectrogram tensor. It is unstacked along axis 0 into
            frames and ``s.shape[1]`` sizes the phase accumulator, so the
            static shape must be known (presumably [frames, bins] -- TODO
            confirm against callers).
        frame_step_in: Hop between consecutive frames, in samples.
        sampling_rate: Samples per second; ``frame_step_in / sampling_rate``
            is the time step used below.

    Returns:
        The frames stacked back along axis 0. Frame 0 is passed through
        unchanged; each later frame keeps its own magnitude and takes its
        phase from the accumulated ``phase_shift``.
    """
    delta_t = tf.convert_to_tensor(frame_step_in / sampling_rate, tf.complex64)
    imag_i = tf.convert_to_tensor(1j, tf.complex64)
    frames = tf.unstack(s)
    phase_shift = tf.zeros(s.shape[1], tf.complex64)
    # The two slices are copies, so every pair holds the *original* frames
    # even though ``frames`` is overwritten in place inside the loop.
    for i, (frame1, frame2) in enumerate(zip(frames[:-1], frames[1:])):
        phase_change = tf.cast(
            tf.angle(frame2) - tf.angle(frame1), tf.complex64)
        freq_deviation = phase_change / delta_t - frame2
        # Wrap the deviation's angle into [-pi, pi) and rebuild the deviation
        # in polar form (all arithmetic is kept in complex64).
        freq_dev_angle = tf.mod(
            tf.angle(freq_deviation) + np.pi, 2 * np.pi) - np.pi
        freq_dev_angle = tf.cast(freq_dev_angle, tf.complex64)
        freq_dev_mag = tf.cast(tf.abs(freq_deviation), tf.complex64)
        wrapped_freq_deviation = freq_dev_mag * tf.exp(freq_dev_angle * imag_i)
        true_freq = frame2 + wrapped_freq_deviation
        phase_shift = phase_shift + delta_t * true_freq
        # Keep |frame2| and replace its phase with the accumulated one.
        true_bins = tf.cast(tf.abs(frame2), tf.complex64) * tf.exp(
            tf.cast(tf.angle(phase_shift), tf.complex64) * imag_i)
        frames[i + 1] = true_bins
    return tf.stack(frames)
def spectral_loss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    """Weighted STFT magnitude + phase loss between two signal batches.

    Both inputs are transposed with permutation [0, 2, 1] so the sample axis
    is last, perturbed with Gaussian noise (stddev 1e-5) and transformed with
    a 4096-sample Hamming-windowed STFT at a hop of 2048.

    Args:
        expected: Reference signals, rank-3 tensor (the last two axes are
            swapped before the STFT).
        actual: Signals to score, same layout as ``expected``.
        mag_weight: Multiplier for the mean absolute magnitude difference.
        phase_weight: Multiplier for the mean ``1 - cos(|phase difference|)``.

    Returns:
        ``[loss, expected_stft, actual_stft]``; a NaN loss is replaced by 0.
    """
    # stft() frames the innermost axis, so move the samples there first.
    perm = [0, 2, 1]
    target = tf.transpose(expected, perm)
    output = tf.transpose(actual, perm)

    # TODO: Tunable params here (window size, window stride, window type)
    frame_length, frame_step = 4096, 2048
    target_noise = tf.random_normal(
        shape=tf.shape(target), mean=0.0, stddev=0.00001, dtype=tf.float32)
    output_noise = tf.random_normal(
        shape=tf.shape(output), mean=0.0, stddev=0.00001, dtype=tf.float32)
    target_stft = stft(target + target_noise, frame_length, frame_step,
                       window_fn=hamming_window, pad_end=True)
    output_stft = stft(output + output_noise, frame_length, frame_step,
                       window_fn=hamming_window, pad_end=True)

    target_mag = tf.abs(target_stft)
    target_phase = tf.angle(target_stft)
    output_mag = tf.abs(output_stft)
    output_phase = tf.angle(output_stft)

    magnitude_error = tf.reduce_mean(tf.abs(target_mag - output_mag))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    phase_distance = 1.0 - tf.cos(tf.abs(output_phase - target_phase))
    phase_error = tf.reduce_mean(phase_distance)

    total = mag_weight * magnitude_error + phase_weight * phase_error
    # Guard the returned scalar against NaN.
    total = tf.where(tf.is_nan(total), 0., total)
    return [total, target_stft, output_stft]
def cdense(x, dim, norm=False, actf=None, complex_activation=False, name=None,
           use_bias=False, training=True):
    """Complex-valued dense layer built from two real kernels.

    Args:
        x: Complex input tensor; its static last dimension sets the kernel's
            input size.
        dim: Number of output units.
        norm: If True, batch-normalise the output magnitude, pass it through
            softplus and recombine it with the output phase. When set, no
            bias is added even if ``use_bias`` is True.
        actf: Optional activation callable.
        complex_activation: If True apply ``actf`` to the real and imaginary
            parts separately; otherwise apply it to the magnitude and keep
            the phase.
        name: Optional scope name (default scope name is ``'c_dense'``).
        use_bias: Add a complex bias (only consulted when ``norm`` is False).
        training: Forwarded to ``tf.layers.batch_normalization``.

    Returns:
        The complex output tensor. The complex kernel is also appended to the
        ``'kernels'`` graph collection.
    """
    indim = x.shape.as_list()[-1]
    with tf.name_scope(name, 'c_dense', [x, dim, norm, actf]) as scope:
        with tf.variable_scope(scope):
            w_r = tf.get_variable('kernel_r', shape=[indim, dim])
            w_i = tf.get_variable('kernel_i', shape=[indim, dim])
            w = tf.complex(w_r, w_i)
            tf.add_to_collection('kernels', w)
            x = tf.tensordot(x, w, [[-1], [0]])
            if norm:
                # Capture the phase BEFORE replacing x: the normalised
                # magnitude is a positive real number whose angle is always
                # 0, so taking the angle afterwards would silently drop the
                # phase of the layer output.
                phase = tf.angle(x)
                magnitude = tf.nn.softplus(
                    tf.layers.batch_normalization(tf.abs(x),
                                                  training=training))
                x = tf.complex(magnitude, 0.) * tf.exp(tf.complex(0., phase))
            elif use_bias:
                b_r = tf.get_variable('bias_r', shape=[dim])
                b_i = tf.get_variable('bias_i', shape=[dim])
                b = tf.complex(b_r, b_i)
                x = x + b
            if actf is not None:
                if complex_activation:
                    x = tf.complex(actf(tf.real(x)), actf(tf.imag(x)))
                else:
                    # Activate the magnitude only; the phase is preserved.
                    x = tf.complex(actf(tf.abs(x)), 0.) * \
                        tf.exp(tf.complex(0., tf.angle(x)))
            return x
def analysis(x, N_w, N_s, NFFT, legacy=False):
    '''
    Polar form acoustic-domain analysis.

    Input/s:
        x - noisy speech.
        N_w - time-domain window length (samples).
        N_s - time-domain window shift (samples).
        NFFT - acoustic-domain DFT components.
        legacy - if True, use tf.signal.stft's default window function;
            otherwise use a non-periodic Hamming window.

    Output/s:
        Magnitude and phase spectrums.
    '''
    ## MAGNITUDE & PHASE SPECTRUMS (ACOUSTIC DOMAIN)
    if legacy:
        spectrum = tf.signal.stft(x, N_w, N_s, NFFT, pad_end=True)
    else:
        window = functools.partial(window_ops.hamming_window, periodic=False)
        spectrum = tf.signal.stft(x, N_w, N_s, NFFT, window_fn=window,
                                  pad_end=True)
    return tf.abs(spectrum), tf.angle(spectrum)
def rmse_angle(pred_a, pred_v, label_a, label_v):
    """Sum the RMS errors of both components and of the angle they form.

    The angle is ``tf.angle(tf.complex(a, v))``, i.e. ``a`` is treated as the
    real part and ``v`` as the imaginary part.

    Args:
        pred_a: Predicted first component.
        pred_v: Predicted second component.
        label_a: Target first component.
        label_v: Target second component.

    Returns:
        ``rmse(angle) + rmse(a) + rmse(v)``, where each term is the second
        item returned by ``l1diff_rms_error``.
    """
    angle_p = tf.angle(tf.complex(pred_a, pred_v))
    angle_y = tf.angle(tf.complex(label_a, label_v))
    _, rmse_v = l1diff_rms_error(pred_v, label_v)
    _, rmse_a = l1diff_rms_error(pred_a, label_a)
    _, rmse_an = l1diff_rms_error(angle_p, angle_y)
    # A stray comma used to turn this into the tuple
    # ``(rmse_an + rmse_a, +rmse_v)``; the three errors are meant to be added.
    return rmse_an + rmse_a + rmse_v
def stfts_to_specgrams(self, stfts):
    """Converts stfts to specgrams.

    Args:
      stfts: Complex64 tensor of stft, shape [batch, time, freq, channels].
        Channel 0 is used in mono mode; channels 0 and 1 (left, right) are
        used when ``self._channel_mode == 'stereo'``.

    Returns:
      specgrams: Tensor of log magnitudes and phase features (instantaneous
        frequency when ``self._ifreq`` is set, otherwise phase / pi).
        Mono: shape [batch, time, freq, 2].
        Stereo: shape [batch, time, freq, 4], ordered
        [left logmag, left phase, right logmag, right phase].
    """

    def channel_to_specgram(channel):
        # channel: [batch, time, freq] complex STFT of a single channel.
        logmag = self._safe_log(tf.abs(channel))
        phase_angle = tf.angle(channel)
        if self._ifreq:
            p = spectral_ops.instantaneous_frequency(phase_angle)
        else:
            p = phase_angle / np.pi
        return tf.concat(
            [logmag[:, :, :, tf.newaxis], p[:, :, :, tf.newaxis]], axis=-1)

    if self._channel_mode == 'stereo':
        left = channel_to_specgram(stfts[:, :, :, 0])
        right = channel_to_specgram(stfts[:, :, :, 1])
        return tf.concat((left, right), axis=3)

    return channel_to_specgram(stfts[:, :, :, 0])
def spectralLoss(expected, actual, mag_weight=1.0, phase_weight=1.0):
    """Weighted STFT magnitude + phase loss between two signal batches.

    Both inputs are transposed with permutation [0, 2, 1] so the sample axis
    is last, then transformed with a 4096-sample Hamming-windowed STFT at a
    hop of 2048.

    Args:
        expected: Reference signals, rank-3 tensor.
        actual: Signals to score, same layout as ``expected``.
        mag_weight: Multiplier for the mean absolute magnitude difference.
        phase_weight: Multiplier for the mean ``1 - cos(|phase difference|)``.

    Returns:
        The scalar ``mag_weight * mag_err + phase_weight * ph_err``.
    """
    # stft() frames the innermost axis, so move the samples there first.
    perm = [0, 2, 1]
    target = tf.transpose(expected, perm)
    output = tf.transpose(actual, perm)

    # TODO: Tunable params here (window size, window stride, window type)
    frame_length, frame_step = 4096, 2048
    target_stft = stft(target, frame_length, frame_step,
                       window_fn=hamming_window, pad_end=True)
    output_stft = stft(output, frame_length, frame_step,
                       window_fn=hamming_window, pad_end=True)

    target_mag = tf.abs(target_stft)
    target_phase = tf.angle(target_stft)
    output_mag = tf.abs(output_stft)
    output_phase = tf.angle(output_stft)

    magnitude_error = tf.reduce_mean(tf.abs(target_mag - output_mag))
    # Cosine-similarity. Also consider replacing tf.cos with 1-tf.sin
    phase_error = tf.reduce_mean(
        1.0 - tf.cos(tf.abs(output_phase - target_phase)))
    return mag_weight * magnitude_error + phase_weight * phase_error
def get_angle(self, x):
    """Return the phase angle of ``x``.

    The real and imaginary parts are obtained through ``get_realpart`` and
    ``get_imagpart``, combined into a complex tensor and passed to
    ``tf.angle``.
    """
    as_complex = tf.complex(self.get_realpart(x), self.get_imagpart(x))
    return tf.angle(as_complex)
def complex_networks_forward(self, mixed_wav_batch):
    """Run the complex network on a batch of mixed waveforms.

    Args:
        mixed_wav_batch: Batch of mixed waveforms; the size of its last axis
            is used to crop the reconstructed waveform.

    Returns:
        Tuple ``(est_clean_mag_batch, est_clean_spec_batch,
        est_clean_wav_batch)``: magnitude of the estimated spectrum, the
        estimated complex spectrum, and the waveform obtained from it.
    """
    spec = misc_utils.tf_batch_stft(mixed_wav_batch,
                                    PARAM.frame_length,
                                    PARAM.frame_step)
    is_training = (self.mode == PARAM.MODEL_TRAIN_KEY)

    # Optional magnitude clipping: limit |spec| to
    # [0, PARAM.complex_clip_mag_max] while keeping the phase.
    if PARAM.complex_clip_mag is True:
        magnitude = tf.abs(spec)
        phase = tf.angle(spec)
        magnitude = tf.clip_by_value(
            magnitude, 0.0, float(PARAM.complex_clip_mag_max))
        spec = tf.complex(magnitude, 0.0) * tf.exp(tf.complex(0.0, phase))

    net_out = self.CCNN_CRNN_CFC(spec, is_training)
    if PARAM.net_out_mask:
        # The network output is a complex mask applied to the mixture.
        est_spec = c_ops.tf_complex_multiply(net_out, spec)
    else:
        # The network output is used directly as the estimated spectrum.
        est_spec = net_out

    wav_len = tf.shape(mixed_wav_batch)[-1]
    est_wav_full = misc_utils.tf_batch_istft(
        est_spec, PARAM.frame_length, PARAM.frame_step)
    # Crop to the input length: if the forward STFT pads the end, the
    # reconstructed waveform may be longer than the mixture.
    est_wav = tf.slice(est_wav_full, [0, 0], [-1, wav_len])

    est_mag = tf.math.abs(est_spec)
    return est_mag, est_spec, est_wav
def reduce_mean_angle(weights, angles, use_complex=False, name=None):
    """ Computes the weighted mean of angles.

    The mean can be computed either with complex exponentials or with
    real-valued sine/cosine pairs. The complex-valued version is reported to
    give wrong gradients for some reason, although its forward result is
    fine.

    See https://en.wikipedia.org/wiki/Mean_of_circular_quantities

    Args:
        weights: [BATCH_SIZE, NUM_ANGLES]
        angles:  [NUM_ANGLES, NUM_DIHEDRALS]
        use_complex: Select the complex-exponential implementation.
        name: Optional name scope (default ``'reduce_mean_angle'``).

    Returns:
                 [BATCH_SIZE, NUM_DIHEDRALS]
    """
    with tf.name_scope(name, 'reduce_mean_angle', [weights, angles]) as scope:
        weights = tf.convert_to_tensor(weights, name='weights')
        angles = tf.convert_to_tensor(angles, name='angles')

        if not use_complex:
            # Real-valued path: mix the sine and cosine coordinates
            # separately, then recover the angle with atan2.
            sin_part = tf.sin(angles)
            cos_part = tf.cos(angles)
            mixed_y = tf.matmul(weights, sin_part)
            mixed_x = tf.matmul(weights, cos_part)
            return tf.atan2(mixed_y, mixed_x, name=scope)

        # Complex path: place each angle on the unit circle, take the
        # weighted mixture and read back the angle of the result.
        complex_weights = tf.complex(weights, 0.)
        unit_points = tf.exp(tf.complex(0., angles))
        mixture = tf.matmul(complex_weights, unit_points)
        return tf.angle(mixture, name=scope)
def batch_time_compressedStft_mse(y1, y2, compress_idx):
    """Squared error between magnitude-compressed complex spectra.

    Each input has its magnitude raised to ``compress_idx`` while its phase
    is kept; real and imaginary parts are then concatenated on the last axis.
    The squared differences are summed over axis 0 and averaged.

    y1: complex, [batch, time, feature_dim]
    y2: complex, [batch, time, feature_dim]
    """
    pair = (y1, y2)
    compressed_mags = [tf.pow(tf.abs(y), compress_idx) for y in pair]
    phases = [tf.angle(y) for y in pair]
    # Recombine compressed magnitude and original phase: |y|^c * exp(j*angle).
    compressed = [
        tf.complex(mag, 0.0) * tf.exp(tf.complex(0.0, phase))
        for mag, phase in zip(compressed_mags, phases)
    ]
    flat_a, flat_b = [
        tf.concat([tf.real(c), tf.imag(c)], -1) for c in compressed
    ]
    squared_error = tf.square(flat_a - flat_b)
    return tf.reduce_mean(tf.reduce_sum(squared_error, 0))
def wav2spec(src_dir, dst_dir):
    """
    Converts all wav files to spectrograms.
    Also appends the paths of all saved spectrograms to ``dst_dir + "_list.txt"``.

    Only files ending in ``.wav`` that sit one folder below ``src_dir`` and
    contain exactly 16000 samples are converted; shorter or longer clips are
    skipped. Each spectrogram is saved as
    ``dst_dir/<folder><5-digit index>.npy``.

    :param src_dir: Path to all the wav files.
    :param dst_dir: Converted spectrograms will be saved here.
    :return: -
    :raises AssertionError: if a wav file is not sampled at 16 kHz or is not
        one-dimensional.
    """
    disregarded_folders = ['DS_Store', '@eaDir', '.DS_Store']

    g = tf.Graph()
    with g.as_default():
        samples_pl = tf.placeholder(shape=[1, 16000], dtype=tf.float32)
        stft = tf.contrib.signal.stft(samples_pl, 400, 160, 400)
        mag_graph = tf.abs(stft)
        l_mag_graph = tf.log(mag_graph + 0.000001)
        phase_graph = tf.angle(stft)

    # Bind the session to ``g`` explicitly and close it (and the list file)
    # on exit, even if a conversion fails half-way through.
    with tf.Session(graph=g) as sess, open(dst_dir + "_list.txt", "a") as f:
        # iterate over all audio (wav) files:
        for folder in os.listdir(src_dir):
            folder_path = src_dir + '/' + folder
            if not os.path.isdir(folder_path):
                continue
            print(folder)
            if folder in disregarded_folders:
                continue
            i = 0  # per-folder index used in the output file name
            for wav in os.listdir(folder_path):
                if wav in disregarded_folders:
                    continue
                if not wav.endswith(".wav"):
                    continue
                path = folder_path + '/' + wav
                if not os.path.isfile(path):
                    continue

                # convert wav to spectrogram:
                samplerate, samples = scipy.io.wavfile.read(path)
                # Peak-normalise; the epsilon avoids dividing by zero on
                # silent clips.
                samples = samples / (max(abs(samples)) + 0.000001)
                samples = samples.astype(np.float32)
                assert samplerate == 16000
                assert len(samples.shape) == 1
                samples = np.reshape(samples, (1, -1))
                if samples.shape != (1, 16000):
                    continue
                l_mag, phase = sess.run([l_mag_graph, phase_graph],
                                        feed_dict={samples_pl: samples})
                phase = get_phase_difference(phase[0])
                spectrogram = stack(l_mag[0], phase)

                # save the spectrogram:
                file_ending = "/" + folder + str(i).zfill(5)
                np.save(dst_dir + file_ending, spectrogram)
                f.write(dst_dir + file_ending + ".npy\n")
                if i % 500 == 0:
                    # Same text the old print statement produced, written so
                    # it behaves identically on Python 2 and Python 3.
                    print("iteration in folder:  %d" % i)
                i += 1
def inference(self, input, seed, amppattern):
    """Build the inference graph and return the four predicted maps.

    Side effects: stores ``seed`` in ``self.ranseed`` and the static batch
    dimension of ``input`` in ``self.batch_size``.

    Args:
        input: Input tensor fed to ``self.encoder_net``.
        seed: Value stored as ``self.ranseed``.
        amppattern: Pattern added to the amplitude map after gaining a
            trailing and a leading axis and being cast to float32.

    Returns:
        Tuple ``(realmap, ampmap, phamap, finalmap)``.
    """
    self.ranseed = seed
    self.batch_size = input.get_shape().as_list()[0]

    # NOTE(review): the extent of this variable scope is assumed to cover the
    # whole body -- confirm against the checkpoint's variable names.
    with tf.variable_scope('inference'):
        encoded, nb_filter = self.encoder_net(input)
        # The encoder output's last axis is split in two halves that are read
        # as the real and imaginary parts of a complex feature.
        half_dim = int(encoded.get_shape().as_list()[-1] / 2)
        projected = self.ComplexProjectLayer('project',
                                             encoded,
                                             pretrain=self.pretrain,
                                             trainable=self.trainable,
                                             use_bias=self.use_bias,
                                             type=self.prohecttype)
        complex_feature = tf.complex(encoded[..., :half_dim],
                                     encoded[..., half_dim:])
        phase_feature = tf.angle(complex_feature)
        amplitude_feature = tf.abs(complex_feature)

        # Three decoders: projected ("real") features, amplitude and phase.
        realfeature, realmap = self.onstreamDecoder(
            'realdecoder', projected, nb_filter,
            fuseindex=self.fusion_index,
            blocktype=[1, 1, 2, 2],
            multifusiion=False)
        amplitudefea, ampmap = self.onstreamDecoder(
            'ampdecoder', amplitude_feature, nb_filter,
            fuseindex=self.fusion_index,
            blocktype=[1, 1, 1, 1],
            multifusiion=False)
        phasefea, phamap = self.onstreamDecoder(
            'phadecoder', phase_feature, nb_filter,
            fuseindex=self.fusion_index,
            blocktype=[1, 1, 1, 1],
            multifusiion=False)

        realmap = self._normlized_0to1(realmap)
        # Rescale the normalised phase map by 2*pi and shift by -pi
        # (presumably mapping [0, 1] onto [-pi, pi] -- TODO confirm).
        phamap = self._normlized_0to1(phamap) * 2 * np.pi - np.pi

        pattern = tf.expand_dims(amppattern, -1)
        pattern = tf.expand_dims(pattern, 0)
        pattern = tf.cast(pattern, tf.float32)
        ampmap = ampmap + pattern

        # Fuse the decoded phase/amplitude features with the real features
        # at the real features' spatial size.
        realfea_shape = realfeature.get_shape().as_list()
        fused_complex = self.feaIDFT(phasefea, amplitudefea)
        fused_complex = tf.image.resize_images(
            fused_complex, (realfea_shape[1], realfea_shape[2]))
        fusionfeatures = tf.concat([realfeature, fused_complex], axis=-1)
        nb_filter2 = int(realfea_shape[-1]) * 2

        finalfea, finalmap = self.onstreamDecoder(
            'finaldecoder', fusionfeatures, nb_filter2,
            fuseindex=self.fusion_index,
            blocktype=[1, 1, 2, 2],
            multifusiion=False)
        finalmap = self._normlized_0to1(finalmap)

        return realmap, ampmap, phamap, finalmap
def postprocessing(self):
    """Rebuild waveforms from ``self.separated`` using the phase of ``self.stfts``.

    ``self.separated`` is reshaped to [B * S, -1, F] and used as magnitude;
    the phase of ``self.stfts`` is repeated ``S`` times so that every source
    shares it. The result is stored in ``self.output``, logged as an audio
    summary and returned with shape [B, S, -1].
    """
    magnitudes = tf.reshape(self.separated, [self.B * self.S, -1, self.F])

    # Repeat the phase S times along a new axis 1, then fold that axis into
    # the leading one so the phase lines up with ``magnitudes``.
    phase = tf.angle(self.stfts)
    phase_shape = tf.shape(phase)
    phase = tf.expand_dims(phase, 1)
    phase = tf.tile(phase, [1, self.S, 1, 1])
    phase = tf.reshape(phase, phase_shape * [self.S, 1, 1])

    complex_stft = (tf.complex(magnitudes, 0.0 * magnitudes) *
                    tf.exp(tf.complex(0.0, phase)))
    waveforms = tf.contrib.signal.inverse_stft(
        complex_stft,
        frame_length=self.window_size,
        frame_step=self.hop_size,
        window_fn=tf.contrib.signal.inverse_stft_window_fn(self.hop_size))

    output = tf.reshape(waveforms, [self.B, self.S, -1])
    self.output = output
    tf.summary.audio(name="audio/output/reconstructed",
                     tensor=tf.reshape(output, [-1, self.L]),
                     sample_rate=config.fs,
                     max_outputs=4)
    return output
def cardioid(x):
    """Scale a complex tensor by ``0.5 * (1 + cos(angle(x)))``.

    The real-valued gain is applied to the real and imaginary parts
    separately and the result is returned as a complex tensor.
    """
    gain = 0.5 * (1 + tf.cos(tf.angle(x)))
    return tf.complex(tf.real(x) * gain, tf.imag(x) * gain)
def _tf_fft_process(self, tf_input):
    """Return smoothed magnitude and phase of the frame-averaged STFT.

    The STFT of ``tf_input`` is averaged over axis 1 (frames), then the
    magnitude and the phase are each smoothed with a stride-1 sliding mean
    along the last axis (window ``params.fft_average`` for the magnitude,
    ``params.phase_smooth`` for the phase). All ops are placed on
    ``/gpu:0``.

    Returns:
        Tuple ``(mag, phase)``.
    """

    def sliding_mean(values, width):
        # Stride-1 windows along the last axis; the tail is padded with the
        # overall mean of ``values`` before each window is averaged.
        windows = tf.contrib.signal.frame(
            values, width, 1, pad_end=True,
            pad_value=tf.reduce_mean(values))
        return tf.reduce_mean(windows, axis=-1)

    with tf.device('/gpu:0'):
        spectrum = tf.contrib.signal.stft(
            tf_input,
            frame_length=self.params.fft_length,
            frame_step=self.params.fft_step,
            pad_end=True)  # [channels, frames, ffts]
        spectrum = tf.reduce_mean(spectrum, axis=1)  # [channels, ffts]

        # Spatial fft average
        mag = sliding_mean(tf.abs(spectrum), self.params.fft_average)
        # Spatial angle average
        phase = sliding_mean(tf.angle(spectrum), self.params.phase_smooth)
        return mag, phase
def convert_to_spectrogram(waveforms, waveform_length, sample_rate,
                           spectrogram_shape, overlap):
    """Convert waveforms to normalised log-mel magnitude and mel IF features.

    Args:
        waveforms: Batch of waveforms; padded on the front of axis 1.
        waveform_length: Number of samples per waveform.
        sample_rate: Sampling rate passed to the mel weight matrix.
        spectrogram_shape: ``(time_steps, num_freq_bins)`` of the output.
        overlap: Fraction of a frame shared by consecutive frames.

    Returns:
        Tuple ``(log_mel_magnitude_spectrograms,
        mel_instantaneous_frequencies)``.
    """
    time_steps, num_freq_bins = spectrogram_shape
    frame_length = num_freq_bins * 2
    frame_step = int((1.0 - overlap) * frame_length)
    num_samples = frame_step * (time_steps - 1) + frame_length

    # For Nsynth dataset, we are putting all padding in the front
    # This causes edge effects in the tail
    front_padding = num_samples - waveform_length
    padded = tf.pad(waveforms, [[0, 0], [front_padding, 0]])

    hann = functools.partial(tf.signal.hann_window, periodic=True)
    stfts = tf.signal.stft(signals=padded,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           window_fn=hann)
    # discard_dc: drop the first frequency bin
    stfts = stfts[..., 1:]

    magnitudes = tf.abs(stfts)
    phases = tf.angle(stfts)

    # this matrix can be constant by graph optimization `Constant Folding`
    # since there are no Tensor inputs
    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_freq_bins,
        num_spectrogram_bins=num_freq_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=0.0,
        upper_edge_hertz=sample_rate / 2.0)

    # tensordot loses the static shape, so restore it by hand.
    mel_magnitudes = tf.tensordot(magnitudes, mel_matrix, axes=1)
    mel_magnitudes.set_shape(
        magnitudes.shape[:-1].concatenate(mel_matrix.shape[-1:]))
    mel_phases = tf.tensordot(phases, mel_matrix, axes=1)
    mel_phases.set_shape(
        phases.shape[:-1].concatenate(mel_matrix.shape[-1:]))

    log_mel_magnitudes = tf.log(mel_magnitudes + 1.0e-6)
    mel_ifreqs = instantaneous_frequency(mel_phases, axis=-2)

    # Normalise as (value - mean) / stddev with fixed statistics.
    log_mel_magnitudes = (log_mel_magnitudes - (-3.76)) / 10.05
    mel_ifreqs = (mel_ifreqs - 0.0) / 1.0

    return log_mel_magnitudes, mel_ifreqs
def rect_to_polar(X):
    """Convert a channel-encoded complex tensor to (magnitude, angle) pairs.

    ``X`` is turned into a complex tensor by ``channels_to_complex``. If the
    complex tensor's last dimension is 1 it is squeezed away. Magnitude and
    angle are then stacked on a new last axis.
    """
    Z = channels_to_complex(X)
    polar_parts = [tf.abs(Z), tf.angle(Z)]
    if Z.shape[-1] == 1:
        polar_parts = [tf.squeeze(part, (-1)) for part in polar_parts]
    return tf.stack(polar_parts, axis=-1)
def call(self, x):
    """Apply ``self.activation`` to a complex tensor.

    With ``self.use_magnitude`` set, only the magnitude is activated and the
    phase is kept; otherwise the real and imaginary parts are activated
    independently.
    """
    if not self.use_magnitude:
        return tf.complex(self.activation(tf.real(x)),
                          self.activation(tf.imag(x)))

    magnitude = tf.abs(x)
    phase = tf.angle(x)
    magnitude = self.activation(magnitude)
    # Recombine: activated magnitude * exp(j * phase).
    return tf.complex(magnitude, 0.) * tf.exp(tf.complex(0., phase))
def call(self, x, training=None):
    """Apply dropout to a complex tensor.

    With ``self.use_magnitude`` set, dropout is applied to the magnitude and
    the phase is kept; otherwise the real and imaginary parts go through
    ``self.dropout_real`` and ``self.dropout_imag`` respectively.

    Args:
        x: Complex input tensor.
        training: Training-mode flag, forwarded to the dropout sub-layers.
            ``None`` leaves the decision to those layers.

    Returns:
        Complex tensor of the same form as ``x``.
    """
    # ``training`` used to be accepted but silently ignored; pass it on so an
    # explicit training/inference choice by the caller reaches the dropout
    # layers. NOTE(review): assumes dropout_real / dropout_imag accept a
    # ``training`` keyword, as Keras Dropout layers do.
    if self.use_magnitude:
        y_mag = tf.abs(x)
        y_phase = tf.angle(x)
        y_mag = self.dropout_real(y_mag, training=training)
        y = tf.complex(y_mag, 0.) * tf.exp(tf.complex(0., y_phase))
    else:
        y = tf.complex(self.dropout_real(tf.real(x), training=training),
                       self.dropout_imag(tf.imag(x), training=training))
    return y
def add_histogram(cls, name, x):
    """Add histogram summaries for a real or complex tensor.

    A tensor of dtype ``cls.TF_REAL`` gets one histogram called ``name``. A
    tensor of dtype ``cls.TF_COMPLEX`` gets two histograms, ``'amplitude'``
    and ``'phase'``, inside a name scope called ``name``.

    Raises:
        TypeError: if ``x.dtype`` is neither of the two supported dtypes.
    """
    dtype = x.dtype
    if dtype == cls.TF_REAL:
        tf.summary.histogram(name, x)
        return
    if dtype == cls.TF_COMPLEX:
        with tf.name_scope(name):
            tf.summary.histogram('amplitude', tf.abs(x))
            tf.summary.histogram('phase', tf.angle(x))
        return
    raise TypeError('Variable has the unsupported type {}'.format(dtype))
def convert_to_spectrograms(waveforms, waveform_length, sample_rate,
                            spectrogram_shape, overlap):
    """Convert waveforms to normalised log-mel magnitude and mel IF features.

    Args:
        waveforms: Batch of waveforms; padded on the front of axis 1.
        waveform_length: Number of samples per waveform.
        sample_rate: Sampling rate passed to the mel weight matrix.
        spectrogram_shape: ``(time_steps, num_freq_bins)`` of the output.
        overlap: Fraction of a frame shared by consecutive frames.

    Returns:
        Tuple ``(log_mel_magnitude_spectrograms,
        mel_instantaneous_frequencies)``.
    """
    time_steps, num_freq_bins = spectrogram_shape
    frame_length = num_freq_bins * 2
    frame_step = int((1 - overlap) * frame_length)
    num_samples = frame_step * (time_steps - 1) + frame_length

    # For Nsynth dataset, we are putting all padding in the front
    # This causes edge effects in the tail
    front_padding = num_samples - waveform_length
    padded = tf.pad(waveforms, [[0, 0], [front_padding, 0]])

    hann = functools.partial(tf.signal.hann_window, periodic=True)
    stfts = tf.signal.stft(signals=padded,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           window_fn=hann)
    # discard_dc: drop the first frequency bin
    stfts = stfts[..., 1:]

    magnitudes = tf.abs(stfts)
    phases = tf.angle(stfts)

    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_freq_bins,
        num_spectrogram_bins=num_freq_bins,
        sample_rate=sample_rate,
        lower_edge_hertz=0,
        upper_edge_hertz=sample_rate / 2)

    # tensordot loses the static shape, so restore it by hand.
    mel_magnitudes = tf.tensordot(magnitudes, mel_matrix, axes=1)
    mel_magnitudes.set_shape(
        magnitudes.shape[:-1].concatenate(mel_matrix.shape[-1:]))
    mel_phases = tf.tensordot(phases, mel_matrix, axes=1)
    mel_phases.set_shape(
        phases.shape[:-1].concatenate(mel_matrix.shape[-1:]))

    log_mel_magnitudes = tf.log(mel_magnitudes + 1e-6)
    mel_ifreqs = instantaneous_frequency(mel_phases)

    # Normalise as (value - mean) / std with fixed statistics.
    log_mel_magnitudes = (log_mel_magnitudes - (-4)) / 10
    mel_ifreqs = (mel_ifreqs - 0) / 1

    return log_mel_magnitudes, mel_ifreqs
def sumofsq(image_in, keep_dims=False, axis=-1, name="sumofsq", type="mag"):
    """Compute square root of sum of squares.

    Args:
        image_in: Input tensor (passed to ``tf.abs`` or ``tf.angle``).
        keep_dims: Forwarded to ``tf.reduce_sum``.
        axis: Axis that is summed over.
        name: Variable scope name.
        type: ``"mag"`` combines magnitudes; any other value combines phase
            angles.

    Returns:
        ``sqrt(sum(component ** 2))`` over ``axis``.
    """
    with tf.variable_scope(name):
        component = tf.abs(image_in) if type == "mag" else tf.angle(image_in)
        squared = tf.square(component)
        summed = tf.reduce_sum(squared, keep_dims=keep_dims, axis=axis)
        return tf.sqrt(summed)
def __SFKernel__(self, mycount, myimage, myimage_fft_mod):
    """One iteration of the SF update; returns the updated loop variables.

    The image is reflected about the support (``2 * support * image -
    image``), its Fourier magnitude is replaced by ``self._modulus`` while
    the Fourier phase is kept, and the result is reflected again.
    ``mycount`` is decremented and ``myimage_fft_mod`` is passed through
    unchanged.
    """
    reflected = 2. * (self._support * myimage) - myimage

    # Unit phasor exp(j * phase) carrying the current Fourier phase.
    zero_real = tf.zeros(reflected.shape)
    fourier_phase = tf.angle(tf.fft3d(reflected))
    phasor = tf.exp(tf.complex(zero_real, fourier_phase))
    constrained = tf.ifft3d(tf.multiply(self._modulus, phasor))

    myimage = 2. * (self._support * constrained) - constrained
    mycount -= 1
    return mycount, myimage, myimage_fft_mod
def enhanced_sources(self):
    """Return the enhanced sources, building them on first use.

    The mixture magnitude is compressed with exponent 0.3, multiplied by the
    prediction (cast to float32) and expanded back with exponent ``1 / 0.3``.
    The result is passed to ``get_sources`` together with the mixture phase.
    The outcome is cached in ``self._enhanced_sources``; every call returns a
    ``tf.identity`` of it named ``'enhanced_sources'``.
    """
    if self._enhanced_sources is None:
        compressed_mix = tf.abs(self.mixed_specs) ** 0.3
        mask = tf.cast(self.prediction, tf.float32)
        masked_mag_specs = (compressed_mix * mask) ** (1 / 0.3)
        mix_phase = tf.angle(self.mixed_specs)
        self._enhanced_sources = get_sources(
            masked_mag_specs, mix_phase, num_samples=self.num_audio_samples)
    return tf.identity(self._enhanced_sources, name='enhanced_sources')
def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
    """Estimate a waveform from a magnitude spectrogram by iterative phase refinement.

    Starts from uniformly random phases, then alternates ``istft_tf`` /
    ``stft_tf`` ``iterations`` times, each time keeping the given magnitude
    and taking the phase of the re-analysed signal.

    Args:
        magnitude_spec: [frames, fft_bins], of type tf.float32
        iterations: Number of refinement rounds.

    Returns:
        The waveform produced by the last ``istft_tf`` call.
    """
    # Random starting phases: exp(2j * pi * u) with u ~ U[0, 1).
    uniform = tf.cast(tf.random_uniform(tf.shape(magnitude_spec)),
                      dtype=tf.complex64)
    phasors = tf.cast(tf.exp(2j * np.pi * uniform), dtype=tf.complex64)

    magnitude = tf.cast(tf.abs(magnitude_spec), tf.complex64)
    signal_estimate = istft_tf(magnitude * phasors)

    for _ in range(iterations):
        # Keep the target magnitude, adopt the phase of the current estimate.
        estimated_phase = tf.cast(tf.angle(stft_tf(signal_estimate)),
                                  tf.complex64)
        phasors = tf.exp(1j * estimated_phase)
        signal_estimate = istft_tf(magnitude * phasors)
    return signal_estimate
def call(self, feature_sP, training):
    '''
    Run ``feature_sP`` through ``self._layers`` and return unit-magnitude
    complex values carrying the phase of the result.

    The ``training`` argument is not used by this method.

    return [batch, T, F]->complex
    '''
    hidden = feature_sP
    for layer_fn in self._layers:
        hidden = layer_fn(hidden)

    # hidden: [batch, T, F, 2] -- channel 0 is real, channel 1 is imaginary.
    phase = tf.angle(tf.complex(hidden[..., 0], hidden[..., 1]))
    # exp(j * phase) has magnitude 1, so only the phase survives.
    return tf.exp(tf.complex(0.0, phase))
def __ERKernel__(self, mycount, myimage, myimage_fft_mod):
    """One iteration of the ER update; returns the updated loop variables.

    The image's Fourier magnitude is replaced by ``self._modulus`` while the
    Fourier phase is kept, then the image is multiplied by ``self._support``.
    ``myimage_fft_mod`` is refreshed with the Fourier magnitude of the new
    image (as complex64) and ``mycount`` is decremented.
    """
    # Unit phasor exp(j * phase) carrying the current Fourier phase.
    zero_real = tf.zeros(myimage.shape)
    fourier_phase = tf.angle(tf.fft3d(myimage))
    phasor = tf.exp(tf.complex(zero_real, fourier_phase))
    constrained = tf.ifft3d(tf.multiply(self._modulus, phasor))

    myimage = tf.multiply(constrained, self._support)
    myimage_fft_mod = tf.cast(tf.abs(tf.fft3d(myimage)), dtype=tf.complex64)
    mycount -= 1
    return mycount, myimage, myimage_fft_mod
def __HIOKernel__(self, mycount, myimage, myimage_fft_mod):
    """One iteration of the HIO update; returns the updated loop variables.

    The image's Fourier magnitude is replaced by ``self._modulus`` while the
    Fourier phase is kept. Where ``self._support`` applies the constrained
    image is taken; where ``self._support_comp`` applies the previous image
    minus ``self._beta`` times the constrained image is used. ``mycount`` is
    decremented and ``myimage_fft_mod`` is passed through unchanged.
    """
    previous = tf.identity(myimage)

    # Unit phasor exp(j * phase) carrying the current Fourier phase.
    zero_real = tf.zeros(myimage.shape)
    fourier_phase = tf.angle(tf.fft3d(myimage))
    phasor = tf.exp(tf.complex(zero_real, fourier_phase))
    constrained = tf.ifft3d(tf.multiply(self._modulus, phasor))

    inside = tf.multiply(self._support, constrained)
    outside = tf.multiply(self._support_comp,
                          previous - self._beta * constrained)
    myimage = inside + outside
    mycount -= 1
    return mycount, myimage, myimage_fft_mod
def preprocess(x):
    """Turn a waveform batch into stacked log-amplitude / phase features.

    Returns a float tensor whose axis 3 holds ``[log1p(|stft|),
    angle(stft) / pi]``; shape is [bs, time, freq_bins, 2].
    """
    frame_length = 400  # 16000 [samples per second] * 0.025 [s] -- default stft window frame
    frame_step = 160  # 16000 * 0.010 -- default stride
    specgram = signal.stft(x, frame_length, frame_step)

    # specgram is a complex tensor, so split it into abs and phase parts:
    scaled_phase = tf.angle(specgram) / np.pi
    # log(1 + abs) is a default transformation for energy units
    log_amplitude = tf.log1p(tf.abs(specgram))

    features = tf.stack([log_amplitude, scaled_phase], axis=3)
    return tf.to_float(features)
def stfts_to_specgrams(self, stfts):
    """Converts stfts to specgrams.

    Args:
      stfts: Complex64 tensor of stft, shape [batch, time, freq, 1].

    Returns:
      specgrams: Tensor of log magnitudes and phase features (instantaneous
        frequencies when ``self._ifreq`` is set, otherwise phase / pi),
        shape [batch, time, freq, 2].
    """
    spectrum = stfts[:, :, :, 0]  # drop the channel axis
    log_magnitude = self._safe_log(tf.abs(spectrum))
    phase = tf.angle(spectrum)

    phase_feature = (spectral_ops.instantaneous_frequency(phase)
                     if self._ifreq else phase / np.pi)

    channels = [log_magnitude[:, :, :, tf.newaxis],
                phase_feature[:, :, :, tf.newaxis]]
    return tf.concat(channels, axis=-1)