def synthesis(f0, spectrogram, aperiodicity, fs, frame_period=DEFAULT_FRAME_PERIOD):
    '''WORLD synthesis

    Args:
        f0 (ndarray(dtype=double, ndim=1)): F0
        spectrogram (ndarray(dtype=double, ndim=2)): Spectral envelope
        aperiodicity (ndarray(dtype=double, ndim=2)): Aperiodicity
        fs (int): Sampling frequency [Hz]
        frame_period (double, optional): Frame shift [ms]

    Returns:
        ndarray(dtype=double, ndim=1): Output waveform
    '''
    f0_length = len(f0)
    # Size the output from the caller's frame shift: f0_length frames of
    # frame_period milliseconds each, converted to samples at fs, plus one.
    # (The frame shift was previously hard-coded to 5.0 ms here, which
    # mis-sized the buffer whenever a different frame_period was passed.)
    y_length = int(f0_length * frame_period * fs // 1000) + 1
    y = numpy.zeros(y_length, dtype=numpy.dtype('float64'))

    # The FFT size is recovered from the number of spectral bins
    # (bins = fft_size / 2 + 1).
    fft_size = (spectrogram.shape[1] - 1) * 2

    like_2d_array_sp = utils.get_2d_pointer(spectrogram)
    like_2d_array_ap = utils.get_2d_pointer(aperiodicity)

    # The native routine writes the waveform into y in place.
    apidefinitions._Synthesis(f0, f0_length, like_2d_array_sp, like_2d_array_ap,
                              fft_size, frame_period, fs, y_length, y)
    return y
def decode_aperiodicity(coded_aperiodicity, fs, fft_size):
    '''Restore compressed aperiodicity's dimension

    Args:
        coded_aperiodicity (ndarray(dtype=double, ndim=2)): Coded aperiodicity
        fs (int): Sampling frequency [Hz]
        fft_size (int): FFT size

    Returns:
        ndarray(dtype=double, ndim=2): Aperiodicity
    '''
    f0_length = coded_aperiodicity.shape[0]
    aperiodicity = numpy.zeros((f0_length, fft_size // 2 + 1), dtype=numpy.float64)

    like_2d_array = utils.get_2d_pointer(aperiodicity)
    # Wrap the *input array*; the original wrapped the code_aperiodicity
    # function object by mistake.
    like_2d_array_coded = utils.get_2d_pointer(coded_aperiodicity)

    # Pass the pointer wrappers (not the raw ndarrays), matching how
    # decode_spectral_envelope hands its 2-D buffers to the native layer.
    # NOTE(review): assumes _DecodeAperiodicity is declared with the same
    # 2-D pointer argument types as _DecodeSpectralEnvelope -- confirm in
    # apidefinitions.
    apidefinitions._DecodeAperiodicity(like_2d_array_coded, f0_length, fs,
                                       fft_size, like_2d_array)
    return aperiodicity
def code_aperiodicity(aperiodicity, fs):
    '''Compress aperiodicity's dimension

    Args:
        aperiodicity (ndarray(dtype=double, ndim=2)): Aperiodicity
        fs (int): Sampling frequency [Hz]

    Returns:
        ndarray(dtype=double, ndim=2): compressed aperiodicity
    '''
    f0_length = aperiodicity.shape[0]
    # The FFT size is recovered from the number of bins (bins = fft_size / 2 + 1).
    fft_size = (aperiodicity.shape[1] - 1) * 2

    # Output buffer: one row per frame, width given by get_codec_aperiodicity_num(fs).
    coded_aperiodicity = numpy.zeros((f0_length, get_codec_aperiodicity_num(fs)),
                                     dtype=numpy.float64)

    # Wrap the output *array*; the original wrapped this function object
    # itself (code_aperiodicity) instead of coded_aperiodicity.
    like_2d_array_coded = utils.get_2d_pointer(coded_aperiodicity)
    like_2d_array = utils.get_2d_pointer(aperiodicity)

    # The native routine fills coded_aperiodicity in place through the wrapper.
    apidefinitions._CodeAperiodicity(like_2d_array, f0_length, fs, fft_size,
                                     like_2d_array_coded)

    # Return the filled array (the original returned the function object).
    return coded_aperiodicity
def decode_spectral_envelope(coded_spectral_envelope, fs, fft_size):
    '''Restore compressed spectral envelope's dimension

    Args:
        coded_spectral_envelope (ndarray(dtype=double, ndim=2)): Coded spectral envelope
        fs (int): Sampling frequency [Hz]
        fft_size (int): FFT size

    Returns:
        ndarray(dtype=double, ndim=2): Spectral envelope
    '''
    f0_length = coded_spectral_envelope.shape[0]
    # The coded width is taken from the input itself.
    number_of_dimensions = coded_spectral_envelope.shape[1]

    spectrogram = numpy.zeros((f0_length, fft_size // 2 + 1), dtype=numpy.float64)

    like_2d_array = utils.get_2d_pointer(spectrogram)
    # Wrap the *input array*; the original wrapped the code_spectral_envelope
    # function object by mistake.
    like_2d_array_coded = utils.get_2d_pointer(coded_spectral_envelope)

    # The native routine fills spectrogram in place through the wrapper.
    apidefinitions._DecodeSpectralEnvelope(like_2d_array_coded, f0_length, fs,
                                           fft_size, number_of_dimensions,
                                           like_2d_array)
    return spectrogram
def code_spectral_envelope(spectrogram, fs, number_of_dimensions):
    '''Compress spectral envelope's dimension

    Args:
        spectrogram (ndarray(dtype=double, ndim=2)): Spectral envelope
        fs (int): Sampling frequency [Hz]
        number_of_dimensions (int): Number of compressed spectral envelope's dimension

    Returns:
        ndarray(dtype=double, ndim=2): compressed spectral envelope
    '''
    f0_length = spectrogram.shape[0]
    # The FFT size is recovered from the number of bins (bins = fft_size / 2 + 1).
    fft_size = (spectrogram.shape[1] - 1) * 2

    coded_spectrogram = numpy.zeros((f0_length, number_of_dimensions),
                                    dtype=numpy.float64)

    like_2d_array_coded = utils.get_2d_pointer(coded_spectrogram)
    like_2d_array = utils.get_2d_pointer(spectrogram)

    # The native routine fills coded_spectrogram in place through the wrapper.
    apidefinitions._CodeSpectralEnvelope(like_2d_array, f0_length, fs, fft_size,
                                         number_of_dimensions, like_2d_array_coded)

    # Return the filled array (the original returned this function object).
    return coded_spectrogram
def d4c(x, fs, temporal_positions, f0, threshold=0.85, f0_floor=DEFAULT_F0_FLOOR, fft_size=None, **dummy):
    '''Aperiodicity estimation by D4C

    Args:
        x (ndarray(dtype=double, ndim=1)): Input waveform
        fs (int): Sampling frequency [Hz]
        temporal_positions (ndarray(dtype=double, ndim=1)): Temporal positions
        f0 (ndarray(dtype=double, ndim=1)): Extracted F0
        threshold (double, optional): Voiced/unvoiced decision threshold
            (a higher value tends to classify frames as unvoiced)
        f0_floor (double, optional): Only used to derive fft_size when
            fft_size is not given
        fft_size (int, optional): FFT size; when None it is computed by
            get_fft_size(fs, f0_floor)
        **dummy: Extra keyword arguments are accepted and ignored

    Returns:
        ndarray(dtype=double, ndim=2): Aperiodicity
    '''
    d4c_option = structures.D4COption()
    apidefinitions._InitializeD4COption(d4c_option)
    d4c_option.threshold = threshold

    # An explicit fft_size wins; otherwise derive it from fs and f0_floor.
    resolved_fft_size = get_fft_size(fs, f0_floor) if fft_size is None else fft_size

    n_frames = len(f0)
    n_bins = resolved_fft_size // 2 + 1
    aperiodicity = numpy.zeros((n_frames, n_bins), dtype=numpy.float64)

    # The native routine fills aperiodicity in place through this wrapper.
    ap_rows = utils.get_2d_pointer(aperiodicity)
    apidefinitions._D4C(x, len(x), fs, temporal_positions, f0, n_frames,
                        resolved_fft_size, d4c_option, ap_rows)
    return aperiodicity
def cheap_trick(x, fs, temporal_positions, f0, q1=-0.15, f0_floor=DEFAULT_F0_FLOOR, fft_size=None, **dummy):
    '''Spectral envelope estimation by CheapTrick

    Args:
        x (ndarray(dtype=double, ndim=1)): Input waveform
        fs (int): Sampling frequency [Hz] (also used to determine fft_size)
        temporal_positions (ndarray(dtype=double, ndim=1)): Temporal positions
        f0 (ndarray(dtype=double, ndim=1)): Extracted F0
        q1 (double, optional): Spectral recovery parameter (should not change)
        f0_floor (double, optional): Only applied when fft_size is not
            given; it is then used to derive fft_size
        fft_size (int, optional): FFT size; when None it is obtained from
            _GetFFTSizeForCheapTrick using fs and f0_floor
        **dummy: Extra keyword arguments are accepted and ignored

    Returns:
        ndarray(dtype=double, ndim=2): Spectral envelope
    '''
    ct_option = structures.CheapTrickOption()
    apidefinitions._InitializeCheapTrickOption(fs, ct_option)
    ct_option.q1 = q1

    if fft_size is not None:
        # Caller fixed the FFT size; f0_floor in the option is left untouched.
        ct_option.fft_size = fft_size
    else:
        # Set f0_floor first: the FFT size query reads it from the option.
        ct_option.f0_floor = f0_floor
        ct_option.fft_size = apidefinitions._GetFFTSizeForCheapTrick(fs, ct_option)

    n_frames = len(f0)
    # Read fft_size back from the option so the shape matches what the
    # native call will see.
    n_bins = ct_option.fft_size // 2 + 1
    spectrogram = numpy.zeros((n_frames, n_bins), dtype=numpy.float64)

    # The native routine fills spectrogram in place through this wrapper.
    sp_rows = utils.get_2d_pointer(spectrogram)
    apidefinitions._CheapTrick(x, len(x), fs, temporal_positions, f0, n_frames,
                               ct_option, sp_rows)
    return spectrogram