Example #1
0
def THDN(f0, signal, sample_rate):
    """Estimate THD+N by notching out the fundamental and comparing RMS levels.

    The signal is Hann-windowed, a zero-phase IIR notch centred on ``f0``
    removes the fundamental, and the RMS of what is left is divided by the
    RMS of the windowed input.

    :param f0: frequency to be removed from the signal (Hz)
    :param signal: array of samples (converted to double precision)
    :param sample_rate: sampling rate of ``signal`` (Hz)
    :return: ``(thdn, 0, 0)`` where ``thdn`` is the residual-to-total RMS
        ratio, or ``None`` when the windowed signal has no energy
    """
    signal = signal.astype(np.double)
    # The top-level scipy.signal.hann alias is gone from recent SciPy
    # releases; the windows submodule is the stable home of the function.
    signal = signal * scipy.signal.windows.hann(len(signal))

    # Computed once and reused for both the guard and the final ratio.
    total_rms = rms(signal)
    if not total_rms > 0:
        print("no signal")
        return

    # Notch quality factor grows with the square root of the fundamental.
    q = math.trunc(math.sqrt(f0) / 2.)
    # Notch frequency normalised to the Nyquist frequency.
    w0 = f0 / (sample_rate / 2.)
    b, a = scipy.signal.iirnotch(w0, q)

    # filtfilt runs the filter forward and backward (no phase distortion).
    filtered_signal = scipy.signal.filtfilt(b, a, signal)

    other_rms = rms(filtered_signal)
    print("origin rms: %.2f filtered rms: %.2f" % (total_rms, other_rms))

    # Ratio of everything except the fundamental to the whole signal.
    thdn = other_rms / total_rms
    # log10(0) raises; a perfectly clean residual is reported as -inf dB.
    thdn_db = float("-inf") if thdn == 0 else 20 * math.log10(thdn)
    print("THD+N:     %.1f%% or %.1f dB" % (thdn * 100, thdn_db))

    return (thdn, 0, 0)
Example #2
0
def generate(sound, sampling_rate, duration, num, logmel, output):
    """Synthesize ``num`` clips of the requested sound and store them in ``output``.

    Each clip is written as ``<sound>-<index, 6 digits>.wav`` (float32
    samples). When ``logmel`` is truthy, the log-mel spectrogram of the clip
    is saved next to it as a ``.npy`` file with the same stem.

    Raises ValueError if ``sound`` is not one of 'sine', 'square',
    'white_noise' or 'click'. The output directory is created first.
    """
    os.makedirs(output, exist_ok=True)

    def _resolve_generator():
        # Early-return lookup: only the requested generator name is touched.
        if sound == 'sine':
            return sine
        if sound == 'square':
            return square
        if sound == 'white_noise':
            return white_noise
        if sound == 'click':
            return click
        raise ValueError('Selected sound {} is not implemented'.format(sound))

    make_signal = _resolve_generator()

    for index in range(num):
        samples = make_signal(sampling_rate, duration)

        # Common path stem shared by the audio and spectrogram files.
        stem = os.path.join(output, '{}-{:06d}'.format(sound, index))
        scipy.io.wavfile.write(stem + '.wav', sampling_rate,
                               samples.astype(np.float32))

        if not logmel:
            continue
        np.save(stem + '.npy', logmelspectrogram(samples, sampling_rate))
Example #3
0
def denoise_audio(
    signal: np.ndarray,
    rate: int,
    including_multipass: bool = True
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, int, int]:
    """
    Denoises given audio signal.
    :param signal: audio signal as int16 values
    :param rate: audio sample rate in samples per second
    :param including_multipass: also run Multi-band Spectral subtraction on the removed noise. Caution: the returned noise signal may then be shorter than the input.
    :returns: denoised audio signal as int16 values
    :returns: the removed noise (input minus denoised signal, minus the multipass output when enabled)
    :returns: intervals of pure noise
    :returns: start of used noise interval
    :returns: end of used noise interval
    """
    noise_intervals = get_noise_intervals(signal, rate)
    start, end = get_largest_noise_interval(noise_intervals)

    # perform noise reduction, profiling the noise on the selected interval
    # NOTE(review): float16 cannot represent every int16 sample exactly - confirm this cast is intended
    denoised = nr.reduce_noise(
        audio_clip=signal.astype(np.float16),
        noise_clip=signal[start:end].astype(np.float16),
        verbose=False,
    ).astype(np.int16)
    removed = signal - denoised

    if not including_multipass:
        return denoised, removed, noise_intervals, start, end

    # Second pass: subtract what the multi-band pass extracts from the noise
    leakage = multiband_substraction_denoise(removed, rate, start, end)
    removed = removed[:leakage.size] - leakage
    return denoised, removed, noise_intervals, start, end
Example #4
0
def audioread(path, offset=0.0, duration=None, samp_rate=16000):
    """Load an audio file with librosa and return its samples as float32.

    Channels are kept as-is (``mono=False``) and the audio is resampled to
    ``samp_rate``. ``offset`` and ``duration`` are forwarded to
    ``librosa.load`` unchanged. The sample rate librosa reports is discarded.
    """
    load_options = dict(mono=False,
                        sr=samp_rate,
                        offset=offset,
                        duration=duration)
    samples, _ = librosa.load(path, **load_options)
    return samples.astype(np.float32)
Example #5
0
 def generate_sine(max_amp=100,
                   noise_amp=0,
                   duration=2.0,
                   sample_rate=8000,
                   pitch=200):
     """
     Generates a sinewave with optional additive Gaussian noise
     :param max_amp: max amplitude of the sinusoid
     :type max_amp: int
     :param noise_amp: standard deviation of the added Gaussian noise
     :type noise_amp: int
     :param duration: duration of the signal in seconds
     :type duration: float
     :param sample_rate: sampling rate of the generated digital signal
     :type sample_rate: int
     :param pitch: frequency of the sinusoid in Hz (defaults to 200)
     :type pitch: float
     :return: signal
     :rtype: numpy.ndarray, dtype: int
     """
     time = np.linspace(0,
                        duration,
                        int(duration * sample_rate),
                        endpoint=False)
     # astype(int) truncates toward zero, so samples are whole numbers
     signal = (max_amp * np.sin(2 * np.pi * pitch * time)).astype(int)
     noise = np.random.normal(0, noise_amp, len(signal))
     signal += noise.astype(int)
     return signal
Example #6
0
 def mulaw_encode(self, wav, qc):
     """Compress ``wav`` with a mu-law curve and quantize it to ``qc`` levels.

     Magnitudes are limited to 1.0 before companding. The companded value in
     [-1, 1] is mapped onto [0, qc - 1], offset by 0.5 and truncated to int32.
     """
     levels = qc - 1
     clipped = np.minimum(np.abs(wav), 1.0)
     companded = np.sign(wav) * (np.log(1 + levels * clipped) /
                                 np.log(1. + levels))
     # Shift from [-1, 1] to [0, levels]; the 0.5 offset precedes truncation.
     quantized = (companded + 1) / 2 * levels + 0.5
     return quantized.astype(np.int32)
Example #7
0
def wavread(path):
    """Read a mono, 16-bit WAV file and return its samples as float32.

    Every sample is divided by the module-level ``int16_ampmax`` constant.

    :param path: path of the WAV file to read
    :return: numpy float32 array with the scaled samples
    :raises AssertionError: if the file is not mono with 2-byte samples
    """
    # The context manager closes the file even if the format check fails.
    with wave.open(path) as f:
        channels = f.getnchannels()
        sampwidth = f.getsampwidth()
        assert channels == 1 and sampwidth == 2
        frames = f.readframes(f.getnframes())

    # frombuffer replaces the deprecated binary mode of np.fromstring.
    samples = np.frombuffer(frames, dtype=np.short)
    # Divide in floating point: an in-place "/=" on the int16 buffer cannot
    # store fractional results.
    scaled = samples.astype(np.float64) / int16_ampmax
    return scaled.astype(np.float32)
Example #8
0
def _deconvolve(signal, kernel):
    """Undo a linear convolution by division in the frequency domain.

    The kernel is zero-padded to the length of ``signal``, and the spectrum
    of ``signal`` (cast to float32) is multiplied by ``conj(H) / |H|^2``
    before transforming back.

    :param signal: the convolved sequence (a numpy array)
    :param kernel: the convolution kernel; must not be longer than ``signal``
    :return: complex array holding the first
        ``len(signal) - len(kernel) + 1`` deconvolved samples
    """
    length = len(signal) - len(kernel) + 1
    # zero pad the kernel to same length
    padded_kernel = np.hstack(
        (kernel,
         np.zeros(len(signal) - len(kernel), dtype=np.float32)))
    H = fft(padded_kernel)
    # NOTE: frequency bins where H is exactly zero divide by zero here.
    deconvolved = ifft(
        fft(signal.astype(np.float32)) * np.conj(H) / (H * np.conj(H)))
    return deconvolved[:length]
Example #9
0
    def signal(self, normalize_signal=False):
        """Read the samples of ``self.wav`` and optionally scale them to unit norm.

        The shape of the samples as read is stored in ``self._signal_shape``.
        Without normalization the array is returned as read from the file;
        with it, the samples are converted to float32 and divided by their
        norm.
        """
        samples = scipy.io.wavfile.read(self.wav)[1]
        self._signal_shape = samples.shape

        if not normalize_signal:
            return samples

        # np.linalg.norm can overflow on 16-bit integers, hence the float32
        # conversion before computing the norm
        # (https://github.com/numpy/numpy/issues/6128).
        as_float = samples.astype('float32')
        return as_float / np.linalg.norm(as_float)
Example #10
0
 def __setitem__(self, key, value):
     """Write ``value = (rate, signal)`` to ``<dir>/<key>.<format>`` and index it.

     ``rate`` must be an int and ``signal`` a numpy array. The samples are
     cast to ``self.dtype`` when one is set. 'wav' files go through
     ``scipy_wav.write``, every other format through ``soundfile.write``.
     A ``"<key> <path>"`` line is then appended to ``self.fscp``.
     """
     rate, signal = value
     assert isinstance(rate, int), type(rate)
     assert isinstance(signal, np.ndarray), type(signal)

     target = self.dir / f'{key}.{self.format}'
     # Keys may contain sub-directories; make sure they exist.
     target.parent.mkdir(parents=True, exist_ok=True)

     samples = signal if self.dtype is None else signal.astype(self.dtype)

     if self.format != 'wav':
         soundfile.write(str(target), samples, rate)
     else:
         scipy_wav.write(target, rate, samples)

     self.fscp.write(f'{key} {target}\n')
def read_audio(filename):
    """Read a 16-bit PCM WAV file and return its samples scaled by their maximum.

    :param filename: path of the WAV file
    :return: tuple ``(channels, frame_rate, samples)``; ``samples`` is a float
        array divided by its largest sample value, additionally passed
        through ``scipy.signal.decimate(samples, 2)`` when the file has two
        channels
    """
    # The context manager guarantees the file handle is released.
    with wave.open(filename, 'r') as spf:
        frames = spf.readframes(-1)
        p = spf.getnchannels()
        f = spf.getframerate()

    # np.frombuffer with an explicit little-endian int16 dtype replaces
    # np.fromstring(..., 'Int16'); the builtin float replaces the removed
    # np.float alias.
    signal = np.frombuffer(frames, dtype='<i2').astype(float)
    # Scaling uses the largest (signed) sample, exactly as before.
    sound_info = signal / signal.max()

    if p == 2:
        sound_info = scipy.signal.decimate(sound_info, 2)

    return p, f, sound_info
Example #12
0
def read_audio(filename):
    """Read a 16-bit PCM WAV file and return its samples scaled by their maximum.

    :param filename: path of the WAV file
    :return: tuple ``(channels, frame_rate, samples)``; ``samples`` is a float
        array divided by its largest sample value, additionally passed
        through ``scipy.signal.decimate(samples, 2)`` when the file has two
        channels
    """
    # The context manager guarantees the file handle is released.
    with wave.open(filename, 'r') as spf:
        frames = spf.readframes(-1)
        p = spf.getnchannels()
        f = spf.getframerate()

    # np.frombuffer with an explicit little-endian int16 dtype replaces
    # np.fromstring(..., 'Int16'); the builtin float replaces the removed
    # np.float alias.
    signal = np.frombuffer(frames, dtype='<i2').astype(float)
    # Scaling uses the largest (signed) sample, exactly as before.
    sound_info = signal / signal.max()

    if p == 2:
        sound_info = scipy.signal.decimate(sound_info, 2)

    return p, f, sound_info
Example #13
0
def raw(signal):
    '''
    compute the raw audio signal with limited range

    Args:
        signal: the audio signal from which to compute features. Should be an
            N*1 array

    Returns:
        A numpy float32 array of size (N by 1) containing the raw audio
        limited to a range between -1 and 1. An all-zero signal is returned
        unchanged (as zeros).
    '''
    # Convert before taking the absolute value: abs() on int16 wraps around
    # for -32768 and would yield a wrong (even negative) peak.
    samples = signal.astype(numpy.float32)
    peak = numpy.max(numpy.abs(samples))

    # Avoid dividing silence by zero, which would fill the output with NaN.
    if peak > 0:
        samples = samples / peak

    return samples[:, numpy.newaxis]
Example #14
0
def THDNoctave(f0, signal, sample_rate):
    """Estimate THD+N from the cos/sin Fourier coefficients of the fundamental.

    The signal has its mean removed, is resampled with cubic interpolation on
    a fine grid, and is projected onto ``cos``/``sin`` at ``f0``. The ratio of
    the total power to the power of that component gives the distortion figure.

    :param f0: fundamental frequency (Hz)
    :param signal: array of samples (converted to double precision)
    :param sample_rate: sampling rate of ``signal`` (Hz)
    :return: ``(THD, ph, amp)`` - distortion ratio, phase of the fundamental
        wrapped to [-pi, pi], and its amplitude - or ``None`` when the signal
        has no energy
    """
    signal = signal.astype(np.double)
    sample_rate = float(sample_rate)
    if not rms(signal) > 0:
        print("no signal")
        return
    t = np.arange(0, len(signal) / sample_rate, 1 / sample_rate)
    T = len(signal) / sample_rate
    freq = f0

    # truncate extra samples, to fit in an integer number of cycles of freq
    T = math.floor(T * freq) / freq

    # remove any DC offset
    x = signal - np.mean(signal)

    # resample on a linear grid of at least one million points
    N = max(1e6, len(x))  # number of samples
    dt = T / N
    # NOTE(review): the Octave original used 0:dt:(T-dt); this grid runs up
    # to the last input sample instead - confirm which range is intended.
    t1 = np.arange(0, t[-1], dt)
    x1 = interp1d(t, x, kind='cubic')(t1)  # interp1(t,x,t1,'cubic');

    # compute cos-sin fourier coefficients (np.sum instead of the builtin
    # sum: the arrays hold about a million elements)
    w = 2 * math.pi * freq
    acs = (2 / T) * np.sum(x1 * np.cos(w * t1)) * dt  # cos coefficient
    bsn = (2 / T) * np.sum(x1 * np.sin(w * t1)) * dt  # sin coefficient
    amp = (acs**2 + bsn**2)**0.5
    ph = math.pi / 2 - np.sign(acs) * math.acos(bsn / amp)

    rms22 = (2 / T) * np.sum(x1**2) * dt
    THD = (rms22 / amp**2 - 1)**0.5

    # correct phase to be in the range [-pi : pi]
    if ph > math.pi:
        ph = ph - 2 * math.pi
    if ph < -math.pi:
        ph = ph + 2 * math.pi

    # log10(0) raises; an exactly zero ratio is reported as -inf dB.
    thd_db = float("-inf") if THD == 0 else 20 * math.log10(THD)
    print("THD+N:     %.1f%% or %.1f dB" % (THD * 100, thd_db))

    return THD, ph, amp
Example #15
0
def read_input(wavfile):
    """
    Reads in the wavfile as a numpy array and peak-normalizes the signal.
    Returns the normalized signal and the sampling frequency.

    Args:
        wavfile: file path to .wav file

    Returns:
        signal: float64 numpy array of the wavfile sampled at fs, scaled so
            its largest absolute sample is 1 (left unscaled if it is all zeros)
        fs: sampling frequency
    """
    signal, fs = sf.read(wavfile)
    signal = signal.astype(np.float64)  # Cleaning

    # Normalize by the largest magnitude. abs(max(x)) ignores a negative
    # peak that is larger than the positive one and lets samples exceed 1.
    peak = np.max(np.abs(signal))
    if peak > 0:  # silence would otherwise divide by zero
        signal = signal / peak

    return signal, fs
Example #16
0
def wf_test(signal, noise, signal_boost, npix = 400):
    """Run a Wiener-filter reconstruction once per entry of ``signal_boost``.

    ``signal`` and ``noise`` are cast to float and wrapped as fields on an
    ``npix`` x ``npix`` regular grid; their power spectra are estimated in
    the Fourier codomain and used as signal prior and noise covariance.

    :param signal: array with the signal; presumably of shape (npix, npix) -
        TODO confirm against callers
    :param noise: array with the noise, same assumption on its shape
    :param signal_boost: sequence of factors applied to the signal field
    :param npix: number of pixels along each axis of the grid
    :return: tuple ``(s_data, m_data, d_data)`` of arrays shaped
        ``(len(signal_boost), npix, npix)``: boosted signal, filtered map,
        and data
    """
    pixel_space = ift.RGSpace([npix, npix])
    fourier_space = pixel_space.get_default_codomain()

    signal_field = ift.Field.from_global_data(pixel_space, signal.astype(float))

    HT = ift.HartleyOperator(fourier_space, target=pixel_space)
    power_field = ift.power_analyze(HT.inverse(signal_field), binbounds=ift.PowerSpace.useful_binbounds(fourier_space, True))

    Sh = ift.create_power_operator(fourier_space, power_spectrum=power_field)
    R = HT

    noise_field = ift.Field.from_global_data(pixel_space, noise.astype(float))
    noise_power_field = ift.power_analyze(HT.inverse(noise_field), binbounds=ift.PowerSpace.useful_binbounds(fourier_space, True))

    N = ift.create_power_operator(HT.domain, noise_power_field)
    # This line had been mangled by e-mail obfuscation into invalid syntax;
    # it sandwiches N between HT and its inverse.
    N_inverse = HT @ N @ HT.inverse

    amplify = len(signal_boost)

    s_data = np.zeros((amplify, npix, npix))
    m_data = np.zeros((amplify, npix, npix))
    d_data = np.zeros((amplify, npix, npix))

    for i in np.arange(amplify):

        # NOTE(review): the data is the noise field alone and does not depend
        # on signal_boost[i], so every iteration filters the same input -
        # confirm whether the boosted signal was meant to be added here.
        data = noise_field

        # Wiener filtering the data

        j = (R.adjoint @N_inverse.inverse)(data)
        D_inv = R.adjoint @ N_inverse.inverse @ R + Sh.inverse

        IC = ift.GradientNormController(iteration_limit=500, tol_abs_gradnorm=1e-3)
        D = ift.InversionEnabler(D_inv, IC, approximation=Sh.inverse).inverse
        m = D(j)

        s_data[i,:,:] = (signal_field * signal_boost[i]).to_global_data()
        m_data[i,:,:] = HT(m).to_global_data()
        d_data[i,:,:] = data.to_global_data()

    return (s_data, m_data, d_data)
Example #17
0
def F0_detection_wav(wav_path, signal, args):
    """Extract an F0 contour with pyworld's harvest + stonemask.

    When ``wav_path`` is given, the audio is loaded from it at its native
    sampling rate and ``signal`` is ignored; otherwise ``signal`` is used
    with ``args.sampling_rate``. F0 is searched between 50 Hz and 1100 Hz
    with a 30 ms frame period.
    """
    floor_hz, ceil_hz = 50.0, 1100.0
    shift_sec = 30 / 1000

    if wav_path is None:
        rate = args.sampling_rate
    else:
        signal, rate = librosa.load(wav_path, sr=None)

    samples = signal.astype("double")

    # Coarse estimate first, then refinement on the same time axis.
    coarse_f0, frame_times = pw.harvest(
        samples,
        rate,
        f0_floor=floor_hz,
        f0_ceil=ceil_hz,
        frame_period=shift_sec * 1000,
    )
    return pw.stonemask(samples, coarse_f0, frame_times, rate)
Example #18
0
    def process(self, signal):
        """Extract features from a single-channel signal.

        Raises ValueError when the signal has more than one channel or when
        its sample rate differs from the processor's. The samples are cast
        to int16 before ``self._rastaplp`` runs; the result is transposed
        and returned as float32 ``Features`` with matching timestamps and
        the processor's properties.
        """
        channels = signal.nchannels
        if channels != 1:
            raise ValueError(
                'signal must have one dimension, but it has {}'.format(
                    channels))

        expected_rate, actual_rate = self.sample_rate, signal.sample_rate
        if expected_rate != actual_rate:
            raise ValueError('processor and signal mismatch in sample rates: '
                             '{} != {}'.format(expected_rate, actual_rate))

        # force the signal to be int16, then extract and transpose the features
        frames = self._rastaplp(signal.astype(np.int16)).T

        return Features(frames.astype(np.float32),
                        self.times(frames.shape[0]),
                        properties=self.get_properties())
Example #19
0
    def _compute(self, signal, vtln_warp):
        """Frame-by-frame feature computation, after Kaldi's
        OfflineFeatureTpl::Compute (src/feat/feature-common.h).

        Re-implemented here so that Rasta filtering can be integrated into
        the per-frame step. Returns the feature matrix as a numpy array, or
        an empty (0, 0) array when the signal yields no frame.

        """
        n_frames = kaldi.feat.window.num_frames(signal.nsamples,
                                                self._frame_options)
        n_dims = self.ndims

        if n_frames == 0:  # pragma: nocover
            return np.zeros((0, 0))

        # the samples are handed to Kaldi as 16 bits integers
        samples = kaldi.matrix.SubVector(signal.astype(np.int16).data)

        # one output row per frame
        features = kaldi.matrix.Matrix(n_frames, n_dims)

        # buffer for the windowed waveform, and the windowing function
        frame = kaldi.matrix.Vector()
        windowing = kaldi.feat.window.FeatureWindowFunction.from_options(
            self._frame_options)

        for index in range(n_frames):
            # cut out the frame and obtain its log energy...
            log_energy = _extract_window(
                0, samples, index, self._frame_options, windowing, frame,
                self.use_energy and self.raw_energy)

            # ... then fill the matching output row (PLP, optional Rasta)
            self._compute_frame(log_energy, vtln_warp, frame,
                                features[index, :])

        return features.numpy()
Example #20
0
    def decompress(self, f, audio_data):
        """Decode an ANMFS (NMF-compressed STFT) stream into ``audio_data``.

        Layout as consumed below (little-endian):

        * 5-byte magic ``b'ANMFS'``
        * ``<HI``: channel count, sample rate
        * per channel: ``<I`` chunk count; ``<II`` phase rows and byte
          length, followed by the Huffman-coded phase matrix; then per chunk
          ``<d`` minimum value, ``<dd`` matrix min/max, matrix W (via
          ``deserialize_matrix``), ``<II`` H rows and byte length, followed
          by the Huffman-coded matrix H

        :param f: binary file object positioned at the start of the stream
        :param audio_data: receives ``sample_rate`` and one channel per
            decoded channel
        :raises Exception: if the magic bytes do not match
        """
        print('Decompressing (STFT)...')

        data = f.read(5)
        if data != b'ANMFS':
            raise Exception('Invalid file format. Expected .anmfs.')
        channel_count, sample_rate = struct.unpack('<HI', f.read(6))
        audio_data.sample_rate = sample_rate

        # valid range of the 16-bit output samples
        int16_limits = numpy.iinfo(numpy.int16)

        for i in range(channel_count):
            channel = Channel()

            # read magnitude matrix chunk count
            chunk_count = struct.unpack('<I', f.read(4))[0]

            # read phases matrix
            Prows, Plen = struct.unpack('<II', f.read(8))
            Pbytes = f.read(Plen)

            # Huffman decode the matrix to gain quantized values
            Pq = self.Phuffman.decode_int_matrix(Pbytes, Prows)

            # multiply each value by step to gain original values
            phases = self.Pdequantize_vec(Pq)

            # read and multiply NMF chunks to obtain magnitude matrix
            chunks = list()

            for _ in range(chunk_count):
                # read minimum value
                min_val = struct.unpack('<d', f.read(8))[0]

                # read min and max for re-scaling
                matrix_min, matrix_max = struct.unpack('<dd', f.read(16))

                # read companded scaled matrix W
                Wscs = deserialize_matrix(f, 'I')

                # read Huffman encoded matrix H
                Hrows, Hlen = struct.unpack('<II', f.read(8))
                Hbytes = f.read(Hlen)

                # scale matrix W back
                Wsc = self.scale_matrix(Wscs, 0, 2**32, 0, 1)

                # Huffman decode the matrix to gain quantized values
                Hscq = self.Hhuffman.decode_int_matrix(Hbytes, Hrows)

                # multiply each value by step to gain original values
                Hsc = self.Hdequantize_vec(Hscq)

                # expand the scaled matrices using mu-law
                Ws = self.expand(Wsc, self.MU_LAW_W)
                Hs = self.expand(Hsc, self.MU_LAW_H)

                # scale matrices back to normal
                W = self.scale_matrix(Ws, 0, 1, matrix_min, matrix_max)
                H = self.scale_matrix(Hs, 0, 1, matrix_min, matrix_max)

                # get original chunk back
                mag_chunk = nmf_matrix_original(W, H, min_val)

                # append it to the list
                chunks.append(mag_chunk)

            # concatenate magnitude matrix chunks
            magnitudes = numpy.concatenate(chunks)

            # join matrices back into the original STFT matrix
            stft = magnitudes * numpy.cos(
                phases) + 1j * magnitudes * numpy.sin(phases)

            # transpose back into original form
            stft = numpy.transpose(stft)

            # run inverse STFT
            signal = scipy.signal.istft(
                stft,
                fs=audio_data.sample_rate,
                window='hann',
                noverlap=self.FRAME_SIZE // 2,
                nperseg=self.FRAME_SIZE,
            )[1]

            # convert back to 16-bit signed; clip first because a plain cast
            # wraps around when the lossy reconstruction overshoots the range
            signal = numpy.clip(signal, int16_limits.min,
                                int16_limits.max).astype(numpy.int16)

            # add samples to channel and finalize
            channel.add_sample_array(signal)
            audio_data.add_channel(channel)
Example #21
0
def rms(signal):
    """Return the root-mean-square of ``signal``, computed in float64."""
    samples = signal.astype('float64')
    mean_square = np.mean(np.square(samples))
    return mean_square ** 0.5
def encode(signal):
    """Serialize ``signal`` as the raw bytes of its float32 representation.

    :param signal: numpy array of samples
    :return: ``bytes`` holding the float32 values in native byte order
    """
    # ndarray.tostring() was a deprecated alias of tobytes() and no longer
    # exists in NumPy 2; tobytes() returns the same data.
    return signal.astype(np.float32).tobytes()
Example #23
0
def _signal_arma_burg(signal, order=16, criteria="KIC", corrected=True):
    """Estimate autoregressive (AR) parameters with a reflection-coefficient
    recursion (presumably Burg's method, per the function name).

    Parameters
    ----------
    signal : array-like
        Input samples; converted to a numpy array when needed.
    order : int
        Maximum number of recursion steps (AR order).
    criteria : str or None
        Name forwarded to ``_criteria``. When not None, the recursion stops
        as soon as the value it returns increases from one order to the next.
    corrected : bool
        Forwarded unchanged to ``_criteria``.

    Returns
    -------
    ar : complex array of AR coefficients (one per completed step)
    rho : prediction-error variance after the last completed step
    ref : complex array of reflection coefficients (one per completed step)

    Raises
    ------
    ValueError
        If ``order`` is not positive, if it exceeds ``len(signal)``, or if the
        error variance becomes non-positive during the recursion.
    """

    # Sanitize order and signal
    if order <= 0.0:
        raise ValueError("Order must be > 0")
    # NOTE(review): the message mentions "length minus 2" while the check
    # only rejects order > len(signal) - confirm which bound is intended.
    if order > len(signal):
        raise ValueError("Order must be less than length signal minus 2")
    if not isinstance(signal, np.ndarray):
        signal = np.array(signal)

    N = len(signal)

    # Initialisation
    # rho is variance of driving white noise process (prediction error)
    rho = sum(abs(signal)**2.0) / float(N)
    denominator = rho * 2.0 * N

    ar = np.zeros(0, dtype=complex)  # AR parametric signal model estimate
    ref = np.zeros(
        0, dtype=complex
    )  # vector K of reflection coefficients (parcor coefficients)
    ef = signal.astype(complex)  # forward prediction error
    eb = signal.astype(complex)  # backward prediction error
    temp = 1.0

    # Main recursion

    for k in range(0, order):

        # calculate the next order reflection coefficient
        numerator = sum(
            [ef[j] * eb[j - 1].conjugate() for j in range(k + 1, N)])
        # the denominator is updated from its previous value, not recomputed
        denominator = temp * denominator - abs(ef[k])**2 - abs(eb[N - 1])**2
        kp = -2.0 * numerator / denominator

        # Update the prediction error
        temp = 1.0 - abs(kp)**2.0
        new_rho = temp * rho

        if criteria is not None:
            # k=k+1 because order goes from 1 to P whereas k starts at 0.
            residual_new = _criteria(criteria=criteria,
                                     N=N,
                                     k=k + 1,
                                     rho=new_rho,
                                     corrected=corrected)
            # seed the comparison so the first order can never trigger the stop
            if k == 0:
                residual_old = 2.0 * abs(residual_new)

            # Stop as criteria has reached
            # (ar, rho and ref keep the values of the previous order)
            if residual_new > residual_old:
                break

            # This should be after the criteria
            residual_old = residual_new
        rho = new_rho
        if rho <= 0:
            raise ValueError(
                "Found a negative value (expected positive strictly) %s."
                "Decrease the order" % rho)

        # grow the coefficient vector by one slot and store the new coefficient
        ar = np.resize(ar, ar.size + 1)
        ar[k] = kp
        if k == 0:
            # first order: only the prediction errors need updating.
            # j runs downwards so eb[j - 1] still holds its previous value.
            for j in range(N - 1, k, -1):
                ef_previous = ef[j]  # previous value
                ef[j] = ef_previous + kp * eb[j - 1]  # Eq. (8.7)
                eb[j] = eb[j - 1] + kp.conjugate() * ef_previous

        else:
            # Update the AR coeff
            # coefficients are updated in symmetric pairs (j, k - j - 1)
            khalf = (k + 1) // 2  # khalf must be an integer
            for j in range(0, khalf):
                ar_previous = ar[j]  # previous value
                ar[j] = ar_previous + kp * ar[k - j -
                                              1].conjugate()  # Eq. (8.2)
                # the middle element pairs with itself; update it only once
                if j != k - j - 1:
                    ar[k - j -
                       1] = ar[k - j -
                               1] + kp * ar_previous.conjugate()  # Eq. (8.2)

            # Update the forward and backward prediction errors
            # (same downward sweep as in the first-order branch)
            for j in range(N - 1, k, -1):
                ef_previous = ef[j]  # previous value
                ef[j] = ef_previous + kp * eb[j - 1]  # Eq. (8.7)
                eb[j] = eb[j - 1] + kp.conjugate() * ef_previous

        # save the reflection coefficient
        ref = np.resize(ref, ref.size + 1)
        ref[k] = kp

    return ar, rho, ref
Example #24
0
def encode(signal):
    """Serialize ``signal`` as the raw bytes of its float32 representation.

    :param signal: numpy array of samples
    :return: ``bytes`` holding the float32 values in native byte order
    """
    # ndarray.tostring() was a deprecated alias of tobytes() and no longer
    # exists in NumPy 2; tobytes() returns the same data.
    return signal.astype(np.float32).tobytes()