Example #1
2
def problem4():
	# read in tada.wav
	rate, tada = wavfile.read('tada.wav')
	
	# upon inspection, we find that tada.wav is a stereo audio file. 
	# we create stereo white noise that lasts 10 seconds
	L_white = sp.int16(sp.random.randint(-32767,32767,rate*10))
	R_white = sp.int16(sp.random.randint(-32767,32767,rate*10))
	white = sp.zeros((len(L_white),2))
	white[:,0] = L_white
	white[:,1] = R_white
	
	# pad tada signal with zeros
	padded_tada = sp.zeros_like(white)
	padded_tada[:len(tada)] = tada
	ptada = padded_tada
	
	# fourier transforms
	ftada = sp.fft(ptada,axis=0)
	fwhite = sp.fft(white,axis=0)
	
	# inverse transform of convolution
	out = sp.ifft((ftada*fwhite),axis=0)
	
	# prepping output and writing file
	out = sp.real(out)
	scaled = sp.int16(out / sp.absolute(out).max() * 32767)
	wavfile.write('my_tada_conv.wav',rate,scaled)
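Note: multiplying two DFTs implements circular convolution, so unless both signals are zero-padded to at least len(tada) + len(white) - 1 samples the tail of the result wraps around to the front. A minimal sketch of the same effect using scipy.signal.fftconvolve (which handles the padding internally), assuming the same stereo tada.wav input and a recent SciPy:

import numpy as np
from scipy.io import wavfile
from scipy.signal import fftconvolve

rate, tada = wavfile.read('tada.wav')
# 10 seconds of stereo white noise
white = np.random.randint(-32767, 32768, size=(rate * 10, 2)).astype(np.float64)
out = fftconvolve(tada.astype(np.float64), white, mode='full', axes=0)
scaled = np.int16(out / np.abs(out).max() * 32767)
wavfile.write('my_tada_conv.wav', rate, scaled)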
Example #2
0
def generate_audio(ifilename, ofilename, buffer_size, hop, oracle, seq_len, p, k):
    fs, x = wavfile.read(ifilename)
    xmat = []
    for i in range(0, len(x), hop):
        new_mat = np.array(x[i:i+buffer_size]) # try changing array type?
        xmat.append(new_mat)
    xmat = np.array(xmat)

    s, kend, ktrace = generate(oracle, seq_len, p, k) 
    xnewmat = xmat[s]  # rows of xmat are frames; select the generated frame sequence

    framelen = len(xnewmat[0])
    nframes = len(xnewmat)

    wsum = np.zeros(((nframes-1) * hop + framelen, 2)) 

    win = make_win(framelen)

    x = np.zeros(((nframes-1) * hop + framelen, 2)) 
    win_pos = range(0, len(x), hop)
    for i in range(0, nframes):
        # this is the overlap-add section
        win = make_win(len(xnewmat[i]))
        x[win_pos[i]:win_pos[i]+len(xnewmat[i])] = x[win_pos[i]:win_pos[i]+len(xnewmat[i])] + xnewmat[i] * win
        wsum[win_pos[i]:win_pos[i]+len(xnewmat[i])] = wsum[win_pos[i]:win_pos[i]+len(xnewmat[i])] + win 
    x[hop:-hop] = x[hop:-hop] / wsum[hop:-hop]
    x = np.array(x, dtype=np.int32)
    wavfile.write(ofilename, fs, x)
    return x, wsum
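generate and make_win come from the surrounding module and are not shown. A plausible make_win, assumed here to be a Hann window shaped so it broadcasts over stereo frames (a sketch, not the project's actual helper):

import numpy as np

def make_win(framelen):
    # assumed helper: Hann window shaped (framelen, 1) so it broadcasts
    # over (framelen, 2) stereo frames in the overlap-add loop above
    return np.hanning(framelen)[:, None]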
Example #3
0
def play2(filename):
    fs, data = wavfile.read(filename)
    spd2 = speedx(data, 4)

    # scaled = np.int16(data/np.max(np.abs(data)) * 32767)
    wavfile.write('test.wav', fs, data)   # second argument is the sample rate, not len(data)
    wavfile.write('spd2.wav', fs, spd2)
Example #4
0
def generate_bit(name):
    offset = 240
    l = 96
    count = 2

    sample = numpy.zeros(3 * l)
    sample[l] = 1
    sample[2 * l] = -1

    # Apply the data-shaping filter
    sf = rds.pulse_shaping_filter(96 * 8, 228000)
    shapedSamples = numpy.convolve(sample, sf)

    out = shapedSamples[528 - 288 : 528 + 288]  # [offset:offset+l*count]
    # plt.plot(sf)
    # plt.plot(out)
    # plt.show()

    iout = (out * 20000.0 / max(abs(out))).astype(numpy.dtype(">i2"))
    wavfile.write(u"waveform_{}.wav".format(name), sample_rate, iout)

    outc.write(
        u"float waveform_{name}[] = {{{values}}};\n\n".format(name=name, values=u", ".join(map(unicode, out / 2.5)))
    )
    # note: need to limit the amplitude so as not to saturate when the biphase
    # waveforms are summed

    outh.write(u"extern float waveform_{name}[{size}];\n".format(name=name, size=len(out)))
Example #5
0
def save_wavfile(signal, file_prefix, rate=16000):
    num_samples = signal.shape[0]
    time_length = signal.shape[1]

    for s in xrange(num_samples):
        file_path = file_prefix+'_{}.wav'.format(s)
        wav.write(file_path, rate, signal[s][:])
Example #6
0
    def convert_gen_to_out(data_dir):

        gen_dir = os.path.join(data_dir, 'gen')
        out_dir = os.path.join(data_dir, 'out')

        datatools.ensure_dir_exists(gen_dir)
        datatools.ensure_dir_exists(out_dir)

        gen_glob_file_path = os.path.join(gen_dir, '*.npy')
        write_flush('-- Converting gen to out...')

        for npy_data_file in glob.glob(gen_glob_file_path):
            blocks = []
            filename = npy_data_file.split('/')[-1]
            wav_filename = os.path.join(out_dir, filename.replace('.npy','.wav'))

            data_as_fft = np.load(npy_data_file)

            for fft_block in data_as_fft:
                real_imag_split = fft_block.shape[0] // 2
                real = fft_block[0:real_imag_split]
                imag = fft_block[real_imag_split:]
                time_domain = np.fft.ifft(real + 1.0j * imag).real  # keep only the real part before the int16 cast
                blocks.append(time_domain)

            song_blocks = np.concatenate(blocks)
            song_blocks = song_blocks * 32767.0
            song_blocks = song_blocks.astype('int16')
            wavfile.write(wav_filename, 44100, song_blocks)
            write_flush('finished.      \n')
Example #7
0
    def write_wav(self):
        """
        Synthesizes the analysis to a WAV file in the synthesis directory
        """
        print("Performing Resynthesis...")
        progress = ProgressBar(
                widgets=[Percentage(), Bar()],
                maxval=self.num_samples
                ).start()


        sines = []
        for i in range(len(self.bins)):
            data = [sin(2 * pi * self.bins[i][0] * (x / self.frate))
                    for x in range(self.num_samples)]
            data = [s * self.bins[i][1] for s in data]  # scale each sample by the bin amplitude
            sines.append(data)
        samples = []
        for i in range(len(sines[0])):
            s = 0
            for j in range(len(sines)):
                s += sines[j][i]
            samples.append(s)
            progress.update(i+1)
        samples = scale(samples, -1.0, 1.0)
        wavfile.write(
                "../synthesis/" + self.wav_name + "_resynth.wav",
                self.sample_rate,
                array(samples)
                )
        progress.finish()
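The nested loops above cost O(num_samples * num_bins) in pure Python. Assuming self.bins holds (frequency, amplitude) pairs, the same additive resynthesis can be vectorized with numpy (a sketch under that assumption, not the class's actual code):

import numpy as np

def resynth(bins, frate, num_samples):
    # one row per partial: amp * sin(2*pi*f*t), then mix by summing the rows
    t = np.arange(num_samples) / float(frate)
    freqs = np.array([b[0] for b in bins])
    amps = np.array([b[1] for b in bins])
    partials = amps[:, None] * np.sin(2 * np.pi * freqs[:, None] * t[None, :])
    return partials.sum(axis=0)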
Example #8
0
def _check_roundtrip(realfile, rate, dtype, channels):
    if realfile:
        fd, tmpfile = tempfile.mkstemp(suffix='.wav')
        os.close(fd)
    else:
        tmpfile = BytesIO()
    try:
        data = np.random.rand(100, channels)
        if channels == 1:
            data = data[:,0]
        if dtype.kind == 'f':
            # The range of the float type should be in [-1, 1]
            data = data.astype(dtype)
        else:
            data = (data*128).astype(dtype)

        wavfile.write(tmpfile, rate, data)

        for mmap in [False, True]:
            rate2, data2 = wavfile.read(tmpfile, mmap=mmap)

            assert_equal(rate, rate2)
            assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
            assert_array_equal(data, data2)

            del data2
    finally:
        if realfile:
            os.unlink(tmpfile)
Example #9
0
def generate(freq, length, amp, samprate, frames, comptype, compname):
   """ generates sine wave with the specified parameters """
   
   # creates numpy array of values equally spaced from 0 to length.
   # how many values in array specified by frames
   values = linspace(0,length,frames)
   
   # takes values and multiplies each values by 2pi * freq
   # Then takes the sin of each of those values
   # finally multiplies each value by the amplitude
   wave = sin(2*pi*freq*values)*amp
   
   # makes each value a 16 bit integer
   # this array will be written into the wav file later
   tone = wave.astype(int16) 
   
   # requests a filename with .wav suffix from user
   filename = raw_input("Enter filename (needs .wav suffix): ")
   
   # makes sure .wav suffix was given
   if filename[-4:] != '.wav':
      print 'ERROR: Does not have proper .wav suffix'
      generate(freq, length, amp, samprate, frames, comptype, compname)
      return
   
   # check if file with this name already exists
   # if it does, enter another name
   # if it does not, sine wave is generated
   if os.path.exists(filename):
      print "File already exists."
      generate(freq, length, amp, samprate, frames, comptype, compname)
   else:
      write(filename, samprate, tone)
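A more idiomatic suffix check uses os.path.splitext instead of slicing the last four characters; a small sketch:

import os.path

if os.path.splitext(filename)[1].lower() != '.wav':
    raise ValueError('filename needs a .wav suffix')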
Example #10
0
def datawrite(filename,rate,data):
    try:
        write(filename, rate, data)
    except IOError:
        print("IOError: wrong file or file path")
        # TODO: trace back and set a proper exit code
        sys.exit()
Example #11
0
def writeWaveFile(data, fileName, SRate=44100.0, normalize=False, removeDcWhenNormalizing=True):
    """ 
	write an array of floats to a 16 bit wave file 
	@param data a list of lists or numpy array containing the frame data
	@param fileName the output file name
	@param SRate the sampling frequency [Hz]
	@param normalize if the parameter normalize is set to True, the signal 
		will be normalized to the maximally possible value (i.e. 1). if no
		normalization is performed, and if the input signal has a maximum 
		absolute amplitude greater than 1 (i.e. if the output would be clipped),
		the function throws an error. 
	@param removeDcWhenNormalizing if we're normalizing, this determines whether
		we should remove the DC offset before doing so.
	@return nothing 
	"""

    if not type(data).__name__ in ["list", "ndarray"]:
        raise Exception("expected a list data type")
    numChannels = 1
    valMin, valMax = None, None
    dataTmp = None
    dataType = type(data[0]).__name__
    if dataType in ["list", "ndarray"]:
        numChannels = len(data)
        n = len(data[0])
        dataTmp = numpy.zeros((n, numChannels))
        for chIdx in range(numChannels):
            dataTmp2 = None
            dType2 = type(data[chIdx]).__name__
            if dType2 == "ndarray":
                dataTmp2 = data[chIdx]
            elif dType2 == "list":
                dataTmp2 = numpy.array(data[chIdx], dtype=float32)
            else:
                raise Exception("channel data is not a list or a numpy array")
            dataTmp[0:, chIdx] = dataTmp2
            del dataTmp2
    else:
        # this is a mono file
        # force creating a copy, to avoid scaling the original data...
        dataTmp = numpy.array(data)

        # normalize
    if normalize:
        if removeDcWhenNormalizing:
            dataTmp -= dataTmp.mean()
        valMin = dataTmp.min()
        valMax = dataTmp.max()
        absMax = abs(valMin)
        if abs(valMax) > absMax:
            absMax = abs(valMax)
        dataTmp /= absMax * 1.000001

        # save
        # print dataTmp.dtype, dataTmp.shape
    dataTmp *= float(2 ** 15 - 1)
    dataTmp2 = numpy.asarray(dataTmp, dtype=numpy.int16)
    sciWav.write(fileName, SRate, dataTmp2)
    del dataTmp, dataTmp2
    gc.collect()
Example #12
0
def main():
    fs, data = wavfile.read('Music.wav') # load the data
    audio = data.T[0] # one channel of the stereo stream

    Xf = zTransform(audio, 44100)


    b, a = peaking(100,10,44100,15)
    b1, a1 = peaking(1000,15,44100,15)
##    w,h = freqz(b, a, worN = 44100)
##    plt.figure()
##    plt.plot(w, 20 * np.log10(np.abs(h)))
##    plt.show()


##    Y1 = lfilter(b,a,Xf)
##    Y = lfilter(b1,a1,Y1)

    Y = lfilter(b,a, Xf)



    finalAudio = zTransformInverse(Y, 44100).real.astype(np.int16)

    wavfile.write("Test.wav",fs,finalAudio)
Example #13
0
def test():
    rate, data = wavfile.read('/Users/hehehehehe/Desktop/workspace/final/data/musicvideo/musicvideo.wav')
    filtereddata = numpy.fft.rfft(data, axis=0)
    print (data)
    filteredwrite = numpy.fft.irfft(filtereddata, axis=0)
    print (filteredwrite)
    wavfile.write('TestFiltered.wav', rate, filteredwrite)
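numpy.fft.irfft returns float64, so TestFiltered.wav above is written as a 64-bit float WAV rather than in the input's sample format. Casting back keeps the original format (sketch):

# cast back so the output keeps the input sample format (e.g. int16)
wavfile.write('TestFiltered.wav', rate, filteredwrite.astype(data.dtype))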
Example #14
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('output')
    args = parser.parse_args()

    data = []
    data.extend(gen_single(0, 2))
    scale = np.array([-9, -7, -5, -4, -2, 0, 2, 3])
    freq = 880 * np.power(2, scale / 12.0)
    print map(int, freq)
    def gen_song():
        song = [1, 5, 6, 5, 4, 3, 2, 1]
        song = [freq[i - 1] for i in song]
        print map(int, song)
        for f in song:
            data.extend(gen_single(f, 0.5) * np.log(f / 440))
    def gen_chord():
        chord = [1, 3, 5]
        chord = [freq[i - 1] for i in chord]
        print map(int, chord)
        val = gen_single(chord[0], 4)
        for i in chord[1:]:
            val += gen_single(i, 4)
        data.extend(val)
    gen_song()
    #gen_chord()
    data = np.array(data)             # convert the Python list before the array math below
    data /= np.max(np.abs(data))
    data = (data * 32767).astype('int16')
    wavfile.write(args.output, SAMPLE_RATE, data)
Example #15
0
def main(argv):
    if len(argv) == 6:
        window_size = int(argv[1])
        a_fname = argv[2]
        b_fname = argv[3]
        a_phase_b_mag_fname = argv[4]
        b_phase_a_mag_fname = argv[5]
    else:
        print(
            'Usage: %s ' % argv[0] +
            '<WINDOW_WIDTH> <FILE_1> <FILE_2> <OUTFILE_1> <OUTFILE_2>' + (
                '\n\nSwap magnitude and phase of WAV files FILE_1 and FILE_2.'
                '\n\nWINDOW_WIDTH: STFT frame length (integer # of samples)'
                '\nOUTFILE_1: phase of FILE_1, magnitude of FILE_2'
                '\nOUTFILE_2: phase of FILE_2, magnitude of FILE_1'))
        return 1

    a_rate, a = wavfile.read(a_fname)
    b_rate, b = wavfile.read(b_fname)
    assert a_rate == b_rate
    assert a.dtype == b.dtype

    print('Window width: %d samples = %.3f ms' %
          (window_size, 1e3 * window_size / a_rate))

    a_phase_b_mag, b_phase_a_mag = swap_wav_magnitude(a, b, window_size)

    wavfile.write(a_phase_b_mag_fname, a_rate, a_phase_b_mag)
    wavfile.write(b_phase_a_mag_fname, a_rate, b_phase_a_mag)

    return 0
Example #16
0
  def energyDenoise(self, audio_file, scale, denoised_audio_file, energy_denoising_debug):
    if not os.path.isfile(audio_file):
        return False
    samp_freq, signal = wavfile.read(audio_file)
    samples = signal.shape[0]
    sq_signal = signal * 1.0

    if energy_denoising_debug:
      timearray = arange(0, samples*1.0, 1)
      timearray /= samp_freq
      timearray *= 1000.0
      subplot(3,1,1)
      plot(timearray, signal, color = 'k')

    for i in range(0, len(sq_signal)):
      sq_signal[i] *= sq_signal[i]
    mean_sq = mean(sq_signal)

    for i in range(0, len(sq_signal)):
      if sq_signal[i] < scale * mean_sq:
        signal[i] = 0

    if energy_denoising_debug:
      timearray = arange(0, samples*1.0, 1)
      timearray /= samp_freq
      timearray *= 1000.0
      subplot(3,1,2)
      plot(timearray, signal, color = 'k')

    if energy_denoising_debug:
      show()

    wavfile.write(denoised_audio_file, samp_freq, signal)

    return True
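The two per-sample loops can be collapsed into array operations; a sketch of the same energy gating in numpy:

import numpy as np

sq_signal = signal.astype(np.float64) ** 2
signal[sq_signal < scale * sq_signal.mean()] = 0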
Example #17
0
def wavwrite(data, fs, nbits, fname):
    """
    Write a numpy array as a WAV file.
    
    Parameters
    ----------
    data : array of floats
           The data to be written to the WAV file.
    fs : int
         Sampling frequency of the sound.
    nbits : int
         Bit depth of the WAV file (currently only values of 16 and 32 are supported)
    fname : string
            Name of the WAV file.

    Examples
    --------
    >>> wavwrite(data, 48000, 32, "file.wav")
    """
    if nbits not in [16, 32]:
        print("Sorry can only write 16 or 32 bits at the moment! Exiting")
        return

    if nbits == 16:
        data = data*(2.**15)
        data = data.astype(int16)
    elif nbits == 32:
        data = data*(2.**31)
        data = data.astype(int32)

    wavfile.write(fname, fs, data)

    return
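One caveat: scaling by 2**15 maps a full-scale sample of +1.0 to 32768, one past the int16 maximum, so it overflows on conversion. Clipping first avoids that (a sketch for the 16-bit branch):

import numpy as np

data = np.clip(data * (2.0 ** 15), -32768, 32767).astype(np.int16)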
Example #18
0
def write_wave_file(signal, filename, sample_rate=None):
    """
    Write the signal to disk as a .wav file.

    Parameters
    ----------
    signal : numpy array or Signal
        The signal to be written to file.
    filename : str
        Name of the file.
    sample_rate : int, optional
        Sample rate of the signal [Hz].

    Returns
    -------
    filename : str
        Name of the file.

    Notes
    -----
    `sample_rate` can be 'None' if `signal` is a :class:`Signal` instance. If
    set, the given `sample_rate` is used instead of the signal's sample rate.
    Must be given if `signal` is a ndarray.

    """
    from scipy.io import wavfile
    if isinstance(signal, Signal) and sample_rate is None:
        sample_rate = int(signal.sample_rate)
    wavfile.write(filename, rate=sample_rate, data=signal)
    return filename
Example #19
0
	def volumeshift(self, filename, factor):
		#Increases or decreases the volume of the array.
		factor = 10**(factor/20.0)
		fs, data = wavfile.read(filename)
		data = np.multiply(data, factor).astype(data.dtype)  # cast back to the input sample type
		print factor
		wavfile.write("final_form.wav", fs, data)
Example #20
0
    def generate(self, seed=None, minutes=0.5):
        print "Generating module"
        timestep = self.time_dim*(self.sequence_dim-1)
        samples = int(minutes*self.samplerate*60)
        song = np.zeros(samples, dtype=np.float32)

        if seed is None :
            datafile = self.get_datafile()
            seed = datafile.get_data(None, range(timestep))
            seed = seed[0].flatten()

        song[:timestep] = seed

        print
        for i in range(0, len(song)-self.time_dim-timestep, self.time_dim) :
            sys.stdout.write('\rGenerating %d/%d samples'%(i, samples))
            sys.stdout.flush()

            params = self.fprop(song[i:i+timestep].reshape(
                (self.batch_dim, self.sequence_dim-1, self.time_dim)))
            try :
                song[i+timestep:i+timestep+self.time_dim] = self.sample_from_gmm(params)
            except ValueError :
                import ipdb ; ipdb.set_trace()

        write(EXP_PATH+"generation.wav", self.samplerate, song)
Example #21
0
def wavwriteStereo(yLeft, yRight, fs, filename, inputSound):
    """
    Write a stereo sound file from 2 arrays with the channels sounds and the sampling rate
    yLeft: floating point array of one dimension,
    yRight: floating point array of one dimension,
    fs: sampling rate
    filename: name of file to create
    inputSound: original sound, used for auto-attenuation of the output sound
    """
    if yLeft.size != yRight.size: raise ValueError('wavwriteStereo: Left and Right sound samples input arrays have different sizes')
    yMaxMagnitude = max(abs(max(yLeft)),abs(max(yRight)),abs(min(yLeft)),abs(min(yRight)))
    inputMaxMagnitude = max(abs(max(inputSound)),abs(min(inputSound)))
    attenuationRatio = inputMaxMagnitude / yMaxMagnitude
#    print 'yMaxMagnitude ==',yMaxMagnitude
#    print 'inputMaxMagnitude ==',inputMaxMagnitude
#    print 'attenuationRatio ==',attenuationRatio
    xLeft = copy.deepcopy(yLeft)                     # copy array
    xLeft *= attenuationRatio                        # scale output sound to the input sound range
    xLeft *= INT16_FAC                               # scaling floating point -1 to 1 range signal to int16 range
    xLeft = np.int16(xLeft)                          # converting to int16 type
    xRight = copy.deepcopy(yRight)                   # copy array
    xRight *= attenuationRatio                       # scale output sound to the input sound range
    xRight *= INT16_FAC                              # scaling floating point -1 to 1 range signal to int16 range
    xRight = np.int16(xRight)                        # converting to int16 type
    xStereo = np.transpose(np.array([xLeft,xRight]))
    write(filename, fs, xStereo)
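INT16_FAC is defined elsewhere in the module; presumably the full-scale int16 factor (an assumption, matching the scaling comments above):

INT16_FAC = (2 ** 15) - 1  # assumed: scales a -1..1 float signal to the int16 range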
Example #22
0
	def stretch(self, filename, factor, window_size=2048, h=512):
		""" Stretches the sound by a factor """
		fs, data = wavfile.read(filename)
		sound_array = data

		phase  = np.zeros(window_size)
		hanning_window = np.hanning(window_size)
		result = np.zeros(int(len(sound_array) / factor) + window_size)

		for i in np.arange(0, len(sound_array)-(window_size+h), h*factor):
			i = int(i)  # arange yields floats; slice indices must be ints

			# two potentially overlapping subarrays
			a1 = sound_array[i: i + window_size]
			a2 = sound_array[i + h: i + window_size + h]

			# resynchronize the second array on the first
			s1 =  np.fft.fft(hanning_window * a1)
			s2 =  np.fft.fft(hanning_window * a2)
			phase = (phase + np.angle(s2/s1)) % (2*np.pi)  # parenthesize: % binds tighter than *
			a2_rephased = np.fft.ifft(np.abs(s2)*np.exp(1j*phase))

			# add to result
			i2 = int(i/factor)
			result[i2 : i2 + window_size] += (hanning_window*a2_rephased).real

		result = ((2**(16-4)) * result/result.max()) # normalize (16bit)
		result = result.astype('int16')

		wavfile.write('pitchshift.wav', fs, result) # write at the input sample rate
Example #23
0
def synt_all_method3(folder_in):
    folder_out = folder_in[:-15] + "_synt_mcep_mat"
    if not os.path.exists(folder_out):
        os.mkdir(folder_out)

    mcep_mats = sorted([item for item in os.listdir(folder_in) if "_mc_mat" in item])
    f0s = sorted([item for item in os.listdir(folder_in) if "_f0" in item])
    aper_mats = sorted([item for item in os.listdir(folder_in) if "_aper_mat" in item])
    world = World(samplingrate, float(hop_length) / samplingrate * 1000)

    for mcep_mat_file, f0_file, aper_mat_file in zip(mcep_mats, f0s, aper_mats):
        print(mcep_mat_file)
        res = synt_from_mcep_matrix_to_spec(
            world,
            np.load(os.path.join(folder_in, f0_file)),
            np.load(os.path.join(folder_in, mcep_mat_file)),
            np.load(os.path.join(folder_in, aper_mat_file)))
        print("writing synth for {0}".format(mcep_mat_file))
        # wavfile.write("norm.wav", 16000, normalize_int16(s))
        wavfile.write(
            os.path.join(
                folder_out,
                mcep_mat_file.replace("mc_mat.npy", "") + "synth.wav"),
            sample_rate,
            normalize_int16(res))
Example #24
0
def test_noise(noise_coeff=0.00):
    file = 'test16k.wav'
    fs, x = wavfile.read(file)
    fs, nbit, x_length, x = readwav(file)
    period = 5.0
    opt = pyDioOption(40.0, 700, 2.0, period, 4)

    if noise_coeff < 1:
        noise_str = str(noise_coeff).split('.')[-1]
    else:
        noise_str = str(noise_coeff).split('.')[0]
    f0, time_axis = dio(x, fs, period, opt)

    f0_by_dio = copy.deepcopy(f0)
    f0 = stonemask(x, fs, period, time_axis, f0)
    spectrogram = star(x, fs, period, time_axis, f0)
    spectrogram = cheaptrick(x, fs, period, time_axis, f0)
    residual = platinum(x, fs, period, time_axis, f0, spectrogram)
    old_spectrogram = np.copy(spectrogram)
    plt.matshow(old_spectrogram, cmap="gray")
    plt.title("Before %s noise" % noise_str)
    plt.savefig("before_%s.png" % noise_str)
    random_state = np.random.RandomState(1999)
    spectrogram += noise_coeff * np.abs(random_state.randn(*spectrogram.shape))
    residual += noise_coeff * np.abs(random_state.randn(*residual.shape))
    y = synthesis(fs, period, f0, spectrogram, residual, len(x))
    ys = synthesis(fs, period, f0, old_spectrogram, residual, len(x))
    wavfile.write("y_%s.wav" % noise_str, fs, soundsc(y))
    wavfile.write("y_no_noise.wav", fs, soundsc(ys))
    plt.clf()
    plt.plot(soundsc(ys), label='orig')
    plt.plot(soundsc(y), label='noisy', color='red')
    plt.title("Comparison of time series with %s noise" % noise_str)
    plt.legend()
    plt.savefig("comparison_%s.png" % noise_str)
Example #25
0
def array2audio(sDir, iRate, aData):
    """
    writes a .wav audio file to disk from an array
    """
    from scipy.io.wavfile import write
    
    write(sDir, iRate, aData)
Example #26
0
    def _process_loop(self):
        with WavProcessor() as proc:
            self._ask_data.set()
            while True:
                if self._process_buf is None:
                    # Waiting for data to process
                    time.sleep(self._processor_sleep_time)
                    continue

                self._ask_data.clear()
                if self._save_path:
                    f_path = os.path.join(
                        self._save_path, 'record_{:.0f}.wav'.format(time.time())
                    )
                    wavfile.write(f_path, self._sample_rate, self._process_buf)
                    logger.info('"{}" saved.'.format(f_path))

                logger.info('Start processing.')
                predictions = proc.get_predictions(
                    self._sample_rate, self._process_buf)
                logger.info(
                    'Predictions: {}'.format(format_predictions(predictions))
                )

                logger.info('Stop processing.')
                self._process_buf = None
                self._ask_data.set()
Example #27
0
def writewav(audiopath="wave.npz", outpath="out.wav"):
    """ Write a wav file given an input sample array file that can be read with readfile. """
    import scipy.io.wavfile as wav

    samplerate, samples = readfile(audiopath)
    wav.write(outpath, samplerate, samples)
    return
Example #28
0
def test_filter(n):
	audio = wave.read("signal-echo.wav")

	audio[1][:,0] = convolve(audio[1][:,0], filters[n], 'same')
	audio[1][:,1] = audio[1][:,0]

	wave.write("signal-echo-out.wav", audio[0], audio[1])
Example #29
0
def thumbnailWrapper(inputFile, thumbnailWrapperSize):
    st_window = 0.5
    st_step = 0.5
    if not os.path.isfile(inputFile):
        raise Exception("Input audio file not found!")

    [fs, x] = audioBasicIO.readAudioFile(inputFile)
    if fs == -1:    # could not read file
        return

    [A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x, fs, st_window, st_step,
                                                     thumbnailWrapperSize)

    # write thumbnailWrappers to WAV files:
    if inputFile.endswith(".wav"):
        thumbnailWrapperFileName1 = inputFile.replace(".wav", "_thumb1.wav")
        thumbnailWrapperFileName2 = inputFile.replace(".wav", "_thumb2.wav")
    if inputFile.endswith(".mp3"):
        thumbnailWrapperFileName1 = inputFile.replace(".mp3", "_thumb1.mp3")
        thumbnailWrapperFileName2 = inputFile.replace(".mp3", "_thumb2.mp3")
    wavfile.write(thumbnailWrapperFileName1, fs, x[int(fs * A1):int(fs * A2)])
    wavfile.write(thumbnailWrapperFileName2, fs, x[int(fs * B1):int(fs * B2)])
    print("1st thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec" \
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName1, A1, A2))
    print("2nd thumbnailWrapper (stored in file {0:s}): {1:4.1f}sec" \
          " -- {2:4.1f}sec".format(thumbnailWrapperFileName2, B1, B2))

    # Plot self-similarity matrix:
    fig = plt.figure()
    ax = fig.add_subplot(111, aspect="auto")
    plt.imshow(Smatrix)
    # Plot best-similarity diagonal:
    Xcenter = (A1 / st_step + A2 / st_step) / 2.0
    Ycenter = (B1 / st_step + B2 / st_step) / 2.0

    e1 = matplotlib.patches.Ellipse((Ycenter, Xcenter),
                                    thumbnailWrapperSize * 1.4, 3, angle=45,
                                    linewidth=3, fill=False)
    ax.add_patch(e1)

    plt.plot([B1/ st_step, Smatrix.shape[0]], [A1/ st_step, A1/ st_step], color="k",
             linestyle="--", linewidth=2)
    plt.plot([B2/ st_step, Smatrix.shape[0]], [A2/ st_step, A2/ st_step], color="k",
             linestyle="--", linewidth=2)
    plt.plot([B1/ st_step, B1/ st_step], [A1/ st_step, Smatrix.shape[0]], color="k",
             linestyle="--", linewidth=2)
    plt.plot([B2/ st_step, B2/ st_step], [A2/ st_step, Smatrix.shape[0]], color="k",
             linestyle="--", linewidth=2)

    plt.xlim([0, Smatrix.shape[0]])
    plt.ylim([Smatrix.shape[1], 0])

    ax.yaxis.set_label_position("right")
    ax.yaxis.tick_right()

    plt.xlabel("frame no")
    plt.ylabel("frame no")
    plt.title("Self-similarity matrix")

    plt.show()
Example #30
0
def generate_from_image(filename, random_phases=False):
    SAMPLES_PER_WINDOW = 326
    SAMPLING_RATE = 44100
    spectrogram = mpimg.imread(filename)
    lum_spectrogram = luminosity(spectrogram)

    # mpimg reads in an image with an upside-down y-axis (i.e. 0 at the top and
    # max(y) at the bottom), so we need to flip it    
    lum_spectrogram = np.flipud(lum_spectrogram)    
    num_rows, num_cols = lum_spectrogram.shape
    num_samples = num_cols * SAMPLES_PER_WINDOW
    t = np.matrix(time(num_samples))
    f = np.matrix(np.apply_along_axis(frequency, 0, np.arange(num_rows)/float(num_rows)))
    f = f.transpose()
    if random_phases:
        phi = np.matrix(np.random.rand(num_rows)*2*np.pi)
        phi = phi.transpose()
    else:
        phi = np.random.random()*2*np.pi
    a = amplitude(lum_spectrogram)
    oscillators = np.multiply(a, np.sin(2*np.pi*f*t+phi))
    signal = oscillators.sum(axis=0)
    signal = signal / np.amax(np.absolute(signal))
    signal = np.squeeze(np.asarray(signal))
    wavfile.write("output_signal.wav", SAMPLING_RATE, signal)
Example #31
0
from pydub.playback import play
warnings.filterwarnings('ignore')

path = r'/Users/peterzuker/Desktop/Audio Modification/10047/model_input/spells/1/exemplars/1499777912068.wav'

#reload the audio to use librosa's expected format
lr_speech_data, lr_speech_rate = librosa.load(path)

stretched = librosa.effects.time_stretch(lr_speech_data, 1.47)

y, sr = librosa.load(path)
D = librosa.stft(y, n_fft=2048, hop_length=512)
D_slow = librosa.phase_vocoder(D, 1. / 3, hop_length=512)
y_slow = librosa.istft(D_slow, hop_length=512)

wavfile.write('test.wav', sr, y_slow)  # args are (filename, rate, data); y_slow is the slowed audio

rate, data = wavfile.read(path)
sound = AudioSegment.from_file(path, format="wav")

play(sound)


def remove_silence(audio, threshold):
    #identify all samples with an absolute value greater than the threshold
    greater_index = numpy.greater(numpy.absolute(audio), threshold)
    #filter to only include the identified samples
    above_threshold_data = audio[greater_index]
    return above_threshold_data

Example #32
0
 def save_wav(self, wav, path):
     wav *= 32767 / max(0.01, np.max(np.abs(wav)))
     wavfile.write(path, self.sample_rate, wav.astype(np.int16))
Example #33
0
                                               hparams.quantize_channels - 1)
                elif is_mulaw(hparams.input_type):
                    ref = P.inv_mulaw(ref, hparams.quantize_channels - 1)
                if hparams.postprocess is not None and hparams.postprocess not in [
                        "", "none"
                ]:
                    ref = getattr(audio, hparams.postprocess)(ref)
                if hparams.global_gain_scale > 0:
                    ref /= hparams.global_gain_scale

            # clip (just in case)
            gen = np.clip(gen, -1.0, 1.0)
            if has_ref_file:
                ref = np.clip(ref, -1.0, 1.0)

            wavfile.write(dst_wav_path, hparams.sample_rate, to_int16(gen))
            if has_ref_file:
                wavfile.write(target_wav_path, hparams.sample_rate,
                              to_int16(ref))

            # log (TODO)
            if output_html and False:
                print("""
    <audio controls="controls" >
    <source src="/{}/audio/{}/{}" autoplay/>
    Your browser does not support the audio element.
    </audio>
    """.format(hparams.name, dst_dir_name, basename(dst_wav_path)))

    print(
        "Finished! Check out {} for generated audio samples.".format(dst_dir))
Example #34
0
import utilities as utl
from scipy.io import wavfile
import numpy as np

# Read the files as numpy array
rate1, data1 = wavfile.read("13-84-1.wav")
rate2, data2 = wavfile.read("13-172-1.wav")

# Using the mixSounds helper function from utilities.py
mixedX = utl.mixSounds([data1, data2], [0.3, 0.7]).astype(np.int16)
mixedY = utl.mixSounds([data1, data2], [0.6, 0.4]).astype(np.int16)

# Plot the mixed sound sources
utl.plotSounds([mixedX, mixedY], ["mixed-1", "mixed-3"], rate1,
               "../plots/sounds/Ring_StarWars_mixed", False)

# Save the mixed sources as wav files
wavfile.write("mixed-1.wav", rate1, mixedX)
wavfile.write("mixed-3.wav", rate1, mixedY)
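mixSounds lives in the project's utilities.py and is not shown. A plausible implementation (hypothetical, for illustration only) is a weighted sum trimmed to the shortest source:

import numpy as np

def mixSounds(sources, weights):
    # hypothetical helper: weighted sum of signals, trimmed to the shortest one
    n = min(len(s) for s in sources)
    return sum(w * s[:n].astype(np.float64) for s, w in zip(sources, weights))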
Example #35
0
print(test.shape)

#
# INFO: The test data will have two parts, X_test and y_test. X_test is
# going to be the first portion of the test audio file, which we will
# be providing the computer as input. y_test, the "label" if you will,
# is going to be the remaining portion of the audio file. Like such,
# the computer will use linear regression to derive the missing
# portion of the sound file based off of the training data its received!

#
# Save the original 'test' clip, the one you're about to delete
# half of, so that you can compare it to the 'patched' clip once
# you've generated it. HINT: you should have got the sample_rate
# when you were loading up the .wav files:
wavfile.write('Original Test Clip.wav', sample_rate, test)

#
# TODO: Prepare the TEST date by creating a slice called X_test. It
# should have Provided_Portion * n_audio_samples audio sample features,
# taken from your test audio file, currently stored in the variable
# 'test'. In other words, grab the FIRST Provided_Portion *
# n_audio_samples audio features from test and store it in X_test. This
# should be accomplished using indexing.
#
# .. your code here ..

slice_size = int(Provided_Portion * n_audio_samples)
X_test = test[:slice_size]

#
Example #36
0
    def test_st_audio(self):
        """Test st.audio."""

        # Fake audio data: expect the resultant mimetype to be audio default.
        fake_audio_data = "\x11\x22\x33\x44\x55\x66".encode("utf-8")

        st.audio(fake_audio_data)

        el = self.get_delta_from_queue().new_element

        # locate resultant file in InMemoryFileManager and test its properties.
        file_id = _calculate_file_id(fake_audio_data, "audio/wav")
        self.assertTrue(file_id in in_memory_file_manager)

        afile = in_memory_file_manager.get(file_id)
        self.assertEqual(afile.mimetype, "audio/wav")
        self.assertEqual(afile.url, el.audio.url)

        # Test using generated data in a file-like object.

        sampleRate = 44100
        frequency = 440
        length = 5

        t = np.linspace(
            0, length, sampleRate * length
        )  #  Produces a 5 second Audio-File
        y = np.sin(frequency * 2 * np.pi * t)  #  Has frequency of 440Hz

        wavfile.write("test.wav", sampleRate, y)

        with io.open("test.wav", "rb") as f:
            st.audio(f)

        el = self.get_delta_from_queue().new_element
        self.assertTrue(".wav" in el.audio.url)

        os.remove("test.wav")

        # Test using a URL instead of data
        some_url = "https://www.soundhelix.com/examples/mp3/SoundHelix-Song-3.mp3"
        st.audio(some_url)

        el = self.get_delta_from_queue().new_element
        self.assertEqual(el.audio.url, some_url)

        # Test that a non-URL string is assumed to be a filename
        bad_filename = "blah"
        with self.assertRaises(FileNotFoundError):
            st.audio(bad_filename)

        # Test that we can use an empty/None value without error.
        st.audio(None)
        el = self.get_delta_from_queue().new_element
        self.assertEqual(el.audio.url, "")

        # Test that our other data types don't result in an error.
        st.audio(b"bytes_data")
        st.audio("str_data".encode("utf-8"))
        st.audio(BytesIO(b"bytesio_data"))
        st.audio(np.array([0, 1, 2, 3]))
Example #37
0
 def export(self, filename):
     # take the real part and scale to the full int16 range
     scaled = sp.real(self.wave)
     scaled = sp.int16(scaled * 32767. / sp.absolute(scaled).max())
     wavfile.write(filename, self.rate, scaled)
Example #38
0
             marker='o')
    plt.legend()

    plt.tight_layout(pad=0.5)

    ## GUI
    if not args.gui:
        plt.show()
    else:
        plt.show(block=False)

    if args.save:
        from scipy.io import wavfile

        wavfile.write(
            'bss_iva_mix.wav', room.fs,
            pra.normalize(mics_signals[0, :], bits=16).astype(np.int16))
        for i, sig in enumerate(y):
            wavfile.write('bss_iva_source{}.wav'.format(i + 1), room.fs,
                          pra.normalize(sig, bits=16).astype(np.int16))

    if args.gui:

        # Make a simple GUI to listen to the separated samples
        from tkinter import Tk, Button, Label
        import sounddevice as sd

        # Now comes the GUI part
        class PlaySoundGUI(object):
            def __init__(self, master, fs, mix, sources):
                self.master = master
Example #39
0
spec_thresh = 4  # threshold for spectrograms (lower filters out more noise)
lowcut = 500  # Hz # Low cut for our butter bandpass filter
highcut = 15000  # Hz # High cut for our butter bandpass filter
# For mels
n_mel_freq_components = 64  # number of mel frequency channels
shorten_factor = 10  # how much should we compress the x-axis (time)
start_freq = 300  # Hz # What frequency to start sampling our melS from
end_freq = 8000  # Hz # What frequency to stop sampling our melS from

# Grab your wav and filter it
mywav = "sound01.wav"
rate, data = wavfile.read(mywav)


data = data[:,0] # Only take one channel
wavfile.write("audio_in_out01.wav", rate, data)

data = butter_bandpass_filter(data, lowcut, highcut, rate, order=1)

# Only use a short clip for our demo
if np.shape(data)[0] / float(rate) > 10:
    data = data[0 : rate * 10]
print("Length in time (s): ", np.shape(data)[0] / float(rate))


wav_spectrogram = pretty_spectrogram(
    data.astype("float64"),
    fft_size=fft_size,
    step_size=step_size,
    log=True,
    thresh=spec_thresh,
Example #40
0
 def write(self, filename):
     write(filename, 44100, self.full_sample)
Example #41
0
def logMMSE(inputFilePath, outputFilePath):

    [sample_rate, sample_data] = wavfile.read(inputFilePath, True)

    # Frame size in samples
    len = np.int(np.floor(20 * sample_rate * 0.001))
    if len % 2 == 1:
        len += 1

    # window overlap in percent of frame size
    perc = 50
    len1 = np.floor(len * perc * 0.01)
    len2 = len - len1

    win = np.hanning(len)
    win = win * len2 / sum(win)

    # Noise magnitude calculations - assuming that the first 6 frames is noise / silence
    nFFT = len << 2
    noise_mean = np.zeros([nFFT, 1])
    dtype = 2 << 14
    j = 0

    for i in range(1, 7):

        s1 = j
        s2 = j + np.int(len)

        batch = sample_data[s1:s2] / dtype

        X = win * batch

        foo = np.fft.fft(X, np.int(nFFT))

        noise_mean += np.abs(foo.reshape(foo.shape[0], 1))

        j += len

    noise_mu = np.square(noise_mean / 6)

    # Allocate memory and initialize various variables

    x_old = np.zeros([np.int(len1), 1])
    Nframes = np.floor(sample_data.shape[0] / len2) - np.floor(len / len2)
    xfinal = np.zeros([np.int(Nframes * len2), 1])

    # Start Processing
    k = 0
    aa = 0.98
    mu = 0.98
    eta = 0.15

    ksi_min = 10**(-25 * 0.1)

    for n in range(0, np.int(Nframes)):

        s1 = k
        s2 = k + np.int(len)

        batch = sample_data[s1:s2] / dtype
        insign = win * batch

        spec = np.fft.fft(insign, nFFT)

        # Compute the magnitude
        sig = abs(spec)
        sig2 = sig**2

        # Limit post SNR to avoid overflows
        gammak = np.divide(sig2.reshape(sig2.shape[0], 1),
                           noise_mu.reshape(noise_mu.shape[0], 1))
        gammak[gammak > 40] = 40

        foo = gammak - 1
        foo[foo < 0] = 0

        if 0 == n:
            ksi = aa + (1 - aa) * foo
        else:

            # a priori SNR
            ksi = aa * Xk_prev / noise_mu + (1 - aa) * foo

            # limit ksi to - 25 db
            ksi[ksi < ksi_min] = ksi_min

        log_sigma_k = gammak * ksi / (1 + ksi) - np.log(1 + ksi)
        vad_decision = sum(log_sigma_k) / len

        # noise only frame found
        if vad_decision < eta:
            noise_mu = mu * noise_mu + (1 - mu) * sig2.reshape(
                [sig2.shape[0], 1])

        # == = end of vad == =

        # Log - MMSE estimator
        A = ksi / (1 + ksi)
        vk = A * gammak

        ei_vk = 0.5 * expn(1, vk)
        hw = A * np.exp(ei_vk)

        sig = sig.reshape([sig.shape[0], 1]) * hw
        Xk_prev = sig**2

        xi_w = ifft(hw * spec.reshape([spec.shape[0], 1]), nFFT, 0)
        xi_w = np.real(xi_w)

        xfinal[k:k + np.int(len2)] = x_old + xi_w[0:np.int(len1)]
        x_old = xi_w[np.int(len1):np.int(len)]

        k = k + np.int(len2)

    wavfile.write(outputFilePath, sample_rate, xfinal)
Example #42
0
chunks.append([chunks[-1][1], audioFrameCount, shouldIncludeFrame[i - 1]])
chunks = chunks[1:]

outputAudioData = np.zeros((0, audioData.shape[1]))
outputPointer = 0

lastExistingFrame = None
for chunk in chunks:
    audioChunk = audioData[int(chunk[0] *
                               samplesPerFrame):int(chunk[1] *
                                                    samplesPerFrame)]

    sFile = TEMP_FOLDER + "/" + TEMP_START_FILE_NAME
    eFile = TEMP_FOLDER + "/" + TEMP_END_FILE_NAME
    wavfile.write(sFile, SAMPLE_RATE, audioChunk)
    with WavReader(sFile) as reader:
        with WavWriter(eFile, reader.channels, reader.samplerate) as writer:
            tsm = phasevocoder(reader.channels, speed=NEW_SPEED[int(chunk[2])])
            tsm.run(reader, writer)
    _, alteredAudioData = wavfile.read(eFile)
    leng = alteredAudioData.shape[0]
    endPointer = outputPointer + leng
    outputAudioData = np.concatenate(
        (outputAudioData, alteredAudioData / maxAudioVolume))

    #outputAudioData[outputPointer:endPointer] = alteredAudioData/maxAudioVolume

    # smooth out transitiion's audio by quickly fading in/out

    if leng < AUDIO_FADE_ENVELOPE_SIZE:
Example #43
0
                    prev_w = w_s
                cond = c_mb
                print("Completed sampling after %i steps" % fixed_steps)
            completed = np.array(completed).transpose(1, 0, 2)
            rlookup = {v: k for k, v in vocabulary.items()}
            all_strings = []
            for yi in y:
                ex_str = "".join([rlookup[c] for c in np.argmax(yi, axis=1)])
                all_strings.append(ex_str)
            for i in range(len(completed)):
                ex = completed[i]
                ex_str = "".join(
                    [rlookup[c] for c in np.argmax(cond[:, i], axis=1)])
                s = "gen_%s_%i.wav" % (ex_str, i)
                ii = reconstruct(ex)
                wavfile.write(s, fs, soundsc(ii))
                if ex_str in all_strings:
                    inds = [
                        n for n, s in enumerate(all_strings) if ex_str == s
                    ]
                    ind = inds[0]
                    it = reconstruct(X[ind])
                    s = "orig_%s_%i.wav" % (ex_str, i)
                    wavfile.write(s, fs, soundsc(it))
        valid_itr.reset()
        print("Sampling complete, exiting...")
        sys.exit()
    else:
        print("No plotting arguments, starting training mode!")

    X_sym = tensor.tensor3("X_sym")
Example #44
0
f_R = 6000
f_G = 8000
f_B = 10000
fs = 40000

# Periods per bit
periods = 2000

# Generate signal
signal = generate_signal(word,
                         image_path,
                         f_word,
                         f_R,
                         f_G,
                         f_B,
                         fs,
                         periods,
                         repetitions=3,
                         sync_repetitions=3,
                         estimation_repetitions=3,
                         inter_repetition_periods=1)

signal = np.append(np.zeros(800), signal)

scaled = np.float32((signal / np.max(np.abs(signal))))
wav.write('test.wav', 44100, scaled)
fs1, test = wav.read(
    r'C:\Users\dudam\PycharmProjects\principios\grabacion2.wav')

word = decode_signal(test, f_word, f_R, f_G, f_B, fs)
Example #45
0
import recombination
import genetic_algorithm
import objective_function

# Parameters that can be tuned to your liking
input_dir = 'inputs'
output_dir = 'outputs'
desired_chord = 'C_MAJOR'
desired_scale = 'C_MAJOR'
desired_generations = 1
desired_crossover_points = 5
desired_mutations = 10
desired_prob = 50

if __name__ == '__main__':
    file_list = listdir(input_dir)
    init_pop = []
    rand_idxs = random.sample(range(len(file_list)), int(len(file_list) / 2))
    for i in rand_idxs:
        dat = (wavfile.read(input_dir + '/' + file_list[i]))[1]
        if (dat.ndim == 1):
            dat_mono = dat
        else:
            dat_mono = dat.T[0]
        loudest = np.amax(np.abs(dat_mono))
        init_pop.append(np.float32(dat_mono / loudest))
    pop = genetic_algorithm.evolve(init_pop, desired_generations,
                                   desired_chord, desired_scale)
    for i in range(len(pop)):
        wavfile.write(output_dir + '/result_' + str(i) + '.wav', 44100, pop[i])
Example #46
0
                     arpabet_dict))[None, :].cuda()
pitch_contour = dataloader[file_idx][3][None].cuda()
mel = load_mel(audio_path)
print(audio_path, text)

# load source data to obtain rhythm using tacotron 2 as a forced aligner
x, y = mellotron.parse_batch(datacollate([dataloader[file_idx]]))
ipd.Audio(audio_path, rate=hparams.sampling_rate)

with torch.no_grad():
    # get rhythm (alignment map) using tacotron 2
    _, _, _, rhythm = mellotron.forward(x)
    rhythm = rhythm.permute(1, 0, 2)
speaker_id = torch.LongTensor([1]).cuda()

with torch.no_grad():
    mel_outputs, mel_outputs_postnet, gate_outputs, _ = mellotron.inference_noattention(
        (text_encoded, mel, speaker_id, lang_code, pitch_contour, rhythm))

plot_mel_f0_alignment(x[2].data.cpu().numpy()[0],
                      mel_outputs_postnet.data.cpu().numpy()[0],
                      pitch_contour.data.cpu().numpy()[0, 0],
                      rhythm.data.cpu().numpy()[:, 0].T)
with torch.no_grad():
    audio = denoiser(waveglow.infer(mel_outputs_postnet, sigma=0.8), 0.01)[:,
                                                                           0]
    audio = audio.cpu().numpy()
    audio = audio / np.max(np.abs(audio))
    write("{} {}.wav".format(str(file_idx), speaker_id.item()),
          hparams.sampling_rate, audio)
Example #47
0
def evaluate(model,
             musdb_path,
             eval_folder,
             workers=2,
             device="cpu",
             rank=0,
             save=False,
             shifts=0,
             split=False,
             overlap=0.25,
             is_wav=False,
             world_size=1):
    """
    Evaluate model using museval. Run the model
    on a single GPU, the bottleneck being the call to museval.
    """

    output_dir = eval_folder / "results"
    output_dir.mkdir(exist_ok=True, parents=True)
    json_folder = eval_folder / "results/test"
    json_folder.mkdir(exist_ok=True, parents=True)

    # we load tracks from the original musdb set
    test_set = musdb.DB(musdb_path, subsets=["test"], is_wav=is_wav)
    src_rate = 44100  # hardcoded for now...

    for p in model.parameters():
        p.requires_grad = False
        p.grad = None

    pendings = []
    with futures.ProcessPoolExecutor(workers or 1) as pool:
        for index in tqdm.tqdm(range(rank, len(test_set), world_size),
                               file=sys.stdout):
            track = test_set.tracks[index]

            out = json_folder / f"{track.name}.json.gz"
            if out.exists():
                continue

            mix = th.from_numpy(track.audio).t().float()
            ref = mix.mean(dim=0)  # mono mixture
            mix = (mix - ref.mean()) / ref.std()
            mix = convert_audio(mix, src_rate, model.samplerate,
                                model.audio_channels)
            estimates = apply_model(model,
                                    mix.to(device),
                                    shifts=shifts,
                                    split=split,
                                    overlap=overlap)
            estimates = estimates * ref.std() + ref.mean()

            estimates = estimates.transpose(1, 2)
            references = th.stack([
                th.from_numpy(track.targets[name].audio).t()
                for name in model.sources
            ])
            references = convert_audio(references, src_rate, model.samplerate,
                                       model.audio_channels)
            references = references.transpose(1, 2).numpy()
            estimates = estimates.cpu().numpy()
            win = int(1. * model.samplerate)
            hop = int(1. * model.samplerate)
            if save:
                folder = eval_folder / "wav/test" / track.name
                folder.mkdir(exist_ok=True, parents=True)
                for name, estimate in zip(model.sources, estimates):
                    wavfile.write(str(folder / (name + ".wav")), 44100,
                                  estimate)

            if workers:
                pendings.append((track.name,
                                 pool.submit(museval.evaluate,
                                             references,
                                             estimates,
                                             win=win,
                                             hop=hop)))
            else:
                pendings.append((track.name,
                                 museval.evaluate(references,
                                                  estimates,
                                                  win=win,
                                                  hop=hop)))
            del references, mix, estimates, track

        for track_name, pending in tqdm.tqdm(pendings, file=sys.stdout):
            if workers:
                pending = pending.result()
            sdr, isr, sir, sar = pending
            track_store = museval.TrackStore(win=44100,
                                             hop=44100,
                                             track_name=track_name)
            for idx, target in enumerate(model.sources):
                values = {
                    "SDR": sdr[idx].tolist(),
                    "SIR": sir[idx].tolist(),
                    "ISR": isr[idx].tolist(),
                    "SAR": sar[idx].tolist()
                }

                track_store.add_target(target_name=target, values=values)
                json_path = json_folder / f"{track_name}.json.gz"
                gzip.open(json_path,
                          "w").write(track_store.json.encode('utf-8'))
    if world_size > 1:
        distributed.barrier()
Example #48
0
import scipy.io.wavfile as wav
import matplotlib.pyplot as plt
import numpy as np
import sounddevice as sd

filename = 'worksample.wav'

# RECORDING THE FILE
fs = 44100  # Sample rate
seconds = 3  # Recording duration

print('Recording in progress...')
myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
sd.wait()  # Wait until the recording finishes
print('Recording complete')
wav.write(filename, fs, myrecording)  # Save as a WAV file

# FFT AND PLOTS
samplerate, data = wav.read(
    filename)  # Read the sample rate and data from the file

nf = 16384
Y = np.fft.fft(data, nf)  # Apply the FFT to the audio

# Normalize the FFT output
ynorm = np.abs(Y[0:round(nf / 2 + 1)])
ynorm = (ynorm - np.min(ynorm)) / (np.max(ynorm) - np.min(ynorm))
f = samplerate / 2 * np.linspace(0, 1, round(nf / 2 + 1))

# Plot the transform and the audio.
plt.figure(1)
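Since the recording is real-valued, np.fft.rfft returns just the nf/2 + 1 non-negative frequency bins directly, making the manual half-spectrum slice above unnecessary; a sketch:

Y = np.fft.rfft(data, nf)                    # length nf // 2 + 1
ynorm = np.abs(Y)
ynorm = (ynorm - np.min(ynorm)) / (np.max(ynorm) - np.min(ynorm))
f = np.fft.rfftfreq(nf, d=1.0 / samplerate)  # 0 .. samplerate / 2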
Example #49
0
def write_wav(signal, filename, overwrite=True):
    """
    Write a signal as a WAV file.

    Parameters
    ----------
    signal : Signal object
        An audio signal object from the pyfar Signal class.

    filename : string or open file handle
        Output wav file.

    overwrite : bool
        Select whether to overwrite the WAV file, if it already exists.
        The default is True.

    Notes
    -----
    * This function is based on scipy.io.wavfile.write().
    * Writes a simple uncompressed WAV file.
    * Signals of shape larger than 1D are flattened.
    * The bits-per-sample and PCM/float will be determined by the data-type.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    """
    sampling_rate = signal.sampling_rate
    data = signal.time

    # Reshape to 2D
    data = data.reshape(-1, data.shape[-1])
    if len(signal.cshape) != 1:
        warnings.warn(f"Signal flattened to {data.shape[0]} channels.")

    # .wav file extension
    filename = pathlib.Path(filename).with_suffix('.wav')

    # Check if file exists and for overwrite
    if overwrite is False and os.path.isfile(filename):
        raise FileExistsError("File already exists,"
                              "use overwrite option to disable error.")
    else:
        wavfile.write(filename, sampling_rate, data.T)
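As the table says, the on-disk sample format follows the array's dtype; for example (a sketch):

import numpy as np
from scipy.io import wavfile

sine = np.sin(2 * np.pi * 440 * np.arange(44100) / 44100.0)
wavfile.write('sine_f32.wav', 44100, sine.astype(np.float32))          # 32-bit float WAV
wavfile.write('sine_i16.wav', 44100, (sine * 32767).astype(np.int16))  # 16-bit PCM WAV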
Example #50
0
    simplest_possible_plot(timelimited_signal_data_normalized,
                           "timelimited_normalized_signal_data.png")

    # Process a convolution on signal data using impulse response
    convolved = signal.convolve(
        timelimited_signal_data_normalized,
        timelimited_impulse_response_data,
    )

    # Normalize convolved data
    convolved_normalized = normalize_np_array(convolved)

    simplest_possible_plot(convolved_normalized, "normalized_convolved.png")

    # Save normalized convolved data as wav file.
    wavfile.write(convolved_signal_file_path, signal_fs, convolved_normalized)

    # Read convolved data from wav file. It is simulation of real-life usage when we have convoluted signal and
    # impulse response, not pure input signal.
    convolved_fs, read_convolved_data = wavfile.read(
        convolved_signal_file_path)

    read_convolved_data_normalized = normalize_np_array(read_convolved_data)

    simplest_possible_plot(read_convolved_data_normalized,
                           "normalized_read_convolved_data.png")

    # Deconvolve read convolved signal with given impulse response.
    recovered, remainder = signal.deconvolve(
        read_convolved_data_normalized, timelimited_impulse_response_data)
Example #51
0
 def save_wave(self, path, wave):
     wavfile.write(path, self.sampling_rate, self.to_int(wave))
Example #52
0
 # numpy.array(raw_video, dtype="float32")
 # a_norm = np.linalg.norm(raw_video)
 # raw_vv = raw_video/a_norm
 # print(raw_vv)
 # print(max(raw_vv))
 # print(min(raw_vv))
 # print(max(raw_video))
 # print(min(raw_video))
 samplerate = 48000
 # fs = 100
 # t = np.linspace(0., 1., samplerate)
 # amplitude = np.iinfo(np.int16).max
 # data = amplitude * np.sin(2. * np.pi * fs * t)
 raw2_name = raw_list[j][:-4] + '.wav'
 nn = join(v_path, raw2_name)
 write(nn, samplerate, raw_video.astype(np.float32))
 # print(raw_video)
 #
 # np.save(raw2_name, raw_video)
 # print("save .npy done")
 #     # if k in range(6):
 #     #     break
 #     # ag_video = get_ag_video(raw_video, k)
 #     ag_video = get_ag_video(raw_video, k+1)
 #     # save audio
 #     sampleRate = 48000  # hertz
 # w_name1 = join(c_path, ag_list[k])
 # w_name2 = raw_list[j][:-4] + '_' + ag_list[k] + '.wav'
 # w_name = join(w_name1, w_name2)
 #     # print(w_name)
 #     obj = wave.open(w_name, 'w')
Example #53
0
 def write(self, file_name):
     wav.write(file_name, self.rate, self.samples)
Example #54
0
def separateHP(filename, winlength, alpha=0.5, gamma=0.4):
    # Open and read the file
    fs, y = read(filename)

    # Calculate the STFT i.e. transform to time-frequency domain
    winlen = int(float(winlength) / 1000.0 * float(fs))
    hoplen = int(winlen / 2)
    y = y / np.max(np.abs(y))
    F = stft(y, n_fft=winlen, hop_length=hoplen, window=windows.hann)

    # Calculate range-compressed version of power spectrogram
    W = np.power(np.abs(F), 2 * gamma)

    # Initialize percussion and harmonic values
    H = W / 2
    P = W / 2

    # Length of transform timewise
    K = np.size(W, 1)

    # Update P & H
    for i in range(0, K - 1):
        # Calculate delta matrix which describes energy changes between frequencies
        delta = np.zeros([hoplen + 1, 1])
        for h in range(1, hoplen):
            delta[h, 0] = alpha * (
                (H[h, i - 1] - 2 * H[h, i] + H[h, i + 1]) /
                4) - (1 - alpha) * (
                    (P[h - 1, i] - 2 * P[h, i] + P[h + 1, i]) / 4)

        H[:, i + 1] = np.minimum(np.maximum(np.add(H[:, i], delta[:, 0]), 0),
                                 W[:, i + 1])
        P[:, i + 1] = np.subtract(W[:, i + 1], H[:, i + 1])

    # Binarize the separation
    for i in range(0, K - 1):
        for h in range(0, hoplen):
            if H[h, i - 1] < P[h, i - 1]:
                H[h, i] = 0
                P[h, i] = W[h, i]
            else:
                H[h, i] = W[h, i]
                P[h, i] = 0

    # Visualize P & H
    plt.figure(figsize=[15, 5])
    plt.subplot(1, 2, 1)
    #specshow(20*np.log10(1e-10+np.abs(H)), y_axis='log', x_axis='time',sr=fs,hop_length=hoplen)
    specshow(H, y_axis='linear', x_axis='time', sr=fs, hop_length=hoplen)
    plt.title('H')
    #plt.pcolormesh(H)
    plt.subplot(1, 2, 2)
    #specshow(20*np.log10(1e-10+np.abs(P)), y_axis='log', x_axis='time',sr=fs,hop_length=hoplen)
    specshow(P, y_axis='linear', x_axis='time', sr=fs, hop_length=hoplen)
    plt.title('P')
    #plt.pcolormesh(P)
    plt.show()

    # Convert into waveform
    h = istft(np.power(H, 1 / (2 * gamma)) * np.exp(1j * np.angle(F)),
              win_length=winlen,
              hop_length=hoplen,
              window=windows.hann)
    p = istft(np.power(P, 1 / (2 * gamma)) * np.exp(1j * np.angle(F)),
              win_length=winlen,
              hop_length=hoplen,
              window=windows.hann)

    h = h / np.max(np.abs(h))
    p = p / np.max(np.abs(p))

    # Save harmonic and percussive audio files
    audio_name = '%s-harmonics.wav' % (filename[:-4])
    write(audio_name, fs, h)

    audio_name = '%s-percussive.wav' % (filename[:-4])
    write(audio_name, fs, p)

    SNR = 20 * np.log10(
        np.abs((np.sum(y)) / (np.sum(y) - np.sum(p) - np.sum(h))))
    print("SNR for signal p:", SNR, "dB")
Example #55
0
        with torch.no_grad():
            audio = model.inference(src_seq,
                                    src_pos,
                                    mel_max_len,
                                    alignment_target,
                                    sigma=1.0,
                                    alpha=1.0)
            audio = audio * MAX_WAV_VALUE
        audio = audio.squeeze()
        print(torch.mean(audio))
        audio = audio.cpu().numpy()

        audio_tgt = torch.cat(audio_tgt)
        audio_tgt = audio_tgt * MAX_WAV_VALUE
        print(torch.mean(audio_tgt))
        #print (audio_tgt)
        audio_tgt = audio_tgt.squeeze()
        audio_tgt = audio_tgt.cpu().numpy()

        audio = audio.astype('int16')
        audio_tgt = audio_tgt.astype('int16')
        audio_path = os.path.join("results", "test_{}_synthesis.wav".format(i))
        '''audio_tgt_path = os.path.join(
            "results", "{}_tgt.wav".format(i))'''

        write(audio_path, sampling_rate, audio)
        # write(audio_tgt_path, sampling_rate, audio_tgt)

        if i >= 10:
            break
Example #56
0
def microphone():
	fs = 44100
	second = microphone_time
	myrecording = sd.rec(int(second * fs), samplerate=fs, channels=2)
	sd.wait()
	write(file_path+extend+audio_info, fs, myrecording)
Example #57
0
                    tsm.run(reader, writer)
                    spedupAudio = writer.output

            yPointerEnd = yPointer + spedupAudio.shape[0]
            y[yPointer : yPointerEnd] = spedupAudio
            yPointer = yPointerEnd

            writeFrames(frameBuffer, yPointerEnd, silentSpeed, sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd

        normal = 1
    if skipped % 1000 == 0:
        print("{} frames inspected".format(skipped))
        skipped += 1

y = y[:yPointer]

wavfile.write("spedupAudio.wav", sampleRate, y)

cap.release()
out.release()
cv2.destroyAllWindows()

mergeCommand = "ffmpeg -i spedup.mp4 -i spedupAudio.wav -c:v h264_nvenc -threads 0 out_{}".format(videoFile)
error = subprocess.call(mergeCommand, shell=True)
if error == 0:
    removeCommand = "rm output.wav spedup.mp4 spedupAudio.wav"
    subprocess.call(removeCommand, shell=True)

Example #58
0
    #plt.plot(wav_data)

    ### Filters audio data ###
    low_cutoff = 60
    high_cutoff = 6000
    wn = [low_cutoff / (fs / 2), high_cutoff / (fs / 2)]
    b, a = dsp.butter(4, wn, 'band')
    wav_data_filt = dsp.filtfilt(b, a, wav_data)
    #plt.plot(wav_data_filt)

    ### Resamples audio to 12 kHz ###
    wav_data_12kHz = dsp.resample(wav_data_filt,
                                  int(wav_data_filt.shape[0] / 4))
    #plt.plot(wav_data_12kHz)

    ### Splits data into N chunks ###
    N = int(len(wav_data_12kHz) / 10)
    n = 1
    for i in range(0, len(wav_data_12kHz), N):
        temp_wav_data = wav_data_12kHz[i:i + N]
        temp_wav_data = temp_wav_data * 32767
        temp_wav_data.astype(np.int16)

        newWavName = file[:-7] + 'feat_' + str(n) + '.wav'
        wavfile.write('./processedRecordings/' + newWavName, 12000,
                      temp_wav_data)
        n += 1
        #plt.plot(temp_wav_data)
        #print(i)
Example #59
0
# Commentary:
# - Speech analysis and resynthesis with PyWORLD

from scipy.io import wavfile
import numpy as np
import pyworld

IN_WAVE_FILE = "in.wav"  # input speech
OUT_WAVE_FILE = "out.wav"  # analysis/resynthesis output

# Load the speech
fs, x = wavfile.read(IN_WAVE_FILE)
x = x.astype(np.float64)

# Analyze the speech (fundamental frequency, spectral envelope, aperiodicity)
f0, sp, ap = pyworld.wav2world(x, fs)

# Resynthesize the speech
y = pyworld.synthesize(f0, sp, ap, fs)
y = y.astype(np.int16)

# Save to a WAV file
wavfile.write(OUT_WAVE_FILE, fs, y)
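Because WORLD factors the signal into f0, spectral envelope, and aperiodicity, simple edits before resynthesis are straightforward; e.g. scaling f0 transposes the pitch (a sketch reusing the variables above):

y_up = pyworld.synthesize(f0 * 2.0, sp, ap, fs)  # one octave up
wavfile.write("out_octave_up.wav", fs, y_up.astype(np.int16))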
Example #60
0
def write_audio_file(filename, data, sr=44100):
    wavfile.write(filename, sr, data)
    #librosa.output.write_wav(filename, data, sr)
    #torchaudio.save(filename, torch.Tensor(data).unsqueeze(1), sr)
    return