Example No. 1
 def _load_sounds(self):
     self._bar_audio_data, self._bar_audio_fs = soundfile.read(self._bar_file)
     self._beat_audio_data, self._beat_audio_fs = soundfile.read(self._beat_file)
     if self._division_file:
         self._division_audio_data, self._division_audio_fs = soundfile.read(self._division_file)
     else:
         self._division_audio_data, self._division_audio_fs = None, None
Example No. 2
File: ad-ltsd.py Project: jlep/vad
def main():
    #fs, bg_signal = wavfile.read(sys.argv[1])
    if argv[1] == 'batch':
        files = []
        for f in os.listdir(argv[2]):
            if os.path.splitext(f)[1] == ".flac":
                files.append(f)
        args = [(f, argv[2], argv[3]) for f in files]
        pool = multiprocessing.Pool(12)
        r = pool.map_async(compute_vad, args)
        r.wait()
        pool.close()
        pool.join()
        #for a in args:
        #    compute_vad(a)
    else:
        bg_signal, fs = soundfile.read(argv[1])
        ltsd = LTSD_VAD()
        bg_signal=bg_signal[:2000]
        print(bg_signal)
        ltsd.init_params_by_noise(fs, bg_signal)
        signal, fs = soundfile.read(argv[1])
        #vaded_signal = ltsd.filter(signal)
        segments, sig_len = ltsd.segments(signal)
        print(segments)
Example No. 3
def test_write_non_seekable_file(file_w):
    with sf.SoundFile(file_w, 'w', 44100, 1, format='XI') as f:
        assert not f.seekable()
        assert f.frames == 0
        f.write(data_mono)
        assert f.frames == len(data_mono)

        with pytest.raises(RuntimeError) as excinfo:
            f.seek(2)
        assert "unseekable" in str(excinfo.value)

    with sf.SoundFile(filename_new) as f:
        assert not f.seekable()
        assert f.frames == len(data_mono)
        data = f.read(3, dtype='int16')
        assert np.all(data == data_mono[:3])
        data = f.read(666, dtype='int16')
        assert np.all(data == data_mono[3:])

        with pytest.raises(RuntimeError) as excinfo:
            f.seek(2)
        assert "unseekable" in str(excinfo.value)

        with pytest.raises(ValueError) as excinfo:
            f.read()
        assert "frames" in str(excinfo.value)

    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == data_mono)
    assert fs == 44100

    with pytest.raises(ValueError) as excinfo:
        sf.read(filename_new, start=3)
    assert "start is only allowed for seekable files" in str(excinfo.value)
Example No. 4
def test_read_into_non_contiguous_out(file_stereo_r):
    out = np.empty(data_stereo.shape[::-1], dtype='float64')
    if getattr(sys, 'pypy_version_info', (999,)) < (2, 6):
        # The test for C-contiguous doesn't work with PyPy 2.5.0
        sf.read(file_stereo_r, out=out.T)
    else:
        with pytest.raises(ValueError) as excinfo:
            sf.read(file_stereo_r, out=out.T)
        assert "C-contiguous" in str(excinfo.value)
Example No. 5
def get_traindata(gesfile, audio_f, dt,
                  audio_fargs=None, wavfile=None, ignore_f0=True):
    """Get input, output pairs for supervised learning training or testing.

    Parameters
    ----------
    gesfile : str
        Path to a .ges gesture file (XML format).
    audio_f : function
        A function that will be applied to the audio stream.
    dt : float
        Sampling step size for the gesture trajectory and the audio features.
    audio_fargs : dict, optional
        Keyword arguments that will be provided to ``audio_f``.
        By default, audio, sampling rate, and dt will be provided.
    wavfile : str, optional
        A .wav file that corresponds to the ``gesfile``.
        If specified but the file does not exist, it will be generated.
        If not specified, audio will be synthesized but not saved.
    """
    gs = parse_ges(gesfile, ignore_f0=ignore_f0)
    y = gs.trajectory(dt=dt)

    if wavfile is None:
        audio, fs = synthesize(gesfile)
    elif not os.path.exists(wavfile):
        synthesize(gesfile, wavfile)
        audio, fs = sf.read(wavfile)
    else:
        audio, fs = sf.read(wavfile)

    audio_fargs = {} if audio_fargs is None else audio_fargs.copy()
    audio_fargs.update({'audio': audio, 'fs': fs, 'dt': dt})
    x = audio_f(**audio_fargs)

    # For some reason, the wav file size and the gesture trajectory size
    # are often off by one or two. Here, we lengthen or shorten ``y``,
    # assuming that VTL is doing it correctly.
    # Not sure if that assumption is correct.
    if x.shape[0] > y.shape[0]:
        # Extend y by n timesteps
        toadd = np.tile(y[np.newaxis, -1], (x.shape[0] - y.shape[0], 1))
        y = np.concatenate((y, toadd))
    if x.shape[0] < y.shape[0]:
        # Shorten y by n timesteps
        todelete = list(range(x.shape[0], y.shape[0]))
        y = np.delete(y, todelete, 0)

    assert x.shape[0] == y.shape[0], "Misaligned; %s %s" % (x.shape, y.shape)
    return x, y, fs
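A minimal usage sketch (``my_features`` and the file paths are hypothetical placeholders, not part of the original code):

def my_features(audio, fs, dt):
    # toy feature function: frame the audio at the gesture step size
    import numpy as np
    step = int(fs * dt)
    n = len(audio) // step
    return np.reshape(audio[:n * step], (n, step))

x, y, fs = get_traindata('example.ges', my_features, dt=0.01, wavfile='example.wav')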
Example No. 6
def show_file_hystogram(filename):
	data, sample_rate = sf.read(filename)  # load the data
	#a = data.T[0] # this is a two channel soundtrack, I get the first track
	b = [(ele/2**8.)*2 - 1 for ele in data]  # this is an 8-bit track, b is now normalised on [-1, 1)
	c = np.fft.fft(b)  # calculate the Fourier transform (list of complex numbers)
	d = len(c)//2 - 1  # you only need half of the fft list (real signal symmetry)

	k = np.arange(d)
	fs = 8000  # 8 kHz
	T = d/fs
	frqLabel = k/T

	c = c[:d]
	c = abs(c)
	c = [round(i, 1) for i in c]

	print("vector dimensionality: {0}".format(d))
	print("min: {0} max: {1}".format(min(c), max(c)))

	plt.gca().set_ylim([min(c), 20])
	plt.plot(frqLabel, c, 'g')
	plt.show()
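The half-spectrum bookkeeping above can be expressed more directly with ``np.fft.rfft``, which returns only the non-negative frequencies of a real signal (a sketch, not part of the original code):

import numpy as np
import soundfile as sf

data, sample_rate = sf.read('some_file.wav')
spectrum = np.abs(np.fft.rfft(data))                     # len(data)//2 + 1 bins
freqs = np.fft.rfftfreq(len(data), d=1.0 / sample_rate)  # matching frequency axis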
Example No. 7
def experimental_random_segmentation(audio_input, segments, options, sr):
    """
        (mir-dev branch in Sonidos Mutantes)
        Segments the input at random positions and durations, according to the options
    """
    outputPath = options['outputPath']
    min_dur, max_dur = options['duration']

    try:
        x = read(audio_input)[0]
        for i in range(segments):
            while True:
                pos = random.uniform(0., 1.)  # normalised position in the file
                dur = random.uniform(min_dur, max_dur)
                durSamples = int(dur*sr)
                posSamples = int(pos*len(x))
                if posSamples + durSamples < len(x):
                    break

            signalOut = x[posSamples:posSamples+durSamples]
            baseName = os.path.splitext(audio_input)[0].split('/')[-1]
            if not os.path.exists(outputPath):
                os.makedirs(outputPath)
                print("Creating samples directory")
                time.sleep(4)
            outputFilename = outputPath+'/'+baseName+'_sample'+str(i)+'.wav'
            write_file(outputFilename, signalOut, sr)
            print("File generated: %s" % outputFilename)
            time.sleep(1)
    except Exception as e:
        print("Error: %s" % e)
Example No. 8
def test_buffer_write_with_bytes(sf_stereo_w):
    b = b"\x01\x00\xFF\xFF\xFF\x00\x00\xFF"
    sf_stereo_w.buffer_write(b, 'short')
    sf_stereo_w.close()
    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == [[1, -1], [255, -256]])
    assert fs == 44100
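The expected values follow from little-endian 16-bit decoding of the byte buffer, which can be checked independently (a sketch):

import numpy as np

b = b"\x01\x00\xFF\xFF\xFF\x00\x00\xFF"
print(np.frombuffer(b, dtype='<i2').reshape(-1, 2))  # [[1, -1], [255, -256]]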
Example No. 9
    def test_process_multiple(self):
        keyword_file_names = ['alexa', 'americano', 'avocado', 'blueberry', 'bumblebee', 'caterpillar', 'christina',
                              'dragonfly', 'flamingo', 'francesca', 'grapefruit', 'grasshopper', 'iguana', 'picovoice',
                              'pineapple', 'porcupine', 'raspberry', 'terminator', 'vancouver']

        keyword_file_paths = [
            self._abs_path('../../resources/keyword_files/%s_%s.ppn' % (name, self._keyword_file_extension())) for name in keyword_file_names]

        porcupine = Porcupine(
            library_path=self._library_path(),
            model_file_path=self._abs_path('../../lib/common/porcupine_params.pv'),
            keyword_file_paths=keyword_file_paths,
            sensitivities=[0.5] * len(keyword_file_paths))

        audio, sample_rate = soundfile.read(
            self._abs_path('../../resources/audio_samples/multiple_keywords.wav'),
            dtype='int16')
        assert sample_rate == porcupine.sample_rate

        num_frames = len(audio) // porcupine.frame_length
        results = []
        for i in range(num_frames):
            frame = audio[i * porcupine.frame_length:(i + 1) * porcupine.frame_length]
            result = porcupine.process(frame)
            if result >= 0:
                results.append(result)

        self.assertEqual(results, [15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

        porcupine.delete()
Example No. 10
def test_write_int_data_to_float_file(file_inmemory):
    """This is a very uncommon use case."""
    sf.write(file_inmemory, data_mono, 44100, format='WAV', subtype='FLOAT')
    file_inmemory.seek(0)
    read, fs = sf.read(file_inmemory, always_2d=False, dtype='float32')
    assert np.all(read == data_mono)
    assert fs == 44100
Example No. 11
def test_rplus_append_data(sf_stereo_rplus):
    sf_stereo_rplus.seek(0, sf.SEEK_END)
    sf_stereo_rplus.write(data_stereo / 2)
    sf_stereo_rplus.close()
    data, fs = sf.read(filename_new)
    assert np.all(data[:len(data_stereo)] == data_stereo)
    assert np.all(data[len(data_stereo):] == data_stereo / 2)
Example No. 12
def importRirs(downloadDir, insertIntoDbF):
    url = "http://www.openslr.org/resources/13/RWCP.tar.gz"
    filename = join(downloadDir, "rwcp.tar.gz")
    unpackDir = join(downloadDir, "rwcp")

    dl = util.FileDownloader(url, filename)
    dl.download()
    dl.unpackTo(unpackDir)

    files = []
    for root, dirnames, filenames in os.walk(join(unpackDir, "RWCP/micarray/MICARRAY/data1")):
        for filename in filenames:
            if filename[-2:] != ".1":
                continue  # we only use the front microphone
            files.append(join(root, filename))

    pattern = re.compile(r"(circle|cirline)/(\w{3})/imp(\d{3})")

    bar = util.ConsoleProgressBar()
    bar.start("Import RWCP")
    for i, file in enumerate(sorted(files)):  # we sort to get same identifiers cross-platform
        m = pattern.search(file)
        assert m, "Could not parse room from path ({})".format(file)
        room = m.group(2)
        identifier = "{:04d}_{}_{}".format(i, room.lower(), m.group(3))

        x, fs = sf.read(file, dtype="float32", **RawFormat)
        x /= max(abs(x))
        x = (2 ** 15 * x).astype(np.int16)  # scale to the int16 range

        insertIntoDbF((x, fs), identifier, {"source": "RWCP", "room": room})
        bar.progress(i / len(files))
    bar.end()
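``RawFormat`` is not shown in the snippet; since the RWCP impulse responses are headerless raw files, it presumably bundles the extra keyword arguments that ``sf.read`` needs for RAW input, e.g. (an assumption, values illustrative):

RawFormat = {
    "format": "RAW",
    "samplerate": 48000,   # assumed recording rate
    "channels": 1,
    "subtype": "PCM_16",   # 16-bit little-endian samples
}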
Example No. 13
def update_max_len(file_path_list, max_len):
    tmp_max_len = 0
    # Update the max length based on the given dataset
    signal_set = set()
    for file_path in file_path_list:
        file_list = open(file_path)
        for line in file_list:
            line = line.strip().split()
            if len(line) < 2:
                print('Wrong audio list file record in the line:', line)
                continue
            file_str = line[0]
            if file_str in signal_set:
                continue
            signal_set.add(file_str)
            signal, rate = sf.read(file_str)  # signal: sample values,rate: sample rate
            if len(signal.shape) > 1:
                signal = signal[:, 0]
            if rate != FRAME_RATE:
                # up-sample or down-sample for predefined sample rate
                signal = resampy.resample(signal, rate, FRAME_RATE, filter='kaiser_fast')
            if len(signal) > tmp_max_len:
                tmp_max_len = len(signal)
        file_list.close()
    if tmp_max_len < max_len:
        max_len = tmp_max_len
    return max_len
Example No. 14
 def _process_function(self, track, user_function, estimates_dir, evaluate):
     # load estimates from disk instead of processing
     if user_function is None:
         track_estimate_dir = op.join(
             estimates_dir,
             track.subset,
             track.filename
         )
         user_results = {}
         for target_path in glob.glob(track_estimate_dir + '/*.wav'):
             target_name = op.splitext(
                 os.path.basename(target_path)
             )[0]
             try:
                 target_audio, rate = sf.read(
                     target_path,
                     always_2d=True
                 )
                 user_results[target_name] = target_audio
             except RuntimeError:
                 pass
     else:
         # call the user provided function
         user_results = user_function(track)
     if estimates_dir and not evaluate and user_function is not None:
         self._save_estimates(user_results, track, estimates_dir)
     if evaluate:
         self._evaluate_estimates(user_results, track)
Example No. 15
    def play(self, file_path):

        if self.convert:
            self.convert_mp3_to_wav(file_path_mp3=file_path)
        data, fs = sf.read(file_path)
        sd.play(data, fs)
        sd.wait()
Example No. 16
def do_segmentation(audio_input, audio_input_from_filename=True, audio_input_from_array=False, sec_len=6, save_file=True):

    length = int(sec_len) * 10

    if audio_input_from_filename:
        x = read(audio_input)[0]
    elif audio_input_from_array:
        x = audio_input

    retriever = MIR(x, 44100)

    frame_size = 4096

    hop_size = 1024

    segments = [len(frame) / 44100 for frame in retriever.FrameGenerator()]

    output = []
    for segment in segments:
        sample = int(segment*44100)
        output.append(x[:sample*length])  # extend duration of segment

    output = choice(output)

    if save_file:
        baseName = os.path.splitext(audio_input)[0].split('/')[-1]
        outputFilename = 'samples'+'/'+baseName+'_sample'+'.wav'
        write_file(outputFilename, 44100, output)
        print("File generated: %s" % outputFilename)
    else:
        return output
Example No. 17
def get_data(rootdir = TIMIT_main_dir):	
	inputs = []
	targets = []
	for dir_path, sub_dirs, files in os.walk(rootdir):
		for file in files:	        
			if (os.path.join(dir_path, file)).endswith('.wav'):
				wav_file_name = os.path.join(dir_path, file)
				input_data, f_s = sf.read(wav_file_name)
				# mfcc_feat = MFCC_input(mfcc(input_data,f_s))
				mfcc_feat = mfcc(input_data,f_s)
				#Delta features
				delta_feat = mfcc_feat[:-1]-mfcc_feat[1:]
				#Delta-Delta features
				deltadelta_feat = delta_feat[:-1]-delta_feat[1:]

				#Removing the first two frames
				mfcc_feat = mfcc_feat[2:]
				delta_feat = delta_feat[1:]

				#Concatenating mfcc, delta and delta-delta features
				full_input = np.concatenate((mfcc_feat,delta_feat,deltadelta_feat), axis=1)

				inputs.append(np.asarray(full_input, dtype=theano.config.floatX))#Rakeshvar wants one frame along each column but i am using Lasagne

				text_file_name = wav_file_name[:-4] + '.txt'
				target_data_file = open(text_file_name)
				target_data = str(target_data_file.read()).lower().translate(str.maketrans('', '', '!:,".;?'))
				target_data = target_data[8:-1]  # No '.' in the lex-free dictionary
				targets.append(target_data)
	return inputs, targets
Example No. 18
def test_buffer_write(sf_stereo_w):
    buf = np.array([[1, 2], [-1, -2]], dtype='int16')
    sf_stereo_w.buffer_write(buf, 'short')
    sf_stereo_w.close()
    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == buf)
    assert fs == 44100
Example No. 19
 def _transform(self, row):
   if len(row) == 7:
     path, channel, name, spkid, dataset, start_time, end_time = row
   else:
     path, channel, name, spkid, dataset = row[:5]
     start_time = None
     end_time = None
   # ====== read audio ====== #
   # for voxceleb1
   if dataset == 'voxceleb1':
     with open(path, 'rb') as f:
       y, sr = sf.read(f)
       y = pp.signal.resample(y, sr_orig=sr, sr_new=8000,
                              best_algorithm=True)
       sr = 8000
   # for sre, fisher and swb
   elif (dataset[:3] == 'sre' or
    dataset == 'swb' or
    dataset == 'fisher'):
     with open(path, 'rb') as f:
       y, sr = sf.read(f)
       y = pp.signal.resample(y, sr_orig=sr, sr_new=8000,
                              best_algorithm=True)
       if y.ndim == 2:
         y = y[:, int(channel)]
       sr = 8000
   # all other dataset: mix6, voxceleb2
   else:
     y, sr = pp.signal.anything2wav(inpath=path, outpath=None,
                                    channel=channel,
                                    dataset=dataset,
                                    start=start_time, end=end_time,
                                    sample_rate=Config.SAMPLE_RATE,
                                    return_data=True)
   # ====== error happen ignore file ====== #
   if len(y) == 0:
     return None
   # ====== remove DC offset ====== #
   y = y - np.mean(y, 0)
   duration = max(y.shape) / sr
   ret = {'raw': y, 'sr': sr, 'duration': duration, # in second
          'path': path,
          'spkid': spkid,
          'name': name,
          'dsname': dataset}
   return ret
Example No. 20
def load_bgd_wav(file_path):
    signal, rate = sf.read(file_path)  # signal: sample values,rate: sample rate
    if len(signal.shape) > 1:
        signal = signal[:, 0]
    if rate != FRAME_RATE:
        # up-sample or down-sample for predefined sample rate
        signal = resampy.resample(signal, rate, FRAME_RATE, filter='kaiser_fast')
    return signal
Example No. 21
def open_sound_and_normalise(path):
    """
    returns mono audio at the module-level target ``samplerate``
    """
    orig_samples, orig_samplerate = soundfile.read(path)
    ratio = orig_samplerate // samplerate  # integer slicing step; assumes the rates divide evenly
    samples = orig_samples[::ratio, 0]
    return samples
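Step-slicing only works for integer ratios and aliases the signal; other examples in this collection (e.g. ``load_bgd_wav``) use ``resampy`` for the same job, which could be applied here as well (a sketch; ``target_samplerate`` is a stand-in for the module-level ``samplerate``):

import resampy
import soundfile

def open_sound_resampled(path, target_samplerate=16000):
    samples, orig_samplerate = soundfile.read(path)
    if samples.ndim > 1:
        samples = samples[:, 0]  # keep the first channel -> mono
    return resampy.resample(samples, orig_samplerate, target_samplerate)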
Example No. 22
 def getData(self, params):
     ticker = params['ticker']
     import soundfile
     sig, samplerate = soundfile.read(ticker + ".wav")
     df = pd.Series({"filename": ticker, "length": len(sig),
         "samplerate": samplerate})
     df = df.to_frame().transpose()
     return df
Example No. 23
def __rubberband(y, sr, **kwargs):
    '''Execute rubberband

    Parameters
    ----------
    y : np.ndarray [shape=(n,) or (n, c)]
        Audio time series, either single or multichannel

    sr : int > 0
        sampling rate of y

    **kwargs
        keyword arguments to rubberband

    Returns
    -------
    y_mod : np.ndarray [shape=(n,) or (n, c)]
        `y` after rubberband transformation

    '''

    assert sr > 0

    # Get the input and output tempfile
    fd, infile = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    fd, outfile = tempfile.mkstemp(suffix='.wav')
    os.close(fd)

    # dump the audio
    sf.write(infile, y, sr)

    try:
        # Execute rubberband
        arguments = ['rubberband', '-q']

        for key, value in six.iteritems(kwargs):
            arguments.append(str(key))
            arguments.append(str(value))

        arguments.extend([infile, outfile])

        subprocess.check_call(arguments)

        # Load the processed audio.
        y_out, _ = sf.read(outfile, always_2d=True)

        # make sure that output dimensions matches input
        if y.ndim == 1:
            y_out = np.squeeze(y_out)

    finally:
        # Remove temp files
        os.unlink(infile)
        os.unlink(outfile)

    return y_out
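A usage sketch: the keyword arguments are passed straight through as rubberband command-line flags, so a time stretch could look like this (hedged; the available flags depend on the installed rubberband CLI):

import soundfile as sf

y, sr = sf.read('input.wav')
y_slow = __rubberband(y, sr, **{'--tempo': 0.5})  # '--tempo 0.5' halves the playback speed
sf.write('stretched.wav', y_slow, sr)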
Example No. 24
def getAllFeatures(featureType, wavFileList, samplerate=16000,winlen=0.0256,winstep=0.01,
                  nfilt=40, nfft=512,lowfreq=133.3333,highfreq=6855.4976,preemph=0.97,
                  winSzForDelta=2, numcep=13, ceplifter=22, appendEnergy=True):
    '''
    Computes all features for a given numpy vector of file paths to .wav files. Reads the
    wav files specified in 'wavFileList' using the package 'PySoundFile'.
    PySoundFile is able to read the format of the files from TIMIT database.
    See: http://pysoundfile.readthedocs.org/en/0.7.0/ and
    https://github.com/bastibe/PySoundFile

    For other parameters see function getFeatures, once signal is read from path,
    signal and other parameters are forwarded to 'getFeatures'
    :parameters:
        - featureType: either 'mfcc' or 'logFB'
        - wavFileList: list of file paths
        - samplerate
        - winlen
        - winstep
        - nfilt
        - nfft
        - lowfreq
        - highfreq
        - preemph
        - winSzForDelta
    :returns:
        - featureList: numpy vector of np.arrays
        list of same length as input wavFileList, dimensions of every element
        of the list specified by signal duration and winstep (1st dim), and
        number of filters (2nd dim)
    '''
        
    featureList = []
    for f in wavFileList:
        signal, _ = sf.read(f)
        # equalize rms --> same power in all speech signals. Note that later features will be normalised
        # to have zero mean and unit variance, but that is w.r.t all signals. Before, make sure that signals
        # have same energy.
        rms = np.sqrt(np.mean(np.square(signal)))
        signal=signal/rms
        if featureType == 'mfcc':
            featureList.append(mfccFeatures(
                signal=signal,samplerate=samplerate,winlen=winlen,
                winstep=winstep, nfilt=nfilt,nfft=nfft,lowfreq=lowfreq,
                highfreq=highfreq,preemph=preemph, winSzForDelta=winSzForDelta, 
                numcep=numcep, ceplifter=ceplifter, appendEnergy=appendEnergy))
        elif featureType == 'logFB':
            featureList.append(logFilterbankFeatures(
                signal=signal,samplerate=samplerate,winlen=winlen,winstep=winstep,
                nfilt=nfilt,nfft=nfft,lowfreq=lowfreq,highfreq=highfreq,preemph=preemph,
                winSzForDelta=winSzForDelta))
        elif featureType == 'FB':
            featureList.append(filterbankFeatures(
                signal=signal,samplerate=samplerate,winlen=winlen,winstep=winstep,
                nfilt=nfilt,nfft=nfft,lowfreq=lowfreq,highfreq=highfreq,preemph=preemph,
                winSzForDelta=winSzForDelta))
        else:
            raise ValueError
    return np.array(featureList)
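A usage sketch under the defaults (file paths are hypothetical):

wav_files = ['TIMIT/TRAIN/DR1/FCJF0/SA1.WAV', 'TIMIT/TRAIN/DR1/FCJF0/SA2.WAV']
mfcc_features = getAllFeatures('mfcc', wav_files)              # one (frames x coeffs) array per file
logfb_features = getAllFeatures('logFB', wav_files, nfilt=40)  # log filterbank variant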
Example No. 25
File: ad-ltsd.py Project: jlep/vad
def compute_vad(args):
    filename, path, resultpath = args
    signame = os.path.basename(os.path.splitext(filename)[0])
    ids = signame.split("_")
    print("computing: "+path+filename)
    bg_signal, rate = soundfile.read(path+filename)
    ltsd = LTSD_VAD()
    bg_signal=bg_signal[:2000]
    print(bg_signal)
    ltsd.init_params_by_noise(rate, bg_signal)
    signal, rate = soundfile.read(path+filename)
    #vaded_signal = ltsd.filter(signal)
    segments, sig_len = ltsd.segments(signal)
    #seconds = float(len(sig))/rate
    res_name = resultpath+"/ad-ltsd_"+os.path.basename(os.path.splitext(filename)[0])+".txt"
    segments = librosa.core.samples_to_time(segments, rate).tolist()
    len_s = librosa.core.samples_to_time(sig_len, rate)
    write_results(segments, res_name, len_s)
Example No. 26
 def Load(cls, filename):
     data, samplingrate = soundfile.read(file="%s.%s" % (filename, cls.ending))
     if numpy.size(data) == len(data):   # single channel files are imported into a one dimensional row array, so len and size are the same. These need not be transposed
         channels = (data,)
     else:
         channels = numpy.transpose(data)
     return sumpf.Signal(channels=channels,
                         samplingrate=samplingrate,
                         labels=[str(" ".join([filename.split(os.sep)[-1], str(c + 1)])) for c in range(len(channels))])  # one label per channel
Example No. 27
def test_wplus_read_written_data(sf_stereo_wplus):
    sf_stereo_wplus.write(data_stereo)
    assert sf_stereo_wplus.seek(0, sf.SEEK_CUR) == len(data_stereo)
    sf_stereo_wplus.seek(0)
    assert np.all(sf_stereo_wplus.read() == data_stereo)
    assert sf_stereo_wplus.seek(0, sf.SEEK_CUR) == len(data_stereo)
    sf_stereo_wplus.close()
    data, fs = sf.read(filename_new)
    assert np.all(data == data_stereo)
Example No. 28
 def audio(self, audio_):
     if is_string(audio_):
         # Assuming this is a wav file
         audio_, fs = sf.read(audio_)
         self.fs = fs
     assert is_array(audio_)
     if audio_.ndim == 1:
         audio_ = audio_[:, np.newaxis]
     self.mfcc.audio = audio_
     self.periphery.sound_process = ArrayProcess(audio_)
Example No. 29
 def __init__(self, filename):
     data, self.sr = sf.read(filename)
     self.rawdata = np.array(data)
     if len(self.rawdata.shape) == 1:
         self.frames = self.rawdata.shape[0]
         self.data = np.array(self.rawdata)
     else:
         self.frames, self.channels = self.rawdata.shape
         self.data = self.rawdata[:,0]
     self.length = self.frames/self.sr
Example No. 30
    def readFromAudioFile(filename, mono=False):
        '''
        Uses PySoundFile to generate a Sound object from wav and aiff files.
        If mono is true, returns the left channel only.
        '''
        data, fs = sf.read(filename)

        if (len(data.shape) == 2 and mono):
            return Sound(data[:, 0], fs, filename)
        else:
            return Sound(data, fs, filename)
Example No. 31
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
from SingleMic import segment_overlap as s_o
from SingleMic import inverse_segment_overlap as i_s_o
import time

start_time = time.time()

#Variables
tsegment = 20e-3  #20ms segment
overlap = 0.5

#Import data & fs
data, fs = sf.read('Audio/clean.wav')

# Calc
s_segment = int(tsegment * fs)
s_overlap = int(overlap * s_segment)
# pad data with zeros
remainder = s_segment - (len(data) % s_segment)
data_extended = np.ravel(
    np.asmatrix(np.pad(data, (0, int(remainder)), 'constant')))

x_array = s_o.segment_overlap(data_extended, s_segment, s_overlap)
x_truncarray = i_s_o.inverse_segment_overlap(x_array, len(data_extended),
                                             s_segment, s_overlap)

#calculate difference between initial and reconstructed signals
residual = data_extended - x_truncarray
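If the segment/overlap round trip is exact, the residual should be numerically zero; a quick check (added for illustration, continuing the script above):

print('max reconstruction error:', np.max(np.abs(residual)))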
Example No. 32
 def read_audio(path):
     filepath = get_abs_path(src_path, path)
     return soundfile.read(filepath)
Example No. 33
    :param extract: (Function) extraction method to use
    :param multi: (bool) specify if several samples can be extracted
        from one audio file
    :param audio_dir: (str) directory where the audio files are located
    :return: (features:list, labels:list)
    """
    features = []
    speakers = []

    for index, row in data.iterrows():
        audio_name = row.loc[AUDIO]
        speaker = row.loc[SPEAKER_ID]
        audio, samplerate = sf.read(audio_dir + audio_name)
        audio_extracts = segment_audio(audio, samplerate)

        if not multi:
            audio_extracts = audio_extracts[0:1]

        for audio_extract in audio_extracts:
            # extract the features using the given extraction function
            features.append(extract(audio_extract, samplerate))
            speakers.append(speaker)

    return features, speakers


if __name__ == '__main__':
    audio, sp = sf.read("database/dev/audio/aahtm.flac")
    sp_audio = segment_audio(audio, sp)
    lpc = extract_with_lpc(audio, sp)
Example No. 34
    def wav_read(wav_file):

        wav_data, sr = sf.read(wav_file, dtype='int16')

        return wav_data, sr
Example No. 35
import sounddevice as sd
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import time
import os
from scipy import signal


# In[2]:
files  = os.listdir(r'C:\Users\Hp\Desktop\music\written')
s = 'C:/Users/Hp/Desktop/music/written/'

duration = 5  # seconds
fs = 44100
sd.default.device = 1
print('Started recording')
myrecording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
sd.wait()
print('Stopped recording')
m = []
for i in range(len(files)):
    data, samplerate = sf.read(s+files[i])
    a = signal.correlate(data,myrecording[0::20,0])
    m.append(np.max(abs(a)))
I = m.index(max(m))
print(files[I])
Example No. 36
import soundfile as sf
import numpy as np
from utils import signals_to_string

filepath = 'result.wav'
data, _ = sf.read(filepath)

print(len(data))
size = len(data)
data = set(["{:.6f}".format(d) for d in data])

rev_data = []

for i in range(size):
    k = np.sin(i * 439.97 / 44100 * (2 * np.pi))
    ks = "{:.6f}".format(k)
    if ks in data:
        rev_data.append(k)
    else:
        rev_data.append("x")

signal = []
for i in range(0, len(rev_data), 2000):
    if rev_data[i:i + 2000].count("x") > 500:
        signal.append(0)
    else:
        signal.append(1)

print(len(rev_data))
print(signal)
print(signals_to_string(signal))
Example No. 37
import numpy as np
import soundfile as sf


def find_peaks(ramec):
    # local maxima: samples strictly greater than both neighbours
    peaks = []
    for i in range(1, len(ramec) - 1):
        if ramec[i - 1] < ramec[i] and ramec[i] > ramec[i + 1]:
            peaks.append(ramec[i])
    return peaks


znely_ramec = 45

s, fs = sf.read('xsiska16.wav')

print("Number of samples: ", s.size)

s_max = max(s)
s_min = min(s)

print("max ", s_max)
print("min ", s_min)

t = np.arange(s.size) / fs

time = s.size / fs

print("Recording length in seconds: ", time)
Example No. 38
def playsound(filename):
    data, fs = soundfile.read(filename, dtype='float32')
    sounddevice.play(data, fs)
    status = sounddevice.wait()
Example No. 39
 def size_hours(self):
     return sum(
         soundfile.read(self.get(i)[0])[0].size / (16000 * 3600)
         for i in range(self.size()))
Example No. 40
# From specs
samplingRateVar = rootgrp.createVariable('Data.SamplingRate', 'f8', ('I'))
samplingRateVar.Units = 'hertz'
samplingRateVar[:] = 44100

# No delay found
delayVar = rootgrp.createVariable('Data.Delay', 'f8', ('I', 'R', 'E'))
delay = np.zeros((I, R, E))
delayVar[:, :, :] = delay

# Parse the audio files...
dataIRVar = rootgrp.createVariable('Data.IR', 'f8', ('M', 'R', 'E', 'N'))
dataIRVar.ChannelOrdering = 'fuma'
dataIRVar.Normalization = 'fuma'

audioFilesPath = '/Volumes/Dinge/audio/S3A_original/MainChurch/Soundfield/'
for e in range(E):

    fileIdx = e + 1  # Numeration starts at 1
    fileName = 'ls' + str(fileIdx) + '.wav'

    # Open the audio file
    data, samplerate = sf.read(audioFilesPath + fileName)
    assert samplerate == 44100
    assert np.shape(data) == (65536, 4)

    dataIRVar[:, :, e, :] = data

#----------Close it----------#

rootgrp.close()
Example No. 41
                    self.w)
                if (len(frame) != self.wl):
                    frame = np.concatenate(
                        (frame, np.zeros((self.wl - len(frame)))))
                #print('min '+str(self.bw(self.gama[i]))+' max '+str(self.ew(self.gama[i]))+' sigma ' +'min '+str(self.bw(self.sigma[i]))+' max '+str(self.ew(self.sigma[i])))
                self.y[self.bw(self.gama[i]):(
                    self.ew(self.gama[i]))] = self.y[self.bw(self.gama[i]):(
                        self.ew(self.gama[i]))] + frame
        except Exception:
            print('The scaling factor does not work')


Fs = 41000  # sampling frequency
f = 20
timeVector = np.arange(0, 1, 1 / Fs)
Audio, Fs = sf.read('guitarra.wav')
#Audio= (np.sin(2*pi*f*timeVector) + np.sin(2*50*pi*f*timeVector) + np.sin(2*100*pi*f*timeVector))/3
#Audio= np.sin(2*pi*f*timeVector)

abc = ola()
abc.run(Audio, 2)
sd.play(abc.y, Fs)
sd.wait()
#sf.write('speech_dobleDuracion.wav',abc.y,Fs)

n = len(Audio)
timeVector = np.arange(0, n * (1 / Fs), 1 / Fs)

n = len(Audio)
frecVector = fftfreq(n)
espectroVector = fft(Audio)
Example No. 42
 def file_to_text(self, filename):
     audio_input, samplerate = sf.read(filename)
     assert samplerate == 16000
     return self.buffer_to_text(audio_input)
Example No. 43
'''demo for using sound device and sound file:
    Taken from: https://python-sounddevice.readthedocs.io/en/0.2.1/examples.html
'''

import argparse
import logging

# To use, cd into helpers directory, run >> python demo/sound_card_demo.py "filename"
# Example: python demo/sound_card_demo.py "../static/sounds/chime.wav"

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("filename", help="audio file to be played back")
parser.add_argument("-d", "--device", type=int, help="device ID")
args = parser.parse_args()

try:
    import sounddevice as sd
    import soundfile as sf
    devices = sd.query_devices()
    print(devices)
    data, fs = sf.read(args.filename, dtype='float32')
    sd.play(data, fs, device=args.device, blocking=True)
    status = sd.get_status()
    if status:
        logging.warning(str(status))
except BaseException as e:
    # This avoids printing the traceback, especially if Ctrl-C is used.
    raise SystemExit(str(e))
Example No. 44
def asr_worker(text_queue: Queue, run: Value, done_loading: Value):
    try:
        import sounddevice as sd
        import soundfile as sf

        # Initialise CUDA.
        cuda.init()
        device = cuda.Device(0)
        ctx = device.make_context()

        # Load the QuartzNet ASR model.
        logging.info('Loading QuartzNet model for ASR...')
        featurizer = MelFeaturizer()
        quartznet = QuartzNet()

        # Initialise the Decoder.
        logging.info('Loading CTC Beam Decoder...')
        decoder = Decoder(model_path='models/lm/3_gram_lm.trie',
                          alpha=1,
                          beta=0.5)

        with done_loading.get_lock():
            done_loading.value = 1

        chunk_size = 1 * 16000
        n_past_chunks = 5
        past_chunks_size = chunk_size * (n_past_chunks - 1)
        activation_words = ['jarvis', 'jervis']
        beep, _ = sf.read('assets/wav/beep.wav', dtype='float32')
        peeb = np.ascontiguousarray(np.flip(beep))
        activation_waveform = np.zeros((n_past_chunks * chunk_size, 1),
                                       dtype=np.float32)
        in_stream = sd.InputStream(samplerate=16000, channels=1)
        out_stream = sd.OutputStream(samplerate=44100, channels=2)
        in_stream.start()
        out_stream.start()

        while run.value:
            # Read waveform from the microphone and store in the rolling buffer.
            data, overflowed = in_stream.read(chunk_size)
            if overflowed:
                logging.warning('ASR process is skipping microphone frames!')
            activation_waveform = np.roll(activation_waveform, -chunk_size)
            activation_waveform[past_chunks_size:, 0] = data[:, 0]
            # Run ASR.
            token_probs = quartznet(featurizer(activation_waveform.T))
            decoded = decoder(token_probs)
            # If the keyword was said...
            if any([word in decoded for word in activation_words]):
                logging.info('ASR triggered!')
                # Play a beep sound.
                out_stream.write(beep)
                # Read waveform from the microphone.
                _data = in_stream.read(5 * 16000)[0]
                # Play a peeb sound.
                out_stream.write(peeb)
                # Run ASR.
                token_probs = quartznet(featurizer(_data.T))
                decoded = decoder(token_probs)
                # Add the recognised text to the text queue and reset the activation waveform buffer.
                logging.info(f'ASR recognised: "{decoded}".')
                text_queue.put(decoded)
                activation_waveform *= 0
    except KeyboardInterrupt:
        pass
    finally:
        # clean up the CUDA context and audio streams even on errors
        ctx.pop()
        in_stream.stop()
        out_stream.stop()
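The rolling-buffer update above (``np.roll`` followed by overwriting the tail) keeps the most recent ``n_past_chunks`` chunks of audio in a fixed-size array; the trick in isolation (toy sketch):

import numpy as np

buf = np.zeros(8)
for chunk in ([1, 1], [2, 2], [3, 3]):
    buf = np.roll(buf, -2)  # shift everything two slots towards the front
    buf[-2:] = chunk        # newest chunk overwrites the tail
print(buf)  # [0. 0. 1. 1. 2. 2. 3. 3.]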
Example No. 45
 def test_call(self):
     sample, sr = sf.read(f"{CWD}/tests/test_data/test-clean/61/70968/61-70968-0000.flac")
     path = f"{CWD}/tests/test_data/test-clean/61/70968/61-70968-0000.flac"
     res, sr = self.add_noise(path)
     self.assertEqual(res.shape, sample.shape)
     sf.write("foo.flac", res, sr)
Example No. 46
 def test_stereo_to_mono(self):
     sample, sr = sf.read("test_data/UrbanSound8K/audio/fold1/118279-8-0-5.wav")
     res = stereo_to_mono(sample)
     self.assertEqual(res.shape, (192000,))
Example No. 47
 def test_match_to_speech(self):
     sample, sr = sf.read("test_data/UrbanSound8K/audio/fold1/118279-8-0-5.wav")
     func = create_match_to_speech(noise_sr=44_100, speech_sr=16_000)
     res = func(sample)
     print(type(res))
Example No. 48
def _trim(input_folder, sound_list, output_folder):
    for sound in sound_list:
        data, sample_rate = sf.read(os.path.join(input_folder, sound))
        sf.write(os.path.join(output_folder, sound), data[:(sample_rate * 5)],
                 sample_rate)
Example No. 49
def sampler(video_1, video_2, rate=32, augment=False, precompute=False, include_metadata=False):
    """Sample one frame from video_file, with 50% chance sample one second from corresponding audio_file,
       50% chance sample one second from another audio_file in the list of audio_files.

    Args:
        video_1: dict for candidate video to sample from
        video_2: dict for candidate video to sample from

    Keyword Args:
        rate: Poisson rate parameter. Used for precomputing samples
        augment: If True, perform data augmention
        precompute: If True, precompute samples during initialization so that
                    memory can be discarded

    Returns:
        A generator that yields dictionary of video sample, audio sample,
        and label (0: not from corresponding files, 1: from corresponding files)

    """
    video_file_1 = video_1['video_filepath']
    video_file_2 = video_2['video_filepath']
    audio_file_1 = video_1['audio_filepath']
    audio_file_2 = video_2['audio_filepath']

    debug_msg = 'Initializing streamer with videos "{}" and "{}"'
    LOGGER.debug(debug_msg.format(video_file_1, video_file_2))

    # Hack: choose a number of samples such that we with high probability, we
    #       won't run out of samples, but is also less than the entire length of
    #       the video so we don't have to resize all of the frames
    num_samples = int(scipy.stats.poisson.ppf(0.999, rate))


    try:
        with LogTimer(LOGGER, 'Reading video'):
            video_data_1 = read_video(video_file_1)
    except Exception as e:
        warn_msg = 'Could not open video file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(video_file_1, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return  # PEP 479: 'raise StopIteration' inside a generator becomes RuntimeError

    try:
        with LogTimer(LOGGER, 'Reading video'):
            video_data_2 = read_video(video_file_2)
    except Exception as e:
        warn_msg = 'Could not open video file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(video_file_2, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    try:
        with LogTimer(LOGGER, 'Reading audio'):
            audio_data_1, sampling_frequency = sf.read(audio_file_1,
                                                       dtype='int16',
                                                       always_2d=True)
            audio_data_1 = audio_data_1.mean(axis=-1).astype('int16')

    except Exception as e:
        warn_msg = 'Could not open audio file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(audio_file_1, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    try:
        with LogTimer(LOGGER, 'Reading audio'):
            audio_data_2, sampling_frequency = sf.read(audio_file_2,
                                                       dtype='int16',
                                                       always_2d=True)
            audio_data_2 = audio_data_2.mean(axis=-1).astype('int16')
    except Exception as e:
        warn_msg = 'Could not open audio file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(audio_file_2, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    if precompute:
        samples = []
        for _ in range(num_samples):
            sample = generate_sample(
                audio_file_1, audio_data_1, audio_file_2, audio_data_2,
                video_file_1, video_data_1, video_file_2, video_data_2,
                sampling_frequency, augment=augment, include_metadata=include_metadata)

            samples.append(sample)

        # Clear the data from memory
        video_data_1 = None
        video_data_2 = None
        audio_data_1 = None
        audio_data_2 = None
        video_data = None
        audio_data = None
        del video_data_1
        del video_data_2
        del audio_data_1
        del audio_data_2
        del video_data
        del audio_data

        while samples:
            # Yield the sample, and remove from the list to free up some memory
            yield samples.pop()
    else:
        while True:
            yield generate_sample(
                audio_file_1, audio_data_1, audio_file_2, audio_data_2,
                video_file_1, video_data_1, video_file_2, video_data_2,
                sampling_frequency, augment=augment, include_metadata=include_metadata)

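A usage sketch (the dict fields follow the docstring; paths and the consumer are hypothetical):

video_a = {'video_filepath': 'a.mp4', 'audio_filepath': 'a.flac'}
video_b = {'video_filepath': 'b.mp4', 'audio_filepath': 'b.flac'}
for sample in sampler(video_a, video_b, rate=32, precompute=True):
    train_on(sample)  # hypothetical consumer of the (video, audio, label) dict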
Example No. 50
            print("Warning: cupy is not installed. 'gpu' argument should be set to -1. Switched to CPU.\n")
            import numpy as xp

    separater = AR_FastMNMF2(
        n_source=args.n_source,
        n_basis=args.n_basis,
        xp=xp,
        init_SCM=args.init_SCM,
        n_tap_AR=args.n_tap_AR,
        n_delay_AR=args.n_delay_AR,
        n_bit=args.n_bit,
        algo=args.algo,
        n_iter_init=args.n_iter_init
    )

    wav, sample_rate = sf.read(args.input_fname)
    wav /= np.abs(wav).max() * 1.2
    M = min(len(wav), args.n_mic)
    spec_FTM = MultiSTFT(wav[:, :M], n_fft=args.n_fft)

    separater.file_id = args.file_id
    separater.load_spectrogram(spec_FTM, sample_rate)
    separater.solve(
        n_iter=args.n_iter,
        save_dir="./",
        save_likelihood=False,
        save_param=False,
        save_wav=True,
        interval_save=5,
    )
Example No. 51
def play_from_file(file):
    data, fs = soundfile.read(file)
    sd.play(data, fs, device=sd.default.device)
    status = sd.wait()
Example No. 52
def process(args):

    f0_max = 1100.0
    f0_min = 50.0

    frame_shift = args.shift_size / 1000

    hop_length = int(args.sr * frame_shift)

    lab_list = os.listdir(args.labdir)
    phone_set = []
    idscp = {}
    index = 1
    for lab in lab_list:
        lab_id = lab[:-4]
        idscp[lab_id] = index

        segments, phone = load_label(
            os.path.join(args.labdir, lab),
            s_type=args.label_type,
            sr=args.sr,
            frame_shift=frame_shift,
            sil=args.sil,
        )

        for p in phone:
            if p not in phone_set:
                phone_set.append(p)

        wav_path = os.path.join(args.wavdir, lab_id + "." + args.wav_extention)
        if args.wav_extention == "raw":
            signal, osr = sf.read(
                wav_path,
                subtype="PCM_16",
                channels=1,
                samplerate=args.sr,
                endian="LITTLE",
            )
        else:
            signal, osr = librosa.load(wav_path, sr=None)

        if osr != args.sr:
            signal = librosa.resample(signal, osr, args.sr)

        song_align = os.path.join(args.outdir, "alignment")
        song_wav = os.path.join(args.outdir, "wav_info", str(index))
        song_pitch_beat = os.path.join(args.outdir, "pitch_beat_extraction",
                                       str(index))

        if not os.path.exists(song_align):
            os.makedirs(song_align)
        if not os.path.exists(song_wav):
            os.makedirs(song_wav)
        if not os.path.exists(song_pitch_beat):
            os.makedirs(song_pitch_beat)
        print("processing {}".format(song_wav))
        for seg in segments.keys():
            alignment = segments[seg]["alignment"]
            start = segments[seg]["start"]
            name = seg
            seg_signal = signal[int(start *
                                    hop_length):int(start * hop_length +
                                                    len(alignment) *
                                                    hop_length)]
            """extract beats"""
            tempo, beats = librosa.beat.beat_track(y=seg_signal,
                                                   sr=args.sr,
                                                   hop_length=hop_length)
            # times = librosa.frames_to_time(beats, sr=args.sr)
            # frames = librosa.time_to_frames(
            #     times, sr=args.sr, hop_length=hop_length, n_fft=n_fft
            # )
            np.save(
                os.path.join(song_pitch_beat, name) + "_beats",
                np.array(beats))
            """extract pitch"""
            seg_signal = seg_signal.astype("double")
            _f0, t = pw.harvest(
                seg_signal,
                args.sr,
                f0_floor=f0_min,
                f0_ceil=f0_max,
                frame_period=frame_shift * 1000,
            )
            _f0 = pw.stonemask(seg_signal, _f0, t, args.sr)

            np.save(
                os.path.join(song_pitch_beat, name) + "_pitch", np.array(_f0))

            alignment_id = np.zeros((len(alignment)))
            for i in range(len(alignment)):
                alignment_id[i] = phone_set.index(alignment[i])
            np.save(
                os.path.join(song_align,
                             pack_zero(index) + name),
                np.array(alignment_id),
            )

            sf.write(os.path.join(song_wav, name) + ".wav",
                     seg_signal,
                     samplerate=args.sr)
            print("saved {}".format(os.path.join(song_wav, name) + ".wav"))
        index += 1

    with open(os.path.join(args.outdir, "phone_set.txt"), "w") as f:
        for p_id, p in enumerate(phone_set):
            f.write(str(p_id) + " " + p)
            f.write("\n")
Example No. 53
                pl.show()
                '''

        # Plot Spectrogram for each detection, plot to screen and to file
        DETECTION_THRESHOLD = 2.0
        SPECTROGRAM_NFFT = 1024
        SPECTROGRAM_STEP = 128
        print("Events detected from statistical deviation:")
        for d in detections_final:
            if d[2] > DETECTION_THRESHOLD:
                print('  {}: {:>3.1f} {:>5.1f}'.format(hhmmss(
                    d[0]), d[1], d[2]))  # start_time, duration, significance
                start_i = d[3] - DETECTION_CONTEXT_N
                stop_i = d[4] + 1 + DETECTION_CONTEXT_N
                wave = sf.read(wave_file,
                               start=start_i * BLOCK_DURATION_N,
                               stop=stop_i * BLOCK_DURATION_N)[0]
                wave = fixup_wave(wave, f.samplerate)
                # Apply noise cancellation to signal
                wave = filter_signal(wave, f.samplerate)
                # Generate plot of wave, spectrogram, and power and save as image file
                pl.subplot(3, 1, 1)
                pl.plot(wave)
                pl.title('start={}, dur={:.1f}, sig={:.1f}'.format(
                    hhmmss(d[0]), d[1], d[2]))
                # start_time, duration, significance
                pl.xticks([])
                pl.subplot(3, 1, 2)
                pl.specgram(wave,
                            SPECTROGRAM_NFFT,
                            f.samplerate,
Example No. 54
    os.makedirs(cropped_threshold_dir)

fc = 3000 / 22050
b, a = signal.butter(10, fc, 'low')

for file_name in os.listdir(files_dir):

    direction = re.findall(r"\d+", file_name)[0]

    paths = capsule_path_difference(polar, [1.5, 0, direction])
    max_channel, min_channel, max_onset, min_onset = 0, 0, 0, sys.maxsize

    onsets = []

    # Read wav file
    data, fs = sf.read(os.path.join(files_dir, file_name))

    closest = np.argmin(paths)
    # low pass filter at fc
    filtered = signal.filtfilt(b, a, data.T[closest])
    filter_max = max(filtered)
    closest_onset = next(x[0] for x in enumerate(filtered)
                         if abs(x[1]) == filter_max)
    time = [n / fs for n in range(filtered.size)]

    onsets_distance = np.array(
        [round(((paths[i] - paths[closest]) / c) * fs) + closest_onset
         for i in range(len(paths))],
        dtype='int32')
Example No. 55
import sys
import numpy as np
import math
import librosa
import soundfile as sf
import json
from librosa.core.spectrum import power_to_db
import scipy

file_path = sys.argv[1]
data, samplerate = sf.read(file_path)
#data = np.clip(data*3, -1, 1)

with open("MfccConfig.json", "r") as f:
    config = json.load(f)

frame_size = config['frame_size']
frame_step = config['frame_step']
n_fft = config['n_fft']
n_mels = config['mfcc_bank_cnt']
fmin = config['fmin']
fmax = config['fmax']
dtype = config.get('dtype', "int")
high_prec = config.get('use_high_prec', False) or dtype == "fix32_scal"
use_power = False
rad4 = round(math.log(n_fft // 2, 4)) == math.log(n_fft // 2, 4)
ndct = config.get('n_dct', False)

from librosa.filters import get_window
from librosa import util
librosa_fft_window = get_window("hann", frame_size, fftbins=True)
Example No. 56
 def load_audio_from_path(self, wav_path):
     assert os.path.isfile(wav_path) and wav_path.endswith('.wav')
     samples, _ = soundfile.read(wav_path, dtype="int16")
     self.samples = samples.tolist()
Example No. 57
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from torch.utils.mobile_optimizer import optimize_for_mobile

tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
model.eval()

audio_input, _ = sf.read("scent_of_a_woman_future.wav")
input_values = tokenizer(audio_input, return_tensors="pt").input_values
logits = model(input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = tokenizer.batch_decode(predicted_ids)[0]

model_dynamic_quantized = torch.quantization.quantize_dynamic(
    model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
traced_quantized_model = torch.jit.trace(model_dynamic_quantized,
                                         input_values,
                                         strict=False)
optimized_traced_quantized_model = optimize_for_mobile(traced_quantized_model)
optimized_traced_quantized_model.save("wav2vec2.pt")
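The exported TorchScript file can then be loaded back for a quick sanity check before deploying to mobile (a sketch; the traced model returns a dict-like output):

import torch

loaded = torch.jit.load("wav2vec2.pt")
loaded.eval()
with torch.no_grad():
    out = loaded(input_values)  # input_values as prepared by the tokenizer above
logits = out["logits"] if isinstance(out, dict) else out.logits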
Example No. 58
 def map_to_array(batch):
     speech, _ = sf.read(batch["file"])
     batch["speech"] = speech
     return batch
Example No. 59
 def read_wav(self, bytes):
     waveform, sample_rate = sf.read(BytesIO(bytes), dtype="float32")
     return waveform, sample_rate
Example No. 60
import playsound
import soundfile as sf
import numpy as np
#import matplotlib.pyplot as plt

x, fs = sf.read('carrie1.wav')
audio = x

#=================================================================================#

fc = 4000
M = 20
wc = (2*np.pi*fc)/fs


#=================================================================================#

w = np.hamming(M)[:M-1]
hd = []
for n in range(M-1):
 hd.insert(n, (wc/np.pi)* np.sinc((wc/np.pi)*(n-(M/2))))

#=================================================================================#

h = hd*w

#=================================================================================#

audio_filtrado = np.convolve(h, audio)