def _load_sounds(self):
    self._bar_audio_data, self._bar_audio_fs = soundfile.read(self._bar_file)
    self._beat_audio_data, self._beat_audio_fs = soundfile.read(self._beat_file)
    if self._division_file:
        self._division_audio_data, self._division_audio_fs = soundfile.read(self._division_file)
    else:
        self._division_audio_data, self._division_audio_fs = None, None
def main():
    # fs, bg_signal = wavfile.read(sys.argv[1])
    if argv[1] == 'batch':
        files = []
        for f in os.listdir(argv[2]):
            if os.path.splitext(f)[1] == ".flac":
                files.append(f)
        args = [(f, argv[2], argv[3]) for f in files]
        pool = multiprocessing.Pool(12)
        r = pool.map_async(compute_vad, args)
        r.wait()
        pool.close()
        pool.join()
        # for a in args:
        #     compute_vad(a)
    else:
        bg_signal, fs = soundfile.read(argv[1])
        ltsd = LTSD_VAD()
        bg_signal = bg_signal[:2000]
        print(bg_signal)
        ltsd.init_params_by_noise(fs, bg_signal)
        signal, fs = soundfile.read(argv[1])
        # vaded_signal = ltsd.filter(signal)
        segments, sig_len = ltsd.segments(signal)
        print(segments[0])
def test_write_non_seekable_file(file_w):
    with sf.SoundFile(file_w, 'w', 44100, 1, format='XI') as f:
        assert not f.seekable()
        assert f.frames == 0
        f.write(data_mono)
        assert f.frames == len(data_mono)
        with pytest.raises(RuntimeError) as excinfo:
            f.seek(2)
        assert "unseekable" in str(excinfo.value)
    with sf.SoundFile(filename_new) as f:
        assert not f.seekable()
        assert f.frames == len(data_mono)
        data = f.read(3, dtype='int16')
        assert np.all(data == data_mono[:3])
        data = f.read(666, dtype='int16')
        assert np.all(data == data_mono[3:])
        with pytest.raises(RuntimeError) as excinfo:
            f.seek(2)
        assert "unseekable" in str(excinfo.value)
        with pytest.raises(ValueError) as excinfo:
            f.read()
        assert "frames" in str(excinfo.value)
    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == data_mono)
    assert fs == 44100
    with pytest.raises(ValueError) as excinfo:
        sf.read(filename_new, start=3)
    assert "start is only allowed for seekable files" in str(excinfo.value)
def test_read_into_non_contiguous_out(file_stereo_r):
    out = np.empty(data_stereo.shape[::-1], dtype='float64')
    if getattr(sys, 'pypy_version_info', (999,)) < (2, 6):
        # The test for C-contiguous doesn't work with PyPy 2.5.0
        sf.read(file_stereo_r, out=out.T)
    else:
        with pytest.raises(ValueError) as excinfo:
            sf.read(file_stereo_r, out=out.T)
        assert "C-contiguous" in str(excinfo.value)
def get_traindata(gesfile, audio_f, dt, audio_fargs=None, wavfile=None,
                  ignore_f0=True):
    """Get input, output pairs for supervised learning training or testing.

    Parameters
    ----------
    gesfile : str
        Path to a .ges gesture file (XML format).
    audio_f : function
        A function that will be applied to the audio stream.
    dt : float
        Sampling step size for the gesture and audio trajectories.
    audio_fargs : dict, optional
        Keyword arguments that will be provided to ``audio_f``.
        By default, audio, sampling rate, and dt will be provided.
    wavfile : str, optional
        A .wav file that corresponds to the ``gesfile``.
        If specified but the file does not exist, it will be generated.
        If not specified, audio will be synthesized but not saved.
    """
    gs = parse_ges(gesfile, ignore_f0=ignore_f0)
    y = gs.trajectory(dt=dt)
    if wavfile is None:
        audio, fs = synthesize(gesfile)
    elif not os.path.exists(wavfile):
        synthesize(gesfile, wavfile)
        audio, fs = sf.read(wavfile)
    else:
        audio, fs = sf.read(wavfile)
    audio_fargs = {} if audio_fargs is None else audio_fargs.copy()
    audio_fargs.update({'audio': audio, 'fs': fs, 'dt': dt})
    x = audio_f(**audio_fargs)

    # For some reason, the wav file size and the gesture trajectory size
    # are often off by one or two. Here, we lengthen or shorten ``y``,
    # assuming that VTL is doing it correctly.
    # Not sure if that assumption is correct.
    if x.shape[0] > y.shape[0]:
        # Extend y by n timesteps
        toadd = np.tile(y[np.newaxis, -1], (x.shape[0] - y.shape[0], 1))
        y = np.concatenate((y, toadd))
    if x.shape[0] < y.shape[0]:
        # Shorten y by n timesteps
        todelete = list(range(x.shape[0], y.shape[0]))
        y = np.delete(y, todelete, 0)
    assert x.shape[0] == y.shape[0], "Misaligned; %s %s" % (x.shape, y.shape)
    return x, y, fs
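# A minimal usage sketch for get_traindata above; the gesture file path and
# the feature function are hypothetical. get_traindata supplies `audio`, `fs`
# and `dt` as keyword arguments to the feature function:
def mfcc_features(audio, fs, dt):
    # placeholder: one feature frame per dt seconds of audio
    return np.zeros((int(len(audio) / (fs * dt)), 13))

x, y, fs = get_traindata('example.ges', mfcc_features, dt=0.01)
assert x.shape[0] == y.shape[0]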
def show_file_hystogram(filename):
    data, sample_rate = sf.read(filename)  # load the data
    # a = data.T[0]  # this is a two channel soundtrack, take the first track
    b = [(ele / 2 ** 8.) * 2 - 1 for ele in data]  # 8-bit track, b is now normalized on [-1, 1)
    c = np.fft.fft(b)  # calculate fourier transform (complex numbers list)
    d = len(c) // 2 - 1  # you only need half of the fft list (real signal symmetry)
    k = np.arange(d)
    fs = 8000  # 8 kHz
    T = d / fs
    frqLabel = k / T
    c = abs(c[:d])
    c = [round(i, 1) for i in c]
    print("vector dimensionality: {0}".format(d))
    print("min: {0} max: {1}".format(min(c), max(c)))
    plt.gca().set_ylim([min(c), 20])
    plt.plot(frqLabel, c, 'g')
    plt.show()
def experimental_random_segmentation(audio_input, segments, options, sr):
    """
    (mir-dev branch in Sonidos Mutantes)

    Segments the file with random values according to the options
    """
    outputPath = options['outputPath']
    min_dur, max_dur = options['duration']
    try:
        x = read(audio_input)[0]
        for i in range(segments):
            while True:
                pos = random.uniform(0., 1.)  # normalized position in the file
                dur = random.uniform(min_dur, max_dur)
                durSamples = int(dur * sr)
                posSamples = int(pos * len(x))
                if posSamples + durSamples < len(x):
                    break
            signalOut = x[posSamples:posSamples + durSamples]
            baseName = os.path.splitext(audio_input)[0].split('/')[-1]
            if not os.path.exists(outputPath):
                os.makedirs(outputPath)
                print("Creating samples directory")
                time.sleep(4)
            outputFilename = outputPath + '/' + baseName + '_sample' + str(i) + '.wav'
            write_file(outputFilename, signalOut, sr)
            print("File generated: %s" % outputFilename)
            time.sleep(1)
    except Exception as e:
        print("Error: %s" % e)
def test_buffer_write_with_bytes(sf_stereo_w):
    b = b"\x01\x00\xFF\xFF\xFF\x00\x00\xFF"
    sf_stereo_w.buffer_write(b, 'short')
    sf_stereo_w.close()
    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == [[1, -1], [255, -256]])
    assert fs == 44100
def test_process_multiple(self):
    keyword_file_names = [
        'alexa', 'americano', 'avocado', 'blueberry', 'bumblebee',
        'caterpillar', 'christina', 'dragonfly', 'flamingo', 'francesca',
        'grapefruit', 'grasshopper', 'iguana', 'picovoice', 'pineapple',
        'porcupine', 'raspberry', 'terminator', 'vancouver']
    keyword_file_paths = [
        self._abs_path('../../resources/keyword_files/%s_%s.ppn'
                       % (name, self._keyword_file_extension()))
        for name in keyword_file_names]
    porcupine = Porcupine(
        library_path=self._library_path(),
        model_file_path=self._abs_path('../../lib/common/porcupine_params.pv'),
        keyword_file_paths=keyword_file_paths,
        sensitivities=[0.5] * len(keyword_file_paths))
    audio, sample_rate = soundfile.read(
        self._abs_path('../../resources/audio_samples/multiple_keywords.wav'),
        dtype='int16')
    assert sample_rate == porcupine.sample_rate

    num_frames = len(audio) // porcupine.frame_length
    results = []
    for i in range(num_frames):
        frame = audio[i * porcupine.frame_length:(i + 1) * porcupine.frame_length]
        result = porcupine.process(frame)
        if result >= 0:
            results.append(result)

    self.assertEqual(
        results,
        [15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])
    porcupine.delete()
def test_write_int_data_to_float_file(file_inmemory):
    """This is a very uncommon use case."""
    sf.write(file_inmemory, data_mono, 44100, format='WAV', subtype='FLOAT')
    file_inmemory.seek(0)
    read, fs = sf.read(file_inmemory, always_2d=False, dtype='float32')
    assert np.all(read == data_mono)
    assert fs == 44100
def test_rplus_append_data(sf_stereo_rplus):
    sf_stereo_rplus.seek(0, sf.SEEK_END)
    sf_stereo_rplus.write(data_stereo / 2)
    sf_stereo_rplus.close()
    data, fs = sf.read(filename_new)
    assert np.all(data[:len(data_stereo)] == data_stereo)
    assert np.all(data[len(data_stereo):] == data_stereo / 2)
def importRirs(downloadDir, insertIntoDbF):
    url = "http://www.openslr.org/resources/13/RWCP.tar.gz"
    filename = join(downloadDir, "rwcp.tar.gz")
    unpackDir = join(downloadDir, "rwcp")

    dl = util.FileDownloader(url, filename)
    dl.download()
    dl.unpackTo(unpackDir)

    files = []
    for root, dirnames, filenames in os.walk(join(unpackDir, "RWCP/micarray/MICARRAY/data1")):
        for filename in filenames:
            if filename[-2:] != ".1":
                continue  # we only use the front microphone
            files.append(join(root, filename))

    pattern = re.compile(r"(circle|cirline)/(\w{3})/imp(\d{3})")
    bar = util.ConsoleProgressBar()
    bar.start("Import RWCP")
    for i, file in enumerate(sorted(files)):  # we sort to get same identifiers cross-platform
        m = pattern.search(file)
        assert m, "Could not parse room from path ({})".format(file)
        room = m.group(2)
        identifier = "{:04d}_{}_{}".format(i, room.lower(), m.group(3))
        x, fs = sf.read(file, dtype="float32", **RawFormat)
        x /= max(abs(x))
        x = ((2 ** 15 - 1) * x).astype(np.int16)  # scale to the int16 range
        insertIntoDbF((x, fs), identifier, {"source": "RWCP", "room": room})
        bar.progress(i / len(files))
    bar.end()
def update_max_len(file_path_list, max_len):
    tmp_max_len = 0
    # Update the max length based on the given dataset
    signal_set = set()
    for file_path in file_path_list:
        file_list = open(file_path)
        for line in file_list:
            line = line.strip().split()
            if len(line) < 2:
                print('Wrong audio list file record in the line:', line)
                continue
            file_str = line[0]
            if file_str in signal_set:
                continue
            signal_set.add(file_str)
            signal, rate = sf.read(file_str)  # signal: sample values, rate: sample rate
            if len(signal.shape) > 1:
                signal = signal[:, 0]
            if rate != FRAME_RATE:
                # up-sample or down-sample to the predefined sample rate
                signal = resampy.resample(signal, rate, FRAME_RATE, filter='kaiser_fast')
            if len(signal) > tmp_max_len:
                tmp_max_len = len(signal)
        file_list.close()
    if tmp_max_len < max_len:
        max_len = tmp_max_len
    return max_len
def _process_function(self, track, user_function, estimates_dir, evaluate):
    # load estimates from disk instead of processing
    if user_function is None:
        track_estimate_dir = op.join(estimates_dir, track.subset, track.filename)
        user_results = {}
        for target_path in glob.glob(track_estimate_dir + '/*.wav'):
            target_name = op.splitext(os.path.basename(target_path))[0]
            try:
                target_audio, rate = sf.read(target_path, always_2d=True)
                user_results[target_name] = target_audio
            except RuntimeError:
                pass
    else:
        # call the user provided function
        user_results = user_function(track)
    if estimates_dir and not evaluate and user_function is not None:
        self._save_estimates(user_results, track, estimates_dir)
    if evaluate:
        self._evaluate_estimates(user_results, track)
def play(self, file_path):
    if self.convert:
        self.convert_mp3_to_wav(file_path_mp3=file_path)
    data, fs = sf.read(file_path)
    sd.play(data, fs)
    sd.wait()
def do_segmentation(audio_input, audio_input_from_filename=True,
                    audio_input_from_array=False, sec_len=6, save_file=True):
    length = int(sec_len) * 10
    if audio_input_from_filename:
        x = read(audio_input)[0]
    if (not audio_input_from_filename) and audio_input_from_array:
        x = audio_input
    retriever = MIR(x, 44100)
    frame_size = 4096
    hop_size = 1024
    segments = [len(frame) / 44100 for frame in retriever.FrameGenerator()]
    output = []
    for segment in segments:
        sample = int(segment * 44100)
        output.append(x[:sample * length])  # extend duration of segment
    output = choice(output)
    if save_file:
        baseName = os.path.splitext(audio_input)[0].split('/')[-1]
        outputFilename = 'samples' + '/' + baseName + '_sample' + '.wav'
        write_file(outputFilename, 44100, output)
        print("File generated: %s" % outputFilename)
    else:
        return output
def get_data(rootdir=TIMIT_main_dir):
    inputs = []
    targets = []
    for dir_path, sub_dirs, files in os.walk(rootdir):
        for file in files:
            if (os.path.join(dir_path, file)).endswith('.wav'):
                wav_file_name = os.path.join(dir_path, file)
                input_data, f_s = sf.read(wav_file_name)
                # mfcc_feat = MFCC_input(mfcc(input_data, f_s))
                mfcc_feat = mfcc(input_data, f_s)
                # Delta features
                delta_feat = mfcc_feat[:-1] - mfcc_feat[1:]
                # Delta-Delta features
                deltadelta_feat = delta_feat[:-1] - delta_feat[1:]
                # Removing the first two frames
                mfcc_feat = mfcc_feat[2:]
                delta_feat = delta_feat[1:]
                # Concatenating mfcc, delta and delta-delta features
                full_input = np.concatenate((mfcc_feat, delta_feat, deltadelta_feat), axis=1)
                # Rakeshvar wants one frame along each column but I am using Lasagne
                inputs.append(np.asarray(full_input, dtype=theano.config.floatX))
                text_file_name = wav_file_name[:-4] + '.txt'
                target_data_file = open(text_file_name)
                target_data = str(target_data_file.read()).lower().translate(
                    str.maketrans('', '', '!:,".;?'))
                target_data = target_data[8:-1]  # No '.' in lexfree dictionary
                targets.append(target_data)
    return inputs, targets
def test_buffer_write(sf_stereo_w):
    buf = np.array([[1, 2], [-1, -2]], dtype='int16')
    sf_stereo_w.buffer_write(buf, 'short')
    sf_stereo_w.close()
    data, fs = sf.read(filename_new, dtype='int16')
    assert np.all(data == buf)
    assert fs == 44100
def _transform(self, row):
    if len(row) == 7:
        path, channel, name, spkid, dataset, start_time, end_time = row
    else:
        path, channel, name, spkid, dataset = row[:5]
        start_time = None
        end_time = None
    # ====== read audio ====== #
    # for voxceleb1
    if dataset == 'voxceleb1':
        with open(path, 'rb') as f:
            y, sr = sf.read(f)
        y = pp.signal.resample(y, sr_orig=sr, sr_new=8000, best_algorithm=True)
        sr = 8000
    # for sre, fisher and swb
    elif (dataset[:3] == 'sre' or dataset == 'swb' or dataset == 'fisher'):
        with open(path, 'rb') as f:
            y, sr = sf.read(f)
        y = pp.signal.resample(y, sr_orig=sr, sr_new=8000, best_algorithm=True)
        if y.ndim == 2:
            y = y[:, int(channel)]
        sr = 8000
    # all other datasets: mix6, voxceleb2
    else:
        y, sr = pp.signal.anything2wav(inpath=path, outpath=None,
                                       channel=channel, dataset=dataset,
                                       start=start_time, end=end_time,
                                       sample_rate=Config.SAMPLE_RATE,
                                       return_data=True)
    # ====== error happened, ignore file ====== #
    if len(y) == 0:
        return None
    # ====== remove DC offset ====== #
    y = y - np.mean(y, 0)
    duration = max(y.shape) / sr
    ret = {'raw': y, 'sr': sr, 'duration': duration,  # in seconds
           'path': path, 'spkid': spkid, 'name': name,
           'dsname': dataset}
    return ret
def load_bgd_wav(file_path):
    signal, rate = sf.read(file_path)  # signal: sample values, rate: sample rate
    if len(signal.shape) > 1:
        signal = signal[:, 0]
    if rate != FRAME_RATE:
        # up-sample or down-sample to the predefined sample rate
        signal = resampy.resample(signal, rate, FRAME_RATE, filter='kaiser_fast')
    return signal
def open_sound_and_normalise(path):
    """ returns mono audio of given samplerate """
    orig_samples, orig_samplerate = soundfile.read(path)
    # crude decimation; assumes orig_samplerate is an integer multiple of
    # samplerate (a float ratio would fail as a slice step)
    ratio = orig_samplerate // samplerate
    samples = orig_samples[::ratio, 0]
    return samples
def getData(self, params):
    ticker = params['ticker']
    import soundfile
    sig, samplerate = soundfile.read(ticker + ".wav")
    df = pd.Series({"filename": ticker, "length": len(sig), "samplerate": samplerate})
    df = df.to_frame().transpose()
    return df
def __rubberband(y, sr, **kwargs):
    '''Execute rubberband

    Parameters
    ----------
    y : np.ndarray [shape=(n,) or (n, c)]
        Audio time series, either single or multichannel

    sr : int > 0
        sampling rate of y

    **kwargs
        keyword arguments to rubberband

    Returns
    -------
    y_mod : np.ndarray [shape=(n,) or (n, c)]
        `y` after rubberband transformation
    '''
    assert sr > 0

    # Get the input and output tempfiles
    fd, infile = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    fd, outfile = tempfile.mkstemp(suffix='.wav')
    os.close(fd)

    # dump the audio
    sf.write(infile, y, sr)

    try:
        # Execute rubberband
        arguments = ['rubberband', '-q']
        for key, value in six.iteritems(kwargs):
            arguments.append(str(key))
            arguments.append(str(value))
        arguments.extend([infile, outfile])
        subprocess.check_call(arguments)

        # Load the processed audio.
        y_out, _ = sf.read(outfile, always_2d=True)

        # make sure that output dimensions match input
        if y.ndim == 1:
            y_out = np.squeeze(y_out)
    finally:
        # Remove temp files
        os.unlink(infile)
        os.unlink(outfile)

    return y_out
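# A usage sketch for __rubberband above, assuming the `rubberband` CLI is
# installed and on PATH; '--tempo' (time-stretch ratio) and '--pitch'
# (semitones) are standard rubberband command-line options:
y, sr = sf.read('input.wav')
y_fast = __rubberband(y, sr, **{'--tempo': 2.0})  # twice as fast
y_up = __rubberband(y, sr, **{'--pitch': 3})      # three semitones up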
def getAllFeatures(featureType, wavFileList, samplerate=16000, winlen=0.0256,
                   winstep=0.01, nfilt=40, nfft=512, lowfreq=133.3333,
                   highfreq=6855.4976, preemph=0.97, winSzForDelta=2,
                   numcep=13, ceplifter=22, appendEnergy=True):
    '''
    Computes all features of a given numpy vector of file paths to .wav files.
    Reads the wav files specified in 'wavFileList' with the package
    'PySoundFile'. PySoundFile is able to read the format of the files from
    the TIMIT database.
    See: http://pysoundfile.readthedocs.org/en/0.7.0/ and
    https://github.com/bastibe/PySoundFile
    For other parameters see function getFeatures; once the signal is read
    from its path, the signal and other parameters are forwarded to
    'getFeatures'.

    :parameters:
        - featureType: 'mfcc', 'logFB' or 'FB'
        - wavFileList: list of file paths
        - samplerate
        - winlen
        - winstep
        - nfilt
        - nfft
        - lowfreq
        - highfreq
        - preemph
        - winSzForDelta

    :returns:
        - featureList: numpy vector of np.arrays
          list of same length as input wavFileList, dimensions of every
          element of the list specified by signal duration and winstep
          (1st dim), and number of filters (2nd dim)
    '''
    featureList = []
    for f in wavFileList:
        signal, _ = sf.read(f)
        # equalize rms --> same power in all speech signals. Note that later
        # features will be normalised to have zero mean and unit variance,
        # but that is w.r.t. all signals. Before that, make sure that signals
        # have the same energy.
        rms = np.sqrt(np.mean(np.square(signal)))
        signal = signal / rms
        if featureType == 'mfcc':
            featureList.append(mfccFeatures(
                signal=signal, samplerate=samplerate, winlen=winlen,
                winstep=winstep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
                highfreq=highfreq, preemph=preemph,
                winSzForDelta=winSzForDelta, numcep=numcep,
                ceplifter=ceplifter, appendEnergy=appendEnergy))
        elif featureType == 'logFB':
            featureList.append(logFilterbankFeatures(
                signal=signal, samplerate=samplerate, winlen=winlen,
                winstep=winstep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
                highfreq=highfreq, preemph=preemph,
                winSzForDelta=winSzForDelta))
        elif featureType == 'FB':
            featureList.append(filterbankFeatures(
                signal=signal, samplerate=samplerate, winlen=winlen,
                winstep=winstep, nfilt=nfilt, nfft=nfft, lowfreq=lowfreq,
                highfreq=highfreq, preemph=preemph,
                winSzForDelta=winSzForDelta))
        else:
            raise ValueError("featureType must be 'mfcc', 'logFB' or 'FB'")
    return np.array(featureList)
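# A minimal usage sketch for getAllFeatures above; the file list is
# hypothetical and all keyword arguments are left at their TIMIT-oriented
# defaults:
wavFileList = ['timit/train/dr1/fcjf0/sa1.wav',
               'timit/train/dr1/fcjf0/sa2.wav']
mfccs = getAllFeatures('mfcc', wavFileList)
logFBs = getAllFeatures('logFB', wavFileList)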
def compute_vad(args):
    filename, path, resultpath = args
    signame = os.path.basename(os.path.splitext(filename)[0])
    ids = signame.split("_")
    print("computing: " + path + filename)
    bg_signal, rate = soundfile.read(path + filename)
    ltsd = LTSD_VAD()
    bg_signal = bg_signal[:2000]
    print(bg_signal)
    ltsd.init_params_by_noise(rate, bg_signal)
    signal, rate = soundfile.read(path + filename)
    # vaded_signal = ltsd.filter(signal)
    segments, sig_len = ltsd.segments(signal)
    # seconds = float(len(sig)) / rate
    res_name = resultpath + "/ad-ltsd_" + signame + ".txt"
    segments = librosa.core.samples_to_time(segments, rate).tolist()
    len_s = librosa.core.samples_to_time(sig_len, rate)
    write_results(segments, res_name, len_s)
def Load(cls, filename):
    data, samplingrate = soundfile.read(file="%s.%s" % (filename, cls.ending))
    if numpy.size(data) == len(data):
        # single channel files are imported into a one dimensional row array,
        # so len and size are the same. These need not be transposed
        channels = (data,)
    else:
        channels = numpy.transpose(data)
    return sumpf.Signal(channels=channels,
                        samplingrate=samplingrate,
                        labels=[str(" ".join([filename.split(os.sep)[-1], str(c + 1)]))
                                for c in range(len(channels))])
def test_wplus_read_written_data(sf_stereo_wplus):
    sf_stereo_wplus.write(data_stereo)
    assert sf_stereo_wplus.seek(0, sf.SEEK_CUR) == len(data_stereo)
    sf_stereo_wplus.seek(0)
    assert np.all(sf_stereo_wplus.read() == data_stereo)
    assert sf_stereo_wplus.seek(0, sf.SEEK_CUR) == len(data_stereo)
    sf_stereo_wplus.close()
    data, fs = sf.read(filename_new)
    assert np.all(data == data_stereo)
def audio(self, audio_):
    if is_string(audio_):
        # Assuming this is a wav file
        audio_, fs = sf.read(audio_)
        self.fs = fs
    assert is_array(audio_)
    if audio_.ndim == 1:
        audio_ = audio_[:, np.newaxis]
    self.mfcc.audio = audio_
    self.periphery.sound_process = ArrayProcess(audio_)
def __init__(self, filename):
    data, self.sr = sf.read(filename)
    self.rawdata = np.array(data)
    if len(self.rawdata.shape) == 1:
        self.frames = self.rawdata.shape[0]
        self.data = np.array(self.rawdata)
    else:
        self.frames, self.channels = self.rawdata.shape
        self.data = self.rawdata[:, 0]
    self.length = self.frames / self.sr
def readFromAudioFile(filename, mono=False):
    '''
    Uses soundfile to generate a Sound object from wav and aiff files.
    If mono is true, returns the left channel only.
    '''
    data, fs = sf.read(filename)
    if len(data.shape) == 2 and mono:
        return Sound(data[:, 0], fs, filename)
    else:
        return Sound(data, fs, filename)
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
from SingleMic import segment_overlap as s_o
from SingleMic import inverse_segment_overlap as i_s_o
import time

start_time = time.time()

# Variables
tsegment = 20e-3  # 20 ms segment
overlap = 0.5

# Import data & fs
data, fs = sf.read('Audio/clean.wav')

# Calc
s_segment = int(tsegment * fs)
s_overlap = int(overlap * s_segment)

# pad data with zeros
remainder = s_segment - (len(data) % s_segment)
data_extended = np.ravel(np.asmatrix(np.pad(data, (0, int(remainder)), 'constant')))

x_array = s_o.segment_overlap(data_extended, s_segment, s_overlap)
x_truncarray = i_s_o.inverse_segment_overlap(x_array, len(data_extended), s_segment, s_overlap)

# calculate difference between initial and reconstructed signals
residual = data_extended - x_truncarray
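# A quick sanity check on the segment/overlap round trip above: for a correct
# reconstruction the residual should be numerically zero.
print('max reconstruction error:', np.max(np.abs(residual)))
print('elapsed: {:.3f} s'.format(time.time() - start_time))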
def read_audio(path):
    filepath = get_abs_path(src_path, path)
    return soundfile.read(filepath)
    :param extract: (Function) extraction method to use
    :param multi: (bool) specify if several samples can be extracted from one audio file
    :param audio_dir: (str) directory where the audio files are located
    :return: (features:list, labels:list)
    """
    features = []
    speakers = []
    for index, row in data.iterrows():
        audio_name = row.loc[AUDIO]
        speaker = row.loc[SPEAKER_ID]
        audio, samplerate = sf.read(audio_dir + audio_name)
        audio_extracts = segment_audio(audio, samplerate)
        if not multi:
            audio_extracts = audio_extracts[0:1]
        for audio_extract in audio_extracts:
            # extract the features using the given extraction function
            features.append(extract(audio_extract, samplerate))
            speakers.append(speaker)
    return features, speakers


if __name__ == '__main__':
    audio, sp = sf.read("database/dev/audio/aahtm.flac")
    sp_audio = segment_audio(audio, sp)
    lpc = extract_with_lpc(audio, sp)
def wav_read(wav_file):
    wav_data, sr = sf.read(wav_file, dtype='int16')
    return wav_data, sr
import sounddevice as sd
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import time
import os
from scipy import signal

# In[2]:

files = os.listdir(r'C:\Users\Hp\Desktop\music\written')
s = 'C:/Users/Hp/Desktop/music/written/'

duration = 5  # seconds
fs = 44100
sd.default.device = 1
print('Started recording')
myrecording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
sd.wait()
print('Stopped recording')

m = []
for i in range(len(files)):
    data, samplerate = sf.read(s + files[i])
    a = signal.correlate(data, myrecording[0::20, 0])
    m.append(np.max(abs(a)))

I = m.index(max(m))
print(files[I])
import soundfile as sf
import numpy as np
from utils import signals_to_string

filepath = 'result.wav'
data, _ = sf.read(filepath)
print(len(data))
size = len(data)
data = set(["{:.6f}".format(d) for d in data])

rev_data = []
for i in range(size):
    k = np.sin(i * 439.97 / 44100 * (2 * np.pi))
    ks = "{:.6f}".format(k)
    if ks in data:
        rev_data.append(k)
    else:
        rev_data.append("x")

signal = []
for i in range(0, len(rev_data), 2000):
    if rev_data[i:i + 2000].count("x") > 500:
        signal.append(0)
    else:
        signal.append(1)

print(len(rev_data))
print(signal)
print(signals_to_string(signal))
def find_peaks(ramec):
    t = np.zeros(100)
    N = len(ramec)
    index = 0
    for i in range(1, N - 1):
        prev_val = ramec[i - 1]
        curr_val = ramec[i]
        n_val = ramec[i + 1]
        # a peak is a sample larger than both of its neighbours
        if curr_val > prev_val and curr_val > n_val:
            t[index] = curr_val
            index += 1
    return t[:index]


znely_ramec = 45

s, fs = sf.read('xsiska16.wav')
print("Number of samples: ", s.size)
s_max = max(s)
s_min = min(s)
print("max ", s_max)
print("min ", s_min)

t = np.arange(s.size) / fs
time = s.size / fs
print("Recording length in seconds: ", time)
def playsound(filename):
    data, fs = soundfile.read(filename, dtype='float32')
    sounddevice.play(data, fs)
    status = sounddevice.wait()
def size_hours(self):
    return sum(
        soundfile.read(self.get(i)[0])[0].size / (16000 * 3600)
        for i in range(self.size()))
# From specs
samplingRateVar = rootgrp.createVariable('Data.SamplingRate', 'f8', ('I'))
samplingRateVar.Units = 'hertz'
samplingRateVar[:] = 44100

# No delay found
delayVar = rootgrp.createVariable('Data.Delay', 'f8', ('I', 'R', 'E'))
delay = np.zeros((I, R, E))
delayVar[:, :, :] = delay

# Parse the audio files...
dataIRVar = rootgrp.createVariable('Data.IR', 'f8', ('M', 'R', 'E', 'N'))
dataIRVar.ChannelOrdering = 'fuma'
dataIRVar.Normalization = 'fuma'

audioFilesPath = '/Volumes/Dinge/audio/S3A_original/MainChurch/Soundfield/'
for e in range(E):
    fileIdx = e + 1  # Numeration starts at 1
    fileName = 'ls' + str(fileIdx) + '.wav'
    # Open the audio file
    data, samplerate = sf.read(audioFilesPath + fileName)
    assert samplerate == 44100
    assert np.shape(data) == (65536, 4)
    dataIRVar[:, :, e, :] = data

# ----------Close it----------#
rootgrp.close()
        self.w)
                if (len(frame) != self.wl):
                    frame = np.concatenate((frame, np.zeros((self.wl - len(frame)))))
                # print('min ' + str(self.bw(self.gama[i])) + ' max ' + str(self.ew(self.gama[i])) +
                #       ' sigma min ' + str(self.bw(self.sigma[i])) + ' max ' + str(self.ew(self.sigma[i])))
                self.y[self.bw(self.gama[i]):self.ew(self.gama[i])] = \
                    self.y[self.bw(self.gama[i]):self.ew(self.gama[i])] + frame
        except:
            print('The scaling factor does not work')


Fs = 41000  # sampling frequency
f = 20
timeVector = np.arange(0, 1, 1 / Fs)
Audio, Fs = sf.read('guitarra.wav')
# Audio = (np.sin(2*pi*f*timeVector) + np.sin(2*50*pi*f*timeVector) + np.sin(2*100*pi*f*timeVector)) / 3
# Audio = np.sin(2*pi*f*timeVector)

abc = ola()
abc.run(Audio, 2)
sd.play(abc.y, Fs)
sd.wait()
# sf.write('speech_dobleDuracion.wav', abc.y, Fs)

n = len(Audio)
timeVector = np.arange(0, n * (1 / Fs), 1 / Fs)
n = len(Audio)
frecVector = fftfreq(n)
espectroVector = fft(Audio)
def file_to_text(self, filename):
    audio_input, samplerate = sf.read(filename)
    assert samplerate == 16000
    return self.buffer_to_text(audio_input)
'''demo for using sound device and sound file:
Taken from: https://python-sounddevice.readthedocs.io/en/0.2.1/examples.html
'''
import argparse
import logging

# To use, cd into helpers directory, run >> python demo/sound_card_demo.py "filename"
# Example: python demo/sound_card_demo.py "../static/sounds/chime.wav"

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("filename", help="audio file to be played back")
parser.add_argument("-d", "--device", type=int, help="device ID")
args = parser.parse_args()

try:
    import sounddevice as sd
    import soundfile as sf
    devices = sd.query_devices()
    print(devices)
    data, fs = sf.read(args.filename, dtype='float32')
    sd.play(data, fs, device=args.device, blocking=True)
    status = sd.get_status()
    if status:
        logging.warning(str(status))
except BaseException as e:
    # This avoids printing the traceback, especially if Ctrl-C is used.
    raise SystemExit(str(e))
def asr_worker(text_queue: Queue, run: Value, done_loading: Value):
    try:
        import sounddevice as sd
        import soundfile as sf

        # Initialise CUDA.
        cuda.init()
        device = cuda.Device(0)
        ctx = device.make_context()

        # Load the QuartzNet ASR model.
        logging.info('Loading QuartzNet model for ASR...')
        featurizer = MelFeaturizer()
        quartznet = QuartzNet()

        # Initialise the Decoder.
        logging.info('Loading CTC Beam Decoder...')
        decoder = Decoder(model_path='models/lm/3_gram_lm.trie', alpha=1, beta=0.5)

        with done_loading.get_lock():
            done_loading.value = 1

        chunk_size = 1 * 16000
        n_past_chunks = 5
        past_chunks_size = chunk_size * (n_past_chunks - 1)
        activation_words = ['jarvis', 'jervis']
        beep, _ = sf.read('assets/wav/beep.wav', dtype='float32')
        peeb = np.ascontiguousarray(np.flip(beep))
        activation_waveform = np.zeros((n_past_chunks * chunk_size, 1), dtype=np.float32)

        in_stream = sd.InputStream(samplerate=16000, channels=1)
        out_stream = sd.OutputStream(samplerate=44100, channels=2)
        in_stream.start()
        out_stream.start()

        while run.value:
            # Read waveform from the microphone and store in the rolling buffer.
            data, overflowed = in_stream.read(chunk_size)
            if overflowed:
                logging.warning('ASR process is skipping microphone frames!')
            activation_waveform = np.roll(activation_waveform, -chunk_size)
            activation_waveform[past_chunks_size:, 0] = data[:, 0]

            # Run ASR.
            token_probs = quartznet(featurizer(activation_waveform.T))
            decoded = decoder(token_probs)

            # If the keyword was said...
            if any([word in decoded for word in activation_words]):
                logging.info('ASR triggered!')
                # Play a beep sound.
                out_stream.write(beep)
                # Read waveform from the microphone.
                _data = in_stream.read(5 * 16000)[0]
                # Play a peeb sound.
                out_stream.write(peeb)
                # Run ASR.
                token_probs = quartznet(featurizer(_data.T))
                decoded = decoder(token_probs)
                # Add the recognised text to the text queue and reset the
                # activation waveform buffer.
                logging.info(f'ASR recognised: "{decoded}".')
                text_queue.put(decoded)
                activation_waveform *= 0
    except KeyboardInterrupt:
        pass

    ctx.pop()
    in_stream.stop()
    out_stream.stop()
def test_call(self):
    sample, sr = sf.read(f"{CWD}/tests/test_data/test-clean/61/70968/61-70968-0000.flac")
    path = f"{CWD}/tests/test_data/test-clean/61/70968/61-70968-0000.flac"
    res, sr = self.add_noise(path)
    self.assertEqual(res.shape, sample.shape)
    sf.write("foo.flac", res, sr)
def test_stereo_to_mono(self):
    sample, sr = sf.read("test_data/UrbanSound8K/audio/fold1/118279-8-0-5.wav")
    res = stereo_to_mono(sample)
    self.assertEqual(res.shape, (192000,))
def test_match_to_speech(self):
    sample, sr = sf.read("test_data/UrbanSound8K/audio/fold1/118279-8-0-5.wav")
    func = create_match_to_speech(noise_sr=44_100, speech_sr=16_000)
    res = func(sample)
    print(type(res))
def _trim(input_folder, sound_list, output_folder):
    for sound in sound_list:
        data, sample_rate = sf.read(os.path.join(input_folder, sound))
        sf.write(os.path.join(output_folder, sound),
                 data[:(sample_rate * 5)], sample_rate)
def sampler(video_1, video_2, rate=32, augment=False, precompute=False,
            include_metadata=False):
    """Sample one frame from video_file, with 50% chance sample one second
    from corresponding audio_file, 50% chance sample one second from another
    audio_file in the list of audio_files.

    Args:
        video_1: dict for candidate video to sample from
        video_2: dict for candidate video to sample from

    Keyword Args:
        rate: Poisson rate parameter. Used for precomputing samples
        augment: If True, perform data augmentation
        precompute: If True, precompute samples during initialization
                    so that memory can be discarded

    Returns:
        A generator that yields dictionary of video sample, audio sample,
        and label (0: not from corresponding files, 1: from corresponding files)
    """
    video_file_1 = video_1['video_filepath']
    video_file_2 = video_2['video_filepath']
    audio_file_1 = video_1['audio_filepath']
    audio_file_2 = video_2['audio_filepath']

    debug_msg = 'Initializing streamer with videos "{}" and "{}"'
    LOGGER.debug(debug_msg.format(video_file_1, video_file_2))

    # Hack: choose a number of samples such that with high probability we
    # won't run out of samples, but is also less than the entire length of
    # the video so we don't have to resize all of the frames
    num_samples = int(scipy.stats.poisson.ppf(0.999, rate))

    try:
        with LogTimer(LOGGER, 'Reading video'):
            video_data_1 = read_video(video_file_1)
    except Exception as e:
        warn_msg = 'Could not open video file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(video_file_1, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    try:
        with LogTimer(LOGGER, 'Reading video'):
            video_data_2 = read_video(video_file_2)
    except Exception as e:
        warn_msg = 'Could not open video file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(video_file_2, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    try:
        with LogTimer(LOGGER, 'Reading audio'):
            audio_data_1, sampling_frequency = sf.read(audio_file_1,
                                                       dtype='int16',
                                                       always_2d=True)
            audio_data_1 = audio_data_1.mean(axis=-1).astype('int16')
    except Exception as e:
        warn_msg = 'Could not open audio file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(audio_file_1, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    try:
        with LogTimer(LOGGER, 'Reading audio'):
            audio_data_2, sampling_frequency = sf.read(audio_file_2,
                                                       dtype='int16',
                                                       always_2d=True)
            audio_data_2 = audio_data_2.mean(axis=-1).astype('int16')
    except Exception as e:
        warn_msg = 'Could not open audio file {} - {}: {}; Skipping...'
        warn_msg = warn_msg.format(audio_file_2, type(e), e)
        LOGGER.warning(warn_msg)
        warnings.warn(warn_msg)
        return

    if precompute:
        samples = []
        for _ in range(num_samples):
            sample = generate_sample(
                audio_file_1, audio_data_1, audio_file_2, audio_data_2,
                video_file_1, video_data_1, video_file_2, video_data_2,
                sampling_frequency, augment=augment,
                include_metadata=include_metadata)
            samples.append(sample)

        # Clear the data from memory
        video_data_1 = None
        video_data_2 = None
        audio_data_1 = None
        audio_data_2 = None
        video_data = None
        audio_data = None
        del video_data_1
        del video_data_2
        del audio_data_1
        del audio_data_2
        del video_data
        del audio_data

        while samples:
            # Yield the sample, and remove from the list to free up some memory
            yield samples.pop()
    else:
        while True:
            yield generate_sample(
                audio_file_1, audio_data_1, audio_file_2, audio_data_2,
                video_file_1, video_data_1, video_file_2, video_data_2,
                sampling_frequency, augment=augment,
                include_metadata=include_metadata)

    # Ending a generator with `return` (rather than `raise StopIteration()`,
    # which PEP 479 turns into a RuntimeError) terminates iteration cleanly.
    return
print("Warning: cupy is not installed. 'gpu' argument should be set to -1. Switched to CPU.\n") import numpy as xp separater = AR_FastMNMF2( n_source=args.n_source, n_basis=args.n_basis, xp=xp, init_SCM=args.init_SCM, n_tap_AR=args.n_tap_AR, n_delay_AR=args.n_delay_AR, n_bit=args.n_bit, algo=args.algo, n_iter_init=args.n_iter_init ) wav, sample_rate = sf.read(args.input_fname) wav /= np.abs(wav).max() * 1.2 M = min(len(wav), args.n_mic) spec_FTM = MultiSTFT(wav[:, :M], n_fft=args.n_fft) separater.file_id = args.file_id separater.load_spectrogram(spec_FTM, sample_rate) separater.solve( n_iter=args.n_iter, save_dir="./", save_likelihood=False, save_param=False, save_wav=True, interval_save=5, )
def play_from_file(file):
    data, fs = soundfile.read(file)
    sd.play(data, fs, device=sd.default.device)
    status = sd.wait()
def process(args):
    f0_max = 1100.0
    f0_min = 50.0
    frame_shift = args.shift_size / 1000
    hop_length = int(args.sr * frame_shift)

    lab_list = os.listdir(args.labdir)
    phone_set = []
    idscp = {}
    index = 1
    for lab in lab_list:
        lab_id = lab[:-4]
        idscp[lab_id] = index
        segments, phone = load_label(
            os.path.join(args.labdir, lab),
            s_type=args.label_type,
            sr=args.sr,
            frame_shift=frame_shift,
            sil=args.sil,
        )
        for p in phone:
            if p not in phone_set:
                phone_set.append(p)
        wav_path = os.path.join(args.wavdir, lab_id + "." + args.wav_extention)
        if args.wav_extention == "raw":
            signal, osr = sf.read(
                wav_path,
                subtype="PCM_16",
                channels=1,
                samplerate=args.sr,
                endian="LITTLE",
            )
        else:
            signal, osr = librosa.load(wav_path, sr=None)
        if osr != args.sr:
            signal = librosa.resample(signal, osr, args.sr)

        song_align = os.path.join(args.outdir, "alignment")
        song_wav = os.path.join(args.outdir, "wav_info", str(index))
        song_pitch_beat = os.path.join(args.outdir, "pitch_beat_extraction", str(index))
        if not os.path.exists(song_align):
            os.makedirs(song_align)
        if not os.path.exists(song_wav):
            os.makedirs(song_wav)
        if not os.path.exists(song_pitch_beat):
            os.makedirs(song_pitch_beat)
        print("processing {}".format(song_wav))

        for seg in segments.keys():
            alignment = segments[seg]["alignment"]
            start = segments[seg]["start"]
            name = seg
            seg_signal = signal[int(start * hop_length):
                                int(start * hop_length + len(alignment) * hop_length)]

            # extract beats
            tempo, beats = librosa.beat.beat_track(y=seg_signal, sr=args.sr,
                                                   hop_length=hop_length)
            # times = librosa.frames_to_time(beats, sr=args.sr)
            # frames = librosa.time_to_frames(
            #     times, sr=args.sr, hop_length=hop_length, n_fft=n_fft
            # )
            np.save(os.path.join(song_pitch_beat, name) + "_beats", np.array(beats))

            # extract pitch
            seg_signal = seg_signal.astype("double")
            _f0, t = pw.harvest(
                seg_signal,
                args.sr,
                f0_floor=f0_min,
                f0_ceil=f0_max,
                frame_period=frame_shift * 1000,
            )
            _f0 = pw.stonemask(seg_signal, _f0, t, args.sr)
            np.save(os.path.join(song_pitch_beat, name) + "_pitch", np.array(_f0))

            alignment_id = np.zeros((len(alignment)))
            for i in range(len(alignment)):
                alignment_id[i] = phone_set.index(alignment[i])
            np.save(
                os.path.join(song_align, pack_zero(index) + name),
                np.array(alignment_id),
            )
            sf.write(os.path.join(song_wav, name) + ".wav", seg_signal,
                     samplerate=args.sr)
            print("saved {}".format(os.path.join(song_wav, name) + ".wav"))
        index += 1

    with open(os.path.join(args.outdir, "phone_set.txt"), "w") as f:
        for p_id, p in enumerate(phone_set):
            f.write(str(p_id) + " " + p)
            f.write("\n")
pl.show()
'''

# Plot Spectrogram for each detection, plot to screen and to file
DETECTION_THRESHOLD = 2.0
SPECTROGRAM_NFFT = 1024
SPECTROGRAM_STEP = 128
print("Events detected from statistical deviation:")
for d in detections_final:
    if d[2] > DETECTION_THRESHOLD:
        # start_time, duration, significance
        print('  {}: {:>3.1f} {:>5.1f}'.format(hhmmss(d[0]), d[1], d[2]))
        start_i = d[3] - DETECTION_CONTEXT_N
        stop_i = d[4] + 1 + DETECTION_CONTEXT_N
        wave = sf.read(wave_file, start=start_i * BLOCK_DURATION_N,
                       stop=stop_i * BLOCK_DURATION_N)[0]
        wave = fixup_wave(wave, f.samplerate)

        # Apply noise cancellation to signal
        wave = filter_signal(wave, f.samplerate)

        # Generate plot of wave, spectrogram, and power and save as image file
        pl.subplot(3, 1, 1)
        pl.plot(wave)
        # start_time, duration, significance
        pl.title('start={}, dur={:.1f}, sig={:.1f}'.format(hhmmss(d[0]), d[1], d[2]))
        pl.xticks([])
        pl.subplot(3, 1, 2)
        pl.specgram(wave, SPECTROGRAM_NFFT, f.samplerate,
os.makedirs(cropped_threshold_dir)

fc = 3000 / 22050
b, a = signal.butter(10, fc, 'low')

for file_name in os.listdir(files_dir):
    direction = re.findall(r"\d+", file_name)[0]
    paths = capsule_path_difference(polar, [1.5, 0, direction])
    max_channel, min_channel, max_onset, min_onset = 0, 0, 0, sys.maxsize
    onsets = []

    # Read wav file
    data, fs = sf.read(os.path.join(files_dir, file_name))
    closest = np.argmin(paths)

    # low pass filter at fc
    filtered = signal.filtfilt(b, a, data.T[closest])
    filter_max = max(filtered)
    closest_onset = next(x[0] for x in enumerate(filtered) if abs(x[1]) == filter_max)
    time = [n / fs for n in range(filtered.size)]
    onsets_distance = np.array([
        round(((paths[i] - paths[closest]) / c) * fs) + closest_onset
        for i in range(len(paths))
    ], dtype='int32')
import sys
import numpy as np
import math
import librosa
import soundfile as sf
import json
from librosa.core.spectrum import power_to_db
import scipy

file_path = sys.argv[1]
data, samplerate = sf.read(file_path)
# data = np.clip(data*3, -1, 1)

with open("MfccConfig.json", "r") as f:
    config = json.load(f)

frame_size = config['frame_size']
frame_step = config['frame_step']
n_fft = config['n_fft']
n_mels = config['mfcc_bank_cnt']
fmin = config['fmin']
fmax = config['fmax']
dtype = config.get('dtype', "int")
high_prec = config.get('use_high_prec', False) or dtype == "fix32_scal"
use_power = False
rad4 = round(math.log(n_fft // 2, 4)) == math.log(n_fft // 2, 4)
ndct = config.get('n_dct', False)

from librosa.filters import get_window
from librosa import util

librosa_fft_window = get_window("hann", frame_size, fftbins=True)
def load_audio_from_path(self, wav_path):
    assert os.path.isfile(wav_path) and wav_path.endswith('.wav')
    samples, _ = soundfile.read(wav_path, dtype="int16")
    self.samples = samples.tolist()
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from torch.utils.mobile_optimizer import optimize_for_mobile

tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
model.eval()

audio_input, _ = sf.read("scent_of_a_woman_future.wav")
input_values = tokenizer(audio_input, return_tensors="pt").input_values
logits = model(input_values).logits
predicted_ids = torch.argmax(logits, dim=-1)
transcription = tokenizer.batch_decode(predicted_ids)[0]

model_dynamic_quantized = torch.quantization.quantize_dynamic(
    model, qconfig_spec={torch.nn.Linear}, dtype=torch.qint8)
traced_quantized_model = torch.jit.trace(model_dynamic_quantized,
                                         input_values, strict=False)
optimized_traced_quantized_model = optimize_for_mobile(traced_quantized_model)
optimized_traced_quantized_model.save("wav2vec2.pt")
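# A sketch of reloading the exported TorchScript file above for a quick
# desktop sanity check before bundling it into a mobile app; the structure of
# the traced module's output is assumed to mirror the eager model's:
reloaded = torch.jit.load("wav2vec2.pt")
out = reloaded(input_values)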
def map_to_array(batch):
    speech, _ = sf.read(batch["file"])
    batch["speech"] = speech
    return batch
def read_wav(self, wav_bytes):
    # renamed parameter so it no longer shadows the builtin `bytes`
    waveform, sample_rate = sf.read(BytesIO(wav_bytes), dtype="float32")
    return waveform, sample_rate
import playsound
import soundfile as sf
import numpy as np
# import matplotlib.pyplot as plt

x, fs = sf.read('carrie1.wav')
audio = x

# =================================================================================#
fc = 4000
M = 20
wc = (2 * np.pi * fc) / fs
# =================================================================================#
w = np.hamming(M)[:M - 1]
hd = []
for n in range(M - 1):
    hd.insert(n, (wc / np.pi) * np.sinc((wc / np.pi) * (n - (M / 2))))
# =================================================================================#
h = hd * w
# =================================================================================#
audio_filtrado = np.convolve(h, audio)
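# One way to audition the filtered result above: write it out with soundfile
# and play it back with the playsound import; the output file name is
# hypothetical:
sf.write('carrie1_lowpass.wav', audio_filtrado, fs)
playsound.playsound('carrie1_lowpass.wav')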