Example #1
def OPENFILE():

    global BUFFER
    global CHUNK_TIMEOUT
    count = 0  # packet counter

    try:
        with sf.SoundFile(args.filename) as f:

            filesize = os.path.getsize(args.filename)
            print(' [INFO] -> start playing')
            print(
                ' [OPENFILE] Samplerate: {}  Channels: {}  Blocksize: {}   Filesize: {}'
                .format(f.samplerate, f.channels, args.blocksize, filesize))

            data = f.buffer_read(args.blocksize, dtype='int16')
            while data:

                start = time.time()  # packet creation takes time
                paket = rtp.createPacket(1, 77777, 1, data)  # build the packet

                # write the packet into the ring buffer
                index = count % args.buffersize
                BUFFER[index] = paket
                count += 1
                end = time.time()  # end of packet creation

                paket_creation_time = end - start

                # time left in this block period after packet creation
                CHUNK_TIMEOUT = (float(args.blocksize) /
                                 f.samplerate) - paket_creation_time

                # pace the loop to real-time playback speed
                time.sleep(max(CHUNK_TIMEOUT, 0))

                #print (' [OPENFILE] Chunklength: {}'.format(CHUNK_TIMEOUT))

                data = f.buffer_read(args.blocksize, dtype='int16')  # next block

    except Exception as exc:
        print(' [OPENFILE] -> [ERROR] {}'.format(exc))
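OPENFILE() refers to module-level state (BUFFER, CHUNK_TIMEOUT, args, rtp) that this page does not show. A minimal sketch of that surrounding context, in which every name and value is an assumption:

import os
import time
import soundfile as sf

BUFFER = [None] * 64        # ring buffer shared with the sender thread
CHUNK_TIMEOUT = 0.0         # seconds of audio per block, updated by OPENFILE()

class _Args:                # stand-in for an argparse namespace
    filename = 'music.wav'
    blocksize = 1024
    buffersize = 64

args = _Args()

class rtp:                  # stub for the project's RTP helper
    @staticmethod
    def createPacket(version, ssrc, payload_type, data):
        return bytes(data)  # the real helper would prepend an RTP header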
Example #2
def record_and_decode():
    if os.path.exists('demo.wav'):
        os.remove('demo.wav')  # mode='x' below requires that the file not exist
    try:
        with sf.SoundFile('demo.wav',
                          mode='x',
                          samplerate=16000,
                          channels=1,
                          subtype='PCM_16') as file:
            with sd.InputStream(samplerate=16000,
                                channels=1,
                                callback=record_callback):
                print('#' * 80)
                print('press Ctrl+C to stop the recording')
                print('#' * 80)
                while True:
                    file.write(q.get())

    except KeyboardInterrupt:
        test_wav('demo.wav')
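record_and_decode() relies on a module-level queue q and a record_callback that this example omits. A plausible sketch (the names match the call sites above; the implementation is an assumption):

import queue
import sys

import sounddevice as sd

q = queue.Queue()

def record_callback(indata, frames, time, status):
    """Copy each captured block into the queue for the writer loop."""
    if status:
        print(status, file=sys.stderr)
    q.put(indata.copy())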
Example #3
    def _setSndFromFile(self, fileName):
        # load the file
        if not path.isfile(fileName):
            msg = "Sound file %s could not be found." % fileName
            logging.error(msg)
            raise ValueError(msg)
        self.fileName = fileName
        # in case a tone with inf loops had been used before
        self.loops = self.requestedLoops
        try:
            self.sndFile = sndfile.SoundFile(fileName)
            sndArr = self.sndFile.read()
            self.sndFile.close()
            self._setSndFromArray(sndArr)

        except Exception:
            msg = "Sound file %s could not be opened using pysoundcard for sound."
            logging.error(msg % fileName)
            raise ValueError(msg % fileName)
Example #4
    def __call__(self, flo):
        with soundfile.SoundFile(flo) as f:
            samples = f.read()
            samples *= (2**15 - 1)  # scale [-1, 1] floats to the int16 range
            samples = samples.astype(np.int16)
            bio = BytesIO()
            proc = subprocess.Popen(args=[
                'ffmpeg', '-y', '-loglevel', 'error', '-f', 's16le', '-ac',
                str(f.channels), '-ar',
                str(f.samplerate), '-i', '-', '-acodec', 'libmp3lame', '-f',
                'mp3', '-'
            ],
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            stdout, stderr = proc.communicate(input=samples.tobytes())
            bio.write(stdout)
            bio.seek(0)
            return bio
Example #5
    def addFrameWithTransitionAndPause(self, image_file, audio_file,
                                       transition_file, pause):
        media_info = MediaInfo.parse(transition_file)
        duration_in_ms = media_info.tracks[0].duration
        with sf.SoundFile(audio_file) as f:
            duration = len(f) / f.samplerate
        try:
            audio_clip = AudioSegment.from_wav(audio_file)
        except Exception:
            print("error with frame audio transition pause for %s" %
                  audio_file)
            audio_clip = AudioSegment.silent(duration=pause)
        audio_clip_with_pause = audio_clip
        self.imageframes.append(image_file)
        self.audiofiles.append(audio_clip_with_pause)
        self.durations.append(duration + (pause / 1000))
        self.transitions.append((transition_file, len(self.imageframes) - 1,
                                 (duration_in_ms / 1000) + (pause / 1000)))
Example #6
def read_audio(filename, scipy_=True, normalization=True):

    if scipy_:
        # scipy.io.wavfile.read returns (rate, data)
        sample_rate, audio = wavread(filename)
        audio = audio.astype('float32')
    else:
        with soundfile.SoundFile(filename) as sound_file:
            audio = sound_file.read(dtype='float32')
            sample_rate = sound_file.samplerate

    # downmix multi-channel audio to mono
    if audio.ndim >= 2:
        audio = np.mean(audio, 1)

    if normalization:
        audio = normalize(audio, sample_rate)

    return audio, sample_rate
Example #7
def extract_feature(file_name, mfcc, chroma, mel):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if len(X.shape) > 1:
            X = X.mean(axis=1)  # downmix to mono
        if chroma:
            stft = np.abs(librosa.stft(X))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
    return result
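A possible call, assuming a local speech.wav; with librosa's defaults (12 chroma bins, 128 mel bands) the result has 40 + 12 + 128 = 180 values:

features = extract_feature('speech.wav', mfcc=True, chroma=True, mel=True)
print(features.shape)  # (180,)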
Example #8
def particion():
	for archivo in os.listdir(UPLOAD_FOLDER):
		name_file = archivo.split('.')
		print(name_file)
		if name_file[1] == 'wav':
			time = 30000  # slice length in milliseconds
			j = 1
			with contextlib.closing(wave.open(UPLOAD_FOLDER + archivo, 'r')) as f:
				frames = f.getnframes()
				rate = f.getframerate()
				duration = int((frames / float(rate)) * 1000)
			for i in range(0, duration, time):
				newAudio = AudioSegment.from_wav(UPLOAD_FOLDER + archivo)
				newAudio = newAudio[i:i + 30000]
				newAudio.export(name_file[0] + '_' + str(j) + ".---wav", format="wav")
				j = j + 1
	#return redirect('/velocidad')
	return '''
Example #9
def analyzeAudio():
    with open('audio.wav', 'wb') as outfile:
        outfile.write(request.data)
    tempoJson = getTempo('audio.wav')
    src = sr.AudioFile('audio.wav')
    with sf.SoundFile('audio.wav') as f:
        print('seconds = {}'.format(len(f) / f.samplerate))
    print('tempo = ', tempoJson['auftakt_result']['overall_tempo'])
    rap.make_music(round(tempoJson['auftakt_result']['overall_tempo']))
    with src as source:
        audio = r.record(source)
        print(type(audio))
        try:
            text = r.recognize_google(audio)
            print("lyrics: {}".format(text))
        except Exception:
            print("didn't recognize")

    return ('This is the analysis')
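analyzeAudio() depends on module globals this page omits. One plausible setup (the getTempo and rap helpers stay project-specific and are not sketched here):

import speech_recognition as sr
import soundfile as sf
from flask import request

r = sr.Recognizer()  # shared recognizer used by analyzeAudio()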
Example #10
def select_speakers(dataset_path,
                    max_speakers=100,
                    min_len=20,
                    max_file_len=20,
                    min_file_len=2):
    """Select speakers whose total audio length exceeds min_len minutes.

    Args:
        dataset_path (str): wav root containing one subfolder per speaker

    Returns:
        poss_speakers (dict): {spk: filelist, ...}
        to_move (list): up to max_speakers speaker IDs, longest first
    """
    possible_map = {}
    for spk in os.listdir(dataset_path):
        spk_dur = 0
        spk_included = []
        for fn in os.listdir(os.path.join(dataset_path, spk)):
            if fn.endswith(".wav"):
                full_fn = os.path.join(dataset_path, spk, fn)
                with sf.SoundFile(full_fn) as wav:
                    dur = len(wav) / wav.samplerate
                if dur < min_file_len or dur > max_file_len:
                    print(f"Ignore {full_fn}")
                    continue
                spk_dur += dur
                spk_included.append(full_fn)
        possible_map[spk] = {"dur": spk_dur, "wav_path_included": spk_included}

    poss_speakers = [[k, v["wav_path_included"], v["dur"]] for k, v in possible_map.items() if v["dur"] / 60.0 >= min_len]

    poss_speakers = sorted(poss_speakers, key=lambda x: x[2], reverse=True)

    to_move = [s[0] for s in poss_speakers[:max_speakers]]

    print('selected speakers', len(to_move))

    poss_speakers = {s[0]: s[1] for s in poss_speakers}

    return poss_speakers, to_move
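Example call, assuming a dataset laid out as one subfolder per speaker:

speakers, selected = select_speakers('/data/wavs', max_speakers=50, min_len=20)
for spk in selected:
    print(spk, len(speakers[spk]), 'files kept')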
Example #11
def play_wav(wav_file, buffersize=20, blocksize=1024, socketio=None):
    q = queue.Queue(maxsize=buffersize)
    event = threading.Event()

    def callback(outdata, frames, time, status):
        assert frames == blocksize
        if status.output_underflow:
            print('Output underflow: increase blocksize?', file=sys.stderr)
            raise sd.CallbackAbort
        assert not status
        try:
            data = q.get_nowait()
        except queue.Empty:
            print('Buffer is empty: increase buffersize?', file=sys.stderr)
            raise sd.CallbackAbort
        if len(data) < len(outdata):
            outdata[:len(data)] = data
            outdata[len(data):] = b'\x00' * (len(outdata) - len(data))
            raise sd.CallbackStop
        else:
            outdata[:] = data

    with sf.SoundFile(wav_file) as f:
        for _ in range(buffersize):
            data = f.buffer_read(blocksize, dtype='float32')
            if not data:
                break
            q.put_nowait(data)  # Pre-fill queue

        stream = sd.RawOutputStream(samplerate=f.samplerate,
                                    blocksize=blocksize,
                                    channels=f.channels,
                                    dtype='float32',
                                    callback=callback,
                                    finished_callback=event.set)
        with stream:
            timeout = blocksize * buffersize / f.samplerate
            while data:
                data = f.buffer_read(blocksize, dtype='float32')
                q.put(data, timeout=timeout)
            event.wait()  # Wait until playback is finished
        return stream
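A typical invocation (the filename is an assumption). The pre-fill loop means playback starts with buffersize blocks already queued, and the put timeout matches the time the stream needs to drain that backlog:

play_wav('music.wav', buffersize=20, blocksize=2048)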
Example #12
    def sampleNote(self, channel, note, velocity, fileprefix='', folder=None, subfolder=None):
        if folder is not None:
            if not os.path.exists(folder):
                os.mkdir(folder)
            subdir = os.path.join(folder, subfolder)
            if not os.path.exists(subdir):
                os.mkdir(subdir)
            file = os.path.join(subdir, '%s_%s_%s_%s.wav' % (fileprefix, channel, note, velocity))
        else:
            file = '%s_%s_%s_%s.wav' % (fileprefix, channel, note, velocity)

        if os.path.exists(file):
            os.remove(file)

        self.q = queue.Queue()
        self.record = True
        self.slagTime = 35 * 3
        self.thresholdStart = False
        self.startRecord = False
        self.timeStart = time.time()

        def callback(indata, frames, timeCFFI, status):
            peak = np.average(np.abs(indata)) * 32 * 64
            if self.thresholdStart:
                if peak >= .1:
                    self.startRecord = True
                if self.startRecord:
                    self.q.put(indata.copy())
            else:
                self.q.put(indata.copy())
            bars = "#" * int(128 * 64 * peak / 2**16)
            timeDiff = round((time.time() - self.timeStart) * 1000)
            print('  Preset %s - Channel %s - Note %s - Velocity %s - Device %s - Input Channels %s - Sample Rate %s - Bit Depth %s - Sample Time %sms - Used Frames %04d - Peak %05d %s' % (fileprefix, channel + 1, note, velocity, sd.default.device[0], self.inputChannels, self.samplerate, self.subtype, timeDiff, frames, peak, bars))
            if peak < .1:
                self.slagTime -= 1
                if self.slagTime == 0:
                    self.record = False

        with sf.SoundFile(file, mode='x', samplerate=sd.default.samplerate, channels=self.channels_in, subtype=self.subtype) as f:
            with sd.InputStream(callback=callback):
                self.md.sendNote(channel, note, velocity)
                while self.record:
                    f.write(self.q.get())
Example #13
	def load_audio(self, filename):
		# called whenever a potential audio file is set as self.filename - via drag & drop or open_audio
		if filename:
			if filename != self.filename:
				#ask the user if it should really be opened, if another file was already open
				if self.filename:
					qm = QtWidgets.QMessageBox
					ret = qm.question(self,'', "Do you really want to load "+os.path.basename(filename)+"? You will lose unsaved work on "+os.path.basename(self.filename)+"!", qm.Yes | qm.No)
					if ret == qm.No:
						return
				
				# is the (dropped) file an audio file, i.e. can it be read by pysoundfile?
				try:
					soundob = sf.SoundFile(filename)
					self.filename = filename
				except Exception:
					print(filename + " could not be read, is it a valid audio file?")
					return
				
				#Cleanup of old data
				self.parent.canvas.init_fft_storages()
				self.delete_traces(not_only_selected=True)
				self.resampling_widget.refill(soundob.channels)
				
				#finally - proceed with spectrum stuff elsewhere
				self.parent.setWindowTitle('pyrespeeder '+os.path.basename(self.filename))

				self.parent.canvas.set_file_or_fft_settings((filename,),
													 fft_size = self.display_widget.fft_size,
													 fft_overlap = self.display_widget.fft_overlap)
				# also force a cmap update here
				self.display_widget.update_cmap()
				
				#read any saved traces or regressions
				data = resampling.read_trace(self.filename)
				for offset, times, freqs in data:
					TraceLine(self.parent.canvas, times, freqs, offset=offset)
				self.parent.canvas.master_speed.update()
				data = resampling.read_regs(self.filename)
				for t0, t1, amplitude, omega, phase, offset in data:
					RegLine(self.parent.canvas, t0, t1, amplitude, omega, phase, offset)
				self.parent.canvas.master_reg_speed.update()
Example #14
    def __init__(self, infile):
        """
        Initialize Sound object.

        Parameters
        ----------
        infile : str
            Path of the sound file.

        Raises
        ------
        ValueError
            If sound file can't be found.

        Returns
        -------
        Sound object.

        """
        if os.path.isfile(infile):
            myfile = sf.SoundFile(infile)
            self._file_duration_sample = myfile.seek(0, sf.SEEK_END)
            self._file_sampling_frequency = myfile.samplerate
            self._file_duration_sec = self._file_duration_sample / \
                self._file_sampling_frequency
            self._channels = myfile.channels
            self._channel_selected = []
            self._file_dir = os.path.dirname(infile)
            self._file_name = os.path.basename(os.path.splitext(infile)[0])
            self._file_extension = os.path.splitext(infile)[1]
            self._filter_applied = False
            self._waveform = []
            self._waveform_start_sample = []
            self._waveform_stop_sample = []
            self._waveform_duration_sample = 0
            self._waveform_duration_sec = 0
            self._waveform_sampling_frequency = self._file_sampling_frequency
            self.detrended = []
            myfile.close()
        else:
            raise ValueError("The sound file can't be found. Please verify" +
                             ' sound file name and path')
Example #15
def gen_json():
    with open("vocab.json", encoding="utf8") as v_file:
        v = v_file.readline()
        vocab = json.loads(v)
    partitions = ["test", "dev", "train"]
    main_path = "./data_thchs30/"
    jsons = []
    for part in partitions:
        path = os.path.abspath(os.path.join(main_path, part))
        os.chdir(path)
        files = glob.glob(os.path.abspath("*.wav"))
        js = []
        for f in files:
            d = {}
            suf = f.split("\\")[-1]
            audio_path = os.path.join(
                os.path.join(os.path.abspath("../../rnn/data"), part), suf)
            with sf.SoundFile(audio_path) as audio:
                duration = int(10**6 * len(audio) / audio.samplerate)  # microseconds
            trn = os.path.abspath(f + ".trn")
            label = []
            with open(trn, encoding="utf8") as file:
                label_path = file.readline()[0:-1]
                abs_path = os.path.abspath(label_path)
                with open(abs_path, encoding="utf8") as label_file:
                    _ = label_file.readline()
                    label = label_file.readline()[0:-1].split()
            d["audio"] = audio_path
            d["text"] = label
            d["duration"] = duration
            js.append(d)
        if part != "train":
            jsons.append(js)
        else:
            js += jsons[0]
            js += jsons[1]

        os.chdir(os.path.abspath("../../rnn/data/partitions"))
        with open(part + ".json", "w") as outfile:
            for j in js:
                outfile.write(json.dumps(j) + '\n')
        os.chdir(os.path.abspath("../../../"))
Example #16
def cqt_from_file(filename, step=10, max_freq=None, trim=False):

    sample_rate = 16000

    with soundfile.SoundFile(filename) as sound_file:

        n_cqt = 13

        y, sr = librosa.load(filename, sr=sample_rate)

        if trim:
            audio = trim_silence(y)
        else:
            audio = y

        if audio.size == 0:
            audio = y

        sample_rate = sr  # y was already resampled to this rate by librosa.load

        if audio.ndim >= 2:
            audio = np.mean(audio, 1)

        if max_freq is None:
            max_freq = sample_rate / 2

        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "the sample rate")

        hop_length = int(0.001 * step * sample_rate)

        f_min = max_freq / 2**9
        cqt = librosa.feature.chroma_cqt(y=audio,
                                         sr=sample_rate,
                                         hop_length=hop_length,
                                         fmin=f_min,
                                         n_chroma=n_cqt,
                                         n_octaves=5)

        # Unable to vstack cqt ValueError: all the input array dimensions except for the concatenation axis must match exactly
        return cqt
Example #17
def record_audio(dir_name="", rec_itter=0):

    filename = WAVE_OUTPUT_FILE_HEADER + str(rec_itter) + ".wav"
    filename = os.path.join(dir_name, filename)

    stop_filename = os.path.join(dir_name, STOP_SIGNAL)

    device_info = sd.query_devices(DEVICE, 'input')
    # print(device_info)
    rate = int(device_info['default_samplerate'])
    # print(rate)

    q = queue.Queue()

    def callback(indata, frames, time, status):
        """This is called (from a separate thread) for each audio block."""
        del frames, time  # unused
        if status:
            print(status, file=sys.stderr)
        q.put(indata.copy())

    with sf.SoundFile(filename, mode='x', samplerate=rate,
                      channels=CHANNELS) as file:
        # signal.signal(signal.SIGINT, lambda n, f: (file.close(),
        #                                            print("* done recording"),
        #                                            exit(0)))
        # signal.signal(signal.SIGTERM, lambda n, f: (file.close(),
        #                                             print("* done recording"),
        #                                             exit(0)))

        with sd.InputStream(samplerate=rate,
                            device=DEVICE,
                            channels=CHANNELS,
                            callback=callback):
            print("* recording")
            while True:
                file.write(q.get())

                if os.path.isfile(stop_filename):
                    file.close()
                    print("* done recording")
                    exit(0)
Example #18
def play_audio(data, fs, volumn=1, speed=1):
    '''Play audio from a numpy array.

    Parameters
    ----------
    data : numpy.ndarray
        signal data
    fs : int
        sample rate
    volumn : float
        between 0 and 1
    speed : float
        > 1 means faster, < 1 means slower

    Note
    ----
    Only supports audio playback on macOS (uses afplay).
    '''
    import soundfile as sf
    import os

    data = numpy.asarray(data, dtype=numpy.int16)
    if data.ndim == 1:
        channels = 1
    else:
        channels = data.shape[1]
    with TemporaryDirectory() as temppath:
        path = os.path.join(temppath, 'tmp_play.wav')
        with sf.SoundFile(path,
                          'w',
                          fs,
                          channels,
                          subtype=None,
                          endian=None,
                          format=None) as f:
            f.write(data)
        os.system('afplay -v %f -q 1 -r %f %s &' % (volumn, speed, path))
        input('<enter> to stop audio.')
        os.system(
            "kill -9 `ps aux | grep -v 'grep' | grep afplay | awk '{print $2}'`"
        )
Example #19
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio file `file_name`
        Features supported:
            - MFCC (mfcc)
            - Chroma (chroma)
            - MEL Spectrogram Frequency (mel)
            - Contrast (contrast)
            - Tonnetz (tonnetz)
        e.g:
        `features = extract_feature(path, mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
        if chroma or contrast:
            stft = np.abs(librosa.stft(X))

        result = np.array([])
        if mfcc:
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result
Example #20
    def __init__(self,
                 filename,
                 target_sr=None,
                 int_values=False,
                 offset=0,
                 duration=0,
                 trim=False,
                 trim_db=60):
        """Create audio segment from samples.

        Samples are converted to float32 internally, with int scaled to [-1, 1].
        Load a file supported by librosa and return as an AudioSegment.
        :param filename: path of file to load
        :param target_sr: the desired sample rate
        :param int_values: if true, load samples as 32-bit integers
        :param offset: offset in seconds when loading audio
        :param duration: duration in seconds when loading audio
        :return: numpy array of samples
        """
        with sf.SoundFile(filename, 'r') as f:
            dtype = 'int32' if int_values else 'float32'
            sample_rate = f.samplerate
            if offset > 0:
                f.seek(int(offset * sample_rate))
            if duration > 0:
                samples = f.read(int(duration * sample_rate), dtype=dtype)
            else:
                samples = f.read(dtype=dtype)
        samples = samples.transpose()

        samples = self._convert_samples_to_float32(samples)
        if target_sr is not None and target_sr != sample_rate:
            samples = librosa.resample(samples,
                                       orig_sr=sample_rate,
                                       target_sr=target_sr)
            sample_rate = target_sr
        if trim:
            samples, _ = librosa.effects.trim(samples, top_db=trim_db)
        self._samples = samples
        self._sample_rate = sample_rate
        if self._samples.ndim >= 2:
            # after the transpose, samples are (channels, frames); average the channels
            self._samples = np.mean(self._samples, 0)
Example #21
def audio_generator_from_file(input_filename, target_sr, int_values,
                              chunk_duration):

    with soundfile.SoundFile(input_filename, 'rb') as sound_file:
        chunk_size = int(chunk_duration * sound_file.samplerate)
        start = True
        end = False

        while not end:

            audio_signal, end = get_audio_chunk_from_soundfile(
                sound_file, chunk_size, int_values)

            audio_segment = AudioSegment(audio_signal, sound_file.samplerate,
                                         target_sr)

            yield audio_segment.samples, target_sr, start, end

            start = False
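get_audio_chunk_from_soundfile() is defined elsewhere in that project; a minimal stand-in consistent with how it is called above (the signature and end-of-file convention are assumptions):

def get_audio_chunk_from_soundfile(sound_file, chunk_size, int_values):
    dtype = 'int32' if int_values else 'float32'
    audio_signal = sound_file.read(chunk_size, dtype=dtype)
    end = len(audio_signal) < chunk_size  # a short read means end of file
    return audio_signal, end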
Example #22
def change_audio_rate(annotation_path):
    for annotation_text in os.listdir(annotation_path):
        print('Changing the sample rate of the %s audio to 16000 Hz; this takes a long time, please wait ...' % annotation_text)
        annotation_text = os.path.join(annotation_path, annotation_text)
        with open(annotation_text, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        for line in tqdm(lines):
            audio_path = line.split('\t')[0]
            with soundfile.SoundFile(audio_path) as sndfile:
                samplerate = sndfile.samplerate
            if samplerate != 16000:
                f = wave.open(audio_path, "rb")
                str_data = f.readframes(f.getnframes())
                f.close()
                # note: this rewrites the WAV header at 16 kHz without
                # resampling the frames, so playback speed and pitch change
                file = wave.open(audio_path, 'wb')
                file.setnchannels(1)
                file.setsampwidth(4)
                file.setframerate(16000)
                file.writeframes(str_data)
                file.close()
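Because the loop above only relabels the header, audio that is not already at 16 kHz comes out pitch-shifted. A variant that actually resamples, sketched with librosa (an assumption; the original uses only the wave module):

import librosa
import soundfile

def resample_to_16k(audio_path):
    data, sr = librosa.load(audio_path, sr=None, mono=True)  # keep native rate
    if sr != 16000:
        data = librosa.resample(data, orig_sr=sr, target_sr=16000)
        soundfile.write(audio_path, data, 16000)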
Example #23
def wav_to_chroma(path_to_wav_file, offset=0.0, duration=None, dtype=np.float32):

    print("using soundfile")

    with sf.SoundFile(path_to_wav_file) as sf_desc:
        sr_native = sf_desc.samplerate
        if offset:
            # Seek to the start of the target read
            sf_desc.seek(int(offset * sr_native))
        if duration is not None:
            frame_duration = int(duration * sr_native)
        else:
            frame_duration = -1
        # Load the target number of frames, and transpose to match librosa form
        wav = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T

    # create chroma (STFT --> spectrogram --> chromagram)
    stft = create_stft(wav)
    chroma = create_chroma(stft)
    return chroma
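create_stft() and create_chroma() come from the surrounding module; one plausible shape for them (all parameters here are assumptions):

import librosa
import numpy as np

def create_stft(wav, n_fft=2048, hop_length=512):
    return np.abs(librosa.stft(wav, n_fft=n_fft, hop_length=hop_length))

def create_chroma(stft, sr=22050):
    return librosa.feature.chroma_stft(S=stft**2, sr=sr)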
Example #24
    def get_chunks(self, labels):

        chunks = {}
        chunk_size = self.configs["data"]["segment"]
        frame_shift = self.configs["data"]["segment"]

        for l in labels:
            sess = Path(l).stem.split("-")[-1]
            chunks[sess] = []
            # generate chunks for this file
            with sf.SoundFile(l) as audio:
                c_length = len(audio)  # length of the session file in samples
            for st, ed in _gen_frame_indices(c_length,
                                             chunk_size,
                                             frame_shift,
                                             use_last_samples=False):
                if st < 7000:  # exclude the first minute, which contains enrollment for each speaker
                    continue
                chunks[sess].append([st, ed])
        return chunks
Example #25
def read_audio(
        path_or_fd: Union[Pathlike, FileObject],
        offset: Seconds = 0.0,
        duration: Optional[Seconds] = None
) -> Tuple[np.ndarray, int]:
    try:
        import soundfile as sf
        with sf.SoundFile(path_or_fd) as sf_desc:
            sampling_rate = sf_desc.samplerate
            if offset > 0:
                # Seek to the start of the target read
                sf_desc.seek(compute_num_samples(offset, sampling_rate))
            if duration is not None:
                frame_duration = compute_num_samples(duration, sampling_rate)
            else:
                frame_duration = -1
            # Load the target number of frames, and transpose to match librosa form
            return sf_desc.read(frames=frame_duration, dtype=np.float32, always_2d=False).T, sampling_rate
    except Exception:
        return _audioread_load(path_or_fd, offset=offset, duration=duration)
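compute_num_samples() belongs to the surrounding library; a minimal stand-in consistent with its use above (the rounding behavior is an assumption):

def compute_num_samples(duration: float, sampling_rate: int) -> int:
    return int(round(duration * sampling_rate))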
Example #26
    def segment_from_file(cls,
                          filename,
                          target_sr=None,
                          n_segments=0,
                          trim=False):
        """Grabs n_segments number of samples from filename randomly from the
        file as opposed to at a specified offset.
        """
        with sf.SoundFile(filename, 'r') as f:
            sample_rate = f.samplerate
            if n_segments > 0 and len(f) > n_segments:
                max_audio_start = len(f) - n_segments
                audio_start = random.randint(0, max_audio_start)
                f.seek(audio_start)
                samples = f.read(n_segments, dtype='float32')
            else:
                samples = f.read(dtype='float32')

        samples = samples.transpose()
        return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
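Usage sketch, assuming the classmethod lives on an AudioSegment-like class whose constructor accepts (samples, sample_rate, target_sr, trim):

segment = AudioSegment.segment_from_file('speech.wav', target_sr=16000,
                                         n_segments=16000)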
Example #27
def recorder(file_name):
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, 'input')
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info['default_samplerate'])

    # Remove any previous recording so that mode='x' below can create the file:
    try:
        os.remove(file_name)
    except FileNotFoundError:
        pass
    with sf.SoundFile(file_name, mode='x', samplerate=args.samplerate,
                      channels=args.channels, subtype=args.subtype) as file:
        with sd.InputStream(samplerate=args.samplerate, device=args.device,
                            channels=args.channels, callback=callback):
            while True:
                file.write(q.get())
                if keyboard.is_pressed("left"):
                    break
Example #28
def sound_buffer_save():
    global buffer_idx
    global data_idx
    global buffer
    a = np.array([])
    idx = (buffer_idx + 1) % BUFFER_LENGTH
    for i in range(BUFFER_LENGTH - 1):  # oldest to newest block
        a = np.append(a, buffer[idx])
        idx = (idx + 1) % BUFFER_LENGTH
    #a = np.reshape(a, (int(a.shape[0]/cp["audio_channels"]), cp["audio_channels"]))
    filename = datetime.now().strftime("%Y-%m-%d_%H_%M_%S") + ".wav"
    with sf.SoundFile(filename,
                      mode='x',
                      samplerate=8000,
                      channels=1,
                      subtype="PCM_24") as sfile:
        sfile.write(a)
Example #29
def trans(path, sound):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/Users/hbk/data/speech_key.json'
    client = speech.SpeechClient()
    file_name = path + sound

    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()
        audio = types.RecognitionAudio(content=content)

    with sf.SoundFile(file_name) as snd:
        samplerate = snd.samplerate

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=samplerate,
        language_code='ko-KR')

    response = client.recognize(config, audio)

    res = ''
    for result in response.results:
        #print('Transcript: {}'.format(result.alternatives[0].transcript))
        res = result.alternatives[0].transcript
    return res
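trans() assumes the pre-2.0 google-cloud-speech API plus a few imports; a sketch of the assumed header:

import io
import os

import soundfile as sf
from google.cloud import speech
from google.cloud.speech import enums, types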
Example #30
def divisor_audio(file, paths):
	for file in os.listdir(UPLOAD_FOLDER):
		name_file = file.split('.')

		if name_file[1] == 'wav':  # look inside the upload folder for files with a ".wav" extension

			time = 30000  # slice length in milliseconds
			j = 1
			with contextlib.closing(wave.open(UPLOAD_FOLDER + "/" + file, 'r')) as f:
				frames = f.getnframes()
				rate = f.getframerate()
				duration = int((frames / float(rate)) * 1000)  # total duration of the audio in ms
			for i in range(0, duration, time):  # step through the audio in 30-second slices
				newAudio = AudioSegment.from_wav(UPLOAD_FOLDER + "/" + file)
				newAudio = newAudio[i:i + 30000]  # cut the audio every 30 seconds
				destino = paths + "/" + name_file[0] + '_' + str(j)
				newAudio.export(destino, format="wav")
				j = j + 1  # repeat until the full duration is covered