Example No. 1
def signalMixer(data1: np.ndarray,
                data2: np.ndarray,
                saveFilePath="./mixed.wav"):
    segment1 = pydub.AudioSegment(data1.tobytes(),
                                  sample_width=Cai.sampleWidthInBytes,
                                  frame_rate=Cai.frameRate,
                                  channels=Cai.numberOfChannels)
    segment2 = pydub.AudioSegment(data2.tobytes(),
                                  sample_width=Cai.sampleWidthInBytes,
                                  frame_rate=Cai.frameRate,
                                  channels=Cai.numberOfChannels)

    # overlay() mixes the two signals; the + operator would concatenate them
    combined_sounds = segment1.overlay(segment2)
    combined_sounds.export(saveFilePath, format="wav")
    Logger.info("saved mixed audio file at {}".format(saveFilePath))
Example No. 2
def record(*,
           target_segment_duration: int = 5,
           output_queue: multiprocessing.Queue):
    """Record from the default microphone and write segments.

    Write segments as .ts files (MPEG-TS) along with a master.m3u8 playlist.

    """
    rate = 44100
    chunk_size = rate // 10

    stream = make_stream(chunk_size, rate, channels=1)

    while True:
        frames = []
        n_frames = round(target_segment_duration / (chunk_size / rate))
        for _ in range(n_frames):
            data = stream.read(chunk_size, exception_on_overflow=True)
            frames.append(data)

        segment = pydub.AudioSegment(data=b"".join(frames),
                                     sample_width=2,
                                     frame_rate=44100,
                                     channels=1)

        output_queue.put(segment)
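A hypothetical consumer for the queue above, draining each pydub segment and exporting it as a numbered MPEG-TS file (the naming scheme and the mpegts ffmpeg muxer choice are assumptions, not part of the original):

import multiprocessing

def consume(output_queue: multiprocessing.Queue):
    index = 0
    while True:
        segment = output_queue.get()  # blocks until a segment arrives
        segment.export("segment_{:05d}.ts".format(index), format="mpegts")
        index += 1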
Example No. 3
def _cached_get_segment_audio_data(audio_file_name, database_id, fs, start,
                                   end):
    wav_file_path = data_path('audio/wav/{}'.format(database_id),
                              '{}.wav'.format(audio_file_name))
    chunk = wavfile.read_segment(wav_file_path,
                                 start,
                                 end,
                                 normalised=False,
                                 mono=True)

    audio_segment = pydub.AudioSegment(chunk.tobytes(),
                                       frame_rate=fs,
                                       sample_width=chunk.dtype.itemsize,
                                       channels=1)

    audio_segment = _match_target_amplitude(audio_segment)

    out = io.BytesIO()
    audio_segment.export(out, format=settings.AUDIO_COMPRESSED_FORMAT)
    binary_content = out.getvalue()
    out.close()

    response = HttpResponse()
    response.write(binary_content)
    response['Content-Type'] = 'audio/' + settings.AUDIO_COMPRESSED_FORMAT
    response['Content-Length'] = len(binary_content)
    return response
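The _match_target_amplitude helper is not shown in this example; a common shape for it, sketched here as an assumption rather than the original implementation, normalizes the segment to a target dBFS with apply_gain:

def _match_target_amplitude(segment, target_dbfs=-10.0):
    # apply_gain() shifts the segment's loudness by the given dB delta
    return segment.apply_gain(target_dbfs - segment.dBFS)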
Example No. 4
    def make_spectrogram(self, export=False, filename="{}/spectrogram.png".format(FIGURE_DIR)):

        x = self.tape

        audio = pydub.AudioSegment(x.tobytes(), frame_rate=FRAME_RATE, channels=1, sample_width=x.dtype.itemsize)
        waveform = np.array(audio.get_array_of_samples(), dtype=np.float32)

        signals = tf.reshape(waveform, [1, -1])
        stfts = tf.contrib.signal.stft(signals, frame_length=FFT_FRAME_LENGTH, frame_step=FFT_FRAME_STEP,
                                       fft_length=FFT_LENGTH)
        magnitude_spectrograms = tf.abs(stfts)

        num_spectrogram_bins = magnitude_spectrograms.shape[-1].value

        linear_to_mel_weight_matrix = tf.contrib.signal.linear_to_mel_weight_matrix(NUM_MEL_BINS, num_spectrogram_bins,
                                                                                    FRAME_RATE, LOWER_EDGE_HERTZ,
                                                                                    UPPER_EDGE_HERTZ)

        mel_spectrograms = tf.tensordot(magnitude_spectrograms, linear_to_mel_weight_matrix, 1)
        log_mel_spectrograms = tf.log(mel_spectrograms + tf.keras.backend.epsilon())

        if export:
            sns_plot = sns.heatmap(np.swapaxes(log_mel_spectrograms.numpy()[0], 0, 1))
            sns_plot.get_figure().savefig(filename)
            plt.close('all')

        return log_mel_spectrograms
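The upper-case names are module-level constants defined elsewhere in the source repository; plausible placeholder values (assumptions, not the original settings) would look like:

FIGURE_DIR = "./figures"
FRAME_RATE = 16000
FFT_FRAME_LENGTH = 1024   # samples per STFT frame
FFT_FRAME_STEP = 512      # hop size between frames
FFT_LENGTH = 1024
NUM_MEL_BINS = 64
LOWER_EDGE_HERTZ = 80.0
UPPER_EDGE_HERTZ = 7600.0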
Example No. 5
    def start(self):
        segment = self.segment or AUDIO_SEGMENT_LENGTH
        self.num_frames = int(RATE / FRAMES_PER_BUFFER * segment)
        if self.seconds:
            signal.setitimer(signal.ITIMER_REAL, self.seconds)
        if self.verbose:
            self._timer = time.time()
        if self.collect:
            print('Collecting RMS values...')
        if self.action:
            # Interpret threshold
            self.get_threshold()

        try:
            self.is_running = True
            record = self.record()
            while not self._graceful:
                record.send(True)  # Record stream `AUDIO_SEGMENT_LENGTH' long
                data = self.output.getvalue()
                segment = pydub.AudioSegment(data)
                rms = segment.rms
                if self.collect:
                    self.collect_rms(rms)
                self.meter(rms)
                if self.action:
                    if self.is_triggered(rms):
                        self.execute(rms)
                self.monitor(rms)
            self.is_running = False
            self.stop()

        except self.__class__.StopException:
            self.is_running = False
            self.stop()
Example No. 6
def write(path, sr, x, codec, normalized=False):
    """numpy array to MP3"""
    audio_segment = pydub.AudioSegment(x.tobytes(),
                                       frame_rate=sr,
                                       sample_width=x.dtype.itemsize,
                                       channels=1)

    audio_segment.export(path, format=codec)
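Usage sketch: encode one second of a 440 Hz sine wave as MP3. The array must already be int16, since sample_width is taken from its itemsize:

import numpy as np

sr = 44100
t = np.arange(sr) / sr
x = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
write("tone.mp3", sr, x, codec="mp3")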
Example No. 7
 def toAudio(self, rate, signal, channels):
     # only the first channel is kept, so the resulting segment is mono
     channel1 = signal[:, 0]
     audio_segment = pydub.AudioSegment(
         channel1.tobytes(),
         frame_rate=rate,
         sample_width=channel1.dtype.itemsize,
         channels=1)
     return audio_segment
Example No. 8
def write(f, sr, x, normalized=False):
    """numpy array to MP3"""
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    if normalized:  # normalized array - each item should be a float in [-1, 1)
        y = np.int16(x * 2 ** 15)
    else:
        y = np.int16(x)
    song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
    song.export(f, format="mp3", bitrate="320k")
Example No. 9
def create_audio_segments_udf(
    audio_bytes_series: pd.Series,
    audio_type_series: pd.Series,
    audio_name_series: pd.Series,
    start_ms_array_series: pd.Series,
    end_ms_array_series: pd.Series,
    output_audio_codec_series: pd.Series,
) -> pd.DataFrame:
    output_array = []
    assert (
        len(audio_bytes_series) == len(audio_type_series)
        and len(audio_type_series) == len(audio_name_series)
        and len(audio_name_series) == len(start_ms_array_series)
        and len(start_ms_array_series) == len(end_ms_array_series)
        and len(end_ms_array_series) == len(output_audio_codec_series)
    )
    for (
        audio_bytes,
        audio_type,
        audio_name,
        start_ms_array,
        end_ms_array,
        output_audio_codec,
    ) in zip(
        audio_bytes_series,
        audio_type_series,
        audio_name_series,
        start_ms_array_series,
        end_ms_array_series,
        output_audio_codec_series,
    ):
        assert audio_type == "mp3"
        decoded_bytes = DecodeToRawPipe(audio_bytes, audio_type)
        audio_segment = pydub.AudioSegment(
            decoded_bytes, frame_rate=16_000, sample_width=2, channels=1
        )
        segmented_audio = {"audio_name": [], "audio": []}
        for start_ms, end_ms in zip(start_ms_array, end_ms_array):
            chunk = audio_segment[start_ms:end_ms]
            # 16 kHz, 2 bytes per sample: bytes / 2 / 16_000 * 1000 == ms
            assert abs(
                (len(chunk.raw_data) / 16_000 / 2) * 1000 - (end_ms - start_ms)
            ) <= 1.0, (
                f"{(len(chunk.raw_data) / 16_000 / 2) * 1000} vs. {end_ms - start_ms}"
            )
            segment_flac_bytes = EncodeFromRawPipe(chunk.raw_data, output_audio_codec)
            segmented_audio["audio"].append(segment_flac_bytes)
        output_array.append(segmented_audio)
    audio_segment_names_series = create_audio_segment_names_udf.func(
        audio_name_series, end_ms_array_series.transform(len), output_audio_codec_series
    )
    assert len(output_array) == len(audio_segment_names_series)
    for i, audio_segment_names in enumerate(audio_segment_names_series):
        output_array[i]["audio_name"] = audio_segment_names
        assert len(output_array[i]["audio_name"]) == len(output_array[i]["audio"])
    return pd.DataFrame(output_array)
Example No. 10
 def write(self, f, sr, x, normalized=False):
     channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
     if normalized:
         y = np.int16(x * 2**15)
     else:
         y = np.int16(x)
     song = pydub.AudioSegment(y.tobytes(),
                               frame_rate=sr,
                               sample_width=2,
                               channels=channels)
     song.export(f, format="mp3", bitrate="320k")
Example No. 11
def save_output(output_filepath, stereo_array):
  '''Writes a stereo array to the output filepath --
  accepts .wav, .mp3, .ogg, or anything else supported by ffmpeg
  '''
  pydub.AudioSegment.from_mono_audiosegments(*[
    pydub.AudioSegment(
      (channel * 32768).astype(np.int16).tobytes(),
      frame_rate=44100,
      sample_width=np.dtype(np.int16).itemsize,
      channels=1,
    )
    for channel in stereo_array.reshape((-1, 2)).T
  ]).export(output_filepath, format=output_filepath.split('.')[-1])
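Usage sketch, assuming stereo_array holds floats in [-1, 1] shaped (n, 2); reshape((-1, 2)) also accepts a flat interleaved array:

import numpy as np

t = np.arange(44100) / 44100.0
left = 0.8 * np.sin(2 * np.pi * 440 * t)
right = 0.8 * np.sin(2 * np.pi * 554 * t)
save_output('out.wav', np.stack([left, right], axis=1))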
Example No. 12
    def play_byte_stream(self, data):
        if not self._audio_device:
            return 0

        self.stop_playback()

        sample_width = 2  # 16 bit pcm
        frame_rate = 16000  # sample rate
        channels = 2  # stereo signal

        audio = pydub.AudioSegment(data=data, sample_width=sample_width,
                                   frame_rate=frame_rate, channels=channels)

        play(audio)
Example No. 13
def _write_wave_file(np_audio, path):
    """Creates a random audio file."""
    num_channels = np_audio.shape[1] if len(np_audio.shape) == 2 else 1
    audio = pydub.AudioSegment(
        b'',
        sample_width=2,
        channels=num_channels,
        frame_rate=1,
    )
    # See documentation for _spawn usage:
    # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples
    audio = audio._spawn(
        array.array(audio.array_type, np_audio.reshape((-1, ))))
    audio.export(path, format='wav')
Example No. 14
def get_audiosegment(audio_file, desired_framerate, format="mp3"):
    audio = pydub.AudioSegment.from_file(audio_file, format=format)

    logger.debug(
        f"Audio duration before framerate change: {audio.duration_seconds}")
    data = convert_samplerate(audio, desired_framerate)
    del audio
    audio = pydub.AudioSegment(data,
                               sample_width=2,
                               channels=1,
                               frame_rate=desired_framerate)
    audio_seconds = audio.duration_seconds
    logger.debug(f"Audio duration after change: {audio_seconds}")

    return audio, audio_seconds
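The convert_samplerate helper is defined elsewhere; a minimal sketch of what it plausibly does, using pydub itself (an assumption, not the original implementation), returns raw 16-bit mono PCM at the desired rate:

def convert_samplerate(audio, desired_framerate):
    # resample, downmix to mono, force 16-bit, then hand back raw PCM bytes
    converted = (audio.set_frame_rate(desired_framerate)
                      .set_channels(1)
                      .set_sample_width(2))
    return converted.raw_data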
Example No. 15
    def ApplyImpulseResponse(cls, signal, impulse_response):
        """Applies an impulse response to a signal.

    Args:
      signal: AudioSegment instance.
      impulse_response: list or numpy vector of float values.

    Returns:
      AudioSegment instance.
    """
        # Get samples.
        assert signal.channels == 1, (
            'multiple-channel recordings not supported')
        samples = signal.get_array_of_samples()

        # Convolve.
        logging.info(
            'applying %d order impulse response to a signal lasting %d ms',
            len(impulse_response), len(signal))
        convolved_samples = scipy.signal.fftconvolve(in1=samples,
                                                     in2=impulse_response,
                                                     mode='full').astype(
                                                         np.int16)
        logging.info('convolution computed')

        # Cast.
        convolved_samples = array.array(signal.array_type, convolved_samples)

        # Verify.
        logging.debug('signal length: %d samples', len(samples))
        logging.debug('convolved signal length: %d samples',
                      len(convolved_samples))
        assert len(convolved_samples) > len(samples)

        # Generate convolved signal AudioSegment instance.
        convolved_signal = pydub.AudioSegment(data=convolved_samples,
                                              metadata={
                                                  'sample_width':
                                                  signal.sample_width,
                                                  'frame_rate':
                                                  signal.frame_rate,
                                                  'frame_width':
                                                  signal.frame_width,
                                                  'channels': signal.channels,
                                              })
        assert len(convolved_signal) > len(signal)

        return convolved_signal
Example No. 16
    def stream_utterance(self, audio_stream):
        silence_buffer = pydub.AudioSegment.empty()
        voice_buffer = pydub.AudioSegment.empty()
        silence_threshold = False
        for avf in audio_stream:
            audio_bytes = avf.to_ndarray().tobytes()
            c = (
                pydub.AudioSegment(
                    data=audio_bytes,
                    frame_rate=avf.sample_rate,
                    channels=len(avf.layout.channels),
                    sample_width=avf.format.bytes,
                )
                .set_channels(1)
                .set_sample_width(2)
                .set_frame_rate(16000)
            )
            voice_frame = is_frame_voice(self.vad, c, self.chunk_dur)
            # logger.info(f"is audio stream voice? {voice_frame}")
            if voice_frame:
                silence_threshold = False
                voice_buffer += c
                silence_buffer = pydub.AudioSegment.empty()
            else:
                silence_buffer += c
            voc_dur = voice_buffer.duration_seconds * 1000
            sil_dur = silence_buffer.duration_seconds * 1000

            if voc_dur >= self.max_utt:
                # logger.info(
                #     f"detected voice overflow: voice duration {voice_buffer.duration_seconds}"
                # )
                yield voice_buffer
                voice_buffer = pydub.AudioSegment.empty()

            if sil_dur >= self.max_sil:
                if voc_dur >= self.min_utt:
                    # logger.info(
                    #     f"detected silence: voice duration {voice_buffer.duration_seconds}"
                    # )
                    yield voice_buffer
                voice_buffer = pydub.AudioSegment.empty()
                # mark that the silence threshold was reached so the cleared
                # voice buffer is not emitted again
                if not silence_threshold:
                    silence_threshold = True

        if voice_buffer:
            yield voice_buffer
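The is_frame_voice helper is external to this snippet; a plausible sketch with the webrtcvad package (an assumed dependency) splits the 16 kHz, 16-bit mono chunk into the 30 ms frames the VAD accepts:

def is_frame_voice(vad, segment, chunk_dur):
    # webrtcvad only accepts 10/20/30 ms frames of 16-bit mono PCM
    frame_bytes = int(16000 * 0.03) * 2
    raw = segment.raw_data
    frames = [raw[i:i + frame_bytes]
              for i in range(0, len(raw) - frame_bytes + 1, frame_bytes)]
    return any(vad.is_speech(f, 16000) for f in frames)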
Example No. 17
    def Copy(cls, signal):
        """Makes a copy os a signal.

    Args:
      signal: AudioSegment instance.

    Returns:
      An AudioSegment instance.
    """
        return pydub.AudioSegment(data=signal.get_array_of_samples(),
                                  metadata={
                                      'sample_width': signal.sample_width,
                                      'frame_rate': signal.frame_rate,
                                      'frame_width': signal.frame_width,
                                      'channels': signal.channels,
                                  })
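The metadata keyword used in the two class methods above (Examples No. 15 and No. 17) is pydub's internal construction path; the equivalent form with the public keyword arguments would be roughly:

copy = pydub.AudioSegment(
    data=signal.get_array_of_samples().tobytes(),
    sample_width=signal.sample_width,
    frame_rate=signal.frame_rate,
    channels=signal.channels,
)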
Example No. 18
def write(f, x, sr=44100, normalized=True):

    # Check the amount of channels that we need to convert the file to
    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1

    # If the array is normalized, scale the array by 2**15, else do nothing to the array.
    if normalized:  # normalized array - each item should be a float in [-1, 1)
        y = np.int16(x * 2**15)
    else:
        y = np.int16(x)

    # Convert the array to mp3 with pydub.
    song = pydub.AudioSegment(y.tobytes(),
                              frame_rate=sr,
                              sample_width=2,
                              channels=channels)
    song.export(f, format="mp3", bitrate="64k")
Example No. 19
def speed_pydub(samples, min_speed=0.9, max_speed=1.1):
    """
    Speed perturbation via pydub.
    :param samples: audio data, 1-D int16 array
    :param min_speed: do not go below 0.9; quality degrades at lower values
    :param max_speed: do not go above 1.1; quality degrades at higher values
    :return:
    """
    samples = samples.copy()  # frombuffer() yields read-only data, so work on a copy
    data_type = samples.dtype
    speed = random.uniform(min_speed, max_speed)

    # the sample rate is assumed to be 16 kHz mono here
    segment = pydub.AudioSegment(samples.tobytes(),
                                 frame_rate=16000,
                                 sample_width=samples.dtype.itemsize,
                                 channels=1)
    # pydub's speedup() only supports factors above 1.0, so clamp the drawn speed
    segment = pydub.effects.speedup(segment, playback_speed=max(speed, 1.01))

    samples = np.array(segment.get_array_of_samples()).astype(data_type)
    return samples
Example No. 20
def soundcheck_kit(kit_name=None, kit_size=None):
    for i in range(kit_size):
        print('record samples for drum: {}, press "r" to start recording, "s" to stop and save samples'.format(i))
        audio, midi_notes = soundcheck_listener()
        # get most frequently hit note
        note = int(midi_notes['key'].value_counts().idxmax())
        label = f_to_l_map[note]
        instrument_name = names_l_map[label]
        sc_audio_filename = './soundcheck/' + kit_name + '_' + instrument_name + '.mp3'
        audio_segment = pydub.AudioSegment(
            audio.tobytes(),
            frame_rate=RATE,
            sample_width=2,
            channels=1
        )
        audio_segment.export(sc_audio_filename, format='mp3')
        print('done')
Example No. 21
def import_pcm(song, cur, audio_file, wav_file_path=None, compressed_url=None):
    if wav_file_path is None:
        wav_file_path = wav_path(audio_file)
    if compressed_url is None:
        compressed_url = audio_path(audio_file,
                                    settings.AUDIO_COMPRESSED_FORMAT)

    if not os.path.isfile(wav_file_path):
        # print('Importing {}'.format(song_name))
        song_id = song['songid']
        cur.execute('select wav from wavs where songid={};'.format(song_id))

        data = cur.fetchone()
        raw_pcm = str_to_bytes(data[0])

        nchannels = song['stereo']
        bitrate = int(song['ssizeinbits'])
        fs = int(song['samplerate'])

        byte_per_frame = int(bitrate / 8)
        nframes_all_channel = int(len(raw_pcm) / byte_per_frame)
        nframes_per_channel = int(nframes_all_channel / nchannels)
        length = nframes_per_channel
        ensure_parent_folder_exists(wav_file_path)

        if bitrate == 24:
            array1 = np.frombuffer(raw_pcm, dtype=np.ubyte)
            array2 = array1.reshape((nframes_per_channel, nchannels,
                                     byte_per_frame)).astype(np.uint8)
            wf.write_24b(wav_file_path, fs, array2)
        else:
            data = array.array('i', raw_pcm)
            sound = pydub.AudioSegment(data=data,
                                       sample_width=byte_per_frame,
                                       frame_rate=fs,
                                       channels=nchannels)
            sound.export(wav_file_path, 'wav')
    else:
        fs, length = get_wav_info(wav_file_path)

    if not os.path.isfile(compressed_url):
        ensure_parent_folder_exists(compressed_url)
        sound = pydub.AudioSegment.from_wav(wav_file_path)
        sound.export(compressed_url, format=settings.AUDIO_COMPRESSED_FORMAT)

    return fs, length
Example No. 22
 def callback(self, in_data, frame_count, time_info, status):
     segment = pydub.AudioSegment(in_data,
                                  sample_width=self.WIDTH,
                                  frame_rate=self.RATE,
                                  channels=self.CHANNELS)
     # has the RMS reached the threshold?
     reached = segment.rms > self.threshold
     if reached and not self.recording:
         self.recording = True  # start recording
         self.wav_buff = []  # clear the raw waveform buffer
     if not reached:
         # the RMS fell back below the threshold: fire the trigger
         # and hand over the recorded audio data
         if self.recording:
             self.trigger(self.wav_buff)
         self.recording = False
     if self.recording:
         self.wav_buff.append(in_data)
     return (b'', pyaudio.paContinue)
Example No. 23
	def start(self):
		segment = self.segment_length or AUDIO_SEGMENT_LENGTH
		self.num_frames = int(RATE / FRAMES_PER_BUFFER * segment)
		try:
			self.is_running = True
			record = self.record()
			while not self._graceful:
				next(record)  # Record stream `AUDIO_SEGMENT_LENGTH' long in the generator method 'record'
				data = self.output.getvalue()
				segment = pydub.AudioSegment(data)
				rms = segment.rms
				dbfs = segment.dBFS
				self.meter(rms, dbfs)
			self.is_running = False
			self.stop()

		except self.__class__.StopException:
			self.is_running = False
			self.stop()
Example No. 24
def tts_sdk(text, **kwargs):
    """长文本的语音合成,包含简单分句模块。"""
    text_split_lst = split_text(text, kwargs.get('maxlen', 30))
    wav_lst = []
    for text_split in text_split_lst:
        logger.info(f'Synthesizing: {text_split}')
        wav = tts_sdk_base(text_split, **kwargs)
        wav_lst.append(wav)

    sil = pydub.AudioSegment.silent(300,
                                    frame_rate=kwargs.get(
                                        'sampling_rate', 22050))
    wav_out = sil
    for wav in wav_lst:
        wav = pydub.AudioSegment(wav)
        wav_out = wav_out + wav + sil
    out = io.BytesIO()
    wav_out.export(out, format='wav')
    return out.getvalue()
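Usage sketch (tts_sdk_base and split_text are project helpers not shown here):

wav_bytes = tts_sdk("Hello, this is a long text to synthesize.",
                    sampling_rate=22050, maxlen=30)
with open("out.wav", "wb") as f:
    f.write(wav_bytes)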
Example No. 25
def create_media():
    data = request.data
    filename = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S.mp3")
    user = github.get("user")
    userdir = os.path.join(app.config['UPLOAD_FOLDER'], user.data['login'])

    #wavdata, samplerate = sf.read(io.BytesIO(data))
    if not os.path.exists(userdir):
        os.makedirs(userdir)
    #sf.write(os.path.join(userdir, filename), wavdata, samplerate)
    audio = pydub.AudioSegment(io.BytesIO(data))
    audio.export(os.path.join(userdir, filename))

    return jsonify({
        "response":
        "success",
        "uri":
        "{}/media/{}/{}".format(request.base_url, user.data['login'], filename)
    })
Example No. 26
    def start(self, final_callback=None):
        self.final_callback = final_callback

        threshold_response_msg = ThresholdResponseMessage(True, self.threshold, self.num)
        # TODO: update topic - get rid of audioinput
        self.publish("audioinput/threshold/response", threshold_response_msg.to_json())

        segment = self.segment or self.config.AUDIO_SEGMENT_LENGTH
        self.num_frames = int(
            self.config.RATE / self.config.FRAMES_PER_BUFFER * segment)
        if self.seconds:
            signal.setitimer(signal.ITIMER_REAL, self.seconds)
        if self.verbose:
            self._timer = time.time()
        if self.collect:
            print('Collecting RMS values...')
        if self.action:
            # Interpret threshold
            self.get_threshold()

        try:
            self.is_running = True
            record = self.record()
            while not self._graceful:
                record.send(True)  # Record stream `AUDIO_SEGMENT_LENGTH' long

                data = self.output.getvalue()
                segment = pydub.AudioSegment(data)
                rms = segment.rms
                if self.collect:
                    self.collect_rms(rms)
                self.meter(rms)
                if self.action:
                    if self.is_triggered(rms):
                        self.execute(rms)
                self.monitor(rms)
            self.is_running = False
            self.stop()

        except self.__class__.StopException:
            self.is_running = False
            self.stop()
Example No. 27
    def process_audio(frame: av.AudioFrame) -> av.AudioFrame:
        raw_samples = frame.to_ndarray()
        sound = pydub.AudioSegment(
            data=raw_samples.tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )

        sound = sound.apply_gain(gain)

        # Ref: https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples  # noqa
        channel_sounds = sound.split_to_mono()
        channel_samples = [s.get_array_of_samples() for s in channel_sounds]
        new_samples: np.ndarray = np.array(channel_samples).T
        new_samples = new_samples.reshape(raw_samples.shape)

        new_frame = av.AudioFrame.from_ndarray(new_samples, layout=frame.layout.name)
        new_frame.sample_rate = frame.sample_rate
        return new_frame
Example No. 28
    def apply_pydub(self, samples, sample_rate):
        try:
            import pydub
        except ImportError:
            print(
                "Failed to import pydub. Maybe it is not installed? "
                "To install the optional pydub dependency of audiomentations,"
                " do `pip install audiomentations[extras]` instead of"
                " `pip install audiomentations`",
                file=sys.stderr,
            )
            raise

        assert len(samples.shape) == 1
        assert samples.dtype == np.float32

        int_samples = convert_float_samples_to_int16(samples)

        audio_segment = pydub.AudioSegment(
            int_samples.tobytes(),
            frame_rate=sample_rate,
            sample_width=int_samples.dtype.itemsize,
            channels=1,
        )

        tmp_dir = tempfile.gettempdir()
        tmp_file_path = os.path.join(
            tmp_dir, "tmp_compressed_{}.mp3".format(str(uuid.uuid4())[0:12])
        )

        bitrate_string = "{}k".format(self.parameters["bitrate"])
        file_handle = audio_segment.export(tmp_file_path, bitrate=bitrate_string)
        file_handle.close()

        degraded_samples, _ = librosa.load(tmp_file_path, sample_rate)

        os.unlink(tmp_file_path)

        return degraded_samples
Example No. 29
def save_audio(array, filename, sample_rate, dtype=np.int16, format=None):
    dtype = np.dtype(dtype)
    allowed_dtypes = [np.int8, np.int16, np.int32, np.int64]
    if dtype not in allowed_dtypes:
        raise TypeError("The dtype must be one of " + str(allowed_dtypes))

    if np.ndim(array) != 1:
        raise TypeError("Saving multi-channel audio is not supported!")

    if format is None:
        name = os.path.basename(filename)
        ext = name.rfind('.')
        if ext == -1:
            raise ValueError("Can not infer output format from the filename!")
        format = name[ext + 1:]

    array = convert_dtype(array, dtype)

    segment = pydub.AudioSegment(array.tobytes(),
                                 sample_width=dtype.itemsize,
                                 channels=1,
                                 frame_rate=sample_rate)
    segment.export(filename, format)
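Usage sketch (convert_dtype is a project helper not shown; the output format here is inferred from the .flac extension):

import numpy as np

noise = (np.random.randn(22050) * 1000).astype(np.int16)
save_audio(noise, "noise.flac", sample_rate=22050, dtype=np.int16)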
Example No. 30
    def _build_audio(audio_file_id: int, applied_length: Optional[int] = None, volume: int = 255):

        if applied_length is not None and applied_length < 0:
            raise BadWriteError(f"File {audio_file_id}: length {applied_length} < 0")
        if volume < 0 or volume > 255:
            raise BadWriteError(f"The volume is {volume} but must be 0–255")

        import valarpy.model as model

        valar_obj = model.AudioFiles.select().where(model.AudioFiles.id == audio_file_id).first()
        if valar_obj is None:
            raise UnrecognizedKeyError(f"No audio file with ID {audio_file_id}")
        song = pydub.AudioSegment(data=valar_obj.data, sample_width=2, frame_rate=44100, channels=1)
        n_sec_valar = valar_obj.n_seconds * 1000
        length_delta = abs(len(song) - n_sec_valar)
        if length_delta > 0.00001:
            raise AssertionError(f"File {audio_file_id} is {len(song)}, but Valar says it’s {n_sec_valar}")

        if applied_length is None:
            resized = song
        else:
            n_repeats = math.ceil(applied_length / len(song))
            resized = (song * n_repeats)[0:applied_length]

        if volume == 0 or applied_length == 0:
            final = pydub.AudioSegment.silent(duration=0.5)
        else:
            # noinspection PyTypeChecker
            volume_floor = config.get_float("sauron.hardware.stimuli.audio.audio_floor")
            volume_ceil = config.get_float("sauron.hardware.stimuli.audio.audio_ceil")
            # final = resized + (volume * (volume_floor / 255) - volume_floor)
            # print(volume * (volume_ceil - volume_floor) / 255 + volume_floor)
            final = resized + volume * (volume_ceil - volume_floor) / 255 + volume_floor

        play_obj = sa.WaveObject(final.raw_data, 1, 2, 44100)

        return AudioInfo(play_obj, applied_length, volume)