def __init__(self, filepath, samplerate=None, channels=None,
             bytedepth=None, mode="r"):
    """Base class for interfacing with audio files.

    When writing audio files, samplerate, channels, and bytedepth must be
    specified. When reading, these parameters may be None to use the
    default values of the audio file.

    Parameters
    ----------
    filepath : str
        Absolute path to a sound file. Does not need to exist (yet).
    samplerate : float, default=None
        Samplerate for the audio file.
    channels : int, default=None
        Number of channels for the audio file.
    bytedepth : int, default=None
        bytedepth (in bytes) of the returned file. For example, CD-quality
        audio has a bytedepth of 2 (16-bit).
    mode : str, default='r'
        Open the file for [r]eading or [w]riting.

    Raises
    ------
    ValueError
        If `sox.is_valid_file_format` rejects `filepath`, or if the file is
        opened for writing without a samplerate, channels, or bytedepth.
    """
    logging.debug(util.classy_print(AudioFile, "Constructor."))
    if not sox.is_valid_file_format(filepath):
        raise ValueError("Cannot handle this filetype: {}"
                         "".format(filepath))

    if mode == "w":
        # Explicit checks instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently skip this validation.
        if not samplerate:
            raise ValueError("Writing audiofiles requires a samplerate.")
        if not channels:
            raise ValueError("Writing audiofiles requires channels.")
        if not bytedepth:
            raise ValueError("Writing audiofiles requires a bytedepth.")

    self._filepath = filepath
    self._wave_handle = None
    # Scratch path in WAVE format, obtained from util.temp_file.
    self._temp_filepath = util.temp_file(formats.WAVE)
    self._mode = mode

    logging.debug(util.classy_print(AudioFile, "Opening wave file."))
    self.__get_handle__(self.filepath, samplerate, channels, bytedepth)
    logging.debug(util.classy_print(AudioFile, "Success!"))

    if self.duration == 0:
        warnings.warn("Caution: You have opened an empty sound file!")
def close(self):
    """Explicit destructor.

    Closes the wave handle if one is open. When the file was opened for
    writing and ``self._CONVERT`` is set, the temporary file is converted
    into ``self.filepath`` with ``sox.convert``. Afterwards the temporary
    file is removed if it exists.

    Raises
    ------
    IOError
        If ``sox.convert`` reports failure. In that case the temporary
        file is left on disk, because this method raises before reaching
        the clean-up step.
    """
    logging.debug(util.classy_print(AudioFile, "Cleaning up."))
    if self._wave_handle:
        self._wave_handle.close()

    if self._mode == 'w' and self._CONVERT:
        logging.debug(
            util.classy_print(AudioFile, "Conversion required for writing."))
        # The conversion must run unconditionally. Wrapping the call in an
        # `assert` would drop it entirely under `python -O`, and the output
        # file would never be produced before the temp file is deleted.
        converted = sox.convert(input_file=self._temp_filepath,
                                output_file=self.filepath,
                                samplerate=self.samplerate,
                                bytedepth=self.bytedepth,
                                channels=self.channels)
        if not converted:
            raise IOError("Failed to convert {} to {}".format(
                self._temp_filepath, self.filepath))

    if self._temp_filepath and os.path.exists(self._temp_filepath):
        logging.debug(util.classy_print(AudioFile,
                                        "Temporary file deleted."))
        os.remove(self._temp_filepath)
def __init__(self, filepath, framesize, samplerate=None, channels=None,
             bytedepth=None, overlap=0.5, stride=None, framerate=None,
             time_points=None, alignment='center', offset=0):
    """Frame-based audio reader; the file is always opened for reading.

    Every argument is forwarded unchanged to the parent constructor, with
    the mode fixed to 'r'.

    Parameters
    ----------
    filepath : str
        Path to the audio file.
    framesize : int
        Size of each frame of audio.
    samplerate, channels, bytedepth : default=None
        Audio format parameters handed to the parent constructor.
    overlap, stride, framerate, time_points :
        Frame-advance options handed to the parent constructor.
    alignment : str, default='center'
        Frame alignment handed to the parent constructor.
    offset : scalar, default=0
        Alignment offset handed to the parent constructor.
    """
    logging.debug(util.classy_print(FramedAudioReader, "Constructor."))
    # Make sure the attribute exists before the parent constructor runs.
    self._wave_handle = None
    super(FramedAudioReader, self).__init__(
        filepath,
        framesize,
        samplerate=samplerate,
        channels=channels,
        bytedepth=bytedepth,
        mode='r',  # Readers never write.
        time_points=time_points,
        framerate=framerate,
        stride=stride,
        overlap=overlap,
        alignment=alignment,
        offset=offset)
def read_frame_at_index(self, sample_index, framesize=None):
    """Read a zero-padded frame of audio that begins at 'sample_index'.

    Parameters
    ----------
    sample_index : int
        Sample position of the first row of the frame. May be negative, in
        which case the leading rows of the frame stay zero.
    framesize : int, default=None
        Number of samples in the frame; falls back to ``self.framesize``
        when not given.

    Returns
    -------
    frame : np.ndarray
        Array with ``framesize`` rows and ``self.channels`` columns. Rows
        for which no data was read are left as zeros.
    """
    num_rows = framesize if framesize else self.framesize
    frame = np.zeros([num_rows, self.channels])

    # The whole frame lies at or before the first sample: nothing to read.
    if (sample_index + num_rows) <= 0:
        return frame

    write_start = 0
    read_count = num_rows
    if sample_index < 0:
        # Frame straddles the start: skip the rows that precede sample 0
        # and read only the remainder from the beginning of the data.
        lead = np.abs(sample_index)
        read_count = num_rows - lead
        write_start = lead
        sample_index = 0
    elif sample_index > self.num_samples:
        # Requested start is past the last sample.
        return frame

    logging.debug(util.classy_print(
        FramedAudioReader, "sample_index = %d" % sample_index))
    self._wave_handle.setpos(sample_index)
    raw_bytes = self._wave_handle.readframes(int(read_count))
    samples = util.byte_string_to_array(
        byte_string=raw_bytes,
        channels=self.channels,
        bytedepth=self.bytedepth)

    # Copy what was actually read; any remaining rows keep their zeros.
    write_stop = write_start + samples.shape[0]
    frame[write_start:write_stop] = samples
    return frame
def reset(self):
    """Return the framed file to its initial state.

    Delegates to the parent class' ``reset``, then replaces the frame
    buffer with zeros of shape ``self.frameshape`` and sets the time
    index back to 0.
    """
    logging.debug(util.classy_print(FramedAudioFile, "Reset."))
    super(FramedAudioFile, self).reset()

    empty_buffer = np.zeros(self.frameshape)
    self.framebuffer = empty_buffer
    self._time_index = 0
def __init__(self, filepath, framesize, samplerate=None, channels=None,
             bytedepth=None, mode='r', time_points=None, framerate=None,
             stride=None, overlap=0.5, alignment='center', offset=0):
    """Frame-based audio file parsing.

    Parameters
    ----------
    filepath : str
        Absolute path to an audio file.
    framesize : int
        Size of each frame of audio.
    samplerate : int, default = as-is
        Desired sample rate.
    channels : int, default = as-is
        Desired number of channels.
    bytedepth : int, default = as-is
        Desired byte depth.
    mode : str, default='r'
        Open the file for [r]eading or [w]riting.
    time_points : array_like
        Iterable of absolute points in time to align frames.
    framerate : scalar, default = None
        Uniform frequency to advance frames from the given file.
    stride : int, default = None
        Integer number of samples to advance frames.
    overlap : scalar, default = 0.5
        Percent overlap between adjacent frames.
    alignment : str, default = 'center'
        Controls alignment of the frame, one of ['left','center','right'].
    offset : scalar, default = 0
        Time in seconds to shift the alignment of a frame.

    Notes
    -----
    There are several roughly equivalent ways to describe how frames
    advance through the data. They are honored in this order of
    preference:

    1. time_points : Explicit points in time at which to read or write a
       frame, subject to the alignment and offset parameters. Handles
       asynchronous, un-ordered framing with no assumed relationship
       between time points, and is independent of both samplerate and
       framesize.

    2. framerate : A constant frame frequency in Hz, i.e. M frames per
       second. Also independent of samplerate and framesize.

    3. stride : A constant number of samples between frames. Independent
       of framesize but not of samplerate, i.e. different samplerates
       yield different framerates, which may or may not be what you
       actually want.

    4. overlap, default = 0.5 (50% overlap) : The simplest way to obtain a
       frame stride, but arguably the most fragile, since the frames then
       depend on both samplerate and framesize.
    """
    logging.debug(util.classy_print(FramedAudioFile, "Constructor."))
    # The parent constructor runs before any framing state is set.
    super(FramedAudioFile, self).__init__(
        filepath,
        samplerate=samplerate,
        channels=channels,
        bytedepth=bytedepth,
        mode=mode)

    # Framing configuration.
    self._framesize = framesize
    self._alignment = alignment
    self._offset = offset
    # Placeholder value; presumably replaced by _init_striding -- confirm.
    self._time_points = [None]

    logging.debug(util.classy_print(FramedAudioFile, "Init Striding."))
    self._init_striding(time_points, framerate, stride, overlap)
    self.reset()