예제 #1
0
    def __init__(self, converter, channels, frame_length, analysis_hop,
                 synthesis_hop, analysis_window, synthesis_window,
                 delta_before=0, delta_after=0):
        """Store the TSM configuration and allocate the working buffers.

        :param converter: object turning analysis frames into synthesis
            frames; its ``clear`` method is invoked through :meth:`clear`.
        :param channels: number of channels of the buffers.
        :param frame_length: length of a frame, in samples.
        :param analysis_hop: distance between two analysis frames.
        :param synthesis_hop: distance between two synthesis frames.
        :param analysis_window: window for the analysis frames.
        :param synthesis_window: window for the synthesis frames.
        :param delta_before: extra samples kept before an analysis frame.
        :param delta_after: extra samples kept after an analysis frame.
        """
        # pylint: disable=too-many-arguments
        self._converter = converter

        # Frame geometry
        self._channels, self._frame_length = channels, frame_length
        self._analysis_hop, self._synthesis_hop = analysis_hop, synthesis_hop

        # Window functions
        self._analysis_window, self._synthesis_window = (
            analysis_window, synthesis_window)

        # Extra context requested around each analysis frame
        self._delta_before, self._delta_after = delta_before, delta_after

        # Number of input samples to drop before the next analysis frame is
        # read; this is needed when the analysis hop exceeds the frame length.
        self._skip_input_samples = 0

        # Number of output samples to drop so that the output signal starts
        # in the middle of a frame, which should be the peak of the window
        # function.
        self._skip_output_samples = 0

        # The normalize window is the product of both windows; a product of
        # None is replaced by a window of ones.
        normalize_window = windows.product(self._analysis_window,
                                           self._synthesis_window)
        if normalize_window is None:
            normalize_window = np.ones(self._frame_length)
        self._normalize_window = normalize_window

        # Buffers: the input side is widened by the requested context, the
        # output side always holds exactly one frame.
        padded_length = self._frame_length + (
            self._delta_before + self._delta_after)
        self._in_buffer = CBuffer(self._channels, padded_length)
        self._analysis_frame = np.empty((self._channels, padded_length))
        self._out_buffer = CBuffer(self._channels, self._frame_length)
        self._normalize_buffer = NormalizeBuffer(self._frame_length)

        self.clear()
예제 #2
0
class AnalysisSynthesisTSM(TSM):
    """A :class:`audiotsm.base.tsm.TSM` for real-time analysis-synthesis based
    time-scale modification procedures.

    The basic principle of an analysis-synthesis based TSM procedure is to
    first decompose the input signal into short overlapping frames, called the
    analysis frames. The frames have a fixed length ``frame_length``, and are
    separated by ``analysis_hop`` samples, as illustrated below::

                 <--------frame_length--------><-analysis_hop->
       Frame 1:  [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       Frame 2:                  [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       Frame 3:                                  [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       ...

    It then relocates the frames on the time axis by changing the distance
    between them (to ``synthesis_hop``), as illustrated below::

                 <--------frame_length--------><----synthesis_hop---->
       Frame 1:  [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       Frame 2:                         [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       Frame 3:                                               [~~~~~~~~~~~~~~~~~~~~~~~~~~~~]
       ...

    This changes the speed of the signal by the ratio ``analysis_hop /
    synthesis_hop`` (for example, if the ``synthesis_hop`` is twice the
    ``analysis_hop``, the output signal will be half as fast as the input
    signal).

    However this simple method introduces artifacts to the signal. These
    artifacts can be reduced by modifying the analysis frames by various
    methods. This is done by a ``converter`` object, which converts the
    analysis frames into modified frames called the synthesis frames.

    To further reduce the artifacts, window functions (the ``analysis_window``
    and the ``synthesis_window``) can be applied to the analysis frames and the
    synthesis frames in order to smooth the signal.

    Some TSM procedures (e.g. WSOLA-like methods) may need to have access to
    some samples preceding or following an analysis frame to generate the
    synthesis frame. The `delta_before` and `delta_after` parameters allow to
    specify the numbers of samples needed before and after the analysis frame,
    so that they are available to the ``converter``.

    For more details on Time-Scale Modification procedures, I recommend reading
    "`A Review of Time-Scale Modification of Music Signals`_" by Jonathan
    Driedger and Meinard Müller.

    .. _A Review of Time-Scale Modification of Music Signals:
        http://www.mdpi.com/2076-3417/6/2/57

    .. note::
        This variant records data instead of synthesizing audio: the
        converter is expected to return a ``(synthesis_frame, stft_out)``
        pair, every non-``None`` ``stft_out`` is appended to
        ``stft_output_buffer``, and a snapshot of the first channel of the
        output buffer is appended to ``stft_output_buffer2``. The synthesis
        window, overlap-add and normalization steps are disabled. Use
        :meth:`write_stft_to_file` to save the recorded frames.

    :param converter: an object that implements the conversion of the analysis
        frames into synthesis frames.
    :type converter: :class:`Converter`
    :param channels: the number of channels of the input signal.
    :type channels: int
    :param frame_length: the length of the frames.
    :type frame_length: int
    :param analysis_hop: the number of samples between two consecutive analysis
        frames.
    :type analysis_hop: int
    :param synthesis_hop: the number of samples between two consecutive
        synthesis frames.
    :type synthesis_hop: int
    :param analysis_window: a window applied to the analysis frames
    :type analysis_window: :class:`numpy.ndarray`
    :param synthesis_window: a window applied to the synthesis frames
    :type synthesis_window: :class:`numpy.ndarray`
    :param delta_before: the number of samples preceding an analysis frame that
        the converter requires (this is usually 0, except for WSOLA-like
        methods)
    :type delta_before: int
    :param delta_after: the number of samples following an analysis frame that
        the converter requires (this is usually 0, except for WSOLA-like
        methods)
    :type delta_after: int
    """  # noqa: E501
    # pylint: disable=too-many-instance-attributes
    def __init__(self, converter, channels, frame_length, analysis_hop,
                 synthesis_hop, analysis_window, synthesis_window,
                 delta_before=0, delta_after=0):
        # pylint: disable=too-many-arguments
        self._converter = converter

        self._channels = channels
        self._frame_length = frame_length
        self._analysis_hop = analysis_hop
        self._synthesis_hop = synthesis_hop

        self._analysis_window = analysis_window
        self._synthesis_window = synthesis_window

        self._delta_before = delta_before
        self._delta_after = delta_after

        # When the analysis hop is larger than the frame length, some samples
        # from the input need to be skipped. self._skip_input_samples tracks
        # how many samples should be skipped before reading the analysis frame.
        self._skip_input_samples = 0

        # This attribute is used to start the output signal in the middle of a
        # frame, which should be the peak of the window function
        self._skip_output_samples = 0

        # Compute the normalize window
        self._normalize_window = windows.product(self._analysis_window,
                                                 self._synthesis_window)

        if self._normalize_window is None:
            self._normalize_window = np.ones(self._frame_length)

        # Initialize the buffers
        delta = self._delta_before + self._delta_after
        self._in_buffer = CBuffer(self._channels, self._frame_length + delta)
        self._analysis_frame = np.empty(
            (self._channels, self._frame_length + delta))
        self._out_buffer = CBuffer(self._channels, self._frame_length)
        self._normalize_buffer = NormalizeBuffer(self._frame_length)

        # One entry per processed frame: the converter's STFT output ...
        self.stft_output_buffer = []
        # ... and a snapshot of the first channel of the output buffer.
        self.stft_output_buffer2 = []

        self.clear()

    def clear(self):
        """Reset the buffers and the converter to their initial state.

        The frames recorded in ``stft_output_buffer`` and
        ``stft_output_buffer2`` are left untouched.
        """
        # Clear the buffers
        self._in_buffer.remove(self._in_buffer.length)
        self._out_buffer.remove(self._out_buffer.length)
        self._out_buffer.right_pad(self._frame_length)
        self._normalize_buffer.remove(self._normalize_buffer.length)

        # Left pad the input with half a frame of zeros, and ignore that half
        # frame in the output. This makes the output signal start in the middle
        # of a frame, which should be the peak of the window function.
        self._in_buffer.write(np.zeros(
            (self._channels, self._delta_before + self._frame_length // 2)))
        self._skip_output_samples = self._frame_length // 2

        # Clear the converter
        self._converter.clear()

    def flush_to(self, writer):
        """Drain the samples that are ready in the output buffer.

        :param writer: the object the ready samples are written to, or
            ``None`` to discard them.
        :returns: a tuple ``(n, finished)`` where ``n`` is the number of
            samples taken out of the output buffer and ``finished`` is
            ``True`` when no ready sample is left, in which case the object
            has been reset with :meth:`clear`.
        :raises RuntimeError: if the input buffer has no remaining space,
            i.e. there is still a frame to process.
        """
        if self._in_buffer.remaining_length == 0:
            raise RuntimeError("There is still data to process in the input "
                               "buffer, flush_to method should only be called "
                               "when write_to returns True.")

        if writer is not None:
            n = self._out_buffer.write_to(writer)
        else:
            n = self.clear_out_buffer()

        if self._out_buffer.ready == 0:
            # The output buffer is empty
            self.clear()
            return n, True

        return n, False

    def get_max_output_length(self, input_length):
        """Return an upper bound on the output length for ``input_length``
        input samples.

        :param input_length: the number of input samples.
        :returns: ``0`` when every input sample would be skipped, otherwise
            ``(input_length // analysis_hop + 1) * synthesis_hop`` computed
            on the samples left after the pending skip.
        """
        input_length -= self._skip_input_samples
        if input_length <= 0:
            return 0

        n_frames = input_length // self._analysis_hop + 1
        return n_frames * self._synthesis_hop

    def _process_frame(self):
        """Read an analysis frame from the input buffer, convert it, and
        record the converter's STFT output.

        When the converter returns ``None`` as STFT output, the frame is
        consumed from the input buffer but nothing is recorded and the output
        buffer is left unchanged.
        """
        # Generate the analysis frame and discard the input samples that will
        # not be needed anymore
        self._in_buffer.peek(self._analysis_frame)
        self._in_buffer.remove(self._analysis_hop)

        # Apply the analysis window
        windows.apply(self._analysis_frame, self._analysis_window)

        # The synthesis frame is intentionally discarded: in this variant the
        # synthesis window, overlap-add and normalization steps are disabled,
        # so nothing is ever added to the output buffer.
        _, stft_out = self._converter.convert_frame(self._analysis_frame)
        if stft_out is None:
            return

        self.stft_output_buffer.append(stft_out)

        # Mark one hop of samples as ready before trying to read them
        self._out_buffer.set_ready(self._synthesis_hop)

        # Snapshot the output buffer. The array is zero-initialized so that
        # any sample peek() does not fill is deterministic instead of being
        # uninitialized memory that would later be saved to disk.
        snapshot = np.zeros((self._channels, self._out_buffer.length))
        self._out_buffer.peek(snapshot)
        self.stft_output_buffer2.append(snapshot[0, :])

        self._normalize_buffer.remove(self._synthesis_hop)

    def read_from(self, reader):
        """Consume samples from ``reader`` and process a frame when possible.

        :param reader: the object the input samples are read from.
        :returns: the number of samples consumed from ``reader``, skipped
            samples included.
        """
        n = reader.skip(self._skip_input_samples)
        self._skip_input_samples -= n
        if self._skip_input_samples > 0:
            return n

        n += self._in_buffer.read_from(reader)

        if (self._in_buffer.remaining_length == 0 and
                self._out_buffer.remaining_length >= self._synthesis_hop):
            # The input buffer has enough data to process, and there is enough
            # space in the output buffer to store the output
            self._process_frame()

            # Skip output samples if necessary
            skipped = self._out_buffer.remove(self._skip_output_samples)
            self._out_buffer.right_pad(skipped)
            self._skip_output_samples -= skipped

            # Set the number of input samples to be skipped (never negative)
            self._skip_input_samples = max(
                self._analysis_hop - self._frame_length, 0)

        return n

    def set_speed(self, speed):
        """Change the speed ratio by updating the analysis hop.

        :param speed: the speed ratio; the analysis hop becomes
            ``int(synthesis_hop * speed)`` and is forwarded to the converter.
        """
        self._analysis_hop = int(self._synthesis_hop * speed)
        self._converter.set_analysis_hop(self._analysis_hop)

    def write_to(self, writer):
        """Move the ready output samples to ``writer``, or discard them.

        :param writer: the object the ready samples are written to, or
            ``None`` to only remove them from the output buffer.
        :returns: a tuple ``(n, finished)`` where ``n`` is the number of
            samples taken out of the output buffer and ``finished`` is
            ``True`` when the input buffer is not full and no ready sample is
            left in the output buffer.
        """
        if writer is not None:
            n = self._out_buffer.write_to(writer)
        else:
            # No writer: just drop the ready samples
            n = self.clear_out_buffer()

        self._out_buffer.right_pad(n)

        if (self._in_buffer.remaining_length > 0 and
                self._out_buffer.ready == 0):
            # There is not enough data to process in the input buffer, and the
            # output buffer is empty
            return n, True

        return n, False

    def clear_out_buffer(self):
        """Discard every sample that is ready in the output buffer.

        :returns: the number of samples that were discarded.
        """
        # Use the buffer's own sample count: len() of a 2-D (channels,
        # samples) slice would give the number of channels instead.
        n = self._out_buffer.ready
        self._out_buffer.remove(n)
        return n

    def write_stft_to_file(self, fname, dbg=False):
        """Save the recorded frames as two NumPy ``.npy`` files.

        ``stft_output_buffer`` is saved to ``fname`` and
        ``stft_output_buffer2`` to ``fname`` with its extension replaced by
        ``_v2.npy`` (``out.npy`` gives ``out_v2.npy``). Each file holds one
        column per recorded frame.

        :param fname: path of the first output file.
        :param dbg: when true, print the dimensions and the first frame of
            each buffer.
        :raises IndexError: if no frame has been recorded.
        """
        # Local import so that this method does not depend on the module's
        # import block.
        import os

        self._save_frames(fname, self.stft_output_buffer, dbg)

        root, _ = os.path.splitext(fname)
        self._save_frames(root + '_v2.npy', self.stft_output_buffer2, dbg)

    @staticmethod
    def _save_frames(fname, frames, dbg):
        """Stack ``frames`` as columns and save the result to ``fname``."""
        n = len(frames)
        m = len(frames[0])
        if dbg:
            print(n, m)

        stacked = np.array(frames)
        if dbg:
            print(stacked[0])

        stacked = stacked.reshape((n, m)).transpose()

        print('Writing to {}; Shape: {}'.format(fname, stacked.shape))
        # The file is opened only once the array is built, so a failure above
        # does not leave an empty file behind.
        with open(fname, 'wb') as fout:
            np.save(fout, stacked)
예제 #3
0
def generate_cbuffers(array, ready, max_length):
    """Yield ``max_length`` CBuffers that all hold the data of ``array``.

    The buffers differ only in how far they were rotated (by 0 to
    ``max_length - 1`` samples) before the data was stored. Each one has
    ``max_length`` as maximum length and ``ready`` samples marked as ready.
    """
    data = np.array(array)

    def build(rotation):
        cbuffer = CBuffer(data.shape[0], max_length)

        # Rotate the buffer by padding then dropping ``rotation`` samples
        cbuffer.right_pad(rotation)
        cbuffer.remove(rotation)

        # Make room for the data, store it and mark the ready samples
        cbuffer.right_pad(data.shape[1])
        cbuffer.add(data)
        cbuffer.set_ready(ready)
        return cbuffer

    for rotation in range(max_length):
        yield build(rotation)