Ejemplo n.º 1
0
def createDecoder(freq, channels):
    """Allocate and initialise an Opus decoder in Python-owned memory.

    The decoder state is placed in a buffer created with ``ctypes`` so
    that Python, rather than libopus, is responsible for both allocating
    and releasing the memory.

    Args:
        freq: Sampling rate; converted to ``opus.opus_int32`` before use.
        channels: Number of channels; converted to ``opus.opus_int32``
            before use.

    Returns:
        A ``ctypes`` pointer to the initialised ``opus.OpusDecoder``.

    Raises:
        Exception: If ``opus.opus_decoder_init`` reports anything other
            than ``opus.OPUS_OK``.
    """
    # Both arguments must be handed to libopus as 32-bit integers.
    freq = opus.opus_int32(freq)
    channels = opus.opus_int32(channels)

    # Ask libopus how large the decoder state is, then allocate exactly
    # that many bytes from Python.
    state_bytes = opus.opus_decoder_get_size(channels)
    state_buffer = ctypes.create_string_buffer(state_bytes)

    # View the raw buffer as a decoder (opus.od_p is the equivalent
    # shorthand pointer type).
    decoder = ctypes.cast(state_buffer, ctypes.POINTER(opus.OpusDecoder))

    status = opus.opus_decoder_init(decoder, freq, channels)
    if status == opus.OPUS_OK:
        return decoder

    # Initialisation failed: surface libopus's own description.
    reason = opus.opus_strerror(status).decode("utf")
    raise Exception("An error occurred while creating the decoder: " + reason)
Ejemplo n.º 2
0
def create_encoder(npBufSource, samples_per_second):
    """Allocate and initialise an Opus encoder in Python-owned memory.

    The encoder state is placed in a buffer created with ``ctypes`` so
    that Python is responsible for both allocating and releasing it.

    Args:
        npBufSource: Array whose ``shape[1]`` gives the number of
            channels to encode.
        samples_per_second: Sampling rate; converted to
            ``opus.opus_int32`` before use.

    Returns:
        A ``ctypes`` pointer to the initialised ``opus.OpusEncoder``.

    Raises:
        Exception: If ``opus.opus_encoder_init`` reports anything other
            than ``opus.OPUS_OK``.
    """
    # OPUS_APPLICATION_AUDIO: general audio mode (the original author
    # describes it as automatic speech/music detection).
    mode = opus.OPUS_APPLICATION_AUDIO

    # libopus wants the sampling rate as a 32-bit integer.
    sample_rate = opus.opus_int32(samples_per_second)

    # The channel count comes from the second axis of the source array.
    channels = npBufSource.shape[1]

    # Ask libopus how large the encoder state is, then allocate exactly
    # that many bytes from Python.
    state_bytes = opus.opus_encoder_get_size(channels)
    state_buffer = ctypes.create_string_buffer(state_bytes)

    # View the raw buffer as an encoder (opus.oe_p is the equivalent
    # shorthand pointer type).
    encoder = ctypes.cast(state_buffer, ctypes.POINTER(opus.OpusEncoder))

    status = opus.opus_encoder_init(encoder, sample_rate, channels, mode)
    if status == opus.OPUS_OK:
        return encoder

    # Initialisation failed: surface libopus's own description.
    reason = opus.opus_strerror(status).decode("utf")
    raise Exception("An error occurred while creating the encoder: " + reason)
Ejemplo n.º 3
0
    def record(self):
        """Play the starting sound and backing track while recording input.

        Opens an input/output ``sd.Stream`` at 48 kHz and blocks until the
        stream's ``finished_callback`` fires.  The stream callback is a
        small state machine driven by ``step_no``:

        * 0 -- play ``self._starting_sound_pcm``;
        * 1 -- play silence while feeding the input to the queue to warm
          up the encoder (currently never entered: step 0 jumps straight
          to step 2, see the FIXME below);
        * 2 -- play ``self._backing_track_pcm`` and queue recorded frames;
        * 3 -- play silence and keep recording for ``warmup_samples``
          samples;
        * any other value -- stop the stream.

        Recorded audio is accumulated in a ``FrameBuffer`` and handed over
        in 960-sample frames through the queue ``q``; during step 2 a copy
        of the played output is also pushed to ``q2`` (debug).  Both
        queues are defined outside this method.

        On return ``self._pre_skip`` holds the number of warm-up samples
        (encoder look-ahead plus 120 unless step 1 ran and extended it).

        Raises:
            Exception: If the encoder look-ahead cannot be obtained.
        """
        # Define the stream's callback
        count = 0 # DEBUG
        last_inputBufferAdcTime = None

        def callback(indata, outdata, samples, time, status):
            # Invoked by the sounddevice stream for every audio block:
            # fills `outdata` with what should be played and consumes the
            # recorded block in `indata`.
            nonlocal step_no, current_pos, stream, warmup_samples
            nonlocal samples_per_frame
            nonlocal count
            nonlocal frame_buffer
            nonlocal last_inputBufferAdcTime

            start_time = t.time()

            # internal_latency = (time.outputBufferDacTime
            #                     - time.inputBufferAdcTime)
            # print("internal latency:", internal_latency,
            #       "samples:", samples)

            # if last_inputBufferAdcTime is not None:
            #     print("time diff:", time.inputBufferAdcTime - last_inputBufferAdcTime)
            # last_inputBufferAdcTime = time.inputBufferAdcTime

            if status:
                print(status)

            count += 1

            # Grab the lock so that we're threadsafe
            with self._lock:
                # TODO: Need to add monitoring
                # (The previous placeholder copied `indata` element-wise
                # into an unused Python list on every call; that work was
                # dropped because nothing read the copy and it only added
                # latency inside the callback.)

                # Step No 0
                # =========
                if step_no == 0:

                    if status.input_underflow:
                        print("INPUT UNDERFLOW: Not a problem as we're just writing out.  Count:",count)
                    elif status.input_overflow:
                        print("INPUT OVERFLOW: Not a problem as we're just writing out. Count:",count)
                    elif status:
                        print(status)
                        print("count: ",count)
                        print("ABORTING")
                        raise sd.CallbackAbort


                    # If the number of output channels does not match
                    # our starting sound's PCM, we'll need to adjust
                    # it.  (Not implemented yet: only a message is
                    # printed and the copy below is still attempted.)
                    if outdata.shape[1] != self._starting_sound_pcm.shape[1]:
                        # Number of channels does not match
                        print("Number of channels does not match")

                    # Copy the starting sound to the output
                    remaining = len(self._starting_sound_pcm) - current_pos
                    if remaining >= samples:
                        outdata[:] = self._starting_sound_pcm[
                            current_pos : current_pos+samples
                        ]
                    else:
                        # Copy what's left of the starting sound, and fill
                        # the rest with silence
                        outdata[:remaining] = self._starting_sound_pcm[
                            current_pos : len(self._starting_sound_pcm)
                        ]
                        # Scalar fill: silence for however many output
                        # channels the stream has.
                        outdata[remaining:samples] = 0.0

                    # Adjust the starting sound position
                    current_pos += samples

                    if current_pos >= len(self._starting_sound_pcm):
                        print("Finished playing starting sound; moving to next step")
                        # FIXME: should be `step_no += 1`; the warm-up
                        # step (1) is deliberately skipped for now.
                        step_no = 2
                        current_pos = 0


                # Step No 1
                # =========
                elif step_no == 1:
                    if status:
                        print(status, "in step #1; ignoring")

                    # Play silence
                    outdata[:] = 0.0
                    current_pos += samples

                    # Warm-up the encoder.  See "Encoder Guidelines"
                    # at https://tools.ietf.org/html/rfc7845#page-27
                    # NOTE(review): presumably FrameBuffer.put copies the
                    # samples out of `indata` -- verify, since the buffer
                    # belongs to the audio stream.
                    frame_buffer.put(indata)
                    if frame_buffer.size() >= samples_per_frame:
                        # Pass the complete frame to another thread for processing
                        q.put(frame_buffer.get(samples_per_frame))

                    if current_pos >= warmup_samples:
                        print("Finished warming up the encoder; moving to next step")
                        step_no += 1
                        # Remember how many samples were really consumed
                        # during warm-up; this becomes the pre-skip.
                        warmup_samples = current_pos
                        current_pos = 0


                # Step No 2
                # =========
                elif step_no == 2:
                    if status:
                        print(status, "in step #2; aborting")
                        print("count: ",count)
                        print("ABORTING")
                        raise sd.CallbackAbort


                    # Play backing track and record voice
                    # Copy the backing track to the output
                    remaining = len(self._backing_track_pcm) - current_pos
                    if remaining >= samples:
                        outdata[:] = self._backing_track_pcm[
                            current_pos : current_pos+samples
                        ]
                    else:
                        # Copy what's left of the backing track, and fill
                        # the rest with silence
                        outdata[:remaining] = self._backing_track_pcm[
                            current_pos : len(self._backing_track_pcm)
                        ]
                        outdata[remaining:samples] = 0.0

                    # Adjust the position
                    current_pos += samples


                    # DEBUG send outdata to q2
                    q2.put_nowait(outdata.copy())

                    # Record the microphone
                    frame_buffer.put(indata)

                    # DEBUG send the microphone data straight to the queue
                    #q.put_nowait(indata.copy())

                    while frame_buffer.size() >= samples_per_frame:
                        # Pass complete frames to another thread for processing
                        frame = frame_buffer.get(samples_per_frame)
                        q.put_nowait(frame)

                    if current_pos >= len(self._backing_track_pcm):
                        print("Finished playing backing track; moving to next step")
                        step_no += 1
                        current_pos = 0

                    # Report slow callbacks (more than 2 ms) for this step.
                    end_time = t.time()
                    duration = end_time - start_time
                    if duration > 2/1000:
                        print("    thread call duration at end of step 2(ms):", round(duration*1000, 2))


                # Step No 3
                # =========
                elif step_no == 3:
                    if status:
                        print(status, "in step #3; ignoring")

                    # Play one frame's worth of silence, just to
                    # ensure we can keep the frame size constant.
                    outdata[:] = 0.0
                    current_pos += samples

                    # Record the microphone
                    frame_buffer.put(indata)
                    if frame_buffer.size() >= samples_per_frame:
                        # Pass the complete frame to another thread for processing
                        q.put(frame_buffer.get(samples_per_frame))

                    if current_pos >= warmup_samples:
                        print("Finished playing a frame's worth of silence; moving to next step")
                        step_no += 1
                        current_pos = 0


                else:
                    print("Stopping")
                    raise sd.CallbackStop


            if stream.cpu_load > 0.2:
                print("CPU Load above 20% during playback")

            # Report slow callbacks (more than 2 ms) overall.
            end_time = t.time()
            duration = end_time - start_time
            if duration > 2/1000:
                print("    thread call duration (ms):", round(duration*1000, 2))



        # Step number indicates where we are in the recording process
        # 0: play starting sound
        # 1: silence used for warming up the encoder
        # 2: backing track and recording
        # 3: silence + recording to ensure we finish on a clean frame
        step_no = 0

        # Step 0: Set the current position for the sound being played
        current_pos = 0


        # Step 1: Encoder warmup
        # Obtain the algorithmic delay of the Opus encoder
        delay = opus.opus_int32()
        result = opus.opus_encoder_ctl(
            self._encoder,
            opus.OPUS_GET_LOOKAHEAD_REQUEST,
            ctypes.pointer(delay)
        )
        if result != opus.OPUS_OK:
            raise Exception("Failed in OPUS_GET_LOOKAHEAD_REQUEST")
        delay_samples = delay.value

        # The encoder guidelines recommend that at least an extra 120
        # samples is added to delay_samples.  See
        # https://tools.ietf.org/html/rfc7845#page-27
        extra_samples = 120
        warmup_samples = delay_samples + extra_samples

        # Create a buffer for the recorded samples; frames of
        # samples_per_frame samples are taken out of it by the callback.
        samples_per_frame = 960
        channels = 2
        frame_buffer = FrameBuffer(
            48000, # one second FIXME
            channels
        )


        # Create an event for communication between threads
        finished = threading.Event()


        # Create an input-output sounddevice stream
        print("Creating stream")
        stream = sd.Stream(
            samplerate=48000,
            #channels=channels,
            dtype=numpy.float32,
            latency=100/1000, #"high",
            callback=callback,
            finished_callback=finished.set
        )

        with stream:
            finished.wait()  # Wait until playback is finished

        # Store the final number of pre-skip warmup samples
        self._pre_skip = warmup_samples
Ejemplo n.º 4
0
    def write_opus(self, output_filename):
        """Encode the queued PCM frames and save them as an Ogg Opus file.

        Drains the queue ``q`` (defined outside this method), encodes each
        frame with ``self._encoder`` and writes the resulting Ogg pages to
        ``output_filename``.  The stream starts with the Opus
        identification header (using ``self._pre_skip``) and the comment
        header, which are flushed onto their own page(s) before any audio.

        Args:
            output_filename: Path of the file to create (opened in binary
                write mode; closed even if encoding fails).

        Raises:
            Exception: If a packet cannot be placed in the Ogg stream, if
                a queued frame does not hold exactly 960 samples, or if
                the Opus encoder reports an error.
        """
        # Every queued frame must contain exactly this many samples.
        samples_per_frame = 960

        # Create a new stream state with a random serial number
        stream_state = opus_helpers.create_stream_state()

        # Packet and page structures, reused for every pass
        ogg_packet = ogg.ogg_packet()
        ogg_page = ogg.ogg_page()

        # Running packet number and PCM sample count (granule position)
        count_packets = 0
        count_samples = 0

        # Allocate storage space for the encoded frame.  4,000 bytes
        # is the recommended maximum buffer size for the encoded
        # frame.
        max_bytes_in_encoded_frame = opus.opus_int32(4000)
        EncodedFrameType = ctypes.c_ubyte * max_bytes_in_encoded_frame.value
        encoded_frame = EncodedFrameType()

        # Pointer to the first byte of the encoded-frame buffer
        encoded_frame_ptr = ctypes.cast(
            ctypes.pointer(encoded_frame),
            ctypes.POINTER(ctypes.c_ubyte)
        )

        def submit_packet(data_ptr, num_bytes, granulepos, error_message,
                          b_o_s=0, e_o_s=0):
            # Fill the shared packet structure and hand it to the stream.
            nonlocal count_packets
            ogg_packet.packet = data_ptr
            ogg_packet.bytes = num_bytes
            ogg_packet.b_o_s = b_o_s
            ogg_packet.e_o_s = e_o_s
            ogg_packet.granulepos = granulepos
            ogg_packet.packetno = count_packets
            count_packets += 1

            result = ogg.ogg_stream_packetin(
                stream_state,
                ogg_packet
            )
            if result != 0:
                raise Exception(error_message)

        def write_pages(f, next_page, label):
            # Write every page that `next_page` (ogg_stream_pageout or
            # ogg_stream_flush) is currently able to produce.
            while next_page(ctypes.pointer(stream_state),
                            ctypes.pointer(ogg_page)) != 0:
                print(label)
                f.write(bytes(ogg_page.header[0:ogg_page.header_len]))
                f.write(bytes(ogg_page.body[0:ogg_page.body_len]))

        # The context manager closes the file even when a step raises.
        with open(output_filename, "wb") as f:
            # Headers
            # =======

            # Identification header: the only packet flagged as the
            # beginning of the stream.
            id_header = opus.make_identification_header(
                pre_skip = self._pre_skip
            )
            submit_packet(
                ctypes.cast(id_header, ogg.c_uchar_p),
                len(id_header),
                0,
                "Failed to write Opus identification header",
                b_o_s=1
            )

            # Comment header
            comment_header = opus.make_comment_header()
            submit_packet(
                ctypes.cast(comment_header, ogg.c_uchar_p),
                len(comment_header),
                0,
                "Failed to write Opus comment header"
            )

            # Force the headers out before any audio data
            write_pages(f, ogg.ogg_stream_flush, "Writing header page")

            # Frames
            # ======

            # Loop through the PCM frames in the queue
            while not q.empty():
                # Get the frame from the queue
                frame_pcm = q.get_nowait()

                # If nothing is left after taking this frame it is the
                # final one, so its packet must carry the end-of-stream
                # flag.  (Assumes recording has finished and nothing is
                # still feeding the queue.)
                is_last_frame = q.empty()

                # Convert to opus_int16.  Clip first: a full-scale sample
                # of +1.0 scales to 32768, which does not fit in 16 bits.
                scaled = numpy.clip(frame_pcm * 2**15, -32768, 32767)
                frame_pcm = numpy.array(
                    scaled, dtype=opus.opus_int16, order="C"
                )

                # The encoder reads samples_per_frame samples from the
                # pointer below, so a short frame must be rejected (an
                # assert would be stripped under -O).
                if len(frame_pcm) != samples_per_frame:
                    raise Exception(
                        "Unexpected frame length: expected "
                        + str(samples_per_frame) + " samples but got "
                        + str(len(frame_pcm))
                    )

                # Create a pointer to the start of the frame's data
                source_ptr = frame_pcm.ctypes.data_as(ctypes.c_void_p)

                # Encode the audio
                num_bytes = opus.opus_encode(
                    self._encoder,
                    ctypes.cast(source_ptr, ctypes.POINTER(opus.opus_int16)),
                    samples_per_frame,
                    encoded_frame_ptr,
                    max_bytes_in_encoded_frame
                )

                # Check for any errors during encoding
                if num_bytes < 0:
                    raise Exception("Encoder error detected: "+
                                    opus.opus_strerror(num_bytes).decode("utf"))

                # Writing OggOpus
                # ===============

                # The granule position is the running total of samples
                count_samples += samples_per_frame

                submit_packet(
                    encoded_frame_ptr,
                    num_bytes,
                    count_samples,
                    "Error while placing packet in Ogg stream",
                    e_o_s=1 if is_last_frame else 0
                )

                # Write out any pages that are ready
                write_pages(f, ogg.ogg_stream_pageout, "Writing page")

            # Force the writing of the final page
            write_pages(f, ogg.ogg_stream_flush, "Writing final page")

            # Make sure the queue is empty
            if not q.empty():
                print("WARNING: Failed to completely process all the recorded frames")

        # Finished
        print("Finished writing file")
Ejemplo n.º 5
0
# Preferred frame size, chosen by index into frame_sizes.  It is used
# for most of the encoding; the tail of the buffer may need a smaller
# frame when there is not enough data left to fill this one.
frame_size_index = 5
frame_size = frame_sizes[frame_size_index]


def frame_size_bytes(frame_size):
    """Return the size in bytes of a frame holding ``frame_size`` samples.

    The result is ``frame_size`` multiplied by the module-level
    ``source_channels`` and ``bytes_per_sample``.
    """
    interleaved_values = frame_size * source_channels
    return interleaved_values * bytes_per_sample


# Output buffer for one encoded frame.  4,000 bytes is the recommended
# maximum size for an encoded frame.
max_encoded_frame_bytes = opus.opus_int32(4000)
EncodedFrameType = ctypes.c_ubyte * max_encoded_frame_bytes.value
encoded_frame = EncodedFrameType()


# Pointer to the first byte of the encoded-frame buffer.
encoded_frame_ptr = ctypes.cast(
    ctypes.pointer(encoded_frame),
    ctypes.POINTER(ctypes.c_ubyte),
)


# Total number of bytes to process in the source buffer
# (first axis length x second axis length x bytes per sample).
length_bytes = (
    np_buf_source.shape[0] * np_buf_source.shape[1] * bytes_per_sample
)
Ejemplo n.º 6
0
def encodeThenDecode(npBufSource, npBufTarget, freq,
                     output_filename="test.opus"):
    """Encode a PCM buffer to Opus, save it as Ogg Opus and decode it back.

    The source buffer is walked frame by frame.  Each frame is encoded,
    placed in an Ogg stream that is written to ``output_filename``, and
    immediately decoded again into ``npBufTarget``.  When the data left
    at the end does not fill the preferred frame size, progressively
    smaller valid frame sizes are tried; samples that do not fill even
    the smallest frame are dropped.

    Args:
        npBufSource: Source samples; ``shape[0]`` x ``shape[1]`` values
            of two bytes each are read through a raw pointer (assumes a
            C-contiguous 16-bit array -- TODO confirm with callers).
        npBufTarget: Buffer that receives the decoded 16-bit samples;
            ``shape[1]`` is used as the decoder's channel count.  It must
            be large enough for the decoded audio.
        freq: Sampling rate passed to ``createEncoder``.
        output_filename: Path of the Ogg Opus file to write.  Defaults
            to ``"test.opus"``.

    Raises:
        Exception: If a header or audio packet cannot be placed in the
            Ogg stream, or if the encoder or decoder reports an error.
    """
    # Encoding
    # ========

    # Extract the number of channels in the source
    sourceChannels = npBufSource.shape[1]

    # Create an encoder
    encoder = createEncoder(npBufSource, freq)

    # Frame sizes are measured in number of samples.  There are only a
    # specified number of possible valid frame durations for Opus,
    # which (assuming a frequency of 48kHz) gives the following valid
    # sizes.
    frameSizes = [120, 240, 480, 960, 1920, 2880]

    # Specify the desired frame size.  This will be used for the vast
    # majority of the encoding, except possibly at the end of the
    # buffer (as there may not be sufficient data left to fill a
    # frame.)
    frameSizeIndex = 5
    frameSize = frameSizes[frameSizeIndex]

    # Source samples are two bytes wide
    bytesPerSample = 2

    # Function to calculate the size of a frame in bytes.  It reads
    # sourceChannels and bytesPerSample from this function's scope.
    def frameSizeBytes(frameSize):
        return frameSize * sourceChannels * bytesPerSample

    # Allocate storage space for the encoded frame.  4,000 bytes is
    # the recommended maximum buffer size for the encoded frame.
    maxEncodedFrameBytes = opus.opus_int32(4000)
    encodedFrameType = ctypes.c_ubyte * maxEncodedFrameBytes.value
    encodedFrame = encodedFrameType()

    # Create a pointer to the first byte of the buffer for the encoded
    # frame.
    encodedFramePtr = ctypes.cast(ctypes.pointer(encodedFrame),
                                  ctypes.POINTER(ctypes.c_ubyte))

    # Number of bytes to process in the source buffer
    lengthBytes = (npBufSource.shape[0] * npBufSource.shape[1]
                   * bytesPerSample)

    # Once fewer bytes than this remain, no further frame can be encoded
    smallestFrameBytes = frameSizeBytes(frameSizes[0])

    # Saving
    # ======

    # Create a new stream state with a random serial number
    stream_state = createStreamState()

    # Create a packet (reused for each pass)
    ogg_packet = ogg.ogg_packet()

    # Packet counter
    count_packets = 0

    # PCM samples counter
    count_samples = 0

    # Allocate memory for a page
    ogg_page = ogg.ogg_page()

    def submitPacket(dataPtr, numBytes, granulepos, errorMessage,
                     b_o_s=0, e_o_s=0):
        # Fill the shared packet structure and hand it to the stream.
        nonlocal count_packets
        ogg_packet.packet = dataPtr
        ogg_packet.bytes = numBytes
        ogg_packet.b_o_s = b_o_s
        ogg_packet.e_o_s = e_o_s
        ogg_packet.granulepos = granulepos
        ogg_packet.packetno = count_packets
        count_packets += 1

        if ogg.ogg_stream_packetin(stream_state, ogg_packet) != 0:
            raise Exception(errorMessage)

    def writePages(f, nextPage):
        # Write every page that `nextPage` (ogg_stream_pageout or
        # ogg_stream_flush) is currently able to produce.
        while nextPage(ctypes.pointer(stream_state),
                       ctypes.pointer(ogg_page)) != 0:
            print("Writing page")
            f.write(bytes(ogg_page.header[0:ogg_page.header_len]))
            f.write(bytes(ogg_page.body[0:ogg_page.body_len]))

    # Decoding
    # ========

    # Extract the number of channels for the target
    targetChannels = npBufTarget.shape[1]

    # Create a decoder
    decoderFreq = 48000  # TODO: Test changes to this
    decoderPtr = createDecoder(decoderFreq, targetChannels)

    # The context manager closes the file even when a step raises.
    with open(output_filename, "wb") as f:
        # Identification header: the only packet flagged as the
        # beginning of the stream.
        id_header = opus.make_identification_header(pre_skip=312)
        submitPacket(ctypes.cast(id_header, ogg.c_uchar_p),
                     len(id_header), 0,
                     "Failed to write Opus identification header",
                     b_o_s=1)

        # Comment header
        comment_header = opus.make_comment_header()
        submitPacket(ctypes.cast(comment_header, ogg.c_uchar_p),
                     len(comment_header), 0,
                     "Failed to write Opus comment header")

        # Force the headers out before any audio data
        writePages(f, ogg.ogg_stream_flush)

        # Encode and re-decode the audio
        # ==============================

        # Pointer to a location in the source buffer.  We will increment
        # this as we progress through the encoding of the buffer.  It
        # starts pointing to the first byte.
        sourcePtr = npBufSource.ctypes.data_as(ctypes.c_void_p)
        sourcePtr_init = sourcePtr

        # The number of bytes processed will be the difference between
        # the pointer's current location and the address of the first
        # byte.
        bytesProcessed = sourcePtr.value - sourcePtr_init.value

        # Pointer to a location in the target buffer.  We will increment
        # this as we progress through re-decoding each encoded frame.
        targetPtr = npBufTarget.ctypes.data_as(ctypes.c_void_p)

        # Loop through the source buffer
        while bytesProcessed < lengthBytes:
            print("Processing frame at sourcePtr ", sourcePtr.value)

            # Check if we have enough source data remaining to process
            # at the current frame size
            print("lengthBytes: ", lengthBytes)
            print("bytesProcessed: ", bytesProcessed)
            print("bytes remaining (lengthBytes - bytesProcessed):",
                  lengthBytes - bytesProcessed)
            print("frameSizeBytes(frameSize):", frameSizeBytes(frameSize))
            while lengthBytes - bytesProcessed < frameSizeBytes(frameSize):
                print("Warning! Not enough data for frame.")
                frameSizeIndex -= 1
                if frameSizeIndex < 0:
                    # The data is less than the smallest number of
                    # samples in a frame.  Either we ignore the
                    # remaining samples and shorten the audio, or we pad
                    # the frame with zeros and lengthen the audio.
                    # We'll take the easy option and shorten the audio.
                    break
                frameSize = frameSizes[frameSizeIndex]
                print("Decreased frame size to ", frameSize)

            if frameSizeIndex < 0:
                print("Warning! Ignoring samples at the end of the audio\n" +
                      "as they do not fit into even the smallest frame.")
                break

            # Encode the audio
            print("Encoding audio")
            numBytes = opus.opus_encode(
                encoder,
                ctypes.cast(sourcePtr, ctypes.POINTER(opus.opus_int16)),
                frameSize, encodedFramePtr, maxEncodedFrameBytes)
            print("numBytes: ", numBytes)

            # Check for any errors during encoding
            if numBytes < 0:
                raise Exception("Encoder error detected: " +
                                opus.opus_strerror(numBytes).decode("utf"))

            # Move to next position in the buffer: encoder
            sourcePtr = ctypes.c_void_p(
                sourcePtr.value + frameSizeBytes(frameSize))
            bytesProcessed = sourcePtr.value - sourcePtr_init.value

            # Writing OggOpus
            # ===============

            # The granule position is the running total of samples
            count_samples += frameSize

            # When less than the smallest frame remains, no further
            # packet can follow, so this one ends the stream.
            isLastPacket = (lengthBytes - bytesProcessed
                            < smallestFrameBytes)

            submitPacket(encodedFramePtr, numBytes, count_samples,
                         "Error while placing packet in Ogg stream",
                         e_o_s=1 if isLastPacket else 0)

            # Write out any pages that are ready
            writePages(f, ogg.ogg_stream_pageout)

            # Decode the audio
            print("Decoding audio")
            numSamples = opus.opus_decode(
                decoderPtr,
                encodedFramePtr,
                numBytes,
                ctypes.cast(targetPtr, ctypes.POINTER(ctypes.c_short)),
                5760,  # Max space required in PCM
                0  # decode_fec argument of opus_decode; 0 = not requested
            )
            print("numSamples: ", numSamples)

            # Check for any errors during decoding
            if numSamples < 0:
                raise Exception("Decoder error detected: " +
                                opus.opus_strerror(numSamples).decode("utf"))

            # Move to next position in the buffer: decoder
            targetPtr.value += numSamples * targetChannels * 2

        # Flush whatever is still buffered; pageout alone would leave the
        # final, partially filled page unwritten.
        writePages(f, ogg.ogg_stream_flush)
Ejemplo n.º 7
0
def encodeThenDecode(npBufSource, npBufTarget, freq):
    """Encode a PCM buffer with Opus and immediately decode it again.

    The source buffer is consumed one frame at a time.  Each frame is
    encoded into a scratch buffer and then decoded straight into the
    target buffer, which is written to in place.

    Args:
        npBufSource: Array holding the PCM to encode.  It must provide
            ``shape`` and ``ctypes.data_as`` (as a NumPy array does).
            ``shape[0]`` is used as the number of samples per channel
            and ``shape[1]`` as the number of channels.  Samples are
            assumed to be 16-bit integers -- TODO confirm with caller.
        npBufTarget: Array that receives the decoded PCM.  It is
            assumed to use the same layout and sample width as the
            source -- TODO confirm with caller.
        freq: Sampling frequency passed on to createEncoder().

    Raises:
        Exception: If the encoder or the decoder reports an error, or
            if the target buffer has no room left for decoded audio.
    """
    # Size of one sample of one channel.  Defined first so that the
    # frameSizeBytes() closure below can see it.
    bytesPerSample = 2

    # Encoding
    # ========

    # Extract the number of channels in the source
    sourceChannels = npBufSource.shape[1]

    # Create an encoder
    encoder = createEncoder(npBufSource, freq)

    # Frame sizes are measured in number of samples.  There are only a
    # specified number of possible valid frame durations for Opus,
    # which (assuming a frequency of 48kHz) gives the following valid
    # sizes.
    frameSizes = [120, 240, 480, 960, 1920, 2880]

    # Start with the largest frame size.  This will be used for the
    # vast majority of the encoding, except possibly at the end of the
    # buffer (as there may not be sufficient data left to fill a
    # frame.)
    frameSizeIndex = len(frameSizes) - 1
    frameSize = frameSizes[frameSizeIndex]

    # Function to calculate the size of a frame in bytes
    def frameSizeBytes(samples):
        return samples * sourceChannels * bytesPerSample

    # Allocate storage space for the encoded frame.  4,000 bytes is
    # the recommended maximum buffer size for the encoded frame.
    maxEncodedFrameBytes = opus.opus_int32(4000)
    encodedFrameType = ctypes.c_ubyte * maxEncodedFrameBytes.value
    encodedFrame = encodedFrameType()

    # Create a pointer to the first byte of the buffer for the encoded
    # frame.
    encodedFramePtr = ctypes.cast(ctypes.pointer(encodedFrame),
                                  ctypes.POINTER(ctypes.c_ubyte))

    # Number of bytes to process in the source buffer
    lengthBytes = npBufSource.shape[0] * sourceChannels * bytesPerSample

    # Decoding
    # ========

    # Extract the number of channels for the target
    targetChannels = npBufTarget.shape[1]

    # Number of samples (per channel) that fit in the target buffer
    targetCapacity = npBufTarget.shape[0]

    # Create a decoder
    decoderFreq = 48000  # TODO: Test changes to this
    decoderPtr = createDecoder(decoderFreq, targetChannels)

    # Largest number of samples per channel a single Opus packet can
    # decode to (120ms at 48kHz).
    maxDecodedFrameSize = 5760

    # Encode and re-decode the audio
    # ==============================

    # Pointer to a location in the source buffer.  It is replaced by a
    # pointer to the next frame after each frame has been encoded.
    sourcePtr = npBufSource.ctypes.data_as(ctypes.c_void_p)

    # Number of source bytes that have been encoded so far
    bytesProcessed = 0

    # Pointer to a location in the target buffer.  We will increment
    # this as we progress through re-decoding each encoded frame.
    targetPtr = npBufTarget.ctypes.data_as(ctypes.c_void_p)

    # Number of samples (per channel) written to the target so far
    samplesDecoded = 0

    # Loop through the source buffer
    count = 0  # FIXME: debugging only
    while bytesProcessed < lengthBytes:
        print("processing frame: ", count)
        count += 1
        print("Processing frame at sourcePtr ", sourcePtr.value)

        # Check if we have enough source data remaining to process at
        # the current frame size
        bytesRemaining = lengthBytes - bytesProcessed
        print("lengthBytes: ", lengthBytes)
        print("bytesProcessed: ", bytesProcessed)
        print("bytes remaining (lengthBytes - bytesProcessed):",
              bytesRemaining)
        print("frameSizeBytes(frameSize):", frameSizeBytes(frameSize))
        while bytesRemaining < frameSizeBytes(frameSize):
            print("Warning! Not enough data for frame.")
            frameSizeIndex -= 1
            if frameSizeIndex < 0:
                # The data is less than the smallest number of samples
                # in a frame.  Either we ignore the remaining samples
                # and shorten the audio, or we pad the frame with
                # zeros and lengthen the audio.  We'll take the easy
                # option and shorten the audio.
                break
            frameSize = frameSizes[frameSizeIndex]
            print("Decreased frame size to ", frameSize)

        if frameSizeIndex < 0:
            print("Warning! Ignoring samples at the end of the audio\n" +
                  "as they do not fit into even the smallest frame.")
            break

        # Encode the audio
        print("Encoding audio")
        numBytes = opus.opus_encode(
            encoder,
            ctypes.cast(sourcePtr, ctypes.POINTER(opus.opus_int16)),
            frameSize, encodedFramePtr, maxEncodedFrameBytes)
        print("numBytes: ", numBytes)

        # Check for any errors during encoding
        if numBytes < 0:
            raise Exception("Encoder error detected: " +
                            opus.opus_strerror(numBytes).decode("utf"))

        # Move to next position in the buffer: encoder
        deltaBytes = frameSizeBytes(frameSize)
        sourcePtr = ctypes.c_void_p(sourcePtr.value + deltaBytes)
        bytesProcessed += deltaBytes

        # Decode the audio
        print("Decoding audio")

        # Never tell the decoder that more space is available than is
        # actually left in the target buffer; otherwise it could write
        # past the end of the buffer.
        spaceRemaining = targetCapacity - samplesDecoded
        if spaceRemaining <= 0:
            raise Exception("Target buffer is too small to hold the " +
                            "decoded audio")

        numSamples = opus.opus_decode(
            decoderPtr,
            encodedFramePtr,
            numBytes,
            ctypes.cast(targetPtr, ctypes.POINTER(ctypes.c_short)),
            min(maxDecodedFrameSize, spaceRemaining),
            0  # decode_fec: 0 decodes the packet itself (no FEC data)
        )
        print("numSamples: ", numSamples)

        # Check for any errors during decoding
        if numSamples < 0:
            raise Exception("Decoder error detected: " +
                            opus.opus_strerror(numSamples).decode("utf"))

        # Move to next position in the buffer: decoder
        samplesDecoded += numSamples
        targetPtr.value += numSamples * targetChannels * bytesPerSample