def main(inwavfile, outmp3file, bitrate):
    """Encoder main function.

    Consumes the WAVE input one block at a time. Each block goes through
    subband filtering, scalefactor selection, the psychoacoustic model and
    quantization, and is then handed to ``bitstream_formatting``.

    Args:
        inwavfile: Input file, passed to ``WavRead``.
        outmp3file: Output file, passed to ``bitstream_formatting``.
        bitrate: Target bitrate, passed to ``EncoderParameters``.
    """
    # Read WAVE file and set MPEG encoder parameters.
    input_buffer = WavRead(inwavfile)
    params = EncoderParameters(input_buffer.fs, input_buffer.nch, bitrate)

    # Read baseband filter samples. ``astype`` has to be *called*; assigning
    # to it would leave ``baseband_filter`` bound to the string 'float32'.
    baseband_filter = prototype_filter().astype('float32')

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # Perform zero padding on every channel if all samples have been
            # read.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = subband_filtering(
                    input_buffer.audio[ch].reversed(), baseband_filter)

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands. Number of bits allocated in a subband is
        # either 0 or in range [2,15].
        sfcindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            sfcindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psycho.model1(
                input_buffer.audio[ch].ordered(), params, sfcindices)

        # Quantization, one sample at a time.
        subband_samples_quantized = np.zeros(subband_samples.shape,
                                             dtype='uint32')
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                ba = subband_bit_allocation[ch, sb]
                # Index with a plain int: on NumPy >= 2 a uint8 scalar stays
                # uint8, so ``ba - 2`` would wrap to 254 when ba is 0.
                # NOTE(review): with ba == 0 the index is -2; presumably
                # quantization() ignores QCa/QCb in that case - confirm.
                table_index = int(ba) - 2
                QCa = params.table.qca[table_index]
                QCb = params.table.qcb[table_index]
                scf = params.table.scalefactor[sfcindices[ch, sb]]
                for ind in range(FRAMES_PER_BLOCK):
                    subband_samples_quantized[ch, sb, ind] = quantization(
                        subband_samples[ch, sb, ind], scf, ba, QCa, QCb)

        # Formatting output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             sfcindices, subband_samples_quantized)
def encode(input_buffer, params, outmp3file, **kwargs):
    """Encode the rest of the file.

    If ``uniform=True`` is passed, another file with uniform quantization is
    created as well.

    Args:
        input_buffer: Sample source. Its ``fs``, ``nch``, ``audio``,
            ``nsamples`` and ``nprocessed_samples`` attributes are read and
            ``read_samples`` is called on it.
        params: Encoder parameters; ``nch``, ``bitrate`` and ``table`` are
            used.
        outmp3file: Output file name, passed to ``bitstream_formatting``.
        **kwargs: ``uniform`` (default ``False``) enables the second output,
            named like ``outmp3file`` with ``'_uniform'`` inserted before its
            last four characters.
    """
    uniform = kwargs.get('uniform', False)

    if uniform:
        params_uniform = EncoderParameters(input_buffer.fs, input_buffer.nch,
                                           params.bitrate)
        uniform_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')
        # Bit allocation computed from an all-zero SMR vector.
        for ch in range(params.nch):
            uniform_bit_allocation[ch, :] = psychoacoustic.smr_bit_allocation(
                params, np.zeros(N_SUBBANDS))

    # Read baseband filter samples.
    baseband_filter = filter_coeffs()

    # Allocate space for 32 subband filters of length 512.
    filterbank = np.zeros((N_SUBBANDS, FRAME_SIZE), dtype='float32')

    # Perform modulation.
    for sb in range(N_SUBBANDS):
        for n in range(FRAME_SIZE):
            filterbank[sb, n] = baseband_filter[n] * np.cos(
                (2 * sb + 1) * (n - 16) * np.pi / 64)

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = np.dot(
                    filterbank, input_buffer.audio[ch].reversed())

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands.
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psychoacoustic.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # Scaling subband samples with determined scalefactors. The lookup
        # does not depend on the frame index, so it is done once.
        scale = params.table.scalefactor[scfindices]
        for ind in range(FRAMES_PER_BLOCK):
            subband_samples[:, :, ind] /= scale

        if uniform:
            # Keep the scaled, not yet quantized samples for the second file.
            subband_samples_uniform = np.copy(subband_samples)

        # Subband samples quantization. Multiplication with coefficients 'a'
        # and adding coefficients 'b' is defined in the ISO standard.
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                # Plain int on purpose: on NumPy >= 2 a uint8 scalar stays
                # uint8, so 1 << (ba - 1) overflows for allocations above
                # 8 bits and silently becomes 0.
                ba = int(subband_bit_allocation[ch, sb])
                if ba != 0:
                    subband_samples[ch, sb, :] *= params.table.qca[ba - 2]
                    subband_samples[ch, sb, :] += params.table.qcb[ba - 2]
                    subband_samples[ch, sb, :] *= 1 << (ba - 1)

        # Since subband_samples is a float array, it needs to be cast to
        # unsigned integers.
        subband_samples_quantized = subband_samples.astype('uint32')

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)

        if uniform:
            # Same quantization steps, driven by the uniform allocation.
            for ch in range(params.nch):
                for sb in range(N_SUBBANDS):
                    ba = int(uniform_bit_allocation[ch, sb])
                    if ba != 0:
                        subband_samples_uniform[ch, sb, :] *= (
                            params_uniform.table.qca[ba - 2])
                        subband_samples_uniform[ch, sb, :] += (
                            params_uniform.table.qcb[ba - 2])
                        subband_samples_uniform[ch, sb, :] *= 1 << (ba - 1)
            subband_samples_uniform = subband_samples_uniform.astype('uint32')
            bitstream_formatting(
                outmp3file[:-4] + '_uniform' + outmp3file[-4:],
                params_uniform, uniform_bit_allocation, scfindices,
                subband_samples_uniform)
def main(inwavfile, outmp3file, bitrate):
    """Encoder main function.

    Consumes the WAVE input one block at a time. Each block goes through
    subband filtering, scalefactor selection, the psychoacoustic model and
    quantization, and is then handed to ``bitstream_formatting``.

    Args:
        inwavfile: Input file, passed to ``WavRead``.
        outmp3file: Output file, passed to ``bitstream_formatting``.
        bitrate: Target bitrate, passed to ``EncoderParameters``.
    """
    # Read WAVE file and set MPEG encoder parameters.
    input_buffer = WavRead(inwavfile)
    params = EncoderParameters(input_buffer.fs, input_buffer.nch, bitrate)

    # Subband filter calculation from baseband prototype.
    # Very detailed analysis of MP3 subband filtering available at
    # http://cnx.org/content/m32148/latest/?collection=col11121/latest

    # Prototype filter: read baseband filter samples.
    baseband_filter = prototype_filter.prototype_filter().astype('float32')

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Subband filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = (
                    subband_filtering.subband_filtering(
                        input_buffer.audio[ch].reversed(), baseband_filter))

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands. Number of bits allocated in a subband is
        # either 0 or in range [2,15].
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psycho.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # Quantization, one sample at a time.
        subband_samples_quantized = np.zeros(subband_samples.shape,
                                             dtype='uint32')
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                ba = subband_bit_allocation[ch, sb]
                # Index with a plain int: on NumPy >= 2 a uint8 scalar stays
                # uint8, so ``ba - 2`` would wrap to 254 when ba is 0.
                # NOTE(review): with ba == 0 the index is -2; presumably
                # quantization() ignores QCa/QCb in that case - confirm.
                table_index = int(ba) - 2
                QCa = params.table.qca[table_index]
                QCb = params.table.qcb[table_index]
                scf = params.table.scalefactor[scfindices[ch, sb]]
                for ind in range(FRAMES_PER_BLOCK):
                    subband_samples_quantized[ch, sb, ind] = (
                        quantization.quantization(
                            subband_samples[ch, sb, ind], scf, ba, QCa, QCb))

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)
def main(inwavfile, outmp3file, bitrate):
    """Encoder main function.

    Builds the modulated filterbank from the baseband filter, then consumes
    the WAVE input one block at a time. Each block is filtered, scaled and
    quantized in place, and handed to ``bitstream_formatting``.

    Args:
        inwavfile: Input file, passed to ``WavRead``.
        outmp3file: Output file, passed to ``bitstream_formatting``.
        bitrate: Target bitrate, passed to ``EncoderParameters``.
    """
    # Read WAVE file and set MPEG encoder parameters.
    input_buffer = WavRead(inwavfile)
    params = EncoderParameters(input_buffer.fs, input_buffer.nch, bitrate)

    # Subband filter calculation from baseband prototype.
    # Very detailed analysis of MP3 subband filtering available at
    # http://cnx.org/content/m32148/latest/?collection=col11121/latest

    # Read baseband filter samples.
    baseband_filter = filter_coeffs()

    # Allocate space for 32 subband filters of length 512.
    filterbank = np.zeros((N_SUBBANDS, FRAME_SIZE), dtype='float32')

    # Perform modulation.
    for sb in range(N_SUBBANDS):
        for n in range(FRAME_SIZE):
            filterbank[sb, n] = baseband_filter[n] * np.cos(
                (2 * sb + 1) * (n - 16) * np.pi / 64)

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = np.dot(
                    filterbank, input_buffer.audio[ch].reversed())

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands. Number of bits allocated in a subband is
        # either 0 or in range [2,15].
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psycho.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # Scaling subband samples with determined scalefactors. The lookup
        # does not depend on the frame index, so it is done once.
        scale = params.table.scalefactor[scfindices]
        for ind in range(FRAMES_PER_BLOCK):
            subband_samples[:, :, ind] /= scale

        # Subband samples quantization. Multiplication with coefficients 'a'
        # and adding coefficients 'b' is defined in the ISO standard.
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                # Plain int on purpose: on NumPy >= 2 a uint8 scalar stays
                # uint8, so 1 << (ba - 1) overflows for allocations above
                # 8 bits and silently becomes 0.
                ba = int(subband_bit_allocation[ch, sb])
                if ba != 0:
                    subband_samples[ch, sb, :] *= params.table.qca[ba - 2]
                    subband_samples[ch, sb, :] += params.table.qcb[ba - 2]
                    subband_samples[ch, sb, :] *= 1 << (ba - 1)

        # Since subband_samples is a float array, it needs to be cast to
        # unsigned integers.
        subband_samples_quantized = subband_samples.astype('uint32')

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)
def main(inwavfile, outmp3file, bitrate):
    """Encoder main function.

    Consumes the WAVE input one block at a time. Each block goes through the
    assignment 3 subband filtering, scalefactor selection, the psychoacoustic
    model and the assignment 4 quantization, and is then handed to
    ``bitstream_formatting``.

    Args:
        inwavfile: Input file, passed to ``WavRead``.
        outmp3file: Output file, passed to ``bitstream_formatting``.
        bitrate: Target bitrate, passed to ``EncoderParameters``.
    """
    # Read WAVE file and set MPEG encoder parameters.
    input_buffer = WavRead(inwavfile)
    params = EncoderParameters(input_buffer.fs, input_buffer.nch, bitrate)

    # Subband filter calculation from baseband prototype.
    # Very detailed analysis of MP3 subband filtering available at
    # http://cnx.org/content/m32148/latest/?collection=col11121/latest

    # ASSIGNMENT 2: read baseband filter samples.
    baseband_filter = assignment2.prototype_filter().astype('float32')

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # ASSIGNMENT 3: subband filtering = dot product with reversed
            # buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = assignment3.subband_filtering(
                    input_buffer.audio[ch].reversed(), baseband_filter)

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands. Number of bits allocated in a subband is
        # either 0 or in range [2,15].
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psycho.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # ASSIGNMENT 4: quantization, one sample at a time.
        subband_samples_quantized = np.zeros(subband_samples.shape,
                                             dtype='uint32')
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                ba = subband_bit_allocation[ch, sb]
                # Index with a plain int: on NumPy >= 2 a uint8 scalar stays
                # uint8, so ``ba - 2`` would wrap to 254 when ba is 0.
                # NOTE(review): with ba == 0 the index is -2; presumably
                # quantization() ignores QCa/QCb in that case - confirm.
                table_index = int(ba) - 2
                QCa = params.table.qca[table_index]
                QCb = params.table.qcb[table_index]
                scf = params.table.scalefactor[scfindices[ch, sb]]
                for ind in range(FRAMES_PER_BLOCK):
                    subband_samples_quantized[ch, sb, ind] = (
                        assignment4.quantization(
                            subband_samples[ch, sb, ind], scf, ba, QCa, QCb))

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)
def main(inwavfile, outmp3file, bitrate):
    """Encoder main function.

    Builds the modulated filterbank from the baseband filter, then consumes
    the WAVE input one block at a time. Each block is filtered, scaled and
    quantized in place, and handed to ``bitstream_formatting``.

    Args:
        inwavfile: Input file, passed to ``WavRead``.
        outmp3file: Output file, passed to ``bitstream_formatting``.
        bitrate: Target bitrate, passed to ``EncoderParameters``.
    """
    # Read WAVE file and set MPEG encoder parameters.
    input_buffer = WavRead(inwavfile)
    params = EncoderParameters(input_buffer.fs, input_buffer.nch, bitrate)

    # Subband filter calculation from baseband prototype.
    # Very detailed analysis of MP3 subband filtering available at
    # http://cnx.org/content/m32148/latest/?collection=col11121/latest

    # Read baseband filter samples.
    baseband_filter = filter_coeffs()

    # Allocate space for 32 subband filters of length 512.
    filterbank = np.zeros((N_SUBBANDS, FRAME_SIZE), dtype='float32')

    # Perform modulation.
    for sb in range(N_SUBBANDS):
        for n in range(FRAME_SIZE):
            filterbank[sb, n] = baseband_filter[n] * np.cos(
                (2 * sb + 1) * (n - 16) * np.pi / 64)

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = np.dot(
                    filterbank, input_buffer.audio[ch].reversed())

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands. Number of bits allocated in a subband is
        # either 0 or in range [2,15].
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psycho.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # Scaling subband samples with determined scalefactors. The lookup
        # does not depend on the frame index, so it is done once.
        scale = params.table.scalefactor[scfindices]
        for ind in range(FRAMES_PER_BLOCK):
            subband_samples[:, :, ind] /= scale

        # Subband samples quantization. Multiplication with coefficients 'a'
        # and adding coefficients 'b' is defined in the ISO standard.
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                # Plain int on purpose: on NumPy >= 2 a uint8 scalar stays
                # uint8, so 1 << (ba - 1) overflows for allocations above
                # 8 bits and silently becomes 0.
                ba = int(subband_bit_allocation[ch, sb])
                if ba != 0:
                    subband_samples[ch, sb, :] *= params.table.qca[ba - 2]
                    subband_samples[ch, sb, :] += params.table.qcb[ba - 2]
                    subband_samples[ch, sb, :] *= 1 << (ba - 1)

        # Since subband_samples is a float array, it needs to be cast to
        # unsigned integers.
        subband_samples_quantized = subband_samples.astype('uint32')

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)
def encode(input_buffer, params, outmp3file, **kwargs):
    """Encode the rest of the file.

    If ``uniform=True`` is passed, another file with uniform quantization is
    created as well.

    Args:
        input_buffer: Sample source. Its ``fs``, ``nch``, ``audio``,
            ``nsamples`` and ``nprocessed_samples`` attributes are read and
            ``read_samples`` is called on it.
        params: Encoder parameters; ``nch``, ``bitrate`` and ``table`` are
            used.
        outmp3file: Output file name, passed to ``bitstream_formatting``.
        **kwargs: ``uniform`` (default ``False``) enables the second output,
            named like ``outmp3file`` with ``'_uniform'`` inserted before its
            last four characters.
    """
    uniform = kwargs.get('uniform', False)

    if uniform:
        params_uniform = EncoderParameters(input_buffer.fs, input_buffer.nch,
                                           params.bitrate)
        uniform_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')
        # Bit allocation computed from an all-zero SMR vector.
        for ch in range(params.nch):
            uniform_bit_allocation[ch, :] = psychoacoustic.smr_bit_allocation(
                params, np.zeros(N_SUBBANDS))

    # Read baseband filter samples.
    baseband_filter = filter_coeffs()

    # Allocate space for 32 subband filters of length 512.
    filterbank = np.zeros((N_SUBBANDS, FRAME_SIZE), dtype='float32')

    # Perform modulation.
    for sb in range(N_SUBBANDS):
        for n in range(FRAME_SIZE):
            filterbank[sb, n] = baseband_filter[n] * np.cos(
                (2 * sb + 1) * (n - 16) * np.pi / 64)

    subband_samples = np.zeros((params.nch, N_SUBBANDS, FRAMES_PER_BLOCK),
                               dtype='float32')

    # Main loop, executing until all samples have been processed.
    while input_buffer.nprocessed_samples < input_buffer.nsamples:

        # In each block 12 frames are processed, which equals 12x32=384 new
        # samples per block.
        for frm in range(FRAMES_PER_BLOCK):
            samples_read = input_buffer.read_samples(SHIFT_SIZE)

            # If all samples have been read, perform zero padding.
            if samples_read < SHIFT_SIZE:
                for ch in range(params.nch):
                    input_buffer.audio[ch].insert(
                        np.zeros(SHIFT_SIZE - samples_read))

            # Filtering = dot product with reversed buffer.
            for ch in range(params.nch):
                subband_samples[ch, :, frm] = np.dot(
                    filterbank, input_buffer.audio[ch].reversed())

        # Arrays keeping table indices of calculated scalefactors and bits
        # allocated in subbands.
        scfindices = np.zeros((params.nch, N_SUBBANDS), dtype='uint8')
        subband_bit_allocation = np.zeros((params.nch, N_SUBBANDS),
                                          dtype='uint8')

        # Finding scale factors, psychoacoustic model and bit allocation
        # calculation for subbands. Although scaling is done later, its result
        # is necessary for the psychoacoustic model and calculation of sound
        # pressure levels.
        for ch in range(params.nch):
            scfindices[ch, :] = get_scalefactors(subband_samples[ch, :, :],
                                                 params.table.scalefactor)
            subband_bit_allocation[ch, :] = psychoacoustic.model1(
                input_buffer.audio[ch].ordered(), params, scfindices)

        # Scaling subband samples with determined scalefactors. The lookup
        # does not depend on the frame index, so it is done once.
        scale = params.table.scalefactor[scfindices]
        for ind in range(FRAMES_PER_BLOCK):
            subband_samples[:, :, ind] /= scale

        if uniform:
            # Keep the scaled, not yet quantized samples for the second file.
            subband_samples_uniform = np.copy(subband_samples)

        # Subband samples quantization. Multiplication with coefficients 'a'
        # and adding coefficients 'b' is defined in the ISO standard.
        for ch in range(params.nch):
            for sb in range(N_SUBBANDS):
                # Plain int on purpose: on NumPy >= 2 a uint8 scalar stays
                # uint8, so 1 << (ba - 1) overflows for allocations above
                # 8 bits and silently becomes 0.
                ba = int(subband_bit_allocation[ch, sb])
                if ba != 0:
                    subband_samples[ch, sb, :] *= params.table.qca[ba - 2]
                    subband_samples[ch, sb, :] += params.table.qcb[ba - 2]
                    subband_samples[ch, sb, :] *= 1 << (ba - 1)

        # Since subband_samples is a float array, it needs to be cast to
        # unsigned integers.
        subband_samples_quantized = subband_samples.astype('uint32')

        # Forming output bitstream and appending it to the output file.
        bitstream_formatting(outmp3file, params, subband_bit_allocation,
                             scfindices, subband_samples_quantized)

        if uniform:
            # Same quantization steps, driven by the uniform allocation.
            for ch in range(params.nch):
                for sb in range(N_SUBBANDS):
                    ba = int(uniform_bit_allocation[ch, sb])
                    if ba != 0:
                        subband_samples_uniform[ch, sb, :] *= (
                            params_uniform.table.qca[ba - 2])
                        subband_samples_uniform[ch, sb, :] += (
                            params_uniform.table.qcb[ba - 2])
                        subband_samples_uniform[ch, sb, :] *= 1 << (ba - 1)
            subband_samples_uniform = subband_samples_uniform.astype('uint32')
            bitstream_formatting(
                outmp3file[:-4] + '_uniform' + outmp3file[-4:],
                params_uniform, uniform_bit_allocation, scfindices,
                subband_samples_uniform)