def output(partIdx):
    """Uses the student code to compute the output for test cases.

    Args:
        partIdx: Zero-based assignment part: 0 = ScaledFFTdB,
            1 = PrototypeFilter, 2 = SubbandFiltering, 3 = Quantization.

    Returns:
        The computed values as a single space-separated string
        ('%.5f' per value for parts 0-2, '%d' per value for part 3).
        An empty string is returned for any other ``partIdx``.
    """
    # Formatted values are collected in a list and joined once at the end,
    # instead of growing a string with ``+=`` for every sample.
    values = []

    if partIdx == 0:
        # This is ScaledFFTdB
        # The student module is imported lazily so that only the requested
        # assignment has to be importable.
        from assignment1 import scaled_fft_db

        _, x = wavfile.read('data/a1_submissionInput.wav')
        X = scaled_fft_db(x)
        values = ['%.5f' % val for val in X]

    elif partIdx == 1:
        # This is PrototypeFilter
        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt('data/a2_submissionInput.txt')
        # Only samples 4*512 .. 5*512 of the filtered signal are reported.
        r = np.convolve(h, s)[4 * 512:5 * 512] / 2
        values = ['%.5f' % val for val in r]

    elif partIdx == 2:
        # This is SubbandFiltering
        from assignment3 import subband_filtering

        _, x = wavfile.read('data/a3_submissionInput.wav')
        h = np.hanning(512)
        X = subband_filtering(x, h)
        values = ['%.5f' % val for val in X]

    elif partIdx == 3:
        # This is Quantization
        from assignment4 import quantization
        from parameters import EncoderParameters

        params = EncoderParameters(44100, 2, 64)
        # ndmin=2 keeps a one-row input file two-dimensional, so iterating
        # still yields rows rather than scalars.
        val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)
        for row in val_in:
            # Each row is read as: sample value, scalefactor, bit allocation.
            sample = row[0]
            scf = row[1]
            ba = int(row[2])
            QCa = params.table.qca[ba - 2]
            QCb = params.table.qcb[ba - 2]
            values.append('%d' % quantization(sample, scf, ba, QCa, QCb))

    return ' '.join(values)
def output(partIdx):
    """Uses the student code to compute the output for test cases.

    Args:
        partIdx: Zero-based index of the assignment part to run
            (0: ScaledFFTdB, 1: PrototypeFilter, 2: SubbandFiltering,
            3: Quantization).

    Returns:
        A string of space-separated results: values formatted with
        '%.5f' for parts 0, 1 and 2, and with '%d' for part 3. Any other
        ``partIdx`` gives an empty string.
    """
    # Results are gathered as already-formatted strings and joined once;
    # this replaces the per-value ``outputString += ...`` concatenation.
    formatted = []

    if partIdx == 0:
        # This is ScaledFFTdB
        from assignment1 import scaled_fft_db  # lazy: only this part's module is needed

        _rate, x = wavfile.read('data/a1_submissionInput.wav')
        formatted.extend('%.5f' % val for val in scaled_fft_db(x))

    elif partIdx == 1:
        # This is PrototypeFilter
        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt('data/a2_submissionInput.txt')
        # Report one 512-sample window (the fifth) of the filtered signal.
        window = np.convolve(h, s)[4 * 512:5 * 512] / 2
        formatted.extend('%.5f' % val for val in window)

    elif partIdx == 2:
        # This is SubbandFiltering
        from assignment3 import subband_filtering

        _rate, x = wavfile.read('data/a3_submissionInput.wav')
        h = np.hanning(512)
        formatted.extend('%.5f' % val for val in subband_filtering(x, h))

    elif partIdx == 3:
        # This is Quantization
        from assignment4 import quantization
        from parameters import EncoderParameters

        params = EncoderParameters(44100, 2, 64)
        # ndmin=2 guarantees a 2-D array even when the file holds one row;
        # without it a single-row file would be iterated scalar by scalar.
        val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)
        for row in val_in:
            ba = int(row[2])
            quantized = quantization(
                row[0],                    # sample value
                row[1],                    # scalefactor
                ba,                        # bit allocation
                params.table.qca[ba - 2],
                params.table.qcb[ba - 2],
            )
            formatted.append('%d' % quantized)

    return ' '.join(formatted)
def output(partIdx):
    """Run the student code for one assignment part and save the result.

    The computed values are written, space-separated, to ``res<partIdx>.txt``
    in the current working directory, and a status message is printed.

    Args:
        partIdx: Assignment part as a string: "1" = ScaledFFTdB,
            "2" = PrototypeFilter, "3" = SubbandFiltering,
            "4" = Quantization.

    Returns:
        None. For an unknown ``partIdx`` only the "Unknown ..." message is
        printed and no file is written.
    """
    # Formatted values; joined once when the file is written.
    values = []

    if partIdx == "1":
        # This is ScaledFFTdB
        # Student modules are imported lazily so only the requested
        # assignment has to be importable.
        from assignment1 import scaled_fft_db

        _, x = wavfile.read("data/a1_submissionInput.wav")
        X = scaled_fft_db(x)
        values = ["%.5f" % val for val in X]

    elif partIdx == "2":
        # This is PrototypeFilter
        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt("data/a2_submissionInput.txt")
        r = np.convolve(h, s)[4 * 512:5 * 512] / 2
        values = ["%.5f" % val for val in r]

    elif partIdx == "3":
        # This is SubbandFiltering
        from assignment3 import subband_filtering

        _, x = wavfile.read("data/a3_submissionInput.wav")
        h = np.hanning(512)
        X = subband_filtering(x, h)
        values = ["%.5f" % val for val in X]

    elif partIdx == "4":
        # This is Quantization
        from assignment4 import quantization
        from parameters import EncoderParameters

        params = EncoderParameters(44100, 2, 64)
        # ndmin=2 keeps a one-row input file two-dimensional so that each
        # iteration still yields a full row.
        val_in = np.loadtxt("data/a4_submissionInput.txt", ndmin=2)
        for row in val_in:
            sample = row[0]
            scf = row[1]
            ba = int(row[2])
            QCa = params.table.qca[ba - 2]
            QCb = params.table.qcb[ba - 2]
            values.append("%d" % quantization(sample, scf, ba, QCa, QCb))

    else:
        print("Unknown assigment part number")
        # Nothing was computed, so stop here rather than also reporting a
        # computation error in the student's code.
        return

    if values:
        fileName = "res%s.txt" % partIdx
        with open(fileName, "w") as f:
            f.write(" ".join(values))
        print("You can now submit the file " + fileName)
    else:
        print(
            "there was an error with the computation. Please check your code")
def output(partIdx):
    """Compute the submission output for one assignment part and store it.

    On success the space-separated values are written to
    ``res<partIdx>.txt`` in the current working directory and the file name
    is announced on stdout.

    Args:
        partIdx: Assignment part given as a string ('1' ScaledFFTdB,
            '2' PrototypeFilter, '3' SubbandFiltering, '4' Quantization).

    Returns:
        None. An unrecognised ``partIdx`` only prints the "Unknown ..."
        message; no file is written.
    """
    # ``print`` is called with a single parenthesised argument throughout,
    # which is valid on Python 3 and prints the same text on Python 2.
    pieces = []  # formatted values, joined once when writing the file

    if partIdx == '1':
        # This is ScaledFFTdB
        from assignment1 import scaled_fft_db  # lazy: only this part's module is needed

        _rate, x = wavfile.read('data/a1_submissionInput.wav')
        pieces.extend('%.5f' % val for val in scaled_fft_db(x))

    elif partIdx == '2':
        # This is PrototypeFilter
        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt('data/a2_submissionInput.txt')
        window = np.convolve(h, s)[4 * 512:5 * 512] / 2
        pieces.extend('%.5f' % val for val in window)

    elif partIdx == '3':
        # This is SubbandFiltering
        from assignment3 import subband_filtering

        _rate, x = wavfile.read('data/a3_submissionInput.wav')
        h = np.hanning(512)
        pieces.extend('%.5f' % val for val in subband_filtering(x, h))

    elif partIdx == '4':
        # This is Quantization
        from assignment4 import quantization
        from parameters import EncoderParameters

        params = EncoderParameters(44100, 2, 64)
        # ndmin=2: a single-row file would otherwise load as a 1-D array
        # and be iterated value by value instead of row by row.
        val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)
        for row in val_in:
            ba = int(row[2])
            QCa = params.table.qca[ba - 2]
            QCb = params.table.qcb[ba - 2]
            pieces.append('%d' % quantization(row[0], row[1], ba, QCa, QCb))

    else:
        print("Unknown assigment part number")
        # The part number is the problem, not the computation: return
        # before the generic computation-error message below.
        return

    if pieces:
        fileName = "res%s.txt" % partIdx
        with open(fileName, "w") as f:
            f.write(' '.join(pieces))
        print("You can now submit the file " + fileName)
    else:
        print("there was an error with the computation. Please check your code")
def model1(samples, params, sfindices):
    """Psychoacoustic model as described in ISO/IEC 11172-3, Annex D.1.

    Args:
        samples: Input handed unchanged to ``assignment1.scaled_fft_db``.
        params: Encoder parameters; ``params.table`` supplies the lookup
            tables used here and ``params`` is forwarded to
            ``smr_bit_allocation``.
        sfindices: Index into ``table.scalefactor`` selecting the
            scalefactors in use.

    Returns:
        The result of ``smr_bit_allocation(params, smr)``, where ``smr`` is
        the per-subband difference between the subband level and the
        minimum global masking threshold.
    """
    table = params.table

    X = assignment1.scaled_fft_db(samples)

    # Level per subband: the larger of the strongest spectral line in the
    # subband and a level derived from the subband's scalefactor.
    scf = table.scalefactor[sfindices]
    subband_spl = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        subband_spl[sb] = np.max(X[1 + sb * SUB_SIZE:1 + sb * SUB_SIZE + SUB_SIZE])
        subband_spl[sb] = np.maximum(subband_spl[sb], 20 * np.log10(scf[0, sb] * 32768) - 10)

    # Local maxima of the spectrum. Floor division keeps the bound an int;
    # ``/`` yields a float on Python 3, which ``range`` rejects.
    peaks = []
    for i in range(3, FFT_SIZE // 2 - 6):
        if X[i] >= X[i + 1] and X[i] > X[i - 1]:
            peaks.append(i)

    # determining tonal and non-tonal components
    tonal = TonalComponents(X)
    tonal.flag[0:3] = IGNORE

    for k in peaks:
        is_tonal = True
        # The neighbourhood that must lie at least 7 dB below the peak
        # widens with the line index.
        if k > 2 and k < 63:
            testj = [-2, 2]
        elif k >= 63 and k < 127:
            testj = [-3, -2, 2, 3]
        else:
            testj = [-6, -5, -4, -3, -2, 2, 3, 4, 5, 6]
        for j in testj:
            if tonal.spl[k] - tonal.spl[k + j] < 7:
                is_tonal = False
                break
        if is_tonal:
            # Merge the peak with its two direct neighbours and mark the
            # whole examined neighbourhood as consumed.
            tonal.spl[k] = add_db(tonal.spl[k - 1:k + 2])
            tonal.flag[k + np.arange(testj[0], testj[-1] + 1)] = IGNORE
            tonal.flag[k] = TONE
            tonal.tonecomps.append(k)

    # non-tonal components for each critical band
    for i in range(table.cbnum - 1):
        weight = 0.0
        msum = DBMIN
        for j in range(table.cbound[i], table.cbound[i + 1]):
            # NOTE(review): this tests flag[i] (indexed by the critical-band
            # number) although flag is otherwise indexed by spectral line;
            # flag[j] looks intended. Left unchanged pending confirmation
            # because it alters the model's numeric output.
            if tonal.flag[i] == UNSET:
                msum = add_db((tonal.spl[j], msum))
                weight += np.power(10, tonal.spl[j] / 10) * (table.bark[table.map[j]] - i)
        if msum > DBMIN:
            index = weight / np.power(10, msum / 10.0)
            # Builtin int() replaces np.int, an alias of it that was removed
            # in NumPy 1.24.
            center = table.cbound[i] + int(index * (table.cbound[i + 1] - table.cbound[i]))
            if tonal.flag[center] == TONE:
                center += 1
            tonal.flag[center] = NOISE
            tonal.spl[center] = msum
            tonal.noisecomps.append(center)

    # decimation of tonal and non-tonal components
    # under the threshold in quiet
    # Every component is tested. The previous ``pop(i); i -= 1`` inside a
    # ``for i in range(...)`` loop could not rewind the loop, so the entry
    # following each removed component was never checked.
    for components in (tonal.tonecomps, tonal.noisecomps):
        audible = []
        for k in components:
            if tonal.spl[k] < table.hear[table.map[k]]:
                tonal.flag[k] = IGNORE
            else:
                audible.append(k)
        # Slice assignment updates the existing list object in place.
        components[:] = audible

    # decimation of tonal components closer than 0.5 Bark
    # NOTE(review): the difference is taken as bark[this] - bark[following];
    # if tonecomps is ascending and table.bark non-decreasing this is never
    # positive, so the test always passes. The list is also modified while
    # being indexed. Behaviour is kept as-is pending confirmation against
    # Annex D.
    for i in range(len(tonal.tonecomps) - 1):
        if i >= len(tonal.tonecomps) - 1:
            break
        this = tonal.tonecomps[i]
        following = tonal.tonecomps[i + 1]
        if table.bark[table.map[this]] - table.bark[table.map[following]] < 0.5:
            # Keep the stronger of the two components.
            if tonal.spl[this] > tonal.spl[following]:
                tonal.flag[following] = IGNORE
                tonal.tonecomps.remove(following)
            else:
                tonal.flag[this] = IGNORE
                tonal.tonecomps.remove(this)

    # individual masking thresholds
    masking_tonal = []
    masking_noise = []

    for i in range(table.subsize):
        masking_tonal.append(())
        zi = table.bark[i]
        for j in tonal.tonecomps:
            zj = table.bark[table.map[j]]
            dz = zi - zj
            # A masker only contributes within -3 .. +8 Bark of itself.
            if dz >= -3 and dz <= 8:
                avtm = -1.525 - 0.275 * zj - 4.5
                if dz >= -3 and dz < -1:
                    vf = 17 * (dz + 1) - (0.4 * X[j] + 6)
                elif dz >= -1 and dz < 0:
                    vf = dz * (0.4 * X[j] + 6)
                elif dz >= 0 and dz < 1:
                    vf = -17 * dz
                else:
                    vf = -(dz - 1) * (17 - 0.15 * X[j]) - 17
                masking_tonal[i] += (X[j] + vf + avtm, )

    for i in range(table.subsize):
        masking_noise.append(())
        zi = table.bark[i]
        for j in tonal.noisecomps:
            zj = table.bark[table.map[j]]
            dz = zi - zj
            if dz >= -3 and dz <= 8:
                avnm = -1.525 - 0.175 * zj - 0.5
                if dz >= -3 and dz < -1:
                    vf = 17 * (dz + 1) - (0.4 * X[j] + 6)
                elif dz >= -1 and dz < 0:
                    vf = dz * (0.4 * X[j] + 6)
                elif dz >= 0 and dz < 1:
                    vf = -17 * dz
                else:
                    vf = -(dz - 1) * (17 - 0.15 * X[j]) - 17
                masking_noise[i] += (X[j] + vf + avnm, )

    # global masking thresholds
    masking_global = []
    for i in range(table.subsize):
        maskers = (table.hear[i], ) + masking_tonal[i] + masking_noise[i]
        masking_global.append(add_db(maskers))

    # minimum masking thresholds
    mask = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        first = table.map[sb * SUB_SIZE]
        after_last = table.map[(sb + 1) * SUB_SIZE - 1] + 1
        mask[sb] = np.min(masking_global[first:after_last])

    # signal-to-mask ratio for each subband
    smr = subband_spl - mask

    subband_bit_allocation = smr_bit_allocation(params, smr)
    return subband_bit_allocation
def _masking_levels(X, table, components, slope, offset):
    """Collect the individual masking levels cast by ``components``.

    Args:
        X: Spectrum levels indexed by spectral line.
        table: Lookup tables (``subsize``, ``bark`` and ``map`` are read).
        components: Spectral-line indices of the maskers.
        slope: Bark-dependent coefficient of the masking index.
        offset: Constant term of the masking index.

    Returns:
        A list with one tuple per threshold-table entry holding the level
        contributed by every masker within -3 .. +8 Bark of that entry.
    """
    levels = []
    for i in range(table.subsize):
        contributions = ()
        zi = table.bark[i]
        for j in components:
            zj = table.bark[table.map[j]]
            dz = zi - zj
            if dz >= -3 and dz <= 8:
                av = -1.525 - slope * zj - offset
                if dz >= -3 and dz < -1:
                    vf = 17 * (dz + 1) - (0.4 * X[j] + 6)
                elif dz >= -1 and dz < 0:
                    vf = dz * (0.4 * X[j] + 6)
                elif dz >= 0 and dz < 1:
                    vf = -17 * dz
                else:
                    vf = -(dz - 1) * (17 - 0.15 * X[j]) - 17
                contributions += (X[j] + vf + av,)
        levels.append(contributions)
    return levels


def model1(samples, params, sfindices):
    """Psychoacoustic model as described in ISO/IEC 11172-3, Annex D.1.

    Args:
        samples: Input passed as-is to ``assignment1.scaled_fft_db``.
        params: Encoder parameters; ``params.table`` provides the lookup
            tables and ``params`` itself is forwarded to
            ``smr_bit_allocation``.
        sfindices: Index into ``table.scalefactor`` selecting the
            scalefactors in use.

    Returns:
        Whatever ``smr_bit_allocation(params, smr)`` returns for the
        per-subband signal-to-mask ratio ``smr`` computed here.
    """
    table = params.table

    X = assignment1.scaled_fft_db(samples)

    # Subband level: maximum of the strongest line in the subband and a
    # level derived from the subband's scalefactor.
    scf = table.scalefactor[sfindices]
    subband_spl = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        start = 1 + sb * SUB_SIZE
        subband_spl[sb] = np.max(X[start:start + SUB_SIZE])
        subband_spl[sb] = np.maximum(subband_spl[sb], 20 * np.log10(scf[0, sb] * 32768) - 10)

    # Local maxima of the spectrum. ``//`` is required: on Python 3 ``/``
    # produces a float and ``range`` raises TypeError.
    peaks = [i for i in range(3, FFT_SIZE // 2 - 6)
             if X[i] >= X[i + 1] and X[i] > X[i - 1]]

    # determining tonal and non-tonal components
    tonal = TonalComponents(X)
    tonal.flag[0:3] = IGNORE

    for k in peaks:
        # Offsets of the neighbours that must be at least 7 dB weaker; the
        # neighbourhood grows with the line index.
        if k > 2 and k < 63:
            testj = [-2, 2]
        elif k >= 63 and k < 127:
            testj = [-3, -2, 2, 3]
        else:
            testj = [-6, -5, -4, -3, -2, 2, 3, 4, 5, 6]

        is_tonal = True
        for j in testj:
            if tonal.spl[k] - tonal.spl[k + j] < 7:
                is_tonal = False
                break

        if is_tonal:
            tonal.spl[k] = add_db(tonal.spl[k - 1:k + 2])
            tonal.flag[k + np.arange(testj[0], testj[-1] + 1)] = IGNORE
            tonal.flag[k] = TONE
            tonal.tonecomps.append(k)

    # non-tonal components for each critical band
    for i in range(table.cbnum - 1):
        weight = 0.0
        msum = DBMIN
        for j in range(table.cbound[i], table.cbound[i + 1]):
            # NOTE(review): flag is indexed by spectral line everywhere
            # else, yet this reads flag[i] with i the critical-band number;
            # flag[j] is probably what was meant. Not changed here because
            # it would alter the numeric result - please confirm.
            if tonal.flag[i] == UNSET:
                msum = add_db((tonal.spl[j], msum))
                weight += np.power(10, tonal.spl[j] / 10) * (table.bark[table.map[j]] - i)
        if msum > DBMIN:
            index = weight / np.power(10, msum / 10.0)
            # int() instead of np.int: the alias was removed in NumPy 1.24.
            center = table.cbound[i] + int(index * (table.cbound[i + 1] - table.cbound[i]))
            if tonal.flag[center] == TONE:
                center += 1
            tonal.flag[center] = NOISE
            tonal.spl[center] = msum
            tonal.noisecomps.append(center)

    # decimation of tonal and non-tonal components
    # under the threshold in quiet
    # The earlier ``for i in range(...)`` / ``pop(i)`` / ``i -= 1`` pattern
    # skipped the component after every removal, because reassigning the
    # loop variable has no effect on the next iteration. Filtering the list
    # checks every component exactly once.
    for components in (tonal.tonecomps, tonal.noisecomps):
        survivors = []
        for k in components:
            if tonal.spl[k] < table.hear[table.map[k]]:
                tonal.flag[k] = IGNORE
            else:
                survivors.append(k)
        components[:] = survivors  # in place: same list object is kept

    # decimation of tonal components closer than 0.5 Bark
    # NOTE(review): with ascending tonecomps and a non-decreasing bark table
    # the difference below is <= 0, so the condition would always hold; the
    # operands may be swapped. The loop also removes from the list it is
    # indexing. Kept unchanged pending confirmation against Annex D.
    for i in range(len(tonal.tonecomps) - 1):
        if i >= len(tonal.tonecomps) - 1:
            break
        this = tonal.tonecomps[i]
        successor = tonal.tonecomps[i + 1]
        if table.bark[table.map[this]] - table.bark[table.map[successor]] < 0.5:
            # The weaker component of the pair is discarded.
            if tonal.spl[this] > tonal.spl[successor]:
                tonal.flag[successor] = IGNORE
                tonal.tonecomps.remove(successor)
            else:
                tonal.flag[this] = IGNORE
                tonal.tonecomps.remove(this)

    # individual masking thresholds
    # Tonal and noise maskers share the same spreading computation and only
    # differ in the two masking-index constants.
    masking_tonal = _masking_levels(X, table, tonal.tonecomps, 0.275, 4.5)
    masking_noise = _masking_levels(X, table, tonal.noisecomps, 0.175, 0.5)

    # global masking thresholds
    masking_global = []
    for i in range(table.subsize):
        maskers = (table.hear[i],) + masking_tonal[i] + masking_noise[i]
        masking_global.append(add_db(maskers))

    # minimum masking thresholds
    mask = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        first = table.map[sb * SUB_SIZE]
        after_last = table.map[(sb + 1) * SUB_SIZE - 1] + 1
        mask[sb] = np.min(masking_global[first:after_last])

    # signal-to-mask ratio for each subband
    smr = subband_spl - mask

    subband_bit_allocation = smr_bit_allocation(params, smr)
    return subband_bit_allocation