def output(partIdx):
  """Uses the student code to compute the output for test cases.

  Args:
    partIdx: zero-based part selector: 0 runs ScaledFFTdB, 1 runs
      PrototypeFilter, 2 runs SubbandFiltering and 3 runs Quantization.

  Returns:
    The computed values as one space-separated string without leading or
    trailing whitespace. Any other ``partIdx`` yields an empty string.
  """
  # Formatted values are collected here and joined once at the end, which
  # avoids growing a string with ``+=`` inside every loop.
  tokens = []

  if partIdx == 0: # This is ScaledFFTdB

    from assignment1 import scaled_fft_db

    # Only the sample data is needed; the rate returned first is ignored.
    _, x = wavfile.read('data/a1_submissionInput.wav')
    tokens = ['%.5f' % val for val in scaled_fft_db(x)]

  elif partIdx == 1: # This is PrototypeFilter

    from assignment2 import prototype_filter

    h = prototype_filter()

    # test signal
    s = np.loadtxt('data/a2_submissionInput.txt')
    # Keep samples 4*512 .. 5*512-1 of the convolution, halved.
    filtered = np.convolve(h, s)[4*512:5*512]/2
    tokens = ['%.5f' % val for val in filtered]

  elif partIdx == 2: # This is SubbandFiltering

    from assignment3 import subband_filtering

    _, x = wavfile.read('data/a3_submissionInput.wav')

    h = np.hanning(512)
    tokens = ['%.5f' % val for val in subband_filtering(x, h)]

  elif partIdx == 3: # This is Quantization

    from assignment4 import quantization

    from parameters import EncoderParameters
    params = EncoderParameters(44100, 2, 64)

    # ndmin=2 keeps the data two-dimensional, so an input file holding a
    # single line is still iterated row by row.
    val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)

    # Each row holds: value, scale factor, bit allocation.
    for row in val_in:
      ba = int(row[2])
      # The qca/qcb tables are indexed by ba - 2.
      QCa = params.table.qca[ba-2]
      QCb = params.table.qcb[ba-2]
      tokens.append('%d' % quantization(row[0], row[1], ba, QCa, QCb))

  return ' '.join(tokens)
# Example #2
# 0
def output(partIdx):
    """Uses the student code to compute the output for test cases.

    Args:
        partIdx: zero-based part selector: 0 runs ScaledFFTdB, 1 runs
            PrototypeFilter, 2 runs SubbandFiltering and 3 runs
            Quantization.

    Returns:
        The computed values as one space-separated string without leading
        or trailing whitespace. Any other ``partIdx`` yields an empty
        string.
    """
    # Collect the formatted values and join them once at the end rather
    # than concatenating onto a string inside each loop.
    formatted = []

    if partIdx == 0:  # This is ScaledFFTdB

        from assignment1 import scaled_fft_db

        # The rate returned alongside the samples is not used.
        _, x = wavfile.read('data/a1_submissionInput.wav')
        formatted = ['%.5f' % val for val in scaled_fft_db(x)]

    elif partIdx == 1:  # This is PrototypeFilter

        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt('data/a2_submissionInput.txt')
        # Only the 512 samples starting at offset 4 * 512 are reported,
        # scaled by one half.
        segment = np.convolve(h, s)[4 * 512:5 * 512] / 2
        formatted = ['%.5f' % val for val in segment]

    elif partIdx == 2:  # This is SubbandFiltering

        from assignment3 import subband_filtering

        _, x = wavfile.read('data/a3_submissionInput.wav')

        h = np.hanning(512)
        formatted = ['%.5f' % val for val in subband_filtering(x, h)]

    elif partIdx == 3:  # This is Quantization

        from assignment4 import quantization

        from parameters import EncoderParameters
        params = EncoderParameters(44100, 2, 64)

        # ndmin=2 guarantees a 2-D array, so a file containing only one
        # line is still processed as a single row.
        val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)

        # Row layout: value, scale factor, bit allocation.
        for row in val_in:
            ba = int(row[2])
            # The qca/qcb tables are indexed by ba - 2.
            QCa = params.table.qca[ba - 2]
            QCb = params.table.qcb[ba - 2]
            quantized = quantization(row[0], row[1], ba, QCa, QCb)
            formatted.append('%d' % quantized)

    return ' '.join(formatted)
# Example #3
# 0
def output(partIdx):
    """Run the student code for one assignment part and save the result.

    Args:
        partIdx: part number as a string: "1" runs ScaledFFTdB, "2" runs
            PrototypeFilter, "3" runs SubbandFiltering and "4" runs
            Quantization.

    Returns:
        None. When the selected part produced values they are written,
        space separated, to ``res<partIdx>.txt`` in the current directory
        and a confirmation is printed; otherwise an error message is
        printed. An unknown ``partIdx`` additionally prints a notice first.
    """
    # Formatted values are gathered here and joined once before writing,
    # instead of concatenating onto a string inside each loop.
    tokens = []

    if partIdx == "1":  # This is ScaledFFTdB

        from assignment1 import scaled_fft_db

        # The rate returned alongside the samples is not used.
        _, x = wavfile.read("data/a1_submissionInput.wav")
        tokens = ["%.5f" % val for val in scaled_fft_db(x)]

    elif partIdx == "2":  # This is PrototypeFilter

        from assignment2 import prototype_filter

        h = prototype_filter()

        # test signal
        s = np.loadtxt("data/a2_submissionInput.txt")
        # Keep samples 4*512 .. 5*512-1 of the convolution, halved.
        segment = np.convolve(h, s)[4 * 512:5 * 512] / 2
        tokens = ["%.5f" % val for val in segment]

    elif partIdx == "3":  # This is SubbandFiltering

        from assignment3 import subband_filtering

        _, x = wavfile.read("data/a3_submissionInput.wav")

        h = np.hanning(512)
        tokens = ["%.5f" % val for val in subband_filtering(x, h)]

    elif partIdx == "4":  # This is Quantization

        from assignment4 import quantization

        from parameters import EncoderParameters

        params = EncoderParameters(44100, 2, 64)

        # ndmin=2 guarantees a 2-D array, so a file containing only one
        # line is still processed as a single row.
        val_in = np.loadtxt("data/a4_submissionInput.txt", ndmin=2)

        # Row layout: value, scale factor, bit allocation.
        for row in val_in:
            ba = int(row[2])
            # The qca/qcb tables are indexed by ba - 2.
            QCa = params.table.qca[ba - 2]
            QCb = params.table.qcb[ba - 2]
            tokens.append("%d" % quantization(row[0], row[1], ba, QCa, QCb))

    else:
        print("Unknown assigment part number")

    if tokens:
        fileName = "res%s.txt" % partIdx
        with open(fileName, "w") as f:
            f.write(" ".join(tokens))
        # Announce the file only after it has been closed (fully written).
        print("You can now submit the file " + fileName)
    else:
        print(
            "there was an error with the computation. Please check your code")
def output(partIdx):
  """Run the student code for one assignment part and save the result.

  Args:
    partIdx: part number as a string: '1' runs ScaledFFTdB, '2' runs
      PrototypeFilter, '3' runs SubbandFiltering and '4' runs Quantization.

  Returns:
    None. When the selected part produced values they are written, space
    separated, to ``res<partIdx>.txt`` in the current directory and a
    confirmation is printed; otherwise an error message is printed. An
    unknown ``partIdx`` additionally prints a notice first.
  """
  # Formatted values are gathered here and joined once before writing,
  # instead of concatenating onto a string inside each loop.
  tokens = []

  if partIdx == '1': # This is ScaledFFTdB

    from assignment1 import scaled_fft_db

    # The rate returned alongside the samples is not used.
    _, x = wavfile.read('data/a1_submissionInput.wav')
    tokens = ['%.5f' % val for val in scaled_fft_db(x)]

  elif partIdx == '2': # This is PrototypeFilter

    from assignment2 import prototype_filter

    h = prototype_filter()

    # test signal
    s = np.loadtxt('data/a2_submissionInput.txt')
    # Keep samples 4*512 .. 5*512-1 of the convolution, halved.
    segment = np.convolve(h, s)[4*512:5*512]/2
    tokens = ['%.5f' % val for val in segment]

  elif partIdx == '3': # This is SubbandFiltering

    from assignment3 import subband_filtering

    _, x = wavfile.read('data/a3_submissionInput.wav')

    h = np.hanning(512)
    tokens = ['%.5f' % val for val in subband_filtering(x, h)]

  elif partIdx == '4': # This is Quantization

    from assignment4 import quantization

    from parameters import EncoderParameters
    params = EncoderParameters(44100, 2, 64)

    # ndmin=2 guarantees a 2-D array, so a file containing only one line
    # is still processed as a single row.
    val_in = np.loadtxt('data/a4_submissionInput.txt', ndmin=2)

    # Row layout: value, scale factor, bit allocation.
    for row in val_in:
      ba = int(row[2])
      # The qca/qcb tables are indexed by ba - 2.
      QCa = params.table.qca[ba-2]
      QCb = params.table.qcb[ba-2]
      tokens.append('%d' % quantization(row[0], row[1], ba, QCa, QCb))

  else:
    # Single-argument print(...) works on both Python 2 and Python 3.
    print("Unknown assigment part number")

  if tokens:
    fileName = "res%s.txt" % partIdx
    with open(fileName, "w") as f:
      f.write(' '.join(tokens))
    # Announce the file only after it has been closed (fully written).
    print("You can now submit the file " + fileName)
  else:
    print("there was an error with the computation. Please check your code")
def model1(samples, params, sfindices):
    """Psychoacoustic model as described in ISO/IEC 11172-3, Annex D.1.

    Args:
        samples: input block handed to ``assignment1.scaled_fft_db``.
        params: encoder parameters; ``params.table`` supplies the lookup
            tables used here and ``params`` itself is forwarded to
            ``smr_bit_allocation``.
        sfindices: index (or index array) into ``table.scalefactor``.

    Returns:
        Whatever ``smr_bit_allocation(params, smr)`` returns for the
        per-subband signal-to-mask ratio computed here.
    """

    table = params.table

    X = assignment1.scaled_fft_db(samples)

    # sound pressure level per subband: the larger of the spectral maximum
    # inside the subband and a level derived from the scale factor
    scf = table.scalefactor[sfindices]
    subband_spl = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        start = 1 + sb * SUB_SIZE
        subband_spl[sb] = np.max(X[start:start + SUB_SIZE])
        subband_spl[sb] = np.maximum(subband_spl[sb],
                                     20 * np.log10(scf[0, sb] * 32768) - 10)

    # local maxima of the spectrum; floor division keeps the range bound an
    # integer on Python 3 as well as Python 2
    peaks = [
        i for i in range(3, FFT_SIZE // 2 - 6)
        if X[i] >= X[i + 1] and X[i] > X[i - 1]
    ]

    # determining tonal and non-tonal components
    tonal = TonalComponents(X)
    tonal.flag[0:3] = IGNORE

    for k in peaks:
        # the neighbourhood that must lie at least 7 dB below the peak
        # widens with the line index
        if k > 2 and k < 63:
            testj = [-2, 2]
        elif k >= 63 and k < 127:
            testj = [-3, -2, 2, 3]
        else:
            testj = [-6, -5, -4, -3, -2, 2, 3, 4, 5, 6]
        is_tonal = True
        for j in testj:
            if tonal.spl[k] - tonal.spl[k + j] < 7:
                is_tonal = False
                break
        if is_tonal:
            tonal.spl[k] = add_db(tonal.spl[k - 1:k + 2])
            tonal.flag[k + np.arange(testj[0], testj[-1] + 1)] = IGNORE
            tonal.flag[k] = TONE
            tonal.tonecomps.append(k)

    # non-tonal components for each critical band
    for i in range(table.cbnum - 1):
        weight = 0.0
        msum = DBMIN
        for j in range(table.cbound[i], table.cbound[i + 1]):
            # test the flag of spectral line j; indexing the flags with the
            # critical-band number i looked at an unrelated line
            if tonal.flag[j] == UNSET:
                msum = add_db((tonal.spl[j], msum))
                weight += np.power(
                    10, tonal.spl[j] / 10) * (table.bark[table.map[j]] - i)
        if msum > DBMIN:
            index = weight / np.power(10, msum / 10.0)
            # builtin int() replaces the np.int alias removed from NumPy
            center = table.cbound[i] + int(
                index * (table.cbound[i + 1] - table.cbound[i]))
            if tonal.flag[center] == TONE:
                center += 1
            tonal.flag[center] = NOISE
            tonal.spl[center] = msum
            tonal.noisecomps.append(center)

    # decimation of tonal and non-tonal components
    # under the threshold in quiet
    # (each list is rebuilt so that every component is examined; popping
    # while indexing skipped the element following each removal)
    for comps in (tonal.tonecomps, tonal.noisecomps):
        audible = []
        for k in comps:
            if tonal.spl[k] < table.hear[table.map[k]]:
                tonal.flag[k] = IGNORE
            else:
                audible.append(k)
        comps[:] = audible

    # decimation of tonal components closer than 0.5 Bark
    # tonecomps is in ascending line order (it is filled from the ascending
    # peak list), so the distance is measured from the last kept component
    # up to the current one; of two close components the stronger survives.
    # NOTE(review): assumes table.bark does not decrease along table.map --
    # confirm against the table definition.
    kept = []
    for k in tonal.tonecomps:
        if kept and (table.bark[table.map[k]] -
                     table.bark[table.map[kept[-1]]] < 0.5):
            prev = kept[-1]
            if tonal.spl[prev] > tonal.spl[k]:
                tonal.flag[k] = IGNORE
            else:
                tonal.flag[prev] = IGNORE
                kept[-1] = k
        else:
            kept.append(k)
    tonal.tonecomps[:] = kept

    def _individual_thresholds(components, av_slope, av_offset):
        """Return, per table entry, a tuple of thresholds from components."""
        thresholds = []
        for i in range(table.subsize):
            zi = table.bark[i]
            contributions = ()
            for j in components:
                zj = table.bark[table.map[j]]
                dz = zi - zj
                # a masker only contributes from 3 Bark below to 8 Bark
                # above its own position
                if not (-3 <= dz <= 8):
                    continue
                av = -1.525 - av_slope * zj - av_offset
                if dz < -1:
                    vf = 17 * (dz + 1) - (0.4 * X[j] + 6)
                elif dz < 0:
                    vf = dz * (0.4 * X[j] + 6)
                elif dz < 1:
                    vf = -17 * dz
                else:
                    vf = -(dz - 1) * (17 - 0.15 * X[j]) - 17
                contributions += (X[j] + vf + av, )
            thresholds.append(contributions)
        return thresholds

    # individual masking thresholds
    masking_tonal = _individual_thresholds(tonal.tonecomps, 0.275, 4.5)
    masking_noise = _individual_thresholds(tonal.noisecomps, 0.175, 0.5)

    # global masking thresholds
    masking_global = []
    for i in range(table.subsize):
        maskers = (table.hear[i], ) + masking_tonal[i] + masking_noise[i]
        masking_global.append(add_db(maskers))

    # minimum masking thresholds
    mask = np.zeros(N_SUBBANDS)
    for sb in range(N_SUBBANDS):
        first = table.map[sb * SUB_SIZE]
        after_last = table.map[(sb + 1) * SUB_SIZE - 1] + 1
        mask[sb] = np.min(masking_global[first:after_last])

    # signal-to-mask ratio for each subband
    smr = subband_spl - mask

    subband_bit_allocation = smr_bit_allocation(params, smr)
    return subband_bit_allocation
def model1(samples, params, sfindices):
  """Psychoacoustic model as described in ISO/IEC 11172-3, Annex D.1.

  Args:
    samples: input block handed to ``assignment1.scaled_fft_db``.
    params: encoder parameters; ``params.table`` supplies the lookup tables
      used here and ``params`` itself is forwarded to
      ``smr_bit_allocation``.
    sfindices: index (or index array) into ``table.scalefactor``.

  Returns:
    Whatever ``smr_bit_allocation(params, smr)`` returns for the
    per-subband signal-to-mask ratio computed here.
  """

  table = params.table

  X = assignment1.scaled_fft_db(samples)

  #sound pressure level per subband: the larger of the spectral maximum
  #inside the subband and a level derived from the scale factor
  scf = table.scalefactor[sfindices]
  subband_spl = np.zeros(N_SUBBANDS)
  for sb in range(N_SUBBANDS):
    start = 1 + sb * SUB_SIZE
    subband_spl[sb] = np.max(X[start : start + SUB_SIZE])
    subband_spl[sb] = np.maximum(subband_spl[sb], 20 * np.log10(scf[0,sb] * 32768) - 10)

  #local maxima of the spectrum; floor division keeps the range bound an
  #integer on Python 3 as well as Python 2
  peaks = [i for i in range(3, FFT_SIZE // 2 - 6)
           if X[i] >= X[i+1] and X[i] > X[i-1]]


  #determining tonal and non-tonal components
  tonal = TonalComponents(X)
  tonal.flag[0:3] = IGNORE

  for k in peaks:
    #the neighbourhood that must lie at least 7 dB below the peak widens
    #with the line index
    if k > 2 and k < 63:
      testj = [-2,2]
    elif k >= 63 and k < 127:
      testj = [-3,-2,2,3]
    else:
      testj = [-6,-5,-4,-3,-2,2,3,4,5,6]
    is_tonal = True
    for j in testj:
      if tonal.spl[k] - tonal.spl[k+j] < 7:
        is_tonal = False
        break
    if is_tonal:
      tonal.spl[k] = add_db(tonal.spl[k-1:k+2])
      tonal.flag[k+np.arange(testj[0], testj[-1] + 1)] = IGNORE
      tonal.flag[k] = TONE
      tonal.tonecomps.append(k)


  #non-tonal components for each critical band
  for i in range(table.cbnum - 1):
    weight = 0.0
    msum = DBMIN
    for j in range(table.cbound[i], table.cbound[i+1]):
      #test the flag of spectral line j; indexing the flags with the
      #critical-band number i looked at an unrelated line
      if tonal.flag[j] == UNSET:
        msum = add_db((tonal.spl[j], msum))
        weight += np.power(10, tonal.spl[j] / 10) * (table.bark[table.map[j]] - i)
    if msum > DBMIN:
      index  = weight/np.power(10, msum / 10.0)
      #builtin int() replaces the np.int alias removed from NumPy
      center = table.cbound[i] + int(index * (table.cbound[i+1] - table.cbound[i]))
      if tonal.flag[center] == TONE:
        center += 1
      tonal.flag[center] = NOISE
      tonal.spl[center] = msum
      tonal.noisecomps.append(center)


  #decimation of tonal and non-tonal components
  #under the threshold in quiet
  #(each list is rebuilt so that every component is examined; popping while
  #indexing skipped the element following each removal)
  for comps in (tonal.tonecomps, tonal.noisecomps):
    audible = []
    for k in comps:
      if tonal.spl[k] < table.hear[table.map[k]]:
        tonal.flag[k] = IGNORE
      else:
        audible.append(k)
    comps[:] = audible


  #decimation of tonal components closer than 0.5 Bark
  #tonecomps is in ascending line order (it is filled from the ascending
  #peak list), so the distance is measured from the last kept component up
  #to the current one; of two close components the stronger survives.
  #NOTE(review): assumes table.bark does not decrease along table.map --
  #confirm against the table definition.
  kept = []
  for k in tonal.tonecomps:
    if kept and table.bark[table.map[k]] - table.bark[table.map[kept[-1]]] < 0.5:
      prev = kept[-1]
      if tonal.spl[prev] > tonal.spl[k]:
        tonal.flag[k] = IGNORE
      else:
        tonal.flag[prev] = IGNORE
        kept[-1] = k
    else:
      kept.append(k)
  tonal.tonecomps[:] = kept


  def _individual_thresholds(components, av_slope, av_offset):
    """Return, per table entry, a tuple of thresholds from components."""
    thresholds = []
    for i in range(table.subsize):
      zi = table.bark[i]
      contributions = ()
      for j in components:
        zj = table.bark[table.map[j]]
        dz = zi - zj
        #a masker only contributes from 3 Bark below to 8 Bark above its
        #own position
        if not (-3 <= dz <= 8):
          continue
        av = -1.525 - av_slope * zj - av_offset
        if dz < -1:
          vf = 17 * (dz + 1) - (0.4 * X[j] + 6)
        elif dz < 0:
          vf = dz * (0.4 * X[j] + 6)
        elif dz < 1:
          vf = -17 * dz
        else:
          vf = -(dz - 1) * (17 - 0.15 * X[j]) - 17
        contributions += (X[j] + vf + av,)
      thresholds.append(contributions)
    return thresholds


  #individual masking thresholds
  masking_tonal = _individual_thresholds(tonal.tonecomps, 0.275, 4.5)
  masking_noise = _individual_thresholds(tonal.noisecomps, 0.175, 0.5)


  #global masking thresholds
  masking_global = []
  for i in range(table.subsize):
    maskers = (table.hear[i],) + masking_tonal[i] + masking_noise[i]
    masking_global.append(add_db(maskers))


  #minimum masking thresholds
  mask = np.zeros(N_SUBBANDS)
  for sb in range(N_SUBBANDS):
    first = table.map[sb * SUB_SIZE]
    after_last  = table.map[(sb + 1) * SUB_SIZE - 1] + 1
    mask[sb] = np.min(masking_global[first:after_last])


  #signal-to-mask ratio for each subband
  smr = subband_spl - mask


  subband_bit_allocation = smr_bit_allocation(params, smr)
  return subband_bit_allocation