Exemple #1
0
def writeHuffTable2(huffTable, f):
  """Write a Huffman table whose keys are nucleotide strings to ``f``.

  Every field is built as a string of '0'/'1' characters and handed to
  ``bits.stringToBitsOut``, in this order:

    * the number of entries, zero-padded to 32 bits;
    * for every entry: the encoded key, the codeword length zero-padded to
      8 bits, and then the codeword itself.

  A key is encoded character by character (A=00, C=01, G=100, T=101, N=110)
  and closed with the terminator 111.  Characters outside that alphabet
  contribute no bits to the key.

  Entries whose codeword length does not fit in 8 bits are reported on stderr
  and left out.  They are filtered before the header is written so that the
  stored entry count always equals the number of entries that follow it.

  Args:
    huffTable: mapping from key string to codeword (presumably a '0'/'1'
      string; it is forwarded as-is to ``bits.stringToBitsOut``).
    f: output handle forwarded to the ``bits`` helpers.
  """
  baseCodes = {'A': '00', 'C': '01', 'G': '100', 'T': '101', 'N': '110'}
  keyTerminator = '111'

  # First pass: encode and validate every entry without touching the output,
  # so that the header can carry the count of entries actually emitted.
  entries = []
  for (myKey, myCodeword) in huffTable.items():
    myCodeLen = bin(len(myCodeword))[2:]

    # 8 bits is not enough for the length of the codeword, so there is an error
    if len(myCodeLen) > 8:
      sys.stderr.write("Codeword length is too long!\n")
      continue

    keyOut = ''.join([baseCodes.get(base, '') for base in myKey]) + keyTerminator
    entries.append((keyOut, myCodeLen.zfill(8), myCodeword))

  # Header: number of entries that will really be written.
  numEntriesOut = bin(len(entries))[2:]
  if len(numEntriesOut) > 32:
    # Best effort: the problem is reported and the unpadded value is still
    # written below.
    sys.stderr.write("Number of entries is too long!\n")
  else:
    numEntriesOut = numEntriesOut.zfill(32)

  # (remainder, numLeft) is the partial-byte state threaded through every
  # call into the bits module.
  remainder = 0
  numLeft   = 0
  (remainder, numLeft) = bits.stringToBitsOut(numEntriesOut, f, remainder, numLeft)

  # Second pass: key, codeword length, codeword for each surviving entry.
  for entry in entries:
    for field in entry:
      (remainder, numLeft) = bits.stringToBitsOut(field, f, remainder, numLeft)

  bits.flushBitsOutput(f, remainder, numLeft)
Exemple #2
0
def writeHuffTable(huffTable, f):
  """Write a Huffman table whose keys are integers to ``f``.

  Every field is built as a string of '0'/'1' characters and handed to
  ``bits.stringToBitsOut``, in this order:

    * the number of entries, zero-padded to 32 bits;
    * for every entry: the key (``int(key)`` in binary, zero-padded to
      32 bits), the codeword length zero-padded to 8 bits, and then the
      codeword itself.

  Entries whose key needs more than 32 bits, or whose codeword length needs
  more than 8 bits, are reported on stderr and left out.  They are filtered
  before the header is written so that the stored entry count always equals
  the number of entries that follow it.

  Args:
    huffTable: mapping from key (anything ``int()`` accepts) to codeword
      (presumably a '0'/'1' string; it is forwarded as-is to
      ``bits.stringToBitsOut``).
    f: output handle forwarded to the ``bits`` helpers.
  """
  # First pass: encode and validate every entry without touching the output,
  # so that the header can carry the count of entries actually emitted.
  entries = []
  for (myKey, myCodeword) in huffTable.items():
    # NOTE(review): keys are assumed to be non-negative; bin() of a negative
    # number carries a sign that this slicing does not handle.
    keyOut    = bin(int(myKey))[2:]
    myCodeLen = bin(len(myCodeword))[2:]

    # 32 bits is not enough for the key, so there is an error
    if len(keyOut) > 32:
      sys.stderr.write("Key is too long!\n")
      continue

    # 8 bits is not enough for the length of the codeword, so there is an error
    if len(myCodeLen) > 8:
      sys.stderr.write("Codeword length is too long!\n")
      continue

    entries.append((keyOut.zfill(32), myCodeLen.zfill(8), myCodeword))

  # Header: number of entries that will really be written.
  numEntriesOut = bin(len(entries))[2:]
  if len(numEntriesOut) > 32:
    # Best effort: the problem is reported and the unpadded value is still
    # written below.
    sys.stderr.write("Number of entries is too long!\n")
  else:
    numEntriesOut = numEntriesOut.zfill(32)

  # (remainder, numLeft) is the partial-byte state threaded through every
  # call into the bits module.
  remainder = 0
  numLeft   = 0
  (remainder, numLeft) = bits.stringToBitsOut(numEntriesOut, f, remainder, numLeft)

  # Second pass: key, codeword length, codeword for each surviving entry.
  for entry in entries:
    for field in entry:
      (remainder, numLeft) = bits.stringToBitsOut(field, f, remainder, numLeft)

  bits.flushBitsOutput(f, remainder, numLeft)
Exemple #3
0
def writeGolombCodedHuffTable(huffTable, f):
  """Write an integer-keyed Huffman table to ``f`` with Golomb-coded keys.

  The entries are sorted by ``int(key)`` and split into two regions:

    * a dense prefix, the run of consecutive keys starting at the smallest
      key; these keys are not written, only their codewords;
    * a sparse remainder, where each key is written as the Golomb code of
      ``key - previousKey - 1`` with parameter M.

  Every field is built as a string of '0'/'1' characters and handed to
  ``bits.stringToBitsOut``, in this order:

    1. number of entries            (padded to ``tableSize`` bits)
    2. index where the sparse region starts (``denseSize`` bits)
    3. smallest key, i.e. the first dense key (``countSize`` bits)
    4. Golomb parameter M           (``MSize`` bits)
    5. per entry: the Golomb-coded key (sparse region only), the codeword
       length (``lenSize`` bits) and the codeword.

  ``tableSize``, ``denseSize``, ``countSize``, ``MSize`` and ``lenSize`` are
  module-level names defined outside this function.

  Entries whose codeword length does not fit in ``lenSize`` bits are reported
  on stderr and dropped before anything is derived from the table, so the
  entry count, the dense run and the deltas all describe exactly the entries
  that are written.

  Args:
    huffTable: mapping from key (anything ``int()`` accepts) to codeword
      (presumably a '0'/'1' string; it is forwarded as-is to
      ``bits.stringToBitsOut``).
    f: output handle forwarded to the ``bits`` helpers.

  Raises:
    ValueError: if the smallest key is negative.
  """
  def fixedWidth(value, width, errMsg):
    # Binary text of value, zero-padded on the left to width characters.  A
    # value that does not fit is reported and returned unpadded (best effort).
    valueOut = bin(value)[2:]
    if len(valueOut) > width:
      sys.stderr.write(errMsg + "\n")
      return valueOut
    return valueOut.zfill(width)

  # Keep only entries whose codeword length fits, then sort by integer key.
  sortedKeyValueList = []
  for (key, val) in huffTable.items():
    if len(bin(len(val))[2:]) > lenSize:
      sys.stderr.write("Codeword length is too long!\n")
      continue
    sortedKeyValueList.append((int(key), val))
  sortedKeyValueList.sort()
  numEntries = len(sortedKeyValueList)

  # The header stores the smallest key as an unsigned value.
  if numEntries and sortedKeyValueList[0][0] < 0:
    raise ValueError("Huffman table keys must be non-negative integers")

  # count is the smallest key; sparseStart is the length of the run of
  # consecutive keys that begins there.  The scan is bounded by numEntries so
  # a fully dense (or empty) table is handled too.
  count = sortedKeyValueList[0][0] if numEntries else 0
  sparseStart = 0
  while (sparseStart < numEntries and
         sortedKeyValueList[sparseStart][0] == count + sparseStart):
    sparseStart += 1

  # Gaps between successive keys in the sparse region (0 would mean adjacent).
  sparseDeltas = [sortedKeyValueList[i][0]-sortedKeyValueList[i-1][0]-1
                  for i in range(sparseStart, numEntries)]

  # M is the integer mean of the gaps, clamped to 1: a Golomb parameter of 0
  # is meaningless (division by zero, log of zero) and there may be no sparse
  # region at all.
  if sparseDeltas:
    M = max(1, sum(sparseDeltas) // len(sparseDeltas))
  else:
    M = 1

  # M also tells us the length (in bits) of our remainder part
  MLen = int(math.ceil(math.log(M, 2)))

  # (remainder, numLeft) is the partial-byte state threaded through every
  # call into the bits module.
  remainder = 0
  numLeft   = 0

  # Header fields, in stream order.
  headerFields = (
    fixedWidth(numEntries,  tableSize, "Number of entries is too long!"),
    fixedWidth(sparseStart, denseSize, "Start of sparse integers too large!"),
    fixedWidth(count,       countSize, "count value too large!"),
    fixedWidth(M,           MSize,     "M value too large!"),
  )
  for field in headerFields:
    (remainder, numLeft) = bits.stringToBitsOut(field, f, remainder, numLeft)

  # Entries, in key order.
  for (ind, entry) in enumerate(sortedKeyValueList):
    codeword = entry[1]

    # Keys are only written in the sparse region.
    if ind >= sparseStart:
      (quo, rem) = divmod(sparseDeltas[ind-sparseStart], M)

      # Golomb code: quotient in unary ('1' * quo, closed by '0') followed by
      # the remainder in binary padded to MLen bits.
      # NOTE(review): when M == 1, MLen is 0 but bin(0) still yields one '0'
      # character, so one remainder bit is emitted -- confirm the reader
      # expects this.
      keyOut = '1'*quo + '0' + bin(rem)[2:].zfill(MLen)
      (remainder, numLeft) = bits.stringToBitsOut(keyOut, f, remainder, numLeft)

    # Codeword length (already known to fit in lenSize bits), then codeword.
    codeLen = bin(len(codeword))[2:].zfill(lenSize)
    (remainder, numLeft) = bits.stringToBitsOut(codeLen, f, remainder, numLeft)
    (remainder, numLeft) = bits.stringToBitsOut(codeword, f, remainder, numLeft)

  bits.flushBitsOutput(f, remainder, numLeft)