예제 #1
0
def OutputBitList(bit_list, stream):
  """Writes a list of bits to |stream| as packed bytes.

  Each chunk yielded by code_generator_util.SplitChunk(bit_list, 8) becomes
  one unsigned byte. Within a chunk, the bit at position i is stored in bit i
  of the byte, i.e. the first bit lands in the least significant bit.

  Args:
    bit_list: sequence of truthy/falsy values whose length is a multiple of 8.
    stream: object with a write() method; it receives one packed byte per
      chunk.
  """
  # A partial trailing byte cannot be represented, so insist on alignment.
  assert len(bit_list) % 8 == 0
  for octet in code_generator_util.SplitChunk(bit_list, 8):
    # Every set position contributes a distinct power of two (LSB first), so
    # summing them is the same as OR-ing them together.
    packed = sum(1 << position for position, flag in enumerate(octet) if flag)
    stream.write(struct.pack('B', packed))
예제 #2
0
def BuildBinaryData(matrix, mode_value_list, use_1byte_cost):
  """Serializes the connection matrix into its compressed binary image.

  Args:
    matrix: list of rows. Each cell is either an integer cost or None; None
      marks a cell that is left out of the row data (per the scheme below, a
      cell equal to the row's mode value). len(matrix) is written to the
      header as both the number of rids and the number of lids.
    mode_value_list: per-row mode values; each must fit in an unsigned 16-bit
      integer.
    use_1byte_cost: if true, costs are divided by RESOLUTION_FOR_1BYTE and
      stored in one byte each (INVALID_COST maps to INVALID_1BYTE_COST);
      otherwise costs are stored as-is in two bytes each.

  Returns:
    The serialized data, as returned by the StringIO buffer's getvalue().
  """
  # To compress the connection data, we use two-level succinct bit vector.
  #
  # The basic idea to compress the rid-lid matrix is compressing each row as
  # follows:
  # find the mode value of the row, and set the cells containing the value
  # empty, thus we get a sparse array.
  # We can compress sparse array by using succinct bit vector.
  # (Please see also storage/louds/simple_succinct_bit_vector_index and
  # storage/louds/bit_vector_based_array.)
  # In addition, we compress the bit vector, too. Fortunately the distribution
  # of bits is biased, so we group consecutive 8-bits and create another
  # bit vector, named chunk-bits;
  # - if no bits are 1, the corresponding bit is 0, otherwise 1.
  # By using the bit vector, we can compact the original bit vector by skipping
  # consecutive eight 0-bits. We can calculate the actual bit position in
  # the compact bit vector by using Rank1 operation on chunk-bits.
  #
  # The file format is as follows:
  # FILE_MAGIC (\xAB\xCD): 2bytes
  # Resolution: 2bytes
  # Num rids: 2bytes
  # Num lids: 2bytes
  # A list of mode values: 2bytes * rids (aligned to 32bits)
  # A list of row data.
  #
  # The row data format is as follows:
  # The size of compact bits in bytes: 2bytes
  # The size of values in bytes: 2bytes
  # chunk_bits, compact_bits, followed by values.

  if use_1byte_cost:
    resolution = RESOLUTION_FOR_1BYTE
  else:
    resolution = 1
  stream = StringIO.StringIO()

  # Output header.
  stream.write(FILE_MAGIC)
  matrix_size = len(matrix)
  assert 0 <= matrix_size <= 65535
  stream.write(struct.pack('<HHH', resolution, matrix_size, matrix_size))

  # Output mode value list.
  for value in mode_value_list:
    # '<H' is an unsigned 16-bit field, so 65535 is the largest legal value.
    assert 0 <= value <= 65535
    stream.write(struct.pack('<H', value))

  # 4 bytes alignment: the header is 8 bytes and each mode value is 2 bytes,
  # so only an odd number of mode values needs padding.
  if len(mode_value_list) % 2:
    stream.write('\x00\x00')

  # Process each row:
  for row in matrix:
    chunk_bits = []
    compact_bits = []
    values = []

    for chunk in code_generator_util.SplitChunk(row, 8):
      if all(cost is None for cost in chunk):
        # All bits are 0, so output 0-chunk bit and skip the chunk entirely.
        chunk_bits.append(False)
        continue

      chunk_bits.append(True)
      for cost in chunk:
        if cost is None:
          compact_bits.append(False)
          continue

        compact_bits.append(True)
        if use_1byte_cost:
          if cost == INVALID_COST:
            cost = INVALID_1BYTE_COST
          else:
            # Floor division keeps the cost an integer regardless of the
            # division semantics in effect ('/' may yield a float).
            cost //= resolution
            # The scaled cost must not collide with the invalid marker.
            assert cost != INVALID_1BYTE_COST
        values.append(cost)

    # 4 bytes alignment.
    while len(chunk_bits) % 32:
      chunk_bits.append(False)
    while len(compact_bits) % 32:
      compact_bits.append(False)
    if use_1byte_cost:
      # One byte per value: pad to a multiple of four values.
      while len(values) % 4:
        values.append(0)
      values_size = len(values)
    else:
      # Two bytes per value: pad to an even number of values.
      while len(values) % 2:
        values.append(0)
      values_size = len(values) * 2

    # Output the bits for a row. compact_bits is padded to a multiple of 32,
    # so the floor division by 8 is exact.
    stream.write(struct.pack('<HH', len(compact_bits) // 8, values_size))
    OutputBitList(chunk_bits, stream)
    OutputBitList(compact_bits, stream)
    if use_1byte_cost:
      for value in values:
        assert 0 <= value <= 255
        stream.write(struct.pack('<B', value))
    else:
      for value in values:
        assert 0 <= value <= 65535
        stream.write(struct.pack('<H', value))

  return stream.getvalue()