def cell_header(self, cellbytes):
    """Decode the leading fields of a raw cell.

    The cell is read as a payload-size varint, followed by a row id varint,
    followed by a short of SIZEOFSHORT bytes.

    Args:
        cellbytes: raw bytes of the cell, starting at the payload-size varint.

    Returns:
        A pair ``(header, offset)`` where ``header`` is the list
        ``[payload size, row id, short value]`` and ``offset`` is the byte
        position directly behind the short.
    """
    hex_cell = binascii.hexlify(cellbytes)

    # A varint is at most 9 bytes long, i.e. 18 hex characters.
    # parse_next_var_int is used here as returning (value, length in bytes).
    payload_info = _sqliteVarInt.parse_next_var_int(hex_cell[:18])
    payload_varint_len = payload_info[1]

    # The row id varint starts directly behind the payload-size varint.
    row_id_start = payload_varint_len * 2
    row_id_end = (payload_varint_len + 9) * 2
    row_id_info = _sqliteVarInt.parse_next_var_int(hex_cell[row_id_start:row_id_end])

    # Byte range of the short that follows both varints.
    short_start = payload_varint_len + row_id_info[1]
    short_end = short_start + SIZEOFSHORT
    # NOTE(review): '=H' decodes in native byte order while the SQLite file
    # format stores multi-byte integers big-endian -- confirm this is intended.
    short_value = struct.unpack('=H', cellbytes[short_start:short_end])[0]

    return [payload_info[0], row_id_info[0], short_value], short_end
def parse_record(record_hex_string):
    """Split a record (given as a hex string) into its column values.

    The record starts with a varint holding the header length in bytes,
    followed by one varint per column; the column contents follow the header.

    Args:
        record_hex_string: the record payload, two hex characters per byte.

    Returns:
        List with one decoded entry per header field, in header order.
    """
    _adel_log.log("parse_record: ----> parsing record header....", 4)

    # A varint is at most 9 bytes long, i.e. 18 hex characters.
    header_length_tuple = _sqliteVarInt.parse_next_var_int(record_hex_string[0:18])
    header_end = header_length_tuple[0] * 2          # header length counts from the record start
    fields_start = header_length_tuple[1] * 2        # skip the header-length varint itself
    record_header_field_list = _sqliteVarInt.parse_all_var_ints(
        record_hex_string[fields_start:header_end])
    _adel_log.log(
        "parse_record: OK - record header field list is %(record_header_field_list)s"
        % {"record_header_field_list": record_header_field_list}, 4)

    # Walk the content area, one entry per header field.
    content_list = []
    content_offset = header_end
    for field in record_header_field_list:
        entry_content = parse_content_entry(field, record_hex_string, content_offset)
        content_list.append(entry_content[0])
        # entry_content[1] is a size in bytes; the offset is counted in hex characters
        content_offset += entry_content[1] * 2

    _adel_log.log("parse_record: OK - returning list of record contents", 4)
    _adel_log.log("parse_record: ----> record header parsed", 4)
    return content_list
def parse_table_btree_interior_cell(page_hex_string, page_offset):
    """Parse a table b-tree interior cell from a page given as a hex string.

    The cell is read as a 4-byte left child page number followed by a varint
    row id (key).

    Args:
        page_hex_string: page content, two hex characters per byte.
        page_offset: offset of the cell within the page, in bytes.

    Returns:
        The list ``[left_child_pointer, row_id]``.
    """
    page_offset_in_bytes = page_offset  # byte offset, only needed for the log messages
    # One byte is two characters of the hex string, so slice bounds are in nibbles.
    pointer_start = page_offset_in_bytes * 2
    pointer_end = pointer_start + (4 * 2)
    row_id_end = pointer_start + ((4 + 9) * 2)  # a varint is at most 9 bytes long
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> parsing b-tree interior cell at offset %(page_offset_in_bytes)s...."
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)

    # The first 4 bytes of the cell hold the left child pointer.
    left_child_pointer = int(page_hex_string[pointer_start:pointer_end], 16)
    _adel_log.log(
        "parse_table_btree_interior_cell: OK - left child pointer is: %(left_child_pointer)s"
        % {"left_child_pointer": left_child_pointer}, 4)

    # The row id varint follows the pointer.
    row_id = _sqliteVarInt.parse_next_var_int(page_hex_string[pointer_end:row_id_end])[0]
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> row_id (index) is: %(row_id)s...."
        % {"row_id": row_id}, 4)

    node_tuple = [left_child_pointer, row_id]
    _adel_log.log(
        "parse_table_btree_interior_cell: OK - returning tuple of node content: %(node_tuple)s"
        % {"node_tuple": node_tuple}, 4)
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> b-tree interior cell at offset %(page_offset_in_bytes)s parsed"
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)
    return node_tuple
def cell_header(self, cellbytes):
    """Decode the leading fields of a raw cell.

    The cell is read as a payload-size varint, followed by a row id varint,
    followed by a short of SIZEOFSHORT bytes.

    Args:
        cellbytes: raw bytes of the cell, starting at the payload-size varint.

    Returns:
        A pair ``(header, offset)`` where ``header`` is the list
        ``[payload size, row id, short value]`` and ``offset`` is the byte
        position directly behind the short.
    """
    hex_cell = binascii.hexlify(cellbytes)

    # A varint is at most 9 bytes long, i.e. 18 hex characters.
    # parse_next_var_int is used here as returning (value, length in bytes).
    payload_info = _sqliteVarInt.parse_next_var_int(hex_cell[:18])
    payload_varint_len = payload_info[1]

    # The row id varint starts directly behind the payload-size varint.
    row_id_start = payload_varint_len * 2
    row_id_end = (payload_varint_len + 9) * 2
    row_id_info = _sqliteVarInt.parse_next_var_int(hex_cell[row_id_start:row_id_end])

    # Byte range of the short that follows both varints.
    short_start = payload_varint_len + row_id_info[1]
    short_end = short_start + SIZEOFSHORT
    # NOTE(review): '=H' decodes in native byte order while the SQLite file
    # format stores multi-byte integers big-endian -- confirm this is intended.
    short_value = struct.unpack('=H', cellbytes[short_start:short_end])[0]

    return [payload_info[0], row_id_info[0], short_value], short_end
def parse_table_btree_interior_cell(page_hex_string, page_offset):
    """Parse a table b-tree interior cell from a page given as a hex string.

    The cell is read as a 4-byte left child page number followed by a varint
    row id (key).

    Args:
        page_hex_string: page content, two hex characters per byte.
        page_offset: offset of the cell within the page, in bytes.

    Returns:
        The list ``[left_child_pointer, row_id]``.
    """
    page_offset_in_bytes = page_offset  # byte offset, only needed for the log messages
    # One byte is two characters of the hex string, so slice bounds are in nibbles.
    pointer_start = page_offset_in_bytes * 2
    pointer_end = pointer_start + (4 * 2)
    row_id_end = pointer_start + ((4 + 9) * 2)  # a varint is at most 9 bytes long
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> parsing b-tree interior cell at offset %(page_offset_in_bytes)s...."
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)

    # The first 4 bytes of the cell hold the left child pointer.
    left_child_pointer = int(page_hex_string[pointer_start:pointer_end], 16)
    _adel_log.log(
        "parse_table_btree_interior_cell: OK - left child pointer is: %(left_child_pointer)s"
        % {"left_child_pointer": left_child_pointer}, 4)

    # The row id varint follows the pointer.
    row_id = _sqliteVarInt.parse_next_var_int(page_hex_string[pointer_end:row_id_end])[0]
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> row_id (index) is: %(row_id)s...."
        % {"row_id": row_id}, 4)

    node_tuple = [left_child_pointer, row_id]
    _adel_log.log(
        "parse_table_btree_interior_cell: OK - returning tuple of node content: %(node_tuple)s"
        % {"node_tuple": node_tuple}, 4)
    _adel_log.log(
        "parse_table_btree_interior_cell: ----> b-tree interior cell at offset %(page_offset_in_bytes)s parsed"
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)
    return node_tuple
def parse_record(record_hex_string):
    """Split a record (given as a hex string) into its column values.

    The record starts with a varint holding the header length in bytes,
    followed by one varint per column; the column contents follow the header.

    Args:
        record_hex_string: the record payload, two hex characters per byte.

    Returns:
        List with one decoded entry per header field, in header order.
    """
    _adel_log.log("parse_record: ----> parsing record header....", 4)

    # A varint is at most 9 bytes long, i.e. 18 hex characters.
    header_length_tuple = _sqliteVarInt.parse_next_var_int(record_hex_string[0:18])
    header_end = header_length_tuple[0] * 2          # header length counts from the record start
    fields_start = header_length_tuple[1] * 2        # skip the header-length varint itself
    record_header_field_list = _sqliteVarInt.parse_all_var_ints(
        record_hex_string[fields_start:header_end])
    _adel_log.log(
        "parse_record: OK - record header field list is %(record_header_field_list)s"
        % {"record_header_field_list": record_header_field_list}, 4)

    # Walk the content area, one entry per header field.
    content_list = []
    content_offset = header_end
    for field in record_header_field_list:
        entry_content = parse_content_entry(field, record_hex_string, content_offset)
        content_list.append(entry_content[0])
        # entry_content[1] is a size in bytes; the offset is counted in hex characters
        content_offset += entry_content[1] * 2

    _adel_log.log("parse_record: OK - returning list of record contents", 4)
    _adel_log.log("parse_record: ----> record header parsed", 4)
    return content_list
def parse_table_btree_leaf_cell(page_hex_string, page_offset, cell_pointers, free_block_pointer):
    """Parse the table b-tree leaf cell at ``page_offset`` and return its contents.

    The cell is read as a payload-size varint, a row id varint and then the
    record payload. If the payload is larger than the space between the cell
    start and the end of the page, the part stored on this page is taken to
    run up to the next cell, the next free block or the end of the usable page
    space (whichever comes first); its last 4 bytes are read as the number of
    the first overflow page, and the content of the overflow chain is appended.

    Args:
        page_hex_string: page content, two hex characters per byte.
        page_offset: offset of the cell within the page, in bytes.
        cell_pointers: iterable of byte offsets of the cells on this page.
        free_block_pointer: passed through to ``parse_free_blocks`` to locate
            the free blocks of this page.

    Returns:
        The list ``[row_id, value_1, value_2, ...]`` with the parsed record
        values, or an empty list if the first overflow page could not be read.
    """
    # 1 byte is represented by two characters in the hex string, so internally
    # the offset is handled in nibbles.
    page_offset_in_bytes = page_offset  # byte offset, used for comparisons and logging
    page_offset = page_offset * 2  # nibble offset into page_hex_string
    db_page_size_in_bytes = _sqliteFileHandler.DB_PAGESIZE_IN_BYTES
    usable_page_space = db_page_size_in_bytes - _sqliteFileHandler.DB_RESERVED_SPACE
    _adel_log.log("parse_table_btree_leaf_cell: ----> parsing b-tree leaf cell at offset %(page_offset_in_bytes)s...." % vars(), 4)

    # Get total number of bytes of payload (a varint is at most 9 bytes = 18 nibbles long)
    bytes_of_payload_tuple = _sqliteVarInt.parse_next_var_int(page_hex_string[page_offset:(page_offset + 18)])
    bytes_of_payload = bytes_of_payload_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell: OK - payload is %(bytes_of_payload)s bytes long" % vars(), 4)

    # Get row_id, which follows the payload-size varint
    row_id_string = page_hex_string[(page_offset + (bytes_of_payload_tuple[1] * 2)):(page_offset + (bytes_of_payload_tuple[1] + 9) * 2)]
    row_id_tuple = _sqliteVarInt.parse_next_var_int(row_id_string)
    row_id = row_id_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell: ----> extracting contents for row_id %(row_id)s...." % vars(), 4)

    # Nibble offset of the first payload character (behind both varints)
    record_start = page_offset + (bytes_of_payload_tuple[1] + row_id_tuple[1]) * 2

    # Check for overflow pages and append content of those pages, if any.
    # NOTE(review): the SQLite file format decides overflow by payload-size
    # thresholds derived from the usable page size, not by the space remaining
    # behind the cell start -- confirm this heuristic covers all cases.
    remaining_page_space = db_page_size_in_bytes - page_offset_in_bytes
    if bytes_of_payload > remaining_page_space:
        # We expect content to overflow, because there is not enough space left on this page
        _adel_log.log("parse_table_btree_leaf_cell: OK - payload is too large for this page, there are overflow pages", 4)

        # Closest cell that starts behind this one (usable_page_space if none)
        next_cell = usable_page_space
        for cell_pointer in cell_pointers:
            if page_offset_in_bytes < cell_pointer < next_cell:
                next_cell = cell_pointer

        # Closest free block that starts behind this cell (usable_page_space if none).
        # Possible free bytes are ignored: none are expected at the end of a
        # cell that overflows to another page.
        next_free_block = usable_page_space
        for free_block in parse_free_blocks(page_hex_string, free_block_pointer):
            if page_offset_in_bytes < free_block[0] < next_free_block:
                next_free_block = free_block[0]

        # The record ends at the closest following cell, the closest following
        # free block or the end of the usable page. Both candidates are capped
        # at usable_page_space, so the minimum covers all three cases.
        end_of_record = min(next_cell, next_free_block)

        # Cut record hex string from the beginning to the offset of the next following element
        record_hex_string = page_hex_string[record_start:(end_of_record * 2)]
        # String length is counted in nibbles, bytes are needed here. Floor
        # division keeps the result an int so it stays usable as a slice index.
        record_hex_string_length = len(record_hex_string) // 2

        # The last 4 bytes of the local part hold the first overflow page number
        first_overflow_page_number = int(record_hex_string[((record_hex_string_length - 4) * 2):(record_hex_string_length * 2)], 16)
        _adel_log.log("parse_table_btree_leaf_cell: ----> parsing overflow page chain beginning at page %(first_overflow_page_number)s...." % vars(), 4)

        # Cut off overflow page number from record_hex_string
        record_hex_string = record_hex_string[:((record_hex_string_length - 4) * 2)]
        first_overflow_page_string = _sqliteFileHandler.read_page(first_overflow_page_number)
        # An empty string is treated as "page could not be retrieved"
        if first_overflow_page_string == "":
            _adel_log.log("parse_table_btree_leaf_cell: ERROR - invalid overflow page pointer, cannot reference first overflow page: " + str(first_overflow_page_number), 1)
            return []

        # Append content from overflow pages
        record_hex_string += parse_overflow_page_chain(first_overflow_page_string)

        # Ensure correct length of string (maybe not all bytes of the last
        # overflow page in the chain contain content)
        record_hex_string_length = len(record_hex_string) // 2
        if bytes_of_payload < record_hex_string_length:
            record_hex_string = record_hex_string[:bytes_of_payload * 2]
    else:
        # The entire payload is stored on this page
        record_hex_string = page_hex_string[record_start:(record_start + bytes_of_payload * 2)]

    # Parse the record
    read_content_list = parse_record(record_hex_string)

    # Build the resulting list (including the row_id used by sqlite internally)
    cell_content_list = [row_id]
    cell_content_list.extend(read_content_list)

    _adel_log.log("parse_table_btree_leaf_cell: OK - returning list of cell contents", 4)
    _adel_log.log("parse_table_btree_leaf_cell: ----> b-tree leaf cell at offset %(page_offset_in_bytes)s parsed" % vars(), 4)
    return cell_content_list
def parse_table_btree_leaf_cell(page_hex_string, page_offset, cell_pointers, free_block_pointer):
    """Parse the table b-tree leaf cell at ``page_offset`` and return its contents.

    The cell is read as a payload-size varint, a row id varint and then the
    record payload. If the payload is larger than the space between the cell
    start and the end of the page, the part stored on this page is taken to
    run up to the next cell, the next free block or the end of the usable page
    space (whichever comes first); its last 4 bytes are read as the number of
    the first overflow page, and the content of the overflow chain is appended.

    Args:
        page_hex_string: page content, two hex characters per byte.
        page_offset: offset of the cell within the page, in bytes.
        cell_pointers: iterable of byte offsets of the cells on this page.
        free_block_pointer: passed through to ``parse_free_blocks`` to locate
            the free blocks of this page.

    Returns:
        The list ``[row_id, value_1, value_2, ...]`` with the parsed record
        values, or an empty list if the first overflow page could not be read.
    """
    # 1 byte is represented by two characters in the hex string, so internally
    # the offset is handled in nibbles.
    page_offset_in_bytes = page_offset  # byte offset, used for comparisons and logging
    page_offset = page_offset * 2  # nibble offset into page_hex_string
    db_page_size_in_bytes = _sqliteFileHandler.DB_PAGESIZE_IN_BYTES
    usable_page_space = db_page_size_in_bytes - _sqliteFileHandler.DB_RESERVED_SPACE
    _adel_log.log("parse_table_btree_leaf_cell: ----> parsing b-tree leaf cell at offset %(page_offset_in_bytes)s...." % vars(), 4)

    # Get total number of bytes of payload (a varint is at most 9 bytes = 18 nibbles long)
    bytes_of_payload_tuple = _sqliteVarInt.parse_next_var_int(page_hex_string[page_offset:(page_offset + 18)])
    bytes_of_payload = bytes_of_payload_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell: OK - payload is %(bytes_of_payload)s bytes long" % vars(), 4)

    # Get row_id, which follows the payload-size varint
    row_id_string = page_hex_string[(page_offset + (bytes_of_payload_tuple[1] * 2)):(page_offset + (bytes_of_payload_tuple[1] + 9) * 2)]
    row_id_tuple = _sqliteVarInt.parse_next_var_int(row_id_string)
    row_id = row_id_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell: ----> extracting contents for row_id %(row_id)s...." % vars(), 4)

    # Nibble offset of the first payload character (behind both varints)
    record_start = page_offset + (bytes_of_payload_tuple[1] + row_id_tuple[1]) * 2

    # Check for overflow pages and append content of those pages, if any.
    # NOTE(review): the SQLite file format decides overflow by payload-size
    # thresholds derived from the usable page size, not by the space remaining
    # behind the cell start -- confirm this heuristic covers all cases.
    remaining_page_space = db_page_size_in_bytes - page_offset_in_bytes
    if bytes_of_payload > remaining_page_space:
        # We expect content to overflow, because there is not enough space left on this page
        _adel_log.log("parse_table_btree_leaf_cell: OK - payload is too large for this page, there are overflow pages", 4)

        # Closest cell that starts behind this one (usable_page_space if none)
        next_cell = usable_page_space
        for cell_pointer in cell_pointers:
            if page_offset_in_bytes < cell_pointer < next_cell:
                next_cell = cell_pointer

        # Closest free block that starts behind this cell (usable_page_space if none).
        # Possible free bytes are ignored: none are expected at the end of a
        # cell that overflows to another page.
        next_free_block = usable_page_space
        for free_block in parse_free_blocks(page_hex_string, free_block_pointer):
            if page_offset_in_bytes < free_block[0] < next_free_block:
                next_free_block = free_block[0]

        # The record ends at the closest following cell, the closest following
        # free block or the end of the usable page. Both candidates are capped
        # at usable_page_space, so the minimum covers all three cases.
        end_of_record = min(next_cell, next_free_block)

        # Cut record hex string from the beginning to the offset of the next following element
        record_hex_string = page_hex_string[record_start:(end_of_record * 2)]
        # String length is counted in nibbles, bytes are needed here. Floor
        # division keeps the result an int so it stays usable as a slice index.
        record_hex_string_length = len(record_hex_string) // 2

        # The last 4 bytes of the local part hold the first overflow page number
        first_overflow_page_number = int(record_hex_string[((record_hex_string_length - 4) * 2):(record_hex_string_length * 2)], 16)
        _adel_log.log("parse_table_btree_leaf_cell: ----> parsing overflow page chain beginning at page %(first_overflow_page_number)s...." % vars(), 4)

        # Cut off overflow page number from record_hex_string
        record_hex_string = record_hex_string[:((record_hex_string_length - 4) * 2)]
        first_overflow_page_string = _sqliteFileHandler.read_page(first_overflow_page_number)
        # An empty string is treated as "page could not be retrieved"
        if first_overflow_page_string == "":
            _adel_log.log("parse_table_btree_leaf_cell: ERROR - invalid overflow page pointer, cannot reference first overflow page: " + str(first_overflow_page_number), 1)
            return []

        # Append content from overflow pages
        record_hex_string += parse_overflow_page_chain(first_overflow_page_string)

        # Ensure correct length of string (maybe not all bytes of the last
        # overflow page in the chain contain content)
        record_hex_string_length = len(record_hex_string) // 2
        if bytes_of_payload < record_hex_string_length:
            record_hex_string = record_hex_string[:bytes_of_payload * 2]
    else:
        # The entire payload is stored on this page
        record_hex_string = page_hex_string[record_start:(record_start + bytes_of_payload * 2)]

    # Parse the record
    read_content_list = parse_record(record_hex_string)

    # Build the resulting list (including the row_id used by sqlite internally)
    cell_content_list = [row_id]
    cell_content_list.extend(read_content_list)

    _adel_log.log("parse_table_btree_leaf_cell: OK - returning list of cell contents", 4)
    _adel_log.log("parse_table_btree_leaf_cell: ----> b-tree leaf cell at offset %(page_offset_in_bytes)s parsed" % vars(), 4)
    return cell_content_list