Esempio n. 1
0
def parse_overflow_page_chain(page_hex_string):
    """Collect the content of an overflow page and of every page chained behind it.

    The first 8 nibbles (4 bytes) of each overflow page hold the number of the
    next overflow page, or zero on the last page of the chain; everything
    after them is content. The chain is walked iteratively, so its length is
    not limited by the interpreter's recursion depth, and a page number that
    shows up a second time ends the walk instead of looping forever.

    Parameters:
        page_hex_string: hexadecimal string of the first overflow page.

    Returns:
        The concatenated content (hexadecimal string) of all pages that could
        be read. An empty or too short page ends the chain with a warning;
        the content gathered so far is returned ("" if the very first page is
        already unusable).
    """
    content_parts = []
    # Page numbers already followed. The number of the first page is not known
    # here, so a cycle through it is noticed one round later.
    visited_page_numbers = set()
    current_page = page_hex_string

    while True:
        if (len(current_page) == 0):
            _adel_log.log(
                "parse_overflow_page_chain: WARNING! empty hexadecimal page string received",
                2)
            break
        if (len(current_page) < 8):
            _adel_log.log(
                "parse_overflow_page_chain: WARNING! hexadecimal page string is too short: "
                + str(current_page), 2)
            break

        # Pointer to the next overflow page; zero on the last page of the chain
        next_overflow_page_number = int(current_page[0:8], 16)

        # Everything behind the pointer is content of this page
        content_parts.append(current_page[8:])
        _adel_log.log(
            "parse_overflow_page_chain:             OK - overflow page parsed",
            4)

        if next_overflow_page_number == 0:
            break
        if next_overflow_page_number in visited_page_numbers:
            # Only a corrupted file can chain back to a page it already used
            _adel_log.log(
                "parse_overflow_page_chain: WARNING! cyclic overflow page chain detected at page number: "
                + str(next_overflow_page_number), 2)
            break
        visited_page_numbers.add(next_overflow_page_number)

        # There is at least one more overflow page: fetch it and continue
        _adel_log.log(
            "parse_overflow_page_chain:             ----> parsing next overflow page in chain, page number is: "
            + str(next_overflow_page_number) + "....", 4)
        current_page = _sqliteFileHandler.read_page(next_overflow_page_number)

    return "".join(content_parts)
Esempio n. 2
0
def parse_table_btree_interior_page(page_hex_string, page_offset):
    """Parse a table b-tree interior page and gather the contents of its child pages.

    Parameters:
        page_hex_string: hexadecimal string of the page.
        page_offset: offset at which the page header starts (passed on to
            parse_btree_page_header).

    Returns:
        A flat list with everything parse_table_btree_page returned for each
        readable child page, in child order with the right-most child last.
        An empty list is returned when the header does not have seven fields
        or its first field (the page type) is not 5.
    """
    page_header = parse_btree_page_header(page_hex_string, page_offset)
    field_count = len(page_header)

    # Refuse anything that is not a table b-tree interior page
    if (field_count != 7 or page_header[0] != 5):
        _adel_log.log(
            "parse_table_btree_interior_page: ERROR - invalid page type in table b-tree interior page header",
            1)
        _adel_log.log(
            "                             Page header was said to start at page offset: "
            + str(page_offset), 1)
        _adel_log.log("                             Printing page content....",
                      1)
        _adel_log.log(page_hex_string, 1)
        return []

    # The cell pointer array is located via the last header field
    # (presumably the header length - confirm in parse_btree_page_header)
    cell_pointers = parse_cell_pointer_array(
        page_hex_string, page_offset + page_header[field_count - 1],
        page_header[2])

    # One child reference per cell, followed by the right-most pointer of the header
    child_references = [
        parse_table_btree_interior_cell(page_hex_string, pointer)
        for pointer in cell_pointers
    ]
    child_references.append([page_header[5], 0])

    collected_contents = []
    for child_reference in child_references:
        child_page_number = child_reference[0]
        _adel_log.log(
            "parse_table_btree_interior_page:  ----> fetching child page to parse, page number: "
            + str(child_page_number) + "....", 3)
        child_page_hex = _sqliteFileHandler.read_page(child_page_number)

        # An empty string signals that the page could not be read
        if (child_page_hex == ""):
            _adel_log.log(
                "parse_table_btree_interior_page: ERROR - invalid node tuple detected, cannot reference child page pointer: "
                + str(child_reference), 1)
            continue

        collected_contents.extend(parse_table_btree_page(child_page_hex, 0))
        _adel_log.log(
            "parse_table_btree_interior_page:  ----> child page parsed, page number: "
            + str(child_page_number) + "....", 4)

    return collected_contents
Esempio n. 3
0
def parse_table_btree_interior_page(page_hex_string, page_offset):
    """Walk a table b-tree interior page and return the contents of all its children.

    Parameters:
        page_hex_string: hexadecimal string of the page.
        page_offset: offset at which the page header starts (passed on to
            parse_btree_page_header).

    Returns:
        One flat list holding the results of parse_table_btree_page for every
        child page that could be read; the child referenced by the right-most
        pointer of the header comes last. An empty list is returned if the
        header does not consist of seven fields with page type 5 in front.
    """
    header_fields = parse_btree_page_header(page_hex_string, page_offset)
    number_of_fields = len(header_fields)

    # Only page type 5 with a seven-field header is accepted here
    if (number_of_fields != 7 or header_fields[0] != 5):
        _adel_log.log("parse_table_btree_interior_page: ERROR - invalid page type in table b-tree interior page header", 1)
        _adel_log.log("                             Page header was said to start at page offset: " + str(page_offset), 1)
        _adel_log.log("                             Printing page content....", 1)
        _adel_log.log(page_hex_string, 1)
        return []

    # Locate the cells of this page; the last header field is added to the page offset
    pointer_array_offset = page_offset + header_fields[number_of_fields - 1]
    cell_pointers = parse_cell_pointer_array(page_hex_string, pointer_array_offset, header_fields[2])

    # Every cell yields one child reference; the header's right-most pointer is the final child
    children = [parse_table_btree_interior_cell(page_hex_string, cell_pointer) for cell_pointer in cell_pointers]
    children.append([header_fields[5], 0])

    gathered = []
    for child in children:
        page_number = child[0]
        _adel_log.log("parse_table_btree_interior_page:  ----> fetching child page to parse, page number: " + str(page_number) + "....", 3)
        child_hex_string = _sqliteFileHandler.read_page(page_number)

        # read_page hands back an empty string for pages it cannot deliver
        if (child_hex_string == ""):
            _adel_log.log("parse_table_btree_interior_page: ERROR - invalid node tuple detected, cannot reference child page pointer: " + str(child), 1)
            continue

        gathered.extend(parse_table_btree_page(child_hex_string, 0))
        _adel_log.log("parse_table_btree_interior_page:  ----> child page parsed, page number: " + str(page_number) + "....", 4)

    return gathered
Esempio n. 4
0
def parse_overflow_page_chain(page_hex_string):
    """Collect the content of an overflow page and of every page chained behind it.

    The first 8 nibbles (4 bytes) of each overflow page hold the number of the
    next overflow page, or zero on the last page of the chain; everything
    after them is content. The chain is walked iteratively, so its length is
    not limited by the interpreter's recursion depth, and a page number that
    shows up a second time ends the walk instead of looping forever.

    Parameters:
        page_hex_string: hexadecimal string of the first overflow page.

    Returns:
        The concatenated content (hexadecimal string) of all pages that could
        be read. An empty or too short page ends the chain with a warning;
        the content gathered so far is returned ("" if the very first page is
        already unusable).
    """
    content_parts = []
    # Page numbers already followed. The number of the first page is not known
    # here, so a cycle through it is noticed one round later.
    visited_page_numbers = set()
    current_page = page_hex_string

    while True:
        if (len(current_page) == 0):
            _adel_log.log("parse_overflow_page_chain: WARNING! empty hexadecimal page string received", 2)
            break
        if (len(current_page) < 8):
            _adel_log.log("parse_overflow_page_chain: WARNING! hexadecimal page string is too short: " + str(current_page), 2)
            break

        # Pointer to the next overflow page; zero on the last page of the chain
        next_overflow_page_number = int(current_page[0:8], 16)

        # Everything behind the pointer is content of this page
        content_parts.append(current_page[8:])
        _adel_log.log("parse_overflow_page_chain:             OK - overflow page parsed", 4)

        if next_overflow_page_number == 0:
            break
        if next_overflow_page_number in visited_page_numbers:
            # Only a corrupted file can chain back to a page it already used
            _adel_log.log("parse_overflow_page_chain: WARNING! cyclic overflow page chain detected at page number: " + str(next_overflow_page_number), 2)
            break
        visited_page_numbers.add(next_overflow_page_number)

        # There is at least one more overflow page: fetch it and continue
        _adel_log.log("parse_overflow_page_chain:             ----> parsing next overflow page in chain, page number is: " + str(next_overflow_page_number) + "....", 4)
        current_page = _sqliteFileHandler.read_page(next_overflow_page_number)

    return "".join(content_parts)
Esempio n. 5
0
def parse_table_btree_leaf_cell(page_hex_string, page_offset, cell_pointers,
                                free_block_pointer):
    """Parse one table b-tree leaf cell and return its row ID and record contents.

    The cell starts with two variable-length integers (payload size and row
    ID) followed by the record. If the payload does not fit behind the cell
    start on this page, the locally stored part is cut at the next cell, the
    next free block or the end of the usable page space; its last 4 bytes are
    read as the number of the first overflow page and the overflow chain is
    appended.

    Parameters:
        page_hex_string: hexadecimal string of the page holding the cell.
        page_offset: offset of the cell within the page, in bytes.
        cell_pointers: byte offsets of all cells of this page.
        free_block_pointer: pointer handed to parse_free_blocks to list the
            free blocks of this page.

    Returns:
        A list whose first element is the row ID, followed by the elements
        returned by parse_record. An empty list is returned if the locally
        stored part is too short to hold an overflow page pointer or if the
        first overflow page cannot be read.
    """
    # One byte is two characters of the hex string, so byte offsets are doubled for slicing
    page_offset_in_bytes = page_offset  # kept for logging and byte based comparisons
    page_offset = page_offset * 2
    db_page_size_in_bytes = _sqliteFileHandler.DB_PAGESIZE_IN_BYTES
    usable_page_space = db_page_size_in_bytes - _sqliteFileHandler.DB_RESERVED_SPACE

    _adel_log.log(
        "parse_table_btree_leaf_cell:      ----> parsing b-tree leaf cell at offset %(page_offset_in_bytes)s...."
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)

    # Payload size: a variable integer is at most 9 bytes (= 18 nibbles) long
    bytes_of_payload_tuple = _sqliteVarInt.parse_next_var_int(
        page_hex_string[page_offset:(page_offset + 18)])
    bytes_of_payload = bytes_of_payload_tuple[0]
    _adel_log.log(
        "parse_table_btree_leaf_cell:            OK - payload is %(bytes_of_payload)s bytes long"
        % {"bytes_of_payload": bytes_of_payload}, 4)

    # Row ID: the variable integer directly behind the payload size
    row_id_start = page_offset + (bytes_of_payload_tuple[1] * 2)
    row_id_tuple = _sqliteVarInt.parse_next_var_int(
        page_hex_string[row_id_start:(row_id_start + 18)])
    row_id = row_id_tuple[0]
    _adel_log.log(
        "parse_table_btree_leaf_cell:      ----> extracting contents for row_id %(row_id)s...."
        % {"row_id": row_id}, 4)

    # The record starts behind both variable integers (position in nibbles)
    record_start = page_offset + (
        (bytes_of_payload_tuple[1] + row_id_tuple[1]) * 2)

    # NOTE(review): overflow is detected by comparing the payload size with the
    # space between the cell start and the end of the page; the SQLite file
    # format states the threshold in terms of the usable page size - confirm
    # that this heuristic covers every case.
    remaining_page_space = db_page_size_in_bytes - page_offset_in_bytes
    if (bytes_of_payload > remaining_page_space):
        # Not enough space left on this page, so the content has to overflow
        _adel_log.log(
            "parse_table_btree_leaf_cell:            OK - payload is too large for this page, there are overflow pages",
            4)

        # Closest cell behind this one (or the end of the usable page space)
        next_cell = min([usable_page_space] + [
            cell_pointer for cell_pointer in cell_pointers
            if cell_pointer > page_offset_in_bytes
        ])

        # Closest free block behind this cell. Possible free bytes are ignored,
        # because none are expected at the end of a cell that overflows.
        free_blocks = parse_free_blocks(page_hex_string, free_block_pointer)
        next_free_block = min([usable_page_space] + [
            free_block[0] for free_block in free_blocks
            if free_block[0] > page_offset_in_bytes
        ])

        # The record ends at whichever comes first: the next cell, the next
        # free block or the end of the usable page space (both candidates are
        # capped at the usable page space above)
        end_of_record = min(next_cell, next_free_block)

        # Locally stored part of the record; floor division keeps the byte
        # count an integer so that it can be used in slices
        record_hex_string = page_hex_string[record_start:(end_of_record * 2)]
        record_hex_string_length = len(record_hex_string) // 2

        # The last 4 bytes of the local part hold the first overflow page number
        if (record_hex_string_length < 4):
            _adel_log.log(
                "parse_table_btree_leaf_cell: ERROR - record is too short to hold an overflow page pointer, record length in bytes: "
                + str(record_hex_string_length), 1)
            return []
        pointer_start = (record_hex_string_length - 4) * 2
        first_overflow_page_number = int(
            record_hex_string[pointer_start:(record_hex_string_length * 2)],
            16)
        _adel_log.log(
            "parse_table_btree_leaf_cell:      ----> parsing overflow page chain beginning at page %(first_overflow_page_number)s...."
            % {"first_overflow_page_number": first_overflow_page_number}, 4)
        # Cut the overflow page number off the record
        record_hex_string = record_hex_string[:pointer_start]

        first_overflow_page_string = _sqliteFileHandler.read_page(
            first_overflow_page_number)
        # An empty string signals that the page could not be read
        if (first_overflow_page_string == ""):
            _adel_log.log(
                "parse_table_btree_leaf_cell: ERROR - invalid overflow page pointer, cannot reference first overflow page: "
                + str(first_overflow_page_number), 1)
            return []
        # Append content from overflow pages
        record_hex_string += parse_overflow_page_chain(
            first_overflow_page_string)

        # The last overflow page may be only partly filled: trim to the payload size
        record_hex_string_length = len(record_hex_string) // 2
        if (bytes_of_payload < record_hex_string_length):
            record_hex_string = record_hex_string[:bytes_of_payload * 2]
    else:
        # The entire payload is stored on this page
        record_hex_string = page_hex_string[record_start:(
            record_start + (bytes_of_payload * 2))]

    # Parse the record and put the row ID used by sqlite internally in front
    cell_content_list = [row_id]
    cell_content_list.extend(parse_record(record_hex_string))

    _adel_log.log(
        "parse_table_btree_leaf_cell:            OK - returning list of cell contents",
        4)
    _adel_log.log(
        "parse_table_btree_leaf_cell:      ----> b-tree leaf cell at offset %(page_offset_in_bytes)s parsed"
        % {"page_offset_in_bytes": page_offset_in_bytes}, 4)
    return cell_content_list
Esempio n. 6
0
def parse_db(file_name):
    """Parse a SQLite database file and return the structure and contents of its tables.

    The schema is read from the b-tree rooted on the first page (behind the
    100 byte database header). Every schema entry of type TABLE whose SQL
    text is a CREATE [TEMP|TEMPORARY|VIRTUAL] TABLE statement is parsed;
    index, view and trigger entries are ignored.

    Parameters:
        file_name: path of the database file, handed to
            _sqliteFileHandler.open_db.

    Returns:
        A list with one entry per parsed table. Each entry is a list whose
        first element is the result of parse_sql_statement_params for the
        table, followed by one list per row. An empty list is returned if the
        file cannot be opened, the first page cannot be read or the text
        encoding is not supported.

    Side effects:
        Sets the module global DB_FILE_SIZE_IN_BYTES. The database file is
        closed again on every path once it has been opened successfully.
    """
    global DB_FILE_SIZE_IN_BYTES
    global ROW_ID_COLUMN

    _adel_log.log("\n############  SQLite PARSER -> " + file_name + "  ############ \n", 2)
    _adel_log.log("parse_db:                      ----> parsing sqlite3 database file....", 3)

    # Open the database
    DB_FILE_SIZE_IN_BYTES = _sqliteFileHandler.open_db(file_name)
    if DB_FILE_SIZE_IN_BYTES == 0:
        # File could not be opened correctly, so there is nothing to close
        return []

    final_list = []
    # From here on the file is open: close it again whatever happens
    try:
        # Read first page of database file
        first_page_hex_string = _sqliteFileHandler.read_page(1)
        # Ensure that read page could retrieve an existing page
        if (first_page_hex_string == ""):
            _adel_log.log("parse_db: ERROR - cannot read first page of database", 1)
            return []

        # Parse the database header on the first page (first 100 bytes in the database file)
        parse_db_header(first_page_hex_string)
        if HEADER_DATABASE_TEXT_ENCODING > 1:
            _adel_log.log("parse_db: ERROR - database text encoding " + str(HEADER_DATABASE_TEXT_ENCODING) + " not supported in this version of FSP", 1)
            return []

        # Parse database schema (first page of the database file is root b-tree page for the schema btree)
        # Database schema is stored in a well defined way (sqlite master table)
        # CREATE TABLE sqlite_master(
        # type text, # must be one of the following: ['table', 'index', 'view', 'trigger']
        # name text,
        # tbl_name text,
        # rootpage integer,
        # sql text
        # );
        _adel_log.log("\nparseDB:                      ----> parsing sqlite3 database SCHEMA....", 3)
        db_schemata = _sqlitePageParser.parse_table_btree_page(first_page_hex_string, 100) # 100 bytes database file header
        _adel_log.log("parse_db:                      ----> sqlite3 database SCHEMA parsed", 3)

        # Loop through all schemata of the database
        for db_schema in db_schemata:
            if len(db_schema) != 5 + 1: # +1 due to manually added leading rowID
                _adel_log.log("parse_db: WARNING! invalid length of database schema statement entry detected: ", 2)
                _adel_log.log(str(db_schema), 2)
                continue

            # Only table entries are parsed; index, view and trigger entries are ignored right now
            if (_helpersStringOperations.starts_with_string(str(db_schema[1]), "TABLE") != 0):
                continue

            # Fresh result list for this table
            result_list = []

            # Ensure that we treat a valid schema; db_schema[5] is expected to be the "sql text" as defined in sqlite_master
            sql_statement = db_schema[5]
            if ((sql_statement is None) or (sql_statement == "")):
                _adel_log.log("parse_db: WARNING! missing database schema statement entry detected, printing schema statement:", 2)
                _adel_log.log(str(db_schema), 3)
                continue

            _adel_log.log("\nparseDB:                      ----> parsing new database structure with SQL statement:", 3)
            _adel_log.log(str(sql_statement), 3)

            # Extract and check command (expected to be CREATE)
            command_tuple = _helpersStringOperations.split_at_first_occurrence(sql_statement, " ")
            if (len(command_tuple) == 0):
                _adel_log.log("parse_db: WARNING! invalid sql COMMAND detected, continuing with next database element (e.g. next table)", 2)
                continue
            if (_helpersStringOperations.starts_with_string(str(command_tuple[0]), "CREATE") != 0):
                _adel_log.log("parse_db: WARNING! invalid sql COMMAND detected, expected \"CREATE\" but found: " + str(command_tuple[0]), 2)
                _adel_log.log("                  continuing with next database element (e.g. next table)", 2)
                continue

            # Extract and check first command operand (expected to be TEMP, TEMPORARY, TABLE or VIRTUAL TABLE)
            type_tuple = _helpersStringOperations.split_at_first_occurrence(command_tuple[1], " ")
            if len(type_tuple) == 0:
                _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE detected, continuing with next database element (e.g. next table)", 2)
                continue
            # According to the syntax diagrams of the sqlite SQL create table statement there are TEMP or TEMPORARY key words allowed at this place
            if (_helpersStringOperations.starts_with_string(str(type_tuple[0]), "TEMP") == 0
                    or _helpersStringOperations.starts_with_string(str(type_tuple[0]), "TEMPORARY") == 0
                    or _helpersStringOperations.starts_with_string(str(type_tuple[0]), "VIRTUAL") == 0):
                # Ignore and proceed with next fragment (must then be TABLE)
                type_tuple = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], " ")
                if len(type_tuple) == 0:
                    _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE after TEMP(ORARY) detected, continuing with next database element (e.g. next table)", 2)
                    continue
            # This fragment must be table
            if (_helpersStringOperations.starts_with_string(str(type_tuple[0]), "TABLE") != 0):
                _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE detected, expected \"TABLE\" but found: " + str(type_tuple[0]), 2)
                _adel_log.log("                  continuing with next database element (e.g. next table)", 2)
                continue

            # Extract and check second command operand (expected to be table name)
            # NOTE(review): this compares the two split results themselves, not
            # the positions of " " and "(" - confirm against
            # split_at_first_occurrence that this is intended.
            next_space = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], " ")
            next_parenthesis = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], "(")
            if (next_space < next_parenthesis):
                # "IF NOT EXISTS" statement possible
                if (_helpersStringOperations.starts_with_string(str(_helpersStringOperations.crop_whitespace(type_tuple[1])), "IF") == 0):
                    type_tuple[1] = type_tuple[1][2:]
                if (_helpersStringOperations.starts_with_string(str(_helpersStringOperations.crop_whitespace(type_tuple[1])), "NOT") == 0):
                    type_tuple[1] = type_tuple[1][3:]
                if (_helpersStringOperations.starts_with_string(str(_helpersStringOperations.crop_whitespace(type_tuple[1])), "EXISTS") == 0):
                    type_tuple[1] = type_tuple[1][6:]
                type_tuple[1] = _helpersStringOperations.crop_whitespace(type_tuple[1])

                # Extract name tuple
                name_tuple = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], " ")
                if len(name_tuple) == 0:
                    name_tuple = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], "(")
                    if len(name_tuple) == 0:
                        _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
                        continue
                    # Append leading opening parenthesis that we cut off before
                    name_tuple[1] = "(" + str(name_tuple[1])
                else:
                    # "AS ..." statement possible
                    tmp_string = _helpersStringOperations.crop_whitespace(name_tuple[1])
                    if (tmp_string.startswith("AS")):
                        _adel_log.log("parse_db:                            OK - \"AS\" statement detected: " + str(tmp_string), 3)
                        _adel_log.log("parse_db:                            OK - no data stored, thus continuing with next database element (e.g. next table)", 3)
                        continue
            else:
                name_tuple = _helpersStringOperations.split_at_first_occurrence(type_tuple[1], "(")
                if len(name_tuple) == 0:
                    _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
                    continue
                # Append leading opening parenthesis that we cut off before
                name_tuple[1] = "(" + str(name_tuple[1])

            # Now ready to parse TABLE
            _adel_log.log("parse_db:                      ----> parsing database structure " + str(type_tuple[0]) + " \"" + str(name_tuple[0]) + "\"", 3)
            _adel_log.log("parse_db:                      ----> parsing SQL statement of " + str(type_tuple[0]) + "....", 3)
            _adel_log.log("parse_db:                            OK - SQL statement is of type: " + str(command_tuple[0]) + " " + str(type_tuple[0]), 3)

            # Parse and append sql statement
            name_tuple[1] = _helpersStringOperations.cut_first_last_exclude(name_tuple[1], "(", ")")
            result_list.append(parse_sql_statement_params(name_tuple[1]))

            # Ensure we deal with a real table, virtual tables have no b-tree and thus the b-tree root page pointer is 0
            if (db_schema[4] == 0):
                _adel_log.log("parse_db:                            OK - this table holds no content (e.g. virtual table), continuing with next database element (e.g. next table)", 3)
                _adel_log.log("parse_db:                      ----> database structure " + str(type_tuple[0]) + " \"" + str(name_tuple[0]) + "\" parsed", 3)
                # Append result of this table to the final list
                final_list.append(result_list)
                continue

            # Parse and append table contents
            btree_root_page_string = _sqliteFileHandler.read_page(db_schema[4])
            # Ensure that read page could retrieve an existing page
            if (btree_root_page_string == ""):
                _adel_log.log("parse_db: ERROR - could not refer to b-tree root page: " + str(db_schema[4]), 1)
                _adel_log.log("                 continuing with next database element (e.g. next table)", 1)
                continue
            _adel_log.log("parse_db:                      ----> parsing contents of " + str(type_tuple[0]) + "....", 3)
            table_contents = _sqlitePageParser.parse_table_btree_page(btree_root_page_string, 0)

            # Check whether the table contains a dedicated row ID column
            # (ROW_ID_COLUMN is only read here - presumably it is set while the
            # SQL statement parameters are parsed; confirm in parse_sql_statement_params)
            if (ROW_ID_COLUMN == 0):
                # Table has no dedicated row ID column: put "rowID" in front of the
                # column definitions (the row ID itself is already part of each row)
                result_list[-1] = [["rowID", "INTEGER"]] + list(result_list[-1])
                # Append table contents to the result list
                result_list.extend(table_contents)
            else:
                # Table has a dedicated row ID column (integer primary key column): the value stored
                # as row ID in the b-tree belongs into this column
                for row in table_contents:
                    # Replace the "None" entry in the integer primary key column by the actual row ID
                    row[ROW_ID_COLUMN] = row[0]
                    # Drop the manually added leading row ID and append the corrected row
                    result_list.append(row[1:])

            # Append result of this table to the final list
            final_list.append(result_list)
            _adel_log.log("parse_db:                      ----> database structure " + str(type_tuple[0]) + " \"" + str(name_tuple[0]) + "\" parsed", 3)

        _adel_log.log("\nparseDB:                      ----> returning contents of the database file", 3)
    finally:
        # Close the database file
        _sqliteFileHandler.close_db()
    _adel_log.log("parse_db:                      ----> sqlite3 database file parsed", 3)

    return final_list
Esempio n. 7
0
def parse_table_btree_leaf_cell(page_hex_string, page_offset, cell_pointers, free_block_pointer):
    """Parse one table b-tree leaf cell and return its row ID and record contents.

    The cell starts with two variable-length integers (payload size and row
    ID) followed by the record. If the payload does not fit behind the cell
    start on this page, the locally stored part is cut at the next cell, the
    next free block or the end of the usable page space; its last 4 bytes are
    read as the number of the first overflow page and the overflow chain is
    appended.

    Parameters:
        page_hex_string: hexadecimal string of the page holding the cell.
        page_offset: offset of the cell within the page, in bytes.
        cell_pointers: byte offsets of all cells of this page.
        free_block_pointer: pointer handed to parse_free_blocks to list the
            free blocks of this page.

    Returns:
        A list whose first element is the row ID, followed by the elements
        returned by parse_record. An empty list is returned if the locally
        stored part is too short to hold an overflow page pointer or if the
        first overflow page cannot be read.
    """
    # One byte is two characters of the hex string, so byte offsets are doubled for slicing
    page_offset_in_bytes = page_offset # kept for logging and byte based comparisons
    page_offset = page_offset * 2
    db_page_size_in_bytes = _sqliteFileHandler.DB_PAGESIZE_IN_BYTES
    usable_page_space = db_page_size_in_bytes - _sqliteFileHandler.DB_RESERVED_SPACE

    _adel_log.log("parse_table_btree_leaf_cell:      ----> parsing b-tree leaf cell at offset %(page_offset_in_bytes)s...." % {"page_offset_in_bytes": page_offset_in_bytes}, 4)

    # Payload size: a variable integer is at most 9 bytes (= 18 nibbles) long
    bytes_of_payload_tuple = _sqliteVarInt.parse_next_var_int(page_hex_string[page_offset:(page_offset + 18)])
    bytes_of_payload = bytes_of_payload_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell:            OK - payload is %(bytes_of_payload)s bytes long" % {"bytes_of_payload": bytes_of_payload}, 4)

    # Row ID: the variable integer directly behind the payload size
    row_id_start = page_offset + (bytes_of_payload_tuple[1] * 2)
    row_id_tuple = _sqliteVarInt.parse_next_var_int(page_hex_string[row_id_start:(row_id_start + 18)])
    row_id = row_id_tuple[0]
    _adel_log.log("parse_table_btree_leaf_cell:      ----> extracting contents for row_id %(row_id)s...." % {"row_id": row_id}, 4)

    # The record starts behind both variable integers (position in nibbles)
    record_start = page_offset + ((bytes_of_payload_tuple[1] + row_id_tuple[1]) * 2)

    # NOTE(review): overflow is detected by comparing the payload size with the
    # space between the cell start and the end of the page; the SQLite file
    # format states the threshold in terms of the usable page size - confirm
    # that this heuristic covers every case.
    remaining_page_space = db_page_size_in_bytes - page_offset_in_bytes
    if (bytes_of_payload > remaining_page_space):
        # Not enough space left on this page, so the content has to overflow
        _adel_log.log("parse_table_btree_leaf_cell:            OK - payload is too large for this page, there are overflow pages", 4)

        # Closest cell behind this one (or the end of the usable page space)
        next_cell = min([usable_page_space] + [cell_pointer for cell_pointer in cell_pointers if cell_pointer > page_offset_in_bytes])

        # Closest free block behind this cell. Possible free bytes are ignored,
        # because none are expected at the end of a cell that overflows.
        free_blocks = parse_free_blocks(page_hex_string, free_block_pointer)
        next_free_block = min([usable_page_space] + [free_block[0] for free_block in free_blocks if free_block[0] > page_offset_in_bytes])

        # The record ends at whichever comes first: the next cell, the next
        # free block or the end of the usable page space (both candidates are
        # capped at the usable page space above)
        end_of_record = min(next_cell, next_free_block)

        # Locally stored part of the record; floor division keeps the byte
        # count an integer so that it can be used in slices
        record_hex_string = page_hex_string[record_start:(end_of_record * 2)]
        record_hex_string_length = len(record_hex_string) // 2

        # The last 4 bytes of the local part hold the first overflow page number
        if (record_hex_string_length < 4):
            _adel_log.log("parse_table_btree_leaf_cell: ERROR - record is too short to hold an overflow page pointer, record length in bytes: " + str(record_hex_string_length), 1)
            return []
        pointer_start = (record_hex_string_length - 4) * 2
        first_overflow_page_number = int(record_hex_string[pointer_start:(record_hex_string_length * 2)], 16)
        _adel_log.log("parse_table_btree_leaf_cell:      ----> parsing overflow page chain beginning at page %(first_overflow_page_number)s...." % {"first_overflow_page_number": first_overflow_page_number}, 4)
        # Cut the overflow page number off the record
        record_hex_string = record_hex_string[:pointer_start]

        first_overflow_page_string = _sqliteFileHandler.read_page(first_overflow_page_number)
        # An empty string signals that the page could not be read
        if (first_overflow_page_string == ""):
            _adel_log.log("parse_table_btree_leaf_cell: ERROR - invalid overflow page pointer, cannot reference first overflow page: " + str(first_overflow_page_number), 1)
            return []
        # Append content from overflow pages
        record_hex_string += parse_overflow_page_chain(first_overflow_page_string)

        # The last overflow page may be only partly filled: trim to the payload size
        record_hex_string_length = len(record_hex_string) // 2
        if (bytes_of_payload < record_hex_string_length):
            record_hex_string = record_hex_string[:bytes_of_payload * 2]
    else:
        # The entire payload is stored on this page
        record_hex_string = page_hex_string[record_start:(record_start + (bytes_of_payload * 2))]

    # Parse the record and put the row ID used by sqlite internally in front
    cell_content_list = [row_id]
    cell_content_list.extend(parse_record(record_hex_string))

    _adel_log.log("parse_table_btree_leaf_cell:            OK - returning list of cell contents", 4)
    _adel_log.log("parse_table_btree_leaf_cell:      ----> b-tree leaf cell at offset %(page_offset_in_bytes)s parsed" % {"page_offset_in_bytes": page_offset_in_bytes}, 4)
    return cell_content_list