Example #1
0
def _split_create_table_statement(sql_statement):
    """Split a CREATE TABLE statement into its leading keywords, name and body.

    Returns a tuple ``(command, type_keyword, table_name, remainder)`` where
    ``remainder`` is the text following the table name (starting with the
    opening parenthesis of the column definitions when one was found).
    Returns ``None`` when the statement must be skipped; the reason has been
    logged in that case.
    """
    split_once = _helpersStringOperations.split_at_first_occurrence
    starts_with = _helpersStringOperations.starts_with_string
    crop = _helpersStringOperations.crop_whitespace

    # Extract and check command (expected to be CREATE)
    command_tuple = split_once(sql_statement, " ")
    if len(command_tuple) == 0:
        _adel_log.log("parse_db: WARNING! invalid sql COMMAND detected, continuing with next database element (e.g. next table)", 2)
        return None
    if starts_with(str(command_tuple[0]), "CREATE") != 0:
        _adel_log.log("parse_db: WARNING! invalid sql COMMAND detected, expected \"CREATE\" but found: " + str(command_tuple[0]), 2)
        _adel_log.log("                  continuing with next database element (e.g. next table)", 2)
        return None

    # Extract and check first command operand (expected to be TEMP, TEMPORARY, TABLE or VIRTUAL TABLE)
    type_tuple = split_once(command_tuple[1], " ")
    if len(type_tuple) == 0:
        _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE detected, continuing with next database element (e.g. next table)", 2)
        return None
    # TEMP, TEMPORARY and VIRTUAL may precede TABLE; skip them
    if (starts_with(str(type_tuple[0]), "TEMP") == 0
            or starts_with(str(type_tuple[0]), "TEMPORARY") == 0
            or starts_with(str(type_tuple[0]), "VIRTUAL") == 0):
        type_tuple = split_once(type_tuple[1], " ")
        if len(type_tuple) == 0:
            _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE after TEMP(ORARY) detected, continuing with next database element (e.g. next table)", 2)
            return None
    # This fragment must be TABLE
    if starts_with(str(type_tuple[0]), "TABLE") != 0:
        _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE detected, expected \"TABLE\" but found: " + str(type_tuple[0]), 2)
        _adel_log.log("                  continuing with next database element (e.g. next table)", 2)
        return None

    definition = type_tuple[1]

    # Strip an optional "IF NOT EXISTS" only when it appears as a complete
    # phrase, so that table names merely starting with "IF" or "NOT"
    # (e.g. "notes") are left intact.
    words = definition.split(None, 3)
    if len(words) == 4 and [word.upper() for word in words[:3]] == ["IF", "NOT", "EXISTS"]:
        definition = words[3]

    # Decide by position whether the table name is terminated by a space or
    # directly by the opening parenthesis of the column definitions.
    next_space = definition.find(" ")
    next_parenthesis = definition.find("(")
    if next_space >= 0 and (next_parenthesis < 0 or next_space < next_parenthesis):
        definition = crop(definition)
        name_tuple = split_once(definition, " ")
        if len(name_tuple) == 0:
            # The only space was leading/trailing whitespace, split at the parenthesis instead
            name_tuple = split_once(definition, "(")
            if len(name_tuple) == 0:
                _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
                return None
            # Re-attach the opening parenthesis that the split cut off
            name_tuple[1] = "(" + str(name_tuple[1])
        else:
            # "CREATE TABLE name AS SELECT ..." carries no column definitions
            tmp_string = crop(name_tuple[1])
            if tmp_string.startswith("AS"):
                _adel_log.log("parse_db:                            OK - \"AS\" statement detected: " + str(tmp_string), 3)
                _adel_log.log("parse_db:                            OK - no data stored, thus continuing with next database element (e.g. next table)", 3)
                return None
    else:
        name_tuple = split_once(definition, "(")
        if len(name_tuple) == 0:
            _adel_log.log("parse_db: WARNING! invalid sql COMMAND TYPE NAME detected, continuing with next database element (e.g. next table)", 2)
            return None
        # Re-attach the opening parenthesis that the split cut off
        name_tuple[1] = "(" + str(name_tuple[1])

    return command_tuple[0], type_tuple[0], name_tuple[0], name_tuple[1]


def parse_db(file_name):
    """Parse all tables of the SQLite database file *file_name*.

    The schema is read from the first page of the file and every entry whose
    type starts with "TABLE" is parsed; index, view and trigger entries are
    ignored.

    Returns a list with one element per parsed table. Each element is a list
    whose first item is the column description (a list of
    ``[column name, column type]`` pairs as produced by
    ``parse_sql_statement_params``) followed by one list per table row.
    When the table has no column aliasing the row ID, a leading
    ``["rowID", "INTEGER"]`` column is added and each row keeps its row ID as
    first value; otherwise the row ID is written into the aliasing column.
    An empty list is returned when the file cannot be opened, its first page
    cannot be read, or its text encoding is not supported.

    Side effects: sets the module global ``DB_FILE_SIZE_IN_BYTES`` and reads
    ``ROW_ID_COLUMN`` as set by ``parse_sql_statement_params``. The database
    file is closed on every exit path once it was opened successfully.
    """
    global DB_FILE_SIZE_IN_BYTES
    global ROW_ID_COLUMN

    _adel_log.log("\n############  SQLite PARSER -> " + file_name + "  ############ \n", 2)
    _adel_log.log("parse_db:                      ----> parsing sqlite3 database file....", 3)

    # Open the database
    DB_FILE_SIZE_IN_BYTES = _sqliteFileHandler.open_db(file_name)
    if DB_FILE_SIZE_IN_BYTES == 0:
        # File could not be opened correctly, nothing to close
        return []

    final_list = []
    try:
        # Read first page of database file and ensure it exists
        first_page_hex_string = _sqliteFileHandler.read_page(1)
        if first_page_hex_string == "":
            _adel_log.log("parse_db: ERROR - cannot read first page of database", 1)
            return []

        # Parse the database header on the first page (first 100 bytes in the database file)
        parse_db_header(first_page_hex_string)
        if HEADER_DATABASE_TEXT_ENCODING > 1:
            _adel_log.log("parse_db: ERROR - database text encoding " + str(HEADER_DATABASE_TEXT_ENCODING) + " not supported in this version of FSP", 1)
            return []

        # The first page is the root b-tree page of the schema table:
        # CREATE TABLE sqlite_master(
        #     type text,       -- one of 'table', 'index', 'view', 'trigger'
        #     name text,
        #     tbl_name text,
        #     rootpage integer,
        #     sql text
        # );
        _adel_log.log("\nparseDB:                      ----> parsing sqlite3 database SCHEMA....", 3)
        db_schemata = _sqlitePageParser.parse_table_btree_page(first_page_hex_string, 100) # 100 bytes database file header
        _adel_log.log("parse_db:                      ----> sqlite3 database SCHEMA parsed", 3)

        for db_schema in db_schemata:
            if len(db_schema) != 5 + 1: # +1 due to manually added leading rowID
                _adel_log.log("parse_db: WARNING! invalid length of database schema statement entry detected: ", 2)
                _adel_log.log(str(db_schema), 2)
                continue

            # Only tables are parsed; index, view and trigger entries are ignored
            if _helpersStringOperations.starts_with_string(str(db_schema[1]), "TABLE") != 0:
                continue

            # db_schema[5] is expected to be the "sql text" as defined in sqlite_master
            sql_statement = db_schema[5]
            if sql_statement is None or sql_statement == "":
                _adel_log.log("parse_db: WARNING! missing database schema statement entry detected, printing schema statement:", 2)
                _adel_log.log(str(db_schema), 3)
                continue

            _adel_log.log("\nparseDB:                      ----> parsing new database structure with SQL statement:", 3)
            _adel_log.log(str(sql_statement), 3)

            table = _split_create_table_statement(sql_statement)
            if table is None:
                continue
            command, type_keyword, table_name, column_definitions = table

            # Now ready to parse TABLE
            _adel_log.log("parse_db:                      ----> parsing database structure " + str(type_keyword) + " \"" + str(table_name) + "\"", 3)
            _adel_log.log("parse_db:                      ----> parsing SQL statement of " + str(type_keyword) + "....", 3)
            _adel_log.log("parse_db:                            OK - SQL statement is of type: " + str(command) + " " + str(type_keyword), 3)

            # Parse the column definitions; this also sets ROW_ID_COLUMN for this table
            column_definitions = _helpersStringOperations.cut_first_last_exclude(column_definitions, "(", ")")
            result_list = [parse_sql_statement_params(column_definitions)]

            # Virtual tables have no b-tree and thus the b-tree root page pointer is 0
            root_page = db_schema[4]
            if root_page == 0:
                _adel_log.log("parse_db:                            OK - this table holds no content (e.g. virtual table), continuing with next database element (e.g. next table)", 3)
                _adel_log.log("parse_db:                      ----> database structure " + str(type_keyword) + " \"" + str(table_name) + "\" parsed", 3)
                final_list.append(result_list)
                continue

            # Read the table's b-tree root page and ensure it exists
            btree_root_page_string = _sqliteFileHandler.read_page(root_page)
            if btree_root_page_string == "":
                _adel_log.log("parse_db: ERROR - could not refer to b-tree root page: " + str(root_page), 1)
                _adel_log.log("                 continuing with next database element (e.g. next table)", 1)
                continue
            _adel_log.log("parse_db:                      ----> parsing contents of " + str(type_keyword) + "....", 3)
            table_contents = _sqlitePageParser.parse_table_btree_page(btree_root_page_string, 0)

            if ROW_ID_COLUMN == 0:
                # No column aliases the row ID: expose it as an additional
                # leading "rowID" column (each row already starts with it)
                result_list[0] = [["rowID", "INTEGER"]] + list(result_list[0])
                result_list.extend(table_contents)
            else:
                # A column aliases the row ID: copy the row ID into that
                # column and drop the manually added leading row ID value
                for row in table_contents:
                    # Rows too short to contain the column are left unchanged instead of raising
                    if ROW_ID_COLUMN < len(row):
                        row[ROW_ID_COLUMN] = row[0]
                    result_list.append(row[1:])

            final_list.append(result_list)
            _adel_log.log("parse_db:                      ----> database structure " + str(type_keyword) + " \"" + str(table_name) + "\" parsed", 3)

        _adel_log.log("\nparseDB:                      ----> returning contents of the database file", 3)
    finally:
        # Close the database file on every path, including early returns and errors
        _sqliteFileHandler.close_db()
    _adel_log.log("parse_db:                      ----> sqlite3 database file parsed", 3)

    return final_list
Example #2
0
# Keywords that introduce a table constraint instead of a column definition
_TABLE_CONSTRAINT_KEYWORDS = ("CONSTRAINT", "PRIMARY KEY", "UNIQUE", "CHECK", "FOREIGN")
# Keywords that introduce a column constraint after the column name / type name
_COLUMN_CONSTRAINT_KEYWORDS = ("CONSTRAINT", "PRIMARY KEY", "NOT", "UNIQUE", "CHECK", "DEFAULT", "COLLATE", "REFERENCES")


def _starts_with_keyword(text, keywords):
    """Return True if *text* starts with one of *keywords* as a whole word.

    The match itself is delegated to ``starts_with_string``; in addition the
    character following the keyword must not continue an identifier, so that
    names such as "checksum" or "unique_id" are not mistaken for constraints.
    """
    for keyword in keywords:
        if _helpersStringOperations.starts_with_string(text, keyword) != 0:
            continue
        following = text[len(keyword):len(keyword) + 1]
        if following == "" or not (following.isalnum() or following == "_"):
            return True
    return False


def _is_rowid_alias(type_name, constraints):
    """Return True if a column with this type and constraints aliases the row ID.

    In SQLite only a column declared exactly as INTEGER (any letter case) with
    a PRIMARY KEY constraint is an alias for the row ID; other primary key
    columns store their own values.
    """
    return (type_name.strip().upper() == "INTEGER"
            and _starts_with_keyword(constraints, ("PRIMARY KEY",)))


def parse_sql_statement_params(hex_string):
    """Parse the column definitions of a CREATE TABLE statement.

    *hex_string* is the comma separated definition list of the statement
    (parse_db passes the text between the outer parentheses). Table
    constraints are skipped; every column yields a ``[column name, column
    type]`` pair, with an empty type string when none is declared.

    Returns the list of these pairs in definition order.

    Side effect: resets the module global ``ROW_ID_COLUMN`` to 0 and sets it
    to the 1-based position of the column that aliases the row ID (an INTEGER
    column whose constraints start with PRIMARY KEY), if there is one.
    """
    global ROW_ID_COLUMN

    crop = _helpersStringOperations.crop_whitespace
    split_once = _helpersStringOperations.split_at_first_occurrence

    # Build params list
    param_list = _helpersStringOperations.split_parenthesis_sensitive(hex_string, ",")

    # Initialise result list and reset ROW_ID_COLUMN
    result_list = []
    ROW_ID_COLUMN = 0

    _adel_log.log("parse_sql_statement_params:      ----> printing SQL statement parameters in the form [column name, column type]....", 3)
    for index, raw_param in enumerate(param_list):
        position = str(index + 1)
        param = crop(raw_param)

        # Ensure that we have a column (starts with column name) and no table constraint
        if _starts_with_keyword(param, _TABLE_CONSTRAINT_KEYWORDS):
            _adel_log.log("parse_sql_statement_params:            OK - TABLE constraint detected at positon: " + position + ", constraint is: " + str(param), 3)
            continue

        # Ok, we deal with a column
        column_tuple = split_once(param, " ")
        if len(column_tuple) == 0:
            # Only a column name is given; store it in the same [name, type] form as all other columns
            param_tuple = [param, ""]
            result_list.append(param_tuple)
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple), 3)
            continue

        # At this position we can have a type name or column constraints
        column_name = crop(column_tuple[0])
        column_string = crop(column_tuple[1])

        if _starts_with_keyword(column_string, _COLUMN_CONSTRAINT_KEYWORDS):
            # Column without declared type; it cannot alias the row ID
            # because that requires the declared type INTEGER
            param_tuple = [column_name, ""]
            result_list.append(param_tuple)
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple) + ", constraint(s): " + column_string, 3)
            continue

        # There is no column constraint, so there must be a type name;
        # it ends at the first space or opening parenthesis, whichever comes first
        type_tuple = []
        next_space = _helpersStringOperations.fist_occurrence(column_string, " ")
        next_parenthesis = _helpersStringOperations.fist_occurrence(column_string, "(")
        if (next_space >= 0) and ((next_space < next_parenthesis) or (next_parenthesis < 0)):
            type_tuple = split_once(column_string, " ")
        elif (next_parenthesis >= 0) and ((next_parenthesis <= next_space) or (next_space < 0)):
            type_tuple = split_once(column_string, "(")
            type_tuple[1] = "(" + str(type_tuple[1]) # re-attach the opening parenthesis that was cut off

        if len(type_tuple) == 0:
            # The remaining string is the complete type name
            param_tuple = [column_name, column_string]
            result_list.append(param_tuple)
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple), 3)
            continue

        # The statement continues, so continue to parse
        type_name = crop(type_tuple[0])
        type_string = crop(type_tuple[1])

        # The remaining string can contain further type name definitions (e.g. varchar(20)) or column constraints
        if _starts_with_keyword(type_string, _COLUMN_CONSTRAINT_KEYWORDS):
            param_tuple = [column_name, type_name]
            result_list.append(param_tuple)
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple) + ", constraint(s): " + type_string, 3)
            # Check whether this column functions as row ID (INTEGER PRIMARY KEY)
            if _is_rowid_alias(type_name, type_string):
                ROW_ID_COLUMN = index + 1
            continue

        # Further type name definitions must be enclosed in parenthesis and belong to the type name
        restTuple = _helpersStringOperations.cut_first_last_include_into_tuple(type_string, "(", ")")
        if len(restTuple) == 0:
            # This case should not occur
            _adel_log.log("parse_sql_statement_params: WARNING! invalid column TYPE STRING in " + position + ". column: " + str(type_string), 2)
            _adel_log.log("                                  continuing with next column definition", 2)
            continue

        rest_name = crop(restTuple[0])
        rest_string = crop(restTuple[1])
        # Rest name belongs to the type name, append it
        type_name = type_name + " " + str(rest_name)

        param_tuple = [column_name, type_name]
        result_list.append(param_tuple)

        if len(rest_string) == 0:
            # Log without column constraints
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple), 3)
        elif _starts_with_keyword(rest_string, _COLUMN_CONSTRAINT_KEYWORDS):
            _adel_log.log("parse_sql_statement_params:            OK - " + position + ". column is: " + str(param_tuple) + ", constraint(s): " + rest_string, 3)
            # A type name extended by a parenthesised part is no longer the
            # plain INTEGER required for a row ID alias, so this only
            # matches when the appended part is empty
            if _is_rowid_alias(type_name, rest_string):
                ROW_ID_COLUMN = index + 1
        else:
            # This case should not occur
            _adel_log.log("parse_sql_statement_params: WARNING! invalid column REST STRING in " + position + ". column: " + str(rest_string), 2)
            _adel_log.log("                                  continuing with next column definition", 2)

    return result_list