Example #1
0
    def _verify_file_format(self):
        """
        Verifies that the Excel document has the correct tabs and column headers.
        :return: True if the workbook has the correct worksheets and columns in each.
        :rtype: bool
        """

        is_valid = True  # hope for the best.

        sheet_names = self.workbook.sheet_names()
        for required_sheet in XLSReader.required_sheets:
            if required_sheet not in sheet_names:
                eprint("Error:  missing sheet %s!" % required_sheet)
                is_valid = False
            else:
                sheet = self.workbook.sheet_by_name(required_sheet)
                header_row = sheet.row_values(rowx=0, start_colx=0)
                for required_column in XLSReader.required_columns[
                    required_sheet
                ]:
                    if required_column not in header_row:
                        eprint(
                            "Error:  missing column %s in sheet %s!"
                            % (required_column, required_sheet)
                        )
                        is_valid = False

        return is_valid
Example #2
0
    def _read_relationships_from_workbook(self):
        """
        Reads the foreign keys for the tables from Excel.  
        """
        # "Relationships": ["Name", "Database", "Schema", "From Table", "To Table", "Conditions"]
        rel_sheet = self.workbook.sheet_by_name("Relationships")
        indices = self.indices["Relationships"]

        for row_count in range(1, rel_sheet.nrows):
            row = rel_sheet.row_values(rowx=row_count, start_colx=0)

            database_name = row[indices["Database"]]
            database = self.databases.get(database_name, None)
            if database is None:
                eprint(
                    "ERROR:  Database %s from the Relationships tab is not known."
                    % database_name
                )

            else:
                table_name = row[indices["From Table"]]
                table = database.get_table(table_name)
                if table is None:
                    eprint(
                        "ERROR:  Table %s from the Relationships tab is not known."
                        % table_name
                    )
                table.add_relationship(
                    to_table=row[indices["To Table"]],
                    conditions=row[indices["Conditions"]],
                )
Example #3
0
    def _read_columns_from_workbook(self):
        """
        Reads the columns for the tables from Excel.  
        """
        # "Columns":       ["Database", "Schema", "Table", "Column", "Name", "Type"],
        column_sheet = self.workbook.sheet_by_name("Columns")
        indices = self.indices["Columns"]

        for row_count in range(1, column_sheet.nrows):
            row = column_sheet.row_values(rowx=row_count, start_colx=0)

            database_name = row[indices["Database"]]
            database = self.databases.get(database_name, None)
            if database is None:
                eprint(
                    "ERROR:  Database %s from the Columns tab is not known."
                    % database_name
                )

            else:
                table_name = row[indices["Table"]]
                table = database.get_table(table_name)
                if table is None:
                    eprint(
                        "ERROR:  Table %s from the Columns tab is not known."
                        % table_name
                    )
                else:
                    table.add_column(
                        Column(
                            column_name=row[indices["Name"]],
                            column_type=row[indices["Type"]],
                        )
                    )
Example #4
0
    def _read_tables_from_workbook(self):
        """
        Reads the databases and tables from Excel.  These are used to populate from the remaining sheets.
        """

        # "Tables":        ["Database", "Schema", "Table", "Updated", "Update Type", "# Rows", "# Columns",
        #                   "Primary Key", "Shard Key", "# Shards", "RLS Column"],
        table_sheet = self.workbook.sheet_by_name("Tables")
        indices = self.indices["Tables"]

        for row_count in range(1, table_sheet.nrows):
            row = table_sheet.row_values(rowx=row_count, start_colx=0)

            database_name = row[indices["Database"]]
            database = self.databases.get(database_name, None)
            if database is None:
                database = Database(database_name=database_name)
                self.databases[database_name] = database

            pk = row[indices["Primary Key"]].strip()
            if pk == "":
                pk = None
            else:
                pk = [x.strip() for x in pk.split(",")]

            sk_name = row[indices["Shard Key"]].strip()
            sk_nbr_shards = row[indices["# Shards"]]

            if (sk_name == "" and sk_nbr_shards != "") or (
                sk_name != "" and sk_nbr_shards == ""
            ):
                eprint(
                    "ERROR:  %s need to provide both a shard key name and number of shards."
                    % row[indices["Table"]]
                )

            if sk_name == "":
                sk = None
            else:
                sk = [x.strip() for x in sk_name.split(",")]

            shard_key = None
            if sk_name != "" and sk_nbr_shards != "":
                shard_key = ShardKey(
                    shard_keys=sk, number_shards=sk_nbr_shards
                )

            table = Table(
                table_name=row[indices["Table"]],
                schema_name=row[indices["Schema"]],
                primary_key=pk,
                shard_key=None,
            )
            database.add_table(table)
def valid_args(args):
    """
    Checks to see if the arguments make sense.
    :param args: The command line arguments.
    :return: True if valid, false otherwise.
    """

    # make sure there is a to_ flag since data has to come from somewhere unless this is just creating blank Excel.
    if not args.from_ddl and not args.from_excel and not args.to_excel:
        eprint("--from_ddl or --from_excel must be provided as arguments.")
        return False

    if args.from_ddl and not args.database:
        eprint("--from_ddl requires the --database option.")
        return False

    return True
Example #6
0
def valid_args(args):
    """
    Checks to see if the arguments make sense.
    :param args: The command line arguments.
    :return: True if valid, false otherwise.
    """
    # TODO rethink what is and isn't valid and update.
    # make sure there is a to_ flag since data has to come from somewhere.
    if args.from_ddl is False and args.from_excel is False:
        eprint("--from_ddl or --from_excel must be provided as arguments.")
        return False

    # if args.to_excel is False and args.to_tql is False:
    #     eprint("--to_tql or --to_excel must be provided as arguments.")
    #     return False

    # if args.database is None:
    #     eprint("a database name must be provided with --database")

    return True
def read_excel(args):
    """
    Reads the database description from XLS and returns a database model.
    Note that XLSReader can read multiple databases at a time, but convert 
    only supports one.  If there are more than one database, the first will be
    returned and an error message written.
    :param args: The command line arguments.
    :returns: The database read from Excel.
    :rtype: Database
    """

    reader = XLSReader()
    databases = reader.read_xls(filepath=args.from_excel)
    if len(databases) == 0:
        eprint("ERROR:  No databases read.")
        return None

    if len(databases) > 1:
        eprint("WARNING:  multiple databases read.  Only using %s" %
               databases.values()[0].database_name)

    return databases.values()[0]
Example #8
0
def main():
    """Main function for the script."""
    args = parse_args()

    database = None
    if valid_args(args):
        print(args)

        if args.from_ddl:
            print("Reading DDL ...")
            database = read_ddl(args)

        if args.from_excel:
            print("Reading Excel")
            database = read_excel(args)

        if args.validate:
            print("Validating database")
            vr = database.validate()
            if not vr.is_valid:
                for issue in vr.issues:
                    eprint(issue)
            else:
                print("Database is valid.")

        if args.to_tql:
            print("Writing TQL ...")
            write_tql(args=args, database=database)

        if args.to_excel:
            print("Writing Excel ...")
            write_excel(args=args, database=database)

        if args.to_tsload:
            print("Writing tsload ...")
            write_tsload(args=args, database=database)
Example #9
0
    def _read_foreign_keys_from_workbook(self):
        """
        Reads the foreign keys for the tables from Excel.  
        """

        # "Foreign Keys":  ["Name", "Database", "Schema", "From Table", "Columns", "To Table", "Columns"],
        fk_sheet = self.workbook.sheet_by_name("Foreign Keys")
        indices = self.indices["Foreign Keys"]

        for row_count in range(1, fk_sheet.nrows):
            row = fk_sheet.row_values(rowx=row_count, start_colx=0)

            database_name = row[indices["Database"]]
            database = self.databases.get(database_name, None)
            if database is None:
                eprint(
                    "ERROR:  Database %s from the Foreign Keys tab is not known."
                    % database_name
                )

            else:
                table_name = row[indices["From Table"]]
                table = database.get_table(table_name)
                if table is None:
                    eprint(
                        "ERROR:  Table %s from the Foreign Keys tab is not known."
                        % table_name
                    )

                else:
                    key_name = row[indices["Name"]]
                    if key_name is None:
                        eprint(
                            "ERROR:  Table %s from the Foreign Keys tab is missing a FK name."
                            % table_name
                        )

                    from_keys = row[indices["From Columns"]]
                    from_keys = [x.strip() for x in from_keys.split(",")]
                    to_keys = row[indices["To Columns"]]
                    to_keys = [x.strip() for x in to_keys.split(",")]
                    table.add_foreign_key(
                        name=key_name,
                        from_keys=from_keys,
                        to_table=row[indices["To Table"]],
                        to_keys=to_keys,
                    )
def valid_args(args):
    """
    Checks to see if the arguments make sense.
    :param args: The command line arguments.
    :return: True if valid, False otherwise.
    """

    ret_value = True

    # make sure there is a to_ flag since data has to come from somewhere unless this is just creating blank Excel.
    if not args.ddl1 or not args.ddl2:
        eprint("--ddl1 and --ddl2 must be provided")
        ret_value = False

    else:
        if not os.path.exists(args.ddl1):
            eprint("file %s doesn't exist" % args.ddl1)
            ret_value = False
        if not os.path.exists(args.ddl2):
            eprint("file %s doesn't exist" % args.ddl2)
            ret_value = False

    return ret_value