Beispiel #1
0
    def check_entities_on_snowflake_server(self) -> None:
        """
        Make sure that all [warehouses, dbs, schemas, tables, users, roles]
        referenced in the spec are defined in Snowflake.

        Raises a SpecLoadingError with all the errors found while checking
        Snowflake for missinf entities.
        """
        error_messages = []

        conn = SnowflakeConnector()

        warehouses = conn.show_warehouses()
        for warehouse in self.entities["warehouses"]:
            if warehouse.upper() not in warehouses:
                error_messages.append(
                    f"Missing Entity Error: Warehouse {warehouse} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        databases = conn.show_databases()
        for db in self.entities["databases"]:
            if db.upper() not in databases:
                error_messages.append(
                    f"Missing Entity Error: Database {db} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        schemas = conn.show_schemas()
        for schema in self.entities["schema_refs"]:
            if "*" not in schema and schema.upper() not in schemas:
                error_messages.append(
                    f"Missing Entity Error: Schema {schema} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        tables = conn.show_tables()
        views = conn.show_views()
        for table in self.entities["table_refs"]:
            if ("*" not in table and table.upper() not in tables
                    and table.upper() not in views):
                error_messages.append(
                    f"Missing Entity Error: Table/View {table} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        roles = conn.show_roles()
        for role in self.entities["roles"]:
            if role.upper() not in roles:
                error_messages.append(
                    f"Missing Entity Error: Role {role} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        users = conn.show_users()
        for user in self.entities["users"]:
            if user.upper() not in users:
                error_messages.append(
                    f"Missing Entity Error: User {user} was not found on"
                    " Snowflake Server. Please create it before continuing.")

        if error_messages:
            raise SpecLoadingError("\n".join(error_messages))
Beispiel #2
0
    def generate_grant_ownership(self, role: str, config: str) -> List[Dict]:
        """
        Generate the GRANT OWNERSHIP statements for DATABASEs, SCHEMAs and TABLEs.

        role: the name of the role (e.g. "LOADER") OWNERSHIP will be GRANTed to
        config: the subtree for the role as specified in the spec

        Returns the SQL commands generated as a List
        """
        sql_commands = []

        try:
            for database in config["owns"]["databases"]:
                if self.check_grant_to_role(role, "OWNERSHIP", "DATABASE",
                                            database):
                    already_granted = True
                else:
                    already_granted = False

                sql_commands.append({
                    "already_granted":
                    already_granted,
                    "sql":
                    GRANT_OWNERSHIP_TEMPLATE.format(
                        resource_type="DATABASE",
                        resource_name=SnowflakeConnector.snowflaky(database),
                        role_name=SnowflakeConnector.snowflaky(role),
                    ),
                })
        except KeyError:
            logging.debug(
                "`owns.databases` not found for role {}, skipping generation of DATABASE OWNERSHIP statements."
                .format(role))

        try:
            for schema in config["owns"]["schemas"]:
                name_parts = schema.split(".")
                info_schema = f"{name_parts[0].upper()}.INFORMATION_SCHEMA"

                schemas = []

                if name_parts[1] == "*":
                    conn = SnowflakeConnector()
                    db_schemas = conn.show_schemas(name_parts[0])

                    for db_schema in db_schemas:
                        if db_schema != info_schema:
                            schemas.append(db_schema)
                else:
                    schemas = [schema]

                for db_schema in schemas:
                    if self.check_grant_to_role(role, "OWNERSHIP", "SCHEMA",
                                                db_schema):
                        already_granted = True
                    else:
                        already_granted = False

                    sql_commands.append({
                        "already_granted":
                        already_granted,
                        "sql":
                        GRANT_OWNERSHIP_TEMPLATE.format(
                            resource_type="SCHEMA",
                            resource_name=SnowflakeConnector.snowflaky(
                                db_schema),
                            role_name=SnowflakeConnector.snowflaky(role),
                        ),
                    })
        except KeyError:
            logging.debug(
                "`owns.schemas` not found for role {}, skipping generation of SCHEMA OWNERSHIP statements."
                .format(role))

        try:
            # Gather the tables that OWNERSHIP will be granted to
            tables = []

            for table in config["owns"]["tables"]:
                name_parts = table.split(".")
                info_schema = f"{name_parts[0].upper()}.INFORMATION_SCHEMA"

                if name_parts[2] == "*":
                    schemas = []
                    conn = SnowflakeConnector()

                    if name_parts[1] == "*":
                        db_schemas = conn.show_schemas(name_parts[0])

                        for schema in db_schemas:
                            if schema != info_schema:
                                schemas.append(schema)
                    else:
                        schemas = [f"{name_parts[0]}.{name_parts[1]}"]

                    for schema in schemas:
                        tables.extend(conn.show_tables(schema=schema))
                else:
                    tables = [table]

            # And then grant OWNERSHIP to all tables
            for db_table in tables:
                if self.check_grant_to_role(role, "OWNERSHIP", "TABLE",
                                            db_table):
                    already_granted = True
                else:
                    already_granted = False

                sql_commands.append({
                    "already_granted":
                    already_granted,
                    "sql":
                    GRANT_OWNERSHIP_TEMPLATE.format(
                        resource_type="TABLE",
                        resource_name=SnowflakeConnector.snowflaky(db_table),
                        role_name=SnowflakeConnector.snowflaky(role),
                    ),
                })
        except KeyError:
            logging.debug(
                "`owns.tables` not found for role {}, skipping generation of TABLE OWNERSHIP statements."
                .format(role))

        return sql_commands
Beispiel #3
0
    def generate_table_and_view_grants(
        self,
        role: str,
        table: str,
        grant_type: str,
        usage_granted: Dict,
        shared_dbs: Set,
    ) -> Tuple[List[str], Dict]:
        """
        Generate the GRANT statements for TABLEs and VIEWs.

        role: the name of the role the privileges are GRANTed to
        table: the name of the TABLE/VIEW (e.g. "RAW.PUBLIC.MY_TABLE")
        grant_type: What type of privileges are granted? One of {"read", "write"}
        usage_granted: Passed by generate_grant_privileges_to_role() to track all
            all the entities a role has been granted access (USAGE) to.
        shared_dbs: a set of all the shared databases defined in the spec.

        Returns the SQL commands generated and the updated usage_granted as a Tuple
        """
        sql_commands = []

        # Split the table identifier into parts {DB_NAME}.{SCHEMA_NAME}.{TABLE_NAME}
        #  so that we can check and use each one
        name_parts = table.split(".")

        # Do nothing if this is a table inside a shared database:
        #  "Granting individual privileges on imported databases is not allowed."
        if name_parts[0] in shared_dbs:
            return (sql_commands, usage_granted)

        if grant_type == "read":
            privileges = "SELECT"
        elif grant_type == "write":
            privileges = "SELECT, INSERT, UPDATE, DELETE, TRUNCATE, REFERENCES"
        else:
            raise SpecLoadingError(
                f"Wrong grant_type {spec_path} provided to generate_table_and_view_grants()"
            )

        # Generate the INFORMATION_SCHEMA identifier for that database
        #  in order to be able to filter it out
        info_schema = f"{name_parts[0].upper()}.INFORMATION_SCHEMA"

        # Before assigning privileges to a Table, check if
        #  usage to the database has been granted and
        # if not grant usage first to the database
        # Schemas will be checked as we generate them
        if name_parts[0].upper() not in usage_granted["databases"]:
            new_commands, usage_granted = self.generate_database_grants(
                role=role,
                database=name_parts[0],
                grant_type="read",
                usage_granted=usage_granted,
                shared_dbs=shared_dbs,
            )
            sql_commands.extend(new_commands)

        # Gather the tables/views that privileges will be granted to
        tables = []
        views = []

        # List of all tables/views in schema
        table_list = []
        view_list = []

        schemas = []

        conn = SnowflakeConnector()

        if name_parts[1] == "*":
            # If {DB_NAME}.*.* was provided as the identifier, we have to fetch
            #  each schema in database DB_NAME, so that we can grant privileges
            #  for all tables in that schema
            # (You can not GRANT to all table with a wild card for the schema name)
            db_schemas = conn.show_schemas(name_parts[0])
            for schema in db_schemas:
                if schema != info_schema:
                    schemas.append(schema)
        elif "*" in name_parts[1]:
            conn = SnowflakeConnector()
            db_schemas = conn.show_schemas(name_parts[0])
            for db_schema in db_schemas:
                schema_name = db_schema.split(".", 1)[1].lower()
                if schema_name.startswith(name_parts[1].split("*", 1)[0]):
                    schemas.append(db_schema)
        else:
            schemas = [f"{name_parts[0]}.{name_parts[1]}"]

        for schema in schemas:
            # first check if the role has been granted usage on the schema
            # either directly or indirectly (by granting to DB.*)
            if (schema.upper() not in usage_granted["schemas"]
                    and f"{name_parts[0].upper()}.*"
                    not in usage_granted["schemas"]):
                new_commands, usage_granted = self.generate_schema_grants(
                    role=role,
                    schema=schema,
                    grant_type="read",
                    usage_granted=usage_granted,
                    shared_dbs=shared_dbs,
                )
                sql_commands.extend(new_commands)

            # And then add the tables for that schema to the tables[] and views[]
            #  that will be granted the permissions
            table_list.extend(conn.show_tables(schema=schema))
            view_list.extend(conn.show_views(schema=schema))

        if name_parts[2] == "*":
            # If *.* then you can grant all, grant future, and exit
            for schema in schemas:
                # Grant on ALL tables
                sql_commands.append({
                    "already_granted":
                    False,
                    "sql":
                    GRANT_ALL_PRIVILEGES_TEMPLATE.format(
                        privileges=privileges,
                        resource_type="TABLE",
                        resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                })

                # Grant on ALL views
                sql_commands.append({
                    "already_granted":
                    False,
                    "sql":
                    GRANT_ALL_PRIVILEGES_TEMPLATE.format(
                        privileges=privileges,
                        resource_type="VIEW",
                        resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                })

                # Grant future on all tables
                sql_commands.append({
                    "already_granted":
                    False,
                    "sql":
                    GRANT_FUTURE_PRIVILEGES_TEMPLATE.format(
                        privileges=privileges,
                        resource_type="TABLE",
                        resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                })
                # Grant future on all views
                sql_commands.append({
                    "already_granted":
                    False,
                    "sql":
                    GRANT_FUTURE_PRIVILEGES_TEMPLATE.format(
                        privileges=privileges,
                        resource_type="VIEW",
                        resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                })

            return (sql_commands, usage_granted)

        else:
            # Only one table/view to be granted permissions to
            if table.upper() in table_list:
                tables = [table]
            if table.upper() in view_list:
                views = [table]

        # And then grant privileges to all tables in that schema
        for db_table in tables:
            already_granted = False
            if (grant_type == "read" and self.check_grant_to_role(
                    role, "SELECT", "TABLE", db_table)) or (
                        grant_type == "write" and self.check_grant_to_role(
                            role, "SELECT", "TABLE", db_table)
                        and self.check_grant_to_role(role, "INSERT", "TABLE",
                                                     db_table)
                        and self.check_grant_to_role(role, "UPDATE", "TABLE",
                                                     db_table)
                        and self.check_grant_to_role(role, "DELETE", "TABLE",
                                                     db_table)
                        and self.check_grant_to_role(role, "TRUNCATE", "TABLE",
                                                     db_table)
                        and self.check_grant_to_role(role, "REFERENCES",
                                                     "TABLE", db_table)):
                already_granted = True

            # And then grant privileges to the db_table
            sql_commands.append({
                "already_granted":
                already_granted,
                "sql":
                GRANT_PRIVILEGES_TEMPLATE.format(
                    privileges=privileges,
                    resource_type="TABLE",
                    resource_name=SnowflakeConnector.snowflaky(db_table),
                    role=SnowflakeConnector.snowflaky(role),
                ),
            })

        for db_view in views:
            already_granted = False
            if (
                    grant_type == "read" and self.check_grant_to_role(
                        role, "SELECT", "VIEW", db_view)
            ) or (grant_type == "write"
                  and self.check_grant_to_role(role, "SELECT", "VIEW", db_view)
                  and self.check_grant_to_role(role, "INSERT", "VIEW", db_view)
                  and self.check_grant_to_role(role, "UPDATE", "VIEW", db_view)
                  and self.check_grant_to_role(role, "DELETE", "VIEW", db_view)
                  and self.check_grant_to_role(role, "TRUNCATE", "VIEW",
                                               db_view)
                  and self.check_grant_to_role(role, "REFERENCES", "VIEW",
                                               db_view)):
                already_granted = True

            # And then grant privileges to the db_view
            sql_commands.append({
                "already_granted":
                already_granted,
                "sql":
                GRANT_PRIVILEGES_TEMPLATE.format(
                    privileges=privileges,
                    resource_type="VIEW",
                    resource_name=SnowflakeConnector.snowflaky(db_view),
                    role=SnowflakeConnector.snowflaky(role),
                ),
            })

        return (sql_commands, usage_granted)
Beispiel #4
0
    def generate_schema_grants(
        self,
        role: str,
        schema: str,
        grant_type: str,
        usage_granted: Dict,
        shared_dbs: Set,
    ) -> Tuple[List[str], Dict]:
        """
        Generate the GRANT statements for SCHEMAs.

        role: the name of the role the privileges are GRANTed to
        schema: the name of the Schema (e.g. "RAW.PUBLIC")
        grant_type: What type of privileges are granted? One of {"read", "write"}
        usage_granted: Passed by generate_grant_privileges_to_role() to track all
            all the entities a role has been granted access (USAGE) to.
        shared_dbs: a set of all the shared databases defined in the spec.

        Returns the SQL commands generated and the updated usage_granted as a Tuple
        """
        sql_commands = []

        # Split the schema identifier into parts {DB_NAME}.{SCHEMA_NAME}
        #  so that we can check and use each one
        name_parts = schema.split(".")

        # Do nothing if this is a schema inside a shared database:
        #  "Granting individual privileges on imported databases is not allowed."
        if name_parts[0] in shared_dbs:
            return (sql_commands, usage_granted)

        if grant_type == "read":
            privileges = "USAGE"
        elif grant_type == "write":
            privileges = ("USAGE, MONITOR, CREATE TABLE,"
                          " CREATE VIEW, CREATE STAGE, CREATE FILE FORMAT,"
                          " CREATE SEQUENCE, CREATE FUNCTION, CREATE PIPE")
        else:
            raise SpecLoadingError(
                f"Wrong grant_type {spec_path} provided to generate_schema_grants()"
            )

        # Before assigning privileges to a schema, check if
        #  usage to the database has been granted and
        # if not grant usage first to the database
        if name_parts[0].upper() not in usage_granted["databases"]:
            new_commands, usage_granted = self.generate_database_grants(
                role=role,
                database=name_parts[0],
                grant_type="read",
                usage_granted=usage_granted,
                shared_dbs=shared_dbs,
            )
            sql_commands.extend(new_commands)

        # Generate the INFORMATION_SCHEMA identifier for that database
        #  in order to be able to filter it out
        info_schema = f"{name_parts[0].upper()}.INFORMATION_SCHEMA"

        schemas = []

        if name_parts[1] == "*":
            # If {DB_NAME}.* was provided as the schema identifier, we have to fetch
            #  each schema in database DB_NAME, so that we can grant privileges
            #  for each schema seperatelly.
            # We could GRANT {privileges} TO ALL SCHEMAS IN DATABASE
            #  but that would not allow us to know if a specific privilege has
            #  been already granted or not
            conn = SnowflakeConnector()
            db_schemas = conn.show_schemas(name_parts[0])
            for db_schema in db_schemas:
                if db_schema != info_schema:
                    schemas.append(db_schema)
        elif "*" in name_parts[1]:
            conn = SnowflakeConnector()
            db_schemas = conn.show_schemas(name_parts[0])
            for db_schema in db_schemas:
                schema_name = db_schema.split(".", 1)[1].lower()
                if schema_name.startswith(name_parts[1].split("*", 1)[0]):
                    schemas.append(db_schema)
        else:
            schemas = [schema]

        for db_schema in schemas:
            already_granted = False

            if (grant_type == "read" and self.check_grant_to_role(
                    role, "USAGE", "SCHEMA", db_schema)) or (
                        grant_type == "write" and self.check_grant_to_role(
                            role, "USAGE", "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "MONITOR", "SCHEMA",
                                                     db_schema)
                        and self.check_grant_to_role(role, "CREATE TABLE",
                                                     "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "CREATE VIEW",
                                                     "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "CREATE STAGE",
                                                     "SCHEMA", db_schema)
                        and self.check_grant_to_role(
                            role, "CREATE FILE FORMAT", "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "CREATE SEQUENCE",
                                                     "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "CREATE FUNCTION",
                                                     "SCHEMA", db_schema)
                        and self.check_grant_to_role(role, "CREATE PIPE",
                                                     "SCHEMA", db_schema)):
                already_granted = True

            sql_commands.append({
                "already_granted":
                already_granted,
                "sql":
                GRANT_PRIVILEGES_TEMPLATE.format(
                    privileges=privileges,
                    resource_type="SCHEMA",
                    resource_name=SnowflakeConnector.snowflaky(db_schema),
                    role=SnowflakeConnector.snowflaky(role),
                ),
            })

        usage_granted["schemas"].add(schema.upper())

        return (sql_commands, usage_granted)
Beispiel #5
0
    def generate_table_and_view_revokes(
        self, role: str, table: str, shared_dbs: Set
    ) -> List[str]:
        """
        Generate the REVOKE statements for TABLEs and VIEWs.

        role: the name of the role the privileges are REVOKEd from
        table: the name of the TABLE/VIEW (e.g. "RAW.PUBLIC.MY_TABLE")
        shared_dbs: a set of all the shared databases defined in the spec.

        Returns the SQL commands generated
        """
        sql_commands = []

        # Split the table identifier into parts {DB_NAME}.{SCHEMA_NAME}.{TABLE_NAME}
        #  so that we can check and use each one
        name_parts = table.split(".")

        # Do nothing if this is a table inside a shared database:
        #  "Granting individual privileges on imported databases is not allowed."
        if name_parts[0] in shared_dbs:
            return sql_commands

        # Generate the INFORMATION_SCHEMA identifier for that database
        #  in order to be able to filter it out
        info_schema = f"{name_parts[0].upper()}.INFORMATION_SCHEMA"

        schemas = []

        conn = SnowflakeConnector()

        if name_parts[1] == "*":
            # If {DB_NAME}.*.* was provided as the identifier, we have to fetch
            #  each schema in database DB_NAME, so that we can revoke privileges
            #  for all tables in that schema
            # (You can not GRANT to all table with a wild card for the schema name)
            db_schemas = conn.show_schemas(name_parts[0])
            for schema in db_schemas:
                if schema != info_schema:
                    schemas.append(schema)
        elif "*" in name_parts[1]:
            # Similar to above, if {DB_NAME}.partial_*.* was provided,
            #  we need to find each of the schemas that match the pattern.
            conn = SnowflakeConnector()
            db_schemas = conn.show_schemas(name_parts[0])
            for db_schema in db_schemas:
                schema_name = db_schema.split(".", 1)[1].lower()
                if schema_name.startswith(name_parts[1].split("*", 1)[0]):
                    schemas.append(db_schema)
        else:
            schemas = [f"{name_parts[0]}.{name_parts[1]}"]

        for schema in schemas:
            # Revoke on ALL tables
            sql_commands.append(
                {
                    "already_granted": False,
                    "sql": REVOKE_ALL_PRIVILEGES_OBJECT_TEMPLATE.format(
                        privileges="ALL",
                        resource_type="TABLE",
                        parent_resource_type="SCHEMA",
                        parent_resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                }
            )

            # Revoke on ALL views
            sql_commands.append(
                {
                    "already_granted": False,
                    "sql": REVOKE_ALL_PRIVILEGES_OBJECT_TEMPLATE.format(
                        privileges="ALL",
                        resource_type="VIEW",
                        parent_resource_type="SCHEMA",
                        parent_resource_name=SnowflakeConnector.snowflaky(schema),
                        role=SnowflakeConnector.snowflaky(role),
                    ),
                }
            )

        return sql_commands