def get_sql(self, name):
    sql = self._sql_helper.get(name)

    if sql is None:
        raise exceptions.InvalidArgumentsException(
            f"Unable to look up SQL {name}")
    else:
        return sql
def get_table_name(table_name, deployment_stage,
                   storage_engine: str = params.DYNAMO_STORAGE_HANDLER) -> str:
    if storage_engine == params.DYNAMO_STORAGE_HANDLER or storage_engine is None:
        return f"{table_name}-{deployment_stage}"
    elif storage_engine == params.RDBMS_STORAGE_HANDLER:
        return f"{table_name}_{deployment_stage}"
    else:
        raise exceptions.InvalidArgumentsException(
            f"Unable to generate table name for storage handler {storage_engine}")
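# Illustrative usage (table and stage values are hypothetical): the Dynamo
# handler hyphenates the stage suffix, while the RDBMS handler uses an
# underscore so the result remains a valid SQL identifier.
#
#   get_table_name("customers", "dev")                                 # -> "customers-dev"
#   get_table_name("customers", "dev", params.RDBMS_STORAGE_HANDLER)   # -> "customers_dev"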
def validate_params(**kwargs):
    required_args = [
        params.CLUSTER_ADDRESS, params.CLUSTER_PORT, params.DB_USERNAME,
        params.DB_NAME, params.DB_USERNAME_PSTORE_ARN, params.DB_USE_SSL,
        params.RDBMS_DIALECT, params.CRAWLER_ROLENAME, params.PRIMARY_KEY,
        params.TABLE_INDEXES, params.STORAGE_HANDLER,
        params.CONTROL_TYPE_RESOURCE_SCHEMA
    ]

    for r in required_args:
        if r not in kwargs:
            raise exceptions.InvalidArgumentsException(
                f"Unable to generate RDBMS Storage Handler without parameter {r}")

    if kwargs.get(params.RDBMS_DIALECT) not in [
            engine_types.DIALECT_PG, engine_types.DIALECT_MYSQL
    ]:
        raise exceptions.InvalidArgumentsException(
            f"Invalid Engine Dialect {kwargs.get(params.RDBMS_DIALECT)}")
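# Minimal sketch of the keyword arguments this check expects. All values below
# are illustrative only - real values come from the deployment configuration:
#
#   validate_params(**{
#       params.CLUSTER_ADDRESS: "mycluster.xxxx.us-east-1.rds.amazonaws.com",
#       params.CLUSTER_PORT: 5432,
#       params.DB_USERNAME: "data_api",
#       params.DB_NAME: "dataapi",
#       params.DB_USERNAME_PSTORE_ARN: "arn:aws:ssm:us-east-1:123456789012:parameter/db-password",
#       params.DB_USE_SSL: True,
#       params.RDBMS_DIALECT: engine_types.DIALECT_PG,
#       params.CRAWLER_ROLENAME: "DataAPICrawlerRole",
#       params.PRIMARY_KEY: "id",
#       params.TABLE_INDEXES: ["email"],
#       params.STORAGE_HANDLER: params.RDBMS_STORAGE_HANDLER,
#       params.CONTROL_TYPE_RESOURCE_SCHEMA: {"type": "object", "properties": {...}}
#   })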
def __init__(self, dialect: str):
    if dialect not in [DIALECT_MYSQL, DIALECT_PG]:
        raise exceptions.InvalidArgumentsException(
            f"Unknown Dialect {dialect}")
    else:
        self._dialect = dialect

    self._logger = utils.setup_logging()

    # load the sql statement helper
    with open(
            os.path.join(os.path.dirname(__file__),
                         f'sql_fragments_{self._dialect}.json'), 'r') as f:
        self._sql_helper = json.load(f)
def item_master_update(self, caller_identity: str, **kwargs):
    if self._pk_name not in kwargs or params.ITEM_MASTER_ID not in kwargs:
        raise exceptions.InvalidArgumentsException(
            f"Request must include {self._pk_name} and {params.ITEM_MASTER_ID}")
    else:
        # check that the item master exists
        item_master_id = kwargs.get(params.ITEM_MASTER_ID)
        target_id = "null"
        if item_master_id is not None:
            self.check(id=item_master_id)
            target_id = f"'{kwargs.get(params.ITEM_MASTER_ID)}'"

        pk = kwargs.get(self._pk_name)
        if pk is not None and ',' in pk:
            pk_vals = [f"'{x}'" for x in pk.split(',')]
            pk_clause = f"{self._pk_name} in ({','.join(pk_vals)})"
        else:
            pk_clause = f"{self._pk_name} = '{pk}'"

        update_attribute_clauses = [
            f"{self._engine_type.get_who(params.ITEM_MASTER_ID)} = {target_id}"
        ]
        update_attribute_clauses.extend(
            self._engine_type.who_column_update(caller_identity=caller_identity,
                                                version_increment=True))

        update_item_master = f"update {self._resource_table_name} set {','.join(update_attribute_clauses)} where {pk_clause}"

        counts, rows = self._engine_type.run_commands(
            conn=self._db_conn, commands=[update_item_master])

        return {
            "RecordCount": counts[0],
            params.DATA_MODIFIED: True if counts[0] > 0 else False
        }
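# Illustrative call and resulting statement. The key, table, and who-column
# names shown are hypothetical - the real names come from self._pk_name,
# params, and the engine type's who-column helpers:
#
#   handler.item_master_update("user@example.com",
#                              **{"id": "123,456", params.ITEM_MASTER_ID: "789"})
#
# builds roughly:
#
#   update my_resource_table
#      set item_master_id = '789', last_updated_by = 'user@example.com', ...
#    where id in ('123','456')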
def find(self, **kwargs):
    query_table = None
    filters = None
    column_list = None

    # determine if we are performing a resource or metadata search
    if params.RESOURCE in kwargs and len(kwargs.get(params.RESOURCE)) > 0:
        query_table = self._resource_table_name
        filters = kwargs.get(params.RESOURCE)
        source_schema_properties = self._resource_schema.get("properties")
    elif params.METADATA in kwargs and len(kwargs.get(params.METADATA)) > 0:
        query_table = self._metadata_table_name
        filters = kwargs.get(params.METADATA)
        source_schema_properties = self._metadata_schema.get("properties")
    else:
        raise exceptions.InvalidArgumentsException("Malformed Find Request")

    column_list = list(source_schema_properties.keys())

    # transform column filters
    column_filters = self._json_to_column_list(input=filters,
                                               version_increment=False,
                                               add_who=False)

    # generate select statement
    query = f"select * from {query_table} where {' and '.join(column_filters)}"

    self._logger.debug(query)

    # return resultset
    count, rows = self._engine_type.run_commands(conn=self._db_conn,
                                                 commands=[query])

    return utils.pivot_resultset_into_json(rows=rows,
                                           column_spec=column_list,
                                           type_map=source_schema_properties)
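# Illustrative call (attribute names are hypothetical and must exist as
# properties in the registered JSON schema):
#
#   handler.find(**{params.RESOURCE: {"city": "Seattle", "status": "active"}})
#
# which issues something like:
#
#   select * from my_resource_table where city = 'Seattle' and status = 'active'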
def verify_crawler(table_name, crawler_rolename, catalog_db,
                   datasource_type: str = params.DEFAULT_STORAGE_HANDLER,
                   deployed_account: str = None,
                   logger: Logger = None,
                   crawler_prefix: str = None,
                   **kwargs):
    glue_client = _get_glue_client()

    crawler_name = table_name if crawler_prefix is None else f"{crawler_prefix}-{table_name}"
    crawler_description = f'Crawler for AWS Data API Table {table_name}'

    try:
        glue_client.get_crawler(Name=crawler_name)
    except glue_client.exceptions.EntityNotFoundException:
        if datasource_type == params.DYNAMO_STORAGE_HANDLER:
            glue_client.create_crawler(
                Name=crawler_name,
                Role=crawler_rolename,
                DatabaseName=catalog_db,
                Description=crawler_description,
                Targets={'DynamoDBTargets': [
                    {
                        'Path': table_name
                    },
                ]},
                # run every hour on the hour
                Schedule='cron(0 * * * ? *)',
                SchemaChangePolicy={
                    'UpdateBehavior': 'UPDATE_IN_DATABASE',
                })
        elif datasource_type == params.RDBMS_STORAGE_HANDLER:
            database_name = kwargs.get(params.CLUSTER_ADDRESS).split('.')[0]

            if deployed_account is None:
                raise exceptions.InvalidArgumentsException(
                    "Cannot create RDS Crawler without Deployment Account Information")

            connection_name = f"{params.AWS_DATA_API_SHORTNAME}.{database_name}"

            _pwd = get_encrypted_parameter(
                parameter_name=kwargs.get(params.DB_USERNAME_PSTORE_ARN),
                region=get_region())

            try:
                # create a connection
                conn_args = {
                    'Name': connection_name,
                    'Description': f"{params.AWS_DATA_API_NAME} - {database_name}",
                    'ConnectionType': 'JDBC',
                    'ConnectionProperties': {
                        'JDBC_CONNECTION_URL': f'jdbc:postgresql://{kwargs.get(params.CLUSTER_ADDRESS)}:{kwargs.get(params.CLUSTER_PORT)}/{kwargs.get(params.DB_NAME)}',
                        'USERNAME': kwargs.get(params.DB_USERNAME),
                        'PASSWORD': _pwd
                    }
                }

                if params.SUBNETS in kwargs and params.SECURITY_GROUPS in kwargs:
                    conn_args['PhysicalConnectionRequirements'] = {
                        'SubnetId': kwargs.get(params.SUBNETS)[0],
                        'SecurityGroupIdList': kwargs.get(params.SECURITY_GROUPS)
                    }

                glue_client.create_connection(CatalogId=deployed_account,
                                              ConnectionInput=conn_args)
                logger.info(f"Created new Connection {connection_name}")

                crypt_settings = glue_client.get_data_catalog_encryption_settings(
                    CatalogId=deployed_account)

                if crypt_settings is None or crypt_settings.get(
                        "DataCatalogEncryptionSettings").get(
                            "ConnectionPasswordEncryption").get(
                                "ReturnConnectionPasswordEncrypted") is False:
                    logger.warning(
                        "Data Catalog is not encrypted. Passwords will be visible in cleartext. It is HIGHLY recommended that you enable Connection Password Encryption")
            except glue_client.exceptions.AlreadyExistsException:
                pass

            # create a crawler
            try:
                crawler_args = {
                    "Name": crawler_name,
                    "Role": crawler_rolename,
                    "DatabaseName": catalog_db,
                    "Description": crawler_description,
                    "Targets": {
                        'JdbcTargets': [{
                            'ConnectionName': connection_name,
                            'Path': f"{database_name}/public/{table_name}"
                        }]
                    },
                    # run every hour on the hour
                    "Schedule": 'cron(0 * * * ? *)',
                    "SchemaChangePolicy": {
                        'UpdateBehavior': 'UPDATE_IN_DATABASE',
                    }
                }
                glue_client.create_crawler(**crawler_args)
                logger.info(f"Created new Glue Crawler {crawler_name}")
            except glue_client.exceptions.AlreadyExistsException:
                pass
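# For the RDBMS path this function ensures two Glue resources exist. The names
# below are illustrative, assuming a cluster address of
# "mycluster.xxxx.us-east-1.rds.amazonaws.com" and a table "customers":
#
#   - a JDBC Connection named f"{params.AWS_DATA_API_SHORTNAME}.mycluster"
#   - a Crawler named "customers" (or "<crawler_prefix>-customers"), targeting
#     the JDBC path "mycluster/public/customers" on an hourly schedule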
def __init__(self, table_name, primary_key_attribute, region, delete_mode,
             allow_runtime_delete_mode_change, table_indexes, metadata_indexes,
             schema_validation_refresh_hitcount, crawler_rolename,
             catalog_database, allow_non_itemmaster_writes, strict_occv,
             deployed_account, pitr_enabled=None, kms_key_arn=None,
             logger=None, **kwargs):
    # setup class logger
    if logger is None:
        self._logger = utils.setup_logging()
    else:
        self._logger = logger

    self._logger.debug("Creating new RDBMS Storage Handler with Properties:")
    self._logger.debug(kwargs)

    global log
    log = self._logger

    validate_params(**kwargs)

    # validate engine type
    self._engine_type = RdbmsEngineType(kwargs.get(params.RDBMS_DIALECT))

    # setup foundation properties
    self._region = region
    self._resource_table_name = table_name.lower()
    self._logger.debug(f"Resource Table {self._resource_table_name}")

    # allow override of the metadata table name
    if params.OVERRIDE_METADATA_TABLENAME in kwargs:
        self._metadata_table_name = kwargs.get(params.OVERRIDE_METADATA_TABLENAME)
    else:
        self._metadata_table_name = f"{self._resource_table_name}_{params.METADATA}".lower()

    self._pk_name = primary_key_attribute
    self._logger.debug(f"Primary Key {self._pk_name}")
    self._deployed_account = deployed_account
    self._crawler_rolename = crawler_rolename
    self._catalog_database = catalog_database
    self._delete_mode = delete_mode

    # resolve connection details
    self._cluster_address = kwargs.get(params.CLUSTER_ADDRESS)
    self._cluster_port = kwargs.get(params.CLUSTER_PORT)
    self._cluster_user = kwargs.get(params.DB_USERNAME)
    self._cluster_db = kwargs.get(params.DB_NAME)
    self._cluster_pstore = kwargs.get(params.DB_USERNAME_PSTORE_ARN)
    self._ssl = kwargs.get(params.DB_USE_SSL)

    # pick up schemas to push table structure
    self._resource_schema = kwargs.get(params.CONTROL_TYPE_RESOURCE_SCHEMA)
    self._metadata_schema = kwargs.get(params.CONTROL_TYPE_METADATA_SCHEMA)

    # create schema validators
    if self._resource_schema is not None:
        self._resource_validator = fastjsonschema.compile(self._resource_schema)
    else:
        raise exceptions.InvalidArgumentsException(
            "Relational Storage Handler requires a JSON Schema to initialise")

    if self._metadata_schema is not None:
        self._metadata_validator = fastjsonschema.compile(self._metadata_schema)
    else:
        self._metadata_validator = None

    if self._cluster_pstore is None:
        raise exceptions.InvalidArgumentsException(
            "Unable to connect to Target Cluster Database without SSM Parameter Store Password ARN")

    # extract the password from ssm
    _pwd = utils.get_encrypted_parameter(parameter_name=self._cluster_pstore,
                                         region=self._region)

    # connect to the database
    self._db_conn = self._engine_type.get_connection(
        cluster_user=self._cluster_user,
        cluster_address=self._cluster_address,
        cluster_port=self._cluster_port,
        database=self._cluster_db,
        pwd=_pwd,
        ssl=self._ssl)
    self._logger.info(
        f"Connected to {self._cluster_address}:{self._cluster_port} as {self._cluster_user}")

    # verify that the resource table, indexes, and catalog registry exist
    self._engine_type.verify_table(conn=self._db_conn,
                                   table_ref=self._resource_table_name,
                                   table_schema=self._resource_schema,
                                   pk_name=self._pk_name)
    self._engine_type.verify_indexes(self._db_conn, self._resource_table_name,
                                     table_indexes)
    self._verify_catalog(self._resource_table_name, **kwargs)

    # verify that the metadata table, indexes, and catalog registry exist
    if self._metadata_validator is not None:
        self._logger.debug(f"Metadata Table {self._metadata_table_name}")
        self._engine_type.verify_table(conn=self._db_conn,
                                       table_ref=self._metadata_table_name,
                                       table_schema=self._metadata_schema,
                                       pk_name=self._pk_name)
        self._engine_type.verify_indexes(self._db_conn,
                                         self._metadata_table_name,
                                         metadata_indexes)
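# Minimal construction sketch. The class name, constant names, and values are
# illustrative only; the keyword arguments are those checked by validate_params:
#
#   handler = RdbmsStorageHandler(                  # hypothetical class name
#       table_name="Customers",
#       primary_key_attribute="id",
#       region="us-east-1",
#       delete_mode="SOFT",                         # assumed value
#       allow_runtime_delete_mode_change=False,
#       table_indexes=["email"],
#       metadata_indexes=[],
#       schema_validation_refresh_hitcount=100,
#       crawler_rolename="DataAPICrawlerRole",
#       catalog_database="data-api-catalog",
#       allow_non_itemmaster_writes=True,
#       strict_occv=False,
#       deployed_account="123456789012",
#       **rdbms_kwargs)                             # the dict sketched after validate_params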