Code example #1
    def get_sql(self, name):
        # look up a named SQL fragment loaded from the dialect JSON file;
        # fail loudly rather than returning None
        sql = self._sql_helper.get(name)
        if sql is None:
            raise exceptions.InvalidArgumentsException(
                f"Unable to look up SQL {name}")
        return sql
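
The helper fails loudly instead of returning None, so callers never need their own null checks. A minimal self-contained sketch of the same lookup-or-raise pattern (the FragmentStore class and exception stub here are hypothetical, for illustration only):

class InvalidArgumentsException(Exception):
    # hypothetical stand-in for exceptions.InvalidArgumentsException
    pass

class FragmentStore:
    def __init__(self, fragments: dict):
        self._sql_helper = fragments

    def get_sql(self, name: str) -> str:
        sql = self._sql_helper.get(name)
        if sql is None:
            raise InvalidArgumentsException(f"Unable to look up SQL {name}")
        return sql

store = FragmentStore({"get_item": "select * from my_table where id = %s"})
print(store.get_sql("get_item"))   # prints the fragment
# store.get_sql("missing")         # would raise InvalidArgumentsException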
Code example #2
File: utils.py Project: IanMeyers/aws-data-api
def get_table_name(table_name,
                   deployment_stage,
                   storage_engine: str = params.DYNAMO_STORAGE_HANDLER) -> str:
    if storage_engine == params.DYNAMO_STORAGE_HANDLER or storage_engine is None:
        return f"{table_name}-{deployment_stage}"
    elif storage_engine == params.RDBMS_STORAGE_HANDLER:
        return f"{table_name}_{deployment_stage}"
    else:
        raise exceptions.InvalidArgumentsException(
            f"Unable to generate table name for storage handler {storage_engine}"
        )
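
The only difference between engines is the separator: DynamoDB table names may contain hyphens, while SQL identifiers may not, so the RDBMS path uses an underscore. A standalone usage sketch (the constant values and sample names below are hypothetical):

# hypothetical stand-ins for params.DYNAMO_STORAGE_HANDLER / params.RDBMS_STORAGE_HANDLER
DYNAMO = "dynamo"
RDBMS = "rdbms"

def demo_table_name(table_name: str, stage: str, engine: str = DYNAMO) -> str:
    if engine == DYNAMO or engine is None:
        return f"{table_name}-{stage}"   # DynamoDB allows hyphens
    if engine == RDBMS:
        return f"{table_name}_{stage}"   # SQL identifiers do not
    raise ValueError(f"Unable to generate table name for storage handler {engine}")

print(demo_table_name("customers", "prod"))          # customers-prod
print(demo_table_name("customers", "prod", RDBMS))   # customers_prod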
Code example #3
def validate_params(**kwargs):
    required_args = [
        params.CLUSTER_ADDRESS, params.CLUSTER_PORT, params.DB_USERNAME,
        params.DB_NAME, params.DB_USERNAME_PSTORE_ARN, params.DB_USE_SSL,
        params.RDBMS_DIALECT, params.CRAWLER_ROLENAME, params.PRIMARY_KEY,
        params.TABLE_INDEXES, params.STORAGE_HANDLER,
        params.CONTROL_TYPE_RESOURCE_SCHEMA
    ]

    for r in required_args:
        if r not in kwargs:
            raise exceptions.InvalidArgumentsException(
                f"Unable to generate RDBMS Storage Handler without parameter {r}"
            )

    if kwargs.get(params.RDBMS_DIALECT) not in [
            engine_types.DIALECT_PG, engine_types.DIALECT_MYSQL
    ]:
        raise exceptions.InvalidArgumentsException(
            f"Invalid Engine Dialect {kwargs.get(params.RDBMS_DIALECT)}")
Code example #4
    def __init__(self, dialect: str):
        if dialect not in [DIALECT_MYSQL, DIALECT_PG]:
            raise exceptions.InvalidArgumentsException(
                f"Unknown Dialect {dialect}")
        else:
            self._dialect = dialect

        self._logger = utils.setup_logging()

        # load the sql statement helper
        with open(
                os.path.join(os.path.dirname(__file__),
                             f'sql_fragments_{self._dialect}.json'), 'r') as f:
            self._sql_helper = json.load(f)
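
Because the dialect is validated first, a missing fragment file can only mean a packaging problem rather than bad input. The fragment files themselves are not shown in this listing; below is a hypothetical sketch of the flat name-to-statement shape the loader expects (the entry is invented for illustration):

import json

# hypothetical contents of sql_fragments_pg.json: a flat object keyed by statement name
fragments_json = '{"get_item": "select * from {table} where {pk} = %s"}'
sql_helper = json.loads(fragments_json)
print(sql_helper.get("get_item"))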
Code example #5
    def item_master_update(self, caller_identity: str, **kwargs):
        if self._pk_name not in kwargs or params.ITEM_MASTER_ID not in kwargs:
            raise exceptions.InvalidArgumentsException(
                f"Request must include {self._pk_name} and {params.ITEM_MASTER_ID}"
            )
        else:
            # check that the item master exists
            item_master_id = kwargs.get(params.ITEM_MASTER_ID)
            target_id = "null"
            if item_master_id is not None:
                self.check(id=item_master_id)
                target_id = f"'{kwargs.get(params.ITEM_MASTER_ID)}'"

            pk = kwargs.get(self._pk_name)
            if pk is not None and ',' in pk:
                pk_vals = [f"'{x}'" for x in pk.split(',')]
                pk_clause = f"{self._pk_name} in ({','.join(pk_vals)})"
            else:
                pk_clause = f"{self._pk_name} = '{pk}'"

            update_attribute_clauses = [
                f"{self._engine_type.get_who(params.ITEM_MASTER_ID)} = {target_id}"
            ]

            update_attribute_clauses.extend(
                self._engine_type.who_column_update(
                    caller_identity=caller_identity, version_increment=True))

            update_item_master = f"update {self._resource_table_name} set {','.join(update_attribute_clauses)} where {pk_clause}"

            counts, rows = self._engine_type.run_commands(
                conn=self._db_conn, commands=[update_item_master])

            return {
                "RecordCount": counts[0],
                params.DATA_MODIFIED: counts[0] > 0
            }
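
The pk clause supports both a single key and a comma-separated list of keys. The sketch below isolates just that construction (the pk_name "id" and the key values are sample data):

# isolated sketch of the pk_clause construction; all values are samples
pk_name = "id"
for pk in ("item-1", "item-1,item-2"):
    if "," in pk:
        pk_vals = [f"'{x}'" for x in pk.split(",")]
        pk_clause = f"{pk_name} in ({','.join(pk_vals)})"
    else:
        pk_clause = f"{pk_name} = '{pk}'"
    print(pk_clause)
# id = 'item-1'
# id in ('item-1','item-2')

Note that interpolating values directly into SQL like this is injection-prone; a parameterized query would be the safer design wherever the driver supports it.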
Code example #6
    def find(self, **kwargs):
        query_table = None
        filters = None
        column_list = None

        # determine if we are performing a resource or metadata search
        if params.RESOURCE in kwargs and len(kwargs.get(params.RESOURCE)) > 0:
            query_table = self._resource_table_name
            filters = kwargs.get(params.RESOURCE)
            source_schema_properties = self._resource_schema.get("properties")
        elif params.METADATA in kwargs and len(kwargs.get(params.METADATA)) > 0:
            query_table = self._metadata_table_name
            filters = kwargs.get(params.METADATA)
            source_schema_properties = self._metadata_schema.get("properties")
        else:
            raise exceptions.InvalidArgumentsException(
                "Malformed Find Request")

        column_list = list(source_schema_properties.keys())

        # transform column filters
        column_filters = self._json_to_column_list(input=filters,
                                                   version_increment=False,
                                                   add_who=False)

        # generate select statement
        query = f"select * from {query_table} where {','.join(column_filters)}"
        self._logger.debug(query)

        # return resultset
        count, rows = self._engine_type.run_commands(conn=self._db_conn,
                                                     commands=[query])
        return utils.pivot_resultset_into_json(
            rows=rows,
            column_spec=column_list,
            type_map=source_schema_properties)
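
pivot_resultset_into_json is not shown in this listing; below is a minimal hedged sketch of what such a pivot plausibly does, zipping each row tuple against the schema's column order (the function name and behavior are assumed, not taken from the project):

# hypothetical sketch: pivot ordered row tuples into JSON-style dicts
def pivot_rows_demo(rows, column_spec):
    return [dict(zip(column_spec, row)) for row in rows]

rows = [("id-1", "widget"), ("id-2", "gadget")]
print(pivot_rows_demo(rows, ["id", "name"]))
# [{'id': 'id-1', 'name': 'widget'}, {'id': 'id-2', 'name': 'gadget'}]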
Code example #7
File: utils.py Project: IanMeyers/aws-data-api
def verify_crawler(table_name,
                   crawler_rolename,
                   catalog_db,
                   datasource_type: str = params.DEFAULT_STORAGE_HANDLER,
                   deployed_account: str = None,
                   logger: Logger = None,
                   crawler_prefix: str = None,
                   **kwargs):
    glue_client = _get_glue_client()

    # default the logger so the logger.info/warning calls below cannot hit None
    if logger is None:
        logger = setup_logging()

    crawler_name = table_name if crawler_prefix is None else f"{crawler_prefix}-{table_name}"
    crawler_description = f'Crawler for AWS Data API Table {table_name}'
    try:
        # check for the prefix-aware crawler name computed above, not the raw table name
        glue_client.get_crawler(Name=crawler_name)
    except glue_client.exceptions.EntityNotFoundException:
        if datasource_type == params.DYNAMO_STORAGE_HANDLER:
            glue_client.create_crawler(
                Name=crawler_name,
                Role=crawler_rolename,
                DatabaseName=catalog_db,
                Description=crawler_description,
                Targets={'DynamoDBTargets': [
                    {
                        'Path': table_name
                    },
                ]},
                # run every hour on the hour
                Schedule='cron(0 * * * ? *)',
                SchemaChangePolicy={
                    'UpdateBehavior': 'UPDATE_IN_DATABASE',
                })
        elif datasource_type == params.RDBMS_STORAGE_HANDLER:
            database_name = kwargs.get(params.CLUSTER_ADDRESS).split('.')[0]

            if deployed_account is None:
                raise exceptions.InvalidArgumentsException(
                    "Cannot create RDS Crawler without Deployment Account Information"
                )

            connection_name = f"{params.AWS_DATA_API_SHORTNAME}.{database_name}"
            _pwd = get_encrypted_parameter(
                parameter_name=kwargs.get(params.DB_USERNAME_PSTORE_ARN),
                region=get_region())

            try:
                # create a connection
                conn_args = {
                    'Name': connection_name,
                    'Description': f"{params.AWS_DATA_API_NAME} - {database_name}",
                    'ConnectionType': 'JDBC',
                    'ConnectionProperties': {
                        'JDBC_CONNECTION_URL': f'jdbc:postgresql://{kwargs.get(params.CLUSTER_ADDRESS)}:{kwargs.get(params.CLUSTER_PORT)}/{kwargs.get(params.DB_NAME)}',
                        'USERNAME': kwargs.get(params.DB_USERNAME),
                        'PASSWORD': _pwd
                    }
                }

                if params.SUBNETS in kwargs and params.SECURITY_GROUPS in kwargs:
                    conn_args['PhysicalConnectionRequirements'] = {
                        'SubnetId': kwargs.get(params.SUBNETS)[0],
                        'SecurityGroupIdList': kwargs.get(params.SECURITY_GROUPS)
                    }

                glue_client.create_connection(CatalogId=deployed_account,
                                              ConnectionInput=conn_args)
                logger.info(f"Created new Connection {connection_name}")

                crypt_settings = glue_client.get_data_catalog_encryption_settings(
                    CatalogId=deployed_account)

                if crypt_settings is None or crypt_settings.get(
                        "DataCatalogEncryptionSettings").get(
                            "ConnectionPasswordEncryption").get(
                                "ReturnConnectionPasswordEncrypted") is False:
                    logger.warning(
                        "Data Catalog is not encrypted. Passwords will be visible in cleartext. It is HIGHLY recommended that you enable Connection Password Encryption"
                    )
            except glue_client.exceptions.AlreadyExistsException:
                pass

            # create a crawler
            try:
                crawler_args = {
                    "Name": crawler_name,
                    "Role": crawler_rolename,
                    "DatabaseName": catalog_db,
                    "Description": crawler_description,
                    "Targets": {
                        'JdbcTargets': [{
                            'ConnectionName':
                            connection_name,
                            'Path':
                            f"{database_name}/public/{table_name}"
                        }]
                    },
                    # run every hour on the hour
                    "Schedule": 'cron(0 * * * ? *)',
                    "SchemaChangePolicy": {
                        'UpdateBehavior': 'UPDATE_IN_DATABASE',
                    }
                }
                glue_client.create_crawler(**crawler_args)
                logger.info(f"Created new Glue Crawler {crawler_name}")
            except glue_client.exceptions.AlreadyExistsException:
                pass
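
The get-or-create shape around get_crawler / EntityNotFoundException generalizes to any Glue resource. A minimal standalone sketch of just that pattern for the DynamoDB case (the crawler, role, and database names are samples, and AWS credentials are needed to actually run it):

import boto3

glue = boto3.client("glue")
crawler_name = "my-table"                        # sample name

try:
    glue.get_crawler(Name=crawler_name)          # raises if the crawler is absent
except glue.exceptions.EntityNotFoundException:
    glue.create_crawler(
        Name=crawler_name,
        Role="my-crawler-role",                  # sample role
        DatabaseName="my_catalog_db",            # sample catalog database
        Targets={"DynamoDBTargets": [{"Path": crawler_name}]},
        Schedule="cron(0 * * * ? *)",            # every hour on the hour
    )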
Code example #8
    def __init__(self,
                 table_name,
                 primary_key_attribute,
                 region,
                 delete_mode,
                 allow_runtime_delete_mode_change,
                 table_indexes,
                 metadata_indexes,
                 schema_validation_refresh_hitcount,
                 crawler_rolename,
                 catalog_database,
                 allow_non_itemmaster_writes,
                 strict_occv,
                 deployed_account,
                 pitr_enabled=None,
                 kms_key_arn=None,
                 logger=None,
                 **kwargs):
        # setup class logger
        if logger is None:
            self._logger = utils.setup_logging()
        else:
            self._logger = logger

        self._logger.debug(
            "Creating new RDBMS Storage Handler with Properties:")
        self._logger.debug(kwargs)

        global log
        log = self._logger

        validate_params(**kwargs)

        # validate engine type
        self._engine_type = RdbmsEngineType(kwargs.get(params.RDBMS_DIALECT))

        # setup foundation properties
        self._region = region
        self._resource_table_name = table_name.lower()
        self._logger.debug(f"Resource Table {self._resource_table_name}")

        # allow override of the metadata table name
        if params.OVERRIDE_METADATA_TABLENAME in kwargs:
            self._metadata_table_name = kwargs.get(
                params.OVERRIDE_METADATA_TABLENAME)
        else:
            self._metadata_table_name = f"{self._resource_table_name}_{params.METADATA}".lower(
            )

        self._pk_name = primary_key_attribute
        self._logger.debug(f"Primary Key {self._pk_name}")
        self._deployed_account = deployed_account
        self._crawler_rolename = crawler_rolename
        self._catalog_database = catalog_database
        self._delete_mode = delete_mode

        # resolve connection details
        self._cluster_address = kwargs.get(params.CLUSTER_ADDRESS)
        self._cluster_port = kwargs.get(params.CLUSTER_PORT)
        self._cluster_user = kwargs.get(params.DB_USERNAME)
        self._cluster_db = kwargs.get(params.DB_NAME)
        self._cluster_pstore = kwargs.get(params.DB_USERNAME_PSTORE_ARN)
        self._ssl = kwargs.get(params.DB_USE_SSL)

        # pick up schemas to push table structure
        self._resource_schema = kwargs.get(params.CONTROL_TYPE_RESOURCE_SCHEMA)
        self._metadata_schema = kwargs.get(params.CONTROL_TYPE_METADATA_SCHEMA)

        # create schema validators
        if self._resource_schema is not None:
            self._resource_validator = fastjsonschema.compile(
                self._resource_schema)
        else:
            raise exceptions.InvalidArgumentsException(
                "Relational Storage Handler requires a JSON Schema to initialise"
            )

        # the metadata schema is optional, so default the validator to None;
        # otherwise the check below would raise AttributeError when it is absent
        if self._metadata_schema is not None:
            self._metadata_validator = fastjsonschema.compile(
                self._metadata_schema)
        else:
            self._metadata_validator = None

        if self._cluster_pstore is None:
            raise exceptions.InvalidArgumentsException(
                "Unable to connect to Target Cluster Database without SSM Parameter Store Password ARN"
            )

        # extract the password from ssm
        _pwd = utils.get_encrypted_parameter(
            parameter_name=self._cluster_pstore, region=self._region)

        # connect to the database
        self._db_conn = self._engine_type.get_connection(
            cluster_user=self._cluster_user,
            cluster_address=self._cluster_address,
            cluster_port=self._cluster_port,
            database=self._cluster_db,
            pwd=_pwd,
            ssl=self._ssl)

        self._logger.info(
            f"Connected to {self._cluster_address}:{self._cluster_port} as {self._cluster_user}"
        )

        # verify the resource table, indexes, and catalog registry exists
        self._engine_type.verify_table(conn=self._db_conn,
                                       table_ref=self._resource_table_name,
                                       table_schema=self._resource_schema,
                                       pk_name=self._pk_name)
        self._engine_type.verify_indexes(self._db_conn,
                                         self._resource_table_name,
                                         table_indexes)
        self._verify_catalog(self._resource_table_name, **kwargs)

        # verify the metadata table, indexes, and catalog registry exists
        if self._metadata_validator is not None:
            self._logger.debug(f"Metadata Table {self._metadata_table_name}")
            self._engine_type.verify_table(conn=self._db_conn,
                                           table_ref=self._metadata_table_name,
                                           table_schema=self._metadata_schema,
                                           pk_name=self._pk_name)
            self._engine_type.verify_indexes(self._db_conn,
                                             self._metadata_table_name,
                                             metadata_indexes)
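
utils.get_encrypted_parameter is called throughout but not shown in this listing; with boto3, fetching a SecureString from SSM Parameter Store typically looks like the hedged sketch below (not necessarily the project's implementation):

import boto3

def get_encrypted_parameter_demo(parameter_name: str, region: str) -> str:
    # decrypt a SecureString parameter from SSM Parameter Store
    ssm = boto3.client("ssm", region_name=region)
    response = ssm.get_parameter(Name=parameter_name, WithDecryption=True)
    return response["Parameter"]["Value"]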