Esempio n. 1
0
    def execute(self, context) -> None:
        """Build the COPY statement and run it through the Redshift hook.

        What gets executed depends on ``self.method``:

        * ``'REPLACE'`` -- delete all rows of ``<schema>.<table>`` and run the
          copy statement between ``BEGIN;`` and ``COMMIT``.
        * ``'UPSERT'`` -- create a ``#<table>`` staging table ``LIKE`` the
          destination, copy into it, then delete the destination rows that
          match the staging rows on the upsert keys and insert the staged rows.
        * any other value -- run the copy statement on its own.

        :param context: not used by this method.
        :raises AirflowException: if the method is ``'UPSERT'`` and neither
            ``self.upsert_keys`` nor the hook's primary-key lookup yields keys.
        """
        redshift_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
        aws_connection = S3Hook.get_connection(conn_id=self.aws_conn_id)

        # A role ARN in the connection extras takes precedence; otherwise the
        # credentials block is built from the S3 hook's credentials.
        role_arn = aws_connection.extra_dejson.get('role_arn', False)
        if role_arn:
            credentials_block = f"aws_iam_role={role_arn}"
        else:
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
            credentials_block = build_credentials_block(s3_hook.get_credentials())

        joined_options = '\n\t\t\t'.join(self.copy_options)
        target = f'{self.schema}.{self.table}'
        is_upsert = self.method == 'UPSERT'
        # UPSERT copies into a '#'-prefixed staging table instead of the target.
        copy_target = f'#{self.table}' if is_upsert else target

        copy_statement = self._build_copy_query(copy_target, credentials_block, joined_options)

        sql: Union[list, str]
        if is_upsert:
            # Explicit upsert keys win; fall back to the hook's primary-key lookup.
            keys = self.upsert_keys or redshift_hook.get_table_primary_key(self.table, self.schema)
            if not keys:
                raise AirflowException(
                    f"No primary key on {self.schema}.{self.table}. Please provide keys on 'upsert_keys'"
                )
            match_clause = ' AND '.join(f'{self.table}.{key} = {copy_target}.{key}' for key in keys)
            sql = [
                f"CREATE TABLE {copy_target} (LIKE {target});",
                copy_statement,
                "BEGIN;",
                f"DELETE FROM {target} USING {copy_target} WHERE {match_clause};",
                f"INSERT INTO {target} SELECT * FROM {copy_target};",
                "COMMIT",
            ]
        elif self.method == 'REPLACE':
            sql = ["BEGIN;", f"DELETE FROM {target};", copy_statement, "COMMIT"]
        else:
            sql = copy_statement

        self.log.info('Executing COPY command...')
        redshift_hook.run(sql, autocommit=self.autocommit)
        self.log.info("COPY command complete...")
Esempio n. 2
0
    def execute(self, context: 'Context') -> None:
        """Build the UNLOAD query and run it through the Redshift hook.

        The credentials block is ``aws_iam_role=<arn>`` when the AWS connection
        extras carry a ``role_arn``; otherwise it is built from the credentials
        returned by the S3 hook. The query itself comes from
        ``self._build_unload_query`` and is executed with ``self.autocommit``
        and ``self.parameters``.

        :param context: not used by this method.
        """
        redshift_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
        aws_connection = S3Hook.get_connection(conn_id=self.aws_conn_id)

        # A role ARN in the connection extras takes precedence over key-based
        # credentials obtained from the S3 hook.
        role_arn = aws_connection.extra_dejson.get('role_arn', False)
        if role_arn:
            credentials_block = f"aws_iam_role={role_arn}"
        else:
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
            credentials_block = build_credentials_block(s3_hook.get_credentials())

        joined_options = '\n\t\t\t'.join(self.unload_options)
        unload_query = self._build_unload_query(
            credentials_block,
            self.select_query,
            self.s3_key,
            joined_options,
        )

        self.log.info('Executing UNLOAD command...')
        redshift_hook.run(
            unload_query,
            autocommit=self.autocommit,
            parameters=self.parameters,
        )
        self.log.info("UNLOAD command complete...")