Example #1
0
    def migrate_oua_sftp_data(self):
        s3_protected_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        oua_slate_sftp_path = app.config['LOCH_S3_SLATE_DATA_SFTP_PATH'] + '/' + self.get_sftp_date_offset() + '/'
        oua_daily_dest_path = get_s3_oua_daily_path() + '/admissions/'

        # Gets list of keys under SFTP prefix and looks for csv files to migrate to OUA daily location
        keys = s3.get_keys_with_prefix(oua_slate_sftp_path,
                                       full_objects=False,
                                       bucket=s3_protected_bucket)

        if keys:
            for source_key in keys:
                if source_key.endswith('.csv'):
                    destination_key = source_key.replace(
                        oua_slate_sftp_path, oua_daily_dest_path)
                    if not s3.copy(s3_protected_bucket, source_key,
                                   s3_protected_bucket, destination_key):
                        raise BackgroundJobError(
                            f'Copy from SFTP location {source_key} to daily OUA destination {destination_key} failed.'
                        )
        else:
            raise BackgroundJobError(
                'No OUA files found in SFTP location today. Skipping OUA data refresh'
            )
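
Every example in this section calls the same s3 helper module (s3.get_keys_with_prefix, s3.copy) without showing it. The sketch below is only a guess at what such thin boto3 wrappers might look like, with names and signatures inferred from the call sites above; the project's real helpers may behave differently.

import boto3
from botocore.exceptions import ClientError


def get_keys_with_prefix(prefix, full_objects=False, bucket=None):
    # Page through list_objects_v2 and collect keys (or full object dicts) under the prefix.
    client = boto3.client('s3')
    results = []
    for page in client.get_paginator('list_objects_v2').paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get('Contents', []):
            results.append(obj if full_objects else obj['Key'])
    return results


def copy(source_bucket, source_key, dest_bucket, dest_key):
    # Server-side copy; return False rather than raising so callers decide how to handle failures.
    try:
        boto3.client('s3').copy_object(
            Bucket=dest_bucket,
            Key=dest_key,
            CopySource={'Bucket': source_bucket, 'Key': source_key},
        )
        return True
    except ClientError:
        return False
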
Example #2
0
    def migrate_transient_to_destination(self,
                                         keys,
                                         destination_bucket,
                                         destination_path,
                                         unload_to_etl=False):
        destination_url = 's3://' + destination_bucket + '/' + destination_path
        destination_schema = app.config['REDSHIFT_SCHEMA_LRS']

        for transient_key in keys:
            destination_key = transient_key.replace(self.transient_path,
                                                    destination_path)
            if not s3.copy(self.transient_bucket, transient_key,
                           destination_bucket, destination_key):
                app.logger.error(
                    f'Copy from transient bucket to destination bucket {destination_bucket} failed.'
                )
                return False
        if not self.verify_migration(destination_url, destination_schema):
            return False
        if unload_to_etl:
            if not self.unload_to_etl(destination_schema, destination_bucket):
                app.logger.error(
                    f'Redshift statements unload from {destination_schema} to {destination_bucket} failed.'
                )
                return False
        redshift.drop_external_schema(destination_schema)
        return True
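
Example #2 optionally calls self.unload_to_etl(destination_schema, destination_bucket), which is not shown. Purely as an illustration of what a helper with that signature could do, the sketch below issues a Redshift UNLOAD back to S3; the table name, ETL prefix, IAM-role config key, and the redshift.execute helper are all assumptions rather than the project's actual code.

    def unload_to_etl(self, schema, bucket):
        # Hypothetical: unload queryable statements back out to an ETL prefix in S3.
        # Table name, prefix, IAM role config key, and redshift.execute are placeholders.
        return redshift.execute(
            f"""
            UNLOAD ('SELECT statement FROM {schema}.statements')
            TO 's3://{bucket}/etl-transient/statements_'
            IAM_ROLE '{app.config['AWS_REDSHIFT_IAM_ROLE']}'
            GZIP ALLOWOVERWRITE PARALLEL OFF
            """,
        )
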
Example #3
0
    def migrate_transient_to_destination(self, keys, destination_bucket, destination_path):
        destination_url = 's3://' + destination_bucket + '/' + destination_path
        for source_key in keys:
            destination_key = source_key.replace(self.source_output_path, destination_path)
            if not s3.copy(self.transient_bucket, source_key, destination_bucket, destination_key):
                raise BackgroundJobError(f'Copy from transient bucket to destination bucket {destination_bucket} failed.')
        self.verify_post_transform_statement_count(destination_url)
Example #4
0
    def migrate_transient_to_destination(self, keys, destination_bucket, destination_path):
        destination_url = 's3://' + destination_bucket + '/' + destination_path
        redshift_schema = app.config['REDSHIFT_SCHEMA_LRS']

        for transient_key in keys:
            destination_key = transient_key.replace(self.transient_path, destination_path)
            if not s3.copy(self.transient_bucket, transient_key, destination_bucket, destination_key):
                raise BackgroundJobError(f'Copy from transient bucket to destination bucket {destination_bucket} failed.')
        self.verify_migration(destination_url, redshift_schema)
        redshift.drop_external_schema(redshift_schema)
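
The three variants above all expect the caller to have already listed the transient keys. A minimal caller might look like the sketch below; the destination path, the "no new statements" handling, and the surrounding run() shape are assumptions, using the same module-level app and s3 helpers as the examples.

    def run(self):
        destination_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        destination_path = 'lrs/statements/daily'  # placeholder destination prefix
        # List whatever landed in the transient location, then hand it to the migrator.
        keys = s3.get_keys_with_prefix(self.transient_path, full_objects=False, bucket=self.transient_bucket)
        if not keys:
            return 'No statements found in the transient bucket; nothing to migrate.'
        self.migrate_transient_to_destination(keys, destination_bucket, destination_path)
        return f'Migrated {len(keys)} statement files to {destination_path}.'
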
Example #5
0
    def copy_to_destination(self, source_prefix, dest_prefix):
        bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        objects = s3.get_keys_with_prefix(source_prefix, bucket=bucket)
        for o in objects:
            file_name = normalize_sis_note_attachment_file_name(o)
            sid = file_name.split('_')[0]

            dest_key = f'{dest_prefix}/{sid}/{file_name}'
            app.logger.info(dest_key)
            if not s3.copy(bucket, o, bucket, dest_key):
                raise BackgroundJobError(f'Copy from source to destination {dest_key} failed.')

        app.logger.info(f'Copied {len(objects) if objects else 0} attachments to the destination folder.')
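
copy_to_destination relies on normalize_sis_note_attachment_file_name, whose only contract visible here is that it turns an S3 key into a file name beginning with the student SID (the code splits on '_' to recover it). The one-liner below is merely a guess at that shape, not the project's real normalization logic.

def normalize_sis_note_attachment_file_name(key):
    # Keep only the final path component of the S3 key and trim stray whitespace;
    # callers expect the result to begin with '<sid>_'.
    return key.split('/')[-1].strip()
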
Example #6
0
    def run(self):
        app.logger.info('Starting OUA Slate schema creation job...')
        app.logger.info('Executing SQL...')

        s3_protected_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
        oua_slate_sftp_path = app.config['LOCH_S3_SLATE_DATA_SFTP_PATH'] + '/' + self.get_sftp_date_offset() + '/'
        oua_daily_dest_path = get_s3_oua_daily_path() + '/admissions/'

        # Gets list of keys under SFTP prefix and looks for csv files to migrate to OUA daily location
        keys = s3.get_keys_with_prefix(oua_slate_sftp_path,
                                       full_objects=False,
                                       bucket=s3_protected_bucket)

        if keys:
            for source_key in keys:
                if source_key.endswith('.csv'):
                    destination_key = source_key.replace(
                        oua_slate_sftp_path, oua_daily_dest_path)
                    if not s3.copy(s3_protected_bucket, source_key,
                                   s3_protected_bucket, destination_key):
                        raise BackgroundJobError(
                            f'Copy from SFTP location {source_key} to daily OUA destination {destination_key} failed.'
                        )
            external_schema = app.config['REDSHIFT_SCHEMA_OUA']
            redshift.drop_external_schema(external_schema)
            resolved_ddl = resolve_sql_template('create_oua_schema_template.sql')
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(external_schema, resolved_ddl)
                self.create_rds_tables_and_indexes()
                app.logger.info('OUA Slate RDS indexes created.')
                return 'OUA schema creation job completed.'
            else:
                raise BackgroundJobError('OUA Slate schema creation job failed.')
        else:
            return 'No OUA files found in SFTP location today. Skipping OUA data refresh'
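
run() delegates its final check to verify_external_schema(external_schema, resolved_ddl), which is defined elsewhere. As a rough illustration only, a verifier with that signature might scan the resolved DDL for the external tables it was supposed to create and confirm each is queryable; redshift.fetch and the non-empty-row criterion below are assumptions, not the project's actual behavior.

import re


def verify_external_schema(schema, resolved_ddl):
    # Collect the external tables the resolved DDL claims to create...
    expected_tables = re.findall(r'create external table\s+[^\s.]+\.([^\s(]+)', resolved_ddl, re.IGNORECASE)
    for table in expected_tables:
        # ...and confirm each one is queryable and non-empty after schema creation.
        rows = redshift.fetch(f'SELECT COUNT(*) AS count FROM {schema}.{table}')
        if not rows or not rows[0]['count']:
            raise BackgroundJobError(f'Verification failed: {schema}.{table} is empty or missing.')
        app.logger.info(f'Verified {rows[0]["count"]} rows in {schema}.{table}.')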