def migrate_oua_sftp_data(self):
    s3_protected_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
    oua_slate_sftp_path = app.config['LOCH_S3_SLATE_DATA_SFTP_PATH'] + '/' + self.get_sftp_date_offset() + '/'
    oua_daily_dest_path = get_s3_oua_daily_path() + '/admissions/'
    # Get the list of keys under the SFTP prefix and migrate any CSV files to the daily OUA location.
    keys = s3.get_keys_with_prefix(oua_slate_sftp_path, full_objects=False, bucket=s3_protected_bucket)
    if len(keys) > 0:
        for source_key in keys:
            if source_key.endswith('.csv'):
                destination_key = source_key.replace(oua_slate_sftp_path, oua_daily_dest_path)
                if not s3.copy(s3_protected_bucket, source_key, s3_protected_bucket, destination_key):
                    raise BackgroundJobError(f'Copy from SFTP location {source_key} to daily OUA destination {destination_key} failed.')
    else:
        raise BackgroundJobError('No OUA files found in SFTP location today. Skipping OUA data refresh.')
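# The SFTP prefix above is built from self.get_sftp_date_offset(), which is not defined in this
# section. A minimal sketch of such a helper, assuming Slate drops files into date-stamped
# folders (YYYY/MM/DD) and that the offset in days lives in a config key named
# LOCH_S3_SLATE_DATA_SFTP_DATE_OFFSET; both the folder format and the config key are
# assumptions, not confirmed by this code.
from datetime import datetime, timedelta

def get_sftp_date_offset(self):
    offset_days = app.config.get('LOCH_S3_SLATE_DATA_SFTP_DATE_OFFSET', 0)  # assumed config key
    return (datetime.now() + timedelta(days=offset_days)).strftime('%Y/%m/%d')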
def migrate_transient_to_destination(self, keys, destination_bucket, destination_path, unload_to_etl=False):
    destination_url = 's3://' + destination_bucket + '/' + destination_path
    destination_schema = app.config['REDSHIFT_SCHEMA_LRS']
    for transient_key in keys:
        destination_key = transient_key.replace(self.transient_path, destination_path)
        if not s3.copy(self.transient_bucket, transient_key, destination_bucket, destination_key):
            app.logger.error(f'Copy from transient bucket to destination bucket {destination_bucket} failed.')
            return False
    if not self.verify_migration(destination_url, destination_schema):
        return False
    if unload_to_etl:
        if not self.unload_to_etl(destination_schema, destination_bucket):
            app.logger.error(f'Redshift statements unload from {destination_schema} to {destination_bucket} failed.')
            return False
    redshift.drop_external_schema(destination_schema)
    return True
def migrate_transient_to_destination(self, keys, destination_bucket, destination_path):
    destination_url = 's3://' + destination_bucket + '/' + destination_path
    for source_key in keys:
        destination_key = source_key.replace(self.source_output_path, destination_path)
        if not s3.copy(self.transient_bucket, source_key, destination_bucket, destination_key):
            raise BackgroundJobError(f'Copy from transient bucket to destination bucket {destination_bucket} failed.')
    self.verify_post_transform_statement_count(destination_url)
def migrate_transient_to_destination(self, keys, destination_bucket, destination_path):
    destination_url = 's3://' + destination_bucket + '/' + destination_path
    redshift_schema = app.config['REDSHIFT_SCHEMA_LRS']
    for transient_key in keys:
        destination_key = transient_key.replace(self.transient_path, destination_path)
        if not s3.copy(self.transient_bucket, transient_key, destination_bucket, destination_key):
            raise BackgroundJobError(f'Copy from transient bucket to destination bucket {destination_bucket} failed.')
    self.verify_migration(destination_url, redshift_schema)
    redshift.drop_external_schema(redshift_schema)
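# A hedged usage sketch for the migrate_transient_to_destination() variants above: list the
# keys written to the transient bucket, then promote them to a permanent location. The
# destination prefix 'lrs/statements/daily' is illustrative only, and the enclosing job class
# and its transient_path/transient_bucket attributes are assumed from the methods above.
def example_run(self):
    transient_keys = s3.get_keys_with_prefix(self.transient_path, full_objects=False, bucket=self.transient_bucket)
    if not transient_keys:
        raise BackgroundJobError('No statements found in transient bucket; nothing to migrate.')
    self.migrate_transient_to_destination(
        transient_keys,
        app.config['LOCH_S3_PROTECTED_BUCKET'],  # assumed destination bucket
        'lrs/statements/daily',  # illustrative destination prefix
    )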
def copy_to_destination(self, source_prefix, dest_prefix):
    bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
    objects = s3.get_keys_with_prefix(source_prefix, bucket=bucket)
    for o in objects:
        file_name = normalize_sis_note_attachment_file_name(o)
        sid = file_name.split('_')[0]
        dest_key = f'{dest_prefix}/{sid}/{file_name}'
        app.logger.info(dest_key)
        if not s3.copy(bucket, o, bucket, dest_key):
            raise BackgroundJobError(f'Copy from source to destination {dest_key} failed.')
    app.logger.info(f'Copied {len(objects) if objects else 0} attachments to the destination folder.')
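# copy_to_destination() relies on a normalization helper whose output must begin with the
# student ID as its first underscore-delimited token (see file_name.split('_')[0] above).
# A minimal sketch under that assumption; the real normalization rules may differ.
def normalize_sis_note_attachment_file_name(s3_key):
    # Keep only the object's base name, e.g. 'sis-attachments/12345678_00012_essay.pdf' -> '12345678_00012_essay.pdf'.
    return s3_key.rsplit('/', 1)[-1].strip()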
def run(self):
    app.logger.info('Starting OUA Slate schema creation job...')
    app.logger.info('Executing SQL...')
    s3_protected_bucket = app.config['LOCH_S3_PROTECTED_BUCKET']
    oua_slate_sftp_path = app.config['LOCH_S3_SLATE_DATA_SFTP_PATH'] + '/' + self.get_sftp_date_offset() + '/'
    oua_daily_dest_path = get_s3_oua_daily_path() + '/admissions/'
    # Get the list of keys under the SFTP prefix and migrate any CSV files to the daily OUA location.
    keys = s3.get_keys_with_prefix(oua_slate_sftp_path, full_objects=False, bucket=s3_protected_bucket)
    if len(keys) > 0:
        for source_key in keys:
            if source_key.endswith('.csv'):
                destination_key = source_key.replace(oua_slate_sftp_path, oua_daily_dest_path)
                if not s3.copy(s3_protected_bucket, source_key, s3_protected_bucket, destination_key):
                    raise BackgroundJobError(f'Copy from SFTP location {source_key} to daily OUA destination {destination_key} failed.')
        external_schema = app.config['REDSHIFT_SCHEMA_OUA']
        redshift.drop_external_schema(external_schema)
        resolved_ddl = resolve_sql_template('create_oua_schema_template.sql')
        if redshift.execute_ddl_script(resolved_ddl):
            verify_external_schema(external_schema, resolved_ddl)
            self.create_rds_tables_and_indexes()
            app.logger.info('OUA Slate RDS indexes created.')
            return 'OUA schema creation job completed.'
        else:
            raise BackgroundJobError('OUA Slate schema creation job failed.')
    else:
        return 'No OUA files found in SFTP location today. Skipping OUA data refresh.'
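# A minimal sketch of kicking off the job above by hand, assuming run() belongs to a
# BackgroundJob subclass (hypothetically named CreateOuaSchema here) and that app is the
# Flask application whose config and logger the job reads.
with app.app_context():
    result = CreateOuaSchema().run()
    app.logger.info(result)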