def student_tables(app):
    """Use Postgres to mock the Redshift student schemas on local test runs."""
    from nessie.externals import rds, redshift
    from nessie.lib.util import resolve_sql_template_string, resolve_sql_template

    rds.execute(resolve_sql_template('create_rds_indexes.template.sql'))
    fixture_path = f"{app.config['BASE_DIR']}/fixtures"
    with open(f'{fixture_path}/students.sql', 'r') as sql_file:
        student_sql = sql_file.read()
    fixture_keys = (
        'sis_api_drops_and_midterms_11667051_2178',
        'sis_degree_progress_11667051',
        'sis_student_api_11667051',
        'sis_student_api_2345678901',
    )
    params = {}
    for fixture_key in fixture_keys:
        with open(f'{fixture_path}/{fixture_key}.json', 'r') as json_file:
            feed = json_file.read()
        # SIS student API fixtures are unwrapped down to the first student record.
        if fixture_key.startswith('sis_student_api'):
            unwrapped = json.loads(feed)['apiResponse']['response']['any']['students'][0]
            feed = json.dumps(unwrapped)
        params[fixture_key] = feed
    redshift.execute(resolve_sql_template_string(student_sql), params=params)
    yield
    # Teardown: drop the mocked schemas from both mock stores.
    for schema in ('asc_test', 'coe_test', 'student_test'):
        rds.execute(f'DROP SCHEMA {schema} CASCADE')
        redshift.execute(f'DROP SCHEMA {schema} CASCADE')
def prior_job_status(app):
    """Seed one already-succeeded background-job row into the metadata schema."""
    from nessie.externals import rds

    schema = app.config['RDS_SCHEMA_METADATA']
    insert_sql = f"""INSERT INTO {schema}.background_job_status (job_id, status, instance_id, created_at, updated_at) VALUES ('MigrateSisAdvisingNoteAttachments_123', 'succeeded', 'abc', '2018-12-21 00:00:00', '2018-12-21 00:00:00')"""
    rds.execute(insert_sql)
def update_photo_import_status(successes, failures, photo_not_found):
    """Record photo-import outcomes, replacing any prior rows for these SIDs."""
    all_sids = successes + failures + photo_not_found
    rds.execute(
        f'DELETE FROM {_rds_schema()}.photo_import_status WHERE sid = ANY(%s)',
        params=(all_sids,),
    )
    timestamp = datetime.utcnow().isoformat()
    # One (sid, status, updated_at) row per SID, successes first, then
    # failures, then photos not found — matching the original insert order.
    rows = [
        (sid, status, timestamp)
        for status, sids in (
            ('success', successes),
            ('failure', failures),
            ('photo_not_found', photo_not_found),
        )
        for sid in sids
    ]
    with rds.transaction() as transaction:
        inserted = transaction.insert_bulk(
            f"""INSERT INTO {_rds_schema()}.photo_import_status (sid, status, updated_at) VALUES %s """,
            rows,
        )
        if inserted:
            transaction.commit()
        else:
            transaction.rollback()
            app.logger.error('Error saving photo import status updates to RDS.')
def create_rds_indexes(self):
    """Create the advisor-schema RDS indexes; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_advisors.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to create RDS indexes for advisor schema.')
    app.logger.info('Created RDS indexes for advisor schema.')
def run(self):
    """Generate intermediate Redshift tables, then refresh the matching RDS indexes."""
    app.logger.info('Starting intermediate table generation job...')
    # The EDL schema may replace the SIS schema as source, behind a feature flag.
    if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
        sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
    else:
        sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
    redshift_ddl = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
        redshift_schema_sis=sis_source_schema,
    )
    if not redshift.execute_ddl_script(redshift_ddl):
        raise BackgroundJobError('Intermediate table creation job failed.')
    app.logger.info('Redshift tables generated.')
    rds_ddl = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if not rds.execute(rds_ddl):
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    app.logger.info('RDS indexes updated.')
    return 'Intermediate table generation job completed.'
def create_advising_note_authors(self):
    """Populate advising note authors from a SQL template; raises on failure."""
    ddl = resolve_sql_template('create_advising_note_authors.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to create advising note authors.')
    app.logger.info('Created advising note authors.')
def index_appointment_advisors(self):
    """Index SIS appointment advisors in RDS; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_sis_appointment_advisors.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to index appointment advisors.')
    app.logger.info('Indexed appointment advisors.')
def index_advising_notes(self):
    """Index advising notes in RDS; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_advising_notes.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to index advising notes.')
    app.logger.info('Indexed advising notes.')
def create_indexes(self):
    """Create E&I Advising Notes RDS indexes; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_e_i_advising_notes.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('E&I Advising Notes schema creation job failed to create indexes.')
    app.logger.info('Created E&I Advising Notes RDS indexes.')
def create_background_job_status(job_id):
    """Insert a 'started' status row for job_id; returns the rds.execute result."""
    insert_sql = f"""INSERT INTO {_rds_schema()}.background_job_status (job_id, status, instance_id, created_at, updated_at) VALUES (%s, 'started', %s, current_timestamp, current_timestamp) """
    return rds.execute(insert_sql, params=(job_id, _instance_id()))
def run(self):
    """Create (or verify) RDS indexes; raises BackgroundJobError on failure."""
    app.logger.info('Starting RDS index creation job...')
    app.logger.info('Executing SQL...')
    ddl = resolve_sql_template('create_rds_indexes.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('RDS index creation failed.')
    app.logger.info('RDS indexes found or created.')
    return True
def create_indexes(self):
    """Create Data Science Advising RDS indexes; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_data_science_advising.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Data Science Advising schema creation job failed to create indexes.')
    app.logger.info('Created Data Science Advising RDS indexes.')
def create_canvas_sync_status(job_id, filename, canvas_table, source_url):
    """Insert a 'created' canvas-sync status row for one file of a sync job."""
    insert_sql = f"""INSERT INTO {_rds_schema()}.canvas_sync_job_status (job_id, filename, canvas_table, source_url, status, instance_id, created_at, updated_at) VALUES (%s, %s, %s, %s, 'created', %s, current_timestamp, current_timestamp) """
    return rds.execute(
        insert_sql,
        params=(job_id, filename, canvas_table, source_url, _instance_id()),
    )
def update_background_job_status(job_id, status, details=None):
    """Update a background job's status; details are truncated to 4096 chars."""
    truncated = details[:4096] if details else details
    update_sql = f"""UPDATE {_rds_schema()}.background_job_status SET status=%s, updated_at=current_timestamp, details=%s WHERE job_id=%s"""
    return rds.execute(update_sql, params=(status, truncated, job_id))
def create_canvas_snapshot(key, size):
    """Record a synced Canvas snapshot; table and filename come from the S3 key."""
    # Key layout: .../<canvas_table>/<filename>
    canvas_table, filename = key.split('/')[-2:]
    insert_sql = f"""INSERT INTO {_rds_schema()}.canvas_synced_snapshots (filename, canvas_table, url, size, created_at) VALUES (%s, %s, %s, %s, current_timestamp)"""
    return rds.execute(
        insert_sql,
        params=(filename, canvas_table, s3.build_s3_url(key), size),
    )
def update_merged_enrollment_term_job_status(job_id, status, details):
    """Update one queued merged-enrollment-term job; details truncated to 4096 chars."""
    truncated = details[:4096] if details else details
    update_sql = f"""UPDATE {_rds_schema()}.merged_enrollment_term_job_queue SET status=%s, updated_at=current_timestamp, details=%s WHERE id=%s"""
    return rds.execute(update_sql, params=(status, truncated, job_id))
def create_rds_tables_and_indexes(self):
    """Create OUA Slate RDS tables and indexes; raises BackgroundJobError on failure."""
    ddl = resolve_sql_template('index_oua_admissions.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('OUA Slate schema creation job failed to create rds tables and indexes.')
    app.logger.info('Created OUA Slate RDS tables and indexes successfully')
def create_indexes(self):
    """Create SIS Advising Notes RDS indexes; the source schema depends on the EDL flag."""
    if self.feature_flag_edl:
        source_schema = app.config['REDSHIFT_SCHEMA_EDL']
    else:
        source_schema = app.config['REDSHIFT_SCHEMA_SIS_ADVISING_NOTES_INTERNAL']
    ddl = resolve_sql_template(
        'index_sis_advising_notes.template.sql',
        redshift_schema=source_schema,
    )
    if not rds.execute(ddl):
        raise BackgroundJobError('SIS Advising Notes schema creation job failed to create indexes.')
    app.logger.info('Created SIS Advising Notes RDS indexes.')
def create_canvas_api_import_status(job_id, term_id, course_id, table_name):
    """Insert a 'created' row for one Canvas API course import; no-op without a job_id."""
    if not job_id:
        return False
    insert_sql = f"""INSERT INTO {_rds_schema()}.canvas_api_import_job_status (job_id, term_id, course_id, table_name, status, instance_id, created_at, updated_at) VALUES (%s, %s, %s, %s, 'created', %s, current_timestamp, current_timestamp) """
    return rds.execute(
        insert_sql,
        params=(job_id, term_id, str(course_id), table_name, _instance_id()),
    )
def run(self):
    """Create the advisor schema, import CalNet attributes, and index the RDS copies."""
    app.logger.info('Starting Advisor schema creation job...')
    self.create_schema()
    if not self.import_advisor_attributes():
        raise BackgroundJobError('Failed to import advisor attributes from CalNet.')
    # Create RDS indexes
    ddl = resolve_sql_template('index_advisors.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to create RDS indexes for advisor schema.')
    app.logger.info('Created RDS indexes for advisor schema.')
    return 'Advisor schema creation job completed.'
def test_metadata_tracked(self, app, metadata_db, student_tables, caplog):
    """Registration imports should track per-SID status rows in the metadata schema."""
    from nessie.jobs.import_registrations import ImportRegistrations

    # The status table starts empty before any import has run.
    rows = rds.fetch('SELECT * FROM nessie_metadata_test.registration_import_status')
    assert len(rows) == 0
    caplog.set_level(logging.DEBUG)
    with capture_app_logs(app):
        with mock_s3(app):
            ImportRegistrations().run_wrapped()
            # First run records one status row per SID, successes and failures alike.
            rows = rds.fetch('SELECT * FROM nessie_metadata_test.registration_import_status')
            assert len(rows) == 10
            assert len([r for r in rows if r['status'] == 'failure']) == 8
            assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
            # A repeat run skips SIDs already marked 'success' and retries failures.
            result = ImportRegistrations().run_wrapped()
            assert result == 'Registrations import completed: 0 succeeded, 8 failed.'
            # load_mode='all' re-imports every fetched SID regardless of status.
            result = ImportRegistrations().run_wrapped(load_mode='all')
            assert result == 'Registrations import completed: 2 succeeded, 8 failed.'
            # Deleting a status row causes that SID to be imported again.
            rds.execute("DELETE FROM nessie_metadata_test.registration_import_status WHERE sid = '11667051'")
            result = ImportRegistrations().run_wrapped()
            assert result == 'Registrations import completed: 1 succeeded, 8 failed.'
            # NOTE(review): `rows` is the snapshot fetched above, not re-queried here —
            # presumably intended to assert against fresh data; confirm.
            assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
            # A 'failure' status likewise triggers a retry on the next run.
            rds.execute("UPDATE nessie_metadata_test.registration_import_status SET status='failure' WHERE sid = '11667051'")
            result = ImportRegistrations().run_wrapped()
            assert result == 'Registrations import completed: 1 succeeded, 8 failed.'
            assert next(r['status'] for r in rows if r['sid'] == '11667051') == 'success'
def run(self):
    """Create the RDS metadata schema if it does not already exist.

    Returns:
        True on success.

    Raises:
        BackgroundJobError: if schema creation fails.
    """
    app.logger.info('Starting metadata schema creation job...')
    app.logger.info('Executing SQL...')
    resolved_ddl_rds = resolve_sql_template('create_metadata_schema_rds.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info(
            f"Schema '{app.config['RDS_SCHEMA_METADATA']}' found or created."
        )
    else:
        # Fix: the original had an unreachable `return False` after this raise.
        raise BackgroundJobError('RDS metadata schema creation failed.')
    return True
def update_canvas_api_import_status(job_id, course_id, status, details=None):
    """Update status (and optionally details) of one Canvas API course import row."""
    if not job_id:
        return False
    # Assemble the statement from fragments; details is appended only when truthy.
    fragments = [f"""UPDATE {_rds_schema()}.canvas_api_import_job_status SET status=%s, updated_at=current_timestamp"""]
    bind_values = [status]
    if details:
        fragments.append(', details=%s')
        bind_values.append(details)
    fragments.append(' WHERE job_id=%s AND course_id=%s')
    bind_values.extend([job_id, str(course_id)])
    return rds.execute(''.join(fragments), params=tuple(bind_values))
def update_rds_profile_indexes(self):
    """Refresh RDS student-profile copies in a transaction, then rebuild indexes."""
    with rds.transaction() as transaction:
        if not self.refresh_rds_indexes(None, transaction):
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS indexes.')
        transaction.commit()
        app.logger.info('Refreshed RDS indexes.')
    ddl = resolve_sql_template('update_rds_indexes_student_profiles.template.sql')
    if not rds.execute(ddl):
        raise BackgroundJobError('Failed to update RDS student profile indexes.')
    app.logger.info('RDS student profile indexes updated.')
def update_canvas_sync_status(job_id, key, status, **kwargs):
    """Update the sync-status row for one file of a Canvas sync job.

    Optional kwargs (details, source_size, destination_size) are added to the
    SET clause only when truthy.
    """
    filename = key.split('/')[-1]
    destination_url = s3.build_s3_url(key)
    sql = f"""UPDATE {_rds_schema()}.canvas_sync_job_status SET destination_url=%s, status=%s, updated_at=current_timestamp"""
    params = [destination_url, status]
    # Fix: loop variable renamed from `key`, which shadowed the `key` parameter.
    for attr in ['details', 'source_size', 'destination_size']:
        if kwargs.get(attr):
            sql += f', {attr}=%s'
            params.append(kwargs[attr])
    sql += ' WHERE job_id=%s AND filename=%s'
    params += [job_id, filename]
    return rds.execute(
        sql,
        params=tuple(params),
    )
def run(self):
    """Generate intermediate Redshift tables, then refresh the matching RDS indexes.

    The SIS source schema and the withdrawn-enrollment filter both vary with
    the EDL feature flag.
    """
    app.logger.info('Starting intermediate table generation job...')
    if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
        # EDL rows flag withdrawals directly via enrollment_status_reason.
        sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
        where_clause_exclude_withdrawn = "AND en.enrollment_status_reason <> 'WDRW'"
    else:
        # Legacy SIS data has no explicit withdrawal flag; infer it instead.
        sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
        where_clause_exclude_withdrawn = f"""/* Enrollment with no primary section is likely a withdrawal. */ AND EXISTS ( SELECT en0.term_id, en0.section_id, en0.ldap_uid FROM {app.config['REDSHIFT_SCHEMA_SIS']}.enrollments en0 JOIN {app.config['REDSHIFT_SCHEMA_INTERMEDIATE']}.course_sections crs0 ON crs0.sis_section_id = en0.section_id AND crs0.sis_term_id = en0.term_id WHERE en0.term_id = en.term_id AND en0.ldap_uid = en.ldap_uid AND crs0.sis_course_name = crs.sis_course_name AND crs0.sis_primary = TRUE AND en0.enrollment_status != 'D' AND en0.grade != 'W' )"""
    resolved_ddl_redshift = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
        redshift_schema_sis=sis_source_schema,
        where_clause_exclude_withdrawn=where_clause_exclude_withdrawn,
    )
    if redshift.execute_ddl_script(resolved_ddl_redshift):
        app.logger.info('Redshift tables generated.')
    else:
        raise BackgroundJobError('Intermediate table creation job failed.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info('RDS indexes updated.')
    else:
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    return 'Intermediate table generation job completed.'
def run(self):
    """Generate intermediate Redshift tables, then refresh the matching RDS indexes."""
    app.logger.info('Starting intermediate table generation job...')
    redshift_ddl = resolve_sql_template(
        'create_intermediate_schema.template.sql',
        current_term_id=current_term_id(),
    )
    if not redshift.execute_ddl_script(redshift_ddl):
        raise BackgroundJobError('Intermediate table creation job failed.')
    app.logger.info('Redshift tables generated.')
    rds_ddl = resolve_sql_template('update_rds_indexes_sis.template.sql')
    if not rds.execute(rds_ddl):
        raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')
    app.logger.info('RDS indexes updated.')
    return 'Intermediate table generation job completed.'
def generate_feeds(self):
    """Build non-advisee profile and enrollment tables, refreshing RDS copies when both load."""
    non_advisee_sids = [r['sid'] for r in queries.get_fetched_non_advisees()]
    profile_count = self.generate_student_profile_table(non_advisee_sids)
    enrollment_count = self.generate_student_enrollments_table(non_advisee_sids)
    if not (profile_count and enrollment_count):
        app.logger.warning('No non-advisee data loaded into Redshift; will not refresh RDS copies.')
    else:
        ddl = resolve_sql_template('update_rds_indexes_student_profiles_hist_enr.template.sql')
        if not rds.execute(ddl):
            raise BackgroundJobError('Failed to refresh RDS copies of non-advisee data.')
        app.logger.info('RDS indexes updated.')
    return f'Generated {profile_count} non-advisee profiles, {enrollment_count} enrollments.'
def current_term_index(app):
    """Point term config at 'auto' and seed a current_term_index row; restore on teardown."""
    saved_current = app.config['CURRENT_TERM']
    saved_future = app.config['FUTURE_TERM']
    saved_s3_path = app.config['LOCH_S3_CANVAS_DATA_PATH_CURRENT_TERM']
    app.config['CURRENT_TERM'] = 'auto'
    app.config['FUTURE_TERM'] = 'auto'
    app.config['LOCH_S3_CANVAS_DATA_PATH_CURRENT_TERM'] = 'auto'
    # Invalidate the cached term config so the 'auto' values take effect.
    berkeley.cache_thread.config_terms = None
    rds_schema = app.config['RDS_SCHEMA_TERMS']
    rds.execute(f'DROP SCHEMA {rds_schema} CASCADE')
    rds.execute(resolve_sql_template('create_rds_indexes.template.sql'))
    rds.execute(f"""INSERT INTO {rds_schema}.current_term_index (current_term_name, future_term_name) VALUES ('Spring 2018', 'Fall 2018') """)
    yield
    # Teardown: restore the original term configuration.
    app.config['CURRENT_TERM'] = saved_current
    app.config['FUTURE_TERM'] = saved_future
    app.config['LOCH_S3_CANVAS_DATA_PATH_CURRENT_TERM'] = saved_s3_path
def term_definitions(app):
    """Rebuild the terms schema and seed term_definitions with fixture rows."""
    schema = app.config['RDS_SCHEMA_TERMS']
    rds.execute(f'DROP SCHEMA {schema} CASCADE')
    rds.execute(resolve_sql_template('create_rds_indexes.template.sql'))
    rds.execute(f"""INSERT INTO {schema}.term_definitions (term_id, term_name, term_begins, term_ends) VALUES ('2172', 'Spring 2017', '2017-01-10', '2017-05-12'), ('2175', 'Summer 2017', '2017-05-22', '2017-08-11'), ('2178', 'Fall 2017', '2017-08-16', '2017-12-15'), ('2182', 'Spring 2018', '2018-01-09', '2018-05-11'), ('2185', 'Summer 2018', '2018-05-21', '2018-08-10'), ('2188', 'Fall 2018', '2018-08-15', '2018-12-14'), ('2192', 'Spring 2019', '2019-01-15', '2019-05-17'), ('2195', 'Summer 2019', '2019-05-28', '2019-08-16'), ('2198', 'Fall 2019', '2019-08-21', '2019-12-20'), ('2202', 'Spring 2020', '2020-01-14', '2020-05-15'), ('2205', 'Summer 2020', '2020-05-26', '2020-08-14'), ('2208', 'Fall 2020', '2020-08-19', '2020-12-18') """)