def import_note_authors(self):
    notes_schema = app.config['RDS_SCHEMA_ADVISING_NOTES']
    advisor_attributes = (
        self._advisor_attributes_by_sid()
        + self._advisor_attributes_by_uid()
        + self._advisor_attributes_by_email()
    )
    if not advisor_attributes:
        raise BackgroundJobError('Failed to fetch note author attributes.')
    unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())
    with rds.transaction() as transaction:
        insertable_rows = []
        for entry in unique_advisor_attributes:
            first_name, last_name = calnet.split_sortable_name(entry)
            insertable_rows.append(
                tuple((entry.get('uid'), entry.get('csid'), first_name, last_name, entry.get('campus_email'))),
            )
        result = transaction.insert_bulk(
            f'INSERT INTO {notes_schema}.advising_note_authors (uid, sid, first_name, last_name, campus_email) VALUES %s',
            insertable_rows,
        )
        if result:
            transaction.commit()
            app.logger.info('Imported advising note author attributes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to import advising note author attributes.')
def refresh_sis_term_definitions(self):
    if self.feature_flag_edl:
        rows = redshift.fetch(f"""
            SELECT
              semester_year_term_cd AS term_id,
              semester_year_name_concat_2 AS term_name,
              TO_CHAR(semester_first_day_of_insr_dt, 'YYYY-MM-DD') AS term_begins,
              TO_CHAR(term_end_dt, 'YYYY-MM-DD') AS term_ends
            FROM {edl_external_schema()}.student_academic_terms_data
            WHERE semester_year_term_cd >= {app.config['EARLIEST_ACADEMIC_HISTORY_TERM_ID']}
              AND academic_career_cd = 'UGRD'
            ORDER BY semester_year_term_cd
        """)
    else:
        rows = redshift.fetch(f'SELECT * FROM {self.redshift_schema}.term_definitions')
    if len(rows):
        with rds.transaction() as transaction:
            if self.refresh_rds(rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS term definitions.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS term definitions.')
def queue_merged_enrollment_term_jobs(master_job_id, term_ids):
    now = datetime.now().replace(microsecond=0).isoformat()

    def insertable_tuple(term_id):
        return tuple([
            master_job_id,
            term_id,
            'created',
            None,
            now,
            now,
        ])

    with rds.transaction() as transaction:
        insert_result = transaction.insert_bulk(
            f"""INSERT INTO {_rds_schema()}.merged_enrollment_term_job_queue
                (master_job_id, term_id, status, instance_id, created_at, updated_at)
                VALUES %s""",
            [insertable_tuple(term_id) for term_id in term_ids],
        )
        if insert_result:
            transaction.commit()
            return True
        else:
            transaction.rollback()
            return False
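# NOTE: Illustrative sketch only, not part of the job code above. Several of these jobs build a list of
# tuples and hand it to transaction.insert_bulk() together with a single 'VALUES %s' placeholder. That
# pattern matches psycopg2's execute_values(); whether insert_bulk is actually implemented that way is an
# assumption, and bulk_insert_example/connection below are hypothetical names used only for illustration.
from psycopg2.extras import execute_values


def bulk_insert_example(connection, schema, rows):
    """Insert (master_job_id, term_id, status, instance_id, created_at, updated_at) tuples in one statement."""
    sql = f"""INSERT INTO {schema}.merged_enrollment_term_job_queue
              (master_job_id, term_id, status, instance_id, created_at, updated_at)
              VALUES %s"""
    with connection.cursor() as cursor:
        # execute_values expands the single %s placeholder into one multi-row VALUES clause.
        execute_values(cursor, sql, rows)
    connection.commit()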
def import_appointment_advisors(self):
    sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
    advisor_schema_redshift = app.config['REDSHIFT_SCHEMA_ADVISOR_INTERNAL']
    advisor_sids_from_sis_appointments = set(
        [r['advisor_sid'] for r in rds.fetch(f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_appointments')],
    )
    advisor_sids_from_advisors = set(
        [r['sid'] for r in redshift.fetch(f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments')],
    )
    advisor_sids = list(advisor_sids_from_sis_appointments | advisor_sids_from_advisors)
    advisor_attributes = calnet.client(app).search_csids(advisor_sids)
    if not advisor_attributes:
        raise BackgroundJobError('Failed to fetch appointment advisor attributes.')
    unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())
    with rds.transaction() as transaction:
        insertable_rows = []
        for entry in unique_advisor_attributes:
            first_name, last_name = calnet.split_sortable_name(entry)
            insertable_rows.append(tuple((entry.get('uid'), entry.get('csid'), first_name, last_name)))
        result = transaction.insert_bulk(
            f'INSERT INTO {sis_notes_schema}.advising_appointment_advisors (uid, sid, first_name, last_name) VALUES %s',
            insertable_rows,
        )
        if result:
            transaction.commit()
            app.logger.info('Imported appointment advisor attributes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to import appointment advisor attributes.')
def update_photo_import_status(successes, failures, photo_not_found):
    rds.execute(
        f'DELETE FROM {_rds_schema()}.photo_import_status WHERE sid = ANY(%s)',
        params=(successes + failures + photo_not_found,),
    )
    now = datetime.utcnow().isoformat()
    success_records = [tuple([sid, 'success', now]) for sid in successes]
    failure_records = [tuple([sid, 'failure', now]) for sid in failures]
    photo_not_found_records = [tuple([sid, 'photo_not_found', now]) for sid in photo_not_found]
    rows = success_records + failure_records + photo_not_found_records
    with rds.transaction() as transaction:
        result = transaction.insert_bulk(
            f"""INSERT INTO {_rds_schema()}.photo_import_status
                (sid, status, updated_at)
                VALUES %s""",
            rows,
        )
        if result:
            transaction.commit()
        else:
            transaction.rollback()
            app.logger.error('Error saving photo import status updates to RDS.')
def refresh_current_term_index(self):
    today = datetime.now(pytz.utc).astimezone(pytz.timezone(app.config['TIMEZONE'])).date()
    current_term = self.get_sis_current_term(today)
    if current_term:
        current_term_id = current_term['term_id']
        # If today is one month or less before the end of the current term, or if the current term is
        # Summer, include the next term.
        if current_term_id[3] == '5' or (current_term['term_ends'] - timedelta(weeks=4)) < today:
            future_term_id = next_term_id(current_term['term_id'])
            # ...and if that next term is Summer, skip ahead to the following Fall term.
            if future_term_id[3] == '5':
                future_term_id = next_term_id(future_term_id)
        else:
            future_term_id = current_term_id
        with rds.transaction() as transaction:
            transaction.execute(f'TRUNCATE {rds_schema}.current_term_index')
            columns = ['current_term_name', 'future_term_name']
            values = tuple([current_term['term_name'], term_name_for_sis_id(future_term_id)])
            if transaction.execute(
                f'INSERT INTO {rds_schema}.current_term_index ({", ".join(columns)}) VALUES {values}',
            ):
                transaction.commit()
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS current term index.')
def refresh_sis_term_definitions(self):
    rows = redshift.fetch(f'SELECT * FROM {external_schema}.term_definitions')
    if len(rows):
        with rds.transaction() as transaction:
            if self.refresh_rds(rows, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS term definitions.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS term definitions.')
def update_rds_profile_indexes(self):
    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(None, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS indexes.')
    resolved_ddl_rds = resolve_sql_template('update_rds_indexes_student_profiles.template.sql')
    if rds.execute(resolved_ddl_rds):
        app.logger.info('RDS student profile indexes updated.')
    else:
        raise BackgroundJobError('Failed to update RDS student profile indexes.')
def run(self, term_id=None):
    if not term_id:
        term_id = current_term_id()
    if term_id == 'all':
        app.logger.info('Starting enrollments index job for all terms...')
    else:
        app.logger.info(f'Starting enrollments index job for term {term_id}...')
    with rds.transaction() as transaction:
        if self.refresh_enrollments_index(term_id, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS indexes.')
    return f'Enrollments index job completed for term {term_id}.'
def run(self):
    app.logger.info('Starting Undergrads schema creation job...')
    redshift.drop_external_schema(external_schema)
    resolved_ddl = resolve_sql_template('create_undergrads_schema.template.sql')
    if redshift.execute_ddl_script(resolved_ddl):
        app.logger.info('Undergrads external schema created.')
        verify_external_schema(external_schema, resolved_ddl)
    else:
        raise BackgroundJobError('Undergrads external schema creation failed.')
    undergrads_rows = redshift.fetch(f'SELECT * FROM {external_schema}.students ORDER BY sid')
    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(undergrads_rows, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Error refreshing RDS indexes.')
    return 'Undergrads internal schema created.'
def refresh_current_term_index(self):
    today = datetime.now(pytz.utc).astimezone(pytz.timezone(app.config['TIMEZONE'])).date()
    current_term = self.get_sis_current_term(today)
    if current_term:
        term_id = current_term['term_id']
        # Check if the advance enrollment period has started for the next two upcoming terms.
        future_term_id = term_id
        for _ in range(2):
            term_id = next_term_id(term_id)
            term = self.get_sis_term_for_id(term_id)
            advance_enrollment_period = 0
            if term_id[3] == '2':
                advance_enrollment_period = 95
            elif term_id[3] == '5':
                advance_enrollment_period = 124
            elif term_id[3] == '8':
                advance_enrollment_period = 140
            if term['term_begins'] - timedelta(days=advance_enrollment_period) < today:
                future_term_id = term_id
        with rds.transaction() as transaction:
            transaction.execute(f'TRUNCATE {rds_schema}.current_term_index')
            columns = ['current_term_name', 'future_term_name']
            values = tuple([current_term['term_name'], term_name_for_sis_id(future_term_id)])
            if transaction.execute(
                f'INSERT INTO {rds_schema}.current_term_index ({", ".join(columns)}) VALUES {values}',
            ):
                transaction.commit()
            else:
                transaction.rollback()
                raise BackgroundJobError('Error refreshing RDS current term index.')
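# NOTE: Illustrative sketch only. The two refresh_current_term_index variants above branch on the last
# digit of a four-digit SIS term id ('2' = Spring, '5' = Summer, '8' = Fall). A next_term_id helper
# consistent with that convention might look like the hypothetical example below; the real helper lives
# elsewhere in the codebase, and the 'three-character prefix plus season digit' format is an assumption.
def next_term_id_example(term_id):
    prefix, season = term_id[:3], term_id[3]
    if season == '2':    # Spring -> Summer of the same calendar year
        return prefix + '5'
    if season == '5':    # Summer -> Fall of the same calendar year
        return prefix + '8'
    if season == '8':    # Fall -> Spring of the following year
        return str(int(prefix) + 1) + '2'
    raise ValueError(f'Unrecognized term id: {term_id}')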
def run(self, load_mode='new'):
    all_sids = [row['sid'] for row in get_all_student_ids()]
    previous_backfills = {row['sid'] for row in get_sids_with_registration_imports()}

    if load_mode == 'new':
        sids = list(set(all_sids).difference(previous_backfills))
    elif load_mode == 'batch':
        new_sids = list(set(all_sids).difference(previous_backfills))
        limit = app.config['CYCLICAL_API_IMPORT_BATCH_SIZE'] - len(new_sids)
        if limit > 0:
            oldest_backfills = [row['sid'] for row in get_active_sids_with_oldest_registration_imports(limit=limit)]
            sids = new_sids + oldest_backfills
        else:
            sids = new_sids
    elif load_mode == 'all':
        sids = all_sids

    app.logger.info(f'Starting registrations/demographics import job for {len(sids)} students...')

    rows = {
        'term_gpas': [],
        'last_registrations': [],
        'api_demographics': [],
    }
    successes, failures = self.get_registration_data_per_sids(rows, sids)
    if load_mode != 'new' and len(successes) == 0 and len(failures) > 0:
        raise BackgroundJobError('Failed to import registration histories: aborting job.')

    for key in rows.keys():
        s3_key = f'{get_s3_sis_api_daily_path(use_edl_if_feature_flag=True)}/{key}.tsv'
        app.logger.info(f'Will stash {len(successes)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(rows[key], s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')
        app.logger.info('Will copy S3 feeds into Redshift...')
        if not redshift.execute(f'TRUNCATE {student_schema()}_staging.student_{key}'):
            raise BackgroundJobError('Error truncating old staging rows: aborting job.')
        if not redshift.copy_tsv_from_s3(f'{student_schema()}_staging.student_{key}', s3_key):
            raise BackgroundJobError('Error on Redshift copy: aborting job.')
        staging_to_destination_query = resolve_sql_template_string(
            """
            DELETE FROM {student_schema}.student_{table_key}
                WHERE sid IN
                (SELECT sid FROM {student_schema}_staging.student_{table_key});
            INSERT INTO {student_schema}.student_{table_key}
                (SELECT * FROM {student_schema}_staging.student_{table_key});
            TRUNCATE TABLE {student_schema}_staging.student_{table_key};
            """,
            table_key=key,
            student_schema=student_schema(),
        )
        if not redshift.execute(staging_to_destination_query):
            raise BackgroundJobError('Error inserting staging entries into destination: aborting job.')

    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(sids, rows['term_gpas'], transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS indexes.')

    update_registration_import_status(successes, failures)

    return f'Registrations import completed: {len(successes)} succeeded, {len(failures)} failed.'
def generate_feeds(self):
    # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data
    # and SIS enrollment-based data.
    advisees_by_canvas_id = {}
    advisees_by_sid = {}
    self.successes = []
    self.failures = []
    profile_tables = self.generate_student_profile_tables(advisees_by_canvas_id, advisees_by_sid)
    if not profile_tables:
        raise BackgroundJobError('Failed to generate student profile tables.')

    feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
    s3.upload_json(advisees_by_canvas_id, feed_path + 'advisees_by_canvas_id.json')
    upload_student_term_maps(advisees_by_sid)

    # Avoid processing Canvas analytics data for future terms and pre-CS terms.
    for term_id in (future_term_ids() + legacy_term_ids()):
        enrollment_term_map = s3.get_object_json(feed_path + f'enrollment_term_map_{term_id}.json')
        if enrollment_term_map:
            GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(term_id, enrollment_term_map)

    canvas_integrated_term_ids = reverse_term_ids()
    app.logger.info(f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.')
    result = queue_merged_enrollment_term_jobs(self.job_id, canvas_integrated_term_ids)
    if not result:
        raise BackgroundJobError('Failed to queue enrollment term jobs.')

    refresh_all_from_staging(profile_tables)
    self.update_redshift_academic_standing()
    self.update_rds_profile_indexes()
    app.logger.info('Profile generation complete; waiting for enrollment term generation to finish.')

    while True:
        sleep(1)
        enrollment_results = get_merged_enrollment_term_job_status(self.job_id)
        if not enrollment_results:
            raise BackgroundJobError('Failed to retrieve enrollment term job status.')
        any_pending_job = next(
            (row for row in enrollment_results if row['status'] == 'created' or row['status'] == 'started'),
            None,
        )
        if not any_pending_job:
            break

    app.logger.info('Exporting analytics data for archival purposes.')
    unload_enrollment_terms([current_term_id(), future_term_id()])

    app.logger.info('Refreshing enrollment terms in RDS.')
    with rds.transaction() as transaction:
        if self.refresh_rds_enrollment_terms(None, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS enrollment terms.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Failed to refresh RDS enrollment terms.')

    status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
    errored = False
    for row in enrollment_results:
        status_string += f" {row['details']}"
        if row['status'] == 'error':
            errored = True

    truncate_staging_table('student_enrollment_terms')
    if errored:
        raise BackgroundJobError(status_string)
    else:
        return status_string
def run(self):
    app.logger.info('Starting ASC profile generation job...')
    asc_rows = redshift.fetch(
        'SELECT * FROM {schema}.students ORDER BY sid, UPPER(team_name)',
        schema=asc_schema_identifier,
    )

    profile_rows = []
    sids_for_inactive_deletion = []

    for sid, rows_for_student in groupby(asc_rows, operator.itemgetter('sid')):
        rows_for_student = list(rows_for_student)
        # Since BOAC believes (falsely) that isActiveAsc and statusAsc are attributes of a student, not
        # a team membership, a bit of brutal simplification is needed. Students who are active in at least
        # one sport have inactive team memberships dropped.
        any_active_athletics = reduce(operator.or_, [r['active'] for r in rows_for_student], False)
        if any_active_athletics:
            rows_for_student = [r for r in rows_for_student if r['active']]
            sids_for_inactive_deletion.append(sid)
        athletics_profile = {
            'athletics': [],
            'inIntensiveCohort': rows_for_student[0]['intensive'],
            'isActiveAsc': rows_for_student[0]['active'],
            'statusAsc': rows_for_student[0]['status_asc'],
        }
        for row in rows_for_student:
            athletics_profile['athletics'].append({
                'groupCode': row['group_code'],
                'groupName': row['group_name'],
                'name': row['group_name'],
                'teamCode': row['team_code'],
                'teamName': row['team_name'],
            })
        profile_rows.append(encoded_tsv_row([sid, json.dumps(athletics_profile)]))

    s3_key = f'{get_s3_asc_daily_path()}/athletics_profiles.tsv'
    app.logger.info(f'Will stash {len(profile_rows)} feeds in S3: {s3_key}')
    if not s3.upload_tsv_rows(profile_rows, s3_key):
        raise BackgroundJobError('Error on S3 upload: aborting job.')

    app.logger.info('Will copy S3 feeds into Redshift...')
    query = resolve_sql_template_string(
        """
        TRUNCATE {redshift_schema_asc}.student_profiles;
        COPY {redshift_schema_asc}.student_profiles
            FROM '{loch_s3_asc_data_path}/athletics_profiles.tsv'
            IAM_ROLE '{redshift_iam_role}'
            DELIMITER '\\t';
        """,
    )
    if not redshift.execute(query):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False

    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(asc_rows, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Error refreshing RDS indexes.')

    if sids_for_inactive_deletion:
        redshift.execute(
            f'DELETE FROM {asc_schema}.students WHERE active IS false AND sid = ANY(%s)',
            params=(sids_for_inactive_deletion,),
        )
        rds.execute(
            f'DELETE FROM {asc_schema}.students WHERE active IS false AND sid = ANY(%s)',
            params=(sids_for_inactive_deletion,),
        )
    return 'ASC profile generation complete.'
def run(self):
    app.logger.info('Starting COE schema creation job...')
    redshift.drop_external_schema(external_schema)
    resolved_ddl = resolve_sql_template('create_coe_schema.template.sql')
    # TODO This DDL drops and recreates the internal schema before the external schema is verified. We
    # ought to set up proper staging in conjunction with verification. It's also possible that a persistent
    # external schema isn't needed.
    if redshift.execute_ddl_script(resolved_ddl):
        app.logger.info('COE external schema created.')
        verify_external_schema(external_schema, resolved_ddl)
    else:
        raise BackgroundJobError('COE external schema creation failed.')

    coe_rows = redshift.fetch(
        'SELECT * FROM {schema}.students ORDER BY sid',
        schema=internal_schema_identifier,
    )

    profile_rows = []
    index = 1
    for sid, rows_for_student in groupby(coe_rows, operator.itemgetter('sid')):
        app.logger.info(f'Generating COE profile for SID {sid} ({index} of {len(coe_rows)})')
        index += 1
        row_for_student = list(rows_for_student)[0]
        coe_profile = {
            'advisorUid': row_for_student.get('advisor_ldap_uid'),
            'gender': row_for_student.get('gender'),
            'ethnicity': row_for_student.get('ethnicity'),
            'minority': row_for_student.get('minority'),
            'didPrep': row_for_student.get('did_prep'),
            'prepEligible': row_for_student.get('prep_eligible'),
            'didTprep': row_for_student.get('did_tprep'),
            'tprepEligible': row_for_student.get('tprep_eligible'),
            'sat1read': row_for_student.get('sat1read'),
            'sat1math': row_for_student.get('sat1math'),
            'sat2math': row_for_student.get('sat2math'),
            'inMet': row_for_student.get('in_met'),
            'gradTerm': row_for_student.get('grad_term'),
            'gradYear': row_for_student.get('grad_year'),
            'probation': row_for_student.get('probation'),
            'status': row_for_student.get('status'),
        }
        profile_rows.append(encoded_tsv_row([sid, json.dumps(coe_profile)]))

    s3_key = f'{get_s3_coe_daily_path()}/coe_profiles.tsv'
    app.logger.info(f'Will stash {len(profile_rows)} feeds in S3: {s3_key}')
    if not s3.upload_tsv_rows(profile_rows, s3_key):
        raise BackgroundJobError('Error on S3 upload: aborting job.')

    app.logger.info('Will copy S3 feeds into Redshift...')
    query = resolve_sql_template_string(
        """
        COPY {redshift_schema_coe}.student_profiles
            FROM '{loch_s3_coe_data_path}/coe_profiles.tsv'
            IAM_ROLE '{redshift_iam_role}'
            DELIMITER '\\t';
        """,
    )
    if not redshift.execute(query):
        raise BackgroundJobError('Error on Redshift copy: aborting job.')

    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(coe_rows, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            raise BackgroundJobError('Error refreshing RDS indexes.')
    return 'COE internal schema created.'
def generate_feeds(self, term_id=None, sids=None):
    """Loop through all records stored in the CalNet external schema and write merged student data
    to the internal student schema.
    """
    calnet_profiles = self.fetch_calnet_profiles(sids)
    # Jobs targeted toward a specific sid set (such as backfills) may return no CalNet profiles. Warn, don't error.
    if not calnet_profiles:
        app.logger.warn(f'No CalNet profiles returned, aborting job. (sids={sids})')
        return False
    # Jobs for non-current terms generate enrollment feeds only.
    if term_id and term_id != berkeley.current_term_id():
        tables = ['student_enrollment_terms']
    else:
        tables = [
            'student_profiles',
            'student_academic_status',
            'student_majors',
            'student_enrollment_terms',
            'student_holds',
        ]
    # In-memory storage for generated feeds prior to TSV output.
    self.rows = {
        'student_profiles': [],
        'student_academic_status': [],
        'student_majors': [],
        'student_enrollment_terms': [],
        'student_holds': [],
    }
    # Track the results of course-level queries to avoid requerying.
    self.canvas_site_map = {}

    # Remove any old data from staging tables.
    for table in tables:
        redshift.execute(
            'TRUNCATE {schema}.{table}',
            schema=self.staging_schema_identifier,
            table=psycopg2.sql.Identifier(table),
        )

    app.logger.info(f'Will generate feeds for {len(calnet_profiles)} students (term_id={term_id}).')
    successes = []
    failures = []
    index = 1
    for sid, profile_group in groupby(calnet_profiles, operator.itemgetter('sid')):
        app.logger.info(f'Generating feeds for sid {sid} ({index} of {len(calnet_profiles)})')
        index += 1
        merged_profile = self.generate_or_fetch_merged_profile(term_id, sid, list(profile_group)[0])
        if merged_profile:
            self.generate_merged_enrollment_terms(merged_profile, term_id)
            self.parse_holds(sid)
            successes.append(sid)
        else:
            failures.append(sid)

    for table in tables:
        if not self.rows[table]:
            continue
        self.upload_to_staging(table)
        if not self.verify_table(table):
            return False

    with redshift.transaction() as transaction:
        for table in tables:
            if not self.refresh_from_staging(table, term_id, sids, transaction):
                app.logger.error(f'Failed to refresh {self.destination_schema}.{table} from staging.')
                return False
        if not transaction.commit():
            app.logger.error(f'Final transaction commit failed for {self.destination_schema}.')
            return False

    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(sids, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            app.logger.error('Failed to refresh RDS indexes.')
            return False

    update_merged_feed_status(term_id, successes, failures)
    app.logger.info('Updated merged feed status.')
    return f'Merged profile generation complete: {len(successes)} successes, {len(failures)} failures.'
def run(self, term_ids=None):
    if not term_ids:
        term_ids = reverse_term_ids()
    app.logger.info(f'Starting SIS terms API import job for {len(term_ids)} terms...')

    rows = []
    success_count = 0
    failure_count = 0
    index = 1
    for term_id in term_ids:
        app.logger.info(f'Fetching SIS terms API for term id {term_id} ({index} of {len(term_ids)})')
        feed = sis_terms_api.get_term(term_id)
        if feed:
            success_count += 1
            for academic_career_term in feed:
                for session in academic_career_term.get('sessions', []):
                    rows.append(
                        '\t'.join([
                            academic_career_term.get('id', ''),
                            academic_career_term.get('name', ''),
                            academic_career_term.get('academicCareer', {}).get('code', ''),
                            academic_career_term.get('beginDate', ''),
                            academic_career_term.get('endDate', ''),
                            session.get('id', ''),
                            session.get('name', ''),
                            session.get('beginDate', ''),
                            session.get('endDate', ''),
                        ]),
                    )
        else:
            failure_count += 1
            app.logger.error(f'SIS terms API import failed for term id {term_id}.')
        index += 1

    s3_key = f'{get_s3_sis_api_daily_path()}/terms.tsv'
    app.logger.info(f'Will stash {len(rows)} rows from {success_count} feeds in S3: {s3_key}')
    if not s3.upload_data('\n'.join(rows), s3_key):
        app.logger.error('Error on S3 upload: aborting job.')
        return False

    app.logger.info('Will copy S3 feeds into Redshift...')
    with redshift.transaction() as transaction:
        if self.update_redshift(term_ids, transaction):
            transaction.commit()
            app.logger.info('Updated Redshift.')
        else:
            transaction.rollback()
            app.logger.error('Failed to update Redshift.')
            return False

    with rds.transaction() as transaction:
        if self.update_rds(rows, term_ids, transaction):
            transaction.commit()
            app.logger.info('Updated RDS.')
        else:
            transaction.rollback()
            app.logger.error('Failed to update RDS.')
            return False
    return f'SIS terms API import job completed: {success_count} succeeded, {failure_count} failed.'
def run(self, csids=None):
    if not csids:
        csids = [row['sid'] for row in get_all_student_ids()]
    app.logger.info(f'Starting term GPA import job for {len(csids)} students...')

    rows = []
    success_count = 0
    no_registrations_count = 0
    failure_count = 0
    index = 1
    for csid in csids:
        app.logger.info(f'Fetching term GPAs for SID {csid} ({index} of {len(csids)})')
        feed = sis_student_api.get_term_gpas(csid)
        if feed:
            success_count += 1
            for term_id, term_data in feed.items():
                rows.append('\t'.join([
                    str(csid),
                    str(term_id),
                    str(term_data.get('gpa') or '0'),
                    str(term_data.get('unitsTakenForGpa') or '0'),
                ]))
        elif feed == {}:
            app.logger.info(f'No registrations found for SID {csid}.')
            no_registrations_count += 1
        else:
            failure_count += 1
            app.logger.error(f'Term GPA import failed for SID {csid}.')
        index += 1

    if success_count == 0:
        app.logger.error('Failed to import term GPAs: aborting job.')
        return False

    s3_key = f'{get_s3_sis_api_daily_path()}/term_gpas.tsv'
    app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
    if not s3.upload_data('\n'.join(rows), s3_key):
        app.logger.error('Error on S3 upload: aborting job.')
        return False

    app.logger.info('Will copy S3 feeds into Redshift...')
    if not redshift.execute(f'TRUNCATE {self.destination_schema}_staging.student_term_gpas'):
        app.logger.error('Error truncating old staging rows: aborting job.')
        return False
    if not redshift.copy_tsv_from_s3(f'{self.destination_schema}_staging.student_term_gpas', s3_key):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False

    staging_to_destination_query = resolve_sql_template_string("""
        DELETE FROM {redshift_schema_student}.student_term_gpas
            WHERE sid IN
            (SELECT sid FROM {redshift_schema_student}_staging.student_term_gpas);
        INSERT INTO {redshift_schema_student}.student_term_gpas
            (SELECT * FROM {redshift_schema_student}_staging.student_term_gpas);
        TRUNCATE TABLE {redshift_schema_student}_staging.student_term_gpas;
    """)
    if not redshift.execute(staging_to_destination_query):
        app.logger.error('Error inserting staging entries into destination: aborting job.')
        return False

    with rds.transaction() as transaction:
        if self.refresh_rds_indexes(csids, rows, transaction):
            transaction.commit()
            app.logger.info('Refreshed RDS indexes.')
        else:
            transaction.rollback()
            app.logger.error('Failed to refresh RDS indexes.')
            return False

    return (
        f'Term GPA import completed: {success_count} succeeded, '
        f'{no_registrations_count} returned no registrations, {failure_count} failed.'
    )