def _put_instructor_data_to_s3(uids):
    app.logger.info(f'Starting CalNet import job for {len(uids)} instructors...')
    # Fetch LDAP attributes for all requested UIDs in one bulk CalNet query.
    all_attributes = calnet.client(app).search_uids(uids)
    if len(uids) != len(all_attributes):
        ldap_uids = [person['uid'] for person in all_attributes]
        missing = set(uids) - set(ldap_uids)
        app.logger.warning(f'Looked for {len(uids)} instructor UIDs but only found {len(all_attributes)}: missing {missing}')
    # Build a newline-delimited JSON feed: one serialized object per instructor.
    serialized_data = ''
    for a in all_attributes:
        uid = a['uid']
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        serialized_data += json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'dept_code': calnet.get_dept_code(a),
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': uid,
            'csid': a['csid'],
            'title': a['title'],
        }) + '\n'
    s3.upload_data(serialized_data, f'{get_s3_calnet_daily_path()}/instructors/instructors.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} instructors')
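
# A minimal sketch, not part of the job above: the instructor feed written to S3 is
# newline-delimited JSON, one object per line, so a downstream consumer could parse it
# line by line. The helper name below is hypothetical and included for illustration only.
def _parse_instructor_feed(raw_feed):
    # Skip blank lines; each remaining line is a complete JSON object.
    return [json.loads(line) for line in raw_feed.splitlines() if line.strip()]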
def import_advisor_attributes(self):
    # Collect the distinct advisor SIDs already loaded into the advisor_students table.
    csid_results = redshift.fetch(
        resolve_sql_template_string('SELECT DISTINCT advisor_sid FROM {redshift_schema_advisor_internal}.advisor_students'),
    )
    csids = [r['advisor_sid'] for r in csid_results]
    # Fetch LDAP attributes for all advisors in one bulk CalNet query.
    all_attributes = calnet.client(app).search_csids(csids)
    if len(csids) != len(all_attributes):
        ldap_csids = [person['csid'] for person in all_attributes]
        missing = set(csids) - set(ldap_csids)
        app.logger.warning(
            f'Looked for {len(csids)} advisor CSIDs but only found {len(all_attributes)}: missing {missing}',
        )
    advisor_rows = []
    total_count = len(all_attributes)
    for index, a in enumerate(all_attributes):
        sid = a['csid']
        app.logger.info(f'CalNet import: Fetch attributes of advisor {sid} ({index + 1} of {total_count})')
        first_name, last_name = calnet.split_sortable_name(a)
        # Column order must match the advisor_attributes table loaded by the COPY below.
        data = [
            a['uid'],
            sid,
            first_name,
            last_name,
            a['title'],
            calnet.get_dept_code(a),
            a['email'],
            a['campus_email'],
        ]
        advisor_rows.append(encoded_tsv_row(data))
    s3_key = f'{get_s3_calnet_daily_path()}/advisors/advisors.tsv'
    app.logger.info(f'Will stash {len(advisor_rows)} feeds in S3: {s3_key}')
    if not s3.upload_tsv_rows(advisor_rows, s3_key):
        raise BackgroundJobError('Error on S3 upload: aborting job.')
    app.logger.info('Will copy S3 feeds into Redshift...')
    # Replace existing advisor attributes with the freshly uploaded TSV feed.
    query = resolve_sql_template_string(
        """
        TRUNCATE {redshift_schema_advisor_internal}.advisor_attributes;
        COPY {redshift_schema_advisor_internal}.advisor_attributes
            FROM '{loch_s3_calnet_data_path}/advisors/advisors.tsv'
            IAM_ROLE '{redshift_iam_role}'
            DELIMITER '\\t';
        """,
    )
    if not redshift.execute(query):
        app.logger.error('Error on Redshift copy: aborting job.')
        return False
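
# A minimal sketch of what a TSV row encoder might look like; the real encoded_tsv_row
# helper used above lives elsewhere in this repo, and this illustration only assumes it
# produces tab-delimited text matching the DELIMITER '\t' declared in the COPY statement.
def _encoded_tsv_row_sketch(values):
    # Render None as an empty field and join all values on tabs.
    return '\t'.join('' if v is None else str(v) for v in values)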