Example #1
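# Assumed context, not shown in the original snippet: `app`, `calnet`, `s3`,
# and `get_s3_calnet_daily_path` are project-internal helpers presumed to be
# imported by the surrounding module; `json` is required for serialization.
import json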
def _put_instructor_data_to_s3(uids):
    app.logger.info(f'Starting CalNet import job for {len(uids)} instructors...')
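    # Look up all requested UIDs in the CalNet LDAP directory.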
    all_attributes = calnet.client(app).search_uids(uids)
    if len(uids) != len(all_attributes):
        ldap_uids = [person['uid'] for person in all_attributes]
        missing = set(uids) - set(ldap_uids)
        app.logger.warning(f'Looked for {len(uids)} instructor UIDs but only found {len(all_attributes)}; missing: {missing}')

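    # Serialize each instructor's attributes as newline-delimited JSON.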
    serialized_data = ''
    for a in all_attributes:
        uid = a['uid']
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        serialized_data += json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'dept_code': calnet.get_dept_code(a),
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': uid,
            'csid': a['csid'],
            'title': a['title'],
        }) + '\n'
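    # Stash the serialized feed under the daily CalNet path in S3.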
    s3.upload_data(serialized_data, f'{get_s3_calnet_daily_path()}/instructors/instructors.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} instructors')
Example #2
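    # Assumed context, not shown in the original snippet: this is a method on a
    # background-job class; `redshift`, `calnet`, `s3`, `resolve_sql_template_string`,
    # `encoded_tsv_row`, `get_s3_calnet_daily_path`, and `BackgroundJobError` are
    # presumed to be imported by the surrounding module.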
    def import_advisor_attributes(self):
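        # Fetch the distinct advisor SIDs already present in Redshift.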
        csid_results = redshift.fetch(
            resolve_sql_template_string(
                'SELECT DISTINCT advisor_sid FROM {redshift_schema_advisor_internal}.advisor_students'
            ),
        )
        csids = [r['advisor_sid'] for r in csid_results]
        all_attributes = calnet.client(app).search_csids(csids)
        if len(csids) != len(all_attributes):
            ldap_csids = [person['csid'] for person in all_attributes]
            missing = set(csids) - set(ldap_csids)
            app.logger.warning(
                f'Looked for {len(csids)} advisor CSIDs but only found {len(all_attributes)}; missing: {missing}'
            )

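        # Build one encoded TSV row per advisor from the CalNet attributes.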
        advisor_rows = []
        total_count = len(all_attributes)
        for index, a in enumerate(all_attributes):
            sid = a['csid']
            app.logger.info(
                f'CalNet import: Fetch attributes of advisor {sid} ({index + 1} of {total_count})'
            )
            first_name, last_name = calnet.split_sortable_name(a)
            data = [
                a['uid'],
                sid,
                first_name,
                last_name,
                a['title'],
                calnet.get_dept_code(a),
                a['email'],
                a['campus_email'],
            ]
            advisor_rows.append(encoded_tsv_row(data))

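        # Upload the TSV feed to S3; a failed upload aborts the job.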
        s3_key = f'{get_s3_calnet_daily_path()}/advisors/advisors.tsv'
        app.logger.info(
            f'Will stash {len(advisor_rows)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(advisor_rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            TRUNCATE {redshift_schema_advisor_internal}.advisor_attributes;
            COPY {redshift_schema_advisor_internal}.advisor_attributes
                FROM '{loch_s3_calnet_data_path}/advisors/advisors.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t';
            """,
        )
        if not redshift.execute(query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False
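        return True  # assumed success return, mirroring the failure path; the original snippet may continue past this point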