コード例 #1
0
    def import_appointment_advisors(self):
        sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
        advisor_schema_redshift = app.config['REDSHIFT_SCHEMA_ADVISOR_INTERNAL']

        advisor_sids_from_sis_appointments = set(
            [r['advisor_sid'] for r in rds.fetch(f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_appointments')],
        )
        advisor_sids_from_advisors = set(
            [r['sid'] for r in redshift.fetch(f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments')],
        )
        advisor_sids = list(advisor_sids_from_sis_appointments | advisor_sids_from_advisors)

        advisor_attributes = calnet.client(app).search_csids(advisor_sids)
        if not advisor_attributes:
            raise BackgroundJobError('Failed to fetch note author attributes.')

        unique_advisor_attributes = list({adv['uid']: adv for adv in advisor_attributes}.values())

        with rds.transaction() as transaction:
            insertable_rows = []
            for entry in unique_advisor_attributes:
                first_name, last_name = calnet.split_sortable_name(entry)
                insertable_rows.append(tuple((entry.get('uid'), entry.get('csid'), first_name, last_name)))

            result = transaction.insert_bulk(
                f'INSERT INTO {sis_notes_schema}.advising_appointment_advisors (uid, sid, first_name, last_name) VALUES %s',
                insertable_rows,
            )
            if result:
                transaction.commit()
                app.logger.info('Imported appointment advisor attributes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to import appointment advisor attributes.')
コード例 #2
0
ファイル: import_calnet_data.py プロジェクト: dkawase/nessie
def _put_instructor_data_to_s3(uids):
    app.logger.info(f'Starting CalNet import job for {len(uids)} instructors...')
    all_attributes = calnet.client(app).search_uids(uids)
    if len(uids) != len(all_attributes):
        ldap_uids = [person['uid'] for person in all_attributes]
        missing = set(uids) - set(ldap_uids)
        app.logger.warning(f'Looked for {len(uids)} instructor UIDs but only found {len(all_attributes)} : missing {missing}')

    serialized_data = ''
    for index, a in enumerate(all_attributes):
        uid = a['uid']
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        serialized_data += json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'dept_code': calnet.get_dept_code(a),
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': uid,
            'csid': a['csid'],
            'title': a['title'],
        }) + '\n'
    s3.upload_data(serialized_data, f'{get_s3_calnet_daily_path()}/instructors/instructors.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} instructors')
コード例 #3
0
ファイル: import_calnet_data.py プロジェクト: dkawase/nessie
def _put_advisee_data_to_s3(sids):
    app.logger.info(f'Starting CalNet import job for {len(sids)} advisees...')
    all_attributes = calnet.client(app).search_csids(sids)
    if len(sids) != len(all_attributes):
        ldap_sids = [person['csid'] for person in all_attributes]
        missing = set(sids) - set(ldap_sids)
        app.logger.warning(f'Looked for {len(sids)} advisee SIDs but only found {len(all_attributes)} : missing {missing}')

    serialized_data = ''
    for index, a in enumerate(all_attributes):
        sid = a['csid']
        affiliations = a['affiliations']
        first_name, last_name = calnet.split_sortable_name(a)
        # JsonSerDe in Redshift schema creation requires one and only one JSON record per line in text file in S3.
        serialized_data += json.dumps({
            'affiliations': ','.join(affiliations) if isinstance(affiliations, list) else affiliations,
            'campus_email': a['campus_email'],
            'email': a['email'],
            'first_name': first_name,
            'last_name': last_name,
            'ldap_uid': a['uid'],
            'sid': sid,
        }) + '\n'
    s3.upload_data(serialized_data, f'{get_s3_calnet_daily_path()}/advisees/advisees.json')
    app.logger.info(f'Uploaded data for {len(all_attributes)} advisees')
コード例 #4
0
 def _advisor_attributes_by_email(self):
     data_science_schema = app.config['RDS_SCHEMA_DATA_SCIENCE']
     sql = f"""
         SELECT DISTINCT advisor_email FROM {data_science_schema}.advising_notes
         WHERE advisor_email IS NOT NULL
     """
     advisor_emails = set([r['advisor_email'] for r in rds.fetch(sql)])
     return calnet.client(app).search_emails(list(advisor_emails))
コード例 #5
0
    def import_advisor_attributes(self):
        csid_results = redshift.fetch(
            resolve_sql_template_string(
                'SELECT DISTINCT advisor_sid FROM {redshift_schema_advisor_internal}.advisor_students'
            ), )
        csids = [r['advisor_sid'] for r in csid_results]
        all_attributes = calnet.client(app).search_csids(csids)
        if len(csids) != len(all_attributes):
            ldap_csids = [person['csid'] for person in all_attributes]
            missing = set(csids) - set(ldap_csids)
            app.logger.warning(
                f'Looked for {len(csids)} advisor CSIDs but only found {len(all_attributes)} : missing {missing}'
            )

        advisor_rows = []
        total_count = len(all_attributes)
        for index, a in enumerate(all_attributes):
            sid = a['csid']
            app.logger.info(
                f'CalNet import: Fetch attributes of advisor {sid} ({index + 1} of {total_count})'
            )
            first_name, last_name = calnet.split_sortable_name(a)
            data = [
                a['uid'],
                sid,
                first_name,
                last_name,
                a['title'],
                calnet.get_dept_code(a),
                a['email'],
                a['campus_email'],
            ]
            advisor_rows.append(encoded_tsv_row(data))

        s3_key = f'{get_s3_calnet_daily_path()}/advisors/advisors.tsv'
        app.logger.info(
            f'Will stash {len(advisor_rows)} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(advisor_rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            TRUNCATE {redshift_schema_advisor_internal}.advisor_attributes;
            COPY {redshift_schema_advisor_internal}.advisor_attributes
                FROM '{loch_s3_calnet_data_path}/advisors/advisors.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t';
            """, )
        if not redshift.execute(query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False
コード例 #6
0
    def _advisor_attributes_by_uid(self):
        asc_schema = app.config['RDS_SCHEMA_ASC']
        e_i_schema = app.config['RDS_SCHEMA_E_I']

        advisor_uids_from_asc_notes = set([
            r['advisor_uid'] for r in rds.fetch(
                f'SELECT DISTINCT advisor_uid FROM {asc_schema}.advising_notes'
            )
        ], )
        advisor_uids_from_e_i_notes = set([
            r['advisor_uid'] for r in rds.fetch(
                f'SELECT DISTINCT advisor_uid FROM {e_i_schema}.advising_notes'
            )
        ], )
        advisor_uids = list(advisor_uids_from_asc_notes
                            | advisor_uids_from_e_i_notes)
        return calnet.client(app).search_uids(advisor_uids)
コード例 #7
0
    def _advisor_attributes_by_sid(self):
        sis_notes_schema = app.config['RDS_SCHEMA_SIS_ADVISING_NOTES']
        advisor_schema_redshift = app.config[
            'REDSHIFT_SCHEMA_ADVISOR_INTERNAL']

        advisor_sids_from_sis_notes = set([
            r['advisor_sid'] for r in rds.fetch(
                f'SELECT DISTINCT advisor_sid FROM {sis_notes_schema}.advising_notes'
            )
        ], )
        advisor_sids_from_advisors = set([
            r['sid'] for r in redshift.fetch(
                f'SELECT DISTINCT sid FROM {advisor_schema_redshift}.advisor_departments'
            )
        ], )
        advisor_sids = list(advisor_sids_from_sis_notes
                            | advisor_sids_from_advisors)
        return calnet.client(app).search_csids(advisor_sids)
コード例 #8
0
ファイル: import_calnet_data.py プロジェクト: lyttam/nessie
def _put_calnet_data_to_s3(sids):
    all_attributes = calnet.client(app).search_csids(sids)
    if len(sids) != len(all_attributes):
        ldap_sids = [l['csid'] for l in all_attributes]
        missing = set(sids) - set(ldap_sids)
        app.logger.warning(
            f'Looked for {len(sids)} SIDs but only found {len(all_attributes)} : missing {missing}'
        )

    serialized_data = ''
    total_count = len(all_attributes)
    for index, a in enumerate(all_attributes):
        sid = a['csid']
        app.logger.info(
            f'CalNet import: Fetch attributes of student {sid} ({index + 1} of {total_count})'
        )
        affiliations = a['affiliations']
        first_name, last_name = _split_sortable_name(a)
        # JsonSerDe in Redshift schema creation requires one and only one JSON record per line in text file in S3.
        serialized_data += json.dumps({
            'affiliations':
            ','.join(affiliations)
            if isinstance(affiliations, list) else affiliations,
            'campus_email':
            a['campus_email'],
            'email':
            a['email'],
            'first_name':
            first_name,
            'last_name':
            last_name,
            'ldap_uid':
            a['uid'],
            'sid':
            sid,
        }) + '\n'
    s3.upload_data(serialized_data,
                   f'{get_s3_calnet_daily_path()}/persons.json')