Example #1
    def test_auto_terms(self, app, current_term_index):
        all_term_ids = set(berkeley.reverse_term_ids(include_future_terms=True, include_legacy_terms=True))
        canvas_integrated_term_ids = set(berkeley.reverse_term_ids())
        assert canvas_integrated_term_ids < all_term_ids
        assert berkeley.current_term_id() == '2182'
        assert berkeley.future_term_id() == '2188'
        assert berkeley.s3_canvas_data_path_current_term() == 'canvas-data/term/spring-2018'
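The literal assertions above imply the SIS term-id encoding: a leading century digit, a two-digit year, and a final season digit, so '2182' is Spring 2018 and '2188' is Fall 2018. A minimal sketch of that mapping, assuming season codes 2/5/8 for Spring/Summer/Fall (berkeley.sis_term_id_for_name, used in Example #3, presumably implements the real conversion):

def sis_term_id_from_name(term_name):
    # Hypothetical helper, not from the source: 'Spring 2018' -> '2182'.
    season, year = term_name.split()
    season_codes = {'Spring': '2', 'Summer': '5', 'Fall': '8'}
    return f'2{year[2:]}{season_codes[season]}'

assert sis_term_id_from_name('Spring 2018') == '2182'
assert sis_term_id_from_name('Fall 2018') == '2188'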
Example #2
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
        else:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
            redshift_schema_sis=sis_source_schema,
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template(
            'update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError(
                'Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
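resolve_sql_template, used here and in several later examples, evidently reads the named template and substitutes values such as current_term_id and redshift_schema_sis into it. A rough sketch under that assumption (hypothetical; the real nessie helper may pull defaults from app config and handle escaping):

def resolve_sql_template(template_name, **kwargs):
    # Hypothetical sketch: replace {token} placeholders in the SQL template
    # with the supplied keyword values.
    with open(f'nessie/sql_templates/{template_name}') as f:
        template = f.read()
    for token, value in kwargs.items():
        template = template.replace('{' + token + '}', str(value))
    return template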
Example #3
    def test_term_id_lists(self, app):
        all_term_ids = set(
            berkeley.reverse_term_ids(include_future_terms=True,
                                      include_legacy_terms=True))
        canvas_integrated_term_ids = set(berkeley.reverse_term_ids())
        future_term_ids = set(berkeley.future_term_ids())
        legacy_term_ids = set(berkeley.legacy_term_ids())
        assert canvas_integrated_term_ids < all_term_ids
        assert berkeley.sis_term_id_for_name(
            app.config['EARLIEST_LEGACY_TERM']) in all_term_ids
        assert berkeley.sis_term_id_for_name(
            app.config['EARLIEST_TERM']) in all_term_ids
        assert berkeley.sis_term_id_for_name(
            app.config['CURRENT_TERM']) in all_term_ids
        assert berkeley.sis_term_id_for_name(
            app.config['FUTURE_TERM']) in all_term_ids

        assert berkeley.current_term_id() in canvas_integrated_term_ids
        assert berkeley.earliest_term_id() in canvas_integrated_term_ids

        assert future_term_ids.isdisjoint(canvas_integrated_term_ids)
        assert future_term_ids < all_term_ids
        assert berkeley.future_term_id() in future_term_ids

        assert legacy_term_ids.isdisjoint(canvas_integrated_term_ids)
        assert legacy_term_ids < all_term_ids
        assert berkeley.earliest_legacy_term_id() in berkeley.legacy_term_ids()
Example #4
def async_get_feeds(app_obj, up_to_100_sids, as_of):
    with app_obj.app_context():
        feeds = get_v2_by_sids_list(up_to_100_sids, term_id=current_term_id(), with_registration=True, as_of=as_of)
        result = {
            'sids': up_to_100_sids,
            'feeds': feeds,
        }
    return result
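Since async_get_feeds re-enters the Flask app context itself, it is safe to fan out across worker threads. A hedged usage sketch (fetch_all_feeds and its batch size are assumptions; batching by 100 simply mirrors the up_to_100_sids parameter name):

from concurrent.futures import ThreadPoolExecutor

def fetch_all_feeds(app_obj, sids, as_of, max_workers=4):
    # Split sids into batches of 100 and fetch each batch on a worker thread.
    batches = [sids[i:i + 100] for i in range(0, len(sids), 100)]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = [pool.submit(async_get_feeds, app_obj, batch, as_of) for batch in batches]
        return [future.result() for future in futures]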
Example #5
    def run(self, term_id=None):
        job_id = self.generate_job_id()
        if not term_id:
            term_id = current_term_id()
        if app.config['TEST_CANVAS_COURSE_IDS']:
            canvas_course_ids = app.config['TEST_CANVAS_COURSE_IDS']
        else:
            canvas_course_ids = [
                row['canvas_course_id']
                for row in get_enrolled_canvas_sites_for_term(term_id)
            ]
        app.logger.info(
            f'Starting Canvas grade change log import job {job_id} for term {term_id}, {len(canvas_course_ids)} course sites...'
        )

        success_count = 0
        failure_count = 0
        index = 1
        for course_id in canvas_course_ids:
            path = f'/api/v1/audit/grade_change/courses/{course_id}'
            s3_key = f'{get_s3_canvas_api_path()}/grade_change_log/grade_change_log_{course_id}'
            create_canvas_api_import_status(
                job_id=job_id,
                term_id=term_id,
                course_id=course_id,
                table_name='grade_change_log',
            )
            app.logger.info(
                f'Fetching Canvas grade change log for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})',
            )
            response = dispatch(
                'import_canvas_api_data',
                data={
                    'course_id': course_id,
                    'path': path,
                    's3_key': s3_key,
                    'job_id': job_id,
                },
            )
            if not response:
                app.logger.error(
                    f'Canvas grade change log import failed for course id {course_id}.'
                )
                update_canvas_api_import_status(
                    job_id=job_id,
                    course_id=course_id,
                    status='error',
                )
                failure_count += 1
            else:
                success_count += 1
            index += 1

        return (
            f'Canvas grade change log import completed for term {term_id}: {success_count} succeeded, '
            f'{failure_count} failed.')
Example #6
    def test_inner_get_students(self, app):
        """Returns fixture data."""
        oski_response = student_api._get_v2_by_sids_list(
            TEST_SID_LIST,
            term_id=current_term_id(),
            as_of=None,
            with_registration=True,
            with_contacts=True,
        )
        assert oski_response
        assert oski_response.status_code == 200
        students = oski_response.json()['apiResponse']['response']['students']
        assert len(students) == 3
Example #7
def app_config():
    def _to_api_key(key):
        chunks = key.split('_')
        return f"{chunks[0].lower()}{''.join(chunk.title() for chunk in chunks[1:])}"

    return tolerant_jsonify(
        {
            **dict((_to_api_key(key), app.config[key] if key in app.config else None) for key in PUBLIC_CONFIGS),
            **{
                'currentEnrollmentTerm': current_term_name(),
                'currentEnrollmentTermId': int(current_term_id()),
                'futureTermId': int(future_term_id()),
            },
        },
    )
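The _to_api_key helper turns a SNAKE_CASE config key into camelCase; for example, _to_api_key('CURRENT_TERM') yields 'currentTerm' and _to_api_key('FEATURE_FLAG_EDL_SIS_VIEWS') yields 'featureFlagEdlSisViews'.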
Example #8
def app_config():
    current_term_name = berkeley.current_term_name()
    current_term_id = berkeley.current_term_id()
    future_term_id = berkeley.future_term_id()
    return tolerant_jsonify({
        'currentEnrollmentTerm': current_term_name,
        'currentEnrollmentTermId': int(current_term_id),
        'futureTermId': int(future_term_id),
        'ebEnvironment': app.config['EB_ENVIRONMENT'] if 'EB_ENVIRONMENT' in app.config else None,
        'nessieEnv': app.config['NESSIE_ENV'],
    })
Example #9
    def run(self, term_id=None):
        if not term_id:
            term_id = current_term_id()
        if term_id == 'all':
            app.logger.info('Starting enrollments index job for all terms...')
        else:
            app.logger.info(
                f'Starting enrollments index job for term {term_id}...')

        with rds.transaction() as transaction:
            if self.refresh_enrollments_index(term_id, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                raise BackgroundJobError('Failed to refresh RDS indexes.')

        return f'Enrollments index job completed for term {term_id}.'
Example #10
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        if app.config['FEATURE_FLAG_EDL_SIS_VIEWS']:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_EDL']
            where_clause_exclude_withdrawn = "AND en.enrollment_status_reason <> 'WDRW'"
        else:
            sis_source_schema = app.config['REDSHIFT_SCHEMA_SIS']
            where_clause_exclude_withdrawn = f"""/* Enrollment with no primary section is likely a withdrawal. */
                AND EXISTS (
                    SELECT
                        en0.term_id,
                        en0.section_id,
                        en0.ldap_uid
                    FROM {app.config['REDSHIFT_SCHEMA_SIS']}.enrollments en0
                    JOIN {app.config['REDSHIFT_SCHEMA_INTERMEDIATE']}.course_sections crs0
                        ON crs0.sis_section_id = en0.section_id
                        AND crs0.sis_term_id = en0.term_id
                    WHERE en0.term_id = en.term_id
                    AND en0.ldap_uid = en.ldap_uid
                    AND crs0.sis_course_name = crs.sis_course_name
                    AND crs0.sis_primary = TRUE
                    AND en0.enrollment_status != 'D'
                    AND en0.grade != 'W'
                )"""

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
            redshift_schema_sis=sis_source_schema,
            where_clause_exclude_withdrawn=where_clause_exclude_withdrawn,
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template('update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError('Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
Example #11
    def _find_last_registration(rows):
        last_registration = None

        for row in rows:
            # We prefer registration data from: 1) the current term; 2) failing that, the nearest past term; 3) failing that,
            # the nearest future term. Which is to say, skip future terms unless that's all we have.
            if (row['term_id'] > current_term_id()) and last_registration:
                continue

            # At present, terms spent as an Extension student are not included in Term GPAs (but see BOAC-2266).
            # However, if there are no other types of registration, the Extension term is used for academicCareer.
            if row['academic_career_cd'] == 'UCBX':
                if last_registration and last_registration['academic_career_cd'] != 'UCBX':
                    continue

            last_registration = row

        return last_registration
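The row['term_id'] > current_term_id() check compares strings, which works because SIS term ids are fixed-width, so lexicographic order coincides with chronological order:

# Under the encoding sketched after Example #1 (assumed, not confirmed here):
assert '2182' < '2188' < '2192'  # Spring 2018 < Fall 2018 < Spring 2019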
Example #12
    def generate_or_fetch_merged_profile(self, term_id, sid, calnet_profile):
        merged_profile = None
        if term_id is None or term_id == berkeley.current_term_id():
            merged_profile = self.generate_merged_profile(sid, calnet_profile)
        else:
            profile_result = redshift.fetch(
                'SELECT profile FROM {schema}.student_profiles WHERE sid = %s',
                params=(sid,),
                schema=self.destination_schema_identifier,
            )
            merged_profile = profile_result and profile_result[0] and json.loads(profile_result[0].get('profile', '{}'))
            if not merged_profile:
                merged_profile = self.generate_merged_profile(sid, calnet_profile)
        if not merged_profile:
            app.logger.error(f'Failed to generate merged profile for sid {sid}.')
        return merged_profile
Example #13
    def run(self):
        app.logger.info('Starting intermediate table generation job...')

        resolved_ddl_redshift = resolve_sql_template(
            'create_intermediate_schema.template.sql',
            current_term_id=current_term_id(),
        )
        if redshift.execute_ddl_script(resolved_ddl_redshift):
            app.logger.info('Redshift tables generated.')
        else:
            raise BackgroundJobError('Intermediate table creation job failed.')

        resolved_ddl_rds = resolve_sql_template(
            'update_rds_indexes_sis.template.sql')
        if rds.execute(resolved_ddl_rds):
            app.logger.info('RDS indexes updated.')
        else:
            raise BackgroundJobError(
                'Failed to update RDS indexes for intermediate schema.')

        return 'Intermediate table generation job completed.'
Example #14
    def generate_feeds(self):
        # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data and SIS enrollment-based data.
        advisees_by_canvas_id = {}
        advisees_by_sid = {}
        self.successes = []
        self.failures = []
        profile_tables = self.generate_student_profile_tables(
            advisees_by_canvas_id, advisees_by_sid)
        if not profile_tables:
            raise BackgroundJobError(
                'Failed to generate student profile tables.')

        feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
        s3.upload_json(advisees_by_canvas_id,
                       feed_path + 'advisees_by_canvas_id.json')

        upload_student_term_maps(advisees_by_sid)

        # Avoid processing Canvas analytics data for future terms and pre-CS terms.
        for term_id in (future_term_ids() + legacy_term_ids()):
            enrollment_term_map = s3.get_object_json(
                feed_path + f'enrollment_term_map_{term_id}.json')
            if enrollment_term_map:
                GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(
                    term_id, enrollment_term_map)

        canvas_integrated_term_ids = reverse_term_ids()
        app.logger.info(
            f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.'
        )
        result = queue_merged_enrollment_term_jobs(self.job_id,
                                                   canvas_integrated_term_ids)
        if not result:
            raise BackgroundJobError('Failed to queue enrollment term jobs.')

        refresh_all_from_staging(profile_tables)
        self.update_redshift_academic_standing()
        self.update_rds_profile_indexes()

        app.logger.info(
            'Profile generation complete; waiting for enrollment term generation to finish.'
        )

        while True:
            sleep(1)
            enrollment_results = get_merged_enrollment_term_job_status(
                self.job_id)
            if not enrollment_results:
                raise BackgroundJobError('Failed to refresh RDS indexes.')
            any_pending_job = next(
                (row for row in enrollment_results if row['status'] in ('created', 'started')),
                None,
            )
            if not any_pending_job:
                break

        app.logger.info('Exporting analytics data for archival purposes.')
        unload_enrollment_terms([current_term_id(), future_term_id()])

        app.logger.info('Refreshing enrollment terms in RDS.')
        with rds.transaction() as transaction:
            if self.refresh_rds_enrollment_terms(None, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS enrollment terms.')
            else:
                transaction.rollback()
                raise BackgroundJobError(
                    'Failed to refresh RDS enrollment terms.')

        status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
        errored = False
        for row in enrollment_results:
            status_string += f" {row['details']}"
            if row['status'] == 'error':
                errored = True

        truncate_staging_table('student_enrollment_terms')
        if errored:
            raise BackgroundJobError(status_string)
        else:
            return status_string
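The polling loop implies that get_merged_enrollment_term_job_status returns one row per queued term job, carrying at least a 'status' field (the code checks 'created', 'started', and 'error') and a 'details' string folded into the final summary. A purely illustrative, hypothetical return value:

# Hypothetical shape of get_merged_enrollment_term_job_status(job_id) output:
[
    {'term_id': '2182', 'status': 'started', 'details': ''},
    {'term_id': '2178', 'status': 'error', 'details': 'Term generation failed for 2178.'},
]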
Example #15
    def run(self, term_id=None):
        if not term_id:
            term_id = current_term_id()
        canvas_course_ids = [
            row['canvas_course_id']
            for row in get_enrolled_canvas_sites_for_term(term_id)
        ]

        app.logger.info(
            f'Starting Canvas enrollments API import job for term {term_id}, {len(canvas_course_ids)} course sites...'
        )

        rows = []
        success_count = 0
        failure_count = 0
        index = 1
        for course_id in canvas_course_ids:
            app.logger.info(
                f'Fetching Canvas enrollments API for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})'
            )
            feed = canvas_api.get_course_enrollments(course_id)
            if feed:
                success_count += 1
                for enrollment in feed:
                    user_id = str(enrollment.get('user_id'))
                    last_activity_at = str(
                        enrollment.get('last_activity_at') or '')
                    rows.append('\t'.join([
                        str(course_id), user_id,
                        str(term_id), last_activity_at,
                        json.dumps(enrollment)
                    ]))
            else:
                failure_count += 1
                app.logger.error(
                    f'Canvas enrollments API import failed for course id {course_id}.'
                )
            index += 1

        s3_key = f'{get_s3_sis_api_daily_path()}/canvas_api_enrollments_{term_id}.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
            COPY {redshift_schema_student}_staging.canvas_api_enrollments
                FROM '{loch_s3_sis_api_data_path}/canvas_api_enrollments_{term_id}.tsv'
                IAM_ROLE '{redshift_iam_role}'
                DELIMITER '\\t'
                TIMEFORMAT 'YYYY-MM-DDTHH:MI:SSZ';
            DELETE FROM {redshift_schema_student}.canvas_api_enrollments
                WHERE term_id = '{term_id}'
                AND course_id IN
                (SELECT course_id FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
            INSERT INTO {redshift_schema_student}.canvas_api_enrollments
                (SELECT * FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
            DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments
                WHERE term_id = '{term_id}';
            """,
            term_id=term_id,
        )
        if not redshift.execute(query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False

        return (
            f'Canvas enrollments API import completed for term {term_id}: {success_count} succeeded, '
            f'{failure_count} failed.')
Example #16
    def run(self, term_id=None):
        if not term_id:
            term_id = current_term_id()
        canvas_course_ids = [
            row['canvas_course_id']
            for row in get_enrolled_canvas_sites_for_term(term_id)
        ]

        app.logger.info(
            f'Starting Canvas enrollments API import job for term {term_id}, {len(canvas_course_ids)} course sites...'
        )

        rows = []
        success_count = 0
        failure_count = 0
        index = 1
        for course_id in canvas_course_ids:
            app.logger.info(
                f'Fetching Canvas enrollments API for course id {course_id}, term {term_id} ({index} of {len(canvas_course_ids)})'
            )
            feed = canvas_api.get_course_enrollments(course_id)
            if feed:
                success_count += 1
                for enrollment in feed:
                    user_id = enrollment.get('user_id')
                    last_activity_at = enrollment.get('last_activity_at') or ''
                    rows.append(
                        encoded_tsv_row([
                            course_id, user_id, term_id, last_activity_at,
                            json.dumps(enrollment)
                        ]))
            else:
                failure_count += 1
                app.logger.error(
                    f'Canvas enrollments API import failed for course id {course_id}.'
                )
            index += 1

        s3_key = f'{get_s3_sis_api_daily_path()}/canvas_api_enrollments/canvas_api_enrollments_{term_id}.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        query = resolve_sql_template_string(
            """
            CREATE EXTERNAL SCHEMA {redshift_schema_student}_staging_ext_tmp FROM data catalog
                DATABASE '{redshift_schema_student}_staging_ext_tmp'
                IAM_ROLE '{redshift_iam_role}'
                CREATE EXTERNAL DATABASE IF NOT EXISTS;
            CREATE EXTERNAL TABLE {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments (
                course_id VARCHAR,
                user_id VARCHAR,
                term_id VARCHAR,
                last_activity_at TIMESTAMP,
                feed VARCHAR
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\\t'
            STORED AS TEXTFILE
            LOCATION '{loch_s3_sis_api_data_path}/canvas_api_enrollments';

            DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}';
            INSERT INTO {redshift_schema_student}_staging.canvas_api_enrollments
                (SELECT * FROM {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments);
            DROP TABLE {redshift_schema_student}_staging_ext_tmp.canvas_api_enrollments;
            DROP SCHEMA {redshift_schema_student}_staging_ext_tmp;

            DELETE FROM {redshift_schema_student}.canvas_api_enrollments
                WHERE term_id = '{term_id}'
                AND course_id IN
                (SELECT course_id FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
            INSERT INTO {redshift_schema_student}.canvas_api_enrollments
                (SELECT * FROM {redshift_schema_student}_staging.canvas_api_enrollments WHERE term_id = '{term_id}');
            DELETE FROM {redshift_schema_student}_staging.canvas_api_enrollments
                WHERE term_id = '{term_id}';
            """,
            term_id=term_id,
        )
        if not redshift.execute_ddl_script(query):
            raise BackgroundJobError('Error on Redshift copy: aborting job.')

        return (
            f'Canvas enrollments API import completed for term {term_id}: {success_count} succeeded, '
            f'{failure_count} failed.')
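Example #16 is a later revision of Example #15: the manual '\t'.join calls give way to an encoded_tsv_row helper and s3.upload_tsv_rows, failures raise BackgroundJobError instead of returning False, and the Redshift load stages through a temporary Spectrum external schema rather than a direct COPY. A minimal sketch of what encoded_tsv_row might do (hypothetical; the real helper likely also escapes embedded tabs and newlines):

def encoded_tsv_row(values):
    # Render each value as a string, mapping None to an empty TSV field.
    return '\t'.join('' if value is None else str(value) for value in values)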
Example #17
    def generate_feeds(self, term_id=None, sids=None):
        """Loop through all records stored in the Calnet external schema and write merged student data to the internal student schema."""
        calnet_profiles = self.fetch_calnet_profiles(sids)

        # Jobs targeted toward a specific sid set (such as backfills) may return no CalNet profiles. Warn, don't error.
        if not calnet_profiles:
            app.logger.warning(f'No CalNet profiles returned, aborting job. (sids={sids})')
            return False

        # Jobs for non-current terms generate enrollment feeds only.
        if term_id and term_id != berkeley.current_term_id():
            tables = ['student_enrollment_terms']
        else:
            tables = [
                'student_profiles', 'student_academic_status',
                'student_majors', 'student_enrollment_terms', 'student_holds'
            ]

        # In-memory storage for generated feeds prior to TSV output.
        self.rows = {
            'student_profiles': [],
            'student_academic_status': [],
            'student_majors': [],
            'student_enrollment_terms': [],
            'student_holds': [],
        }

        # Track the results of course-level queries to avoid requerying.
        self.canvas_site_map = {}

        # Remove any old data from staging tables.
        for table in tables:
            redshift.execute(
                'TRUNCATE {schema}.{table}',
                schema=self.staging_schema_identifier,
                table=psycopg2.sql.Identifier(table),
            )

        app.logger.info(
            f'Will generate feeds for {len(calnet_profiles)} students (term_id={term_id}).'
        )
        successes = []
        failures = []
        index = 1
        for sid, profile_group in groupby(calnet_profiles,
                                          operator.itemgetter('sid')):
            app.logger.info(
                f'Generating feeds for sid {sid} ({index} of {len(calnet_profiles)})'
            )
            index += 1
            merged_profile = self.generate_or_fetch_merged_profile(
                term_id, sid,
                list(profile_group)[0])
            if merged_profile:
                self.generate_merged_enrollment_terms(merged_profile, term_id)
                self.parse_holds(sid)
                successes.append(sid)
            else:
                failures.append(sid)

        for table in tables:
            if not self.rows[table]:
                continue
            self.upload_to_staging(table)
            if not self.verify_table(table):
                return False

        with redshift.transaction() as transaction:
            for table in tables:
                if not self.refresh_from_staging(table, term_id, sids,
                                                 transaction):
                    app.logger.error(
                        f'Failed to refresh {self.destination_schema}.{table} from staging.'
                    )
                    return False
            if not transaction.commit():
                app.logger.error(
                    f'Final transaction commit failed for {self.destination_schema}.'
                )
                return False

        with rds.transaction() as transaction:
            if self.refresh_rds_indexes(sids, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS indexes.')
            else:
                transaction.rollback()
                app.logger.error('Failed to refresh RDS indexes.')
                return False

        update_merged_feed_status(term_id, successes, failures)
        app.logger.info('Updated merged feed status.')

        return f'Merged profile generation complete: {len(successes)} successes, {len(failures)} failures.'
Example #18
File: scheduling.py Project: lyttam/nessie
def schedule_all_jobs(force=False):
    from nessie.jobs.create_calnet_schema import CreateCalNetSchema
    from nessie.jobs.create_canvas_schema import CreateCanvasSchema
    from nessie.jobs.create_coe_schema import CreateCoeSchema
    from nessie.jobs.create_sis_schema import CreateSisSchema
    from nessie.jobs.generate_asc_profiles import GenerateAscProfiles
    from nessie.jobs.generate_boac_analytics import GenerateBoacAnalytics
    from nessie.jobs.generate_intermediate_tables import GenerateIntermediateTables
    from nessie.jobs.generate_merged_student_feeds import GenerateMergedStudentFeeds
    from nessie.jobs.import_asc_athletes import ImportAscAthletes
    from nessie.jobs.import_calnet_data import ImportCalNetData
    from nessie.jobs.import_canvas_enrollments_api import ImportCanvasEnrollmentsApi
    from nessie.jobs.import_degree_progress import ImportDegreeProgress
    from nessie.jobs.import_lrs_incrementals import ImportLrsIncrementals
    from nessie.jobs.import_sis_enrollments_api import ImportSisEnrollmentsApi
    from nessie.jobs.import_sis_student_api import ImportSisStudentApi
    from nessie.jobs.refresh_boac_cache import RefreshBoacCache
    from nessie.jobs.resync_canvas_snapshots import ResyncCanvasSnapshots
    from nessie.jobs.sync_canvas_snapshots import SyncCanvasSnapshots

    schedule_job(sched, 'JOB_SYNC_CANVAS_SNAPSHOTS', SyncCanvasSnapshots,
                 force)
    schedule_job(sched, 'JOB_RESYNC_CANVAS_SNAPSHOTS', ResyncCanvasSnapshots,
                 force)
    schedule_chained_job(
        sched,
        'JOB_IMPORT_STUDENT_POPULATION',
        [
            CreateCoeSchema,
            ImportAscAthletes,
            GenerateAscProfiles,
            ImportCalNetData,
            CreateCalNetSchema,
        ],
        force,
    )
    schedule_job(sched, 'JOB_IMPORT_DEGREE_PROGRESS', ImportDegreeProgress,
                 force)
    schedule_job(sched,
                 'JOB_IMPORT_LRS_INCREMENTALS',
                 ImportLrsIncrementals,
                 force,
                 truncate_lrs=True)
    schedule_job(sched, 'JOB_IMPORT_SIS_ENROLLMENTS', ImportSisEnrollmentsApi,
                 force)
    schedule_job(sched, 'JOB_IMPORT_SIS_STUDENTS', ImportSisStudentApi, force)
    schedule_job(sched, 'JOB_IMPORT_CANVAS_ENROLLMENTS',
                 ImportCanvasEnrollmentsApi, force)
    schedule_chained_job(
        sched,
        'JOB_GENERATE_ALL_TABLES',
        [
            CreateCanvasSchema,
            CreateSisSchema,
            GenerateIntermediateTables,
            GenerateBoacAnalytics,
        ],
        force,
    )
    schedule_job(
        sched,
        'JOB_GENERATE_CURRENT_TERM_FEEDS',
        GenerateMergedStudentFeeds,
        force,
        term_id=current_term_id(),
        backfill_new_students=True,
    )
    schedule_job(sched, 'JOB_REFRESH_BOAC_CACHE', RefreshBoacCache, force)
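Note that term_id=current_term_id() in the JOB_GENERATE_CURRENT_TERM_FEEDS entry is evaluated once, when schedule_all_jobs runs, not each time the job fires, so a scheduler left running across a term boundary would keep the stale id. A hedged alternative, assuming schedule_job forwards keyword arguments to run() and that run() defaults term_id to current_term_id() as in the examples above:

schedule_job(
    sched,
    'JOB_GENERATE_CURRENT_TERM_FEEDS',
    GenerateMergedStudentFeeds,
    force,
    backfill_new_students=True,  # omit term_id so run() resolves it at execution time
)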
Example #19
    def run(self, csids=None, term_id=None):
        if not csids:
            csids = [row['sid'] for row in get_all_student_ids()]
        if not term_id:
            term_id = current_term_id()
        app.logger.info(
            f'Starting SIS enrollments API import job for term {term_id}, {len(csids)} students...'
        )

        rows = []
        success_count = 0
        no_enrollments_count = 0
        failure_count = 0
        index = 1
        for csid in csids:
            app.logger.info(
                f'Fetching SIS enrollments API for SID {csid}, term {term_id} ({index} of {len(csids)})'
            )
            feed = sis_enrollments_api.get_drops_and_midterms(csid, term_id)
            if feed:
                success_count += 1
                rows.append('\t'.join(
                    [str(csid), str(term_id),
                     json.dumps(feed)]))
            elif feed is False:
                app.logger.info(
                    f'SID {csid} returned no enrollments for term {term_id}.')
                no_enrollments_count += 1
            else:
                failure_count += 1
                app.logger.error(
                    f'SIS enrollments API import failed for CSID {csid}.')
            index += 1

        s3_key = f'{get_s3_sis_api_daily_path()}/drops_and_midterms_{term_id}.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        if not redshift.execute(
                f"DELETE FROM {self.destination_schema}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}'"
        ):
            app.logger.error(
                'Error truncating old staging rows: aborting job.')
            return False
        if not redshift.copy_tsv_from_s3(
                f'{self.destination_schema}_staging.sis_api_drops_and_midterms',
                s3_key):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False
        staging_to_destination_query = resolve_sql_template_string(
            """
            DELETE FROM {redshift_schema_student}.sis_api_drops_and_midterms
                WHERE term_id = '{term_id}'
                AND sid IN
                (SELECT sid FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}');
            INSERT INTO {redshift_schema_student}.sis_api_drops_and_midterms
                (SELECT * FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms WHERE term_id = '{term_id}');
            DELETE FROM {redshift_schema_student}_staging.sis_api_drops_and_midterms
                WHERE term_id = '{term_id}';
            """,
            term_id=term_id,
        )
        if not redshift.execute(staging_to_destination_query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False

        return (
            f'SIS enrollments API import completed for term {term_id}: {success_count} succeeded, '
            f'{no_enrollments_count} returned no enrollments, {failure_count} failed.'
        )