Code Example #1
    def merge_analytics_data_for_term(self, term_id):
        feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'

        advisees_by_canvas_id_path = feed_path + 'advisees_by_canvas_id.json'
        advisees_by_canvas_id = s3.get_object_json(advisees_by_canvas_id_path)
        if not advisees_by_canvas_id:
            raise BackgroundJobError(
                f'Failed to retrieve advisee map at {advisees_by_canvas_id_path}, aborting'
            )

        enrollment_term_map_path = feed_path + f'enrollment_term_map_{term_id}.json'
        enrollment_term_map = s3.get_object_json(enrollment_term_map_path)
        if not enrollment_term_map:
            raise BackgroundJobError(
                f'Failed to retrieve enrollment term map at {enrollment_term_map_path}, aborting'
            )

        # Pull the Canvas site map for the term; empty for terms without Canvas integration.
        canvas_site_map = self.merge_canvas_analytics_for_term(term_id, feed_path)

        # Merge course-level analytics, then per-advisee assignment submissions, into the term feed.
        self.merge_course_analytics_for_term(term_id, canvas_site_map, enrollment_term_map, advisees_by_canvas_id)
        self.merge_advisee_assignment_submissions_for_term(term_id, enrollment_term_map, advisees_by_canvas_id)
        return enrollment_term_map
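
A plausible call site, shown only as a sketch: the driver loop and its `job` object below are assumptions, not part of the excerpt, though reverse_term_ids() does appear in Code Example #3 as the source of Canvas-integrated term ids.

    # Hypothetical driver: merge analytics term by term (the `job` object is assumed).
    for term_id in reverse_term_ids():
        enrollment_term_map = job.merge_analytics_data_for_term(term_id)
        app.logger.info(f'Merged analytics for term {term_id} ({len(enrollment_term_map)} entries).')
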
Code Example #2
    def create_schema(self):
        base_s3_key = app.config['LOCH_S3_E_I_DATA_PATH']
        external_schema = app.config['REDSHIFT_SCHEMA_E_I_ADVISING_NOTES']
        redshift.drop_external_schema(external_schema)
        # Flatten E&I-sourced JSON files into two schema-friendly JSON files.
        notes = []
        topics = []
        for key in s3.get_keys_with_prefix(base_s3_key):
            if key.endswith('.json'):
                notes_json = s3.get_object_json(key)
                if notes_json and 'notes' in notes_json:
                    notes += notes_json['notes']
                    # Extract topics from this file's notes only; looping over the
                    # accumulated `notes` list would re-add earlier topics on every pass.
                    for note in notes_json['notes']:
                        topics += _extract_topics(note)

        if s3.upload_json(obj=notes, s3_key=f'{base_s3_key}/aggregated_notes/data.json') \
                and s3.upload_json(obj=topics, s3_key=f'{base_s3_key}/aggregated_topics/data.json'):
            # Create schema
            app.logger.info('Executing SQL...')
            resolved_ddl = resolve_sql_template('create_e_i_advising_notes_schema.template.sql')
            if redshift.execute_ddl_script(resolved_ddl):
                verify_external_schema(external_schema, resolved_ddl)
            else:
                raise BackgroundJobError('E&I Advising Notes schema creation job failed.')
        else:
            raise BackgroundJobError('Failed to upload aggregated E&I advising notes and topics.')
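
The _extract_topics helper is not included in this excerpt. A minimal sketch of one plausible shape, assuming each note dict carries an 'id' and an optional 'topics' list of strings (both field names are assumptions, not the module's actual contract):

    def _extract_topics(note):
        # Hypothetical: pair each topic string with its note id so the aggregated
        # topics file can be joined back to notes once loaded into Redshift.
        return [{'advising_note_id': note.get('id'), 'topic': topic} for topic in (note.get('topics') or [])]
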
Code Example #3
    def merge_canvas_analytics_for_term(self, term_id, feed_path):
        # Terms without Canvas integration have no site map to merge.
        if term_id not in reverse_term_ids():
            return {}
        canvas_site_map_path = feed_path + f'canvas_site_map_{term_id}.json'
        canvas_site_map = s3.get_object_json(canvas_site_map_path)
        if not canvas_site_map:
            raise BackgroundJobError(
                f'Failed to retrieve Canvas site map at {canvas_site_map_path}, aborting'
            )
        return canvas_site_map
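
Because non-integrated terms return an empty dict rather than raising, callers can branch cheaply. A hypothetical call site (the `job` object and term id are stand-ins):

    site_map = job.merge_canvas_analytics_for_term('2178', feed_path)
    if not site_map:
        app.logger.info('No Canvas integration for this term; skipping site-level merges.')
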
Code Example #4
    def transform(self, s3_source, s3_dest, key=None):
        objects = s3.get_keys_with_prefix(s3_source)
        app.logger.info(
            f'Will transform {len(objects)} objects from {s3_source} and put results to {s3_dest}.'
        )
        skip_count = 0
        for o in objects:
            file_name = o.split('/')[-1]
            # Skip objects already transformed on a previous run.
            if s3.object_exists(f'{s3_dest}/{file_name}'):
                skip_count += 1
                continue
            # Fetch the object once; if a key is given, the records live under it.
            object_json = s3.get_object_json(o)
            canvas_api_data = object_json.get(key) if key else object_json
            with tempfile.TemporaryFile() as result:
                # The course id is the second-to-last underscore-delimited token in the file name.
                course_id = int(file_name.split('_')[-2])
                # Stamp each record with its course id and write newline-delimited JSON.
                for record in canvas_api_data:
                    record['course_id'] = course_id
                    result.write(json.dumps(record).encode() + b'\n')
                s3.upload_file(result, f'{s3_dest}/{file_name}')
        app.logger.info(
            f'Transformed {len(objects) - skip_count} new objects; skipped {skip_count} existing objects.'
        )
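
To make the per-object transformation concrete, here is a self-contained sketch with in-memory stand-ins for the S3 objects (the file name and records below are invented for illustration):

    import json

    file_name = 'assignments_12345_1.json'  # invented; course id is the second-to-last '_' token
    records = [{'id': 1, 'points': 10}, {'id': 2, 'points': 20}]

    course_id = int(file_name.split('_')[-2])  # -> 12345
    ndjson = b''.join(json.dumps({**record, 'course_id': course_id}).encode() + b'\n' for record in records)
    print(ndjson.decode())
    # {"id": 1, "points": 10, "course_id": 12345}
    # {"id": 2, "points": 20, "course_id": 12345}
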
Code Example #5
    def generate_feeds(self):
        # Translation between canvas_user_id and UID/SID is needed to merge Canvas analytics data and SIS enrollment-based data.
        advisees_by_canvas_id = {}
        advisees_by_sid = {}
        self.successes = []
        self.failures = []
        profile_tables = self.generate_student_profile_tables(
            advisees_by_canvas_id, advisees_by_sid)
        if not profile_tables:
            raise BackgroundJobError(
                'Failed to generate student profile tables.')

        feed_path = app.config['LOCH_S3_BOAC_ANALYTICS_DATA_PATH'] + '/feeds/'
        s3.upload_json(advisees_by_canvas_id,
                       feed_path + 'advisees_by_canvas_id.json')

        upload_student_term_maps(advisees_by_sid)

        # Future terms and pre-CS (legacy) terms have no Canvas analytics to merge;
        # refresh their enrollment terms directly from the stored term maps.
        for term_id in (future_term_ids() + legacy_term_ids()):
            enrollment_term_map = s3.get_object_json(
                feed_path + f'enrollment_term_map_{term_id}.json')
            if enrollment_term_map:
                GenerateMergedEnrollmentTerm().refresh_student_enrollment_term(
                    term_id, enrollment_term_map)

        canvas_integrated_term_ids = reverse_term_ids()
        app.logger.info(
            f'Will queue analytics generation for {len(canvas_integrated_term_ids)} terms on worker nodes.'
        )
        result = queue_merged_enrollment_term_jobs(self.job_id,
                                                   canvas_integrated_term_ids)
        if not result:
            raise BackgroundJobError('Failed to queue enrollment term jobs.')

        refresh_all_from_staging(profile_tables)
        self.update_redshift_academic_standing()
        self.update_rds_profile_indexes()

        app.logger.info(
            'Profile generation complete; waiting for enrollment term generation to finish.'
        )

        # Poll until every queued enrollment term job has finished on the worker nodes.
        while True:
            sleep(1)
            enrollment_results = get_merged_enrollment_term_job_status(self.job_id)
            if not enrollment_results:
                raise BackgroundJobError('Failed to retrieve enrollment term job status.')
            any_pending_job = next(
                (row for row in enrollment_results if row['status'] in ('created', 'started')),
                None,
            )
            if not any_pending_job:
                break

        app.logger.info('Exporting analytics data for archival purposes.')
        unload_enrollment_terms([current_term_id(), future_term_id()])

        app.logger.info('Refreshing enrollment terms in RDS.')
        with rds.transaction() as transaction:
            if self.refresh_rds_enrollment_terms(None, transaction):
                transaction.commit()
                app.logger.info('Refreshed RDS enrollment terms.')
            else:
                transaction.rollback()
                raise BackgroundJobError(
                    'Failed to refresh RDS enrollment terms.')

        status_string = f'Generated merged profiles ({len(self.successes)} successes, {len(self.failures)} failures).'
        errored = False
        for row in enrollment_results:
            status_string += f" {row['details']}"
            if row['status'] == 'error':
                errored = True

        truncate_staging_table('student_enrollment_terms')
        if errored:
            raise BackgroundJobError(status_string)
        else:
            return status_string
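
One design note on the polling loop in this example: as written it waits indefinitely for the worker jobs. If a bounded wait were preferred, a timeout guard is a straightforward variant; the sketch below would replace the loop inside generate_feeds (the four-hour limit is invented, not from the source):

    from time import sleep, time

    deadline = time() + 4 * 60 * 60  # hypothetical ceiling on the wait
    while True:
        sleep(1)
        enrollment_results = get_merged_enrollment_term_job_status(self.job_id)
        if not enrollment_results:
            raise BackgroundJobError('Failed to retrieve enrollment term job status.')
        if not any(row['status'] in ('created', 'started') for row in enrollment_results):
            break
        if time() > deadline:
            raise BackgroundJobError('Timed out waiting for enrollment term jobs.')
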