def test_parsed(self, app):
     """Verifies the parsed degree progress feed is front-end friendly."""
     parsed = sis_degree_progress_api.parsed_degree_progress(11667051)
     assert parsed['reportDate'] == '2017-03-03'
     # Table of requirement keys and the status each should report.
     expected_statuses = {
         'entryLevelWriting': 'Satisfied',
         'americanHistory': 'Not Satisfied',
         'americanCultures': 'In Progress',
         'americanInstitutions': 'Not Satisfied',
     }
     requirements = parsed['requirements']
     for requirement, status in expected_statuses.items():
         assert requirements[requirement]['status'] == status
 def test_user_not_found(self, app, caplog):
     """Verifies an error in the CS XML yields an empty parsed feed."""
     # The raw API call still returns a (error) payload...
     assert sis_degree_progress_api._get_degree_progress(9999999)
     # ...but parsing it produces an empty dict rather than bad data.
     assert sis_degree_progress_api.parsed_degree_progress(9999999) == {}
# Example #3
    def run(self, csids=None):
        """Import SIS degree progress API feeds into Redshift.

        Fetches the parsed degree progress feed for each CSID, stashes the
        successful feeds as a TSV in S3, then copies them into the Redshift
        staging table and merges staging into the destination table.

        :param csids: optional list of student CSIDs; defaults to all known
            student SIDs.
        :return: a summary string on success, False on any S3/Redshift error.
        """
        if not csids:
            # get_all_student_ids() may return None; guard so the comprehension
            # (and len() below) never raises.
            csids = [row['sid'] for row in (get_all_student_ids() or [])]
        app.logger.info(
            f'Starting SIS degree progress API import job for {len(csids)} students...'
        )

        rows = []
        success_count = 0
        no_information_count = 0
        failure_count = 0

        # TODO The SIS degree progress API will return useful data only for students with a UGRD current registration.
        # We get that registration from the SIS student API, which is imported concurrently with this job. Is there an
        # alternative way to filter out non-UGRD students?
        for index, csid in enumerate(csids, start=1):
            app.logger.info(
                f'Fetching degree progress API for SID {csid} ({index} of {len(csids)})'
            )
            feed = sis_degree_progress_api.parsed_degree_progress(csid)
            if feed:
                success_count += 1
                rows.append('\t'.join([str(csid), json.dumps(feed)]))
            elif feed == {}:
                # Empty dict means the API answered but had no data for this SID.
                app.logger.info(
                    f'No degree progress information found for SID {csid}.')
                no_information_count += 1
            else:
                # Any other falsy value (e.g. None) indicates a request failure.
                failure_count += 1
                app.logger.error(
                    f'SIS get_degree_progress failed for SID {csid}.')

        s3_key = f'{get_s3_sis_api_daily_path()}/degree_progress.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_data('\n'.join(rows), s3_key):
            app.logger.error('Error on S3 upload: aborting job.')
            return False

        app.logger.info('Will copy S3 feeds into Redshift...')
        if not redshift.execute(
                f'TRUNCATE {self.destination_schema}_staging.sis_api_degree_progress'
        ):
            app.logger.error(
                'Error truncating old staging rows: aborting job.')
            return False
        if not redshift.copy_tsv_from_s3(
                f'{self.destination_schema}_staging.sis_api_degree_progress',
                s3_key):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False
        # Merge staging into destination (delete-then-insert), then clear staging.
        # Bug fix: previously truncated ..._staging.sis_api_profiles (copy-paste
        # error), leaving stale degree-progress staging rows behind.
        staging_to_destination_query = resolve_sql_template_string(
            """
            DELETE FROM {redshift_schema_student}.sis_api_degree_progress
                WHERE sid IN (SELECT sid FROM {redshift_schema_student}_staging.sis_api_degree_progress);
            INSERT INTO {redshift_schema_student}.sis_api_degree_progress
                (SELECT * FROM {redshift_schema_student}_staging.sis_api_degree_progress);
            TRUNCATE {redshift_schema_student}_staging.sis_api_degree_progress;
            """, )
        if not redshift.execute(staging_to_destination_query):
            app.logger.error('Error on Redshift copy: aborting job.')
            return False

        return (
            f'SIS degree progress API import job completed: {success_count} succeeded, '
            f'{no_information_count} returned no information, {failure_count} failed.'
        )
    def run(self, csids=None):
        """Import SIS degree progress API feeds into Redshift via S3/Spectrum.

        Fetches the parsed degree progress feed for each CSID, stashes the
        successful feeds as TSV rows in S3, loads them through a temporary
        external (Spectrum) schema into the Redshift staging table, then
        merges staging into the destination table.

        :param csids: optional list of student CSIDs; defaults to all known
            student SIDs.
        :return: a summary string on success.
        :raises BackgroundJobError: on any S3 or Redshift failure.
        """
        if not csids:
            # get_all_student_ids() may return None or empty; default to an
            # empty list so len() and iteration below are always safe.
            # (Previously csids stayed None in that case and len(csids) raised.)
            csids = [row['sid'] for row in (get_all_student_ids() or [])]
        app.logger.info(
            f'Starting SIS degree progress API import job for {len(csids)} students...'
        )

        rows = []
        success_count = 0
        no_information_count = 0
        failure_count = 0

        # TODO The SIS degree progress API will return useful data only for students with a UGRD current registration.
        # We get that registration from the SIS student API, which is imported concurrently with this job. Is there an
        # alternative way to filter out non-UGRD students?
        for index, csid in enumerate(csids, start=1):
            app.logger.info(
                f'Fetching degree progress API for SID {csid} ({index} of {len(csids)})'
            )
            feed = sis_degree_progress_api.parsed_degree_progress(csid)
            if feed:
                success_count += 1
                rows.append(encoded_tsv_row([csid, json.dumps(feed)]))
            elif feed == {}:
                # Empty dict means the API answered but had no data for this SID.
                app.logger.info(
                    f'No degree progress information found for SID {csid}.')
                no_information_count += 1
            else:
                # Any other falsy value (e.g. None) indicates a request failure.
                failure_count += 1
                app.logger.error(
                    f'SIS get_degree_progress failed for SID {csid}.')

        s3_key = f'{get_s3_sis_api_daily_path()}/degree_progress/degree_progress.tsv'
        app.logger.info(f'Will stash {success_count} feeds in S3: {s3_key}')
        if not s3.upload_tsv_rows(rows, s3_key):
            raise BackgroundJobError('Error on S3 upload: aborting job.')

        app.logger.info('Will copy S3 feeds into Redshift...')
        if not redshift.execute(
                f'TRUNCATE {self.redshift_schema}_staging.sis_api_degree_progress'
        ):
            raise BackgroundJobError(
                'Error truncating old staging rows: aborting job.')

        # Load the S3 TSV through a throwaway external schema/table, merge it
        # into staging, then merge staging into the destination table.
        query = resolve_sql_template_string(
            """
            CREATE EXTERNAL SCHEMA {redshift_schema_student}_staging_ext_tmp FROM data catalog
                DATABASE '{redshift_schema_student}_staging_ext_tmp'
                IAM_ROLE '{redshift_iam_role}'
                CREATE EXTERNAL DATABASE IF NOT EXISTS;
            CREATE EXTERNAL TABLE {redshift_schema_student}_staging_ext_tmp.sis_api_degree_progress (
                sid VARCHAR,
                feed VARCHAR(MAX)
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\\t'
            STORED AS TEXTFILE
            LOCATION '{loch_s3_sis_api_data_path}/degree_progress';

            DELETE FROM {redshift_schema_student}_staging.sis_api_degree_progress
                WHERE sid IN (SELECT sid FROM {redshift_schema_student}_staging_ext_tmp.sis_api_degree_progress);
            INSERT INTO {redshift_schema_student}_staging.sis_api_degree_progress
                (SELECT * FROM {redshift_schema_student}_staging_ext_tmp.sis_api_degree_progress);
            DROP TABLE {redshift_schema_student}_staging_ext_tmp.sis_api_degree_progress;
            DROP SCHEMA {redshift_schema_student}_staging_ext_tmp;

            DELETE FROM {redshift_schema_student}.sis_api_degree_progress
                WHERE sid IN (SELECT sid FROM {redshift_schema_student}_staging.sis_api_degree_progress);
            INSERT INTO {redshift_schema_student}.sis_api_degree_progress
                (SELECT * FROM {redshift_schema_student}_staging.sis_api_degree_progress);
            TRUNCATE {redshift_schema_student}_staging.sis_api_degree_progress;
            """, )
        if not redshift.execute_ddl_script(query):
            raise BackgroundJobError('Error on Redshift copy: aborting job.')

        # Reclaim space and refresh planner statistics after the bulk load.
        redshift.execute('VACUUM; ANALYZE;')

        return (
            f'SIS degree progress API import job completed: {success_count} succeeded, '
            f'{no_information_count} returned no information, {failure_count} failed.'
        )