def get_all_instructor_uids():
    sql = f"""SELECT DISTINCT instructor_uid FROM {sis_schema()}.courses
        WHERE instructor_uid IS NOT NULL AND instructor_uid != ''
        """
    return redshift.fetch(sql)
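# A minimal usage sketch, not part of the original module: redshift.fetch returns
# dict-like rows, so a caller typically flattens the result into a plain list of UIDs.
# The helper name below is hypothetical.
def fetch_instructor_uid_list():
    """Hypothetical helper; assumes get_all_instructor_uids() above."""
    return [row['instructor_uid'] for row in get_all_instructor_uids()]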
def get_edl_student_registrations(sids):
    # TODO: After move to EDL is done, compact the SQL below. For now, it is one column name per line, for readability.
    results = []
    count_per_chunk = 10000
    for chunk in range(0, len(sids), count_per_chunk):
        sids_subset = sids[chunk:chunk + count_per_chunk]
        # TODO: This query must follow the 'as_of=near_future' logic used by _get_api_v2_registrations (see NS-1105).
        sql = f"""SELECT
                r.academic_career_cd,
                r.academic_level_beginning_of_term_cd,
                r.academic_level_beginning_of_term_desc,
                r.academic_level_end_of_term_cd,
                r.academic_level_end_of_term_desc,
                r.current_term_gpa_nbr AS current_term_gpa,
                r.eligibility_status_desc,
                r.eligible_to_enroll_flag,
                r.expected_graduation_term,
                r.intends_to_graduate_flag,
                r.load_dt AS edl_load_date,
                r.maximum_term_enrollment_units_limit AS units_term_enrollment_max,
                r.minimum_term_enrollment_units_limit AS units_term_enrollment_min,
                r.registered_flag,
                r.registrn_eligibility_status_cd,
                r.semester_year_term_cd AS term_id,
                r.student_id,
                r.term_enrolled_units AS units_term_enrolled,
                r.terms_in_attendance,
                r.total_cumulative_gpa_nbr,
                r.total_units_completed_qty AS units_term_completed,
                s.acad_career,
                s.acad_career_first,
                s.acad_group_advis,
                s.acad_level_bot,
                s.acad_level_eot,
                s.acad_level_proj,
                s.acad_load_appr,
                s.acad_prog_primary,
                s.acad_year,
                s.academic_load,
                s.academic_load_dt,
                s.billing_career,
                s.class_rank_nbr,
                s.class_rank_tot,
                s.country,
                s.cum_gpa AS gpa_cumulative,
                s.cum_resident_terms,
                s.cur_gpa,
                s.cur_resident_terms,
                s.elig_to_enroll,
                s.end_date,
                s.enrl_on_trans_dt,
                s.ext_org_id,
                s.fa_eligibility,
                s.fa_load,
                s.fa_stats_calc_dttm,
                s.fa_stats_calc_req,
                s.form_of_study,
                s.fully_enrl_dt,
                s.fully_graded_dt,
                s.grade_points,
                s.grade_points_fa,
                s.institution,
                s.last_date_attended,
                s.lock_in_amt,
                s.lock_in_dt,
                s.max_audit_unit,
                s.max_crse_count,
                s.max_nogpa_unit,
                s.max_total_unit,
                s.max_wait_unit,
                s.min_total_unit,
                s.nslds_loan_year,
                s.ovrd_acad_lvl_all,
                s.ovrd_acad_lvl_proj,
                s.ovrd_bill_units,
                s.ovrd_init_add_fee,
                s.ovrd_init_enr_fee,
                s.ovrd_max_units,
                s.ovrd_tuit_group,
                s.ovrd_wdrw_sched,
                s.pro_rata_eligible,
                s.proj_bill_unt,
                s.refund_pct,
                s.refund_scheme,
                s.reg_card_date,
                s.registered,
                s.reset_cum_stats,
                s.sel_group,
                s.ssr_activation_dt,
                s.ssr_comb_cur_gpa,
                s.ssr_cum_en_gpa,
                s.ssr_cum_tr_gpa,
                s.ssr_tot_en_grdpts,
                s.ssr_tot_en_tkngpa,
                s.ssr_tot_tr_grdpts,
                s.ssr_tot_tr_tkngpa,
                s.ssr_trf_cur_gpa,
                s.start_date,
                s.stats_on_trans_dt,
                s.stdnt_car_nbr,
                s.study_agreement,
                s.tc_units_adjust,
                s.term_type,
                s.tot_audit,
                s.tot_cumulative AS units_cumulative,
                s.tot_grade_points,
                s.tot_grd_points_fa,
                s.tot_inprog_gpa,
                s.tot_inprog_nogpa,
                s.tot_other,
                s.tot_passd_fa,
                s.tot_passd_gpa,
                s.tot_passd_nogpa,
                s.tot_passd_prgrss,
                s.tot_taken_fa,
                s.tot_taken_fa_gpa,
                s.tot_taken_gpa,
                s.tot_taken_nogpa,
                s.tot_taken_prgrss,
                s.tot_test_credit,
                s.tot_trnsfr,
                s.trf_grade_points,
                s.trf_passed_gpa,
                s.trf_passed_nogpa,
                s.trf_resident_terms,
                s.trf_taken_gpa,
                s.trf_taken_nogpa,
                s.tuit_calc_dttm,
                s.tuit_calc_req,
                s.tuition_res_terms,
                s.unit_multiplier,
                s.unt_audit,
                s.unt_inprog_gpa,
                s.unt_inprog_nogpa,
                s.unt_other,
                s.unt_passd_fa,
                s.unt_passd_gpa,
                s.unt_passd_nogpa,
                s.unt_passd_prgrss,
                s.unt_taken_fa,
                s.unt_taken_fa_gpa,
                s.unt_taken_gpa,
                s.unt_taken_nogpa,
                s.unt_taken_prgrss,
                s.unt_term_tot,
                s.unt_test_credit,
                s.unt_trnsfr,
                s.untprg_chg_nslc_dt,
                s.withdraw_code,
                s.withdraw_date,
                s.withdraw_reason
            FROM {edl_external_schema()}.student_registration_term_data r
            JOIN {edl_external_schema_staging()}.cs_ps_stdnt_car_term s
                ON r.student_id = s.emplid
                AND r.semester_year_term_cd = s.strm
            WHERE r.student_id=ANY('{{{','.join(sids_subset)}}}')
            ORDER BY r.student_id, r.semester_year_term_cd DESC
        """
        results += redshift.fetch(sql)
    return results
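# Illustrative sketch, not in the original source, of the quoting trick used above:
# the f-string's triple braces render a literal Redshift array, so the WHERE clause
# becomes e.g. "ANY('{11667051,12345678}')". Chunking at 10,000 SIDs per query keeps
# each statement at a manageable size. The sample SIDs below are hypothetical.
sids_subset = ['11667051', '12345678', '23456789']
rendered = f"""WHERE r.student_id=ANY('{{{','.join(sids_subset)}}}')"""
assert rendered == "WHERE r.student_id=ANY('{11667051,12345678,23456789}')"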
def test_generate_demographics_feeds(self, app, student_tables):
    """Builds JSON feeds and uploads to S3."""
    from nessie.jobs.create_edl_schema import CreateEdlSchema
    with override_config(app, 'FEATURE_FLAG_EDL_STUDENT_PROFILES', True):
        with override_config(app, 'FEATURE_FLAG_EDL_DEMOGRAPHICS', True):
            with mock_s3(app):
                CreateEdlSchema().generate_demographics_feeds()
                rows = redshift.fetch(f'SELECT * FROM {student_schema()}.student_demographics')
                assert len(rows) == 11

                assert rows[0]['sid'] == '11667051'
                feed = json.loads(rows[0]['feed'])
                assert feed['gender'] == 'Female'
                assert feed['ethnicities'] == ['African-American / Black', 'Chinese / Chinese-American', 'East Indian / Pakistani']
                assert feed['nationalities'] == ['Singapore']
                assert feed['underrepresented'] is True
                assert feed['visa']['type'] == 'PR'
                assert feed['visa']['status'] == 'A'

                assert rows[1]['sid'] == '1234567890'
                feed = json.loads(rows[1]['feed'])
                assert feed['gender'] == 'Male'
                assert feed['ethnicities'] == ['Mexican / Mexican-American / Chicano', 'White']
                assert feed['nationalities'] == ['Iran (Islamic Republic Of)']
                assert feed['underrepresented'] is True
                assert feed['visa']['type'] == 'F1'
                assert feed['visa']['status'] == 'A'

                assert rows[2]['sid'] == '2345678901'
                feed = json.loads(rows[2]['feed'])
                assert feed['gender'] == 'Female'
                assert feed['ethnicities'] == ['White']
                assert feed['nationalities'] == ['Taiwan']
                assert feed['underrepresented'] is False
                assert feed['visa'] is None

                assert rows[3]['sid'] == '3456789012'
                feed = json.loads(rows[3]['feed'])
                assert feed['gender'] == 'Decline to State'
                assert feed['ethnicities'] == ['American Indian / Alaska Native', 'Filipino / Filipino-American']
                assert feed['nationalities'] == ['Korea, Republic of']
                assert feed['underrepresented'] is True
                assert feed['visa']['type'] == 'J1'
                assert feed['visa']['status'] == 'G'

                assert rows[4]['sid'] == '5000000000'
                feed = json.loads(rows[4]['feed'])
                assert feed['gender'] == 'Female'
                assert feed['ethnicities'] == ['Not Specified']
                assert feed['nationalities'] == []
                assert feed['underrepresented'] is False
                assert feed['visa'] is None

                assert rows[7]['sid'] == '8901234567'
                feed = json.loads(rows[7]['feed'])
                assert feed['gender'] == 'Decline to State'
                assert feed['ethnicities'] == ['Not Specified']
                assert feed['nationalities'] == []
                assert feed['underrepresented'] is False
                assert feed['visa'] is None

                assert rows[9]['sid'] == '9000000000'
                feed = json.loads(rows[9]['feed'])
                assert feed['gender'] == 'Nonbinary'
                assert feed['ethnicities'] == ['African-American / Black', 'Other Asian', 'Pacific Islander']
                assert feed['nationalities'] == ["Lao People's Democratic Rep", 'Saint Kitts and Nevis']
                assert feed['underrepresented'] is True
                assert feed['visa'] is None
def get_all_student_ids():
    sql = f"""SELECT sid FROM {asc_schema()}.students
        UNION SELECT sid FROM {coe_schema()}.students
        UNION SELECT sid FROM {undergrads_schema()}.students
        UNION SELECT sid FROM {advisee_schema()}.non_current_students"""
    return redshift.fetch(sql)
def get_sis_api_drops_and_midterms(csid, term_ids):
    sql = f"""SELECT term_id, feed
        FROM {student_schema()}.sis_api_drops_and_midterms
        WHERE sid='{csid}'
        AND term_id = ANY('{{{','.join(term_ids)}}}')"""
    return redshift.fetch(sql)
def get_imported(self, query):
    schema = app.config['REDSHIFT_SCHEMA_CANVAS_API']
    results = redshift.fetch(query.format(schema))
    return {r['course_id']: r['assignment_count'] for r in results}
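# Hypothetical usage sketch (the template below is an assumption for illustration,
# not a query from the source): get_imported expects a query template with one
# positional '{}' placeholder, which str.format fills with the Canvas API schema name.
ASSIGNMENT_COUNT_QUERY = """SELECT course_id, count(*) AS assignment_count
    FROM {}.assignments
    GROUP BY course_id"""
# imported = importer.get_imported(ASSIGNMENT_COUNT_QUERY)  # -> {course_id: assignment_count}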
def get_all_student_ids():
    sql = f"""SELECT sid FROM {asc_schema()}.students
        UNION SELECT sid FROM {coe_schema()}.students"""
    return redshift.fetch(sql)
def get_sis_api_degree_progress(csid):
    sql = f"""SELECT feed
        FROM {student_schema()}.sis_api_degree_progress
        WHERE sid='{csid}'"""
    return redshift.fetch(sql)
def get_user_for_uid(uid):
    sql = f"""SELECT canvas_id, name, uid
        FROM {intermediate_schema()}.users
        WHERE uid = {uid}
        """
    return redshift.fetch(sql)
def get_successfully_backfilled_students():
    sql = f"""SELECT sid
        FROM {metadata_schema()}.merged_feed_status
        WHERE term_id = 'all' AND status = 'success'"""
    return redshift.fetch(sql)
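# Illustrative sketch, not from the source: a backfill job can use the query above to
# skip students whose merged feeds already succeeded, via a simple set difference.
# The helper name and the all_sids argument are assumptions.
def sids_needing_backfill(all_sids):
    """Hypothetical helper; filters out SIDs already backfilled successfully."""
    done = {row['sid'] for row in get_successfully_backfilled_students()}
    return [sid for sid in all_sids if sid not in done]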
def get_submissions_turned_in_relative_to_user(user_id):
    sql = f"""SELECT course_id, canvas_user_id, submissions_turned_in
        FROM {boac_schema()}.assignment_submissions_relative
        WHERE reference_user_id = {user_id}"""
    return redshift.fetch(sql)
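# A minimal consumption sketch (hypothetical helper, not in the original module),
# assuming each row is unique per (course_id, canvas_user_id) for one reference user:
def submissions_by_course_and_user(user_id):
    """Index turned-in counts by (course, user) for the given reference user."""
    return {
        (row['course_id'], row['canvas_user_id']): row['submissions_turned_in']
        for row in get_submissions_turned_in_relative_to_user(user_id)
    }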
def test_resync_canvas_snapshots(self, app, metadata_db, caplog):
    """Dispatches a complete resync job against fixtures."""
    caplog.set_level(logging.INFO)
    snapshots = canvas_data.get_snapshots()['files']

    def mock_metadata(job_id, snapshot, status, destination_size):
        metadata.create_canvas_sync_status(job_id, snapshot['filename'], snapshot['table'], snapshot['url'])
        key = '/'.join([
            get_s3_canvas_daily_path(),
            snapshot['table'],
            snapshot['filename'],
        ])
        metadata.update_canvas_sync_status(
            job_id, key, status, source_size=1048576, destination_size=destination_size)

    old_sync_job = 'sync_152550000'
    latest_sync_job = 'sync_152560000'

    # The older job should be ignored by the resync.
    for snapshot in snapshots[0:5]:
        mock_metadata(old_sync_job, snapshot, 'complete', 1048576)
    for snapshot in snapshots[5:10]:
        mock_metadata(old_sync_job, snapshot, 'error', None)

    # The latest job synced five files successfully and ran into three problems.
    for snapshot in snapshots[10:15]:
        mock_metadata(latest_sync_job, snapshot, 'complete', 1048576)
    stalled = snapshots[15]
    errored = snapshots[16]
    size_discrepancy = snapshots[17]
    mock_metadata(latest_sync_job, stalled, 'streaming', None)
    mock_metadata(latest_sync_job, errored, 'error', None)
    mock_metadata(latest_sync_job, size_discrepancy, 'complete', 65536)

    schema = app.config['REDSHIFT_SCHEMA_METADATA']

    with capture_app_logs(app):
        assert redshift.fetch(
            f'SELECT count(*) FROM {schema}.canvas_sync_job_status',
        )[0]['count'] == 18
        with mock_s3(app):
            result = ResyncCanvasSnapshots().run_wrapped()
        assert 'Canvas snapshot resync job dispatched to workers' in result
        assert_background_job_status('resync')
        assert f"Dispatched S3 resync of snapshot {stalled['filename']}" in caplog.text
        assert f"Dispatched S3 resync of snapshot {errored['filename']}" in caplog.text
        assert f"Dispatched S3 resync of snapshot {size_discrepancy['filename']}" in caplog.text
        assert '3 successful dispatches, 0 failures' in caplog.text

        assert redshift.fetch(
            f'SELECT count(*) FROM {schema}.canvas_sync_job_status',
        )[0]['count'] == 21
        resync_results = redshift.fetch(
            f"SELECT * FROM {schema}.canvas_sync_job_status WHERE job_id LIKE 'resync%'",
        )
        assert len(resync_results) == 3

        urls = []
        for r in resync_results:
            assert r['job_id'].startswith('resync_')
            assert r['filename']
            assert r['canvas_table']
            assert r['created_at']
            assert r['updated_at']
            urls.append(r['source_url'])
        assert stalled['url'] in urls
        assert errored['url'] in urls
        assert size_discrepancy['url'] in urls
def sis_api_degree_progress(app, student_tables):
    from nessie.externals import redshift
    sql = """SELECT sid, feed FROM student_test.sis_api_degree_progress"""
    return redshift.fetch(sql)