# Every value a submission's "assigned_rating" field may hold (the empty
# string is accepted as well).
ratings = ('INCOMPLETE', 'PASSED', 'DISTINCTION', 'UNGRADED', '')


def valid_rating(x):
    """Return True when *x* is one of the values listed in ``ratings``."""
    return x in ratings


# Every value a submission's "processing_state" field may hold.
states = ('CREATED', 'EVALUATED')


def valid_state(x):
    """Return True when *x* is one of the values listed in ``states``."""
    return x in states


# Each submission row: both dates go through parse_date, completion_date may
# end up as None, the rating/state fields use the validators above, and any
# other string key is accepted with an arbitrary value.
submissions_schema = Schema([{
    'creation_date': And(Use(parse_date), dt, error='creation_date failure'),
    'completion_date': And(Use(parse_date), Or(dt, None), error='completion_date_failure'),
    'assigned_rating': valid_rating,
    'processing_state': valid_state,
    str: object,
}])

submissions = DataLoader(data_files.submissions, schema=submissions_schema)

if PWEAVE:
    print(submissions.data[0])

# Engagement rows carry the account id under 'acct'; move it to 'account_key'.
for engagement in engagements.data:
    engagement['account_key'] = engagement.pop('acct')

# this is messed up in pweave with pypy
if not PWEAVE:
    write_pickle(engagements.data, 'engagements')
    write_pickle(enrollments.data, 'enrollments')
    write_pickle(submissions.data, 'submissions')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 6/1/20

@author waldo

Take a .csv file obtained from the Harvard Training Portal of HUIDs that have
passed the COVID return training and create a set of those HUIDs. This may need
to be changed if the format of the file is different than a simple list of HUIDs

Usage: python build_trained_set.py input_file.csv

The resulting set is handed to utilities.write_pickle under the name
'trained_set.pkl'.
"""
import csv
import sys

import utilities

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: python build_trained_set.py input_file.csv")
        sys.exit(1)

    # The with-block closes the file even if reading fails part-way through;
    # newline='' is what the csv module asks for when given a file object.
    with open(sys.argv[1], 'r', newline='') as fin:
        # Only the first cell of each row is considered. Blank lines come back
        # from csv.reader as empty lists, so guard before indexing; any first
        # cell that is not all digits (for example a header) is ignored.
        huid_s = {row[0] for row in csv.reader(fin) if row and row[0].isdigit()}

    utilities.write_pickle('trained_set.pkl', huid_s)
# NOTE(review): the indented statements below read `column`, `passing`,
# `distinctive_array` and `non_passing_array`, none of which are bound in the
# visible text. They look like the tail of a function (presumably the
# `print_columns` called further down) whose header is outside this view -- confirm.
    distinctive = Statistics(distinctive_array)
    non_passing = Statistics(non_passing_array)
    # Row template: a label followed by three tab-separated values, each
    # rendered with two decimals (the "Count" row is formatted the same way).
    out_string = "{0}:\t{1:.2f}\t{2:.2f}\t{3:.2f}"
    # Title line, underlined with one dash per character of `column`.
    print(column.title())
    print('-' * len(column))
    print("\tPassed\tDistinctive\tNon-Passing")
    # One row per statistic; the value order matches the header printed above
    # (passing, distinctive, non_passing).
    print(out_string.format('Mean', passing.mean, distinctive.mean, non_passing.mean))
    print(out_string.format("STD", passing.std, distinctive.std, non_passing.std))
    print(out_string.format('Min', passing.min, distinctive.min, non_passing.min))
    print(out_string.format('QI', passing.q1, distinctive.q1, non_passing.q1))
    print(out_string.format('Median', passing.median, distinctive.median, non_passing.median))
    print(out_string.format('QIII', passing.q3, distinctive.q3, non_passing.q3))
    print(out_string.format("Max", passing.max, distinctive.max, non_passing.max))
    print(out_string.format('IQR', passing.iqr, distinctive.iqr, non_passing.iqr))
    print(out_string.format("Count", passing.count, distinctive.count, non_passing.count))


# The summary table is only printed when PWEAVE is set.
if PWEAVE:
    print_columns(b'total_minutes_visited')

# Pull the b'total_minutes_visited' column out of each engagement group.
# Note the column name is a bytes literal, not a str.
passed = get_columns(passed_engagement, b'total_minutes_visited')
distinctive = get_columns(distinctive_engagement, b'total_minutes_visited')
non_passing = get_columns(non_passing_engagement, b'total_minutes_visited')
passing = get_columns(passing_engagement, b'total_minutes_visited')
if PWEAVE:
    # Here `mean` is called as a method on whatever get_columns returned.
    print(passing.mean())
# Three of the four extracted columns are pickled; `passing` is not.
write_pickle(passed, 'passed')
write_pickle(distinctive, 'distinctive')
write_pickle(non_passing, 'non_passing')
:return: list of data with only paid_students """ return [datum for datum in data if datum['account_key'] in paid_students] paid_engagements = paid_only(engagements) paid_enrollments = paid_only(enrollments) paid_submissions = paid_only(submissions) assert len(paid_enrollments) == 1293 assert len(paid_engagements) == 134549 assert len(paid_submissions) == 3618 paid_engagement_less_than_week = [e for e in paid_engagements if less_than_one_week(paid_students[e['account_key']], e['utc_date'])] paid_engagement_in_first_week = [e for e in paid_engagements if within_one_week(paid_students[e['account_key']], e['utc_date'])] expected = 21508 assert len(paid_engagement_less_than_week) == expected, \ "Expected: {2} Actual: {0} Difference: {1}".format(len(paid_engagement_in_first_week), len(paid_engagement_in_first_week) - expected, expected) if PWEAVE: print(len(paid_engagement_in_first_week)) write_pickle(paid_engagements, 'paid_engagements') write_pickle(paid_submissions, 'paid_submissions')
Created on 5/29/20 @author waldo """ import sys, csv, pickle import interval_rec as ir import utilities as ut if __name__ == '__main__': if len(sys.argv) < 3: print("Usage: python build_attest_d.py attest_file_in.csv dictionary_file_out.pkl") sys.exit(1) fin = open(sys.argv[1], 'r') cin = csv.reader(fin) h = next(cin) att_d = {} for l in cin: v = ir.TimeInterval(l) huid = l[0] att_d[huid] = att_d.setdefault(huid, []) att_d[huid].append(v) fin.close() ut.write_pickle(sys.argv[2], att_d)