# Every value accepted for a submission's assigned rating; the empty
# string is one of the accepted values.
ratings = (
    "INCOMPLETE",
    "PASSED",
    "DISTINCTION",
    "UNGRADED",
    "",
)


def valid_rating(x):
    """Return True if *x* equals one of the entries in ``ratings``."""
    return x in ratings

# Every value accepted for a submission's processing state.
states = (
    "CREATED",
    "EVALUATED",
)


def valid_state(x):
    """Return True if *x* equals one of the entries in ``states``."""
    return x in states

# Schema for the submissions data: a list of dict records.
#   creation_date    -- run through parse_date, then checked against dt
#   completion_date  -- run through parse_date, then checked against dt or None
#   assigned_rating  -- checked with valid_rating
#   processing_state -- checked with valid_state
# The trailing ``str: object`` entry presumably lets any other string key
# through with an arbitrary value (schema-package convention; confirm).
submissions_schema = Schema(
    [
        {
            "creation_date": And(
                Use(parse_date),
                dt,
                error="creation_date failure",
            ),
            "completion_date": And(
                Use(parse_date),
                Or(dt, None),
                error="completion_date_failure",
            ),
            "assigned_rating": valid_rating,
            "processing_state": valid_state,
            str: object,
        }
    ]
)

# Build the submissions loader, handing it the schema defined for them.
submissions = DataLoader(data_files.submissions, schema=submissions_schema)

# Show the first loaded submission record when the PWEAVE flag is set.
if PWEAVE:
    print(submissions.data[0])

# Move each engagement's 'acct' value over to the 'account_key' key.
for engagement in engagements.data:
    engagement['account_key'] = engagement.pop('acct')

# this is messed up in pweave with pypy
if not PWEAVE:
    write_pickle(engagements.data, 'engagements')
    write_pickle(enrollments.data, 'enrollments')
    write_pickle(submissions.data, 'submissions')
Exemple #2
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on 6/1/20

@author waldo

Take a .csv file obtained from the Harvard Training Portal of HUIDs that have passed the COVID return training and
create a set of those HUIDs. This may need to be changed if the format of the file is different than a simple list
of HUIDs
"""

import sys, csv, utilities

def read_huids(path):
    """Return the set of HUIDs found in the first column of a CSV file.

    Only rows whose first field consists solely of digits contribute, so
    header lines and other non-numeric entries are ignored. Blank lines,
    which ``csv.reader`` yields as empty lists, are skipped instead of
    raising ``IndexError``.

    :param path: path of the CSV file to read
    :return: set of HUID strings
    """
    # The context manager closes the file even if reading fails part-way;
    # newline='' is the mode the csv module asks for.
    with open(path, 'r', newline='') as fin:
        return {row[0] for row in csv.reader(fin) if row and row[0].isdigit()}


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: python build_trained_set.py input_file.csv")
        sys.exit(1)

    huid_s = read_huids(sys.argv[1])
    utilities.write_pickle('trained_set.pkl', huid_s)
    distinctive = Statistics(distinctive_array)
    non_passing = Statistics(non_passing_array)

    out_string = "{0}:\t{1:.2f}\t{2:.2f}\t{3:.2f}"
    print(column.title())
    print('-' * len(column))
    print("\tPassed\tDistinctive\tNon-Passing")
    print(out_string.format('Mean', passing.mean, distinctive.mean, non_passing.mean))
    print(out_string.format("STD", passing.std, distinctive.std, non_passing.std))
    print(out_string.format('Min', passing.min, distinctive.min, non_passing.min))
    print(out_string.format('QI', passing.q1, distinctive.q1, non_passing.q1))
    print(out_string.format('Median', passing.median, distinctive.median, non_passing.median))
    print(out_string.format('QIII', passing.q3, distinctive.q3, non_passing.q3))
    print(out_string.format("Max", passing.max, distinctive.max, non_passing.max))
    print(out_string.format('IQR', passing.iqr, distinctive.iqr, non_passing.iqr))
    print(out_string.format("Count", passing.count, distinctive.count, non_passing.count))

# When the PWEAVE flag is set, run print_columns for the
# b'total_minutes_visited' column.
if PWEAVE:
    print_columns(b'total_minutes_visited')

# Pull the b'total_minutes_visited' column out of each engagement group.
passed = get_columns(passed_engagement, b'total_minutes_visited')
distinctive = get_columns(distinctive_engagement, b'total_minutes_visited')
non_passing = get_columns(non_passing_engagement, b'total_minutes_visited')

# Same column for the passing_engagement group; this one is only used for
# the optional print below and is not pickled.
passing = get_columns(passing_engagement, b'total_minutes_visited')
if PWEAVE:
    # NOTE(review): relies on get_columns returning an object with a
    # mean() method -- confirm against its definition.
    print(passing.mean())

# Persist the three extracted columns under matching names.
write_pickle(passed, 'passed')
write_pickle(distinctive, 'distinctive')
write_pickle(non_passing, 'non_passing')
    :return: list of data with only paid_students
    """
    return [datum for datum in data if datum['account_key'] in paid_students]

# Filter each data set through paid_only.
paid_engagements = paid_only(engagements)
paid_enrollments = paid_only(enrollments)
paid_submissions = paid_only(submissions)

# Sanity-check the filtered record counts against the expected totals.
assert len(paid_enrollments) == 1293
assert len(paid_engagements) == 134549
assert len(paid_submissions) == 3618

# Two selections of engagement records, one per helper predicate. Each
# predicate receives the student's paid_students entry and the record's
# 'utc_date'.
paid_engagement_less_than_week = [
    e for e in paid_engagements
    if less_than_one_week(paid_students[e['account_key']], e['utc_date'])
]

paid_engagement_in_first_week = [
    e for e in paid_engagements
    if within_one_week(paid_students[e['account_key']], e['utc_date'])
]

expected = 21508
# The failure message describes the list actually under test
# (paid_engagement_less_than_week), so "Actual" and "Difference" match the
# comparison that failed.
assert len(paid_engagement_less_than_week) == expected, \
    "Expected: {2} Actual: {0} Difference: {1}".format(
        len(paid_engagement_less_than_week),
        len(paid_engagement_less_than_week) - expected,
        expected)

if PWEAVE:
    print(len(paid_engagement_in_first_week))

write_pickle(paid_engagements, 'paid_engagements')
write_pickle(paid_submissions, 'paid_submissions')
Exemple #5
0
Created on 5/29/20

@author waldo


"""
import sys, csv, pickle
import interval_rec as ir
import utilities as ut

# Script entry point: read the CSV named by the first argument, group one
# ir.TimeInterval per row under the row's first field, and write the
# resulting dictionary to the path named by the second argument.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("Usage: python build_attest_d.py attest_file_in.csv dictionary_file_out.pkl")
        sys.exit(1)

    att_d = {}
    # The context manager closes the input even if a row fails to parse;
    # newline='' is the mode the csv module asks for.
    with open(sys.argv[1], 'r', newline='') as fin:
        cin = csv.reader(fin)
        # Discard the first row (presumably a header). The default keeps an
        # empty file from raising StopIteration.
        next(cin, None)
        for row in cin:
            if not row:
                # csv.reader yields [] for blank lines; no key to file under.
                continue
            interval = ir.TimeInterval(row)
            # setdefault inserts the list on first sight of a key, so no
            # separate assignment is needed.
            att_d.setdefault(row[0], []).append(interval)

    ut.write_pickle(sys.argv[2], att_d)