def reportBroadStats():
    """Print per-course enrollment statistics as LaTeX table rows.

    Reads ``course_to_discipline.csv`` (columns ``course_id`` and
    ``discipline_grouping``) and the pickled dictionary in
    ``results_prong1.pkl``.  For every course whose pickled entry is
    non-empty, the course's ``person_course.csv.gz`` is loaded and one row
    is printed containing: course id, discipline grouping, number of rows
    whose ``start_time`` precedes the course's ``PREDICTION_DATES_1_0``
    entry, number of those rows that also have ``viewed == 1``, and the sum
    of ``certified`` over the latter.

    After all courses, the number of distinct usernames seen across the
    loaded ``person_course`` files is printed.

    Returns:
        None.  All output goes to stdout.

    Raises:
        KeyError: if a course id is missing from the discipline table or
            from the prediction-date table.
    """
    d = pandas.read_csv('course_to_discipline.csv')
    courseToDiscipline = dict(zip(d.course_id, d.discipline_grouping))
    # Only the third table returned by loadCourseDates() is needed here.
    _, _, predictionDates = loadCourseDates()

    # NOTE(review): unpickling can execute arbitrary code; this is only safe
    # as long as results_prong1.pkl is a locally produced, trusted file.
    with open("results_prong1.pkl", "rb") as resultsFile:
        resultsRepeatedCourse = cPickle.load(resultsFile)

    # Must be an empty set *instance*.  Binding the bare ``set`` type only
    # worked by accident inside the loop and made the final len() fail when
    # no course had any results.
    allStudents = set()
    for courseId in resultsRepeatedCourse:
        if len(resultsRepeatedCourse[courseId]) == 0:
            continue

        directory = CHARLESRIVERX_COURSE_ROOT + "/" + courseId.replace(HARVARDX, "").replace("/", "-")
        pc = pandas.read_csv(directory + "/" + "person_course.csv.gz")
        pc = convertTimes(pc, "start_time")

        # Computed once and reused for both the count and the row filter.
        registeredInTime = pc.start_time < predictionDates[courseId]
        numRegistrants = np.sum(registeredInTime)

        # NOTE(review): usernames are collected from the whole file, not only
        # from the filtered rows below -- confirm this is the intended
        # meaning of "Total participants".
        allStudents.update(pc.username)

        participants = pc[registeredInTime & (pc.viewed == 1)]
        # Single-argument print(...) behaves identically as a Python 2 print
        # statement and as a Python 3 function call.
        print("{} & {} & {} & {} & {}\\\\".format(
            courseId,
            courseToDiscipline[courseId],
            numRegistrants,
            participants.shape[0],
            participants.certified.sum()
        ))

    print("Total participants: {}".format(len(allStudents)))
import util
import pandas
import cPickle
import pandas
import math
import numpy as np
import sklearn.metrics
import sklearn.linear_model
from common import loadCourseDates, loadData, getCourseStartAndEndDates, NUM_WEEKS_HEURISTIC, getDummiesFixedSet

# Module-level constants.
BATCH_SIZE = 100
# A seven-day span expressed as a numpy timedelta.
WEEK = np.timedelta64(7, 'D')
MIN_EXAMPLES = 10

# Date tables returned by common.loadCourseDates(); the call runs once, when
# this module is imported.  reportBroadStats indexes PREDICTION_DATES_1_0 by
# course id.
START_DATES, PREDICTION_DATES_0_5, PREDICTION_DATES_1_0 = loadCourseDates()

# Commented-out, hard-coded table of per-course start dates.  The live
# START_DATES value is the one assigned from loadCourseDates() above.
#START_DATES = {
#    'HarvardX/SW25x/1T2014': np.datetime64('2014-02-25'),
#    'HarvardX/SW12x/2013_SOND': np.datetime64('2013-10-31'),
#    'HarvardX/SW12.2x/1T2014': np.datetime64('2014-01-02'),
#    'HarvardX/SW12.3x/1T2014': np.datetime64('2014-02-13'),
#    'HarvardX/SW12.4x/1T2014': np.datetime64('2014-03-20'),
#    'HarvardX/SW12.5x/2T2014': np.datetime64('2014-04-24'),
#    'HarvardX/SW12.6x/2T2014': np.datetime64('2014-05-22'),
#    'HarvardX/SW12.7x/3T2014': np.datetime64('2014-09-04'),
#    'HarvardX/SW12.8x/3T2014': np.datetime64('2014-10-09'),
#    'HarvardX/SW12.9x/3T2014': np.datetime64('2014-11-20'),
#    'HarvardX/SW12.10x/1T2015': np.datetime64('2015-01-08'),
#    'HarvardX/PH231x/1T2016': np.datetime64('2016-01-25'),
#    'HarvardX/PH557/3T2015': np.datetime64('2015-12-03'),
#    'HarvardX/PH525.4x/3T2015': np.datetime64('2016-01-15'),