def reportBroadStats ():
	"""Print one summary row per course, then the overall participant count.

	Reads the course-to-discipline mapping from 'course_to_discipline.csv' and
	the pickled per-course results from 'results_prong1.pkl'. For every course
	whose results entry is non-empty, loads that course's person_course.csv.gz
	and prints a row of the form

	    courseId & discipline & numRegistrants & numViewers & numCertified\\\\

	where
	  * numRegistrants counts rows whose start_time precedes the course's
	    PREDICTION_DATES_1_0 entry,
	  * numViewers counts those rows that additionally have viewed == 1,
	  * numCertified is the sum of the `certified` column over the viewer rows.

	Finally prints the number of distinct usernames seen across all processed
	courses (every username in each file, not only the filtered rows).

	Returns nothing; all output goes to stdout.
	"""
	disciplines = pandas.read_csv('course_to_discipline.csv')
	# If a course_id appears more than once, the last row wins.
	courseToDiscipline = dict(zip(disciplines.course_id, disciplines.discipline_grouping))

	# Only the third value returned by loadCourseDates() is used here.
	_, _, PREDICTION_DATES_1_0 = loadCourseDates()

	# NOTE(review): unpickling can execute arbitrary code, so results_prong1.pkl
	# must come from a trusted source.
	with open("results_prong1.pkl", "rb") as resultsFile:
		resultsRepeatedCourse = cPickle.load(resultsFile)

	# Must be an empty set *instance*; the bare `set` type made len() fail
	# whenever no course had any results.
	allStudents = set()
	for courseId, courseResults in resultsRepeatedCourse.items():
		if len(courseResults) == 0:
			continue

		directory = CHARLESRIVERX_COURSE_ROOT + "/" + courseId.replace(HARVARDX, "").replace("/", "-")
		pc = pandas.read_csv(directory + "/" + "person_course.csv.gz")
		# presumably parses the start_time column into datetimes -- confirm in convertTimes
		pc = convertTimes(pc, "start_time")

		registeredInTime = pc.start_time < PREDICTION_DATES_1_0[courseId]
		numRegistrants = np.sum(registeredInTime)

		# Collected before filtering, so every username in the file is counted.
		allStudents.update(pc.username)

		viewers = pc[registeredInTime & (pc.viewed == 1)]
		print("{} & {} & {} & {} & {}\\\\".format(
			courseId, courseToDiscipline[courseId], numRegistrants, viewers.shape[0], viewers.certified.sum()
		))
	print("Total participants: {}".format(len(allStudents)))
import util
import pandas
import cPickle
import pandas
import math
import numpy as np
import sklearn.metrics
import sklearn.linear_model
from common import loadCourseDates, loadData, getCourseStartAndEndDates, NUM_WEEKS_HEURISTIC, getDummiesFixedSet

# Batch size constant; its consumer is not visible in this part of the file.
BATCH_SIZE = 100
# One week, expressed as a 7-day numpy timedelta.
WEEK = np.timedelta64(7, 'D')
# Minimum-example threshold; NOTE(review): confirm where this is applied.
MIN_EXAMPLES = 10

# Loaded once at import time via common.loadCourseDates().
# PREDICTION_DATES_1_0 is indexed by course id in reportBroadStats; the other
# two are presumably keyed the same way -- TODO confirm against common.py.
START_DATES, PREDICTION_DATES_0_5, PREDICTION_DATES_1_0 = loadCourseDates()

#START_DATES = {
#	'HarvardX/SW25x/1T2014': np.datetime64('2014-02-25'),
#	'HarvardX/SW12x/2013_SOND': np.datetime64('2013-10-31'),
#	'HarvardX/SW12.2x/1T2014': np.datetime64('2014-01-02'),
#	'HarvardX/SW12.3x/1T2014': np.datetime64('2014-02-13'),
#	'HarvardX/SW12.4x/1T2014': np.datetime64('2014-03-20'),
#	'HarvardX/SW12.5x/2T2014': np.datetime64('2014-04-24'),
#	'HarvardX/SW12.6x/2T2014': np.datetime64('2014-05-22'),
#	'HarvardX/SW12.7x/3T2014': np.datetime64('2014-09-04'),
#	'HarvardX/SW12.8x/3T2014': np.datetime64('2014-10-09'),
#	'HarvardX/SW12.9x/3T2014': np.datetime64('2014-11-20'),
#	'HarvardX/SW12.10x/1T2015': np.datetime64('2015-01-08'),
#	'HarvardX/PH231x/1T2016': np.datetime64('2016-01-25'),
#	'HarvardX/PH557/3T2015': np.datetime64('2015-12-03'),
#	'HarvardX/PH525.4x/3T2015': np.datetime64('2016-01-15'),