def timeseries_grade_percentiles(c, assignment_name, num_points=40):
    """
    Returns a timeseries of grades with percentiles.

    Each row is a timestamp string followed by the 0th, 5th, ..., 100th
    percentile (21 values) of the best score seen so far for every source
    that has at least one matching build. Sources start at 0 until their
    first non-NULL score is reached. Here is an example:

        [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
         ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
         ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
         ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
         ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

    :param c: database cursor-like object providing ``execute`` and
        ``fetchall`` (uses ``?`` placeholders -- presumably sqlite3; confirm)
    :param assignment_name: name of the assignment; also used as the ``job``
        value when selecting builds
    :param num_points: number of evenly spaced samples across the time range;
        the spacing is computed with ``num_points - 1`` as the divisor, so a
        value of 1 raises ZeroDivisionError
    :returns: ``None`` if the assignment cannot be found, ``[]`` if there are
        no matching builds, otherwise the list of rows described above. Rows
        that come after the first sample at or past the current time carry
        ``None`` in place of every percentile.
    """
    data_keys = range(0, 105, 5)
    assignment = get_assignment_by_name(assignment_name)
    if not assignment:
        return

    # There is a slight problem that because of DST, ordering by "started" may not always
    # produce the correct result. When the timezone changes, lexicographical order does not
    # match the actual order of the times. However, this only happens once a year in the
    # middle of the night, so it is accepted as a known limitation.
    c.execute(
        """SELECT source, score, started FROM builds WHERE job = ? AND status = ? ORDER BY started""",
        [assignment_name, SUCCESS],
    )

    # XXX: There is no easy way to exclude builds started by staff ("super") groups.
    # But because this graph is to show the general trend, it's usually fine if staff builds
    # are included. Plus, the graph only shows up in the admin interface anyway.
    builds = [(source, score, parse_time(started)) for source, score, started in c.fetchall()]
    if not builds:
        return []

    # Build real lists rather than using map(): on Python 3 map() returns a
    # one-shot iterator, so min() would exhaust it and the following max()
    # would fail with "max() arg is an empty sequence".
    source_set = [build[0] for build in builds]
    started_time_set = [build[2] for build in builds]
    min_started = min(started_time_set)
    max_started = max(started_time_set)

    # The plotted range covers the assignment window as well as any builds
    # that fall outside it.
    assignment_min_started = parse_time(assignment.not_visible_before)
    assignment_max_started = parse_time(assignment.due_date)
    data_min = min(min_started, assignment_min_started)
    data_max = max(max_started, assignment_max_started)

    data_points = []
    best_scores_so_far = {source: 0 for source in source_set}
    time_delta = (data_max - data_min) / (num_points - 1)
    current_time = data_min

    for source, score, started_time in builds:
        # Emit every sample that lies strictly before this build, using the
        # scores accumulated from the earlier builds only.
        while current_time < started_time:
            # list(...) because a Python 3 dict view is not a sequence NumPy
            # can convert into a numeric array.
            percentiles = np.percentile(list(best_scores_so_far.values()), data_keys)
            data_points.append([format_js_compatible_time(current_time)] + list(percentiles))
            current_time += time_delta
        if score is not None:
            best_scores_so_far[source] = max(score, best_scores_so_far[source])

    # After the last build the percentiles no longer change.
    percentiles = list(np.percentile(list(best_scores_so_far.values()), data_keys))
    now_time = now()

    # Half a step of slack on the end condition -- presumably so the final
    # sample near data_max is still emitted despite rounding in time_delta.
    while current_time - (time_delta / 2) < data_max:
        data_points.append([format_js_compatible_time(current_time)] + percentiles)
        if current_time >= now_time:
            # Everything after this sample lies in the future: blank it out.
            percentiles = [None] * len(percentiles)
        current_time += time_delta

    return data_points
def timeseries_grade_percentiles(c, assignment_name, num_points=40):
    """
    Build a grade-percentile timeseries for one assignment.

    The result is a list of rows. Every row starts with a timestamp string
    and continues with the 0th, 5th, ..., 100th percentile (21 values) of the
    best score each source has reached by that time; a source counts as 0
    until one of its builds with a non-NULL score has been passed. Example:

        [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
         ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
         ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
         ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
         ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

    Returns None when the assignment does not exist and [] when no build
    matches. Rows following the first sample at or after the current time
    hold None instead of percentile values.

    NOTE: the sample spacing divides by ``num_points - 1``, so
    ``num_points == 1`` raises ZeroDivisionError.
    """
    percentile_ranks = range(0, 105, 5)

    assignment = get_assignment_by_name(assignment_name)
    if not assignment:
        return

    # Known limitation: "started" is ordered lexicographically, which can
    # disagree with chronological order around a DST switch. That only
    # happens once a year in the middle of the night, so it is tolerated.
    c.execute(
        '''SELECT source, score, started FROM builds WHERE job = ? AND status = ? ORDER BY started''',
        [assignment_name, SUCCESS])

    # XXX: Builds started by staff ("super") groups cannot easily be filtered
    # out. The graph only shows the general trend and is only visible in the
    # admin interface, so including them is usually fine.
    rows = [(src, pts, parse_time(raw_started))
            for src, pts, raw_started in c.fetchall()]
    if not rows:
        return []

    sources = [row[0] for row in rows]
    start_times = [row[2] for row in rows]
    earliest_build = min(start_times)
    latest_build = max(start_times)

    # Stretch the plotted range to the assignment window when it is wider
    # than the span of the recorded builds.
    window_open = parse_time(assignment.not_visible_before)
    window_close = parse_time(assignment.due_date)
    range_start = min(earliest_build, window_open)
    range_end = max(latest_build, window_close)

    best = dict.fromkeys(sources, 0)

    def snapshot():
        # Percentiles over the best scores recorded so far.
        return list(np.percentile(tuple(best.values()), percentile_ranks))

    series = []
    step = (range_end - range_start) / (num_points - 1)
    cursor = range_start

    for src, pts, started_at in rows:
        # Flush all samples strictly earlier than this build first, so they
        # only reflect the builds processed before it.
        while cursor < started_at:
            series.append([format_js_compatible_time(cursor)] + snapshot())
            cursor += step
        if pts is None:
            continue
        best[src] = max(pts, best[src])

    # No more builds: the percentiles stay fixed for the remaining samples.
    tail_values = snapshot()
    current_moment = now()
    half_step = step / 2

    while cursor - half_step < range_end:
        series.append([format_js_compatible_time(cursor)] + tail_values)
        if cursor >= current_moment:
            # Later samples are in the future; report them as unknown.
            tail_values = [None] * len(tail_values)
        cursor += step

    return series
def test_time_functions(self):
    """A timestamp string must survive a parse_time/format_time round trip."""
    original = now_str()
    round_tripped = format_time(parse_time(original))
    self.assertEqual(original, round_tripped)