def timeseries_grade_percentiles(c, assignment_name, num_points=40):
    """Return a timeseries of grade percentiles for an assignment.

    Each data point is a list of the form:
        [formatted_time, p0, p5, p10, ..., p95, p100]
    i.e. a timestamp followed by the 0th through 100th percentiles (in steps
    of 5) of every group's best score seen up to that time. Example:

    [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
     ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
     ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
     ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
     ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

    Args:
        c: An open database cursor.
        assignment_name: The job/assignment name whose builds are queried.
        num_points: Number of evenly-spaced sample times to emit.

    Returns:
        A list of data points, [] when there are no successful builds, or
        None when the assignment does not exist.
    """
    data_keys = range(0, 105, 5)
    assignment = get_assignment_by_name(assignment_name)
    if not assignment:
        return
    # There is a slight problem that because of DST, ordering by "started" may not always
    # produce the correct result. When the timezone changes, lexicographical order does not
    # match the actual order of the times. However, this only happens once a year in the
    # middle of the night, so the inaccuracy is accepted.
    c.execute(
        """SELECT source, score, started FROM builds
           WHERE job = ? AND status = ? ORDER BY started""",
        [assignment_name, SUCCESS],
    )
    # XXX: There is no easy way to exclude builds started by staff ("super") groups.
    # But because this graph is to show the general trend, it's usually fine if staff builds
    # are included. Plus, the graph only shows up in the admin interface anyway.
    builds = [(source, score, parse_time(started))
              for source, score, started in c.fetchall()]
    if not builds:
        return []
    # Materialize these eagerly: a bare map() is a one-shot iterator on Python 3,
    # and started_times is consumed twice (min and max) below.
    sources = [b[0] for b in builds]
    started_times = [b[2] for b in builds]
    min_started = min(started_times)
    max_started = max(started_times)
    assignment_min_started = parse_time(assignment.not_visible_before)
    assignment_max_started = parse_time(assignment.due_date)
    # The sampled window covers both the builds and the assignment's lifespan.
    data_min = min(min_started, assignment_min_started)
    data_max = max(max_started, assignment_max_started)
    data_points = []
    best_scores_so_far = {source: 0 for source in sources}
    time_delta = (data_max - data_min) / (num_points - 1)
    current_time = data_min
    for source, score, started_time in builds:
        # Emit a sample for every tick that precedes this build, then fold the
        # build's score into the running per-group maxima.
        while current_time < started_time:
            # np.percentile needs a sequence, not a dict view — wrap in list().
            percentiles = np.percentile(list(best_scores_so_far.values()), data_keys)
            data_points.append([format_js_compatible_time(current_time)] + list(percentiles))
            current_time += time_delta
        if score is not None:
            best_scores_so_far[source] = max(score, best_scores_so_far[source])
    percentiles = list(np.percentile(list(best_scores_so_far.values()), data_keys))
    now_time = now()
    # Pad the remaining ticks out to the end of the window; ticks at or after
    # "now" are filled with None placeholders (future points have no data).
    while current_time - (time_delta / 2) < data_max:
        data_points.append([format_js_compatible_time(current_time)] + percentiles)
        if current_time >= now_time:
            percentiles = [None] * len(percentiles)
        current_time += time_delta
    return data_points
def _log(self, message, exc=False):
    """Append a timestamped entry to this worker's in-memory log.

    Args:
        message: Human-readable description of the event.
        exc: When True, attach the currently-handled exception's traceback.
    """
    trace = traceback.format_exc() if exc else None
    entry = (now(), message, trace)
    with self.lock:
        self.log.append(entry)
def __init__(self, build_name, source, trigger):
    """Initialize a dockergrader job destined for the build queue."""
    self.updated = now()
    self.build_name = build_name
    self.source = source
    self.trigger = trigger
def timeseries_grade_percentiles(c, assignment_name, num_points=40):
    """Build a percentile timeseries of best-so-far grades for an assignment.

    Each row is [formatted_time, p0, p5, ..., p100]: a timestamp followed by
    the 0th-100th percentiles (step 5) of every group's best score at that
    point in time. Example:

    [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
     ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
     ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
     ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
     ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

    Returns a list of rows, [] when no successful builds exist, or None when
    the assignment is unknown.
    """
    percentile_keys = range(0, 105, 5)
    assignment = get_assignment_by_name(assignment_name)
    if not assignment:
        return
    # Ordering by "started" is lexicographic, which can disagree with
    # chronological order across a DST transition. That happens once a year
    # in the middle of the night, so the inaccuracy is accepted.
    c.execute(
        '''SELECT source, score, started FROM builds
           WHERE job = ? AND status = ? ORDER BY started''',
        [assignment_name, SUCCESS])
    # XXX: Staff ("super") group builds cannot easily be filtered out here.
    # Since this chart only illustrates the overall trend and is shown solely
    # in the admin interface, including them is acceptable.
    builds = [(src, score, parse_time(started))
              for src, score, started in c.fetchall()]
    if not builds:
        return []
    sources = tuple(build[0] for build in builds)
    start_times = tuple(build[2] for build in builds)
    # The sampled window spans both the builds and the assignment lifespan.
    window_start = min(min(start_times), parse_time(assignment.not_visible_before))
    window_end = max(max(start_times), parse_time(assignment.due_date))
    rows = []
    best_scores = {src: 0 for src in sources}
    step = (window_end - window_start) / (num_points - 1)
    tick = window_start
    for src, score, started_at in builds:
        # Emit a row for every tick before this build, then fold the build's
        # score into the per-group running maxima.
        while tick < started_at:
            pcts = np.percentile(tuple(best_scores.values()), percentile_keys)
            rows.append([format_js_compatible_time(tick)] + list(pcts))
            tick += step
        if score is not None:
            best_scores[src] = max(score, best_scores[src])
    pcts = list(np.percentile(tuple(best_scores.values()), percentile_keys))
    current_wall_time = now()
    # Pad out the remaining ticks; ticks at or after "now" become None rows.
    while tick - (step / 2) < window_end:
        rows.append([format_js_compatible_time(tick)] + pcts)
        if tick >= current_wall_time:
            pcts = [None] * len(pcts)
        tick += step
    return rows
def _process_job(self, job):
    """Run a single dockergrader build job end-to-end.

    Transitions the build row from QUEUED to IN_PROGRESS, runs the grading
    handler, records the result (SUCCESS with a score and grade assignment,
    or FAILED with a log message), and optionally emails the repo owners.
    Database writes are retried indefinitely on apsw.Error, except the final
    score update, which gives up after logging.
    """
    build_name = job.build_name
    with self.lock:
        self.status = build_name
        self.updated = now()
    # Mark the job as In Progress
    while True:
        try:
            with DbCursor() as c:
                c.execute('''SELECT source, `commit`, message, job, started FROM builds
                             WHERE build_name = ? AND status = ? LIMIT 1''',
                          [build_name, QUEUED])
                row = c.fetchone()
                if row is None:
                    self._log("Build %s was missing from the database. Skipping." % build_name)
                    return
                source, commit, message, job_name, started = row
                # Resolve owners and their email addresses now, inside the same
                # transaction, so the later notification steps need no re-query.
                owners = get_repo_owners(c, source)
                owner_emails = {owner: email
                                for owner, (_, _, _, _, _, email)
                                in get_users_by_ids(c, owners).items()}
                c.execute("UPDATE builds SET status = ?, updated = ? WHERE build_name = ?",
                          [IN_PROGRESS, now_str(), build_name])
                break
        except apsw.Error:
            # Retry forever: losing the status transition would strand the build.
            self._log("Exception raised while setting status to IN_PROGRESS. Retrying...",
                      exc=True)
            logging.exception("Failed to retrieve next dockergrader job")
    self._log("Started building %s" % build_name)
    try:
        # if the job doesn't exist for some reason, the resulting TypeError will be caught
        # and logged
        assignment = get_assignment_by_name(job_name)
        due_date = assignment.due_date
        job_handler = get_job(job_name)
        log, score = job_handler(source, commit)
        # Ignore any special encoding inside the log, and just treat it as a bytes
        # NOTE(review): buffer() is a Python 2 builtin — confirm this module
        # still targets Python 2 (memoryview is the Python 3 equivalent).
        log = buffer(log)
        min_score, max_score = assignment.min_score, assignment.max_score
        full_score = assignment.full_score
        # Reject scores outside the assignment's configured bounds; handled by
        # the generic except clause below as an internal error.
        if score < min_score or score > max_score:
            raise ValueError("A score of %s is not in the acceptable range of %f to %f" %
                             (str(score), min_score, max_score))
    except JobFailedError as e:
        # Expected failure mode: the grader itself reported the build as bad.
        # Record the error text as the build log and notify owners by email.
        self._log("Failed %s with JobFailedError" % build_name, exc=True)
        with DbCursor() as c:
            c.execute('''UPDATE builds SET status = ?, updated = ?, log = ?
                         WHERE build_name = ?''',
                      [FAILED, now_str(), str(e), build_name])
        if config.mailer_enabled:
            try:
                for owner in owners:
                    email = owner_emails.get(owner)
                    if not email:
                        continue
                    subject = "%s failed to complete" % build_name
                    send_template("build_failed", email, subject, build_name=build_name,
                                  job_name=job_name, source=source, commit=commit,
                                  message=message, error_message=str(e))
            except Exception:
                self._log("Exception raised while reporting JobFailedError", exc=True)
                logging.exception("Exception raised while reporting JobFailedError")
            else:
                self._log("JobFailedError successfully reported via email")
        return
    except Exception as e:
        # Unexpected failure: mark FAILED with a generic message (no email).
        self._log("Exception raised while building %s" % build_name, exc=True)
        logging.exception("Internal error within build %s" % build_name)
        with DbCursor() as c:
            c.execute('''UPDATE builds SET status = ?, updated = ?, log = ?
                         WHERE build_name = ?''',
                      [FAILED, now_str(), "Build failed due to an internal error.", build_name])
        return
    self._log("Autograder build %s complete (score: %s)" % (build_name, str(score)))
    # Persist the success and assign grades atomically; retry on DB errors.
    while True:
        try:
            with DbCursor() as c:
                c.execute('''UPDATE builds SET status = ?, score = ?, updated = ?, log = ?
                             WHERE build_name = ?''',
                          [SUCCESS, score, now_str(), log, build_name])
                slipunits = slip_units(due_date, started)
                affected_users = assign_grade_batch(c, owners, job_name, float(score),
                                                    slipunits, build_name, "Automatic build.",
                                                    "autograder", dont_lower=True)
                break
        except apsw.Error:
            self._log("Exception raised while assigning grades", exc=True)
            logging.exception("Failed to update build %s after build completed" % build_name)
            return
    if config.mailer_enabled:
        try:
            for owner in owners:
                email = owner_emails.get(owner)
                if not email:
                    continue
                subject = "%s complete - score %s / %s" % (build_name, str(score),
                                                           str(full_score))
                # Annotate the subject with grade impact and slip-unit usage.
                if owner not in affected_users:
                    subject += " (no effect on grade)"
                else:
                    if slipunits == 1:
                        subject += " (1 %s used)" % config.slip_unit_name_singular
                    elif slipunits > 0:
                        subject += " (%s slip %s used)" % (str(slipunits),
                                                           config.slip_unit_name_plural)
                send_template("build_finished", email, subject, build_name=build_name,
                              job_name=job_name, score=score, full_score=str(full_score),
                              slipunits=slipunits, log=log, source=source, commit=commit,
                              message=message, affected=(owner in affected_users))
        except Exception:
            self._log("Exception raised while reporting grade", exc=True)
            logging.exception("Exception raised while reporting grade")
        else:
            self._log("Grade successfully reported via email")
def _dequeue_job(self):
    """Mark this worker idle, then block until the queue yields a job."""
    with self.lock:
        self.updated = now()
        self.status = None
    self._log("Waiting for a new job to run")
    next_job = dockergrader_queue.dequeue()
    return next_job
def __init__(self):
    """Set up worker state and register with the global grader queue."""
    self.status = None
    self.lock = threading.Lock()
    self.log = deque(maxlen=100)
    self.updated = now()
    # Register last, once the worker's attributes are fully initialized.
    self.identifier = dockergrader_queue.register_worker(self)