Example #1
0
    def timeseries_grade_percentiles(c, assignment_name, num_points=40):
        """
        Returns a timeseries of grades with percentiles. Here is an example:

            [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
             ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
             ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
             ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
             ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

        Returns None when the assignment name is unknown, and [] when the
        assignment has no successful builds.
        """
        # Percentile ranks sampled every 5 points: 0, 5, ..., 100.
        data_keys = range(0, 105, 5)
        assignment = get_assignment_by_name(assignment_name)
        if not assignment:
            return
        # There is a slight problem that because of DST, ordering by "started" may not always
        # produce the correct result. When the timezone changes, lexicographical order does not
        # match the actual order of the times. However, this only happens once a year in the
        # middle of the night, so we accept the inaccuracy.
        c.execute(
            """SELECT source, score, started FROM builds WHERE job = ? AND status = ?
                     ORDER BY started""",
            [assignment_name, SUCCESS],
        )
        # XXX: There is no easy way to exclude builds started by staff ("super") groups.
        # But because this graph is to show the general trend, it's usually fine if staff builds
        # are included. Plus, the graph only shows up in the admin interface anyway.
        builds = [(source, score, parse_time(started)) for source, score, started in c.fetchall()]
        if not builds:
            return []
        # Materialize these eagerly: on Python 3, map() returns a one-shot iterator,
        # so min() below would exhaust it and the subsequent max() would fail.
        source_set = [b[0] for b in builds]
        started_time_set = [b[2] for b in builds]
        min_started = min(started_time_set)
        max_started = max(started_time_set)
        assignment_min_started = parse_time(assignment.not_visible_before)
        assignment_max_started = parse_time(assignment.due_date)
        # The plotted window spans from the earlier of (first build, assignment
        # visible) to the later of (last build, assignment due).
        data_min = min(min_started, assignment_min_started)
        data_max = max(max_started, assignment_max_started)
        data_points = []
        best_scores_so_far = {source: 0 for source in source_set}
        time_delta = (data_max - data_min) / (num_points - 1)
        current_time = data_min
        for source, score, started_time in builds:
            # Emit a row for every sample time that precedes this build, using the
            # best scores accumulated so far.
            while current_time < started_time:
                # list(...) because np.percentile cannot consume a dict values view
                # on Python 3.
                percentiles = np.percentile(list(best_scores_so_far.values()), data_keys)
                data_points.append([format_js_compatible_time(current_time)] + list(percentiles))
                current_time += time_delta
            if score is not None:
                best_scores_so_far[source] = max(score, best_scores_so_far[source])

        # Pad out the remaining sample times. Rows at or past "now" switch the
        # percentiles to None placeholders, taking effect from the next row on.
        percentiles = list(np.percentile(list(best_scores_so_far.values()), data_keys))
        now_time = now()
        while current_time - (time_delta / 2) < data_max:
            data_points.append([format_js_compatible_time(current_time)] + percentiles)
            if current_time >= now_time:
                percentiles = [None] * len(percentiles)
            current_time += time_delta

        return data_points
Example #2
0
 def _log(self, message, exc=False):
     """Append a timestamped entry to the worker log.

     Each entry is a (timestamp, message, traceback-or-None) tuple; the
     traceback of the current exception is captured when exc is True.
     """
     trace = traceback.format_exc() if exc else None
     entry = (now(), message, trace)
     with self.lock:
         self.log.append(entry)
Example #3
0
    def __init__(self, build_name, source, trigger):
        """Create a new dockergrader job, timestamped at construction time,
        to be added to the queue.

        """
        self.updated = now()
        self.build_name = build_name
        self.source = source
        self.trigger = trigger
Example #4
0
    def timeseries_grade_percentiles(c, assignment_name, num_points=40):
        """
        Returns a timeseries of grades with percentiles. Here is an example:

            [["2015-07-17 19:00:36-0700", 0.0, 0.0, 0.0, ... 0.0, 0.0],
             ["2015-07-17 19:10:36-0700", 0.0, 0.0, 0.0, ... 1.0, 2.0],
             ["2015-07-17 19:20:36-0700", 0.0, 0.0, 0.0, ... 3.0, 4.0],
             ["2015-07-17 19:30:36-0700", 0.0, 0.0, 0.5, ... 5.0, 6.0],
             ["2015-07-17 19:40:36-0700", 0.0, 0.0, 1.0, ... 7.0, 8.0]]

        """
        # Percentile ranks sampled every 5 points: 0, 5, ..., 100.
        percentile_ranks = range(0, 105, 5)
        assignment = get_assignment_by_name(assignment_name)
        if not assignment:
            return
        # Because of DST, ordering by "started" is occasionally wrong: when the
        # timezone offset changes, lexicographical order no longer matches true
        # time order. That only happens once a year in the middle of the night,
        # so we accept the inaccuracy.
        c.execute(
            '''SELECT source, score, started FROM builds WHERE job = ? AND status = ?
                     ORDER BY started''', [assignment_name, SUCCESS])
        # XXX: There is no easy way to exclude builds started by staff ("super")
        # groups. Since this graph only shows the general trend (and only appears
        # in the admin interface), including staff builds is acceptable.
        builds = [(source, score, parse_time(started))
                  for source, score, started in c.fetchall()]
        if not builds:
            return []
        start_times = [build[2] for build in builds]
        # The plotted window spans from the earlier of (first build, assignment
        # visible) to the later of (last build, assignment due).
        window_start = min(min(start_times),
                           parse_time(assignment.not_visible_before))
        window_end = max(max(start_times), parse_time(assignment.due_date))
        step = (window_end - window_start) / (num_points - 1)
        best_by_source = {build[0]: 0 for build in builds}
        rows = []
        cursor_time = window_start
        for source, score, started_time in builds:
            # Before folding this build in, emit a row for every sample time
            # that precedes it, using the best scores seen so far.
            while cursor_time < started_time:
                snapshot = np.percentile(list(best_by_source.values()),
                                         percentile_ranks)
                rows.append([format_js_compatible_time(cursor_time)] +
                            list(snapshot))
                cursor_time += step
            if score is not None:
                best_by_source[source] = max(score, best_by_source[source])

        # Pad out the remaining sample times. Rows at or past "now" switch the
        # percentiles to None placeholders, taking effect from the next row on.
        snapshot = list(np.percentile(list(best_by_source.values()),
                                      percentile_ranks))
        now_moment = now()
        while cursor_time - (step / 2) < window_end:
            rows.append([format_js_compatible_time(cursor_time)] + snapshot)
            if cursor_time >= now_moment:
                snapshot = [None] * len(snapshot)
            cursor_time += step

        return rows
Example #5
0
    def _process_job(self, job):
        """Run a single dockergrader job end to end.

        Marks the queued build IN_PROGRESS, runs the grading handler for the
        job's assignment, records the resulting score and log in the builds
        table, assigns grades (with slip units) to the repo owners, and — when
        the mailer is enabled — emails each owner the outcome. Failures are
        logged both to the worker log (self._log) and the module logger;
        JobFailedError and internal errors mark the build FAILED and return
        early.
        """
        build_name = job.build_name
        with self.lock:
            self.status = build_name
            self.updated = now()

        # Mark the job as In Progress
        # Retries forever on apsw.Error so a transient database problem does not
        # drop the job; breaks out of the loop once the UPDATE succeeds.
        while True:
            try:
                with DbCursor() as c:
                    c.execute('''SELECT source, `commit`, message, job, started FROM builds
                                 WHERE build_name = ? AND status = ? LIMIT 1''',
                              [build_name, QUEUED])
                    row = c.fetchone()
                    if row is None:
                        self._log("Build %s was missing from the database. Skipping." % build_name)
                        return
                    source, commit, message, job_name, started = row
                    owners = get_repo_owners(c, source)
                    # Map owner id -> email; the email is the 6th field of each
                    # user row returned by get_users_by_ids.
                    owner_emails = {owner: email for owner, (_, _, _, _, _, email)
                                    in get_users_by_ids(c, owners).items()}
                    c.execute("UPDATE builds SET status = ?, updated = ? WHERE build_name = ?",
                              [IN_PROGRESS, now_str(), build_name])
                    break
            except apsw.Error:
                self._log("Exception raised while setting status to IN_PROGRESS. Retrying...",
                          exc=True)
                logging.exception("Failed to retrieve next dockergrader job")

        self._log("Started building %s" % build_name)
        try:
            # if the job doesn't exist for some reason, the resulting TypeError will be caught
            # and logged
            assignment = get_assignment_by_name(job_name)
            due_date = assignment.due_date
            job_handler = get_job(job_name)
            log, score = job_handler(source, commit)
            # Ignore any special encoding inside the log, and just treat it as a bytes
            # NOTE(review): `buffer` is the Python 2 builtin; under Python 3 this
            # would need `memoryview` (or bytes) instead — confirm target version.
            log = buffer(log)
            min_score, max_score = assignment.min_score, assignment.max_score
            full_score = assignment.full_score
            # An out-of-range score is treated like any other internal error below.
            if score < min_score or score > max_score:
                raise ValueError("A score of %s is not in the acceptable range of %f to %f" %
                                 (str(score), min_score, max_score))
        except JobFailedError as e:
            # Expected grading failure: record it and (best-effort) email the owners.
            self._log("Failed %s with JobFailedError" % build_name, exc=True)
            with DbCursor() as c:
                c.execute('''UPDATE builds SET status = ?, updated = ?, log = ?
                             WHERE build_name = ?''', [FAILED, now_str(), str(e), build_name])
            if config.mailer_enabled:
                try:
                    for owner in owners:
                        email = owner_emails.get(owner)
                        if not email:
                            continue
                        subject = "%s failed to complete" % build_name
                        send_template("build_failed", email, subject, build_name=build_name,
                                      job_name=job_name, source=source, commit=commit,
                                      message=message, error_message=str(e))
                except Exception:
                    self._log("Exception raised while reporting JobFailedError", exc=True)
                    logging.exception("Exception raised while reporting JobFailedError")
                else:
                    self._log("JobFailedError successfully reported via email")
            return
        except Exception as e:
            # Unexpected failure: mark the build FAILED without leaking details to users.
            self._log("Exception raised while building %s" % build_name, exc=True)
            logging.exception("Internal error within build %s" % build_name)
            with DbCursor() as c:
                c.execute('''UPDATE builds SET status = ?, updated = ?, log = ?
                             WHERE build_name = ?''',
                          [FAILED, now_str(), "Build failed due to an internal error.", build_name])
            return

        self._log("Autograder build %s complete (score: %s)" % (build_name, str(score)))

        # Persist the success and assign grades; retried on apsw.Error like above,
        # except that a failure here gives up (return) after logging.
        while True:
            try:
                with DbCursor() as c:
                    c.execute('''UPDATE builds SET status = ?, score = ?, updated = ?,
                                 log = ? WHERE build_name = ?''',
                              [SUCCESS, score, now_str(), log, build_name])
                    slipunits = slip_units(due_date, started)
                    affected_users = assign_grade_batch(c, owners, job_name, float(score),
                                                        slipunits, build_name, "Automatic build.",
                                                        "autograder", dont_lower=True)
                    break
            except apsw.Error:
                self._log("Exception raised while assigning grades", exc=True)
                logging.exception("Failed to update build %s after build completed" % build_name)
                return

        # Best-effort result email to each owner, noting slip-unit usage and
        # whether the build actually affected their grade.
        if config.mailer_enabled:
            try:
                for owner in owners:
                    email = owner_emails.get(owner)
                    if not email:
                        continue
                    subject = "%s complete - score %s / %s" % (build_name, str(score),
                                                               str(full_score))
                    if owner not in affected_users:
                        subject += " (no effect on grade)"
                    else:
                        if slipunits == 1:
                            subject += " (1 %s used)" % config.slip_unit_name_singular
                        elif slipunits > 0:
                            subject += " (%s slip %s used)" % (str(slipunits),
                                                               config.slip_unit_name_plural)
                    send_template("build_finished", email, subject, build_name=build_name,
                                  job_name=job_name, score=score, full_score=str(full_score),
                                  slipunits=slipunits, log=log, source=source, commit=commit,
                                  message=message, affected=(owner in affected_users))
            except Exception:
                self._log("Exception raised while reporting grade", exc=True)
                logging.exception("Exception raised while reporting grade")
            else:
                self._log("Grade successfully reported via email")
Example #6
0
 def _dequeue_job(self):
     """Mark this worker idle, then block until the shared queue yields a job."""
     with self.lock:
         self.updated = now()
         self.status = None
     self._log("Waiting for a new job to run")
     job = dockergrader_queue.dequeue()
     return job
Example #7
0
 def __init__(self):
     """Initialize worker state and register with the shared dockergrader queue."""
     self.status = None
     self.lock = threading.Lock()
     # Bounded log: only the most recent 100 entries are retained.
     self.log = deque(maxlen=100)
     self.updated = now()
     # Registering with the queue yields this worker's identifier.
     self.identifier = dockergrader_queue.register_worker(self)