Esempio n. 1
0
 def record_monitor_known_counts_line(self):
     """
     append a header line to the monitor counts file

     the line is '# ' followed by the space-separated names
     found in self.known_counts; as it starts with a '#', it reads
     as a comment

     this is best-effort: any failure to write is logged
     and not propagated to the caller
     """
     path = self.monitor_counts_path()
     try:
         with path.open('a') as f:
             f.write("# " + " ".join(self.known_counts) + "\n")
     except Exception:
         logger.exception("Cannot store headers line into {}".format(path))
Esempio n. 2
0
    def monitor_counts(self):
        """
        read the counts file for that course and produce
        data arrays suitable for plotly

        returns a dictionary with the following keys
        * 'timestamps' : the times where monitor reported the figures
        plus, for known counts as listed in known_counts,
          made plural by adding a 's', so e.g.
        * 'running_jupyters': running containers
        * 'total_jupyters' :  total containers
        * ...

        all the returned lists have the same length: a line contributes
        to the result only once it has been fully validated
        * lines starting with '#' are ignored
        * lines with more values than known counts are logged and skipped
        * lines that are empty or hold a non-integer value
          are logged and skipped
        * lines with fewer values than known counts are padded
          with None for the trailing counts

        a missing counts file silently results in empty lists; any other
        failure while reading is logged, and whatever was read so far
        is returned
        """
        counts_path = self.monitor_counts_path()
        known_counts = self.known_counts
        max_counts = len(known_counts)
        timestamps = []
        counts = {count: [] for count in known_counts}
        try:
            with counts_path.open() as f:
                for lineno, line in enumerate(f, 1):
                    if line.startswith('#'):
                        # ignore any comment
                        continue
                    try:
                        timestamp, *values = line.split()
                        # each line should have at most len(known_counts)
                        # and should all contain integers
                        if len(values) > max_counts:
                            logger.error(
                                "{}:{}: counts line has too many fields - {} > {}"
                                .format(counts_path, lineno, len(values),
                                        max_counts))
                            continue
                        ivalues = [int(v) for v in values]
                    except ValueError:
                        logger.exception(
                            "{}:{}: skipped misformed counts line - {}".format(
                                counts_path, lineno, line))
                        continue
                    # fill in for missing values
                    ivalues += [None] * (max_counts - len(ivalues))
                    # record the timestamp only now that the line is known
                    # to be valid, so that all series remain aligned
                    timestamps.append(timestamp)
                    for count, ivalue in zip(known_counts, ivalues):
                        counts[count].append(ivalue)
        except FileNotFoundError:
            # nothing recorded yet for that course: return empty series
            pass
        except Exception:
            logger.exception(
                "could not read counts file {}".format(counts_path))

        # add as many keys (with an extra 's') as we have known keys
        result = {"{}s".format(count): counts[count] for count in known_counts}
        # do not forget the timestamps
        result['timestamps'] = timestamps
        return result
Esempio n. 3
0
 def record_monitor_counts(self, *args):
     """
     append one line to the monitor counts file, made of a timestamp
     (current time as per time.gmtime(), formatted with time_format)
     followed by the space-separated values passed in args

     args: the values to record; there should be at most
           as many of them as there are names in self.known_counts

     when too many values are passed, an error is logged and
     nothing is written to the file

     this is best-effort: any failure to write is logged
     and not propagated to the caller
     """
     path = self.monitor_counts_path()
     if len(args) > len(self.known_counts):
         logger.error(
             "too many arguments to counts line - dropped {} from {}".
             format(args, path))
         # as announced in the message, the oversized line is dropped
         return
     timestamp = time.strftime(time_format, time.gmtime())
     try:
         with path.open('a') as f:
             f.write("{} {}\n".format(timestamp,
                                      " ".join(str(arg) for arg in args)))
     except Exception:
         logger.exception("Cannot store counts line into {}".format(path))
Esempio n. 4
0
 def _write_events_line(self, student, notebook, action, port):
     """
     append one line to the notebook events file

     the line holds 6 space-separated fields, in this order:
     timestamp (current time as per time.gmtime(), formatted with
     time_format), self.course, student, notebook, action and port

     this is best-effort: any failure to write is logged
     and not propagated to the caller
     """
     now = time.strftime(time_format, time.gmtime())
     events_file = self.notebook_events_path()
     course_name = self.course
     try:
         with events_file.open("a") as out:
             fields = (now, course_name, student, notebook, action, port)
             record = " ".join("{}".format(field) for field in fields)
             out.write(record + "\n")
     except Exception:
         logger.exception(
             "Cannot store stats line into {}".format(events_file))
Esempio n. 5
0
    def material_usage(self):
        """
        read the events file and produce data about relations
        between notebooks and students
        remember we cannot serialize a set, plus a sorted result is better

        returns a dictionary with the following keys
        'nbnotebooks' : how many notebooks are considered
        'nbstudents' : how many students are considered (test students are removed..)
        'nbstudents_per_notebook' : a sorted list of tuples (notebook, nb_students)
                                  how many students have read this notebook
        'nbstudents_per_notebook_animated' : same but animated over time
        'nbstudents_per_nbnotebooks' : a sorted list of tuples (nb_notebooks, nb_students)
                                  how many students have read exactly that number of notebooks
        'heatmap' : a complete matrix notebook x student ready to feed to plotly.heatmap
                    comes with 'x', 'y' and 'z' keys, plus 'zmin' and 'zmax'
                    'x' lists the notebooks and 'y' the students, in the order
                    used for the columns and rows of 'z' respectively;
                    a cell is None when that student never opened that notebook;
                    'zmin' and 'zmax' are None when there is no data at all

        lines with fewer than 5 fields are logged and skipped;
        any other failure while reading the file is logged, and the
        result is built from whatever could be read before the failure
        """

        events_path = self.notebook_events_path()
        # a dict notebook -> set of students
        set_by_notebook = defaultdict(set)
        nbstudents_per_notebook_buckets = TimeBuckets(grain=timedelta(hours=6),
                                                      time_format=time_format)
        # a dict student -> set of notebooks
        set_by_student = defaultdict(set)
        # a dict hashed on a tuple (notebook, student) -> number of visits
        raw_counts = defaultdict(int)
        #
        staff = CourseDir(self.course).staff
        try:
            with events_path.open() as f:
                for lineno, line in enumerate(f, 1):
                    fields = line.split()
                    # a single misformed line must not abort the whole scan
                    if len(fields) < 5:
                        logger.error(
                            "{}:{}: skipped misformed events line :{}".format(
                                events_path, lineno, line))
                        continue
                    date, _, student, notebook, action = fields[:5]
                    # action 'killing' needs to be ignored
                    if action in ('killing', ):
                        continue
                    # ignore staff or other artefact users
                    if student in staff or not edx_hash_regexp.match(student):
                        logger.debug(
                            "ignoring staff or artefact student {}".format(
                                student))
                        continue
                    # animated data must be taken care of before anything else
                    previous, upcoming, changed = \
                        nbstudents_per_notebook_buckets.prepare(date)
                    if changed:
                        nspn = [(nb, len(set_by_notebook[nb]))
                                for nb in sorted(set_by_notebook)]
                        nbstudents_per_notebook_buckets.record_data(
                            nspn, previous, upcoming)
                    set_by_notebook[notebook].add(student)
                    set_by_student[student].add(notebook)
                    raw_counts[notebook, student] += 1
        except Exception:
            logger.exception(
                "could not read {} to count students per notebook".format(
                    events_path))

        # no 'finally: return' here, as that would silently discard
        # KeyboardInterrupt and the like
        nbstudents_per_notebook = [(notebook, len(set_by_notebook[notebook]))
                                   for notebook in sorted(set_by_notebook)]
        nb_by_student = {
            student: len(notebooks)
            for (student, notebooks) in set_by_student.items()
        }

        nbstudents_per_notebook_animated = nbstudents_per_notebook_buckets.wrap(
            nbstudents_per_notebook)

        # counting in the other direction is surprisingly tedious
        nbstudents_per_nbnotebooks = [
            (number, iter_len(v))
            for (number,
                 v) in itertools.groupby(sorted(nb_by_student.values()))
        ]

        # the heatmap
        heatmap_notebooks = sorted(set_by_notebook)
        # total number of visits per student, all notebooks together
        visits_by_student = defaultdict(int)
        for (_, student), visits in raw_counts.items():
            visits_by_student[student] += visits
        # sort students on their total number of visits, alphabetically
        # in case of a tie; the rows of 'z' are built in that same order
        # so that the 'y' labels match the rows they describe
        heatmap_students = sorted(
            set_by_student,
            key=lambda student: (visits_by_student[student], student))
        # a first attempt at showing the number of times a given notebook was open
        # by a given student resulted in poor outcome
        # problem being mostly with colorscale, we'd need to have '0' stick out
        # as transparent or something, but OTOH sending None instead or 0
        heatmap_z = [[
            raw_counts.get((notebook, student), None)
            for notebook in heatmap_notebooks
        ] for student in heatmap_students]

        # raw_counts only holds the non-empty cells of the matrix, so its
        # values are exactly what the color scale has to span;
        # default=None avoids a ValueError when there is no data at all
        zmax = max(raw_counts.values(), default=None)
        zmin = min(raw_counts.values(), default=None)

        return {
            'nbnotebooks': len(set_by_notebook),
            'nbstudents': len(set_by_student),
            'nbstudents_per_notebook': nbstudents_per_notebook,
            'nbstudents_per_notebook_animated':
            nbstudents_per_notebook_animated,
            'nbstudents_per_nbnotebooks': nbstudents_per_nbnotebooks,
            'heatmap': {
                'x': heatmap_notebooks,
                'y': heatmap_students,
                'z': heatmap_z,
                'zmin': zmin,
                'zmax': zmax,
            },
        }
Esempio n. 6
0
    def daily_metrics(self):
        """
        read the events file for that course and produce
        data arrays suitable for being composed under plotly

        returns a dict with the following components
        * 'daily': { 'timestamps', 'new_students', 'new_notebooks',
                     'unique_students', 'unique_notebooks' }
          - all 5 same size
          (one per day, time is always 23:59:59)
        * 'events': { 'timestamps', 'total_students', 'total_notebooks' }
           - all 3 same size

        each line in the events file is expected to hold exactly 6 fields
        (timestamp, course, student, notebook, action, port), the day being
        the part of the timestamp that comes before its 'T';
        lines that cannot be processed are logged and skipped

        lines whose action is 'killing', as well as the ones about staff
        or about students that do not match edx_hash_regexp, are ignored

        any other failure while reading the file is logged, and the
        result is built from whatever could be read before the failure
        """

        events_path = self.notebook_events_path()
        # a dictionary day -> figures
        figures_by_day = OrderedDict()
        current_figures = DailyFigures()
        # the events dimension
        accumulator = TotalsAccumulator()
        #
        staff = CourseDir(self.course).staff
        try:
            with events_path.open() as f:
                for lineno, line in enumerate(f, 1):
                    try:
                        (timestamp, _course, student, notebook, action,
                         _port) = line.split()
                        # if action is 'killing' then notebook is '-'
                        # which should not be counted as a notebook of course
                        # so let's ignore these lines altogether
                        if action == 'killing':
                            continue
                        # ignore staff or other artefact users
                        if student in staff or not edx_hash_regexp.match(
                                student):
                            continue
                        day = timestamp.split('T')[0] + ' 23:59:59'
                        if day in figures_by_day:
                            current_figures = figures_by_day[day]
                        else:
                            # first event for that day: close the figures
                            # at hand and start new ones on top of them
                            current_figures.wrap()
                            current_figures = DailyFigures(current_figures)
                            figures_by_day[day] = current_figures
                        current_figures.add_notebook(notebook)
                        current_figures.add_student(student)
                        accumulator.insert(
                            timestamp, current_figures.nb_total_students(),
                            current_figures.nb_total_notebooks())
                    except Exception:
                        logger.exception(
                            "{}:{}: skipped misformed events line :{}".format(
                                events_path, lineno, line))
        except Exception:
            logger.exception("unexpected exception in daily_metrics")

        # no 'finally: return' here, as that would silently discard
        # KeyboardInterrupt and the like
        current_figures.wrap()
        accumulator.wrap()
        daily_timestamps = []
        unique_students = []
        unique_notebooks = []
        new_students = []
        new_notebooks = []

        for timestamp, figures in figures_by_day.items():
            daily_timestamps.append(timestamp)
            unique_students.append(figures.nb_unique_students)
            unique_notebooks.append(figures.nb_unique_notebooks)
            new_students.append(figures.nb_new_students)
            new_notebooks.append(figures.nb_new_notebooks)

        return {
            'daily': {
                'timestamps': daily_timestamps,
                'unique_students': unique_students,
                'unique_notebooks': unique_notebooks,
                'new_students': new_students,
                'new_notebooks': new_notebooks
            },
            'events': {
                'timestamps': accumulator.timestamps,
                'total_students': accumulator.students,
                'total_notebooks': accumulator.notebooks
            }
        }