def _probe_settings_yaml(self):
    """
    Load per-course settings from the customized nbhosting.yaml file.

    Populates self.static_mappings, self.builds and self._yaml_config
    from the yaml content.

    Returns:
        True if the yaml file was found and successfully parsed,
        False otherwise (no file, or any parse/processing error).
    """
    yaml_filename = self.customized("nbhosting.yaml")
    logger.debug(f"yaml filename {yaml_filename}")
    if not yaml_filename:
        return False
    try:
        with open(yaml_filename) as feed:
            yaml_config = yaml.safe_load(feed.read())
        # an empty yaml file loads as None; treat that as an empty
        # config instead of letting the 'in' tests below raise TypeError
        # (which would wrongly report an existing file as unreadable)
        if yaml_config is None:
            yaml_config = {}
        #
        if 'static-mappings' not in yaml_config:
            self.static_mappings = StaticMapping.defaults()
        else:
            logger.debug("populating static-mappings from yaml")
            self.static_mappings = [
                StaticMapping(D['source'], D['destination'])
                for D in yaml_config['static-mappings']
            ]
        #
        if 'builds' not in yaml_config:
            self.builds = []
        else:
            self.builds = [Build(D) for D in yaml_config['builds']]
        self._yaml_config = yaml_config
    except Exception:
        logger.exception(
            f"could not load yaml file {yaml_filename} - ignoring")
        return False
    return True
def record_monitor_known_counts_line(self):
    """
    Append a comment line listing the known count names
    to this course's monitor counts file (best-effort).
    """
    counts_path = self.monitor_counts_path()
    header = " ".join(self.known_counts)
    try:
        with counts_path.open('a') as output:
            output.write("# " + header + "\n")
    except Exception as exc:
        logger.exception(f"Cannot store headers line into {counts_path}, {type(exc)}")
def _write_events_line(self, student, notebook, action, port):
    """
    Append one event record to this course's notebook events file.

    The record holds, space-separated: timestamp, course name,
    student, notebook, action, and port. Failures are logged,
    never raised (best-effort).
    """
    now = time.strftime(time_format, time.gmtime())
    events_path = self.notebook_events_path()
    record = (f"{now} {self.coursename} {student}"
              f" {notebook} {action} {port}\n")
    try:
        with events_path.open("a") as output:
            output.write(record)
    except Exception as exc:
        logger.exception(f"Cannot store stats line into {events_path}, {type(exc)}")
def monitor_counts(self):
    """
    read the counts file for that course and produce
    data arrays suitable for plotly

    returns a dictionary with the following keys
    * 'timestamps' : the times where monitor reported the figures
    * plus, for each known count as listed in known_counts,
      a key made plural by adding a 's', so e.g.
      'running_jupyters': running containers
    """
    counts_path = self.monitor_counts_path()
    timestamps = []
    counts = {count: [] for count in self.known_counts}
    known_counts = self.known_counts
    max_counts = len(known_counts)
    try:
        with counts_path.open() as f:
            for lineno, line in enumerate(f, 1):
                if line.startswith('#'):
                    # ignore any comment
                    continue
                try:
                    timestamp, *values = line.split()
                    # each line should have at most len(known_counts)
                    # fields, all integers
                    if len(values) > max_counts:
                        logger.error(f"{counts_path}:{lineno}: "
                                     f"counts line has too many fields "
                                     f"- {len(values)} > {max_counts}")
                        continue
                    ivalues = [int(v) for v in values]
                    # only record the timestamp once the whole line is
                    # known valid, so timestamps stays aligned with the
                    # counts arrays even in the presence of bad lines
                    timestamps.append(timestamp)
                    for count, ivalue in zip(known_counts, ivalues):
                        counts[count].append(ivalue)
                    # fill in for missing values
                    missing = max_counts - len(values)
                    if missing > 0:
                        for count in known_counts[-missing:]:
                            counts[count].append(None)
                except Exception as _exc:
                    logger.exception(
                        f"{counts_path}:{lineno}: "
                        f"skipped misformed counts line - {line}")
    except OSError:
        # typically the counts file does not exist yet;
        # best-effort: fall through and return empty arrays
        pass
    finally:
        # add as many keys (with an extra 's') as we have known keys
        result = {
            f"{count}s": counts[count]
            for count in self.known_counts
        }
        # do not forget the timestamps
        result['timestamps'] = timestamps
        return result
def record_monitor_counts(self, *args):
    """
    Append a counts line (timestamp followed by the given values)
    to this course's monitor counts file (best-effort).

    Extra arguments beyond len(self.known_counts) are logged and
    dropped, so the written line stays parseable by monitor_counts().
    """
    timestamp = time.strftime(time_format, time.gmtime())
    path = self.monitor_counts_path()
    if len(args) > len(self.known_counts):
        # fixed typo: was "two many"
        logger.error(f"too many arguments to counts line "
                     f"- dropped {args} from {path}")
        # actually drop the extra fields as the message announces;
        # previously everything was written, producing lines that
        # monitor_counts() would reject as having too many fields
        args = args[:len(self.known_counts)]
    try:
        with path.open('a') as f:
            payload = " ".join(str(arg) for arg in args)
            f.write(f"{timestamp} {payload}\n")
    except Exception as exc:
        logger.exception(f"Cannot store counts line into {path}, {type(exc)}")
def __init__(self, line):
    """
    Parse one static-mapping line of the form "local -> from_top".

    Lines containing a '#' are treated as comments and marked invalid.
    On any parse error the mapping is marked invalid and both ends
    are set to None.
    """
    try:
        line = line.strip()
        if '#' in line:
            # comment line: invalid, but still define the attributes
            # so consumers never hit AttributeError (previously they
            # were left unset on this branch)
            self.local = self.from_top = None
            self.valid = False
        else:
            left, right = line.split("->")
            self.local, self.from_top = left.strip(), right.strip()
            self.valid = True
    # narrowed from a bare except: no longer traps SystemExit /
    # KeyboardInterrupt, same behavior for genuine parse errors
    except Exception:
        logger.exception(f"Could not read static mapping {line}")
        self.local = self.from_top = None
        self.valid = False
def image_hash(self):
    """
    Return the hash ('Id') of the podman image configured for this
    course's containers, or None when it cannot be determined
    (unknown image, or any other podman failure).
    """
    with podman.PodmanClient(base_url=PODMAN_URL) as api:
        try:
            attributes = api.images.get(self.image).attrs
            return attributes['Id']
        except podman.errors.ImageNotFound:
            logger.error(f"Course {self.coursename} "
                         f"uses unknown podman image {self.image}")
        except Exception:
            logger.exception("Can't figure image hash")
    return None
def _fetch_course_custom_tracks(self):
    """
    locate and load <course>/nbhosting/tracks.py

    objective is to make this customizable so that some
    notebooks in the repo can be ignored
    and the others organized along different view points

    the tracks() function will receive self as its single parameter
    it is expected to return a dictionary
       track_name -> Track instance
    see flotpython/nbhosting/tracks.py for a realistic example

    the keys in this dictionary are used in the web interface
    to propose the list of available tracks

    absence of tracks.py, or inability to run it, triggers
    the default policy (per directory) implemented in model_track.py
    """
    course_tracks_py = self.customized("tracks.py")

    if course_tracks_py:
        modulename = (f"{self.coursename}_tracks"
                      .replace("-", "_"))
        try:
            logger.debug(f"{self} loading module {course_tracks_py}")
            spec = spec_from_file_location(
                modulename,
                course_tracks_py,
            )
            module = module_from_spec(spec)
            spec.loader.exec_module(module)
            tracks_fun = module.tracks
            # fixed typo in log message: was 'triggerring'
            logger.debug(f"triggering {tracks_fun.__qualname__}()")
            tracks = tracks_fun(self)
            if self._check_tracks(tracks):
                return tracks
        except Exception:
            # fixed wording: was 'could not do load custom tracks'
            logger.exception(f"{self} could not load custom tracks")
        finally:
            # make sure to reload the python code next time
            # we will need it, in case the course has published an update
            if modulename in sys.modules:
                del sys.modules[modulename]
    else:
        logger.info(f"{self} no tracks.py hook found")
    logger.warning(f"{self} resorting to generic filesystem-based track")
    return [generic_track(self)]
def teacher_dropped_deploy(request, course, droparea):
    """
    deploy one, several or all dropped files
    onto the registered students space

    incoming in request.POST.deploy:
    . '*' which means all known dropped
    . a single str
    . a list of str

    incoming in request.POST.dry_run:
    . missing or false: do the deploy
    . true: do not deploy, just return statistics
      (and then of course news=0)

    Result is a JSON-encoded list of dicts like this one
    [ { 'relpath': filename,
        'total': nb of students concerned,
        'availables': how many students have it now,
        'news': how many students actually have the newly deployed version} }
    ]
    """
    coursedir = CourseDir.objects.get(coursename=course)
    if request.method == 'POST':
        try:
            data = json.loads(request.body.decode())
            deploy = data['deploy']
            # dry_run is documented above as optional:
            # default to an actual deploy when it is missing
            # (previously data['dry_run'] raised KeyError)
            dry_run = data.get('dry_run', False)
            if deploy == '*':
                droppeds = Dropped.scan_course_droparea(coursedir, droparea)
            elif isinstance(deploy, list):
                droppeds = [
                    Dropped(coursedir, droparea, relpath)
                    for relpath in deploy
                ]
            else:
                droppeds = [Dropped(coursedir, droparea, deploy)]
            result = []
            for dropped in droppeds:
                deploys = dropped.deploy_to_students(dry_run=dry_run)
                deploys['relpath'] = str(dropped.relpath)
                result.append(deploys)
            return HttpResponse(json.dumps(result))
        except Exception as exc:
            logger.exception("cannot teacher_dropped_deploy")
            return HttpResponseBadRequest(
                f"{type(exc)}, could not deploy, {exc}")
    # a view must never return None: reject non-POST methods
    # explicitly, same as teacher_dropped() does
    return HttpResponseBadRequest("unsupported request method")
def _probe_settings_old(self):
    """
    Legacy settings probe: populate self.static_mappings from the
    customized 'static-mappings' file, falling back to
    StaticMapping.defaults() when the file is absent or unreadable.
    """
    mappings_file = self.customized("static-mappings")
    if not mappings_file:
        self.static_mappings = StaticMapping.defaults()
        return
    try:
        with mappings_file.open() as feed:
            for raw_line in feed:
                mapping = StaticMapping(raw_line)
                if mapping:
                    self.static_mappings.append(mapping)
    except FileNotFoundError:
        # unfortunately this goes to stdout and
        # screws up the expose-static-* business
        #logger.info(f"mappings file not found {path}")
        self.static_mappings = StaticMapping.defaults()
    except Exception:
        logger.exception(f"could not load static-mappings for {self}")
        self.static_mappings = StaticMapping.defaults()
def teacher_dropped(request, course, droparea):
    """
    Handle teacher uploads (POST) and deletions (DELETE) of files
    in a course droparea.

    POST: one call per dropped file (even for multi-file drops),
    payload in request.FILES['filepond'].
    DELETE: the relative path to remove is the single key of the
    urlencoded request body.
    Any other method gets a 400 response.
    """
    coursedir = CourseDir.objects.get(coursename=course)
    if request.method == 'POST':
        # even when multiple files get dropped
        # simultaneously, we get called once per file
        # here's how to locate the instance of InMemoryUploadedFile
        inmemory = request.FILES['filepond']
        try:
            dropped = Dropped.from_uploaded(coursedir, droparea, inmemory)
            return HttpResponse(
                f"{dropped} uploaded, size={dropped.bytes_size}B")
        except Exception as exc:
            logger.exception("teacher_dropped could not upload")
            return HttpResponseBadRequest(
                f"{type(exc)}, could not upload, {exc}")
    elif request.method == 'DELETE':
        # https://stackoverflow.com/questions/4994789/django-where-are-the-params-stored-on-a-put-delete-request
        delete = QueryDict(request.body)
        try:
            # the relpath extraction is now inside the try:
            # an empty DELETE body used to raise an uncaught
            # IndexError (500) instead of a clean 400
            relpath = list(delete.keys())[0]
            dropped = Dropped(coursedir, droparea, relpath)
            dropped.remove()
            return HttpResponse(f"{droparea}/{dropped.relpath} removed")
        except Exception as exc:
            logger.exception("teacher_dropped could not delete")
            return HttpResponseBadRequest(
                f"{type(exc)}, could not delete, {exc}")
    else:
        # logger.exception is only meaningful inside an except block
        # (outside one it logs "NoneType: None"); use logger.error
        logger.error(f"unsupported method {request.method}")
        return HttpResponseBadRequest("unsupported request method")
def material_usage(self):
    """
    read the events file and produce data about
    relations between notebooks and students

    remember we cannot serialize a set, plus a sorted result is better

    'nbstudents' : how many students are considered
                   (test students are removed..)
    'nbstudents_per_notebook' : a sorted list of tuples (notebook, nb_students)
                   how many students have read this notebook
    'nbstudents_per_notebook_animated' : same but animated over time
    'nbstudents_per_nbnotebooks' : a sorted list of tuples (nb_notebooks, nb_students)
                   how many students have read exactly that number of notebooks
    'heatmap' : a complete matrix notebook x student ready to feed to plotly.heatmap
                   comes with 'x', 'y' and 'z' keys
    """
    events_path = self.notebook_events_path()
    # a dict notebook -> set of students
    set_by_notebook = defaultdict(set)
    # time-bucketed snapshots (6-hour grain) for the animated view
    nbstudents_per_notebook_buckets = TimeBuckets(grain=timedelta(hours=6), time_format=time_format)
    # a dict student -> set of notebooks
    set_by_student = defaultdict(set)
    # a dict hashed on a tuple (notebook, student) -> number of visits
    raw_counts = defaultdict(int)
    # staff accounts are excluded from all the figures below
    staff_names = {username for username in CourseDir.objects.get(coursename=self.coursename).staff_usernames.split()}
    try:
        with events_path.open() as f:
            for _lineno, line in enumerate(f, 1):
                # expected format: date course student notebook action [port...]
                date, _, student, notebook, action, *_ = line.split()
                # action 'killing' needs to be ignored
                if action in ('killing',):
                    continue
                # ignore staff or other artefact users
                if student in staff_names or artefact_user(student):
                    logger.debug(f"ignoring staff or artefact student {student}")
                    continue
                # animated data must be taken care of before anything else
                # NOTE(review): 'next' shadows the builtin here
                previous, next, changed = nbstudents_per_notebook_buckets.prepare(date)
                if changed:
                    # snapshot the per-notebook figures for the bucket
                    # that just closed
                    nspn = [
                        (notebook, len(set_by_notebook[notebook]))
                        for notebook in sorted(set_by_notebook)]
                    nbstudents_per_notebook_buckets.record_data(nspn, previous, next)
                notebook = canonicalize(notebook)
                set_by_notebook[notebook].add(student)
                set_by_student[student].add(notebook)
                raw_counts[notebook, student] += 1
    except Exception as _exc:
        # any malformed line aborts the read; figures gathered so far
        # are still wrapped up and returned below
        logger.exception(f"could not read {events_path} to count students per notebook")
    finally:
        # the finally clause holds a return: the dict below is always
        # produced, even after a partial read
        nbstudents_per_notebook = [
            (notebook, len(set_by_notebook[notebook]))
            for notebook in sorted(set_by_notebook)
        ]
        nb_by_student = {
            student: len(s) for (student, s) in set_by_student.items()
        }
        nbstudents_per_notebook_animated = nbstudents_per_notebook_buckets.wrap(nbstudents_per_notebook)
        # counting in the other direction is surprisingly tedious
        nbstudents_per_nbnotebooks = [
            (number, iter_len(v))
            for (number, v) in itertools.groupby(sorted(nb_by_student.values()))
        ]
        # the heatmap
        heatmap_notebooks = sorted(set_by_notebook.keys())
        heatmap_students = sorted(set_by_student.keys())
        # a first attempt at showing the number of times a given notebook was open
        # by a given student resulted in poor outcome
        # problem being mostly with colorscale, we'd need to have '0' stick out
        # as transparent or something, but OTOH sending None instead or 0
        heatmap_z = [
            [raw_counts.get((notebook, student,), None)
             for notebook in heatmap_notebooks]
            for student in heatmap_students
        ]
        # sort students on total number of opened notebooks
        heatmap_z.sort(key = lambda student_line: sum(x for x in student_line if x))
        # None cells are skipped when computing the color range
        zmax = max((max(x for x in line if x) for line in heatmap_z), default=0)
        zmin = min((min(x for x in line if x) for line in heatmap_z), default=0)
        return {
            'nbnotebooks' : len(set_by_notebook),
            'nbstudents' : len(set_by_student),
            'nbstudents_per_notebook' : nbstudents_per_notebook,
            'nbstudents_per_notebook_animated' : nbstudents_per_notebook_animated,
            'nbstudents_per_nbnotebooks' : nbstudents_per_nbnotebooks,
            'heatmap' : {'x' : heatmap_notebooks, 'y' : heatmap_students,
                         'z' : heatmap_z,
                         'zmin' : zmin, 'zmax' : zmax,
                         },
        }
def daily_metrics(self):
    """
    read the events file for that course and produce
    data arrays suitable for being composed under plotly

    returns a dict with the following components
    * 'daily': { 'timestamps', 'new_students', 'new_notebooks',
                 'unique_students', 'unique_notebooks' }
      - all 5 same size (one per day, time is always 23:59:59)
    * 'events': { 'timestamps', 'total_students', 'total_notebooks' }
      - all 3 same size
    """
    events_path = self.notebook_events_path()
    # a dictionary day -> figures
    figures_by_day = OrderedDict()
    previous_figures = None
    current_figures = DailyFigures()
    # the events dimension
    accumulator = TotalsAccumulator()
    # staff accounts are excluded from all figures
    staff_names = {username for username in CourseDir.objects.get(coursename=self.coursename).staff_usernames.split()}
    try:
        with events_path.open() as f:
            for lineno, line in enumerate(f, 1):
                # per-line try: one malformed line is logged and
                # skipped without aborting the whole scan
                try:
                    timestamp, _coursename, student, notebook, action, _port = line.split()
                    # if action is 'killing' then notebook is '-'
                    # which should not be counted as a notebook of course
                    # so let's ignore these lines altogether
                    if action == 'killing':
                        continue
                    # ignore staff or other artefact users
                    if student in staff_names or artefact_user(student):
                        continue
                    # bucket every event of a day under a single
                    # end-of-day timestamp
                    day = timestamp.split('T')[0] + ' 23:59:59'
                    if day in figures_by_day:
                        current_figures = figures_by_day[day]
                    else:
                        # new day: close the previous figures and chain
                        # a fresh DailyFigures onto them
                        current_figures.wrap()
                        previous_figures = current_figures
                        current_figures = DailyFigures(previous_figures)
                        figures_by_day[day] = current_figures
                    notebook = canonicalize(notebook)
                    current_figures.add_notebook(notebook)
                    current_figures.add_student(student)
                    accumulator.insert(
                        timestamp,
                        current_figures.nb_total_students(),
                        current_figures.nb_total_notebooks())
                except Exception as exc:
                    logger.exception(f"{events_path}:{lineno}: "
                                     f"skipped misformed events line {type(exc)}:{line}")
                    continue
    except Exception as _exc:
        # e.g. the events file is missing; figures gathered so far
        # are still wrapped up and returned below
        logger.exception("unexpected exception in daily_metrics")
    finally:
        # the finally clause holds the return: a result dict is always
        # produced, even after a partial read
        current_figures.wrap()
        accumulator.wrap()
        daily_timestamps = []
        unique_students = []
        unique_notebooks = []
        new_students = []
        new_notebooks = []
        for timestamp, figures in figures_by_day.items():
            daily_timestamps.append(timestamp)
            unique_students.append(figures.nb_unique_students)
            unique_notebooks.append(figures.nb_unique_notebooks)
            new_students.append(figures.nb_new_students)
            new_notebooks.append(figures.nb_new_notebooks)
        return {
            'daily' : {
                'timestamps' : daily_timestamps,
                'unique_students' : unique_students,
                'unique_notebooks' : unique_notebooks,
                'new_students' : new_students,
                'new_notebooks' : new_notebooks},
            'events' : {
                'timestamps' : accumulator.timestamps,
                'total_students' : accumulator.students,
                'total_notebooks' : accumulator.notebooks}}