def __init__(self, school, data=None, output=None, diff=True, load=True,
             display_progress_bar=True, tracker=None):
    """Construct a Digestor from a JSON data file.

    Args:
        school: School identifier; stored and handed to DigestionAdapter.
        data (str): Path to a JSON file containing ``$meta`` and ``$data``
            keys.
        output (None, optional): Forwarded to ``_resolve_strategy``.
        diff (bool, optional): Forwarded to ``_resolve_strategy``.
        load (bool, optional): Forwarded to ``_resolve_strategy``.
        display_progress_bar (bool, optional): Attach a ProgressBar viewer
            to the tracker when true.
        tracker (optional): Tracker to report to. A fresh NullTracker is
            created when omitted.
    """
    # A default of ``NullTracker()`` in the signature is evaluated once and
    # then shared (and mutated below) by every instance; build it per call.
    if tracker is None:
        tracker = NullTracker()

    with open(data, 'r') as f:
        data = json.load(f)

    self.meta = data.pop('$meta')
    self.data = [DotDict(obj) for obj in data.pop('$data')]

    self.cache = DotDict(dict(course={'code': '_'}, section={'code': '_'}))
    self.school = school
    self.adapter = DigestionAdapter(school, self.cache)
    self.strategy = self._resolve_strategy(diff, load, output)

    # Setup tracker for digestion and progress bar.
    self.tracker = tracker
    self.tracker.mode = 'digesting'
    if display_progress_bar:
        self.tracker.add_viewer(ProgressBar('{total}'))
def digest_section(self, section, course_model=None):
    """Digest one section together with its nested meetings and textbooks.

    Args:
        section: Section object; must support ``.get``.
        course_model (optional): Parent course model, forwarded to the
            adapter and cached when the section is digested.

    Returns:
        Whatever the strategy's ``digest_section`` returned (falsy when no
        section model was produced).
    """
    adapted = self.adapter.adapt_section(section, course_model=course_model)
    section_model = self.strategy.digest_section(adapted)
    digested = bool(section_model)

    if digested:
        self.cache.course = course_model
        self.cache.section = section_model

        # Nested objects are only digested when the parent section exists.
        for raw_meeting in section.get("meetings", []):
            self.digest_meeting(DotDict(raw_meeting), section_model)
        for raw_link in section.get("textbooks", []):
            self.digest_textbook_link(DotDict(raw_link),
                                      section_model=section_model)

    self._update_progress("section", digested)
    return section_model
def test_dotdict(self):
    """DotDict values are reachable by attribute and by key."""
    flat = DotDict({'a': 1, 'b': 2, 'c': {'ca': 31}})

    # Attribute access and item access read the same values.
    self.assertEqual((1, 2), (flat.a, flat.b))
    self.assertEqual(1, flat['a'])

    # Item assignment is visible through attribute access.
    flat['a'] = 3
    self.assertEqual((3, 2), (flat.a, flat['b']))

    # A nested dict supports both access styles as well.
    self.assertEqual((31, 31), (flat.c.ca, flat.c['ca']))

    # Dicts stored inside a list stay indexable by key.
    with_list = DotDict({'a': [{'b': 1}, {'c': 2}]})
    self.assertEqual(with_list.a[1]['c'], 2)
def test_dotdict(self):
    """Check attribute/key access on flat, nested and list-holding DotDicts."""
    sample = DotDict({"a": 1, "b": 2, "c": {"ca": 31}})

    # Reads: dotted and bracketed access agree.
    self.assertEqual((1, 2), (sample.a, sample.b))
    self.assertEqual(1, sample["a"])

    # Writes through brackets show up on the attribute.
    sample["a"] = 3
    self.assertEqual((3, 2), (sample.a, sample["b"]))

    # Nested mapping.
    self.assertEqual((31, 31), (sample.c.ca, sample.c["ca"]))

    # Mapping nested inside a list.
    listed = DotDict({"a": [{"b": 1}, {"c": 2}]})
    self.assertEqual(listed.a[1]["c"], 2)
def adapt_evaluation(self, evaluation):
    """Adapt evaluation to model dictionary.

    Args:
        evaluation (dict): validated evaluation, read through attribute
            access (``instructors``, ``course.code``, ``score``,
            ``summary``, ``year``).

    Returns:
        dict: keys ``course``, ``score``, ``summary``, ``professor``,
            ``course_code`` and ``year``; any value that is None is replaced
            with 'Cannot be found'.

    Raises:
        DigestionError: an instructor name is not a string.
    """
    professor = ''
    if evaluation.instructors is not None:
        for instructor in evaluation.instructors:
            instructor = DotDict(instructor)
            # ``basestring`` is kept on purpose: this revision targets
            # Python 2, where names may be ``unicode`` rather than ``str``.
            if not isinstance(instructor.name, basestring):
                raise DigestionError('get your instructors straight')
            # Compare by value. ``is not ''`` tested object identity and only
            # worked because the interpreter happens to intern ''.
            if professor != '':
                professor += ', '
            professor += instructor.name

    adapted = {
        'course': Course.objects.get(code=evaluation.course.code),
        'score': evaluation.score,
        'summary': evaluation.summary,
        'professor': professor,
        'course_code': evaluation.course.code,
        'year': evaluation.year,
    }
    # Replace missing values with a placeholder (keys are not added or
    # removed, so mutating while iterating is safe).
    for key in adapted:
        if adapted[key] is None:
            adapted[key] = 'Cannot be found'
    return adapted
def load_schemas(cls, schema_path=None):
    """Load the JSON validation schemas onto ``cls.SCHEMAS``.

    The schemas are cached on the class: when ``cls.SCHEMAS`` already exists
    and no explicit ``schema_path`` is given, nothing is reloaded.

    Args:
        schema_path (None, str, optional): Directory holding ``<kind>.json``
            files. Defaults to the ``library/schemas`` directory under
            ``settings.BASE_DIR`` / ``settings.PARSING_MODULE``.
    """
    use_default_path = schema_path is None
    if use_default_path and hasattr(cls, 'SCHEMAS'):
        return

    if use_default_path:
        schema_path = '{}/{}/library/schemas'.format(
            settings.BASE_DIR, settings.PARSING_MODULE)

    loaded = {}
    for kind in cls.KINDS:
        with open('{}/{}.json'.format(schema_path, kind), 'r') as handle:
            schema = json.load(handle)
        # Each entry pairs the schema with a resolver rooted at the schema
        # directory.
        resolver = jsonschema.RefResolver(
            'file://{}/'.format(schema_path), schema)
        loaded[kind] = (schema, resolver)

    cls.SCHEMAS = DotDict(loaded)
def __init__(self, config, tracker=None, relative=True):
    """Construct validator instance.

    Args:
        config (dict): School config dictionary; it is validated here as an
            object of kind ``config``.
        tracker (None, optional): Tracker to report to. When omitted, a
            Tracker is created, configured for validation and started.
        relative (bool, optional): Enforce relative ordering in validation.
    """
    Validator.load_schemas()

    # Map each kind to its ``validate_<kind>`` method; kinds without one get
    # a function that accepts anything and does nothing.
    dispatch = {}
    for kind in Validator.KINDS:
        method_name = 'validate_' + kind
        if hasattr(self, method_name):
            dispatch[kind] = getattr(self, method_name)
        else:
            dispatch[kind] = lambda *_, **__: None
    self.kind_to_validation_function = dispatch

    # Running monitor of validated course and section codes.
    self.seen = {}

    self.config = DotDict(config)
    self.config['kind'] = 'config'
    self.validate(self.config)

    self.course_code_regex = re.compile(self.config.course_code_regex)
    self.relative = relative

    if tracker is not None:
        self.tracker = tracker
    else:
        # Used during self-contained validation.
        self.tracker = Tracker()
        self.tracker.school = self.config.school.code
        self.tracker.mode = 'validating'
        self.tracker.start()
def adapt_evaluation(self, evaluation):
    """Turn a validated evaluation into a model dictionary.

    Args:
        evaluation (dict): validated evaluation, read through attribute
            access (``instructors``, ``course.code``, ``score``,
            ``summary``, ``year``).

    Returns:
        dict: keys ``course``, ``score``, ``summary``, ``professor``,
            ``course_code`` and ``year``; None values are replaced with
            "Cannot be found".

    Raises:
        DigestionError: an instructor name is not a string.
    """
    names = ""
    instructors = evaluation.instructors
    if instructors is not None:
        for entry in instructors:
            name = DotDict(entry).name
            if not isinstance(name, str):
                raise DigestionError("get your instructors straight")
            # A separator is only added once something has accumulated.
            if names != "":
                names += ", "
            names += name

    course_code = evaluation.course.code
    adapted = {
        "course": Course.objects.get(code=course_code),
        "score": evaluation.score,
        "summary": evaluation.summary,
        "professor": names,
        "course_code": course_code,
        "year": evaluation.year,
    }
    return {
        key: ("Cannot be found" if value is None else value)
        for key, value in adapted.items()
    }
def __init__(self, school, meta, tracker=None):
    """Construct Digestor instance.

    Args:
        school (str): School code; also used as the key into SCHOOLS_MAP to
            read ``short_course_weeks_limit`` for the adapter.
        meta: Stored unchanged on ``self.meta``.
        tracker (optional): Tracker to report to. A fresh NullTracker is
            created when omitted.
    """
    # A default of ``NullTracker()`` in the signature is evaluated once and
    # shared (and mutated below) by every instance; build it per call.
    if tracker is None:
        tracker = NullTracker()

    self.cache = DotDict(
        dict(course={"code": None}, section={"code": None}))
    self.school = school
    self.adapter = DigestionAdapter(
        school, self.cache,
        SCHOOLS_MAP[self.school].short_course_weeks_limit)
    self.meta = meta

    # Setup tracker for digestion and progress bar.
    self.tracker = tracker
    self.tracker.mode = "digesting"
def validate(self, data):
    """Validate one object or a list of objects.

    Each object is checked against the schema registered for its ``kind``
    and then passed to the kind-specific validation function.

    Args:
        data (list, dict): Data to validate.
    """
    for raw in make_list(data):
        item = DotDict(raw)
        schema_args = Validator.SCHEMAS[item.kind]
        Validator.schema_validate(item, *schema_args)
        check = self.kind_to_validation_function[item.kind]
        check(item)
def load_school(school):
    """Build a School from the school's ``config.json``.

    Args:
        school: School directory name under
            ``<BASE_DIR>/<PARSING_MODULE>/schools``.

    Returns:
        School: populated from the config, with ``active_semesters`` ordered
            by key and ``parsers`` taken from ``load_parsers(school)``.
    """
    path_parts = (settings.BASE_DIR, settings.PARSING_MODULE, school)
    config_file = '{}/{}/schools/{}/config.json'.format(*path_parts)
    with open(config_file) as handle:
        raw_config = json.load(handle)
    config = DotDict(raw_config)

    semester_pairs = sorted(config.active_semesters.items(),
                            key=lambda pair: pair[0])
    active_semesters = OrderedDict(semester_pairs)

    school_fields = dict(
        code=config.school.code,
        name=config.school.name,
        active_semesters=active_semesters,
        granularity=config.granularity,
        ampm=config.ampm,
        full_academic_year_registration=config.full_academic_year_registration,
        single_access=config.single_access,
        final_exams=config.get('final_exams'),
        parsers=load_parsers(school),
    )
    return School(**school_fields)
def load_school(school):
    """Build a School from the school's ``config.json``.

    Args:
        school: School directory name under
            ``<BASE_DIR>/<PARSING_MODULE>/schools``.

    Returns:
        School: populated from the config, with ``active_semesters`` ordered
            by key, ``parsers`` taken from ``load_parsers(school)`` and the
            optional ``final_exams`` / ``registrar`` entries (None if
            absent from the config).
    """
    path_parts = (settings.BASE_DIR, settings.PARSING_MODULE, school)
    config_file = '{}/{}/schools/{}/config.json'.format(*path_parts)
    with open(config_file) as handle:
        raw_config = json.load(handle)
    config = DotDict(raw_config)

    semester_pairs = sorted(config.active_semesters.items(),
                            key=lambda pair: pair[0])
    active_semesters = OrderedDict(semester_pairs)

    school_fields = dict(
        code=config.school.code,
        name=config.school.name,
        active_semesters=active_semesters,
        granularity=config.granularity,
        ampm=config.ampm,
        full_academic_year_registration=config.full_academic_year_registration,
        single_access=config.single_access,
        final_exams=config.get('final_exams'),
        parsers=load_parsers(school),
        registrar=config.get('registrar'),
    )
    return School(**school_fields)
def validate_self_contained(self, data_path, break_on_error=True,
                            break_on_warning=False, output_error=None,
                            display_progress_bar=True, master_log_path=None):
    """Validate a JSON data file directly, without an ingestor.

    The file's ``$data`` list is first validated as a whole against the
    ``datalist`` schema; a failure there is logged and re-raised. Each object
    is then validated by kind, with failures logged and optionally re-raised.

    Args:
        data_path (str): Path to data file.
        break_on_error (bool, optional): Re-raise the first
            JsonValidationError hit while validating individual objects.
        break_on_warning (bool, optional): Re-raise the first
            JsonValidationWarning hit while validating individual objects.
        output_error (None, optional): Error output file path.
        display_progress_bar (bool, optional): Attach a ProgressBar viewer
            to the tracker.
        master_log_path (None, optional): Not used by this method.

    Raises:
        JsonValidationError: the data list is invalid, or an object is
            invalid and ``break_on_error`` is set.
        json.scanner.JSONDecodeError: the file could not be decoded.
        JsonValidationWarning: an object raised a warning and
            ``break_on_warning`` is set.
    """
    # TODO - iter errors and catch exceptions within method
    if display_progress_bar:
        self.tracker.add_viewer(ProgressBar('{total}'))

    error_log = Logger(errorfile=output_error)

    try:
        data = Validator.file_to_json(data_path)['$data']
        Validator.schema_validate(data, *Validator.SCHEMAS.datalist)
    except (JsonValidationError, json.scanner.JSONDecodeError) as fatal:
        # Fatal error, cannot continue.
        error_log.log(fatal)
        raise fatal

    for raw in data:
        item = DotDict(raw)
        try:
            self.kind_to_validation_function[item.kind](item)
            self.tracker.status = {'kind': item.kind, 'status': 'valid'}
        except JsonValidationError as error:
            error_log.log(error)
            if break_on_error:
                raise error
        except JsonValidationWarning as warning:
            error_log.log(warning)
            if break_on_warning:
                raise warning
        # Every object counts towards the total, valid or not.
        self.tracker.status = {'kind': item.kind, 'status': 'total'}

    self.tracker.end()
def load_school(school):
    """Build a School from the school's ``config.json``.

    Args:
        school: School directory name under
            ``<BASE_DIR>/<PARSING_MODULE>/schools``.

    Returns:
        School: populated from the config, with ``active_semesters`` ordered
            by key and an empty ``parsers`` mapping. ``final_exams``,
            ``registrar`` and ``short_course_weeks_limit`` are None when the
            config does not define them.
    """
    from django.conf import settings

    config_file = "{}/{}/schools/{}/config.json".format(
        settings.BASE_DIR, settings.PARSING_MODULE, school)
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(config_file, encoding="utf-8") as f:
        config = DotDict(json.load(f))

    # sorted() accepts any iterable, so no intermediate list is needed.
    active_semesters = OrderedDict(
        sorted(config.active_semesters.items(), key=lambda item: item[0]))

    return School(
        code=config.school.code,
        name=config.school.name,
        active_semesters=active_semesters,
        granularity=config.granularity,
        ampm=config.ampm,
        full_academic_year_registration=config.full_academic_year_registration,
        single_access=config.single_access,
        final_exams=config.get("final_exams"),
        parsers={},
        registrar=config.get("registrar"),
        short_course_weeks_limit=config.get("short_course_weeks_limit"),
    )
def validate(self, data, transact=True):
    """Validate a single object, optionally as its own transaction.

    Args:
        data (dict): Object to validate; its ``kind`` selects the schema and
            the validation function.
        transact (bool, optional): When true, start a fresh transaction
            before validating and, if validation set a transaction key,
            merge the transaction's values into ``self.seen`` afterwards.
            Nested validations pass False so they share the caller's
            transaction.
    """
    if transact:
        self.transaction = SimpleNamespace(key=None, values=set())

    obj = DotDict(data)
    Validator.schema_validate(obj, *Validator.SCHEMAS[obj.kind])
    handler = self.kind_to_validation_function[obj.kind]
    handler(obj)

    if not transact:
        return
    pending = self.transaction
    if pending.key:
        self.seen.setdefault(pending.key, set()).update(pending.values)
def validate_eval(self, course_eval):
    """Check that an evaluation's course code matches the configured pattern.

    Args:
        course_eval (DotDict): Evaluation to validate; a plain mapping is
            wrapped in a DotDict first.

    Raises:
        JsonValidationError: the course code does not match
            ``self.course_code_regex``.
    """
    if not isinstance(course_eval, DotDict):
        course_eval = DotDict(course_eval)

    code = course_eval.course.code
    if self.course_code_regex.match(code) is not None:
        return

    message = "course code {} does not match r'{}'".format(
        code, self.config.course_code_regex)
    raise JsonValidationError(message, course_eval)
def digest_course(self, course):
    """Digest one course together with its nested sections.

    Args:
        course: Course object; must support ``.get``.

    Returns:
        Whatever the strategy's ``digest_course`` returned (falsy when no
        course model was produced).
    """
    adapted = self.adapter.adapt_course(course)
    course_model = self.strategy.digest_course(adapted)
    digested = bool(course_model)

    if digested:
        self.cache.course = course_model
        # Nested sections are only digested when the parent course exists.
        for raw_section in course.get('sections', []):
            self.digest_section(DotDict(raw_section), course_model)

    self._update_progress('course', digested)
    return course_model
def digest(self, data, diff=True, load=True, output=None):
    """Digest every object in ``data``, dispatching on each object's kind.

    Args:
        data (list, dict): Object or list of objects to digest.
        diff (bool, optional): Forwarded to ``_resolve_strategy``.
        load (bool, optional): Forwarded to ``_resolve_strategy``.
        output (None, optional): Forwarded to ``_resolve_strategy``.
    """
    self.data = [DotDict(raw) for raw in make_list(data)]
    self.strategy = self._resolve_strategy(diff, load, output)

    # Handlers are looked up lazily, at call time.
    handlers = {
        "course": lambda item: self.digest_course(item),
        "section": lambda item: self.digest_section(item),
        "meeting": lambda item: self.digest_meeting(item),
        "textbook": lambda item: self.digest_textbook(item),
        "textbook_link": lambda item: self.digest_textbook_link(item),
        "eval": lambda item: self.digest_eval(item),
    }

    # When a progress bar viewer is attached, iterate through it so it can
    # follow along; otherwise iterate the objects directly.
    if self.tracker.has_viewer("progressbar"):
        wrap = self.tracker.get_viewer("progressbar").bar
        stream = wrap(make_list(self.data))
    else:
        stream = make_list(self.data)

    for item in stream:
        handlers[item.kind](item)

    self.wrap_up()
def adapt_section(self, section, course_model=None):
    """Adapt section to Django model.

    Args:
        section: Section object supporting attribute access, ``in`` and
            ``.get``.
        course_model (None, optional): Parent course model. When omitted it
            is taken from the cache if the cached course has the same code,
            otherwise looked up by school and course code.

    Returns:
        dict: formatted section dictionary with keys ``course``,
            ``semester``, ``meeting_section`` and ``defaults``.

    Raises:
        DigestionError: an instructor name is neither a string nor a dict,
            or no semester could be obtained.
    """
    if course_model is None:
        if self.cache.course and section.course.code == self.cache.course.code:
            course_model = self.cache.course
        else:
            course_model = Course.objects.filter(
                school=self.school, code=section.course.code).first()
            if course_model is None:
                # TODO - run tests with different database
                # The message used to carry a ``%s`` placeholder, which
                # ``str.format`` ignores, so the code was never shown.
                print(
                    "course {} section not already in database".format(
                        section.course.code),
                    file=sys.stderr,
                )

    adapted = {}
    if "capacity" in section:
        adapted["size"] = section.capacity
    if "enrollment" in section:
        # TODO - change 'enrolment' to 'enrollment' in django model
        adapted["enrolment"] = section.enrollment
    if "waitlist" in section:
        adapted["waitlist"] = section.waitlist
    if "waitlist_size" in section:
        adapted["waitlist_size"] = section.waitlist_size
    # NOTE: ``remaining_seats`` is deliberately not stored - possible logic
    # conflict with other data.
    if "course_section_id" in section:
        adapted["course_section_id"] = section.course_section_id

    section_type_map = {
        "Lecture": "L",
        "Laboratory": "P",
        "Discussion": "T",
    }
    if "type" in section:
        # Unknown section types fall back to "L".
        adapted["section_type"] = section_type_map.get(section.type, "L")
    # TODO - add fees to database

    # NOTE(review): names are appended without any separator - confirm this
    # is the intended stored format.
    for instructor in section.get("instructors", []):
        instructor = DotDict(instructor)
        adapted.setdefault("instructors", "")
        if isinstance(instructor.name, str):
            adapted["instructors"] += instructor.name
        elif isinstance(instructor.name, dict):
            adapted["instructors"] += "{} {}".format(
                instructor.name.first, instructor.name.last)
        else:
            raise DigestionError("get your instructors straight")
    # TODO - add final_exam to database

    # Grab semester.
    semester, _ = Semester.objects.update_or_create(name=section.term,
                                                    year=section.year)
    # NOTE(review): update_or_create normally returns an object, so this
    # guard looks unreachable - confirm before removing.
    if semester is None:
        raise DigestionError("Semester {} {} not in DB".format(
            section.term, section.year))

    return {
        "course": course_model,
        "semester": semester,
        "meeting_section": section.code,
        "defaults": adapted,
    }
def validate_course(self, course):
    """Validate a course and every section nested inside it.

    Args:
        course (DotDict): Course object to validate.

    Raises:
        MultipleDefinitionsWarning: Course has already been validated in
            same session (only checked when ``self.relative`` is set).
        ValidationError: Invalid course.
    """
    if "kind" in course and course.kind != "course":
        raise ValidationError(course, "course object must be of kind course")
    if "school" in course and course.school.code != self.config.school.code:
        raise ValidationError(course, "course schools does not match config")

    code = course.code
    if self.course_code_regex.match(code) is None:
        message = "course code {} does not match r'{}'".format(
            code, self.config.course_code_regex)
        raise ValidationError(course, message)

    has_department_code = ("department" in course
                           and "code" in course.department)
    if has_department_code and "departments" in self.config:
        known_codes = {dept.code for dept in self.config.departments}
        if course.department.code not in known_codes:
            message = "department {} is not in config.json departments".format(
                course.department)
            raise ValidationError(course, message)

    if "homepage" in course:
        self.validate_website(course.homepage)

    for alias in course.get("same_as", []):
        if self.course_code_regex.match(alias) is None:
            # FIXME -- a non-matching "same as" code should be reported, but
            # the check is disabled because it breaks when the referenced
            # course has not been written yet.
            pass

    if self.relative:
        if code in self.seen:
            raise MultipleDefinitionsWarning(
                course, "multiple definitions of course {}".format(code))
        self.transaction.key = code

    for section in course.get("sections", []):
        if "course" in section and section["course"]["code"] != code:
            message = "nested {} does not match parent {}".format(
                section["course"]["code"], code)
            raise ValidationError(course, message)

        # NOTE: mutating dictionary
        section["course"] = {"code": code}
        section["kind"] = "section"
        self.validate(DotDict(section), transact=False)
def adapt_section(self, section, course_model=None):
    """Adapt section to Django model.

    Args:
        section: Section object supporting attribute access, ``in`` and
            ``.get``.
        course_model (None, optional): Parent course model. When omitted it
            is taken from the cache if the cached course has the same code,
            otherwise looked up by school and course code.

    Returns:
        dict: formatted section dictionary with keys ``course``,
            ``semester``, ``meeting_section`` and ``defaults``.

    Raises:
        DigestionError: an instructor name is neither a string nor a dict,
            or no semester could be obtained.
    """
    if course_model is None:
        if (self.cache.course and
                section.course.code == self.cache.course.code):
            course_model = self.cache.course
        else:
            course_model = Course.objects.filter(
                school=self.school, code=section.course.code).first()
            if course_model is None:
                # TODO - run tests with different database
                # The message used to carry a ``%s`` placeholder, which
                # ``str.format`` ignores, so the code was never shown.
                print('course {} section not already in database'.format(
                    section.course.code), file=sys.stderr)

    adapted = {}
    if 'capacity' in section:
        adapted['size'] = section.capacity
    if 'enrollment' in section:
        # TODO - change 'enrolment' to 'enrollment' in django model
        adapted['enrolment'] = section.enrollment
    if 'waitlist' in section:
        adapted['waitlist'] = section.waitlist
    if 'waitlist_size' in section:
        adapted['waitlist_size'] = section.waitlist_size
    # NOTE: ``remaining_seats`` is deliberately not stored - possible logic
    # conflict with other data.

    section_type_map = {
        'Lecture': 'L',
        'Laboratory': 'P',
        'Discussion': 'T',
    }
    if 'type' in section:
        # Unknown section types fall back to 'L'.
        adapted['section_type'] = section_type_map.get(section.type, 'L')
    # TODO - add fees to database

    # NOTE(review): names are appended without any separator - confirm this
    # is the intended stored format.
    for instructor in section.get('instructors', []):
        instructor = DotDict(instructor)
        adapted.setdefault('instructors', '')
        # ``basestring`` is kept on purpose: this revision targets Python 2,
        # where names may be ``unicode`` rather than ``str``.
        if isinstance(instructor.name, basestring):
            adapted['instructors'] += instructor.name
        elif isinstance(instructor.name, dict):
            adapted['instructors'] += '{} {}'.format(
                instructor.name.first, instructor.name.last)
        else:
            raise DigestionError('get your instructors straight')
    # TODO - add final_exam to database

    # Grab semester.
    semester, _ = Semester.objects.update_or_create(name=section.term,
                                                    year=section.year)
    # NOTE(review): update_or_create normally returns an object, so this
    # guard looks unreachable - confirm before removing.
    if semester is None:
        raise DigestionError('Semester {} {} not in DB'.format(
            section.term, section.year))

    return {
        'course': course_model,
        'semester': semester,
        'meeting_section': section.code,
        'defaults': adapted
    }
def validate_section(self, section):
    """Validate section object, including its nested meetings and textbooks.

    Args:
        section (DotDict): Section object to validate.

    Raises:
        MultipleDefinitionsWarning: The same (code, year, term) was already
            validated for this course (only when ``self.relative`` is set).
        ValidationError: Invalid section.
    """
    if "course" not in section:
        raise ValidationError(section,
                              "section doesnt define a parent course")
    if "kind" in section and section.kind != "section":
        raise ValidationError(section, "section must be of kind section")
    if self.course_code_regex.match(section.course.code) is None:
        raise ValidationError(
            section,
            "course code {} does not match r'{}'".format(
                section.course.code, self.config.course_code_regex),
        )
    if "term" in section and section.term not in self.config.terms:
        raise ValidationError(
            section,
            "term {} not in config.json term list".format(section.term))

    if "instructors" in section:
        # The names must fit the text field they are later stored in.
        db_instructor_textfield_max_size = 500
        instructor_textfield = ""
        for instructor in section.get("instructors", []):
            instructor = DotDict(instructor)
            if isinstance(instructor.name, str):
                instructor_textfield += instructor.name
            elif isinstance(instructor.name, dict):
                instructor_textfield += "{} {}".format(
                    instructor.name.first, instructor.name.last)
        if len(instructor_textfield) > db_instructor_textfield_max_size:
            raise ValidationError(
                section,
                "db field too small for comma-joined instructor names")

    for instructor in section.get("instructors", []):
        self.validate_instructor(instructor)

    if "final_exam" in section:
        final_exam = section.final_exam
        if ("course" in final_exam
                and final_exam.course.code != section.course.code):
            raise ValidationError(
                section,
                "final exam course {} doesnt match course code {}".format(
                    final_exam.course.code, section.course.code),
            )
        if ("section" in final_exam
                and final_exam.section.code != section.code):
            raise ValidationError(
                section,
                "final exam section {} doesnt match section {}".format(
                    final_exam.section.code, section.code),
            )
        # TODO: nested final-exam validation (validate_final_exam) is
        # currently disabled.

    if self.relative:
        if (section.course.code not in self.seen
                and self.transaction.key != section.course.code):
            # Argument order fixed to (object, message), matching every
            # other ValidationError raised in this method.
            raise ValidationError(
                section,
                "course code {} isnt defined".format(section.course.code))
        section_key = (section.code, section.year, section.term)
        already_seen = (self.seen.get(section.course.code, set())
                        | self.transaction.values)
        if section_key in already_seen:
            raise MultipleDefinitionsWarning(
                section,
                "multiple defs for {} {} - {} already defined".format(
                    section.course.code, section.code, section.year),
            )
        self.transaction.key = section.course.code
        self.transaction.values.add(section_key)

    for meeting in section.get("meetings", []):
        meeting = DotDict(meeting)
        # Messages are built by adjacent-literal concatenation; the former
        # backslash continuation embedded source indentation in the text.
        if "course" in meeting and meeting.course.code != section.course.code:
            raise ValidationError(
                section,
                "course code {} in meeting doesnt match parent section "
                "course code {}".format(meeting.course.code,
                                        section.course.code),
            )
        if "section" in meeting and meeting.section.code != section.code:
            raise ValidationError(
                section,
                "section code {} in nested meeting doesnt match parent "
                "section code {}".format(meeting.section.code, section.code),
            )

        # NOTE: mutating obj
        meeting["course"] = section.course
        meeting["section"] = {
            "code": section.code,
            "year": section.year,
            "term": section.term,
        }
        meeting["kind"] = "meeting"
        self.validate(DotDict(meeting), transact=False)

    if "textbooks" in section:
        for textbook in section.textbooks:
            self.validate_textbook_link(textbook)
def validate_section(self, section):
    """Validate section object, including its nested meetings and textbooks.

    Args:
        section (DotDict): Section object to validate.

    Raises:
        MultipleDefinitionsWarning: The same (code, year, term) was already
            validated for this course (only when ``self.relative`` is set).
        ValidationError: Invalid section.
    """
    if 'course' not in section:
        raise ValidationError(section,
                              'section doesnt define a parent course')
    if 'kind' in section and section.kind != 'section':
        raise ValidationError(section, 'section must be of kind section')
    if self.course_code_regex.match(section.course.code) is None:
        raise ValidationError(
            section,
            'course code {} does not match r\'{}\''.format(
                section.course.code, self.config.course_code_regex))
    if 'term' in section and section.term not in self.config.terms:
        raise ValidationError(
            section,
            'term {} not in config.json term list'.format(section.term))

    if 'instructors' in section:
        # The names must fit the text field they are later stored in.
        db_instructor_textfield_max_size = 500
        instructor_textfield = ''
        for instructor in section.get('instructors', []):
            instructor = DotDict(instructor)
            # ``basestring`` is kept on purpose: this revision targets
            # Python 2, where names may be ``unicode`` rather than ``str``.
            if isinstance(instructor.name, basestring):
                instructor_textfield += instructor.name
            elif isinstance(instructor.name, dict):
                instructor_textfield += '{} {}'.format(
                    instructor.name.first, instructor.name.last)
        if len(instructor_textfield) > db_instructor_textfield_max_size:
            raise ValidationError(
                section,
                'db field too small for comma-joined instructor names')

    for instructor in section.get('instructors', []):
        self.validate_instructor(instructor)

    if 'final_exam' in section:
        final_exam = section.final_exam
        if ('course' in final_exam and
                final_exam.course.code != section.course.code):
            raise ValidationError(
                section,
                'final exam course {} doesnt match course code {}'.format(
                    final_exam.course.code, section.course.code))
        if ('section' in final_exam and
                final_exam.section.code != section.code):
            raise ValidationError(
                section,
                'final exam section {} doesnt match section {}'.format(
                    final_exam.section.code, section.code))
        # TODO: nested final-exam validation (validate_final_exam) is
        # currently disabled.

    if self.relative:
        if (section.course.code not in self.seen and
                self.transaction.key != section.course.code):
            # Argument order fixed to (object, message), matching every
            # other ValidationError raised in this method. The stray debug
            # ``print(self.seen)`` that preceded this raise was removed.
            raise ValidationError(
                section,
                'course code {} isnt defined'.format(section.course.code))
        section_key = (section.code, section.year, section.term)
        already_seen = (self.seen.get(section.course.code, set()) |
                        self.transaction.values)
        if section_key in already_seen:
            raise MultipleDefinitionsWarning(
                section,
                'multiple defs for {} {} - {} already defined'.format(
                    section.course.code, section.code, section.year))
        self.transaction.key = section.course.code
        self.transaction.values.add(section_key)

    for meeting in section.get('meetings', []):
        meeting = DotDict(meeting)
        # Messages are built by adjacent-literal concatenation; the former
        # backslash continuation embedded source indentation in the text.
        if ('course' in meeting and
                meeting.course.code != section.course.code):
            raise ValidationError(
                section,
                'course code {} in meeting doesnt match parent section '
                'course code {}'.format(meeting.course.code,
                                        section.course.code))
        if 'section' in meeting and meeting.section.code != section.code:
            raise ValidationError(
                section,
                'section code {} in nested meeting doesnt match parent '
                'section code {}'.format(meeting.section.code, section.code))

        # NOTE: mutating obj
        meeting['course'] = section.course
        meeting['section'] = {
            'code': section.code,
            'year': section.year,
            'term': section.term
        }
        meeting['kind'] = 'meeting'
        self.validate(DotDict(meeting), transact=False)

    if 'textbooks' in section:
        for textbook in section.textbooks:
            self.validate_textbook_link(textbook)
def validate_course(self, course):
    """Validate a course and every section nested inside it.

    Args:
        course (DotDict): Course object to validate.

    Raises:
        MultipleDefinitionsWarning: Course has already been validated in
            same session (only checked when ``self.relative`` is set).
        ValidationError: Invalid course.
    """
    if 'kind' in course and course.kind != 'course':
        raise ValidationError(course, 'course object must be of kind course')
    if 'school' in course and course.school.code != self.config.school.code:
        raise ValidationError(course, 'course schools does not match config')

    code = course.code
    if self.course_code_regex.match(code) is None:
        message = "course code {} does not match r'{}'".format(
            code, self.config.course_code_regex)
        raise ValidationError(course, message)

    has_department_code = ('department' in course and
                           'code' in course.department)
    if has_department_code and 'departments' in self.config:
        known_codes = {dept.code for dept in self.config.departments}
        if course.department.code not in known_codes:
            message = 'department {} is not in config.json departments'.format(
                course.department)
            raise ValidationError(course, message)

    if 'homepage' in course:
        self.validate_website(course.homepage)

    for alias in course.get('same_as', []):
        if self.course_code_regex.match(alias) is None:
            # A non-matching "same as" code is currently tolerated; the
            # ValidationError that used to be raised here is disabled.
            pass

    if self.relative:
        if code in self.seen:
            raise MultipleDefinitionsWarning(
                course, 'multiple definitions of course {}'.format(code))
        self.transaction.key = code

    for section in course.get('sections', []):
        if 'course' in section and section['course']['code'] != code:
            message = 'nested {} does not match parent {}'.format(
                section['course']['code'], code)
            raise ValidationError(course, message)

        # NOTE: mutating dictionary
        section['course'] = {'code': code}
        section['kind'] = 'section'
        self.validate(DotDict(section), transact=False)