def __init__(self, notifier):
    """Set up importer state from the semesters already stored in the database.

    ``notifier`` is kept on the instance for later use.  Semesters whose
    ``ref`` starts with the ROCS URL prefix are indexed by ``ref``, and the
    greatest of them (per ``max``) is remembered as ``latest_semester``, or
    ``None`` when there are none.
    """
    self.notifier = notifier

    rocs_semesters = Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/')
    # semester.ref: semester obj
    self.semesters = dict((sem.ref, sem) for sem in rocs_semesters)
    self.latest_semester = max(self.semesters.values()) if self.semesters else None

    self.sections_changed = False
    known_period_ids = SectionPeriod.objects.values_list('id', flat=True)
    self.SectionPeriod = Synchronizer(SectionPeriod, known_period_ids)
def cache_conflicts(semester_year=None, semester_month=None, semester=None, sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for one semester.

    The semester is given either as a ``semester`` object or as a
    ``semester_year`` / ``semester_month`` pair that is looked up here.
    Sections are grouped by course and every pair of sections taken from two
    different courses is tested with ``conflicts_with``.  A conflicting pair
    is always recorded with the lower section id as ``section1``.

    With ``sql`` true, pairs that have no existing row are collected and
    bulk-inserted (flushed whenever the running conflict count reaches a
    multiple of 10000), while pairs that already have a row are passed to
    ``Synchronizer.exclude_id``.  With ``sql`` false each pair goes through
    ``Synchronizer.get_or_create``.  ``Synchronizer.trim`` is called for the
    semester at the end.

    Progress characters are always written to ``sys.stdout``; the ``stdout``
    argument is accepted but never read.
    """
    assert (semester_year and semester_month) or semester, "Semester year & month must be provided or the semester object."
    import sys

    def emit(text):
        # Flush immediately so progress is visible while the loops are running.
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year, month=semester_month)

    syncer = Synchronizer(SectionConflict, SectionConflict.objects.values_list('id', flat=True))

    semester_sections = courses.Section.objects.select_related('course', 'semester')
    semester_sections = semester_sections.by_semester(semester).prefetch_periods()
    by_course = dict_by_attr(semester_sections, 'course')

    # (section1 id, section2 id) -> id of the conflict row already stored
    known = {}
    stored_rows = SectionConflict.objects.filter(semester=semester).values_list('id', 'section1', 'section2')
    for conflict_id, first_id, second_id in stored_rows:
        known[(first_id, second_id)] = conflict_id

    pending = []
    found = 0
    for course_a, course_b in itertools.combinations(by_course.keys(), 2):
        for left, right in itertools.product(by_course[course_a], by_course[course_b]):
            if not left.conflicts_with(right):
                continue
            if left.id > right.id:
                left, right = right, left
            found += 1

            if not sql:
                emit('C')
                syncer.get_or_create(section1=left, section2=right, semester=semester)
                continue

            if found % 10000 == 0:
                SectionConflict.objects.bulk_create(pending)
                pending = []
                emit('.')

            stored_id = known.get((left.id, right.id), None)
            if stored_id is None:
                pending.append(SectionConflict(section1=left, section2=right, semester=semester))
            else:
                syncer.exclude_id(stored_id)

    if sql and pending:
        SectionConflict.objects.bulk_create(pending)
        emit('.')
    emit('\n')
    syncer.trim(semester=semester)
    emit('\n')
def __init__(self):
    """Set up importer state from the semesters already stored in the database.

    Semesters whose ``ref`` starts with the ROCS URL prefix are indexed by
    ``ref``, and the greatest of them (per ``max``) is remembered as
    ``latest_semester``, or ``None`` when there are none.
    """
    rocs_semesters = Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/')
    # semester.ref: semester obj
    self.semesters = dict((sem.ref, sem) for sem in rocs_semesters)
    self.latest_semester = max(self.semesters.values()) if self.semesters else None

    self.sections_changed = False
    known_period_ids = SectionPeriod.objects.values_list('id', flat=True)
    self.SectionPeriod = Synchronizer(SectionPeriod, known_period_ids)
class ROCSRPIImporter(object):
    """Handles the importation of RPI course data into the database."""

    FILE_RE = re.compile(r'(\d+)\.xml')

    # create_courses() reads ``self.forced`` but nothing in this class assigns
    # it; this class-level default keeps that read from raising AttributeError.
    # Set ``forced = True`` on an instance to have existing courses overwritten
    # from the catalog.
    forced = False

    def __init__(self):
        """Index the stored ROCS semesters by ``ref`` and prepare sync state."""
        self.semesters = {}  # semester.ref: semester obj
        for semester in Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/'):
            self.semesters[semester.ref] = semester

        self.latest_semester = None
        if self.semesters:
            self.latest_semester = max(self.semesters.values())

        self.sections_changed = False
        self.SectionPeriod = Synchronizer(SectionPeriod, SectionPeriod.objects.values_list('id', flat=True))

    def clear_unused(self, semester):
        """Call ``trim`` on the SectionPeriod synchronizer for ``semester``."""
        self.SectionPeriod.trim(semester=semester)

    def sync(self, get_files=None, get_catalog=None):
        """Performs the updating of the database data from RPI's SIS.

        ``get_files`` is a callable returning the catalog file names to
        consider (default: ``rpi_courses.list_rocs_xml_files``).
        ``get_catalog`` is a callable that turns one of those names into a
        catalog object (default: ``rpi_courses.ROCSCourseCatalog.from_url``).
        Raises ``StopIteration`` if a file name does not match ``FILE_RE``.
        """
        if get_files is None:
            from rpi_courses import list_rocs_xml_files
            get_files = list_rocs_xml_files
        if get_catalog is None:
            from rpi_courses import ROCSCourseCatalog
            get_catalog = ROCSCourseCatalog.from_url

        for filename in get_files():
            name = next(self.FILE_RE.finditer(filename)).groups()[0]
            # NOTE(review): self.semesters is keyed by Semester.ref, which
            # __init__ restricts to values starting with the ROCS URL prefix,
            # while this key is just '<digits>.xml'. Confirm the lookup can
            # ever hit; if it cannot, every file is treated as a new semester.
            semester = self.semesters.get(name + '.xml')
            # if latest semester or newer semester
            if (not semester) or semester == self.latest_semester:
                catalog = get_catalog(filename)

                if (self.latest_semester and semester == self.latest_semester
                        and catalog.datetime <= self.latest_semester.date_updated):
                    continue  # already up-to-date

                logger.debug('found catalog for: %r %r' % (catalog.year, catalog.month))

                semester_obj, created = Semester.objects.get_or_create(
                    year=catalog.year,
                    month=catalog.month,
                    defaults={
                        'name': catalog.name,
                        'ref': filename,
                    })
                self.create_courses(catalog, semester_obj)
                self.create_crosslistings(semester_obj, set(catalog.crosslistings.values()))
                semester_obj.save()  # => update date_updated property
                if created:
                    logger.debug(' CREATE SEMESTER ' + repr(semester_obj))
                else:
                    logger.debug(' EXISTS SEMESTER ' + repr(semester_obj))
                if self.sections_changed:
                    sections_modified.send(sender=self, semester=semester_obj)
                self.clear_unused(semester_obj)

    def create_courses(self, catalog, semester_obj):
        """Inserts all the course data, including section information, into the database from the catalog.

        Existing courses are only overwritten with the catalog values when
        ``self.forced`` is true.
        """
        # A set gives constant-time membership tests for the per-course check.
        comm_intense_names = set(self.add_comm_intense(catalog, semester_obj))
        for course in catalog.get_courses():
            comm = course.name in comm_intense_names
            course_obj, created = Course.objects.get_or_create(
                number=course.num,
                department=self.get_or_create_department(semester_obj, code=course.dept, name=course.full_dept),
                defaults=dict(
                    name=course.name,
                    min_credits=course.cred[0],
                    max_credits=course.cred[1],
                    grade_type=course.grade_type,
                    is_comm_intense=comm,
                )
            )
            if not created and self.forced:
                course_obj.name = course.name
                course_obj.min_credits, course_obj.max_credits = course.cred
                course_obj.grade_type = course.grade_type
                course_obj.is_comm_intense = comm
                course_obj.save()
            OfferedFor.objects.get_or_create(course=course_obj, semester=semester_obj)
            self.create_sections(course, course_obj, semester_obj)
            logger.debug((' + ' if created else ' ') + course.name)

    def add_comm_intense(self, catalog, semester):
        """Return the names of catalog courses listed in the communication-intensive file.

        The text returned by ``rpi_courses.get_comm_file(semester)`` is scanned
        for "<5 digits> <4 capitals>" entries; the digits are used as a CRN to
        look the course up in ``catalog``.  A name appears once per matching
        entry, so the list may contain duplicates.
        """
        from rpi_courses import get_comm_file
        pdf = get_comm_file(semester)
        crns = re.findall(r"\d{5}\s[A-Z]{4}", pdf)
        # Single-argument parenthesised print emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Found " + str(len(crns)) + " communication intensive sections")
        names = []
        for entry in crns:
            course = catalog.find_course_by_crn(int(entry.split()[0]))
            if course is not None:
                print(course.name)
                names.append(course.name)
        return names

    def create_sections(self, course, course_obj, semester_obj):
        "Inserts all section data, including time period information, into the database from the catalog."
        for section in course.sections:
            # TODO: encode prereqs / notes
            remove_prereq_notes(section)
            notes = '\n'.join(section.notes)
            section_obj, created = Section.objects.get_or_create(
                crn=section.crn,
                semester=semester_obj,
                defaults=dict(
                    notes=notes,
                    number=section.num,
                    seats_taken=section.seats_taken,
                    seats_total=section.seats_total,
                    course=course_obj,
                )
            )

            if not created:
                section_obj.number = section.num
                section_obj.seats_taken = section.seats_taken
                section_obj.seats_total = section.seats_total
                section_obj.course = course_obj
                section_obj.notes = notes
                section_obj.save()
            else:
                # NOTE(review): this and __init__ are the only assignments to
                # sections_changed and both store False, so the
                # sections_modified signal in sync() is never sent from this
                # class. Confirm whether True was intended before changing it.
                self.sections_changed = False

            self.create_timeperiods(semester_obj, section, section_obj)

    # maps from catalog data to database representation
    DOW_MAPPER = {
        'Monday': Period.MONDAY,
        'Tuesday': Period.TUESDAY,
        'Wednesday': Period.WEDNESDAY,
        'Thursday': Period.THURSDAY,
        'Friday': Period.FRIDAY,
        'Saturday': Period.SATURDAY,
        'Sunday': Period.SUNDAY,
    }

    def compute_dow(self, days_of_week):
        """Assists in converting rpi_course's representation of days of the week to the database kind.

        Returns the bitwise OR of the ``DOW_MAPPER`` values for the given day
        names; names missing from the mapping contribute 0.
        """
        value = 0
        for dow in days_of_week:
            value |= self.DOW_MAPPER.get(dow, 0)
        return value

    def create_timeperiods(self, semester_obj, section, section_obj):
        """Creates all the SectionPeriod and Period instances for the given section object from
        the catalog and the section_obj database equivalent to refer to.
        """
        for period in section.periods:
            if None in (period.start, period.end):
                continue  # invalid period for all we care about... ignore.
            period_obj, pcreated = Period.objects.get_or_create(
                start=period.start_time,
                end=period.end_time,
                days_of_week_flag=self.compute_dow(period.days),
            )
            sectionperiod_obj, created = self.SectionPeriod.get_or_create(
                period=period_obj,
                section=section_obj,
                semester=semester_obj,
                defaults=dict(
                    instructor=period.instructor,
                    location=period.location,
                    kind=period.type,
                )
            )
            if not created:
                # Refresh the descriptive fields of an already-stored row.
                sectionperiod_obj.instructor = period.instructor
                sectionperiod_obj.location = period.location
                sectionperiod_obj.kind = period.type
                sectionperiod_obj.save()

    def get_or_create_department(self, semester_obj, code, name=None):
        """Return the Department for ``code``, creating it and its semester link if needed."""
        dept, created = Department.objects.get_or_create(
            code=code,
            defaults={
                'name': name or ''
            }
        )
        SemesterDepartment.objects.get_or_create(
            semester=semester_obj,
            department=dept
        )
        return dept

    def create_crosslistings(self, semester_obj, crosslistings):
        "Creates all crosslisting information into the database for all the sections."
        for crosslisting in crosslistings:
            refid = ','.join(map(str, sorted(crosslisting.crns)))
            crosslisting_obj, created = SectionCrosslisting.objects.get_or_create(semester=semester_obj, ref=refid)
            # Sections are keyed by (crn, semester) in create_sections, so limit
            # the update to this semester; matching on CRN alone would also
            # repoint same-CRN sections that belong to other semesters.
            Section.objects.filter(
                semester=semester_obj,
                crn__in=crosslisting.crns,
            ).update(crosslisted=crosslisting_obj)
def cache_conflicts(semester_year=None, semester_month=None, semester=None, sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for one semester.

    The semester is given either as a ``semester`` object or as a
    ``semester_year`` / ``semester_month`` pair that is looked up here.
    Inside a single ``transaction.atomic()`` block, sections are grouped by
    course and every pair of sections taken from two different courses is
    tested with ``conflicts_with``.  A conflicting pair is always recorded
    with the lower section id as ``section1``.

    With ``sql`` true, pairs that have no existing row are collected and
    bulk-inserted (flushed whenever the running conflict count reaches a
    multiple of 500), while pairs that already have a row are passed to
    ``Synchronizer.exclude_id``.  With ``sql`` false each pair goes through
    ``Synchronizer.get_or_create``.  ``Synchronizer.trim`` is called for the
    semester at the end.

    Progress characters are always written to ``sys.stdout``; the ``stdout``
    argument is accepted but never read.
    """
    assert (
        semester_year and semester_month
    ) or semester, "Semester year & month must be provided or the semester object."
    import sys

    def emit(text):
        # Flush immediately so progress is visible while the loops are running.
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year, month=semester_month)

    with transaction.atomic():
        # Rows are synchronized in place; the original note here says this is
        # to avoid incrementing IDs too quickly (a run every 25 minutes).
        syncer = Synchronizer(
            SectionConflict, SectionConflict.objects.values_list('id', flat=True))

        semester_sections = courses.Section.objects.select_related('course', 'semester')
        semester_sections = semester_sections.by_semester(semester).prefetch_periods()
        by_course = dict_by_attr(semester_sections, 'course')

        # (section1 id, section2 id) -> id of the conflict row already stored
        known = {}
        stored_rows = SectionConflict.objects.filter(
            semester=semester).values_list('id', 'section1', 'section2')
        for conflict_id, first_id, second_id in stored_rows:
            known[(first_id, second_id)] = conflict_id

        pending = []
        found = 0
        for course_a, course_b in itertools.combinations(by_course.keys(), 2):
            for left, right in itertools.product(by_course[course_a], by_course[course_b]):
                if not left.conflicts_with(right):
                    continue
                if left.id > right.id:
                    left, right = right, left
                found += 1

                if not sql:
                    emit('C')
                    syncer.get_or_create(section1=left, section2=right, semester=semester)
                    continue

                if found % 500 == 0:
                    SectionConflict.objects.bulk_create(pending)
                    pending = []
                    emit('.')

                pair = (left.id, right.id)
                if pair in known:
                    syncer.exclude_id(known[pair])
                else:
                    emit('C')
                    pending.append(
                        SectionConflict(section1=left, section2=right, semester=semester))

        if sql and pending:
            emit('C')
            SectionConflict.objects.bulk_create(pending)
        emit('\n')
        syncer.trim(semester=semester)
        emit('\n')
class ROCSRPIImporter(object):
    """Handles the importation of RPI course data into the database."""

    FILE_RE = re.compile(r'(\d+)\.xml')

    # create_courses() reads ``self.forced`` but nothing in this class assigns
    # it; this class-level default keeps that read from raising AttributeError.
    # Set ``forced = True`` on an instance to have existing courses overwritten
    # from the catalog.
    forced = False

    def __init__(self, notifier):
        """Index the stored ROCS semesters by ``ref`` and prepare sync state.

        ``notifier`` is kept on the instance; sync() calls its
        ``requires_notification()`` when a new semester is created.
        """
        self.semesters = {}  # semester.ref: semester obj
        self.notifier = notifier
        for semester in Semester.objects.filter(
                ref__startswith='http://sis.rpi.edu/reg/rocs/'):
            self.semesters[semester.ref] = semester

        self.latest_semester = None
        if self.semesters:
            self.latest_semester = max(self.semesters.values())

        self.sections_changed = False
        self.SectionPeriod = Synchronizer(
            SectionPeriod, SectionPeriod.objects.values_list('id', flat=True))

    def clear_unused(self, semester):
        """Call ``trim`` on the SectionPeriod synchronizer for ``semester``."""
        self.SectionPeriod.trim(semester=semester)

    def sync(self, get_files=None, get_catalog=None):
        """Performs the updating of the database data from RPI's SIS.

        ``get_files`` is a callable returning the catalog file names to
        consider (default: ``rpi_courses.list_rocs_xml_files``).
        ``get_catalog`` is a callable that turns one of those names into a
        catalog object (default: ``rpi_courses.ROCSCourseCatalog.from_url``).
        A file whose semester cannot be created because of an
        ``IntegrityError`` is logged and skipped.  Raises ``StopIteration`` if
        a file name does not match ``FILE_RE``.
        """
        if get_files is None:
            from rpi_courses import list_rocs_xml_files
            get_files = list_rocs_xml_files
        if get_catalog is None:
            from rpi_courses import ROCSCourseCatalog
            get_catalog = ROCSCourseCatalog.from_url

        for filename in get_files():
            name = next(self.FILE_RE.finditer(filename)).groups()[0]
            # NOTE(review): self.semesters is keyed by Semester.ref, which
            # __init__ restricts to values starting with the ROCS URL prefix,
            # while this key is just '<digits>.xml'. Confirm the lookup can
            # ever hit; if it cannot, every file is treated as a new semester.
            semester = self.semesters.get(name + '.xml')
            # if latest semester or newer semester
            if (not semester) or semester == self.latest_semester:
                catalog = get_catalog(filename)

                if (self.latest_semester and semester == self.latest_semester
                        and catalog.datetime <= self.latest_semester.date_updated):
                    continue  # already up-to-date

                logger.debug('found catalog for: %r %r' % (catalog.year, catalog.month))

                try:
                    semester_obj, created = Semester.objects.get_or_create(
                        year=catalog.year,
                        month=catalog.month,
                        defaults={
                            'visible': True,
                            'name': catalog.name,
                            'ref': filename,
                        })
                except IntegrityError as error:
                    # No semester object exists when get_or_create() raises, so
                    # identify the failing catalog by its file instead.
                    logger.debug(' DUPLICATE SEMESTER ' + repr(filename) + ': ' + repr(error))
                    continue

                self.create_courses(catalog, semester_obj)
                self.create_crosslistings(semester_obj, set(catalog.crosslistings.values()))
                semester_obj.save()  # => update date_updated property
                if created:
                    logger.debug(' CREATE SEMESTER ' + repr(semester_obj))
                    self.notifier.requires_notification()
                else:
                    logger.debug(' EXISTS SEMESTER ' + repr(semester_obj))
                if self.sections_changed:
                    sections_modified.send(sender=self, semester=semester_obj)
                self.clear_unused(semester_obj)

    def create_courses(self, catalog, semester_obj):
        """Inserts all the course data, including section information, into the database from the catalog.

        A course is matched through the ``OfferedFor`` row of this semester
        whose ``ref`` equals the catalog course's name/department/number
        triple.  Rows with an empty ``ref`` are adopted and stamped with the
        ref.  When neither exists a new ``Course`` is created.  Existing
        courses are only overwritten with the catalog values when
        ``self.forced`` is true.
        """
        # A set gives constant-time membership tests for the per-course check.
        comm_intense_names = set(self.add_comm_intense(catalog, semester_obj))
        for course in catalog.get_courses():
            comm = course.name in comm_intense_names
            department = self.get_or_create_department(
                semester_obj, code=course.dept, name=course.full_dept)
            # we use our OfferedFor.ref to determine if we need to create a new
            # course or not.
            ref_name = '%r:%r:%r' % (course.name, course.dept, course.num)
            qs = OfferedFor.objects.filter(
                semester=semester_obj,
                course__department__code=course.dept,
                course__number=course.num)
            qs = qs.select_related('course')

            created = False
            try:
                course_obj = qs.get(ref=ref_name).course
            except OfferedFor.DoesNotExist:
                course_obj = None

            if not course_obj:
                # for migration support... set empty refs.
                try:
                    offered_for = qs.get(ref='')
                except OfferedFor.DoesNotExist:
                    course_obj = None
                else:
                    offered_for.ref = ref_name
                    offered_for.save()
                    course_obj = offered_for.course

            if not course_obj:
                course_obj = Course.objects.create(
                    name=course.name,
                    number=course.num,
                    department=department,
                    min_credits=course.cred[0],
                    max_credits=course.cred[1],
                    grade_type=course.grade_type,
                    is_comm_intense=comm,
                )
                created = True

            if created:
                # A brand-new course has no OfferedFor row for this semester yet.
                OfferedFor.objects.get_or_create(
                    course=course_obj, semester=semester_obj, ref=ref_name)
            elif self.forced:
                course_obj.name = course.name
                course_obj.min_credits, course_obj.max_credits = course.cred
                course_obj.grade_type = course.grade_type
                course_obj.is_comm_intense = comm
                course_obj.save()

            self.create_sections(course, course_obj, semester_obj)
            crns = [str(s.crn) for s in course_obj.sections.all()]
            logger.debug(
                ' %s %s (crns: %s)' % (('+' if created else ' '), course.name, ', '.join(crns)))

    def add_comm_intense(self, catalog, semester):
        """Return the names of catalog courses listed in the communication-intensive file.

        The text returned by ``rpi_courses.get_comm_file(semester)`` is scanned
        for "<5 digits> <4 capitals>" entries; the digits are used as a CRN to
        look the course up in ``catalog``.  A name appears once per matching
        entry, so the list may contain duplicates.
        """
        from rpi_courses import get_comm_file
        pdf = get_comm_file(semester)
        crns = re.findall(r"\d{5}\s[A-Z]{4}", pdf)
        # Single-argument parenthesised print emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Found " + str(len(crns)) + " communication intensive sections")
        names = []
        for entry in crns:
            course = catalog.find_course_by_crn(int(entry.split()[0]))
            if course is not None:
                print(course.name)
                names.append(course.name)
        return names

    def create_sections(self, course, course_obj, semester_obj):
        "Inserts all section data, including time period information, into the database from the catalog."
        for section in course.sections:
            # TODO: encode prereqs / notes
            remove_prereq_notes(section)
            notes = '\n'.join(section.notes)
            section_obj, created = Section.objects.get_or_create(
                crn=section.crn,
                semester=semester_obj,
                defaults=dict(
                    notes=notes,
                    number=section.num,
                    seats_taken=section.seats_taken,
                    seats_total=section.seats_total,
                    course=course_obj,
                ))

            if not created:
                section_obj.number = section.num
                section_obj.seats_taken = section.seats_taken
                section_obj.seats_total = section.seats_total
                section_obj.course = course_obj
                section_obj.notes = notes
                section_obj.save()
            else:
                # NOTE(review): this and __init__ are the only assignments to
                # sections_changed and both store False, so the
                # sections_modified signal in sync() is never sent from this
                # class. Confirm whether True was intended before changing it.
                self.sections_changed = False

            self.create_timeperiods(semester_obj, section, section_obj)

    # maps from catalog data to database representation
    DOW_MAPPER = {
        'Monday': Period.MONDAY,
        'Tuesday': Period.TUESDAY,
        'Wednesday': Period.WEDNESDAY,
        'Thursday': Period.THURSDAY,
        'Friday': Period.FRIDAY,
        'Saturday': Period.SATURDAY,
        'Sunday': Period.SUNDAY,
    }

    def compute_dow(self, days_of_week):
        """Assists in converting rpi_course's representation of days of the week to the database kind.

        Returns the bitwise OR of the ``DOW_MAPPER`` values for the given day
        names; names missing from the mapping contribute 0.
        """
        value = 0
        for dow in days_of_week:
            value |= self.DOW_MAPPER.get(dow, 0)
        return value

    def create_timeperiods(self, semester_obj, section, section_obj):
        """Creates all the SectionPeriod and Period instances for the given section object from
        the catalog and the section_obj database equivalent to refer to.
        """
        for period in section.periods:
            if None in (period.start, period.end):
                continue  # invalid period for all we care about... ignore.
            period_obj, pcreated = Period.objects.get_or_create(
                start=period.start_time,
                end=period.end_time,
                days_of_week_flag=self.compute_dow(period.days),
            )
            sectionperiod_obj, created = self.SectionPeriod.get_or_create(
                period=period_obj,
                section=section_obj,
                semester=semester_obj,
                defaults=dict(
                    instructor=period.instructor,
                    location=period.location,
                    kind=period.type,
                ))
            if not created:
                # Refresh the descriptive fields of an already-stored row.
                sectionperiod_obj.instructor = period.instructor
                sectionperiod_obj.location = period.location
                sectionperiod_obj.kind = period.type
                sectionperiod_obj.save()

    def get_or_create_department(self, semester_obj, code, name=None):
        """Return the Department for ``code``, creating it and its semester link if needed."""
        dept, created = Department.objects.get_or_create(
            code=code, defaults={'name': name or ''})
        SemesterDepartment.objects.get_or_create(semester=semester_obj, department=dept)
        return dept

    def create_crosslistings(self, semester_obj, crosslistings):
        "Creates all crosslisting information into the database for all the sections."
        for crosslisting in crosslistings:
            refid = ','.join(map(str, sorted(crosslisting.crns)))
            crosslisting_obj, created = SectionCrosslisting.objects.get_or_create(
                semester=semester_obj, ref=refid)
            # Sections are keyed by (crn, semester) in create_sections, so limit
            # the update to this semester; matching on CRN alone would also
            # repoint same-CRN sections that belong to other semesters.
            Section.objects.filter(
                semester=semester_obj,
                crn__in=crosslisting.crns,
            ).update(crosslisted=crosslisting_obj)
def cache_conflicts(semester_year=None, semester_month=None, semester=None, sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for one semester.

    The semester is given either as a ``semester`` object or as a
    ``semester_year`` / ``semester_month`` pair that is looked up here.
    Inside a single ``transaction.atomic()`` block, sections are grouped by
    course and every pair of sections taken from two different courses is
    tested with ``conflicts_with``.  A conflicting pair is always recorded
    with the lower section id as ``section1``.

    With ``sql`` true, pairs that have no existing row are collected and
    bulk-inserted (flushed whenever the running conflict count reaches a
    multiple of 500), while pairs that already have a row are passed to
    ``Synchronizer.exclude_id``.  With ``sql`` false each pair goes through
    ``Synchronizer.get_or_create``.  ``Synchronizer.trim`` is called for the
    semester at the end.

    Progress characters are always written to ``sys.stdout``; the ``stdout``
    argument is accepted but never read.
    """
    assert (
        semester_year and semester_month
    ) or semester, "Semester year & month must be provided or the semester object."
    import sys

    def emit(text):
        # Flush immediately so progress is visible while the loops are running.
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year, month=semester_month)

    with transaction.atomic():
        # Rows are synchronized in place; the original note here says this is
        # to avoid incrementing IDs too quickly (a run every 25 minutes).
        syncer = Synchronizer(SectionConflict, SectionConflict.objects.values_list("id", flat=True))

        semester_sections = courses.Section.objects.select_related("course", "semester")
        semester_sections = semester_sections.by_semester(semester).prefetch_periods()
        by_course = dict_by_attr(semester_sections, "course")

        # (section1 id, section2 id) -> id of the conflict row already stored
        known = {}
        stored_rows = SectionConflict.objects.filter(semester=semester).values_list(
            "id", "section1", "section2"
        )
        for conflict_id, first_id, second_id in stored_rows:
            known[(first_id, second_id)] = conflict_id

        pending = []
        found = 0
        for course_a, course_b in itertools.combinations(by_course.keys(), 2):
            for left, right in itertools.product(by_course[course_a], by_course[course_b]):
                if not left.conflicts_with(right):
                    continue
                if left.id > right.id:
                    left, right = right, left
                found += 1

                if not sql:
                    emit("C")
                    syncer.get_or_create(section1=left, section2=right, semester=semester)
                    continue

                if found % 500 == 0:
                    SectionConflict.objects.bulk_create(pending)
                    pending = []
                    emit(".")

                pair = (left.id, right.id)
                if pair in known:
                    syncer.exclude_id(known[pair])
                else:
                    emit("C")
                    pending.append(SectionConflict(section1=left, section2=right, semester=semester))

        if sql and pending:
            emit("C")
            SectionConflict.objects.bulk_create(pending)
        emit("\n")
        syncer.trim(semester=semester)
        emit("\n")