Exemple #1
0
    def __init__(self, notifier):
        """Cache the stored ROCS semesters and set up the SectionPeriod synchronizer.

        `notifier` is kept on the instance unchanged.
        """
        self.notifier = notifier

        # semester.ref => semester obj, restricted to refs under the ROCS URL prefix
        rocs_semesters = Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/')
        self.semesters = dict((sem.ref, sem) for sem in rocs_semesters)

        # largest stored semester according to max(), or None when nothing is stored
        self.latest_semester = max(self.semesters.values()) if self.semesters else None

        self.sections_changed = False
        period_ids = SectionPeriod.objects.values_list('id', flat=True)
        self.SectionPeriod = Synchronizer(SectionPeriod, period_ids)
Exemple #2
0
def cache_conflicts(semester_year=None, semester_month=None, semester=None, sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for a single semester.

    The semester is either passed in directly (``semester``) or fetched by
    ``semester_year`` and ``semester_month``; an AssertionError is raised when
    neither is usable.

    Sections of the semester are grouped with ``dict_by_attr(..., 'course')``
    and every pair of sections taken from two different groups is checked with
    ``conflicts_with``.  A conflicting pair that is not stored yet is inserted;
    one that is already stored is reported to the synchronizer through
    ``exclude_id``.  ``Syncer.trim(semester=...)`` runs at the very end.

    With ``sql`` true, new rows are collected and written with ``bulk_create``;
    otherwise every conflict goes through ``Syncer.get_or_create``.  ``stdout``
    is accepted but not read.  Progress markers are written to ``sys.stdout``.
    """
    assert (semester_year and semester_month) or semester, "Semester year & month must be provided or the semester object."
    import sys

    def emit(text):
        # flush right away so progress is visible while the loop is running
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year, month=semester_month)

    # existing rows are reconciled through the synchronizer instead of being
    # deleted up front
    Syncer = Synchronizer(SectionConflict, SectionConflict.objects.values_list('id', flat=True))

    semester_sections = courses.Section.objects.select_related('course', 'semester')
    semester_sections = semester_sections.by_semester(semester).prefetch_periods()
    section_courses = dict_by_attr(semester_sections, 'course')

    # (section1 id, section2 id) => id of the conflict row already stored
    stored_rows = SectionConflict.objects.filter(semester=semester).values_list('id', 'section1', 'section2')
    mapping = dict(((first_id, second_id), conflict_id) for conflict_id, first_id, second_id in stored_rows)

    pending = []
    found = 0
    for left_course, right_course in itertools.combinations(section_courses.keys(), 2):
        section_pairs = itertools.product(section_courses[left_course], section_courses[right_course])
        for section1, section2 in section_pairs:
            if not section1.conflicts_with(section2):
                continue

            # a pair is always handled with the lower section id first
            if section1.id > section2.id:
                section1, section2 = section2, section1
            found += 1

            if not sql:
                emit('C')
                Syncer.get_or_create(
                    section1=section1,
                    section2=section2,
                    semester=semester,
                )
                continue

            # flush whatever is pending on every 10000th conflict seen
            if found % 10000 == 0:
                SectionConflict.objects.bulk_create(pending)
                pending = []
                emit('.')

            known_id = mapping.get((section1.id, section2.id), None)
            if known_id is None:
                pending.append(SectionConflict(section1=section1, section2=section2, semester=semester))
            else:
                Syncer.exclude_id(known_id)

    if sql and pending:
        SectionConflict.objects.bulk_create(pending)
        emit('.')

    emit('\n')
    Syncer.trim(semester=semester)
    emit('\n')
Exemple #3
0
    def __init__(self):
        """Cache the stored ROCS semesters and set up the SectionPeriod synchronizer."""
        # semester.ref => semester obj, restricted to refs under the ROCS URL prefix
        rocs_semesters = Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/')
        self.semesters = dict((sem.ref, sem) for sem in rocs_semesters)

        # largest stored semester according to max(), or None when nothing is stored
        self.latest_semester = max(self.semesters.values()) if self.semesters else None

        self.sections_changed = False

        period_ids = SectionPeriod.objects.values_list('id', flat=True)
        self.SectionPeriod = Synchronizer(SectionPeriod, period_ids)
Exemple #4
0
class ROCSRPIImporter(object):
    """Handles the importation of RPI course data into the database."""
    FILE_RE = re.compile(r'(\d+)\.xml')

    # Default for the flag read by create_courses().  Without a default, an
    # importer whose `forced` attribute was never assigned raised
    # AttributeError on the first course that already existed.  Assigning
    # `importer.forced = True` on an instance still overrides this.
    forced = False

    def __init__(self):
        """Loads the stored ROCS semesters and sets up the SectionPeriod synchronizer."""
        self.semesters = {}  # semester.ref: semester obj
        for semester in Semester.objects.filter(ref__startswith='http://sis.rpi.edu/reg/rocs/'):
            self.semesters[semester.ref] = semester

        self.latest_semester = None
        if self.semesters:
            self.latest_semester = max(self.semesters.values())

        # flipped to True by create_sections() when a Section row is inserted
        self.sections_changed = False

        self.SectionPeriod = Synchronizer(SectionPeriod, SectionPeriod.objects.values_list('id', flat=True))

    def clear_unused(self, semester):
        """Asks the SectionPeriod synchronizer to trim its rows for the given semester."""
        self.SectionPeriod.trim(semester=semester)

    def sync(self, get_files=None, get_catalog=None):
        """Performs the updating of the database data from RPI's SIS.

        get_files: callable returning the catalog file names to look at;
            defaults to rpi_courses.list_rocs_xml_files.
        get_catalog: callable that turns one of those names into a catalog;
            defaults to rpi_courses.ROCSCourseCatalog.from_url.

        Raises StopIteration when a file name does not match FILE_RE.
        """
        if get_files is None:
            from rpi_courses import list_rocs_xml_files
            get_files = list_rocs_xml_files

        if get_catalog is None:
            from rpi_courses import ROCSCourseCatalog
            get_catalog = ROCSCourseCatalog.from_url

        for filename in get_files():
            # next(it) rather than it.next(): same result and same
            # StopIteration on a miss, but valid on Python 2.6+ and Python 3.
            name = next(self.FILE_RE.finditer(filename)).groups()[0]
            # NOTE(review): self.semesters is keyed by Semester.ref, which is
            # saved from `filename` below and selected by a full URL prefix in
            # __init__ -- confirm that `name + '.xml'` can ever equal a key.
            semester = self.semesters.get(name + '.xml')
            # if latest semester or newer semester
            if (not semester) or semester == self.latest_semester:
                catalog = get_catalog(filename)

                if self.latest_semester and semester == self.latest_semester and catalog.datetime <= self.latest_semester.date_updated:
                    continue  # already up-to-date

                logger.debug('found catalog for: %r %r' % (catalog.year, catalog.month))

                semester_obj, created = Semester.objects.get_or_create(
                    year=catalog.year,
                    month=catalog.month,
                    defaults={
                        'name': catalog.name,
                        'ref': filename,
                    })
                # track changes per semester so one semester's new sections do
                # not trigger the signal for every semester processed after it
                self.sections_changed = False
                self.create_courses(catalog, semester_obj)
                self.create_crosslistings(semester_obj, set(catalog.crosslistings.values()))
                semester_obj.save()  # => update date_updated property
                if created:
                    logger.debug(' CREATE SEMESTER ' + repr(semester_obj))
                else:
                    logger.debug(' EXISTS SEMESTER ' + repr(semester_obj))
                if self.sections_changed:
                    sections_modified.send(sender=self, semester=semester_obj)

                self.clear_unused(semester_obj)

    def create_courses(self, catalog, semester_obj):
        """Inserts all the course data, including section information, into the database from the catalog.

        Existing courses (matched on number and department) get their credits,
        grade type and comm-intensive flag refreshed; their name is only
        overwritten when `self.forced` is true.
        """
        # a set gives constant-time membership tests for every course below
        comm_intense_names = set(self.add_comm_intense(catalog, semester_obj))
        for course in catalog.get_courses():
            comm = course.name in comm_intense_names
            course_obj, created = Course.objects.get_or_create(
                number=course.num,
                department=self.get_or_create_department(semester_obj, code=course.dept, name=course.full_dept),
                defaults=dict(
                    name=course.name,
                    min_credits=course.cred[0],
                    max_credits=course.cred[1],
                    grade_type=course.grade_type,
                    is_comm_intense=comm,
                )
            )
            if not created:
                if self.forced:
                    course_obj.name = course.name
                course_obj.min_credits, course_obj.max_credits = course.cred
                course_obj.grade_type = course.grade_type
                course_obj.is_comm_intense = comm
                course_obj.save()
            OfferedFor.objects.get_or_create(course=course_obj, semester=semester_obj)
            self.create_sections(course, course_obj, semester_obj)
            logger.debug((' + ' if created else '   ') + course.name)

    def add_comm_intense(self, catalog, semester):
        """Returns the names of the catalog courses listed as communication intensive.

        The text returned by rpi_courses.get_comm_file(semester) is scanned for
        a five digit CRN followed by whitespace and four capital letters; each
        CRN is resolved through catalog.find_course_by_crn and unknown CRNs are
        skipped.  A name appears once per matching CRN.
        """
        from rpi_courses import get_comm_file
        pdf = get_comm_file(semester)
        names = []
        crns = re.findall(r"\d{5}\s[A-Z]{4}", pdf)
        # single-argument print(...) emits the same text on Python 2 and 3
        print("Found " + str(len(crns)) + " communication intensive sections")
        for match in crns:
            course = catalog.find_course_by_crn(int(match.split()[0]))
            if course is not None:
                print(course.name)
                names.append(course.name)
        return names

    def create_sections(self, course, course_obj, semester_obj):
        """Inserts all section data, including time period information, into the database from the catalog.

        Sets self.sections_changed when at least one Section row is created.
        """
        for section in course.sections:
            # TODO: encode prereqs / notes
            remove_prereq_notes(section)
            section_obj, created = Section.objects.get_or_create(
                crn=section.crn,
                semester=semester_obj,
                defaults=dict(
                    notes='\n'.join(section.notes),
                    number=section.num,
                    seats_taken=section.seats_taken,
                    seats_total=section.seats_total,
                    course=course_obj,
                )
            )

            if not created:
                section_obj.number = section.num
                section_obj.seats_taken = section.seats_taken
                section_obj.seats_total = section.seats_total
                section_obj.course = course_obj
                section_obj.notes = '\n'.join(section.notes)
                section_obj.save()
            else:
                # This used to assign False, which left the flag permanently
                # False and made the sections_modified signal in sync()
                # unreachable.
                self.sections_changed = True

            self.create_timeperiods(semester_obj, section, section_obj)

    # maps from catalog data to database representation
    DOW_MAPPER = {
        'Monday': Period.MONDAY,
        'Tuesday': Period.TUESDAY,
        'Wednesday': Period.WEDNESDAY,
        'Thursday': Period.THURSDAY,
        'Friday': Period.FRIDAY,
        'Saturday': Period.SATURDAY,
        'Sunday': Period.SUNDAY,
    }

    def compute_dow(self, days_of_week):
        """Assists in converting rpi_course's representation of days of the week to the database kind.

        The DOW_MAPPER values of the given day names are OR-ed together; names
        missing from DOW_MAPPER contribute nothing.
        """
        value = 0
        for dow in days_of_week:
            value |= self.DOW_MAPPER.get(dow, 0)
        return value

    def create_timeperiods(self, semester_obj, section, section_obj):
        """Creates all the SectionPeriod and Period instances for the given section object from
        the catalog and the section_obj database equivalent to refer to.
        """
        for period in section.periods:
            if None in (period.start, period.end):
                continue  # invalid period for all we care about... ignore.
            period_obj, pcreated = Period.objects.get_or_create(
                start=period.start_time,
                end=period.end_time,
                days_of_week_flag=self.compute_dow(period.days),
            )
            # goes through the synchronizer (not the model manager) so that
            # clear_unused() can later trim with knowledge of what was touched
            # -- presumably; Synchronizer is defined elsewhere
            sectionperiod_obj, created = self.SectionPeriod.get_or_create(
                period=period_obj,
                section=section_obj,
                semester=semester_obj,
                defaults=dict(
                    instructor=period.instructor,
                    location=period.location,
                    kind=period.type,
                )
            )
            if not created:
                sectionperiod_obj.instructor = period.instructor
                sectionperiod_obj.location = period.location
                sectionperiod_obj.kind = period.type
                sectionperiod_obj.save()

    def get_or_create_department(self, semester_obj, code, name=None):
        """Returns the Department with the given code, creating it and its link to the semester as needed."""
        dept, created = Department.objects.get_or_create(
            code=code,
            defaults={
                'name': name or ''
            }
        )
        SemesterDepartment.objects.get_or_create(
            semester=semester_obj,
            department=dept
        )
        return dept

    def create_crosslistings(self, semester_obj, crosslistings):
        """Creates all crosslisting information into the database for all the sections."""
        for crosslisting in crosslistings:
            refid = ','.join(map(str, sorted(crosslisting.crns)))
            crosslisting_obj, created = SectionCrosslisting.objects.get_or_create(semester=semester_obj, ref=refid)
            # Sections are keyed by (crn, semester) in create_sections(), so
            # restrict the update to this semester; matching on CRN alone
            # would also relink same-CRN sections of other semesters.
            Section.objects.filter(
                crn__in=crosslisting.crns,
                semester=semester_obj,
            ).update(crosslisted=crosslisting_obj)
Exemple #5
0
def cache_conflicts(semester_year=None, semester_month=None, semester=None,
                    sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for a single semester.

    The semester is either passed in directly (``semester``) or fetched by
    ``semester_year`` and ``semester_month``; an AssertionError is raised when
    neither is usable.  All database work happens inside one
    ``transaction.atomic()`` block.

    Sections of the semester are grouped with ``dict_by_attr(..., 'course')``
    and every pair of sections taken from two different groups is checked with
    ``conflicts_with``.  A conflicting pair that is not stored yet is inserted;
    one that is already stored is reported to the synchronizer through
    ``exclude_id``.  ``Syncer.trim(semester=...)`` runs at the very end.

    With ``sql`` true, new rows are collected and written with ``bulk_create``;
    otherwise every conflict goes through ``Syncer.get_or_create``.  ``stdout``
    is accepted but not read.  Progress markers are written to ``sys.stdout``.
    """
    assert (semester_year and semester_month) or semester, \
        "Semester year & month must be provided or the semester object."
    import sys

    def emit(text):
        # flush right away so progress is visible while the loop is running
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year,
                                                month=semester_month)

    with transaction.atomic():
        # Existing rows are reconciled rather than deleted and re-created:
        # we don't want to increment IDs too quickly (ev 25 minutes).
        known_ids = SectionConflict.objects.values_list('id', flat=True)
        Syncer = Synchronizer(SectionConflict, known_ids)

        semester_sections = courses.Section.objects.select_related('course', 'semester')
        semester_sections = semester_sections.by_semester(semester).prefetch_periods()
        section_courses = dict_by_attr(semester_sections, 'course')

        # (section1 id, section2 id) => id of the conflict row already stored
        stored_rows = SectionConflict.objects.filter(semester=semester)
        stored_rows = stored_rows.values_list('id', 'section1', 'section2')
        mapping = dict(((first_id, second_id), conflict_id)
                       for conflict_id, first_id, second_id in stored_rows)

        pending = []
        found = 0
        for left_course, right_course in itertools.combinations(section_courses.keys(), 2):
            section_pairs = itertools.product(section_courses[left_course],
                                              section_courses[right_course])
            for section1, section2 in section_pairs:
                if not section1.conflicts_with(section2):
                    continue

                # a pair is always handled with the lower section id first
                if section1.id > section2.id:
                    section1, section2 = section2, section1
                found += 1

                if not sql:
                    emit('C')
                    Syncer.get_or_create(section1=section1,
                                         section2=section2,
                                         semester=semester)
                    continue

                # flush whatever is pending on every 500th conflict seen
                if found % 500 == 0:
                    SectionConflict.objects.bulk_create(pending)
                    pending = []
                    emit('.')

                pair_key = (section1.id, section2.id)
                if pair_key in mapping:
                    Syncer.exclude_id(mapping[pair_key])
                else:
                    emit('C')
                    pending.append(SectionConflict(section1=section1,
                                                   section2=section2,
                                                   semester=semester))

        if sql and pending:
            emit('C')
            SectionConflict.objects.bulk_create(pending)

        emit('\n')
        Syncer.trim(semester=semester)
        emit('\n')
Exemple #6
0
class ROCSRPIImporter(object):
    """Handles the importation of RPI course data into the database."""
    FILE_RE = re.compile(r'(\d+)\.xml')

    # Default for the flag read by create_courses().  Without a default, an
    # importer whose `forced` attribute was never assigned raised
    # AttributeError on the first course that already existed.  Assigning
    # `importer.forced = True` on an instance still overrides this.
    forced = False

    def __init__(self, notifier):
        """Loads the stored ROCS semesters and sets up the SectionPeriod synchronizer.

        notifier: object whose requires_notification() is called by sync()
            whenever a new semester row is created.
        """
        self.semesters = {}  # semester.ref: semester obj
        self.notifier = notifier
        for semester in Semester.objects.filter(
                ref__startswith='http://sis.rpi.edu/reg/rocs/'):
            self.semesters[semester.ref] = semester

        self.latest_semester = None
        if self.semesters:
            self.latest_semester = max(self.semesters.values())

        # flipped to True by create_sections() when a Section row is inserted
        self.sections_changed = False
        self.SectionPeriod = Synchronizer(
            SectionPeriod, SectionPeriod.objects.values_list('id', flat=True))

    def clear_unused(self, semester):
        """Asks the SectionPeriod synchronizer to trim its rows for the given semester."""
        self.SectionPeriod.trim(semester=semester)

    def sync(self, get_files=None, get_catalog=None):
        """Performs the updating of the database data from RPI's SIS.

        get_files: callable returning the catalog file names to look at;
            defaults to rpi_courses.list_rocs_xml_files.
        get_catalog: callable that turns one of those names into a catalog;
            defaults to rpi_courses.ROCSCourseCatalog.from_url.

        Raises StopIteration when a file name does not match FILE_RE.
        """
        if get_files is None:
            from rpi_courses import list_rocs_xml_files
            get_files = list_rocs_xml_files

        if get_catalog is None:
            from rpi_courses import ROCSCourseCatalog
            get_catalog = ROCSCourseCatalog.from_url

        for filename in get_files():
            # next(it) rather than it.next(): same result and same
            # StopIteration on a miss, but valid on Python 2.6+ and Python 3.
            name = next(self.FILE_RE.finditer(filename)).groups()[0]
            # NOTE(review): self.semesters is keyed by Semester.ref, which is
            # saved from `filename` below and selected by a full URL prefix in
            # __init__ -- confirm that `name + '.xml'` can ever equal a key.
            semester = self.semesters.get(name + '.xml')
            # if latest semester or newer semester
            if (not semester) or semester == self.latest_semester:
                catalog = get_catalog(filename)

                if self.latest_semester and semester == self.latest_semester and catalog.datetime <= self.latest_semester.date_updated:
                    continue  # already up-to-date

                logger.debug('found catalog for: %r %r' %
                             (catalog.year, catalog.month))

                try:
                    semester_obj, created = Semester.objects.get_or_create(
                        year=catalog.year,
                        month=catalog.month,
                        defaults={
                            'visible': True,
                            'name': catalog.name,
                            'ref': filename,
                        })
                except IntegrityError as error:
                    # No semester object exists when get_or_create raises
                    # (the old message always printed None), so identify the
                    # catalog by its year and month instead.
                    logger.debug(' DUPLICATE SEMESTER %r %r: %r' %
                                 (catalog.year, catalog.month, error))
                    continue
                # track changes per semester so one semester's new sections do
                # not trigger the signal for every semester processed after it
                self.sections_changed = False
                self.create_courses(catalog, semester_obj)
                self.create_crosslistings(semester_obj,
                                          set(catalog.crosslistings.values()))
                semester_obj.save()  # => update date_updated property
                if created:
                    logger.debug(' CREATE SEMESTER ' + repr(semester_obj))
                    self.notifier.requires_notification()
                else:
                    logger.debug(' EXISTS SEMESTER ' + repr(semester_obj))
                if self.sections_changed:
                    sections_modified.send(sender=self, semester=semester_obj)

                self.clear_unused(semester_obj)

    def create_courses(self, catalog, semester_obj):
        """Inserts all the course data, including section information, into the database from the catalog.

        A course counts as existing when the semester has an OfferedFor row
        for its department code and number whose ref equals the course's
        name/dept/num ref (or is still empty, in which case the ref is filled
        in).  Existing courses get credits, grade type and comm-intensive flag
        refreshed; their name is only overwritten when `self.forced` is true.
        """
        # a set gives constant-time membership tests for every course below
        comm_intense_names = set(self.add_comm_intense(catalog, semester_obj))
        for course in catalog.get_courses():
            comm = course.name in comm_intense_names
            department = self.get_or_create_department(semester_obj,
                                                       code=course.dept,
                                                       name=course.full_dept)
            # we use our OfferedFor.ref to determine if we need to create a new
            # course or not.
            ref_name = '%r:%r:%r' % (course.name, course.dept, course.num)
            qs = OfferedFor.objects.filter(
                semester=semester_obj,
                course__department__code=course.dept,
                course__number=course.num)
            qs = qs.select_related('course')
            try:
                offered_for = qs.get(ref=ref_name)
                course_obj = offered_for.course
                created = False
            except OfferedFor.DoesNotExist:
                course_obj = None

            if not course_obj:
                # for migration support... set empty refs.
                try:
                    offered_for = qs.get(ref='')
                    offered_for.ref = ref_name
                    offered_for.save()
                    course_obj = offered_for.course
                    created = False
                except OfferedFor.DoesNotExist:
                    course_obj = None

            if not course_obj:
                course_obj = Course.objects.create(
                    name=course.name,
                    number=course.num,
                    department=department,
                    min_credits=course.cred[0],
                    max_credits=course.cred[1],
                    grade_type=course.grade_type,
                    is_comm_intense=comm,
                )
                created = True

            if not created:
                if self.forced:
                    course_obj.name = course.name
                course_obj.min_credits, course_obj.max_credits = course.cred
                course_obj.grade_type = course.grade_type
                course_obj.is_comm_intense = comm
                course_obj.save()
            else:
                OfferedFor.objects.get_or_create(course=course_obj,
                                                 semester=semester_obj,
                                                 ref=ref_name)
            self.create_sections(course, course_obj, semester_obj)
            crns = [str(s.crn) for s in course_obj.sections.all()]
            logger.debug(
                ' %s %s (crns: %s)' %
                (('+' if created else ' '), course.name, ', '.join(crns)))

    def add_comm_intense(self, catalog, semester):
        """Returns the names of the catalog courses listed as communication intensive.

        The text returned by rpi_courses.get_comm_file(semester) is scanned for
        a five digit CRN followed by whitespace and four capital letters; each
        CRN is resolved through catalog.find_course_by_crn and unknown CRNs are
        skipped.  A name appears once per matching CRN.
        """
        from rpi_courses import get_comm_file
        pdf = get_comm_file(semester)
        names = []
        crns = re.findall(r"\d{5}\s[A-Z]{4}", pdf)
        # single-argument print(...) emits the same text on Python 2 and 3
        print("Found " + str(len(crns)) + " communication intensive sections")
        for match in crns:
            course = catalog.find_course_by_crn(int(match.split()[0]))
            if course is not None:
                print(course.name)
                names.append(course.name)
        return names

    def create_sections(self, course, course_obj, semester_obj):
        """Inserts all section data, including time period information, into the database from the catalog.

        Sets self.sections_changed when at least one Section row is created.
        """
        for section in course.sections:
            # TODO: encode prereqs / notes
            remove_prereq_notes(section)
            section_obj, created = Section.objects.get_or_create(
                crn=section.crn,
                semester=semester_obj,
                defaults=dict(
                    notes='\n'.join(section.notes),
                    number=section.num,
                    seats_taken=section.seats_taken,
                    seats_total=section.seats_total,
                    course=course_obj,
                ))

            if not created:
                section_obj.number = section.num
                section_obj.seats_taken = section.seats_taken
                section_obj.seats_total = section.seats_total
                section_obj.course = course_obj
                section_obj.notes = '\n'.join(section.notes)
                section_obj.save()
            else:
                # This used to assign False, which left the flag permanently
                # False and made the sections_modified signal in sync()
                # unreachable.
                self.sections_changed = True

            self.create_timeperiods(semester_obj, section, section_obj)

    # maps from catalog data to database representation
    DOW_MAPPER = {
        'Monday': Period.MONDAY,
        'Tuesday': Period.TUESDAY,
        'Wednesday': Period.WEDNESDAY,
        'Thursday': Period.THURSDAY,
        'Friday': Period.FRIDAY,
        'Saturday': Period.SATURDAY,
        'Sunday': Period.SUNDAY,
    }

    def compute_dow(self, days_of_week):
        """Assists in converting rpi_course's representation of days of the week to the database kind.

        The DOW_MAPPER values of the given day names are OR-ed together; names
        missing from DOW_MAPPER contribute nothing.
        """
        value = 0
        for dow in days_of_week:
            value |= self.DOW_MAPPER.get(dow, 0)
        return value

    def create_timeperiods(self, semester_obj, section, section_obj):
        """Creates all the SectionPeriod and Period instances for the given section object from
        the catalog and the section_obj database equivalent to refer to.
        """
        for period in section.periods:
            if None in (period.start, period.end):
                continue  # invalid period for all we care about... ignore.
            period_obj, pcreated = Period.objects.get_or_create(
                start=period.start_time,
                end=period.end_time,
                days_of_week_flag=self.compute_dow(period.days),
            )
            # goes through the synchronizer (not the model manager) so that
            # clear_unused() can later trim with knowledge of what was touched
            # -- presumably; Synchronizer is defined elsewhere
            sectionperiod_obj, created = self.SectionPeriod.get_or_create(
                period=period_obj,
                section=section_obj,
                semester=semester_obj,
                defaults=dict(
                    instructor=period.instructor,
                    location=period.location,
                    kind=period.type,
                ))
            if not created:
                sectionperiod_obj.instructor = period.instructor
                sectionperiod_obj.location = period.location
                sectionperiod_obj.kind = period.type
                sectionperiod_obj.save()

    def get_or_create_department(self, semester_obj, code, name=None):
        """Returns the Department with the given code, creating it and its link to the semester as needed."""
        dept, created = Department.objects.get_or_create(
            code=code, defaults={'name': name or ''})
        SemesterDepartment.objects.get_or_create(semester=semester_obj,
                                                 department=dept)
        return dept

    def create_crosslistings(self, semester_obj, crosslistings):
        """Creates all crosslisting information into the database for all the sections."""
        for crosslisting in crosslistings:
            refid = ','.join(map(str, sorted(crosslisting.crns)))
            crosslisting_obj, created = SectionCrosslisting.objects.get_or_create(
                semester=semester_obj, ref=refid)
            # Sections are keyed by (crn, semester) in create_sections(), so
            # restrict the update to this semester; matching on CRN alone
            # would also relink same-CRN sections of other semesters.
            Section.objects.filter(
                crn__in=crosslisting.crns,
                semester=semester_obj,
            ).update(crosslisted=crosslisting_obj)
Exemple #7
0
def cache_conflicts(semester_year=None, semester_month=None, semester=None, sql=True, stdout=False):
    """Rebuild the stored SectionConflict rows for a single semester.

    The semester is either passed in directly (``semester``) or fetched by
    ``semester_year`` and ``semester_month``; an AssertionError is raised when
    neither is usable.  All database work happens inside one
    ``transaction.atomic()`` block.

    Sections of the semester are grouped with ``dict_by_attr(..., "course")``
    and every pair of sections taken from two different groups is checked with
    ``conflicts_with``.  A conflicting pair that is not stored yet is inserted;
    one that is already stored is reported to the synchronizer through
    ``exclude_id``.  ``Syncer.trim(semester=...)`` runs at the very end.

    With ``sql`` true, new rows are collected and written with ``bulk_create``;
    otherwise every conflict goes through ``Syncer.get_or_create``.  ``stdout``
    is accepted but not read.  Progress markers are written to ``sys.stdout``.
    """
    assert (semester_year and semester_month) or semester, (
        "Semester year & month must be provided or the semester object."
    )
    import sys

    def emit(text):
        # flush right away so progress is visible while the loop is running
        sys.stdout.write(text)
        sys.stdout.flush()

    if not semester:
        semester = courses.Semester.objects.get(year=semester_year, month=semester_month)

    with transaction.atomic():
        # Existing rows are reconciled rather than deleted and re-created:
        # we don't want to increment IDs too quickly (ev 25 minutes).
        known_ids = SectionConflict.objects.values_list("id", flat=True)
        Syncer = Synchronizer(SectionConflict, known_ids)

        semester_sections = courses.Section.objects.select_related("course", "semester")
        semester_sections = semester_sections.by_semester(semester).prefetch_periods()
        section_courses = dict_by_attr(semester_sections, "course")

        # (section1 id, section2 id) => id of the conflict row already stored
        stored_rows = SectionConflict.objects.filter(semester=semester)
        stored_rows = stored_rows.values_list("id", "section1", "section2")
        mapping = dict(((first_id, second_id), conflict_id) for conflict_id, first_id, second_id in stored_rows)

        pending = []
        found = 0
        for left_course, right_course in itertools.combinations(section_courses.keys(), 2):
            section_pairs = itertools.product(section_courses[left_course], section_courses[right_course])
            for section1, section2 in section_pairs:
                if not section1.conflicts_with(section2):
                    continue

                # a pair is always handled with the lower section id first
                if section1.id > section2.id:
                    section1, section2 = section2, section1
                found += 1

                if not sql:
                    emit("C")
                    Syncer.get_or_create(section1=section1, section2=section2, semester=semester)
                    continue

                # flush whatever is pending on every 500th conflict seen
                if found % 500 == 0:
                    SectionConflict.objects.bulk_create(pending)
                    pending = []
                    emit(".")

                pair_key = (section1.id, section2.id)
                if pair_key in mapping:
                    Syncer.exclude_id(mapping[pair_key])
                else:
                    emit("C")
                    pending.append(SectionConflict(section1=section1, section2=section2, semester=semester))

        if sql and pending:
            emit("C")
            SectionConflict.objects.bulk_create(pending)

        emit("\n")
        Syncer.trim(semester=semester)
        emit("\n")