Beispiel #1
0
    def section_from_header_link(self, header_link, course, term):
        """Builds a section from the information in the header link, as well as the supplied course and term

        The link text is expected to look like ``<index>-<type> (<solus id>)``.

        Args:
            header_link: Element exposing ``get_text()`` and an ``'id'`` item.
            course: Course the section belongs to.
            term: Term the section is offered in.

        Returns:
            The section obtained through ``e_or_n``. It is not saved here; the
            link's id is stored on its ``click_action`` attribute.

        Raises:
            ValueError: If the link text does not match the expected format.
        """
        header_text = header_link.get_text()

        # Raw string: '\S' and '\(' are invalid escape sequences in a plain literal
        match = re.search(r'(\S+)-(\S+)\s+\((\S+)\)', header_text)
        if match is None:
            # Fail with a readable message instead of an AttributeError on None
            raise ValueError(
                'Unrecognized section header text: "{0}"'.format(header_text))

        index_in_course, type_abbr, solus_id = match.groups()

        # Warn when no section type with this abbreviation is stored yet
        if cc.SectionType.objects.filter(abbreviation=type_abbr).count() == 0:
            print('WARNING: No section type for abbreviation: "{0}"'.format(type_abbr))

        section_type = e_or_n(cc.SectionType, abbreviation=type_abbr)
        section_type.save()

        attrs = {
            'solus_id': solus_id,
            'index_in_course': index_in_course,
            'type': section_type,
            'course': course,
            'term': term,
        }

        # Make a base section from the supplied attributes
        section = e_or_n(cc.Section, **attrs)

        # Store the link action for visiting later in case of a deep scrape
        section.click_action = header_link['id']

        return section
Beispiel #2
0
    def build_timeslots(self, all_days, start_time_str, end_time_str):
        """
        Builds one timeslot per two-letter day code in a combo like 'MoTuWeSaSu'.

        Returns None when all_days is empty or "TBA". A start or end string
        that is empty or "TBA" leaves that time as None. Day codes are taken
        from the end of the string, so the returned list is in reverse order
        of appearance.
        """
        if not all_days or all_days == "TBA":
            return None

        def parse_time(text):
            # Empty or "TBA" means the time is not known
            if not text or text == "TBA":
                return None
            return datetime.strptime(text, "%I:%M%p")

        start_time = parse_time(start_time_str)
        end_time = parse_time(end_time_str)

        timeslots = []

        # Walk backwards through the string, two characters per step
        for stop in range(len(all_days), 0, -2):
            day_abbr = all_days[max(stop - 2, 0):stop]

            weekday = e_or_n(cc.DayOfWeek, abbreviation=day_abbr)
            weekday.save()

            timeslot = e_or_n(cc.Timeslot,
                              day_of_week=weekday,
                              start_time=start_time,
                              end_time=end_time)
            timeslot.save(was_scraped=True)

            timeslots.append(timeslot)

        return timeslots
Beispiel #3
0
    def section_from_header_link(self, header_link, course, term):
        """Builds a section from the information in the header link, as well as the supplied course and term

        The link text is expected to look like ``<index>-<type> (<solus id>)``.

        Args:
            header_link: Element exposing ``get_text()`` and an ``'id'`` item.
            course: Course the section belongs to.
            term: Term the section is offered in.

        Returns:
            The section obtained through ``e_or_n``. It is not saved here; the
            link's id is stored on its ``click_action`` attribute.

        Raises:
            ValueError: If the link text does not match the expected format.
        """
        link_text = header_link.get_text()

        # Raw string: '\S' and '\(' are invalid escape sequences in a plain literal
        m = re.search(r'(\S+)-(\S+)\s+\((\S+)\)', link_text)
        if m is None:
            # Fail with a readable message instead of an AttributeError on None
            raise ValueError(
                'Unrecognized section header text: "{0}"'.format(link_text))

        type_abbreviation = m.group(2)

        # Warn when no section type with this abbreviation is stored yet
        if cc.SectionType.objects.filter(
                abbreviation=type_abbreviation).count() == 0:
            print('WARNING: No section type for abbreviation: "{0}"'.format(
                type_abbreviation))

        section_type = e_or_n(cc.SectionType, abbreviation=type_abbreviation)
        section_type.save()

        attrs = {
            'solus_id': m.group(3),
            'index_in_course': m.group(1),
            'type': section_type,
            'course': course,
            'term': term,
        }

        # Make a base section from the supplied attributes
        section = e_or_n(cc.Section, **attrs)

        # Store the link action for visiting later in case of a deep scrape
        section.click_action = header_link['id']

        return section
Beispiel #4
0
    def terms_offered(self):
        """Returns the terms during which the current course is offered

        Reads the options of the 'DERIVED_SAA_CRS_TERM_ALT' dropdown found in
        self.soup. Each option's text is split at the first space into a year
        and a season. Terms from past years, and Winter terms of the current
        year, are skipped with a console message.

        Returns:
            A list of terms obtained through e_or_n. Terms are not saved here
            (their seasons are); each term carries the option's value on its
            dropdown_value attribute.
        """

        terms = []

        term_dropdown = self.soup.find("select", {'id': 'DERIVED_SAA_CRS_TERM_ALT'})

        # The current year does not change between options, so look it up once
        current_year = datetime.now().year

        for option in term_dropdown.find_all("option"):

            # Raw string: '\s' is an invalid escape sequence in a plain literal
            m = re.search(r'^([^\s]+) (.*)$', option.get_text())

            dropdown_value = option['value']
            year = m.group(1)
            season = m.group(2)
            year_number = int(year)

            # Check if this term is old enough to be skipped
            if year_number < current_year or (year_number == current_year and season == "Winter"):
                print("--------Omitting outdated term: {season} - {year}".format(season=season, year=year))
                continue

            # Otherwise, make a real season and term
            season = e_or_n(cc.Season, name=season)
            season.save(was_scraped=True)

            term = e_or_n(cc.Term, year=year, season=season)

            # Store the dropdown value so we can request this term later in scraping
            term.dropdown_value = dropdown_value

            terms.append(term)

        return terms
Beispiel #5
0
    def add_attribute_pair(self, attr, value, course):
        """
        Receives an attribute label and its value, and stores the value on
        the course, converting it to a model instance first if necessary.

        Raises an Exception when the label is missing from
        self.attribute_mappings.
        """

        # Reject labels we have no mapping for
        if attr not in self.attribute_mappings:
            raise Exception(
                'Encountered unexpected course attribute with label: "{0}"'.
                format(attr))

        # Name of the course attribute this value ends up on
        attribute_name = self.attribute_mappings[attr]

        # Some labels map to a model class; otherwise the raw value is kept
        if attr in self.attribute_class_mappings:
            model_cls = self.attribute_class_mappings[attr]
            value = e_or_n(model_cls, name=value)

            # This model will have to be saved if it's new
            value.save()

        # Some labels have an extra handler (e.g. requisites)
        if attr in self.many_attribute_mappings:
            handler = self.many_attribute_mappings[attr]
            handler(self, value, course)

        setattr(course, attribute_name, value)
    def add_attribute_pair(self, attr, value, course):
        """
        Receives an attribute label and its value, and stores the value on
        the course, converting it to a model instance first if necessary.

        Raises an Exception when the label is missing from
        self.attribute_mappings.
        """

        known_labels = self.attribute_mappings

        # Unknown labels are an error
        if attr not in known_labels:
            raise Exception('Encountered unexpected course attribute with label: "{0}"'.format(attr))

        # Name of the course attribute this value ends up on
        target_name = known_labels[attr]

        # Turn the raw value into a model instance when a class is registered for the label
        if attr in self.attribute_class_mappings:
            value = e_or_n(self.attribute_class_mappings[attr], name=value)

            # This model will have to be saved if it's new
            value.save()

        # Run the extra handler registered for this label, if any (e.g. requisites)
        if attr in self.many_attribute_mappings:
            self.many_attribute_mappings[attr](self, value, course)

        setattr(course, target_name, value)
Beispiel #7
0
    def current_course(self, subject):
        """Builds the course shown on the current page and returns it.

        The title and number come from self.get_title(); the remaining
        attributes are filled in by self.add_info_table_attributes().
        """

        # Title and number identify the course together with its subject
        title, number = self.get_title()

        course = e_or_n(cc.Course, title=title, number=number, subject=subject)

        self.add_info_table_attributes(course)

        return course
Beispiel #8
0
    def terms_offered(self):
        """Returns the terms during which the current course is offered

        Reads the options of the 'DERIVED_SAA_CRS_TERM_ALT' dropdown found in
        self.soup. Each option's text is split at the first space into a year
        and a season. Terms from past years, and Winter terms of the current
        year, are skipped with a console message.

        Returns:
            A list of terms obtained through e_or_n. Terms are not saved here
            (their seasons are); each term carries the option's value on its
            dropdown_value attribute.
        """

        terms = []

        term_dropdown = self.soup.find("select",
                                       {'id': 'DERIVED_SAA_CRS_TERM_ALT'})

        # The current year does not change between options, so look it up once
        this_year = datetime.now().year

        for option in term_dropdown.find_all("option"):

            # Raw string: '\s' is an invalid escape sequence in a plain literal
            m = re.search(r'^([^\s]+) (.*)$', option.get_text())

            dropdown_value = option['value']
            year = m.group(1)
            season = m.group(2)
            numeric_year = int(year)

            # Check if this term is old enough to be skipped
            outdated = numeric_year < this_year or (
                numeric_year == this_year and season == "Winter")
            if outdated:
                print(
                    "--------Omitting outdated term: {season} - {year}".format(
                        season=season, year=year))
                continue

            # Otherwise, make a real season and term
            season = e_or_n(cc.Season, name=season)
            season.save(was_scraped=True)

            term = e_or_n(cc.Term, year=year, season=season)

            # Store the dropdown value so we can request this term later in scraping
            term.dropdown_value = dropdown_value

            terms.append(term)

        return terms
Beispiel #9
0
    def build_timeslots(self, all_days, start_time_str, end_time_str):
        """
        Builds one timeslot per two-letter day code in a combo like 'MoTuWeSaSu'.

        Returns None when all_days is empty or "TBA". A start or end string
        that is empty or "TBA" leaves that time as None. Day codes are taken
        from the end of the string, so the returned list is in reverse order
        of appearance.
        """
        if not all_days or all_days == "TBA":
            return None

        time_format = "%I:%M%p"

        # Unknown times stay None
        start_time = (datetime.strptime(start_time_str, time_format)
                      if start_time_str and start_time_str != "TBA" else None)
        end_time = (datetime.strptime(end_time_str, time_format)
                    if end_time_str and end_time_str != "TBA" else None)

        # Peel two-character day codes off the end of the string
        day_codes = []
        remaining = all_days
        while remaining:
            day_codes.append(remaining[-2:])
            remaining = remaining[:-2]

        timeslots = []
        for day_abbr in day_codes:
            weekday = e_or_n(cc.DayOfWeek, abbreviation=day_abbr)
            weekday.save()

            timeslot = e_or_n(cc.Timeslot,
                              day_of_week=weekday,
                              start_time=start_time,
                              end_time=end_time)
            timeslot.save(was_scraped=True)

            timeslots.append(timeslot)

        return timeslots
Beispiel #10
0
    def build_section(self, header_link, component_table, course, term):
        """Saves a section and one component per table row and timeslot.

        The section comes from self.section_from_header_link(). The first
        'tr' of component_table is treated as a header and dropped. For every
        remaining row the cleaned 'span' texts are read by position: days,
        start time, end time, room, instructors, date range.

        Returns the saved section.
        """

        section = self.section_from_header_link(header_link, course, term)
        section.save()

        component_rows = component_table.find_all('tr', {})

        # The first row only holds the column headers
        del component_rows[0]

        for row in component_rows:
            cells = [self.clean_HTML(span.get_text())
                     for span in row.find_all('span')]

            room = cells[3]
            instructors_text = cells[4]

            start_date, end_date = self.date_range(cells[5])
            instructors = self.instructors_from_string(instructors_text)
            timeslots = self.build_timeslots(cells[0], cells[1], cells[2])

            # Without a timeslot we still want one component, with a TBA timeslot
            if timeslots is None:
                timeslots = [None]

            # One component per timeslot (i.e. per day)
            for timeslot in timeslots:
                component = e_or_n(cc.SectionComponent,
                                   section=section,
                                   room=room,
                                   start_date=start_date,
                                   end_date=end_date,
                                   timeslot=timeslot)
                component.instructors = instructors
                component.save(was_scraped=True)

        return section
    def current_course(self, subject):
        """Builds the course shown on the current page and returns it.

        The title and number come from self.get_title(); the remaining
        attributes are filled in by self.add_info_table_attributes().
        """

        # get_title() yields the pair (title, number)
        course_title, course_number = self.get_title()

        course = e_or_n(cc.Course,
                        title=course_title,
                        number=course_number,
                        subject=subject)

        self.add_info_table_attributes(course)

        return course
Beispiel #12
0
    def add_requisites(self, enrollment_reqs, course):
        """Saves a requisite for every course code found in enrollment_reqs.

        A course code is 3-4 capital letters, optional whitespace, three
        digits and an optional 'A' or 'B'. The start and end offsets of each
        match are stored with the requisite.
        """
        course_pattern = re.compile(
            r'(?P<abbr>[A-Z]{3,4})\s*(?P<num>\d{3}[AB]?)')

        for found in course_pattern.finditer(enrollment_reqs):
            requisite = e_or_n(
                cc.Requisite,
                subject_abbr=found.group('abbr'),
                course_number=found.group('num'),
                left_index=found.start(),
                right_index=found.end(),
                for_course=course,
            )
            requisite.save()
    def add_requisites(self, enrollment_reqs, course):
        """Saves a requisite for every course code found in enrollment_reqs.

        A course code is 3-4 capital letters, optional whitespace, three
        digits and an optional 'A' or 'B'. The start and end offsets of each
        match are stored with the requisite.
        """
        course_re = r'(?P<abbr>[A-Z]{3,4})\s*(?P<num>\d{3}[AB]?)'

        for hit in re.finditer(course_re, enrollment_reqs):
            left, right = hit.span()
            req = e_or_n(cc.Requisite,
                         subject_abbr=hit.group('abbr'),
                         course_number=hit.group('num'),
                         left_index=left,
                         right_index=right,
                         for_course=course)
            req.save()
Beispiel #14
0
    def build_section(self, header_link, component_table, course, term):
        """Saves a section and one component per table row and timeslot.

        The section comes from self.section_from_header_link(). The first
        'tr' of component_table is treated as a header and dropped. For every
        remaining row the cleaned 'span' texts are read by position: days,
        start time, end time, room, instructors, date range.

        Returns the saved section.
        """

        section = self.section_from_header_link(header_link, course, term)
        section.save()

        rows = component_table.find_all('tr', {})

        # Drop the header row
        del rows[0]

        for row in rows:
            texts = [self.clean_HTML(span.get_text()) for span in row.find_all('span')]

            room = texts[3]
            instructor_names = texts[4]

            # start/end dates
            start_date, end_date = self.date_range(texts[5])

            instructors = self.instructors_from_string(instructor_names)

            # Days, start time and end time sit in the first three cells
            slots = self.build_timeslots(texts[0], texts[1], texts[2])
            if slots is None:
                # No timeslot: still create one component with a TBA timeslot
                slots = [None]

            base_attrs = {
                'section': section,
                'room': room,
                'start_date': start_date,
                'end_date': end_date,
            }

            # Create a section component for each day
            for slot in slots:
                component = e_or_n(cc.SectionComponent, timeslot=slot, **base_attrs)
                component.instructors = instructors
                component.save(was_scraped=True)

        return section
Beispiel #15
0
    def subject_from_dropdown(self, subject_index):
        """Returns the subject behind the dropdown link for subject_index on the current page, or None if that link does not exist

        The link name comes from self._subject_link_name(subject_index) and is
        stored on the subject's click_action attribute. The link text is
        expected to look like '<abbreviation> - <title>'.
        """

        link_name = self._subject_link_name(subject_index)

        dropdown_link = self.soup.find("a", {"name": link_name})

        # No such link on this page
        if not dropdown_link:
            return None

        # Split the link text into abbreviation and title
        link_text = dropdown_link.get_text().strip()
        parsed = re.search("^([^-]*) - (.*)$", link_text)
        subject_abbr, subject_title = parsed.groups()

        subject = e_or_n(cc.Subject, title=subject_title, abbreviation=subject_abbr)

        # Keep the link name around so it can be clicked later
        subject.click_action = link_name

        return subject
Beispiel #16
0
    def instructors_from_string(self, instructors_str):
        """Returns the list of saved instructors named in a comma separated string

        Names are expected as "Last, Other" pairs, themselves separated by
        commas. An empty string, "TBA" or "Staff" gives an empty list.
        """

        # Nothing to build for missing or placeholder values
        if not instructors_str or instructors_str in ("TBA", "Staff"):
            return []

        # Collapse whitespace, then split on every comma
        # (one between profs, one after last names)
        collapsed = re.sub(r'\s+', ' ', instructors_str)
        fragments = [piece.strip() for piece in collapsed.split(",")]

        instructors = []

        # Fragments come in (last name, other names) pairs
        for index in range(0, len(fragments), 2):
            full_name = u"%s, %s" % (fragments[index], fragments[index + 1])

            instructor = e_or_n(cc.Instructor, name=full_name)
            instructor.save(was_scraped=True)
            instructors.append(instructor)

        return instructors
Beispiel #17
0
    def instructors_from_string(self, instructors_str):
        """Returns the list of saved instructors named in a comma separated string

        Names are expected as "Last, Other" pairs, themselves separated by
        commas. An empty string, "TBA" or "Staff" gives an empty list.
        """

        instructors = []

        placeholder = instructors_str == "TBA" or instructors_str == "Staff"
        if not instructors_str or placeholder:
            # Missing or placeholder value: no instructors
            return instructors

        # Normalize whitespace before splitting on commas
        # (one between profs, one after last names)
        normalized = re.sub(r'\s+', ' ', instructors_str)
        parts = [part.strip() for part in normalized.split(",")]

        # Every two consecutive parts form one full name
        position = 0
        while position < len(parts):
            last_name = parts[position]
            other_names = parts[position + 1]

            instructor = e_or_n(cc.Instructor,
                                name=u"%s, %s" % (last_name, other_names))
            instructor.save(was_scraped=True)
            instructors.append(instructor)

            position += 2

        return instructors