Example #1
0
    def extract_time_range(self, unformatted_time_range):
        """Split a 'START - END' time range and store both ends.

        The two halves are written to ``self.ingestor['time_start']`` and
        ``self.ingestor['time_end']`` after a bare ``a``/``p`` marker is
        expanded to ``am``/``pm``.

        Args:
            unformatted_time_range: raw time text, e.g. ``'10:00a - 11:15a'``.

        Raises:
            ParseJump: if the text is 'TBA' or empty, or if it does not
                contain the ' - ' separator.
        """
        if unformatted_time_range == 'TBA' or unformatted_time_range == '':
            raise ParseJump(self.ingestor['course_code'] + ' time TBA')

        search = re.match(r'(.*) \- (.*)', unformatted_time_range)
        if search is None:
            raise ParseJump('time not found on page')

        def ampm(x):
            # Only expand an 'a'/'p' that is not already followed by 'm';
            # chained str.replace turned '9:00am' into '9:00amm'.
            return re.sub(r'([ap])(?!m)', r'\1m', x)

        self.ingestor['time_start'] = ampm(search.group(1))
        self.ingestor['time_end'] = ampm(search.group(2))
Example #2
0
    def extract_time_range(self, unformatted_time_range):
        """Parse a "START - END" time range into the ingestor.

        Writes the start and end times to ``self.ingestor["time_start"]``
        and ``self.ingestor["time_end"]``, expanding a bare ``a``/``p``
        marker to ``am``/``pm`` on the way.

        Args:
            unformatted_time_range: raw time text, e.g. ``"10:00a - 11:15a"``.

        Raises:
            ParseJump: if the text is "TBA" or empty, or if the " - "
                separator is missing.
        """
        if unformatted_time_range == "TBA" or unformatted_time_range == "":
            raise ParseJump(self.ingestor["course_code"] + " time TBA")

        search = re.match(r"(.*) \- (.*)", unformatted_time_range)
        if search is None:
            raise ParseJump("time not found on page")

        def ampm(x):
            # The negative lookahead keeps an existing "am"/"pm" intact;
            # plain str.replace would produce "amm"/"pmm" for such input.
            return re.sub(r"([ap])(?!m)", r"\1m", x)

        self.ingestor["time_start"] = ampm(search.group(1))
        self.ingestor["time_end"] = ampm(search.group(2))
Example #3
0
 def extract_days(self, unformatted_days):
     """Store the meeting days as a list with one entry per character.

     Raises ParseJump when the days text is 'TBA' or empty.
     """
     if unformatted_days in ('TBA', ''):
         raise ParseJump(self.ingestor['course_code'] + ' days TBA')
     self.ingestor['days'] = [day for day in unformatted_days]
Example #4
0
    def parse_course(self, soup):
        """Parse one course listing, ingest it, and return its class number.

        Reads the class number and term code from the listing's onclick
        handler, requests the section detail page through
        ``self.requester.get``, fills ``self.ingestor`` from the page heading
        and the detail panels, then ingests the course and its section.

        Raises:
            ParseJump: if the listing is marked cancelled, or the heading
                abbreviation does not match ``X-Y-Z``.
            ParseError: if the detail page has a different number of
                headers and panels.
        """
        # Cancelled classes are skipped outright
        if soup.find('a', class_='cancelledStatus'):
            raise ParseJump('cancelled course')

        # The onclick handler carries the identifiers needed for the
        # detail request
        onclick = soup.find('td', class_='classSection')['onclick']
        panel_call = re.search(
            r"showClassDetailPanel.fire\({classNumber : '([0-9]*)', termCode : '([0-9]*)',",
            onclick)
        course_number = panel_call.group(1)
        term_code = panel_call.group(2)

        request_params = {
            'classNumber': course_number,
            'termCode': term_code
        }
        soup = self.requester.get(Parser.URL + '/GetClassSectionDetail.action',
                                  params=request_params)

        # The dialog heading holds "<abbr>: ..." on one line and the
        # course name on the next
        heading_text = soup.find(id='classSectionDetailDialog').find('h1').text
        heading = re.search(r'(.*):.*\n(.*)', heading_text)
        abbr = heading.group(1)

        # abbr is department code, catalog ID and section number joined
        # by dashes
        code_parts = re.match(r'(\S*)-(\S*)-(\S*)', abbr)
        if not code_parts:
            raise ParseJump('no title in course')
        department, catalog_id, section = code_parts.groups()

        self.ingestor['course_name'] = heading.group(2)
        self.ingestor['course_code'] = department + '-' + catalog_id
        self.ingestor['section_code'] = '(' + section.strip() + ')'

        # Each detail header is paired with the panel at the same position
        headers = soup.find_all('div', class_='detailHeader')
        panels = soup.find_all('div', class_='detailPanel')
        if len(headers) != len(panels):
            raise ParseError('there should be equal detail headers and panels')

        # Header text -> name of the method that handles its panel. Methods
        # are looked up by name only when a matching header is seen. Any
        # other header (including "Cross Listings" and
        # "Ad Hoc Meeting Times") is ignored.
        handler_names = {
            "Details": 'parse_labeled_table',
            "Availability": 'parse_labeled_table',
            "Description": 'extract_description',
            "Notes": 'extract_notes',
            "Meeting Times": 'parse_meeting_times',
            "Attributes": 'parse_attributes',
        }
        for header_tag, panel in zip(headers, panels):
            handler_name = handler_names.get(header_tag.text.strip())
            if handler_name is not None:
                getattr(self, handler_name)(panel)

        course = self.ingestor.ingest_course()
        self.ingestor.ingest_section(course)
        self.ingestor['meetings'] = []

        return course_number
Example #5
0
 def extract_days(self, unformatted_days):
     """Record the meeting days, one list entry per character of the input.

     Raises ParseJump when the days text is "TBA" or empty.
     """
     if unformatted_days in ("TBA", ""):
         raise ParseJump(self.ingestor["course_code"] + " days TBA")
     day_letters = []
     day_letters.extend(unformatted_days)
     self.ingestor["days"] = day_letters
Example #6
0
    def parse_course(self, soup):
        """Ingest the course and section described by one listing.

        The listing's onclick handler supplies the class number and term
        code used to request the section detail page via
        ``self.requester.get``. The page heading and detail panels populate
        ``self.ingestor`` before the course and section are ingested.

        Returns:
            The class number extracted from the listing's onclick handler.

        Raises:
            ParseJump: if the listing is marked cancelled, or the heading
                abbreviation does not match ``X-Y-Z``.
            ParseError: if the number of detail headers and panels differ.
        """
        # Skip cancelled classes
        if soup.find("a", class_="cancelledStatus"):
            raise ParseJump("cancelled course")

        # Pull the class number and term code out of the onclick handler
        onclick_js = soup.find("td", class_="classSection")["onclick"]
        ids = re.search(
            r"showClassDetailPanel.fire\({classNumber : '([0-9]*)', termCode : '([0-9]*)',",
            onclick_js,
        )
        course_number = ids.group(1)
        term_code = ids.group(2)

        # From here on, soup is the section detail page
        detail_url = Parser.URL + "/GetClassSectionDetail.action"
        soup = self.requester.get(
            detail_url,
            params={
                "classNumber": course_number,
                "termCode": term_code
            },
        )

        # Heading: "<abbr>: ..." followed by the course name on the next line
        dialog = soup.find(id="classSectionDetailDialog")
        name_match = re.search(r"(.*):.*\n(.*)", dialog.find("h1").text)
        abbr = name_match.group(1)

        # abbr is department code, catalog ID and section number joined
        # by dashes
        title = re.match(r"(\S*)-(\S*)-(\S*)", abbr)
        if not title:
            raise ParseJump("no title in course")

        dept_code, catalog_id, section_number = title.groups()
        self.ingestor["course_name"] = name_match.group(2)
        self.ingestor["course_code"] = dept_code + "-" + catalog_id
        self.ingestor["section_code"] = "(" + section_number.strip() + ")"

        # Detail headers and panels are matched up by position
        detail_headers = soup.find_all("div", class_="detailHeader")
        detail_panels = soup.find_all("div", class_="detailPanel")
        if len(detail_headers) != len(detail_panels):
            raise ParseError("there should be equal detail headers and panels")

        for header_div, panel_div in zip(detail_headers, detail_panels):
            header = header_div.text.strip()

            # Pick the parsing strategy from the header text. Any other
            # header (including "Cross Listings" and
            # "Ad Hoc Meeting Times") is left unparsed.
            if header in ("Details", "Availability"):
                self.parse_labeled_table(panel_div)
            elif header == "Description":
                self.extract_description(panel_div)
            elif header == "Notes":
                self.extract_notes(panel_div)
            elif header == "Meeting Times":
                self.parse_meeting_times(panel_div)
            elif header == "Attributes":
                self.parse_attributes(panel_div)

        course = self.ingestor.ingest_course()
        self.ingestor.ingest_section(course)
        self.ingestor["meetings"] = []

        return course_number