Beispiel #1
0
    def merge_course_if_fullyear(self):
        """Merge the current course with its other half when it is full-year.

        A course whose number ends in "A" or "B" is treated as one half of a
        full-year course; the other half has the same key with the last
        character swapped for the opposite letter. When that other half is
        already in ``self.courses_dict``, both halves are deleted from the
        dict and a merged ``SolusModels.SolusCourse`` is added through
        ``self.add_course``. Otherwise nothing is modified.
        """
        # Suffix of this half -> suffix of the matching half.
        other_half_letters = {"A": "B", "B": "A"}

        # Slicing (unlike num[-1]) yields "" for an empty number instead of
        # raising IndexError, so such a course just counts as "not full year".
        other_half_letter = other_half_letters.get(self.course.num[-1:])
        if other_half_letter is None:
            #Not a full year course
            return

        course_key = self.course.get_key()
        other_half_key = course_key[:-1] + other_half_letter

        if other_half_key not in self.courses_dict:
            #Haven't scraped the other course yet
            print("Haven't found other half yet.")
            return

        other_half = self.courses_dict[other_half_key]
        print("Merging with other half: %s" % (other_half.get_key()))

        #Remove both halves from the dict so we can re-add the full course
        del self.courses_dict[other_half_key]
        del self.courses_dict[course_key]

        merged = SolusModels.SolusCourse()
        merged.add_merged_info(other_half, self.course)

        self.add_course(merged)
Beispiel #2
0
    def read_from_file(self):
        """Load saved courses from ``self.read_file_name`` into the course dict.

        The file content is parsed as JSON and iterated; every element is
        passed to ``SolusModels.SolusCourse`` and the resulting course is
        stored in ``self.courses_dict`` under its ``get_key()`` value. The
        size of the dict is printed once loading is done.
        """
        # Only the parsing needs the open handle; close it before building
        # the course objects.
        with open(self.read_file_name) as f:
            all_list = json.load(f)

        for course_dict in all_list:
            course = SolusModels.SolusCourse(course_dict)
            self.courses_dict[course.get_key()] = course

        # Parenthesized single-argument print works on Python 2 and 3.
        print(len(self.courses_dict))
Beispiel #3
0
 def scrape_single_section_component(self, piece_array):
     """Pop one section component off ``piece_array`` and build its objects.

     Six pieces are popped from the end of ``piece_array``, in this order:
     date range ("start - end"), instructor, room, end time, start time
     and the day string (e.g. "MoTuWeSaSu"). The day string is consumed two
     characters at a time from its end, and one
     ``SolusModels.SectionComponent`` is created per chunk, so components
     come out last day first. A day string of "TBA" is handled as the
     single chunk "TB".

     Returns:
         The list of created components; empty when the day string is
         empty.

     Raises:
         ValueError: if fewer than six pieces are available (nothing is
             popped in that case) or the date range piece does not look
             like "start - end".
     """
     if len(piece_array) < 6:
         # Fail with a clear error rather than opening an interactive
         # debugger, which would stall an unattended scrape.
         raise ValueError(
             "Expected at least 6 section pieces, got %d: %r"
             % (len(piece_array), piece_array))

     components = []

     #Date range
     date_range = piece_array.pop()
     m = re.search(r'^([\S]+)\s*-\s*([\S]+)$', date_range)
     if m is None:
         raise ValueError("Unrecognized date range: %r" % (date_range,))
     start_date = m.group(1)
     end_date = m.group(2)

     instructor = piece_array.pop()
     room = piece_array.pop()

     #Timeslot (pieces come off the stack in reverse: end before start)
     end = piece_array.pop()
     start = piece_array.pop()
     #Sometimes day is e.g. "MoTuWeSaSu"
     all_days = piece_array.pop()

     if all_days == "TBA":
         # Shortened to two characters so the loop below sees one chunk.
         all_days = "TB"

     while all_days:
         day = SolusModels.index_of_day_abbr(all_days[-2:])
         all_days = all_days[:-2]

         section_component = SolusModels.SectionComponent()
         components.append(section_component)

         section_component.start_date = start_date
         section_component.end_date = end_date

         section_component.instructor = instructor
         section_component.room = room

         section_component.timeslot = SolusModels.timeslot_index_by_components(
             day, start, end)

     return components
Beispiel #4
0
 def scrape_section_header(self, piece_array, section):
     """Find the next section header in ``piece_array`` and copy it to ``section``.

     Pieces are popped from the end of ``piece_array`` and discarded until
     one has the form "<index>-<type key> (<id>)". The three captured parts
     are stored as ``section.index``, ``section.type`` (after conversion by
     ``SolusModels.section_type_index_by_key``) and ``section.id``.

     Raises:
         IndexError: if ``piece_array`` is exhausted before a header piece
             is found.
     """
     # Raw string so the \S, \s and \( escapes reach the regex engine as-is.
     header_pattern = r'^([\S]+)-([\S]+)\s+\((\S+)\)$'

     m = None
     while m is None:
         if not piece_array:
             # Same exception type a bare pop() would raise, but with a
             # message that says what was being looked for.
             raise IndexError(
                 "No section header found in the remaining pieces")
         m = re.search(header_pattern, piece_array.pop())

     section.index = m.group(1)
     section.type = SolusModels.section_type_index_by_key(m.group(2))
     section.id = m.group(3)
Beispiel #5
0
    def scrape_section_header(self, piece_array, section):
        """Locate the next section header piece and fill in ``section``.

        Pops pieces off the end of ``piece_array``, skipping any that do not
        match "<index>-<type key> (<id>)". From the first matching piece,
        ``section.index`` and ``section.id`` are set to the captured text and
        ``section.type`` to the result of
        ``SolusModels.section_type_index_by_key`` for the type key.

        Raises:
            IndexError: if no piece matches before ``piece_array`` is empty.
        """
        # Raw string so the \S, \s and \( escapes reach the regex engine
        # unchanged; defined once instead of being repeated per search.
        header_pattern = r'^([\S]+)-([\S]+)\s+\((\S+)\)$'

        m = None
        while m is None:
            if not piece_array:
                # Keep the exception type of a failed pop(), but explain it.
                raise IndexError(
                    "No section header found in the remaining pieces")
            m = re.search(header_pattern, piece_array.pop())

        section.index = m.group(1)
        section.type = SolusModels.section_type_index_by_key(m.group(2))
        section.id = m.group(3)
Beispiel #6
0
    def scrape_section_page(self):
        """Scrape every section found on the current page into ``self.course``.

        The pieces returned by ``self.section_pieces_from_page()`` are passed
        to ``self.scrape_single_section`` again and again until none are
        left. Each pass gets a new ``SolusModels.Section`` that is appended
        to ``self.course.sections`` and tagged with ``self.current_term``.
        """
        remaining_pieces = self.section_pieces_from_page()

        # NOTE(review): this only terminates if scrape_single_section removes
        # pieces from the list it is given - confirm against that method.
        while remaining_pieces:
            new_section = SolusModels.Section()
            self.course.sections.append(new_section)
            new_section.term = self.current_term

            self.scrape_single_section(remaining_pieces, new_section)
Beispiel #7
0
    def scrape_single_section_component(self, piece_array):
        """Consume one section component from ``piece_array``.

        Pops six pieces from the end of ``piece_array``: the date range
        ("start - end"), instructor, room, end time, start time and the day
        string (e.g. "MoTuWeSaSu"). The day string is split into
        two-character chunks taken from its end, and a
        ``SolusModels.SectionComponent`` is built for every chunk, so the
        result lists the last day first. "TBA" is reduced to the single
        chunk "TB".

        Returns:
            List of the created components (empty for an empty day string).

        Raises:
            ValueError: if ``piece_array`` holds fewer than six pieces
                (checked before anything is popped) or the date range piece
                is not of the form "start - end".
        """
        if len(piece_array) < 6:
            # An explicit error replaces the old debugger breakpoint, which
            # would hang a scrape that runs without a terminal.
            raise ValueError(
                "Expected at least 6 section pieces, got %d: %r"
                % (len(piece_array), piece_array))

        components = []

        #Date range
        date_range = piece_array.pop()
        m = re.search(r'^([\S]+)\s*-\s*([\S]+)$', date_range)
        if m is None:
            raise ValueError("Unrecognized date range: %r" % (date_range,))
        start_date = m.group(1)
        end_date = m.group(2)

        instructor = piece_array.pop()
        room = piece_array.pop()

        #Timeslot (popped in reverse: end comes off before start)
        end = piece_array.pop()
        start = piece_array.pop()
        #Sometimes day is e.g. "MoTuWeSaSu"
        all_days = piece_array.pop()

        if all_days == "TBA":
            # Two characters, so the loop below treats it as one chunk.
            all_days = "TB"

        while all_days:
            day = SolusModels.index_of_day_abbr(all_days[-2:])
            all_days = all_days[:-2]

            section_component = SolusModels.SectionComponent()
            components.append(section_component)

            section_component.start_date = start_date
            section_component.end_date = end_date

            section_component.instructor = instructor
            section_component.room = room

            section_component.timeslot = SolusModels.timeslot_index_by_components(
                day, start, end)

        return components
Beispiel #8
0
    def scrape_sections(self):
        """Scrape the sections of every term offered in the term dropdown.

        For each option of the "DERIVED_SAA_CRS_TERM_ALT" select element,
        ``self.current_term`` is set from
        ``SolusModels.term_index_by_key(option)`` and ``self.scrape_term()``
        is called. When the dropdown has exactly one option the select /
        click / wait sequence is skipped.
        """
        browser = self.selenium
        term_dropdown = "id=DERIVED_SAA_CRS_TERM_ALT"

        terms = browser.get_select_options(term_dropdown)
        # The option count does not change inside the loop, so test it once.
        must_switch_term = len(terms) != 1

        for term_label in terms:
            if must_switch_term:
                browser.select(term_dropdown, "label=%s" % term_label)
                browser.click("id=DERIVED_SAA_CRS_SSR_PB_GO$92$")
                browser.wait_for_page_to_load(self.timeout_milliseconds)

            self.current_term = SolusModels.term_index_by_key(term_label)
            self.scrape_term()
Beispiel #9
0
 def scrape_sections(self):
     """Run ``self.scrape_term()`` once per option of the term dropdown.

     The options are read from the "DERIVED_SAA_CRS_TERM_ALT" select
     element. Unless there is exactly one option, each option is first
     selected and the go button clicked, followed by a page-load wait.
     ``self.current_term`` is then set via
     ``SolusModels.term_index_by_key(option)`` before scraping the term.
     """
     browser = self.selenium
     term_dropdown = "id=DERIVED_SAA_CRS_TERM_ALT"

     terms = browser.get_select_options(term_dropdown)
     # Loop-invariant: decided once from the number of options.
     must_switch_term = len(terms) != 1

     for term_label in terms:
         if must_switch_term:
             browser.select(term_dropdown, "label=%s" % term_label)
             browser.click("id=DERIVED_SAA_CRS_SSR_PB_GO$92$")
             browser.wait_for_page_to_load(self.timeout_milliseconds)

         self.current_term = SolusModels.term_index_by_key(term_label)
         self.scrape_term()
Beispiel #10
0
 def scrape_subjects_for_alphanum(self, alphanum):
     """Scrape all subjects listed under one alphanumeric index link.

     Clicks the "DERIVED_SSS_BCC_SSR_ALPHANUM_<alphanum>" link, then walks
     the subject links "DERIVED_SSS_BCC_GROUP_BOX_1$84$$<n>" starting at
     ``self.starting_subject_index`` for as long as they are present. Each
     link text is split into "<key> - <title>". Subjects whose key is in
     ``self.ignored_subjects`` are skipped; for the others the title is
     stored on the matching ``SolusModels.Subject`` and the dropdown is
     opened, scraped with ``self.scrape_single_dropdown()`` and closed.

     A truthy ``self.max_subjects_per_letter`` limits how many links are
     visited.

     Raises:
         ValueError: if a subject link text is not of the form
             "<key> - <title>".
     """
     sel = self.selenium
     sel.click("id=DERIVED_SSS_BCC_SSR_ALPHANUM_" + alphanum)
     sel.wait_for_page_to_load(self.timeout_milliseconds)

     #Prepare to traverse all links
     link_number = self.starting_subject_index
     link_name_base = "name=DERIVED_SSS_BCC_GROUP_BOX_1$84$$%d"
     link_name = link_name_base % (link_number,)

     while sel.is_element_present(link_name):
         #Store subject title
         link_text = sel.get_text(link_name).strip()
         m = re.search("^([^-]*) - (.*)$", link_text)
         if m is None:
             raise ValueError(
                 "Unrecognized subject link text: %r" % (link_text,))

         subject_key = m.group(1)
         subject_title = m.group(2)

         print("\nSubject: %s: %s" % (subject_key, subject_title))

         if subject_key not in self.ignored_subjects:
             self.subject_index = SolusModels.subject_index_by_key(subject_key)
             SolusModels.Subject.subjects[self.subject_index].title = subject_title

             #Open the dropdown
             sel.click(link_name)
             sel.wait_for_page_to_load(self.timeout_milliseconds)

             #Traverses all course links in the dropdown
             self.scrape_single_dropdown()

             #Close the dropdown
             try:
                 sel.click(link_name)
             except Exception:
                 # Not a bare except: Ctrl-C / SystemExit must still stop
                 # the scrape instead of being swallowed here.
                 print("FAILURE %s" % link_name)
                 # NOTE(review): long pause kept from the original;
                 # presumably gives the page time to recover - confirm.
                 time.sleep(100)

             sel.wait_for_page_to_load(self.timeout_milliseconds)
         else:
             print("Ignored")

         #Go to next link
         link_number += 1
         last_allowed = self.max_subjects_per_letter + self.starting_subject_index
         if self.max_subjects_per_letter and link_number >= last_allowed:
             break

         link_name = link_name_base % (link_number,)
Beispiel #11
0
    def scrape_subjects_for_alphanum(self, alphanum):
        """Visit every subject under one alphanumeric index link and scrape it.

        After clicking "DERIVED_SSS_BCC_SSR_ALPHANUM_<alphanum>", the subject
        links "DERIVED_SSS_BCC_GROUP_BOX_1$84$$<n>" are visited in order,
        beginning at ``self.starting_subject_index``, until one is missing.
        The text of each link is parsed as "<key> - <title>". Keys listed in
        ``self.ignored_subjects`` are skipped. For every other subject the
        title is stored on the matching ``SolusModels.Subject``, the dropdown
        is opened, ``self.scrape_single_dropdown()`` is run, and the dropdown
        is closed again.

        When ``self.max_subjects_per_letter`` is truthy, at most that many
        links are visited.

        Raises:
            ValueError: if a subject link text does not match
                "<key> - <title>".
        """
        sel = self.selenium
        sel.click("id=DERIVED_SSS_BCC_SSR_ALPHANUM_" + alphanum)
        sel.wait_for_page_to_load(self.timeout_milliseconds)

        #Prepare to traverse all links
        link_number = self.starting_subject_index
        link_name_base = "name=DERIVED_SSS_BCC_GROUP_BOX_1$84$$%d"
        link_name = link_name_base % (link_number, )

        while sel.is_element_present(link_name):
            #Store subject title
            link_text = sel.get_text(link_name).strip()
            m = re.search("^([^-]*) - (.*)$", link_text)
            if m is None:
                raise ValueError(
                    "Unrecognized subject link text: %r" % (link_text, ))

            subject_key = m.group(1)
            subject_title = m.group(2)

            print("\nSubject: %s: %s" % (subject_key, subject_title))

            if subject_key not in self.ignored_subjects:
                self.subject_index = SolusModels.subject_index_by_key(
                    subject_key)
                SolusModels.Subject.subjects[
                    self.subject_index].title = subject_title

                #Open the dropdown
                sel.click(link_name)
                sel.wait_for_page_to_load(self.timeout_milliseconds)

                #Traverses all course links in the dropdown
                self.scrape_single_dropdown()

                #Close the dropdown
                try:
                    sel.click(link_name)
                except Exception:
                    # Narrower than a bare except so KeyboardInterrupt and
                    # SystemExit still abort the scrape.
                    print("FAILURE %s" % link_name)
                    # NOTE(review): long pause kept from the original;
                    # presumably lets the page recover - confirm.
                    time.sleep(100)

                sel.wait_for_page_to_load(self.timeout_milliseconds)
            else:
                print("Ignored")

            #Go to next link
            link_number += 1
            last_allowed = (self.max_subjects_per_letter +
                            self.starting_subject_index)
            if self.max_subjects_per_letter and link_number >= last_allowed:
                break

            link_name = link_name_base % (link_number, )
Beispiel #12
0
    def scrape_single_dropdown(self):
        """Scrape every course link of the currently opened subject dropdown.

        Course links "CRSE_TITLE$<n>" are visited in order, beginning at
        ``self.starting_course_index``, until one is missing. For each link
        the course page is opened, a new ``SolusModels.SolusCourse`` is
        stored in ``self.course`` with ``self.subject_index`` as its subject,
        and the course is scraped, cleaned, added via ``self.add_course`` and
        offered to ``self.merge_course_if_fullyear``. The return button is
        clicked afterwards to go back.

        ``SolusModels.SolusCourse.num_courses`` is incremented per visited
        course and decremented again when the course raises
        ``SolusModels.UselessCourseException``. A truthy
        ``self.max_courses_per_subject`` limits how many links are visited.
        """
        sel = self.selenium

        #Prepare to traverse all links
        link_number = self.starting_course_index
        link_name_base = "id=CRSE_TITLE$%d"
        link_name = link_name_base % (link_number, )

        while sel.is_element_present(link_name):
            #Go into the course
            sel.click(link_name)
            sel.wait_for_page_to_load(self.timeout_milliseconds)

            self.course = SolusModels.SolusCourse()
            SolusModels.SolusCourse.num_courses += 1
            self.course.subject = self.subject_index

            #Scrape info from course
            try:
                self.scrape_single_course()
                self.course.clean()
                self.add_course(self.course)
                self.merge_course_if_fullyear()
            except SolusModels.UselessCourseException:
                # The course is dropped, so undo the count taken above.
                print("Ignored")
                SolusModels.SolusCourse.num_courses -= 1

            #Back out from course page
            sel.click("id=DERIVED_SAA_CRS_RETURN_PB")
            sel.wait_for_page_to_load(self.timeout_milliseconds)

            #Go to next course
            link_number += 1

            last_allowed = (self.max_courses_per_subject +
                            self.starting_course_index)
            if self.max_courses_per_subject and link_number >= last_allowed:
                break

            link_name = link_name_base % (link_number, )
Beispiel #13
0
    def scrape_title(self):
        """Parse the course page heading into ``self.course``.

        The stripped text of "span.PALEVEL0SECONDARY" is split as
        "<subject> <number> - <title>" and stored in
        ``self.course.subject_description``, ``self.course.num`` and
        ``self.course.title``. ``self.course.subject`` is only read here (for
        printing and the exception message); it is assigned earlier.

        Raises:
            ValueError: if the heading does not have the expected form.
            SolusModels.UselessCourseException: if the course number starts
                with "UNSP" or ends with "UNS".
        """
        sel = self.selenium
        raw_title = sel.get_text("css=span.PALEVEL0SECONDARY").strip()

        m = re.search(r'^([\S]+)\s+([\S]+)\s+-\s+(.*)$', raw_title)
        if m is None:
            raise ValueError("Unrecognized course title: %r" % (raw_title, ))

        #Subject is assigned earlier
        self.course.subject_description = m.group(1)
        self.course.num = m.group(2)
        self.course.title = m.group(3)

        print("")
        print("%s/%s %s - %s" % (self.course.subject_description,
                                 self.course.subject, self.course.num,
                                 self.course.title))

        # NOTE(review): "|" binds looser than the anchors, so this matches
        # numbers that START with "UNSP" or END with "UNS"; confirm whether
        # an exact "UNSP" match was intended.
        if re.search(r'^(UNSP)|(.*UNS)$', self.course.num):
            raise SolusModels.UselessCourseException(
                "%s %s" % (self.course.subject, self.course.num))