def merge_course_if_fullyear(self):
    """Merge the current course with its other half if it is a full-year course.

    A course whose number ends in "A" is paired with the course whose key
    ends in "B" (and vice versa). If the other half is already present in
    ``self.courses_dict``, both halves are removed from the dict and a new
    ``SolusModels.SolusCourse`` built with ``add_merged_info`` is added via
    ``self.add_course``.

    Returns None in every case. Nothing is changed when the course number
    does not end in "A" or "B" (including an empty number), or when the
    other half has not been stored yet.
    """
    # Slice instead of indexing so that an empty course number is treated
    # as "not a full-year course" rather than raising IndexError.
    suffix = self.course.num[-1:]
    if suffix == "A":
        other_half_letter = "B"
    elif suffix == "B":
        other_half_letter = "A"
    else:
        # Not a full year course
        return

    # The other half's key is this course's key with the final letter swapped.
    current_key = self.course.get_key()
    other_half_key = current_key[:-1] + other_half_letter

    if other_half_key not in self.courses_dict:
        # Haven't scraped the other course yet
        # (parenthesised single argument prints the same on Python 2 and 3)
        print("Haven't found other half yet.")
        return

    other_half = self.courses_dict[other_half_key]
    print("Merging with other half: %s" % (other_half.get_key()))

    # Remove both halves from the dict so we can re-add the full course
    del self.courses_dict[other_half_key]
    del self.courses_dict[current_key]

    merged = SolusModels.SolusCourse()
    merged.add_merged_info(other_half, self.course)
    self.add_course(merged)
def read_from_file(self):
    """Populate ``self.courses_dict`` from the JSON file ``self.read_file_name``.

    The decoded JSON is iterated and each item is passed to
    ``SolusModels.SolusCourse``; the resulting course is stored under its
    ``get_key()`` value, replacing any existing entry with the same key.
    The number of entries in ``self.courses_dict`` is printed afterwards.
    """
    # NOTE(review): the file presumably holds a JSON list of course dicts;
    # confirm against whatever writes it.
    with open(self.read_file_name) as handle:
        records = json.load(handle)

    for record in records:
        loaded = SolusModels.SolusCourse(record)
        self.courses_dict[loaded.get_key()] = loaded

    # Parenthesised single argument prints the same on Python 2 and 3.
    print(len(self.courses_dict))
def scrape_single_dropdown(self):
    """Visit each course link on the current page and scrape it.

    Links are addressed as ``id=CRSE_TITLE$<n>``, starting at
    ``self.starting_course_index`` and counting up until a link is no
    longer present, or until ``self.max_courses_per_subject`` links have
    been visited (when that limit is truthy).

    For every link the course page is opened, a fresh
    ``SolusModels.SolusCourse`` is stored in ``self.course``, scraped,
    cleaned, added and (if applicable) merged with its other half; then
    the browser is sent back via the return button. Courses that raise
    ``SolusModels.UselessCourseException`` are reported as "Ignored" and
    do not count towards ``SolusCourse.num_courses``.
    """
    browser = self.selenium
    title_locator = "id=CRSE_TITLE$%d"

    # Prepare to traverse all links
    index = self.starting_course_index

    while True:
        locator = title_locator % (index, )
        if not browser.is_element_present(locator):
            break

        # Go into the course
        browser.click(locator)
        browser.wait_for_page_to_load(self.timeout_milliseconds)

        self.course = SolusModels.SolusCourse()
        SolusModels.SolusCourse.num_courses += 1
        self.course.subject = self.subject_index

        # Scrape info from course
        try:
            self.scrape_single_course()
            self.course.clean()
            self.add_course(self.course)
            self.merge_course_if_fullyear()
        except SolusModels.UselessCourseException:
            # Parenthesised single argument prints the same on Python 2 and 3.
            print("Ignored")
            # Undo the count taken above: this course is not being kept.
            SolusModels.SolusCourse.num_courses -= 1

        # Back out from course page
        browser.click("id=DERIVED_SAA_CRS_RETURN_PB")
        browser.wait_for_page_to_load(self.timeout_milliseconds)

        # Go to next course, stopping once the optional per-subject cap is hit
        index += 1
        if self.max_courses_per_subject and index >= self.max_courses_per_subject + self.starting_course_index:
            break