def load_courses():
    """Parse 'courses_list.txt' and persist each recognized course.

    Scans the scraped file line by line.  A line containing a course
    code (one digit, three word characters, three digits) produces a
    Course that is saved; a line equal to "du" marks a schedule block
    whose throwaway follow-up rows are consumed and discarded.

    Returns:
        list: every Course object built while scanning the file.
        (Bug fix: the list was previously built but never returned.)
    """
    courses_list = []
    # Hoisted out of the loop; raw string avoids escape-sequence issues.
    sigle_pattern = re.compile(r'\d\w\w\w\d\d\d')
    # NOTE(review): ISO-8859-1 matches how the scraper wrote the file —
    # confirm if the upstream scraper ever changes encoding.
    with open('courses_list.txt', 'r', encoding='iso-8859-1') as scraped_data:
        line = scraped_data.readline()
        while line:
            course = Course()
            sigle = sigle_pattern.search(line)
            # The course code occupies the first 8 characters; the name
            # follows, optionally trailed by a parenthesized suffix.
            line_without_sigle = line[8:]
            splited_line = line_without_sigle.split('(')
            if sigle:
                course.sigle = sigle.group()
                course.name = splited_line[0]
                print(course.sigle)
                print(course.name)
                course.save()
            elif line.strip() == "du":
                # Consume the 7 throwaway rows of the schedule block
                # that follows a "du" marker (original positions
                # 1, 2, 4, 5, 7, 9, 11 of a 12-slot table).
                for index in range(12):
                    if index in (1, 2, 4, 5, 7, 9, 11):
                        line = scraped_data.readline()
            courses_list.append(course)
            line = scraped_data.readline()
    return courses_list
def get_course(self):
    """Return the cached Course, building it on first access."""
    # Guard clause: reuse the existing instance when one is present.
    if self.course:
        return self.course
    self.course = Course(self)
    return self.course
def getData(self, courses: dict) -> dict:
    """
    Scrapes the KFUPM course-offering page for every (term, department)
    pair and fills *courses* with Course objects.

    Args:
        courses: dict keyed by ``course code + term``; mutated in place.

    Returns:
        dict: the same *courses* mapping, updated with scraped data.
        (Annotation fixed: a dict is returned, not a list.)
    """
    # Initializing necessary attributes
    self.setTerms()
    self.setDepartments()
    self.setFormAttributes()
    for term in self.terms:
        for dept in self.depts:
            logging.info(f"\t{term}: {dept}")
            try:
                response = self.session.post(
                    self.url, data=self.getPayload(term, dept))
            except requests.RequestException as e:
                logging.error(e)
                # BUG FIX: previously execution fell through and used an
                # unbound (or stale) `response`; skip this pair instead.
                continue
            soup = BeautifulSoup(response.text, 'html.parser')
            for row in soup.find_all("div", class_="trow"):
                # fetch data of ONE course row
                data = self.getCourseData(row)
                # splitting course name and sections as required by
                # the schema ("COURSE-SEC" -> Course / Section)
                data["Section"], data["Course"] = (
                    data["Course-Sec"].split("-")[1],
                    data["Course-Sec"].split("-")[0])
                # splitting Time into start_time / end_time; -1 marks
                # an empty field (e.g. unscheduled sections)
                data["start_time"], data["end_time"] = (
                    data["Time"].split("-")[0],
                    data["Time"].split("-")[1])
                if len(data["start_time"]) == 0:
                    data["start_time"] = -1
                if len(data["end_time"]) == 0:
                    data["end_time"] = -1
                # removing redundant combined keys
                data.pop("Course-Sec", None)
                data.pop("Time", None)
                # course identity is code+term so the same code in two
                # terms yields two distinct Course objects
                courseID = data["Course"] + term
                section = Section(data["CRN"], data["Section"],
                                  data["Instructor"], data["Activity"],
                                  data["Day"], data["Loc"],
                                  data["start_time"], data["end_time"],
                                  data["Status"])
                if courseID not in courses:
                    # First sighting: create the course with its first
                    # section.  BUG FIX: the assignment used to run
                    # unconditionally after the if/else, overwriting an
                    # existing entry with a stale `course` object left
                    # over from a previous iteration.
                    courses[courseID] = Course(data["Course"],
                                               data["Course Name"],
                                               term, dept, [section])
                    logging.info(f"\t {courseID} created")
                else:
                    # Only appends the new section of the same course
                    courses[courseID].sections.append(section)
    return courses
def get_course(self):
    """Get current course."""
    # Lazily instantiate: keep the cached value when truthy, otherwise
    # create the Course once and remember it.
    self.course = self.course or Course(self)
    return self.course
courses = [ Course( **{ "id": "8SoStds", "name": "8th Grade Social Studies", "section": "A", "grade": 8, "subject": "SOCIAL_STUDIES", "descriptionHeading": "8th Grade Social Studies A", "description": "Geography - Solar System, Our Planet - Earth", "ownerId": "116353579384483626788", "creationTime": "2018-05-26T20:10:12.481Z", "updateTime": "2018-05-26T20:14:03.185Z", "courseState": "ACTIVE", "imageUri": "https://gnext18-v5.appspot.com/play/img/social_studies.jpg", "calendarId": "*****@*****.**" }), Course(