def _update_lectures(self, course_url):
    """
    Update lectures for a single course.

    Fetches ``<course_url>/luennot``, reads the table with ID ``leView``
    and stores one dict per matched table row through
    ``self._set_course_data`` under the ``"lectures"`` key.  Known Finnish
    column headers are translated to English keys and the value of the
    date column is passed through ``generate_ISO_date``.

    :param course_url: Base URL of the course.  The text after its last
        ``/`` is used as the course ID.
    :returns: ``[]`` when the page could not be fetched; ``None`` otherwise
        (including when the lecture table could not be located).
    """
    res = self._urlopen(course_url + "/luennot", exit_on_fail=False)
    if not res:
        return []

    soup = BeautifulSoup(res.read())

    # All the lectures are listed in a table with ID "leView"
    lectures_view = soup.find(id="leView")

    # Separate the table head and body.  If the table, its <thead> or its
    # <tbody> is missing, the attribute access happens on None and raises
    # AttributeError, which is treated as "no lectures available".
    try:
        tbody = lectures_view.tbody.find_all(id=self._lecture_re)
        thead = lectures_view.thead.find_all(id=self._header_re)
    except AttributeError:
        print("* WARN * COULD NOT LOCATE LECTURES FOR THIS COURSE")
        return

    fin_to_eng = {
        u"pvm": u"date",
        u"klo": u"time",
        u"päivä": u"day",
        u"vko": u"week",
        u"tila": u"location",
        u"aihe": u"topic",
    }

    # Column keys, in table order: lower-cased header text, translated to
    # English where a translation is known.
    column_keys = []
    for header in thead:
        header_text = header.get_text(strip=True).lower()
        column_keys.append(fin_to_eng.get(header_text, header_text))

    lecture_data = []
    for tr in tbody:
        data = {}
        for i, td in enumerate(tr.find_all("td")):
            # A row may contain more cells than there are matched headers;
            # fall back to a placeholder key instead of raising IndexError
            # (same fallback as the exams parser uses).
            key = column_keys[i] if i < len(column_keys) else "undefined"
            text = td.get_text(strip=True)
            if key == u"date":
                text = generate_ISO_date(text)
            data[key] = text
        lecture_data.append(data)

    course_id = course_url.split("/")[-1]
    self._set_course_data(course_id, "lectures", lecture_data)
def _update_assignments(self, course_url):
    """
    Update the assignments for the given course.

    Fetches ``<course_url>/harjoitustyot``, reads the table with ID
    ``asView`` and stores one dict per matched table row through
    ``self._set_course_data`` under the ``"assignments"`` key.  Known
    Finnish column headers are translated to English keys and the value of
    the deadline column is passed through ``generate_ISO_date``.

    :param course_url: Base URL of the course.  The text after its last
        ``/`` is used as the course ID.
    :returns: ``[]`` when the page could not be fetched; ``None`` otherwise
        (including when the assignment table could not be located).
    """
    res = self._urlopen(course_url + "/harjoitustyot", exit_on_fail=False)
    if not res:
        return []

    soup = BeautifulSoup(res.read())

    # All the assignments are listed in a table with ID "asView"
    assignments_view = soup.find(id="asView")

    # Separate the table head and body.  If the table, its <thead> or its
    # <tbody> is missing, the attribute access happens on None and raises
    # AttributeError, which is treated as "no assignments available".
    # NOTE(review): rows are selected with self._lecture_re, the same
    # pattern the lecture parser uses -- confirm it is meant to match
    # assignment row IDs as well.
    try:
        tbody = assignments_view.tbody.find_all(id=self._lecture_re)
        thead = assignments_view.thead.find_all(id=self._header_re)
    except AttributeError:
        print("* WARN * COULD NOT LOCATE ASSIGNMENTS FOR THIS COURSE")
        return

    fin_to_eng = {u"dl": u"deadline", u"otsikko": u"title"}

    # Column keys, in table order: lower-cased header text, translated to
    # English where a translation is known.
    column_keys = []
    for header in thead:
        header_text = header.get_text(strip=True).lower()
        column_keys.append(fin_to_eng.get(header_text, header_text))

    assignment_data = []
    for tr in tbody:
        data = {}
        for i, td in enumerate(tr.find_all("td")):
            # A row may contain more cells than there are matched headers;
            # fall back to a placeholder key instead of raising IndexError
            # (same fallback as the exams parser uses).
            key = column_keys[i] if i < len(column_keys) else "undefined"
            if key == u"deadline":
                text = generate_ISO_date(td.get_text(strip=True))
            else:
                # Prefer the text of the link inside the cell (the
                # assignment title); a cell without a link contributes its
                # own text instead of crashing on None.
                link = td.find("a")
                source = td if link is None else link
                text = source.get_text(strip=True)
            data[key] = text
        assignment_data.append(data)

    course_id = course_url.split("/")[-1]
    self._set_course_data(course_id, "assignments", assignment_data)
def _update_exams(self, course_url, soup=None):
    """
    Update the exams for the given course.

    Locates the text node matching "Tentit ja välikokeet" or "Exams and
    mid-term exams" (case-insensitive), takes the next ``<table>`` after it
    and stores one dict per ``<tr>`` through ``self._set_course_data`` under
    the ``"exams"`` key.  Rows without ``<td>`` cells yield an empty dict.

    :param course_url: Base URL of the course.  The text after its last
        ``/`` is used as the course ID.
    :param soup: Optional, already parsed page.  When omitted,
        ``<course_url>/etusivu`` is fetched and parsed.
    :returns: ``[]`` when the page could not be fetched or the exams table
        could not be located; ``None`` after the data has been stored.
    """
    # If we have a ready made soup in arguments, use that instead
    if soup is None:
        res = self._urlopen(course_url + "/etusivu", exit_on_fail=False)
        if not res:
            return []
        soup = BeautifulSoup(res.read())

    heading = soup.find(
        text=re.compile(u"Tentit ja välikokeet|Exams and mid-term exams", re.I)
    )
    # Both lookups can come back empty: the heading may be absent, or there
    # may be no table after it.
    exams_table = None if heading is None else heading.find_next("table")
    if exams_table is None:
        print("Couldn't locate the exams table!!!")
        return []

    # These are hard coded since they aren't visible anywhere on the site
    labels = ["day", "date", "time", "place", "title"]
    date_index = 1  # position of the date column within a row

    exam_data = []
    for tr in exams_table.find_all("tr"):
        data = {}
        for i, td in enumerate(tr.find_all("td")):
            text = td.get_text(strip=True)
            # Cells beyond the known columns share the "undefined" key
            label = labels[i] if i < len(labels) else "undefined"
            if i == date_index:
                text = generate_ISO_date(text)
            data[label] = text
        exam_data.append(data)

    course_id = course_url.split("/")[-1]
    self._set_course_data(course_id, "exams", exam_data)