def parse_department_page(url, q):
    """Scrape one department result page for quarter ``q`` and record every
    listed course, linking each to the quarter.

    url -- department listing URL, passed through to get_quarter
    q   -- Quarter model instance; ``q.quarter`` is the season name and
           ``q.year`` the year (e.g. "Autumn 2016")
    """
    html = get_quarter(url, q.quarter + " " + str(q.year))
    soup = BeautifulSoup(html, "lxml")
    # Each result row carries the course identifier ("DEPT 12345/...")
    # and the human-readable course name.
    classes = []
    for row in soup(class_="resultrow"):
        classes.append([
            row.find(class_="name").a.next_sibling.strip(),
            row.find(class_="two").string.strip(),
        ])
    for identifier, name in classes:
        department = identifier.split(" ")[0]
        # Cross-listed codes look like "12345/67890"; keep the first number.
        code = identifier.split(" ")[1].split("/")[0]
        # get_or_create replaces the original filter/get/save dance, which
        # queried the same (name, department, code) triple up to three times.
        course, _created = Course.objects.get_or_create(
            name=name, department=department, code=code)
        course.quarter_set.add(q)
def scrapecrosslistings(course):
    """Scrape the class-detail page for ``course`` and link any cross-listed
    courses via ``course.cross_listings``.

    Uses the most recent quarter the course was offered (highest ``index``).
    Returns None when the page lists no cross-listings.
    """
    lastqoffered = course.quarter_set.extra(order_by=["-index"])[0]
    html = get_quarter(
        "https://classes.uchicago.edu/courseDetail.php?courseName=" + str(course),
        str(lastqoffered))
    soup = BeautifulSoup(html, "lxml")
    # The cross-listing paragraph lives in the first tab; either the tab or
    # the paragraph may be absent, in which case find() returns None.
    try:
        crossp = soup.find(id="tabs-1").find("p")
    except AttributeError:  # no "tabs-1" element on the page
        crossp = ""
    if not crossp:
        return None
    for link in crossp.find_all("a"):
        coursename = link.string.split(" ")
        try:
            # Most recent DB record for the cross-listed course, if any.
            lastcourseversion = Course.objects.filter(
                department=coursename[0], code=coursename[1])[0]
        except IndexError:  # empty queryset: course not recorded yet
            # Create it, reusing this course's name and its last quarter.
            lastcourseversion = Course(
                department=coursename[0], code=coursename[1], name=course.name)
            lastcourseversion.save()
            lastcourseversion.quarter_set.add(lastqoffered)
            print("added " + str(lastcourseversion))
        course.cross_listings.add(lastcourseversion)
def parse_department_page(url, q):
    """Scrape one department result page and attach each listed course to
    quarter ``q`` via ``q.courses``.

    NOTE(review): this redefines parse_department_page from earlier in the
    file, so this later definition wins at import time -- confirm which
    variant is intended to survive.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # urllib2 responses are not closed automatically; don't leak the
        # socket if read() raises.
        response.close()
    soup = BeautifulSoup(html, "lxml")
    classes = []
    for row in soup(class_="resultrow"):
        classes.append([
            row.find(class_="name").a.next_sibling.strip(),
            row.find(class_="two").string.strip(),
        ])
    for identifier, name in classes:
        department = identifier.split(" ")[0]
        # Cross-listed codes look like "12345/67890"; keep the first number.
        code = identifier.split(" ")[1].split("/")[0]
        course, _created = Course.objects.get_or_create(
            name=name, department=department, code=code)
        # Bug fix: the original only ran q.courses.add() for newly created
        # courses, silently skipping courses that already existed (the L1
        # variant of this function links in both branches).
        q.courses.add(course)