def mit(debug=False):
    """Scrape the MIT OpenCourseWare course index and upsert every course.

    Fetches ``http://ocw.mit.edu/courses/``, walks each ``div.course_list``
    section and, for every ``tr.row`` / ``tr.alt-row`` inside it, creates or
    updates the ``Course`` with ``type="mit"`` and the scraped course id,
    together with the ``School``, ``Category`` and ``Material`` rows it
    refers to.

    Args:
        debug: When true, print each course after it has been saved.
    """
    base_url = "http://ocw.mit.edu"

    def first_or_create(model, name, lookup="name"):
        """Return the first ``model`` row matching ``name``, creating one if none."""
        # filter()[0] instead of get(): when duplicates already exist the
        # first one is reused rather than raising, and only IndexError (no
        # row at all) leads to a create. Database errors now propagate
        # instead of being swallowed by a bare ``except``.
        try:
            return model.objects.filter(**{lookup: name})[0]
        except IndexError:
            return model.objects.create(name=name)

    response = requests.get(base_url + "/courses/")
    soup = BeautifulSoup(response.text)

    for course_list in soup("div", {"class": "course_list"}):
        heading = course_list("div", {"class": "table_head"})[0]
        category_name = str(heading("a")[0].string).lower()

        rows = (course_list("tr", {"class": "row"})
                + course_list("tr", {"class": "alt-row"}))
        for row in rows:
            cells = row("td")
            course_id = cells[2].string

            school = first_or_create(School, "mit", lookup="name__icontains")
            category = first_or_create(Category, category_name)

            try:
                m = Course.objects.filter(type="mit").filter(
                    course_id=str(course_id))[0]
            except IndexError:
                # Not seen before: start a fresh record.
                m = Course()

            # The second cell holds one link per available material; the
            # material name is taken from each link's ``alt`` attribute.
            material_names = [a["alt"] for a in cells[1]("a")]
            materials = [first_or_create(Material, name)
                         for name in material_names]

            title_link = cells[3]("a")[0]
            m.title = title_link("u")[0].string
            m.link = base_url + title_link["href"]
            m.type = "mit"
            m.course_id = course_id
            m.school = school
            # Saved before the relations are assigned -- presumably these are
            # many-to-many fields, which need the row to exist first.
            m.save()
            m.categories = [category]
            m.materials = materials
            m.save()

            if debug:
                print(m)
def coursera(debug=False):
    """Import the Coursera topic list and upsert every course.

    Downloads the JSON topic list from the Coursera ``maestro`` API and, for
    each entry, creates or updates the ``Course`` with ``type="coursera"``
    and the entry's ``id``, together with its ``School`` (the first listed
    university), its ``Category`` rows and a fixed set of ``Material`` rows.

    Args:
        debug: When true, print each course after it has been saved.
    """
    # Every Coursera course is tagged with the same four materials.
    material_names = [
        "Assignments and solutions",
        "Projects and examples",
        "Multimedia content",
        "Exams and solutions",
    ]

    def first_or_create(model, name, lookup="name"):
        """Return the first ``model`` row matching ``name``, creating one if none."""
        # filter()[0] instead of get(): when duplicates already exist the
        # first one is reused rather than raising, and only IndexError (no
        # row at all) leads to a create. Database errors now propagate
        # instead of being swallowed by a bare ``except``.
        try:
            return model.objects.filter(**{lookup: name})[0]
        except IndexError:
            return model.objects.create(name=name)

    response = requests.get(
        "https://www.coursera.org/maestro/api/topic/list?full=1")
    data = json.loads(response.text)

    for course in data:
        course_id = course["id"]
        school_name = course["universities"][0]["name"].lower()
        category_names = [c["name"].lower() for c in course["categories"]]

        school = first_or_create(School, school_name,
                                 lookup="name__icontains")
        categories = [first_or_create(Category, name)
                      for name in category_names]

        try:
            m = Course.objects.filter(type="coursera").filter(
                course_id=course_id)[0]
        except IndexError:
            # Not seen before: start a fresh record.
            m = Course()

        materials = [first_or_create(Material, name)
                     for name in material_names]

        m.title = course["name"]
        m.link = course["social_link"]
        m.image_url = course["small_icon"]
        m.course_id = course_id
        m.type = "coursera"
        m.school = school
        # Saved before the relations are assigned -- presumably these are
        # many-to-many fields, which need the row to exist first.
        m.save()
        m.categories = categories
        m.materials = materials
        m.save()

        if debug:
            print(m)
def edx(debug=False):
    """Scrape the edX course listing and upsert every course.

    Fetches ``https://www.edx.org/courses`` and, for each ``article.course``
    inside a ``section.university-column``, creates or updates the ``Course``
    with ``type="edx"`` and the derived course id, together with its
    ``School`` and a fixed set of ``Material`` rows.

    Args:
        debug: When true, print each course after it has been saved.
    """
    base_url = "https://www.edx.org"
    # Every edX course is tagged with the same four materials.
    material_names = [
        "Assignments and solutions",
        "Projects and examples",
        "Multimedia content",
        "Exams and solutions",
    ]

    def first_or_create(model, name, lookup="name"):
        """Return the first ``model`` row matching ``name``, creating one if none."""
        # filter()[0] instead of get(): when duplicates already exist the
        # first one is reused rather than raising, and only IndexError (no
        # row at all) leads to a create. Database errors now propagate
        # instead of being swallowed by a bare ``except``.
        try:
            return model.objects.filter(**{lookup: name})[0]
        except IndexError:
            return model.objects.create(name=name)

    response = requests.get(base_url + "/courses")
    soup = BeautifulSoup(response.text)

    for column in soup("section", {"class": "university-column"}):
        for course in column("article", {"class": "course"}):
            id_parts = course["id"].split("/")
            # The course id is the element id minus its last path segment.
            course_id = "/".join(id_parts[:-1])
            # The school name is the first segment with its final character
            # dropped, lower-cased.
            school_name = id_parts[0][:-1].lower()

            school = first_or_create(School, school_name,
                                     lookup="name__icontains")

            try:
                m = Course.objects.filter(type="edx").filter(
                    course_id=course_id)[0]
            except IndexError:
                # Not seen before: start a fresh record.
                m = Course()

            materials = [first_or_create(Material, name)
                         for name in material_names]

            # The heading's first space-separated token is dropped; the rest
            # is the title.
            heading_text = course("header")[0]("h2")[0].get_text()
            m.title = " ".join(heading_text.split(" ")[1:])
            m.link = base_url + course("a")[0]["href"]
            m.image_url = base_url + course("img")[0]["src"]
            m.type = "edx"
            m.course_id = course_id
            m.school = school
            # Saved before the relation is assigned -- presumably a
            # many-to-many field, which needs the row to exist first.
            m.save()
            m.materials = materials
            m.save()

            if debug:
                print(m)
# For every CCN in tempset, take the first matching Offering and clone it
# into a brand-new Course record, including its program links.
for t in tempset:
    # Slicing with [:1] still yields a queryset (holding at most one row),
    # not a model instance, so it has to be indexed to reach the object.
    o = Offering.objects.filter(ccn=t)[:1]
    offering = o[0]

    print("")
    print(offering)

    # Copy the plain fields across one by one, in declaration order.
    course = Course()
    for field_name in ("title", "ccn", "cstring", "units", "type",
                       "description", "restrictions"):
        setattr(course, field_name, getattr(offering, field_name))
    course.save()

    # programs is many-to-many, so each link is re-created on the new course.
    for p in offering.programs.all():
        course.programs.add(p)

# Field declarations kept for reference:
# title = models.CharField(max_length=384)
# ccn = models.CharField('CCN',max_length=384, blank=True)
# cstring = models.ForeignKey(Cstring,verbose_name='Course String',help_text="e.g. J-200, but without the J")
# units = models.IntegerField(choices=constants.UNIT_TYPE_CHOICES)
# type = models.IntegerField(choices=constants.COURSE_TYPE_CHOICES)
# description = models.TextField()
# restrictions = models.TextField(null=True,blank=True)