def make_test_course():
    """Assemble and return a hard-coded ``CourseraCourse`` for testing.

    The course gets fixed scalar fields plus one ``Institution``, one
    ``Instructor`` (tied to that institution) and one ``Category``, each
    with id 10.  The course and its instructors are printed before the
    course is returned.

    NOTE(review): ``course.instructors.values()`` is called right after a
    plain list is assigned to ``course.instructors``; this presumably relies
    on the attribute being a model relation rather than a list -- confirm.
    """
    course = CourseraCourse()

    # Plain scalar attributes of the fixture, applied in declaration order.
    scalar_fields = (
        ('cid', 'introstats2'),
        ('n_seq', 1),  # i.e. session '001'
        ('title', 'Introduction to Statistics'),
        ('description', 'Introduction to Statistics is nice course!'),
        ('start_date', datetime.date(2013, 4, 5)),
        ('duration_in_weeks', 8),
        ('workload_in_hours_per_week', 3),
    )
    for field_name, field_value in scalar_fields:
        setattr(course, field_name, field_value)

    school = Institution()
    school.id = 10
    school.name = 'Harvard Univ.'
    course.institutions = [school]

    lecturer = Instructor()
    lecturer.id = 10
    lecturer.name = 'John Joey'
    lecturer.institution = school
    course.instructors = [lecturer]

    subject = Category()
    subject.id = 10
    subject.name = 'Mathematics & Statistics'
    course.categories = [subject]

    # Each message is formatted into one string so the call prints the same
    # text whether ``print`` is the Python 2 statement or a function.
    print('%s %s' % ('course', course))
    print('%s %s' % ('Instructors', course.instructors.values()))
    return course
def process_element(element):
    """Build a ``Course`` from one course-listing element.

    The element is expected to have an ``<h3>`` as its first child, whose
    first child is an ``<a>`` linking to the course; the last path segment
    of that link's ``href`` is taken as the course id.  When the element has
    a fourth child, the text of that child's first child is used as the
    university name; otherwise the university is left without a name.

    Args:
        element: A parsed element exposing ``getchildren()``; its
            descendants must expose ``tag``, ``text`` and ``get()``.

    Returns:
        A ``Course`` with ``course_id``, ``title`` and ``university`` set,
        or ``None`` when the element does not have the expected
        ``<h3>``/``<a>`` structure, the link has no ``href``, or
        ``is_course_id_good`` rejects the course id.
    """
    children = element.getchildren()
    # Debug messages are formatted into one string so they print the same
    # text whether ``print`` is the Python 2 statement or a function.
    print('%s %s' % ('len(elements_l1)', len(children)))

    # An element without children cannot be a course listing.
    if not children:
        return None

    heading = children[0]
    if heading.tag != 'h3':
        return None

    heading_children = heading.getchildren()
    # Only report the siblings that actually exist; indexing [1], [2], [3]
    # unconditionally would raise IndexError on short listings.
    sibling_tags = ' '.join(str(child.tag) for child in children[1:4])
    print('%s %s %s %s' % ('Got <h3>', heading_children, 'l1', sibling_tags))

    if not heading_children:
        return None

    link = heading_children[0]
    if link.tag != 'a':
        return None

    url = link.get('href')
    # A link without an href carries no course id to extract.
    if url is None:
        return None

    course_id = url.split('/')[-1]
    print('%s %s' % ('course_id', course_id))
    if not is_course_id_good(course_id):
        return None

    # The university block is optional: leave the name unset when the
    # fourth child, or its first child, is missing.
    university = Institution()
    if len(children) > 3:
        info_children = children[3].getchildren()
        if info_children:
            inner_a = info_children[0]
            print('%s %s' % ('university_class_node.text', inner_a.text))
            university.name = inner_a.text

    course = Course()
    course.course_id = course_id
    course.title = link.text
    course.university = university
    return course
def save_courses_subset_to_db(self):
    """Save every entry of ``self.courses_subset`` as a ``CourseraCourse``.

    For each entry a new ``CourseraCourse`` is filled with ``cid``,
    ``n_seq`` (from ``get_n_seq()``) and ``title``, plus ``start_date`` and
    ``duration_in_weeks`` when they are not ``None``, and then saved.

    When the entry has a university, an ``Institution`` with that name is
    looked up and created if it does not exist.  An ``AttributeError``
    raised by the lookup is printed and otherwise ignored.

    NOTE(review): the institution is looked up or created but never
    attached to the course; confirm whether the relation should be set.

    Returns:
        None.
    """
    for index, course_subset in enumerate(self.courses_subset):
        label = str(index + 1).zfill(3)
        # Format the whole message first so a failure to render the entry
        # cannot leave a half-written line before the fallback is printed.
        try:
            print('%s %s %s' % (label, 'Saving to db', course_subset))
        except UnicodeEncodeError:
            print('%s %s' % (label, 'Saving to db'))

        course = CourseraCourse()
        course.cid = course_subset.cid
        course.n_seq = course_subset.get_n_seq()
        course.title = course_subset.title
        if course_subset.start_date is not None:
            course.start_date = course_subset.start_date
        if course_subset.duration_in_weeks is not None:
            course.duration_in_weeks = course_subset.duration_in_weeks

        if course_subset.university is not None:
            university_name = course_subset.university
            try:
                institution = Institution.objects.get(name=university_name)
            except Institution.DoesNotExist:
                # First time this name is seen: create the institution.
                institution = Institution()
                institution.name = university_name
                institution.save()
                print('%s %s' % ('institution id', institution.id))
            except AttributeError as err:
                # Best effort: report the failed lookup, still save the course.
                print('%s %s' % ('university_name', university_name))
                print(err)

        course.save()