def make_test_course():

  course = CourseraCourse()
  course.cid = 'introstats2'
  course.n_seq = 1 # '001'

  course.title = 'Introduction to Statistics'
  course.description = 'Introduction to Statistics is nice course!'

  course.start_date = datetime.date(2013, 4, 5)
  course.duration_in_weeks = 8
  # course.workload_in_hours_per_day = 3
  course.workload_in_hours_per_week = 3

  institution = Institution()
  institution.id = 10
  institution.name = 'Harvard Univ.'
  course.institutions = [institution] 

  professor =  Instructor()
  professor.id = 10
  professor.name = 'John Joey'
  professor.institution = institution
  course.instructors = [professor]

  category =  Category()
  category.id = 10
  category.name = 'Mathematics & Statistics'
  course.categories = [category]
  
  print 'course', course
  print 'Instructors', course.instructors.values()

  return course  
def process_element(element):
  """Build a ``Course`` from one course-listing element.

  The element's first child is expected to be an <h3> whose first child is an
  <a> linking to the course; the last path segment of that link's href is
  used as the course id and the link text as the title.  The university name
  is taken from the text of the first child of the element's fourth child,
  when that node exists.

  Returns the populated ``Course``, or None when the element does not have
  the expected structure (no children, no <h3>/<a>, no href) or when
  ``is_course_id_good`` rejects the course id.
  """
  elements_l1 = element.getchildren()
  # Single-argument print(...) produces the same output as the Python 2 print
  # statement used elsewhere in this file and also parses under Python 3.
  print('len(elements_l1) %s' % (len(elements_l1),))
  # An element without children cannot contain the expected <h3>.
  if not elements_l1:
    return None
  h3 = elements_l1[0]
  # first element is expected to be <h3>, if not, return
  if h3.tag != 'h3':
    return None
  elements_l2 = h3.getchildren()
  # Show only the siblings that actually exist: indexing [1], [2] and [3]
  # unconditionally raised IndexError on short listings, before the
  # missing-university handling further down could run.
  sibling_tags = ' '.join('%s' % (child.tag,) for child in elements_l1[1:4])
  print('Got <h3> %s l1 %s' % (elements_l2, sibling_tags))
  if not elements_l2:
    return None
  a = elements_l2[0]
  # first element of h3 is expected to be <a>, if not, return
  if a.tag != 'a':
    return None
  url = a.get('href')
  # A link without an href carries no course id.
  if not url:
    return None
  course_id = url.split('/')[-1]
  print('course_id %s' % (course_id,))
  if not is_course_id_good(course_id):
    return None
  # NOTE(review): the original carried the string
  # "coursera-course-listing-more coursera-course-my-listing-more" here,
  # presumably the class attribute of the <div> read below - confirm.
  university = Institution()
  try:
    inner_a = elements_l1[3].getchildren()[0]
  except IndexError:
    # No university node in this listing; the name is simply left unset.
    pass
  else:
    print('university_class_node.text %s' % (inner_a.text,))
    university.name = inner_a.text
  course = Course()
  course.course_id = course_id
  course.title = a.text
  course.university = university
  return course
 def save_courses_subset_to_db(self):
   """Save every entry of ``self.courses_subset`` as a ``CourseraCourse``.

   For each entry a new ``CourseraCourse`` is filled with the entry's cid,
   sequence number (``get_n_seq()``) and title, plus the start date and
   duration when those are set, and is then saved.  When the entry has a
   university, an ``Institution`` with that name is looked up and, if it
   does not exist yet, created and saved.  An ``AttributeError`` raised by
   that lookup is printed and otherwise ignored.
   """
   for i, course_subset in enumerate(self.courses_subset):
     counter = str(i + 1).zfill(3)
     # Emit the progress line as one pre-formatted string.  The multi-item
     # print statement wrote the prefix before failing on an unencodable
     # course description, so the fallback line repeated it.
     try:
       print('%s Saving to db %s' % (counter, course_subset))
     except UnicodeEncodeError:
       print('%s Saving to db' % (counter,))
     course = CourseraCourse()
     course.cid = course_subset.cid
     course.n_seq = course_subset.get_n_seq()
     course.title = course_subset.title
     # Optional fields are copied only when present.
     if course_subset.start_date is not None:
       course.start_date = course_subset.start_date
     if course_subset.duration_in_weeks is not None:
       course.duration_in_weeks = course_subset.duration_in_weeks
     if course_subset.university is not None:
       university_name = course_subset.university
       try:
         institution = Institution.objects.get(name=university_name)
       except Institution.DoesNotExist:
         # First time this university is seen: create it.
         institution = Institution()
         institution.name = university_name
         institution.save()
         print('institution id %s' % (institution.id,))
       except AttributeError as e:
         # Best effort: report the offending value and keep going so the
         # course itself is still saved.
         print('university_name %s' % (university_name,))
         print(e)
       # NOTE(review): the institution is looked up or created but never
       # attached to the course; the original linking call
       # (course.institutions.add(institution.id)) was commented out -
       # confirm whether that is intended.
     course.save()