コード例 #1
0
 def fill_in_cid_and_n_seq_dict_from_courseras_webrootpage(self):
     """
     Fill self.cid_and_n_seq_dict_from_courseras_webrootpage from a text file.

     The url-stock comes directly from a text file.
     This text file, in turn, is produced by CourseraWebRootPageScraperMod.py
     ie, they are the courses listed in Coursera's Web Root Course Index Page

     Each line is read as "course_id,n_seq". A line is skipped when:
       - it starts with "#";
       - its course_id is in WorkCourse.finished_course_ids_list
         (a message is printed for each such course);
       - it has no second comma-separated field, or that field is not an integer;
       - its n_seq is 0.
     Every remaining line stores n_seq (as int) under course_id in the dict.
     The total number of entries in the dict is printed at the end.
     """
     tuplestextfile = ls.get_default_textfile_with_extracted_ids_and_nseqs_from_coursera_webrootpage_abspath()
     # 'with' closes the file handle as soon as its content has been read
     with open(tuplestextfile) as tuplesfile:
         lines = tuplesfile.read().split("\n")
     for line in lines:
         if line.startswith("#"):
             continue
         pp = line.split(",")
         # str.split() always returns at least one element, so pp[0] cannot fail
         course_id = pp[0]
         if course_id in WorkCourse.finished_course_ids_list:
             # single-argument print: same output text on Python 2 and Python 3
             print("Course_id %s is finished. Continuing next." % course_id)
             continue
         try:
             n_seq = int(pp[1])
         except (IndexError, ValueError):
             # no n_seq field (eg, a blank line) or a non-numeric one: ignore the line
             continue
         if n_seq == 0:
             continue
         self.cid_and_n_seq_dict_from_courseras_webrootpage[course_id] = n_seq
     print("Total courses found at coursera's webrootpage: %d" % len(self.cid_and_n_seq_dict_from_courseras_webrootpage))
コード例 #2
0
 def write_to_txtfile_current_stocked_coursera_items(self, txt_filename=None):
   '''
   Write the stocked course items to a text file, one "cid,n_seq" line per item.

   txt_filename: path of the file to write. When None (the default), the path
     returned by
     ls.get_default_textfile_with_extracted_ids_and_nseqs_from_coursera_webrootpage_abspath()
     is used.

   When self.course_tuple_list is empty the method returns without opening
   (and thus without creating or truncating) any file.
   '''
   n_items = len(self.course_tuple_list)
   if n_items == 0:
     return
   if txt_filename is None:
     txt_filename = ls.get_default_textfile_with_extracted_ids_and_nseqs_from_coursera_webrootpage_abspath()
   print('Writing %d lines to %s' % (n_items, txt_filename))
   # 'with' closes the file even if an item raises while being formatted or written
   with open(txt_filename, 'w') as fileobj:
     for tuple_item in self.course_tuple_list:
       # only the 2nd tuple element is used: an object exposing attributes cid and n_seq
       course_item_obj = tuple_item[1]
       fileobj.write('%s,%s\n' % (course_item_obj.cid, course_item_obj.n_seq))
コード例 #3
0
def process():
  extractor = CourseraWebRootCourseExtractor()
  print 'Extracting courses from webroot, please wait.' 
  extractor.restart_items_by_reading_htmlwebroot_source()  
  print 'Writing found courses to', ls.get_default_textfile_with_extracted_ids_and_nseqs_from_coursera_webrootpage_abspath() 
  extractor.write_to_txtfile_current_stocked_coursera_items()