def test5():
    """Cross-check courses parsed from the stocked root page with the grabber.

    Reads the locally stocked Coursera root web page, builds a course object
    (via ``process_element``) for every ``div`` whose class attribute is
    exactly ``coursera-course-listing-main``, then asks a
    ``CourseraRootCourseGrabber`` to reload its items.  Every course id known
    to the grabber is logged with its ``course_n_seq``; for every parsed
    course that the grabber also knows, the grabber's ``course_n_seq`` is
    copied onto the parsed course object.  Finally id, title and sequence
    number of each parsed course are printed.

    NOTE(review): a second ``test5`` is defined right after this one in the
    file, so this definition is shadowed once the module is loaded.
    """
    coursera_root_page_filepath = ls.get_default_coursera_stocked_root_webpage_osfilepath()
    # Use a context manager so the file handle is closed even if read() fails.
    with open(coursera_root_page_filepath) as html_file:
        html_text = html_file.read()
    root = etree.HTML(html_text)

    courses = []
    for element in root.findall(".//div[@class]"):
        # Only an exact class match is accepted (no multi-class divs).
        if element.get('class') != 'coursera-course-listing-main':
            continue
        course_obj = process_element(element)
        if course_obj is not None:
            courses.append(course_obj)

    # sys.exit(0)  # debugging stop, disabled in the original as well
    grabber = CourseraRootCourseGrabber()
    grabber.restart_items_by_reading_htmlwebroot_source()
    # presumably a plain dict keyed by course id -- verify against the grabber
    known_courses = grabber.unique_course_id_dict

    for course_id in known_courses:
        msg = '%s seq = %s' % (course_id, known_courses[course_id].course_n_seq)
        logging.info(msg)
    logging.info('**********************************')

    for i, course_obj in enumerate(courses):
        # Direct membership test on the mapping instead of building .keys().
        if course_obj.course_id in known_courses:
            course_there = known_courses[course_obj.course_id]
            course_obj.course_n_seq = course_there.course_n_seq
            msg = '****EQUAL **** %s :: seq = %s' % (course_obj.course_id, course_there.course_n_seq)
            logging.info(msg)
        # NOTE(review): the report below is assumed to run for every parsed
        # course, matched or not; this relies on course objects always having
        # a course_n_seq attribute -- confirm.
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print('%s %s' % (i, course_obj.course_id))
        logging.info(course_obj.course_id)
        print('title %s' % (course_obj.title,))
        print('nseq %s' % (course_obj.course_n_seq,))
def test5():
    """Parse the stocked Coursera root page and collect its course objects.

    Reads the locally stocked Coursera root web page and builds a course
    object (via ``process_element``) for every ``div`` whose class attribute
    is exactly ``coursera-course-listing-main``.  The process then exits with
    status 0.

    NOTE(review): this definition shadows the earlier ``test5`` defined just
    above it in the file.
    """
    coursera_root_page_filepath = ls.get_default_coursera_stocked_root_webpage_osfilepath()
    # Use a context manager so the file handle is closed even if read() fails.
    with open(coursera_root_page_filepath) as html_file:
        html_text = html_file.read()
    root = etree.HTML(html_text)

    courses = []
    for element in root.findall(".//div[@class]"):
        # Only an exact class match is accepted (no multi-class divs).
        if element.get('class') != 'coursera-course-listing-main':
            continue
        course_obj = process_element(element)
        if course_obj is not None:
            courses.append(course_obj)

    # NOTE(review): exiting here means the listing loop below never runs.
    # This looks like a debugging leftover -- confirm before removing it.
    sys.exit(0)

    for i, course_obj in enumerate(courses):
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print('%s %s' % (i, course_obj.course_id))
        print(course_obj.title)