def test_clean_physics():
    """Check the cleaned PHYS 2A rows of section ``PHYS2A$0``.

    Parses the WI20 PHYS fixture, runs it through
    ``CourseCleaner.process_department`` and hands the rows for course 2A /
    section ``PHYS2A$0`` to ``check_classes`` together with the expected
    per-row attributes.
    """
    course_parser = CourseParser()
    course_cleaner = CourseCleaner()

    fixture = os.path.join(RESOURCE_DIR, "WI20_PHYS.html")
    parsed = course_parser.parse_file(fixture, "PHYS")
    cleaned = course_cleaner.process_department("PHYS", parsed)

    phys = []
    for row in cleaned:
        if row.course_num == "2A" and row.section_id == "PHYS2A$0":
            phys.append(row)

    # The section must survive cleaning before its rows are compared.
    assert len(phys) > 0

    expected_rows = [
        {"days": "M"},
        {"days": "W", "times": "12:00-12:50"},
        {"days": "F", "times": "12:00-12:50"},
        {"days": "F", "times": "08:00-08:50"},
        {"days": "Tu"},
    ]
    check_classes(expected_rows, phys)
def test_normal_clean():
    """Cleaning the WI20 CSE fixture leaves seven distinct course numbers."""
    course_parser = CourseParser()
    course_cleaner = CourseCleaner()

    fixture = os.path.join(RESOURCE_DIR, "WI20_CSE.html")
    parsed = course_parser.parse_file(fixture, "CSE")
    cleaned = course_cleaner.process_department("CSE", parsed)

    distinct_course_nums = {row.course_num for row in cleaned}
    assert len(distinct_course_nums) == 7
def test_continue_with_bad_date_format():
    """Course 276D is still present after cleaning the bad-date-format fixture."""
    course_parser = CourseParser()
    course_cleaner = CourseCleaner()

    fixture = os.path.join(RESOURCE_DIR, "SP20_CSE_bad_date_format.html")
    parsed = course_parser.parse_file(fixture, "CSE")
    cleaned = course_cleaner.process_department("CSE", parsed)

    rows_276d = [row for row in cleaned if row.course_num == "276D"]
    assert len(rows_276d) > 0
def test_cancelled_classes():
    """No MUS 107 rows remain after cleaning the cancelled-classes fixture."""
    course_parser = CourseParser()
    course_cleaner = CourseCleaner()

    fixture = os.path.join(RESOURCE_DIR, "SP20_MUS_cancelled.html")
    parsed = course_parser.parse_file(fixture, "MUS")
    cleaned = course_cleaner.process_department("MUS", parsed)

    remaining = [row for row in cleaned if row.course_num == "107"]
    assert len(remaining) < 1
def test_phys_page_many_discussions():
    """Parsing the WI20 PHYS fixture yields twelve PHYS 2A discussion rows.

    Each discussion row must carry a consecutive course id (``993409``
    upward, in parse order) and report ``"4"`` units.
    """
    course_parser = CourseParser()
    fixture = os.path.join(RESOURCE_DIR, "WI20_PHYS.html")
    parsed = course_parser.parse_file(fixture, "PHYS")

    discussions = []
    for row in parsed:
        if row.course_num == "2A" and row.section_type == "DI":
            discussions.append(row)

    # PHYS 2A is expected to have twelve discussion (DI) rows in this fixture.
    assert len(discussions) == 12

    # Course ids count up from 993409, one per discussion row.
    for id_suffix, discussion in enumerate(discussions, start=9):
        assert discussion.course_id == "9934{:02d}".format(id_suffix)
        assert discussion.units == "4"
def test_units_assigned():
    """Parsed rows carry the ``units`` string of their course.

    Checks PHYS 1C rows (``"3"``) from the WI20 PHYS fixture and AIP 197 rows
    (``"2/12 by 2"``) from the WI20 AIP fixture, using the same parser
    instance for both files.

    NOTE(review): each check passes vacuously if the fixture contains no
    matching rows.
    """
    course_parser = CourseParser()

    phys_fixture = os.path.join(RESOURCE_DIR, "WI20_PHYS.html")
    phys_rows = course_parser.parse_file(phys_fixture, "PHYS")
    phys_1c = [row for row in phys_rows if row.course_num == "1C"]
    assert all(row.units == "3" for row in phys_1c)

    aip_fixture = os.path.join(RESOURCE_DIR, "WI20_AIP.html")
    aip_rows = course_parser.parse_file(aip_fixture, "AIP")
    aip_197 = [row for row in aip_rows if row.course_num == "197"]
    assert all(row.units == "2/12 by 2" for row in aip_197)
def main():
    """Run the parse and clean stages and print how long each one took.

    Each stage is executed through ``record_execution_time``, which stores
    the elapsed time (formatted in minutes) in ``execution_times`` under the
    stage's label. The collected timings are pretty-printed at the end.
    """
    execution_times = {}

    def record_execution_time(subroutine, label):
        """Call ``subroutine()``, record its duration under ``label``.

        Returns whatever ``subroutine`` returns. Nothing is recorded if
        ``subroutine`` raises.
        """
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time.time() can.
        started = time.perf_counter()
        ret = subroutine()
        elapsed_minutes = (time.perf_counter() - started) / 60
        execution_times[label] = '{0:.3f} minutes'.format(elapsed_minutes)
        return ret

    # Scraping stages are currently disabled:
    # department_scraper = DepartmentScraper()
    # record_execution_time(department_scraper.scrape, 'Department Scraping')
    #
    # course_scraper = CourseScraper()
    # record_execution_time(course_scraper.scrape, 'Course Scraping {} '.format(QUARTERS_TO_SCRAPE))

    parser = CourseParser()
    parsed_data = record_execution_time(
        parser.parse, 'Course Parsing {}'.format(QUARTERS_TO_SCRAPE))

    cleaner = CourseCleaner()
    # partial() binds the parsed data so the cleaner fits the zero-argument
    # callable that record_execution_time expects.
    record_execution_time(partial(cleaner.clean, parsed_data), 'Cleaning')

    # MySQL export is currently disabled:
    # record_execution_time(export_to_mysql, 'MySQL Exporting')

    pprint.pprint(execution_times)
def test_phys_page_no_course_num():
    """Check the two PHYS 2A lecture rows parsed from the WI20 PHYS fixture.

    Both rows must be ``ClassRow`` instances with an empty ``course_id``, and
    their raw (uncleaned) days and times must match the fixture.
    """
    parser = CourseParser()
    classes = parser.parse_file(
        os.path.join(RESOURCE_DIR, "WI20_PHYS.html"), "PHYS")
    phys_le = [
        c for c in classes
        if c.course_num == "2A" and c.section_type == "LE"
    ]

    # PHYS 2A is expected to have two lecture (LE) rows in this fixture.
    assert len(phys_le) == 2
    first, second = phys_le[0], phys_le[1]
    assert isinstance(first, ClassRow) and isinstance(second, ClassRow)

    # Compare by value: ``is ''`` tests object identity, which only works
    # when the empty string happens to be interned and triggers a
    # SyntaxWarning on Python 3.8+.
    assert first.course_id == '' and second.course_id == ''

    assert first.days == "MWF"
    assert first.times == "12:00p-12:50p"
    assert second.days == "F"
    assert second.times == "8:00a-8:50a"
def test_normal_clean_data():
    """Check the cleaned CSE 100 rows from the WI20 CSE fixture.

    The rows for course 100 are handed to ``check_classes`` together with the
    expected per-row attributes (days, section type and, for the first row,
    the instructor).
    """
    course_parser = CourseParser()
    course_cleaner = CourseCleaner()

    fixture = os.path.join(RESOURCE_DIR, "WI20_CSE.html")
    parsed = course_parser.parse_file(fixture, "CSE")
    cleaned = course_cleaner.process_department("CSE", parsed)

    cse_100 = [row for row in cleaned if row.course_num == "100"]

    expected_rows = [
        {"days": "M", "section_type": "LE", "instructor": "Cao, Yingjun"},
        {"days": "M", "section_type": "DI"},
        {"days": "W", "section_type": "LE"},
        {"days": "F", "section_type": "LE"},
    ]
    check_classes(expected_rows, cse_100)
def test_basic_page():
    """Check the two CSE 100 discussion rows parsed from the WI20 CSE fixture.

    Both rows must be ``ClassRow`` instances without a ``section_id``; the
    first row is additionally checked field by field against the fixture.
    """
    course_parser = CourseParser()
    fixture = os.path.join(RESOURCE_DIR, "WI20_CSE.html")
    parsed = course_parser.parse_file(fixture, "CSE")

    discussions = []
    for row in parsed:
        if row.course_num == "100" and row.section_type == "DI":
            discussions.append(row)

    # CSE 100 is expected to have two discussion (DI) rows in this fixture.
    assert len(discussions) == 2
    first, second = discussions

    assert isinstance(first, ClassRow) and isinstance(second, ClassRow)
    assert first.section_id is None and second.section_id is None
    assert second.course_id == "995097"

    # Raw (uncleaned) field values expected on the first discussion row.
    expected_first = {
        "department": "CSE",
        "course_id": "995095",
        "course_num": "100",
        "instructor": "Cao, Yingjun",
        "section_type": "DI",
        "days": "M",
        "times": "5:00p-5:50p",
        "location": "SOLIS",
        "room": "107",
    }
    for attribute, value in expected_first.items():
        assert getattr(first, attribute) == value
def test_page_all_same_department():
    """Every row parsed from the WI20 CSE fixture has department ``CSE``."""
    course_parser = CourseParser()
    fixture = os.path.join(RESOURCE_DIR, "WI20_CSE.html")
    parsed = course_parser.parse_file(fixture, "CSE")

    # If the CSE-only count equals the total, no other department slipped in.
    cse_count = sum(1 for row in parsed if row.department == "CSE")
    assert cse_count == len(parsed)