def get_all_ctecs(subject, caesar_scraper=None):
    """Scrape every CTEC listed for ``subject`` and save those matching a course.

    For each ``(index, title)`` pair yielded by
    ``caesar_scraper.get_courses(subject)`` and each ``(index, quarter)`` pair
    yielded by ``caesar_scraper.get_ctecs(subject, i)``, the CTEC is fetched
    with ``get_ctec``, tagged with ``current_class_title`` and looked up in
    ``courses`` by term, catalog number, subject and section.  When several
    courses match, the lookup is narrowed by instructor name.  The CTEC is
    saved to ``ctecs`` (reusing the matched course's ``_id``) only when
    exactly one course matches; every other outcome is logged as an error.

    Args:
        subject: Subject code handed to the scraper and compared with the
            subject parsed from each CTEC.
        caesar_scraper: Scraper to use.  A new ``CaesarScraper()`` is created
            when this is omitted (or falsy).

    NOTE(review): the indentation of this function had been lost in the
    source; the nesting below is reconstructed from the control flow and the
    original comments -- confirm against version control.
    """
    logging.debug('Starting %s', subject)
    caesar_scraper = caesar_scraper or CaesarScraper()
    for i, current_class_title in caesar_scraper.get_courses(subject):
        logging.debug("Starting %s %s %s", subject, current_class_title, i)
        # Depends only on the listing title, so compute it before the
        # download check: the "Could not download" message needs it and
        # previously read it before it had ever been assigned.
        current_catalog_num = current_class_title.split(":")[0]
        for j, quarter in caesar_scraper.get_ctecs(subject, i):
            ctec = caesar_scraper.get_ctec(subject, j)
            if ctec == {}:
                logging.error("Could not download %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)
                continue

            # First token of class_title is split on "-": the first two
            # pieces form the catalog number, the third is the section.
            title_parts = ctec['class_title'].split()[0].split("-")
            original_catalog_num = "-".join(title_parts[0:2])
            section = title_parts[2]
            subj = ctec['subj'].split()[0]
            ctec['current_class_title'] = current_class_title

            course_filter = {
                "term": quarter,
                "catalog_num": original_catalog_num,
                "subject": subj,
                "section": section,
            }
            courses_query = courses.find(course_filter)
            # Keep the count in a local instead of re-querying it for
            # every comparison.
            match_count = courses_query.count()
            if match_count > 1:
                # Ambiguous match: narrow it down by instructor name, with
                # the name's words joined by ".*" to form a regex.
                narrowed_filter = dict(course_filter)
                narrowed_filter["instructor.name"] = {
                    "$regex": ".*".join(ctec['instructor'].split())
                }
                courses_query = courses.find(narrowed_filter)
                match_count = courses_query.count()
                if match_count > 1:
                    logging.error(
                        "%s too many courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
                elif match_count == 0:
                    # This branch used to test ``== 1``, which reported a
                    # unique match as an error and hid the real miss.
                    logging.error(
                        "%s no courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
            elif match_count == 0:
                logging.error("%s no courses found for %s %s %s %s",
                              j, quarter, original_catalog_num, subj, section)

            if match_count == 1:
                course = list(courses_query)[0]
                ctec['_id'] = course['_id']
                ctecs.save(ctec)
                logging.debug("Saved %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)

            # BUG: if the catalog number starts with "3", CTEC thinks the
            # course is part of the graduate school for some subjects such
            # as EECS.
            if original_catalog_num[0] == "3" or subj != subject:
                # Sometimes after getting a single ctec we need to get the
                # courses AND ctecs again, because it routes back to the
                # original search page.
                for action in ("NW_CT_PB_SRCH_ACAD_CAREER",
                               "NW_CT_PB_SRCH_SUBJECT"):
                    caesar_scraper.post_doc(
                        caesar_scraper.CTEC_URL,
                        data={
                            "ICAction": action,
                            "NW_CT_PB_SRCH_ACAD_CAREER": "UGRD",
                            "NW_CT_PB_SRCH_SUBJECT": subject,
                            "NW_CT_PB_SRCH_NW_CTEC_SRCH_CHOIC$4$": "C",
                        })
                caesar_scraper.get_courses(subject)
                caesar_scraper.get_ctecs(subject, i)
            elif subj == "AAL" or subj == "AF_AM_ST":
                # Sometimes after getting a single ctec we need to get the
                # ctecs again, because it routes back to the list of
                # courses page.
                caesar_scraper.get_ctecs(subject, i)

        # After getting all the ctecs for a single course, we need to
        # get_courses again.
        caesar_scraper.get_courses(subject)
    logging.debug('Finished %s', subject)
def get_all_ctecs(subject, caesar_scraper=None):
    """Scrape every CTEC listed for ``subject`` and save those matching a course.

    For each ``(index, title)`` pair yielded by
    ``caesar_scraper.get_courses(subject)`` and each ``(index, quarter)`` pair
    yielded by ``caesar_scraper.get_ctecs(subject, i)``, the CTEC is fetched
    with ``get_ctec``, tagged with ``current_class_title`` and looked up in
    ``courses`` by term, catalog number, subject and section.  When several
    courses match, the lookup is narrowed by instructor name.  The CTEC is
    saved to ``ctecs`` (reusing the matched course's ``_id``) only when
    exactly one course matches; every other outcome is logged as an error.

    Args:
        subject: Subject code handed to the scraper and compared with the
            subject parsed from each CTEC.
        caesar_scraper: Scraper to use.  A new ``CaesarScraper()`` is created
            when this is omitted (or falsy).

    NOTE(review): the indentation of this function had been lost in the
    source; the nesting below is reconstructed from the control flow and the
    original comments -- confirm against version control.
    """
    logging.debug('Starting %s', subject)
    caesar_scraper = caesar_scraper or CaesarScraper()
    for i, current_class_title in caesar_scraper.get_courses(subject):
        logging.debug("Starting %s %s %s", subject, current_class_title, i)
        # Depends only on the listing title, so compute it before the
        # download check: the "Could not download" message needs it and
        # previously read it before it had ever been assigned.
        current_catalog_num = current_class_title.split(":")[0]
        for j, quarter in caesar_scraper.get_ctecs(subject, i):
            ctec = caesar_scraper.get_ctec(subject, j)
            if ctec == {}:
                logging.error("Could not download %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)
                continue

            # First token of class_title is split on "-": the first two
            # pieces form the catalog number, the third is the section.
            title_parts = ctec['class_title'].split()[0].split("-")
            original_catalog_num = "-".join(title_parts[0:2])
            section = title_parts[2]
            subj = ctec['subj'].split()[0]
            ctec['current_class_title'] = current_class_title

            course_filter = {
                "term": quarter,
                "catalog_num": original_catalog_num,
                "subject": subj,
                "section": section,
            }
            courses_query = courses.find(course_filter)
            # Keep the count in a local instead of re-querying it for
            # every comparison.
            match_count = courses_query.count()
            if match_count > 1:
                # Ambiguous match: narrow it down by instructor name, with
                # the name's words joined by ".*" to form a regex.
                narrowed_filter = dict(course_filter)
                narrowed_filter["instructor.name"] = {
                    "$regex": ".*".join(ctec['instructor'].split())
                }
                courses_query = courses.find(narrowed_filter)
                match_count = courses_query.count()
                if match_count > 1:
                    logging.error(
                        "%s too many courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
                elif match_count == 0:
                    # This branch used to test ``== 1``, which reported a
                    # unique match as an error and hid the real miss.
                    logging.error(
                        "%s no courses found for %s %s %s %s %s",
                        j, quarter, original_catalog_num, subj, section,
                        ctec['instructor'])
            elif match_count == 0:
                logging.error("%s no courses found for %s %s %s %s",
                              j, quarter, original_catalog_num, subj, section)

            if match_count == 1:
                course = list(courses_query)[0]
                ctec['_id'] = course['_id']
                ctecs.save(ctec)
                logging.debug("Saved %s %s %s %s %s",
                              subject, current_catalog_num, quarter, i, j)

            # BUG: if the catalog number starts with "3", CTEC thinks the
            # course is part of the graduate school for some subjects such
            # as EECS.
            if original_catalog_num[0] == "3" or subj != subject:
                # Sometimes after getting a single ctec we need to get the
                # courses AND ctecs again, because it routes back to the
                # original search page.
                for action in ("NW_CT_PB_SRCH_ACAD_CAREER",
                               "NW_CT_PB_SRCH_SUBJECT"):
                    caesar_scraper.post_doc(
                        caesar_scraper.CTEC_URL,
                        data={
                            "ICAction": action,
                            "NW_CT_PB_SRCH_ACAD_CAREER": "UGRD",
                            "NW_CT_PB_SRCH_SUBJECT": subject,
                            "NW_CT_PB_SRCH_NW_CTEC_SRCH_CHOIC$4$": "C",
                        })
                caesar_scraper.get_courses(subject)
                caesar_scraper.get_ctecs(subject, i)
            elif subj == "AAL" or subj == "AF_AM_ST":
                # Sometimes after getting a single ctec we need to get the
                # ctecs again, because it routes back to the list of
                # courses page.
                caesar_scraper.get_ctecs(subject, i)

        # After getting all the ctecs for a single course, we need to
        # get_courses again.
        caesar_scraper.get_courses(subject)
    logging.debug('Finished %s', subject)
import csv

from models import courses

# Script: export the "2015 Spring" documents of the ``courses`` collection to
# courses.csv, one row per course.

# Columns written to courses.csv, in output order.
course_fieldnames = [
    "id", "term", "year", "quarter", "course_id", "class_num", "school",
    "subject", "catalog_num", "section", "title", "instructor", "start_time",
    "end_time", "meeting_days"
]
fieldnames = course_fieldnames

# Python 2's csv module expects the file to be opened in binary mode; in text
# mode Windows would emit "\r\r\n" row endings.
with open("courses.csv", "wb") as f:
    # extrasaction="ignore" drops keys that are not listed in ``fieldnames``,
    # replacing the hand-written filter over the row dict.
    writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
    writer.writeheader()
    for course in courses.find({"term": "2015 Spring"}):
        # Work on a plain-dict copy so the fetched document is not mutated.
        course = dict(course)
        course['id'] = course.pop('_id')
        # ``term`` is split on whitespace: first token -> year, second ->
        # quarter.
        term_parts = course['term'].split()
        course['year'] = term_parts[0]
        course['quarter'] = term_parts[1]
        # Flatten the nested instructor document to just its name.
        course['instructor'] = course['instructor']['name']
        writer.writerow(course)
from models import courses, ctecs

# Script: for every WCAS ECON 281-0 course whose instructor name matches
# "Walker", print the term and instructor, then each "/"-separated piece of
# the matching CTEC's essay (when a CTEC with the same _id exists).
#
# NOTE(review): the source had lost its indentation; the separator line is
# assumed to be printed once per course -- confirm against version control.
walker_econ_281 = {
    "school": "WCAS",
    "subject": "ECON",
    "catalog_num": "281-0",
    "instructor.name": {"$regex": ".*Walker.*"},
}

for course in courses.find(walker_econ_281):
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement.
    print("%s %s" % (course['term'], course['instructor']['name']))
    ctec = ctecs.find_one({"_id": course['_id']})
    # No CTEC stored for this course -> nothing to list.
    statements = ctec['essay'].split("/") if ctec else []
    for statement in statements:
        print(statement)
    print("-------------------------")
from models import courses, ctecs

# Script: for every WCAS ECON 281-0 course whose instructor name matches
# "Walker", print the term and instructor, then each "/"-separated piece of
# the matching CTEC's essay (when a CTEC with the same _id exists).
#
# NOTE(review): the source had lost its indentation; the separator line is
# assumed to be printed once per course -- confirm against version control.
walker_econ_281 = {
    "school": "WCAS",
    "subject": "ECON",
    "catalog_num": "281-0",
    "instructor.name": {"$regex": ".*Walker.*"},
}

for course in courses.find(walker_econ_281):
    # Single-argument print(...) behaves the same under the Python 2 print
    # statement.
    print("%s %s" % (course['term'], course['instructor']['name']))
    ctec = ctecs.find_one({"_id": course['_id']})
    # No CTEC stored for this course -> nothing to list.
    statements = ctec['essay'].split("/") if ctec else []
    for statement in statements:
        print(statement)
    print("-------------------------")