Beispiel #1
0
def parse_catalog(a=False):
    """Scrape course details from the online catalog at catalog.rpi.edu.

    For each selected catalog, the course listing is filtered once per
    department in DEPARTMENTS, and the detail page of every course id found
    is parsed with get_course_detail.

    Args:
        a: When truthy, every catalog id returned by get_catalogs is parsed;
            otherwise only the first one is.

    Returns:
        A dict mapping ``department + num`` to the course detail mapping
        returned by get_course_detail.  Courses whose title contains
        'Topics in' are skipped.  An existing entry is only overwritten by a
        later one whose description is not blank.  The dict is empty when no
        catalog ids are found.
    """
    courses = {}
    url = "catalog.rpi.edu"
    ids = get_catalogs(load_page(url))
    # Slicing (instead of indexing ids[0]) keeps an empty catalog list from
    # raising IndexError when only the first catalog is wanted.
    selected_ids = ids if a else ids[:1]
    for catalog_id in selected_ids:
        catalog_url = url + "/index.php?catoid=" + catalog_id
        link_id = get_courses_link_id(load_page(catalog_url))
        courses_url = (url + "/content.php?catoid=" + catalog_id
                       + "&navoid=" + link_id)

        # Each department's course list yields the coid (course id) values.
        # A coid is then used in a preview_course.php URL, which brings up
        # the description and info for that one course only.
        for department in DEPARTMENTS:
            # Prints the same text as the Python 2 statement
            # `print "parsing", department`, but also runs on Python 3.
            print("parsing %s" % department)
            # NOTE(review): the second load_page argument looks like the
            # department filter sent with the request -- confirm in load_page.
            listing = load_page(courses_url, "filter[27]=" + department)
            for course_id in get_course_ids(listing):
                detail_url = (url + "/preview_course.php?catoid=" + catalog_id
                              + "&coid=" + course_id)
                detail = get_course_detail(load_page(detail_url))
                if not detail:
                    # Nothing could be parsed from this page; skip it rather
                    # than fail on the subscripts below.
                    continue
                key = detail['department'] + detail['num']
                has_description = detail['description'].strip() != ''
                is_topics_course = 'Topics in' in detail['title']
                if (key not in courses or has_description) \
                        and not is_topics_course:
                    courses[key] = detail
    return courses
Beispiel #2
0
def parse_catalog(a=False):
    """Collect course details from the catalog site at catalog.rpi.edu.

    The catalog ids are read from the front page.  For each selected catalog
    the course listing is requested once per department in DEPARTMENTS, and
    every course id found there is resolved to its detail page.

    Args:
        a: If truthy, walk all catalog ids returned by get_catalogs; if
            falsy (the default), walk only the first one.

    Returns:
        A dict keyed by ``department + num`` whose values are the detail
        mappings produced by get_course_detail.  Detail pages that yield a
        falsy result are ignored, as are courses whose title contains
        'Topics in'.  A key that is already present is replaced only when the
        new detail has a non-blank description.  If get_catalogs yields no
        ids, the result is an empty dict.
    """
    courses = {}
    url = "catalog.rpi.edu"
    ids = get_catalogs(load_page(url))
    # ids[:1] is the first catalog only, and is simply empty (no IndexError)
    # when no catalogs were found.
    wanted_ids = ids if a else ids[:1]

    for catoid in wanted_ids:
        catalog_url = url + "/index.php?catoid=" + catoid
        link_id = get_courses_link_id(load_page(catalog_url))
        courses_url = url + "/content.php?catoid=" + catoid + "&navoid=" + link_id

        # The coid (course id) of every course is parsed out of each
        # department's course list.  Requesting preview_course.php with that
        # coid brings up the description and info for that course alone.
        for dept in DEPARTMENTS:
            # Same output as the Python 2 statement `print "parsing", dept`,
            # written so that it is also valid on Python 3.
            print("parsing %s" % dept)
            # NOTE(review): presumably the second argument is sent as request
            # data to filter the listing by department -- verify in load_page.
            coids = get_course_ids(load_page(courses_url, "filter[27]=" + dept))
            for coid in coids:
                detail_url = url + "/preview_course.php?catoid=" + catoid + "&coid=" + coid
                detail = get_course_detail(load_page(detail_url))
                if not detail:
                    continue
                key = detail['department'] + detail['num']
                replaces_entry = key not in courses or detail['description'].strip() != ''
                if replaces_entry and 'Topics in' not in detail['title']:
                    courses[key] = detail
    return courses