Example #1
0
def scrape_course(requester, output_dir, course_id, year, term):
    """
    Scrape one course's evaluation summary and save it as a JSON file.

    The summary page is fetched through ``requester.make_request``. The
    page header supplies the department, course code, course name,
    enrollment and evaluation counts. Every ``.graphReport`` element
    except the last is parsed as ratings; the last one is parsed as the
    reasons people signed up. Instructor and question data are fetched by
    the sibling scrapers. The populated ``Course`` is validated and written
    to ``<output_dir>/<course_id>.json``.

    Nothing is returned. Courses on the skip list, or pages without an
    ``<h1>`` or without any ``.graphReport`` element, only print a message
    and produce no file.
    """
    # Ids listed here are known to misbehave and are skipped outright.
    skipped_ids = (44050,)
    if course_id in skipped_ids:
        print('There is some weird shit going on with course {}'.format(course_id))
        return

    no_data_message = 'No data for course {}'
    course = Course(course_id=course_id, year=year, term=term)

    summary_path = '/course_evaluation_reports/fas/course_summary.html'
    summary_url = '{base_url}?course_id={course_id}'.format(
        base_url=summary_path, course_id=course_id)
    page = requester.make_request(summary_url)

    # Header: "<department> <code>: <course name>".
    heading = page.h1
    if heading is None:
        print(no_data_message.format(course_id))
        return

    title = heading.text
    colon_at = title.find(':')
    course.department, course.course_code = title[:colon_at].split()
    # Skip the colon and the single character that follows it.
    course.course_name = title[colon_at + 2:]

    # The summary block is read positionally: token 1 is the enrollment
    # count, token 3 the number of evaluations.
    stat_tokens = page.select('#summaryStats')[0].text.split()
    course.enrollment = int(stat_tokens[1])
    course.evaluations = int(stat_tokens[3])

    graphs = page.select('.graphReport')
    if not graphs:
        print(no_data_message.format(course_id))
        return

    # All graphs but the last hold ratings; the last holds sign-up reasons.
    rating_graphs = graphs[:-1]
    reasons_graph = graphs[-1]

    collected_ratings = []
    for graph in rating_graphs:
        collected_ratings.extend(scrape_ratings(graph))
    course.ratings = collected_ratings

    course.reasons = scrape_reasons(reasons_graph)

    course.instructors = scrape_instuctors(requester, course_id)
    course.questions = scrape_questions(requester, course_id)

    course.validate()
    target_path = os.path.join(output_dir, '{}.json'.format(course.course_id))
    with open(target_path, 'w') as out_file:
        json.dump(course.to_json_dict(), out_file, indent=3)
Example #2
0
 def _get_or_create_course(self, course, department, instructors, attributes, restrictions, section):
     """Return the ``Course`` record for ``course``, creating it if needed.

     The record is looked up by ``course.course_name``. When none exists a
     new one is built from the course's name, subject and course number,
     and ``self.num_new_courses`` is incremented. ``self.num_courses`` is
     incremented the first time a given course name is seen, with the
     names seen so far kept as keys of ``self.course_names``.

     In both cases the record's department, attributes and restrictions
     are overwritten with the given values, ``section`` is appended to its
     sections, and the record is added to the database session. No commit
     is issued here. ``instructors`` is accepted but not used.
     """
     name = course.course_name

     record = Course.query.filter_by(name=name).first()
     if not record:
         record = Course(
             name=name,
             subject=course.subject,
             subject_level=course.course_number,
         )
         self.num_new_courses += 1

     # Count each distinct course name only once.
     if name not in self.course_names:
         self.num_courses += 1
         self.course_names[name] = True

     record.department = department
     record.attributes = attributes
     record.restrictions = restrictions
     record.sections.append(section)

     # Only staged on the session; this method does not commit.
     db.session.add(record)
     return record