def scrape_course(requester, output_dir, course_id, year, term):
    """
    Scrape one course's evaluation summary and write it to a JSON file.

    The summary page is fetched through ``requester.make_request``. The
    course title, enrollment figures, ratings, sign-up reasons, instructors
    and questions are collected onto a ``Course`` object, which is validated
    and then dumped to ``<output_dir>/<course_id>.json``.

    Nothing is written (and ``None`` is returned) when the course is on the
    skip list, when the page has no ``<h1>``, or when the page has no
    ``.graphReport`` elements.
    """
    # Courses known to break the scraper; they are skipped outright.
    skipped_course_ids = (44050,)
    if course_id in skipped_course_ids:
        print('There is some weird shit going on with course {}'.format(course_id))
        return

    course = Course(course_id=course_id, year=year, term=term)

    base_url = '/course_evaluation_reports/fas/course_summary.html'
    page = requester.make_request(
        '{base_url}?course_id={course_id}'.format(base_url=base_url,
                                                  course_id=course_id)
    )

    # Course name, department, enrollment, etc.
    heading = page.h1
    if heading is None:
        print('No data for course {}'.format(course_id))
        return

    # The text before the first colon must split into exactly two tokens
    # (department and course code); the course name starts two characters
    # after the colon.
    title = heading.text
    colon_at = title.find(':')
    department, course_code = title[:colon_at].split()
    course.department = department
    course.course_code = course_code
    course.course_name = title[colon_at + 2:]

    # Whitespace-separated tokens of the summary box; the second and fourth
    # tokens are read as the enrollment and evaluation counts.
    stat_tokens = page.select('#summaryStats')[0].text.split()
    course.enrollment = int(stat_tokens[1])
    course.evaluations = int(stat_tokens[3])

    # Course ratings: every graph report except the last one.
    reports = page.select('.graphReport')
    if not reports:
        print('No data for course {}'.format(course_id))
        return

    collected_ratings = []
    for report in reports[:-1]:
        collected_ratings.extend(scrape_ratings(report))
    course.ratings = collected_ratings

    # The last graph report holds the reasons why people signed up.
    course.reasons = scrape_reasons(reports[-1])

    course.instructors = scrape_instuctors(requester, course_id)
    course.questions = scrape_questions(requester, course_id)

    course.validate()

    target_path = os.path.join(output_dir, '{}.json'.format(course.course_id))
    with open(target_path, 'w') as out_file:
        json.dump(course.to_json_dict(), out_file, indent=3)
def _get_or_create_course(self, course, department, instructors, attributes, restrictions, section):
    """
    Return the ``Course`` row whose name matches ``course``, creating it if needed.

    The row is looked up by ``course.course_name``. When none exists, a new
    ``Course`` is built from the scraped course's name, subject and course
    number, and ``self.num_new_courses`` is incremented. Independently,
    ``self.num_courses`` is incremented the first time a given course name
    is seen, tracked through ``self.course_names``.

    The row's department, attributes and restrictions are overwritten with
    the given values, ``section`` is appended to its sections, and the row
    is added to ``db.session``. The session is not committed here.

    ``instructors`` is accepted but not used by this method.
    """
    name = course.course_name

    record = Course.query.filter_by(name=name).first()
    if not record:
        record = Course(
            name=name,
            subject=course.subject,
            subject_level=course.course_number,
        )
        self.num_new_courses += 1

    # Count each distinct course name once, whether or not its row is new.
    if name not in self.course_names:
        self.num_courses += 1
        self.course_names[name] = True

    record.department = department
    record.attributes = attributes
    record.restrictions = restrictions
    record.sections.append(section)

    db.session.add(record)
    return record