def transfer_reviews_from_prof_helper(self, delete_prof):
    """Exercises Professor.transfer_reviews_from_prof.

    Seeds a duplicate professor with several MenloCourse reviews, transfers
    them onto a second professor, and verifies the review counts moved.
    When delete_prof is True, also expects the duplicate professor document
    to have been removed.
    """
    survivor = m.Professor(id='real_prof')
    duplicate = m.Professor(
        id='dupe_prof', first_name='first', last_name='last')
    duplicate.save()

    num_reviews = 5
    for _ in range(num_reviews):
        m.MenloCourse(professor_id=duplicate.id, course_id='cs135').save()

    # Sanity-check the fixture before transferring anything.
    self.assertEqual(
        m.MenloCourse.objects(professor_id=survivor.id).count(), 0)
    self.assertEqual(
        m.MenloCourse.objects(professor_id=duplicate.id).count(),
        num_reviews)

    survivor.transfer_reviews_from_prof(duplicate, delete_prof)

    # All reviews should now belong to the surviving professor.
    self.assertEqual(
        m.MenloCourse.objects(professor_id=survivor.id).count(),
        num_reviews)
    self.assertEqual(
        m.MenloCourse.objects(professor_id=duplicate.id).count(), 0)
    # The duplicate professor is deleted only when requested.
    self.assertEqual(
        m.Professor.objects(id=duplicate.id).count(),
        0 if delete_prof else 1)

    # Clean up
    m.Professor.objects.delete()
    m.MenloCourse.objects.delete()
def import_professors(): # NOTE: not safe to drop table anymore since users can add their own # professors now def clean_professor(professor): def clean_name(name): return re.sub(r'\s+', ' ', name.strip()) prof_name = get_prof_name(professor['prof_name']) return { 'first_name': clean_name(prof_name['first_name']), 'last_name': clean_name(prof_name['last_name']), } file_names = glob.glob( os.path.join(os.path.dirname(__file__), c.REVIEWS_DATA_DIR, '*.txt')) for file_name in file_names: with open(file_name, 'r') as f: data = json.load(f) professor = clean_professor(data) # Since user's can now add professors, gotta first check # that the professor does not aleady exist if not m.Professor.objects(**professor): m.Professor(**professor).save() print 'imported professors:', m.Professor.objects.count()
def _opendata_to_section_meeting(data, term_year):
    """Converts OpenData class section info to a SectionMeeting instance.

    Args:
        data: An object from the `classes` field returned by OpenData.
        term_year: The year this term is in.

    Returns:
        An (unsaved) m.SectionMeeting. May also save a new m.Professor as a
        side effect when the meeting names an instructor we haven't seen.
    """
    date = data['date']
    days = []
    if date['weekdays']:
        # 'U' is rewritten to 'Su' first so Sunday parses as its own token;
        # each day is then one capital letter plus an optional lowercase one
        # (e.g. 'MTh' -> ['M', 'Th']).
        days = re.findall(r'[A-Z][a-z]?',
                date['weekdays'].replace('U', 'Su'))

    # TODO(david): Actually use the term begin/end dates when we get nulls
    # Dates arrive without a year, so stamp on the term's year explicitly.
    date_format = '%m/%d'
    start_date = datetime.strptime(date['start_date'], date_format).replace(
            year=term_year) if date['start_date'] else None
    end_date = datetime.strptime(date['end_date'], date_format).replace(
            year=term_year) if date['end_date'] else None

    time_format = '%H:%M'

    # Times are stored as seconds since midnight; subtracting the same
    # datetime with hour/minute/second zeroed yields that offset.
    # TODO(david): DRY-up
    start_seconds = None
    if date['start_time']:
        start_time = datetime.strptime(date['start_time'], time_format)
        start_seconds = (start_time -
                start_time.replace(hour=0, minute=0, second=0)).seconds

    end_seconds = None
    if date['end_time']:
        end_time = datetime.strptime(date['end_time'], time_format)
        end_seconds = (end_time -
                end_time.replace(hour=0, minute=0, second=0)).seconds

    meeting = m.SectionMeeting(
        start_seconds=start_seconds,
        end_seconds=end_seconds,
        days=days,
        start_date=start_date,
        end_date=end_date,
        building=data['location']['building'],
        room=data['location']['room'],
        is_tba=date['is_tba'],
        is_cancelled=date['is_cancelled'],
        is_closed=date['is_closed'],
    )

    if data['instructors']:
        # Instructor strings look like 'Last,First'; only the first listed
        # instructor is kept. NOTE(review): split(',') will raise on names
        # containing extra commas, and first_name keeps any leading space —
        # presumably get_id_from_name normalizes; confirm before changing.
        last_name, first_name = data['instructors'][0].split(',')
        prof_id = m.Professor.get_id_from_name(first_name, last_name)

        # Lazily create the professor if we haven't seen them before.
        if not m.Professor.objects.with_id(prof_id):
            m.Professor(id=prof_id, first_name=first_name,
                    last_name=last_name).save()

        meeting.prof_id = prof_id

    return meeting
def upload_schedule():
    """Imports a pasted class schedule for the current user.

    Reads 'schedule_data' (JSON) from the POSTed form, replaces the user's
    existing UserScheduleItems for that term, records items that failed to
    parse, updates the user's course history, and kicks off an async
    schedule-screenshot update. Returns an empty string on success.
    """
    req = flask.request
    user = view_helpers.get_current_user()

    schedule_data = util.json_loads(req.form.get('schedule_data'))
    processed_items = schedule_data['processed_items']
    failed_items = schedule_data['failed_items']
    term_name = schedule_data['term_name']
    term_id = m.Term.id_from_name(term_name)

    # FIXME TODO(david): Save these in models and display on schedule
    #failed_items = schedule_data['failed_items']

    rmclogger.log_event(
        rmclogger.LOG_CATEGORY_API,
        rmclogger.LOG_EVENT_SCHEDULE, {
            'schedule_data': schedule_data,
            'term_id': term_id,
            'user_id': user.id,
        },
    )

    now = datetime.now()

    # Keep the raw paste around for debugging bad imports.
    user.last_good_schedule_paste = req.form.get('schedule_text')
    user.last_good_schedule_paste_date = now
    user.save()

    # Remove existing schedule items for the user for the given term
    for usi in m.UserScheduleItem.objects(user_id=user.id, term_id=term_id):
        usi.delete()

    for item in processed_items:
        try:
            # Create this UserScheduleItem
            first_name, last_name = m.Professor.guess_names(item['prof_name'])
            prof_id = m.Professor.get_id_from_name(
                first_name=first_name,
                last_name=last_name,
            )
            # Only create a Professor doc when we parsed both name parts.
            if first_name and last_name:
                if not m.Professor.objects.with_id(prof_id):
                    m.Professor(
                        id=prof_id,
                        first_name=first_name,
                        last_name=last_name,
                    ).save()

            usi = m.UserScheduleItem(
                user_id=user.id,
                class_num=item['class_num'],
                building=item['building'],
                room=item.get('room'),
                section_type=item['section_type'].upper(),
                section_num=item['section_num'],
                # NOTE(review): assumes start/end_date are epoch seconds in
                # UTC — confirm against the client that produces them.
                start_date=datetime.utcfromtimestamp(item['start_date']),
                end_date=datetime.utcfromtimestamp(item['end_date']),
                course_id=item['course_id'],
                prof_id=prof_id,
                term_id=term_id,
            )
            try:
                usi.save()
            except me.NotUniqueError as ex:
                # Likely the case where the user pastes in two or more valid
                # schedules into the same input box
                logging.info(
                    'Duplicate error on UserScheduleItem .save(): %s' % (ex))

            # Add this item to the user's course history
            # FIXME(Sandy): See if we can get program_year_id from Quest
            # Or just increment their last one
            user.add_course(usi.course_id, usi.term_id)
        except KeyError:
            # Skip malformed items rather than failing the whole upload.
            logging.error("Invalid item in uploaded schedule: %s" % (item))

    # Add courses that failed to fully parse, probably due to unavailable times
    for course_id in set(failed_items):
        fsi = m.FailedScheduleItem(
            user_id=user.id,
            course_id=course_id,
            parsed_date=now,
        )
        try:
            fsi.save()
        except me.NotUniqueError as ex:
            # This should never happen since we're iterating over a set
            logging.warn('WTF this should never happen.')
            logging.warn('Duplicate error FailedScheduleItem.save(): %s' % ex)

        # Failed items still count toward the user's course history.
        user.add_course(course_id, term_id)

    user.schedules_imported += 1
    user.save()

    schedule_screenshot.update_screenshot_async(user)

    rmclogger.log_event(
        rmclogger.LOG_CATEGORY_SCHEDULE,
        rmclogger.LOG_EVENT_UPLOAD,
        user.id)

    return ''
def user_course():
    """Creates/updates the current user's review of a course and professor.

    Expects a JSON body with 'course_id' and 'term_id', and optionally
    'new_prof_added' or 'professor_id', plus 'course_review' /
    'professor_review' rating payloads. Awards points for new review
    content and returns the updated comment dates and points gained.

    Raises:
        exceptions.ImATeapot: On missing ids, future/shortlist terms, or a
            missing UserCourse (TODO: these should really be 400s).
    """
    uc_data = util.json_loads(flask.request.data)
    user = view_helpers.get_current_user()

    rmclogger.log_event(
        rmclogger.LOG_CATEGORY_API,
        rmclogger.LOG_EVENT_USER_COURSE, {
            'uc_data': uc_data,
            'user_id': user.id,
        },
    )

    # Validate request object
    course_id = uc_data.get('course_id')
    term_id = uc_data.get('term_id')
    if course_id is None or term_id is None:
        logging.error("/api/user/course got course_id (%s) and term_id (%s)" %
                (course_id, term_id))
        # TODO(david): Perhaps we should have a request error function that
        # returns a 400
        raise exceptions.ImATeapot('No course_id or term_id set')

    if not m.UserCourse.can_review(term_id):
        logging.warning("%s attempted to rate %s in future/shortlist term %s" %
                (user.id, course_id, term_id))
        raise exceptions.ImATeapot(
            "Can't review a course in the future or shortlist")

    # Fetch existing UserCourse
    uc = m.UserCourse.objects(
        user_id=user.id,
        course_id=course_id,
        term_id=term_id,
    ).first()

    if uc is None:
        logging.error("/api/user/course User course not found for "
                "user_id=%s course_id=%s term_id=%s" %
                (user.id, course_id, term_id))
        # TODO(david): Perhaps we should have a request error function that
        # returns a 400
        raise exceptions.ImATeapot('No user course found')

    orig_points = uc.num_points

    # TODO(Sandy): Consider the case where the user picked a professor and
    # rates them, but then changes the professor. We need to remove the ratings
    # from the old prof's aggregated ratings and add them to the new prof's

    # Maybe create professor if newly added
    if uc_data.get('new_prof_added'):
        new_prof_name = uc_data['new_prof_added']

        # Guess the name split first so the professor id is generated from
        # (first, last) parts, consistent with every other call site of
        # get_id_from_name in this module.
        first_name, last_name = m.Professor.guess_names(new_prof_name)
        prof_id = m.Professor.get_id_from_name(first_name, last_name)
        uc.professor_id = prof_id

        # TODO(Sandy): Have some kind of sanity check for professor names.
        # Don't allow ridiculousness like "Santa Claus", "aksnlf",
        # "swear words"
        if m.Professor.objects(id=prof_id).count() == 0:
            m.Professor(
                id=prof_id,
                first_name=first_name,
                last_name=last_name,
            ).save()

        # Ensure the course lists this professor; the set-union makes this
        # idempotent when the professor is already attached.
        course = m.Course.objects.with_id(uc.course_id)
        course.professor_ids = list(set(course.professor_ids) | {prof_id})
        course.save()

        logging.info("Added new course professor %s (name: %s)" % (prof_id,
                new_prof_name))
    elif uc_data.get('professor_id'):
        uc.professor_id = uc_data['professor_id']
    else:
        uc.professor_id = None

    now = datetime.now()

    if uc_data.get('course_review'):
        # New course review data
        uc_data['course_review']['comment_date'] = now
        uc.course_review.update(**uc_data['course_review'])

    if uc_data.get('professor_review'):
        # New prof review data
        uc_data['professor_review']['comment_date'] = now
        uc.professor_review.update(**uc_data['professor_review'])

    uc.save()

    points_gained = uc.num_points - orig_points
    user.award_points(points_gained, view_helpers.get_redis_instance())
    user.save()

    return util.json_dumps({
        'professor_review.comment_date': uc['professor_review'][
            'comment_date'],
        'course_review.comment_date': uc['course_review']['comment_date'],
        'points_gained': points_gained,
    })
def import_engineering_critiques(input_file):
    """Imports Engineering course critiques into CritiqueCourse documents.

    Args:
        input_file: A file-like object where each line is a Python literal
            dict (parsed with ast.literal_eval) describing one course and
            its list of critiques.

    Each critique's per-question scores are folded into weighted
    AggregateRatings (clarity, passion, overall prof, interest, easiness,
    overall course) and saved as one CritiqueCourse per critique.
    """
    print 'Begin importing Engineering course critiques'

    number_courses_imported = 0
    number_reviews_imported = 0

    line = input_file.readline()
    while line:
        # One input line == one course record.
        data = ast.literal_eval(line)
        course_id = (data['code'] + data['num']).lower()

        for critique in data['critiques']:
            # arch247 and math212 are dumb.
            # Has 'n/a' or '' for prof, which becomes '/a' or '' after parsing
            prof_name = critique['prof']
            if prof_name == '/a' or prof_name == '':
                continue

            # Eg. Morton, Andrew OR Morton, A
            # FIXME(Sandy): Normalize prof names
            prof_names = get_prof_names(prof_name)
            prof = m.Professor(**prof_names)
            # Note: Manually verified that .save() will not erase existing
            # fields that are not set on save (ie. ratings)
            prof.save()
            professor_id = prof.id

            season = critique['term']
            year = critique['year']
            term_id = m.Term.get_id_from_year_season(year, season)

            # The score index correspond directly to the question numbers
            # (i.e. arrays are 1-indexed)
            scores = critique['scores']

            def clarity_from_scores(scores):
                # Weighted blend of the four presentation-related questions.
                Q1_WEIGHT = 0.2
                Q2_WEIGHT = 0.2
                Q3_WEIGHT = 0.4
                Q4_WEIGHT = 0.2

                # CLARITY
                # presentation in lectures (organization and clarity)
                c1 = normalize_score(scores[1]) * Q1_WEIGHT
                c1r = scores[1]['num_replies'] * Q1_WEIGHT
                # response to questions
                c2 = normalize_score(scores[2]) * Q2_WEIGHT
                c2r = scores[2]['num_replies'] * Q2_WEIGHT
                # oral presentation (audibility, articulation, english)
                c3 = normalize_score(scores[3]) * Q3_WEIGHT
                c3r = scores[3]['num_replies'] * Q3_WEIGHT
                # visual presentation
                # (organization, legibility, effective use of materials)
                c4 = normalize_score(scores[4]) * Q4_WEIGHT
                c4r = scores[4]['num_replies'] * Q4_WEIGHT

                c_count = int(round(c1r + c2r + c3r + c4r))
                # max(1, ...) guards against division by zero on no replies.
                c_rating = (c1 + c2 + c3 + c4) / max(1, c_count)
                return m.AggregateRating(rating=c_rating, count=c_count)

            def passion_from_scores(scores):
                # PASSION
                # attitude towards teachings the course
                p_count = scores[8]['num_replies']
                p_rating = normalize_score(scores[8]) / max(1, p_count)
                return m.AggregateRating(rating=p_rating, count=p_count)

            def overall_prof_from_scores(scores):
                # OVERALL
                # overall appraisal of quality of teaching
                op_count = scores[10]['num_replies']
                op_rating = normalize_score(scores[10]) / max(1, op_count)
                return m.AggregateRating(rating=op_rating, count=op_count)

            def interest_from_scores(scores):
                # Course directed ratings
                # INTEREST
                # TODO(Sandy): Revise the use of this question-metric
                # how many classes attended
                i_count = scores[17]['num_replies']
                i_rating = normalize_score(scores[17]) / max(1, i_count)
                return m.AggregateRating(rating=i_rating, count=i_count)

            def easiness_from_scores(scores):
                Q11_WEIGHT = 0.5
                Q12_WEIGHT = 0.5

                # EASINESS
                # difficulty of concepts
                e1 = normalize_score(scores[11]) * Q11_WEIGHT
                e1r = scores[11]['num_replies'] * Q11_WEIGHT
                # workload
                e2 = normalize_score(scores[12]) * Q12_WEIGHT
                e2r = scores[12]['num_replies'] * Q12_WEIGHT

                e_count = int(round(e1r + e2r))
                e_rating = (e1 + e2) / max(1, e_count)
                return m.AggregateRating(rating=e_rating, count=e_count)

            def overall_course_from_interest_easiness(i, e):
                # Overall course rating is an even blend of the two
                # course-directed aggregates.
                INTEREST_WEIGHT = 0.5
                EASINESS_WEIGHT = 0.5

                # OVERALL
                oc_count = int(
                    round(i.count * INTEREST_WEIGHT +
                        e.count * EASINESS_WEIGHT))
                oc_rating = (i.rating * INTEREST_WEIGHT +
                    e.rating * EASINESS_WEIGHT) / max(1, oc_count)
                return m.AggregateRating(rating=oc_rating, count=oc_count)

            # TODO(Sandy): Try different weightings to see if we can get
            # better data
            interest = interest_from_scores(scores)
            easiness = easiness_from_scores(scores)
            overall_course = overall_course_from_interest_easiness(
                interest, easiness)

            clarity = clarity_from_scores(scores)
            passion = passion_from_scores(scores)
            overall_prof = overall_prof_from_scores(scores)

            critique_course = {
                'course_id': course_id,
                'professor_id': professor_id,
                'term_id': term_id,
                'interest': interest,
                'easiness': easiness,
                'overall_course': overall_course,
                'clarity': clarity,
                'passion': passion,
                'overall_prof': overall_prof,
            }
            m.CritiqueCourse(**critique_course).save()
            number_reviews_imported += 1

        number_courses_imported += 1
        line = input_file.readline()

    print('imported %d engineering course critiques reviews' %
        number_reviews_imported)
    print 'from %d courses' % number_courses_imported
    print 'totalling %d courses critiques' % m.CritiqueCourse.objects.count()
    print 'Finished importing Engineering course critiques'