def check_or_add_prof_course_semester(unique_num, prof_course, semester):
    """
    Look up a ProfCourseSemester by unique number, prof_course, and semester,
    creating and committing a new one when no match exists.

    :param unique_num: unique number for the prof_course_semester
    :type unique_num: int
    :param prof_course: prof_course object for the given relationship
    :type prof_course: ProfCourse
    :param semester: semester object for the given relationship
    :type semester: Semester
    :return: tuple(number of matches found, ProfCourseSemester object)
    :rtype: tuple(int, ProfCourseSemester)
    """
    matches = ProfCourseSemester.query.filter_by(
        unique_num=unique_num, prof_course_id=prof_course.id, sem_id=semester.id)
    match_count = len(matches.all())
    result = matches.first()
    if result is None:
        logger.debug(
            f'Adding new prof_course_semester: unique={unique_num}, semester={semester.year} {semester.semester}'
        )
        result = ProfCourseSemester(unique_num=unique_num,
                                    prof_course_id=prof_course.id,
                                    sem_id=semester.id)
        db.session.add(result)
        db.session.commit()
    return match_count, result
def fetch_html(url, attempt=1):
    """
    Fetch html from the provided url, retrying on HTTP errors.

    :param url: link to site to fetch
    :type url: str
    :param attempt: attempt number for the url
    :type attempt: int
    :return: html bytes read from the url, or None if all attempts failed
    """
    # BUG FIX: the url was previously passed as a lazy %-style argument with
    # no placeholder in the format string, so it never appeared in the log.
    logger.debug("fetching: %s", url)
    __max_attempts = 10
    try:
        client = urlopen(url)
        html = client.read()
        client.close()
    except http.client.HTTPException:
        logger.debug(f"URL Failed: {url}, Attempt Number: {attempt}")
        if attempt >= __max_attempts:
            # give up after __max_attempts tries and record the failure
            failed_requests.append(url)
            return None
        # retry recursively with an incremented attempt counter
        return fetch_html(url, attempt + 1)
    return html
def check_or_add_xlist(x_listings, semester):
    """
    Checks the ScheduledCourse list for an xlist. If it exists, nothing happens.
    If it doesn't, reuse an empty CrossListed or add a new one.

    :param x_listings: list of unique numbers to search through
    :type x_listings: list[str]
    :param semester: semester for the ScheduledCourse to iterate through
    :type semester: Semester
    :return: results of the search
    :rtype: CrossListed
    """
    x_list = None
    # use the xlist of any scheduled course that already carries one
    for x_list_str in x_listings:
        x_course = ScheduledCourse.query.filter_by(unique_no=x_list_str, sem_id=semester.id).first()
        if x_course is not None and x_course.xlist is not None:
            x_list = x_course.xlist
    if x_list is None:
        # try to reuse a CrossListed row that has no courses attached
        x_list = CrossListed.query.filter(~CrossListed.courses.any()).first()
        # BUG FIX: x_list.id was previously logged unconditionally here,
        # raising AttributeError whenever no empty CrossListed exists
        if x_list is not None:
            logger.debug(f"Using empty CrossListed: {x_list.id}")
    if x_list is None:
        logger.debug(
            f"Adding new CrossListed for semester {semester.year} {semester.semester}"
        )
        x_list = CrossListed()
        db.session.add(x_list)
        db.session.commit()
    return x_list
def refresh_ecis():
    """
    Set course and prof ecis_avg and ecis_students by iterating through ecis_scores
    """
    logger.info("Refreshing course and professor ecis fields with respective data")
    # Course and Prof are handled identically, so process both lists in turn
    for records in (Course.query.all(), Prof.query.all()):
        for record in records:
            if type(record) is Course:
                logger.debug(f"Refreshing ecis for Course: {record.dept} {record.num}")
            elif type(record) is Prof:
                logger.debug(f"Refreshing ecis for Prof: {record.first_name} {record.last_name}")
            weighted_total = 0
            student_total = 0
            # walk every ecis score attached to this course/prof
            for prof_course in record.prof_course:
                for prof_course_sem in prof_course.prof_course_sem:
                    for score in prof_course_sem.ecis:
                        weighted_total += score.course_avg * score.num_students
                        student_total += score.num_students
            # average is None when no students were recorded
            record.ecis_avg = (weighted_total / student_total) if student_total > 0 else None
            record.ecis_students = student_total
    db.session.commit()
def fetch_ftp_files(out_dir):
    """Downloads ftp files from UT Austin FTP server

    Args:
        out_dir (str): directory to download files to
    """
    __url = 'reg-it.austin.utexas.edu'
    __username = '******'
    logger.info(f"Downloading FTP data files to {out_dir}")
    cur_dir = getcwd()
    ftp = FTP(__url)
    ftp.login(user=__username)
    chdir(out_dir)
    try:
        for filename in (filename_current, filename_next, filename_future):
            # BUG FIX: the log message previously printed the literal text
            # "(unknown)" instead of interpolating the filename
            logger.debug(f'FTP: downloading {filename}')
            # context manager guarantees the local file is closed on error
            with open(filename, 'wb') as local_file:
                ftp.retrbinary('RETR ' + filename, local_file.write, 1024)
        ftp.quit()
    finally:
        # always restore the caller's working directory, even on failure
        chdir(cur_dir)
def parse_prof_csv(file_path):
    """
    Parse .csv file containing prof data

    :param file_path: path to prof file
    :type file_path: str
    :return: sorted list of prof data
    :rtype: list(tuple(str, str, str))
    """
    __key_sem = 'CCYYS'
    __key_prof_name = 'INSTR_NAME'
    __key_prof_eid = 'INSTR_EID'
    logger.info(f'Parsing prof csv file: {file_path}')
    df = pd.read_csv(file_path)
    unique_profs = set()
    for _, row in df.iterrows():
        semester = row[__key_sem]
        name = row[__key_prof_name]
        eid = row[__key_prof_eid]
        try:
            semester = int(semester)
        except ValueError:
            logger.debug(
                f'Unable to parse semester {semester}. Defaulting to 0...')
            semester = 0
        # lower-case name/eid so the set deduplicates case variants
        unique_profs.add((semester, name.lower(), eid.lower()))
    # sort by semester (ascending)
    return sorted(unique_profs, key=lambda entry: entry[0])
def refresh_review_info():
    """
    Refresh course and prof review metric fields
    For Course: approval, difficulty, usefulness, workload
    For Prof: approval, clear, engaging, grading
    """
    query_lst = (Course.query.all(), Prof.query.all())
    for queries in query_lst:
        for query in queries:
            if type(query) is Course:
                logger.debug(f"Refreshing review fields for Course: {query.dept} {query.num}")
            elif type(query) is Prof:
                logger.debug(f"Refreshing review fields for Prof: {query.first_name} {query.last_name}")
            # initiate variables
            query.num_ratings = len(query.reviews)
            approval = 0
            metrics = [0, 0, 0]
            # iterate through reviews and accumulate metric values
            for review in query.reviews:
                approval += int(review.approval)
                if type(query) is Course:
                    metrics[0] += review.difficulty
                    metrics[1] += review.usefulness
                    metrics[2] += review.workload
                elif type(query) is Prof:
                    metrics[0] += review.clear
                    metrics[1] += review.engaging
                    metrics[2] += review.grading
            # final metric calculation (averages); None when no ratings exist
            n = query.num_ratings
            query.approval = approval / n if n > 0 else None
            metrics = [m / n if n > 0 else None for m in metrics]
            # update query based on type
            if type(query) is Course:
                query.difficulty = metrics[0]
                query.usefulness = metrics[1]
                query.workload = metrics[2]
            elif type(query) is Prof:
                # BUG FIX: Prof metrics were previously written back to
                # difficulty/usefulness/workload even though the values
                # accumulated above are clear/engaging/grading (and the
                # docstring promises those Prof fields are refreshed)
                query.clear = metrics[0]
                query.engaging = metrics[1]
                query.grading = metrics[2]
    db.session.commit()
def fetch_prof_course_sem_depts():
    """
    From the professor course site, fetch lists of the semesters and departments available

    :return: list of semesters and departments (semesters, departments), or None on failure
    :rtype: tuple(list[str], list[str])
    """
    page = fetch_html("https://utdirect.utexas.edu/apps/student/coursedocs/nlogon/")
    if page is None:
        logger.debug("Failed to fetch prof_course semester and department lists")
        return None
    soup = BSoup(page, "html.parser")
    # first entry of each parsed list is dropped — presumably a placeholder
    # option on the site's form; confirm against the live page
    semesters = parse_prof_course_sems(soup)[1:]
    departments = parse_prof_course_depts(soup)[1:]
    return semesters, departments
def get_header_indices(headers, *header_vals):
    """
    Provided a list of headers, find the index in ``headers`` of each
    requested header value.

    :param headers: list of headers to search through
    :type headers: list[str]
    :param header_vals: headers to find indices for
    :return: a tuple of indices, with None for any header not found
    :rtype: tuple(int or None)
    """
    # DOC FIX: the docstring previously claimed an rtype of list[int];
    # the function has always returned a tuple that may contain None.
    indices = []
    for header in header_vals:
        try:
            indices.append(headers.index(header))
        except ValueError:
            # header missing from the table; record None so callers can skip it
            logger.debug(f"Cannot find index for: {header}")
            indices.append(None)
    return tuple(indices)
def populate_dept_info(dept_info):
    """
    Populate department with additional information (college and department name)

    :param dept_info: list of tuples containing: (abbreviation, department name, college name)
    :type dept_info: list[tuple(str, str, str)]
    """
    logger.info('Populating departments with additional info')
    for abr, dept, college in dept_info:
        match = Dept.query.filter_by(abr=abr).first()
        if match is None:
            logger.debug(f"Cannot find dept {abr}")
            continue
        logger.debug(f"Updating dept: {abr} with dept={dept}, college={college}")
        match.dept = dept
        match.college = college
    db.session.commit()
def get_course_url(sem="spring", year=2020, dept="", c_num="", c_title="",
                   u_num="", inst_first="", inst_last=""):
    """
    Generate url to site with syllabi/cvs data
    Also contains data for professor, course, and unique number, separated by semester

    :param sem: semester to search. Valid values: 'spring', 'summer', 'fall'
    :type sem: str
    :param year: year to search
    :type year: int
    :param dept: department to search
    :type dept: str
    :param c_num: course number to search
    :type c_num: str
    :param c_title: course title to search
    :type c_title: str
    :param u_num: unique number to search
    :type u_num: str
    :param inst_first: instructor first name to search
    :type inst_first: str
    :param inst_last: instructor last name to search
    :type inst_last: str
    :return: url to site
    :rtype: str
    """
    # map semester names to the numeric codes the site expects
    sem_codes = {"spring": 2, "summer": 6, "fall": 9}
    sem_num = sem_codes.get(sem)
    if sem_num is None:
        logger.debug(f"Cannot parse semester: {sem}. Defaulting to spring...")
        sem_num = 2
    return ('https://utdirect.utexas.edu/apps/student/coursedocs/nlogon/?'
            f'semester={year}{sem_num}'
            f'&department={dept.replace(" ", "+")}'
            f'&course_number={c_num}'
            f'&course_title={c_title.replace(" ", "+")}'
            f'&unique={u_num}'
            f'&instructor_first={inst_first.replace(" ", "+")}'
            f'&instructor_last={inst_last.replace(" ", "+")}'
            '&course_type=In+Residence&search=Search')
def check_or_add_scheduled_course(scheduled_info, course, prof, x_list, semester, add=True):
    """
    Checks the database for the existence of the scheduled_course
    If it does, nothing happens. If it doesn't, add the said ScheduledCourse

    :param scheduled_info: object containing parsed scheduled course info
    :type scheduled_info: ScheduledCourseInfo
    :param course: model object containing course id related to scheduled course
    :type course: Course
    :param prof: model object containing prof id related to scheduled course
    :type prof: Prof
    :param x_list: model object containing cross_listed id related to scheduled course
    :type x_list: CrossListed or None
    :param semester: model object containing semester id related to scheduled course
    :type semester: Semester
    :param add: add the object to database if doesn't exist
    :type add: bool
    :return: results of the search as a tuple(number of results, ScheduledCourse object containing the info)
    :rtype: tuple(int, ScheduledCourse)
    """
    # a scheduled course is identified by its unique number within a semester
    cur_schedule = ScheduledCourse.query.filter_by(
        unique_no=scheduled_info.unique_no, sem_id=semester.id)
    num_results = len(cur_schedule.all())
    cur_schedule = cur_schedule.first()
    if cur_schedule is None:
        logger.debug(
            f"""Adding new scheduled course. Unique = {scheduled_info.unique_no} semester={repr(semester)} course={repr(course)} prof={repr(prof)}""")
        # build the new ScheduledCourse; it is only persisted when add=True,
        # letting callers batch their own commits
        cur_schedule = scheduled_info.build_scheduled_course(
            semester, course, prof, x_list)
        if add:
            db.session.add(cur_schedule)
            db.session.commit()
    return num_results, cur_schedule
def check_or_add_semester(yr, sem):
    """
    Look up a Semester by year and semester code, creating and committing a
    new one when no match exists.

    :param yr: semester year
    :type yr: int
    :param sem: semester integer (view utreview's __init__.py to view corresponding integers)
    :type sem: int
    :return: tuple(number of matches found, Semester object)
    :rtype: tuple(int, Semester)
    """
    matches = Semester.query.filter_by(year=yr, semester=sem)
    match_count = len(matches.all())
    semester = matches.first()
    if semester is None:
        logger.debug(f'Adding new Semester: {yr} {sem}')
        semester = Semester(year=yr, semester=sem)
        db.session.add(semester)
        db.session.commit()
    return match_count, semester
def populate_dept(dept_info, override=False):
    """
    Populate the database with departments

    :param dept_info: list of tuples with: (abbreviation, name)
    :type dept_info: tuple(str, str)
    :param override: override current department with same abbreviation if found in database
    :type override: bool
    """
    logger.info("Populating database with departments")
    for abr, name in dept_info:
        existing = Dept.query.filter_by(abr=abr).first()
        if existing is None:
            # add department to database
            abr = abr.strip()
            name = name.strip()
            logger.debug(f"Adding dept {name} ({abr}) to database")
            db.session.add(Dept(abr=abr, name=name))
        elif override:
            # replace the stored abbreviation/name with the provided values
            logger.debug(f"Overriding dept {name} ({abr}) to database")
            existing.abr = abr
            existing.name = name
        else:
            # department already exists and not overriding
            logger.debug(f"Already exists: dept {name} ({abr})")
    db.session.commit()
def check_or_add_prof_course(prof, course):
    """
    Look up the ProfCourse relationship between a professor and a course,
    creating and committing a new one when no match exists.

    :param prof: professor for the relationship
    :type prof: Prof
    :param course: course for the relationship
    :type course: Course
    :return: tuple(number of matches found, ProfCourse object)
    :rtype: tuple(int, ProfCourse)
    """
    matches = ProfCourse.query.filter_by(prof_id=prof.id, course_id=course.id)
    match_count = len(matches.all())
    relation = matches.first()
    if relation is None:
        logger.debug(f'Adding new prof_course: {prof} {course}')
        relation = ProfCourse(prof_id=prof.id, course_id=course.id)
        db.session.add(relation)
        db.session.commit()
    return match_count, relation
def check_or_add_prof(first_name, last_name):
    """
    Look up a Prof by first and last name, creating and committing a new one
    when no match exists.

    :param first_name: first name of professor
    :type first_name: str
    :param last_name: last name of professor
    :type last_name: str
    :return: tuple(number of matches found, Prof object)
    :rtype: tuple(int, Prof)
    """
    matches = Prof.query.filter_by(first_name=first_name, last_name=last_name)
    match_count = len(matches.all())
    prof = matches.first()
    if prof is None:
        logger.debug(f"Adding new prof: {first_name} {last_name}")
        prof = Prof(first_name=first_name, last_name=last_name)
        db.session.add(prof)
        db.session.commit()
    return match_count, prof
def parse_ftp(in_dir):
    """Parse FTP files from the UT Austin FTP server

    Args:
        in_dir (str): directory containing the ftp files

    Returns:
        list[dict]: one dict per course row, keyed by the file's category headers
    """
    logger.info(f"Parsing FTP files from {in_dir}")
    courses = []
    for filename in (filename_current, filename_next, filename_future):
        filepath = join(in_dir, filename)
        if not isfile(filepath):
            # BUG FIX: both log messages previously printed the literal text
            # "(unknown)" instead of interpolating the filename
            logger.debug(f'FTP: {filename} does not exist in {in_dir}')
            continue
        logger.debug(f'FTP: parsing {filename}')
        with open(filepath) as f:
            lines = f.readlines()
        categories, lines = __parse_categories(lines)
        if categories is None:
            continue
        for line in lines:
            # standardize the line and split tab-separated fields
            line = line.lower()
            data = [d.strip() for d in line.split("\t")]
            if len(line) > 0 and len(data) >= len(categories):
                # pair each data field with its category header
                courses.append({categories[i]: data[i] for i in range(len(categories))})
    return courses
def fetch_sem_values(ftp_dir, out_dir):
    """
    fetch semester values from the FTP data files from the given directory

    :param ftp_dir: the directory containing the ftp data files
    :param out_dir: the directory to output a file containing the semester data
    :return: path to the json file the semester values were written to
    """
    files = (filename_current, filename_next, filename_future)
    keys = (key_current, key_next, key_future)
    out_path = join(out_dir, sem_file)
    sem_dict = {}
    logger.info(f"Fetching semester values from dir={ftp_dir}, to file={out_path}")
    for m_file, key in zip(files, keys):
        sem = None
        filepath = join(ftp_dir, m_file)
        if isfile(filepath):
            with open(filepath, 'r') as f:
                lines = f.readlines()
            for line in lines:
                if __sem_label in line:
                    # semester code is the 5-digit number following the label
                    m = re.search(r'[A-Za-z ]+(\d{5}) (.*)?', line)
                    # BUG FIX: m.group(1) was previously read unconditionally,
                    # raising AttributeError whenever a line contains the label
                    # but does not match the pattern
                    if m is not None:
                        sem = m.group(1)
        else:
            logger.debug(f"Fetch Sem: cannot find file: {m_file} in {ftp_dir}")
        # record None when the file or pattern was missing
        sem_dict[key] = sem
    with open(out_path, 'w') as f:
        json.dump(sem_dict, f)
    return out_path
def check_or_add_course(dept, num, title):
    """
    Look up a Course by department and course number, creating and committing
    a new one (with the given title) when no match exists.

    :param dept: department of the course
    :type dept: Dept
    :param num: course number
    :type num: str
    :param title: title of the course
    :type title: str
    :return: tuple(number of matches found, Course object)
    :rtype: tuple(int, Course)
    """
    matches = Course.query.filter_by(dept_id=dept.id, num=num)
    match_count = len(matches.all())
    course = matches.first()
    if course is None:
        logger.debug(f'Adding new course: {dept.abr} {num}')
        course = Course(dept_id=dept.id, num=num, title=title)
        db.session.add(course)
        db.session.commit()
    return match_count, course
def fetch_prof(query):
    """
    Fetch professor name and eid from UT directory website

    :param query: professor query to search on site
    :type query: str
    :return: name and eid of professor in format: (name, eid); (None, None) on failure
    :rtype: tuple(str, str)
    """
    logger.debug(f"Fetching Prof: {query}")
    __name_tag = "Name"
    __eid_tag = "UT EID"
    name = None
    eid = None
    # fetch html from link, if None, cannot continue
    html = fetch_html('https://directory.utexas.edu/index.php?q='
                      f'{query}'
                      '&scope=faculty%2Fstaff&submit=Search')
    if html is None:
        logger.debug("Failed to fetch professor data: html is None")
        return None, None
    soup = BSoup(html, "html.parser")
    # search for data using the html elements surrounding it
    prof_info_table = soup.find("table", {"class": "dir_info"})
    if prof_info_table is None:
        logger.debug(
            "Failed to fetch professor data: professor info table does not exist"
        )
        return None, None
    prof_info_table = prof_info_table.findAll("tr")
    prof_info_table = [tr.findAll("td") for tr in prof_info_table]
    for tr in prof_info_table:
        # each info row is a (label, value) pair of cells
        if len(tr) < 2:
            continue
        tag = tr[0].text.strip()
        val = tr[1].text.strip()
        if __name_tag in tag:
            name = val
            # FIX: removed dead statement `name.split(",")[0].strip()` whose
            # result was discarded (a no-op). NOTE(review): the intent may have
            # been `name = name.split(",")[0].strip()`, but assigning it would
            # change what callers (e.g. __parse_prof_name) receive — confirm
            # before applying that change.
        elif __eid_tag in tag:
            eid = val
    return name, eid
def populate_prof(prof_info):
    """
    Populate database with a professor using data fetched from the web

    :param prof_info: data fetched using fetch_prof from utreview.services.fetch_web
    :type prof_info: list
    """
    # guard clause: need at least (name, eid)
    if prof_info is None or len(prof_info) <= 1:
        logger.debug(f"Invalid input to populate_prof: {prof_info}")
        return
    first_name, last_name = __parse_prof_name(prof_info[0])
    eid = prof_info[1]
    existing = Prof.query.filter_by(first_name=first_name, last_name=last_name, eid=eid).first()
    if existing is not None:
        logger.debug(f"Professor {first_name} {last_name} already exists")
        return
    logger.debug(f"Adding professor {first_name} {last_name}")
    db.session.add(Prof(first_name=first_name, last_name=last_name, eid=eid))
    db.session.commit()
def populate_prof_course(in_file):
    """
    Populate database with Professor and Course relationship
    using data fetched from the web (utreview.services.fetch_web.fetch_prof_course_info only)

    :param in_file: file the data was fetched to
    :type in_file: str
    """
    __sem_fall = "Fall"
    __sem_spring = "Spring"
    __sem_summer = "Summer"
    logger.info(f"Populating database with prof_course info using {in_file}")
    # creating list of prof-course relationships from the given file
    # (one JSON object per line, as written by fetch_prof_course_info)
    prof_courses = []
    with open(in_file, 'r') as f:
        for line in f:
            prof_courses.append(json.loads(line))
    cur_profs = Prof.query.all()
    # add each prof-course relationship to the database if appropriate
    for prof_course in prof_courses:
        # check for existence of professor -> add if does not exist
        # the scraped name is "Last, First"; split and tokenize both parts
        prof_name = [name.strip() for name in prof_course[KEY_PROF].lower().split(",")]
        last, first = prof_name[0].strip(), prof_name[1].strip()
        last_words = [word.strip() for word in last.split(' ') if len(word.strip()) > 0]
        first_words = [word.strip() for word in first.split(' ') if len(word.strip()) > 0]
        target_prof = None
        # match a known Prof: every word of the scraped last/first name must
        # appear among the stored prof's last/first name words
        for cur_prof in cur_profs:
            found = True
            cur_last, cur_first = cur_prof.last_name.lower(), cur_prof.first_name.lower()
            cur_last_words = [word.strip() for word in cur_last.split(' ') if len(word.strip()) > 0]
            cur_first_words = [word.strip() for word in cur_first.split(' ') if len(word.strip()) > 0]
            for word in last_words:
                if word not in cur_last_words:
                    found = False
                    break
            if found:
                for word in first_words:
                    if word not in cur_first_words:
                        found = False
                        break
            if found:
                target_prof = cur_prof
                break
        if target_prof is None:
            logger.debug(f"Cannot find prof: {prof_course[KEY_PROF]}. Skipping...")
            continue
        # check for existence of department -> skip if does not exist
        abr = prof_course[KEY_DEPT].strip().upper()
        dept = Dept.query.filter_by(abr=abr).first()
        if dept is None:
            logger.debug(f"Cannot find dept: {abr}. Skipping...")
            continue
        # check if course exists -> add if does not exist
        # TODO: choosing topic 0 by default. Update when topic info available.
        num_results, course = check_or_add_course(dept, prof_course[KEY_CNUM], prof_course[KEY_TITLE])
        if num_results > 1:
            # several topic variants exist; prefer the base topic (<= 0)
            courses = Course.query.filter_by(dept_id=dept.id, num=prof_course[KEY_CNUM])
            for c in courses:
                if c.topic_num <= 0:
                    course = c
        db.session.commit()
        # check if prof_course exists -> add if it doesn't
        _, prof_course_obj = check_or_add_prof_course(target_prof, course)
        db.session.commit()
        # parse semester to integer representation
        sem_lst = [s.strip() for s in prof_course[KEY_SEM].split(",")]
        if sem_lst[1] == __sem_spring:
            sem = SPRING_SEM
        elif sem_lst[1] == __sem_summer:
            sem = SUMMER_SEM
        elif sem_lst[1] == __sem_fall:
            sem = FALL_SEM
        else:
            logger.debug(f"Invalid semester: {sem_lst[1]}. Skipping...")
            continue
        yr = int(sem_lst[0].strip())
        # check for semester existence -> if it doesn't, add to database
        _, sem_obj = check_or_add_semester(yr, sem)
        # check for prof_course_semester existence -> if it doesn't add to database
        check_or_add_prof_course_semester(prof_course[KEY_UNIQUE], prof_course_obj, sem_obj)
        db.session.commit()
def populate_scheduled_course(course_info):
    """
    Populate the database with scheduled course info as parsed from FTP

    :param course_info: list of course data
    :type course_info: list[dict]
    """
    logger.info("Populating database with scheduled course info")
    # collect every resolved scheduled course, then persist them in one batch
    s_course_queue = []
    for s_course in course_info:
        # create ScheduledCourseInfo object using the s_course dictionary
        try:
            scheduled = ScheduledCourseInfo(s_course)
        except ValueError as err:
            # NOTE(review): logger.warn is a deprecated alias of logger.warning
            logger.warn(f"Populate scheduled course error: {err}. Skipping...")
            continue
        # check to see if dept exists
        dept_obj = Dept.query.filter_by(abr=scheduled.dept).first()
        if dept_obj is None:
            logger.debug(f"Populate scheduled course: cannot find department {scheduled.dept}. Skipping...")
            continue
        # check to see if course exists
        cur_courses = Course.query.filter_by(dept_id=dept_obj.id, num=scheduled.c_num)
        if len(cur_courses.all()) > 1:
            # several topic variants share this number; narrow by topic number
            cur_courses = cur_courses.filter_by(topic_num=scheduled.topic)
        cur_course = cur_courses.first()
        if cur_course is None:
            course_log_description = f"{scheduled.dept} {scheduled.c_num} w/ topic num {scheduled.topic}"
            logger.debug(f"Populate scheduled course: cannot find course {course_log_description}. Skipping...")
            continue
        # check to see if prof exists --> if not then leave empty
        cur_prof = Prof.query.filter_by(eid=scheduled.prof_eid).first()
        if cur_prof is None:
            logger.warn(f"Could not find professor w/ EID={scheduled.prof_eid}. Leaving empty...")
        # check to see if semester exists else add semester
        _, semester = check_or_add_semester(yr=scheduled.yr, sem=scheduled.sem)
        # check to see if scheduled course exists else create new
        # (add=False: persistence is deferred to update_scheduled_courses below)
        num_results, cur_schedule = check_or_add_scheduled_course(scheduled, cur_course, cur_prof, None, semester, add=False)
        if num_results > 0:
            logger.debug(f"""Updating scheduled course. Unique = {scheduled.unique_no} semester={repr(semester)} course={repr(cur_course)} prof={repr(cur_prof)}""")
            cur_schedule = scheduled.to_scheduled_course(cur_schedule, semester, cur_course, cur_prof, None)
        s_course_queue.append({
            'scheduled': cur_schedule,
            'prof': cur_prof,
            'course': cur_course,
            'semester': semester,
            'unique': scheduled.unique_no,
            'xlist': scheduled.x_listings,
        })
    update_scheduled_courses(s_course_queue)
def populate_ecis(file_path, pages):
    """
    Populate database with ECIS information

    :param file_path: path to file containing data
    :type file_path: str
    :param pages: pages of file to parse
    :type pages: list[int] or list[str]
    """
    # FOR FUTURE UPDATES, PLEASE READ:
    # remember to update Course and Prof ECIS fields when inputting new ECIS scores: ecis_avg and ecis_students
    logger.info(f'Populating ecis database with data from: {file_path}')
    ecis_lst = parse_ecis_excel(file_path, pages)
    for ecis in ecis_lst:
        # separate values from dictionary
        unique, c_avg, p_avg, students, yr, sem = (
            ecis[KEY_UNIQUE_NUM], ecis[KEY_COURSE_AVG], ecis[KEY_PROF_AVG],
            ecis[KEY_NUM_STUDENTS], ecis[KEY_YR], ecis[KEY_SEMESTER]
        )
        # check for existence of specified Semester, ProfCourseSemester in database
        logger.debug(f'Adding ecis for: unique={unique}, sem={yr}{sem}')
        sem_obj = Semester.query.filter_by(year=yr, semester=sem).first()
        if sem_obj is None:
            logger.debug(f"Cannot find semester for: {yr}{sem}. Skipping...")
            continue
        pcs_obj = ProfCourseSemester.query.filter_by(unique_num=unique, sem_id=sem_obj.id).first()
        if pcs_obj is None:
            logger.debug(
                f"Failed to find ProfCourseSemester for: unique={unique}, sem={yr}{sem}. Skipping..."
            )
            continue
        # assumption: only one ecis score per prof_course_semester instance -> else skip
        # NOTE(review): this rebinding shadows the outer ecis_lst name; safe
        # because the for-loop above holds its own iterator, but confusing
        ecis_lst = pcs_obj.ecis
        if len(ecis_lst) >= 1:
            # ecis already exists
            continue
        # creating the ecis object
        ecis_obj = EcisScore(
            course_avg=c_avg, prof_avg=p_avg, num_students=students,
            prof_course_sem_id=pcs_obj.id)
        db.session.add(ecis_obj)
        db.session.commit()
        # updating course and prof ecis fields
        logger.debug("Updating prof and course ecis fields")
        pc_obj = pcs_obj.prof_course
        course_obj = pc_obj.course
        prof_obj = pc_obj.prof
        queries = ((course_obj, c_avg), (prof_obj, p_avg))
        for query, avg in queries:
            # fold this score into the running student-weighted average;
            # missing existing/new averages contribute 0 to the numerator
            total_students = query.ecis_students + students
            total_avg = ((query.ecis_avg * query.ecis_students) if query.ecis_avg is not None else 0) + \
                ((avg * students) if avg is not None else 0)
            query.ecis_avg = (total_avg / total_students) if total_students > 0 else None
            query.ecis_students = total_students
        db.session.commit()
def fetch_prof_course_info(out_file, sems, depts):
    """
    Parse prof course info from the site -> for the relationship

    :param out_file: file to output the relationships/data (one JSON object per line, appended)
    :type out_file: str
    :param sems: semesters to fetch data for
    :type sems: list[str]
    :param depts: departments to fetch data for
    :type depts: list[str]
    """
    __sem_header = 'SEMESTER'
    __dept_header = 'DEPT'
    __title_header = 'TITLE'
    __course_num_header = 'COURSENUMBER'
    __unique_header = 'UNIQUE'
    __instr_header = 'INSTRUCTOR(S)*'
    logger.info(f"Fetching prof_course info. Output={out_file}. Semesters={sems}. Departments={depts}")
    for sem in sems:
        for dept in depts:
            # get BeautifulSoup object for the parameters
            html = fetch_html(get_prof_course_url(sem, dept))
            # BUG FIX: fetch_html returns None when every attempt fails, which
            # previously crashed BSoup; skip this semester/department instead
            if html is None:
                logger.debug("Failed to fetch prof_course page: "
                             f"Semester={sem}, Department={dept}. Skipping...")
                continue
            html_soup = BSoup(html, "html.parser")
            # look for headers on page -> headers for the table
            headers = html_soup.find("tr", {"class": "tbh header"})
            if headers is None:
                logger.debug("Cannot find headers for prof_course search: "
                             f"Semester={sem}, Department={dept}. Skipping...")
                continue
            headers = [header.text.replace("\n", "").strip() for header in headers.findAll("th")]
            logger.debug(f"Fetched headers from profcourse site with headers: {headers}")
            # parse out indices for each of the headers (None when missing)
            sem_index, dept_index, title_index, cnum_index, unique_index, instr_index = get_header_indices(
                headers, __sem_header, __dept_header, __title_header,
                __course_num_header, __unique_header, __instr_header
            )
            # iterate through each row in the web table and parse out data
            rows = html_soup.findAll("tr", {"class": ["tboff", "tbon"]})
            for row in rows:
                cols = row.findAll("td")
                cols = [col.text.replace("\n", "").strip() for col in cols]
                # strip trailing "CV" link text out of each cell
                for i in range(len(cols)):
                    if 'CV' in cols[i]:
                        cols[i] = cols[i].split('CV')[0].strip()
                # create dictionary containing the data
                prof_course = {
                    KEY_SEM: cols[sem_index] if sem_index is not None else None,
                    KEY_DEPT: cols[dept_index] if dept_index is not None else None,
                    KEY_TITLE: cols[title_index].strip()[:-1] if title_index is not None else None,
                    KEY_CNUM: cols[cnum_index] if cnum_index is not None else None,
                    KEY_UNIQUE: cols[unique_index] if unique_index is not None else None,
                    KEY_PROF: cols[instr_index] if instr_index is not None else None
                }
                # write dictionary to file (one JSON object per line)
                with open(out_file, "a") as f:
                    json.dump(prof_course, f)
                    f.write("\n")
def populate_course(course_info, cur_sem=None):
    """
    Populate database with courses

    :param course_info: list of dictionaries containing course data
    :type course_info: list[dict]
    :param cur_sem: the current semester. if set to None, data will be replaced with most recent value
    :type cur_sem: int or None
    """
    __inherit = "(See Base Topic for inherited information.)"
    # departments referenced by course data but missing from the database
    null_depts = set()
    logger.info("Populating database with courses")
    for course in course_info:
        # fetch values from dictionary
        semester = course[KEY_SEM]
        dept = course[KEY_DEPT]
        num = course[KEY_NUM]
        title = course[KEY_TITLE]
        cs_title = course[KEY_CS_TITLE]
        description = course[KEY_DESCRIPTION]
        restrictions = course[KEY_RESTRICTION]
        t_num = course[KEY_TOPIC_NUM]
        pre_req = course[KEY_PRE_REQ]
        # check to see if dept exists --> else skip
        dept_obj = Dept.query.filter_by(abr=dept).first()
        if dept_obj is None:
            null_depts.add(dept)
            continue
        # if topic number > 0, then title = modified cs title
        if t_num > 0:
            cs_title = __parse_title(cs_title)
            title = title if cs_title is None else cs_title
        # None if course doesn't currently exist
        old_course = None
        # define new base course variable
        new_course = Course(
            num=num, title=title, description=description,
            restrictions=restrictions, pre_req=pre_req,
            dept_id=dept_obj.id, topic_num=t_num
        )
        # condition on topic number
        if t_num >= 0:
            # all courses with same topic number --> should be unique topics
            # if len 0 --> new topic
            topic_courses_flask = Course.query.filter_by(dept_id=dept_obj.id, num=num)
            topic_courses = topic_courses_flask.all()
            # set topic number --> will create new topic if doesnt exist
            new_course.topic_id = __check_new_topic(topic_courses_flask)
            # assumption: unique based on topic number
            t_course_flask = topic_courses_flask.filter_by(topic_num=t_num)
            if t_num == 0:
                # base topic: propagate inherited info to existing children
                if len(t_course_flask.all()) > 0:
                    old_course = t_course_flask.first()
                __populate_child_topics(new_course, topic_courses, __inherit)
            else:
                # child topic: inherit from the topic-zero base course
                topic_zero = __get_topic_zero(topic_courses)
                if len(t_course_flask.all()) > 0:
                    old_course = t_course_flask.first()
                __populate_child_topics(topic_zero, [new_course], __inherit)
        else:
            # course doesn't have topic number
            old_course = Course.query.filter_by(dept_id=dept_obj.id, num=num).first()
        # create new or replace old
        if old_course is None:
            # new course
            logger.debug(f"Creating new course {dept_obj.abr} {new_course.num}")
            db.session.add(new_course)
        elif cur_sem is None or semester == cur_sem:
            # course existed but replacing
            logger.debug(f"Replacing previous {old_course.dept.abr} {old_course.num}")
            __replace_course(old_course, new_course)
        else:
            # course existed and skipping
            logger.debug(f"Already existed: {old_course.dept.abr} {old_course.num}")
        db.session.commit()
    # report every department that could not be resolved, in sorted order
    null_depts = list(null_depts)
    null_depts.sort()
    for dept in null_depts:
        logger.debug(f"Unexpected Error: department {dept} cannot be found in the database")
def populate_prof_eid(profs): """ Populate database with prof eid info or add prof if doesn't exist :param profs: list of prof data sorted in incrementing order of semester. :type profs: list(tuple(semester, name, eid)) """ # profs must be sorted in order of semester # NOTE: professors sometimes have different names by semester -> take most recent (check by eid)] cur_profs = Prof.query.all() for semester, name, eid in profs: if ',' not in name: logger.debug(f'Invalid prof name: {name}') continue name = name.lower() name = name.split(',') last, first = name[0].strip(), name[1].strip() last_words = [word.strip() for word in last.split(' ') if len(word.strip()) > 0] first_words = [word.strip() for word in first.split(' ') if len(word.strip()) > 0] # check if professor exists by eid target_prof = Prof.query.filter_by(eid=eid).first() # if None then check by name matching if target_prof is None: for cur_prof in cur_profs: found = True cur_last, cur_first = cur_prof.last_name.lower(), cur_prof.first_name.lower() cur_last_words = [word.strip() for word in cur_last.split(' ') if len(word.strip()) > 0] cur_first_words = [word.strip() for word in cur_first.split(' ') if len(word.strip()) > 0] for word in cur_last_words: if word not in last_words: found = False break if found: for word in cur_first_words: if word not in first_words: found = False break if found: target_prof = cur_prof break first = first.title() last = last.title() if target_prof is None: logger.debug(f'Adding new prof: {first} {last}') new_prof = Prof(first_name=first, last_name=last, eid=eid) db.session.add(new_prof) else: logger.debug(f'Updating prof: {target_prof.first_name} {target_prof.last_name} -> {first} {last}') target_prof.first_name = first target_prof.last_name = last target_prof.eid = eid db.session.commit()