Example #1
0
def check_or_add_prof_course_semester(unique_num, prof_course, semester):
    """
    Look up a ProfCourseSemester row and create it when no match exists.

    Rows are matched on unique_num, prof_course.id and semester.id.
    A newly created row is added to the session and committed immediately.
    :param unique_num: unique number for the prof_course_semester
    :type unique_num: int
    :param prof_course: prof_course object for the given relationship
    :type prof_course: ProfCourse
    :param semester: semester object for the given relationship
    :type semester: Semester
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly created ProfCourseSemester)
    :rtype: tuple(int, ProfCourseSemester)
    """
    # run the query once: both the count and the first row come from this list
    matches = ProfCourseSemester.query.filter_by(
        unique_num=unique_num,
        prof_course_id=prof_course.id,
        sem_id=semester.id).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(
        f'Adding new prof_course_semester: unique={unique_num}, semester={semester.year} {semester.semester}'
    )
    prof_course_sem_obj = ProfCourseSemester(unique_num=unique_num,
                                             prof_course_id=prof_course.id,
                                             sem_id=semester.id)
    db.session.add(prof_course_sem_obj)
    db.session.commit()
    return num_results, prof_course_sem_obj
Example #2
0
def fetch_html(url, attempt=1):
    """
    Fetch html from the provided url, retrying when an HTTP exception occurs.

    After the tenth failed attempt the url is appended to failed_requests
    and None is returned.
    :param url: link to site to fetch
    :type url: str
    :param attempt: attempt number to start counting from
    :type attempt: int
    :return: response body as returned by read(), or None if every attempt failed
    """
    max_attempts = 10

    while True:
        # %s placeholder so the url is actually rendered into the log record
        logger.debug("fetching: %s", url)
        try:
            # the context manager closes the connection even when read() raises
            with urlopen(url) as client:
                return client.read()
        except http.client.HTTPException:
            # NOTE(review): urllib.error.URLError / HTTPError are not subclasses
            # of http.client.HTTPException, so they still propagate to the caller
            logger.debug(f"URL Failed: {url}, Attempt Number: {attempt}")
            if attempt >= max_attempts:
                failed_requests.append(url)
                return None
            attempt += 1
Example #3
0
def check_or_add_xlist(x_listings, semester):
    """
    Find the CrossListed object shared by the given unique numbers, or provide one.

    Resolution order:
    1. the xlist of a ScheduledCourse matching one of the unique numbers in
       the given semester (the last match in x_listings wins)
    2. an existing CrossListed that has no courses attached
    3. a newly created CrossListed, which is added and committed
    :param x_listings: list of unique numbers to search through
    :type x_listings: list[str]
    :param semester: semester for the ScheduledCourse to iterate through
    :type semester: Semester
    :return: the CrossListed object to use
    :rtype: CrossListed
    """
    x_list = None
    for x_list_str in x_listings:
        x_course = ScheduledCourse.query.filter_by(unique_no=x_list_str,
                                                   sem_id=semester.id).first()
        if x_course is not None and x_course.xlist is not None:
            x_list = x_course.xlist

    if x_list is None:
        # reuse a CrossListed that currently has no courses, if one exists
        x_list = CrossListed.query.filter(~CrossListed.courses.any()).first()
        # only log when something was found: first() returns None otherwise
        # and reading .id on it would raise before the create branch is reached
        if x_list is not None:
            logger.debug(f"Using empty CrossListed: {x_list.id}")

    if x_list is None:
        logger.debug(
            f"Adding new CrossListed for semester {semester.year} {semester.semester}"
        )
        x_list = CrossListed()
        db.session.add(x_list)
        db.session.commit()
    return x_list
Example #4
0
def refresh_ecis():
    """
    Set course and prof ecis_avg and ecis_students by iterating through ecis_scores

    For every Course and Prof, the student-weighted average of its ecis scores
    is recomputed from scratch: courses use each score's course_avg and
    professors use each score's prof_avg. Scores whose average is None still
    contribute their students to ecis_students. Each object is committed
    right after it is updated.
    """

    logger.info("Refreshing course and professor ecis fields with respective data")
    query_tuple = (Course.query.all(), Prof.query.all())

    # will iterate between Course and Prof since code is nearly identical
    for queries in query_tuple:
        for query in queries:

            is_prof = type(query) is Prof
            if type(query) is Course:
                logger.debug(f"Refreshing ecis for Course: {query.dept} {query.num}")
            elif is_prof:
                logger.debug(f"Refreshing ecis for Prof: {query.first_name} {query.last_name}")

            ecis = 0
            students = 0

            # iterate through ecis scores specific to the course/prof
            for prof_course in query.prof_course:
                for prof_course_sem in prof_course.prof_course_sem:
                    for ecis_child in prof_course_sem.ecis:
                        # professors are rated by prof_avg, courses by course_avg
                        avg = ecis_child.prof_avg if is_prof else ecis_child.course_avg
                        # a missing average adds nothing to the weighted sum
                        if avg is not None:
                            ecis += avg * ecis_child.num_students
                        students += ecis_child.num_students

            # average will be None if there are no students
            query.ecis_avg = (ecis / students) if students > 0 else None
            query.ecis_students = students
            db.session.commit()
Example #5
0
def fetch_ftp_files(out_dir):
    """Downloads ftp files from UT Austin FTP server

    The three data files (filename_current, filename_next, filename_future)
    are written into out_dir under their remote names. The process working
    directory is switched to out_dir while downloading and is always restored
    afterwards, even when a download fails.

    Args:
        out_dir (str): directory to download files to
    """
    url = 'reg-it.austin.utexas.edu'
    username = '******'

    logger.info(f"Downloading FTP data files to {out_dir}")

    cur_dir = getcwd()

    # FTP as a context manager closes the connection on exit, including on error
    with FTP(url) as ftp:
        ftp.login(user=username)

        chdir(out_dir)
        try:
            for filename in (filename_current, filename_next, filename_future):
                logger.debug(f'FTP: downloading {filename}')
                # 'with' guarantees the local file is closed if the transfer fails
                with open(filename, 'wb') as local_file:
                    ftp.retrbinary('RETR ' + filename, local_file.write, 1024)
        finally:
            # never leave the process in out_dir
            chdir(cur_dir)
Example #6
0
def parse_prof_csv(file_path):
    """
    Read a professor .csv file and collect its distinct (semester, name, eid) rows.

    The semester is converted to int (0 when it cannot be parsed); name and eid
    are lower-cased. Duplicates are removed and the result is ordered by semester.
    :param file_path: path to prof file
    :type file_path: str
    :return: list of prof data sorted by semester
    :rtype: list(tuple(int, str, str))
    """

    sem_col, name_col, eid_col = 'CCYYS', 'INSTR_NAME', 'INSTR_EID'

    logger.info(f'Parsing prof csv file: {file_path}')
    frame = pd.read_csv(file_path)

    unique_profs = set()
    for _, record in frame.iterrows():
        raw_sem = record[sem_col]
        prof_name = record[name_col]
        prof_eid = record[eid_col]

        try:
            sem_value = int(raw_sem)
        except ValueError:
            logger.debug(
                f'Unable to parse semester {raw_sem}. Defaulting to 0...')
            sem_value = 0

        unique_profs.add((sem_value, prof_name.lower(), prof_eid.lower()))

    # order by the semester component only
    return sorted(unique_profs, key=lambda entry: entry[0])
Example #7
0
def refresh_review_info():
    """
    Refresh course and prof review metric fields
    For Course: approval, difficulty, usefulness, workload
    For Prof: approval, clear, engaging, grading

    Every metric is the mean over the object's reviews, or None when it has
    no reviews. num_ratings is set to the number of reviews. Each object is
    committed right after it is updated.
    """

    # each model is paired with the review/model attribute names it averages
    targets = (
        (Course.query.all(), ("difficulty", "usefulness", "workload")),
        (Prof.query.all(), ("clear", "engaging", "grading")),
    )

    for queries, fields in targets:
        for query in queries:

            if type(query) is Course:
                logger.debug(f"Refreshing review fields for Course: {query.dept} {query.num}")
            elif type(query) is Prof:
                logger.debug(f"Refreshing review fields for Prof: {query.first_name} {query.last_name}")

            # initiate variables
            query.num_ratings = len(query.reviews)
            approval = 0
            totals = [0] * len(fields)

            # iterate through reviews and accumulate metric values
            for review in query.reviews:
                approval += int(review.approval)
                for pos, field in enumerate(fields):
                    totals[pos] += getattr(review, field)

            # do final metric calculation (averages)
            if query.num_ratings > 0:
                query.approval = approval / query.num_ratings
                averages = [total / query.num_ratings for total in totals]
            else:
                query.approval = None
                averages = [None] * len(fields)

            # write each average to the attribute of the same name, so Prof
            # gets clear/engaging/grading rather than the Course fields
            for field, value in zip(fields, averages):
                setattr(query, field, value)

            db.session.commit()
Example #8
0
def fetch_prof_course_sem_depts():
    """
    Fetch the semesters and departments offered by the professor course site.

    The first entry of each parsed list is dropped before returning.
    :return: (semesters, departments), or None when the page could not be fetched
    :rtype: tuple(list[str], list[str])
    """

    landing_url = "https://utdirect.utexas.edu/apps/student/coursedocs/nlogon/"
    page = fetch_html(landing_url)

    if page is not None:
        soup = BSoup(page, "html.parser")
        semesters = parse_prof_course_sems(soup)[1:]
        departments = parse_prof_course_depts(soup)[1:]
        return semesters, departments

    logger.debug("Failed to fetch prof_course semester and department lists")
    return None
Example #9
0
def get_header_indices(headers, *header_vals):
    """
    Locate the position of each requested header within a list of headers.
    :param headers: list of headers to search through
    :type headers: list[str]
    :param header_vals: headers to find indices for
    :return: one entry per requested header, in order: its index in headers,
        or None when the header is not present
    :rtype: tuple
    """

    def _locate(name):
        # index() raises ValueError for a missing header; report it as None
        try:
            return headers.index(name)
        except ValueError:
            logger.debug(f"Cannot find index for: {name}")
            return None

    return tuple(_locate(name) for name in header_vals)
Example #10
0
def populate_dept_info(dept_info):
    """
    Fill in the dept and college fields of departments that already exist.

    Departments are looked up by abbreviation; unknown abbreviations are only
    logged. The session is committed after every entry.
    :param dept_info: list of tuples containing: (abbreviation, department name, college name)
    :type dept_info: list[tuple(str, str, str)]
    """
    logger.info('Populating departments with additional info')

    for abr, dept, college in dept_info:
        existing = Dept.query.filter_by(abr=abr).first()

        if existing is not None:
            logger.debug(f"Updating dept: {abr} with dept={dept}, college={college}")
            existing.dept = dept
            existing.college = college
        else:
            logger.debug(f"Cannot find dept {abr}")

        db.session.commit()
Example #11
0
def get_course_url(sem="spring", year=2020, dept="", c_num="", c_title="", u_num="", inst_first="", inst_last=""):
    """
    Build the search url for the syllabi/cvs site.

    The semester is encoded as the year followed by a digit: 2 for spring,
    6 for summer, 9 for fall. Any other value is logged and treated as spring.
    Spaces in dept, c_title, inst_first and inst_last are replaced with '+'.
    :param sem: semester to search. Valid values: 'spring', 'summer', 'fall'
    :type sem: str
    :param year: year to search
    :type year: int
    :param dept: department to search
    :type dept: str
    :param c_num: course number to search
    :type c_num: str
    :param c_title: course title to search
    :type c_title: str
    :param u_num: unique number to search
    :type u_num: str
    :param inst_first: instructor first name to search
    :type inst_first: str
    :param inst_last: instructor last name to search
    :type inst_last: str
    :return: url to site
    :rtype: str
    """

    sem_digits = {"spring": 2, "summer": 6, "fall": 9}
    sem_num = sem_digits.get(sem)
    if sem_num is None:
        logger.debug(f"Cannot parse semester: {sem}. Defaulting to spring...")
        sem_num = 2

    def _plus(text):
        # the site expects spaces as '+'
        return text.replace(" ", "+")

    base = 'https://utdirect.utexas.edu/apps/student/coursedocs/nlogon/?'
    params = [
        f'semester={year}{sem_num}',
        f'department={_plus(dept)}',
        f'course_number={c_num}',
        f'course_title={_plus(c_title)}',
        f'unique={u_num}',
        f'instructor_first={_plus(inst_first)}',
        f'instructor_last={_plus(inst_last)}',
        'course_type=In+Residence',
        'search=Search',
    ]
    return base + '&'.join(params)
Example #12
0
def check_or_add_scheduled_course(scheduled_info,
                                  course,
                                  prof,
                                  x_list,
                                  semester,
                                  add=True):
    """
    Look up a ScheduledCourse by unique number and semester, building it when absent.

    When no row matches, a new ScheduledCourse is built from scheduled_info.
    It is added to the session and committed only when add is True.
    :param scheduled_info: object containing parsed scheduled course info
    :type scheduled_info: ScheduledCourseInfo
    :param course: model object containing course id related to scheduled course
    :type course: Course
    :param prof: model object containing prof id related to scheduled course
    :type prof: Prof
    :param x_list: model object containing cross_listed id related to scheduled course
    :type x_list: CrossListed or None
    :param semester: model object containing semester id related to scheduled course
    :type semester: Semester
    :param add: add the object to database if doesn't exist
    :type add: bool
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly built ScheduledCourse)
    :rtype: tuple(int, ScheduledCourse)
    """

    # run the query once: both the count and the first row come from this list
    matches = ScheduledCourse.query.filter_by(
        unique_no=scheduled_info.unique_no, sem_id=semester.id).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(
        f"""Adding new scheduled course. Unique = {scheduled_info.unique_no}
                    semester={repr(semester)}
                    course={repr(course)}
                    prof={repr(prof)}""")

    cur_schedule = scheduled_info.build_scheduled_course(
        semester, course, prof, x_list)
    if add:
        db.session.add(cur_schedule)
        db.session.commit()
    return num_results, cur_schedule
Example #13
0
def check_or_add_semester(yr, sem):
    """
    Look up a Semester by year and semester integer, creating it when absent.

    A newly created Semester is added to the session and committed immediately.
    :param yr: semester year
    :type yr: int
    :param sem: semester integer (view utreview's __init__.py to view corresponding integers)
    :type sem: int
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly created Semester)
    :rtype: tuple(int, Semester)
    """
    # run the query once: both the count and the first row come from this list
    matches = Semester.query.filter_by(year=yr, semester=sem).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(f'Adding new Semester: {yr} {sem}')
    sem_obj = Semester(year=yr, semester=sem)
    db.session.add(sem_obj)
    db.session.commit()
    return num_results, sem_obj
Example #14
0
def populate_dept(dept_info, override=False):
    """
    Populate the database with departments

    Abbreviation and name are stripped of surrounding whitespace before the
    lookup, so the value searched for is the same value that gets stored.
    The session is committed after every entry.
    :param dept_info: list of tuples with: (abbreviation, name)
    :type dept_info: list[tuple(str, str)]
    :param override: override current department with same abbreviation if found in database
    :type override: bool
    """

    logger.info("Populating database with departments")
    for abr, name in dept_info:

        # normalize first: looking up the raw abbreviation while storing the
        # stripped one would miss existing rows and insert duplicates
        abr = abr.strip()
        name = name.strip()

        cur_dept = Dept.query.filter_by(abr=abr).first()
        if cur_dept is None:
            # add department to database
            logger.debug(f"Adding dept {name} ({abr}) to database")
            dept = Dept(abr=abr, name=name)
            db.session.add(dept)

        elif override:
            # override current department
            logger.debug(f"Overriding dept {name} ({abr}) to database")
            cur_dept.abr = abr
            cur_dept.name = name

        else:
            # department already exists and not overriding
            logger.debug(f"Already exists: dept {name} ({abr})")

        db.session.commit()
Example #15
0
def check_or_add_prof_course(prof, course):
    """
    Look up the ProfCourse relationship for a prof and course, creating it when absent.

    A newly created ProfCourse is added to the session and committed immediately.
    :param prof: professor for the relationship
    :type prof: Prof
    :param course: course for the relationship
    :type course: Course
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly created ProfCourse)
    :rtype: tuple(int, ProfCourse)
    """
    # run the query once: both the count and the first row come from this list
    matches = ProfCourse.query.filter_by(prof_id=prof.id,
                                         course_id=course.id).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(f'Adding new prof_course: {prof} {course}')
    prof_course_obj = ProfCourse(prof_id=prof.id, course_id=course.id)
    db.session.add(prof_course_obj)
    db.session.commit()
    return num_results, prof_course_obj
Example #16
0
def check_or_add_prof(first_name, last_name):
    """
    Look up a professor by first and last name, creating the row when absent.

    A newly created Prof is added to the session and committed immediately.
    :param first_name: first name of professor
    :type first_name: str
    :param last_name: last name of professor
    :type last_name: str
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly created Prof)
    :rtype: tuple(int, Prof)
    """

    # run the query once: both the count and the first row come from this list
    matches = Prof.query.filter_by(first_name=first_name, last_name=last_name).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(f"Adding new prof: {first_name} {last_name}")
    prof = Prof(first_name=first_name, last_name=last_name)
    db.session.add(prof)
    db.session.commit()
    return num_results, prof
Example #17
0
def parse_ftp(in_dir):
    """Parse FTP files from the UT Austin FTP server

    Each of the three data files (filename_current, filename_next,
    filename_future) found in in_dir is split into a category header and
    tab-separated data lines. Lines are lower-cased and every field is
    stripped; lines with fewer fields than categories are ignored.

    Args:
        in_dir (str): directory containing the ftp files

    Returns:
        list[dict]: one dictionary per data line, mapping category to field value
    """

    logger.info(f"Parsing FTP files from {in_dir}")
    parsed = []

    for filename in (filename_current, filename_next, filename_future):

        path = join(in_dir, filename)

        # guard: nothing to parse when the file is missing
        if not isfile(path):
            logger.debug(f'FTP: {filename} does not exist in {in_dir}')
            continue

        logger.debug(f'FTP: parsing {filename}')
        with open(path) as handle:

            categories, rows = __parse_categories(handle.readlines())

            # guard: no category header could be parsed for this file
            if categories is None:
                continue

            for raw in rows:
                # standardizing the line
                lowered = raw.lower()
                fields = [piece.strip() for piece in lowered.split("\t")]

                if len(lowered) > 0 and len(fields) >= len(categories):
                    # pair each category with the field at the same position
                    parsed.append({key: fields[pos] for pos, key in enumerate(categories)})

    return parsed
Example #18
0
def fetch_sem_values(ftp_dir, out_dir):
    """
    fetch semester values from the FTP data files from the given directory

    Each data file is scanned for lines containing the semester label; the
    five-digit value on the last such line that matches the expected pattern
    is recorded under the file's key. Files that are missing, or that have no
    matching line, are recorded as None. The result is written as JSON.
    :param ftp_dir: the directory containing the ftp data files
    :param out_dir: the directory to output a file containing the semester data
    :return: path of the JSON file that was written
    """

    files = (filename_current, filename_next, filename_future)
    keys = (key_current, key_next, key_future)

    out_path = join(out_dir, sem_file)
    sem_dict = {}
    logger.info(f"Fetching semester values from dir={ftp_dir}, to file={out_path}")

    for key, m_file in zip(keys, files):

        sem = None
        filepath = join(ftp_dir, m_file)

        if isfile(filepath):

            with open(filepath, 'r') as f:
                lines = f.readlines()

            for line in lines:
                if __sem_label in line:
                    m = re.search(r'[A-Za-z ]+(\d{5}) (.*)?', line)
                    # a labelled line that does not match the pattern is
                    # skipped instead of raising on m.group
                    if m is not None:
                        sem = m.group(1)
                    else:
                        logger.debug(f"Fetch Sem: cannot parse semester line in {m_file}")
        else:
            logger.debug(f"Fetch Sem: cannot find file: {m_file} in {ftp_dir}")

        sem_dict[key] = sem

    with open(out_path, 'w') as f:
        json.dump(sem_dict, f)

    return out_path
Example #19
0
def check_or_add_course(dept, num, title):
    """
    Look up a course by department and number, creating it when absent.

    The title is not part of the lookup; it is only used when a new Course
    is created. A new Course is added to the session and committed immediately.
    :param dept: department of the course
    :type dept: Dept
    :param num: course number
    :type num: str
    :param title: title of the course
    :type title: str
    :return: tuple(number of rows matched before any insert,
        the first matching or the newly created Course)
    :rtype: tuple(int, Course)
    """

    # run the query once: both the count and the first row come from this list
    matches = Course.query.filter_by(dept_id=dept.id, num=num).all()
    num_results = len(matches)
    if matches:
        return num_results, matches[0]

    logger.debug(f'Adding new course: {dept.abr} {num}')
    course = Course(dept_id=dept.id, num=num, title=title)
    db.session.add(course)
    db.session.commit()
    return num_results, course
Example #20
0
def fetch_prof(query):
    """
    Fetch professor name and eid from UT directory website

    The directory page's "dir_info" table is scanned row by row; the value
    cell of the row whose label contains "Name" becomes the name and the one
    whose label contains "UT EID" becomes the eid.
    :param query: professor query to search on site
    :type query: str
    :return: name and eid of professor in format: (name, eid); either entry is
        None when it was not found, and both are None when the page or the
        info table is unavailable
    :rtype: tuple(str, str)
    """
    logger.debug(f"Fetching Prof: {query}")

    name_tag = "Name"
    eid_tag = "UT EID"

    name = None
    eid = None

    # fetch html from link, if None, cannot continue
    # NOTE(review): query is inserted into the url as-is; confirm callers
    # pass an already url-encoded value
    html = fetch_html('https://directory.utexas.edu/index.php?q='
                      f'{query}'
                      '&scope=faculty%2Fstaff&submit=Search')

    if html is None:
        logger.debug("Failed to fetch professor data: html is None")
        return None, None

    soup = BSoup(html, "html.parser")

    # search for data using the html elements surrounding it
    prof_info_table = soup.find("table", {"class": "dir_info"})
    if prof_info_table is None:
        logger.debug(
            "Failed to fetch professor data: professor info table does not exist"
        )
        return None, None

    rows = [tr.findAll("td") for tr in prof_info_table.findAll("tr")]

    for cells in rows:
        # a usable row has a label cell and a value cell
        if len(cells) < 2:
            continue
        tag = cells[0].text.strip()
        val = cells[1].text.strip()

        if name_tag in tag:
            # the full value is kept as the name; a split on "," whose result
            # was discarded has been removed.
            # NOTE(review): confirm whether only the part before the comma was intended
            name = val
        elif eid_tag in tag:
            eid = val

    return name, eid
Example #21
0
def populate_prof(prof_info):
    """
    Add a single professor to the database unless an identical row exists.

    prof_info must hold at least two entries: the professor name (parsed into
    first and last name) followed by the eid. Anything else is logged as
    invalid input and ignored.
    :param prof_info: data fetched using fetch_prof from utreview.services.fetch_web
    :type prof_info: list
    """

    # guard: reject missing or too-short input
    if prof_info is None or len(prof_info) <= 1:
        logger.debug(f"Invalid input to populate_prof: {prof_info}")
        return

    first_name, last_name = __parse_prof_name(prof_info[0])
    eid = prof_info[1]

    existing = Prof.query.filter_by(first_name=first_name, last_name=last_name, eid=eid).first()
    if existing is not None:
        logger.debug(f"Professor {first_name} {last_name} already exists")
        return

    logger.debug(f"Adding professor {first_name} {last_name}")
    new_prof = Prof(first_name=first_name, last_name=last_name, eid=eid)
    db.session.add(new_prof)
    db.session.commit()
Example #22
0
def _split_name_words(name):
    """Split a name on single spaces into its non-empty, stripped words."""
    return [word.strip() for word in name.split(' ') if len(word.strip()) > 0]


def populate_prof_course(in_file):
    """
    Populate database with Professor and Course relationship using data fetched from the web
    (utreview.services.fetch_web.fetch_prof_course_info only)

    in_file holds one JSON object per line. For each record the professor is
    matched by name against the existing Prof rows (every word of the record's
    last/first name must appear in the row's last/first name), the department
    is looked up by abbreviation, and the course, prof_course, semester and
    prof_course_semester rows are created when missing. Records with an
    unknown professor or department, or with a malformed professor name,
    department or semester, are logged and skipped.
    :param in_file: file the data was fetched to
    :type in_file: str
    """

    sem_by_label = {
        "Spring": SPRING_SEM,
        "Summer": SUMMER_SEM,
        "Fall": FALL_SEM,
    }

    logger.info(f"Populating database with prof_course info using {in_file}")

    # creating list of prof-course relationships from the given file
    prof_courses = []
    with open(in_file, 'r') as f:
        for line in f:
            # blank lines carry no record
            if line.strip():
                prof_courses.append(json.loads(line))

    # split every known professor's name into words once, instead of once per record
    known_profs = [
        (prof,
         set(_split_name_words(prof.last_name.lower())),
         set(_split_name_words(prof.first_name.lower())))
        for prof in Prof.query.all()
    ]

    # add each prof-course relationship to the database if appropriate
    for prof_course in prof_courses:

        # the name is expected as "last, first"; skip records without both parts
        raw_prof = prof_course[KEY_PROF]
        prof_name = [name.strip() for name in raw_prof.lower().split(",")] if raw_prof else []
        if len(prof_name) < 2:
            logger.debug(f"Cannot parse prof name: {raw_prof}. Skipping...")
            continue
        last_words = _split_name_words(prof_name[0])
        first_words = _split_name_words(prof_name[1])

        # first professor whose names contain every word of the record's names
        target_prof = next(
            (prof for prof, cur_last_words, cur_first_words in known_profs
             if all(word in cur_last_words for word in last_words)
             and all(word in cur_first_words for word in first_words)),
            None)

        if target_prof is None:
            logger.debug(f"Cannot find prof: {prof_course[KEY_PROF]}. Skipping...")
            continue

        # check for existence of department -> skip if does not exist
        raw_dept = prof_course[KEY_DEPT]
        if not raw_dept:
            logger.debug(f"Cannot find dept: {raw_dept}. Skipping...")
            continue
        abr = raw_dept.strip().upper()
        dept = Dept.query.filter_by(abr=abr).first()
        if dept is None:
            logger.debug(f"Cannot find dept: {abr}. Skipping...")
            continue

        # check if course exists -> add if does not exist
        # TODO: choosing topic 0 by default. Update when topic info available.
        num_results, course = check_or_add_course(dept, prof_course[KEY_CNUM], prof_course[KEY_TITLE])
        if num_results > 1:
            courses = Course.query.filter_by(dept_id=dept.id, num=prof_course[KEY_CNUM])
            for c in courses:
                if c.topic_num <= 0:
                    course = c
        db.session.commit()

        # check if prof_course exists -> add if it doesn't
        _, prof_course_obj = check_or_add_prof_course(target_prof, course)
        db.session.commit()

        # parse semester ("year, label") to integer representation
        raw_sem = prof_course[KEY_SEM]
        sem_lst = [s.strip() for s in raw_sem.split(",")] if raw_sem else []
        if len(sem_lst) < 2:
            logger.debug(f"Invalid semester: {raw_sem}. Skipping...")
            continue

        sem = sem_by_label.get(sem_lst[1])
        if sem is None:
            logger.debug(f"Invalid semester: {sem_lst[1]}. Skipping...")
            continue

        try:
            yr = int(sem_lst[0])
        except ValueError:
            logger.debug(f"Invalid semester: {raw_sem}. Skipping...")
            continue

        # check for semester existence -> if it doesn't, add to database
        _, sem_obj = check_or_add_semester(yr, sem)

        # check for prof_course_semester existence -> if it doesn't add to database
        check_or_add_prof_course_semester(prof_course[KEY_UNIQUE], prof_course_obj, sem_obj)
        db.session.commit()
Example #23
0
def populate_scheduled_course(course_info):
    """
    Populate the database with scheduled course info as parsed from FTP

    Each entry is turned into a ScheduledCourseInfo and resolved against the
    existing Dept, Course, Prof and Semester rows. Entries that fail to parse,
    or whose department or course cannot be found, are skipped; a missing
    professor is left empty. The resulting scheduled courses are queued and
    handed to update_scheduled_courses in one call at the end.
    :param course_info: list of course data
    :type course_info: list[dict]
    """

    logger.info("Populating database with scheduled course info")
    s_course_queue = []

    for s_course in course_info:

        # create ScheduledCourseInfo object using the s_course dictionary
        try:
            scheduled = ScheduledCourseInfo(s_course)
        except ValueError as err:
            # Logger.warning is the supported spelling; warn is a deprecated alias
            logger.warning(f"Populate scheduled course error: {err}. Skipping...")
            continue

        # check to see if dept exists
        dept_obj = Dept.query.filter_by(abr=scheduled.dept).first()
        if dept_obj is None:
            logger.debug(f"Populate scheduled course: cannot find department {scheduled.dept}. Skipping...")
            continue

        # check to see if course exists; only narrow by topic number when the
        # department/number pair is ambiguous
        cur_courses = Course.query.filter_by(dept_id=dept_obj.id, num=scheduled.c_num)
        if len(cur_courses.all()) > 1:
            cur_courses = cur_courses.filter_by(topic_num=scheduled.topic)
        cur_course = cur_courses.first()

        if cur_course is None:
            course_log_description = f"{scheduled.dept} {scheduled.c_num} w/ topic num {scheduled.topic}"
            logger.debug(f"Populate scheduled course: cannot find course {course_log_description}. Skipping...")
            continue

        # check to see if prof exists --> if not then leave empty
        cur_prof = Prof.query.filter_by(eid=scheduled.prof_eid).first()

        if cur_prof is None:
            logger.warning(f"Could not find professor w/ EID={scheduled.prof_eid}. Leaving empty...")

        # check to see if semester exists else add semester
        _, semester = check_or_add_semester(yr=scheduled.yr, sem=scheduled.sem)

        # check to see if scheduled course exists else create new (not yet added to the session)
        num_results, cur_schedule = check_or_add_scheduled_course(
            scheduled, cur_course, cur_prof, None, semester, add=False)
        if num_results > 0:
            logger.debug(
                f"Updating scheduled course. Unique = {scheduled.unique_no}\n"
                f"\t\t\t\t\tsemester={repr(semester)}\n"
                f"\t\t\t\t\tcourse={repr(cur_course)}\n"
                f"\t\t\t\t\tprof={repr(cur_prof)}")
            cur_schedule = scheduled.to_scheduled_course(cur_schedule, semester, cur_course, cur_prof, None)

        s_course_queue.append({
            'scheduled': cur_schedule,
            'prof': cur_prof,
            'course': cur_course,
            'semester': semester,
            'unique': scheduled.unique_no,
            'xlist': scheduled.x_listings,
        })
    update_scheduled_courses(s_course_queue)
Example #24
0
def populate_ecis(file_path, pages):
    """
    Populate database with ECIS information

    Every parsed score is attached to the ProfCourseSemester identified by its
    unique number and semester. Scores whose semester or ProfCourseSemester is
    unknown, or whose ProfCourseSemester already has a score, are skipped.
    After a score is stored, the running ecis_avg / ecis_students of the
    related course and professor are updated incrementally.
    :param file_path: path to file containing data
    :type file_path: str
    :param pages: pages of file to parse
    :type pages: list[int] or list[str]
    """

    # FOR FUTURE UPDATES, PLEASE READ:
    # remember to update Course and Prof ECIS fields when inputting new ECIS scores: ecis_avg and ecis_students

    logger.info(f'Populating ecis database with data from: {file_path}')
    ecis_lst = parse_ecis_excel(file_path, pages)

    for ecis in ecis_lst:

        # separate values from dictionary
        unique, c_avg, p_avg, students, yr, sem = (
            ecis[KEY_UNIQUE_NUM],
            ecis[KEY_COURSE_AVG],
            ecis[KEY_PROF_AVG],
            ecis[KEY_NUM_STUDENTS],
            ecis[KEY_YR],
            ecis[KEY_SEMESTER]
        )

        # check for existence of specified Semester, ProfCourseSemester in database
        logger.debug(f'Adding ecis for: unique={unique}, sem={yr}{sem}')
        sem_obj = Semester.query.filter_by(year=yr, semester=sem).first()
        if sem_obj is None:
            logger.debug(f"Cannot find semester for: {yr}{sem}. Skipping...")
            continue

        pcs_obj = ProfCourseSemester.query.filter_by(unique_num=unique, sem_id=sem_obj.id).first()
        if pcs_obj is None:
            logger.debug(
                f"Failed to find ProfCourseSemester for: unique={unique}, sem={yr}{sem}. Skipping..."
            )
            continue

        # assumption: only one ecis score per prof_course_semester instance -> else skip
        # (kept under its own name so the list being iterated is not rebound)
        existing_scores = pcs_obj.ecis
        if len(existing_scores) >= 1:
            # ecis already exists
            continue

        # creating the ecis object
        ecis_obj = EcisScore(
            course_avg=c_avg,
            prof_avg=p_avg,
            num_students=students,
            prof_course_sem_id=pcs_obj.id)
        db.session.add(ecis_obj)
        db.session.commit()

        # updating course and prof ecis fields
        logger.debug("Updating prof and course ecis fields")
        pc_obj = pcs_obj.prof_course
        course_obj = pc_obj.course
        prof_obj = pc_obj.prof

        queries = ((course_obj, c_avg), (prof_obj, p_avg))
        for query, avg in queries:
            # an unset student count is treated as zero prior students
            prior_students = query.ecis_students or 0
            total_students = prior_students + students
            # weighted sums; a missing average contributes nothing
            total_avg = ((query.ecis_avg * prior_students) if query.ecis_avg is not None else 0) + \
                ((avg * students) if avg is not None else 0)
            query.ecis_avg = (total_avg / total_students) if total_students > 0 else None
            query.ecis_students = total_students

        db.session.commit()
Example #25
0
def fetch_prof_course_info(out_file, sems, depts):
    """
    Parse prof course info from the site -> for the relationship

    For every (semester, department) pair the search page is fetched and each
    table row is converted into a dictionary keyed by KEY_SEM, KEY_DEPT,
    KEY_TITLE, KEY_CNUM, KEY_UNIQUE and KEY_PROF. The dictionaries are appended
    to out_file as one JSON object per line. Pages that cannot be fetched or
    that have no header row are skipped; a column whose header is missing, or
    that a row does not reach, is stored as None.
    :param out_file: file to output the relationships/data
    :type out_file: str
    :param sems: semesters to fetch data for
    :type sems: list[str]
    :param depts: departments to fetch data for
    :type depts: list[str]
    """
    sem_header = 'SEMESTER'
    dept_header = 'DEPT'
    title_header = 'TITLE'
    course_num_header = 'COURSENUMBER'
    unique_header = 'UNIQUE'
    instr_header = 'INSTRUCTOR(S)*'

    def column(cols, index):
        # None when the header was not found or the row is too short
        if index is None or index >= len(cols):
            return None
        return cols[index]

    logger.info(f"Fetching prof_course info. Output={out_file}. Semesters={sems}. Departments={depts}")

    for sem in sems:
        for dept in depts:

            # fetch_html returns None once its retries are exhausted
            html = fetch_html(get_prof_course_url(sem, dept))
            if html is None:
                logger.debug("Failed to fetch prof_course page: "
                             f"Semester={sem}, Department={dept}. Skipping...")
                continue
            html_soup = BSoup(html, "html.parser")

            # look for headers on page -> headers for the table
            headers = html_soup.find("tr", {"class": "tbh header"})
            if headers is None:
                logger.debug("Cannot find headers for prof_course search: "
                             f"Semester={sem}, Department={dept}. Skipping...")
                continue
            headers = [header.text.replace("\n", "").strip() for header in headers.findAll("th")]
            logger.debug(f"Fetched headers from profcourse site with headers: {headers}")

            # parse out indices for each of the headers
            sem_index, dept_index, title_index, cnum_index, unique_index, instr_index = get_header_indices(
                headers,
                sem_header,
                dept_header,
                title_header,
                course_num_header,
                unique_header,
                instr_header
            )

            # iterate through each row in the web table and parse out data
            records = []
            for row in html_soup.findAll("tr", {"class": ["tboff", "tbon"]}):
                cols = [col.text.replace("\n", "").strip() for col in row.findAll("td")]

                # keep only the text before a 'CV' marker in any cell
                cols = [col.split('CV')[0].strip() if 'CV' in col else col for col in cols]

                # NOTE(review): the final character of the title is dropped;
                # presumably a trailing marker in the page markup -- confirm
                title = column(cols, title_index)

                # create dictionary containing the data
                records.append({
                    KEY_SEM: column(cols, sem_index),
                    KEY_DEPT: column(cols, dept_index),
                    KEY_TITLE: title.strip()[:-1] if title is not None else None,
                    KEY_CNUM: column(cols, cnum_index),
                    KEY_UNIQUE: column(cols, unique_index),
                    KEY_PROF: column(cols, instr_index)
                })

            # append the page's records with a single open, one JSON object per line
            if records:
                with open(out_file, "a") as f:
                    for record in records:
                        f.write(json.dumps(record) + "\n")
Example #26
0
def populate_course(course_info, cur_sem=None):
	"""
	Populate database with courses.

	For every entry the owning department is looked up by abbreviation. Entries whose
	department cannot be found are skipped and the missing abbreviations are logged
	(sorted) once all entries have been processed. A course that is not stored yet is
	added; a stored course is replaced when ``cur_sem`` is None or the entry's semester
	equals ``cur_sem``, otherwise it is left untouched. The session is committed after
	each processed entry.

	:param course_info: list of dictionaries containing course data
	:type course_info: list[dict]
	:param cur_sem: the current semester. if set to None, data will be replaced with most recent value
	:type cur_sem: int or None
	"""

	__inherit = "(See Base Topic for inherited information.)"
	null_depts = set()

	logger.info("Populating database with courses")

	for course in course_info:

		# fetch values from dictionary
		semester = course[KEY_SEM]
		dept = course[KEY_DEPT]
		num = course[KEY_NUM]
		title = course[KEY_TITLE]
		cs_title = course[KEY_CS_TITLE]
		description = course[KEY_DESCRIPTION]
		restrictions = course[KEY_RESTRICTION]
		t_num = course[KEY_TOPIC_NUM]
		pre_req = course[KEY_PRE_REQ]

		# check to see if dept exists --> else skip
		dept_obj = Dept.query.filter_by(abr=dept).first()

		if dept_obj is None:
			null_depts.add(dept)
			continue

		# if topic number > 0, then title = modified cs title (when one could be parsed)
		if t_num > 0:
			cs_title = __parse_title(cs_title)
			if cs_title is not None:
				title = cs_title

		# None if course doesn't currently exist
		old_course = None
		# define new base course variable
		new_course = Course(
			num=num,
			title=title,
			description=description,
			restrictions=restrictions,
			pre_req=pre_req,
			dept_id=dept_obj.id,
			topic_num=t_num
		)

		# condition on topic number
		if t_num >= 0:

			# all courses sharing this department and number, i.e. every topic of the course
			topic_courses_flask = Course.query.filter_by(dept_id=dept_obj.id, num=num)
			topic_courses = topic_courses_flask.all()

			# set topic number --> will create new topic if doesnt exist
			new_course.topic_id = __check_new_topic(topic_courses_flask)

			# assumption: unique based on topic number
			t_course_flask = topic_courses_flask.filter_by(topic_num=t_num)

			if t_num == 0:
				# .first() already yields None when nothing matches, so there is no
				# need to load every matching row just to test for existence
				old_course = t_course_flask.first()
				__populate_child_topics(new_course, topic_courses, __inherit)
			else:
				topic_zero = __get_topic_zero(topic_courses)
				old_course = t_course_flask.first()
				__populate_child_topics(topic_zero, [new_course], __inherit)

		else:
			# course doesn't have topic number
			old_course = Course.query.filter_by(dept_id=dept_obj.id, num=num).first()

		# create new or replace old
		if old_course is None:
			# new course
			logger.debug(f"Creating new course {dept_obj.abr} {new_course.num}")
			db.session.add(new_course)
		elif cur_sem is None or semester == cur_sem:
			# course existed but replacing
			logger.debug(f"Replacing previous {old_course.dept.abr} {old_course.num}")
			__replace_course(old_course, new_course)
		else:
			# course existed and skipping
			logger.debug(f"Already existed: {old_course.dept.abr} {old_course.num}")

		db.session.commit()

	# report every department that was referenced but is missing, in sorted order
	for dept in sorted(null_depts):
		logger.debug(f"Unexpected Error: department {dept} cannot be found in the database")
Example #27
0
def populate_prof_eid(profs):
	"""
	Add professors to the database, or refresh the name and eid of stored ones.

	Each entry is matched against the stored professors first by eid and, failing
	that, by name: a stored professor matches when every word of its last name occurs
	in the incoming last name and every word of its first name occurs in the incoming
	first name (compared in lower case). Entries whose name contains no comma are
	logged and skipped. The session is committed after each processed entry.

	:param profs: list of prof data sorted in incrementing order of semester.
	:type profs: list(tuple(semester, name, eid))
	"""

	def words_of(text):
		# split on single spaces and drop empty / whitespace-only pieces
		pieces = (piece.strip() for piece in text.split(' '))
		return [piece for piece in pieces if piece]

	# profs must be sorted in order of semester
	# NOTE: professors sometimes have different names by semester -> take most recent (check by eid)

	# professors fetched once up front; used only for the name-based fallback
	known_profs = Prof.query.all()

	for _semester, name, eid in profs:

		if ',' not in name:
			logger.debug(f'Invalid prof name: {name}')
			continue

		# the text before the first comma is the last name, the text after it the first name
		name_parts = name.lower().split(',')
		last = name_parts[0].strip()
		first = name_parts[1].strip()
		last_words = words_of(last)
		first_words = words_of(first)

		# check if professor exists by eid
		target_prof = Prof.query.filter_by(eid=eid).first()

		# if None then check by name matching
		# NOTE(review): a stored professor whose names contain no words matches any
		# incoming name here -- confirm that this is intended
		if target_prof is None:
			for candidate in known_profs:
				cand_last_words = words_of(candidate.last_name.lower())
				cand_first_words = words_of(candidate.first_name.lower())

				last_ok = all(word in last_words for word in cand_last_words)
				if last_ok and all(word in first_words for word in cand_first_words):
					target_prof = candidate
					break

		# names are stored in title case
		first, last = first.title(), last.title()

		if target_prof is not None:
			logger.debug(f'Updating prof: {target_prof.first_name} {target_prof.last_name} -> {first} {last}')
			target_prof.first_name = first
			target_prof.last_name = last
			target_prof.eid = eid
		else:
			logger.debug(f'Adding new prof: {first} {last}')
			db.session.add(Prof(first_name=first, last_name=last, eid=eid))

		db.session.commit()