Example #1
0
def _cell_lines(cells: List[List[str]], index: int) -> List[str]:
    """Return the strings of cell ``index``, or ``[]`` if the row is too short."""
    return cells[index] if index < len(cells) else []


def _cell_first(cells: List[List[str]], index: int):
    """Return the first string of cell ``index``, or ``None`` if missing/empty."""
    lines = _cell_lines(cells, index)
    return lines[0] if lines else None


def _parse_general_info(course: Course, cells: List[List[str]]) -> None:
    """Fill SLN, department, number, section, type, credits, name and gen-eds."""
    log.info("parsing through course info")
    course.sln = _cell_first(cells, 0)

    course_label = _cell_first(cells, 1)
    if course_label:
        course_tokens = course_label.split()
        if course_tokens:
            course.department = course_tokens[0]
        if len(course_tokens) > 1:
            course.number = course_tokens[1]

    course.section = _cell_first(cells, 2)
    course.type = _cell_first(cells, 3)

    # Rows with more than 7 cells carry one extra cell (index 4) before the
    # credits cell, so credits / name / gen-ed marker all shift right by one.
    offset = 1 if len(cells) > 7 else 0

    # TODO: Add way to handle fractional credits (i.e, 2.5)
    credit_text = _cell_first(cells, 4 + offset)
    if credit_text:
        # "1-5" is a credit range; a lone value is both the lower and the
        # upper bound. With no credit text the Course defaults are kept.
        credit_tokens = credit_text.strip().split('-')
        course.lower_credits = credit_tokens[0]
        course.upper_credits = (
            credit_tokens[1] if len(credit_tokens) > 1 else credit_tokens[0])

    course.name = _cell_first(cells, 5 + offset)
    gen_ed_marker = _cell_first(cells, 6 + offset)

    log.info(gen_ed_marker)
    if gen_ed_marker:
        # QSR,NW --> [QSR, NW]; only keys already known to the course are set.
        for gen_ed in gen_ed_marker.split(","):
            gen_ed = gen_ed.strip()
            if gen_ed in course.general_education:
                course.general_education[gen_ed] = True


def _parse_enrollment(course: Course, cells: List[List[str]]) -> None:
    """Fill current/maximum enrollment and the add-code flag."""
    log.info("parsing through course info (enrollment)")

    course.current_size = _cell_first(cells, 0)
    course.max_size = _cell_first(cells, 1)
    if _cell_first(cells, 4) == 'Entry Code required':
        course.add_code_required = True


def _parse_instructor(instructor: Instructor, instructor_name) -> None:
    """Split a ``LAST,FIRST MIDDLE`` name and fetch the instructor's contact data.

    Nothing is changed on ``instructor`` (and no lookup is made) when the
    name is missing or does not contain a comma.
    """
    log.info(f"instructor name: {instructor_name}")
    if not instructor_name:
        return

    instructor_tokens = instructor_name.split(',')
    log.info(f"split instructor name: {instructor_tokens}")
    if len(instructor_tokens) < 2:
        return

    instructor.last_name = instructor_tokens[0].strip()
    # Whitespace-agnostic split so "LAST, FIRST" does not yield an empty
    # first name with the real one pushed into the middle-name slot.
    first_name_tokens = instructor_tokens[1].split()
    log.info(f"first name: {first_name_tokens}")
    instructor.first_name = first_name_tokens[0] if first_name_tokens else ""
    instructor.middle_name = (
        first_name_tokens[1] if len(first_name_tokens) > 1 else "")
    log.info(
        f"{instructor.first_name}, {instructor.middle_name}, {instructor.last_name}"
    )
    log.info(
        "retrieving data for instructor email and phone number"
    )

    contact = get_data(instructor.first_name, instructor.last_name)
    if contact and not contact.get('error'):
        teachers = contact.get('teacher')
        if teachers:
            instructor.email = teachers[0]['email']
            instructor.phone_number = teachers[0]['phone']


def _parse_meetings(course: Course, instructor: Instructor,
                    cells: List[List[str]]) -> None:
    """Append one meeting dict per listed meeting, then parse the instructor.

    If there is more than one meeting location, each cell holds one string
    per meeting:
    Ex: TTh   08:45-09:45     UW1 121   GUNNERSON,KIM N.
        TTh   09:45-10:50     UW2 131   GUNNERSON,KIM N.
    meeting days: [TTh, TTh]
    time ranges:  [08:45-09:45, 09:45-10:50]
    rooms:        [UW1 121, UW2 131]
    """
    log.info("parsing through meeting times")
    log.info(cells)

    meeting_days = _cell_lines(cells, 0)
    if not meeting_days or meeting_days[0] == 'To be arranged':
        return

    time_ranges = _cell_lines(cells, 1)
    rooms = _cell_lines(cells, 2)
    # zip stops at the shortest column, so only complete meetings are kept.
    for days, time_range, room in zip(meeting_days, time_ranges, rooms):
        time_tokens = time_range.split('-')
        start_time = time_tokens[0].replace('\u00a0', ' ')
        end_time = (time_tokens[1].replace('\u00a0', ' ')
                    if len(time_tokens) > 1 else None)

        # First token is the building; whatever follows is the room number.
        room_tokens = room.replace('\u00a0', ' ').split()
        room_building = room_tokens[0] if room_tokens else None
        room_number = ' '.join(room_tokens[1:]) or None

        course.meetings.append({
            "room_building": room_building,
            "room_number": room_number,
            "meeting_days": days,
            "start_time": start_time,
            "end_time": end_time
        })

    _parse_instructor(instructor, _cell_first(cells, 3))


def _parse_notes(course: Course, cells: List[List[str]]) -> None:
    """Join the strings of the first cell into the course description."""
    log.info("Retrieving course description...")
    log.info(cells)
    course.description = "\n".join(_cell_lines(cells, 0))


def create_course_objects(
        tables: List[BeautifulSoup]) -> Tuple[Course, Instructor]:
    """Build a Course and an Instructor from the tables of a course page.

    At most ``max_tables`` tables are read. A table's position in ``tables``
    is compared against the ``table_type`` mapping to decide whether it holds
    general info, enrollment, meetings or notes; tables at any other position
    are ignored. In each table only the first ``<tr>`` without a ``bgcolor``
    attribute is parsed (rows with ``bgcolor`` are presumably headers).

    Missing or empty cells never raise: the affected attribute is set to
    ``None`` or left at its default.

    Args:
        tables: The page's tables, in document order.

    Returns:
        A ``(course, instructor)`` tuple populated from the tables.
    """
    course = Course()
    instructor = Instructor()
    for i, table in enumerate(tables[:max_tables]):
        for row in table.findAll('tr'):
            if row.has_attr('bgcolor'):
                continue
            # One list of whitespace-stripped strings per <td>.
            cells = [list(cell.stripped_strings) for cell in row.findAll('td')]

            if i == table_type['GENERAL_INFO']:
                _parse_general_info(course, cells)
            elif i == table_type['ENROLLMENT']:
                _parse_enrollment(course, cells)
            elif i == table_type['MEETINGS']:
                _parse_meetings(course, instructor, cells)
            elif i == table_type['NOTES']:
                _parse_notes(course, cells)
            # Only the first data row of each table is relevant.
            break
    log.info("Done collecting course information and instructor information.")
    return course, instructor