Example #1
0
def parse_exam_schedule(exam_file_name):
    """Replace all stored exams with the ones listed in a schedule text file.

    Each line is split on whitespace and read positionally: the first two
    tokens form the course ID, then come the section tokens (everything up to
    the first day-of-week token), a four-token date such as
    "Tuesday December 11, 2012", a two-token start time, a two-token end time,
    and finally the location (all remaining tokens).

    The existing exam collection is dropped before the file is read. Lines
    with no course ID are skipped with a message. Lines whose dates cannot be
    parsed are still saved, with both dates set to None and info_known False.

    Args:
        exam_file_name: Path of the schedule file to read.
    """
    m.Exam.objects._collection.drop()

    # E.g. Tuesday December 11, 2012 7:30PM
    #      Tuesday December 11, 2012 10:00PM
    date_format = "%A %B %d, %Y %I:%M%p"

    # The context manager closes the file even if a line fails mid-import.
    with open(exam_file_name) as exam_file:
        for line in exam_file:
            tokens = re.split(r'\s+', line)

            # Get the course ID
            course_id = safe_list_get(tokens, 0) + safe_list_get(tokens, 1)
            course_id = course_id.lower()

            if not course_id:
                print("Skipping line '%s'" % ' '.join(tokens))
                continue

            # Get the sections: every token before the day of the week.
            # The bounds check ends the scan on lines that contain no
            # day-of-week token; without it the loop would never terminate
            # (assuming safe_list_get yields an empty string past the end).
            index = 2
            section_tokens = []
            while (index < len(tokens) and
                   not is_day_of_week(safe_list_get(tokens, index))):
                section_tokens.append(safe_list_get(tokens, index))
                index += 1

            section_string = ' '.join(section_tokens).strip()

            # Get the date. Next 4 tokens is Tuesday December 11, 2012
            exam_date_string = ''.join(
                safe_list_get(tokens, i) + ' '
                for i in range(index, index + 4))
            index += 4

            # Each time is two tokens (e.g. "7:30" and "PM") glued together.
            start_date_string = (exam_date_string +
                                 safe_list_get(tokens, index) +
                                 safe_list_get(tokens, index + 1))
            index += 2

            end_date_string = (exam_date_string +
                               safe_list_get(tokens, index) +
                               safe_list_get(tokens, index + 1))
            index += 2

            # TODO(sandy): do timezones better
            try:
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        mktime(time.strptime(start_date_string,
                                             date_format))))
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        mktime(time.strptime(end_date_string, date_format))))
            except Exception as exp:
                # Kept broad on purpose: the original author did not remember
                # which exception this was meant to catch.
                print("Could not get date for line '%s'" % ' '.join(tokens))
                print(exp)
                start_date = None
                end_date = None

            # Get the location: whatever is left on the line.
            location = ' '.join(tokens[index:]).strip()

            exam_slot = m.Exam()
            exam_slot.course_id = course_id
            exam_slot.sections = section_string
            exam_slot.start_date = start_date
            exam_slot.end_date = end_date
            exam_slot.location = location
            exam_slot.info_known = bool(start_date and end_date)

            exam_slot.save()
Example #2
0
def import_opendata_exam_schedules():
    """Import exam schedules data from the OpenData API

    Loads today's JSON dump (uw_exams_<YYYY_MM_DD>.txt under
    c.EXAMS_DATA_DIR, relative to this module's directory), builds one
    m.Exam per grouped section, and replaces the stored exams with them.

    Sections are skipped, with a message added to the returned list, when
    the section name ends with 'Online', when the day field contains
    'Exam removed' or 'See http:', or when the date/time cannot be parsed.

    Returns:
        A list of human-readable strings describing every skipped section.

    Raises:
        ValueError: If fewer exams were processed than the sanity threshold
            for the current season. Existing exams are left untouched.
    """
    today = datetime.today()
    file_name = os.path.join(
        os.path.dirname(__file__),
        '%s/uw_exams_%s.txt' % (c.EXAMS_DATA_DIR, today.strftime('%Y_%m_%d')))

    # Only the JSON decode needs the file handle, so release it right away.
    with open(file_name, 'r') as f:
        data = json.load(f)

    # Data will contain something like this:
    #
    #   [{
    #       "course": "AFM 131",
    #       "sections": [
    #           {
    #               "date": "2014-04-17",
    #               "day": "Thursday",
    #               "end_time": "10:00 PM",
    #               "location": "DC 1350",
    #               "notes": "",
    #               "section": "001",
    #               "start_time": "7:30 PM"
    #           },
    #           {
    #               "date": "",
    #               "day": "",
    #               "end_time": "",
    #               "location": "",
    #               "notes": "See blah blah blah",
    #               "section": "081 Online",
    #               "start_time": ""
    #           }
    #       ]
    #   }, ...]
    #
    # TODO(jlfwong): Refactor this to separate concerns of file IO, db
    # storage, and data processing so that the data processing step can be
    # tested, and this example can be moved into tests.

    processed_exams = []
    errors = []

    # E.g. 2014-04-17 7:30 PM
    #      2014-04-17 10:00 PM
    date_format = "%Y-%m-%d %I:%M %p"

    for exam_data in data:
        course_id = m.Course.code_to_id(exam_data.get('course'))
        grouped_sections = group_similar_exam_sections(
            exam_data.get('sections', []))
        for section_data in grouped_sections:
            # A missing or null field becomes '' so the string checks below
            # cannot raise; such a record then fails date parsing and is
            # reported in `errors` instead of aborting the whole import.
            section = section_data.get('section') or ''
            day = section_data.get('day') or ''

            # Catch these to be more detailed in our errors
            if section.endswith('Online'):
                errors.append("Skipping online course: %s %s" %
                              (course_id, section))
                continue
            if 'Exam removed' in day:
                errors.append("Skipping removed course: %s" % (course_id))
                continue
            if 'See http:' in day:
                errors.append("Skipping url for course: %s" % (course_id))
                continue

            # E.g. 2014-04-17
            date = section_data.get('date')
            # E.g. 11:30 AM
            start_time = section_data.get('start_time')
            end_time = section_data.get('end_time')
            start_date_string = "%s %s" % (date, start_time)
            end_date_string = "%s %s" % (date, end_time)

            try:
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(start_date_string,
                                          date_format))))
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        time.mktime(
                            time.strptime(end_date_string, date_format))))
            except Exception as exp:
                errors.append("Could not get date (%s)\n%s" %
                              (section_data, exp))
                continue

            exam = m.Exam(
                course_id=course_id,
                sections=section,
                start_date=start_date,
                end_date=end_date,
                location=section_data.get('location'),
                info_known=bool(start_date and end_date),
            )
            processed_exams.append(exam)

    # Do some sanity checks to make sure OpenData is being reasonable.
    # This number is arbitrary and just reminds us to double-check
    # TODO(Sandy): This ranges from 775 (Fall & Winter) to 325 (Spring)
    season = m.Term.get_season_from_id(m.Term.get_current_term_id())
    EXAM_ITEMS_THRESHOLD = 325 if season == 'Spring' else 775
    if len(processed_exams) < EXAM_ITEMS_THRESHOLD:
        raise ValueError("processor.py: too few exam items %d (< %d)" %
                         (len(processed_exams), EXAM_ITEMS_THRESHOLD))

    # Everything should be fine by here, drop the old exams collection
    m.Exam.objects.delete()
    for exam in processed_exams:
        exam.save()

    return errors
Example #3
0
def import_opendata_exam_schedules():
    """Import exam schedules data from the OpenData API

    Reads the JSON file named uw_exams_<YYYY_MM_DD>.txt (today's date) from
    c.EXAMS_DATA_DIR relative to this module, turns each grouped section into
    an m.Exam, and then swaps the stored exams for the new set.

    A section is skipped and described in the returned list when its name
    ends with "Online", when its day field contains "Exam removed" or
    "See http:", or when its date and times cannot be parsed.

    Returns:
        List of strings, one per skipped section.

    Raises:
        ValueError: When fewer exams than the seasonal sanity threshold were
            produced; raised before any stored exam is deleted.
    """
    today = datetime.today()
    file_name = os.path.join(
        os.path.dirname(__file__), "%s/uw_exams_%s.txt" % (c.EXAMS_DATA_DIR, today.strftime("%Y_%m_%d"))
    )

    # Close the file as soon as the JSON is decoded; nothing below needs it.
    with open(file_name, "r") as f:
        data = json.load(f)

    # Data will contain something like this:
    #
    #   [{
    #       "course": "AFM 131",
    #       "sections": [
    #           {
    #               "date": "2014-04-17",
    #               "day": "Thursday",
    #               "end_time": "10:00 PM",
    #               "location": "DC 1350",
    #               "notes": "",
    #               "section": "001",
    #               "start_time": "7:30 PM"
    #           },
    #           {
    #               "date": "",
    #               "day": "",
    #               "end_time": "",
    #               "location": "",
    #               "notes": "See blah blah blah",
    #               "section": "081 Online",
    #               "start_time": ""
    #           }
    #       ]
    #   }, ...]
    #
    # TODO(jlfwong): Refactor this to separate concerns of file IO, db
    # storage, and data processing so that the data processing step can be
    # tested, and this example can be moved into tests.

    processed_exams = []
    errors = []

    # E.g. 2014-04-17 7:30 PM
    #      2014-04-17 10:00 PM
    date_format = "%Y-%m-%d %I:%M %p"

    for exam_data in data:
        course_id = m.Course.code_to_id(exam_data.get("course"))
        grouped_sections = group_similar_exam_sections(exam_data.get("sections", []))
        for section_data in grouped_sections:
            # Normalise absent/null fields to "" so the substring and suffix
            # checks never raise; the record is then rejected by the date
            # parsing below and reported rather than crashing the import.
            section = section_data.get("section") or ""
            day = section_data.get("day") or ""

            # Catch these to be more detailed in our errors
            if section.endswith("Online"):
                errors.append("Skipping online course: %s %s" % (course_id, section))
                continue
            if "Exam removed" in day:
                errors.append("Skipping removed course: %s" % (course_id))
                continue
            if "See http:" in day:
                errors.append("Skipping url for course: %s" % (course_id))
                continue

            # E.g. 2014-04-17
            date = section_data.get("date")
            # E.g. 11:30 AM
            start_time = section_data.get("start_time")
            end_time = section_data.get("end_time")
            start_date_string = "%s %s" % (date, start_time)
            end_date_string = "%s %s" % (date, end_time)

            try:
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(time.mktime(time.strptime(start_date_string, date_format)))
                )
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(time.mktime(time.strptime(end_date_string, date_format)))
                )
            except Exception as exp:
                errors.append("Could not get date (%s)\n%s" % (section_data, exp))
                continue

            exam = m.Exam(
                course_id=course_id,
                sections=section,
                start_date=start_date,
                end_date=end_date,
                location=section_data.get("location"),
                info_known=bool(start_date and end_date),
            )
            processed_exams.append(exam)

    # Do some sanity checks to make sure OpenData is being reasonable.
    # This number is arbitrary and just reminds us to double-check
    # TODO(Sandy): This ranges from 775 (Fall & Winter) to 325 (Spring)
    season = m.Term.get_season_from_id(m.Term.get_current_term_id())
    EXAM_ITEMS_THRESHOLD = 325 if season == "Spring" else 775
    if len(processed_exams) < EXAM_ITEMS_THRESHOLD:
        raise ValueError("processor.py: too few exam items %d (< %d)" % (len(processed_exams), EXAM_ITEMS_THRESHOLD))

    # Everything should be fine by here, drop the old exams collection
    m.Exam.objects.delete()
    for exam in processed_exams:
        exam.save()

    return errors
def parse_exam_schedule(exam_file_name):
    """Drop the exam collection and repopulate it from a schedule text file.

    Every line is tokenised on whitespace and consumed left to right:
    two tokens for the course ID, then section tokens until a day-of-week
    token is found, four date tokens (e.g. "Tuesday December 11, 2012"),
    two tokens for the start time, two for the end time, and the rest of
    the line as the location.

    Lines with an empty course ID are skipped with a message. When the dates
    on a line cannot be parsed the exam is still saved, with start_date and
    end_date set to None and info_known set to False.

    Args:
        exam_file_name: Path of the schedule file to read.
    """
    m.Exam.objects._collection.drop()

    # E.g. Tuesday December 11, 2012 7:30PM
    #      Tuesday December 11, 2012 10:00PM
    date_format = "%A %B %d, %Y %I:%M%p"

    # `with` guarantees the handle is closed, including on errors.
    with open(exam_file_name) as exam_file:
        for line in exam_file:
            tokens = re.split(r'\s+', line)
            token_count = len(tokens)

            # Get the course ID
            course_id = safe_list_get(tokens, 0) + safe_list_get(tokens, 1)
            course_id = course_id.lower()

            if not course_id:
                print("Skipping line '%s'" % ' '.join(tokens))
                continue

            # Get the sections. Stop at the day of the week, or at the end
            # of the line if there is none -- otherwise a line without a
            # day-of-week token would keep this loop running forever
            # (assuming safe_list_get returns an empty string out of range).
            index = 2
            sections = []
            while index < token_count:
                token = safe_list_get(tokens, index)
                if is_day_of_week(token):
                    break
                sections.append(token)
                index += 1

            section_string = ' '.join(sections).strip()

            # Get the date. Next 4 tokens is Tuesday December 11, 2012
            exam_date_string = ''.join(
                safe_list_get(tokens, i) + ' '
                for i in range(index, index + 4))
            index += 4

            # Start and end times are two tokens each, joined without a space.
            start_date_string = (exam_date_string +
                                 safe_list_get(tokens, index) +
                                 safe_list_get(tokens, index + 1))
            index += 2

            end_date_string = (exam_date_string +
                               safe_list_get(tokens, index) +
                               safe_list_get(tokens, index + 1))
            index += 2

            # TODO(sandy): do timezones better
            try:
                start_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        mktime(time.strptime(start_date_string,
                                             date_format))))
                end_date = rmc_util.eastern_to_utc(
                    datetime.fromtimestamp(
                        mktime(time.strptime(end_date_string, date_format))))
            except Exception as exp:
                # Deliberately broad: the exact exception this guards against
                # was never pinned down by the original author.
                print("Could not get date for line '%s'" % ' '.join(tokens))
                print(exp)
                start_date = None
                end_date = None

            # Get the location from the remaining tokens.
            location = ' '.join(tokens[index:]).strip()

            exam_slot = m.Exam()
            exam_slot.course_id = course_id
            exam_slot.sections = section_string
            exam_slot.start_date = start_date
            exam_slot.end_date = end_date
            exam_slot.location = location
            exam_slot.info_known = bool(start_date and end_date)

            exam_slot.save()