Beispiel #1
0
def timestamp_feature(catalog, soup):
    """Record when the catalog was last updated.

    ``soup.title.text`` is read as a number, truncated to an integer and
    added to a fixed base value. The sum is stored on ``catalog.timestamp``
    and the corresponding ``datetime`` (local time, as produced by
    ``datetime.fromtimestamp``) on ``catalog.datetime``.
    """
    # Per the original note: the source has no real "time created", so the
    # date the courses are listed for is used instead. The title value is
    # treated as an offset from this base.
    base_seconds = 1318790434
    offset_seconds = int(float(soup.title.text))
    catalog.timestamp = offset_seconds + base_seconds
    catalog.datetime = datetime.datetime.fromtimestamp(catalog.timestamp)
    message = 'Catalog last updated on %s' % catalog.datetime
    logger.info(message)
Beispiel #2
0
def course_feature(catalog, soup):
    """Parse every ``course`` tag and attach the resulting courses to the catalog.

    Each tag returned by ``soup.findAll('course')`` is converted with
    ``Course.from_soup_tag`` and stored in a dict under ``str(course)``.
    When two courses share the same string form, the later one replaces
    the earlier. The dict is assigned to ``catalog.courses`` and the number
    of entries is logged.
    """
    courses = {}
    for tag in soup.findAll('course'):
        course = Course.from_soup_tag(tag)
        courses[str(course)] = course
    catalog.courses = courses
    logger.info('Catalog has %d courses' % len(courses))
Beispiel #3
0
def course_feature(catalog, soup):
    """Build the catalog's course mapping from the parsed tables.

    Every item produced by ``parse_tables(soup)`` is turned into a course
    with ``create_course`` and stored under ``str(course)``. The mapping is
    wrapped in ``FrozenDict`` and assigned to ``catalog.courses``.

    The log line reports both the number of distinct keys and the number of
    items processed; the two differ when several items map to the same key.
    """
    by_key = {}
    processed = 0  # stays 0 when parse_tables yields nothing
    for processed, row in enumerate(parse_tables(soup), start=1):
        entry = create_course(row)
        by_key[str(entry)] = entry
    catalog.courses = FrozenDict(by_key)
    logger.info('Catalog has %d courses (manual: %d)' % (len(by_key), processed))
Beispiel #4
0
def semester_feature(catalog, soup):
    """Set the catalog's name, year, semester and starting month.

    The text of the first ``h3`` element is expected to have the form
    ``"<Semester> Session <Year>"``, e.g. ``"Fall Session 2011"``.
    Surrounding whitespace in the heading is ignored.

    Sets:
        catalog.name: the stripped heading text.
        catalog.year: the year as an ``int``.
        catalog.semester: ``'Spring'``, ``'Summer'`` or ``'Fall'``.
        catalog.month: 1, 5 or 9 respectively.

    Raises:
        IndexError: the heading does not contain ``' Session '``.
        ValueError: the year part is not an integer.
        KeyError: the semester part is not one of the known names.
    """
    month_mapping = {'Spring': 1, 'Summer': 5, 'Fall': 9}

    # Look the heading up once and strip it before splitting, so that
    # whitespace around the heading cannot leak into the semester name
    # and break the month lookup.
    heading = soup.find('h3').text.strip()
    catalog.name = heading

    parts = heading.split(' Session ')
    catalog.year = int(parts[1])

    semester = parts[0].strip()
    catalog.semester = semester
    catalog.month = month_mapping[semester]

    logger.info('Catalog type: %s' % catalog.name)
Beispiel #5
0
def semester_feature(catalog, soup):
    """Populate the catalog's year, month, semester and name.

    ``soup.coursedb['semesternumber']`` is split into its first four
    characters (the year) and the remainder (the month number). The month
    must be 1, 5 or 9, which map to Spring, Summer and Fall; any other
    value raises ``KeyError``. The catalog name is taken verbatim from
    ``soup.coursedb['semesterdesc']``.
    """
    semester_names = {1: 'Spring', 5: 'Summer', 9: 'Fall'}

    number = soup.coursedb['semesternumber']
    year_part, month_part = number[:4], number[4:]
    catalog.year = int(year_part)
    catalog.month = int(month_part)
    catalog.semester = semester_names[catalog.month]

    catalog.name = soup.coursedb['semesterdesc']

    message = 'Catalog type: %s' % catalog.name
    logger.info(message)
Beispiel #6
0
def crosslisting_feature(catalog, soup):
    """Collect the crosslisting groups and index them by CRN.

    Crosslistings refer to similar CRNs, such as a grad & undergrad level
    course. For every ``crosslisting`` element under ``soup.coursedb`` a
    single ``CrossListing`` is built from the element's CRNs and its
    ``seats`` attribute. Each CRN in the group is then mapped to that same
    instance. The mapping is stored on ``catalog.crosslistings`` as a
    ``FrozenDict``.
    """
    by_crn = {}
    for node in soup.coursedb.findAll('crosslisting'):
        seat_count = int(node['seats'])
        group_crns = [safeInt(tag.string) for tag in node.findAll('crn')]

        # Every CRN in the group points at one shared object to save space.
        shared = CrossListing(group_crns, seat_count)
        by_crn.update(dict.fromkeys(group_crns, shared))
    catalog.crosslistings = FrozenDict(by_crn)

    logger.info('Catalog has %d course crosslistings' % len(catalog.crosslistings))
Beispiel #7
0
def semester_feature(catalog, soup):
    """Determine the catalog's year, month, semester and name.

    The second line of the text extracted from the ``h3`` elements is
    matched against ``RE_SEMESTER_RANGE``; its ``year`` group and its
    ``start_month`` group (january, may or august, case-insensitive)
    provide the initial year and month. If ``catalog.url`` is set and
    matches ``RE_SEMESTER_URL``, the year and month from the URL replace
    those values.

    The month (1, 5 or 9) selects the semester name, and the catalog name
    is formatted as ``"<semester> <year>"``.
    """
    start_months = {'january': 1, 'may': 5, 'august': 9}
    semester_names = {1: 'Spring', 5: 'Summer', 9: 'Fall'}

    heading_line = _text(soup.findAll('h3')).split('\n')[1]
    heading_match = RE_SEMESTER_RANGE.match(heading_line)
    catalog.year = int(heading_match.group('year'))
    start_month = heading_match.group('start_month').lower()
    catalog.month = start_months[start_month]

    # A matching URL takes precedence over what the heading says.
    url_match = RE_SEMESTER_URL.match(catalog.url) if catalog.url else None
    if url_match:
        catalog.year = int(url_match.group('year'))
        catalog.month = int(url_match.group('month'))

    catalog.semester = semester_names[catalog.month]
    catalog.name = '%s %d' % (catalog.semester, catalog.year)
    logger.info('Catalog type: %s' % catalog.name)
Beispiel #8
0
def timestamp_feature(catalog, soup):
    """Record when the xml file was last modified.

    The ``timestamp`` attribute of ``soup.coursedb`` is converted to an
    integer and stored on ``catalog.timestamp``; the corresponding
    ``datetime`` (local time, as produced by ``datetime.fromtimestamp``)
    is stored on ``catalog.datetime``.
    """
    raw_stamp = soup.coursedb['timestamp']
    catalog.timestamp = int(raw_stamp)
    catalog.datetime = datetime.datetime.fromtimestamp(catalog.timestamp)
    message = 'Catalog last updated on %s' % catalog.datetime
    logger.info(message)