def timestamp_feature(catalog, soup):
    """Record when the catalog data was last updated.

    Reads the numeric text of ``soup.title``, adds a fixed offset to it and
    stores the result on ``catalog.timestamp``. The corresponding
    ``datetime`` object is stored on ``catalog.datetime`` and logged.
    """
    # The source has no real "time created" value, so the date the courses
    # are listed for is used instead.
    base_offset = 1318790434
    title_value = float(soup.title.text)
    catalog.timestamp = int(title_value) + base_offset
    catalog.datetime = datetime.datetime.fromtimestamp(catalog.timestamp)
    logger.info('Catalog last updated on %s' % catalog.datetime)
def course_feature(catalog, soup):
    """Parse all the courses (AKA, the most important part).

    Every tag returned by ``soup.findAll('course')`` is converted with
    ``Course.from_soup_tag`` and collected into a dict keyed by
    ``str(course)``, which is then stored on ``catalog.courses``. When two
    courses share the same string form, the later one replaces the earlier.
    """
    courses = {}
    for tag in soup.findAll('course'):
        course = Course.from_soup_tag(tag)
        courses[str(course)] = course
    catalog.courses = courses
    logger.info('Catalog has %d courses' % len(courses))
def course_feature(catalog, soup):
    """Build the catalog's course mapping (AKA, the most important part).

    Each item yielded by ``parse_tables(soup)`` is turned into a course with
    ``create_course`` and stored under ``str(course)``. The resulting mapping
    is wrapped in ``FrozenDict`` and assigned to ``catalog.courses``.
    """
    by_name = {}
    # Number of items processed; it can exceed len(by_name) when several
    # courses share the same string form, so both figures are logged.
    processed = 0
    for processed, row in enumerate(parse_tables(soup), start=1):
        course = create_course(row)
        by_name[str(course)] = course
    catalog.courses = FrozenDict(by_name)
    logger.info(
        'Catalog has %d courses (manual: %d)' % (len(by_name), processed)
    )
def semester_feature(catalog, soup):
    """Set the year and semester information this file holds courses for.

    The text of the ``<h3>`` returned by ``soup.find('h3')`` is split on
    ``' Session '``: the part before the separator is the semester name and
    the part after it is the year. Sets ``catalog.name``, ``catalog.year``,
    ``catalog.semester`` and ``catalog.month``.

    Raises:
        IndexError: if the heading does not contain ``' Session '``.
        ValueError: if the year part is not an integer.
        KeyError: if the semester is not 'Spring', 'Summer' or 'Fall'.
    """
    month_mapping = {'Spring': 1, 'Summer': 5, 'Fall': 9}
    # Strip before splitting: surrounding whitespace in the heading would
    # otherwise stay attached to the semester name and break the lookup
    # in month_mapping, even though catalog.name itself was stripped.
    heading = soup.find('h3').text.strip()
    catalog.name = heading
    parts = heading.split(' Session ')
    catalog.year = int(parts[1])
    catalog.semester = parts[0]
    catalog.month = month_mapping[parts[0]]
    logger.info('Catalog type: %s' % catalog.name)
def semester_feature(catalog, soup):
    """Set the year and semester information this file holds courses for.

    ``soup.coursedb['semesternumber']`` is split positionally: the first four
    characters are the year and the remainder is the month number, which is
    mapped to a semester name. ``catalog.name`` is taken verbatim from
    ``soup.coursedb['semesterdesc']``.
    """
    semester_by_month = {1: 'Spring', 5: 'Summer', 9: 'Fall'}
    number = soup.coursedb['semesternumber']
    year_part, month_part = number[:4], number[4:]
    catalog.year = int(year_part)
    catalog.month = int(month_part)
    catalog.semester = semester_by_month[catalog.month]
    catalog.name = soup.coursedb['semesterdesc']
    logger.info('Catalog type: %s' % catalog.name)
def crosslisting_feature(catalog, soup):
    """Parse all the crosslistings.

    A crosslisting groups similar CRNs, such as a grad & undergrad level
    course. Every CRN of a ``<crosslisting>`` element is mapped to one
    ``CrossListing`` built from that element's CRNs and its ``seats``
    attribute. The mapping is stored on ``catalog.crosslistings`` wrapped in
    ``FrozenDict``.
    """
    by_crn = {}
    for node in soup.coursedb.findAll('crosslisting'):
        seat_count = int(node['seats'])
        crn_list = [safeInt(tag.string) for tag in node.findAll('crn')]
        # Every CRN points at the same CrossListing instance to save space.
        shared = CrossListing(crn_list, seat_count)
        by_crn.update(dict.fromkeys(crn_list, shared))
    catalog.crosslistings = FrozenDict(by_crn)
    logger.info(
        'Catalog has %d course crosslistings' % len(catalog.crosslistings)
    )
def semester_feature(catalog, soup):
    """Set the year and semester information this file holds courses for.

    The year and starting month are first read from the second line of the
    text of the ``<h3>`` tags, matched against ``RE_SEMESTER_RANGE``. If
    ``catalog.url`` is set and matches ``RE_SEMESTER_URL``, the year and
    month from the URL replace those values. ``catalog.semester`` and
    ``catalog.name`` are then derived from the final month and year.
    """
    start_months = {'january': 1, 'may': 5, 'august': 9}
    semester_names = {1: 'Spring', 5: 'Summer', 9: 'Fall'}

    heading_line = _text(soup.findAll('h3')).split('\n')[1]
    range_match = RE_SEMESTER_RANGE.match(heading_line)
    catalog.year = int(range_match.group('year'))
    start_month = range_match.group('start_month').lower()
    catalog.month = start_months[start_month]

    # Values found in the URL take precedence over the heading.
    url_match = RE_SEMESTER_URL.match(catalog.url) if catalog.url else None
    if url_match:
        catalog.year = int(url_match.group('year'))
        catalog.month = int(url_match.group('month'))

    catalog.semester = semester_names[catalog.month]
    catalog.name = '%s %d' % (catalog.semester, catalog.year)
    logger.info('Catalog type: %s' % catalog.name)
def timestamp_feature(catalog, soup):
    """Record the datetime the xml file was last modified.

    Converts ``soup.coursedb['timestamp']`` to an int, stores it on
    ``catalog.timestamp``, and stores the corresponding ``datetime`` object
    on ``catalog.datetime``.
    """
    raw_stamp = soup.coursedb['timestamp']
    catalog.timestamp = int(raw_stamp)
    modified_at = datetime.datetime.fromtimestamp(catalog.timestamp)
    catalog.datetime = modified_at
    logger.info('Catalog last updated on %s' % catalog.datetime)