def scrape(self):
    prefix = ntnu.prefix(self.semester)
    groups = {}

    # Map course codes to Course objects for the current semester.
    courses = Course.objects.filter(semester=self.semester)
    courses = {c.code: c for c in courses}

    # Collect the study programmes attached to each activity code.
    query = ('SELECT aktkode, studieprogramkode FROM '
             '%s_akt_studieprogram') % prefix
    for row in fetch.sql('ntnu', query):
        groups.setdefault(row.aktkode, set()).add(row.studieprogramkode)

    # Walk the timetable rows and yield one dict per lecture.
    query = ('SELECT emnekode, typenavn, dag, start, slutt, uke, romnr, '
             'romnavn, larer, aktkode FROM %s_timeplan ORDER BY emnekode, '
             'dag, start, slutt, uke, romnavn, aktkode') % prefix
    for row in fetch.sql('ntnu', query):
        code, version = ntnu.parse_course(row.emnekode)
        if not code:
            logging.warning('Skipped invalid course name: %s', row.emnekode)
            continue
        elif code not in courses:
            logging.debug('Unknown course %s.', code)
            continue
        elif not self.should_proccess_course(code):
            continue

        yield {
            'course': courses[code],
            'type': row.typenavn,
            'day': utils.parse_day_of_week(row.dag),
            'start': utils.parse_time(row.start),
            'end': utils.parse_time(row.slutt),
            'weeks': utils.parse_weeks(row.uke),
            'rooms': zip(utils.split(row.romnr, '#'),
                         utils.split(row.romnavn, '#')),
            'lecturers': utils.split(row.larer, '#'),
            'groups': groups.get(row.aktkode, set()),
        }
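
# --- Illustration (not part of the scraper) --------------------------------
# scrape() leans on small helpers from the project's utils module whose
# implementations are not shown here. The stand-ins below are a minimal,
# hypothetical sketch of the behaviour the code above appears to assume for
# split(), parse_time() and parse_weeks(); the real helpers may differ.
import datetime


def split(value, sep='#'):
    # Split a delimiter-separated database field, dropping empty entries.
    return [part.strip() for part in (value or '').split(sep) if part.strip()]


def parse_time(value):
    # Turn 'HH:MM' into a datetime.time, or None for empty values.
    if not value:
        return None
    hour, minute = value.split(':')
    return datetime.time(int(hour), int(minute))


def parse_weeks(value):
    # Expand a week spec such as '34-39,41' into [34, 35, ..., 39, 41].
    weeks = []
    for part in split(value, ','):
        if '-' in part:
            start, end = part.split('-')
            weeks.extend(range(int(start), int(end) + 1))
        else:
            weeks.append(int(part))
    return weeks
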
def parse_row(tr, room_codes):
    data = {}

    for i, td in enumerate(tr.cssselect('td')):
        if i == 0:
            if td.attrib.get('colspan', 1) == '4':
                # Rows spanning the whole table carry the lecture type.
                lecture_type = utils.clean_string(td.text_content())
                if lecture_type:
                    data['lecture_type'] = lecture_type
            else:
                # Normal rows start with '<day> <start>-<end>' plus a week list.
                time = td.cssselect('b')[0].text_content().strip()
                raw_day, period = time.split(' ', 1)
                raw_start, raw_end = period.split('-')

                data['day'] = utils.parse_day_of_week(raw_day)
                data['start'] = utils.parse_time(raw_start)
                data['end'] = utils.parse_time(raw_end)

                match = re.match('.*Uke: (.+)', td.text_content())
                data['weeks'] = utils.parse_weeks(match.group(1))
        elif i == 1 and len(td.cssselect('a')) > 0:
            if len(td.cssselect('a')) > 1:
                logging.warning('Multiple room links found, simply '
                                'using the first one.')

            # Room names are the link text plus the tails of any child nodes.
            a = td.cssselect('a')[0]
            rooms = [a.text] + [e.tail for e in a]

            data['rooms'] = []
            for name in utils.clean_list(rooms, utils.clean_string):
                if name not in room_codes:
                    data['rooms'].append((None, name))
                    continue

                if len(room_codes[name]) > 1:
                    logging.warning('Multiple rooms with name %s, '
                                    'simply using first code.', name)
                data['rooms'].append((room_codes[name][0], name))
        elif i == 2:
            data['lecturers'] = [td.text] + [e.tail for e in td]
        elif i == 3:
            data['groups'] = [g.text_content() for g in td.cssselect('span')]

    return data
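
# --- Illustration (not part of the scraper) --------------------------------
# A hypothetical usage sketch for parse_row(): the HTML snippet and the
# room_codes mapping are invented for illustration, and the call assumes the
# project's utils helpers cope with Norwegian day names and 'HH:MM' times.
import lxml.html

EXAMPLE_HTML = '''
<table>
  <tr>
    <td><b>Mandag 10:15-12:00</b> Uke: 34-39,41</td>
    <td><a href="#">R1</a></td>
    <td>Ola Nordmann</td>
    <td><span>MTDT</span></td>
  </tr>
</table>
'''

tr = lxml.html.fromstring(EXAMPLE_HTML).cssselect('tr')[0]
data = parse_row(tr, room_codes={'R1': ['360R1']})
# data should now hold day/start/end/weeks, rooms as (code, name) pairs,
# plus the lecturers and groups lists.
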