def prepare_data(self, data):
    """Validate a scraped lecture row and resolve its raw values.

    ``data`` is modified in place: lecturer and group names are cleaned,
    and the ``rooms``, ``type``, ``lecturers`` and ``groups`` entries are
    replaced by whatever ``self.room``, ``self.lecture_type``,
    ``self.lecturer`` and ``self.group`` return for them.

    Returns ``None`` when the row should be skipped -- that is, when
    ``course``, ``start`` or ``end`` is falsy, or when ``day`` is not a key
    of ``dict(Lecture.DAYS)`` -- and the same ``data`` dict otherwise.
    """
    # A row without a course, a start time and an end time is unusable.
    if not (data['course'] and data['start'] and data['end']):
        return None

    # Likewise for a day that Lecture does not know about.
    if data['day'] not in dict(Lecture.DAYS):
        return None

    data['lecturers'] = utils.clean_list(data['lecturers'],
                                         utils.clean_string)
    data['groups'] = utils.clean_list(data['groups'], utils.clean_string)

    # Work from a copy of the raw (code, name) pairs so the 'rooms' entry
    # can be rebuilt from scratch.
    raw_rooms = data['rooms'][:]
    resolved_rooms = data['rooms'] = []
    for raw_code, raw_name in raw_rooms:
        code = utils.clean_string(raw_code)
        name = utils.clean_string(raw_name)
        if not (code or name):
            # Nothing left after cleaning; drop the pair.
            continue
        resolved_rooms.append(self.room(code, name))

    data['type'] = self.lecture_type(data['type'])
    data['lecturers'] = [self.lecturer(lecturer_name)
                         for lecturer_name in data['lecturers']]
    data['groups'] = [self.group(group_name)
                      for group_name in data['groups']]

    # Every row ends up with at least one group.
    if not data['groups']:
        data['groups'] = [self.group(Group.DEFAULT)]

    return data
def parse_row(tr, room_codes):
    """Extract the raw lecture fields from one timetable table row.

    The ``td`` cells of ``tr`` are interpreted by position:

    * cell 0 is either a cell with ``colspan="4"``, whose cleaned text is
      stored as ``lecture_type`` (when non-empty), or a time cell from
      which ``day``, ``start``, ``end`` and ``weeks`` are parsed;
    * cell 1 yields ``rooms`` as a list of ``(code, name)`` tuples taken
      from its first link (the key is left unset if the cell has no link);
    * cell 2 yields ``lecturers``: the cell's raw text fragments;
    * cell 3 yields ``groups``: the text of each ``span`` in the cell.

    Any further cells are ignored.

    Args:
        tr: Row element exposing ``cssselect`` (presumably an lxml
            element; the cells are also accessed through ``attrib``,
            ``text``, ``tail`` and ``text_content()``).
        room_codes: Mapping from room name to a sequence of room codes.
            Names missing from it get ``None`` as their code.

    Returns:
        A new dict holding whichever of the keys above the row provided.

    Raises:
        IndexError: a time cell has no ``b`` element, or a room name maps
            to an empty code sequence.
        ValueError: the bold time text is not ``"<day> <start>-<end>"``.
        AttributeError: a time cell has no ``Uke: ...`` part.
    """
    data = {}

    for index, td in enumerate(tr.cssselect('td')):
        if index == 0:
            if td.attrib.get('colspan') == '4':
                # Cell spanning four columns: its text is the lecture type.
                lecture_type = utils.clean_string(td.text_content())
                if lecture_type:
                    data['lecture_type'] = lecture_type
            else:
                # The bold part is split as "<day> <start>-<end>".
                when = td.cssselect('b')[0].text_content().strip()
                raw_day, period = when.split(' ', 1)
                raw_start, raw_end = period.split('-')

                data['day'] = utils.parse_day_of_week(raw_day)
                data['start'] = utils.parse_time(raw_start)
                data['end'] = utils.parse_time(raw_end)

                # NOTE(review): re.match returns None when "Uke: " is
                # absent from the first line of the cell text ('.' does
                # not cross newlines), which surfaces below as an
                # AttributeError. Left as is because it is unclear what
                # callers expect for rows without a week specification.
                match = re.match('.*Uke: (.+)', td.text_content())
                data['weeks'] = utils.parse_weeks(match.group(1))

        elif index == 1:
            # Query the links once instead of once per check.
            links = td.cssselect('a')
            if not links:
                continue
            if len(links) > 1:
                logging.warning('Multiple rooms links found, simply '
                                'using first one.')

            link = links[0]
            # Room names are the link's own text plus the text trailing
            # each of its child elements.
            raw_names = [link.text] + [child.tail for child in link]

            rooms = data['rooms'] = []
            for name in utils.clean_list(raw_names, utils.clean_string):
                if name not in room_codes:
                    rooms.append((None, name))
                    continue

                codes = room_codes[name]
                if len(codes) > 1:
                    logging.warning('Multiple rooms with name %s, '
                                    'simply using first code.', name)
                rooms.append((codes[0], name))

        elif index == 2:
            data['lecturers'] = [td.text] + [child.tail for child in td]

        elif index == 3:
            data['groups'] = [span.text_content()
                              for span in td.cssselect('span')]

    return data
def parse_row(tr, room_codes):
    """Extract the raw lecture fields from one timetable table row.

    The ``td`` cells of ``tr`` are interpreted by position:

    * cell 0 is either a cell with ``colspan="4"``, whose cleaned text is
      stored as ``lecture_type`` (when non-empty), or a time cell from
      which ``day``, ``start``, ``end`` and ``weeks`` are parsed;
    * cell 1 yields ``rooms`` as a list of ``(code, name)`` tuples taken
      from its first link (the key is left unset if the cell has no link);
    * cell 2 yields ``lecturers``: the cell's raw text fragments;
    * cell 3 yields ``groups``: the text of each ``span`` in the cell.

    Any further cells are ignored.

    Args:
        tr: Row element exposing ``cssselect`` (presumably an lxml
            element; the cells are also accessed through ``attrib``,
            ``text``, ``tail`` and ``text_content()``).
        room_codes: Mapping from room name to a sequence of room codes.
            Names missing from it get ``None`` as their code.

    Returns:
        A new dict holding whichever of the keys above the row provided.

    Raises:
        IndexError: a time cell has no ``b`` element, or a room name maps
            to an empty code sequence.
        ValueError: the bold time text is not ``"<day> <start>-<end>"``.
        AttributeError: a time cell has no ``Uke: ...`` part.
    """
    data = {}

    for index, td in enumerate(tr.cssselect('td')):
        if index == 0:
            if td.attrib.get('colspan') == '4':
                # Cell spanning four columns: its text is the lecture type.
                lecture_type = utils.clean_string(td.text_content())
                if lecture_type:
                    data['lecture_type'] = lecture_type
            else:
                # The bold part is split as "<day> <start>-<end>".
                when = td.cssselect('b')[0].text_content().strip()
                raw_day, period = when.split(' ', 1)
                raw_start, raw_end = period.split('-')

                data['day'] = utils.parse_day_of_week(raw_day)
                data['start'] = utils.parse_time(raw_start)
                data['end'] = utils.parse_time(raw_end)

                # NOTE(review): re.match returns None when "Uke: " is
                # absent from the first line of the cell text ('.' does
                # not cross newlines), which surfaces below as an
                # AttributeError. Left as is because it is unclear what
                # callers expect for rows without a week specification.
                match = re.match('.*Uke: (.+)', td.text_content())
                data['weeks'] = utils.parse_weeks(match.group(1))

        elif index == 1:
            # Query the links once instead of once per check.
            links = td.cssselect('a')
            if not links:
                continue
            if len(links) > 1:
                logging.warning('Multiple rooms links found, simply '
                                'using first one.')

            link = links[0]
            # Room names are the link's own text plus the text trailing
            # each of its child elements.
            raw_names = [link.text] + [child.tail for child in link]

            rooms = data['rooms'] = []
            for name in utils.clean_list(raw_names, utils.clean_string):
                if name not in room_codes:
                    rooms.append((None, name))
                    continue

                codes = room_codes[name]
                if len(codes) > 1:
                    logging.warning('Multiple rooms with name %s, '
                                    'simply using first code.', name)
                rooms.append((codes[0], name))

        elif index == 2:
            data['lecturers'] = [td.text] + [child.tail for child in td]

        elif index == 3:
            data['groups'] = [span.text_content()
                              for span in td.cssselect('span')]

    return data