Exemple #1
0
Fichier : db.py Projet : mydos/plan
    def scrape(self):
        """Yield one lecture dict for every usable row of the timetable table.

        Two queries are run through ``fetch.sql('ntnu', ...)``: the first maps
        each activity code (``aktkode``) to the set of study programme codes
        listed for it, the second walks the timetable rows in a fixed order.
        A row is skipped when ``ntnu.parse_course`` gives no course code, when
        the code is not among the ``Course`` objects of ``self.semester``, or
        when ``self.should_proccess_course`` rejects the code.

        Yields:
            dict: with the keys ``course`` (the matching ``Course``), ``type``,
            ``day``, ``start``, ``end``, ``weeks``, ``rooms`` (list of pairs
            built from the ``romnr`` and ``romnavn`` columns), ``lecturers``
            and ``groups`` (set of study programme codes; empty when none
            were found for the row's activity code).
        """
        prefix = ntnu.prefix(self.semester)
        groups = {}

        courses = {
            course.code: course
            for course in Course.objects.filter(semester=self.semester)
        }

        # The table names carry a prefix derived from the semester, so the
        # prefix is interpolated into the statement text itself.
        query = ('SELECT aktkode, studieprogramkode FROM '
                 '%s_akt_studieprogram') % prefix

        for row in fetch.sql('ntnu', query):
            groups.setdefault(row.aktkode, set()).add(row.studieprogramkode)

        query = ('SELECT emnekode, typenavn, dag, start, slutt, uke, romnr, '
                 'romnavn, larer, aktkode FROM %s_timeplan ORDER BY emnekode, '
                 'dag, start, slutt, uke, romnavn, aktkode') % prefix

        for row in fetch.sql('ntnu', query):
            # parse_course returns a pair; only the course code is needed here.
            code, _version = ntnu.parse_course(row.emnekode)

            if not code:
                logging.warning('Skipped invalid course name: %s',
                                row.emnekode)
                continue
            if code not in courses:
                logging.debug("Unknown course %s.", code)
                continue
            if not self.should_proccess_course(code):
                continue

            # Materialise the pairs: a bare zip() is a one-shot iterator on
            # Python 3, which would leave 'rooms' empty after a first pass.
            rooms = list(zip(utils.split(row.romnr, '#'),
                             utils.split(row.romnavn, '#')))

            yield {
                'course': courses[code],
                'type': row.typenavn,
                'day': utils.parse_day_of_week(row.dag),
                'start': utils.parse_time(row.start),
                'end': utils.parse_time(row.slutt),
                'weeks': utils.parse_weeks(row.uke),
                'rooms': rooms,
                'lecturers': utils.split(row.larer, '#'),
                'groups': groups.get(row.aktkode, set()),
            }
Exemple #2
0
def parse_row(tr, room_codes):
    """Extract lecture data from the cells of one timetable table row.

    The cells are handled by position: the first holds either the lecture
    type (when its ``colspan`` attribute is ``'4'``) or the time slot and
    week specification, the second the rooms, the third the lecturers and
    the fourth the groups.

    Args:
        tr: Row element offering ``cssselect``.
            NOTE(review): looks like an lxml HTML element -- confirm.
        room_codes: Mapping from a room name to a sequence of room codes;
            the first code is used when a name has several.

    Returns:
        dict: A subset of the keys ``lecture_type``, ``day``, ``start``,
        ``end``, ``weeks``, ``rooms`` (list of ``(code_or_None, name)``
        tuples), ``lecturers`` and ``groups``, depending on which cells
        the row contains. Empty when the row has no ``td`` cells.
    """
    data = {}
    for i, td in enumerate(tr.cssselect('td')):
        if i == 0:
            if td.attrib.get('colspan', 1) == '4':
                # A first cell with colspan 4 carries the lecture type
                # instead of a time slot.
                lecture_type = utils.clean_string(td.text_content())
                if lecture_type:
                    data['lecture_type'] = lecture_type
            else:
                # The bold text is split as "<day> <start>-<end>".
                slot = td.cssselect('b')[0].text_content().strip()
                raw_day, period = slot.split(' ', 1)
                raw_start, raw_end = period.split('-')

                data['day'] = utils.parse_day_of_week(raw_day)
                data['start'] = utils.parse_time(raw_start)
                data['end'] = utils.parse_time(raw_end)

                # re.match returns None when the cell has no "Uke: " part;
                # report that instead of failing on None.group().
                match = re.match('.*Uke: (.+)', td.text_content())
                if match:
                    data['weeks'] = utils.parse_weeks(match.group(1))
                else:
                    logging.warning('No week specification found in row.')
        elif i == 1:
            # Run the selector once and reuse the result.
            links = td.cssselect('a')
            if not links:
                continue
            if len(links) > 1:
                logging.warning('Multiple rooms links found, simply '
                                'using first one.')

            a = links[0]
            # Candidate room names: the link's own text plus the tail text
            # of each of its child elements.
            rooms = [a.text] + [e.tail for e in a]

            data['rooms'] = []
            for name in utils.clean_list(rooms, utils.clean_string):
                if name not in room_codes:
                    data['rooms'].append((None, name))
                    continue

                codes = room_codes[name]
                if len(codes) > 1:
                    logging.warning(
                        'Multiple rooms with name %s, '
                        'simply using first code.', name)
                data['rooms'].append((codes[0], name))
        elif i == 2:
            # Raw text fragments; entries may be None when a fragment is
            # missing, exactly as the element API reports them.
            data['lecturers'] = [td.text] + [e.tail for e in td]
        elif i == 3:
            data['groups'] = [g.text_content() for g in td.cssselect('span')]

    return data
Exemple #3
0
def parse_row(tr, room_codes):
    """Extract lecture data from the cells of one timetable table row.

    Cells are interpreted by their index: cell 0 is either the lecture type
    (``colspan`` attribute equal to ``'4'``) or the time slot plus week
    specification, cell 1 the rooms, cell 2 the lecturers, cell 3 the groups.

    Args:
        tr: Row element offering ``cssselect``.
            NOTE(review): looks like an lxml HTML element -- confirm.
        room_codes: Mapping from a room name to a sequence of room codes;
            only the first code is used when a name maps to several.

    Returns:
        dict: A subset of the keys ``lecture_type``, ``day``, ``start``,
        ``end``, ``weeks``, ``rooms`` (list of ``(code_or_None, name)``
        tuples), ``lecturers`` and ``groups``, depending on which cells
        the row contains. Empty when the row has no ``td`` cells.
    """
    data = {}
    for i, td in enumerate(tr.cssselect('td')):
        if i == 0:
            if td.attrib.get('colspan', 1) == '4':
                # This cell holds the lecture type rather than a time slot.
                lecture_type = utils.clean_string(td.text_content())
                if lecture_type:
                    data['lecture_type'] = lecture_type
            else:
                # The bold text is split as "<day> <start>-<end>".
                slot = td.cssselect('b')[0].text_content().strip()
                raw_day, period = slot.split(' ', 1)
                raw_start, raw_end = period.split('-')

                data['day'] = utils.parse_day_of_week(raw_day)
                data['start'] = utils.parse_time(raw_start)
                data['end'] = utils.parse_time(raw_end)

                # Guard the lookup: re.match yields None when the cell has
                # no "Uke: " part, and None has no .group().
                match = re.match('.*Uke: (.+)', td.text_content())
                if match:
                    data['weeks'] = utils.parse_weeks(match.group(1))
                else:
                    logging.warning('No week specification found in row.')
        elif i == 1:
            # Query the links a single time instead of once per check.
            links = td.cssselect('a')
            if links:
                if len(links) > 1:
                    logging.warning('Multiple rooms links found, simply '
                                    'using first one.')

                a = links[0]
                # Candidate room names: the link's own text plus the tail
                # text of each of its child elements.
                rooms = [a.text] + [e.tail for e in a]

                data['rooms'] = []
                for name in utils.clean_list(rooms, utils.clean_string):
                    if name not in room_codes:
                        data['rooms'].append((None, name))
                        continue

                    codes = room_codes[name]
                    if len(codes) > 1:
                        logging.warning('Multiple rooms with name %s, '
                                        'simply using first code.', name)
                    data['rooms'].append((codes[0], name))
        elif i == 2:
            # Raw text fragments, passed on without cleaning.
            data['lecturers'] = [td.text] + [e.tail for e in td]
        elif i == 3:
            data['groups'] = [g.text_content() for g in td.cssselect('span')]

    return data
Exemple #4
0
    def scrape(self):
        """Yield one lecture dict for every usable row of the timetable table.

        Two queries are run through ``fetch.sql("ntnu", ...)``: the first maps
        each activity code (``aktkode``) to the set of study programme codes
        listed for it, the second walks the timetable rows in a fixed order.
        A row is skipped when ``ntnu.parse_course`` gives no course code, when
        the code is not among the ``Course`` objects of ``self.semester``, or
        when ``self.should_proccess_course`` rejects the code.

        Yields:
            dict: with the keys ``course`` (the matching ``Course``), ``type``,
            ``day``, ``start``, ``end``, ``weeks``, ``rooms`` (list of pairs
            built from the ``romnr`` and ``romnavn`` columns), ``lecturers``
            and ``groups`` (set of study programme codes; empty when none
            were found for the row's activity code).
        """
        prefix = ntnu.prefix(self.semester)
        groups = {}

        courses = {
            course.code: course
            for course in Course.objects.filter(semester=self.semester)
        }

        # The table names carry a prefix derived from the semester, so the
        # prefix is interpolated into the statement text itself.
        query = "SELECT aktkode, studieprogramkode FROM %s_akt_studieprogram" % prefix

        for row in fetch.sql("ntnu", query):
            groups.setdefault(row.aktkode, set()).add(row.studieprogramkode)

        query = (
            "SELECT emnekode, typenavn, dag, start, slutt, uke, romnr, "
            "romnavn, larer, aktkode FROM %s_timeplan ORDER BY emnekode, "
            "dag, start, slutt, uke, romnavn, aktkode"
        ) % prefix

        for row in fetch.sql("ntnu", query):
            # parse_course returns a pair; only the course code is needed here.
            code, _version = ntnu.parse_course(row.emnekode)

            if not code:
                logging.warning("Skipped invalid course name: %s", row.emnekode)
                continue
            if code not in courses:
                logging.debug("Unknown course %s.", code)
                continue
            if not self.should_proccess_course(code):
                continue

            # Materialise the pairs: a bare zip() is a one-shot iterator on
            # Python 3, which would leave "rooms" empty after a first pass.
            rooms = list(
                zip(utils.split(row.romnr, "#"), utils.split(row.romnavn, "#"))
            )

            yield {
                "course": courses[code],
                "type": row.typenavn,
                "day": utils.parse_day_of_week(row.dag),
                "start": utils.parse_time(row.start),
                "end": utils.parse_time(row.slutt),
                "weeks": utils.parse_weeks(row.uke),
                "rooms": rooms,
                "lecturers": utils.split(row.larer, "#"),
                "groups": groups.get(row.aktkode, set()),
            }
Exemple #5
0
    def scrape(self):
        """Yield one lecture dict for every usable row of the timetable table.

        Two queries are run through ``fetch.sql('ntnu', ...)``: the first maps
        each activity code (``aktkode``) to the set of study programme codes
        listed for it, the second walks the timetable rows in a fixed order.
        A row is skipped when ``ntnu.parse_course`` gives no course code or
        when the code is not among the ``Course`` objects of
        ``self.semester``.

        Yields:
            dict: with the keys ``course`` (the matching ``Course``), ``type``,
            ``day``, ``start``, ``end``, ``weeks``, ``rooms`` (list of pairs
            built from the ``romnr`` and ``romnavn`` columns), ``lecturers``
            and ``groups`` (set of study programme codes; empty when none
            were found for the row's activity code).
        """
        prefix = ntnu.prefix(self.semester)
        groups = {}

        courses = {
            course.code: course
            for course in Course.objects.filter(semester=self.semester)
        }

        # The table names carry a prefix derived from the semester, so the
        # prefix is interpolated into the statement text itself.
        query = ('SELECT aktkode, studieprogramkode FROM '
                 '%s_akt_studieprogram') % prefix

        for row in fetch.sql('ntnu', query):
            groups.setdefault(row.aktkode, set()).add(row.studieprogramkode)

        query = ('SELECT emnekode, typenavn, dag, start, slutt, uke, romnr, '
                 'romnavn, larer, aktkode FROM %s_timeplan ORDER BY emnekode, '
                 'dag, start, slutt, uke, romnavn, aktkode') % prefix

        for row in fetch.sql('ntnu', query):
            # parse_course returns a pair; only the course code is needed here.
            code, _version = ntnu.parse_course(row.emnekode)

            if not code:
                logging.warning('Skipped invalid course name: %s', row.emnekode)
                continue
            if code not in courses:
                logging.debug("Unknown course %s.", code)
                continue

            # Materialise the pairs: a bare zip() is a one-shot iterator on
            # Python 3, which would leave 'rooms' empty after a first pass.
            rooms = list(zip(utils.split(row.romnr, '#'),
                             utils.split(row.romnavn, '#')))

            yield {
                'course': courses[code],
                'type': row.typenavn,
                'day': utils.parse_day_of_week(row.dag),
                'start': utils.parse_time(row.start),
                'end': utils.parse_time(row.slutt),
                'weeks': utils.parse_weeks(row.uke),
                'rooms': rooms,
                'lecturers': utils.split(row.larer, '#'),
                'groups': groups.get(row.aktkode, set()),
            }