Ejemplo n.º 1
0
Archivo: tt.py Proyecto: mor1/uon
def _collapse_single_values(acts):
    """Replace every one-element value list in *acts* with its sole element.

    *acts* maps an activity name to a dict of column name -> list of values;
    the inner dicts are modified in place.
    """
    for fields in acts.values():
        for key, values in list(fields.items()):
            if len(values) == 1:
                fields[key] = values[0]


def scrape_timetable(doc):
    """Extract per-module timetable data from a parsed timetable page.

    The page is read as a flat sequence of ``<table>`` elements, told apart
    by their exact attribute sets:

    * ``width="100%" border="0"`` tables close the module being built and,
      when their first ``<b>`` starts with ``Module:`` or ``Programme:``,
      open a new one;
    * ``cellspacing="0" cellpadding="2%" border="1"`` tables hold timetable
      rows for the module currently open.

    ``doc`` must provide ``getiterator(tag)``; tag names are obtained from
    ``scraper.tag``.  Header cells are passed through ``spelling`` and rows
    are kept only when their 'Name of Type' value is in ``activity_types``
    (both defined elsewhere in this file).

    Returns a list of dicts with keys ``'title'``, ``'code'`` (empty string
    for programmes) and ``'acts'``.  ``'acts'`` maps each 'Activity' value to
    a dict of column name -> value, or -> list of distinct values when rows
    for the same activity disagree on that column.

    Raises ValueError if a ``Module:`` heading does not consist of at least
    three double-space separated parts, and KeyError if a timetable lacks the
    'Name of Type' or 'Activity' columns.
    """
    heading_attrs = set([("width", "100%"), ("border", "0")])
    timetable_attrs = set([("cellspacing", "0"), ("cellpadding", "2%"),
                           ("border", "1")])
    programme_prefix = "Programme:"

    modules = []
    module = {}
    for table in doc.getiterator(scraper.tag("table")):
        attrs = set(table.items())
        if attrs == heading_attrs:
            ## denotes end of timetable block and possible start of new heading
            if module:
                ## collapse only once the module is complete, so that several
                ## timetable tables for one module can still merge their rows
                _collapse_single_values(module['acts'])
                modules.append(module)
                module = {}

            bolds = list(table.getiterator(scraper.tag("b")))
            if not bolds: continue ## not a heading

            module_title = bolds[0].text or ""  ## <b> may have no direct text
            if module_title.startswith("Module:"):
                ## maxsplit keeps any double spaces inside the title itself
                (_, code, title) = module_title.split("  ", 2)
                module['title'] = title
                module['code'] = code
            elif module_title.startswith(programme_prefix):
                ## slice the prefix off: str.lstrip() takes a *set* of
                ## characters and would also eat leading letters of the title
                module['title'] = module_title[len(programme_prefix):].lstrip(" ")
                module['code'] = ""
            else: continue

            module['acts'] = {}

        elif attrs == timetable_attrs:
            ## timetable block
            if 'acts' not in module: continue ## no recognised heading open

            rows = list(table.getiterator(scraper.tag("tr")))
            if not rows: continue ## empty table, nothing to read

            ## a real list: the header is reused for every row
            hrow = [spelling(e.text) for e in rows[0]]
            for row in rows[1:]:
                activity = dict(zip(hrow, [c.text for c in row]))
                if activity['Name of Type'] not in activity_types: continue

                a = activity["Activity"]
                if a not in module['acts']:
                    module['acts'][a] = dict((k, [v]) for (k, v) in activity.items())
                else:
                    seen = module['acts'][a]
                    for (k, v) in activity.items():
                        values = seen.setdefault(k, [])
                        if v not in values: values.append(v)

        else: pass

    ## flush the last module when the page does not end with a heading table
    if module:
        _collapse_single_values(module['acts'])
        modules.append(module)

    return modules
Ejemplo n.º 2
0
Archivo: tt.py Proyecto: mor1/uon
def scrape_module_details(doc):
    """Extract module details from a parsed module-description page.

    Walks the children of every ``<p>`` element in ``doc`` (which must
    provide ``getiterator(tag)``; tag names come from ``scraper.tag``):

    * a child with both text and tail is stored as
      ``details[text stripped of whitespace and ':'] = tail.strip()``;
    * a child whose text starts with 'Learning Outcomes' switches on
      outcome parsing;
    * once outcome parsing is on, tail-only children fill in
      'Knowledge and Understanding' and the three skills sections, either
      from text following the section name in the same tail or from the
      next non-empty tail after it.

    Returns the ``details`` dict.  Each skills section is always present: it
    ends with a full stop, or is '[No <section> listed.]' when nothing was
    found for it.  'Knowledge and Understanding' is present only if seen,
    and is None if its heading was found without any following text.
    """
    SKILLS = ('Intellectual Skills',
              'Professional Skills',
              'Transferable Skills',)
    KNOWLEDGE = 'Knowledge and Understanding'

    details = {}
    outcomes = False
    for p in doc.getiterator(scraper.tag("p")):
        for c in p:
            if c.tail and c.text:
                details[c.text.strip().strip(":")] = c.tail.strip()
            elif c.tail:
                cts = c.tail.strip()
                ## whitespace-only tails carry no content; skipping them keeps
                ## a pending section waiting for its real text
                if not outcomes or not cts: continue

                if KNOWLEDGE in details and not details[KNOWLEDGE]:
                    if not cts.endswith("."): cts += "."
                    details[KNOWLEDGE] = cts
                elif cts.startswith(KNOWLEDGE):
                    ## mark as pending: the text follows in a later tail
                    details[KNOWLEDGE] = None

                else:
                    for k in SKILLS:
                        if k in details and len(details[k]) == 0:
                            details[k] = cts
                        elif cts.startswith(k):
                            ## drop the section name, then its '.'/':'/space
                            ## separator; str.lstrip() with the name would
                            ## strip a character set and eat into the text
                            details[k] = cts[len(k):].lstrip(".: ")

            elif c.text:
                if c.text.strip().startswith("Learning Outcomes"):
                    outcomes = True

    for k in SKILLS:
        if not details.get(k):
            ## missing, or seen as a heading but never filled in
            details[k] = '[No %s listed.]' % (k.lower(),)
        elif not details[k].endswith("."):
            details[k] += "."
    return details