def echo(*filenames):
    """Parse each named file and print its serialized root element."""
    for path in filenames:
        root = read_xml(path).getroot()
        # Single parenthesised argument: prints the same text whether
        # ``print`` is a statement or a function.
        print(ET.tostring(root))
def _get_points(filename, node, the_class):
    """Return the serialized ``ul`` inside *node*'s ``div`` of class *the_class*.

    Args:
        filename: Not used by this function; kept so existing callers
            keep working.
        node: Element whose direct ``div`` children are searched.
        the_class: Value of the ``class`` attribute to look for.

    Returns:
        ``ET.tostring`` of the single ``ul`` found anywhere below the first
        matching ``div`` (with the ``ul``'s tail text cleared in place), or
        None if *node* has no such ``div``.

    Raises:
        AssertionError: If the ``div`` does not contain exactly one ``ul``.
    """
    divs = node.findall("./div[@class='%s']" % the_class)
    if not divs:
        return None
    lists = divs[0].findall(".//ul")
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with -O.
    if not lists:
        raise AssertionError('No list of type %s in section' % the_class)
    if len(lists) > 1:
        raise AssertionError(
            'Multiple lists of type %s in section' % the_class)
    points = lists[0]
    # Drop trailing text so it is not serialized after the closing tag.
    points.tail = ''
    return ET.tostring(points)
def _sections(filename, doc):
    """Print an HTML summary entry for each titled section of *doc*.

    For every ``section`` element found in *doc*, the title is looked up
    with ``_get_section_title`` and the 'understand' and 'keypoints' lists
    with ``_get_points``.  Sections with no title, or with neither list,
    are skipped.  For the rest, a link to the section followed by the two
    lists is printed to standard output.

    Args:
        filename: Used as the link target and in the error message.
        doc: Parsed document (anything providing ``findall``).

    Raises:
        AssertionError: If a section has only one of the two lists.
    """
    for section in doc.findall(".//section"):
        title = _get_section_title(section)
        if title is None:
            continue
        title = ET.tostring(title)
        understand = _get_points(filename, section, 'understand')
        keypoints = _get_points(filename, section, 'keypoints')
        if (not understand) and (not keypoints):
            continue
        # Raised explicitly (rather than via ``assert``) so the check is
        # not stripped when Python runs with -O.
        if not (understand and keypoints):
            raise AssertionError(
                'Section %s in %s has understanding/keypoints mis-match'
                % (title, filename))
        # Single parenthesised argument: prints the same text whether
        # ``print`` is a statement or a function.
        print(' <a href="%s#%s">%s</a>'
              % (filename, section.attrib.get('id'), title))
        print('<p><strong>Understand:</strong></p>%s\n<p><strong>Summary:</strong></p>%s'
              % (understand, keypoints))
def ideas(*filenames):
    """Print the idea-tagged key points of the given files, grouped by idea.

    Each file is read with ``read_xml``.  Inside every ``div`` of class
    ``keypoints``, each ``li`` carrying an ``idea`` attribute is rewritten
    in place into an ``a`` element linking to ``<file>#<id of that div>``,
    and its serialized form is filed under every ';'-separated name in the
    attribute.  Afterwards one ``h2`` heading plus ``ul`` list is printed
    per idea.
    """
    grouped = {}
    for path in filenames:
        tree = read_xml(path)
        for block in tree.findall(".//div[@class='keypoints']"):
            for point in block.findall(".//li[@idea]"):
                labels = point.attrib['idea'].split(';')
                del point.attrib['idea']
                point.tag = 'a'
                point.attrib['href'] = '%s#%s' % (path, block.attrib.get('id'))
                point.tail = ''
                # The element no longer changes, so serialize it once and
                # file the same markup under each of its ideas.
                markup = ET.tostring(point)
                for label in labels:
                    grouped.setdefault(label, []).append(markup)

    for label in grouped:
        print('<h2>%s</h2>' % label)
        print('<ul>')
        for entry in grouped[label]:
            print(' <li>%s</li>' % entry)
        print('</ul>')
def _lecture(filename, doc):
    """Print a link to *filename* whose content is the lecture title of *doc*.

    The first ``div`` of class ``title`` in *doc* is modified in place: it
    is retagged as ``h2``, whitespace around its leading text is stripped
    and its tail text is cleared.  The serialized element is then printed
    inside an ``a`` element pointing at *filename*.

    Args:
        filename: Link target.
        doc: Parsed document (anything providing ``findall``).

    Raises:
        IndexError: If *doc* contains no ``div`` of class ``title``.
    """
    title = doc.findall(".//div[@class='title']")[0]
    # ``text`` is None when the div is empty or starts with a child
    # element; there is nothing to strip in that case.
    if title.text is not None:
        title.text = title.text.strip()
    title.tag = 'h2'
    title.tail = ''
    # Single parenthesised argument: prints the same text whether
    # ``print`` is a statement or a function.
    print('<a href="%s">%s</a>' % (filename, ET.tostring(title)))