def extract_revision_text(remark_el):
    """Flatten a revision remark element into plain text.

    The result depends on the element children reported by
    ``minidom_children(remark_el)``:

    * exactly one child and it is a ``<p>``: the text of that paragraph;
    * no children at all: the text of the remark itself, or ``None`` when
      that text is empty;
    * otherwise, if ``minidom_get_text(remark_el)`` contains anything other
      than whitespace: ``None`` (presumably this means loose text is mixed
      in between the child elements -- confirm against the helper);
    * otherwise: one line per ``<p>`` child and one ``"* "``-prefixed line
      per ``<li>`` found directly inside a ``<ul>`` child, joined with
      newlines.  Children with any other tag are skipped.

    :param remark_el: the remark element to convert.
    :return: the flattened text, or ``None`` when no usable text is found.
    """
    remark_children = minidom_children(remark_el)

    # Most common shape: a single paragraph wrapping the whole remark.
    if len(remark_children) == 1 and remark_children[0].tagName == "p":
        return minidom_get_text(remark_children[0])

    # Bare text remark; normalise an empty string to None.
    if len(remark_children) == 0:
        return minidom_get_text(remark_el) or None

    # Child elements plus text of its own: not a shape we can flatten.
    if minidom_get_text(remark_el).strip():
        return None

    lines = []
    for child in remark_children:
        if child.tagName == "p":
            lines.append(minidom_get_text(child))
        elif child.tagName == "ul":
            # Render list items as bullet lines instead of dropping them.
            lines.extend(
                "* {}".format(minidom_get_text(item))
                for item in minidom_children(child)
                if item.tagName == "li"
            )
    return "\n".join(lines)
def extract_revision_text(remark_el):
    """Turn a revision remark element into a plain-text string.

    Outcomes, based on the children returned by ``minidom_children``:

    * no children: the remark's own text, with an empty result mapped to
      ``None``;
    * a single ``<p>`` child: that paragraph's text;
    * any other shape where ``minidom_get_text(remark_el)`` is not purely
      whitespace: ``None``;
    * everything else: newline-joined lines, one per ``<p>`` child and one
      ``"* "``-prefixed line per ``<li>`` directly inside a ``<ul>`` child.
      Other tags contribute nothing.

    :param remark_el: the remark element to convert.
    :return: the extracted text, or ``None``.
    """
    children = minidom_children(remark_el)
    child_count = len(children)

    if child_count == 0:
        return minidom_get_text(remark_el) or None

    if child_count == 1 and children[0].tagName == "p":
        return minidom_get_text(children[0])

    if minidom_get_text(remark_el).strip():
        return None

    def render(node):
        # Yield the output lines contributed by one child element.
        tag = node.tagName
        if tag == "p":
            yield minidom_get_text(node)
        elif tag == "ul":
            for entry in minidom_children(node):
                if entry.tagName == "li":
                    yield "* {}".format(minidom_get_text(entry))

    return "\n".join(text for node in children for text in render(node))
def extract_xep_metadata(document):
    """Collect header metadata of a XEP document into a dictionary.

    The header is located with ``minidom_find_header`` and its children are
    read with ``minidom_find_child`` / ``minidom_get_text``.

    :param document: the parsed document handed to ``minidom_find_header``.
    :return: a dict with the keys ``last_revision`` (itself a dict with
        ``version``, ``date``, ``initials`` and ``remark``), ``status``,
        ``type``, ``sig``, ``abstract``, ``shortname``, ``title``,
        ``approver`` and ``last_call``.

    Notes on individual fields:

    * All ``last_revision`` values are ``None`` when the header has no
      ``revision`` child.  ``initials`` is only looked up when a remark
      text could be extracted.
    * ``abstract`` has runs of whitespace collapsed to single spaces.
    * ``shortname`` becomes ``None`` for placeholder values such as
      "not yet assigned" (compared case-insensitively, with ``-`` and ``_``
      treated as spaces).
    * ``approver`` falls back to ``"Board"`` for type ``"Procedural"`` and
      to ``"Council"`` otherwise when the header has no ``approver`` child.
    * ``sig`` and ``last_call`` are ``None`` when their elements are absent.
    """
    header = minidom_find_header(document)

    # Start from "no revision" and fill in whatever the header provides.
    revision_info = {
        "version": None,
        "date": None,
        "initials": None,
        "remark": None,
    }
    revision_el = minidom_find_child(header, "revision")
    if revision_el is not None:
        revision_info["version"] = minidom_get_text(
            minidom_find_child(revision_el, "version")
        )
        revision_info["date"] = minidom_get_text(
            minidom_find_child(revision_el, "date")
        )

        remark_el = minidom_find_child(revision_el, "remark")
        if remark_el is not None:
            revision_info["remark"] = extract_revision_text(remark_el)

        # Initials are only reported alongside a usable remark.
        if revision_info["remark"] is not None:
            initials_el = minidom_find_child(revision_el, "initials")
            revision_info["initials"] = initials_el and minidom_get_text(
                initials_el
            )

    status = minidom_get_text(minidom_find_child(header, "status"))
    xep_type = minidom_get_text(minidom_find_child(header, "type"))

    abstract_words = minidom_get_text(
        minidom_find_child(header, "abstract")
    ).split()
    abstract = " ".join(abstract_words)

    sig_el = minidom_find_child(header, "sig")
    sig = None if sig_el is None else minidom_get_text(sig_el)

    shortname = minidom_get_text(minidom_find_child(header, "shortname"))
    normalized_shortname = (
        shortname.replace("-", " ").replace("_", " ").lower()
    )
    placeholder_shortnames = (
        "not yet assigned",
        "n/a",
        "none",
        "to be assigned",
        "to be issued",
    )
    if normalized_shortname in placeholder_shortnames:
        shortname = None

    title = minidom_get_text(minidom_find_child(header, "title"))

    approver_el = minidom_find_child(header, "approver")
    if approver_el is None:
        approver = "Board" if xep_type == "Procedural" else "Council"
    else:
        approver = minidom_get_text(approver_el)

    last_call_el = minidom_find_child(header, "lastcall")
    last_call = (
        None if last_call_el is None else minidom_get_text(last_call_el)
    )

    return {
        "last_revision": revision_info,
        "status": status,
        "type": xep_type,
        "sig": sig,
        "abstract": abstract,
        "shortname": shortname,
        "title": title,
        "approver": approver,
        "last_call": last_call,
    }
def extract_xep_metadata(document):
    """Read the header of a XEP document and return its metadata as a dict.

    :param document: the parsed document passed to ``minidom_find_header``.
    :return: a dict with the keys ``last_revision``, ``status``, ``type``,
        ``sig``, ``abstract``, ``shortname``, ``title``, ``approver`` and
        ``last_call``.  ``last_revision`` is a nested dict with the keys
        ``version``, ``date``, ``initials`` and ``remark``.

    Field rules:

    * Without a ``revision`` child in the header, every ``last_revision``
      value is ``None``.  ``initials`` stays ``None`` unless
      ``extract_revision_text`` produced a remark.
    * ``abstract`` is re-joined with single spaces after splitting on
      whitespace.
    * ``shortname`` is replaced by ``None`` when, after lower-casing and
      turning ``-``/``_`` into spaces, it equals one of the known
      placeholder strings.
    * ``approver`` defaults to ``"Board"`` when the type is
      ``"Procedural"`` and to ``"Council"`` otherwise, if the header has
      no ``approver`` child.
    * ``sig`` and ``last_call`` are ``None`` when their elements are
      missing.
    """
    header = minidom_find_header(document)

    def child_text(parent, tag):
        # Text of a child element that is looked up unconditionally.
        return minidom_get_text(minidom_find_child(parent, tag))

    def child_text_or_none(parent, tag):
        # Text of a child element, or None when the element is absent.
        element = minidom_find_child(parent, tag)
        if element is None:
            return None
        return minidom_get_text(element)

    version = date = remark = initials = None
    revision = minidom_find_child(header, "revision")
    if revision is not None:
        version = child_text(revision, "version")
        date = child_text(revision, "date")

        remark_el = minidom_find_child(revision, "remark")
        if remark_el is not None:
            remark = extract_revision_text(remark_el)

        if remark is not None:
            # Only bother with the author initials when there is a remark.
            initials_el = minidom_find_child(revision, "initials")
            initials = initials_el and minidom_get_text(initials_el)

    status = child_text(header, "status")
    doc_type = child_text(header, "type")
    abstract = " ".join(child_text(header, "abstract").split())
    sig = child_text_or_none(header, "sig")

    shortname = child_text(header, "shortname")
    comparable = shortname.replace("-", " ").replace("_", " ").lower()
    if comparable in (
        "not yet assigned",
        "n/a",
        "none",
        "to be assigned",
        "to be issued",
    ):
        shortname = None

    title = child_text(header, "title")

    approver_el = minidom_find_child(header, "approver")
    if approver_el is not None:
        approver = minidom_get_text(approver_el)
    elif doc_type == "Procedural":
        approver = "Board"
    else:
        approver = "Council"

    last_call = child_text_or_none(header, "lastcall")

    last_revision = {
        "version": version,
        "date": date,
        "initials": initials,
        "remark": remark,
    }
    return {
        "last_revision": last_revision,
        "status": status,
        "type": doc_type,
        "sig": sig,
        "abstract": abstract,
        "shortname": shortname,
        "title": title,
        "approver": approver,
        "last_call": last_call,
    }