def transform_header(self, obj):
    """Move a heading's trailing ``anker`` template into an ``anchor`` field.

    The last entry of ``obj["content"]`` is expected to be a template named
    ``anker``; its parameter ``"1"`` becomes the anchor and the remaining
    content (passed through ``text_rstrip``) becomes the heading content.

    Returns the result of ``merge(obj, {...})`` with the new ``content``
    and ``anchor`` entries.
    """
    # The comparison results are intentionally discarded.
    # NOTE(review): ``check(...) == x`` presumably rejects ``obj`` (e.g. by
    # raising) when the looked-up value differs -- confirm against ``check``.
    check(obj, "content", -1, "type") == "template"
    check(obj, "content", -1, "name") == "anker"

    content = obj["content"]
    heading_content = text_rstrip(content[:-1])
    anchor_name = content[-1]["params"]["1"]

    return merge(obj, dict(content=heading_content, anchor=anchor_name))
def transform_dict(self, obj):
    """Convert a ``table`` element into a ``{"type": "table"}`` node.

    When the first child is a ``tbody`` element, its children are used as
    the table content instead of the table's direct children. The chosen
    children are transformed recursively via ``self(...)``.
    """
    # NOTE(review): the comparison results are discarded; ``check``
    # presumably rejects non-matching objects itself -- confirm.
    check(obj, "type") == "element"
    check(obj, "name") == "table"

    children = obj["children"]
    wrapped_in_tbody = lookup(children, 0, "name") == "tbody"
    rows = children[0]["children"] if wrapped_in_tbody else children

    return {"type": "table", "content": self(rows)}
def transform_dict(self, obj):
    """Convert a ``ul`` / ``ol`` element into a ``list`` node.

    Every child of the element becomes a ``listitem`` whose content is the
    recursively transformed children of that child. ``ordered`` is true
    exactly when the element is an ``ol``.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name").of(("ul", "ol"))

    entries = []
    for child in obj["children"]:
        entries.append({
            "type": "listitem",
            "content": self(child["children"]),
        })

    is_ordered = obj["name"] == "ol"
    return {"type": "list", "ordered": is_ordered, "items": entries}
def transform_dict(self, obj):
    """Convert a ``dl`` element into a ``definitionlist`` node.

    Children are paired positionally: even-indexed children supply the
    ``definition`` and the following odd-indexed children the
    ``explanation``. A trailing unpaired child is ignored by ``zip``.

    If no pair exists, the problem is reported through
    ``log_parser_error`` and an ``error`` node is returned instead.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name") == "dl"

    terms = obj["children"][::2]
    descriptions = obj["children"][1::2]

    entries = []
    for term, description in zip(terms, descriptions):
        entries.append({
            "type": "definitionlistitem",
            "definition": self(term["children"]),
            "explanation": self(description["children"]),
        })

    if entries:
        return {"type": "definitionlist", "items": entries}

    message = "A definition list must not be empty!"
    log_parser_error(message, obj, position=self.current_section)
    return {"type": "error", "message": message}
def transform_element(self, obj):
    """Convert a transclusion element into a ``template`` node.

    Returns:
        * ``None`` when the element's ``about`` id was already registered
          in ``self._template_ids``, or when the parsed parameters contain
          a truthy ``noprint`` entry.
        * An ``error`` node when the first ``data-mw`` part carries no
          ``template`` entry.
        * Otherwise ``{"type": "template", "name": ..., "params": ...}``.

    Side effect: unless the (lower-cased) template name is ``"(!"``, the
    element's ``about`` id is added to ``self._template_ids``.
    """
    # Elements sharing an already registered ``about`` id are dropped.
    if lookup(obj, "attrs", "about") in self._template_ids:
        return None

    accepted_typeofs = [
        "mw:Transclusion",
        "mw:Transclusion mw:Video/Thumb",
        "mw:Transclusion mw:Image",
    ]
    # NOTE(review): ``check(...).of(...)`` presumably rejects the object
    # itself when the value is not among the options -- confirm.
    check(obj, "attrs", "typeof").of(accepted_typeofs)

    first_part = json.loads(obj["attrs"]["data-mw"])["parts"][0]
    try:
        template = first_part["template"]
    except (TypeError, KeyError):
        return {
            "type": "error",
            "message": "Template spans over several HTML elements.",
        }

    name = template["target"]["wt"].strip()

    # Labeled section transclusion needs unchanged case.
    is_section_transclusion = name.startswith("#lst:")
    if not is_section_transclusion:
        name = name.lower()

    if name != "(!":
        # Template includes a table afterwards
        # NOTE(review): this original remark presumably explains why "(!"
        # is exempt from registration -- confirm.
        self._template_ids.add(obj["attrs"]["about"])

    name = remove_prefix(name, ":mathe für nicht-freaks: vorlage:")

    # Raw wikitext of every parameter, keyed by parameter name.
    raw_params = {
        key: value["wt"] for key, value in template["params"].items()
    }

    # A parameter is skipped when its raw "<key>-noprint" companion is truthy.
    params = {}
    for key, value in raw_params.items():
        if raw_params.get(key + "-noprint", False):
            continue
        params[key] = self.parse_parameter_value(name, key, value)

    # TODO: Find better solution
    if params.get("noprint", False):
        return None

    return {"type": "template", "name": name, "params": params}
def transform_dict(self, obj):
    """Convert an image element into an ``image`` node.

    Accepts ``figure``, ``span`` and ``figure-inline`` elements whose
    ``typeof`` is ``mw:Image`` or ``mw:Image/Thumb``.

    The caption is taken from the first ``figcaption`` child (empty when
    there is none) and transformed recursively. The image name is derived
    from the ``resource`` attribute of the first grandchild, and the
    license is fetched through ``self.api.get_image_license``.

    Returns a dict with the keys ``type``, ``caption``, ``name``,
    ``thumbnail``, ``inline``, ``license`` and ``noprint``.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name").of(("figure", "span", "figure-inline"))
    check(obj, "attrs", "typeof").of(("mw:Image", "mw:Image/Thumb"))

    # Use ``.get`` so children without a "name" key (e.g. non-element
    # nodes) are skipped instead of raising ``KeyError``.
    captions = [
        child for child in obj["children"]
        if child.get("name") == "figcaption"
    ]
    caption = captions[0]["children"] if captions else []

    img = obj["children"][0]["children"][0]
    name = canonical_image_name(img["attrs"]["resource"])
    # Named ``image_license`` to avoid shadowing the ``license`` builtin.
    image_license = self.api.get_image_license(name)

    return {
        "type": "image",
        "caption": self(caption),
        "name": name,
        "thumbnail": obj["attrs"]["typeof"] == "mw:Image/Thumb",
        "inline": obj["name"] in ("span", "figure-inline"),
        "license": image_license,
        "noprint": "noprint" in obj["attrs"].get("class", ""),
    }
def transform_dict(self, obj):
    """Report a ``span`` element with ``typeof="mw:Image"`` as an error.

    The error is logged through ``log_parser_error`` (with the current
    section as position) and returned as an ``error`` node.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name") == "span"
    check(obj, "attrs", "typeof") == "mw:Image"

    error_node = {"type": "error", "message": "Inline images are not allowed"}
    log_parser_error(
        error_node["message"], obj, position=self.current_section
    )
    return error_node
def transform_dict(self, obj):
    """Convert a gallery extension element into a ``gallery`` node.

    The element's ``data-mw`` attribute is decoded as JSON. Each line of
    the stripped ``body.extsrc`` text is handed to
    ``self.parse_gallery_item``; ``widths`` and ``heights`` are read from
    the decoded ``attrs`` and default to 120.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name") == "ul"
    check(obj, "attrs", "typeof") == "mw:Extension/gallery"

    gallery = json.loads(obj["attrs"]["data-mw"])
    source_lines = gallery["body"]["extsrc"].strip().splitlines()

    items = []
    for line in source_lines:
        items.append(self.parse_gallery_item(line))

    widths = int(gallery["attrs"].get("widths", 120))
    heights = int(gallery["attrs"].get("heights", 120))

    return {
        "type": "gallery",
        "widths": widths,
        "heights": heights,
        "items": items,
    }
def transform_dict(self, obj):
    """Combine the per-type defaults with the regularly processed node.

    Applies only to objects whose ``type`` is a key of ``DEFAULT_VALUES``.
    The parent class's ``act_on_dict`` result is merged with the defaults
    registered for that type.
    """
    # NOTE(review): ``check(...).of(...)`` presumably rejects objects whose
    # type has no entry in ``DEFAULT_VALUES`` -- confirm.
    check(obj, "type").of(DEFAULT_VALUES)

    defaults = DEFAULT_VALUES[obj["type"]]
    processed = super(NodeTransformation, self).act_on_dict(obj)

    # NOTE(review): presumably entries of ``processed`` take precedence
    # over ``defaults`` -- verify against ``merge``.
    return merge(defaults, processed)
def transform_dict(self, obj):
    """Convert a math extension element into an ``inlinemath`` node.

    The formula is the ``body.extsrc`` text of the JSON-decoded ``data-mw``
    attribute, with surrounding whitespace stripped.
    """
    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (the result is discarded here) -- confirm.
    check(obj, "attrs", "typeof") == "mw:Extension/math"

    data_mw = json.loads(obj["attrs"]["data-mw"])
    source = data_mw["body"]["extsrc"]

    return {"type": "inlinemath", "formula": source.strip()}
def transform_dict(self, obj):
    """Drop ``span`` elements whose ``typeof`` is ``mw:FallbackId``.

    Matching elements are replaced by ``None``.
    """
    accepted_names = ("span", )

    # NOTE(review): ``check`` presumably rejects non-matching objects
    # itself (results are discarded here) -- confirm.
    check(obj, "type") == "element"
    check(obj, "name").of(accepted_names)
    check(obj, "attrs", "typeof") == "mw:FallbackId"

    return None