def parse_ethnicity(wikicode: mwp.wikicode.Wikicode):
    """Reduce an ethnicity field to a plain, title-cased string.

    The field is first passed through ``parse_markup``. Every wikilink is
    replaced by its stripped title and every template by text derived from
    its parameters; the remaining markup is then stripped and the result is
    title-cased.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the ethnicity field.

    Returns:
        The cleaned ethnicity text, with the combination ``"Skaa, Noble"``
        reported as ``"Half-Skaa"``.
    """
    wikicode = parse_markup(wikicode)

    # Collect the nodes up front, before the text itself is rewritten.
    nodes = wikicode.filter(forcetype=(Template, Wikilink))

    if " and " in wikicode:
        wikicode.replace(" and ", ", ")

    for node in nodes:
        if isinstance(node, Wikilink):
            replacement = node.title.strip_code().strip()
        elif isinstance(node, Template):
            # Keyed parameters contribute their name, positional ones their
            # value; the literal "cat" entry is dropped.
            texts = []
            for param in node.params:
                source = param.name if param.showkey is True else param.value
                text = source.strip_code()
                if text != "cat":
                    texts.append(text)
            values = tuple(texts)

            if len(values) == 1:
                replacement = values[0]
            elif any(label in values for label in ("Noble", "noble")):
                replacement = "Noble"
            else:
                # NOTE(review): the tuple is handed to replace() unchanged;
                # confirm how mwparserfromhell renders a multi-item value.
                replacement = values
        else:
            continue
        wikicode.replace(node, replacement)

    ethnicity = wikicode.strip_code().strip().title()

    # Special case: both parent groups listed together.
    return "Half-Skaa" if ethnicity == "Skaa, Noble" else ethnicity
def parse_residence(wikicode: mwp.wikicode.Wikicode):
    """Reduce a residence field to a plain string.

    The field is first passed through ``parse_markup``. ``<br>`` separators
    become ``", "``, wikilinks are replaced by their title-cased title and
    templates by text derived from their parameters. After stripping the
    remaining markup, parenthesised word groups are removed.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the residence field.

    Returns:
        The cleaned residence text; values starting with
        ``"15 Stranat Place"`` are reported as ``"Elendel"``.
    """
    wikicode = parse_markup(wikicode)

    # Collect the nodes up front, before the text itself is rewritten.
    nodes = wikicode.filter(forcetype=(Template, Wikilink))

    if "<br>" in wikicode:
        wikicode.replace("<br>", ", ")

    for node in nodes:
        if isinstance(node, Wikilink):
            replacement = node.title.strip_code().strip().title()
        elif isinstance(node, Template):
            # Keyed parameters contribute their name, positional ones their
            # value; the literal "cat" entry is dropped.
            texts = []
            for param in node.params:
                source = param.name if param.showkey is True else param.value
                text = source.strip_code()
                if text != "cat":
                    texts.append(text)
            values = tuple(texts)

            # NOTE(review): when there is not exactly one value the tuple is
            # handed to replace() unchanged; confirm how mwparserfromhell
            # renders a multi-item value.
            replacement = values[0] if len(values) == 1 else values
        else:
            continue
        wikicode.replace(node, replacement)

    plain = wikicode.strip_code().strip()

    # Drop parenthesised qualifiers together with one optional leading space.
    residence = re.sub(r"\s?\([\w\s]+\)", "", plain)

    # special cases
    if residence.startswith("15 Stranat Place"):
        return "Elendel"
    return residence
def naiveStrip(self, wikiCode:mwparserfromhell.wikicode.Wikicode):
    """Return the text of a page or section with the wiki markup stripped.

    This simply delegates to the ``strip_code`` method of the given object.

    Arguments:
        wikiCode {mwparserfromhell.wikicode.Wikicode} -- wikicode for a page
            or a section.

    Returns:
        Whatever ``wikiCode.strip_code()`` produces.
    """
    plain_text = wikiCode.strip_code()
    return plain_text
def parse_names(wikicode: mwp.wikicode.Wikicode):
    """Split a names field into a list of individual names.

    The field is first passed through ``parse_markup``. Templates are then
    rewritten in place: a single-parameter template becomes that parameter,
    and a multi-parameter template whose first parameter mentions
    "highprince" or "army" is rendered from its first two parameters.
    Other templates are left untouched. Finally the stripped text is split
    on commas.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the names field.

    Returns:
        A list of the non-empty, whitespace-trimmed comma-separated entries.
    """
    wikicode = parse_markup(wikicode)

    for template in wikicode.filter(forcetype=Template):
        params = template.params
        count = len(params)

        if count == 0:
            continue

        if count == 1:
            replacement = params[0]
        else:
            leading = params[0].lower()
            if "highprince" in leading:
                replacement = "{0} of {1}".format(*params)
            elif "army" in leading:
                # Note the swapped order: second parameter first.
                replacement = "{1} {0}".format(*params)
            else:
                continue

        wikicode.replace(template, replacement)

    names = []
    for piece in wikicode.strip_code().split(","):
        name = piece.strip()
        if name:
            names.append(name)
    return names
def parse_family(wikicode: mwp.wikicode.Wikicode):
    """Extract a family name from a family field.

    The field is first passed through ``parse_markup``. Only wikilinks whose
    lower-cased text does not contain "category" are considered.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the family field.

    Returns:
        ``None`` (after printing a message) when no usable wikilink exists;
        the stripped title of the link when there is exactly one; otherwise
        the stripped text of the whole field with each usable link replaced
        by its title (a message is printed in this case as well).
    """
    wikicode = parse_markup(wikicode)

    candidates = []
    for link in wikicode.filter_wikilinks():
        if "category" not in link.lower():
            candidates.append(link)

    if not candidates:
        print("unable to find family", wikicode)
        return None

    if len(candidates) == 1:
        only_link = candidates[0]
        return only_link.title.strip_code().strip()

    # More than one link: report it, then flatten every link to its title.
    print("unexpected number of wikilinks", wikicode)
    for link in candidates:
        wikicode.replace(link, link.title.strip_code().strip())
    return wikicode.strip_code().strip()
def parse_species(wikicode: mwp.wikicode.Wikicode):
    """Look up the species named by a species field.

    The stripped, lower-cased, trimmed field text is used as a key into the
    ``species`` mapping.

    NOTE(review): ``species``, ``val`` and ``char_info`` are not defined in
    this function; presumably they come from an enclosing or module scope --
    confirm.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the species field.

    Returns:
        ``None`` when the key is not in ``species``. When ``val`` mentions
        "spren" or "cryptic", the looked-up value is stored under
        ``char_info["subspecies"]`` and ``"Spren"`` is returned. Otherwise
        the looked-up value is returned.
    """
    key = wikicode.strip_code().lower().strip()
    spec = species.get(key, None)
    if spec is None:
        return spec

    for marker in ("spren", "cryptic"):
        if marker in val.lower():
            # Keep the specific kind as the subspecies and generalise.
            char_info["subspecies"] = spec
            return "Spren"

    return spec
def parse_nation(wikicode: mwp.wikicode.Wikicode):
    """Look up the nation for a nationality field.

    The stripped, lower-cased, trimmed field text is used as a key into the
    ``demonyms`` mapping (defined outside this function).

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the field.

    Returns:
        The matching ``demonyms`` entry, or ``None`` when there is none.
    """
    key = wikicode.strip_code().lower().strip()
    nation = demonyms.get(key, None)
    return nation
def parse_profession(wikicode: mwp.wikicode.Wikicode):
    """Return the profession field as lower-cased, trimmed plain text.

    Arguments:
        wikicode {mwp.wikicode.Wikicode} -- raw value of the profession field.

    Returns:
        A ``str`` holding the stripped text, lower-cased and with
        surrounding whitespace removed.
    """
    # todo: parse with simple NLP
    plain = wikicode.strip_code()
    normalized = plain.lower().strip()
    return str(normalized)