def __init__(self, article):
    """Prepare parser state and patterns, then run the state machine.

    ``article`` is stored as ``self.article``; the handler methods read and
    advance it while the state machine runs.
    """
    self.article = article
    self.name = None
    self.values = {}
    # Nested templates; a list of dicts of the form
    # {"startcursor": cursor, "template": Template(), "endcursor": cursor}
    self.subtemplates = []
    # "Anonymous" values in templates ({{Template|value}})
    self.anonymous = 0

    # Patterns used by the state handlers.
    self.p_start = re.compile(r"\{\{")
    self.p_end = re.compile(r"\}\}")
    self.p_val = re.compile(r"\s*\|\s*([^=|}]*)\s*=?\s*([^|}]*)")
    self.p_linkstart = re.compile(r"\[\[")
    self.p_link = re.compile(r"\[\[(.*?)\]\]")
    self.p_slicer = re.compile(r"\|")

    # Markers for the next section, i.e. the end of the template or the
    # next value; each maps to the state entered when it is found.
    self.slicers = {
        self.p_end: "end",
        self.p_slicer: "value",
        self.p_start: "start",
        self.p_linkstart: "link",
    }

    # Set up the state machine (registration order kept as-is).
    machine = StateMachine()
    self.fsm = machine
    machine.addState("start", self.start_state)
    machine.setStart("start")
    for state_name, handler in (
        ("name", self.name_state),
        ("value", self.value_state),
    ):
        machine.addState(state_name, handler)
    machine.addState("end", None, end=True)
    machine.addState("link", self.link_state)

    machine.run()
class Template:
    """Parser for one ``{{name|key=value|...}}`` template read from an article.

    Constructing an instance wires up a small state machine and runs it
    immediately. The handlers fill in:

    * ``name`` - the template name (``None`` until parsed),
    * ``values`` - mapping of key -> value; values without a ``=`` are stored
      under the first free integer key starting at 1,
    * ``subtemplates`` - nested templates found inside values.

    The ``article`` object is used through these members only: ``line``,
    ``cursor`` (read as a mapping with ``"line"`` and ``"char"``; assigned
    either such a mapping or a bare integer character offset), ``__next__()``
    and ``extract(start, end)``.
    NOTE(review): the exact cursor/line semantics live in the article class,
    which is not visible here - confirm before changing cursor arithmetic.
    """

    def __init__(self, article):
        """Prepare parser state and patterns, then run the state machine.

        Args:
            article: the article object to read from (see class docstring).
        """
        self.article = article
        self.name = None
        self.values = {}
        # Nested templates; a list of dicts of the form
        # {"startcursor": cursor, "template": Template(), "endcursor": cursor}
        self.subtemplates = []
        # "Anonymous" values in templates ({{Template|value}}).
        # Not read by any method of this class.
        self.anonymous = 0

        # Set up the state machine.
        self.fsm = StateMachine()
        self.fsm.addState("start", self.start_state)
        self.fsm.setStart("start")
        self.fsm.addState("name", self.name_state)
        self.fsm.addState("value", self.value_state)
        self.fsm.addState("end", None, end=True)
        self.fsm.addState("link", self.link_state)

        self.p_start = re.compile(r"\{\{")
        self.p_end = re.compile(r"\}\}")
        self.p_val = re.compile(r"\s*\|\s*([^=|}]*)\s*=?\s*([^|}]*)")
        self.p_linkstart = re.compile(r"\[\[")
        self.p_link = re.compile(r"\[\[(.*?)\]\]")
        self.p_slicer = re.compile(r"\|")

        # Markers for the next section, i.e. the end of the template or the
        # next value; each maps to the state entered when it is found.
        self.slicers = {
            self.p_end: "end",
            self.p_slicer: "value",
            self.p_start: "start",
            self.p_linkstart: "link",
        }

        self.fsm.run()

    # ------------------------------------------------------------------
    # State machine handlers
    # ------------------------------------------------------------------

    def link_state(self):
        """Handler for the ``"link"`` state; always raises.

        Prints the current article cursor and raises ``Exception``: a link
        marker is not handled as a state of its own.
        """
        print("cursor: "+str(self.article.cursor))
        raise Exception("link state")

    def start_state(self):
        """Start state: no template has been found yet.

        Searches the current article line for ``{{``. If none is found the
        article is advanced by one line and the state stays ``"start"``.
        Otherwise the cursor is moved just past the ``{{``.

        Returns:
            ``"start"`` or ``"name"``.

        Raises:
            NoTemplate: the article is exhausted before a ``{{`` is found.
        """
        start = self.p_start.search(self.article.line)
        if not start:
            try:
                self.article.__next__()
            except StopIteration:
                raise NoTemplate()
            return "start"

        cursor = {
            "line": self.article.cursor["line"],
            "char": start.end() + self.article.cursor["char"],
        }
        self.article.cursor = cursor
        return "name"

    def name_state(self):
        """Parse the template name.

        The current line is cut at every slicer match in turn. Because each
        cut shortens the text searched next, the last slicer that matches is
        the one occurring earliest in the line; its state is the follow-up
        state. If the remaining text is blank, ``"name"`` is returned again.
        If no slicer was on the line, following lines are scanned until one
        contains a slicer.

        Returns:
            The next state name.

        Raises:
            NoTemplate: the article is exhausted while looking for a slicer.
        """
        line = self.article.line
        newState = None

        # Chop off everything after the name.
        startCursor = self.article.cursor
        for slicer, state in self.slicers.items():
            match = slicer.search(line)
            if not match:
                continue
            line = line[:match.start()]
            # A bare character offset is assigned here (not a mapping).
            self.article.cursor = match.end() + startCursor["char"]
            newState = state
            # TODO: reject a nested template ("start") inside the name.

        line = line.strip()
        if line == "":
            return "name"
        self.name = line

        if newState:
            return newState

        # Look for the next state on the following lines.
        while True:
            try:
                line = self.article.__next__()
            except StopIteration:
                raise NoTemplate()

            # Sentinel beyond the end of the line so any match is earlier.
            span = (len(line) + 1, len(line) + 1)
            for slicer, state in self.slicers.items():
                match = slicer.search(line)
                if not match or not match.start() < span[0]:
                    continue
                span = match.span()
                newState = state

            # TODO: template name spanning multiple lines.
            if newState:
                self.article.cursor = self.article.cursor["char"] + span[1]
                return newState

    def value_state(self):
        """Parse one template entry/value; may span several lines.

        Text is collected up to the next slicer. ``[[`` links and nested
        ``{{`` templates are copied into the value and collection continues;
        ``|`` or ``}}`` ends the value. The collected text is split at the
        first ``=`` into key and value; without ``=`` it is stored under the
        first unused integer key, counting from 1.

        Returns:
            The state of the slicer that ended the value.

        Raises:
            NoTemplate: the article is exhausted before the value ends.
            Exception: the key part contains ``{{``.
        """
        # Chop off what follows the value; gather text across lines.
        newState = "continue"
        value = ""
        while True:
            line = self.article.line
            span = None
            for slicer, state in self.slicers.items():
                match = slicer.search(line)
                if not match:
                    continue
                newState = state
                span = match.span()
                line = line[:span[0]]
            value += line

            # Parse a link [[ ... ]].
            # (String states are compared with ==; `is` only tests identity.)
            if newState == "link":
                self.article.cursor = self.article.cursor["char"] + span[0]
                m = self.p_link.match(self.article.line)
                # NOTE(review): assumes reading article.cursor yields an
                # object independent of the article's own cursor - confirm.
                endCursor = self.article.cursor
                if m is None:
                    # A stray "[[" that does not form a link.
                    endCursor["char"] += 2
                    asString = "[["
                else:
                    # A real link.
                    endCursor["char"] += m.span()[1]
                    asString = self.article.extract(self.article.cursor,
                                                    endCursor)
                value += asString
                self.article.cursor = endCursor
                newState = "continue"
                continue

            # Nested template.
            if newState == "start":
                cursor = self.article.cursor
                cursor["char"] += span[0]
                self.article.cursor = cursor
                template = Template(self.article)
                subt = {
                    "startcursor": cursor,
                    "template": template,
                    "endcursor": self.article.cursor,
                }
                self.subtemplates.append(subt)
                value += self.article.extract(cursor, subt["endcursor"])
                self.article.cursor = subt["endcursor"]
                newState = "continue"
                continue

            # Value finished: mainly move the cursor past the slicer.
            if newState != "continue":
                self.article.cursor = span[1] + self.article.cursor["char"]
                break

            try:
                self.article.__next__()
                value += "\n"
            except StopIteration:
                raise NoTemplate()

        # Parse the value; only the first "=" separates key from value.
        key, separator, rest = value.partition("=")
        if separator:
            if "{{" in key:
                raise Exception("template in key: "+key)
            # NOTE(review): the key is stored without stripping whitespace.
            self.values[key] = rest.strip()
        else:
            # Anonymous value: use the first free integer key.
            index = 1
            while index in self.values:
                index += 1
            self.values[index] = key.strip()

        return newState