def decode_pred(elem):
    """Turn a predicate XML element into a ``Pred``.

    *elem* must expose ``tag``, ``text`` and ``get()`` (presumably an
    ElementTree element -- confirm against the caller).

    * ``<gpred>``: the element text is handed to ``Pred.grammarpred``.
    * ``<realpred>``: the ``lemma``, ``pos`` and ``sense`` attributes are
      handed to ``Pred.realpred``; a missing attribute is passed as None.

    Any other tag yields None.
    """
    tag = elem.tag
    if tag == 'gpred':
        return Pred.grammarpred(elem.text)
    if tag == 'realpred':
        lemma = elem.get('lemma')
        pos = elem.get('pos')
        sense = elem.get('sense')
        return Pred.realpred(lemma, pos, sense)
    # Unrecognised element: nothing to decode.
    return None
def _decode_pred(elem):
    """Decode a ``<gpred>`` or ``<realpred>`` element into a ``Pred``.

    Returns the result of ``Pred.grammarpred`` (built from the element
    text) for ``gpred``, the result of ``Pred.realpred`` (built from the
    ``lemma``/``pos``/``sense`` attributes) for ``realpred``, and None for
    every other tag.
    """
    # Relevant DTD fragments:
    #   <!ELEMENT realpred EMPTY>
    #   <!ATTLIST realpred
    #       lemma CDATA #REQUIRED
    #       pos (v|n|j|r|p|q|c|x|u|a|s) #REQUIRED
    #       sense CDATA #IMPLIED >
    #   <!ELEMENT gpred (#PCDATA)>
    result = None
    if elem.tag == 'gpred':
        result = Pred.grammarpred(elem.text)
    elif elem.tag == 'realpred':
        # elem.get() gives None for an absent attribute (e.g. the
        # optional sense), which is forwarded unchanged.
        attrs = (elem.get('lemma'), elem.get('pos'), elem.get('sense'))
        result = Pred.realpred(*attrs)
    return result
def strip_category(cat):
    """Reduce a predicate string to a ``<lemma>_<pos>_<sense>`` category.

    Two cases are handled:

    * *cat* ends with ``"u_unknown"``: the text after the last ``"/"`` is
      split at its first underscore into pos and sense, and the lemma is
      replaced by ``"X"``.  A *cat* without any ``"/"`` raises ValueError
      from the tuple unpacking.
    * otherwise: *cat* is parsed with ``Pred.stringpred``; the lemma is
      replaced by ``"X"`` when *cat* starts with ``"_"`` and kept
      otherwise, while pos and sense are stringified with ``str()`` (so a
      None sense comes out as ``"None"``).
    """
    if not cat.endswith("u_unknown"):
        parsed = Pred.stringpred(cat)
        lemma = "X" if cat.startswith("_") else parsed.lemma
        pieces = [lemma, str(parsed.pos), str(parsed.sense)]
    else:
        # Only the tail after the final "/" matters; the lemma is dropped.
        _, tail = cat.rsplit("/", 1)
        pos, sense = tail.split("_", 1)
        pieces = ["X", pos, sense]
    return "_".join(pieces)
def _read_node(tokens):
    """Consume one path node from the front of *tokens*.

    *tokens* holds ``(type, text)`` pairs and must support ``popleft()``
    (presumably a ``collections.deque`` -- confirm against the tokenizer).

    A node is a pred token (``'string'`` or ``'symbol'``), a ``'nodeid'``
    token, or a pred followed by a nodeid.  If *tokens* is empty or its
    first token has any other type, nothing is consumed and None is
    returned.  Otherwise the node's context and links are read from the
    remaining tokens and an ``XmrsPathNode`` is returned.
    """
    if not tokens:
        return None
    if tokens[0][0] not in {'string', 'symbol', 'nodeid'}:
        return None

    kind, text = tokens.popleft()
    pred = None
    nodeid = None

    if kind in ('string', 'symbol'):
        # TOP and STAR are kept as-is; everything else is parsed as a pred.
        is_marker = text == TOP or text == STAR
        pred = text if is_marker else Pred.stringpred(text)
        # A nodeid may directly follow the pred; pull it in so that the
        # check below handles both "nodeid only" and "pred + nodeid".
        if tokens and tokens[0][0] == 'nodeid':
            kind, text = tokens.popleft()

    # Deliberately a separate `if`, not an `else`: see the note above.
    if kind == 'nodeid':
        nodeid = int(text[1:])  # drop the leading '#' character

    # Context must be read before links: both consume from *tokens*.
    context = _read_context(tokens)
    links = _read_links(tokens)
    return XmrsPathNode(nodeid, pred, context=context, links=links)
def _read_node(tokens):
    """Parse a single node off the left end of the token queue.

    Each token is a ``(type, text)`` pair and *tokens* must provide
    ``popleft()``.  The node may consist of a pred (``'string'`` or
    ``'symbol'`` token), a ``'nodeid'`` token, or a pred immediately
    followed by a nodeid.

    Returns None without consuming anything when *tokens* is empty or
    does not start with one of those token types; otherwise returns an
    ``XmrsPathNode`` built from the nodeid, the pred, and the context and
    links read from the tokens that follow.
    """
    if tokens and tokens[0][0] in {'string', 'symbol', 'nodeid'}:
        tok_type, tok_text = tokens.popleft()
    else:
        return None

    pred = None
    if tok_type in ('string', 'symbol'):
        pred = tok_text
        if not (tok_text == TOP or tok_text == STAR):
            # Ordinary predicate text: parse it into a Pred.
            pred = Pred.stringpred(tok_text)
        # Pred and nodeid can both be present (in that order), so look
        # ahead for a nodeid and let the test below handle it.
        if tokens and tokens[0][0] == 'nodeid':
            tok_type, tok_text = tokens.popleft()

    nodeid = None
    if tok_type == 'nodeid':
        # Skip the initial '#' character before converting.
        nodeid = int(tok_text[1:])

    # Keyword arguments are evaluated left to right, so the context is
    # read from the queue before the links.
    return XmrsPathNode(
        nodeid,
        pred,
        context=_read_context(tokens),
        links=_read_links(tokens),
    )