Beispiel #1
0
def decode_pred(elem):
    """Build a predicate object from a ``gpred`` or ``realpred`` element.

    Args:
        elem: an object exposing ``tag``, ``text`` and ``get(name)``
            (presumably an ElementTree element -- confirm against the
            caller).

    Returns:
        The result of ``Pred.grammarpred`` for a ``gpred`` element, the
        result of ``Pred.realpred`` for a ``realpred`` element, and
        ``None`` for any other tag.
    """
    tag = elem.tag
    if tag == 'gpred':
        # Grammar predicates carry their name as the element text.
        return Pred.grammarpred(elem.text)
    if tag == 'realpred':
        # Real predicates are described entirely by attributes.
        lemma = elem.get('lemma')
        pos = elem.get('pos')
        sense = elem.get('sense')
        return Pred.realpred(lemma, pos, sense)
    return None
Beispiel #2
0
def _decode_pred(elem):
    """Turn a ``gpred`` or ``realpred`` element into a predicate object.

    The relevant DTD fragments are::

        <!ELEMENT realpred EMPTY>
        <!ATTLIST realpred
                  lemma CDATA #REQUIRED
                  pos (v|n|j|r|p|q|c|x|u|a|s) #REQUIRED
                  sense CDATA #IMPLIED >
        <!ELEMENT gpred (#PCDATA)>

    Args:
        elem: an object exposing ``tag``, ``text`` and ``get(name)``.

    Returns:
        ``Pred.grammarpred(elem.text)`` for ``gpred``,
        ``Pred.realpred(lemma, pos, sense)`` for ``realpred``, and
        ``None`` for any other tag.
    """
    tag = elem.tag
    if tag == 'gpred':
        return Pred.grammarpred(elem.text)
    if tag == 'realpred':
        # Attributes are passed positionally in DTD order.
        attrs = [elem.get(key) for key in ('lemma', 'pos', 'sense')]
        return Pred.realpred(*attrs)
    return None
Beispiel #3
0
def _decode_pred(elem):
    """Decode a predicate element into a ``Pred`` value.

    DTD for the two supported elements::

        <!ELEMENT realpred EMPTY>
        <!ATTLIST realpred
                  lemma CDATA #REQUIRED
                  pos (v|n|j|r|p|q|c|x|u|a|s) #REQUIRED
                  sense CDATA #IMPLIED >
        <!ELEMENT gpred (#PCDATA)>

    Args:
        elem: an object exposing ``tag``, ``text`` and ``get(name)``.

    Returns:
        The value produced by ``Pred.grammarpred`` (for ``gpred``) or
        ``Pred.realpred`` (for ``realpred``); ``None`` when the tag is
        neither of those.
    """
    element_tag = elem.tag

    if element_tag == 'gpred':
        pred_name = elem.text
        return Pred.grammarpred(pred_name)

    if element_tag == 'realpred':
        lemma = elem.get('lemma')
        pos = elem.get('pos')
        sense = elem.get('sense')
        return Pred.realpred(lemma, pos, sense)

    # Unrecognised tags yield no predicate.
    return None
Beispiel #4
0
def strip_category(cat):
    """Reduce a predicate string to a ``lemma_pos_sense`` category.

    Two cases are handled:

    * Strings ending in ``"u_unknown"`` are split by hand: the text
      after the last ``"/"`` supplies the pos (before its first ``"_"``)
      and the sense (after it); the lemma is replaced by ``"X"``.
    * Everything else is parsed with ``Pred.stringpred``; the lemma is
      replaced by ``"X"`` when ``cat`` starts with ``"_"`` and otherwise
      taken from the parsed predicate.

    Args:
        cat: the predicate string.

    Returns:
        The string ``"<lemma>_<pos>_<sense>"``.

    Raises:
        ValueError: if ``cat`` ends in ``"u_unknown"`` but has no ``"/"``.
    """
    if not cat.endswith("u_unknown"):
        parsed = Pred.stringpred(cat)
        lemma_part = "X" if cat.startswith("_") else parsed.lemma
        pieces = (lemma_part, str(parsed.pos), str(parsed.sense))
        return "_".join(pieces)

    # Unknown-word predicate: only the part after the last slash matters.
    _, tail = cat.rsplit("/", 1)
    # tail always ends in "u_unknown", so an underscore is guaranteed.
    pos_part, _, sense_part = tail.partition("_")
    return "_".join(("X", pos_part, sense_part))
Beispiel #5
0
def _read_node(tokens):
    """Consume one node from the front of *tokens*.

    A node is a pred, a nodeid, or a pred followed by a nodeid. Tokens
    are ``(type, text)`` pairs and are removed with ``popleft()``.

    Args:
        tokens: a deque-like sequence of ``(type, text)`` pairs.

    Returns:
        ``None`` (consuming nothing) when *tokens* is empty or its first
        token is not of type ``'string'``, ``'symbol'`` or ``'nodeid'``;
        otherwise an ``XmrsPathNode`` built from the nodeid, the pred,
        and whatever ``_read_context`` and ``_read_links`` read next.
    """
    if not tokens:
        return None
    if tokens[0][0] not in {'string', 'symbol', 'nodeid'}:
        return None

    kind, text = tokens.popleft()
    pred = None
    if kind in ('string', 'symbol'):
        # TOP and STAR are kept as-is; anything else is parsed.
        is_special = text == TOP or text == STAR
        pred = text if is_special else Pred.stringpred(text)
        # A nodeid may directly follow the pred; if so, switch to it so
        # the nodeid handling below applies to both node shapes.
        if tokens and tokens[0][0] == 'nodeid':
            kind, text = tokens.popleft()

    # Drop the leading '#' before converting to an integer.
    nodeid = int(text[1:]) if kind == 'nodeid' else None

    context = _read_context(tokens)
    links = _read_links(tokens)
    return XmrsPathNode(nodeid, pred, context=context, links=links)
Beispiel #6
0
def _read_node(tokens):
    """Read a single node off the front of *tokens*.

    The node may consist of a pred, a nodeid, or a pred immediately
    followed by a nodeid. Each token is a ``(type, text)`` pair and is
    removed from *tokens* with ``popleft()`` as it is read.

    Args:
        tokens: a deque-like sequence of ``(type, text)`` pairs.

    Returns:
        ``None`` if *tokens* is empty or does not start with a
        ``'string'``, ``'symbol'`` or ``'nodeid'`` token (nothing is
        consumed in that case); otherwise an ``XmrsPathNode`` holding
        the nodeid, the pred, and the results of ``_read_context`` and
        ``_read_links`` applied to the remaining tokens.
    """
    starts_node = bool(tokens) and (
        tokens[0][0] in {'string', 'symbol', 'nodeid'}
    )
    if not starts_node:
        return None

    token_type, token_text = tokens.popleft()
    pred = nodeid = None

    if token_type in ('string', 'symbol'):
        if token_text == TOP or token_text == STAR:
            # Special markers are stored verbatim.
            pred = token_text
        else:
            pred = Pred.stringpred(token_text)
        # Pull in a trailing nodeid, if present, so the check below
        # handles "pred + nodeid" and "nodeid only" alike.
        if tokens and tokens[0][0] == 'nodeid':
            token_type, token_text = tokens.popleft()

    if token_type == 'nodeid':
        digits = token_text[1:]  # skip the leading '#'
        nodeid = int(digits)

    node_context = _read_context(tokens)
    node_links = _read_links(tokens)
    return XmrsPathNode(
        nodeid, pred, context=node_context, links=node_links
    )
Beispiel #7
0
def decode_pred(elem):
    """Convert a ``gpred`` or ``realpred`` element to a predicate.

    Args:
        elem: an object exposing ``tag``, ``text`` and ``get(name)``.

    Returns:
        ``Pred.realpred(lemma, pos, sense)`` for a ``realpred`` element
        (values read from the same-named attributes),
        ``Pred.grammarpred(elem.text)`` for a ``gpred`` element, and
        ``None`` for any other tag.
    """
    kind = elem.tag
    if kind == 'realpred':
        fields = (elem.get(name) for name in ('lemma', 'pos', 'sense'))
        return Pred.realpred(*fields)
    if kind == 'gpred':
        return Pred.grammarpred(elem.text)
    return None