Beispiel #1
0
def process_drs(parent, out):
    """Write the DRS rooted at ``parent`` to ``out`` as a single JSON line.

    ``parent`` must be an element whose tag is ``"drs"``.  It is loaded into
    a ``DRSnode``, handed to ``add_pointer`` together with the value 1000
    (the meaning of that value is defined by ``add_pointer``), and the
    node's ``serialization()`` result is JSON-encoded and written to ``out``
    followed by a newline.
    """
    assert parent.tag == "drs"

    node = DRSnode()
    node.init_from_xml(parent)
    add_pointer(node, 1000)

    payload = json.dumps(node.serialization())
    out.write(payload + "\n")
Beispiel #2
0
def process_binaryrule(parent):
    List = []
    List.append("binary")
    List.append(parent.attrib["type"] + " " + parent.attrib["description"])
    i = 0
    while i < len(parent):
        if parent[i].tag == "cat":
            List.append(process_cat(parent[i][0]))
            break
        i += 1

    while i < len(parent):
        if parent[i].tag == "sem":
            supertag = DRSnode()
            supertag.init_from_xml(parent[i][0])
            add_pointer(supertag, 1000)
            List.append(json.dumps(supertag.serialization()))
            break
        i += 1

    print filename, der
    print List[0], List[1]
    print List[2]
    print List[3].encode("UTF-8")
    print
Beispiel #3
0
def process_rule(parent):
    for child in parent:
        if child.tag == "binaryrule":
            process_rule(child)
        elif child.tag == "unaryrule":
            process_rule(child)
        elif child.tag == "lex":
            d = {}
            for cc in child:
                if cc.tag == "token":
                    d["token"] = cc.text
                if cc.tag == "tag":
                    d[cc.attrib["type"]] = cc.text
                if cc.tag == "cat":
                    d["cat"] = process_cat(cc[0])
                if cc.tag == "sem":
                    find = True
                    supertag = DRSnode()
                    supertag.init_from_xml(cc[0])
                    d["sem"] = supertag.serialization()
            d["sem"] = general(d["lemma"], d["sem"])
            print "\t".join([
                d["token"].encode("utf-8"), d["lemma"].encode("utf-8"),
                d["pos"], d["cat"],
                json.dumps(d["sem"]).encode("utf-8")
            ])
Beispiel #4
0
def process_lex(parent):
    print filename, der
    print "lex lex"
    for child in parent:
        if child.tag == "cat":
            print process_cat(child[0])
        if child.tag == "sem":
            find = True
            supertag = DRSnode()
            supertag.init_from_xml(child[0])
            print json.dumps(supertag.serialization())
            break
    assert find
Beispiel #5
0
def process_unaryrule(parent):
    List = []
    List.append("unary")
    List.append(parent.attrib["type"] + " " + parent.attrib["description"])
    i = 0
    while i < len(parent):
        if parent[i].tag == "cat":
            List.append(process_cat(parent[i][0]))
            break
        i += 1

    while i < len(parent):
        if parent[i].tag == "sem":
            supertag = DRSnode()
            supertag.init_from_xml(parent[i][0])
            List.append(json.dumps(supertag.serialization()))
            break
        i += 1

    i = 0
    cnt = 0
    while i < len(parent):
        if parent[i].tag in ["binaryrule", "unaryrule", "lex"]:
            find = False
            for child in parent[i]:
                if child.tag == "cat":
                    List.append(process_cat(child[0]))
                if child.tag == "sem":
                    find = True
                    supertag = DRSnode()
                    supertag.init_from_xml(child[0])
                    List.append(json.dumps(supertag.serialization()))
                    break
            assert find
            cnt += 1
        i += 1
    assert cnt == 1
    print filename, der
    print List[0], List[1]
    print List[2]
    print List[3].encode("UTF-8")
    print List[4]
    print List[5].encode("UTF-8")
    print
    for child in parent:
        if child.tag == "binaryrule":
            process_binaryrule(child)
        elif child.tag == "unaryrule":
            process_unaryrule(child)
Beispiel #6
0
def process_lex(parent):
    d = {}
    find = False
    for child in parent:
        if child.tag == "token":
            d["token"] = child.text
        if child.tag == "tag":
            d[child.attrib["type"]] = child.text
        if child.tag == "cat":
            d["cat"] = process_cat(child[0])
        if child.tag == "sem":
            find = True
            supertag = DRSnode()
            supertag.init_from_xml(child[0])
            d["sem"] = supertag.serialization()
    d["sem"] = general(d["lemma"], d["sem"])
    print "\t".join([
        d["token"].encode("utf-8"), d["lemma"].encode("utf-8"), d["pos"],
        d["cat"],
        json.dumps(d["sem"]).encode("utf-8")
    ])
    assert find