def process_unaryrule(parent):
    """Print a report for a ``unaryrule`` element, then walk its rule children.

    The report is built from, in order: the literal ``"unary"``, the rule's
    ``type`` and ``description`` attributes, the rule's own ``cat`` and
    ``sem`` children, and the ``cat`` / ``sem`` of its nested rule or ``lex``
    child.  After printing, every ``binaryrule`` / ``unaryrule`` child is
    handed to the matching ``process_*`` function.

    Reads the module-level names ``filename`` and ``der`` for the header line.

    Raises:
        AssertionError: if a nested rule/lex child has no ``sem`` child, or
            if the number of nested rule/lex children is not exactly one.
    """
    fields = ["unary", parent.attrib["type"] + " " + parent.attrib["description"]]

    # Locate the rule's own category; the semantics search below resumes from
    # this position (or is skipped entirely when no "cat" child exists).
    cat_index = len(parent)
    for index, node in enumerate(parent):
        if node.tag == "cat":
            fields.append(process_cat(node[0]))
            cat_index = index
            break

    for index in range(cat_index, len(parent)):
        node = parent[index]
        if node.tag == "sem":
            own_sem = DRSnode()
            own_sem.init_from_xml(node[0])
            fields.append(json.dumps(own_sem.serialization()))
            break

    # Collect category and semantics of the nested rule / lex child.
    nested_count = 0
    for node in parent:
        if node.tag not in ("binaryrule", "unaryrule", "lex"):
            continue
        find = False
        for part in node:
            if part.tag == "cat":
                fields.append(process_cat(part[0]))
            if part.tag == "sem":
                find = True
                nested_sem = DRSnode()
                nested_sem.init_from_xml(part[0])
                fields.append(json.dumps(nested_sem.serialization()))
                break
        assert find
        nested_count += 1
    assert nested_count == 1

    # Single-argument print(...) keeps the output of the Python 2 print
    # statement while remaining parseable by Python 3.
    print("%s %s" % (filename, der))
    print(" ".join(fields[:2]))
    print(fields[2])
    print(fields[3].encode("UTF-8"))
    print(fields[4])
    print(fields[5].encode("UTF-8"))
    print("")

    for node in parent:
        if node.tag == "binaryrule":
            process_binaryrule(node)
        elif node.tag == "unaryrule":
            process_unaryrule(node)
def process_drs(parent, out):
    """Write the JSON serialisation of a ``drs`` element to ``out`` as one line.

    Args:
        parent: element whose ``tag`` must be ``"drs"``.
        out: object with a ``write`` method that receives the JSON text
            followed by a newline.

    Raises:
        AssertionError: if ``parent.tag`` is not ``"drs"``.
    """
    assert parent.tag == "drs"

    node = DRSnode()
    node.init_from_xml(parent)
    add_pointer(node, 1000)

    payload = json.dumps(node.serialization())
    out.write(payload + "\n")
def process_binaryrule(parent):
    """Print a short report for a ``binaryrule`` element.

    The report consists of a header line (module-level ``filename`` and
    ``der``), the literal ``"binary"`` with the rule's ``type`` and
    ``description`` attributes, the processed ``cat`` child, the JSON form of
    the ``sem`` child, and a blank separator line.
    """
    fields = ["binary", parent.attrib["type"] + " " + parent.attrib["description"]]

    # Find the category first; the semantics search resumes from that
    # position and is skipped when there is no "cat" child at all.
    cat_index = len(parent)
    for index, node in enumerate(parent):
        if node.tag == "cat":
            fields.append(process_cat(node[0]))
            cat_index = index
            break

    for index in range(cat_index, len(parent)):
        node = parent[index]
        if node.tag == "sem":
            sem_node = DRSnode()
            sem_node.init_from_xml(node[0])
            add_pointer(sem_node, 1000)
            fields.append(json.dumps(sem_node.serialization()))
            break

    # Single-argument print(...) keeps the output of the Python 2 print
    # statement while remaining parseable by Python 3.
    print("%s %s" % (filename, der))
    print(" ".join(fields[:2]))
    print(fields[2])
    print(fields[3].encode("UTF-8"))
    print("")
def process_rule(parent):
    """Walk a derivation subtree and print one tab-separated line per ``lex``.

    ``binaryrule`` and ``unaryrule`` children are visited recursively.  For
    each ``lex`` child the fields token, lemma, pos, cat and sem are printed,
    separated by tabs.  ``lemma`` and ``pos`` come from ``tag`` children,
    keyed by their ``type`` attribute.

    Raises:
        KeyError: if a ``lex`` child lacks any of the printed fields.
    """
    for child in parent:
        if child.tag in ("binaryrule", "unaryrule"):
            process_rule(child)
        elif child.tag == "lex":
            entry = {}
            for part in child:
                if part.tag == "token":
                    entry["token"] = part.text
                elif part.tag == "tag":
                    entry[part.attrib["type"]] = part.text
                elif part.tag == "cat":
                    entry["cat"] = process_cat(part[0])
                elif part.tag == "sem":
                    supertag = DRSnode()
                    supertag.init_from_xml(part[0])
                    entry["sem"] = supertag.serialization()

            # general() receives the lemma together with the serialised
            # semantics; its result replaces the raw serialisation.
            entry["sem"] = general(entry["lemma"], entry["sem"])
            print("\t".join([
                entry["token"].encode("utf-8"),
                entry["lemma"].encode("utf-8"),
                entry["pos"],
                entry["cat"],
                json.dumps(entry["sem"]).encode("utf-8"),
            ]))
def process_lex(parent):
    """Print the category and JSON-serialised semantics of a ``lex`` element.

    Output is a header line (module-level ``filename`` and ``der``), the
    literal line ``lex lex``, the processed form of each ``cat`` child seen
    before the first ``sem`` child, and the JSON form of that ``sem`` child.

    Raises:
        AssertionError: if ``parent`` has no ``sem`` child.
    """
    # Single-argument print(...) keeps the output of the Python 2 print
    # statement while remaining parseable by Python 3.
    print("%s %s" % (filename, der))
    print("lex lex")

    # Must be bound before the loop: without this, a lex lacking a "sem"
    # child hit an unbound local at the assert instead of failing the assert.
    find = False
    for child in parent:
        if child.tag == "cat":
            print(process_cat(child[0]))
        if child.tag == "sem":
            find = True
            supertag = DRSnode()
            supertag.init_from_xml(child[0])
            print(json.dumps(supertag.serialization()))
            break
    assert find
def process_lex(parent):
    """Print one tab-separated line describing a ``lex`` element.

    The fields printed are token, lemma, pos, cat and sem.  ``lemma`` and
    ``pos`` come from ``tag`` children, keyed by their ``type`` attribute;
    ``sem`` is the result of ``general(lemma, serialised_sem)`` dumped as
    JSON.

    Raises:
        AssertionError: if ``parent`` has no ``sem`` child.
        KeyError: if any other printed field is missing.
    """
    entry = {}
    find = False
    for child in parent:
        if child.tag == "token":
            entry["token"] = child.text
        elif child.tag == "tag":
            entry[child.attrib["type"]] = child.text
        elif child.tag == "cat":
            entry["cat"] = process_cat(child[0])
        elif child.tag == "sem":
            find = True
            supertag = DRSnode()
            supertag.init_from_xml(child[0])
            entry["sem"] = supertag.serialization()

    # Verify the semantics were found before entry["sem"] is read; checking
    # afterwards would let a KeyError pre-empt the assertion.
    assert find

    entry["sem"] = general(entry["lemma"], entry["sem"])
    print("\t".join([
        entry["token"].encode("utf-8"),
        entry["lemma"].encode("utf-8"),
        entry["pos"],
        entry["cat"],
        json.dumps(entry["sem"]).encode("utf-8"),
    ]))
# Echo the command line as a "#"-prefixed header, then rewrite each
# blank-line-terminated record of the input file.  For every record whose
# first line is not "illegal", the JSON on its fourth line is loaded into a
# DRSnode, passed through simplify_temporal() and re-serialised in place
# before the record is printed, followed by a blank line.
# NOTE(review): L and total are expected to be initialised before this point
# (not visible in this part of the file) -- confirm.
print("# " + " ".join(sys.argv))

# "with" closes the input file even when a record fails to parse.
with open(sys.argv[1]) as record_file:
    for line in record_file:
        line = line.strip()
        if line == "":
            if not L:
                # Leading or repeated blank line: no record is buffered.
                continue
            if L[0] == "illegal":
                L = []
                continue
            total += 1
            target = json.loads(L[3], object_hook=ascii_encode_dict)
            target_DRSnode = DRSnode()
            target_DRSnode.unserialization(target)
            simplify_temporal(target_DRSnode)
            #normal_variables(target_DRSnode)
            L[3] = json.dumps(target_DRSnode.serialization())
            print("\n".join(L))
            print("")
            L = []
        else:
            # Lines starting with "#" are not part of any record.
            if line[0] == "#":
                continue
            L.append(line)
if __name__ == "__main__": L = [] eq = 0 total = 0 for line in open(sys.argv[1]): line = line.strip() if line == "": #print "\n".join(L) #print total += 1 target = json.loads(L[3], object_hook=ascii_encode_dict) target_DRSnode = DRSnode() target_DRSnode.unserialization(target) normal_variables(target_DRSnode) target = json.dumps(target_DRSnode.serialization()) source = json.loads(L[5], object_hook=ascii_encode_dict) source_DRSnode = DRSnode() source_DRSnode.unserialization(source) source_DRSnode = tc1(source_DRSnode, "x") change = json.dumps(source_DRSnode.serialization()) #l = "" #for i in range(len(target)): # if target[i] == change[i]: # l += target[i] # else: # break #print l if target == change: