コード例 #1
0
ファイル: lexical-acq.py プロジェクト: philip30/letrac
def calculate_e(node, logic_map,aligned_word,start=True):
    """Populate ``node.e`` and ``node.eorigin`` for ``node`` and its subtree.

    For a node whose label is not an ``int``, the entries found in
    ``logic_map`` under ``str_logical_rule(node.label, node.id)`` become the
    node's own ``e`` and ``eorigin`` lists, and each entry is also added to
    ``aligned_word``.  A node with an ``int`` label keeps whatever ``e`` and
    ``eorigin`` it already carries.

    Every child is then processed recursively (and stored back into
    ``node.childs``), and the child's ``e`` entries are appended to this
    node's ``e``.  Finally both lists are replaced by sorted copies.

    Args:
        node: tree node exposing ``label``, ``id`` and ``childs``.
        logic_map: mapping from a rule string to an iterable of entries.
        aligned_word: collection with an ``add`` method; receives every
            entry taken from ``logic_map``.
        start: accepted but never read here; recursive calls pass ``False``.

    Returns:
        The same ``node`` object, updated in place.
    """
    if type(node.label) is not int:
        # Look the rule up first so a failed lookup leaves the node untouched.
        own_entries = list(logic_map[str_logical_rule(node.label, node.id)])
        node.e = []
        node.eorigin = []
        for entry in own_entries:
            node.e.append(entry)
            node.eorigin.append(entry)
            aligned_word.add(entry)

    for position, subtree in enumerate(node.childs):
        processed = calculate_e(subtree, logic_map, aligned_word, False)
        node.childs[position] = processed
        # A parent's span is its own entries plus everything below it.
        node.e.extend(processed.e)

    # Rebind to fresh sorted lists rather than sorting in place.
    node.eorigin = sorted(node.eorigin)
    node.e = sorted(node.e)
    return node
コード例 #2
0
ファイル: align-gen.py プロジェクト: philip30/letrac
def main():
    """Generate alignment input files from a geoparse input file.

    Command-line arguments:
        --input   file read line by line; each line is parsed with ``extract``.
        --osent   path of the sentence output; ``.gin`` and ``.word``
                  siblings are written next to it.
        --ologic  path of the logical-form output; ``.gin`` and ``.parse``
                  siblings are written next to it.
        --output  optional path for a verbose per-sentence rule dump.
        --manual  optional file of whitespace-separated pairs, one per line,
                  loaded into ``manual_align``.

    Note that ``--osent``, ``--ologic`` and ``--output`` are opened directly
    as files even though their help text says "Directory".

    The function relies on ``manual_align``, ``words`` and ``many_literals``
    being defined at module level, along with the helpers ``extract``,
    ``construct_query_node``, ``change_var_to_x``, ``transform_into_rule``,
    ``str_logical_rule``, ``str_giza_in_rule`` and ``query_representation``
    (none of them visible here -- NOTE(review): confirm they are in scope).

    Every file opened here is closed before returning, including when an
    exception interrupts processing.
    """
    parser = argparse.ArgumentParser(description="Run Geoparse Alignment Input Generator")
    parser.add_argument('--input',type=str,required=True,help="Input file of geoparse")
    parser.add_argument('--osent',type=str,required=True,help="Directory where sentence is outputed")
    parser.add_argument('--ologic',type=str,required=True,help="Directory where logical-form is outputed")
    parser.add_argument('--output',type=str,help="Directory where verbosed output is generated")
    parser.add_argument('--manual',type=str)
    args = parser.parse_args()

    if args.manual:
        with open(args.manual) as fp:
            for line in fp:
                a, b = line.strip().split()
                manual_align[a] = b

    linecount = 0
    opened = []

    def open_tracked(path, mode):
        # Remember every handle so the ``finally`` below can close all of them.
        handle = open(path, mode)
        opened.append(handle)
        return handle

    try:
        # The input is opened first so a missing input creates no output files.
        inp = open_tracked(args.input, "r")
        out_sent = open_tracked(args.osent, "w")
        out_sent_g = open_tracked(args.osent + ".gin", "w")
        out_log = open_tracked(args.ologic, "w")
        out_log_g = open_tracked(args.ologic + ".gin", "w")
        out_log_p = open_tracked(args.ologic + ".parse", "w")
        out_w = open_tracked(args.osent + ".word", "w")
        out = open_tracked(args.output, "w") if args.output else None

        #### For every well formed query in file extract the rule!
        for line in inp:
            line = line.strip()

            (sentence_node, query_node) = extract(line,0,"")[0][0].childs

            #### Sentence and node
            sentence = [node.label for node in sentence_node.childs]

            # Drop the trailing sentence-final token; the emptiness guard
            # avoids an IndexError when a sentence has no tokens at all.
            if sentence and (sentence[-1] == "'.'" or sentence[-1] == "?"):
                sentence = sentence[:-1]

            for word in sentence:
                words.add(word)

            #### logical rule extraction
            # Each previously unseen key gets the next 1-based index.
            var_map = defaultdict(lambda: len(var_map)+1)
            query_node = construct_query_node(query_node,[])
            query_node = change_var_to_x(query_node,var_map)
            rules = transform_into_rule([],query_node,start=True)

            #### Printing
            sentence_line = " ".join(sentence) + "\n"
            out_sent.write(sentence_line)
            out_sent_g.write(sentence_line)

            logical_rule = [str_logical_rule(rule[1],rule[4]) for rule in rules]
            logical_rule_giza = [str_giza_in_rule(rule) for rule in rules]
            # Both lists are built from ``rules``; a mismatch would mean
            # ``rules`` did not yield the same items on both passes.
            if len(logical_rule) != len(logical_rule_giza):
                sys.stderr.write("Rule size doesn't match %s %s\n" % (logical_rule_giza, logical_rule))

            out_log.write(" ".join(logical_rule) + "\n")
            out_log_g.write(" ".join(logical_rule_giza)+ "\n")
            out_log_p.write(query_representation(query_node,{value:key for key, value in var_map.items()},input_generator=True) +"\n")

            if out is not None:
                out.write(sentence_line)
                for rule in rules:
                    out.write(str_logical_rule(rule[1],rule[4]) + " ||| " + str_giza_in_rule(rule)+ "\n")
                out.write("------------------------------------\n")
            linecount += 1

        #### ADDITIONAL information for alignment
        #### Every word is aligned to itself
        # The identity and manual pairs are written ten times over.
        for _ in range(10):
            for word in sorted(words):
                out_sent_g.write(word + "\n")
                out_log_g.write(word + "\n")
                out_w.write(word +"\n")

            for word1, word2 in manual_align.items():
                out_sent_g.write(word1 + "\n")
                out_log_g.write(word2 + "\n")
                out_w.write(word1 + "\n")

        #### Handle something like 'south dakota' so add alignment south -> south_dakota and dakota -> south_dakota
        for literals in many_literals:
            literals = literals.split(' ')
            joined = '_'.join(literals) + "\n"
            for word in literals:
                out_sent_g.write(word + "\n")
                out_log_g.write(joined)
    finally:
        # Close everything, including the ".parse" and ".word" files that
        # were previously left open.
        for handle in opened:
            handle.close()

    sys.stderr.write("Successfully extracting : %s pair(s).\n" % linecount)