Example #1
import pickle

# geoserver_interface, stanford_parser, replace, annotation_to_node, and the
# rule/model helpers are provided by the surrounding project (geosolver).

def get_models():
    query = "annotated"
    print("Obtaining questions and semantic annotations...")
    questions = geoserver_interface.download_questions([query])
    semantics = geoserver_interface.download_semantics([query])

    print("Obtaining syntax trees...")
    rebuild_syntax_trees = False  # flip to True to re-parse and refresh the cache
    if rebuild_syntax_trees:
        syntax_trees = {pk: {sentence_index: stanford_parser.get_best_syntax_tree(replace(words))
                             for sentence_index, words in question.words.items()}
                        for pk, question in questions.items()}
        with open("syntax_trees.p", 'wb') as f:
            pickle.dump(syntax_trees, f)
    else:
        with open("syntax_trees.p", 'rb') as f:
            syntax_trees = pickle.load(f)

    print("Obtaining nodes...")
    nodes = {pk: {sentence_index: [annotation_to_node(annotation) for annotation in annotations.values()]
                  for sentence_index, annotations in d.items()}
             for pk, d in semantics.items()}

    print("Extracting tag rules...")
    tag_rules = []
    for pk, d in nodes.items():
        for sentence_index, dd in d.items():
            syntax_tree = syntax_trees[pk][sentence_index]
            for node in dd:
                local_tag_rules = node_to_tag_rules(syntax_tree.words, syntax_tree, node)
                tag_rules.extend(local_tag_rules)

    print("Learning tag model...")
    tag_model = CountBasedTagModel(tag_rules)

    print("Extracting semantic rules...")
    unary_rules = []
    binary_rules = []
    for pk, d in nodes.items():
        for sentence_index, dd in d.items():
            syntax_tree = syntax_trees[pk][sentence_index]
            for node in dd:
                local_unary_rules, local_binary_rules = node_to_semantic_rules(
                    syntax_tree.words, syntax_tree, tag_model, node, lift_index=True)
                unary_rules.extend(local_unary_rules)
                binary_rules.extend(local_binary_rules)

    # localities = {function_signatures['add']: 1}
    impliable_signatures = rules_to_impliable_signatures(unary_rules + binary_rules)
    uff1 = generate_unary_feature_function(unary_rules)
    bff1 = generate_binary_feature_function(binary_rules)
    print("Learning unary model...")
    unary_model = UnarySemanticModel(uff1, impliable_signatures=impliable_signatures)
    unary_model.fit(unary_rules, 1)
    print("Learning binary model...")
    binary_model = BinarySemanticModel(bff1, impliable_signatures=impliable_signatures)
    binary_model.fit(binary_rules, 1)

    print("unary weights:", unary_model.weights)
    print("binary weights:", binary_model.weights)
    print("impliable:", unary_model.impliable_signatures, binary_model.impliable_signatures)

    return tag_model, unary_model, binary_model
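
The three models returned here feed straight into test_models from Example #2. A minimal driver, assuming both functions live in the same module, might look like:

if __name__ == "__main__":
    # Train the tag, unary, and binary models, then evaluate them
    # with test_models from Example #2.
    tag_model, unary_model, binary_model = get_models()
    test_models(tag_model, unary_model, binary_model)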
Example #2
import numpy as np
import matplotlib.pyplot as plt

# Project helpers (geoserver_interface, stanford_parser, replace,
# annotation_to_node, reweigh, draw, get_pr, get_pr_by_rank) come
# from the surrounding code base.

def test_models(tag_model, unary_model, binary_model):
    print("Testing the model...")
    query = "annotated"
    questions = geoserver_interface.download_questions([query])
    semantics = geoserver_interface.download_semantics([query])
    all_gt_nodes = {}       # ground-truth nodes per question/sentence
    all_my_node_dict = {}   # decoded nodes with their probabilities
    reweighed_my_dict = {}  # decoded nodes after reweighing

    sizes = []

    for pk, question in questions.items():
        all_gt_nodes[pk] = {}
        all_my_node_dict[pk] = {}
        reweighed_my_dict[pk] = {}
        for sentence_index, words in question.words.items():
            all_gt_nodes[pk][sentence_index] = set(
                annotation_to_node(annotation)
                for annotation in semantics[pk][sentence_index].values())
            all_my_node_dict[pk][sentence_index] = {}
            reweighed_my_dict[pk][sentence_index] = {}
            words = replace(words)
            syntax_tree = stanford_parser.get_best_syntax_tree(words)
            decoder = TopDownLiftedDecoder(unary_model, binary_model)
            dist = decoder.get_formula_distribution(words, syntax_tree, tag_model)
            items = sorted(dist.items(), key=lambda x: x[1])
            sizes.append(len(items))
            print("---------------")
            print(pk, sentence_index)
            print(" ".join(words.values()))
            for node, logp in items:
                # print(node_to_semantic_rules(words, syntax_tree, tags, node, True))
                print(node, np.exp(logp))
                all_my_node_dict[pk][sentence_index][node] = np.exp(logp)
            reweighed_my_dict[pk][sentence_index] = reweigh(
                words, syntax_tree, tag_model,
                all_my_node_dict[pk][sentence_index])

    print("--------------")
    print("sizes:", max(sizes), np.median(sizes), min(sizes))

    #prs =  [get_pr(all_gt_nodes, all_my_node_dict, conf) for conf in np.linspace(-0.1,1.1,121)]
    prs = [
        get_pr_by_rank(all_gt_nodes, all_my_node_dict, rank)
        for rank in range(1, 400)
    ]
    print(prs)
    #re_prs =  [get_pr(all_gt_nodes, reweighed_my_dict, conf) for conf in np.linspace(-0.1,1.1,121)]
    re_prs = [
        get_pr_by_rank(all_gt_nodes, reweighed_my_dict, rank)
        for rank in range(1, 400)
    ]
    draw(prs)
    draw(re_prs)
    plt.show()
    pr = get_pr(all_gt_nodes, all_my_node_dict, 0)
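
get_pr_by_rank is defined elsewhere in the project. A plausible sketch, assuming it aggregates precision and recall when each sentence keeps only its rank most probable nodes, is:

def get_pr_by_rank(gt_nodes, my_node_dict, rank):
    # Hypothetical reconstruction: keep the `rank` most probable nodes
    # per sentence and score them against the ground-truth node sets.
    tp = fp = fn = 0
    for pk in gt_nodes:
        for sentence_index in gt_nodes[pk]:
            gt = gt_nodes[pk][sentence_index]
            scored = my_node_dict[pk][sentence_index]
            top = set(sorted(scored, key=scored.get, reverse=True)[:rank])
            tp += len(top & gt)
            fp += len(top - gt)
            fn += len(gt - top)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return precision, recall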
Example #3
import os

# get_lexical_parses, get_number_string, create_syntax, and
# geoserver_interface come from the surrounding project.

def test_trees():
    root_path = "/Users/minjoon/Desktop/questions2"
    os.makedirs(root_path, exist_ok=True)
    k = 300
    numbers = [26, 28]
    questions = geoserver_interface.download_questions(numbers)
    for pk, question in questions.items():
        folder_name = get_number_string(pk, 4)
        question_path = os.path.join(root_path, folder_name)
        os.makedirs(question_path, exist_ok=True)
        lexical_parses = get_lexical_parses(question.text)
        for idx, lexical_parse in enumerate(lexical_parses):
            sentence_folder_name = get_number_string(idx, 2)
            sentence_path = os.path.join(question_path, sentence_folder_name)
            os.makedirs(sentence_path, exist_ok=True)

            syntax = create_syntax(lexical_parse.tokens, k)
            syntax.save_graphs(sentence_path)
            print(pk, idx)
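
get_number_string is a project helper; from its use here it evidently zero-pads an integer to a fixed width (e.g. 26 with width 4 becomes "0026"). A one-line stand-in under that assumption:

def get_number_string(number, width):
    # Hypothetical stand-in: zero-pad number to the given width, e.g. 26 -> "0026".
    return str(number).zfill(width)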
Example #4
def test_geoserver_interface():
    data = geoserver_interface.download_questions(["annotated"])
    ann = geoserver_interface.download_semantics()
    print(ann)
    print(data)
Example #5
def test_zip_diagrams():
    questions = geoserver_interface.download_questions(['development'])
    zip_diagrams(questions, '/Users/minjoon/Desktop/development.zip')
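
zip_diagrams itself is not shown in these examples. A minimal sketch, assuming each downloaded question exposes a diagram_path attribute pointing at its diagram image (an assumption, not the project's confirmed API), could be:

import os
import zipfile

def zip_diagrams(questions, zip_path):
    # Hypothetical sketch: bundle each question's diagram into one archive.
    # `diagram_path` is an assumed attribute of the question objects.
    with zipfile.ZipFile(zip_path, 'w') as zf:
        for pk, question in questions.items():
            zf.write(question.diagram_path,
                     arcname=os.path.basename(question.diagram_path))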