Beispiel #1
0
    def add_child(self, child):
        """Add a child Papfunc_SemanticNode to this Papfunc_SemanticNode.

        This method overrides the add_child method from SemanticNode to
        enforce the constraint that the child parameter must be a
        Papfunc_SemanticNode. The type check runs first; the child is then
        attached by delegating to SemanticNode.add_child.

        Args:
            child: the child Papfunc_SemanticNode
        """
        # assert_type is defined elsewhere; presumably it raises when child is
        # not a Papfunc_SemanticNode -- confirm the exception type there.
        assert_type(child, Papfunc_SemanticNode)
        # Explicit base-class call performs the actual attachment.
        SemanticNode.add_child(self, child)
# FIRST TEST: build a Papfunc_SemanticNode for a single-leaf CCG tree and
# print its representations (Python 2 print statements).
xml_string = '''
<ccg>
<lf start="0" span="1" word="dog" lemma="dog" pos="NN" chunk="I-NP" entity="O" cat="N" />
</ccg>
'''
# Parse the XML above into a syntactic tree.
syntactic_tree = SyntacticTree.parse_tree_from_xml_string(xml_string)
# File-name prefixes for the two spaces; ".dm" and ".rows" are appended below.
# test_vector_file_prefix / test_matrix_file_prefix are defined elsewhere.
vecfilepref = test_vector_file_prefix
matfilepref = test_matrix_file_prefix
# Load the vector space and the matrix space, both in "dm" format.
vecspace = Space.build(data = vecfilepref + ".dm",
                       rows = vecfilepref + ".rows",
                       format = "dm")
matspace = Space.build(data = matfilepref + ".dm",
                       rows = matfilepref + ".rows",
                       format = "dm")

# Wrap the tree root in a SemanticNode (second argument None), then derive
# the Papfunc node from it using both spaces.
semnode = SemanticNode.create_semantic_node(syntactic_tree.root,None)
papnode = Papfunc_SemanticNode.create_papfunc_node(semnode,vecspace,matspace)

# Dump the results: the semantic node, then the node's _matrep and, one item
# per line, the contents of its _numrep.
print "*****"
print "Syntactic tree:", semnode
print "Symbolic representation:", papnode._matrep
print "Numeric representation:"
for x in papnode._numrep: print x

# SECOND TEST
xml_string = '''
<ccg>
 <rule type="fa" cat="NP[nb]">
  <lf start="0" span="1" word="A" lemma="a" pos="DT" chunk="I-NP" entity="O" cat="NP[nb]/N" />
  <rule type="fa" cat="N">
   <lf start="1" span="1" word="dog" lemma="dog" pos="NN" chunk="I-NP" entity="O" cat="N/N" />
def _syntactic_node_2_semantic_node(syntactic_node, vector_space,
                                    composition_model, normed=True):
    """Create a SemanticNode from a SyntacticNode recursively.

    For a terminal node, the lexical vector of the node's word is retrieved
    from ``vector_space`` (row-normalized when ``normed`` is true). If the
    word is not in the space, a 1 x n placeholder vector is used instead:
    all ones when ``composition_model`` is a Multiplicative model, all zeros
    otherwise, where n is the number of columns of the space's co-occurrence
    matrix.

    For a non-terminal node, the function is applied recursively to the
    child nodes, and the children's vectors are combined from left to right
    with ``composition_model._compose`` to obtain the vector of the node.

    Args:
        syntactic_node: the input syntactic node
        vector_space: a vector space where the lexical vectors can be
            retrieved
        composition_model: the compositional model, with which the vector
            representations of phrases are computed (the compositional model
            should be either WeightedAdditive, Multiplicative or FullAdditive)
        normed: a boolean value indicating whether the lexical vectors should
            be normalized or not

    Returns:
        the semantic node
    """
    if syntactic_node.is_terminal():
        new_node = SemanticNode.create_semantic_node(syntactic_node, None)
        # Only the lookup is guarded, so that a KeyError raised by anything
        # else (e.g. the normalization) is not mistaken for a missing word.
        try:
            row_vector = vector_space.get_row(syntactic_node._word)
        except KeyError:
            # Missing word: build a placeholder of the same matrix type as
            # the space. Ones for Multiplicative, zeros otherwise --
            # presumably so the unknown word acts as a neutral element in
            # the composition.
            cooccurrence_matrix = vector_space.cooccurrence_matrix
            matrix_type = type(cooccurrence_matrix)
            vector_shape = (1, cooccurrence_matrix.shape[1])
            # Builtin ``float`` replaces the ``np.float`` alias, which was
            # removed from NumPy; both denote the same dtype.
            if isinstance(composition_model, Multiplicative):
                placeholder = np.ones(vector_shape, dtype=float)
            else:
                placeholder = np.zeros(vector_shape, dtype=float)
            new_node._vector = matrix_type(placeholder)
        else:
            if normed:
                new_node._vector = RowNormalization().apply(row_vector)
            else:
                new_node._vector = row_vector
        return new_node

    # Non-terminal node: convert the children first.
    new_node = SemanticNode(syntactic_node.label, None)
    for child in syntactic_node._children:
        new_child = _syntactic_node_2_semantic_node(child, vector_space,
                                                    composition_model, normed)
        new_node.add_child(new_child)

    # Fold the children's vectors left to right. The first child is read
    # unconditionally, so a non-terminal node must have at least one child.
    new_vector = new_node.get_child(0).vector
    for i in range(1, len(new_node._children)):
        new_vector = composition_model._compose(new_vector,
                                                new_node.get_child(i).vector)

    new_node.vector = new_vector
    return new_node