Exemple #1
0
def clean_instance(instance):
    '''
    Gets an instance ready for feature extraction.

    Returns a copy of ``instance`` (built with ``_replace``) in which:

    * ``tree`` is an ``APBTree.APBTree`` built from the repaired parse text,
    * ``arguments`` is a list of ``Argument`` namedtuples
      (label, position, text, description), and
    * ``predicate`` is a ``Predicate`` namedtuple (text, position).

    ``instance`` must expose ``tree``, ``pos``, ``arguments`` (a mapping of
    label -> indexable pair whose item 0 is the text and item 1 the
    description) and ``predicate``, and must support ``_replace``.

    Returns ``None`` when any step raises an ordinary exception, so callers
    can drop instances that cannot be cleaned.
    '''
    # Building the record types cannot fail, so keep it out of the try body.
    Argument = namedtuple('Argument', 'label position text description')
    Predicate = namedtuple('Predicate', 'text position')

    try:
        # make instance's tree a real tree
        tree_text = APBTree.fix_parse_tree_text(instance.tree, instance.pos)
        instance = instance._replace(tree=APBTree.APBTree(tree_text))
        tree = instance.tree

        # get arguments into useable form
        arguments = []
        for label, arg in instance.arguments.items():
            raw_text = arg[0]
            # Text containing the '-NONE-' marker is reduced to its second
            # whitespace-separated token.
            if '-NONE-' in raw_text:
                text = raw_text.split()[1]
            else:
                text = raw_text
            arguments.append(Argument(
                label=label,
                text=text,
                description=arg[1],
                position=tree.get_arg_node_position(text)))
        instance = instance._replace(arguments=arguments)

        # fix up predicate
        position = tree.get_predicate_node_position(
            instance.predicate, instance.pos)
        instance = instance._replace(
            predicate=Predicate(text=instance.predicate, position=position))

    except Exception:
        # Best effort: an instance that cannot be cleaned is reported as None.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate instead of being silently turned into None.
        return None

    return instance
Exemple #2
0
def clean_instance(instance):
    '''
    Gets an instance ready for feature extraction.

    The returned value is a copy of ``instance`` produced with ``_replace``:

    * ``tree`` becomes an ``APBTree.APBTree`` built from the repaired parse
      text,
    * ``arguments`` becomes a list of ``Argument`` namedtuples
      (label, position, text, description),
    * ``predicate`` becomes a ``Predicate`` namedtuple (text, position).

    ``instance`` is expected to provide ``tree``, ``pos``, ``predicate`` and
    ``arguments`` (a mapping of label -> indexable pair: text at index 0,
    description at index 1) and to support ``_replace``.

    ``None`` is returned if any step raises an ordinary exception.
    '''
    # Record types are created outside the try body: they cannot fail.
    Argument = namedtuple('Argument', 'label position text description')
    Predicate = namedtuple('Predicate', 'text position')

    try:
        # make instance's tree a real tree
        tree_text = APBTree.fix_parse_tree_text(instance.tree, instance.pos)
        instance = instance._replace(tree=APBTree.APBTree(tree_text))
        parsed_tree = instance.tree

        # get arguments into useable form
        arguments = []
        for label, arg in instance.arguments.items():
            arg_text = arg[0]
            # When the text carries the '-NONE-' marker, keep only its second
            # whitespace-separated token.
            text = arg_text.split()[1] if '-NONE-' in arg_text else arg_text
            argument = Argument(
                label=label,
                text=text,
                description=arg[1],
                position=parsed_tree.get_arg_node_position(text))
            arguments.append(argument)
        instance = instance._replace(arguments=arguments)

        # fix up predicate
        position = parsed_tree.get_predicate_node_position(
            instance.predicate, instance.pos)
        predicate = Predicate(text=instance.predicate, position=position)
        instance = instance._replace(predicate=predicate)

    except Exception:
        # Deliberate best effort: uncleanable instances become None.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    return instance
Exemple #3
0
    def __init__(self, tree, sense_gloss, arguments, roleset, pos,
                 sentnum=None, wordnum=None, predicate=None):
        """Store the fields of one instance and build its parse tree.

        ``sentnum``, ``wordnum`` and ``predicate`` are optional keyword
        parameters.  The body previously read these names without receiving
        them, which raises ``NameError`` unless same-named module globals
        happen to exist; they now default to ``None``.

        :param tree: parse-tree input handed to
            ``APBTree.fix_parse_tree_text`` together with ``pos``.
        :param sense_gloss: stored unchanged as ``self.sense_gloss``.
        :param arguments: iterable of arguments; stored as a tuple.
        :param roleset: stored unchanged as ``self.roleset``.
        :param pos: passed to ``APBTree.fix_parse_tree_text`` with ``tree``.
        :param sentnum: stored unchanged as ``self.sentnum``.
        :param wordnum: stored unchanged as ``self.wordnum``.
        :param predicate: stored unchanged as ``self.predicate``.
        """
        # Name of the Arabic TreeBank file containing the parse tree.
        # Currently a hard-coded placeholder.
        self.filename = 'file.name'

        # The sentence number of this sentence within its file
        # (intended to be zero-based); None when not supplied.
        self.sentnum = sentnum

        # The word number of this instance's predicate within its containing
        # sentence (intended to be zero-based and to count traces and other
        # empty parse elements); None when not supplied.
        self.wordnum = wordnum

        # Identifier for the tagger who tagged this instance; always 'gold'
        # here.
        self.tagger = 'gold'

        # The name of the roleset used by this instance's predicate.
        self.roleset = roleset

        # Inflection of this instance's predicate; not passed in, so always
        # None.
        self.inflection = None

        # Position of this instance's predicate within its containing
        # sentence; None when not supplied.
        self.predicate = predicate

        # The predicate's arguments, frozen into a tuple.  Intended to hold
        # (argloc, argid) pairs with identifiers such as 'ARG0' or
        # 'ARGM-TMP' -- TODO confirm against callers.
        self.arguments = tuple(arguments)

        self.sense_gloss = sense_gloss

        # make instance's tree a real tree
        try:
            tree_text = APBTree.fix_parse_tree_text(tree, pos)
            self.tree = APBTree.APBTree(tree_text)
        except Exception:
            # Best effort: an unparsable tree is recorded as None.  Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
            self.tree = None
Exemple #4
0
    def __init__(self, tree, sense_gloss, arguments, roleset, pos,
                 sentnum=None, wordnum=None, predicate=None):
        """Record one instance's fields and parse its tree.

        The trailing ``sentnum``, ``wordnum`` and ``predicate`` keyword
        parameters are optional and default to ``None``.  The body used to
        read those names although they were never passed in, which raises
        ``NameError`` unless module globals of the same name exist.

        :param tree: parse-tree input given to
            ``APBTree.fix_parse_tree_text`` along with ``pos``.
        :param sense_gloss: kept as ``self.sense_gloss``.
        :param arguments: iterable of arguments; kept as a tuple.
        :param roleset: kept as ``self.roleset``.
        :param pos: given to ``APBTree.fix_parse_tree_text`` with ``tree``.
        :param sentnum: kept as ``self.sentnum``.
        :param wordnum: kept as ``self.wordnum``.
        :param predicate: kept as ``self.predicate``.
        """
        # Name of the Arabic TreeBank file containing the parse tree
        # (a hard-coded placeholder for now).
        self.filename = 'file.name'

        # Sentence number of this sentence within its file (intended to be
        # indexed from zero); None if the caller did not provide it.
        self.sentnum = sentnum

        # Word number of this instance's predicate within its sentence
        # (intended to be indexed from zero, counting traces and other empty
        # parse elements); None if the caller did not provide it.
        self.wordnum = wordnum

        # Identifier of the tagger who tagged this instance; fixed to 'gold'.
        self.tagger = 'gold'

        # Name of the roleset used by this instance's predicate.
        self.roleset = roleset

        # Inflection of the predicate; no inflection is passed in, so None.
        self.inflection = None

        # Position of the predicate within its containing sentence; None if
        # the caller did not provide it.
        self.predicate = predicate

        # Arguments of the predicate, stored as a tuple.  Intended to be
        # (argloc, argid) pairs, ids like 'ARG0' or 'ARGM-TMP' -- TODO
        # confirm against callers.
        self.arguments = tuple(arguments)

        self.sense_gloss = sense_gloss

        # make instance's tree a real tree
        try:
            fixed_text = APBTree.fix_parse_tree_text(tree, pos)
            self.tree = APBTree.APBTree(fixed_text)
        except Exception:
            # Best effort: when the tree cannot be built, store None.
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.
            self.tree = None