def clean_instance(instance):
    '''
    Gets an instance ready for feature extraction.

    A new instance is built with ``_replace`` in which:

    * ``tree`` is an ``APBTree.APBTree`` built from the repaired parse text,
    * ``arguments`` is a list of
      ``Argument(label, position, text, description)`` tuples, one per
      entry of the original ``arguments`` mapping,
    * ``predicate`` is a ``Predicate(text, position)`` tuple.

    Returns the cleaned instance, or None if any step of the clean-up
    raises an ordinary exception (best effort: bad instances are dropped).
    Interpreter-level signals such as KeyboardInterrupt and SystemExit are
    deliberately NOT swallowed.
    '''
    try:
        # Read the raw fields first so a malformed instance fails fast.
        raw_tree, pos = instance.tree, instance.pos

        # make instance's tree a real tree
        tree_text = APBTree.fix_parse_tree_text(raw_tree, pos)
        instance = instance._replace(tree=APBTree.APBTree(tree_text))

        # get arguments into useable form
        Argument = namedtuple('Argument', 'label position text description')

        def build_argument(label, arg):
            # When the argument text carries a '-NONE-' marker, keep only
            # its second whitespace-separated token.
            text = arg[0] if '-NONE-' not in arg[0] else arg[0].split()[1]
            return Argument(
                label=label,
                text=text,
                description=arg[1],
                position=instance.tree.get_arg_node_position(text))

        arguments = [build_argument(label, arg)
                     for (label, arg) in instance.arguments.items()]
        instance = instance._replace(arguments=arguments)

        # fix up predicate
        Predicate = namedtuple('Predicate', 'text position')
        position = instance.tree.get_predicate_node_position(
            instance.predicate, instance.pos)
        predicate = Predicate(text=instance.predicate, position=position)
        instance = instance._replace(predicate=predicate)
    except Exception:
        # Only ordinary errors mean "this instance is unusable"; a bare
        # except here would also eat Ctrl-C and sys.exit().
        return None
    return instance
def clean_instance(instance):
    '''
    Gets an instance ready for feature extraction.

    A new instance is built with ``_replace`` in which:

    * ``tree`` is an ``APBTree.APBTree`` built from the repaired parse text,
    * ``arguments`` is a list of
      ``Argument(label, position, text, description)`` tuples, one per
      entry of the original ``arguments`` mapping,
    * ``predicate`` is a ``Predicate(text, position)`` tuple.

    Returns the cleaned instance, or None if any step of the clean-up
    raises an ordinary exception (best effort: bad instances are dropped).
    Interpreter-level signals such as KeyboardInterrupt and SystemExit are
    deliberately NOT swallowed.
    '''
    try:
        # Read the raw fields first so a malformed instance fails fast.
        raw_tree, pos = instance.tree, instance.pos

        # make instance's tree a real tree
        tree_text = APBTree.fix_parse_tree_text(raw_tree, pos)
        instance = instance._replace(tree=APBTree.APBTree(tree_text))

        # get arguments into useable form
        Argument = namedtuple('Argument', 'label position text description')

        def build_argument(label, arg):
            # When the argument text carries a '-NONE-' marker, keep only
            # its second whitespace-separated token.
            text = arg[0] if '-NONE-' not in arg[0] else arg[0].split()[1]
            return Argument(
                label=label,
                text=text,
                description=arg[1],
                position=instance.tree.get_arg_node_position(text))

        arguments = [build_argument(label, arg)
                     for (label, arg) in instance.arguments.items()]
        instance = instance._replace(arguments=arguments)

        # fix up predicate
        Predicate = namedtuple('Predicate', 'text position')
        position = instance.tree.get_predicate_node_position(
            instance.predicate, instance.pos)
        predicate = Predicate(text=instance.predicate, position=position)
        instance = instance._replace(predicate=predicate)
    except Exception:
        # Only ordinary errors mean "this instance is unusable"; a bare
        # except here would also eat Ctrl-C and sys.exit().
        return None
    return instance
def __init__(self, tree, sense_gloss, arguments, roleset, pos,
             sentnum=None, wordnum=None, predicate=None):
    """Initialise the instance from raw annotation fields.

    :param tree: raw parse-tree text; it is repaired with
        ``APBTree.fix_parse_tree_text`` and wrapped in ``APBTree.APBTree``.
    :param sense_gloss: stored unchanged on ``self.sense_gloss``.
    :param arguments: iterable of arguments; stored as a tuple.
    :param roleset: name of the roleset used by the predicate.
    :param pos: passed to ``APBTree.fix_parse_tree_text`` together with
        ``tree``.
    :param sentnum: optional sentence number (defaults to None).
    :param wordnum: optional word number of the predicate (defaults to
        None).
    :param predicate: optional predicate value (defaults to None).

    ``sentnum``, ``wordnum`` and ``predicate`` used to be read from names
    that were never defined in this scope; they are now explicit optional
    parameters so construction no longer depends on stray globals.
    """
    self.filename = 'file.name'
    '''Name of the Arabic TreeBank file containing the parse tree'''

    self.sentnum = sentnum
    """The sentence number of this sentence within its file.
    Indexing starts from zero. None when not supplied."""

    self.wordnum = wordnum
    """The word number of this instance's predicate within its
    containing sentence. Word numbers are indexed starting from
    zero, and include traces and other empty parse elements.
    None when not supplied."""

    self.tagger = 'gold'
    """An identifier for the tagger who tagged this instance; or
    ``'gold'`` if this is an adjudicated instance."""

    self.roleset = roleset
    """The name of the roleset used by this instance's predicate.
    Use ``propbank.roleset() <PropbankCorpusReader.roleset>`` to
    look up information about the roleset."""

    self.inflection = None  # inflection
    """A ``PropbankInflection`` object describing the inflection of
    this instance's predicate (currently always None)."""

    self.predicate = predicate
    """A ``PropbankTreePointer`` indicating the position of this
    instance's predicate within its containing sentence.
    None when not supplied."""

    self.arguments = tuple(arguments)
    """A tuple of (argloc, argid) pairs, specifying the location
    and identifier for each of the predicate's argument in the
    containing sentence. Argument identifiers are strings such as
    ``'ARG0'`` or ``'ARGM-TMP'``. This does *not* contain
    the predicate."""

    self.sense_gloss = sense_gloss

    # make instance's tree a real tree
    try:
        tree_text = APBTree.fix_parse_tree_text(tree, pos)
        self.tree = APBTree.APBTree(tree_text)
    except Exception:
        # Best effort: an unparseable tree leaves the instance usable with
        # tree=None, but Ctrl-C / SystemExit must still propagate.
        self.tree = None