def test_1(self):
    filename = "ANC__110CYL067"
    fnparsed_reader = FNParsedReader()
    parsed_conll_file = options.framenet_parsed / (filename + ".conll")
    reader = framenetreader.FulltextReader(
        options.fulltext_corpus / (filename + ".xml"), False)

    frame = reader.frames[1]
    sentence_id, sentence_text, tree = list(
        fnparsed_reader.sentence_trees(parsed_conll_file))[frame.sentence_id]
    self.assertEqual(
        headwordextractor.headword(frame.args[0], tree),
        {'top_headword': ('PRP', 'you'), 'content_headword': ('PRP', 'you')})

    frame = reader.frames[25]
    sentence_id, sentence_text, tree = list(
        fnparsed_reader.sentence_trees(parsed_conll_file))[frame.sentence_id]
    self.assertEqual(
        headwordextractor.headword(frame.args[0], tree),
        {'top_headword': ('NNS', 'people'), 'content_headword': ('NNS', 'people')})

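# The "materialize all sentence trees, then index by frame.sentence_id"
# pattern above repeats in every test. A minimal refactoring sketch; the
# helper name _tree_for_frame is hypothetical and not part of the module,
# only FNParsedReader.sentence_trees and frame.sentence_id come from the
# tests above:
def _tree_for_frame(fnparsed_reader, parsed_conll_file, frame):
    """Return the syntactic tree of the sentence a frame belongs to."""
    sentence_id, sentence_text, tree = list(
        fnparsed_reader.sentence_trees(parsed_conll_file))[frame.sentence_id]
    return tree
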
def add_syntactic_information(self, frame, sentence_tree):
    """
    Add information to a frame using the syntactic annotation: whether it is
    passive or not, and the headword of each of its arguments.

    In some cases (five for the training set), our parser produces multiple
    roots, which means the resulting tree covers only one part of the
    sentence. In those cases the predicate cannot be found in the tree, a
    PredicateNotFound exception is raised and the frame is not handled by our
    labeler.

    :param frame: The frame
    :type frame: FrameInstance
    :param sentence_tree: The syntactic tree of the frame's sentence
    """
    # Search the predicate's verb in the tree and record its passive status
    try:
        search = frame.predicate.text.split()[0].lower()
        predicate_node = [node for node in frame.tree
                          if node.word == search][0]
        frame.passive = FNAllReader.is_passive(predicate_node)
    except IndexError:
        raise PredicateNotFound(
            "\nframenetparsedreader : predicate \"{}\" not found in "
            "sentence {}".format(search, frame.tree.flat()))

    # Read headwords of every instanciated argument
    for i, arg in enumerate(frame.args):
        if not arg.instanciated:
            continue
        frame.headwords[i] = headwordextractor.headword(arg, sentence_tree)

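# How a caller might cope with the multiple-root sentences mentioned in the
# docstring: a minimal sketch of a hypothetical driver loop. Only
# add_syntactic_information and PredicateNotFound come from the code above;
# annotate_frames, frames and trees_by_sentence are illustrative placeholders.
def annotate_frames(self, frames, trees_by_sentence):
    annotated = []
    for frame in frames:
        try:
            self.add_syntactic_information(
                frame, trees_by_sentence[frame.sentence_id])
        except PredicateNotFound:
            # The parse tree does not cover the predicate (multiple-root
            # sentence): skip the frame, the labeler will not handle it.
            continue
        annotated.append(frame)
    return annotated
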
def test_classes(self):
    filename = "ANC__110CYL067"
    fnparsed_reader = FNParsedReader()
    parsed_conll_file = options.framenet_parsed / (filename + ".conll")
    reader = framenetreader.FulltextReader(
        options.fulltext_annotations[0], False)

    for frame in reader.frames:
        sentence_id, sentence_text, tree = list(
            fnparsed_reader.sentence_trees(parsed_conll_file))[frame.sentence_id]
        for arg in frame.args:
            headwordextractor.headword(arg, tree)

    self.assertEqual(headwordextractor.get_class("soda"), "physical_entity.n.01")
    # self.assertEqual(headwordextractor.get_class("i"), "pronoun")

    # get_class should return None for words out of WordNet
    self.assertEqual(headwordextractor.get_class("abcde"), None)

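# The "physical_entity.n.01" value asserted above is a WordNet synset name.
# A minimal sketch of how such a class could be derived with NLTK's WordNet
# interface; this is an illustration only, not the actual implementation of
# headwordextractor.get_class, and get_class_sketch is a hypothetical name:
from nltk.corpus import wordnet as wn

def get_class_sketch(word):
    synsets = wn.synsets(word, pos=wn.NOUN)
    if not synsets:
        # Words that are not in WordNet get no class, as the test expects
        return None
    # Walk the first hypernym path (root first) and keep the synset right
    # below the WordNet root "entity.n.01", e.g. "physical_entity.n.01"
    path = synsets[0].hypernym_paths()[0]
    return path[1].name() if len(path) > 1 else path[0].name()
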
def _sentence_predicates_iterator(self, sentence_id, sentence, tree, filename):
    """ Extract frames from one sentence and iterate over them """
    logger.debug("_sentence_predicates_iterator %s" % sentence_id)
    for node in tree:
        # For every verb, look for its infinitive form in VerbNet and
        # build a frame occurrence if it is found
        logger.debug("_sentence_predicates_iterator on %s" % node.lemma)
        if node.lemma not in self.frames_for_verb:
            # logger.debug("_sentence_predicates_iterator node.lemma {} not in frames_for_verb".format(node.lemma))
            continue

        if self._is_predicate(node):
            logger.debug(
                "_sentence_predicates_iterator node.lemma {} is a predicate"
                .format(node.lemma))
            predicate = Predicate(
                node.begin_word,
                node.begin_word + len(node.word) - 1,
                node.word, node.lemma, node.word_id)

            if options.Options.heuristic_rules:
                args = [self._nodeToArg(x, node) for x in find_args(node)]
            else:
                args = self._find_args(node)
            args = [x for x in args
                    if self._is_good_phrase_type(x.phrase_type)]

            # Read headwords of every instanciated argument
            headwords = [None] * len(args)
            for i, arg in enumerate(args):
                if not arg.instanciated:
                    continue
                headwords[i] = headwordextractor.headword(arg, tree)

            logger.debug(
                '_sentence_predicates_iterator yielding {} {}…'.format(
                    predicate, args))
            yield FrameInstance(
                sentence=sentence,
                predicate=predicate,
                args=args,
                words=[Word(x.begin, x.end, x.pos) for x in tree],
                frame_name="",
                sentence_id=sentence_id,
                filename=filename,
                tree=tree,
                headwords=headwords)

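# A sketch of how the iterator above might be consumed; the wrapper name and
# the extractor/conll_sentences variables are hypothetical, only the
# FrameInstance attributes come from the code above.
def print_detected_headwords(extractor, conll_sentences, filename):
    for sentence_id, sentence, tree in conll_sentences:
        for frame_instance in extractor._sentence_predicates_iterator(
                sentence_id, sentence, tree, filename):
            # headwords is aligned with args: headwords[i] describes args[i],
            # and stays None for non-instanciated arguments
            for headword in frame_instance.headwords:
                print(headword)
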
def bootstrap_algorithm(vn_frames, probability_model, verbnet_classes):
    # See Swier and Stevenson, Unsupervised Semantic Role Labelling, 2004,
    # section 5.4 for information about the parameters' values
    log_ratio = 8
    log_ratio_step = 0.5
    min_evidence = [1, 1, 10]
    # [1, 3, 10] -> [17, 65, 2076]
    # [3, 5, 10] -> [17, 65, 2076]
    total = [0, 0, 0]

    while log_ratio >= 1:
        # Update the probability model with resolved slots (only one role)
        for frame_occurrence in vn_frames:
            for slot_position, role_set in enumerate(frame_occurrence.roles):
                if len(role_set) != 1:
                    continue
                headword = headwordextractor.headword(
                    frame_occurrence.args[slot_position],
                    frame_occurrence.tree)['content_headword'][1]
                probability_model.add_data_bootstrap(
                    next(iter(role_set)),
                    frame_occurrence.predicate,
                    verbnet_classes[frame_occurrence.predicate],
                    frame_occurrence.slot_types[slot_position],
                    frame_occurrence.slot_preps[slot_position],
                    headword,
                    headwordextractor.get_class(headword))

        # According to the article, there is no longer a minimum evidence
        # threshold when log_ratio reaches 1
        if log_ratio == 1:
            min_evidence = [1, 1, 1]

        # Try to resolve the remaining ambiguous slots
        for frame_occurrence in vn_frames:
            for slot_position in range(frame_occurrence.num_slots):
                role_set = frame_occurrence.roles[slot_position]
                if len(role_set) <= 1:
                    continue

                headword = headwordextractor.headword(
                    frame_occurrence.args[slot_position],
                    frame_occurrence.tree)['content_headword'][1]

                role = None
                for backoff_level in [0, 1, 2]:
                    role1, role2, ratio = probability_model.best_roles_bootstrap(
                        role_set,
                        frame_occurrence.predicate,
                        # Choosing the first class here is arbitrary
                        verbnet_classes[frame_occurrence.predicate],
                        frame_occurrence.slot_types[slot_position],
                        frame_occurrence.slot_preps[slot_position],
                        headword,
                        headwordextractor.get_class(headword),
                        backoff_level, min_evidence[backoff_level])

                    if (role1 is not None and
                            ((role2 is not None and log(ratio) > log_ratio) or
                             log_ratio <= 1)):
                        role = role1
                        total[backoff_level] += 1
                        break

                if role is not None:
                    frame_occurrence.restrict_slot_to_role(slot_position, role)

            frame_occurrence.select_likeliest_matches()

        log_ratio -= log_ratio_step

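# A minimal driver sketch for the bootstrap pass above: run it, then count
# how many slots ended up with exactly one candidate role. run_bootstrap is a
# hypothetical name; the counting loop only relies on frame_occurrence.roles
# being a list of role sets, as used above.
def run_bootstrap(vn_frames, probability_model, verbnet_classes):
    bootstrap_algorithm(vn_frames, probability_model, verbnet_classes)
    resolved = sum(
        1
        for frame_occurrence in vn_frames
        for role_set in frame_occurrence.roles
        if len(role_set) == 1)
    print("{} slots resolved to a single role".format(resolved))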