Beispiel #1
0
    def determine_feature_value_for_instance(self, igt):
        """Tally the verb/subject/object order observed in one IGT instance.

        Reads the ``"w"`` and ``"w-ds"`` entries of *igt*.  The first item of
        ``"w-ds"`` must have the text ``"root"``; the word it points to (via
        its ``"dep"`` attribute) is recorded as "V".  The first ``nsubj`` and
        the first ``dobj`` items whose ``"head"`` attribute equals the root's
        ``"dep"`` are recorded as "S" and "O".

        Only when both a subject and an object were found are
        ``self.instance_count`` and ``self.order_counts`` updated (and the
        order logged when ``self.debug`` is set).

        Args:
            igt: mapping-like instance providing the ``"w"`` and ``"w-ds"``
                tiers.

        Returns:
            None.  The instance is skipped silently when a tier is missing,
            when the dependency tier is empty, or when it does not start
            with a root item.
        """
        try:
            words = igt["w"]
            dparse = igt["w-ds"]
        except KeyError:
            return

        # An empty parse has no root either; bail out before indexing into
        # it (the previous guard fell through to dparse[0] in that case).
        if len(dparse) == 0 or dparse[0].text != "root":
            return

        root_item = dparse[0]
        root_word = root_item.attributes["dep"]
        word = words[root_word]
        segmentation = word.segmentation
        word_token = ref.resolve(igt, segmentation)
        function_list = [WordPos(word_token, "V", segmentation)]

        found_subj = False
        found_obj = False

        # Look for the first subject and the first object attached to the root.
        for pos in dparse:
            if pos.text == "root":
                continue
            word = words[pos.attributes["dep"]]
            segmentation = word.segmentation
            word_token = ref.resolve(igt, segmentation)
            head_word = pos.attributes["head"]
            attached_to_root = head_word == root_word
            if pos.text == "nsubj" and not found_subj and attached_to_root:
                function_list.append(WordPos(word_token, "S", segmentation))
                found_subj = True
            elif pos.text == "dobj" and not found_obj and attached_to_root:
                function_list.append(WordPos(word_token, "O", segmentation))
                found_obj = True
            if found_subj and found_obj:
                break  # we have everything we need

        # Nothing is counted unless verb, subject and object are all present.
        if not (found_subj and found_obj):
            return

        # sorted() relies on WordPos's own ordering
        # (presumably position in the sentence -- TODO confirm).
        word_order = FeatureProbe.list_to_word_order(sorted(function_list))
        self.instance_count += 1
        self.order_counts[word_order] += 1.0
        if self.debug:
            self.debug_log("WORD-ORDER: " + word_order)
Beispiel #2
0
    def determine_feature_value_for_instance(self, igt):
        """Tally adjective/noun order for every ``amod`` dependency in *igt*.

        Reads the ``"w"`` and ``"w-ds"`` entries of *igt*.  For each item of
        ``"w-ds"`` whose text is ``"amod"`` and for which
        ``self.is_head_noun`` is true, the dependent word and its head word
        are wrapped in ``WordPos`` objects and compared; the matching entry
        of ``self.order_counts`` ("Noun-Adjective" or "Adjective-Noun") and
        ``self.instance_count`` are incremented.

        Args:
            igt: mapping-like instance providing the ``"w"`` and ``"w-ds"``
                tiers.

        Returns:
            None.  The instance is skipped silently when a tier is missing
            or the dependency tier is empty; an ``amod`` item whose
            processing raises ``KeyError`` is skipped as well.
        """
        try:
            words = igt["w"]
            dparse = igt["w-ds"]
        except KeyError:
            return

        # The original test was ``len(dparse) < 0``, which can never hold.
        if len(dparse) == 0:
            return  # early exit: nothing to inspect

        # loop looking for all adjectives
        for pos in dparse:
            if pos.text != "amod":
                continue
            word = words[pos.attributes["dep"]]
            word_segmentation = word.segmentation
            word_token = ref.resolve(igt, word_segmentation)
            try:
                if not self.is_head_noun(pos, igt, dparse):
                    continue
                head_word = words[pos.attributes["head"]]
                head_word_segmentation = head_word.segmentation
                adj = WordPos(word_token, "adj", word_segmentation)
                # NOTE(review): the adjective gets its resolved token while
                # the noun gets the raw word item -- confirm whether WordPos
                # expects a resolved token here as well.
                noun = WordPos(head_word, "noun", head_word_segmentation)
                if adj > noun:
                    self.order_counts["Noun-Adjective"] += 1.0
                else:  # assume noun > adj
                    self.order_counts["Adjective-Noun"] += 1.0
                self.instance_count += 1
            except KeyError:
                # best effort: skip adjectives whose head cannot be looked up
                continue
Beispiel #3
0
    def test_resolve(self):
        """Check ``ref.resolve`` against the ``xc1`` and ``xc3`` fixtures."""
        # Resolving 'p1' from the corpus itself or from the 't' tier must fail.
        corpus = self.xc1
        with self.assertRaises(XigtError):
            ref.resolve(corpus, 'p1')
        igt1 = self.xc1[0]
        tier_t = igt1['t']
        with self.assertRaises(XigtError):
            ref.resolve(tier_t, 'p1')

        # Whole-item resolution, from the igt and from the 'p' tier.
        self.assertEqual(ref.resolve(igt1, 'p1'), 'inu=ga san-biki hoe-ru')
        tier_p = igt1['p']
        self.assertEqual(ref.resolve(tier_p, 'p1'), 'inu=ga san-biki hoe-ru')

        # Another item id, then span selections on 'p1'.
        for expression, expected in (
            ('t1', 'Three dogs bark.'),
            ('p1[0:6]', 'inu=ga'),
            ('p1[0:6,7:15]', 'inu=ga san-biki'),
        ):
            self.assertEqual(ref.resolve(igt1, expression), expected)

        igt3 = self.xc3[0]
        for item_id, expected in (
            ('w1', 'inu=ga'),
            ('m1', 'inu'),
            ('g1', 'dog'),
        ):
            self.assertEqual(ref.resolve(igt3, item_id), expected)
Beispiel #4
0
 def resolve_ref(self, refattr):
     """Resolve the reference held in this item's *refattr* attribute.

     The item's own ``attributes[refattr]`` supplies the expression to
     resolve; the containing tier's ``attributes[refattr]`` supplies the id
     of the tier it is resolved against.

     Args:
         refattr: name of the reference attribute to follow.

     Returns:
         Whatever ``ref.resolve`` returns for the referred tier and the
         item's expression.

     Raises:
         XigtStructureError: if the item has no tier, if ``self.igt`` is
             None, or if the referred tier is not found in the Igt.
         A missing *refattr* on the item or its tier is not caught here;
         the attribute lookup's own error propagates.
     """
     expression = self.attributes[refattr]

     owner_tier = self.tier
     if owner_tier is None:
         message = (
             'Cannot resolve item reference; item (id: {}) is not '
             'contained by a Tier.'
         ).format(self.id)
         raise XigtStructureError(message)

     target_id = owner_tier.attributes[refattr]

     owner_igt = self.igt
     if owner_igt is None:
         message = (
             "Cannot resolve item reference; item's tier (id: {}) "
             "is not contained by an Igt."
         ).format(self.tier.id)
         raise XigtStructureError(message)

     target_tier = owner_igt.get(target_id)
     if target_tier is None:
         message = 'Referred tier (id: {}) does not exist in the Igt.'
         raise XigtStructureError(message.format(target_id))

     return ref.resolve(target_tier, expression)
Beispiel #5
0
 def resolve_ref(self, refattr):
     """Follow the *refattr* reference of this item and return its value.

     The expression to resolve is read from ``self.attributes[refattr]``,
     and the id of the tier to resolve it against is read from
     ``self.tier.attributes[refattr]``.

     Args:
         refattr: name of the reference attribute to follow.

     Returns:
         The result of ``ref.resolve`` applied to the referred tier and the
         item's expression.

     Raises:
         XigtStructureError: when the item is not inside a tier, when
             ``self.igt`` is None, or when the Igt has no tier with the
             referred id.
         Lookup errors for a missing *refattr* are not handled here and
         propagate to the caller.
     """
     # Read the item's own expression first, before any structural checks.
     item_expr = self.attributes[refattr]

     if self.tier is None:
         raise XigtStructureError(
             'Cannot resolve item reference; item (id: {item_id}) is not '
             'contained by a Tier.'.format(item_id=self.id))

     referred_id = self.tier.attributes[refattr]

     if self.igt is None:
         raise XigtStructureError(
             "Cannot resolve item reference; item's tier (id: {tier_id}) "
             "is not contained by an Igt.".format(tier_id=self.tier.id))

     referred_tier = self.igt.get(referred_id)
     if referred_tier is None:
         raise XigtStructureError(
             'Referred tier (id: {tier_id}) does not exist in the Igt.'
             .format(tier_id=referred_id))

     return ref.resolve(referred_tier, item_expr)
Beispiel #6
0
    def test_resolve(self):
        """Check ``ref.resolve`` against the ``xc1`` and ``xc3`` fixtures."""
        # Resolving 'p1' from the corpus itself or from the 't' tier must fail.
        with pytest.raises(XigtError):
            ref.resolve(self.xc1, 'p1')
        with pytest.raises(XigtError):
            ref.resolve(self.xc1[0]['t'], 'p1')

        # Whole-item resolution, from the igt and from the 'p' tier.
        igt1 = self.xc1[0]
        full_line = 'inu=ga san-biki hoe-ru'
        assert ref.resolve(igt1, 'p1') == full_line
        assert ref.resolve(igt1['p'], 'p1') == full_line

        # Another item id, then span selections on 'p1'.
        for expression, expected in (
            ('t1', 'Three dogs bark.'),
            ('p1[0:6]', 'inu=ga'),
            ('p1[0:6,7:15]', 'inu=ga san-biki'),
        ):
            assert ref.resolve(igt1, expression) == expected

        igt3 = self.xc3[0]
        for item_id, expected in (
            ('w1', 'inu=ga'),
            ('m1', 'inu'),
            ('g1', 'dog'),
        ):
            assert ref.resolve(igt3, item_id) == expected