예제 #1
0
 def test_resolve_conflicts_ALL(self):
     # Strategy 'ALL': the three overlapping spans are expected to come back
     # unchanged and in their original order.
     tagger = EventTagger([], conflict_resolving_strategy='ALL')
     overlapping = [
         {START: 1, END: 8},
         {START: 2, END: 4},
         {START: 3, END: 6},
     ]
     resolved = tagger._resolve_conflicts(overlapping)
     # Built separately from the input so the comparison does not depend on
     # whether the resolver hands back the very list it was given.
     untouched = [
         {START: 1, END: 8},
         {START: 2, END: 4},
         {START: 3, END: 6},
     ]
     self.assertListEqual(untouched, resolved)
예제 #2
0
    def test_event_tagger_naive_equals_ahocorasick(self):
        # Runs both private search methods on the same text and expects the
        # same events from each once the results share a common ordering.
        vocabulary = [
            {'term': 'üks'},
            {'term': 'kaks'},
            {'term': 'kaks kolm'},
            {'term': 's k'},
        ]
        # Two-line text: the first line ends with a space before the newline
        # and the second line starts with eight spaces of indentation.
        text = (
            'kolm kaks kaks kaks kolm kaks viis kolm neli kolm '
            'üks neli kaks neli \n'
            '        kaks kaks üks üks neli kolm viis kolm '
            'üks kaks kaks kolm kolm üks viis neli'
        )

        tagger = EventTagger(
            vocabulary,
            'naive',
            case_sensitive=True,
            conflict_resolving_strategy='ALL',
            return_layer=True,
        )

        def by_span(event):
            # Order by start, breaking ties by end.
            return (event['start'], event['end'])

        naive = tagger._find_events_naive(text)
        ahocorasick = tagger._find_events_ahocorasick(text)

        naive.sort(key=by_span)
        ahocorasick.sort(key=by_span)

        self.assertListEqual(naive, ahocorasick)
예제 #3
0
 def test_resolve_conflicts_ALL(self):
     # Under the 'ALL' strategy every one of the overlapping events is
     # expected back, in the order it was supplied.
     spans = ((1, 8), (2, 4), (3, 6))
     events = [{START: first, END: last} for first, last in spans]
     # A second, independent list so input and expectation share no objects.
     expected = [{START: first, END: last} for first, last in spans]

     resolver = EventTagger([], conflict_resolving_strategy='ALL')
     self.assertListEqual(expected, resolver._resolve_conflicts(events))
예제 #4
0
    def test_event_tagger_case_insensitive(self):
        # With case_sensitive=False the mixed-case vocabulary terms are
        # expected to match all four differently-cased words of the text;
        # each expected event carries the vocabulary's spelling of the term.
        vocabulary = [dict(term=u'üKs'), dict(term='KaKs')]
        text = Text(u'ÜkS kAkS üks KAKS.')
        tagger = EventTagger(
            vocabulary,
            case_sensitive=False,
            conflict_resolving_strategy='ALL',
            return_layer=True,
        )
        tagged = tagger.tag(text)

        expected = [
            dict(term=u'üKs', start=0, end=3,
                 wstart_raw=0, wend_raw=1, cstart=0, wstart=0, bstart=0),
            dict(term='KaKs', start=4, end=8,
                 wstart_raw=1, wend_raw=2, cstart=2, wstart=1, bstart=1),
            dict(term=u'üKs', start=9, end=12,
                 wstart_raw=2, wend_raw=3, cstart=4, wstart=2, bstart=2),
            dict(term='KaKs', start=13, end=17,
                 wstart_raw=3, wend_raw=4, cstart=6, wstart=3, bstart=3),
        ]
        self.assertListEqual(expected, tagged)
예제 #5
0
    def test_events_(self):
        # An EventText built with a tagger is expected to keep the raw text
        # in `.text` and expose the tagged events through `.events`.
        raw = 'üks kaks kolm neli'
        tagger = EventTagger([{'term': 'üks'}, {'term': 'kaks'}],
                             return_layer=True)
        tagged = EventText(raw, event_tagger=tagger)

        self.assertEqual('üks kaks kolm neli', tagged.text)

        # One row per expected event, columns in the order given by `fields`.
        fields = ('term', 'start', 'end',
                  'wstart_raw', 'wend_raw', 'cstart', 'wstart')
        rows = [
            ('üks', 0, 3, 0, 1, 0, 0),
            ('kaks', 4, 8, 1, 2, 2, 1),
        ]
        expected = [dict(zip(fields, row)) for row in rows]
        self.assertListEqual(expected, tagged.events)
예제 #6
0
    def test_event_tagger_case_insensitive(self):
        # Case-insensitive tagging: both vocabulary terms are expected to be
        # found twice in the text, regardless of the casing used there.
        vocabulary = [{'term': u'üKs'}, {'term': 'KaKs'}]
        text = Text(u'ÜkS kAkS üks KAKS.')
        tagger = EventTagger(vocabulary,
                             case_sensitive=False,
                             conflict_resolving_strategy='ALL',
                             return_layer=True)
        actual = tagger.tag(text)

        # One row per expected event, columns in the order given by `fields`.
        fields = ('term', 'start', 'end', 'wstart_raw', 'wend_raw',
                  'cstart', 'wstart', 'bstart')
        rows = [
            (u'üKs', 0, 3, 0, 1, 0, 0, 0),
            ('KaKs', 4, 8, 1, 2, 2, 1, 1),
            (u'üKs', 9, 12, 2, 3, 4, 2, 2),
            ('KaKs', 13, 17, 3, 4, 6, 3, 3),
        ]
        expected = [dict(zip(fields, row)) for row in rows]

        self.assertListEqual(expected, actual)
예제 #7
0
    def test_event_tagger_sort_events(self):
        # The vocabulary is listed in a different order than the expected
        # result, which is ordered by start offset and then by end offset.
        def hit(term, start, end, wstart_raw, wend_raw):
            # Spell out one expected event dictionary.
            return {
                'term': term,
                'start': start,
                'end': end,
                'wstart_raw': wstart_raw,
                'wend_raw': wend_raw,
            }

        vocabulary = [
            {'term': 'neli'},
            {'term': 'kolm neli'},
            {'term': 'kaks kolm'},
            {'term': 'kaks kolm neli'},
        ]
        text = Text('Üks kaks kolm neli.')
        tagger = EventTagger(
            vocabulary,
            'naive',
            case_sensitive=True,
            conflict_resolving_strategy='ALL',
            return_layer=True,
        )
        tagged = tagger.tag(text)

        expected = [
            hit('kaks kolm', 4, 13, 1, 3),
            hit('kaks kolm neli', 4, 18, 1, 4),
            hit('kolm neli', 9, 18, 2, 4),
            hit('neli', 14, 18, 3, 4),
        ]
        self.assertListEqual(expected, tagged)
예제 #8
0
    def test_event_tagger_tag_events(self):
        # Two scenarios with default tagger settings; in both, the extra
        # 'type' field of a vocabulary entry is expected on its events.

        # Scenario 1: 'Harv' is expected at the start of the word 'Harva'.
        vocabulary = [
            dict(term='Harv', type='sagedus'),
            dict(term='peavalu', type='sümptom'),
        ]
        text = Text('Harva esineb peavalu.')
        tagger = EventTagger(vocabulary, return_layer=True)
        tagged = tagger.tag(text)
        expected = [
            dict(term='Harv', type='sagedus', start=0, end=4,
                 wstart_raw=0, wend_raw=1, cstart=0, wstart=0, bstart=0),
            dict(term='peavalu', type='sümptom', start=13, end=20,
                 wstart_raw=2, wend_raw=3, cstart=10, wstart=2, bstart=2),
        ]
        self.assertListEqual(expected, tagged)

        # Scenario 2: a two-word term appears earlier in the text than a
        # single-word one.
        vocabulary = [
            dict(term='harv', type='sagedus'),
            dict(term='tugev peavalu', type='sümptom'),
        ]
        text = Text('Sümptom tugev peavalu esineb valimis harva.')
        tagger = EventTagger(vocabulary, return_layer=True)
        tagged = tagger.tag(text)
        expected = [
            dict(term='tugev peavalu', type='sümptom', start=8, end=21,
                 wstart_raw=1, wend_raw=3, cstart=8, wstart=1, bstart=1),
            dict(term='harv', type='sagedus', start=37, end=41,
                 wstart_raw=5, wend_raw=6, cstart=25, wstart=4, bstart=3),
        ]
        self.assertListEqual(expected, tagged)
예제 #9
0
    def test_event_tagger_sort_events(self):
        # Overlapping vocabulary terms with strategy 'ALL': the expected
        # events are listed by ascending start, ties broken by ascending end.
        terms = ('neli', 'kolm neli', 'kaks kolm', 'kaks kolm neli')
        vocabulary = [{'term': term} for term in terms]
        text = Text('Üks kaks kolm neli.')
        tagger = EventTagger(vocabulary, 'naive',
                             case_sensitive=True,
                             conflict_resolving_strategy='ALL',
                             return_layer=True)
        actual = tagger.tag(text)

        # One row per expected event, columns in the order given by `fields`.
        fields = ('term', 'start', 'end', 'wstart_raw', 'wend_raw')
        rows = [
            ('kaks kolm', 4, 13, 1, 3),
            ('kaks kolm neli', 4, 18, 1, 4),
            ('kolm neli', 9, 18, 2, 4),
            ('neli', 14, 18, 3, 4),
        ]
        expected = [dict(zip(fields, row)) for row in rows]

        self.assertListEqual(expected, actual)
예제 #10
0
 def test_event_tagger_naive_equals_ahocorasick(self):
     # Both private search methods are run on the same text; after each
     # result is ordered by (start, end) the two lists are expected to match.
     terms = ('üks', 'kaks', 'kaks kolm', 's k')
     vocabulary = [{'term': term} for term in terms]
     # Two-line text: the first line ends with a space before the newline
     # and the second line starts with five spaces of indentation.
     text = (
         'kolm kaks kaks kaks kolm kaks viis kolm neli kolm '
         'üks neli kaks neli \n'
         '     kaks kaks üks üks neli kolm viis kolm '
         'üks kaks kaks kolm kolm üks viis neli'
     )

     tagger = EventTagger(
         vocabulary,
         'naive',
         case_sensitive=True,
         conflict_resolving_strategy='ALL',
         return_layer=True,
     )

     found_naive = tagger._find_events_naive(text)
     found_ahocorasick = tagger._find_events_ahocorasick(text)

     for found in (found_naive, found_ahocorasick):
         # A single pass on a (start, end) key gives the same order as a
         # stable sort by end followed by a stable sort by start.
         found.sort(key=lambda event: (event['start'], event['end']))

     self.assertListEqual(found_naive, found_ahocorasick)
예제 #11
0
    def test_resolve_conflicts_MIN(self):
        # Table of (input spans, expected surviving spans) for the 'MIN'
        # strategy; in each overlapping case the shorter spans are the ones
        # expected to remain.
        cases = (
            # empty list
            ([], []),
            # one event
            ([(1, 4)], [(1, 4)]),
            # equal events
            ([(1, 4), (1, 4)], [(1, 4)]),
            # common start
            ([(1, 4), (1, 6)], [(1, 4)]),
            # common end
            ([(3, 6), (1, 6)], [(3, 6)]),
            # complex
            ([(1, 8), (2, 4), (3, 6)], [(2, 4), (3, 6)]),
        )

        tagger = EventTagger([], conflict_resolving_strategy='MIN')
        for given, surviving in cases:
            events = [{START: first, END: last} for first, last in given]
            expected = [{START: first, END: last} for first, last in surviving]
            self.assertListEqual(expected, tagger._resolve_conflicts(events))
예제 #12
0
    def test_resolve_conflicts_MAX(self):
        # Checks the 'MAX' strategy on a series of inputs; in each
        # overlapping case the longest span is the one expected to remain.
        tagger = EventTagger([], conflict_resolving_strategy='MAX')

        def assert_resolves(candidates, survivors):
            # Resolve `candidates` and compare the outcome with `survivors`.
            self.assertListEqual(survivors,
                                 tagger._resolve_conflicts(candidates))

        # empty list
        assert_resolves([], [])

        # one event
        assert_resolves([{START: 1, END: 4}],
                        [{START: 1, END: 4}])

        # equal events
        assert_resolves([{START: 1, END: 4}, {START: 1, END: 4}],
                        [{START: 1, END: 4}])

        # common start
        assert_resolves([{START: 1, END: 4}, {START: 1, END: 6}],
                        [{START: 1, END: 6}])

        # common end
        assert_resolves([{START: 3, END: 6}, {START: 1, END: 6}],
                        [{START: 1, END: 6}])

        # complex
        assert_resolves(
            [{START: 1, END: 8}, {START: 2, END: 4}, {START: 3, END: 6}],
            [{START: 1, END: 8}])
예제 #13
0
    def test_initialization_by_EventText(self):
        # Builds EventSequence objects from an EventText for several time
        # scales (and once with explicit bounds) and checks the resulting
        # start, end and sequence_of_events values.
        vocabulary = [{'term': 'kakskümmend viis'}, {'term': 'seitse'}]
        tagger = EventTagger(
            vocabulary,
            search_method='naive',
            conflict_resolving_strategy='ALL',
            return_layer=True,
        )

        def sequence_for(source, scale, **bounds):
            # All sequences in this test classify events by their 'term'.
            return EventSequence(event_text=source,
                                 classificator='term',
                                 time_scale=scale,
                                 **bounds)

        tagged = EventText('Arv kakskümmend viis on suurem kui seitse.',
                           event_tagger=tagger)

        # 'start' scale with default bounds
        seq = sequence_for(tagged, 'start')
        self.assertEqual(seq.end, 42)
        self.assertEqual(seq.start, 0)
        self.assertListEqual(
            seq.sequence_of_events,
            [Event('kakskümmend viis', 4), Event('seitse', 35)],
        )

        # 'start' scale with explicit bounds: only the first event is expected
        seq = sequence_for(tagged, 'start', start=3, end=35)
        self.assertEqual(seq.end, 35)
        self.assertEqual(seq.start, 3)
        self.assertListEqual(
            seq.sequence_of_events,
            [Event('kakskümmend viis', 4)],
        )

        # 'cstart' scale
        seq = sequence_for(tagged, 'cstart')
        self.assertEqual(seq.end, 22)
        self.assertEqual(seq.start, 0)
        self.assertListEqual(
            seq.sequence_of_events,
            [Event('kakskümmend viis', 4), Event('seitse', 20)],
        )

        # 'wstart' scale
        seq = sequence_for(tagged, 'wstart')
        self.assertEqual(seq.start, 0)
        self.assertEqual(seq.end, 7)
        self.assertListEqual(
            seq.sequence_of_events,
            [Event('kakskümmend viis', 1), Event('seitse', 5)],
        )

        # A text in which no events are expected
        eventless = EventText('Sündmusteta tekst.', event_tagger=tagger)

        seq = sequence_for(eventless, 'cstart')
        self.assertEqual(seq.start, 0)
        self.assertEqual(seq.end, 18)
        self.assertEqual(len(seq.sequence_of_events), 0)

        seq = sequence_for(eventless, 'wstart')
        self.assertEqual(seq.start, 0)
        self.assertEqual(seq.end, 3)
        self.assertEqual(len(seq.sequence_of_events), 0)
예제 #14
0
    def test_event_tagger_tag_events(self):
        # Tags two texts with default tagger settings and compares the
        # events, including the vocabulary's extra 'type' field.

        # Column order shared by the expected-event rows of both scenarios.
        fields = ('term', 'type', 'start', 'end', 'wstart_raw', 'wend_raw',
                  'cstart', 'wstart', 'bstart')

        # Scenario 1: 'Harv' is expected at the start of the word 'Harva'.
        vocabulary = [
            {'term': 'Harv', 'type': 'sagedus'},
            {'term': 'peavalu', 'type': 'sümptom'},
        ]
        text = Text('Harva esineb peavalu.')
        tagger = EventTagger(vocabulary, return_layer=True)
        actual = tagger.tag(text)
        rows = [
            ('Harv', 'sagedus', 0, 4, 0, 1, 0, 0, 0),
            ('peavalu', 'sümptom', 13, 20, 2, 3, 10, 2, 2),
        ]
        expected = [dict(zip(fields, row)) for row in rows]
        self.assertListEqual(expected, actual)

        # Scenario 2: a two-word term appears earlier in the text than a
        # single-word one.
        vocabulary = [
            {'term': 'harv', 'type': 'sagedus'},
            {'term': 'tugev peavalu', 'type': 'sümptom'},
        ]
        text = Text('Sümptom tugev peavalu esineb valimis harva.')
        tagger = EventTagger(vocabulary, return_layer=True)
        actual = tagger.tag(text)
        rows = [
            ('tugev peavalu', 'sümptom', 8, 21, 1, 3, 8, 1, 1),
            ('harv', 'sagedus', 37, 41, 5, 6, 25, 4, 3),
        ]
        expected = [dict(zip(fields, row)) for row in rows]
        self.assertListEqual(expected, actual)