Ejemplo n.º 1
0
 def testGetPhraseStemsMultiPhrase(self):
     # One annotation covering tokens 3..8 is reported as a single 'mask'
     # stem; the previous behaviour repeated 'mask' five times, once per
     # successive row.
     sentence = Sentence(TEST_TOKENS)
     masked = sentence.addAnnotation('mask', 3, 8)
     stems = list(masked.getPhraseStems())
     self.assertListEqual(stems, ['mask'])
Ejemplo n.º 2
0
 def testAddAnnotationsAreUnique(self):
     # Adding the very same annotation twice must leave one type entry
     # holding one annotation.
     sentence = Sentence(TEST_TOKENS)
     for _ in range(2):
         sentence.addAnnotation('type1', 2, 4)
     self.assertEqual(len(sentence.annotations), 1)
     stored = sum(len(group) for group in sentence.annotations.values())
     self.assertEqual(stored, 1)
Ejemplo n.º 3
0
 def testGetAnnotations(self):
     # getAnnotations(type) is expected to return the set of annotations
     # that were added under that type.
     sentence = Sentence(TEST_TOKENS)
     only_type1 = sentence.addAnnotation('type1', 2, 4)
     first_type2 = sentence.addAnnotation('type2', 6)
     second_type2 = sentence.addAnnotation('type2', 8, 9)
     found_type1 = sentence.getAnnotations('type1')
     self.assertEqual(found_type1, {only_type1})
     found_type2 = sentence.getAnnotations('type2')
     self.assertEqual(found_type2, {first_type2, second_type2})
Ejemplo n.º 4
0
 def testGetPhraseStemsMultiPhrase(self):
     sentence = Sentence(TEST_TOKENS)
     annotation = sentence.addAnnotation('mask', 3, 8)
     # Formerly 'mask' was yielded five times here (one per row); now the
     # annotation contributes exactly one 'mask' stem.
     expected = ['mask']
     self.assertListEqual(list(annotation.getPhraseStems()), expected)
Ejemplo n.º 5
0
 def testGetPhraseNumber(self):
     # Each annotation is paired with the phrase number it should report.
     sentence = Sentence(TEST_TOKENS)
     cases = (
         (sentence.addAnnotation('t', 8), 0),
         (sentence.addAnnotation('t', 0, 2), 1),
         (sentence.addAnnotation('t', 4), 2),
     )
     for annotation, number in cases:
         self.assertEqual(annotation.getPhraseNumber_(), number)
Ejemplo n.º 6
0
 def testGetWords(self):
     # words() is expected to return the plain words even though several
     # spans carry a 'mask' annotation.
     sentence = Sentence(TEST_TOKENS)
     for span in ((2, 4), (6,), (8, 9)):
         sentence.addAnnotation('mask', *span)
     every_word = ['word%d' % index for index in range(len(sentence))]
     self.assertListEqual(list(sentence.words()), every_word)
     tail = ['word7', 'word8', 'word9']
     self.assertListEqual(list(sentence.words(7)), tail)
Ejemplo n.º 7
0
 def testGetPhraseNumber(self):
     sentence = Sentence(TEST_TOKENS)
     at_8 = sentence.addAnnotation('t', 8)
     at_0_to_2 = sentence.addAnnotation('t', 0, 2)
     at_4 = sentence.addAnnotation('t', 4)
     # Expected phrase numbers, in the order the annotations were added.
     annotations = (at_8, at_0_to_2, at_4)
     for expected, annotation in enumerate(annotations):
         self.assertEqual(annotation.getPhraseNumber_(), expected)
Ejemplo n.º 8
0
 def testGetPhraseNumbers(self):
     # phraseNumbers() over the whole sentence, then over the range (1, 6).
     sentence = Sentence(TEST_TOKENS)
     whole = list(sentence.phraseNumbers())
     self.assertListEqual(whole, [1, 2, 3, 4])
     partial = list(sentence.phraseNumbers(1, 6))
     self.assertListEqual(partial, [1, 2])
Ejemplo n.º 9
0
 def testIsInsidePhrase(self):
     sentence = Sentence(TEST_TOKENS)
     # NOTE(review): the third annotation is typed 'false' yet is asserted
     # to be inside a phrase like the others - confirm this is intended.
     annotations = [
         sentence.addAnnotation('true', 3),
         sentence.addAnnotation('true', 0, 2),
         sentence.addAnnotation('false', 8),
     ]
     for annotation in annotations:
         self.assertTrue(annotation.isInsidePhrase())
Ejemplo n.º 10
0
 def testGetPhraseTags(self):
     # phraseTags() over the whole sentence, then over the range (1, 6).
     sentence = Sentence(TEST_TOKENS)
     all_tags = list(sentence.phraseTags())
     self.assertListEqual(all_tags, ['NP'] * 4)
     ranged_tags = list(sentence.phraseTags(1, 6))
     self.assertListEqual(ranged_tags, ['NP'] * 2)
Ejemplo n.º 11
0
 def testGetPhraseTag(self):
     # Each annotation is paired with the phrase tag it should report.
     sentence = Sentence(TEST_TOKENS)
     cases = (
         (sentence.addAnnotation('t', 2), 'O'),
         (sentence.addAnnotation('t', 0, 2), 'NP'),
         (sentence.addAnnotation('t', 4), 'NP'),
     )
     for annotation, tag in cases:
         self.assertEqual(annotation.getPhraseTag_(), tag)
Ejemplo n.º 12
0
 def testPhraseDistanceIfOverlapping(self):
     # Both pairs (A, B) and (C, D) are expected at phrase distance zero.
     sentence = Sentence(TEST_TOKENS)
     wide_left = sentence.addAnnotation('A', 0, 2)
     narrow_left = sentence.addAnnotation('B', 1)
     narrow_right = sentence.addAnnotation('C', 7)
     wide_right = sentence.addAnnotation('D', 6, 8)
     pairs = ((wide_left, narrow_left), (narrow_right, wide_right))
     for source, target in pairs:
         self.assertEqual(source.phraseDistanceTo(target), 0)
Ejemplo n.º 13
0
 def testIsInsidePhrase(self):
     sentence = Sentence(TEST_TOKENS)
     single_3 = sentence.addAnnotation('true', 3)
     span_0_2 = sentence.addAnnotation('true', 0, 2)
     # NOTE(review): typed 'false' but asserted True below - confirm.
     single_8 = sentence.addAnnotation('false', 8)
     for annotation in (single_3, span_0_2, single_8):
         inside = annotation.isInsidePhrase()
         self.assertTrue(inside)
Ejemplo n.º 14
0
 def testVerbPhraseBetweenExactOverlap(self):
     # Tokens 0 and 1 are rewritten into a verb phrase chunk; two
     # annotations at the same position (3) must yield None.
     modified = list(TEST_TOKENS)
     modified[0] = modified[0].replace(chunk="B-VP")
     modified[1] = modified[1].replace(chunk="I-VP", stem="sentinel")
     sentence = Sentence(modified)
     first = sentence.addAnnotation('this', 3)
     second = sentence.addAnnotation('other', 3)
     between = first.verbPhraseBetween(second)
     self.assertEqual(between, None)
Ejemplo n.º 15
0
 def testVerbPhraseBetweenExactOverlap(self):
     token_list = list(TEST_TOKENS)
     # Rewrite the first two tokens into a B-VP/I-VP chunk.
     overrides = (
         (0, dict(chunk="B-VP")),
         (1, dict(chunk="I-VP", stem="sentinel")),
     )
     for index, changes in overrides:
         token_list[index] = token_list[index].replace(**changes)
     sentence = Sentence(token_list)
     here = sentence.addAnnotation('this', 3)
     there = sentence.addAnnotation('other', 3)
     # Both annotations sit on token 3; None is expected between them.
     self.assertEqual(here.verbPhraseBetween(there), None)
Ejemplo n.º 16
0
 def testGetPhraseTag(self):
     sentence = Sentence(TEST_TOKENS)
     at_2 = sentence.addAnnotation('t', 2)
     at_0_to_2 = sentence.addAnnotation('t', 0, 2)
     at_4 = sentence.addAnnotation('t', 4)
     # Expected tags, in the order the annotations were added.
     expected_tags = ('O', 'NP', 'NP')
     for annotation, tag in zip((at_2, at_0_to_2, at_4), expected_tags):
         self.assertEqual(annotation.getPhraseTag_(), tag)
Ejemplo n.º 17
0
 def testPhraseDistanceIfOverlapping(self):
     sentence = Sentence(TEST_TOKENS)
     ann_a = sentence.addAnnotation('A', 0, 2)
     ann_b = sentence.addAnnotation('B', 1)
     ann_c = sentence.addAnnotation('C', 7)
     ann_d = sentence.addAnnotation('D', 6, 8)
     # A to B and C to D are each expected to be zero phrases apart.
     distance_ab = ann_a.phraseDistanceTo(ann_b)
     self.assertEqual(distance_ab, 0)
     distance_cd = ann_c.phraseDistanceTo(ann_d)
     self.assertEqual(distance_cd, 0)
Ejemplo n.º 18
0
 def testVerbPhraseBetween(self):
     # Tokens 3 and 4 become a verb phrase with sentinel stems; those stems
     # are expected between the annotations at 0 and 6.
     modified = list(TEST_TOKENS)
     modified[3] = modified[3].replace(chunk="B-VP", stem="sentinel1")
     modified[4] = modified[4].replace(chunk="I-VP", stem="sentinel2")
     sentence = Sentence(modified)
     source = sentence.addAnnotation('this', 0)
     target = sentence.addAnnotation('other', 6)
     stems = list(source.verbPhraseBetween(target))
     self.assertListEqual(stems, ['sentinel1', 'sentinel2'])
Ejemplo n.º 19
0
 def testGetPrepositionedNounPhrase(self):
     # Tokens 3 and 4 are rewritten into a B-PP/I-PP chunk before the
     # sentence is built.
     modified = list(TEST_TOKENS)
     modified[3] = modified[3].replace(chunk="B-PP")
     modified[4] = modified[4].replace(chunk="I-PP")
     sentence = Sentence(modified)
     sentence.addAnnotation('sentinel', 0)
     annotation = sentence.addAnnotation('type', 6)
     phrase = list(annotation.getPrepositionedNounPhrase_())
     self.assertListEqual(phrase, ['sentinel', 'stem1'])
Ejemplo n.º 20
0
 def testGetPrepositionedNounPhrase(self):
     token_list = list(TEST_TOKENS)
     # Turn tokens 3 and 4 into a prepositional phrase chunk.
     for index, chunk_tag in ((3, "B-PP"), (4, "I-PP")):
         token_list[index] = token_list[index].replace(chunk=chunk_tag)
     sentence = Sentence(token_list)
     sentence.addAnnotation('sentinel', 0)
     typed = sentence.addAnnotation('type', 6)
     expected = ['sentinel', 'stem1']
     self.assertListEqual(list(typed.getPrepositionedNounPhrase_()), expected)
Ejemplo n.º 21
0
 def testVerbPhraseBetween(self):
     token_list = list(TEST_TOKENS)
     # Rewrite tokens 3 and 4 into a verb phrase with recognisable stems.
     overrides = (
         (3, dict(chunk="B-VP", stem="sentinel1")),
         (4, dict(chunk="I-VP", stem="sentinel2")),
     )
     for index, changes in overrides:
         token_list[index] = token_list[index].replace(**changes)
     sentence = Sentence(token_list)
     here = sentence.addAnnotation('this', 0)
     there = sentence.addAnnotation('other', 6)
     expected = ['sentinel1', 'sentinel2']
     self.assertListEqual(list(here.verbPhraseBetween(there)), expected)
Ejemplo n.º 22
0
    def testTokenDistanceIfOverlapping(self):
        # C is compared against A, B, D and E; each comparison is expected
        # to give a token distance of -1.
        sentence = Sentence(TEST_TOKENS)
        spans = (
            ('A', 2, 4),
            ('B', 2, 3),
            ('C', 1, 3),
            ('D', 0, 2),
            ('E', 1, 2),
        )
        a, b, c, d, e = [sentence.addAnnotation(*span) for span in spans]

        for other in (a, b, d, e):
            self.assertEqual(c.tokenDistanceTo(other), -1)
Ejemplo n.º 23
0
    def testTokenDistanceIfOverlapping(self):
        sentence = Sentence(TEST_TOKENS)
        ann_a = sentence.addAnnotation('A', 2, 4)
        ann_b = sentence.addAnnotation('B', 2, 3)
        reference = sentence.addAnnotation('C', 1, 3)
        ann_d = sentence.addAnnotation('D', 0, 2)
        ann_e = sentence.addAnnotation('E', 1, 2)

        # The distance from C to each other annotation is expected to be -1.
        for candidate in (ann_a, ann_b, ann_d, ann_e):
            distance = reference.tokenDistanceTo(candidate)
            self.assertEqual(distance, -1)
Ejemplo n.º 24
0
 def testGetMaskedStems(self):
     # Within the range (3, 7) annotated tokens are expected to appear
     # under their annotation type instead of their stem.
     sentence = Sentence(TEST_TOKENS)
     for args in (('type1', 2, 4), ('type2', 6), ('type2', 8, 9)):
         sentence.addAnnotation(*args)
     masked = list(sentence.maskedStems(3, 7))
     self.assertListEqual(masked, ['type1', 'stem4', 'stem5', 'type2'])
Ejemplo n.º 25
0
 def testPosTagsTo(self):
     # Each case is (source, target, PoS tags expected between them).
     sentence = Sentence(TEST_TOKENS)
     at_5 = sentence.addAnnotation('true', 5)
     at_0_to_2 = sentence.addAnnotation('true', 0, 2)
     at_3_to_5 = sentence.addAnnotation('true', 3, 5)
     at_0 = sentence.addAnnotation('true', 0)
     at_9 = sentence.addAnnotation('true', 9)
     cases = (
         (at_5, at_0_to_2, ['pos2', 'pos3', 'pos4']),
         (at_5, at_5, []),
         (at_0_to_2, at_3_to_5, ['pos2']),
         (at_0, at_9, ['pos%d' % i for i in range(1, 9)]),
     )
     for source, target, expected in cases:
         self.assertEqual(list(source.posTagsBetween(target)), expected)
Ejemplo n.º 26
0
 def testPhraseTagsTo(self):
     # Each case is (source, target, phrase tags expected between them).
     sentence = Sentence(TEST_TOKENS)
     at_5 = sentence.addAnnotation('true', 5)
     at_0_to_2 = sentence.addAnnotation('true', 0, 2)
     at_3_to_5 = sentence.addAnnotation('true', 3, 5)
     at_0 = sentence.addAnnotation('true', 0)
     at_9 = sentence.addAnnotation('true', 9)
     cases = (
         (at_5, at_0_to_2, ['NP']),
         (at_5, at_5, []),
         (at_0_to_2, at_3_to_5, []),
         (at_0, at_9, ['NP', 'NP', 'NP']),
     )
     for source, target, expected in cases:
         self.assertEqual(list(source.phraseTagsBetween(target)), expected)
Ejemplo n.º 27
0
 def testPhraseTagsTo(self):
     sentence = Sentence(TEST_TOKENS)
     spans = ((5,), (0, 2), (3, 5), (0,), (9,))
     first, second, third, head, last = [
         sentence.addAnnotation('true', *span) for span in spans
     ]
     # Phrase tags expected between the respective annotation pairs.
     tags = list(first.phraseTagsBetween(second))
     self.assertEqual(tags, ['NP'])
     tags = list(first.phraseTagsBetween(first))
     self.assertEqual(tags, [])
     tags = list(second.phraseTagsBetween(third))
     self.assertEqual(tags, [])
     tags = list(head.phraseTagsBetween(last))
     self.assertEqual(tags, ['NP'] * 3)
Ejemplo n.º 28
0
 def testPosTagsTo(self):
     sentence = Sentence(TEST_TOKENS)
     spans = ((5,), (0, 2), (3, 5), (0,), (9,))
     first, second, third, head, last = [
         sentence.addAnnotation('true', *span) for span in spans
     ]
     # PoS tags expected between the respective annotation pairs.
     tags = list(first.posTagsBetween(second))
     self.assertEqual(tags, ['pos2', 'pos3', 'pos4'])
     tags = list(first.posTagsBetween(first))
     self.assertEqual(tags, [])
     tags = list(second.posTagsBetween(third))
     self.assertEqual(tags, ['pos2'])
     tags = list(head.posTagsBetween(last))
     self.assertEqual(tags, ['pos%d' % i for i in range(1, 9)])
Ejemplo n.º 29
0
 def testGetMaskedWords(self):
     # The span (2, 4) should only produce one masked token ("type1"), as
     # the expected list below shows between 'word1' and 'word4'.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type2', 6)
     sentence.addAnnotation('type2', 8, 9)
     self.maxDiff = None  # do not truncate the diff on failure
     expected_all = [
         'word0', 'word1', 'type1', 'word4', 'word5',
         'type2', 'word7', 'type2', 'word9',
     ]
     self.assertListEqual(list(sentence.maskedWords()), expected_all)
     expected_tail = ['word7', 'type2', 'word9']
     self.assertListEqual(list(sentence.maskedWords(7)), expected_tail)
Ejemplo n.º 30
0
 def testGetWords(self):
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('mask', 2, 4)
     sentence.addAnnotation('mask', 6)
     sentence.addAnnotation('mask', 8, 9)
     # The 'mask' annotations must not show up in the plain words.
     words = list(sentence.words())
     self.assertListEqual(
         words, ['word%d' % number for number in range(len(sentence))])
     words_from_7 = list(sentence.words(7))
     self.assertListEqual(words_from_7, ['word7', 'word8', 'word9'])
Ejemplo n.º 31
0
 def testGetMaskedStems(self):
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type2', 6)
     sentence.addAnnotation('type2', 8, 9)
     # Range (3, 7): annotation types are expected in place of the stems
     # of annotated tokens.
     expected = ['type1', 'stem4', 'stem5', 'type2']
     self.assertListEqual(list(sentence.maskedStems(3, 7)), expected)
Ejemplo n.º 32
0
 def testGetMaskedWords(self):
     sentence = Sentence(TEST_TOKENS)
     # The first span should only fetch one masked token ("type1").
     for args in (('type1', 2, 4), ('type2', 6), ('type2', 8, 9)):
         sentence.addAnnotation(*args)
     self.maxDiff = None
     masked = list(sentence.maskedWords())
     self.assertListEqual(masked, [
         'word0', 'word1', 'type1',
         'word4', 'word5', 'type2',
         'word7', 'type2', 'word9',
     ])
     masked_from_7 = list(sentence.maskedWords(7))
     self.assertListEqual(masked_from_7, ['word7', 'type2', 'word9'])
Ejemplo n.º 33
0
def asDict(sentence: "Sentence", ngrams=2):
    """Convert a :class:`fnl.text.sentence.Sentence` into a feature dictionary.

    The dictionary contains:

    * ``'gene-count'``: the result of ``sentence.countEntity('B-gene')``;
    * one ``'stem/pos'`` key per masked stem and PoS tag pair, with its count;
    * ``'has-all-entities'`` set to 1 if both ``'TARGET/NN'`` and
      ``'FACTOR/NN'`` are present;
    * one space-joined key for every n-gram of masked stems of order 2 up
      to ``ngrams``, with its count.

    :param sentence: the sentence to extract features from
    :param ngrams: the highest n-gram order to generate; values of 1 or
                   less generate no n-gram features
    :return: a ``dict`` mapping feature names to integer values
    """
    d = {'gene-count': sentence.countEntity('B-gene')}
    stems = list(sentence.maskedStems())
    pos = sentence.posTags()
    tokens = Counter('{}/{}'.format(s, t) for s, t in zip(stems, pos))
    d.update(tokens)

    if "TARGET/NN" in d and "FACTOR/NN" in d:
        d['has-all-entities'] = 1

    gram = list(stems)

    while ngrams > 1:
        # Decrement the order; the original `ngrams =- 1` assigned -1.
        ngrams -= 1
        # Prefix each (n-1)-gram that starts one token later with the stem
        # at the current position, so `gram` grows by one token per pass.
        gram = ['{} {}'.format(s, g) for s, g in zip(stems, gram[1:])]
        d.update(Counter(gram))

    return d
Ejemplo n.º 34
0
def asDict(sentence: "Sentence", ngrams=2):
    """Convert a :class:`fnl.text.sentence.Sentence` into a feature dictionary.

    Features produced:

    * ``'gene-count'``: the value of ``sentence.countEntity('B-gene')``;
    * ``'<stem>/<pos>'``: count of each masked stem and PoS tag pair;
    * ``'has-all-entities'``: 1, only if ``'TARGET/NN'`` and ``'FACTOR/NN'``
      both occur among the features;
    * ``'<stem> <stem> ...'``: count of each masked-stem n-gram, for every
      order from 2 up to ``ngrams``.

    :param sentence: the sentence to extract features from
    :param ngrams: the highest n-gram order to generate; no n-grams are
                   generated for values of 1 or less
    :return: a ``dict`` mapping feature names to integer values
    """
    d = {'gene-count': sentence.countEntity('B-gene')}
    stems = list(sentence.maskedStems())
    pos = sentence.posTags()
    d.update(Counter('{}/{}'.format(s, t) for s, t in zip(stems, pos)))

    if "TARGET/NN" in d and "FACTOR/NN" in d:
        d['has-all-entities'] = 1

    gram = list(stems)

    while ngrams > 1:
        # The original `ngrams = -1` ended the loop after a single pass and
        # `gram` was never extended, so only bigrams could be generated.
        ngrams -= 1
        # Extend every gram by one leading stem: zip() pairs the stem at
        # position i with the previous-order gram starting at i + 1.
        gram = ['{} {}'.format(s, g) for s, g in zip(stems, gram[1:])]
        d.update(Counter(gram))

    return d
Ejemplo n.º 35
0
 def testAddAnnotation(self):
     # Three annotations of two types: `annotations` should map each type
     # to a set, holding three annotations in total.
     sentence = Sentence(TEST_TOKENS)
     for args in (('type1', 2, 4), ('type2', 6), ('type2', 8, 9)):
         sentence.addAnnotation(*args)
     self.assertEqual(len(sentence.annotations), 2)
     type_names = set(sentence.annotations.keys())
     self.assertEqual(type_names, {'type1', 'type2'})
     only_sets = all(
         isinstance(group, set) for group in sentence.annotations.values())
     self.assertTrue(only_sets)
     total = sum(len(group) for group in sentence.annotations.values())
     self.assertEqual(total, 3)
Ejemplo n.º 36
0
    def testTokenDistance(self):
        # Token distances are measured from C (6, 8) to A, B, D and E.
        sentence = Sentence(TEST_TOKENS)
        a = sentence.addAnnotation('A', 0, 2)
        b = sentence.addAnnotation('B', 3)
        reference = sentence.addAnnotation('C', 6, 8)
        d = sentence.addAnnotation('D', 8)
        e = sentence.addAnnotation('E', 4, 6)

        others = (a, b, d, e)
        expected_distances = (4, 2, 0, 0)

        for other, expected in zip(others, expected_distances):
            self.assertEqual(reference.tokenDistanceTo(other), expected)
Ejemplo n.º 37
0
    def testPhraseDistance(self):
        # Phrase distances are measured from C (6, 8) to every annotation,
        # including C itself, for which -1 is expected.
        sentence = Sentence(TEST_TOKENS)
        a = sentence.addAnnotation('A', 0, 2)
        b = sentence.addAnnotation('B', 3)
        reference = sentence.addAnnotation('C', 6, 8)
        d = sentence.addAnnotation('D', 9)
        e = sentence.addAnnotation('E', 3, 5)

        others = (a, b, reference, d, e)
        expected_distances = (1, 1, -1, 0, 0)

        for other, expected in zip(others, expected_distances):
            distance = reference.phraseDistanceTo(other)
            self.assertEqual(distance, expected, msg=repr(other))
Ejemplo n.º 38
0
 def testAddAnnotation(self):
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type2', 6)
     sentence.addAnnotation('type2', 8, 9)
     # Two distinct types are stored ...
     self.assertEqual(len(sentence.annotations), 2)
     self.assertEqual(set(sentence.annotations.keys()), {'type1', 'type2'})
     # ... each as a set, with three annotations overall.
     value_is_set = [
         isinstance(group, set) for group in sentence.annotations.values()
     ]
     self.assertTrue(all(value_is_set))
     sizes = [len(group) for group in sentence.annotations.values()]
     self.assertEqual(sum(sizes), 3)
Ejemplo n.º 39
0
 def testGetAnnotations(self):
     sentence = Sentence(TEST_TOKENS)
     lone = sentence.addAnnotation('type1', 2, 4)
     pair = [
         sentence.addAnnotation('type2', 6),
         sentence.addAnnotation('type2', 8, 9),
     ]
     # Lookup by type is expected to return the matching annotations as a set.
     self.assertEqual(sentence.getAnnotations('type1'), {lone})
     self.assertEqual(sentence.getAnnotations('type2'), set(pair))
Ejemplo n.º 40
0
 def testGetStems(self):
     # stems(3, 7) is expected to return the plain stems 3..6 even though
     # some of those tokens carry a 'mask' annotation.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('mask', 2, 4)
     sentence.addAnnotation('mask', 6)
     stems = list(sentence.stems(3, 7))
     self.assertListEqual(stems, ['stem%d' % i for i in range(3, 7)])
Ejemplo n.º 41
0
 def testGetPoSTags(self):
     # posTags(3, 7) is expected to return the PoS tags 3..6; the 'mask'
     # annotations do not appear in the result.
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('mask', 2, 4)
     sentence.addAnnotation('mask', 6)
     tags = list(sentence.posTags(3, 7))
     self.assertListEqual(tags, ['pos%d' % i for i in range(3, 7)])
Ejemplo n.º 42
0
    def testComparator(self):
        # Checks the ordering of the annotation (2, 5) against annotations
        # with other start/end offsets in the same sentence.
        sentence = Sentence(TEST_TOKENS)
        pivot = Annotation(sentence, 2, 5)

        # Everything starting at 0 is expected to sort before the pivot.
        for end in range(1, 7):
            earlier = Annotation(sentence, 0, end)
            self.assertTrue(pivot > earlier, end)

        # Everything starting at 3 or later and ending at 7 sorts after it.
        for start in range(3, 7):
            later = Annotation(sentence, start, 7)
            self.assertTrue(pivot < later, start)

        enclosing = Annotation(sentence, 1, 6)
        self.assertTrue(pivot > enclosing)
        enclosed = Annotation(sentence, 3, 4)
        self.assertTrue(pivot < enclosed)
        same_span = Annotation(sentence, 2, 5)
        self.assertTrue(pivot == same_span)
Ejemplo n.º 43
0
    def testTokenDistance(self):
        sentence = Sentence(TEST_TOKENS)
        spans = (('A', 0, 2), ('B', 3), ('C', 6, 8), ('D', 8), ('E', 4, 6))
        a, b, c, d, e = [sentence.addAnnotation(*span) for span in spans]

        # (annotation, expected token distance from C)
        cases = ((a, 4), (b, 2), (d, 0), (e, 0))

        for other, expected in cases:
            distance = c.tokenDistanceTo(other)
            self.assertEqual(distance, expected)
Ejemplo n.º 44
0
    def testPhraseDistance(self):
        sentence = Sentence(TEST_TOKENS)
        spans = (('A', 0, 2), ('B', 3), ('C', 6, 8), ('D', 9), ('E', 3, 5))
        a, b, c, d, e = [sentence.addAnnotation(*span) for span in spans]

        # (annotation, expected phrase distance from C); C against itself
        # is expected to give -1.
        cases = ((a, 1), (b, 1), (c, -1), (d, 0), (e, 0))

        for other, expected in cases:
            self.assertEqual(
                c.phraseDistanceTo(other), expected, msg=repr(other))
Ejemplo n.º 45
0
 def testGetPhraseNumbers(self):
     sentence = Sentence(TEST_TOKENS)
     # Full sentence first, then only the range (1, 6).
     numbers = list(sentence.phraseNumbers())
     self.assertListEqual(numbers, list(range(1, 5)))
     numbers_in_range = list(sentence.phraseNumbers(1, 6))
     self.assertListEqual(numbers_in_range, list(range(1, 3)))
Ejemplo n.º 46
0
    def testGetPhraseNumber(self):
        # Expected phrase number for each token index 0..9.
        sentence = Sentence(TEST_TOKENS)
        expected_numbers = (1, 1, 0, 2, 2, 0, 3, 3, 0, 4)

        for token_index, phrase_number in enumerate(expected_numbers):
            actual = sentence.phraseNumber(token_index)
            self.assertEqual(actual, phrase_number)
Ejemplo n.º 47
0
 def testAddAnnotationsAreUnique(self):
     sentence = Sentence(TEST_TOKENS)
     sentence.addAnnotation('type1', 2, 4)
     sentence.addAnnotation('type1', 2, 4)
     # The duplicate must not add a second type or a second annotation.
     type_count = len(sentence.annotations)
     self.assertEqual(type_count, 1)
     sizes = [len(group) for group in sentence.annotations.values()]
     self.assertEqual(sum(sizes), 1)
Ejemplo n.º 48
0
 def testEquals(self):
     # A sentence equals itself, while a second Sentence constructed from
     # it is expected to compare unequal.
     original = Sentence(TEST_TOKENS)
     rebuilt = Sentence(original)
     self.assertEqual(original, original)
     self.assertNotEqual(original, rebuilt)
Ejemplo n.º 49
0
 def testTokenDistanceIfEqual(self):
     # Two annotations over the identical span (0, 2) are expected to have
     # a token distance of -2.
     sentence = Sentence(TEST_TOKENS)
     first = sentence.addAnnotation('A', 0, 2)
     second = sentence.addAnnotation('B', 0, 2)
     distance = first.tokenDistanceTo(second)
     self.assertEqual(distance, -2)
Ejemplo n.º 50
0
 def testIsNotInsidePhrase(self):
     # The annotation over (1, 3) must not be reported as inside a phrase.
     sentence = Sentence(TEST_TOKENS)
     annotation = sentence.addAnnotation('false', 1, 3)
     inside = annotation.isInsidePhrase()
     self.assertFalse(inside)
Ejemplo n.º 51
0
 def testPhraseDistanceIfBothInOverlappingPhrase(self):
     # Annotations over (0, 5) and (6, 10) are expected at phrase
     # distance zero.
     sentence = Sentence(TEST_TOKENS)
     left = sentence.addAnnotation('A', 0, 5)
     right = sentence.addAnnotation('B', 6, 10)
     distance = left.phraseDistanceTo(right)
     self.assertEqual(distance, 0)
Ejemplo n.º 52
0
 def testIsNotInsidePhrase(self):
     sentence = Sentence(TEST_TOKENS)
     # Span (1, 3) is expected to be reported as not inside a phrase.
     spanning = sentence.addAnnotation('false', 1, 3)
     self.assertFalse(spanning.isInsidePhrase())
Ejemplo n.º 53
0
 def testPhraseDistanceIfBothNotInPhrase(self):
     # Annotations on tokens 2 and 5 are expected to be one phrase apart.
     sentence = Sentence(TEST_TOKENS)
     left = sentence.addAnnotation('A', 2)
     right = sentence.addAnnotation('B', 5)
     distance = left.phraseDistanceTo(right)
     self.assertEqual(distance, 1)
Ejemplo n.º 54
0
 def testTokenDistanceIfEqual(self):
     sentence = Sentence(TEST_TOKENS)
     # Same span (0, 2), different types 'A' and 'B'.
     typed_a, typed_b = [
         sentence.addAnnotation(kind, 0, 2) for kind in ('A', 'B')
     ]
     self.assertEqual(typed_a.tokenDistanceTo(typed_b), -2)
Ejemplo n.º 55
0
    def testGetPhraseNumber(self):
        sentence = Sentence(TEST_TOKENS)
        # Position in this list is the token index; the value is the
        # phrase number expected for that token.
        phrase_numbers = [1, 1, 0, 2, 2, 0, 3, 3, 0, 4]

        for index, expected in enumerate(phrase_numbers):
            self.assertEqual(sentence.phraseNumber(index), expected)
Ejemplo n.º 56
0
 def testPhraseDistanceIfBothInOverlappingPhrase(self):
     sentence = Sentence(TEST_TOKENS)
     first_half = sentence.addAnnotation('A', 0, 5)
     second_half = sentence.addAnnotation('B', 6, 10)
     # A phrase distance of zero is expected between the two spans.
     self.assertEqual(first_half.phraseDistanceTo(second_half), 0)
Ejemplo n.º 57
0
 def testTokenDistanceOnDifferentSentences(self):
     # Measuring the distance between annotations that belong to two
     # separate Sentence objects must raise ValueError.
     first_sentence = Sentence(TEST_TOKENS)
     second_sentence = Sentence(TEST_TOKENS)
     here = first_sentence.addAnnotation('type', 0, 2)
     elsewhere = second_sentence.addAnnotation('type', 6, 8)
     measure = here.tokenDistanceTo
     with self.assertRaises(ValueError):
         measure(elsewhere)
Ejemplo n.º 58
0
    def testGetPhraseOffset(self):
        # Expected offsets for the phrases numbered 1..4, in that order.
        sentence = Sentence(TEST_TOKENS)
        expected_offsets = ((0, 2), (3, 5), (6, 8), (9, 10))

        for number, offset in enumerate(expected_offsets, start=1):
            self.assertEqual(sentence.phraseOffsetFor(number), offset)
Ejemplo n.º 59
0
 def testTokenDistanceOnDifferentSentences(self):
     sentences = [Sentence(TEST_TOKENS), Sentence(TEST_TOKENS)]
     in_first = sentences[0].addAnnotation('type', 0, 2)
     in_second = sentences[1].addAnnotation('type', 6, 8)
     # Annotations of different sentences cannot be compared: ValueError.
     distance_to = in_first.tokenDistanceTo
     with self.assertRaises(ValueError):
         distance_to(in_second)
Ejemplo n.º 60
0
 def testPhraseDistanceIfBothNotInPhrase(self):
     sentence = Sentence(TEST_TOKENS)
     at_2, at_5 = [
         sentence.addAnnotation(kind, position)
         for kind, position in (('A', 2), ('B', 5))
     ]
     # A phrase distance of one is expected between tokens 2 and 5.
     self.assertEqual(at_2.phraseDistanceTo(at_5), 1)