# Module-level imports assumed from the ucca package layout (not shown in the
# original snippet): layer0 provides the terminal layer, util provides
# break2sentences.
from ucca import layer0, util


def to_text(passage, sentences=True):
    """Converts from a Passage object to tokenized strings.

    Args:
        passage: the Passage object to convert
        sentences: whether to break the Passage into sentences (one string
            per sentence) or leave it as one string. Defaults to True

    Returns:
        a list of strings - 1 if sentences=False, # of sentences otherwise

    """
    tokens = [x.text for x in sorted(passage.layer(layer0.LAYER_ID).all,
                                     key=lambda x: x.position)]
    # break2sentences returns the positions of the sentence-end tokens, which
    # are always the index into tokens incremented by one (the tokens index
    # starts at 0, positions at 1). In essence it returns the index at which
    # to start the next sentence, so we prepend index 0 for the first sentence.
    if sentences:
        starts = [0] + util.break2sentences(passage)
    else:
        starts = [0, len(tokens)]
    return [' '.join(tokens[starts[i]:starts[i + 1]])
            for i in range(len(starts) - 1)]
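# A minimal usage sketch (illustrative, not from the source): it assumes
# to_text lives in ucca.convert and builds a tiny hypothetical passage using
# the same ucca API calls exercised by the test below.
from ucca import convert, core, layer0, layer1

p = core.Passage('1')
l0 = layer0.Layer0(p)
l1 = layer1.Layer1(p)
hello = l0.add_terminal('Hello', False)  # non-punctuation terminal
dot = l0.add_terminal('.', True)         # punctuation terminal
h = l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
h.add(layer1.EdgeTags.Terminal, hello)
l1.add_punct(h, dot)

print(convert.to_text(p))                   # expected: ['Hello .']
print(convert.to_text(p, sentences=False))  # expected: ['Hello .']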
def test_break2sentences(self):
    """Tests identifying sentence ends correctly.

    Passage: [1 2 [3 P] H] . [[5 6 . P] H] [[8 P] . 10 . H]
    """
    p = core.Passage('1')
    l0 = layer0.Layer0(p)
    l1 = layer1.Layer1(p)
    # Terminals: '1 2 3 .' and '5 6 .' in paragraph 1, '8 . 10 .' in
    # paragraph 2; the second argument marks punctuation.
    terms = [l0.add_terminal(str(i), False) for i in range(1, 4)]
    terms.append(l0.add_terminal('.', True))
    terms.append(l0.add_terminal('5', False))
    terms.append(l0.add_terminal('6', False))
    terms.append(l0.add_terminal('.', True))
    terms.append(l0.add_terminal('8', False, paragraph=2))
    terms.append(l0.add_terminal('.', True, paragraph=2))
    terms.append(l0.add_terminal('10', False, paragraph=2))
    terms.append(l0.add_terminal('.', True, paragraph=2))
    # Three parallel scenes, each with a Process child.
    h1 = l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
    h2 = l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
    h3 = l1.add_fnode(None, layer1.EdgeTags.ParallelScene)
    p1 = l1.add_fnode(h1, layer1.EdgeTags.Process)
    p2 = l1.add_fnode(h2, layer1.EdgeTags.Process)
    p3 = l1.add_fnode(h3, layer1.EdgeTags.Process)
    h1.add(layer1.EdgeTags.Terminal, terms[0])
    h1.add(layer1.EdgeTags.Terminal, terms[1])
    p1.add(layer1.EdgeTags.Terminal, terms[2])
    l1.add_punct(None, terms[3])
    p2.add(layer1.EdgeTags.Terminal, terms[4])
    p2.add(layer1.EdgeTags.Terminal, terms[5])
    l1.add_punct(p2, terms[6])
    p3.add(layer1.EdgeTags.Terminal, terms[7])
    l1.add_punct(h3, terms[8])
    h3.add(layer1.EdgeTags.Terminal, terms[9])
    l1.add_punct(h3, terms[10])
    # Sentence-final tokens sit at 1-based positions 4, 7 and 11.
    self.assertSequenceEqual(util.break2sentences(p), [4, 7, 11])
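# Tying the test's expected value back to to_text (derived from the code
# above, illustrative only): prepending 0 to the break2sentences output
# gives the slice boundaries to_text uses to join tokens into sentences.
tokens = ['1', '2', '3', '.', '5', '6', '.', '8', '.', '10', '.']
starts = [0] + [4, 7, 11]  # break2sentences output from the assertion above
assert [' '.join(tokens[starts[i]:starts[i + 1]])
        for i in range(len(starts) - 1)] == ['1 2 3 .', '5 6 .', '8 . 10 .']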