def two_incomplete_test():
    string = "This should be two sentences. Second one incomplete"
    gold = ["This should be two sentences.", "Second one incomplete"]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def nothing_test():
    string = " "
    gold = []
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def one_incomplete_test():
    string = "One incomplete sentence"
    gold = ["One incomplete sentence"]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def two_long_test():
    string = "This should be two sentences!!!?!! There is a split."
    gold = ["This should be two sentences!!!?!!", "There is a split."]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def no_split_test():
    string = "This should only be one sentence."
    gold = ["This should only be one sentence."]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def mr_test():
    string = "Mr. White got a loaf of bread"
    gold = ["Mr. White got a loaf of bread"]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)


def funny_test():
    string = "'') Funny stuff joined on."
    gold = ["'') Funny stuff joined on."]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)

def parens_quotes_test():
    string = "(Break after a parenthesis.) (Or after \"quoted stuff!\")"
    gold = ["(Break after a parenthesis.)", "(Or after \"quoted stuff!\")"]
    doc = nlp.sent_tokenize(string)
    sents = [s for s in doc.sents()]
    for sent, gold_sent in zip(sents, gold):
        assert str(sent) == str(gold_sent)
    assert len(sents) == len(gold)

def run(self):
    from nlp import sent_tokenize, word_tokenize, lemmatize

    # Return the first sentence of the full body whose lemmatized words
    # contain 'die'; returns None implicitly if no sentence matches.
    fb = self['fullBody']
    possibilities = sent_tokenize(fb)
    for p in possibilities:
        # Tokenize and lemmatize this candidate sentence's words.
        words = [lemmatize(w) for w in word_tokenize(p)]
        if 'die' in words:
            return p