Example #1
def test_tokenizer():
    # 'parser' is assumed to be the low-level (SWIG-wrapped) parser module
    # already in scope; 399 is the maximum sentence length, matching the
    # default in Example #2 below.
    sr = parser.tokenize(
        "junk <s> It's some text to tokenize, if you feel like it -- or not. </s>",
        399)
    print('sr %r' % str(sr))
    print('sr length', len(sr))
    for i in range(len(sr)):
        print('sr word', i, sr.getWord(i).lexeme())
    return sr
Example #2
def __init__(self, text_or_tokens, max_sentence_length=399):
    # Accepts an existing Sentence, a raw string, or a sequence of tokens.
    if isinstance(text_or_tokens, Sentence):
        self.sentrep = text_or_tokens.sentrep
    elif isinstance(text_or_tokens, str):  # 'basestring' in the original Python 2 code
        # Raw text is wrapped in <s> ... </s> markers and tokenized.
        self.sentrep = parser.tokenize('<s> ' + text_or_tokens + ' </s>',
                                       max_sentence_length)
    else:
        # Pre-tokenized input goes straight into a SentRep.
        self.sentrep = parser.SentRep(text_or_tokens)
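A minimal usage sketch of the constructor in Example #2, assuming these snippets come from the BLLIP parser's Python bindings and that Sentence is importable as shown (the import path is an assumption, not confirmed by the snippets themselves):

from bllipparser import Sentence  # assumed import path

# From raw text: tokenized via parser.tokenize with <s> ... </s> markers.
sent_from_text = Sentence("It's some text to tokenize, if you feel like it -- or not.")
# From a pre-tokenized list: wrapped directly in a SentRep.
sent_from_tokens = Sentence(['This', 'is', 'pre-tokenized', '.'])
print(len(sent_from_text.sentrep))  # token count, as in test_tokenizer above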