Example #1
File: blob.py Project: Arttii/TextBlob
 def _create_sentence_objects(self):
     '''Returns a list of Sentence objects from the raw text.
     '''
     sentence_objects = []
     sentences = sent_tokenize(self.raw)
     char_index = 0  # Keeps track of character index within the blob
     for sent in sentences:
         # Compute the start and end indices of the sentence
         # within the blob
         start_index = self.raw.index(sent, char_index)
         char_index += len(sent)
         end_index = start_index + len(sent)
         # Sentences share the same models as their parent blob
         s = Sentence(sent, start_index=start_index, end_index=end_index,
             tokenizer=self.tokenizer, np_extractor=self.np_extractor,
             pos_tagger=self.pos_tagger, analyzer=self.analyzer,
             parser=self.parser, classifier=self.classifier)
         sentence_objects.append(s)
     return sentence_objects
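This helper backs TextBlob's public `sentences` property, which is how the start/end indices it computes are usually consumed. A minimal usage sketch, assuming the `textblob` package is installed; the sample text is hypothetical:

 from textblob import TextBlob

 # Hypothetical sample text; any multi-sentence string works.
 blob = TextBlob("TextBlob is fun. It does NLP.")
 for s in blob.sentences:  # populated by _create_sentence_objects()
     print(s.start_index, s.end_index, str(s))
 # 0 16 TextBlob is fun.
 # 17 29 It does NLP.

Note that `start_index` is found with `str.index` starting from `char_index`, so repeated sentences still map to distinct, non-overlapping character spans.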
Example #2
File: blob.py Project: wdw110/TextBlob (same implementation as Example #1)
Example #3
 def test_sent_tokenize(self):
     tokens = sent_tokenize(self.text)
     assert_true(is_generator(tokens))  # It's a generator
     assert_equal(list(tokens), self.tokenizer.tokenize(self.text))
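This test pins down an API detail: `sent_tokenize` returns a lazy generator, not a list, so sentences are produced on demand. A minimal sketch of the same behavior outside the test class, assuming the `textblob` package is installed; the sample string is hypothetical:

 from textblob.tokenizers import sent_tokenize

 tokens = sent_tokenize("First sentence. Second sentence.")
 print(next(tokens))  # First sentence. -- items are yielded lazily
 print(list(tokens))  # ['Second sentence.'] -- whatever remains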