Example #1
0
 def _create_sentence_objects(self):
     '''Return a list of Sentence objects from the raw text.

     Each Sentence records its start/end character offsets within
     ``self.raw`` and shares the parent blob's models (tokenizer,
     NP extractor, POS tagger, analyzer, parser, classifier).
     '''
     sentence_objects = []
     sentences = sent_tokenize(self.raw)
     char_index = 0  # Search cursor: character index within the blob
     for sent in sentences:
         # Locate this sentence within the blob, searching only from
         # the cursor so repeated sentence texts resolve to successive
         # occurrences rather than re-matching an earlier one.
         start_index = self.raw.index(sent, char_index)
         end_index = start_index + len(sent)
         # Advance the cursor to the sentence's actual end (not just by
         # len(sent)): inter-sentence whitespace would otherwise make the
         # cursor lag behind, and a duplicated sentence could be matched
         # at the same position twice, yielding wrong offsets.
         char_index = end_index
         # Sentences share the same models as their parent blob
         s = Sentence(sent, start_index=start_index, end_index=end_index,
             tokenizer=self.tokenizer, np_extractor=self.np_extractor,
             pos_tagger=self.pos_tagger, analyzer=self.analyzer,
             parser=self.parser, classifier=self.classifier)
         sentence_objects.append(s)
     return sentence_objects
Example #2
0
 def _create_sentence_objects(self):
     '''Return a list of Sentence objects from the raw text.

     Each Sentence records its start/end character offsets within
     ``self.raw`` and shares the parent blob's models (tokenizer,
     NP extractor, POS tagger, analyzer, parser, classifier).
     '''
     sentence_objects = []
     sentences = sent_tokenize(self.raw)
     char_index = 0  # Search cursor: character index within the blob
     for sent in sentences:
         # Locate this sentence within the blob, searching only from
         # the cursor so repeated sentence texts resolve to successive
         # occurrences rather than re-matching an earlier one.
         start_index = self.raw.index(sent, char_index)
         end_index = start_index + len(sent)
         # Advance the cursor to the sentence's actual end (not just by
         # len(sent)): inter-sentence whitespace would otherwise make the
         # cursor lag behind, and a duplicated sentence could be matched
         # at the same position twice, yielding wrong offsets.
         char_index = end_index
         # Sentences share the same models as their parent blob
         s = Sentence(sent, start_index=start_index, end_index=end_index,
             tokenizer=self.tokenizer, np_extractor=self.np_extractor,
             pos_tagger=self.pos_tagger, analyzer=self.analyzer,
             parser=self.parser, classifier=self.classifier)
         sentence_objects.append(s)
     return sentence_objects
Example #3
0
 def test_sent_tokenize(self):
     '''sent_tokenize lazily yields the same sentences the tokenizer produces.'''
     sentence_iter = sent_tokenize(self.text)
     # The shortcut must return a lazy generator, not a materialized list.
     assert_true(is_generator(sentence_iter))
     expected = self.tokenizer.tokenize(self.text)
     assert_equal(list(sentence_iter), expected)
Example #4
0
 def test_sent_tokenize(self):
     '''sent_tokenize lazily yields the same sentences the tokenizer produces.'''
     sentence_iter = sent_tokenize(self.text)
     # The shortcut must return a lazy generator, not a materialized list.
     assert_true(is_generator(sentence_iter))
     expected = self.tokenizer.tokenize(self.text)
     assert_equal(list(sentence_iter), expected)