Exemple #1
0
 def test_sent_split(self):
     """Check pyTABARI.sent_split against a hand-written reference.

     Every non-empty line of ``test_files/test.txt`` is passed to
     ``pyTABARI.sent_split`` and the collected results are compared with
     ``reference``. Each reference entry is a 3-tuple: an id string, a
     list of three numeric strings (presumably year/month/day -- confirm
     against the fixture file), and a list of ``(token, tag)`` pairs
     (the tags look like part-of-speech labels).
     """
     reference = [
         ('0', ['2012', '02', '15'],
          [('Syrian', 'JJ'), ('rebels', 'NNS'), ('killed', 'VBD'),
           ('28', 'CD'), ('soldiers', 'NNS'), ('in', 'IN'),
           ('attacks', 'NNS'), ('on', 'IN'), ('three', 'CD'),
           ('army', 'NN'), ('checkpoints', 'NNS'), ('on', 'IN'),
           ('the', 'DT'), ('main', 'JJ'), ('road', 'NN'),
           ('from', 'IN'), ('Damascus', 'NNP'), ('to', 'TO'),
           ('the', 'DT'), ('embattled', 'JJ'), ('city', 'NN'),
           ('of', 'IN'), ('Aleppo', 'NNP'), ('Thursday', 'NNP'),
           (',', ','), ('a', 'DT'), ('watchdog', 'NN'),
           ('said', 'VBD'), ('.', '.')]),
         ('1', ['2012', '02', '15'],
          [('Gunmen', 'NNS'), (',', ','), ('numbering', 'VBG'),
           ('about', 'IN'), ('20', 'CD'), (',', ','),
           ('have', 'VBP'), ('attacked', 'VBN'), ('Kaboro', 'NNP'),
           ('community', 'NN'), ('in', 'IN'), ('the', 'DT'),
           ('Dansadau', 'NNP'), ('Emirate', 'NNP'), ('of', 'IN'),
           ('Zamfara', 'NNP'), ('State', 'NNP'), (',', ','),
           ('killing', 'VBG'), ('18', 'CD'), ('people', 'NNS'),
           (',', ','), ('including', 'VBG'), ('the', 'DT'),
           ('village', 'NN'), ('head', 'NN'), ('.', '.')]),
         ('2', ['2012', '02', '16'],
          [('Rebels', 'NNS'), ('killed', 'VBD'), ('28', 'CD'),
           ('soldiers', 'NNS'), ('in', 'IN'), ('Syria', 'NNP'),
           ("'s", 'POS'), ('northwestern', 'NN'),
           ('battlefields', 'NNS'), ('Thursday', 'NNP'), (',', ','),
           ('a', 'DT'), ('watchdog', 'NN'), ('said', 'VBD'),
           (',', ','), ('as', 'IN'), ('the', 'DT'), ('regime', 'NN'),
           ('launched', 'VBD'), ('new', 'JJ'), ('air', 'NN'),
           ('strikes', 'NNS'), ('in', 'IN'), ('what', 'WP'),
           ('is', 'VBZ'), ('seen', 'VBN'), ('as', 'IN'),
           ('a', 'DT'), ('desperate', 'NN'), ('attempt', 'NN'),
           ('to', 'TO'), ('reverse', 'VB'), ('opposition', 'NN'),
           ('gains', 'NNS'), ('.', '.')]),
         ('3', ['2012', '02', '17'],
          [('Six', 'CD'), ('killed', 'VBD'), ('in', 'IN'),
           ('an', 'DT'), ('attack', 'NN'), ('on', 'IN'),
           ('ANP', 'NNP'), ('office', 'NN'), ('and', 'CC'),
           ('terrorism', 'NN'), ('in', 'IN'), ('Karachi', 'NNP'),
           ('.', '.')]),
     ]
     # Use a context manager so the fixture file is closed even if the
     # read fails; the original left the handle open.
     with open('test_files/test.txt', 'r') as fixture:
         text = fixture.read()
     # Blank lines (including the empty string after a trailing newline)
     # are skipped rather than passed to sent_split.
     output = [pyTABARI.sent_split(line)
               for line in text.split('\n') if line]
     self.assertEqual(reference, output)
Exemple #2
0
 def setUp(self):
     """Load the fixture sentences and store them on the test instance.

     Reads ``test_files/test.txt``, runs ``pyTABARI.sent_split`` on each
     non-empty line, and keeps the resulting list as
     ``self.tag_sentence``.
     """
     # Use a context manager so the fixture file is closed promptly;
     # the original left the handle open.
     with open('test_files/test.txt', 'r') as fixture:
         text = fixture.read()
     # Blank lines (including the empty string after a trailing newline)
     # are skipped rather than passed to sent_split.
     self.tag_sentence = [pyTABARI.sent_split(line)
                          for line in text.split('\n') if line]