예제 #1
0
 def test_get_sents(self):
     # Build a document from the single-document segmentation fixture and
     # check both its docid and the sentences returned by get_sents().
     fixture = self.filepath('base-segmentation-single.tab')
     with open(fixture) as fd:
         d = TempEval2Document(fd.read(), 'ABC1')
     self.assertEqual('ABC1', d.docid)

     def untagged(pairs):
         # Each expected token is (text, POS tag, empty timex set).
         return [(text, pos, set()) for text, pos in pairs]

     expected = [
         untagged([('The', 'DT'), ('first', 'JJ'), ('sentence', 'NN'), ('.', '.')]),
         untagged([('The', 'DT'), ('second', 'JJ'), ('sentence', 'NN'), ('.', '.')]),
     ]
     self.assertEqual(expected, d.get_sents())
예제 #2
0
 def test_extents(self):
     # Create a two-sentence document in which t1 is attached to one token
     # and t2 spans two adjacent tokens, then compare the lines produced by
     # get_extents() with the 'timex-extents.tab' fixture.
     t1 = Timex(id=1)
     t2 = Timex(id=2)
     sents = [[('The', 'DT', set()), ('first', 'JJ', {t1}), ('sentence', 'NN', set()), ('.', '.', set())],
              [('The', 'DT', set()), ('second', 'JJ', {t2}), ('sentence', 'NN', {t2}), ('.', '.', set())]]
     d = TempEval2Document.create(sents, 'ABC1')
     with open(self.filepath('timex-extents.tab')) as fd:
         # Both sides are sorted, so the comparison ignores line order.
         # assertEqual is used because the assertEquals alias is deprecated
         # and no longer exists in recent unittest versions.
         self.assertEqual(sorted(d.get_extents().splitlines()), sorted(fd.read().splitlines()))
예제 #3
0
 def test_extents(self):
     # The extents output of a created document must match the
     # 'timex-extents.tab' fixture, ignoring line order.
     t1 = Timex(id=1)
     t2 = Timex(id=2)
     first_sent = [
         ('The', 'DT', set()),
         ('first', 'JJ', {t1}),
         ('sentence', 'NN', set()),
         ('.', '.', set()),
     ]
     second_sent = [
         ('The', 'DT', set()),
         ('second', 'JJ', {t2}),
         ('sentence', 'NN', {t2}),
         ('.', '.', set()),
     ]
     d = TempEval2Document.create([first_sent, second_sent], 'ABC1')
     with open(self.filepath('timex-extents.tab')) as fd:
         produced = sorted(d.get_extents().splitlines())
         wanted = sorted(fd.read().splitlines())
     self.assertEqual(produced, wanted)
예제 #4
0
 def test_attr(self):
     # Populate one timex (t1) with a range of attributes, including
     # references to itself and to two other timexes, then compare the
     # lines produced by get_attrs() with the 'timex-attr.tab' fixture.
     t1 = Timex(id=1, type='date')
     t2 = Timex(id=2)
     t3 = Timex(id=3)
     t1.value = "20100710"
     t1.mod = "BEFORE"
     t1.freq = "1M"
     t1.comment = "Test"
     # NOTE(review): the spelling 'granuality' is kept as-is; presumably it
     # matches the attribute name on Timex -- confirm before renaming.
     t1.granuality = "1D"
     t1.non_specific = True
     t1.quant = 'EVERY'
     t1.temporal_function = True
     t1.document_role = 'MODIFICATION_TIME'
     t1.begin_timex = t1
     t1.end_timex = t2
     t1.context = t3
     sents = [[('The', 'DT', set()), ('first', 'JJ', {t1}), ('sentence', 'NN', set()), ('.', '.', set())],
              [('The', 'DT', set()), ('second', 'JJ', {t2}), ('sentence', 'NN', {t2}), ('.', '.', {t3})]]
     d = TempEval2Document.create(sents, 'ABC1')
     with open(self.filepath('timex-attr.tab')) as fd:
         # Both sides are sorted, so the comparison ignores line order.
         # assertEqual is used because the assertEquals alias is deprecated
         # and no longer exists in recent unittest versions.
         self.assertEqual(sorted(d.get_attrs().splitlines()), sorted(fd.read().splitlines()))
예제 #5
0
 def test_attr(self):
     # The attribute output of a created document must match the
     # 'timex-attr.tab' fixture, ignoring line order.
     t1 = Timex(id=1, type='date')
     t2 = Timex(id=2)
     t3 = Timex(id=3)

     # Attributes are assigned to t1 in this exact order.
     t1_settings = (
         ('value', "20100710"),
         ('mod', "BEFORE"),
         ('freq', "1M"),
         ('comment', "Test"),
         ('granuality', "1D"),
         ('non_specific', True),
         ('quant', 'EVERY'),
         ('temporal_function', True),
         ('document_role', 'MODIFICATION_TIME'),
         ('begin_timex', t1),
         ('end_timex', t2),
         ('context', t3),
     )
     for attr_name, attr_value in t1_settings:
         setattr(t1, attr_name, attr_value)

     first_sent = [
         ('The', 'DT', set()),
         ('first', 'JJ', {t1}),
         ('sentence', 'NN', set()),
         ('.', '.', set()),
     ]
     second_sent = [
         ('The', 'DT', set()),
         ('second', 'JJ', {t2}),
         ('sentence', 'NN', {t2}),
         ('.', '.', {t3}),
     ]
     d = TempEval2Document.create([first_sent, second_sent], 'ABC1')
     with open(self.filepath('timex-attr.tab')) as fd:
         produced = sorted(d.get_attrs().splitlines())
         wanted = sorted(fd.read().splitlines())
     self.assertEqual(produced, wanted)
예제 #6
0
 def test_load_multi(self):
     # Load the multi-document segmentation fixture (passing an empty
     # string as the second argument) and check that exactly two documents
     # come back, with docids 'ABC1' and 'ABC2'.
     with open(self.filepath('base-segmentation-multi.tab')) as fd:
         ds = TempEval2Document.load_multi(fd.read(), '')
     # assertEqual is used because the assertEquals alias is deprecated and
     # no longer exists in recent unittest versions.
     self.assertEqual(2, len(ds))
     # Collect the docids once; assertIn reports the actual list on failure,
     # unlike assertTrue(x in y) which only says "False is not true".
     docids = [d.docid for d in ds]
     self.assertIn('ABC1', docids)
     self.assertIn('ABC2', docids)
예제 #7
0
# Script fragment: prints a banner, loads the TERNIP recogniser and
# normaliser, reads the TempEval-2 sample data, opens two output files in a
# fresh temporary directory, and starts looping over the loaded documents.
# The print statements below use Python 2 syntax.
print "TERNIP TempEval-2 evaluator"
print

# Load the TERNIP recogniser and normaliser, reporting how many rules each has
recogniser = ternip.recogniser()
print "TERNIP loaded", recogniser.num_rules, "recognition rules"
normaliser = ternip.normaliser()
print "TERNIP loaded", normaliser.num_rules, "normalisation rules"
print
print "Loading data..."

# Load testing data. The path is relative, so it is resolved against the
# current working directory when the files are opened.
data_path = os.path.normpath('../sample_data/tempeval-training-2/english/data/')
with open(os.path.join(data_path, 'base-segmentation.tab')) as fd:
    with open(os.path.join(data_path, 'dct.txt')) as dct_fd:
        # Both files are read fully and handed to load_multi as strings.
        docs = TempEval2Document.load_multi(fd.read(), dct_fd.read())

# Fresh temporary directory for the output files opened below.
temp = tempfile.mkdtemp()

# NOTE(review): these files are opened without a context manager; confirm
# they are closed later in the script (not visible in this fragment).
ternip_extents = open(os.path.join(temp, 'ternip-extents.tab'), 'w')
ternip_attrs = open(os.path.join(temp, 'ternip-attrs.tab'), 'w')

# Record the start time. NOTE(review): time.clock() no longer exists in
# Python 3.8+; that is consistent with this script being Python 2.
start = time.clock()

print

for doc in docs:
    
    print "Annotating", doc.docid
    
    # Annotate
예제 #8
0
 def test_load_multi(self):
     # The multi-document fixture must load as two documents whose docids
     # include 'ABC1' and 'ABC2'.
     fixture = self.filepath('base-segmentation-multi.tab')
     with open(fixture) as fd:
         ds = TempEval2Document.load_multi(fd.read(), '')
     self.assertEqual(2, len(ds))
     for wanted_id in ('ABC1', 'ABC2'):
         self.assertTrue(wanted_id in [d.docid for d in ds])