def test_extents(self):
    # The extents table produced for two tagged sentences must contain the
    # same lines as the stored fixture file. Both sides are sorted before
    # comparison, so line order is not significant.
    t1 = ternip.timex(id=1)
    t2 = ternip.timex(id=2)
    sents = [
        [('The', 'DT', set()), ('first', 'JJ', set([t1])),
         ('sentence', 'NN', set()), ('.', '.', set())],
        [('The', 'DT', set()), ('second', 'JJ', set([t2])),
         ('sentence', 'NN', set([t2])), ('.', '.', set())],
    ]
    d = ternip.formats.tempeval2.create(sents, 'ABC1')
    with open(os.path.normpath('tests/formats/timex-extents.tab')) as fd:
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(sorted(d.get_extents().splitlines()),
                         sorted(fd.read().splitlines()))
def test_reconcile_sents_attrs(self):
    # Reconcile a timex carrying many attributes into a GATE-format document
    # and compare the serialised document with the expected string.
    t1 = ternip.timex(id=1, type='date')
    t2 = ternip.timex(id=2)
    t3 = ternip.timex(id=3)
    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    # comment and granuality are set here but do not occur in the expected
    # output string below.
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    # References to other timexes; the expected output shows them as tN IDs.
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3
    d = ternip.formats.gate("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    d.reconcile([
        [('This', 'POS', set()), ('is', 'POS', set()),
         ('a', 'POS', set([t1])), ('sentence', 'POS', set([t1])),
         ('.', '.', set())],
        [('And', 'POS', set()), ('a', 'POS', set()),
         ('second', 'POS', set()), ('sentence', 'POS', set()),
         ('.', 'POS', set())],
        [('Outside', 'POS', set())],
    ])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(d),
        """This is a id=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3 sentence t1 . And a second sentence . Outside """)
def testApplyAll(self):
    # Two rules share one pattern and differ only in the month literal of
    # their value expression. The block is created with the 'all' setting,
    # and the asserted value is the one the second rule's expression yields.
    pattern = r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>'
    rules = [
        normalisation_rule(pattern, 'date', 'testApplyAll1',
                           r'{#2} + "01" + {#1}'),
        normalisation_rule(pattern, 'date', 'testApplyAll2',
                           r'{#2} + "02" + {#1}'),
    ]
    b = normalisation_rule_block(None, [], 'all', rules)
    t = timex(type='date')
    tokens = [
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
    ]
    self.assertTrue(b.apply(t, '', '', tokens, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.value, '19960206')
def _timex_from_node(self, node):
    """
    Build a timex object from the attributes of a TIMEX2 node and return it
    """
    result = ternip.timex()

    # NOTE(review): PERIODICITY is consulted only inside the SET="yes" branch
    # in this layout - confirm the nesting against the upstream source.
    marked_as_set = (node.hasAttribute('SET')
                     and node.getAttribute('SET').lower() == 'yes')
    if marked_as_set:
        result.type = 'set'
        if node.hasAttribute('PERIODICITY'):
            # The first character of PERIODICITY is replaced by 'P'
            periodicity = node.getAttribute('PERIODICITY')
            result.value = 'P' + periodicity[1:]

    # VAL is read after PERIODICITY, so it wins when both are present
    if node.hasAttribute('VAL'):
        result.value = node.getAttribute('VAL')

    if node.hasAttribute('MOD'):
        result.mod = node.getAttribute('MOD')

    if node.hasAttribute('GRANUALITY'):
        # The first character of the attribute is dropped
        granularity = node.getAttribute('GRANUALITY')
        result.freq = granularity[1:]

    if node.hasAttribute('COMMENT'):
        result.comment = node.getAttribute('COMMENT')

    return result
def test_reconcile_sents(self):
    # Reconcile a single timex (id 1) spanning two tokens into a GATE-format
    # document and compare the serialised document with the expected string.
    d = ternip.formats.gate("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    t = ternip.timex(id=1)
    d.reconcile([
        [('This', 'POS', set()), ('is', 'POS', set()),
         ('a', 'POS', set([t])), ('sentence', 'POS', set([t])),
         ('.', '.', set())],
        [('And', 'POS', set()), ('a', 'POS', set()),
         ('second', 'POS', set()), ('sentence', 'POS', set()),
         ('.', 'POS', set())],
        [('Outside', 'POS', set())],
    ])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(d),
        """This is a id=t1 sentence t1 . And a second sentence . Outside """)
def test_reconcile_TIMEX_SET(self):
    # A 'set' timex with value "P6M" is expected to be written as a TIMEX2
    # element carrying PERIODICITY="F6M", MOD="BEFORE" and SET="YES".
    s = ternip.formats.timex2('<root>This is some annotated text.</root>')
    t = ternip.timex(type='set')
    t.value = "P6M"
    t.mod = "BEFORE"
    s.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', set([t])),
        ('annotated', 'POS', set([t])),
        ('text', 'POS', set([t])),
        ('.', 'POS', set()),
    ]])
    # The expected document is round-tripped through minidom so both sides
    # use the same serialisation.
    expected = xml.dom.minidom.parseString('<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(str(s), expected)
def test_assign_IDs(self):
    # Get some sample timexes without IDs and have IDs assigned to them
    ts = set([ternip.timex(), ternip.timex(), ternip.timex()])
    ternip.add_timex_ids(ts)

    # Get the assigned IDs
    tids = set(t.id for t in ts)

    # Should be exactly 3 unique IDs
    # (assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12)
    self.assertEqual(len(tids), 3)

    # Should be consecutive
    self.assertTrue(1 in tids)
    self.assertTrue(2 in tids)
    self.assertTrue(3 in tids)
def test_reconcile_TIMEX(self):
    # A 'date' timex with value, mod, freq and comment is expected to be
    # written as a TIMEX2 element with VAL, MOD, COMMENT and GRANUALITY
    # attributes; freq "1M" appears as GRANUALITY="G1M" in the expectation.
    s = ternip.formats.timex2('<root>This is some annotated text.</root>')
    t = ternip.timex(type='date')
    t.value = "20100710"
    t.mod = "BEFORE"
    t.freq = "1M"
    t.comment = "Test"
    s.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', set([t])),
        ('annotated', 'POS', set([t])),
        ('text', 'POS', set([t])),
        ('.', 'POS', set()),
    ]])
    # The expected document is round-tripped through minidom so both sides
    # use the same serialisation.
    expected = xml.dom.minidom.parseString('<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(str(s), expected)
def test_attr(self):
    # The attribute table produced for three timexes must contain the same
    # lines as the stored fixture file. Both sides are sorted before
    # comparison, so line order is not significant.
    t1 = ternip.timex(id=1, type='date')
    t2 = ternip.timex(id=2)
    t3 = ternip.timex(id=3)
    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    # t1 references itself, t2 and t3 through its begin/end/context fields
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3
    sents = [
        [('The', 'DT', set()), ('first', 'JJ', set([t1])),
         ('sentence', 'NN', set()), ('.', '.', set())],
        [('The', 'DT', set()), ('second', 'JJ', set([t2])),
         ('sentence', 'NN', set([t2])), ('.', '.', set([t3]))],
    ]
    d = ternip.formats.tempeval2.create(sents, 'ABC1')
    with open(os.path.normpath('tests/formats/timex-attr.tab')) as fd:
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(sorted(d.get_attrs().splitlines()),
                         sorted(fd.read().splitlines()))
def test_reconcile_sents_attrs(self):
    # Attach a heavily-attributed timex to two tokens of a GATE-format
    # document and check the string form of the reconciled document.
    t1 = ternip.timex(id=1, type='date')
    t2 = ternip.timex(id=2)
    t3 = ternip.timex(id=3)
    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    # comment and granuality are assigned but are absent from the expected
    # output string below.
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    # Links to other timexes; they show up as tN IDs in the expected output.
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3
    d = ternip.formats.gate("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    first_sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('a', 'POS', set([t1])),
        ('sentence', 'POS', set([t1])),
        ('.', '.', set()),
    ]
    second_sentence = [
        ('And', 'POS', set()),
        ('a', 'POS', set()),
        ('second', 'POS', set()),
        ('sentence', 'POS', set()),
        ('.', 'POS', set()),
    ]
    d.reconcile([first_sentence, second_sentence, [('Outside', 'POS', set())]])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(d),
        """This is a id=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3 sentence t1 . And a second sentence . Outside """)
def test_assign_IDs_consecutive(self):
    # Get some sample timexes without IDs, plus one whose ID is already 2
    ts = set([ternip.timex(), ternip.timex(), ternip.timex()])
    at = ternip.timex()
    at.id = 2
    ts.add(at)
    ternip.add_timex_ids(ts)

    # Get the assigned IDs
    tids = set(t.id for t in ts)

    # Should be exactly 4 unique IDs and pre-assigned one hasn't changed
    # (assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12)
    self.assertEqual(len(tids), 4)
    self.assertEqual(2, at.id)

    # Should be consecutive for new ones
    self.assertTrue(1 in tids)
    self.assertTrue(2 in tids)
    self.assertTrue(3 in tids)
    self.assertTrue(4 in tids)
def test_reconcile_TIMEX_SET(self):
    # Reconciling a 'set' timex (value "P6M", mod "BEFORE") over three tokens
    # should yield a TIMEX2 element with PERIODICITY="F6M", MOD="BEFORE" and
    # SET="YES" wrapped around those tokens.
    s = ternip.formats.timex2('<root>This is some annotated text.</root>')
    t = ternip.timex(type='set')
    t.value = "P6M"
    t.mod = "BEFORE"
    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', set([t])),
        ('annotated', 'POS', set([t])),
        ('text', 'POS', set([t])),
        ('.', 'POS', set()),
    ]
    s.reconcile([sentence])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(s),
        xml.dom.minidom.parseString(
            '<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>'
        ).toxml())
def test_reconcile_TIMEX(self):
    # Reconciling a 'date' timex over three tokens should yield a TIMEX2
    # element with VAL, MOD, COMMENT and GRANUALITY attributes; the freq
    # "1M" is expected to appear as GRANUALITY="G1M".
    s = ternip.formats.timex2('<root>This is some annotated text.</root>')
    t = ternip.timex(type='date')
    t.value = "20100710"
    t.mod = "BEFORE"
    t.freq = "1M"
    t.comment = "Test"
    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', set([t])),
        ('annotated', 'POS', set([t])),
        ('text', 'POS', set([t])),
        ('.', 'POS', set()),
    ]
    s.reconcile([sentence])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(s),
        xml.dom.minidom.parseString(
            '<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>'
        ).toxml())
def testTag(self):
    # Load the normalisation rules from the test rule directory, annotate a
    # sentence in which four tokens belong to one 'date' timex, and check
    # the value assigned to that timex.
    e = ternip.rule_engine.normalisation_rule_engine()
    e.load_rules('tests/rule_engine/test_normalisation_rules/')
    t = ternip.timex(type='date')
    sentence = [
        ('We', 'POS', set()),
        ('took', 'POS', set()),
        ('a', 'POS', set()),
        ('plane', 'POS', set()),
        ('on', 'POS', set()),
        ('the', 'POS', set()),
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
        ('to', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ]
    e.annotate([sentence], '')
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.value, '19960106')
def _timex_from_node(self, node):
    """
    Given a node representing a TIMEX3 element, return a timex object
    representing it.

    Only attributes present on the node are copied. The ID-valued attributes
    (tid, beginPoint, endPoint, anchorTimeID) have their first character
    dropped and the remainder converted with int(), so begin_timex,
    end_timex and context hold integer IDs rather than timex objects. A
    ValueError from int() propagates if the remainder is not numeric.
    """
    t = ternip.timex()

    if node.hasAttribute('tid'):
        t.id = int(node.getAttribute('tid')[1:])

    if node.hasAttribute('value'):
        t.value = node.getAttribute('value')

    if node.hasAttribute('mod'):
        t.mod = node.getAttribute('mod')

    if node.hasAttribute('type'):
        t.type = node.getAttribute('type')

    if node.hasAttribute('freq'):
        t.freq = node.getAttribute('freq')

    if node.hasAttribute('quant'):
        t.quant = node.getAttribute('quant')

    if node.hasAttribute('comment'):
        t.comment = node.getAttribute('comment')

    # Previously any non-empty attribute value - including "false" - switched
    # the flag on. An explicit false value now leaves the flag untouched;
    # every other non-empty value still enables it as before.
    if node.getAttribute('temporalFunction').lower() not in ('', 'false', '0'):
        t.temporal_function = True

    if node.hasAttribute('functionInDocument'):
        t.document_role = node.getAttribute('functionInDocument')

    if node.hasAttribute('beginPoint'):
        t.begin_timex = int(node.getAttribute('beginPoint')[1:])

    if node.hasAttribute('endPoint'):
        t.end_timex = int(node.getAttribute('endPoint')[1:])

    if node.hasAttribute('anchorTimeID'):
        t.context = int(node.getAttribute('anchorTimeID')[1:])

    return t
def testPosGuardBlocks(self):
    # The rule carries a guard mentioning 'February' while the body tokens
    # contain 'January'; the test expects apply() to report failure.
    guarded_rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                                      'date',
                                      'testPosGuardBlocks',
                                      r'{#2} + "01" + {#1}',
                                      guards = [r'<th~.+><February~.+>'])
    target = timex(type='date')

    # Every token is (word, 'POS', fresh empty set)
    before = [(word, 'POS', set())
              for word in ('We', 'took', 'a', 'plane', 'on', 'the')]
    body = [(word, 'POS', set())
            for word in ('06', 'th', 'January', '1996')]
    after = [(word, 'POS', set())
             for word in ('to', 'Atlanta')]

    outcome = guarded_rule.apply(target, '', '', body, before, after)
    self.assertFalse(outcome[0])
def testNegAfterAllows(self):
    # The after-guard is written with a leading '!' (presumably a negated
    # guard - confirm against the rule implementation) and names 'a plane',
    # which occurs before the body but not after it. The test expects the
    # rule to apply and to set the value.
    rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                              'date',
                              'testNegAfterAllows',
                              r'{#2} + "01" + {#1}',
                              after_guards = [r'!<a~.+><plane~.+>'])
    t = timex(type='date')
    (before, body, after) = (
        [('We', 'POS', set()), ('took', 'POS', set()), ('a', 'POS', set()),
         ('plane', 'POS', set()), ('on', 'POS', set()), ('the', 'POS', set())],
        [('06', 'POS', set()), ('th', 'POS', set()),
         ('January', 'POS', set()), ('1996', 'POS', set())],
        [('to', 'POS', set()), ('Atlanta', 'POS', set())]
    )
    self.assertTrue(rule.apply(t, '', '', body, before, after)[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.value, '19960106')
def test_reconcile_sents(self):
    # Attach one timex (id 1) to two tokens of a GATE-format document and
    # check the string form of the reconciled document.
    d = ternip.formats.gate("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    t = ternip.timex(id=1)
    first_sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('a', 'POS', set([t])),
        ('sentence', 'POS', set([t])),
        ('.', '.', set()),
    ]
    second_sentence = [
        ('And', 'POS', set()),
        ('a', 'POS', set()),
        ('second', 'POS', set()),
        ('sentence', 'POS', set()),
        ('.', 'POS', set()),
    ]
    d.reconcile([first_sentence, second_sentence, [('Outside', 'POS', set())]])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(
        str(d),
        """This is a id=t1 sentence t1 . And a second sentence . Outside """)
def test_warn(self):
    # Do something that generates a warning: the rule's value expression
    # calls a function that does not exist
    t = timex()
    r = normalisation_rule('test', value='non_existent_function()')
    r.apply(t, '', '', [('test', 'POS', set([t]))], [], [])
    # Exactly one warning is expected to have been counted on self.w.
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(1, self.w.num)
def testApplyFreq(self):
    # A rule given only a freq expression should apply successfully and
    # leave the timex's freq set to '1D'.
    rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                              'date',
                              'testApplyFreq',
                              freq=r'"1D"')
    t = timex(type='date')
    tokens = [
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
    ]
    self.assertTrue(rule.apply(t, '', '', tokens, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.freq, '1D')
def testApplyValue(self):
    # A rule whose value expression combines the second captured group,
    # the literal "01" and the first captured group should apply
    # successfully and leave the timex's value set to '19960106'.
    rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                              'date',
                              'testApplyValue',
                              r'{#2} + "01" + {#1}')
    t = timex(type='date')
    tokens = [
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
    ]
    self.assertTrue(rule.apply(t, '', '', tokens, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.value, '19960106')
def testApplyQuant(self):
    # A rule given only a quant expression should apply successfully and
    # leave the timex's quant set to 'EVERY'.
    rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                              'date',
                              'testApplyQuant',
                              quant=r'"EVERY"')
    t = timex(type='date')
    tokens = [
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
    ]
    self.assertTrue(rule.apply(t, '', '', tokens, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.quant, 'EVERY')
def testApplyCorrectType(self):
    # The rule is declared for 'date' but the timex has type 'time'; the
    # test expects apply() to report failure.
    date_rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                                   'date',
                                   'testApplyCorrectType',
                                   r'{#2} + "01" + {#1}')
    time_timex = timex(type='time')

    # Every token gets its own fresh set containing the timex
    tokens = [(word, 'POS', set([time_timex]))
              for word in ('06', 'th', 'January', '1996')]

    outcome = date_rule.apply(time_timex, '', '', tokens, [], [])
    self.assertFalse(outcome[0])
def testApplyChangeType(self):
    # A rule given only a change_type expression should apply successfully
    # and leave the timex's type set to 'non-date'.
    rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
                              'date',
                              'testApplyChangeType',
                              change_type=r'"non-date"')
    t = timex(type='date')
    tokens = [
        ('06', 'POS', set([t])),
        ('th', 'POS', set([t])),
        ('January', 'POS', set([t])),
        ('1996', 'POS', set([t])),
    ]
    self.assertTrue(rule.apply(t, '', '', tokens, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(t.type, 'non-date')