def test_reconcile_TIMEX_SET(self):
    # A timex of type 'set' with value "P6M" and mod "BEFORE" is expected
    # to be written as a TIMEX2 tag carrying PERIODICITY="F6M",
    # MOD="BEFORE" and SET="YES" around the three annotated tokens.
    doc = Timex2XmlDocument('<root>This is some annotated text.</root>')
    t = Timex(type='set')
    t.value = "P6M"
    t.mod = "BEFORE"
    doc.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {t}),
        ('annotated', 'POS', {t}),
        ('text', 'POS', {t}),
        ('.', 'POS', set()),
    ]])
    expected = xml.dom.minidom.parseString('<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_TIMEX_SET(self):
    # Reconcile a 'set' timex (value "P6M", mod "BEFORE") spanning
    # "some annotated text" and compare against the expected TIMEX2 markup.
    document = Timex2XmlDocument('<root>This is some annotated text.</root>')

    set_timex = Timex(type='set')
    set_timex.value = "P6M"
    set_timex.mod = "BEFORE"

    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {set_timex}),
        ('annotated', 'POS', {set_timex}),
        ('text', 'POS', {set_timex}),
        ('.', 'POS', set()),
    ]
    document.reconcile([sentence])

    wanted = xml.dom.minidom.parseString('<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>')
    self.assertEqual(str(document), wanted.toxml())
def test_reconcile_DCT_sents_DATE(self):
    # A timex added to the first token of the document-creation-time
    # sentences is expected to be written back as a TIMEX2 tag inside the
    # <DATE> element, leaving the body text untouched.
    doc = TernDocument('<DOC><DOCNO>ABC123</DOCNO><DATE>20100801</DATE><BODY><TEXT>This is some annotated text. This is a second sentence.</TEXT></BODY></DOC>')
    dct_sents = doc.get_dct_sents()
    t = Timex()
    t.value = 'ABCDEF'
    # Third element of a token tuple is its set of timexes
    dct_sents[0][0][2].add(t)
    doc.reconcile_dct(dct_sents)
    expected = xml.dom.minidom.parseString('<DOC><DOCNO>ABC123</DOCNO><DATE><TIMEX2 VAL="ABCDEF">20100801</TIMEX2></DATE><BODY><TEXT>This is some annotated text. This is a second sentence.</TEXT></BODY></DOC>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_TIMEX_embedded_start_end_consec(self):
    # Timexes whose extents start and end on consecutive embedded elements
    # (<p>...</p> <p>...</p>, and a whole <b>...</b>) are expected to wrap
    # those elements, with each sentence enclosed in an <s> tag.
    doc = _XmlDocument('<root>This is <p>some</p> <p>annotated</p> <p>text</p>. This is <b>a second timex.</b></root>')
    t1 = Timex()
    t2 = Timex()
    sents = [
        [
            ('This', 'POS', set()),
            ('is', 'POS', set()),
            ('some', 'POS', set()),
            ('annotated', 'POS', {t1}),
            ('text', 'POS', {t1}),
            ('.', 'POS', set()),
        ],
        [
            ('This', 'POS', set()),
            ('is', 'POS', set()),
            ('a', 'POS', {t2}),
            ('second', 'POS', {t2}),
            ('timex.', 'POS', {t2}),
        ],
    ]
    doc.reconcile(sents, add_S='s')
    expected = xml.dom.minidom.parseString('<root><s>This is <p>some</p> <TIMEX><p>annotated</p> <p>text</p></TIMEX>.</s> <s>This is <TIMEX><b>a second timex.</b></TIMEX></s></root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_TIMEX_consecutive_timex(self):
    # Two different timexes on adjacent token runs are expected to produce
    # two separate TIMEX tags with the separating space left between them.
    doc = _XmlDocument('<root>This is some annotated text and a second annotation.</root>')
    t1 = Timex()
    t2 = Timex()
    doc.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {t1}),
        ('annotated', 'POS', {t1}),
        ('text', 'POS', {t1}),
        ('and', 'POS', {t2}),
        ('a', 'POS', {t2}),
        ('second', 'POS', {t2}),
        ('annotation', 'POS', {t2}),
        ('.', 'POS', set()),
    ]])
    expected = xml.dom.minidom.parseString('<root>This is <TIMEX>some annotated text</TIMEX> <TIMEX>and a second annotation</TIMEX>.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_extents(self):
    # Build a TempEval-2 document from two sentences and compare the lines
    # of its extents output, order-insensitively, with the fixture file
    # 'timex-extents.tab'.
    first = Timex(id=1)
    second = Timex(id=2)

    sentence_one = [
        ('The', 'DT', set()),
        ('first', 'JJ', {first}),
        ('sentence', 'NN', set()),
        ('.', '.', set()),
    ]
    sentence_two = [
        ('The', 'DT', set()),
        ('second', 'JJ', {second}),
        ('sentence', 'NN', {second}),
        ('.', '.', set()),
    ]
    document = TempEval2Document.create([sentence_one, sentence_two], 'ABC1')

    with open(self.filepath('timex-extents.tab')) as fixture:
        actual_lines = document.get_extents().splitlines()
        expected_lines = fixture.read().splitlines()
        self.assertEqual(sorted(actual_lines), sorted(expected_lines))
def test_reconcile_TIMEX_S_start(self):
    # A timex covering the first tokens of a sentence is expected to open
    # immediately inside the added <s> tag.
    doc = _XmlDocument('<root>This is some annotated text. This is a second timex.</root>')
    t1 = Timex()
    t2 = Timex()
    sents = [
        [
            ('This', 'POS', {t1}),
            ('is', 'POS', {t1}),
            ('some', 'POS', set()),
            ('annotated', 'POS', set()),
            ('text', 'POS', set()),
            ('.', 'POS', set()),
        ],
        [
            ('This', 'POS', set()),
            ('is', 'POS', set()),
            ('a', 'POS', {t2}),
            ('second', 'POS', {t2}),
            ('timex.', 'POS', {t2}),
        ],
    ]
    doc.reconcile(sents, add_S='s')
    expected = xml.dom.minidom.parseString('<root><s><TIMEX>This is</TIMEX> some annotated text.</s> <s>This is <TIMEX>a second timex.</TIMEX></s></root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_TIMEX(self):
    # A 'date' timex with value, mod, freq and comment set is expected to
    # be written as a TIMEX2 tag with VAL, MOD, COMMENT and GRANUALITY
    # attributes (freq "1M" appearing as GRANUALITY="G1M").
    doc = Timex2XmlDocument('<root>This is some annotated text.</root>')
    t = Timex(type='date')
    t.value = "20100710"
    t.mod = "BEFORE"
    t.freq = "1M"
    t.comment = "Test"
    doc.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {t}),
        ('annotated', 'POS', {t}),
        ('text', 'POS', {t}),
        ('.', 'POS', set()),
    ]])
    expected = xml.dom.minidom.parseString('<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_TIMEX_embedded_nonconsuming(self):
    # A timex flagged non_consuming is expected to be emitted as an empty
    # <TIMEX /> tag before its token, while ordinary timexes wrap their
    # text (including embedded <p>/<b> elements).
    doc = _XmlDocument('<root>This is some <p>annotated</p> text. This is <b>a second timex.</b></root>')
    t1 = Timex()
    t2 = Timex()
    t3 = Timex()
    t3.non_consuming = True
    sents = [
        [
            ('This', 'POS', {t3}),
            ('is', 'POS', set()),
            ('some', 'POS', {t1}),
            ('annotated', 'POS', {t1}),
            ('text', 'POS', {t1}),
            ('.', 'POS', set()),
        ],
        [
            ('This', 'POS', set()),
            ('is', 'POS', set()),
            ('a', 'POS', {t2}),
            ('second', 'POS', {t2}),
            ('timex.', 'POS', {t2}),
        ],
    ]
    doc.reconcile(sents, add_S='s')
    expected = xml.dom.minidom.parseString('<root><s><TIMEX />This is <TIMEX>some <p>annotated</p> text</TIMEX>.</s> <s>This is <TIMEX><b>a second timex.</b></TIMEX></s></root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def apply(self, sent):
    """
    Run this rule over a tokenised sentence.

    The caller is responsible for checking the 'after' ordering so that
    rules are applied in the correct sequence.

    sent is a list of tuples (token, POS, [timexes]).

    Returns a tuple (sent, matched): sent is in the same form as the
    input, with timexes added to the third element of tokens where
    needed, and matched says whether this rule matched anything.
    """
    text = self._toks_to_str(sent)
    if self._deliminate_numbers:
        text = self._do_deliminate_numbers(text)

    # Sentence-level guards gate the whole rule
    if not self._check_guards(text, self._guards):
        return sent, False

    matched = False
    for hit in self._match.finditer(text):
        start, end = hit.start(), hit.end()

        # Before guards are checked first; after guards only if those pass
        if not (self._check_guards(text[:start], self._before_guards)
                and self._check_guards(text[end:], self._after_guards)):
            continue

        # Token markers ('<') preceding each boundary give the token
        # indices covered by the match
        first_tok = text.count('<', 0, start)
        last_tok = text.count('<', 0, end)

        if self._squelch:
            timex = None
        else:
            # A new timex is only created when not squelching
            timex = Timex(self._type)
            if self._DEBUG:
                timex.comment = self.id

        # Add TIMEX
        self._set_timex_extents(timex, sent, first_tok, last_tok, self._squelch)
        matched = True

    return sent, matched
def apply(self, sent):
    """
    Apply this rule to the tokenised sentence.

    Checking the 'after' ordering is left to the caller, which must
    ensure rules are applied in the right order.

    sent is a list of tuples (token, POS, [timexes]).

    A tuple is returned: its first element is a list in the same form as
    sent (with extra timexes added to the third element of tokens if
    need be) and its second element is whether this rule matched
    anything.
    """
    sentence_text = self._toks_to_str(sent)
    if self._deliminate_numbers:
        sentence_text = self._do_deliminate_numbers(sentence_text)

    # Bail out early when the sentence-level guards are not satisfied
    if not self._check_guards(sentence_text, self._guards):
        return sent, False

    any_match = False
    for found in self._match.finditer(sentence_text):
        preceding = sentence_text[:found.start()]
        if not self._check_guards(preceding, self._before_guards):
            continue

        following = sentence_text[found.end():]
        if not self._check_guards(following, self._after_guards):
            continue

        # Count the token markers up to each end of the match to find
        # which tokens were matched
        tok_from = sentence_text.count('<', 0, found.start())
        tok_to = sentence_text.count('<', 0, found.end())

        new_timex = None
        if not self._squelch:
            # only create a new timex if not squelching
            new_timex = Timex(self._type)
            if self._DEBUG:
                new_timex.comment = self.id

        # Add TIMEX
        self._set_timex_extents(new_timex, sent, tok_from, tok_to, self._squelch)
        any_match = True

    return sent, any_match
def _timex_from_node(self, node):
    """
    Build a timex object from the attributes of a TIMEX2 node.

    node must offer hasAttribute/getAttribute. The attributes read are
    SET, PERIODICITY, VAL, MOD, GRANUALITY and COMMENT; absent
    attributes leave the corresponding timex field untouched.
    """
    timex = Timex()

    # NOTE(review): PERIODICITY is read only for SET="yes" nodes here; the
    # nesting was reconstructed from a whitespace-collapsed source, so
    # confirm it against the original layout.
    if node.hasAttribute('SET') and node.getAttribute('SET').lower() == 'yes':
        timex.type = 'set'
        if node.hasAttribute('PERIODICITY'):
            # Swap the first character of the periodicity for 'P'
            periodicity = node.getAttribute('PERIODICITY')
            timex.value = 'P' + periodicity[1:]

    # An explicit VAL overrides any value derived from PERIODICITY
    if node.hasAttribute('VAL'):
        timex.value = node.getAttribute('VAL')

    if node.hasAttribute('MOD'):
        timex.mod = node.getAttribute('MOD')

    if node.hasAttribute('GRANUALITY'):
        # The first character of the attribute is dropped
        granularity = node.getAttribute('GRANUALITY')
        timex.freq = granularity[1:]

    if node.hasAttribute('COMMENT'):
        timex.comment = node.getAttribute('COMMENT')

    return timex
def test_parse_strip_and_reconcile(self):
    # Stripping the existing TIMEX tag and then reconciling a timex on the
    # same token is expected to reproduce the original markup.
    doc = _XmlDocument('<root>This is some <TIMEX>annotated</TIMEX> text.</root>')
    doc.strip_timexes()
    t1 = Timex()
    doc.reconcile([[
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', set()),
        ('annotated', 'POS', {t1}),
        ('text', 'POS', set()),
        ('.', 'POS', set()),
    ]])
    expected = xml.dom.minidom.parseString('<root>This is some <TIMEX>annotated</TIMEX> text.</root>').toxml()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(doc), expected)
def test_reconcile_sents(self):
    # Reconcile one timex (id 1) over the tokens 'a' and 'sentence' of a
    # GATE-format document and compare the document's string form.
    # NOTE(review): both literals are reproduced exactly as they appear in
    # this view; confirm their tab/newline layout against the original.
    document = GateDocument("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    timex = Timex(id=1)

    first_sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('a', 'POS', {timex}),
        ('sentence', 'POS', {timex}),
        ('.', '.', set()),
    ]
    second_sentence = [
        ('And', 'POS', set()),
        ('a', 'POS', set()),
        ('second', 'POS', set()),
        ('sentence', 'POS', set()),
        ('.', 'POS', set()),
    ]
    outside = [('Outside', 'POS', set())]

    document.reconcile([first_sentence, second_sentence, outside])

    rendered = str(document)
    self.assertEqual(rendered, """This is a id=t1 sentence t1 . And a second sentence . Outside """)
def test_assign_IDs(self):
    # add_timex_ids is expected to give three fresh timexes the IDs 1-3.

    # Get some sample IDs
    ts = {Timex(), Timex(), Timex()}
    add_timex_ids(ts)

    # Get the assigned IDs
    tids = {t.id for t in ts}

    # Should be exactly 3 unique IDs (assertEqual replaces the deprecated
    # assertEquals alias, which was removed from unittest in Python 3.12)
    self.assertEqual(len(tids), 3)

    # Should be consecutive; assertIn reports the set contents on failure
    self.assertIn(1, tids)
    self.assertIn(2, tids)
    self.assertIn(3, tids)
def testNoApply(self):
    # A rule whose pattern requires 'February' must not apply to tokens
    # containing 'January', and must leave the timex value unset.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><February~.+><(\d{4})~.+>', 'date', 'testNoApply', r'{#2} + "01" + {#1}')
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertFalse(rule.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.value, None)
def testApplyCorrectType(self):
    # A 'date' rule is applied to a timex of type 'time'; the first
    # element of the result is expected to be falsy.
    date_rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyCorrectType', r'{#2} + "01" + {#1}')
    time_timex = Timex(type='time')

    tokens = [
        ('06', 'POS', {time_timex}),
        ('th', 'POS', {time_timex}),
        ('January', 'POS', {time_timex}),
        ('1996', 'POS', {time_timex}),
    ]
    outcome = date_rule.apply(time_timex, '', '', tokens, [], [])
    self.assertFalse(outcome[0])
def test_reconcile_TIMEX(self):
    # Reconcile a single attribute-less timex over "some annotated text"
    # and compare with the expected <TIMEX> markup.
    document = _XmlDocument('<root>This is some annotated text.</root>')
    timex = Timex()

    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {timex}),
        ('annotated', 'POS', {timex}),
        ('text', 'POS', {timex}),
        ('.', 'POS', set()),
    ]
    document.reconcile([sentence])

    wanted = xml.dom.minidom.parseString('<root>This is <TIMEX>some annotated text</TIMEX>.</root>')
    self.assertEqual(str(document), wanted.toxml())
def testApplyInsensitive(self):
    # A pattern written with lower-case 'january' is expected to match the
    # token 'January' and normalise the timex value to '19960106'.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><january~.+><(\d{4})~.+>', 'date', 'testApplyInsensitive', r'{#2} + "01" + {#1}')
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.value, '19960106')
def testApplyFreq(self):
    # A rule given only a freq expression is expected to set t.freq.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyFreq', freq=r'"1D"')
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.freq, '1D')
def testApplyQuant(self):
    # A rule given only a quant expression is expected to set t.quant.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyQuant', quant=r'"EVERY"')
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.quant, 'EVERY')
def testApplyChangeType(self):
    # A rule given a change_type expression is expected to overwrite the
    # timex's type with the evaluated value.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyChangeType', change_type=r'"non-date"')
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.type, 'non-date')
def testApplyAll(self):
    # In an 'all' block both rules match the same tokens; the expected
    # value '19960206' is the one produced by the second rule's expression.
    rules = [
        NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyAll1', r'{#2} + "01" + {#1}'),
        NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyAll2', r'{#2} + "02" + {#1}'),
    ]
    block = NormalisationRuleBlock(None, [], 'all', rules)
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(block.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.value, '19960206')
def test_reconcile_TIMEX(self):
    # Reconcile a 'date' timex carrying value, mod, freq and comment and
    # compare with the expected TIMEX2 markup.
    document = Timex2XmlDocument('<root>This is some annotated text.</root>')

    date_timex = Timex(type='date')
    date_timex.value = "20100710"
    date_timex.mod = "BEFORE"
    date_timex.freq = "1M"
    date_timex.comment = "Test"

    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {date_timex}),
        ('annotated', 'POS', {date_timex}),
        ('text', 'POS', {date_timex}),
        ('.', 'POS', set()),
    ]
    document.reconcile([sentence])

    wanted = xml.dom.minidom.parseString('<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>')
    self.assertEqual(str(document), wanted.toxml())
def testApplyUntilSuccess2(self):
    # In an 'until-success' block the first rule (February) cannot match
    # these tokens, so the value '19960106' is expected from the second.
    rules = [
        NormalisationRule(r'<(\d+)~.+><th~.+><February~.+><(\d{4})~.+>', 'date', 'testApplyUntilSuccess2A', r'{#2} + "02" + {#1}'),
        NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyUntilSuccess2B', r'{#2} + "01" + {#1}'),
    ]
    block = NormalisationRuleBlock(None, [], 'until-success', rules)
    t = Timex(type='date')
    body = [
        ('06', 'POS', {t}),
        ('th', 'POS', {t}),
        ('January', 'POS', {t}),
        ('1996', 'POS', {t}),
    ]
    self.assertTrue(block.apply(t, '', '', body, [], [])[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.value, '19960106')
def testTag(self):
    # Load the rules from the 'test_normalisation_rules' directory next to
    # this file, annotate one sentence and check the timex value.
    rules_dir = os.path.join(os.path.dirname(__file__), 'test_normalisation_rules')
    engine = NormalisationRuleEngine()
    engine.load_rules(rules_dir)

    date_timex = Timex(type='date')
    sentence = [
        ('We', 'POS', set()),
        ('took', 'POS', set()),
        ('a', 'POS', set()),
        ('plane', 'POS', set()),
        ('on', 'POS', set()),
        ('the', 'POS', set()),
        ('06', 'POS', {date_timex}),
        ('th', 'POS', {date_timex}),
        ('January', 'POS', {date_timex}),
        ('1996', 'POS', {date_timex}),
        ('to', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ]
    engine.annotate([sentence], '')

    self.assertEqual(date_timex.value, '19960106')
def testNegAfterBlocks(self):
    # The rule carries the after-guard '!<to~.+><Atlanta~.+>' and the
    # tokens after the body are 'to Atlanta'; the first element of the
    # result is expected to be falsy.
    guarded_rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testNegAfterBlocks', r'{#2} + "01" + {#1}', after_guards=[r'!<to~.+><Atlanta~.+>'])
    timex = Timex(type='date')

    before = [
        ('We', 'POS', set()),
        ('took', 'POS', set()),
        ('a', 'POS', set()),
        ('plane', 'POS', set()),
        ('on', 'POS', set()),
        ('the', 'POS', set()),
    ]
    body = [
        ('06', 'POS', set()),
        ('th', 'POS', set()),
        ('January', 'POS', set()),
        ('1996', 'POS', set()),
    ]
    after = [
        ('to', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ]

    outcome = guarded_rule.apply(timex, '', '', body, before, after)
    self.assertFalse(outcome[0])
def testPosGuardAllows(self):
    # The positive guard '<th~.+><January~.+>' is satisfied by the body
    # tokens, so the rule is expected to apply and set value '19960106'.
    rule = NormalisationRule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testPosGuardAllows', r'{#2} + "01" + {#1}', guards=[r'<th~.+><January~.+>'])
    t = Timex(type='date')
    before = [
        ('We', 'POS', set()),
        ('took', 'POS', set()),
        ('a', 'POS', set()),
        ('plane', 'POS', set()),
        ('on', 'POS', set()),
        ('the', 'POS', set()),
    ]
    body = [
        ('06', 'POS', set()),
        ('th', 'POS', set()),
        ('January', 'POS', set()),
        ('1996', 'POS', set()),
    ]
    after = [
        ('to', 'POS', set()),
        ('Atlanta', 'POS', set()),
    ]
    self.assertTrue(rule.apply(t, '', '', body, before, after)[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(t.value, '19960106')
def test_assign_IDs_consecutive(self):
    # With one timex already holding ID 2, add_timex_ids is expected to
    # keep that ID and give the other three the remaining IDs in 1-4.

    # Get some sample IDs
    ts = {Timex(), Timex(), Timex()}
    at = Timex()
    at.id = 2
    ts.add(at)
    add_timex_ids(ts)

    # Get the assigned IDs
    tids = {t.id for t in ts}

    # Should be exactly 4 unique IDs and pre-assigned one hasn't changed
    # (assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12)
    self.assertEqual(len(tids), 4)
    self.assertEqual(2, at.id)

    # Should be consecutive for new ones
    self.assertIn(1, tids)
    self.assertIn(2, tids)
    self.assertIn(3, tids)
    self.assertIn(4, tids)
def test_attr(self):
    # Build a TempEval-2 document whose first timex has every attribute
    # populated and compare the lines of its attribute output,
    # order-insensitively, with the fixture file 'timex-attr.tab'.
    t1 = Timex(id=1, type='date')
    t2 = Timex(id=2)
    t3 = Timex(id=3)

    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3

    sentence_one = [
        ('The', 'DT', set()),
        ('first', 'JJ', {t1}),
        ('sentence', 'NN', set()),
        ('.', '.', set()),
    ]
    sentence_two = [
        ('The', 'DT', set()),
        ('second', 'JJ', {t2}),
        ('sentence', 'NN', {t2}),
        ('.', '.', {t3}),
    ]
    document = TempEval2Document.create([sentence_one, sentence_two], 'ABC1')

    with open(self.filepath('timex-attr.tab')) as fixture:
        actual_lines = document.get_attrs().splitlines()
        expected_lines = fixture.read().splitlines()
        self.assertEqual(sorted(actual_lines), sorted(expected_lines))
def test_reconcile_sents_attrs(self):
    # Reconcile a fully-populated timex over the tokens 'a' and 'sentence'
    # of a GATE-format document and compare the document's string form,
    # which is expected to list the timex attributes after the first token.
    # NOTE(review): both literals are reproduced exactly as they appear in
    # this view; confirm their tab/newline layout against the original.
    t1 = Timex(id=1, type='date')
    t2 = Timex(id=2)
    t3 = Timex(id=3)

    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3

    document = GateDocument("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")

    first_sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('a', 'POS', {t1}),
        ('sentence', 'POS', {t1}),
        ('.', '.', set()),
    ]
    second_sentence = [
        ('And', 'POS', set()),
        ('a', 'POS', set()),
        ('second', 'POS', set()),
        ('sentence', 'POS', set()),
        ('.', 'POS', set()),
    ]
    outside = [('Outside', 'POS', set())]
    document.reconcile([first_sentence, second_sentence, outside])

    rendered = str(document)
    self.assertEqual(rendered, """This is a id=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3 sentence t1 . And a second sentence . Outside """)
def _timex_from_node(self, node):
    """
    Return a new Timex built with no arguments; the node itself is not
    inspected.
    """
    blank_timex = Timex()
    return blank_timex
def test_reconcile_TIMEX(self):
    # Reconcile a timex with every TIMEX3-relevant field populated
    # (including references to three other timexes) and compare with the
    # expected TIMEX3 markup.
    document = Timex3XmlDocument('<root>This is some annotated text.</root>')

    main = Timex(type='date')
    begin = Timex(id=1)
    end = Timex(id=2)
    anchor = Timex(id=3)

    main.value = "20100710"
    main.id = 6
    main.mod = "BEFORE"
    main.freq = "1M"
    main.comment = "Test"
    main.quant = 'EVERY'
    main.temporal_function = True
    main.document_role = 'MODIFICATION_TIME'
    main.begin_timex = begin
    main.end_timex = end
    main.context = anchor

    sentence = [
        ('This', 'POS', set()),
        ('is', 'POS', set()),
        ('some', 'POS', {main}),
        ('annotated', 'POS', {main}),
        ('text', 'POS', {main}),
        ('.', 'POS', set()),
    ]
    document.reconcile([sentence])

    wanted = xml.dom.minidom.parseString('<root>This is <TIMEX3 tid="t6" beginPoint="t1" endPoint="t2" anchorTimeID="t3" functionInDocument="MODIFICATION_TIME" temporalFunction="true" type="DATE" value="20100710" mod="BEFORE" freq="1M" comment="Test" quant="EVERY">some annotated text</TIMEX3>.</root>')
    self.assertEqual(str(document), wanted.toxml())
def _timex_from_node(self, node):
    """
    Given a node representing a TIMEX3 element, return a timex object
    representing it.

    node must offer hasAttribute/getAttribute. Attributes that are absent
    leave the corresponding timex field untouched. The ID-like attributes
    (tid, beginPoint, endPoint, anchorTimeID) have their first character
    dropped and the remainder converted with int(), so a ValueError is
    raised if that remainder is not an integer.
    """
    t = Timex()

    if node.hasAttribute('tid'):
        t.id = int(node.getAttribute('tid')[1:])

    if node.hasAttribute('value'):
        t.value = node.getAttribute('value')

    if node.hasAttribute('mod'):
        t.mod = node.getAttribute('mod')

    if node.hasAttribute('type'):
        t.type = node.getAttribute('type')

    if node.hasAttribute('freq'):
        t.freq = node.getAttribute('freq')

    if node.hasAttribute('quant'):
        t.quant = node.getAttribute('quant')

    if node.hasAttribute('comment'):
        t.comment = node.getAttribute('comment')

    # Only an explicit "true" enables the flag. Testing the raw attribute
    # string for truthiness would also accept temporalFunction="false",
    # since any non-empty string is truthy.
    if node.getAttribute('temporalFunction').lower() == 'true':
        t.temporal_function = True

    if node.hasAttribute('functionInDocument'):
        t.document_role = node.getAttribute('functionInDocument')

    # The references below are stored as integer IDs, not timex objects
    if node.hasAttribute('beginPoint'):
        t.begin_timex = int(node.getAttribute('beginPoint')[1:])

    if node.hasAttribute('endPoint'):
        t.end_timex = int(node.getAttribute('endPoint')[1:])

    if node.hasAttribute('anchorTimeID'):
        t.context = int(node.getAttribute('anchorTimeID')[1:])

    return t
def test_reconcile_TIMEX(self):
    # A timex with every TIMEX3-relevant field populated (including
    # references to three other timexes) is expected to be written as a
    # TIMEX3 tag carrying all of the corresponding attributes.
    s = Timex3XmlDocument("<root>This is some annotated text.</root>")
    t = Timex(type="date")
    t1 = Timex(id=1)
    t2 = Timex(id=2)
    t3 = Timex(id=3)
    t.value = "20100710"
    t.id = 6
    t.mod = "BEFORE"
    t.freq = "1M"
    t.comment = "Test"
    t.quant = "EVERY"
    t.temporal_function = True
    t.document_role = "MODIFICATION_TIME"
    t.begin_timex = t1
    t.end_timex = t2
    t.context = t3
    s.reconcile(
        [
            [
                ("This", "POS", set()),
                ("is", "POS", set()),
                ("some", "POS", {t}),
                ("annotated", "POS", {t}),
                ("text", "POS", {t}),
                (".", "POS", set()),
            ]
        ]
    )
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        str(s),
        xml.dom.minidom.parseString(
            '<root>This is <TIMEX3 tid="t6" beginPoint="t1" endPoint="t2" anchorTimeID="t3" functionInDocument="MODIFICATION_TIME" temporalFunction="true" type="DATE" value="20100710" mod="BEFORE" freq="1M" comment="Test" quant="EVERY">some annotated text</TIMEX3>.</root>'
        ).toxml(),
    )
def test_reconcile_sents_attrs(self):
    # A fully-populated timex reconciled over the tokens 'a' and
    # 'sentence' of a GATE-format document is expected to appear in the
    # document's string form with its attributes listed after the first
    # annotated token.
    # NOTE(review): both literals are reproduced exactly as they appear in
    # this view; confirm their tab/newline layout against the original.
    t1 = Timex(id=1, type='date')
    t2 = Timex(id=2)
    t3 = Timex(id=3)
    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3
    d = GateDocument("""This POS B 20101010 is POS I a POS I sentence POS I . . I And POS B a POS I second POS I sentence POS I . POS I Outside POS O""")
    d.reconcile([
        [
            ('This', 'POS', set()),
            ('is', 'POS', set()),
            ('a', 'POS', {t1}),
            ('sentence', 'POS', {t1}),
            ('.', '.', set()),
        ],
        [
            ('And', 'POS', set()),
            ('a', 'POS', set()),
            ('second', 'POS', set()),
            ('sentence', 'POS', set()),
            ('.', 'POS', set()),
        ],
        [('Outside', 'POS', set())],
    ])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(str(d), """This is a id=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3 sentence t1 . And a second sentence . Outside """)
def test_attr(self):
    # A TempEval-2 document whose first timex has every attribute
    # populated is expected to produce attribute lines matching the
    # fixture file 'timex-attr.tab' (compared order-insensitively).
    t1 = Timex(id=1, type='date')
    t2 = Timex(id=2)
    t3 = Timex(id=3)
    t1.value = "20100710"
    t1.mod = "BEFORE"
    t1.freq = "1M"
    t1.comment = "Test"
    t1.granuality = "1D"
    t1.non_specific = True
    t1.quant = 'EVERY'
    t1.temporal_function = True
    t1.document_role = 'MODIFICATION_TIME'
    t1.begin_timex = t1
    t1.end_timex = t2
    t1.context = t3
    sents = [
        [
            ('The', 'DT', set()),
            ('first', 'JJ', {t1}),
            ('sentence', 'NN', set()),
            ('.', '.', set()),
        ],
        [
            ('The', 'DT', set()),
            ('second', 'JJ', {t2}),
            ('sentence', 'NN', {t2}),
            ('.', '.', {t3}),
        ],
    ]
    d = TempEval2Document.create(sents, 'ABC1')
    with open(self.filepath('timex-attr.tab')) as fd:
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(sorted(d.get_attrs().splitlines()), sorted(fd.read().splitlines()))