예제 #1
0
 def test_extents(self):
     """
     The extents table produced for a TempEval-2 document built from two
     tagged sentences should match the stored fixture file, ignoring the
     order of the lines.
     """
     t1 = ternip.timex(id=1)
     t2 = ternip.timex(id=2)
     # t1 is attached to a single token; t2 is attached to two adjacent tokens.
     sents = [
         [('The', 'DT', set()), ('first', 'JJ', set([t1])),
          ('sentence', 'NN', set()), ('.', '.', set())],
         [('The', 'DT', set()), ('second', 'JJ', set([t2])),
          ('sentence', 'NN', set([t2])), ('.', '.', set())],
     ]
     d = ternip.formats.tempeval2.create(sents, 'ABC1')
     with open(os.path.normpath('tests/formats/timex-extents.tab')) as fd:
         expected_lines = fd.read().splitlines()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(sorted(d.get_extents().splitlines()),
                      sorted(expected_lines))
예제 #2
0
    def test_reconcile_sents_attrs(self):
        """
        Reconciling a fully attributed timex into a GATE document should
        write the attribute list on the first token it covers and the bare
        timex id on the following covered token.
        """
        t1 = ternip.timex(id=1, type='date')
        t2 = ternip.timex(id=2)
        t3 = ternip.timex(id=3)
        t1.value = "20100710"
        t1.mod = "BEFORE"
        t1.freq = "1M"
        t1.comment = "Test"
        t1.granuality = "1D"
        t1.non_specific = True
        t1.quant = 'EVERY'
        t1.temporal_function = True
        t1.document_role = 'MODIFICATION_TIME'
        t1.begin_timex = t1
        t1.end_timex = t2
        t1.context = t3
        # Columns are tab-separated. The tabs are spelled as escapes so the
        # fixture does not depend on invisible whitespace in the source.
        d = ternip.formats.gate(
            "This\tPOS\tB\t20101010\n"
            "is\tPOS\tI\n"
            "a\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\t.\tI\n"
            "And\tPOS\tB\n"
            "a\tPOS\tI\n"
            "second\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\tPOS\tI\n"
            "Outside\tPOS\tO")
        d.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                      ('a', 'POS', set([t1])), ('sentence', 'POS', set([t1])),
                      ('.', '.', set())],
                     [
                         ('And', 'POS', set()),
                         ('a', 'POS', set()),
                         ('second', 'POS', set()),
                         ('sentence', 'POS', set()),
                         ('.', 'POS', set()),
                     ], [('Outside', 'POS', set())]])
        # Every output line has three columns; empty columns leave trailing
        # tabs, which is why they are written explicitly here.
        expected = (
            "This\t\t\n"
            "is\t\t\n"
            "a\tid=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3\t\n"
            "sentence\t\tt1\n"
            ".\t\t\n"
            "And\t\t\n"
            "a\t\t\n"
            "second\t\t\n"
            "sentence\t\t\n"
            ".\t\t\n"
            "Outside\t\t\n"
        )
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12 and later.
        self.assertEqual(str(d), expected)
예제 #3
0
 def testApplyAll(self):
     """
     A rule block of type 'all' holding two rules that both match should
     report success; the value asserted afterwards is the one produced by
     the second rule's expression.
     """
     pattern = r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>'
     rules = [normalisation_rule(pattern, 'date', 'testApplyAll1', r'{#2} + "01" + {#1}'),
              normalisation_rule(pattern, 'date', 'testApplyAll2', r'{#2} + "02" + {#1}')]
     b = normalisation_rule_block(None, [], 'all', rules)
     t = timex(type='date')
     body = [('06', 'POS', set([t])), ('th', 'POS', set([t])),
             ('January', 'POS', set([t])), ('1996', 'POS', set([t]))]
     self.assertTrue(b.apply(t, '', '', body, [], [])[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.value, '19960206')
예제 #4
0
파일: timex2.py 프로젝트: brucehorn/ternip
    def _timex_from_node(self, node):
        """
        Build a timex object from the attributes present on a TIMEX2 node.

        A node with SET="yes" (any case) becomes a 'set' timex whose value is
        derived from PERIODICITY; an explicit VAL attribute, if present,
        then replaces that value.
        """
        timex = ternip.timex()

        if node.hasAttribute('SET') and node.getAttribute('SET').lower() == 'yes':
            timex.type = 'set'
            if node.hasAttribute('PERIODICITY'):
                # Swap the first character of the periodicity for 'P'
                timex.value = 'P' + node.getAttribute('PERIODICITY')[1:]

        # (XML attribute, timex field, number of leading characters dropped)
        direct_copies = (
            ('VAL', 'value', 0),
            ('MOD', 'mod', 0),
            ('GRANUALITY', 'freq', 1),
            ('COMMENT', 'comment', 0),
        )
        for xml_name, field, skip in direct_copies:
            if node.hasAttribute(xml_name):
                setattr(timex, field, node.getAttribute(xml_name)[skip:])

        return timex
예제 #5
0
파일: timex2.py 프로젝트: brucehorn/ternip
 def _timex_from_node(self, node):
     """
     Translate the attributes of a TIMEX2 node into a new timex object and
     return it.
     """
     result = ternip.timex()
     
     # SET="yes" (case-insensitive) marks a set; its value comes from
     # PERIODICITY with the first character replaced by 'P'.
     is_set = node.hasAttribute('SET') and node.getAttribute('SET').lower() == 'yes'
     if is_set:
         result.type = 'set'
         if node.hasAttribute('PERIODICITY'):
             periodicity = node.getAttribute('PERIODICITY')
             result.value = 'P' + periodicity[1:]
     
     # An explicit VAL overrides any value derived from PERIODICITY.
     if node.hasAttribute('VAL'):
         result.value = node.getAttribute('VAL')
     
     if node.hasAttribute('MOD'):
         result.mod = node.getAttribute('MOD')
     
     if node.hasAttribute('GRANUALITY'):
         # The first character of the attribute is not part of the frequency.
         granularity = node.getAttribute('GRANUALITY')
         result.freq = granularity[1:]
     
     if node.hasAttribute('COMMENT'):
         result.comment = node.getAttribute('COMMENT')
     
     return result
예제 #6
0
파일: gate.py 프로젝트: brucehorn/ternip
    def test_reconcile_sents(self):
        """
        Reconciling a timex that spans two tokens into a GATE document
        should write 'id=t1' on the first covered token and 't1' on the
        second.
        """
        # Columns are tab-separated. The tabs are spelled as escapes so the
        # fixture does not depend on invisible whitespace in the source.
        d = ternip.formats.gate(
            "This\tPOS\tB\t20101010\n"
            "is\tPOS\tI\n"
            "a\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\t.\tI\n"
            "And\tPOS\tB\n"
            "a\tPOS\tI\n"
            "second\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\tPOS\tI\n"
            "Outside\tPOS\tO")
        t = ternip.timex(id=1)
        d.reconcile([
            [('This', 'POS', set()), ('is', 'POS', set()),
             ('a', 'POS', set([t])), ('sentence', 'POS', set([t])),
             ('.', '.', set())],
            [('And', 'POS', set()), ('a', 'POS', set()),
             ('second', 'POS', set()), ('sentence', 'POS', set()),
             ('.', 'POS', set())],
            [('Outside', 'POS', set())],
        ])
        # Every output line has three columns; empty columns leave trailing
        # tabs, which is why they are written explicitly here.
        expected = (
            "This\t\t\n"
            "is\t\t\n"
            "a\tid=t1\t\n"
            "sentence\t\tt1\n"
            ".\t\t\n"
            "And\t\t\n"
            "a\t\t\n"
            "second\t\t\n"
            "sentence\t\t\n"
            ".\t\t\n"
            "Outside\t\t\n"
        )
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12 and later.
        self.assertEqual(str(d), expected)
예제 #7
0
파일: timex2.py 프로젝트: brucehorn/ternip
 def test_reconcile_TIMEX_SET(self):
     """
     A 'set' timex reconciled into a TIMEX2 document should be serialised
     with SET="YES", its MOD, and a PERIODICITY attribute in place of VAL.
     """
     s = ternip.formats.timex2('<root>This is some annotated text.</root>')
     t = ternip.timex(type='set')
     t.value = "P6M"
     t.mod = "BEFORE"
     s.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                   ('some', 'POS', set([t])),
                   ('annotated', 'POS', set([t])),
                   ('text', 'POS', set([t])), ('.', 'POS', set())]])
     # Round-trip the expected markup through minidom so both sides use the
     # same serialisation.
     expected = xml.dom.minidom.parseString(
         '<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>'
     ).toxml()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(str(s), expected)
예제 #8
0
 def test_assign_IDs(self):
     """
     add_timex_ids should give three ID-less timexes the distinct IDs
     1, 2 and 3.
     """
     # Get some sample timexes without IDs
     ts = set([ternip.timex(), ternip.timex(), ternip.timex()])
     ternip.add_timex_ids(ts)

     # Collect the assigned IDs
     tids = set(t.id for t in ts)

     # Should be exactly 3 unique IDs. assertEqual replaces the deprecated
     # assertEquals alias, which no longer exists in Python 3.12 and later.
     self.assertEqual(len(tids), 3)

     # Should be consecutive; assertIn reports the missing ID on failure
     for expected_id in (1, 2, 3):
         self.assertIn(expected_id, tids)
예제 #9
0
파일: timex2.py 프로젝트: brucehorn/ternip
 def test_reconcile_TIMEX(self):
     """
     A 'date' timex reconciled into a TIMEX2 document should be serialised
     with its VAL, MOD, COMMENT and (prefixed) granularity attributes
     around the tokens it covers.
     """
     s = ternip.formats.timex2('<root>This is some annotated text.</root>')
     t = ternip.timex(type='date')
     t.value = "20100710"
     t.mod = "BEFORE"
     t.freq = "1M"
     t.comment = "Test"
     s.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                   ('some', 'POS', set([t])),
                   ('annotated', 'POS', set([t])),
                   ('text', 'POS', set([t])), ('.', 'POS', set())]])
     # Round-trip the expected markup through minidom so both sides use the
     # same serialisation.
     expected = xml.dom.minidom.parseString(
         '<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>'
     ).toxml()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(str(s), expected)
예제 #10
0
 def test_attr(self):
     """
     The attribute table produced for a TempEval-2 document should match
     the stored fixture file, ignoring the order of the lines.
     """
     t1 = ternip.timex(id=1, type='date')
     t2 = ternip.timex(id=2)
     t3 = ternip.timex(id=3)
     t1.value = "20100710"
     t1.mod = "BEFORE"
     t1.freq = "1M"
     t1.comment = "Test"
     t1.granuality = "1D"
     t1.non_specific = True
     t1.quant = 'EVERY'
     t1.temporal_function = True
     t1.document_role = 'MODIFICATION_TIME'
     t1.begin_timex = t1
     t1.end_timex = t2
     t1.context = t3
     sents = [
         [('The', 'DT', set()), ('first', 'JJ', set([t1])),
          ('sentence', 'NN', set()), ('.', '.', set())],
         [('The', 'DT', set()), ('second', 'JJ', set([t2])),
          ('sentence', 'NN', set([t2])), ('.', '.', set([t3]))],
     ]
     d = ternip.formats.tempeval2.create(sents, 'ABC1')
     with open(os.path.normpath('tests/formats/timex-attr.tab')) as fd:
         expected_lines = fd.read().splitlines()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(sorted(d.get_attrs().splitlines()),
                      sorted(expected_lines))
예제 #11
0
파일: gate.py 프로젝트: brucehorn/ternip
    def test_reconcile_sents_attrs(self):
        """
        Reconciling a fully attributed timex into a GATE document should
        write the attribute list on the first token it covers and the bare
        timex id on the following covered token.
        """
        t1 = ternip.timex(id=1, type='date')
        t2 = ternip.timex(id=2)
        t3 = ternip.timex(id=3)
        t1.value = "20100710"
        t1.mod = "BEFORE"
        t1.freq = "1M"
        t1.comment = "Test"
        t1.granuality = "1D"
        t1.non_specific = True
        t1.quant = 'EVERY'
        t1.temporal_function = True
        t1.document_role = 'MODIFICATION_TIME'
        t1.begin_timex = t1
        t1.end_timex = t2
        t1.context = t3
        # Columns are tab-separated. The tabs are spelled as escapes so the
        # fixture does not depend on invisible whitespace in the source.
        d = ternip.formats.gate(
            "This\tPOS\tB\t20101010\n"
            "is\tPOS\tI\n"
            "a\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\t.\tI\n"
            "And\tPOS\tB\n"
            "a\tPOS\tI\n"
            "second\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\tPOS\tI\n"
            "Outside\tPOS\tO")
        d.reconcile([
            [('This', 'POS', set()), ('is', 'POS', set()),
             ('a', 'POS', set([t1])), ('sentence', 'POS', set([t1])),
             ('.', '.', set())],
            [('And', 'POS', set()), ('a', 'POS', set()),
             ('second', 'POS', set()), ('sentence', 'POS', set()),
             ('.', 'POS', set())],
            [('Outside', 'POS', set())],
        ])
        # Every output line has three columns; empty columns leave trailing
        # tabs, which is why they are written explicitly here.
        expected = (
            "This\t\t\n"
            "is\t\t\n"
            "a\tid=t1,value=20100710,type=DATE,mod=BEFORE,freq=1M,quant=EVERY,temporalFunction=true,functionInDocument=MODIFICATION_TIME,beginPoint=t1,endPoint=t2,anchorTimeID=t3\t\n"
            "sentence\t\tt1\n"
            ".\t\t\n"
            "And\t\t\n"
            "a\t\t\n"
            "second\t\t\n"
            "sentence\t\t\n"
            ".\t\t\n"
            "Outside\t\t\n"
        )
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12 and later.
        self.assertEqual(str(d), expected)
예제 #12
0
 def test_assign_IDs_consecutive(self):
     """
     add_timex_ids should leave a pre-assigned ID untouched and give the
     remaining timexes IDs so that the set ends up holding 1 through 4.
     """
     # Three timexes without IDs plus one that already has ID 2
     ts = set([ternip.timex(), ternip.timex(), ternip.timex()])
     at = ternip.timex()
     at.id = 2
     ts.add(at)
     ternip.add_timex_ids(ts)

     # Collect the assigned IDs
     tids = set(t.id for t in ts)

     # Should be exactly 4 unique IDs and pre-assigned one hasn't changed.
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(len(tids), 4)
     self.assertEqual(2, at.id)

     # Should be consecutive for new ones; assertIn reports the missing ID
     for expected_id in (1, 2, 3, 4):
         self.assertIn(expected_id, tids)
예제 #13
0
파일: timex2.py 프로젝트: brucehorn/ternip
 def test_reconcile_TIMEX_SET(self):
     """
     A 'set' timex reconciled into a TIMEX2 document should be serialised
     with SET="YES", its MOD, and a PERIODICITY attribute in place of VAL.
     """
     s = ternip.formats.timex2('<root>This is some annotated text.</root>')
     t = ternip.timex(type='set')
     t.value = "P6M"
     t.mod = "BEFORE"
     tagged = set([t])
     s.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                   ('some', 'POS', set(tagged)),
                   ('annotated', 'POS', set(tagged)),
                   ('text', 'POS', set(tagged)), ('.', 'POS', set())]])
     # Round-trip the expected markup through minidom so both sides use the
     # same serialisation.
     expected = xml.dom.minidom.parseString(
         '<root>This is <TIMEX2 PERIODICITY="F6M" MOD="BEFORE" SET="YES">some annotated text</TIMEX2>.</root>'
     ).toxml()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(str(s), expected)
예제 #14
0
파일: timex2.py 프로젝트: brucehorn/ternip
 def test_reconcile_TIMEX(self):
     """
     A 'date' timex reconciled into a TIMEX2 document should be serialised
     with its VAL, MOD, COMMENT and (prefixed) granularity attributes
     around the tokens it covers.
     """
     s = ternip.formats.timex2('<root>This is some annotated text.</root>')
     t = ternip.timex(type='date')
     t.value = "20100710"
     t.mod = "BEFORE"
     t.freq = "1M"
     t.comment = "Test"
     tagged = set([t])
     s.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                   ('some', 'POS', set(tagged)),
                   ('annotated', 'POS', set(tagged)),
                   ('text', 'POS', set(tagged)), ('.', 'POS', set())]])
     # Round-trip the expected markup through minidom so both sides use the
     # same serialisation.
     expected = xml.dom.minidom.parseString(
         '<root>This is <TIMEX2 VAL="20100710" MOD="BEFORE" COMMENT="Test" GRANUALITY="G1M">some annotated text</TIMEX2>.</root>'
     ).toxml()
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(str(s), expected)
예제 #15
0
 def testTag(self):
     """
     Annotating a sentence with the normalisation rule engine, using the
     rules loaded from the test rule directory, should set the value of
     the date timex to '19960106'.
     """
     e = ternip.rule_engine.normalisation_rule_engine()
     e.load_rules('tests/rule_engine/test_normalisation_rules/')
     t = ternip.timex(type='date')
     sentence = [
         ('We', 'POS', set()),
         ('took', 'POS', set()),
         ('a', 'POS', set()),
         ('plane', 'POS', set()),
         ('on', 'POS', set()),
         ('the', 'POS', set()),
         # Only these four tokens belong to the timex
         ('06', 'POS', set([t])),
         ('th', 'POS', set([t])),
         ('January', 'POS', set([t])),
         ('1996', 'POS', set([t])),
         ('to', 'POS', set()),
         ('Atlanta', 'POS', set()),
     ]
     e.annotate([sentence], '')
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.value, '19960106')
예제 #16
0
    def _timex_from_node(self, node):
        """
        Given a node representing a TIMEX3 element, return a timex object
        representing it.

        Only attributes present on the node are copied. The ID-valued
        attributes (tid, beginPoint, endPoint, anchorTimeID) are stored as
        integers with their first character dropped, so a malformed ID
        raises ValueError from int().
        """
        t = ternip.timex()

        if node.hasAttribute('tid'):
            t.id = int(node.getAttribute('tid')[1:])

        # Attributes whose text is copied across unchanged
        verbatim = (
            ('value', 'value'),
            ('mod', 'mod'),
            ('type', 'type'),
            ('freq', 'freq'),
            ('quant', 'quant'),
            ('comment', 'comment'),
            ('functionInDocument', 'document_role'),
        )
        for xml_name, field in verbatim:
            if node.hasAttribute(xml_name):
                setattr(t, field, node.getAttribute(xml_name))

        # Only an explicit "true" turns the flag on. Testing the raw string
        # for truthiness would also accept temporalFunction="false".
        # getAttribute yields '' when the attribute is missing, so no
        # separate hasAttribute check is needed.
        if node.getAttribute('temporalFunction').strip().lower() == 'true':
            t.temporal_function = True

        # References to other timexes, kept as integer IDs
        references = (
            ('beginPoint', 'begin_timex'),
            ('endPoint', 'end_timex'),
            ('anchorTimeID', 'context'),
        )
        for xml_name, field in references:
            if node.hasAttribute(xml_name):
                setattr(t, field, int(node.getAttribute(xml_name)[1:]))

        return t
예제 #17
0
파일: timex3.py 프로젝트: brucehorn/ternip
 def _timex_from_node(self, node):
     """
     Given a node representing a TIMEX3 element, return a timex object
     representing it.

     Only attributes present on the node are copied. The ID-valued
     attributes (tid, beginPoint, endPoint, anchorTimeID) are stored as
     integers with their first character dropped, so a malformed ID
     raises ValueError from int().
     """
     t = ternip.timex()

     if node.hasAttribute('tid'):
         t.id = int(node.getAttribute('tid')[1:])

     if node.hasAttribute('value'):
         t.value = node.getAttribute('value')

     if node.hasAttribute('mod'):
         t.mod = node.getAttribute('mod')

     if node.hasAttribute('type'):
         t.type = node.getAttribute('type')

     if node.hasAttribute('freq'):
         t.freq = node.getAttribute('freq')

     if node.hasAttribute('quant'):
         t.quant = node.getAttribute('quant')

     if node.hasAttribute('comment'):
         t.comment = node.getAttribute('comment')

     # Only an explicit "true" turns the flag on. Testing the raw string
     # for truthiness would also accept temporalFunction="false".
     # getAttribute yields '' when the attribute is missing, so no separate
     # hasAttribute check is needed.
     if node.getAttribute('temporalFunction').strip().lower() == 'true':
         t.temporal_function = True

     if node.hasAttribute('functionInDocument'):
         t.document_role = node.getAttribute('functionInDocument')

     # References to other timexes are kept as integer IDs
     if node.hasAttribute('beginPoint'):
         t.begin_timex = int(node.getAttribute('beginPoint')[1:])

     if node.hasAttribute('endPoint'):
         t.end_timex = int(node.getAttribute('endPoint')[1:])

     if node.hasAttribute('anchorTimeID'):
         t.context = int(node.getAttribute('anchorTimeID')[1:])

     return t
예제 #18
0
 def testPosGuardBlocks(self):
     """
     A rule carrying a guard pattern that the timex body does not satisfy
     should report that it was not applied.
     """
     rule = normalisation_rule(
         r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
         'date',
         'testPosGuardBlocks',
         r'{#2} + "01" + {#1}',
         guards=[r'<th~.+><February~.+>'],
     )
     t = timex(type='date')

     # Every token gets its own empty timex set
     before = [(word, 'POS', set())
               for word in ('We', 'took', 'a', 'plane', 'on', 'the')]
     body = [(word, 'POS', set())
             for word in ('06', 'th', 'January', '1996')]
     after = [(word, 'POS', set()) for word in ('to', 'Atlanta')]

     applied = rule.apply(t, '', '', body, before, after)[0]
     self.assertFalse(applied)
예제 #19
0
 def testNegAfterAllows(self):
     """
     A rule with a '!'-prefixed after-guard whose pattern does not occur in
     the tokens following the timex should still apply and set the value.
     """
     rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testNegAfterAllows', r'{#2} + "01" + {#1}',
                               after_guards = [r'!<a~.+><plane~.+>'])
     t = timex(type='date')
     before = [('We', 'POS', set()),
               ('took', 'POS', set()),
               ('a', 'POS', set()),
               ('plane', 'POS', set()),
               ('on', 'POS', set()),
               ('the', 'POS', set())]
     body = [('06', 'POS', set()),
             ('th', 'POS', set()),
             ('January', 'POS', set()),
             ('1996', 'POS', set())]
     # "a plane" appears only before the timex, never after it
     after = [('to', 'POS', set()),
              ('Atlanta', 'POS', set())]
     self.assertTrue(rule.apply(t, '', '', body, before, after)[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.value, '19960106')
예제 #20
0
    def test_reconcile_sents(self):
        """
        Reconciling a timex that spans two tokens into a GATE document
        should write 'id=t1' on the first covered token and 't1' on the
        second.
        """
        # Columns are tab-separated. The tabs are spelled as escapes so the
        # fixture does not depend on invisible whitespace in the source.
        d = ternip.formats.gate(
            "This\tPOS\tB\t20101010\n"
            "is\tPOS\tI\n"
            "a\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\t.\tI\n"
            "And\tPOS\tB\n"
            "a\tPOS\tI\n"
            "second\tPOS\tI\n"
            "sentence\tPOS\tI\n"
            ".\tPOS\tI\n"
            "Outside\tPOS\tO")
        t = ternip.timex(id=1)
        d.reconcile([[('This', 'POS', set()), ('is', 'POS', set()),
                      ('a', 'POS', set([t])), ('sentence', 'POS', set([t])),
                      ('.', '.', set())],
                     [
                         ('And', 'POS', set()),
                         ('a', 'POS', set()),
                         ('second', 'POS', set()),
                         ('sentence', 'POS', set()),
                         ('.', 'POS', set()),
                     ], [('Outside', 'POS', set())]])
        # Every output line has three columns; empty columns leave trailing
        # tabs, which is why they are written explicitly here.
        expected = (
            "This\t\t\n"
            "is\t\t\n"
            "a\tid=t1\t\n"
            "sentence\t\tt1\n"
            ".\t\t\n"
            "And\t\t\n"
            "a\t\t\n"
            "second\t\t\n"
            "sentence\t\t\n"
            ".\t\t\n"
            "Outside\t\t\n"
        )
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12 and later.
        self.assertEqual(str(d), expected)
예제 #21
0
파일: warn.py 프로젝트: jasonzou/entex
 def test_warn(self):
     """
     Applying a rule whose value expression calls a function that does not
     exist should be counted as exactly one warning.
     """
     # Do something that generates a warning
     t = timex()
     r = normalisation_rule('test', value='non_existent_function()')
     r.apply(t, '', '', [('test', 'POS', set([t]))], [], [])
     # self.w is presumably a warning counter installed by the fixture
     # set-up, which is not visible here. assertEqual replaces the
     # deprecated assertEquals alias, which no longer exists in Python 3.12
     # and later.
     self.assertEqual(1, self.w.num)
예제 #22
0
 def testApplyFreq(self):
     """
     A matching rule given a freq expression should apply and set the
     timex's freq to the evaluated expression.
     """
     rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyFreq', freq=r'"1D"')
     t = timex(type='date')
     body = [('06', 'POS', set([t])), ('th', 'POS', set([t])),
             ('January', 'POS', set([t])), ('1996', 'POS', set([t]))]
     self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.freq, '1D')
예제 #23
0
 def testApplyValue(self):
     """
     A matching rule should apply and set the timex's value from the
     value expression built out of the captured groups.
     """
     rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyValue', r'{#2} + "01" + {#1}')
     t = timex(type='date')
     body = [('06', 'POS', set([t])), ('th', 'POS', set([t])),
             ('January', 'POS', set([t])), ('1996', 'POS', set([t]))]
     self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.value, '19960106')
예제 #24
0
 def testApplyQuant(self):
     """
     A matching rule given a quant expression should apply and set the
     timex's quant to the evaluated expression.
     """
     rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyQuant', quant=r'"EVERY"')
     t = timex(type='date')
     body = [('06', 'POS', set([t])), ('th', 'POS', set([t])),
             ('January', 'POS', set([t])), ('1996', 'POS', set([t]))]
     self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.quant, 'EVERY')
예제 #25
0
 def testApplyCorrectType(self):
     """
     A rule declared for 'date' timexes should report that it was not
     applied when offered a timex of type 'time'.
     """
     rule = normalisation_rule(
         r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>',
         'date',
         'testApplyCorrectType',
         r'{#2} + "01" + {#1}',
     )
     t = timex(type='time')
     # Each token carries its own set holding the timex
     tokens = [(word, 'POS', set([t]))
               for word in ('06', 'th', 'January', '1996')]
     applied = rule.apply(t, '', '', tokens, [], [])[0]
     self.assertFalse(applied)
예제 #26
0
 def testApplyChangeType(self):
     """
     A matching rule given a change_type expression should apply and
     replace the timex's type with the evaluated expression.
     """
     rule = normalisation_rule(r'<(\d+)~.+><th~.+><January~.+><(\d{4})~.+>', 'date', 'testApplyChangeType', change_type=r'"non-date"')
     t = timex(type='date')
     body = [('06', 'POS', set([t])), ('th', 'POS', set([t])),
             ('January', 'POS', set([t])), ('1996', 'POS', set([t]))]
     self.assertTrue(rule.apply(t, '', '', body, [], [])[0])
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12 and later.
     self.assertEqual(t.type, 'non-date')