forked from rlsummerscales/acres
/
annotation.py
executable file
·131 lines (111 loc) · 4.26 KB
/
annotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/python
# author: Rodney Summerscales
# define classes for an annotation
import xmlutil
import xml.dom
from xml.dom import minidom
from xml.dom.minidom import Document
##############################################################
# stores an annotation
##############################################################
class Annotation:
    """ maintain information related to a token annotation or label assigned
        by a classifier

        type       = annotation type string (e.g. group, outcome number)
        attributes = dict mapping attribute name (e.g. id, role) to its value
    """
    # Class-level defaults. __init__ rebinds both names on every instance, so
    # the dict below is not shared between normally constructed annotations.
    type = ''          # e.g. group, outcome number, etc
    attributes = {}    # e.g. id, role, etc

    def __init__(self, type=''):
        """ initialize a new annotation with default values

            type = annotation type string (defaults to the empty string)
        """
        self.attributes = {}
        self.type = type

    def parseXML(self, node=None):
        """ load information from an xml node

            node = xml element to read, or None to leave this annotation
                   unchanged.

            The element's 'type' attribute is lower-cased and stored as the
            annotation type. Every child *element* becomes an attribute named
            after its tag, with the value returned by xmlutil.getText().
            Attributes already present are kept unless overwritten.
        """
        # nothing to parse if no xml element was given
        if node is None:
            return

        self.type = node.getAttribute('type').lower()
        for childNode in node.childNodes:
            # skip text, comment, etc. nodes; only elements carry attributes
            if childNode.nodeType == xml.dom.Node.ELEMENT_NODE:
                self.attributes[childNode.tagName] = xmlutil.getText(childNode)

    def copy(self, annotation):
        """ copy annotation information from a given annotation to this one

            The type is replaced. The attributes of the given annotation are
            merged into this one: matching names are overwritten, attributes
            that exist only on this annotation are left in place.
        """
        self.type = annotation.type
        self.attributes.update(annotation.attributes)

    def getXML(self, doc, elementName):
        """ return an xml node that contains label information

            doc         = xml document used to create the new nodes
            elementName = tag name for the returned element

            The element gets a 'type' attribute and one child per annotation
            attribute, built with xmlutil.createNodeWithTextChild().
        """
        node = doc.createElement(elementName)
        node.setAttribute('type', self.type)
        for attrib, value in self.attributes.items():
            node.appendChild(
                xmlutil.createNodeWithTextChild(doc, attrib, value))
        return node

    def getAttributeValue(self, attrib):
        """ return the value of the given attribute
            or empty string if the annotation does not have such an attribute
        """
        return self.attributes.get(attrib, '')

    def setAttributeValue(self, attrib, value):
        """ set a given attribute value for this annotation
            attrib = name of attribute
            value = value for the attribute
        """
        self.attributes[attrib] = value
##############################################################
# manage a list of annotations
##############################################################
class AnnotationList:
    """ maintain a collection of Annotation objects

        Despite the name, annotations are stored in a dict keyed by each
        annotation's type, so adding an annotation whose type is already
        present replaces the earlier one.
    """
    # Class-level defaults; __init__ rebinds all three on every instance.
    __annotations = {}      # actually a hash of annotations, keyed by name
    __index = 0             # cursor used by the legacy next() method
    __annotationList = []   # snapshot walked by the legacy next() method

    def __init__(self, nodeList=None):
        """ create new annotation list given a list of xml element nodes
            of type "annotation"

            nodeList = iterable of xml nodes; None (the default) or an empty
                       list creates an empty annotation list
        """
        self.__annotations = {}
        self.__index = 0
        self.__annotationList = []

        if nodeList is None:
            nodeList = ()

        # parse each xml node into an Annotation, keyed by its type
        for aNode in nodeList:
            annotation = Annotation()
            annotation.parseXML(aNode)
            self.__annotations[annotation.type] = annotation

    def contains(self, name):
        """ return true if an annotation with given name is in list """
        return name in self.__annotations

    def get(self, name):
        """ return the annotation with the given name,
            or None if there is no such annotation """
        return self.__annotations.get(name)

    def add(self, annotation):
        """ add a new annotation to the list.
            annotation = Annotation object to add

            An existing annotation of the same type is replaced.
        """
        self.__annotations[annotation.type] = annotation

    def remove(self, name):
        """ remove an annotation with a given name
            (does nothing if there is no such annotation) """
        # pop() with a default deletes the entry when present and is a no-op
        # otherwise, so no follow-up membership check is required.
        self.__annotations.pop(name, None)

    def __contains__(self, name):
        """ implement the 'in' operator. return true if given name is in list
            of annotations. """
        return name in self.__annotations

    def __len__(self):
        """ implement len() method """
        return len(self.__annotations)

    # routines needed for implementing the iterator
    def __iter__(self):
        """ return an iterator over the stored Annotation objects

            Iteration runs over a snapshot taken at call time, and every call
            returns an independent iterator, so nested loops over the same
            list do not disturb each other. The cursor used by next() is also
            reset to the start of the same snapshot.
        """
        snapshot = list(self.__annotations.values())
        self.__index = 0
        self.__annotationList = snapshot
        return iter(snapshot)

    def next(self):
        """ return the next annotation from the snapshot taken by the most
            recent __iter__() call; raise StopIteration when exhausted.

            Kept for callers that drive the list object directly.
        """
        if self.__index >= len(self.__annotationList):
            raise StopIteration
        item = self.__annotationList[self.__index]
        self.__index += 1
        return item

    # Python 3 spelling of the iterator-protocol method
    __next__ = next