Beispiel #1
0
    def load(self,args):
        """Read CoNLL-U formatted text and add one bundle per sentence to ``self.bundles``.

        Every sentence becomes a new ``Bundle`` holding a single ``Root``;
        each token line becomes a ``Node`` whose attributes are filled from
        the tab-separated columns in the order given by ``Document.attrnames``.
        Comment lines (starting with ``#``) seen before a sentence are stored
        in ``root._aux['comment']``; multiword-token range lines (``3-4 ...``)
        are skipped.

        Args:
            args: mapping with either ``'filehandle'`` (an already opened
                stream, left open for the caller) or ``'filename'`` (a path
                that is opened here and closed before returning).

        Raises:
            KeyError: if ``args`` has neither ``'filehandle'`` nor ``'filename'``.
            IndexError: if a token line has fewer columns than
                ``Document.attrnames``, or a head value lies beyond the end
                of its sentence.
        """

        def _link_sentence(sentence_nodes):
            # sentence_nodes[0] is the root, the rest are tokens in file order;
            # HEAD values are used directly as indices into this list.
            sentence_nodes[0]._aux['descendants'] = sentence_nodes[1:]
            for token in sentence_nodes[1:]:
                token.set_parent(sentence_nodes[token.head])

        try:
            raw = args['filehandle']
            opened_here = False
        except KeyError:
            # NOTE(review): wrapping a text-mode handle in a codecs reader
            # looks Python 2 specific -- confirm before porting to Python 3.
            raw = open(args['filename'], 'r')
            opened_here = True

        try:
            fh = codecs.getreader('utf8')(raw)

            nodes = []
            comment = ''

            for line in fh:

                if line.startswith('#'):
                    comment = comment + line

                elif re.match(r'\d+-', line):  # HACK: multiword tokens temporarily avoided
                    pass

                elif line.strip():

                    if not nodes:
                        # first token of a new sentence: create its bundle and root
                        bundle = Bundle()
                        self.bundles.append(bundle)
                        root = Root() # TODO: replace with bundle.create_tree once it is debugged
                        root._aux['comment'] = comment # TODO: store this somewhere proper
                        nodes = [root]
                        bundle.trees.append(root)

                    columns = line.strip().split('\t')

                    node = Node()
                    nodes.append(node)

                    for index, attrname in enumerate(Document.attrnames):
                        setattr( node, attrname, columns[index] )

                    try:  # TODO: find out where the underscores in this column come from
                        node.head = int(node.head)
                    except ValueError:
                        node.head = 0

                    try:   # TODO: handle multiword tokens properly
                        node.ord = int(node.ord)
                    except ValueError:
                        node.ord = 0

                elif nodes:
                    # a blank line terminates the sentence collected so far
                    _link_sentence(nodes)
                    nodes = []
                    comment = ''

                # else: blank line with no open sentence (leading or repeated
                # blank line) -- nothing to finish; pending comments are kept
                # for the next sentence.

            if nodes:
                # the file ended without the trailing blank line; still finish
                # the last sentence so its nodes get their parents
                _link_sentence(nodes)

        finally:
            if opened_here:
                raw.close()
Beispiel #2
0
class LinkedList:
    """Singly linked list that tracks its head node, tail node and size.

    Values are stored in ``Node`` objects, created as ``Node(value, next)``
    and accessed through ``head()``, ``tail()``, ``setHead()`` and
    ``setTail()``. Indices are zero-based; negative or too-large indices
    raise ``IndexError``.
    """

    def __init__(self,value = None):
        """Create an empty list, or a one-element list when ``value`` is given."""
        self._size = 0
        self._headNode = None
        self._tailNode = None
        if value is not None:
            self._headNode = Node(value,None)
            self._tailNode = self._headNode
            self._size = 1

    def _nodeAt(self,index):
        """Return the node ``index`` links after the head (caller validates ``index``)."""
        walk = self._headNode
        for _ in range(index):
            walk = walk.tail()
        return walk

    def get(self,index):
        """Return the value at ``index``; raise ``IndexError`` if out of bounds."""
        self._verifyIndex(index)
        if index == self._size - 1:
            # the tail pointer gives the last value without walking the list
            return self._tailNode.head()
        return self._nodeAt(index).head()

    def set(self,index,value):
        """Overwrite the value at ``index``; raise ``IndexError`` if out of bounds."""
        self._verifyIndex(index)
        self._nodeAt(index).setHead(value)

    def frontadd(self,value):
        """Insert ``value`` before the current first element."""
        self._headNode = Node(value,self._headNode)
        # a head with nothing after it is also the last node
        if self._headNode.tail() is None:
            self._tailNode = self._headNode
        self._size += 1

    def backadd(self,value):
        """Append ``value`` after the current last element."""
        if self._headNode is None:
            self.frontadd(value)
            return
        new_node = Node(value,None)
        self._tailNode.setTail(new_node)
        self._tailNode = new_node
        self._size += 1

    def indexadd(self,index,value):
        """Insert ``value`` so that it ends up at position ``index``.

        ``index`` may equal the current size (append). Raises ``IndexError``
        when ``index`` is negative or greater than the size.
        """
        if index < 0 or index > self._size:
            raise IndexError("index out of bounds")
        if index == 0:
            self.frontadd(value)
            return
        if index == self._size:
            self.backadd(value)
            return
        before = self._nodeAt(index - 1)
        before.setTail(Node(value,before.tail()))
        self._size += 1

    def indexremove(self,index):
        """Remove the element at ``index`` and return its value.

        Raises ``IndexError`` when ``index`` is out of bounds.
        """
        self._verifyIndex(index)
        if index == 0:
            return self.frontremove()
        if index == self._size - 1:
            return self.backremove()
        before = self._nodeAt(index - 1)
        removed = before.tail()
        before.setTail(removed.tail())
        self._size -= 1
        return removed.head()

    def frontremove(self):
        """Remove the first element and return its value.

        Raises ``IndexError`` when the list is empty.
        """
        if self._headNode is None:
            raise IndexError("remove from empty list")
        rval = self._headNode.head()
        self._headNode = self._headNode.tail()
        if self._headNode is None:
            # the list is empty now; do not keep the removed node as tail
            self._tailNode = None
        self._size -= 1
        return rval

    def backremove(self):
        """Remove the last element and return its value.

        Raises ``IndexError`` when the list is empty.
        """
        if self._headNode is None:
            raise IndexError("remove from empty list")
        if self._headNode is self._tailNode:
            # single element: there is no predecessor to walk to
            return self.frontremove()
        rval = self._tailNode.head()
        walk = self._headNode
        # find the node just before the tail
        while walk.tail() is not self._tailNode:
            walk = walk.tail()
        walk.setTail(None)
        self._tailNode = walk
        self._size -= 1
        return rval

    def extract(self):
        """Return all values, first to last, as a new Python list."""
        values = []
        walk = self._headNode
        while walk is not None:
            values.append(walk.head())
            walk = walk.tail()
        return values

    def size(self):
        """Return the number of stored elements."""
        return self._size

    def _verifyIndex(self,index):
        """Raise ``IndexError`` unless ``0 <= index < size``."""
        if index < 0 or index > self._size - 1:
            raise IndexError("index out of bounds")