Beispiel #1
0
				l.append(l[-1])
				k += 1
				j += 1
		else:
			l.append(chr(random.randint(ord('a'), ord('z'))))
		j += 1
	#
	# Now copy the list and test it
	#
	start_offset = random.randint(0, str_len - 3)
	end_offset = random.randint(start_offset + 1, str_len - 1)
	save_l = []
	save_l.extend(l)
	q = SBList(l)	
	q.sort(start=start_offset, end=end_offset)
	l_new = q.return_list()
	save_l2 = subsort(save_l, start_offset, end_offset)
	# The diff() function seems to remove the sort from
	# my saved list

	### TEMPORARY HACK TO TEST ERROR DETECTION:
	##save_l2[3] = 'zzTEST'
	difflist = list(difflib.ndiff(save_l2, l_new))
	if len(difflist) != len(q):
		print('================== verify failed')
		print("start idx " + str(start_offset) + ' end idx ' + str(end_offset))
		print('save:\n' + repr(save_l2))
		print('new:\n' + repr(l_new))
		raise Exception('Verify failed')
print('finished with no errors')
Beispiel #2
0
# Apply deletes and inserts to q (created before this excerpt; presumably
# an SBList -- confirm) and comparable edits to the plain list l, then
# check that both containers hold the same items.
del q[4]
del q[0]
q.insert(2, 'after c')
q.insert(len(q), "last line")
# NOTE(review): l gets one more delete (index 5) than q does in the
# visible lines; presumably q's matching delete happened earlier -- confirm.
del l[5]
del l[4]
del l[0]
l.insert(2, 'after c')
# NOTE(review): the index below is len(q), not len(l).  q already has its
# "last line" item at this point, so the index is probably past the end of
# l; list.insert() then appends -- confirm that this was intended.
l.insert(len(q), "last line")
# Item-by-item comparison through q's index operator.
for j in range(len(l)):
	assert(q[j] == l[j])

# A fresh SBList built from l must compare equal to the edited q.
revised = SBList(l)
assert(revised == q)

# Converting q back with return_list() must reproduce l.
qlist = q.return_list()
assert(l == qlist)
print("delete last item, when last item is a single range  ")
# Delete the final item through q.delete() and print the result for
# manual inspection.
q.delete(len(q) - 1)
print(repr(q))
print(q.show_state() + '\n')

######################## multiple delete:
#from SBList01 import *

# Slice deletion on a plain Python list, with the expected results pinned
# by asserts.
l = ['a', 'b', 'c', 'd', 'e', 'f']
del l[1:3]
assert(l == ['a', 'd', 'e', 'f'])
# Sanity check that list comparison can fail, so the asserts above are
# meaningful.
assert(l != ['a junk list to be sure that the comparison works'])
del l[0:2]
assert(l == ['e', 'f'])
Beispiel #3
0
class VirtView():
	'''class VirtView()
	THIS SHOULD PROBABLY MOVE INSIDE SBSTRING OBJECT
	AND BE RENAMED.

	This object holds a list (self.state) of abstracted pointers
	that give a logical view of the foreign text in an SBString
	object.  If edits are made to the foreign text, new text
	entries are appended to the end of the foreign list, but the
	pointers here are ordered so that the intended logical view
	of the foreign text is preserved.

	Example: the foreign list contains the text "abcdefghij", then I
	insert "zz" after "d", which is offset 4.  The new text ("zz")
	is appended to the end of the foreign list object
	but the entry in this object becomes:
	   [[0, 4, 0], [0, 2, 1], [4, 10, 0]]
	The entry for [0, 2, 1] means to grab bytes 0-1 (which
	is python slice s[0:2]) from the string that is stored
	at index 1 in the foreign list.

	*range* format: [start, end, list_offset_for_text]
	where *start* and *end* are python *slice()* numbers that
	point into the text that is referenced by the list offset.
	'''
	class StateEntry(object):
		'''class StateEntry()
		One *range*: the slice [start_pt:end_pt] of the text that is
		stored at index list_idx of the foreign list.  length is
		cached as end_pt - start_pt when the entry is created.
		'''
		start_pt = None
		end_pt = None
		list_idx = None
		length = None
		def __init__(self, start_pt, end_pt, list_idx):
			object.__init__(self)
			self.start_pt = start_pt
			self.end_pt = end_pt
			self.list_idx = list_idx
			self.length = self.end_pt - self.start_pt
		def __repr__(self):
			# Shown as [start, end, list_idx, length].
			return('[' + str(self.start_pt) + ', ' + str(self.end_pt) \
				+ ', '+ str(self.list_idx) + ', ' + str(self.length) + ']')

	class StateDeRef(object):
		'''class StateDeRef()
		This class will hold some values that will help to
		translate an entry in the state list to an entry
		in the main list.

		The state list holds range entries with [start, end]
		indexes (in python slice() format) that refer to
		entries in the main list object.

		state_offset = the zero-based offset into the state
		               object.
		state_adj    = a virtual offset that would point to
		               a logical value in the range [start, end].
		               For example, if the state entry is [10, 20]
		               and state_adj = 3, then the reference is to
		               an entry in the main list at offset 13, which
		               is 10 + 3.
		str_ptr      = start of the range plus state_adj (the
		               offset inside the referenced text).
		list_idx     = the list_idx stored in the state entry
		               at state_offset.

		Iterating over the object yields the four values in the
		order state_offset, state_adj, str_ptr, list_idx, so it can
		be unpacked like a 4-item sequence.
		'''

		state_offset = None
		state_adj = None
		str_ptr = None
		list_idx = None
		def __init__(self, s_offset, s_adj, str_ptr, l_idx):
			'''StateDeRef.__init()
			'''
			object.__init__(self)
			self.state_offset = s_offset
			self.state_adj = s_adj
			self.str_ptr = str_ptr
			self.list_idx = l_idx
		def __iter__ (self):
			# Maybe add a flag that will lock the objects from
			# being altered when iter is active?
			self.iterindex = 0
			return(self)
		def __next__(self):
			# The previous countdown decremented before testing, so the
			# first field (state_offset) was never produced.  Counting up
			# through a tuple yields all four fields in order.
			fields = (self.state_offset, self.state_adj,
				self.str_ptr, self.list_idx)
			if self.iterindex >= len(fields):
				raise StopIteration
			value = fields[self.iterindex]
			self.iterindex += 1
			return(value)

		def __repr__(self):
			# __repr__ must return a str; returning a list makes repr()
			# raise TypeError.
			return('[' + ', '.join([repr(self.state_offset),
				repr(self.state_adj), repr(self.str_ptr),
				repr(self.list_idx)]) + ']')

	def __init__(self, state_entry):
		'''VirtView.__init__()
		Start the state list with a single *range* entry.
		'''
		self.state = SBList([state_entry])
		# use self.get_state_id()
		#self.state_id = 1 # NOT USED??

	def __iter__ (self):
		# Maybe add a flag that will lock the objects from
		# being altered when iter is active?
		# iterindex counts how many entries are still to be returned.
		self.iterindex = len(self.state)
		return(self)

	def __getitem__(self, i):
		'''VirtView.__getitem__()
		Return the *range* entry stored at state offset i.
		'''
		return(self.state[i])

	def __len__(self):
		'''VirtView.__len__()
		Returns the number of saved states.
		'''
		return(len(self.state))

	def __next__ (self):
		'''VirtView.__next__
		Returns a range object, which contains
		a start offset, end offset, and an id for
		a string stored elsewhere.  The start/end
		pair follows python slice() notation where
		end is one greater than the value that will
		be returned when used as list[start : end]
		'''
		if self.iterindex == 0:
			raise StopIteration
		# Compute the index BEFORE decrementing: the old order skipped
		# entry 0 and then read one entry past the end of the list.
		idx = len(self.state) - self.iterindex
		self.iterindex -= 1
		return(self.state[idx])

	def __repr__(self):
		return(repr(self.state))


	def _get_vl_idx(self, str_offset, state_id=-1):
		'''VirtView._get_vl_idx()
		Return pointers into state and a dereferenced
		pointer into the the foreign list for the given
		character offset.

		The result is a StateDeRef holding:
		  state_offset = index of the state entry whose range spans
		                 str_offset
		  state_adj    = distance of str_offset from the first
		                 character of that range
		  str_ptr      = start_pt of that range + state_adj
		  list_idx     = list_idx of that range

		If the character offset is beyond the
		virtual length of the underlying object, this
		returns [None, None, None, None], which might mean
		to append a proposed insert at the end of the list.
		NOTE(review): that out-of-range result is a plain list, not a
		StateDeRef, so callers must check for it before using the
		attribute names.
		'''
		if state_id == -1: state_id = self.get_state_id()
		str_len = self.get_llen(state_id=state_id)
		if str_offset >= str_len:
			return([None, None, None, None])
		# range_start is the virtual offset of the first character
		# covered by the state entry being examined.  Walk the entries
		# until one of them spans the requested offset.
		range_start = 0
		l_adjust = None
		for s_offset in range(len(self.state)):
			entry = self.state[s_offset]
			# Highest virtual offset covered by this entry.
			range_last = range_start + entry.end_pt - entry.start_pt - 1
			if range_last < str_offset:
				# Not far enough yet, move on to the next entry.
				range_start = range_last + 1
			else:
				# The desired offset is inside this entry.
				l_adjust = str_offset - range_start
				break
		# str_pt is the character offset within the chunk of text and
		# string_id is the index of that chunk in the foreign list.
		str_pt = self.state[s_offset].start_pt + l_adjust
		string_id = self.state[s_offset].list_idx
		return(self.StateDeRef(s_offset, l_adjust, str_pt, string_id))

	def delete_state(self, s_offset, batch=False, state_id=-1):
		'''VirtView.delete_state()
		Delete a *range* item from the state.
		Always returns 0.
		'''
		if state_id == -1: state_id = self.get_state_id()
		self.state.delete(s_offset, state_id=state_id, batch=batch)
		return(0)

	def get_item(self, idx, state_id=-1):
		'''VirtView.get_item()
		Return self.state.get_item(idx, state_id); state_id defaults
		to the current state id of the state list.
		'''
		if state_id == -1:
			state_id = self.get_state_id()
		return(self.state.get_item(idx, state_id))

	def get_list(self):
		'''VirtView.get_list()
		Return the result of return_list() on the state list.
		'''
		return(self.state.return_list())

	def get_llen(self, state_id=-1):
		'''VirtView.get_llen()
		Return the length of the virtual string in the
		specified state.

		NOTE(review): state_id is resolved but the sum always runs
		over the current contents of self.state.
		'''
		if state_id == -1: state_id = self.get_state_id()
		# Each entry caches its own length (end_pt - start_pt), so the
		# virtual length is the sum over all entries.
		llen = 0
		for s in self.state:
			llen += s.length
		return(llen)

	def get_range(self, s_offset=-1, v_row=-1, state_id=-1):
		'''VirtView.get_range()
		Given either a state_offset or a virtual row index,
		return the *range* entry, which contains
		start, end, string_id (where start and end are in
		python slice() format).

		You must pass a keyword argument for either s_offset
		or v_row; an Exception is raised when both are missing.
		When both are given, s_offset wins.

		NOTE(review): a v_row at or beyond the virtual length is not
		handled here (_get_vl_idx returns a list of None in that case).
		'''
		if state_id == -1: state_id = self.get_state_id()
		if s_offset != -1:
			return(self.state[s_offset])
		if v_row == -1:
			raise Exception('You must pass either a state offset or a v_row '
				+ 'as a keyword parameter to get_range')
		sd = self._get_vl_idx(v_row, state_id=state_id)
		return(self.state[sd.state_offset])

	def get_state_id(self):
		'''VirtView.get_state_id()
		This gets the state ID from the underlying SBList object
		that holds the state information.

		see also self.state.incr_state_id()
		'''
		return(self.state.get_state_id())

	def insert(self, v_row, state_entry, state_id=-1, batch=False,):
		'''VirtView.insert()
		Insert state_entry (a StateEntry *range*) so that the text it
		points to is displayed starting at virtual row v_row.

		* If v_row is at or beyond the current virtual length (or the
		  view is empty), the entry is appended.
		* If v_row is the first row of an existing range, the entry
		  is inserted in front of that range.
		* Otherwise the existing range is split at v_row and the new
		  entry is placed between the two halves.

		Ideally I should merge state entries if the starting or ending
		values inside *range* are contiguous to those entries
		near it in the *state* list.

		Always returns 0.
		'''
		if state_id == -1:
			state_id = self.get_state_id()
		# Length of the string before insertion.
		virtual_len = self.get_llen(state_id=state_id)
		if virtual_len == 0:
			# The state list can be empty if the user deleted everything,
			# but point to row zero to facilitate insert()
			v_row = 0
		if v_row >= virtual_len:
			# Append the new row at the end of self.state, regardless of how far
			# v_row exceeds the length of self.state
			self.state.append(state_entry, state_id=state_id, batch=batch)
			return(0)
		# For the given virtual row, find the index into self.state.
		sd = self._get_vl_idx(v_row, state_id=state_id)
		assert(sd.list_idx is not None)
		if sd.state_adj == 0:
			# The new *range* item belongs at the start of an existing range
			# so insert the new item here:
			self.state.insert(sd.state_offset, state_entry, state_id=state_id, batch=batch)
			return(0)
		# state_adj is non-zero, so v_row is strictly inside the existing
		# range and that range must be split.  (An earlier version compared
		# end_pt, a foreign-text offset, with v_row, a virtual offset, and
		# could place the new entry after the range instead of inside it.)
		old_entry = self.state[sd.state_offset]
		low = old_entry.start_pt
		high = old_entry.end_pt
		split_pt = low + sd.state_adj
		self.state.delete(sd.state_offset, state_id=state_id, batch=batch)
		# The deletion above should have created a new state for the self.state object.
		# Use that for the remaining transactions in this batch:
		state_id = self.state.get_state_id()
		# Insert the lower part of the old range (batch is always true here):
		self.state.insert(sd.state_offset, self.StateEntry(low, split_pt, sd.list_idx),
			batch=True, state_id=state_id)
		# Insert the entry for the newly inserted text:
		self.state.insert(sd.state_offset + 1, state_entry, batch=True, state_id=state_id)
		# Insert the top half of the old range that was split
		self.state.insert(sd.state_offset + 2, self.StateEntry(split_pt, high, sd.list_idx),
			batch=True, state_id=state_id)
		return(0)


	def insert_state(self, s_offset, state_entry, state_id=-1, batch=False):
		'''VirtView.insert_state()
		Insert a range object into self.state.
		This is intended to facilitate rewriting
		the state for deletions in the virtual
		representation of the underlying list.
		Always returns 0.
		see also: delete_state()
		'''
		if state_id == -1: state_id = self.get_state_id()
		self.state.insert(s_offset, state_entry, state_id=state_id, batch=batch)
		return(0)

	def set_state_id(self, id):
		'''VirtView.set_state_id()
		Pass the given state id on to the state list.  Always returns 0.
		'''
		self.state.set_state_id(id)
		return(0)

	def show_state(self, state_id):
		'''VirtView.show_state()
		return some debugging info (state_id is currently ignored)
		'''
		#return(self.state.show_state(state_id=state_id))
		return(repr(self.state))