l.append(l[-1]) k += 1 j += 1 else: l.append(chr(random.randint(ord('a'), ord('z')))) j += 1 # # Now copy the list and test it # start_offset = random.randint(0, str_len - 3) end_offset = random.randint(start_offset + 1, str_len - 1) save_l = [] save_l.extend(l) q = SBList(l) q.sort(start=start_offset, end=end_offset) l_new = q.return_list() save_l2 = subsort(save_l, start_offset, end_offset) # The diff() function seems to remove the sort from # my saved list ### TEMPORARY HACK TO TEST ERROR DETECTION: ##save_l2[3] = 'zzTEST' difflist = list(difflib.ndiff(save_l2, l_new)) if len(difflist) != len(q): print('================== verify failed') print("start idx " + str(start_offset) + ' end idx ' + str(end_offset)) print('save:\n' + repr(save_l2)) print('new:\n' + repr(l_new)) raise Exception('Verify failed') print('finished with no errors')
del q[4] del q[0] q.insert(2, 'after c') q.insert(len(q), "last line") del l[5] del l[4] del l[0] l.insert(2, 'after c') l.insert(len(q), "last line") for j in range(len(l)): assert(q[j] == l[j]) revised = SBList(l) assert(revised == q) qlist = q.return_list() assert(l == qlist) print("delete last item, when last item is a single range ") q.delete(len(q) - 1) print(repr(q)) print(q.show_state() + '\n') ######################## multiple delete: #from SBList01 import * l = ['a', 'b', 'c', 'd', 'e', 'f'] del l[1:3] assert(l == ['a', 'd', 'e', 'f']) assert(l != ['a junk list to be sure that the comparison works']) del l[0:2] assert(l == ['e', 'f'])
class VirtView():
    '''class VirtView()

    THIS SHOULD PROBABLY MOVE INSIDE SBSTRING OBJECT AND BE RENAMED.

    This object holds a list of *range* entries (StateEntry objects)
    that together describe a logical view of the foreign text held by
    an SBString object.  If edits are made to the foreign text, new
    text is appended to the end of the foreign list, and the entries
    here are ordered so that the intended logical view of the text is
    preserved.

    Example: the foreign list contains the text "abcdefghij", then
    "zz" is inserted after "d", which is offset 4.  The new text
    ("zz") is appended to the end of the foreign list, and the
    entries in this object become:

        [[0, 4, 0], [0, 2, 1], [4, 10, 0]]

    The entry [0, 2, 1] means: take bytes 0-1 (python slice s[0:2])
    from the string stored at index 1 in the foreign list.

    *range* format: [start, end, list_offset_for_text] where start
    and end are python slice() numbers that point into the text
    referenced by the list offset.
    '''

    class StateEntry(object):
        '''One *range*: a slice [start_pt:end_pt] of the text stored at
        index list_idx of the foreign list.  'length' caches
        end_pt - start_pt so the virtual length can be summed without
        recomputing it.
        '''
        start_pt = None
        end_pt = None
        list_idx = None
        length = None

        def __init__(self, start_pt, end_pt, list_idx):
            object.__init__(self)
            self.start_pt = start_pt
            self.end_pt = end_pt
            self.list_idx = list_idx
            self.length = self.end_pt - self.start_pt

        def __repr__(self):
            return('[' + str(self.start_pt) + ', ' + str(self.end_pt)
                   + ', ' + str(self.list_idx) + ', '
                   + str(self.length) + ']')

    class StateDeRef(object):
        '''class StateDeRef()

        Holds the values needed to translate a virtual character
        offset into a position inside one state entry.

        state_offset = the zero-based offset into the state object.

        state_adj = an offset relative to the start of the range at
           state_offset.  For example, if the state entry is [10, 20]
           and state_adj = 3, then the reference is to offset 13,
           which is 10 + 3.

        str_ptr = start_pt of the state entry plus state_adj; i.e. the
           character offset inside the referenced chunk of text.

        list_idx = the list_idx of the state entry; i.e. the index of
           the referenced text in the foreign list.

        Iterating over the object yields
        (state_offset, state_adj, str_ptr, list_idx) in that order, so
        it can be unpacked like a 4-tuple.
        '''
        state_offset = None
        state_adj = None
        str_ptr = None
        list_idx = None

        def __init__(self, s_offset, s_adj, str_ptr, l_idx):
            '''StateDeRef.__init__()
            '''
            object.__init__(self)
            self.state_offset = s_offset
            self.state_adj = s_adj
            self.str_ptr = str_ptr
            self.list_idx = l_idx

        def __iter__(self):
            # iterindex counts the number of fields still to be yielded.
            self.iterindex = 4
            return(self)

        def __next__(self):
            # Test before decrementing: the previous version decremented
            # first, so state_offset was never yielded and only three
            # values came out of the iteration.
            if self.iterindex <= 0:
                raise StopIteration
            fields = (self.state_offset, self.state_adj,
                      self.str_ptr, self.list_idx)
            value = fields[len(fields) - self.iterindex]
            self.iterindex -= 1
            return(value)

        def __repr__(self):
            # __repr__ must return a str (returning a list raises
            # TypeError when repr() is called).
            return('[' + ', '.join(repr(v) for v in (
                self.state_offset, self.state_adj,
                self.str_ptr, self.list_idx)) + ']')

    def __init__(self, state_entry):
        '''VirtView.__init__()

        Start the view with a single state entry, stored in an SBList.
        '''
        self.state = SBList([state_entry])
        # use self.get_state_id()
        #self.state_id = 1   # NOT USED??

    def __iter__(self):
        # Maybe add a flag that will lock the objects from
        # being altered when iter is active?
        #
        # iterindex counts the number of entries still to be yielded.
        self.iterindex = len(self.state)
        return(self)

    def __getitem__(self, i):
        return(self.state[i])

    def __len__(self):
        '''VirtView.__len__()

        Returns the number of saved states.
        '''
        return(len(self.state))

    def __next__(self):
        '''VirtView.__next__

        Returns the next range object, which contains a start offset,
        end offset, and an id for a string stored elsewhere.  The
        start/end pair follows python slice() notation where end is
        one greater than the last index that will be returned when
        used as list[start : end]
        '''
        if self.iterindex == 0:
            raise StopIteration
        # Index before decrementing: the previous version decremented
        # first, which skipped entry 0 and finally indexed one past
        # the end of the state list.
        entry = self.state[len(self.state) - self.iterindex]
        self.iterindex -= 1
        return(entry)

    def __repr__(self):
        return(repr(self.state))

    def _get_vl_idx(self, str_offset, state_id=-1):
        '''VirtView._get_vl_idx()

        Translate a virtual character offset into a StateDeRef that
        identifies the state entry containing that offset and the
        position inside it:

         1) state_offset = index of the state entry whose range spans
            the requested offset.
         2) state_adj = how far the requested offset is from the start
            of that entry.
         3) str_ptr = entry.start_pt + state_adj (character offset
            inside the referenced chunk of text).
         4) list_idx = entry.list_idx (index of the chunk of text in
            the foreign list).

        If the character offset is at or beyond the virtual length of
        the view, this returns the list [None, None, None, None]
        (NOT a StateDeRef), which might mean to append a proposed
        insert at the end of the list.
        '''
        if state_id == -1:
            state_id = self.get_state_id()

        str_len = self.get_llen(state_id=state_id)
        if str_offset >= str_len:
            return([None, None, None, None])

        # temp_str_offset is the virtual offset of the first character
        # covered by the state entry being examined.  Walk the entries
        # until the one whose span contains str_offset is found.
        temp_str_offset = 0
        l_adjust = None
        for s_offset in range(len(self.state)):
            entry = self.state[s_offset]
            # Highest virtual offset covered by this entry.
            highest_str_offset = temp_str_offset + entry.end_pt \
                    - entry.start_pt - 1
            if highest_str_offset < str_offset:
                # Not far enough yet; the next entry starts right
                # after this one.
                temp_str_offset = highest_str_offset + 1
            else:
                # The desired offset is inside this entry.
                l_adjust = str_offset - temp_str_offset
                break

        str_pt = self.state[s_offset].start_pt + l_adjust
        string_id = self.state[s_offset].list_idx
        return(self.StateDeRef(s_offset, l_adjust, str_pt, string_id))

    def delete_state(self, s_offset, batch=False, state_id=-1):
        '''VirtView.delete_state()

        Delete a *range* item from the state.  Returns 0.
        '''
        if state_id == -1:
            state_id = self.get_state_id()
        self.state.delete(s_offset, state_id=state_id, batch=batch)
        return(0)

    def get_item(self, idx, state_id=-1):
        '''VirtView.get_item()

        Return the state entry at idx via the get_item() method of the
        underlying state list.
        '''
        if state_id == -1:
            state_id = self.get_state_id()
        return(self.state.get_item(idx, state_id))

    def get_list(self):
        '''VirtView.get_list()

        Return the result of return_list() on the underlying state
        list.
        '''
        return(self.state.return_list())

    def get_llen(self, state_id=-1):
        '''VirtView.get_llen()

        Return the length of the virtual string, which is the sum of
        the stored 'length' of every state entry.

        NOTE(review): state_id is resolved but the sum always runs
        over the entries that self.state currently iterates over.
        '''
        if state_id == -1:
            state_id = self.get_state_id()
        # Each entry stores end_pt - start_pt as 'length' (end_pt is in
        # python slice format: one greater than the last index used).
        return(sum(s.length for s in self.state))

    def get_range(self, s_offset=-1, v_row=-1, state_id=-1):
        '''VirtView.get_range()

        Given either a state_offset or a virtual row index, return
        the *range* entry, which contains start, end, string_id (where
        start and end are in python slice() format).

        You must pass a keyword argument for either s_offset or v_row;
        an Exception is raised if neither is given.  If both are
        given, s_offset wins.

        NOTE(review): if v_row is at or beyond the virtual length,
        _get_vl_idx() returns a list of None and the attribute access
        below fails with AttributeError.
        '''
        if state_id == -1:
            state_id = self.get_state_id()

        if s_offset != -1:
            return(self.state[s_offset])
        if v_row == -1:
            raise Exception('You must pass either a state offset or a v_row '
                            + 'as a keyword parameter to get_range')
        sd = self._get_vl_idx(v_row, state_id=state_id)
        return(self.state[sd.state_offset])

    def get_state_id(self):
        '''VirtView.get_state_id()

        This gets the state ID from the underlying SBList object
        that holds the state information.

        see also self.state.incr_state_id()
        '''
        return(self.state.get_state_id())

    def insert(self, v_row, state_entry, state_id=-1, batch=False,):
        '''VirtView.insert()

        Insert state_entry (a StateEntry object) into the state list
        so that the text it references is displayed starting at the
        virtual offset v_row.

         * If v_row is at or beyond the current virtual length (or the
           view is empty), the entry is appended.
         * If v_row falls on the first character of an existing entry,
           the new entry is inserted before that entry.
         * Otherwise the existing entry is split in two and the new
           entry is placed between the halves.

        Ideally, adjacent entries that are contiguous should be
        merged; that is not done here.

        Returns 0.
        '''
        if state_id == -1:
            state_id = self.get_state_id()

        # Length of the string before insertion.
        virtual_len = self.get_llen(state_id=state_id)
        if virtual_len == 0:
            # The state list can be empty if the user deleted
            # everything, but point to row zero to facilitate insert()
            v_row = 0

        if v_row >= virtual_len:
            # Append the new entry at the end of self.state, regardless
            # of how far v_row exceeds the virtual length.
            self.state.append(state_entry, state_id=state_id, batch=batch)
            return(0)

        # For the given virtual row, find the index into self.state.
        sd = self._get_vl_idx(v_row, state_id=state_id)
        assert(sd.list_idx is not None)

        if sd.state_adj == 0:
            # The new *range* item belongs at the start of an existing
            # range, so insert the new item here:
            self.state.insert(sd.state_offset, state_entry,
                              state_id=state_id, batch=batch)
        elif self.state[sd.state_offset].end_pt == v_row:
            # The new item goes after this state entry.
            # NOTE(review): this compares a slice end offset with a
            # virtual row; confirm that this is the intended test.
            self.state.insert(sd.state_offset + 1, state_entry,
                              state_id=state_id, batch=batch)
        else:
            # Split the old range entry.
            low = self.state[sd.state_offset].start_pt
            high = self.state[sd.state_offset].end_pt
            self.state.delete(sd.state_offset, state_id=state_id,
                              batch=batch)
            # The deletion above should have created a new state for
            # the self.state object; use that state for the remaining
            # transactions in this batch.
            state_id = self.state.get_state_id()
            split_pt = low + sd.state_adj
            # Lower part of the old entry (batch is always true here):
            self.state.insert(sd.state_offset,
                              self.StateEntry(low, split_pt, sd.list_idx),
                              batch=True, state_id=state_id)
            # The entry for the newly inserted text:
            self.state.insert(sd.state_offset + 1, state_entry,
                              batch=True, state_id=state_id)
            # Upper part of the old entry:
            self.state.insert(sd.state_offset + 2,
                              self.StateEntry(split_pt, high, sd.list_idx),
                              batch=True, state_id=state_id)
        return(0)

    def insert_state(self, s_offset, state_entry, state_id=-1, batch=False):
        '''VirtView.insert_state()

        Insert a range object into self.state at s_offset.  This is
        intended to facilitate rewriting the state for deletions
        in the virtual representation of the underlying list.
        Returns 0.

        see also: delete_state()
        '''
        if state_id == -1:
            state_id = self.get_state_id()
        self.state.insert(s_offset, state_entry, state_id=state_id,
                          batch=batch)
        return(0)

    def set_state_id(self, id):
        '''VirtView.set_state_id()

        Pass the given state id to the underlying state list.
        Returns 0.
        '''
        self.state.set_state_id(id)
        return(0)

    def show_state(self, state_id):
        '''VirtView.show_state()

        Return some debugging info: the repr() of the state list.
        state_id is currently ignored.
        '''
        #return(self.state.show_state(state_id=state_id))
        return(repr(self.state))