def update(self, key: int, *columns: Optional[int]) -> bool:
    """
    Updates the record that has the specified key to the specified columns.

    Args:
        key: the key of the record to be updated
        *columns: new values of the record; None means the column is not
            updated

    Returns:
        True on success, False on failure
    """
    if len(columns) != self.table.num_columns:
        self.__print_error(
            'columns has incompatible size with the table, '
            'expecting {} but got {}, update failed'
            .format(self.table.num_columns, len(columns))
        )
        return False
    new_tail_rid = self.table.get_new_rid('tail')
    new_record = Record(new_tail_rid, key, columns, 'tail')
    # append the new Record into the tail pages
    self.table.insert_record(new_record)
    # get the base rid before we change the map
    base_rid = self.table.index.locate(key, 0)[0]
    query_column = [1 for _ in range(self.table.num_columns)]
    # use the old primary key to fetch the single old record with the old data
    old_features = self.select(key, 0, query_column)[0].columns
    new_features = columns
    # re-point the index from the old data to the new data
    self.table.index.map_change(base_rid, old_features, new_features)
    self.table.update_record(base_rid, new_record)
    return True
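# Usage sketch for the update above (hedged): `query` is assumed to be a
# Query instance bound to a 5-column table whose primary key lives in
# column 0, as in the milestone test drivers; none of these names are
# defined in this snippet. It shows the None convention: a None entry
# leaves that column at its previous value.
def _example_partial_update(query):
    query.insert(92106429, 10, 20, 30, 40)
    # rewrite only column 2; the None entries are left untouched
    assert query.update(92106429, None, None, 25, None, None)
    record = query.select(92106429, 0, [1, 1, 1, 1, 1])[0]
    assert record.columns[2] == 25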
def select(self, key, column, query_columns):
    # Get the base record location(s) for the given key in the given column
    page_pointer = self.table.index.locate(column, key)
    records = []
    for i in range(len(page_pointer)):
        # collect the base metadata of each record
        args = [
            self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
            *page_pointer[i]
        ]
        base_schema = int.from_bytes(BufferPool.get_record(*args),
                                     byteorder='big')
        args = [
            self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[i]
        ]
        base_indirection = BufferPool.get_record(*args)
        # build the record specified by key and query_columns
        res = []
        for query_col, val in enumerate(query_columns):
            if val != 1:  # column is not selected
                res.append(None)
                continue
            if (base_schema & (1 << query_col)) >> query_col == 1:
                # column has a tail update: follow the indirection pointer
                res.append(
                    self.table.get_tail(
                        int.from_bytes(base_indirection, byteorder='big'),
                        query_col, page_pointer[i][0]))
            else:
                # column is unchanged: read it from the base page
                args = [
                    self.table.name, "Base", query_col + NUM_METAS,
                    *page_pointer[i]
                ]
                res.append(
                    int.from_bytes(BufferPool.get_record(*args),
                                   byteorder="big"))
        # construct the record with rid, primary key, and columns
        args = [self.table.name, "Base", RID_COLUMN, *page_pointer[i]]
        rid = BufferPool.get_record(*args)
        args = [
            self.table.name, "Base", NUM_METAS + column, *page_pointer[i]
        ]
        # key column (may be a non-primary-key column)
        prim_key = BufferPool.get_record(*args)
        record = Record(rid, prim_key, res)
        records.append(record)
    return records
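# The bit test above, (base_schema & (1 << query_col)) >> query_col == 1,
# asks whether a column carries a tail update. A standalone sketch of the
# same check (illustrative helper, not part of the implementation above):
def _column_is_updated(schema_encoding: int, col: int) -> bool:
    # bit `col` of the schema encoding is 1 iff that column was updated
    # after the base record was written
    return (schema_encoding >> col) & 1 == 1

# e.g. encoding 0b0101 marks columns 0 and 2 as updated:
assert _column_is_updated(0b0101, 2) and not _column_is_updated(0b0101, 1)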
def insert(self, *columns: int) -> None:
    """
    Inserts a record with the specified columns into the base page range.

    Args:
        *columns: values of each column of the new record
    """
    if len(columns) != self.table.num_columns:
        self.__print_error('inserted record has incompatible dimensions'
                           ' with the table, insertion failed')
        return
    # instantiate the new Record
    base_rid = self.table.get_new_rid('base')
    key = columns[self.table.key_index]
    new_record = Record(base_rid, key, columns, 'base')
    self.table.index.map_insert(new_record)
    # insert the new Record into self.table
    self.table.insert_record(new_record)
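# Round-trip sketch for the insert above (hedged): `query` is assumed to be
# a Query over a 5-column table with the key in column 0; the names and
# values are illustrative only.
def _example_insert(query):
    query.insert(92106430, 1, 2, 3, 4)
    # the index now maps the key to the new base rid, so a full-projection
    # select returns the freshly inserted values
    record = query.select(92106430, 0, [1, 1, 1, 1, 1])[0]
    assert record.columns == [92106430, 1, 2, 3, 4]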
def select(self, key: int, key_index: int, query_columns: List[int]) \
        -> List[Record]:
    """
    Selects the specified column(s) of the record with the specified key.

    Args:
        key: the key of the record to select
        key_index: which column this key belongs to
        query_columns: bit-vector indicating which column(s) to select

    Returns:
        on success: list with records of specified columns
        on failure: empty list
    """
    if len(query_columns) != self.table.num_columns:
        self.__print_error(
            'query_columns has incompatible size with the table, '
            'expecting {} but got {}'
            .format(self.table.num_columns, len(query_columns))
        )
        return []
    base_rids_list = self.table.index.locate(key, key_index)
    record_list = []  # list of records to return
    for base_rid in base_rids_list:
        query_result = []
        for i in range(self.table.num_columns):
            if query_columns[i] == 0:  # current column is not selected
                query_result.append(None)
                continue
            field = self.table.select_feature(base_rid, i)
            query_result.append(field)
        record = Record(base_rid, key, query_result, 'base')
        record_list.append(record)
    return record_list
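# The query_columns argument above is a projection mask. A minimal sketch
# of the same masking over plain lists (names are illustrative):
def _project(fields, query_columns):
    # keep fields whose mask bit is 1; emit None placeholders otherwise so
    # positions stay aligned with the table's columns
    return [f if m == 1 else None for f, m in zip(fields, query_columns)]

assert _project([10, 20, 30], [1, 0, 1]) == [10, None, 30]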
def update(self, key, *columns, commit=False, abort=False, t=None):
    if commit:
        self.table.lm_lock.acquire()
        self.table.lock.release(t)
        self.table.lm_lock.release()
        return (Config.INVALID_RID, True)
    if abort:
        base_rid = columns[0][0]
        (range_index, base, set_index, offset) = \
            self.table.calculate_base_location(base_rid)
        base_ind_page = self.table.bufferpool.find_page(
            self.table.name, range_index, base, set_index,
            Config.INDIRECTION_COLUMN)
        base_ind = int.from_bytes(base_ind_page.read(offset), sys.byteorder)
        self.table.pd_lock.acquire()
        (tail_range, tail, tail_set, tail_offset) = \
            self.table.page_directory[base_ind]
        self.table.pd_lock.release()
        tail_rid_page = self.table.bufferpool.find_page(
            self.table.name, tail_range, tail, tail_set, Config.RID_COLUMN)
        tail_rid_page.write(
            tail_offset,
            Config.INVALID_RID.to_bytes(Config.ENTRY_SIZE, sys.byteorder))
        tail_ind_page = self.table.bufferpool.find_page(
            self.table.name, tail_range, tail, tail_set,
            Config.INDIRECTION_COLUMN)
        base_ind_page.write(offset, tail_ind_page.read(tail_offset))
        self.table.pd_lock.acquire()
        del self.table.page_directory[base_ind]
        self.table.pd_lock.release()
        return (Config.INVALID_RID, True)

    # find RID for the tail record
    self.table.base_rid_lock.acquire()
    self.table.tail_rid_lock.acquire()
    self.table.assign_rid('update')
    record = Record(self.table.tail_current_rid, self.table.key, columns)
    self.table.tail_rid_lock.release()
    self.table.base_rid_lock.release()

    rids = self.table.index.locate(0, key)
    if t:
        self.table.lm_lock.acquire()
        if not self.table.lock.acquire(rids[0], t, 'X'):
            self.table.lm_lock.release()
            return (Config.INVALID_RID, False)
        self.table.lm_lock.release()

    self.table.pd_lock.acquire()
    (base_range, _, base_set, base_offset) = self.table.page_directory[rids[0]]
    self.table.pd_lock.release()

    # generate the schema encoding for this update
    new_schema = ""
    for i in range(self.table.num_columns):
        if columns[i] is None:
            new_schema += '0'
        else:
            new_schema += '1'

    # Base RID (1)
    base_rid_page = self.table.bufferpool.find_page(
        self.table.name, base_range, 0, base_set, Config.RID_COLUMN)
    base_rid_page.pin_count += 1
    base_rid = int.from_bytes(base_rid_page.read(base_offset), sys.byteorder)
    base_rid_page.pin_count -= 1

    # Base Indirection (0)
    base_ind_page = self.table.bufferpool.find_page(
        self.table.name, base_range, 0, base_set, Config.INDIRECTION_COLUMN)
    base_ind_page.pin_count += 1
    base_ind = int.from_bytes(base_ind_page.read(base_offset), sys.byteorder)
    base_ind_page.write(
        base_offset, record.rid.to_bytes(Config.ENTRY_SIZE, sys.byteorder))
    base_ind_page.pin_count -= 1

    # Base schema encoding (3)
    base_SE_page = self.table.bufferpool.find_page(
        self.table.name, base_range, 0, base_set,
        Config.SCHEMA_ENCODING_COLUMN)
    base_SE_page.pin_count += 1
    base_SE = int.from_bytes(base_SE_page.read(base_offset), sys.byteorder)
    # write indirection to the base page and update the base record's
    # schema encoding
    base_schema = self.int_to_schema(base_SE)
    result_schema = ""
    for i in range(self.table.num_columns):
        if base_schema[i] == '1' or new_schema[i] == '1':
            result_schema += '1'
        else:
            result_schema += '0'
    base_SE_page.write(
        base_offset,
        self.schema_to_int(result_schema).to_bytes(Config.ENTRY_SIZE,
                                                   sys.byteorder))
    base_SE_page.pin_count -= 1

    # Get information from the latest updated record
    non_updated_values = []
    if base_ind != 0:  # base record has been updated at least once
        self.table.pd_lock.acquire()
        (prev_range, prev_bt, prev_set, prev_offset) = \
            self.table.page_directory[base_ind]
        self.table.pd_lock.release()
    else:  # base record has not been updated
        prev_range = base_range
        prev_bt = 0
        prev_set = base_set
        prev_offset = base_offset
    for i in range(self.table.num_columns):
        if new_schema[i] == '0':
            page = self.table.bufferpool.find_page(
                self.table.name, prev_range, prev_bt, prev_set,
                i + Config.NUM_META_COLS)
            page.pin_count += 1
            value = int.from_bytes(page.read(prev_offset), sys.byteorder)
            page.pin_count -= 1
            non_updated_values.append(value)
    count = 0
    new_columns = []
    for i in range(self.table.num_columns):
        if columns[i] is None:
            new_columns.append(non_updated_values[count])
            count += 1
        else:
            new_columns.append(columns[i])
    record.columns = tuple(new_columns)

    # write the tail record to memory
    self.table.tt_lock.acquire()
    tail_index = self.table.tail_tracker[base_range]
    path = os.getcwd() + "/r_" + str(base_range) + "/1"
    if tail_index == -1 and not os.path.exists(path):
        # no updates to this range yet
        os.makedirs(path)
    if tail_index == -1:  # no tail page created yet
        path = os.getcwd() + "/r_" + str(base_range) + "/1" + "/s_0"
        if not os.path.exists(path):
            os.makedirs(path)
        self.table.tail_tracker[base_range] = 0
        tail_offset = 0
        for i in range(self.table.num_columns + Config.NUM_META_COLS):
            file = open(path + "/p_" + str(i) + ".txt", "w+")
            file.close()
    else:  # a tail page has been created
        rid_page = self.table.bufferpool.find_page(
            self.table.name, base_range, 1,
            self.table.tail_tracker[base_range], 0)
        rid_page.pin_count += 1
        if rid_page.has_capacity():
            tail_offset = rid_page.num_records
        else:
            self.table.tail_tracker[base_range] += 1
            tail_offset = 0
            path = os.getcwd() + '/r_' + str(base_range) + '/1/s_' + str(
                self.table.tail_tracker[base_range])
            if not os.path.exists(path):
                os.makedirs(path)
            for i in range(self.table.num_columns + Config.NUM_META_COLS):
                file = open(path + "/p_" + str(i) + ".txt", 'w+')
                file.close()
        rid_page.pin_count -= 1
    self.table.pd_lock.acquire()
    self.table.page_directory.update({
        record.rid: (base_range, 1, self.table.tail_tracker[base_range],
                     tail_offset)
    })
    self.table.pd_lock.release()
    self.write_to_page(base_range, 1, self.table.tail_tracker[base_range],
                       tail_offset, base_ind, self.schema_to_int(new_schema),
                       base_rid, record)
    self.table.tt_lock.release()
    return (base_rid, True)
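# The schema merge above ORs the base record's encoding with the new
# update's encoding, one character per column. A pure sketch of that step
# (illustrative helper; the method performs it inline via int_to_schema and
# schema_to_int):
def _merge_schema(base_schema: str, new_schema: str) -> str:
    # '1' in either string marks the column as updated
    return ''.join('1' if b == '1' or n == '1' else '0'
                   for b, n in zip(base_schema, new_schema))

assert _merge_schema('0100', '0010') == '0110'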
def select(self, key, column, query_columns, t=None, commit=False,
           abort=False, *args):
    # need to make sure the key is available (Milestone 2 index)
    if commit:
        self.table.lm_lock.acquire()
        self.table.lock.release(t)
        self.table.lm_lock.release()
        return ([], True)
    if abort:
        return ([], True)
    record_list = []
    # find the base record's physical location
    rids = self.table.index.locate(column, key)
    if rids is None:
        return None
    if t:
        self.table.lm_lock.acquire()
        for rid in rids:
            if not self.table.lock.acquire(rid, t, 'S'):
                self.table.lm_lock.release()
                return ([], False)
        self.table.lm_lock.release()
    for rid in rids:
        record_info = []
        self.table.pd_lock.acquire()
        (range_index, _, set_index, offset) = self.table.page_directory[rid]
        self.table.pd_lock.release()
        for j in range(len(query_columns)):
            if query_columns[j] == 1:
                record_info.append(
                    self.get_latest_val(range_index, set_index, offset, j))
            else:
                record_info.append(None)
        # Note: locate_range may not be correct here, as it returns a list
        # of rids and the first rid in that list may not be the one we are
        # looking for; it is also inefficient, since it builds another
        # SortedDict and traverses everything:
        # rid = self.table.index.locate_range(key, key, column)[0]
        rid_page = self.table.bufferpool.find_page(
            self.table.name, range_index, 0, set_index, Config.RID_COLUMN)
        rid_page.pin_count += 1
        rid = int.from_bytes(rid_page.read(offset), sys.byteorder)
        rid_page.pin_count -= 1
        record_list.append(Record(rid, key, record_info))
    return (record_list, True)
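# Transactional usage sketch (hedged): under two-phase locking, a select
# run on behalf of transaction `t` takes shared ('S') locks and reports
# success in the second tuple slot; False there means a lock conflict and
# the caller should abort and retry. `query`, `key`, and `t` follow the
# surrounding project's conventions and are not defined in this snippet;
# the key is assumed to exist, since a missing key makes select return
# None instead of a tuple.
def _example_locked_select(query, key, t):
    result, ok = query.select(key, 0, [1, 1, 1, 1, 1], t=t)
    if not ok:
        return None  # lock conflict: caller aborts the transaction
    return result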
def insert(self, *columns, commit=False, abort=False, t=None):
    # committing to the database
    if commit:
        self.table.lm_lock.acquire()
        self.table.lock.release(t)
        self.table.lm_lock.release()
        return (Config.INVALID_RID, 0, True)
    # undoing writes
    if abort:
        rid = columns[0]
        key = columns[1]
        (range_index, base, set_index, offset) = \
            self.table.calculate_base_location(rid)
        rid_page = self.table.bufferpool.find_page(
            self.table.name, range_index, base, set_index, Config.RID_COLUMN)
        rid_page.write(offset,
                       rid.to_bytes(Config.ENTRY_SIZE, sys.byteorder))
        self.table.pd_lock.acquire()
        del self.table.page_directory[rid]
        del self.table.index.indexes[Config.NUM_META_COLS +
                                     self.table.key][key]
        del self.table.key_directory[key]
        self.table.pd_lock.release()
        return (Config.INVALID_RID, 0, True)

    # generate the schema encoding
    schema_encoding = '0' * self.table.num_columns
    self.table.base_rid_lock.acquire()
    self.table.tail_rid_lock.acquire()
    self.table.assign_rid('insert')  # get a valid rid
    record = Record(self.table.base_current_rid, self.table.key, columns)
    self.table.tail_rid_lock.release()
    self.table.base_rid_lock.release()
    (range_index, base, set_index, offset) = \
        self.table.calculate_base_location(record.rid)
    if t:
        self.table.lm_lock.acquire()
        if not self.table.lock.acquire(record.rid, t, 'X'):
            self.table.lm_lock.release()
            return (Config.INVALID_RID, 0, False)
        self.table.lm_lock.release()

    # store the physical location in the page directory
    self.table.pd_lock.acquire()
    self.table.page_directory.update(
        {record.rid: (range_index, 0, set_index, offset)})
    self.table.index.indexes[Config.NUM_META_COLS + self.table.key].update(
        {record.columns[self.table.key]: [record.rid]})
    self.table.key_directory.update(
        {record.columns[self.table.key]: (range_index, set_index, offset)})
    self.table.pd_lock.release()

    # Create a new range?
    if range_index > self.table.latest_range_index:
        self.table.tt_lock.acquire()
        self.table.tail_tracker.append(-1)
        self.table.tt_lock.release()
        self.table.merge_tracker.append(0)
        self.table.base_tracker.append(0)
        self.table.latest_range_index += 1
        path = os.getcwd() + "/r_" + str(range_index) + "/0"
        if not os.path.exists(path):
            os.makedirs(path)

    # Create a new page?
    if offset == 0:
        path = os.getcwd() + "/r_" + str(range_index) + "/0/s_" + str(
            set_index)
        if not os.path.exists(path):
            os.makedirs(path)
        ind_path = os.getcwd() + "/r_" + str(
            range_index) + "/0/indirection.txt"
        file = open(ind_path, 'w+')
        file.close()
        for i in range(1, self.table.num_columns + Config.NUM_META_COLS):
            file = open(path + "/p_" + str(i) + ".txt", "w+")
            file.close()

    # write to the page
    self.write_to_page(range_index, base, set_index, offset,
                       Config.INVALID_RID,
                       self.schema_to_int(schema_encoding), record.rid,
                       record)
    return (record.rid, record.columns[self.table.key], True)
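# Sketch of the rid-to-location arithmetic that calculate_base_location
# above is assumed to perform (illustrative only; the real method lives on
# Table, and the capacity constants here are made up). With
# `page_capacity` records per page and `pages_per_range` pages per range,
# a base rid maps to a (range, base/tail flag, set, offset) tuple:
def _base_location(rid, page_capacity=512, pages_per_range=16):
    index = rid - 1  # assuming rids start at 1
    range_index = index // (page_capacity * pages_per_range)
    set_index = (index // page_capacity) % pages_per_range
    offset = index % page_capacity
    return (range_index, 0, set_index, offset)  # 0 marks a base record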
def select_bp(self, key, query_columns, rid):
    # print("rid in select: ", rid)
    lock.acquire()
    user_col_num = len(query_columns)
    curr_offset = utility.offset_calc(rid)
    # Get the list of page ids associated with the base page
    list_pages = self.rid_to_dict_value(rid)
    base_page_ids = list_pages[1]
    num_base = list_pages[0][NUM_BASE]
    # Get the list of pages associated with tail pages in the page range;
    # tail_page_ids is a list of lists of tail page ids
    tail_page_ids = list_pages[num_base + 1:]

    # Create a copy of the base record and a schema encoding to track
    # applied updates
    base_record = Record(rid, key, [])
    schema_checker = ''         # schema starts off as all 0's
    end_condition_schema = ''   # end condition: schema is all 1's
    for n in range(user_col_num):
        curr_page = self.get_phys_page_from_bp(
            base_page_ids[n + user_col_num])
        base_record.columns.append(int(curr_page.read(curr_offset)))
        curr_page.pin_cnt -= 1
        schema_checker += '0'
        end_condition_schema += '1'

    # RID of the first update: curr_page is the indirection column of the
    # base record
    curr_page = self.get_phys_page_from_bp(base_page_ids[INDIRECTION_COLUMN])
    update_rid = int(curr_page.read(curr_offset))
    curr_page.pin_cnt -= 1

    # Used to check: if the TPS isn't 0, is the update rid < TPS?
    under_tps = True

    # While there are still potential fields to be updated
    while (update_rid != rid and schema_checker != end_condition_schema
           and under_tps):
        # Find the row of tail pages the update is on from its rid
        tail_page_row = self.get_update_row(update_rid, tail_page_ids)
        tail_rid_column_page = self.get_phys_page_from_bp(
            tail_page_ids[tail_page_row][RID_COLUMN])
        tail_rid_offset = utility.tail_offset_calc(update_rid,
                                                   tail_rid_column_page)
        tail_rid_column_page.pin_cnt -= 1
        tail_rid_schema_page = self.get_phys_page_from_bp(
            tail_page_ids[tail_page_row][SCHEMA_ENCODING_COLUMN])
        update_schema = tail_rid_schema_page.read(tail_rid_offset)
        # borrowed idea from
        # https://stackoverflow.com/questions/25589729/binary-numbers-of-n-digits
        # convert = bin(update_schema)[2:]
        # new_update_schema = str(0) * (user_col_num - len(convert)) + convert
        # update_schema = list(new_update_schema)
        tail_rid_schema_page.pin_cnt -= 1

        # Look at the schema encoding
        for n in range(user_col_num):
            # If this update is the most recent update for that field
            if update_schema[n] == '1' and schema_checker[n] == '0':
                # Apply the update to the field
                tail_rid_column_page = self.get_phys_page_from_bp(
                    tail_page_ids[tail_page_row][RID_COLUMN])
                tail_rid_offset = utility.tail_offset_calc(
                    update_rid, tail_rid_column_page)
                tail_rid_column_page.pin_cnt -= 1
                curr_tail_page = self.get_phys_page_from_bp(
                    tail_page_ids[tail_page_row][n + DEFAULT_COLS_NUM])
                base_record.columns[n] = int(
                    curr_tail_page.read(tail_rid_offset))
                curr_tail_page.pin_cnt -= 1
                # Strings are immutable, so schema_checker[n] = '1' is not
                # allowed; rebuild the string instead
                new_schema = ''
                for m in range(user_col_num):
                    if m == n:
                        new_schema += '1'
                    else:
                        new_schema += schema_checker[m]
                schema_checker = new_schema

        # Look at the next most recent update
        tail_rid_column_page = self.get_phys_page_from_bp(
            tail_page_ids[tail_page_row][RID_COLUMN])
        tail_rid_offset = utility.tail_offset_calc(update_rid,
                                                   tail_rid_column_page)
        tail_rid_column_page.pin_cnt -= 1
        tail_rid_indirection_page = self.get_phys_page_from_bp(
            tail_page_ids[tail_page_row][INDIRECTION_COLUMN])
        update_rid = int(tail_rid_indirection_page.read(tail_rid_offset))
        tail_rid_indirection_page.pin_cnt -= 1

        # If the TPS isn't 0, check whether the update rid has reached it
        base_indirection_page = self.get_phys_page_from_bp(
            base_page_ids[INDIRECTION_COLUMN])
        first_read = int(base_indirection_page.read(0))
        if first_read != 0 and update_rid >= first_read:
            under_tps = False
        base_indirection_page.pin_cnt -= 1

    # Create the record to return, masking out unrequested columns
    new_record = Record(rid, key, [])
    for n in range(user_col_num):
        if query_columns[n] == 0:  # not interested in this column
            new_record.columns.append(None)
        else:
            new_record.columns.append(base_record.columns[n])
    lock.release()
    return new_record
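# The inner loop above rebuilds schema_checker character by character
# because Python strings are immutable. An equivalent, more direct sketch
# using a throwaway list (illustrative; behavior matches the loop above):
def _set_schema_bit(schema_checker: str, n: int) -> str:
    flags = list(schema_checker)
    flags[n] = '1'
    return ''.join(flags)

assert _set_schema_bit('0100', 2) == '0110'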