def create_table(self, name, num_columns, key):
    """Create a new table in this database and return it.

    Registers the table's BufferPool bookkeeping (latest-tail map and TPS
    counters) before the table is tracked, so subsequent writes find them.
    """
    new_table = Table(name, num_columns, key)
    # Per-table BufferPool bookkeeping must exist before any page I/O.
    BufferPool.init_latest_tail(name)
    BufferPool.init_tps(name)
    self.tables.append(new_table)
    return new_table
def select(self, key, column, query_columns):
    """Return a list of Record objects whose *column* value equals *key*.

    query_columns is a 0/1 projection mask over data columns: 1 selects the
    column, 0 places None in the result. For each selected column, the value
    is read from the tail pages when the base schema-encoding bit for that
    column is set (the column was updated), otherwise from the base page.
    """
    # Get the indirection id given choice of key in specific column
    page_pointer = self.table.index.locate(column, key)
    records = []
    for i in range(len(page_pointer)):
        # collect base meta datas of each record
        args = [
            self.table.name, "Base", SCHEMA_ENCODING_COLUMN, *page_pointer[i]
        ]
        base_schema = int.from_bytes(BufferPool.get_record(*args),
                                     byteorder='big')
        args = [
            self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[i]
        ]
        base_indirection = BufferPool.get_record(*args)
        # Total record specified by key and columns
        res = []
        for query_col, val in enumerate(query_columns):
            # column is not selected
            if val != 1:
                res.append(None)
                continue
            # Schema bit set -> column was updated; latest value is in a tail page.
            if (base_schema & (1 << query_col)) >> query_col == 1:
                res.append(
                    self.table.get_tail(
                        int.from_bytes(base_indirection, byteorder='big'),
                        query_col, page_pointer[i][0]))
            else:
                # Column never updated; read straight from the base page.
                args = [
                    self.table.name, "Base", query_col + NUM_METAS,
                    *page_pointer[i]
                ]
                res.append(
                    int.from_bytes(BufferPool.get_record(*args),
                                   byteorder="big"))
        # construct the record with rid, primary key, columns
        args = [self.table.name, "Base", RID_COLUMN, *page_pointer[i]]
        rid = BufferPool.get_record(*args)
        args = [
            self.table.name, "Base", NUM_METAS + column, *page_pointer[i]
        ]
        # or non_prim _key (the search column may not be the primary key)
        prim_key = BufferPool.get_record(*args)
        record = Record(rid, prim_key, res)
        records.append(record)
    return records
def tail_page_write(self, data, range_index):
    """Append one tail record: write each column value of *data* onto the
    latest tail page of the given page range, rolling over to a new tail
    page (and recording it) when the current one is full.
    """
    for col_id, col_value in enumerate(data):
        tail_id = self.get_latest_tail(col_id, range_index)
        page = BufferPool.get_page(self.name, "Tail", col_id, range_index,
                                   tail_id)
        if not page.has_capacity():
            # Current tail page is full: move on to the next page id and
            # remember it as the latest tail for this (column, range).
            tail_id += 1
            BufferPool.set_latest_tail(self.name, col_id, range_index,
                                       tail_id)
            page = BufferPool.get_page(self.name, "Tail", col_id,
                                       range_index, tail_id)
        page.dirty = 1
        page.write(col_value)
def open(self, path):
    """Restore database state from *path*.

    Recreates the directory if missing, then restores (in order): the
    persisted tables (one subdirectory each, holding table.pkl), the page
    directory (page_directory.txt, one comma-separated uid per line), the
    TPS map (tps.pkl) and the latest-tail map (latest_tail.pkl) into the
    BufferPool.

    Fix: file handles are now managed with `with` blocks so they are closed
    even if parsing/unpickling raises; the copy-pasted "page_directory.txt"
    comments on the tps/latest_tail sections were corrected.
    """
    if not os.path.exists(path):
        os.makedirs(path)
    BufferPool.initial_path(path)
    name2idx = {}

    # Restore existing tables persisted on disk (one subdirectory per table).
    tables = [
        name for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    ]
    for t_name in tables:
        t_path = os.path.join(path, t_name, 'table.pkl')
        old_table = read_table(t_path)
        name2idx[t_name] = len(self.tables)
        self.tables.append(old_table)

    # Restore Page Directory to BufferPool
    fname = os.path.join(path, "page_directory.txt")
    if not os.path.exists(fname):
        # Create page_directory.txt if it does not exist yet.
        with open(fname, "w+"):
            pass
    with open(fname, "r") as f:
        for line in f:
            t_name, base_tail, column_id, page_range_id, page_id = line.rstrip(
                '\n').split(',')
            uid = (t_name, base_tail, int(column_id), int(page_range_id),
                   int(page_id))
            BufferPool.add_page(uid)

    # Restore tps to BufferPool
    fname = os.path.join(path, "tps.pkl")
    if not os.path.exists(fname):
        # Create tps.pkl if it does not exist yet.
        with open(fname, "w+"):
            pass
    else:
        with open(fname, "rb") as f:
            old_tps = pickle.load(f)
        BufferPool.copy_tps(old_tps)

    # Restore latest_tail to BufferPool
    fname = os.path.join(path, "latest_tail.pkl")
    if not os.path.exists(fname):
        # Create latest_tail.pkl if it does not exist yet.
        with open(fname, "w+"):
            pass
    else:
        with open(fname, "rb") as f:
            latest_tail = pickle.load(f)
        BufferPool.copy_latest_tail(latest_tail)
def get_tail(self, tid, column, range_index):
    """Read one data-column value from the tail record identified by *tid*.

    The tail id maps to (page id, slot) via MAX_RECORDS; the stored bytes
    are decoded as a big-endian integer.
    """
    page_id, slot = divmod(tid, MAX_RECORDS)
    raw = BufferPool.get_record(self.name, "Tail", column + NUM_METAS,
                                range_index, page_id, slot)
    return int.from_bytes(raw, byteorder='big')
def close(self):
    """Flush the BufferPool and persist all database metadata to disk.

    Writes, in order: each table's table.pkl (with merge_pid cleared so a
    stale process id is not pickled), page_directory.txt (one
    "name,base/tail,col,range,page" line per uid), tps.pkl, and
    latest_tail.pkl.

    Fix: files are written inside `with` blocks so handles are closed even
    if serialization raises; unused timing locals (feeding only
    commented-out prints) were removed.
    """
    BufferPool.close()

    # Persist each table's config file.
    for table in self.tables:
        table.merge_pid = None  # don't persist a live pid across restarts
        t_path = os.path.join(BufferPool.path, table.name, "table.pkl")
        write_table(t_path, table)

    # Persist the Page Directory config file.
    with open(os.path.join(BufferPool.path, "page_directory.txt"), "w") as f:
        for uid in BufferPool.page_directories.keys():
            t_name, base_tail, column_id, page_range_id, page_id = uid
            fields = [
                t_name, base_tail,
                str(column_id),
                str(page_range_id),
                str(page_id)
            ]
            f.write(",".join(fields) + "\n")

    # Persist the Tps config file.
    with open(os.path.join(BufferPool.path, "tps.pkl"), "wb") as f:
        pickle.dump(BufferPool.tps, f)

    # Persist the latest_tail config file.
    with open(os.path.join(BufferPool.path, "latest_tail.pkl"), "wb") as f:
        pickle.dump(BufferPool.latest_tail, f)
def create_index(self, column_number):
    """Build a secondary index (OOBTree) over *column_number* covering every
    existing record, using the latest value of that column per record.
    """
    tree = OOBTree()
    self.indices[column_number] = tree
    # Look through the specific column-based columns
    for i in range(self.table.num_records):
        # Compute Base page pointers: (range index, page within range, slot within page)
        range_indice = i// (MAX_RECORDS * PAGE_RANGE)
        range_remainder = i % (MAX_RECORDS * PAGE_RANGE)
        page_pointer = [range_indice, range_remainder//MAX_RECORDS, range_remainder%MAX_RECORDS]
        # Find Schema encoding to find the lastest column value of this record
        args = [self.table.name, "Base", SCHEMA_ENCODING_COLUMN, *page_pointer ]
        base_schema = int.from_bytes(BufferPool.get_record(*args), byteorder='big')
        # Find Indirection
        args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer]
        base_indirection = BufferPool.get_record(*args)
        # Find column value: schema bit set means the column was updated, so the
        # latest value lives in a tail page; otherwise read the base page.
        if (base_schema & (1<<column_number))>>column_number == 1:
            key = self.table.get_tail(int.from_bytes(base_indirection,byteorder = 'big'),column_number, page_pointer[0])
        else:
            args = [self.table.name, "Base", column_number + NUM_METAS, *page_pointer]
            key = (int.from_bytes(BufferPool.get_record(*args), byteorder="big"))
        self.update_index(key, page_pointer, column_number)
def sum(self, start_range, end_range, aggregate_column_index):
    """Sum the latest values of one column over every record whose primary
    key falls in [start_range, end_range].

    For each located record, the value comes from the tail pages when the
    base schema-encoding bit for the column is set, otherwise from the base
    page. Values equal to the DELETED sentinel are skipped.

    Improvement: iterate `locations` directly instead of indexing by range,
    and hoist the duplicated DELETED check out of both branches.
    """
    values = 0
    # locate all matching records through the primary-key index
    locations = self.table.index.locate_range(start_range, end_range,
                                              self.table.key)
    for location in locations:
        pointer = location[0]  # (range_index, page_index, record_index)
        # collect base meta datas of this record
        base_schema = int.from_bytes(
            BufferPool.get_record(self.table.name, "Base",
                                  SCHEMA_ENCODING_COLUMN, *pointer),
            byteorder='big')
        base_indirection = BufferPool.get_record(self.table.name, "Base",
                                                 INDIRECTION_COLUMN,
                                                 *pointer)
        if (base_schema >> aggregate_column_index) & 1 == 1:
            # Column was updated: latest value lives in the tail pages.
            temp = self.table.get_tail(
                int.from_bytes(base_indirection, byteorder='big'),
                aggregate_column_index, pointer[0])
        else:
            temp = int.from_bytes(
                BufferPool.get_record(self.table.name, "Base",
                                      aggregate_column_index + NUM_METAS,
                                      *pointer),
                byteorder="big")
        if temp == DELETED:  # record was deleted; exclude from the sum
            continue
        values += temp
    return values
def __merge(self):
    """Background merge: fold the newest tail updates of each data column
    back into a copy of its base page range, then advance the range's TPS.

    For every (column, page range) with tail pages, tail records are walked
    newest-to-oldest between the old TPS and the new TPS; the first (i.e.
    latest) value seen for each base record wins and is written into a deep
    copy of the base page range, clearing that column's schema-encoding bit.
    The merged copy then replaces the live range via the BufferPool.
    """
    keys, p_indices = BufferPool.get_table_tails(self.name)
    for (col_index, rg_index), last_p_index in zip(keys, p_indices):
        # Only data columns are merged; metadata columns are skipped.
        if col_index < NUM_METAS:
            continue
        args = [self.name, 'Tail', col_index, rg_index, last_p_index]
        last_page = BufferPool.get_page(*args)
        old_tps = BufferPool.get_tps(self.name, col_index, rg_index)
        # New TPS = absolute tail-record count for this (column, range).
        new_tps = last_p_index * MAX_RECORDS + last_page.num_records
        page_range = BufferPool.get_base_page_range(
            self.name, col_index, rg_index)
        # Merge into a copy so readers keep seeing a consistent range.
        page_range_copy = copy.deepcopy(page_range)
        merged_record = {}
        for uid in page_range_copy.keys():
            t_name, base_tail, col_id, range_id, page_id = uid
            for rec_id in range(MAX_RECORDS):
                # merged_record contains all the records within base page range
                merged_record[(t_name, base_tail, col_id, range_id,
                               page_id, rec_id)] = 0  # Init
        # NOTE(review): this reads self.merged_record (an instance attribute,
        # presumably populated by mg_rec_update at update/delete time), not
        # the local merged_record built above — confirm this is intentional;
        # if self.merged_record is empty, early stopping never triggers.
        max_merged_count = len(list(self.merged_record.keys()))
        early_stopping = 0
        start_tail_p_index = (new_tps - 1) // MAX_RECORDS
        end_tail_p_index = old_tps // MAX_RECORDS
        # print("Merging Column {} Page Range {}".format(col_index, rg_index))
        # print("New Tps {} Old Tps {} Start Tail Page {} End Tail Page {}".format(new_tps, old_tps, start_tail_p_index, end_tail_p_index))
        for rev_page in reversed(
                range(end_tail_p_index, start_tail_p_index + 1)
        ):  # Rly need to double check, so easily to messed it up
            args_rid = [self.name, 'Tail', BASE_RID, rg_index, rev_page]
            args_data = [self.name, 'Tail', col_index, rg_index, rev_page]
            for rev_rec in reversed(range(0, MAX_RECORDS)):
                # Base rid this tail record belongs to.
                rid = int.from_bytes(
                    BufferPool.get_page(*args_rid).get(rev_rec),
                    byteorder='big')
                base_page, base_rec = rid % (
                    MAX_RECORDS * PAGE_RANGE) // MAX_RECORDS, rid % (
                        MAX_RECORDS * PAGE_RANGE) % MAX_RECORDS
                uid = (self.name, "Base", col_index, rg_index, base_page)
                uid_w_record = (self.name, "Base", col_index, rg_index,
                                base_page, base_rec)
                # Newest-first walk: only the first value per base record wins.
                if merged_record[uid_w_record] == 0:
                    update_val = int.from_bytes(
                        BufferPool.get_page(*args_data).get(rev_rec),
                        byteorder='big')
                    # MAXINT is the "no value for this column" sentinel.
                    if update_val != MAXINT:
                        page_range_copy[uid].update(base_rec, update_val)
                        # Also reset schema encoding to 0
                        args_schema = [
                            self.name, "Base", SCHEMA_ENCODING_COLUMN,
                            rg_index, base_page
                        ]
                        old_encoding = int.from_bytes(BufferPool.get_page(
                            *args_schema).get(base_rec),
                                                      byteorder="big")
                        old_encoding = bin(old_encoding)[2:].zfill(
                            self.num_columns)
                        # Clear just this column's bit in the bit string.
                        new_encoding = old_encoding[:self.num_columns - (
                            col_index - NUM_METAS
                        ) - 1] + "0" + old_encoding[self.num_columns -
                                                    (col_index - NUM_METAS):]
                        new_encoding = int(new_encoding, 2)  # Convert to int
                        BufferPool.page_directories[tuple(
                            args_schema)].update(base_rec, new_encoding)
                    merged_record[uid_w_record] = 1
                    early_stopping += 1
                    if early_stopping == max_merged_count:
                        print("Early Stopped")
                        break
            if early_stopping == max_merged_count:
                break
        # Base Page Range updates
        BufferPool.update_base_page_range(page_range_copy)
        # TPS updates
        BufferPool.set_tps(self.name, col_index, rg_index, new_tps)
    self.merged_record = {}
def base_page_write(self, data):
    """Append one base record: write each column value of *data* onto the
    current base page, allocating a new base page (and, at range start, a
    companion tail page) as needed.
    """
    for i, value in enumerate(data):
        # Locate the current base page from the running record count.
        range_index = (self.num_records // MAX_RECORDS) // PAGE_RANGE
        page_index = (self.num_records // MAX_RECORDS) % PAGE_RANGE
        args = [self.name, "Base", i, range_index, page_index]
        # latest base page
        page = BufferPool.get_page(*args)
        # page_range = self.page_directory["Base"][i][-1]
        # page = page_range.page_range[page_range.curr_page]
        # check if page range currently at the end of the page
        # NOTE(review): page_index = x % PAGE_RANGE is always < PAGE_RANGE,
        # so this condition is always true and the else branch below is
        # unreachable — confirm the intended rollover condition.
        if page_index < PAGE_RANGE:
            # Edge Case: first page of a range also needs its tail page
            # bookkeeping initialized.
            if page_index == 0:
                t_ages = [self.name, "Tail", i, range_index, page_index]
                BufferPool.add_page(tuple(t_ages))  # Create new Tail Page
                BufferPool.set_tps(self.name, i, range_index)
                BufferPool.set_latest_tail(self.name, t_ages[2], t_ages[3],
                                           t_ages[4])
            # Page range not at the end. Verify if Page is full
            if not page.has_capacity():
                # need a new page allocation
                args[-1] += 1  # increment page index
                page = BufferPool.get_page(*args)
                # self.page_directory["Base"][i][-1].write()
                # page = self.page_directory["Base"][i][-1].get()
        else:
            # Page is full, need a new page range and new page
            args[-2] += 1  # Increment Page Range
            args[-1] = 0  # Reset Page Index to 0
            page = BufferPool.get_page(*args)
            # Create New Base Page Range
            args[1] = "Tail"
            BufferPool.add_page(tuple(args))  # Create new Tail Page
            self.add_latest_tail(args[2], args[3], args[4])
        page.dirty = 1
        page.write(value)
def get_latest_tail(self, column_id, page_range_id):
    """Return the newest tail page id for this table's (column, page range)."""
    latest = BufferPool.get_latest_tail(self.name, column_id, page_range_id)
    return latest
def delete(self, key):
    """Logically delete the record with primary key *key*.

    Writes a tail record whose data columns are all the DELETED sentinel and
    whose schema encoding has every column bit set, then points the base
    record's indirection at that tail record.

    Bug fix: in the first-update branch, the RID lookup unpacked the outer
    locate() result (`*page_pointer`) instead of the pointer triple
    (`*page_pointer[0]`) — every other access in this method, and the
    parallel branch in update(), uses page_pointer[0].
    """
    null_value = []
    page_pointer = self.table.index.locate(self.table.key, key)
    for i in range(self.table.num_columns):
        null_value.append(DELETED)
        # Mark this (column, record) as updated for the merge bookkeeping.
        self.table.mg_rec_update(NUM_METAS + i, *page_pointer[0])
    update_range_index, update_record_page_index, update_record_index = page_pointer[
        0][0], page_pointer[0][1], page_pointer[0][2]
    args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[0]]
    base_indirection_id = BufferPool.get_record(*args)
    args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
    base_rid = BufferPool.get_record(*args)
    base_id = int.from_bytes(base_rid, byteorder='big')
    # Next tail id = absolute count of existing tail records in this range.
    tmp_indice = self.table.get_latest_tail(INDIRECTION_COLUMN,
                                            update_range_index)
    args = [
        self.table.name, "Tail", INDIRECTION_COLUMN, update_range_index,
        tmp_indice
    ]
    page_records = BufferPool.get_page(*args).num_records
    total_records = page_records + tmp_indice * MAX_RECORDS
    next_tid = total_records
    # the record is firstly updated (indirection still the MAXINT sentinel)
    if (int.from_bytes(base_indirection_id, byteorder='big') == MAXINT):
        # new tail record's indirection points backward to the base record
        args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
        next_tail_indirection = BufferPool.get_record(*args)  # in bytes
        next_tail_indirection = int.from_bytes(next_tail_indirection,
                                               byteorder='big')
    else:
        # chain onto the previous tail record
        next_tail_indirection = int.from_bytes(base_indirection_id,
                                               byteorder='big')
    args = [
        self.table.name, "Base", SCHEMA_ENCODING_COLUMN, *page_pointer[0]
    ]
    encoding_base = BufferPool.get_record(*args)  # in bytes
    old_encoding = int.from_bytes(encoding_base, byteorder="big")
    # All column bits set marks every column as "updated" (to DELETED).
    new_encoding = int('1' * self.table.num_columns, 2)
    schema_encoding = new_encoding
    starttime = datetime_to_int(datetime.datetime.now())
    lastupdatetime = 0
    updatetime = 0
    # update new tail record
    meta_data = [
        next_tail_indirection, next_tid, schema_encoding, base_id,
        starttime, lastupdatetime, updatetime
    ]
    meta_data.extend(null_value)
    tail_data = meta_data
    self.table.tail_page_write(tail_data, update_range_index)
    # overwrite base page with new metadata
    args = [
        self.table.name, "Base", INDIRECTION_COLUMN, page_pointer[0][0],
        page_pointer[0][1]
    ]
    page = BufferPool.get_page(*args)
    page.update(update_record_index, next_tid)
    args = [
        self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
        page_pointer[0][0], page_pointer[0][1]
    ]
    page = BufferPool.get_page(*args)
    page.update(update_record_index, schema_encoding)
    self.table.num_updates += 1
    self.table.mergeThreadController()
def update(self, key, *columns):
    """Update the record with primary key *key*.

    *columns* holds one entry per data column: None leaves the column
    unchanged; any other value triggers a new tail record carrying the
    cumulative latest column values, after which the base record's
    indirection and schema encoding are overwritten to point at it.
    """
    # get the indirection in base pages given specified key\
    page_pointer = self.table.index.locate(self.table.key, key)
    update_range_index, update_record_page_index, update_record_index = page_pointer[
        0][0], page_pointer[0][1], page_pointer[0][2]
    # if primary key in index is also updated, then insert new entries into primary key index
    if (columns[self.table.key] != None):
        self.table.index.update_index(columns[self.table.key],
                                      page_pointer[0], self.table.key)
    args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[0]]
    base_indirection_id = BufferPool.get_record(*args)
    args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
    base_rid = BufferPool.get_record(*args)
    base_id = int.from_bytes(base_rid, byteorder='big')
    for query_col, val in enumerate(columns):
        if val == None:
            continue
        else:
            # self.table.page_directory["Base"][NUM_METAS+query_col][update_range_index].Hash_insert(int.from_bytes(base_rid,byteorder='big'))
            # compute new tail record TID
            # Mark this (column, record) as updated for merge bookkeeping.
            self.table.mg_rec_update(NUM_METAS + query_col,
                                     *page_pointer[0])
            # Next tail id = absolute count of existing tail records.
            tmp_indice = self.table.get_latest_tail(
                INDIRECTION_COLUMN, update_range_index)
            args = [
                self.table.name, "Tail", INDIRECTION_COLUMN,
                update_range_index, tmp_indice
            ]
            page_records = BufferPool.get_page(*args).num_records
            total_records = page_records + tmp_indice * MAX_RECORDS
            next_tid = total_records
            #next_tid = int.from_bytes(('t'+ str(total_records)).encode(), byteorder = "big")
            # the record is firstly updated (indirection still MAXINT sentinel)
            if (int.from_bytes(base_indirection_id,
                               byteorder='big') == MAXINT):
                # compute new tail record indirection : the indirection of tail record point backward to base pages
                args = [
                    self.table.name, "Base", RID_COLUMN, *page_pointer[0]
                ]
                next_tail_indirection = BufferPool.get_record(
                    *args)  # in bytes
                next_tail_indirection = int.from_bytes(
                    next_tail_indirection, byteorder='big')
                # compute tail columns : e.g. [NONE,NONE,updated_value,NONE]
                next_tail_columns = []
                next_tail_columns = [
                    MAXINT for i in range(0, len(columns))
                ]
                next_tail_columns[query_col] = val
            # the record has been updated
            else:
                # compute new tail record indirection : the indirection of new tail record point backward to last tail record for this key
                next_tail_indirection = int.from_bytes(base_indirection_id,
                                                       byteorder='big')
                # compute tail columns : first copy the columns of the last tail record and update the new specified attribute
                base_indirection = int.from_bytes(base_indirection_id,
                                                  byteorder='big')
                next_tail_columns = self.table.get_tail_columns(
                    base_indirection, update_range_index)
                next_tail_columns[query_col] = val
            args = [
                self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                *page_pointer[0]
            ]
            encoding_base = BufferPool.get_record(*args)
            old_encoding = int.from_bytes(encoding_base, byteorder="big")
            # Set this column's bit to mark it as updated.
            new_encoding = old_encoding | (1 << query_col)
            schema_encoding = new_encoding
            starttime = datetime_to_int(datetime.datetime.now())
            lastupdatetime = 0
            updatetime = 0
            # update new tail record
            meta_data = [
                next_tail_indirection, next_tid, schema_encoding, base_id,
                starttime, lastupdatetime, updatetime
            ]
            meta_data.extend(next_tail_columns)
            tail_data = meta_data
            self.table.tail_page_write(tail_data, update_range_index)
            # overwrite base page with new metadata
            args = [
                self.table.name, "Base", INDIRECTION_COLUMN,
                page_pointer[0][0], page_pointer[0][1]
            ]
            page = BufferPool.get_page(*args)
            page.update(update_record_index, next_tid)
            args = [
                self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                page_pointer[0][0], page_pointer[0][1]
            ]
            page = BufferPool.get_page(*args)
            page.update(update_record_index, schema_encoding)
    self.table.num_updates += 1
    #self.table.event.set()
    self.table.mergeThreadController()