Example #1
0
 def __init__(self, name, num_columns, key):
     """Store the table metadata and create its index and buffer pool.

     :param name: table name
     :param num_columns: number of data columns
     :param key: position of the key column among the data columns
     """
     self.name, self.key, self.num_columns = name, key, num_columns
     # Original note: to be replaced by the index API everywhere.
     self.page_directory = dict()
     self.index = Index(self, num_columns)
     self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE,
                                              num_columns)
     # Inserts start in page range 0.
     self.curr_page_range = 0
     # An all-zero row is inserted immediately.
     # NOTE(review): presumably this keeps RID 0 away from real records - confirm.
     placeholder_row = [0] * num_columns
     self.insertRecord(placeholder_row)
Example #2
0
   def open(self, path):
       """Open the database directory ``path`` and load every table stored in it.

       If ``path`` is not an existing directory it is created and nothing is
       loaded.  Otherwise every entry NAME of the directory is expected to
       contain ``NAME/NAME.pkl`` (the pickled table) plus one
       ``table_index_colI.txt`` file for each I below ``table.num_index``.

       :param path: directory holding the database files
       """
       self.path = path
       if not os.path.isdir(path):
           os.mkdir(path)
           return
       for entry in os.listdir(path):
           table_dir = os.path.join(self.path, str(entry))
           # NOTE(security): pickle.load can run arbitrary code; only open
           # directories that were written by this program.
           # ``with`` closes the file even when unpickling fails.
           with open(os.path.join(table_dir, str(entry) + '.pkl'), 'rb+') as table_file:
               table = pickle.load(table_file)
           # These members are rebuilt on every load.
           table.index = Index(table)
           table.buffer = Bufferpool(table)
           table.sem = threading.RLock()
           for col in range(table.num_index):
               index_path = os.path.join(table_dir,
                                         'table_index_col' + str(col) + '.txt')
               with open(index_path, 'r+') as index_file:
                   for line in index_file:
                       # Line layout: key, then RIDs, separated by '_'.  The
                       # last field is skipped, as in the original code
                       # (presumably the trailing newline - confirm).
                       fields = line.split('_')
                       for rid in fields[1:-1]:
                           table.index.insert(col, int(fields[0]), rid)
           self.append_table(table)
           self.num_table += 1
Example #3
0
 def __init__(self, name, num_columns, key, bufferpool):
     """Initialise table metadata, page bookkeeping and lock tables.

     :param name: table name
     :param num_columns: number of user columns
     :param key: key column, stored as given
     :param bufferpool: bufferpool object shared by every table of the database
     """
     self.name, self.key = name, key
     # Four internal columns (RID, Indirection, Schema, Timestamp) are
     # counted on top of the user columns.
     internal_columns = 4
     self.total_columns = num_columns + internal_columns
     self.num_columns = num_columns
     self.page_directory = dict()  # {RID: (pageId, offset)}
     self.index_directory = dict()  # {Key: RID}
     self.index = Index(self)
     # Next RID to hand out; a running counter avoids duplicates.
     self.rid_counter = 0
     # Page id currently being added to.  It starts at 2, which is also the
     # largest page id listed below.
     self.num_page = 2
     # All configuration constants, as returned by init().
     self.config = init()
     # Page ids of the internal / external pages of base records.
     self.base_pages_internal = [1]
     self.base_pages_external = [2]
     # Page ids that belong to tail pages.
     self.tail_pages = [0]
     self.bufferpool = bufferpool
     # Lock tables, both empty at start.
     self.shared_locks = dict()
     self.exclusive_locks = dict()
Example #4
0
    def __init__(self, name, num_columns, key):
        self.name = name
        self.key = key
        self.num_columns = num_columns
        self.bufferpool = BufferPool(self.num_columns)
        #self.page_directory = {}
        self.basePage_dir = {}
        self.tailPage_dir = {
        }  # Store tailRID: tailLocation, so that we can find a tail record
        self.tailRIDTOBaseRID = {}
        self.index = Index(self)
        self.num_PageRanges = 1

        # baseRID and tailRID are initialized to 1, 0 is for deleted record
        self.baseRID = 1
        self.tailRID = 1

        #merge
        self.mergeQ = []
        #self.deallocateQ = []
        self.mergedCount = 0

        thread = threading.Thread(target=self.merge, args=())
        thread.daemon = True
        thread.start()
Example #5
0
 def __init__(self, name, num_columns, key):
     """Store the table metadata and create an empty directory and an index.

     :param name: table name
     :param num_columns: number of columns
     :param key: key column, stored as given
     """
     self.name, self.key, self.num_columns = name, key, num_columns
     self.page_directory = dict()
     self.index = Index(self)
 def __init__(self, name, num_columns, key):
     """Store the table metadata and create the first page range.

     :param name: table name
     :param num_columns: number of columns
     :param key: key column, stored as given
     """
     self.name, self.key, self.num_columns = name, key, num_columns
     self.page_directory = dict()
     self.index = Index(self)
     # The table starts with exactly one page range.
     first_range = PageRange(num_columns)
     self.pageRanges = [first_range]
     self.keyToBaseRID = dict()
     # Both RID counters start at 1.
     self.baseRID = self.tailRID = 1
Example #7
0
 def __init__(self, name, num_columns, Table_key, path):
     """Initialise the table state and start the merge thread.

     :param name: table name
     :param num_columns: number of columns
     :param Table_key: key column, stored as given
     :param path: storage path, stored as given
     """
     self.path, self.name = path, name
     self.Table_key = Table_key
     self.num_columns = num_columns
     self.page_directory = dict()  # RID -> record object
     # NOTE(review): the original author was unsure about the next two
     # containers ("original unmerged" / "new copy being merged") - confirm.
     self.origin_base_page_memory = list()
     self.after_merge_base_page_memory = dict()
     # Bufferpool and Index receive the table itself, so they are created
     # only after the attributes above are in place.
     self.buffer = Bufferpool(self)
     self.index = Index(self)
     self.prange_num = 0
     # Counters for the next free base / tail RID.
     self.free_brid = 0
     self.free_trid = 0
     self.rid_list = list()
     self.key_list = set()
     self.rif_trash = list()  # NOTE(review): purpose unknown to the original author
     self.merge_waiting_set = set()  # RIDs that still need to be merged
     self.merge_times = 0
     self.num_index = 0
     self.sem = threading.RLock()
     # Must stay last: the merge thread is started here.
     self.merge_start()
Example #8
0
     def __init__(self, name, num_columns, key):
         """Store the table metadata and create buffer pool, index and merge queues.

         :param name: table name
         :param num_columns: number of columns, also passed to BufferPool
         :param key: key column, stored as given
         """
         self.name = name
         self.key = key
         self.num_columns = num_columns
         self.bufferpool = BufferPool(self.num_columns)
         #self.page_directory = {}
         self.tailPage_lib = {} # Maps tailRID -> tailLocation so that a tail record can be found
         self.index = Index(self)
         self.num_PageRanges = 1

         # baseRID and tailRID start at 1; 0 is reserved for deleted records
         self.baseRID = 1
         self.tailRID = 1

         # Queues used by the merge logic (both start empty)
         self.mergeQ = queue.Queue()
         self.deallocateQ = queue.Queue()
        '''
Example #9
0
class Table:
    """
    :param name: string         #Table name
    :param num_columns: int     #Number of Columns: all columns are integer
    :param key: int             #Index of table key in columns
    """
    def __init__(self, name, num_columns, key):
        """Store the table metadata and create its index and buffer pool.

        An all-zero row is inserted at the end of construction.
        """
        self.name, self.key, self.num_columns = name, key, num_columns
        # Original note: to be replaced by the index API everywhere.
        self.page_directory = dict()
        self.index = Index(self, num_columns)
        self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE,
                                                 num_columns)
        # Inserts start in page range 0.
        self.curr_page_range = 0
        # NOTE(review): presumably keeps RID 0 away from real records - confirm.
        placeholder_row = [0] * num_columns
        self.insertRecord(placeholder_row)

    # Future function to merge tail records into base records
    def __merge(self):
        # create deep copy of page range

        #for x offset in base page
        pass

    def __str__(self):
        return self.name

    def createIndex(self, column_num):
        self.index.create_index(column_num)

    def getIndex(self, column_num):
        return self.index.getIndex(column_num)

    def close(self):
        self.buffer_pool_range.evictAll()

    def getName(self):
        return self.name

    # Creates a new PageRange if needed, and appends it to page_ranges
    def newPageRange(self):
        # self.page_ranges.append(PageRange(self.num_columns))
        # self.buffer_pool_range.
        self.curr_page_range = self.curr_page_range + 1

    # Helper function for the translation of RID value to RID components
    def getOffset(self, rid):
        """Return the offset component of ``rid``.

        The offset is ``rid`` modulo ``PAGE_SIZE // COL_DATA_SIZE``.
        """
        slots = PAGE_SIZE // COL_DATA_SIZE
        return rid % slots

    # Helper function for the translation of RID value to RID components
    def getPageR(self, rid):
        """Return the page-range component of ``rid``.

        One page range spans ``(BASE_CONST + TAIL_CONST)`` blocks of
        ``PAGE_SIZE // COL_DATA_SIZE`` RIDs each.
        """
        blocks = BASE_CONST + TAIL_CONST
        slots = PAGE_SIZE // COL_DATA_SIZE
        return rid // (blocks * slots)

    # Helper function for the translation of RID value to RID components
    def getPageB(self, rid):
        """Return the page-block component of ``rid`` (block number inside its page range)."""
        slots = PAGE_SIZE // COL_DATA_SIZE
        block_number = rid // slots
        return block_number % (BASE_CONST + TAIL_CONST)

    # Helper function for the translation of RID components to RID value
    def getRID(self, pageR, pageB, offset):
        """Compose a RID value from its page range, page block and offset components."""
        range_part = pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        block_part = pageB * (PAGE_SIZE // COL_DATA_SIZE)
        return range_part + block_part + offset

    # Helper function to find the value of the next RID before writing to basepages
    def nextBaseRid(self):
        """Return the RID the next base record in the current page range will get.

        The buffer pool supplies the part of the RID that is local to the
        page range; the start of the current page range is added on top.
        """
        local_rid = self.buffer_pool_range.nextBaseRid_Pool(self.curr_page_range)
        rids_per_range = (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        return self.curr_page_range * rids_per_range + local_rid

    # Helper function to find the value of the next tail RID before writing to tail pages
    def nextTailRid(self, pageR):
        """Return the RID the next tail record of page range ``pageR`` will get.

        The buffer pool supplies the part of the RID that is local to the
        page range; the start of page range ``pageR`` is added on top.
        """
        local_rid = self.buffer_pool_range.nextTailRid_Pool(pageR)
        rids_per_range = (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        return pageR * rids_per_range + local_rid

    # Helper function unique for this metadata scheme
    def formatCols(self, indir, rid, timestamp, schema, columns):
        format_cols = []
        format_cols.append(indir)
        format_cols.append(rid)
        format_cols.append(timestamp)
        format_cols.append(schema)

        for index in range(self.num_columns):
            format_cols.append(columns[index])

        return format_cols

    # Function to set the RID of a record to the invalid value
    def deleteRecord(self, rid):
        """Delete record ``rid`` through the buffer pool.

        The RID is split into page range, page block and offset, which are
        handed to ``deleteRecord_Pool``.
        """
        location = (self.getPageR(rid), self.getPageB(rid), self.getOffset(rid))
        self.buffer_pool_range.deleteRecord_Pool(*location)

    # Function to check the indirection value of a record before doing a full read
    def checkIndirection(self, rid):
        """Return the RID that should be read for record ``rid``.

        That is the record's indirection value when it is non-zero, and
        ``rid`` itself when the indirection value is 0.
        """
        indir = self.buffer_pool_range.getIndirection_Pool(
            self.getPageR(rid), self.getPageB(rid), self.getOffset(rid))
        return rid if indir == 0 else indir

    def readRecord(self, rid):
        """Read the version of record ``rid`` selected by its indirection value.

        :return: a ``Record`` carrying the original ``rid``, the key value
            and the data columns, or ``None`` when the stored RID column is 0.
        """
        # Follow the indirection first, then locate that version.
        latest_rid = self.checkIndirection(rid)
        full_record = self.buffer_pool_range.readBlock_Pool(
            self.getPageR(latest_rid),
            self.getPageB(latest_rid),
            self.getOffset(latest_rid))

        if full_record[RID_COLUMN] == 0:
            return None

        # The data columns are the last num_columns entries of the block.
        first_data_column = len(full_record) - self.num_columns
        data_columns = full_record[first_data_column:]
        return Record(rid, data_columns[self.key], data_columns)

    def insertRecord(self, columns):
        """Write ``columns`` as a new base record and index its key.

        :param columns: data values, one per table column
        :return: the RID given to the new base record
        """
        pool = self.buffer_pool_range
        # Move on to the next page range when the current one has no room.
        if pool.hasCapacityBase_Pool(self.curr_page_range) == False:
            self.newPageRange()

        base_rid = self.nextBaseRid()
        # Indirection, timestamp and schema encoding are all written as 0.
        base_row = self.formatCols(0, base_rid, 0, 0, columns)
        pool.writeBaseBlock_Pool(self.curr_page_range, base_row)

        self.index.insertPair(self.key, columns[self.key], base_rid)
        return base_rid

    def updateRecord(self, rid, columns):
        """Append a tail record holding the updated values of record ``rid``.

        Entries of ``columns`` that are ``None`` keep their previous value.
        Nothing is written when the record cannot be read (``readRecord``
        returns ``None``).

        :param rid: RID of the base record
        :param columns: new values, ``None`` for unchanged columns
        """
        pool = self.buffer_pool_range
        # NOTE(review): capacity is checked for curr_page_range while the tail
        # record is written to the record's own page range - confirm intended.
        if pool.hasCapacityTail_Pool(self.curr_page_range) == False:
            self.newPageRange()

        page_R = self.getPageR(rid)
        page_B = self.getPageB(rid)
        page_offset = self.getOffset(rid)

        # The new tail record points back at the previous version.
        prev_vers = pool.getIndirection_Pool(page_R, page_B, page_offset)
        tail_rid = self.nextTailRid(page_R)

        prev_record = self.readRecord(rid)
        if prev_record is None:
            return
        prev_columns = prev_record.getColumns()

        new_columns = [
            prev_columns[position] if columns[position] is None else columns[position]
            for position in range(self.num_columns)
        ]

        # Timestamp and schema encoding are both written as 0.
        tail_row = self.formatCols(prev_vers, tail_rid, 0, 0, new_columns)
        written = pool.writeTailBlock_Pool(page_R, tail_row)
        # Not used afterwards; the conversion is kept so that a non-integer
        # return value still fails at this point.
        written_rid = int(written)

        # Point the base record at the new tail record.
        pool.editBlock_Pool(page_R, page_B, INDIRECTION_COLUMN, page_offset,
                            tail_rid)
Example #10
0
from template.db import Database
from template.query import Query
from template.config import init
from template.index import Index

from random import choice, randint, sample, seed
init()

# Open (or create) the on-disk database and build the Grades table:
# 5 columns, key in column 0.
db = Database()
db.open('./ECS165')

grades_table = db.create_table('Grades', 5, 0)
index = Index(grades_table)
query = Query(grades_table)

# Repopulate with random data.  The fixed seed keeps the rows reproducible.
records = {}
seed(3562901)
for i in range(0, 1000):
    key = 92106429 + i
    # Key followed by four random grades in [0, 20].
    records[key] = [key, *[randint(0, 20) for _ in range(4)]]
    query.insert(*records[key])
keys = sorted(records)
print("Insert finished")
Example #11
0
class Table:
    """
    :param name: string         #Table name
    :param num_columns: int     #Number of Columns: all columns are integer
    :param key: int             #Index of table key in columns
    """

    def __init__(self, name, num_columns, key):
        """Store the table metadata and create index, buffer pool and lock tables.

        An all-zero row is inserted during construction.
        """
        self.name, self.key, self.num_columns = name, key, num_columns
        # Original note: to be replaced by the index API everywhere.
        self.page_directory = dict()
        self.index = Index(self, num_columns)
        self.buffer_pool_range = BufferPoolRange(BUFFER_POOL_SIZE_RANGE, num_columns)
        # Inserts start in page range 0.
        self.curr_page_range = 0
        # NOTE: this insert runs before the lock members below exist.
        placeholder_row = [0] * num_columns
        self.insertRecord(placeholder_row)
        # One mutex protects both lock tables.
        self.lock = threading.Lock()

        # Lock state per RID and per page range, see the lock*/unlock* methods.
        self.lock_manager_rids = dict()
        self.lock_manager_pageRanges = dict()

    def __str__(self):
            return self.name

    def lockReadRid(self, rid):
        self.lock.acquire()

        val = self.lock_manager_rids.get(rid)

        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass

        if val >= 0:
            self.lock_manager_rids[rid] = val + 1
            retval = True
        else:
            retval = False

        self.lock.release()

        return retval

    def unlockReadRid(self, rid):
        self.lock.acquire()

        val = self.lock_manager_rids.get(rid)

        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass

        if val > 0:
            self.lock_manager_rids[rid] = val - 1
            retval = True
        else:
            retval = False

        self.lock.release()
        
        return retval

    def lockWriteRid(self, rid):
        self.lock.acquire()

        val = self.lock_manager_rids.get(rid)

        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass

        if val == 0:
            self.lock_manager_rids[rid] = val - 1
            retval = True
        else:
            retval = False

        self.lock.release()
        
        return retval

    def unlockWriteRid(self, rid):
        self.lock.acquire()

        val = self.lock_manager_rids.get(rid)

        if val is None:
            self.lock_manager_rids[rid] = 0
            val = 0
        else:
            pass

        if val == -1:
            self.lock_manager_rids[rid] = val + 1
            retval = True
        else:
            retval = False

        self.lock.release()
        
        return retval

    def lockWriteRange(self, pageR):
        self.lock.acquire()

        val = self.lock_manager_pageRanges.get(pageR)

        if val is None:
            self.lock_manager_pageRanges[pageR] = 0
            val = 0
        else:
            pass

        if val == 0:
            self.lock_manager_pageRanges[pageR] = val - 1
            retval = True
        else:
            retval = False

        self.lock.release()
        
        return retval

    def unlockWriteRange(self, pageR):
        self.lock.acquire()

        val = self.lock_manager_pageRanges.get(pageR)

        if val is None:
            self.lock_manager_pageRanges[pageR] = 0
            val = 0
        else:
            pass

        if val == -1:
            self.lock_manager_pageRanges[pageR] = val + 1
            retval = True
        else:
            retval = False

        self.lock.release()
        
        return retval

    # Merges tail records into base records for one page range
    def merge(self, pageR):
        """Build a consolidated copy of page range ``pageR`` and submit it.

        Every base record slot of the loaded copy is written to a fresh
        ``PageRange``: an all-zero block when the slot's RID is 0, the base
        record itself when its indirection is 0, otherwise the record the
        indirection points to (with its indirection column set to that
        indirection value).  The result is passed to ``submitMerge``.

        :param pageR: number of the page range to merge
        """
        source = self.buffer_pool_range.loadMerge(pageR)
        # Original note: "indexes clear for removal, shift constant".
        tail_blocks = source.tail_count - 1
        merged = PageRange(self.num_columns)

        for pageB in range(source.max_base):
            for offset in range(source.page_blocks[pageB].pages[0].num_records):
                # Start from an all-zero block; it is what gets written for
                # an invalid (RID 0) slot.
                row = [0] * source.page_blocks[0].total
                if source.getRID(pageB, offset) != 0:
                    indir = source.getIndirection(pageB, offset)
                    if indir == 0:
                        # Base record is already the latest version.
                        row = source.readBlock(pageB, offset)
                    else:
                        # Take the version the indirection points to and keep
                        # the indirection value in the copied record.
                        row = source.readBlock(self.getPageB(indir),
                                               self.getOffset(indir))
                        row[INDIRECTION_COLUMN] = indir

                if merged.hasCapacityBase():
                    merged.writeBaseBlock(row)
                else:
                    print("Unexpected error, new_copy at capacity")

        self.buffer_pool_range.submitMerge(pageR, merged, tail_blocks)

    '''
    def enqueueMerge(self, pageR):
        self.merge_queue.enqueue

    def checkMerge(self):
        if self.merge_queue.empty():
            pass
        else:
            # run merge
            next = self.merge_queue.get()
            self.__merge(next)
            pass
    '''

    def createIndex(self, column_num):
        self.index.create_index(column_num)

    def getIndex(self, column_num):
        return self.index.getIndex(column_num)

    def close(self):
        self.buffer_pool_range.evictAll()
        self.buffer_pool_range.lock = None

    def getName(self):
        return self.name

    # Creates a new PageRange if needed, and appends it to page_ranges
    def newPageRange(self):
        # self.page_ranges.append(PageRange(self.num_columns))
        # self.buffer_pool_range.
        self.curr_page_range = self.curr_page_range + 1

    # Helper function for the translation of RID value to RID components
    def getOffset(self, rid):
        """Return the offset component of ``rid``.

        The offset is ``rid`` modulo ``PAGE_SIZE // COL_DATA_SIZE``.
        """
        slots = PAGE_SIZE // COL_DATA_SIZE
        return rid % slots

    # Helper function for the translation of RID value to RID components
    def getPageR(self, rid):
        """Return the page-range component of ``rid``.

        One page range spans ``(BASE_CONST + TAIL_CONST)`` blocks of
        ``PAGE_SIZE // COL_DATA_SIZE`` RIDs each.
        """
        blocks = BASE_CONST + TAIL_CONST
        slots = PAGE_SIZE // COL_DATA_SIZE
        return rid // (blocks * slots)

    # Helper function for the translation of RID value to RID components
    def getPageB(self, rid):
        """Return the page-block component of ``rid`` (block number inside its page range)."""
        slots = PAGE_SIZE // COL_DATA_SIZE
        block_number = rid // slots
        return block_number % (BASE_CONST + TAIL_CONST)

    # Helper function for the translation of RID components to RID value
    def getRID(self, pageR, pageB, offset):
        """Compose a RID value from its page range, page block and offset components."""
        range_part = pageR * (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        block_part = pageB * (PAGE_SIZE // COL_DATA_SIZE)
        return range_part + block_part + offset

    # Helper function to find the value of the next RID before writing to basepages
    def nextBaseRid(self):
        """Return the RID the next base record in the current page range will get.

        The buffer pool supplies the part of the RID that is local to the
        page range; the start of the current page range is added on top.
        """
        local_rid = self.buffer_pool_range.nextBaseRid_Pool(self.curr_page_range)
        rids_per_range = (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        return self.curr_page_range * rids_per_range + local_rid

    # Helper function to find the value of the next tail RID before writing to tail pages
    def nextTailRid(self, pageR):
        """Return the RID the next tail record of page range ``pageR`` will get.

        The buffer pool supplies the part of the RID that is local to the
        page range; the start of page range ``pageR`` is added on top.
        """
        local_rid = self.buffer_pool_range.nextTailRid_Pool(pageR)
        rids_per_range = (BASE_CONST + TAIL_CONST) * (PAGE_SIZE // COL_DATA_SIZE)
        return pageR * rids_per_range + local_rid

    # Helper function unique for this metadata scheme
    def formatCols(self, indir, rid, timestamp, schema, columns):
        format_cols = []
        format_cols.append(indir)
        format_cols.append(rid)
        format_cols.append(timestamp)
        format_cols.append(schema)

        for index in range(self.num_columns):
            format_cols.append(columns[index])

        return format_cols

    # Function to set the RID of a record to the invalid value
    def deleteRecord(self, rid):
        """Delete record ``rid`` through the buffer pool.

        The RID is split into page range, page block and offset, which are
        handed to ``deleteRecord_Pool``.
        """
        location = (self.getPageR(rid), self.getPageB(rid), self.getOffset(rid))
        self.buffer_pool_range.deleteRecord_Pool(*location)

    # Function to check the indirection value of a record before doing a full read
    def checkIndirection(self, rid):
        """Return the RID that should be read for record ``rid``.

        That is the record's indirection value when it is non-zero, and
        ``rid`` itself when the indirection value is 0.
        """
        indir = self.buffer_pool_range.getIndirection_Pool(
            self.getPageR(rid), self.getPageB(rid), self.getOffset(rid))
        return rid if indir == 0 else indir

    def readFullRecord(self,rid):
        """Return the complete stored block (metadata and data) for record ``rid``.

        The indirection of ``rid`` is followed first, so the block returned
        is the one ``checkIndirection`` selects.
        """
        latest_rid = self.checkIndirection(rid)
        return self.buffer_pool_range.readBlock_Pool(
            self.getPageR(latest_rid),
            self.getPageB(latest_rid),
            self.getOffset(latest_rid))

    def readRecord(self, rid):
        """Read the version of record ``rid`` selected by its indirection value.

        :return: a ``Record`` carrying the original ``rid``, the key value
            and the data columns, or ``None`` when the stored RID column is 0.
        """
        # Follow the indirection first, then locate that version.
        latest_rid = self.checkIndirection(rid)
        full_record = self.buffer_pool_range.readBlock_Pool(
            self.getPageR(latest_rid),
            self.getPageB(latest_rid),
            self.getOffset(latest_rid))

        if full_record[RID_COLUMN] == 0:
            return None

        # The data columns are the last num_columns entries of the block.
        first_data_column = len(full_record) - self.num_columns
        data_columns = full_record[first_data_column:]
        return Record(rid, data_columns[self.key], data_columns)

    def insertRecord(self, columns):
        """Write ``columns`` as a new base record and index its key.

        :param columns: data values, one per table column
        :return: the RID given to the new base record
        """
        pool = self.buffer_pool_range
        # Move on to the next page range when the current one has no room.
        if pool.hasCapacityBase_Pool(self.curr_page_range) == False:
            self.newPageRange()

        base_rid = self.nextBaseRid()
        # Indirection, timestamp and schema encoding are all written as 0.
        base_row = self.formatCols(0, base_rid, 0, 0, columns)
        pool.writeBaseBlock_Pool(self.curr_page_range, base_row)

        self.index.insertPair(self.key, columns[self.key], base_rid)

        # Checked again after the write so that the current page range always
        # has room when this method returns.
        if pool.hasCapacityBase_Pool(self.curr_page_range) == False:
            self.newPageRange()

        return base_rid

    def updateRecord(self, rid, columns):
        """Append a tail record holding the updated values of record ``rid``.

        Entries of ``columns`` that are ``None`` keep their previous value.
        Nothing is written when the record cannot be read (``readRecord``
        returns ``None``).

        :param rid: RID of the base record
        :param columns: new values, ``None`` for unchanged columns
        """
        pool = self.buffer_pool_range
        # NOTE(review): capacity is checked for curr_page_range while the tail
        # record is written to the record's own page range - confirm intended.
        if pool.hasCapacityTail_Pool(self.curr_page_range) == False:
            self.newPageRange()

        page_R = self.getPageR(rid)
        page_B = self.getPageB(rid)
        page_offset = self.getOffset(rid)

        # The new tail record points back at the previous version.
        prev_vers = pool.getIndirection_Pool(page_R, page_B, page_offset)
        tail_rid = self.nextTailRid(page_R)

        prev_record = self.readRecord(rid)
        if prev_record is None:
            return
        prev_columns = prev_record.getColumns()

        new_columns = [
            prev_columns[position] if columns[position] is None else columns[position]
            for position in range(self.num_columns)
        ]

        # Timestamp and schema encoding are both written as 0.
        tail_row = self.formatCols(prev_vers, tail_rid, 0, 0, new_columns)
        written = pool.writeTailBlock_Pool(page_R, tail_row)
        # Not used afterwards; the conversion is kept so that a non-integer
        # return value still fails at this point.
        written_rid = int(written)

        # Point the base record at the new tail record.
        pool.editBlock_Pool(page_R, page_B, INDIRECTION_COLUMN, page_offset, tail_rid)
Example #12
0
class Table:
    """
    :param name: string         #Table name
    :param num_columns: int     #Number of Columns: all columns are integer
    :param key: int             #Index of table key in columns
    """
    def __init__(self, name, num_columns, Table_key, path):
        """Set up an empty table and kick off one background merge pass.

        :param name: table name, stored as ``self.name``
        :param num_columns: number of user columns
        :param Table_key: stored as ``self.Table_key``; not read by the
            methods shown in this class body
        :param path: stored as ``self.path``; not read by the methods shown
            in this class body
        """
        self.path = path
        self.name = name
        self.Table_key = Table_key
        self.num_columns = num_columns
        # self.prange_directory = {}  #'col_num': 'page_range_list'
        # Maps RID -> Record object (filled by create_record / update_record).
        self.page_directory = {}  #'RID': 'record obj'
        # NOTE(review): the next two containers are not touched by any method
        # visible in this class body; their purpose is unconfirmed.
        self.origin_base_page_memory = []  # presumably the original, unmerged base pages - TODO confirm
        self.after_merge_base_page_memory = {
        }  # presumably the copy that is being merged - TODO confirm
        # Bufferpool and Index receive the table while it is only partly
        # initialised: attributes assigned below do not exist yet when their
        # constructors run.
        self.buffer = Bufferpool(self)
        self.index = Index(self)
        # Page-range number used for inserts; bumped by insert_record when the
        # current range cannot take another page.
        self.prange_num = 0
        # Counters behind next_free_rid: base ids are 'b<n>', tail ids 't<n>'.
        self.free_brid = 0
        self.free_trid = 0
        # Base RIDs in insertion order (appended by create_record).
        self.rid_list = []
        # Values of data[0] already inserted; used to reject duplicates.
        self.key_list = set()
        self.rif_trash = []  # NOTE(review): unused in the methods shown; purpose unknown
        self.merge_waiting_set = set()  # storing rid which needs to be merged
        # Number of records processed by merge().
        self.merge_times = 0
        self.num_index = 0  # NOTE(review): not read in the methods shown
        # Re-entrant lock taken around insert/update/read page access.
        self.sem = threading.RLock()
        # Starts a daemon thread running merge(); merge_waiting_set is empty
        # at this point, so that first pass normally has nothing to do.
        self.merge_start()

        # RIDs are shared in one table
        # once a record is created, take one RID out of the pool
        # MILESTONE1 never puts a RID back into the pool
        # easy to use BinarySearch in B-tree

        # TODO: remove hard code, and set it in config
        # self.total_RID = 100000000

    # b_page and t_page will have separate rids
    def merge_start(self):
        thread = threading.Thread(target=self.merge)
        thread.daemon = True
        thread.start()

    def next_free_rid(self, page_pos):
        if page_pos == 0:
            rid = 'b' + str(self.free_brid)
            # if rid >=100000000:
            #     return -1
            self.free_brid = self.free_brid + 1
        if page_pos == 1:
            rid = 't' + str(self.free_trid)
            self.free_trid += 1
        return rid

    """
    Create a page range for each column (category).
    Assign each page range one empty base page,
    and write down its location in the page_directory.
    For example: a table called UCD has 3 columns (categories):
    student name, year, grade.

    """

    def get_prange_num(self):
        return self.prange_num

    # To Do: build index during looping
    def create(self):
        """Load one fresh page range per column into the buffer pool.

        A page range is built for every user column and every metadata column
        (``num_columns + META_DATA_COL_NUM`` in total), each holding one new
        base page and one new tail page and tagged with the current
        ``prange_num``. Nothing is returned.
        """
        column_total = self.num_columns + META_DATA_COL_NUM
        for _ in range(column_total):
            base_page = Page(0)
            tail_page = Page(0)
            fresh_range = Prange(base_page, tail_page, self.prange_num)
            self.buffer.load_prange(fresh_range)

    # def insert_meta_data(self, prange, record):
    #     prange.b_page[record.page_pos].

    def add_prange(self, column, times):
        for i in range(times):
            b_page = Page(0)
            t_page = Page(0)
            # print('adding prange' + str(self.prange_num))
            prange = Prange(b_page, t_page, self.prange_num)
            # self.prange_directory.get(column).append(prange)
            self.buffer.load_prange(prange)

    def create_record(self, rid, indirect, value, data, first):
        """Build a ``Record`` and, for a record's first column, register it.

        When ``first == True`` the record is stored in ``page_directory``
        under ``rid`` and ``rid`` is appended to ``rid_list``; otherwise the
        record is only constructed. The new record is returned either way.
        """
        new_record = Record(rid, indirect, value, data)
        if not (first == True):
            return new_record
        self.page_directory[rid] = new_record
        self.rid_list.append(rid)
        return new_record

    # To Do: insert record to index
    def insert_record(self, *data):
        if data[0] in self.key_list:
            # print(data, 'key existed----')
            # self.sem.release()
            return False
        self.key_list.add(data[0])
        record = None
        first = None
        rid = None
        prange_ = None
        meta_cols = []
        read = False
        # print(data)
        # flag = False
        if len(self.buffer.pool) == 0:
            read = True
        self.sem.acquire()
        for i in range(self.num_columns + META_DATA_COL_NUM):
            if i == 0:
                first = True
            else:
                first = False
            prange_ = self.buffer.get_(i, self.prange_num, 'in')

            if prange_[0].b_page[-1].has_capacity() == True:
                prange_[0].dirty = True
                if i < self.num_columns:
                    if first == True:
                        rid = self.next_free_rid(0)
                        if rid == None:
                            # print('line 174 error: rid cannot be None')
                            self.sem.release()
                            return -1
                        record = self.create_record(rid, rid, data[i], data,
                                                    first)
                        Lock().addLock(LOCK_MUTEX, [record])
                        meta_cols = record.get_meta()
                        # print(meta_cols)
                        # prange = self.prange_directory.get(i)[-1]
                        record.offset = prange_[0].b_page[-1].writeRecord(
                            data[i])
                        record.page_pos = len(prange_[0].b_page) - 1
                        record.prange_pos = self.prange_num
                        self.index.insert(i, data[i], record.rid)
                    else:
                        # prange = self.prange_directory.get(i)[-1]
                        # print(record.rid)
                        prange_[0].b_page[-1].writeRecord(data[i])
                        if self.index.indices[i] is not None:
                            self.index.insert(i, data[i], record.rid)
                else:
                    # print('pos',i,record.prange_pos,record.page_pos,meta_cols[i - self.num_columns])
                    # print(meta_cols[0])
                    # prange = self.prange_directory.get(i)[record.prange_pos]
                    # prange = self.buffer.get_(i, record.prange_pos)
                    prange_[0].b_page[record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
            else:
                if self.insert_page_to(i) == -1:
                    # print('i:',i)
                    if first == True:
                        # print('adding prange')
                        # flag = True
                        self.prange_num += 1
                        self.add_prange(i,
                                        self.num_columns + META_DATA_COL_NUM)

                    # self.free_trid = 0
                    if i < self.num_columns:
                        if first == True:
                            rid = self.next_free_rid(0)
                            record = self.create_record(
                                rid, rid, data[i], data, first)
                            Lock().addLock(LOCK_MUTEX, [record])
                            meta_cols = record.get_meta()
                            # prange = self.prange_directory.get(i)[-1]
                            # print(self.prange_num)
                            prange_ = self.buffer.get_(i, self.prange_num,
                                                       'in')
                            # prange_[0].isFull = True
                            # print('228',prange_[0].b_page[0].num_records)
                            record.offset = prange_[0].b_page[-1].writeRecord(
                                data[i])
                            # print(self.buffer.pool[1][0][0].b_page[-1].num_records)
                            # print('done')
                            record.page_pos = len(prange_[0].b_page) - 1
                            record.prange_pos = self.prange_num
                            self.index.insert(i, data[i], record.rid)
                        else:
                            # prange = self.prange_directory.get(i)[-1]
                            prange_ = self.buffer.get_(i, self.prange_num,
                                                       'in')
                            # prange_[0].isFull = True
                            # print('236',prange_[0].b_page[0].num_records)
                            prange_[0].b_page[-1].writeRecord(data[i])
                            if self.index.indices[i] is not None:
                                self.index.insert(i, data[i], record.rid)
                    else:
                        # prange =
                        # self.prange_directory.get(i)[record.prange_pos]
                        prange_ = self.buffer.get_(i, self.prange_num, 'in')
                        # print('242',prange_[0].b_page[0].num_records)
                        # prange_[0].isFull = True
                        prange_[0].b_page[record.page_pos].writeRecord(
                            meta_cols[i - self.num_columns])
                    prange_[0].dirty = True
                    # self.free_brid = 0
                else:
                    # print('i:',i)
                    # self.free_trid = 0
                    prange_[0].dirty = True
                    if i < self.num_columns:
                        if first == True:
                            rid = self.next_free_rid(0)
                            record = self.create_record(
                                rid, rid, data[i], data, first)
                            Lock().addLock(LOCK_MUTEX, [record])
                            meta_cols = record.get_meta()
                            # prange = self.prange_directory.get(i)[-1]
                            # print('252',prange_[0].b_page[-1].num_records)
                            record.offset = prange_[0].b_page[-1].writeRecord(
                                data[i])
                            record.page_pos = len(prange_[0].b_page) - 1
                            record.prange_pos = self.prange_num
                            self.index.insert(i, data[i], record.rid)
                        else:
                            # prange = self.prange_directory.get(i)[-1]
                            prange_[0].b_page[-1].writeRecord(data[i])
                            if self.index.indices[i] is not None:
                                self.index.insert(i, data[i], record.rid)
                    else:
                        # prange =
                        # self.prange_directory.get(i)[record.prange_pos]
                        # print('265',prange_[0].b_page[-1].num_records, 'page_pos', record.page_pos)
                        prange_[0].b_page[record.page_pos].writeRecord(
                            meta_cols[i - self.num_columns])
        # Lock().releaseLock(LOCK_MUTEX, [record])
        self.sem.release()
        return True

    def checkSchema(self, record, data):
        schema = ''
        for i in range(self.num_columns):
            # print(len(data))
            if record.columns[i] == data[i] or data[i] == None:
                schema += '1'
            else:
                schema += '2'
        # print('243. schema', schema)
        return int(schema)

    def addtps(self, key, num):
        base_record = self.page_directory.get(key)
        tail_record = self.page_directory.get(base_record.indirect)
        tail_record.tps = base_record.update_num

    # if key does not exist then return false
    # To Do: update record to index
    def update_record(self, key, brid, *data, delete):
        # print(data)
        if len(self.buffer.buffer_bin) != 0:
            self.merge_start()
        # self.merge_times += 1
        schema = None
        if key == None:
            print('empty key')
        data = list(data)
        self.sem.acquire()
        base_record = self.page_directory.get(brid)
        if Lock().check(LOCK_MUTEX, [base_record]) == False:
            self.sem.release()
            return False
        elif Lock().check(LOCK_MUTEX, [base_record]) == 'pass':
            pass
        else:
            Lock().addLock(LOCK_MUTEX, [base_record])
        base_record.update_num += 1
        rid = self.next_free_rid(1)

        ##print("update type = ", type(base_record))
        # if base_record.indirect == None:
        #     print('got none indirect', key)
        #     return
        if base_record == None:
            print('record doesnt exist')
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return False
        # get current prange position
        cur_prange_pos = base_record.prange_pos
        prev_record = self.page_directory.get(base_record.indirect)
        if prev_record.rid == None:
            print('None prev_record')
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return False
        if delete == True:
            schema = int('1' * self.num_columns)
        else:
            schema = self.checkSchema(prev_record, data)
            data[0] = key
        cur_record = Record(rid, prev_record.indirect, key, data)
        cur_record.schema = schema
        meta_cols = cur_record.get_meta()
        # construct linked list
        base_record.indirect = cur_record.rid
        base_record.schema = schema
        brecord_meta = base_record.get_meta()

        self.sem.acquire()
        self.buffer.get_(self.num_columns + INDIRECTION_COLUMN,
                         base_record.prange_pos,
                         'up')[0].b_page[base_record.page_pos].updateRecord(
                             base_record.offset,
                             brecord_meta[INDIRECTION_COLUMN])

        self.buffer.get_(self.num_columns + SCHEMA_ENCODING_COLUMN,
                         base_record.prange_pos,
                         'up')[0].b_page[base_record.page_pos].updateRecord(
                             base_record.offset, schema)
        self.sem.release()

        cur_record.indirect = prev_record.rid
        self.page_directory.update({cur_record.rid: cur_record})
        for i in range(self.num_columns + META_DATA_COL_NUM):
            prev_data = None
            if i < self.num_columns:
                prev_data = self.get_data(prev_record.rid, i,
                                          prev_record.prange_pos,
                                          prev_record.page_pos,
                                          prev_record.offset)
                if prev_data == '/' or prev_data == None:
                    if data[i] != None and self.index.indices[i] is not None:
                        self.index.update(i, None, data[i], base_record.rid)
                    prev_data = None
                else:
                    # print(i, prev_data, data[i])
                    if self.index.indices[i] is not None:
                        self.index.update(i, prev_data, data[i],
                                          base_record.rid)
                # print('prev_data', prev_data)
                # self.index.update(i, prev_data, data[i], base_record.rid)
                data_ = data[i]
                # handle the case when the data is empty, we will emerge data
                # from previous record
                if data[i] == None:
                    data_ = prev_data
                    # handle the case when the data is always empty, we use '/' to
                    # represent the final value
                    if delete == True or data_ == '/':
                        data_ = None
                    # print('176',prev_record.rid, prev_record.prange_pos,
                    # prev_record.page_pos, data_)
            prange_ = self.buffer.get_(i, cur_prange_pos, 'up')
            if prange_[0].t_page[-1].has_capacity():
                if i < self.num_columns:
                    # print(i)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    # print(data_)
                    prange_[0].dirty = True
                    cur_record.columns[i] = data_
                    cur_record.offset = prange_[0].t_page[-1].writeRecord(
                        data_)
                    cur_record.page_pos = len(prange_[0].t_page) - 1
                    cur_record.prange_pos = cur_prange_pos
                    # if i == 0:
                    #     self.index.update(i, prev_data, data_, base_record.rid)
                else:
                    # print('writing meta data', i , cur_prange_pos, cur_record.page_pos)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    prange_[0].t_page[cur_record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
            else:
                if i < self.num_columns:
                    prange_[0].dirty = True
                    self.update_page_to(prange_)
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    cur_record.columns[i] = data_
                    cur_record.offset = prange_[0].t_page[-1].writeRecord(
                        data_)
                    cur_record.page_pos = len(prange_[0].t_page) - 1
                    cur_record.prange_pos = cur_prange_pos
                    # if i == 0:
                    #     self.index.update(i, prev_data, data_, base_record.rid)
                else:
                    self.update_page_to(prange_)
                    # print('writing meta data')
                    # prange = self.prange_directory.get(i)[cur_prange_pos]
                    prange_[0].t_page[cur_record.page_pos].writeRecord(
                        meta_cols[i - self.num_columns])
        self.merge_waiting_set.add(base_record.rid)
        self.addtps(brid, base_record.update_num)
        if delete == True:
            # self.rid_list.remove()
            # Lock().releaseLock(LOCK_MUTEX, [base_record])
            self.sem.release()
            return cur_record
        # Lock().releaseLock(LOCK_MUTEX, [base_record])
        self.sem.release()
        return True

    def insert_page_to(self, ith_column):
        # prange = self.prange_directory.get(ith_column)[-1]
        prange = self.buffer.get_(ith_column, self.prange_num, 'in')
        if prange[0].append_page(0) == -1:
            return -1

    def get_rid_list(self):
        return self.rid_list

    def update_page_to(self, prange):
        # prange = self.prange_directory.get(ith_column)[ith_prange]
        # prange = self.buffer.get_(ith_column, ith_prange, 'up')
        prange[0].append_page(1)

    def get_data(self, rid, ith_col, prange_pos, page_pos, offset):
        if rid[0] == 'b':
            # page =
            # self.prange_directory.get(ith_col)[prange_pos].b_page[page_pos]
            page = self.buffer.get_(ith_col, prange_pos,
                                    'up')[0].b_page[page_pos]
            page.pin = True
            return page.readRecord(offset)
        page = self.buffer.get_(ith_col, prange_pos, 'up')[0].t_page[page_pos]
        page.pin = True
        # page = self.prange_directory.get(ith_col)[prange_pos].t_page[page_pos]
        # print(page.readRecord(offset))
        return page.readRecord(offset)

    def read_record(self, key):
        record = self.page_directory.get(key)
        if record == None:
            print('The key doesnt exist or empty key')
        if Lock().check(LOCK_SHARED, [record]) == False:
            return False
        else:
            Lock().addLock(LOCK_SHARED, [record])
        rid = record.indirect
        # print(rid)
        record = self.page_directory.get(rid)
        offset = record.offset
        page_pos = record.page_pos
        prange_pos = record.prange_pos
        # print('record info', page_pos, prange_pos, offset)
        res = []
        self.sem.acquire()
        for i in range(self.num_columns + META_DATA_COL_NUM):
            if rid[0] == 'b':
                # page =
                # self.prange_directory.get(i)[prange_pos].
                page = self.buffer.get_(i, prange_pos,
                                        're')[0].b_page[page_pos]
                data = page.readRecord(offset)
                # print(data)
                res.append(data)
            else:
                # print(offset)
                # page =
                # self.prange_directory.get(i)[prange_pos].t_page[page_pos]
                page = self.buffer.get_(i, prange_pos,
                                        're')[0].t_page[page_pos]
                data = page.readRecord(offset)
                # print(data)
                res.append(data)
        # Lock().releaseLock(LOCK_SHARED, [record])
        self.sem.release()
        return res

    def merge(self):
        """Drain ``merge_waiting_set``, folding tail values into base pages.

        For each queued base RID, the values held by the record its
        ``indirect`` field points to are written into copies of the base
        pages, and those copies replace the originals in the page ranges
        returned by ``self.buffer.findTrash``.

        NOTE(review): nothing here acquires ``self.sem``, and ``merge_start``
        runs this method on a separate thread - confirm that is intended.
        """
        # print('merge starting')
        # find the latest tail page
        # print(self.merge_waiting_set)
        while len(self.merge_waiting_set) != 0:
            rid = self.merge_waiting_set.pop()
            # Despite the name this is the Record stored in page_directory
            # itself, not a copy.
            cpy_base_record = self.page_directory.get(rid)
            prange_list = self.buffer.findTrash(cpy_base_record.prange_pos)
            # print(cpy_base_record.prange_pos,'\n')
            # When findTrash returns -1 the rid has already been popped and
            # is not queued again.
            if prange_list != -1:
                #print("type = ", type(base_record))
                tail_record = self.page_directory.get(cpy_base_record.indirect)
                # merge data: the base record takes over the tail's values
                cpy_base_record.columns_ = tail_record.columns_
                cpy_base_record.column = tail_record.column

                # Column 0 is skipped; every other user column is rewritten
                # in a shallow copy of its base page, which then replaces
                # the original page in the page range.
                for i in range(1, self.num_columns):
                    cpy_page = copy.copy(
                        prange_list[i][0].b_page[cpy_base_record.page_pos])
                    cpy_page.updateRecord(cpy_base_record.offset,
                                          cpy_base_record.columns_[i])
                    prange_list[i][0].b_page[
                        cpy_base_record.page_pos] = cpy_page
                # update tps: carry the tail record's tps over to the base
                cpy_base_record.tps = tail_record.tps
                self.page_directory.update({rid: cpy_base_record})
                self.merge_times += 1
                print('done')