Example #1
    def create_table(self, name, num_columns, key):
        table = Table(name, num_columns, key)
        BufferPool.init_latest_tail(name)
        BufferPool.init_tps(name)

        # register the new table in the database
        self.tables.append(table)
        return table
Example #2
    def select(self, key, column, query_columns):
        # Locate base-record pointers for the given key in the indexed column
        page_pointer = self.table.index.locate(column, key)
        records = []
        for i in range(len(page_pointer)):
            # collect base metadata for this record
            args = [
                self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                *page_pointer[i]
            ]
            base_schema = int.from_bytes(BufferPool.get_record(*args),
                                         byteorder='big')
            args = [
                self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[i]
            ]
            base_indirection = BufferPool.get_record(*args)

            # Assemble the projected values for this record
            res = []
            for query_col, val in enumerate(query_columns):
                # column not selected in the projection
                if val != 1:
                    res.append(None)
                    continue
                # schema bit set: the latest value lives in a tail record
                if (base_schema >> query_col) & 1 == 1:
                    res.append(
                        self.table.get_tail(
                            int.from_bytes(base_indirection, byteorder='big'),
                            query_col, page_pointer[i][0]))
                else:
                    args = [
                        self.table.name, "Base", query_col + NUM_METAS,
                        *page_pointer[i]
                    ]
                    res.append(
                        int.from_bytes(BufferPool.get_record(*args),
                                       byteorder="big"))

            # construct the record with rid, primary key, columns
            args = [self.table.name, "Base", RID_COLUMN, *page_pointer[i]]
            rid = BufferPool.get_record(*args)
            args = [
                self.table.name, "Base", NUM_METAS + column, *page_pointer[i]
            ]
            # key value in the queried column (may be a non-primary key)
            prim_key = BufferPool.get_record(*args)
            record = Record(rid, prim_key, res)
            records.append(record)
        return records
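A minimal standalone sketch of the schema-encoding test used above (the helper name is illustrative, not part of the project): bit i of the encoding is 1 exactly when column i has been updated since the base record was written, so the latest value must be read from a tail record.

    def column_is_updated(schema_encoding, column):
        # bit `column` of the encoding marks an updated column
        return (schema_encoding >> column) & 1 == 1

    assert column_is_updated(0b0101, 0)      # updated: read from tail
    assert not column_is_updated(0b0101, 1)  # clean: read from base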
Example #3
    def tail_page_write(self, data, range_index):
        for i, value in enumerate(data):
            page_id = self.get_latest_tail(i, range_index)
            args = [self.name, "Tail", i, range_index, page_id]
            page = BufferPool.get_page(*args)

            # current tail page is full: advance to the next page id
            if not page.has_capacity():
                args[-1] += 1
                # remember the new page as the latest tail for this column/range
                BufferPool.set_latest_tail(self.name, i, range_index,
                                           args[-1])
                page = BufferPool.get_page(*args)

            page.dirty = 1
            page.write(value)
Example #4
    def open(self, path):
        # print("BufferPool Path @ {}".format(path))
        if not os.path.exists(path):
            os.makedirs(path)

        BufferPool.initial_path(path)

        name2idx = {}
        # Restore existing tables from disk
        tables = [
            name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))
        ]
        for t_name in tables:
            t_path = os.path.join(path, t_name, 'table.pkl')
            old_table = read_table(t_path)
            name2idx[t_name] = len(self.tables)
            self.tables.append(old_table)

        # Restore the page directory into the BufferPool
        fname = os.path.join(path, "page_directory.txt")
        # Create page_directory.txt if it does not exist
        if not os.path.exists(fname):
            open(fname, "w+").close()
        with open(fname, "r") as f:
            for line in f:
                t_name, base_tail, column_id, page_range_id, page_id = \
                    line.rstrip('\n').split(',')
                uid = (t_name, base_tail, int(column_id), int(page_range_id),
                       int(page_id))
                BufferPool.add_page(uid)

        # Restore tps to the BufferPool
        fname = os.path.join(path, "tps.pkl")
        # Create tps.pkl if it does not exist
        if not os.path.exists(fname):
            open(fname, "w+").close()
        else:
            with open(fname, "rb") as f:
                old_tps = pickle.load(f)
            BufferPool.copy_tps(old_tps)

        # Restore latest_tail to the BufferPool
        fname = os.path.join(path, "latest_tail.pkl")
        # Create latest_tail.pkl if it does not exist
        if not os.path.exists(fname):
            open(fname, "w+").close()
        else:
            with open(fname, "rb") as f:
                latest_tail = pickle.load(f)
            BufferPool.copy_latest_tail(latest_tail)
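For reference, a standalone sketch of the page_directory.txt line format that open() parses and close() (Example #6) writes, one UID per line: table,Base|Tail,column_id,page_range_id,page_id. The parse_uid helper below is illustrative only.

    def parse_uid(line):
        # "grades,Base,3,0,7" -> ("grades", "Base", 3, 0, 7)
        t_name, base_tail, col, rng, page = line.rstrip('\n').split(',')
        return (t_name, base_tail, int(col), int(rng), int(page))

    assert parse_uid("grades,Base,3,0,7\n") == ("grades", "Base", 3, 0, 7)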
Example #5
    def get_tail(self, tid, column, range_index):
        # Read this TID's value for the given column from the tail pages
        args = [
            self.name, "Tail", column + NUM_METAS, range_index,
            tid // MAX_RECORDS, tid % MAX_RECORDS
        ]
        return int.from_bytes(BufferPool.get_record(*args), byteorder='big')
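The TID-to-slot arithmetic get_tail relies on, isolated as a standalone sketch; MAX_RECORDS is the project's slots-per-page constant (the value below is illustrative).

    MAX_RECORDS = 512  # illustrative; the real value lives in the project's config

    def tid_to_slot(tid):
        # a TID maps to (tail page index, slot within that page)
        return tid // MAX_RECORDS, tid % MAX_RECORDS

    assert tid_to_slot(0) == (0, 0)
    assert tid_to_slot(513) == (1, 1)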
Example #6
    def close(self):
        # Flush dirty pages and shut down the BufferPool
        BufferPool.close()

        # Write each table's config file
        for table in self.tables:
            t_name = table.name
            table.merge_pid = None
            t_path = os.path.join(BufferPool.path, t_name, "table.pkl")
            write_table(t_path, table)

        # Write the page-directory config file
        all_uids = BufferPool.page_directories.keys()
        with open(os.path.join(BufferPool.path, "page_directory.txt"),
                  "w") as f:
            for uid in all_uids:
                t_name, base_tail, column_id, page_range_id, page_id = uid
                my_list = [
                    t_name, base_tail,
                    str(column_id),
                    str(page_range_id),
                    str(page_id)
                ]
                f.write(",".join(my_list) + "\n")

        # Write the tps config file
        with open(os.path.join(BufferPool.path, "tps.pkl"), "wb") as f:
            pickle.dump(BufferPool.tps, f)

        # Write the latest_tail config file
        with open(os.path.join(BufferPool.path, "latest_tail.pkl"),
                  "wb") as f:
            pickle.dump(BufferPool.latest_tail, f)
Example #7
    def create_index(self, column_number):
        tree = OOBTree()
        self.indices[column_number] = tree
        # Walk every record and index its latest value in this column
        for i in range(self.table.num_records):
            # Compute the base-page pointer [range, page, slot] for record i
            range_index = i // (MAX_RECORDS * PAGE_RANGE)
            range_remainder = i % (MAX_RECORDS * PAGE_RANGE)
            page_pointer = [range_index, range_remainder // MAX_RECORDS,
                            range_remainder % MAX_RECORDS]
            # Schema encoding tells us whether the latest value is in a tail record
            args = [self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                    *page_pointer]
            base_schema = int.from_bytes(BufferPool.get_record(*args),
                                         byteorder='big')
            # Indirection points to the latest tail record, if any
            args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer]
            base_indirection = BufferPool.get_record(*args)
            # Read the latest column value from the tail or the base page
            if (base_schema >> column_number) & 1 == 1:
                key = self.table.get_tail(
                    int.from_bytes(base_indirection, byteorder='big'),
                    column_number, page_pointer[0])
            else:
                args = [self.table.name, "Base", column_number + NUM_METAS,
                        *page_pointer]
                key = int.from_bytes(BufferPool.get_record(*args),
                                     byteorder="big")
            self.update_index(key, page_pointer, column_number)
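The base-page pointer arithmetic used above, isolated as a standalone sketch; MAX_RECORDS (slots per page) and PAGE_RANGE (pages per range) are config constants in this project, with illustrative values here.

    MAX_RECORDS, PAGE_RANGE = 512, 16  # illustrative values

    def record_to_pointer(i):
        # record number -> [page range, page within range, slot within page]
        range_index, remainder = divmod(i, MAX_RECORDS * PAGE_RANGE)
        return [range_index, remainder // MAX_RECORDS, remainder % MAX_RECORDS]

    assert record_to_pointer(0) == [0, 0, 0]
    assert record_to_pointer(512 * 16 + 513) == [1, 1, 1]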
Example #8
    def sum(self, start_range, end_range, aggregate_column_index):
        values = 0
        # locate all keys in index
        locations = self.table.index.locate_range(start_range, end_range,
                                                  self.table.key)
        # Aggregate the specified column over the located records
        for i in range(len(locations)):
            page_pointer = locations[i]
            # collect base metadata for this record
            args = [
                self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                *page_pointer[0]
            ]
            base_schema = int.from_bytes(BufferPool.get_record(*args),
                                         byteorder='big')
            args = [
                self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[0]
            ]
            base_indirection = BufferPool.get_record(*args)

            if (base_schema &
                (1 << aggregate_column_index)) >> aggregate_column_index == 1:
                temp = self.table.get_tail(
                    int.from_bytes(base_indirection, byteorder='big'),
                    aggregate_column_index, locations[i][0][0])
                if (temp == DELETED):  # might be deleted
                    continue
                values += temp
            else:
                args = [
                    self.table.name, "Base",
                    aggregate_column_index + NUM_METAS, *page_pointer[0]
                ]
                temp = int.from_bytes(BufferPool.get_record(*args),
                                      byteorder="big")
                if (temp == DELETED):  # might be deleted
                    continue
                values += temp
        return values
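A minimal sketch of the skip-deleted aggregation above, assuming DELETED is the project's tombstone sentinel (the value here is illustrative):

    DELETED = 2**64 - 2  # illustrative sentinel; the real value is in config
    values = sum(v for v in (10, DELETED, 5) if v != DELETED)
    assert values == 15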
Example #9
    def __merge(self):
        keys, p_indices = BufferPool.get_table_tails(self.name)
        for (col_index, rg_index), last_p_index in zip(keys, p_indices):
            if col_index < NUM_METAS:
                continue

            args = [self.name, 'Tail', col_index, rg_index, last_p_index]
            last_page = BufferPool.get_page(*args)

            old_tps = BufferPool.get_tps(self.name, col_index, rg_index)
            new_tps = last_p_index * MAX_RECORDS + last_page.num_records

            page_range = BufferPool.get_base_page_range(
                self.name, col_index, rg_index)
            page_range_copy = copy.deepcopy(page_range)

            merged_record = {}
            for uid in page_range_copy.keys():
                t_name, base_tail, col_id, range_id, page_id = uid
                for rec_id in range(MAX_RECORDS):
                    # merged_record contains all the records within base page range
                    merged_record[(t_name, base_tail, col_id, range_id,
                                   page_id, rec_id)] = 0  # Init

            max_merged_count = len(self.merged_record)
            early_stopping = 0
            start_tail_p_index = (new_tps - 1) // MAX_RECORDS
            end_tail_p_index = old_tps // MAX_RECORDS

            # Walk tail pages from newest to oldest so the first value seen
            # for each base record is its latest update
            for rev_page in reversed(
                    range(end_tail_p_index, start_tail_p_index + 1)):
                args_rid = [self.name, 'Tail', BASE_RID, rg_index, rev_page]
                args_data = [self.name, 'Tail', col_index, rg_index, rev_page]

                for rev_rec in reversed(range(0, MAX_RECORDS)):
                    rid = int.from_bytes(
                        BufferPool.get_page(*args_rid).get(rev_rec),
                        byteorder='big')
                    # Map the base RID to its page and slot within this range
                    offset = rid % (MAX_RECORDS * PAGE_RANGE)
                    base_page, base_rec = (offset // MAX_RECORDS,
                                           offset % MAX_RECORDS)
                    uid = (self.name, "Base", col_index, rg_index, base_page)
                    uid_w_record = (self.name, "Base", col_index, rg_index,
                                    base_page, base_rec)

                    if merged_record[uid_w_record] == 0:
                        update_val = int.from_bytes(
                            BufferPool.get_page(*args_data).get(rev_rec),
                            byteorder='big')
                        if update_val != MAXINT:
                            page_range_copy[uid].update(base_rec, update_val)
                            # Also clear this column's schema-encoding bit
                            args_schema = [
                                self.name, "Base", SCHEMA_ENCODING_COLUMN,
                                rg_index, base_page
                            ]
                            old_encoding = int.from_bytes(
                                BufferPool.get_page(*args_schema).get(base_rec),
                                byteorder="big")
                            new_encoding = old_encoding & ~(
                                1 << (col_index - NUM_METAS))
                            BufferPool.page_directories[tuple(
                                args_schema)].update(base_rec, new_encoding)
                        merged_record[uid_w_record] = 1
                        early_stopping += 1

                    if early_stopping == max_merged_count:
                        # every updated record has been merged; stop early
                        break

                if early_stopping == max_merged_count:
                    break

            # Base Page Range updates
            BufferPool.update_base_page_range(page_range_copy)
            # TPS updates
            BufferPool.set_tps(self.name, col_index, rg_index, new_tps)
            self.merged_record = {}
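A standalone sketch of the TPS computation above: with MAX_RECORDS slots per page, the merge watermark is the TID just past the newest tail record it consumed (the value below is illustrative).

    MAX_RECORDS = 512  # illustrative
    last_p_index, records_in_last_page = 3, 17
    new_tps = last_p_index * MAX_RECORDS + records_in_last_page
    assert new_tps == 1553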
Example #10
    def base_page_write(self, data):
        for i, value in enumerate(data):
            range_index = (self.num_records // MAX_RECORDS) // PAGE_RANGE
            page_index = (self.num_records // MAX_RECORDS) % PAGE_RANGE
            args = [self.name, "Base", i, range_index, page_index]
            # latest base page
            page = BufferPool.get_page(*args)
            # page_range = self.page_directory["Base"][i][-1]
            # page = page_range.page_range[page_range.curr_page]

            # check whether we are still inside the current page range
            if page_index < PAGE_RANGE:
                # Edge case: first page of a new range also needs a tail page
                if page_index == 0:
                    t_args = [self.name, "Tail", i, range_index, page_index]
                    BufferPool.add_page(tuple(t_args))  # create a new tail page
                    BufferPool.set_tps(self.name, i, range_index)
                    BufferPool.set_latest_tail(self.name, t_args[2], t_args[3],
                                               t_args[4])

                # Page range not at the end; check whether this page is full
                if not page.has_capacity():
                    args[-1] += 1  # advance to the next base page
                    page = BufferPool.get_page(*args)
            else:
                # Page range is full: start a new range with a fresh base page
                args[-2] += 1  # increment page-range index
                args[-1] = 0  # reset page index to 0
                page = BufferPool.get_page(*args)  # create new base page range
                args[1] = "Tail"
                BufferPool.add_page(tuple(args))  # create new tail page
                self.add_latest_tail(args[2], args[3], args[4])

            page.dirty = 1
            page.write(value)
Example #11
    def get_latest_tail(self, column_id, page_range_id):
        return BufferPool.get_latest_tail(self.name, column_id, page_range_id)
Example #12
    def delete(self, key):
        # Deleting writes a tail record whose columns are all DELETED
        null_value = []
        page_pointer = self.table.index.locate(self.table.key, key)
        for i in range(self.table.num_columns):
            null_value.append(DELETED)
            self.table.mg_rec_update(NUM_METAS + i, *page_pointer[0])

        update_range_index, update_record_page_index, update_record_index = \
            page_pointer[0][0], page_pointer[0][1], page_pointer[0][2]

        args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[0]]
        base_indirection_id = BufferPool.get_record(*args)
        args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
        base_rid = BufferPool.get_record(*args)
        base_id = int.from_bytes(base_rid, byteorder='big')

        tmp_indice = self.table.get_latest_tail(INDIRECTION_COLUMN,
                                                update_range_index)
        args = [
            self.table.name, "Tail", INDIRECTION_COLUMN, update_range_index,
            tmp_indice
        ]
        page_records = BufferPool.get_page(*args).num_records
        total_records = page_records + tmp_indice * MAX_RECORDS
        next_tid = total_records

        # this record has never been updated before
        if (int.from_bytes(base_indirection_id, byteorder='big') == MAXINT):
            # new tail record's indirection points back to the base record
            args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
            next_tail_indirection = BufferPool.get_record(*args)  # in bytes
            next_tail_indirection = int.from_bytes(next_tail_indirection,
                                                   byteorder='big')
        else:
            next_tail_indirection = int.from_bytes(base_indirection_id,
                                                   byteorder='big')

        # deletion marks every column as updated
        schema_encoding = int('1' * self.table.num_columns, 2)
        starttime = datetime_to_int(datetime.datetime.now())
        lastupdatetime = 0
        updatetime = 0
        # update new tail record
        meta_data = [
            next_tail_indirection, next_tid, schema_encoding, base_id,
            starttime, lastupdatetime, updatetime
        ]
        meta_data.extend(null_value)
        tail_data = meta_data
        self.table.tail_page_write(tail_data, update_range_index)

        # overwrite base page with new metadata
        args = [
            self.table.name, "Base", INDIRECTION_COLUMN, page_pointer[0][0],
            page_pointer[0][1]
        ]
        page = BufferPool.get_page(*args)
        page.update(update_record_index, next_tid)

        args = [
            self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
            page_pointer[0][0], page_pointer[0][1]
        ]
        page = BufferPool.get_page(*args)
        page.update(update_record_index, schema_encoding)
        self.table.num_updates += 1
        self.table.mergeThreadController()
Example #13
    def update(self, key, *columns):
        # locate the base-record pointer for the specified key
        page_pointer = self.table.index.locate(self.table.key, key)
        update_range_index, update_record_page_index, update_record_index = \
            page_pointer[0][0], page_pointer[0][1], page_pointer[0][2]
        # if the primary key itself is updated, insert a new entry into the
        # primary-key index
        if columns[self.table.key] is not None:
            self.table.index.update_index(columns[self.table.key],
                                          page_pointer[0], self.table.key)
        args = [self.table.name, "Base", INDIRECTION_COLUMN, *page_pointer[0]]
        base_indirection_id = BufferPool.get_record(*args)
        args = [self.table.name, "Base", RID_COLUMN, *page_pointer[0]]
        base_rid = BufferPool.get_record(*args)
        base_id = int.from_bytes(base_rid, byteorder='big')

        for query_col, val in enumerate(columns):
            if val is None:
                continue
            else:
                # re-read the base indirection each pass so consecutive column
                # updates chain through the newest tail record
                args = [
                    self.table.name, "Base", INDIRECTION_COLUMN,
                    *page_pointer[0]
                ]
                base_indirection_id = BufferPool.get_record(*args)
                # compute the new tail record's TID
                self.table.mg_rec_update(NUM_METAS + query_col,
                                         *page_pointer[0])
                tmp_indice = self.table.get_latest_tail(
                    INDIRECTION_COLUMN, update_range_index)
                args = [
                    self.table.name, "Tail", INDIRECTION_COLUMN,
                    update_range_index, tmp_indice
                ]
                page_records = BufferPool.get_page(*args).num_records
                total_records = page_records + tmp_indice * MAX_RECORDS
                next_tid = total_records
                # this record has never been updated before: the new tail
                # record's indirection points back to the base record
                if (int.from_bytes(base_indirection_id,
                                   byteorder='big') == MAXINT):
                    args = [
                        self.table.name, "Base", RID_COLUMN, *page_pointer[0]
                    ]
                    next_tail_indirection = BufferPool.get_record(
                        *args)  # in bytes
                    next_tail_indirection = int.from_bytes(
                        next_tail_indirection, byteorder='big')
                    # tail columns hold MAXINT for untouched attributes,
                    # e.g. [MAXINT, MAXINT, updated_value, MAXINT]
                    next_tail_columns = [MAXINT] * len(columns)
                    next_tail_columns[query_col] = val
                # the record has been updated before
                else:
                    # the new tail record's indirection points back to the
                    # previous tail record for this key
                    next_tail_indirection = int.from_bytes(base_indirection_id,
                                                           byteorder='big')
                    # copy the previous tail record's columns, then overwrite
                    # the updated attribute
                    next_tail_columns = self.table.get_tail_columns(
                        next_tail_indirection, update_range_index)
                    next_tail_columns[query_col] = val

                args = [
                    self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                    *page_pointer[0]
                ]
                encoding_base = BufferPool.get_record(*args)
                old_encoding = int.from_bytes(encoding_base, byteorder="big")
                new_encoding = old_encoding | (1 << query_col)
                schema_encoding = new_encoding
                starttime = datetime_to_int(datetime.datetime.now())
                lastupdatetime = 0
                updatetime = 0
                # update new tail record
                meta_data = [
                    next_tail_indirection, next_tid, schema_encoding, base_id,
                    starttime, lastupdatetime, updatetime
                ]
                meta_data.extend(next_tail_columns)
                tail_data = meta_data
                self.table.tail_page_write(tail_data, update_range_index)

                # overwrite base page with new metadata
                args = [
                    self.table.name, "Base", INDIRECTION_COLUMN,
                    page_pointer[0][0], page_pointer[0][1]
                ]
                page = BufferPool.get_page(*args)
                page.update(update_record_index, next_tid)

                args = [
                    self.table.name, "Base", SCHEMA_ENCODING_COLUMN,
                    page_pointer[0][0], page_pointer[0][1]
                ]
                page = BufferPool.get_page(*args)
                page.update(update_record_index, schema_encoding)

                self.table.num_updates += 1
        self.table.mergeThreadController()
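A standalone sketch of how update() accumulates the schema encoding: each updated column ORs its bit in, so the encoding records every column updated since the last merge (the helper name is illustrative).

    def set_updated(encoding, column):
        # mark `column` as updated in the schema encoding
        return encoding | (1 << column)

    enc = 0
    enc = set_updated(enc, 2)
    enc = set_updated(enc, 0)
    assert enc == 0b101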