Beispiel #1
0
 def __init__(self, tableName, load=False):
     self.tableName = tableName
     self.memtable = Memtable(tableName)
     if load:
         self._load_meta()
         self.recover()
     else:
         self.entries = 0
         self.schema = {}
     self.ystore = Ystore(tableName)
     self.yindex = Yindex(tableName)
     self.memindex = Memindex(tableName, self.yindex)
     self._write_meta()
Beispiel #2
0
class MemtableDictComparison(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        _, self.tempf = tempfile.mkstemp()
        self.memtable = Memtable()
        self.model: Dict[str, Value] = dict()

    keys = Bundle("keys")
    values = Bundle("values")

    @rule(target=keys, key=st.text())
    def add_key(self, key: str) -> str:
        return key

    @rule(target=values, value=st.text())
    def add_value(self, value: str) -> str:
        return value

    @rule(key=keys, value=values)
    def set(self, key: str, value: str):
        self.model[key] = value
        self.memtable.set(key, value)

    @rule(key=keys)
    def unset(self, key: str):
        self.model[key] = TOMBSTONE
        self.memtable.unset(key)

    @rule(key=keys)
    def values_agree(self, key: str):
        assert self.memtable.get(key) == self.model.get(key, None)

    @rule()
    def memtable_is_ordered(self):
        assert list(self.memtable.entries()) == sorted(self.memtable.entries())

    # TODO: Current bytes bug (issue #31) breaks this rule
    # @rule()
    # def approximate_bytes_reflects_what_is_in_model(self):
    #     def add_up_bytes():
    #         bytes_ = 0
    #         for (key, value) in self.model.items():
    #             if value is TOMBSTONE:
    #                 bytes_ += len(key)
    #             else:
    #                 bytes_ += len(key) + len(value) #type: ignore [arg-type]
    #         return bytes_
    #     assert self.memtable.approximate_bytes() == add_up_bytes()

    def teardown(self):
        os.remove(self.tempf)
Beispiel #3
0
 def resume(cls, log_path):
     memtable = Memtable()
     # recover the memtable if necessary
     if path.exists(log_path):
         for key, value in kv_iter(log_path):
             if value == TOMBSTONE:
                 memtable.unset(key)
             else:
                 memtable.set(key, value)
         writer = KVWriter(log_path, append=True)
     else:
         writer = KVWriter(log_path)
     return cls(writer), memtable
Beispiel #4
0
 def _flush_memory(self):
     self.segments.flush(self.memtable)
     self.memtable = Memtable()
     self.commitlog.purge()
Beispiel #5
0
class Table():
    def __init__(self, tableName, load=False):
        self.tableName = tableName
        self.memtable = Memtable(tableName)
        if load:
            self._load_meta()
            self.recover()
        else:
            self.entries = 0
            self.schema = {}
        self.ystore = Ystore(tableName)
        self.yindex = Yindex(tableName)
        self.memindex = Memindex(tableName, self.yindex)
        self._write_meta()

    def destroy(self):
        self.yindex.destroy()
        self.ystore.destroy()
        if os.path.exists("./WAL/{}.txt".format(self.tableName)):
            os.remove("./WAL/{}.txt".format(self.tableName))
        if os.path.exists("./meta/{}.txt".format(self.tableName)):
            os.remove("./meta/{}.txt".format(self.tableName))
        return True

    def close(self):
        self.spill()

    def putRow(self, rowKey, columns):
        self._write_WAL(rowKey, columns)
        memFull = self.memtable.put(rowKey, columns)
        self.entries += 1
        if memFull:
            self.spill()
        self.updateSchema(columns)
        return True

    def recover(self):
        if os.path.exists("./WAL/{}.txt".format(self.tableName)):
            with open("./WAL/{}.txt".format(self.tableName), 'r') as f:
                recovery_count = 0
                commands = []
                for line in f:
                    commands.append(json.loads(line))
                    recovery_count += 1
                for c in commands:
                    self.putRow(c['rowKey'], c['columns'])
            if recovery_count > 0:
                print("Recovered {} entries from WAL".format(recovery_count))

    def getRow(self, rowKey):
        memFind = self.memtable.get(rowKey)
        if memFind:
            temp = {memFind['key']: memFind['val']}
            return temp
        data = self.memindex.get(rowKey)
        # data = self.yindex.get(rowKey)
        if not data:
            print("Couldn't find row with key: {}".format(rowKey))
            return ''
        return self.ystore.get(data['offset'], data['size'])

    def getRows(self, startRow, endRow):
        memFind = self.memtable.getRange(startRow, endRow)
        # storeFind = self.yindex.getRange(startRow, endRow)
        storeFind = self.memindex.getRange(startRow, endRow)
        memFind_keys = set()
        only_store = []
        mem_temp = []
        for i in memFind:
            memFind_keys.add(i['key'])
            mem_temp.append({i['key']: i['val']})
        rows = mem_temp
        for s in storeFind:
            if not s['key'] in memFind_keys:
                only_store.append((s['offset'], s['size']))
        for t in only_store:
            rows.append(self.ystore.get(t[0], t[1]))
        return rows

    def getColumnByRow(self, rowKey, fam, quals):
        row = self.getRow(rowKey)
        row = row[rowKey]
        data = {fam: {}}
        if not fam in row:
            print("{} not present in row".format(fam))
            return ''
        for q in quals:
            if not q in row[fam]:
                print("{} not present in row".format(q))
                return ''
            data[fam][q] = row[fam][q]
        return data

    def setMemTableLimit(self, newLimit):
        return self.memtable.setLimit(newLimit)

    def updateSchema(self, columns):
        changes = False
        for fam in columns:
            if fam in self.schema:
                for col in columns[fam]:
                    if not col in self.schema[fam]:
                        changes = True
                        self.schema[fam].append(col)
            else:
                self.schema[fam] = []
                for col in columns[fam]:
                    changes = True
                    self.schema[fam].append(col)
        if changes:
            self._write_meta()

    def open(self):
        self.memtable = Memtable(self.tableName)

    def close(self):
        self.spill()
        del self.memtable

    def printSchema(self):
        print("Schema for {}:".format(self.tableName))
        start = '['
        for fam in self.schema:
            s = '{' + "{}:\t".format(fam)
            for col in self.schema[fam]:
                s = "{}{}, ".format(s, col)
            start += s + '}, '
        start += ']'
        print(start)

    def spill(self):
        print("Spilling to disk")
        memTableContents = self.memtable.flush()
        idx_updates = self.ystore.store(memTableContents)
        self.yindex.update(idx_updates)
        self.memindex.populate()
        self._clear_WAL()

    def _load_meta(self):
        if os.path.exists('./meta/{}.txt'.format(self.tableName)):
            with open('./meta/{}.txt'.format(self.tableName), 'r') as f:
                self.entries = int(f.readline())
                self.schema = json.loads(f.readline())
        else:
            self.schema = {}
            self.entries = 0

    def _write_meta(self):
        if not os.path.isdir("./meta"):
            os.makedirs("./meta")
        with open('./meta/{}.txt'.format(self.tableName), 'w') as f:
            f.write(str(self.entries) + "\n")
            f.write(json.dumps(self.schema))

    def _write_WAL(self, rowKey, columns):
        if not os.path.isdir("./WAL"):
            os.makedirs("./WAL")
        with open("./WAL/{}.txt".format(self.tableName), 'a') as f:
            log = {'rowKey': rowKey, 'columns': columns}
            f.write(json.dumps(log) + "\n")

    def _clear_WAL(self):
        if os.path.exists("./WAL/{}.txt".format(self.tableName)):
            open('./WAL/{}.txt'.format(self.tableName), 'w').close()
Beispiel #6
0
 def open(self):
     self.memtable = Memtable(self.tableName)
Beispiel #7
0
 def __init__(self):
     super().__init__()
     _, self.tempf = tempfile.mkstemp()
     self.memtable = Memtable()
     self.model: Dict[str, Value] = dict()