def next(self):
    """Return the next record as a tuple of attribute values.

    Values are ordered by ``self.type.__attrs__``.

    Two iteration modes are supported:

    * ``self.on_field is None`` -- page scan: page offsets are popped from
      ``self.page_offsets`` one at a time and the items of each page are
      consumed in order.
    * otherwise -- index scan: ``self.tree_cursor`` is advanced and every
      value it yields is treated as a collection of ``"path,offset,length"``
      references, each pointing at one serialized record inside a file.

    No exhaustion check is made here; whatever the underlying ``pop`` or
    indexing raises propagates to the caller (callers are presumably expected
    to guard with a ``has_next``-style check -- confirm).
    """
    if self.on_field is None:
        # Page scan: load the next page lazily, only when the previous one
        # has been fully consumed.
        if self.do_next_page:
            curr_page = Ipage(page_offset=self.page_offsets.pop(), filename=self.filename)
            self.curr_items = curr_page.items()
            self.do_next_page = False
        item = self.curr_items[self.curr_iter]
        self.curr_iter += 1
        if self.curr_iter == len(self.curr_items):
            # Page exhausted: the following call must fetch a new one.
            self.do_next_page = True
            self.curr_iter = 0
        self.iter += 1
        attrs = self.type(to_parse=item).attrs
        return tuple(attrs[k] for k in self.type.__attrs__)

    # Index scan on field "on_field".
    if self.do_next_set:
        self.tree_cursor.next()
        # Work on a copy so popping does not disturb the value held by the tree.
        self.cur_set = self.tree_cursor.read_value().copy()
        self.do_next_set = False
    ref = self.cur_set.pop()
    if not self.cur_set:
        self.do_next_set = True

    # A reference has the form "path,offset,length".
    toks = ref.split(',')
    # The context manager guarantees the handle is released even when
    # int(), seek() or read() raises.
    with open(toks[0], 'r') as f:
        f.seek(int(toks[1]))
        raw = f.read(int(toks[2]))
    ent = self.type(to_parse=raw)
    return tuple(ent.attrs[k] for k in self.type.__attrs__)
def b_index(self):
    """Feed every distinct page of the directory into the attribute trees.

    ``self.pp`` may list the same page offset more than once; each distinct
    offset is loaded exactly once.  For every loaded page,
    ``page.store_to_tree`` is called once per attribute in
    ``self.index_attrs`` with the matching tree from ``self.trees``.
    ``self.key_sizes`` is only zipped alongside the attributes, so it limits
    how many attributes are processed but its values are not used here.
    """
    processed = set()
    for offset in self.pp:
        if offset in processed:
            continue
        processed.add(offset)
        page = Ipage(page_offset=offset, filename=self.filename)
        for attr, _key_size in zip(self.index_attrs, self.key_sizes):
            page.store_to_tree(self.trees[attr], self.type, attr, self.filename)
    processed.clear()
def next(self):
    """Return the next record projected onto ``self.fields``.

    Three sources are supported:

    * ``self.on_cursor`` is set -- the row comes from ``self.on_cursor.next()``
      and only the columns whose names appear in ``self.fields`` are kept.
      The result follows the column order of ``on_cursor.type_attrs``, not
      the order of ``self.fields``.
    * ``self.on_cursor is None`` and ``self.ordered_on is None`` -- page scan
      over ``self.page_offsets``; result is ordered by ``self.fields``.
    * ``self.on_cursor is None`` and ``self.ordered_on`` is set -- index scan
      through ``self.tree_cursor``, whose values are treated as collections
      of ``"path,offset,length"`` references; result is ordered by
      ``self.fields``.

    No exhaustion check is made here; errors from the underlying ``pop`` or
    indexing propagate to the caller.
    """
    if self.on_cursor is not None:
        row = self.on_cursor.next()
        type_attrs = self.on_cursor.type_attrs
        # Drop the columns that were not requested.  ``index`` is kept on
        # purpose: for a repeated attribute name it resolves to the first
        # occurrence, as before.
        keep = [type_attrs.index(attr) for attr in type_attrs if attr in self.fields]
        return tuple(row[i] for i in keep)

    if self.ordered_on is None:
        # Page scan: load the next page lazily, only when the previous one
        # has been fully consumed.
        if self.do_next_page:
            curr_page = Ipage(page_offset=self.page_offsets.pop(), filename=self.filename)
            self.curr_items = curr_page.items()
            self.do_next_page = False
        item = self.curr_items[self.curr_iter]
        self.curr_iter += 1
        if self.curr_iter == len(self.curr_items):
            self.do_next_page = True
            self.curr_iter = 0
        self.iter += 1
        attrs = self.type(to_parse=item).attrs
        return tuple(attrs[field] for field in self.fields)

    # Index scan.
    if self.do_next_set:
        self.tree_cursor.next()
        # Work on a copy so popping does not disturb the value held by the tree.
        self.cur_set = self.tree_cursor.read_value().copy()
        self.do_next_set = False
    ref = self.cur_set.pop()
    if not self.cur_set:
        self.do_next_set = True

    # A reference has the form "path,offset,length".
    toks = ref.split(',')
    # The context manager guarantees the handle is released even when
    # int(), seek() or read() raises.
    with open(toks[0], 'r') as f:
        f.seek(int(toks[1]))
        raw = f.read(int(toks[2]))
    ent = self.type(to_parse=raw)
    return tuple(ent.attrs[field] for field in self.fields)
def page_pair_test():
    """Smoke test: pack ``pair`` records into one page and read one back.

    Builds 10000 ``pair`` records whose three fields all equal their index,
    inserts them into a fresh ``Ipage`` until ``is_fit`` rejects one, stores
    the page at offset 0 of 'page.txt' and prints ``get("100", 1)``.
    """
    record_count = 10000
    records = [pair(id=n, id1=n, id2=n) for n in range(record_count)]

    page = Ipage()
    for record in records:
        if page.is_fit(record):
            page.insert(record)
        else:
            # Page is full: stop at the first record that no longer fits.
            break

    page.store('page.txt', 0)
    print(page.get("100", 1))
def page_test():
    """Smoke test: store ten records in a page and index them by name.

    Loads the dataset, truncates 'page.txt', inserts the first ten records
    into a fresh ``Ipage``, stores the page at offset 0, pushes the page into
    a ``BeeStringDict`` keyed on the 'name' attribute and prints the
    resulting (key, value) pairs.
    """
    studs = get_dataset()
    p = Ipage()
    # Create or truncate the page file before anything is written to it.
    with open('page.txt', 'w'):
        pass
    from mx.BeeBase import BeeDict
    tree = BeeDict.BeeStringDict(os.getcwd() + '/storage/' + student.__name__ + 'name', keysize=256)
    try:
        for stud in studs[0:10]:
            p.insert(stud)
        p.store('page.txt', 0)
        p.store_to_tree(tree, student, 'name', 'page.txt')
        # list() makes the pairs visible even where zip() returns a lazy
        # iterator (Python 3) instead of a list.
        print(list(zip(tree.keys(), tree.values())))
    finally:
        # Always release the on-disk tree, even if a step above failed.
        tree.close()
def page_pair_test():
    """Fill a single page with ``pair`` records and print one lookup.

    10000 candidate records are created up front (all three fields equal to
    the record's index); insertion into the page stops at the first record
    for which ``is_fit`` is false.  The page is then stored at offset 0 of
    'page.txt' and the result of ``get("100", 1)`` is printed.
    """
    total = 10000
    candidates = [pair(id=idx, id1=idx, id2=idx) for idx in range(total)]

    target_page = Ipage()
    for candidate in candidates:
        fits = target_page.is_fit(candidate)
        if not fits:
            break
        target_page.insert(candidate)

    target_page.store('page.txt', 0)
    lookup = target_page.get("100", 1)
    print(lookup)
def page_test():
    """Smoke test: write ten dataset records to a page and a name index.

    Steps: load the dataset, truncate 'page.txt', insert the first ten
    records into a new ``Ipage``, store it at offset 0, register the page in
    a ``BeeStringDict`` on the 'name' attribute, and print the tree's
    (key, value) pairs.
    """
    studs = get_dataset()
    p = Ipage()
    # Start from an empty page file.
    with open('page.txt', 'w'):
        pass
    from mx.BeeBase import BeeDict
    tree_path = os.getcwd() + '/storage/' + student.__name__ + 'name'
    tree = BeeDict.BeeStringDict(tree_path, keysize=256)
    try:
        for stud in studs[0:10]:
            p.insert(stud)
        p.store('page.txt', 0)
        p.store_to_tree(tree, student, 'name', 'page.txt')
        # Materialise the pairs: on Python 3 a bare zip() would print only
        # the iterator's repr.
        print(list(zip(tree.keys(), tree.values())))
    finally:
        # Close the tree on every path so the storage file is not left open.
        tree.close()
def has_next(self):
    """Tell whether this range/equality cursor can produce another record.

    Returns ``True`` or ``False``.  When the cursor has neither a wrapped
    cursor nor an assigned field, a diagnostic is printed and ``None`` is
    returned.  ``self.refresh()`` is called before most ``False`` results;
    the exceptions are the two early exits of the wrapped index branch.
    """
    wrapped = self.on_cursor

    if wrapped is None:
        if self.on_field is None:
            print("Something tried to create range-query cursor without assigned field")
            return None

        if self.equal_to is not None:
            # Equality query: the pending reference set decides.
            if len(self.cur_set) != 0:
                return True
            self.refresh()
            return False

        # Range query on our own tree cursor: peek one key ahead, step back.
        moved = self.tree_cursor.next()
        peeked_key = self.tree_cursor.read_key()
        self.tree_cursor.prev()
        if peeked_key > self.less_than:
            self.refresh()
            return False
        if moved and len(self.cur_set) == 0:
            return True
        self.refresh()
        return False

    if self.equal_to is not None:
        if len(wrapped.cur_set) != 0:
            return True
        self.refresh()
        return False

    if wrapped.on_field is not None:
        # Wrapped cursor walks an index.  On the two early exits below the
        # wrapped tree cursor is left advanced (prev() is not called).
        if not wrapped.tree_cursor.next():
            return False
        peeked_key = wrapped.tree_cursor.read_key()
        if peeked_key > self.less_than:
            return False
        wrapped.tree_cursor.prev()
        return wrapped.has_next()

    # Wrapped cursor scans pages: replay its iteration on local copies of its
    # state until a record inside the open interval
    # (greater_than, less_than) is found.
    # NOTE(review): the "load next page" flag is seeded from do_next_set,
    # not do_next_page -- confirm this is intended.
    seen = wrapped.iter
    position = wrapped.curr_iter
    pending_offsets = wrapped.page_offsets.copy()
    need_page = wrapped.do_next_set
    items = wrapped.curr_items
    while True:
        if seen == wrapped.size or len(pending_offsets) == 0:
            self.refresh()
            return False
        if need_page:
            page = Ipage(page_offset=pending_offsets.pop(), filename=wrapped.filename)
            items = page.items()
            need_page = False
            position = 0
        item = items[position]
        position += 1
        if position == len(items):
            need_page = True
            position = 0
        seen += 1
        attrs = wrapped.type(to_parse=item).attrs
        if self.less_than > attrs[self.on_field] > self.greater_than:
            return True
def has_next(self):
    """Check whether another record is available from this query cursor.

    The result is ``True``/``False``, except that a cursor with no wrapped
    cursor and no assigned field prints a diagnostic and yields ``None``.
    ``self.refresh()`` runs before most ``False`` results (not before the
    two early exits taken while peeking at a wrapped index cursor).
    """
    if self.on_cursor is not None:
        inner = self.on_cursor

        if self.equal_to is not None:
            if len(inner.cur_set) == 0:
                self.refresh()
                return False
            return True

        if inner.on_field is None:
            # Inner cursor scans pages: walk forward on copies of its state
            # looking for a record strictly between greater_than and
            # less_than.  This loop only ever leaves through a return.
            # NOTE(review): the page-reload flag starts from do_next_set
            # rather than do_next_page -- confirm.
            consumed = inner.iter
            idx = inner.curr_iter
            offsets_left = inner.page_offsets.copy()
            reload_page = inner.do_next_set
            items = inner.curr_items
            while True:
                if consumed == inner.size:
                    self.refresh()
                    return False
                if len(offsets_left) == 0:
                    self.refresh()
                    return False
                if reload_page:
                    items = Ipage(
                        page_offset=offsets_left.pop(),
                        filename=inner.filename).items()
                    reload_page = False
                    idx = 0
                item = items[idx]
                idx += 1
                if idx == len(items):
                    reload_page = True
                    idx = 0
                consumed += 1
                attrs = inner.type(to_parse=item).attrs
                if self.less_than > attrs[self.on_field] > self.greater_than:
                    return True

        # Inner cursor walks an index: peek at the next key.  The tree cursor
        # is stepped back only when the peeked key is within the upper bound.
        if not inner.tree_cursor.next():
            return False
        upcoming = inner.tree_cursor.read_key()
        if upcoming > self.less_than:
            return False
        inner.tree_cursor.prev()
        return inner.has_next()

    if self.on_field is None:
        print(
            "Something tried to create range-query cursor without assigned field"
        )
        return None

    if self.equal_to is not None:
        if len(self.cur_set) == 0:
            self.refresh()
            return False
        return True

    # Range query on our own tree cursor: look one key ahead, then step back.
    advanced = self.tree_cursor.next()
    upcoming = self.tree_cursor.read_key()
    self.tree_cursor.prev()
    if upcoming > self.less_than:
        self.refresh()
        return False
    if advanced and len(self.cur_set) == 0:
        return True
    self.refresh()
    return False
def put(self, k, v):
    """Insert record ``v`` into the page selected by key ``k``.

    The target page comes from ``self.get_page(k)``.  If the record fits, it
    is inserted and the page is written back in place.  Otherwise the page's
    existing items plus ``v`` are redistributed over two fresh pages, the
    directory ``self.pp`` is updated (and doubled when needed), and both
    pages are stored.  ``self.size`` is incremented on every call.

    NOTE(review): ``gd`` / ``p.d`` look like the global and local depth of an
    extendible-hashing scheme -- confirm against the class documentation.
    """
    p = self.get_page(k)
    # Page is full and already as deep as the directory: double the
    # directory first so the split below has a spare bit to work with.
    if p.is_fit(v) == False and p.d == self.gd:
        self.pp = self.pp + self.pp
        self.gd += 1
    if p.is_fit(v) == False and p.d < self.gd:
        # p.insert(v)
        p1 = Ipage()
        p2 = Ipage()
        # Everything currently on the page, plus the new record, has to be
        # redistributed.
        items = p.items()
        items.append(v.get_string())
        first = True
        # Retry the split until both halves are non-empty and within their
        # space limit.
        while True:
            for record_str in items:
                entity = self.type(to_parse=record_str)
                k2 = int(entity.get_key())
                h = self.my_hash(k2)
                # h = hash(k2)
                # Keep only the low ``gd`` bits of the hash.
                h = h & ((1 << self.gd) - 1)
                # Bit ``p.d`` of the masked hash decides the half: set -> p2,
                # clear -> p1.
                if (h | (1 << p.d) == h):
                    p2.insert(entity)
                else:
                    p1.insert(entity)
            # A split is rejected when one half is empty or a half overflows.
            if p1.count == 0 or p2.count == 0 or p1.end_pointer > p1.total_space or p2.end_pointer > p2.total_space:
                print('oops len = ', len(self.pp), ' gd = ', self.gd)
                if first:
                    # Phase 1: try the next higher bit, without growing the
                    # directory, until the page depth reaches ``gd``.
                    p.d += 1
                    p1 = Ipage()
                    p2 = Ipage()
                    if p.d == self.gd:
                        first = False
                else:
                    # Phase 2: no bits left below ``gd``; double the
                    # directory and retry on the old top bit.
                    print(len(self.pp))
                    p.d = self.gd
                    self.pp *= 2
                    self.gd += 1
                    p1 = Ipage()
                    p2 = Ipage()
            else:
                break
        # ``counter`` numbers the newly allocated page; its file offset is
        # ``counter * p.total_space``.
        self.counter += 1
        # Redirect every directory slot that pointed at the old page: slots
        # whose index has bit ``p.d`` set now point at the new page.
        for i, x in enumerate(self.pp):
            if x == p.page_offset:
                if (i >> p.d) & 1 == 1:
                    self.pp[i] = self.counter * p.total_space
                else:
                    self.pp[i] = p.page_offset
        # Both halves record a depth one greater than the bit used to split.
        p1.set_doubling(p.d + 1)
        p2.set_doubling(p1.d)
        # p1 reuses the old page's offset; p2 goes to the new offset.
        p1.store(self.filename, p.page_offset)
        p2.store(self.filename, self.counter * p.total_space)
    else:
        # Record fits: insert and write the page back in place.
        # NOTE(review): this branch is also taken if the record does not fit
        # but ``p.d > self.gd`` -- presumably unreachable; confirm.
        p.insert(v)
        p.store(self.filename, p.page_offset)
    # if self.check() == False:
    # pass
    self.size += 1
def get_page(self, k):
    """Load the page that the directory assigns to key ``k``.

    ``k`` is converted with ``int`` and hashed with ``self.my_hash``; the low
    ``self.gd`` bits of the hash select a slot in ``self.pp``, whose value is
    passed as the page offset, together with ``self.filename``, to ``Ipage``.
    """
    hashed = self.my_hash(int(k))
    # h = hash(k)
    slot = hashed & ((1 << self.gd) - 1)
    return Ipage(self.pp[slot], self.filename)