import logging
import multiprocessing
import threading
import time
from concurrent.futures import ThreadPoolExecutor

# Buffer, Loader, Replacer, and the metrics helper `m` are assumed to be
# provided by the surrounding project.


class BufferManager(object):
    def __init__(self, name, cap=16, create=False, size=0):
        # shared buffer (602116 is a magic constant kept from the original)
        self.buffer = Buffer(name, 602116, create, size)

        # start the loader service; one Manager process serves both queues
        self._queue_manager = multiprocessing.Manager()
        self.id_queue = self._queue_manager.Queue(maxsize=cap)
        self.data_queue = self._queue_manager.Queue(maxsize=cap)
        self.loader = multiprocessing.Process(
            target=Loader.loading,
            args=(self.id_queue, self.data_queue, name, cap // 2))
        self.loader.start()
        assert self.loader.is_alive()

        # tables guarded by data_lock
        self.data_lock = threading.Lock()
        self.id_table = {}    # data id -> datanode index
        self.data_refs = {}   # data id -> inode indices that reference it
        self.task_tails = {}  # task name -> tail inode of its chain
        self.task_heads = {}  # task name -> head inode of its chain
        self.pending_id_lock = threading.Lock()
        self.pending_id = {}  # data id -> [name list, expect_diff]

        # replacement policy
        self.replacer = Replacer()

        # a single worker thread drains the loader's data queue; keep the
        # executor referenced for the manager's lifetime
        self.executor = ThreadPoolExecutor(max_workers=1)
        self.executor.submit(self.listener)

    def listener(self):
        """Consume (data_id, data_idx) items produced by the loader."""
        while True:
            item = self.data_queue.get()
            p_ticker = m.tiker("data process")
            p_ticker.end()

            w_ticker = m.tiker("data write")
            w_ticker.start()
            data_id, data_idx = item
            with self.pending_id_lock:
                name_list, expect_diff = self.pending_id[data_id]
                del self.pending_id[data_id]
            with self.data_lock:
                self.id_table[data_id] = data_idx
            logging.info("buffer write data %d in %d with tasks %s",
                         data_id, data_idx, str(name_list))
            self.write(data_id, name_list, expect_diff)

    def write(self, data_id, name_list, expect_diff):
        with self.data_lock:
            hit = data_id in self.id_table
            if hit:
                logging.info("data %d with %s hit", data_id, name_list)
                # pin the data while new inodes are linked to it
                self.replacer.delete(data_id)
        if not hit:
            logging.info("data %d with %s miss", data_id, name_list)
            # only the first writer for this id reserves a datanode and
            # enqueues a load request; later writers just merge their names
            if self._merge_pendingid(data_id, name_list, expect_diff):
                p_ticker = m.tiker("pool")
                p_ticker.start()
                data_idx = self.allocate_datanode()
                self.id_queue.put((data_id, data_idx))
            return
        with self.data_lock:
            data_idx = self.id_table[data_id]
            for name in name_list:
                # append a new inode to the task's chain, pointing at the data
                inode_idx = self.allocate_inode()
                self.data_refs.setdefault(data_id, []).append(inode_idx)
                self.buffer.write_inode(inode_idx, self.task_tails[name], data_idx)
                logging.info("write %s's data [%d]-->[%d]-->(%d)",
                             name, self.task_tails[name], inode_idx, data_idx)
                self.task_tails[name] = inode_idx
            self.replacer.update(data_id, expect_diff)
        w_ticker = m.tiker("data write")
        w_ticker.end()

    def _merge_pendingid(self, data_id, name_list, expect_diff):
        """Merge tasks waiting on data_id; return True only for the first caller."""
        with self.pending_id_lock:
            first = data_id not in self.pending_id
            if first:
                self.pending_id[data_id] = [[], 0]
            self.pending_id[data_id][0].extend(name_list)
            self.pending_id[data_id][1] = expect_diff
        return first

    def add_task(self, task_name):
        if task_name in self.task_heads:
            return -1
        # every task starts with an empty head inode
        inode_idx = self.allocate_inode()
        self.buffer.write_inode(inode_idx)
        self.task_heads[task_name] = inode_idx
        self.task_tails[task_name] = inode_idx
        logging.info("add task %s with head %d", task_name, inode_idx)
        return inode_idx

    def allocate_inode(self):
        inode_idx = self.buffer.allocate_inode()
        if inode_idx != -1:
            return inode_idx
        # no free inode: reclaim a consumed head from some task's chain
        while True:
            for task_name in self.task_heads:
                head_inode = self.task_heads[task_name]
                if not self.buffer.is_used(head_inode):
                    _, next_head = self.buffer.parse_inode(head_inode)
                    self.task_heads[task_name] = next_head
                    return head_inode

    def allocate_datanode(self):
        datanode_idx = self.buffer.allocate_datanode()
        if datanode_idx != -1:
            return datanode_idx
        # no free datanode: evict a victim chosen by the replacement policy
        while True:
            with self.data_lock:
                data_id = self.replacer.next()
                data_idx = self.id_table[data_id]
                # evictable only if no inode still holds a valid reference
                valid = any(self.buffer.is_datavalid(ref, data_idx)
                            for ref in self.data_refs[data_id])
                if not valid:
                    logging.info("evict data %d in %d", data_id, data_idx)
                    del self.id_table[data_id]
                    del self.data_refs[data_id]
                    self.replacer.delete(data_id)
                    self.replacer.reset()
                    return data_idx

    def delete_task(self, name):
        with self.pending_id_lock:
            # drop this task's name from every pending entry
            for entry in self.pending_id.values():
                if name in entry[0]:
                    entry[0].remove(name)
        head = self.task_heads[name]
        with self.data_lock:
            del self.task_heads[name]
            del self.task_tails[name]  # keep tails consistent with heads
            # walk to the end of the task's inode chain
            while head != -1:
                head = self.buffer.get_next(head)

    def terminate(self):
        self.loader.kill()
        self.loader.join()   # wait for the loader process to exit
        self.loader.close()
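
# ---------------------------------------------------------------------------
# Minimal usage sketch of BufferManager. Illustrative only: it assumes the
# repo's Buffer, Loader, Replacer, and metrics helper `m` are importable, and
# every concrete value below (buffer name, cap, size, data ids, expect_diff)
# is a hypothetical placeholder rather than a value taken from the repo.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    manager = BufferManager("demo", cap=8, create=True, size=1 << 20)

    # register a consumer task; add_task returns its head inode,
    # or -1 if the name is already registered
    assert manager.add_task("train") != -1

    # request a few items: each miss is merged into pending_id, a datanode
    # is reserved, and the loader fills it in asynchronously; the listener
    # thread then links the loaded data into the task's inode chain
    for data_id in range(4):
        manager.write(data_id, ["train"], expect_diff=1)

    time.sleep(1.0)  # give the loader and listener time to drain the queues
    manager.delete_task("train")
    manager.terminate()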