# Example #1
class BufferManger(object):
    """Manage a shared Buffer: schedule asynchronous loads, track which
    data id lives in which datanode, and link loaded data into per-task
    inode chains.

    NOTE(review): the class name looks like a typo of ``BufferManager``;
    kept as-is so existing callers keep working.
    """

    def __init__(self, name, cap=16, create=False, size=0):
        # Shared buffer backing store.
        # NOTE(review): 602116 is a magic capacity constant — confirm its
        # meaning against the Buffer implementation.
        self.buffer = Buffer(name, 602116, create, size)

        # Start the loader service: ids go in through id_queue, loaded
        # (data_id, data_idx) pairs come back through data_queue.
        self.id_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.data_queue = multiprocessing.Manager().Queue(maxsize=cap)
        self.loader = multiprocessing.Process(
            target=Loader.loading, args=(self.id_queue, self.data_queue, name, int(cap/2)))
        self.loader.start()
        assert self.loader.is_alive()

        # Bookkeeping tables, all guarded by data_lock.
        self.data_lock = threading.Lock()
        self.id_table = {}    # data id -> datanode index
        self.data_refs = {}   # data id -> list of inode refs
        self.task_tails = {}  # task name -> tail inode of its chain
        self.task_heads = {}  # task name -> head inode of its chain

        # Requests whose data is still loading: id -> [name_list, expect_diff]
        self.pending_id_lock = threading.Lock()
        self.pending_id = {}

        # Eviction policy.
        self.replacer = Replacer()

        # Background thread that drains data_queue.  Kept on self so the
        # executor (previously a dropped local) cannot be garbage collected.
        self._executor = ThreadPoolExecutor(max_workers=1)
        self._executor.submit(self.listener)

    def listener(self):
        """Consume loaded items from data_queue forever and publish them."""
        while True:
            item = self.data_queue.get()
            p_ticker = m.tiker("data process")
            p_ticker.end()

            w_ticker = m.tiker("data write")
            w_ticker.start()

            data_id, data_idx = item
            with self.pending_id_lock:
                # Claim every task that was waiting on this id.
                name_list, expect_diff = self.pending_id.pop(data_id)
            with self.data_lock:
                self.id_table[data_id] = data_idx
            logging.info("buffer write data %d in %d with tasks %s", data_id, data_idx, str(name_list))

            self.write(data_id, name_list, expect_diff)

    def write(self, data_id, name_list, expect_diff):
        """Link data_id into the inode chain of every task in name_list.

        On a miss the id is queued for the loader; listener() re-invokes
        this method once the data has actually arrived.
        """
        with self.data_lock:
            hit = data_id in self.id_table
            if hit:
                logging.info("data %d with %s hit", data_id, name_list)
                # Pin the id: take it out of the eviction candidates while
                # we link it below.
                self.replacer.delete(data_id)

        if not hit:
            logging.info("data %d with %s miss", data_id, name_list)
            # Only the first miss for an id schedules a load; later misses
            # just merge their task names into the pending entry.
            if self._merge_pendingid(data_id, name_list, expect_diff):
                p_ticker = m.tiker("pool")
                p_ticker.start()
                data_idx = self.allocate_datanode()
                self.id_queue.put((data_id, data_idx))
            return

        with self.data_lock:
            # Invariant under data_lock — hoisted out of the loop.
            data_idx = self.id_table[data_id]
            for name in name_list:
                inode_idx = self.allocate_inode()
                self.data_refs.setdefault(data_id, []).append(inode_idx)
                self.buffer.write_inode(inode_idx, self.task_tails[name], data_idx)
                # typo "wirte" fixed in the log message
                logging.info("write %s's data [%d]-->[%d]-->(%d)", name, self.task_tails[name], inode_idx, data_idx)
                self.task_tails[name] = inode_idx
            self.replacer.update(data_id, expect_diff)

        w_ticker = m.tiker("data write")
        w_ticker.end()

    def _merge_pendingid(self, data_id, name_list, expect_diff):
        """Merge name_list into the pending entry for data_id.

        Returns True iff this call created the entry, i.e. the caller is
        responsible for scheduling the actual load.
        """
        with self.pending_id_lock:
            created = data_id not in self.pending_id
            if created:
                self.pending_id[data_id] = [[], 0]
            self.pending_id[data_id][0].extend(name_list)
            self.pending_id[data_id][1] = expect_diff
        return created

    def add_task(self, task_name):
        """Register a task with an empty inode chain.

        Returns the head inode index, or -1 if the task already exists.
        """
        if task_name in self.task_heads:
            return -1
        inode_idx = self.allocate_inode()
        self.buffer.write_inode(inode_idx)
        self.task_heads[task_name] = inode_idx
        self.task_tails[task_name] = inode_idx

        logging.info("add task %s with head %d", task_name, inode_idx)
        return inode_idx

    def allocate_inode(self):
        """Return a free inode index, reclaiming consumed chain heads when
        the buffer is exhausted.

        NOTE(review): spins forever if no task head ever becomes unused.
        """
        inode_idx = self.buffer.allocate_inode()
        if inode_idx != -1:
            return inode_idx

        # Reclaim the head of any chain whose head is no longer in use.
        while True:
            for task_name, head_inode in self.task_heads.items():
                if not self.buffer.is_used(head_inode):
                    _, next_head = self.buffer.parse_inode(head_inode)
                    self.task_heads[task_name] = next_head
                    return head_inode

    def allocate_datanode(self):
        """Return a free datanode index, evicting an unreferenced data id
        when the buffer is exhausted.
        """
        datanode_idx = self.buffer.allocate_datanode()
        if datanode_idx != -1:
            return datanode_idx

        # Keep asking the replacer for a victim none of whose inode refs is
        # still valid (i.e. no task still needs the data).
        while True:
            with self.data_lock:
                data_id = self.replacer.next()
                data_idx = self.id_table[data_id]
                # BUG FIX: `valid` was initialised once outside this loop,
                # so a candidate with an empty ref list inherited a stale
                # True and could never be evicted. Recompute per candidate;
                # any() short-circuits exactly like the original break.
                valid = any(self.buffer.is_datavalid(ref, data_idx)
                            for ref in self.data_refs[data_id])

                if not valid:
                    logging.info("evict data %d in %d", data_id, data_idx)
                    del self.id_table[data_id]
                    del self.data_refs[data_id]
                    self.replacer.delete(data_id)
                    self.replacer.reset()
                    return data_idx

    def delete_task(self, name):
        """Remove a task: drop it from pending requests and from the chain
        tables, then walk its inode chain.
        """
        with self.pending_id_lock:
            for entry in self.pending_id.values():
                # BUG FIX: the task name lives in the entry's name list
                # (entry[0]); the original called remove() on the
                # [name_list, expect_diff] pair itself, which raised
                # ValueError. Guarded so absent names are a no-op.
                if name in entry[0]:
                    entry[0].remove(name)

        head = self.task_heads[name]

        with self.data_lock:
            del self.task_heads[name]
            # Also drop the stale tail entry so the table does not leak.
            self.task_tails.pop(name, None)

        while head != -1:
            # NOTE(review): this walk only traverses the chain; presumably
            # Buffer reclaims nodes elsewhere — confirm.
            head = self.buffer.get_next(head)

    def terminate(self):
        """Kill the loader process and release its resources."""
        self.loader.kill()
        while self.loader.is_alive():
            time.sleep(0.1)
        self.loader.close()
        # NOTE(review): the listener thread stays blocked on
        # data_queue.get() here; shutting down self._executor would need a
        # sentinel item — behavior left unchanged.