def __init__(self, config: Dict[str, any], result_dir: str,
             cache_stats: CacheInformation):
    super().__init__(config, result_dir, cache_stats)
    # evaluation specific variables
    self.observation_seen = 0
    self.episode_reward = 0
    self.checkpoint_steps = config['checkpoint_steps']

    self._incomplete_experiences = TTLCache(InMemoryStorage())
    self._incomplete_experiences.expired_entry_callback(
        self._observe_expired_incomplete_experience)
    self.view_of_the_cache = {}  # type: Dict[str, Dict[str, any]]
    self._end_episode_observation = {
        ObservationType.Invalidate, ObservationType.Miss,
        ObservationType.Expiration
    }

    # TODO refactor into common RL interface for all strategies
    # Agent configuration (can be shared with others)
    agent_config = config['agent_config']
    fields_in_state = len(EvictionAgentSystemState.__slots__)
    self.converter = EvictionStrategyRLConverter(self.result_dir)

    # State: fields of the entry in question
    # Action: whether to evict that key or not
    self.agent = Agent.from_spec(
        agent_config,
        state_space=FloatBox(shape=(fields_in_state, )),
        action_space=IntBox(low=0, high=2))

    self.logger = logging.getLogger(__name__)
    name = 'rl_eviction_strategy'
    self.reward_logger = create_file_logger(name=f'{name}_reward_logger',
                                            result_dir=self.result_dir)
    self.loss_logger = create_file_logger(name=f'{name}_loss_logger',
                                          result_dir=self.result_dir)
    self.observation_logger = create_file_logger(
        name=f'{name}_observation_logger', result_dir=self.result_dir)
    self.key_vocab = Vocabulary()
def __init__(self, result_dir: str):
    self.vocabulary = Vocabulary(add_pad=True, add_unk=False)
    self.logger = logging.getLogger(__name__)
    name = 'rl_eviction_strategy'
    self.performance_logger = create_file_logger(
        name=f'{name}_performance_logger', result_dir=result_dir)
def __init__(self):
    self.vocabulary = Vocabulary()
    self.logger = logging.getLogger(__name__)
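# NOTE: a minimal sketch, NOT the project's implementation, of the two hooks that
# RLCachingStrategy (below) expects from its converter: agent_to_system_action()
# maps the IntBox(2) agent output onto a should-cache boolean, and
# system_to_agent_reward() maps a completed experience onto a scalar reward.
# The class name and the reward shaping here are assumptions for illustration only.
class SketchCachingConverter:

    def agent_to_system_action(self, agent_action) -> bool:
        # IntBox(2) produces 0 ("don't cache") or 1 ("cache")
        return int(agent_action) == 1

    def system_to_agent_reward(self, experience) -> float:
        # assumed shaping: reward entries that were hit while cached,
        # penalise entries that were cached but never read
        hits = experience.state.hit_count
        return float(hits) if hits > 0 else -1.0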
class RLTtlStrategy(TtlStrategy):
    """RL driven TTL estimation strategy."""

    def __init__(self, config: Dict[str, any], result_dir: str,
                 cache_stats: CacheInformation):
        super().__init__(config, result_dir, cache_stats)
        self.observation_seen = 0
        self.cum_reward = 0
        self.checkpoint_steps = config['checkpoint_steps']

        self._incomplete_experiences = TTLCache(InMemoryStorage())
        self._incomplete_experiences.expired_entry_callback(
            self._observe_expiry_eviction)
        self.non_terminal_observations = {
            ObservationType.EvictionPolicy, ObservationType.Expiration
        }

        agent_config = config['agent_config']
        self.maximum_ttl = config['max_ttl']
        self.experimental_reward = config.get('experimental_reward', False)
        fields_in_state = len(TTLAgentSystemState.__slots__)
        self.agent = Agent.from_spec(
            agent_config,
            state_space=FloatBox(shape=(fields_in_state, )),
            action_space=FloatBox(low=0, high=self.maximum_ttl, shape=(1, )))

        # TODO refactor into common RL interface for all strategies
        self.logger = logging.getLogger(__name__)
        name = 'rl_ttl_strategy'
        self.reward_logger = create_file_logger(name=f'{name}_reward_logger',
                                                result_dir=self.result_dir)
        self.loss_logger = create_file_logger(name=f'{name}_loss_logger',
                                              result_dir=self.result_dir)
        self.ttl_logger = create_file_logger(name=f'{name}_ttl_logger',
                                             result_dir=self.result_dir)
        self.observation_logger = create_file_logger(
            name=f'{name}_observation_logger', result_dir=self.result_dir)
        self.key_vocab = Vocabulary()
        self.errors = create_file_logger(name=f'{name}_error_logger',
                                         result_dir=self.result_dir)

    def estimate_ttl(self, key: str, values: Dict[str, any],
                     operation_type: OperationType) -> float:
        observation_time = time.time()
        encoded_key = self.key_vocab.add_or_get_id(key)
        cache_utility = self.cache_stats.cache_utility
        state = TTLAgentSystemState(encoded_key=encoded_key,
                                    hit_count=0,
                                    step_code=0,
                                    cache_utility=cache_utility,
                                    operation_type=operation_type.value)

        state_as_numpy = state.to_numpy()
        agent_action = self.agent.get_action(state_as_numpy)
        action = agent_action.item()
        incomplete_experience = TTLAgentObservedExperience(
            state=state,
            agent_action=agent_action,
            starting_state=state.copy(),
            observation_time=observation_time)
        self._incomplete_experiences.set(key, incomplete_experience,
                                         self.maximum_ttl)
        return action

    def observe(self, key: str, observation_type: ObservationType,
                info: Dict[str, any]):
        observed_experience = self._incomplete_experiences.get(key)
        if observed_experience is None or observation_type == ObservationType.Expiration:
            return  # haven't had to make a decision on it

        current_time = time.time()
        stored_state = observed_experience.state
        if observation_type == ObservationType.Hit:
            stored_state.hit_count += 1
        # elif observation_type in self.non_terminal_observations:
        #     # it was evicted by another policy, don't attempt to learn from this
        #     pass

        estimated_ttl = observed_experience.agent_action.item()
        first_observation_time = observed_experience.observation_time
        real_ttl = current_time - first_observation_time
        stored_state.step_code = observation_type.value
        stored_state.cache_utility = self.cache_stats.cache_utility

        self.reward_agent(observation_type, observed_experience, real_ttl)

        if observation_type != ObservationType.Hit:
            self.ttl_logger.info(
                f'{self.episode_num},{observation_type.name},{key},{estimated_ttl},{real_ttl},{stored_state.hit_count}'
            )
            self._incomplete_experiences.delete(key)

        self.observation_seen += 1
        if self.observation_seen % self.checkpoint_steps == 0:
            self.logger.info(
                f'Observations seen so far: {self.observation_seen}, reward so far: {self.cum_reward}'
            )
        if observation_type not in self.non_terminal_observations:
            self.observation_logger.info(f'{key},{observation_type}')

    def _observe_expiry_eviction(self, key: str,
                                 observation_type: ObservationType,
                                 info: Dict[str, any]):
        """Observe decisions that haven't been observed by the main cache,
        e.g. don't cache -> ttl up -> no miss."""
        self.observation_logger.info(
            f'{self.episode_num},{key},{observation_type}')
        experience = info['value']  # type: TTLAgentObservedExperience
        self.ttl_logger.info(
            f'{self.episode_num},{observation_type.name},{key},{experience.agent_action.item()},'
            f'{experience.agent_action.item()},{experience.state.hit_count}')
        experience.state.step_code = observation_type.value

        self.reward_agent(observation_type, experience,
                          experience.agent_action.item())

    def reward_agent(self, observation_type: ObservationType,
                     experience: TTLAgentObservedExperience,
                     real_ttl: float) -> float:
        # reward more utilisation of the cache capacity given more hits
        final_state = experience.state
        difference_in_ttl = -abs(
            (experience.agent_action.item() + 1) /
            max(min(real_ttl, self.maximum_ttl), 1))
        # reward = final_state.hit_count - abs(difference_in_ttl * self.cache_stats.cache_utility)
        if observation_type == ObservationType.Hit:
            reward = 1
            terminal = False
        # elif observation_type == ObservationType.EvictionPolicy:
        #     reward = 0
        #     terminal = True
        else:
            reward = difference_in_ttl
            if abs(difference_in_ttl) < 10:
                reward = 10
            terminal = True

        self.logger.debug(
            f'Hits: {final_state.hit_count}, ttl diff: {difference_in_ttl}, Reward: {reward}'
        )
        self.agent.observe(
            preprocessed_states=experience.starting_state.to_numpy(),
            actions=experience.agent_action,
            internals=[],
            rewards=reward,
            next_states=final_state.to_numpy(),
            terminals=terminal)
        self.cum_reward += reward
        self.reward_logger.info(f'{self.episode_num},{reward}')

        loss = self.agent.update()
        if loss is not None:
            self.loss_logger.info(f'{self.episode_num},{loss[0]}')
        return reward

    def close(self):
        super().close()
        for (k, v) in list(self._incomplete_experiences.items()):
            self.ttl_logger.info(
                f'{self.episode_num},{ObservationType.EndOfEpisode.name},{k},{v.agent_action.item()},'
                f'{v.agent_action.item()},{v.state.hit_count}')
        self._incomplete_experiences.clear()
        try:
            self.agent.reset()
        except Exception as e:
            self.errors.info(e)
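# NOTE: an illustrative config for RLTtlStrategy, inferred only from the keys the
# constructor above reads. The concrete values and the contents of 'agent_config'
# are placeholders; 'agent_config' is passed straight to Agent.from_spec() and its
# real fields (agent type, network, exploration, memory, ...) are not shown here.
example_ttl_strategy_config = {
    'checkpoint_steps': 1000,      # how often to log observation/reward progress
    'max_ttl': 3600,               # upper bound (seconds) of the FloatBox action space
    'experimental_reward': False,  # optional, defaults to False
    'agent_config': {
        # placeholder agent spec consumed by Agent.from_spec()
    },
}
# strategy = RLTtlStrategy(example_ttl_strategy_config,
#                          result_dir='results/', cache_stats=cache_stats)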
class RLMultiTasksStrategy(BaseStrategy):
    """RL driven multi-task strategy: caching, eviction, and TTL estimation."""

    def __init__(self, config: Dict[str, any], result_dir: str,
                 cache_stats: CacheInformation):
        super().__init__(config, result_dir, cache_stats)
        self.supported_observations = {
            ObservationType.Hit, ObservationType.Miss,
            ObservationType.Invalidate
        }

        # evaluation specific variables
        self.observation_seen = 0
        self.cum_reward = 0
        self.checkpoint_steps = config['checkpoint_steps']

        self._incomplete_experiences = TTLCache(InMemoryStorage())
        self._incomplete_experiences.expired_entry_callback(
            self._observe_expiry_eviction)
        self.non_terminal_observations = {
            ObservationType.EvictionPolicy, ObservationType.Expiration
        }

        agent_config = config['agent_config']
        self.maximum_ttl = config['max_ttl']
        fields_in_state = len(MultiTaskAgentSystemState.__slots__)
        action_space = RLDict({
            'ttl': IntBox(low=0, high=self.maximum_ttl),
            'eviction': IntBox(low=0, high=2)
        })
        self.agent = Agent.from_spec(
            agent_config,
            state_space=FloatBox(shape=(fields_in_state, )),
            action_space=action_space)

        # TODO refactor into common RL interface for all strategies
        self.logger = logging.getLogger(__name__)
        name = 'rl_multi_strategy'
        self.reward_logger = create_file_logger(name=f'{name}_reward_logger',
                                                result_dir=self.result_dir)
        self.loss_logger = create_file_logger(name=f'{name}_loss_logger',
                                              result_dir=self.result_dir)
        self.ttl_logger = create_file_logger(name=f'{name}_ttl_logger',
                                             result_dir=self.result_dir)
        self.observation_logger = create_file_logger(
            name=f'{name}_observation_logger', result_dir=self.result_dir)
        self.performance_logger = create_file_logger(
            name=f'{name}_performance_logger', result_dir=self.result_dir)
        self.key_vocab = Vocabulary()

    def observe(self, key: str, observation_type: ObservationType,
                info: Dict[str, any]):
        observed_experience = self._incomplete_experiences.get(key)
        if observed_experience is None:
            return  # haven't had to make a decision on it

        current_time = time.time()
        stored_state = observed_experience.state
        stored_state.step_code = observation_type.value
        stored_state.cache_utility = self.cache_stats.cache_utility

        if observation_type == ObservationType.Hit:
            stored_state.hit_count += 1
        else:
            # includes eviction, invalidation, and miss
            estimated_ttl = observed_experience.agent_action['ttl'].item()
            first_observation_time = observed_experience.observation_time
            real_ttl = current_time - first_observation_time
            # log the difference between the estimated TTL and the real TTL
            self.ttl_logger.info(
                f'{self.episode_num},{observation_type.name},{key},{estimated_ttl},{real_ttl},{stored_state.hit_count}'
            )
            self._incomplete_experiences.delete(key)

        self.reward_agent(observation_type, observed_experience)

        self.observation_seen += 1
        if self.observation_seen % self.checkpoint_steps == 0:
            self.logger.info(
                f'Observations seen so far: {self.observation_seen}, reward so far: {self.cum_reward}'
            )
        if observation_type not in self.non_terminal_observations:
            self.observation_logger.info(f'{key},{observation_type}')

    def trim_cache(self, cache: TTLCache):
        # trim_cache isn't called often, so it is acceptable for the operation to
        # be expensive: produce an action for every entry currently tracked
        keys_to_evict = []
        for (key, stored_experience) in list(self._incomplete_experiences.items()):
            action = self.agent.get_action(
                stored_experience.state.to_numpy())['eviction']
            evict = (action.flatten() == 1).item()
            if evict:
                cache.delete(key)
                keys_to_evict.append(key)
                # update the stored value for the eviction action
                stored_experience.agent_action['eviction'] = action
                stored_experience.manual_eviction = True
        if len(keys_to_evict) == 0:
            self.logger.error('trim_cache: no keys were evicted.')
        return keys_to_evict

    def should_cache(self, key: str, values: Dict[str, str], ttl: int,
                     operation_type: OperationType) -> bool:
        # only cache objects whose TTL is longer than 10 seconds
        # (maybe make this threshold configurable?)
        return ttl > 10

    def estimate_ttl(self, key: str, values: Dict[str, any],
                     operation_type: OperationType) -> float:
        # TODO check if it is in the observed queue
        observation_time = time.time()
        encoded_key = self.key_vocab.add_or_get_id(key)
        cache_utility = self.cache_stats.cache_utility
        state = MultiTaskAgentSystemState(encoded_key=encoded_key,
                                          hit_count=0,
                                          ttl=0,
                                          step_code=0,
                                          cache_utility=cache_utility,
                                          operation_type=operation_type.value)

        state_as_numpy = state.to_numpy()
        agent_action = self.agent.get_action(state_as_numpy)
        action = agent_action['ttl'].item()
        incomplete_experience = MultiTaskAgentObservedExperience(
            state=state,
            agent_action=agent_action,
            starting_state=state.copy(),
            observation_time=observation_time,
            manual_eviction=False)
        self._incomplete_experiences.set(key, incomplete_experience,
                                         self.maximum_ttl)

        return action

    def reward_agent(self, observation_type: ObservationType,
                     experience: MultiTaskAgentObservedExperience) -> int:
        # reward more utilisation of the cache capacity given more hits
        final_state = experience.state
        # difference_in_ttl = -abs((experience.agent_action.item() + 1) / min(real_ttl, self.maximum_ttl))
        reward = 0
        terminal = False
        if observation_type == ObservationType.Invalidate and (
                experience.agent_action['ttl'] < 10 or
                (experience.agent_action['eviction'].flatten() == 1).item()):
            # evicted or not cached, followed by an invalidation
            reward = 10
            terminal = True
        elif observation_type in (ObservationType.Invalidate,
                                  ObservationType.Miss):
            reward = -10
            terminal = True
        elif observation_type == ObservationType.Hit:
            terminal = False
            reward = 1

        if experience.manual_eviction:
            if observation_type == ObservationType.Expiration:
                reward = -10
                terminal = True
            if observation_type == ObservationType.Hit:
                reward = 2

        self.performance_metric_for_eviction(experience, observation_type)

        self.agent.observe(
            preprocessed_states=experience.starting_state.to_numpy(),
            actions=experience.agent_action,
            internals=[],
            rewards=reward,
            next_states=final_state.to_numpy(),
            terminals=terminal)
        self.cum_reward += reward
        self.reward_logger.info(f'{self.episode_num},{reward}')

        # TODO use self.agent.update_schedule to decide when to call update
        loss = self.agent.update()
        if loss is not None:
            self.loss_logger.info(f'{self.episode_num},{loss[0]}')
        return reward

    def _observe_expiry_eviction(self, key: str,
                                 observation_type: ObservationType,
                                 info: Dict[str, any]):
        """Observe decisions that haven't been observed by the main cache,
        e.g. don't cache -> ttl up -> no miss."""
        self.observation_logger.info(
            f'{self.episode_num},{key},{observation_type}')
        experience = info['value']  # type: MultiTaskAgentObservedExperience
        self.ttl_logger.info(
            f'{self.episode_num},{observation_type.name},{key},{experience.agent_action["ttl"].item()},'
            f'{experience.agent_action["ttl"].item()},{experience.state.hit_count}'
        )
        experience.state.step_code = observation_type.value

        self.reward_agent(observation_type, experience)

    def performance_metric_for_eviction(
            self, stored_experience: MultiTaskAgentObservedExperience,
            observation_type: ObservationType) -> int:
        should_evict = (
            stored_experience.agent_action['eviction'].flatten() == 1).item()
        if observation_type == ObservationType.Expiration:
            if should_evict:
                # reward: decided to evict and didn't observe any follow-up miss
                self.performance_logger.info(f'{self.episode_num},TrueEvict')
            else:
                # reward for not evicting a key that received more hits,
                # or 0 if it didn't evict but also didn't get any hits
                gain_for_not_evicting = (stored_experience.state.hit_count -
                                         stored_experience.starting_state.hit_count)
                if gain_for_not_evicting > 0:
                    self.performance_logger.info(f'{self.episode_num},TrueMiss')
                else:
                    self.performance_logger.info(f'{self.episode_num},MissEvict')
                return gain_for_not_evicting
        elif observation_type == ObservationType.Invalidate:
            # Set/Delete removes the entry from the cache:
            # reward an eviction followed by an invalidation.
            if should_evict:
                self.performance_logger.info(f'{self.episode_num},TrueEvict')
            else:
                # punish not evicting a key that got invalidated afterwards.
                self.performance_logger.info(f'{self.episode_num},MissEvict')
        elif observation_type == ObservationType.Miss:
            if should_evict:
                # punish a miss after an eviction decision.
                self.performance_logger.info(f'{self.episode_num},FalseEvict')

    def close(self):
        for (k, v) in list(self._incomplete_experiences.items()):
            self.ttl_logger.info(
                f'{self.episode_num},{ObservationType.EndOfEpisode.name},{k},{v.agent_action["ttl"].item()},'
                f'{v.agent_action["ttl"].item()},{v.state.hit_count}')
            self.performance_logger.info(f'{self.episode_num},TrueMiss')
        super().close()
        self._incomplete_experiences.clear()
        try:
            self.agent.reset()
        except Exception as e:
            self.logger.warning(f'Failed to reset agent at end of episode: {e}')
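# NOTE: a small standalone sketch of how the multi-task agent's composite action
# (the RLDict with 'ttl' and 'eviction' components) is read back into system-level
# decisions, mirroring estimate_ttl() and trim_cache() above. The numpy arrays and
# the helper name are assumptions standing in for whatever Agent.get_action() returns.
import numpy as np


def unpack_multi_task_action(agent_action: dict) -> tuple:
    """Return (ttl_seconds, should_evict) from a {'ttl': ..., 'eviction': ...} action."""
    ttl_seconds = agent_action['ttl'].item()                         # scalar TTL estimate
    should_evict = (agent_action['eviction'].flatten() == 1).item()  # IntBox(0..2): 1 means evict
    return ttl_seconds, should_evict


# example: unpack_multi_task_action({'ttl': np.array(120), 'eviction': np.array([1])})
# -> (120, True)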
class RLCachingStrategy(CachingStrategy):

    def __init__(self, config: Dict[str, any], result_dir: str,
                 cache_stats: CacheInformation):
        super().__init__(config, result_dir, cache_stats)
        # evaluation specific variables
        self.observation_seen = 0
        self.episode_reward = 0
        self.checkpoint_steps = config['checkpoint_steps']

        self._incomplete_experiences = TTLCache(InMemoryStorage())
        self._incomplete_experiences.expired_entry_callback(
            self._observe_expired_incomplete_experience)
        self.experimental_reward = config.get('experimental_reward', False)

        agent_config = config['agent_config']
        self.converter = CachingStrategyRLConverter()
        # action space: should cache: true or false
        # state space: [capacity (1), query key (1), query result set (num_indexes)]
        fields_in_state = len(CachingAgentSystemState.__slots__)
        self.agent = Agent.from_spec(
            agent_config,
            state_space=FloatBox(shape=(fields_in_state, )),
            action_space=IntBox(2))

        self.logger = logging.getLogger(__name__)
        name = 'rl_caching_strategy'
        self.reward_logger = create_file_logger(name=f'{name}_reward_logger',
                                                result_dir=self.result_dir)
        self.loss_logger = create_file_logger(name=f'{name}_loss_logger',
                                              result_dir=self.result_dir)
        self.observation_logger = create_file_logger(
            name=f'{name}_observation_logger', result_dir=self.result_dir)
        self.entry_hits_logger = create_file_logger(
            name=f'{name}_entry_hits_logger', result_dir=self.result_dir)
        self.key_vocab = Vocabulary()

    def should_cache(self, key: str, values: Dict[str, str], ttl: int,
                     operation_type: OperationType) -> bool:
        # TODO what about the case of a cache key that already exists in the incomplete experiences?
        assert self._incomplete_experiences.get(key) is None, \
            "should_cache is assumed to be the first call and the key shouldn't be in the cache"

        observation_time = time.time()
        encoded_key = self.key_vocab.add_or_get_id(key)
        state = CachingAgentSystemState(encoded_key=encoded_key,
                                        ttl=ttl,
                                        hit_count=0,
                                        step_code=0,
                                        operation_type=operation_type.value)

        agent_action = self.agent.get_action(state.to_numpy())
        incomplete_experience_entry = CachingAgentIncompleteExperienceEntry(
            state=state,
            agent_action=agent_action,
            starting_state=state.copy(),
            observation_time=observation_time)

        action = self.converter.agent_to_system_action(agent_action)
        self._incomplete_experiences.set(key, incomplete_experience_entry, ttl)
        return action

    def observe(self, key: str, observation_type: ObservationType,
                info: Dict[str, any]):
        # TODO include stats/capacity information in the info dict
        experience = self._incomplete_experiences.get(
            key)  # type: CachingAgentIncompleteExperienceEntry
        if experience is None:
            return  # if I haven't had to make a decision on this, ignore it.

        self.observation_logger.info(
            f'{self.episode_num},{key},{observation_type.name}')
        if observation_type == ObservationType.Hit:
            experience.state.hit_count += 1
        else:
            self._reward_experience(key, experience, observation_type)

        self.observation_seen += 1
        if self.observation_seen % self.checkpoint_steps == 0:
            self.logger.info(
                f'Observations seen so far: {self.observation_seen}, reward so far: {self.episode_reward}'
            )

    def _observe_expired_incomplete_experience(self, key: str,
                                               observation_type: ObservationType,
                                               info: Dict[str, any]):
        """Observe decisions that haven't been observed by the main cache,
        e.g. don't cache -> ttl up -> no miss."""
        assert observation_type == ObservationType.Expiration
        experience = info['value']
        self._reward_experience(key, experience, observation_type)

    def _reward_experience(self, key: str,
                           experience: CachingAgentIncompleteExperienceEntry,
                           observation_type: ObservationType):
        state = experience.state
        state.step_code = observation_type.value
        self._incomplete_experiences.delete(key)
        self.entry_hits_logger.info(
            f'{self.episode_num},{key},{experience.state.hit_count}')

        reward = self.converter.system_to_agent_reward(experience)
        if self.experimental_reward:
            # TODO add cache utility to state and reward
            pass

        self.agent.observe(
            preprocessed_states=experience.starting_state.to_numpy(),
            actions=experience.agent_action,
            internals=[],
            rewards=reward,
            next_states=experience.state.to_numpy(),
            terminals=False)
        self.episode_reward += reward
        self.reward_logger.info(f'{self.episode_num},{reward}')
        self.logger.debug(
            f'Key: {key} is in terminal state because: {str(observation_type)}')

        loss = self.agent.update()
        if loss is not None:
            self.loss_logger.info(f'{self.episode_num},{loss[0]}')

    def close(self):
        super().close()
        self.agent.reset()
        self._incomplete_experiences.clear()
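# NOTE: all three strategies carry the same TODO ("refactor into common RL
# interface for all strategies"). Below is a hypothetical sketch of what such a
# shared base could look like, holding the plumbing every strategy repeats
# (agent construction, incomplete-experience cache, file loggers, the
# observe/update step). The class and method names are assumptions, not code
# that exists in the project.
class RLStrategyBase:

    def __init__(self, name: str, config: Dict[str, any], result_dir: str,
                 state_space, action_space):
        self.checkpoint_steps = config['checkpoint_steps']
        self.observation_seen = 0
        self.cum_reward = 0
        self._incomplete_experiences = TTLCache(InMemoryStorage())
        self.agent = Agent.from_spec(config['agent_config'],
                                     state_space=state_space,
                                     action_space=action_space)
        self.logger = logging.getLogger(__name__)
        self.reward_logger = create_file_logger(name=f'{name}_reward_logger',
                                                result_dir=result_dir)
        self.loss_logger = create_file_logger(name=f'{name}_loss_logger',
                                              result_dir=result_dir)
        self.key_vocab = Vocabulary()

    def train_on(self, experience, reward: float, terminal: bool):
        # shared observe/update/logging step used by every strategy's reward path
        self.agent.observe(
            preprocessed_states=experience.starting_state.to_numpy(),
            actions=experience.agent_action,
            internals=[],
            rewards=reward,
            next_states=experience.state.to_numpy(),
            terminals=terminal)
        self.cum_reward += reward
        self.reward_logger.info(f'{reward}')
        loss = self.agent.update()
        if loss is not None:
            self.loss_logger.info(f'{loss[0]}')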