def create_two_ddpg_agents():
    source_ddpg = DDPG(N_STATE, N_ACTION,
                       hparam_override={
                           "hidden1": N_HIDDEN_NODE,
                           "hidden2": N_HIDDEN_NODE,
                           "tau": TAU
                       })
    source_ddpg.actor.load_state_dict(
        {state: torch.ones_like(param) / 2 for state, param in source_ddpg.actor.state_dict().items()})

    target_ddpg = DDPG(N_STATE, N_ACTION,
                       hparam_override={
                           "hidden1": N_HIDDEN_NODE,
                           "hidden2": N_HIDDEN_NODE,
                           "tau": TAU
                       })
    target_ddpg.actor.load_state_dict(
        {state: torch.ones_like(param) for state, param in target_ddpg.actor.state_dict().items()})

    return source_ddpg, target_ddpg
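
# NOTE: illustrative sketch, not part of the test suite or of the DDPG class. The two agents
# above are given constant actor weights (0.5 vs 1.0) so that the effect of a Polyak/soft
# target update with mixing factor TAU is easy to verify by hand. A hypothetical stand-alone
# version of that update rule could look like this:
def _soft_update_sketch(target_net, source_net, tau):
    # target <- (1 - tau) * target + tau * source, applied parameter-wise
    for target_param, source_param in zip(target_net.parameters(), source_net.parameters()):
        target_param.data.copy_((1.0 - tau) * target_param.data + tau * source_param.data)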
def test_random_action():
    ddpg = DDPG(N_STATE, N_ACTION)
    randomized_action = ddpg.random_action()
    assert randomized_action.shape[0] == N_ACTION

    # check action value within bound
    assert sum((randomized_action >= ddpg.LBOUND) & (randomized_action <= ddpg.RBOUND)) == N_ACTION
def test_observe():
    def check_replay_buffer_size(length):
        assert ddpg.memory.limit == REPLAY_MEMORY_SIZE
        assert len(ddpg.memory.actions) == length
        assert len(ddpg.memory.rewards) == length
        assert len(ddpg.memory.terminals) == length
        assert len(ddpg.memory.observations) == length

    ddpg = DDPG(N_STATE, N_ACTION, hparam_override={"rmsize": REPLAY_MEMORY_SIZE})
    check_replay_buffer_size(SCALAR_ZERO)

    reward_t = SCALAR_ZERO
    states_t = [SCALAR_ZERO] * N_STATE
    action_t = [SCALAR_ZERO] * N_ACTION
    is_done_t = True
    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    check_replay_buffer_size(SCALAR_ONE)

    # Replay buffer appending is disabled in non-training mode,
    # so the number of stored experiences must stay the same as above
    ddpg.is_training = False
    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    check_replay_buffer_size(SCALAR_ONE)
def test_update_policy(test_vector, mocker, _seed):
    batch_size, discount, is_movingavg, ref_policy_loss, ref_value_loss = test_vector.values()

    mocked_trace = mocker.patch('nncf.automl.agent.ddpg.memory.SequentialMemory.sample_and_split')
    # state_batch, action_batch, reward_batch, next_state_batch, terminal_batch
    mocked_trace.return_value = (
        np.ones((batch_size, N_STATE)),
        np.ones((batch_size, N_ACTION)),
        np.ones((batch_size, SCALAR_ONE)),
        np.ones((batch_size, N_STATE)),
        np.ones((batch_size, SCALAR_ONE)),
    )

    hparams = {
        "hidden1": N_HIDDEN_NODE,
        "hidden2": N_HIDDEN_NODE,
        "bsize": batch_size,
        "discount": discount,
        "window_length": SCALAR_ONE,
    }
    ddpg = DDPG(N_STATE, N_ACTION, hparam_override=hparams)
    ddpg.actor.load_state_dict(
        {state: torch.ones_like(param) / 2 for state, param in ddpg.actor.state_dict().items()})
    ddpg.actor_target.load_state_dict(
        {state: torch.ones_like(param) for state, param in ddpg.actor_target.state_dict().items()})

    ddpg.moving_average = is_movingavg
    ddpg.update_policy()

    np.testing.assert_almost_equal(ddpg.policy_loss.item(), ref_policy_loss)
    np.testing.assert_almost_equal(ddpg.value_loss.item(), ref_value_loss)
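
# Illustrative only (not the agent's internals): under the standard DDPG formulation that
# test_update_policy exercises, the critic regresses towards the Bellman target sketched
# below, while the actor loss is the negated critic value of its own actions. The helper
# name and arguments are hypothetical.
def _critic_target_sketch(reward_batch, next_q_batch, terminal_batch, discount):
    # r + discount * Q_target(s', actor_target(s')), with the bootstrap term cut off
    # for terminal transitions
    return reward_batch + discount * (1.0 - terminal_batch) * next_q_batch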
def test_select_action(episode_action_pair, decay_epsilon, _seed):
    episode, reference_action = episode_action_pair

    hparams = {
        "hidden1": N_HIDDEN_NODE,
        "hidden2": N_HIDDEN_NODE,
        "init_delta": 0.5,
        "delta_decay": 0.99,
        "warmup_iter_number": 5
    }
    ddpg = DDPG(N_STATE, N_ACTION, hparam_override=hparams)
    ddpg.actor.load_state_dict(
        {state: torch.ones_like(param) for state, param in ddpg.actor.state_dict().items()})

    s_t = [1.0] * N_STATE
    selected_action = ddpg.select_action(s_t, episode, decay_epsilon)

    if decay_epsilon:
        np.testing.assert_allclose(selected_action, reference_action)
    else:
        assert all(selected_action == 1.0)
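
# Illustrative only: test_select_action relies on exploration noise whose scale is decayed
# across episodes when decay_epsilon is True. One plausible schedule matching the hparams
# above (the exact placement of the warm-up offset is an assumption, not taken from the
# agent's code):
def _noise_scale_sketch(init_delta, delta_decay, episode, warmup_iter_number):
    # keep the full noise scale during warm-up, then decay it geometrically
    if episode < warmup_iter_number:
        return init_delta
    return init_delta * (delta_decay ** (episode - warmup_iter_number))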
def apply_init(self) -> SingleConfigQuantizerSetup:
    from nncf.automl.environment.quantization_env import QuantizationEnv
    from nncf.automl.agent.ddpg.ddpg import DDPG
    from nncf.debug import DEBUG_LOG_DIR

    if self._dump_autoq_data or is_debug():
        dump_dir = self._init_args.config.get('log_dir', None)
        if dump_dir is None:
            dump_dir = DEBUG_LOG_DIR
        self.dump_dir = Path(dump_dir) / Path("autoq_agent_dump")
        self.dump_dir.mkdir(parents=True, exist_ok=True)

        self.policy_dict = OrderedDict()  # key: episode
        self.best_policy_dict = OrderedDict()  # key: episode

        self._init_args.config['episodic_nncfcfg'] = self.dump_dir / "episodic_nncfcfg"
        os.makedirs(self._init_args.config['episodic_nncfcfg'], exist_ok=True)

        try:
            from torch.utils.tensorboard import SummaryWriter
            self.tb_writer = SummaryWriter(self.dump_dir)
            # log compression config to tensorboard
            self.tb_writer.add_text('AutoQ/run_config',
                                    json.dumps(self._init_args.config['compression'],
                                               indent=4, sort_keys=False).replace("\n", "\n\n"), 0)
        except ModuleNotFoundError:
            logger.warning("Tensorboard installation not found! Install tensorboard Python package "
                           "in order for AutoQ tensorboard statistics data to be dumped")

    start_ts = datetime.now()

    from nncf.automl.environment.quantization_env import QuantizationEnvParams
    env_params = QuantizationEnvParams(
        compression_ratio=self._params.compression_ratio,
        eval_subset_ratio=self._params.eval_subset_ratio,
        skip_constraint=self._params.skip_constraint,
        finetune=self._params.finetune,
        bits=self._params.bits,
        dump_init_precision_data=self._dump_autoq_data,
        log_dir=Path(DEBUG_LOG_DIR) / Path("autoq"))

    # Instantiate Quantization Environment
    env = QuantizationEnv(
        self._model,
        self.quantization_controller,
        self._hw_precision_constraints,
        self._init_args.data_loader,
        self._init_args.eval_fn,
        hw_config_type=self._hw_cfg_type,
        params=env_params)

    nb_state = len(env.state_list)
    nb_action = 1

    # Instantiate Automation Agent
    agent = DDPG(nb_state, nb_action, self._iter_number, hparam_override=self._ddpg_hparams_override)

    if self._dump_autoq_data and self.tb_writer is not None:
        self.tb_writer.add_text('AutoQ/state_embedding', env.master_df[env.state_list].to_markdown())

    best_policy, best_reward = self._search(agent, env)

    end_ts = datetime.now()

    final_qid_vs_qconfig_map = env.select_config_for_actions(best_policy)

    final_quantizer_setup = self.quantization_controller.get_quantizer_setup_for_current_state()
    for qp_id, qconf in final_qid_vs_qconfig_map.items():
        final_quantizer_setup.quantization_points[qp_id].qconfig = qconf

    logger.info('[AutoQ] best_reward: {}'.format(best_reward))
    logger.info('[AutoQ] best_policy: {}'.format(best_policy))
    logger.info("[AutoQ] Search Complete")
    logger.info("[AutoQ] Elapsed time of AutoQ Precision Initialization: {}".format(end_ts - start_ts))
    return final_quantizer_setup
def test_create_ddpg_with_invalid_input(num_state, num_action):
    with pytest.raises((TypeError, ZeroDivisionError, RuntimeError)):
        DDPG(num_state, num_action, {})
def test_create_ddpg_with_valid_input(num_state, num_action):
    DDPG(num_state, num_action)