Example #1
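Checks that DDPG.random_action() returns one value per action dimension and that every value lies within the agent's [LBOUND, RBOUND] range.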
def test_random_action():
    ddpg = DDPG(N_STATE, N_ACTION)
    randomized_action = ddpg.random_action()
    assert randomized_action.shape[0] == N_ACTION
    # check that every action value lies within [LBOUND, RBOUND]
    assert sum((randomized_action >= ddpg.LBOUND)
               & (randomized_action <= ddpg.RBOUND)) == N_ACTION
Example #2
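A helper that creates two DDPG agents with identical hyperparameters and deterministic actor weights (every parameter set to 0.5 for the source agent and to 1.0 for the target agent), which makes weight-mixing operations such as the tau-weighted soft update easy to verify.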
def create_two_ddpg_agents():
    source_ddpg = DDPG(N_STATE,
                       N_ACTION,
                       hparam_override={
                           "hidden1": N_HIDDEN_NODE,
                           "hidden2": N_HIDDEN_NODE,
                           "tau": TAU
                       })
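    # deterministically set every source actor parameter to 0.5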
    source_ddpg.actor.load_state_dict({
        state: torch.ones_like(param) / 2
        for state, param in source_ddpg.actor.state_dict().items()
    })

    target_ddpg = DDPG(N_STATE,
                       N_ACTION,
                       hparam_override={
                           "hidden1": N_HIDDEN_NODE,
                           "hidden2": N_HIDDEN_NODE,
                           "tau": TAU
                       })
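    # deterministically set every target actor parameter to 1.0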
    target_ddpg.actor.load_state_dict({
        state: torch.ones_like(param)
        for state, param in target_ddpg.actor.state_dict().items()
    })
    return source_ddpg, target_ddpg
Example #3
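Verifies that DDPG.observe() appends exactly one transition (reward, state, action, terminal flag) to the replay memory per call while the agent is in training mode, and that nothing is appended once is_training is set to False.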
def test_observe():
    def check_replay_buffer_size(length):
        assert ddpg.memory.limit == REPLAY_MEMORY_SIZE
        assert len(ddpg.memory.actions) == length
        assert len(ddpg.memory.rewards) == length
        assert len(ddpg.memory.terminals) == length
        assert len(ddpg.memory.observations) == length

    ddpg = DDPG(N_STATE,
                N_ACTION,
                hparam_override={"rmsize": REPLAY_MEMORY_SIZE})
    check_replay_buffer_size(SCALAR_ZERO)

    reward_t = SCALAR_ZERO
    states_t = [SCALAR_ZERO] * N_STATE
    action_t = [SCALAR_ZERO] * N_ACTION
    is_done_t = True

    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    check_replay_buffer_size(SCALAR_ONE)

    # Appending to the replay buffer is disabled outside of training mode,
    # so the stored experience size must stay the same as above
    ddpg.is_training = False
    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    ddpg.observe(reward_t, states_t, action_t, is_done_t)
    check_replay_buffer_size(SCALAR_ONE)
Example #4
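Checks DDPG.update_policy() against reference loss values: replay-memory sampling is mocked to return all-ones batches, the actor and target-actor parameters are pinned to fixed values, and the resulting policy and value losses are compared with the expected ones for the given batch size, discount factor, and moving-average setting.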
def test_update_policy(test_vector, mocker, _seed):
    batch_size, discount, is_movingavg, ref_policy_loss, ref_value_loss = test_vector.values()

    mocked_trace = mocker.patch(
        'nncf.automl.agent.ddpg.memory.SequentialMemory.sample_and_split')
    # state_batch, action_batch, reward_batch, next_state_batch, terminal_batch
    mocked_trace.return_value = (
        np.ones((batch_size, N_STATE)),
        np.ones((batch_size, N_ACTION)),
        np.ones((batch_size, SCALAR_ONE)),
        np.ones((batch_size, N_STATE)),
        np.ones((batch_size, SCALAR_ONE)),
    )

    hparams = {
        "hidden1": N_HIDDEN_NODE,
        "hidden2": N_HIDDEN_NODE,
        "bsize": batch_size,
        "discount": discount,
        "window_length": SCALAR_ONE,
    }

    ddpg = DDPG(N_STATE, N_ACTION, hparam_override=hparams)
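    # pin the actor parameters at 0.5 and the target-actor parameters at 1.0 so the losses are reproducible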
    ddpg.actor.load_state_dict({
        state: torch.ones_like(param) / 2
        for state, param in ddpg.actor.state_dict().items()
    })
    ddpg.actor_target.load_state_dict({
        state: torch.ones_like(param)
        for state, param in ddpg.actor_target.state_dict().items()
    })

    ddpg.moving_average = is_movingavg
    ddpg.update_policy()
    np.testing.assert_almost_equal(ddpg.policy_loss.item(), ref_policy_loss)
    np.testing.assert_almost_equal(ddpg.value_loss.item(), ref_value_loss)
Example #5
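Tests DDPG.select_action() with an actor whose parameters are all set to 1.0: when epsilon decay is enabled the selected action must match a precomputed reference for the given episode; otherwise the action is expected to be exactly 1.0 in every dimension.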
def test_select_action(episode_action_pair, decay_epsilon, _seed):
    episode, reference_action = episode_action_pair

    hparams = {
        "hidden1": N_HIDDEN_NODE,
        "hidden2": N_HIDDEN_NODE,
        "init_delta": 0.5,
        "delta_decay": 0.99,
        "warmup_iter_number": 5
    }

    ddpg = DDPG(N_STATE, N_ACTION, hparam_override=hparams)
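    # set every actor parameter to 1.0 so the policy output is deterministic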
    ddpg.actor.load_state_dict({
        state: torch.ones_like(param)
        for state, param in ddpg.actor.state_dict().items()
    })

    s_t = [1.0] * N_STATE
    selected_action = ddpg.select_action(s_t, episode, decay_epsilon)

    if decay_epsilon:
        np.testing.assert_allclose(selected_action, reference_action)
    else:
        assert all(selected_action == 1.0)
Example #6
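The apply_init() method of the AutoQ precision initializer: it optionally prepares dump directories and a TensorBoard writer, builds a QuantizationEnv and a DDPG agent, runs the policy search, and writes the per-quantizer configurations of the best policy into the final quantizer setup.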
    def apply_init(self) -> SingleConfigQuantizerSetup:
        from nncf.automl.environment.quantization_env import QuantizationEnv
        from nncf.automl.agent.ddpg.ddpg import DDPG
        from nncf.debug import DEBUG_LOG_DIR

        if self._dump_autoq_data or is_debug():
            dump_dir = self._init_args.config.get('log_dir', None)
            if dump_dir is None:
                dump_dir = DEBUG_LOG_DIR
            self.dump_dir = Path(dump_dir) / Path("autoq_agent_dump")
            self.dump_dir.mkdir(parents=True, exist_ok=True)

            self.policy_dict = OrderedDict()  # key: episode
            self.best_policy_dict = OrderedDict()  # key: episode

            self._init_args.config['episodic_nncfcfg'] = self.dump_dir / "episodic_nncfcfg"
            os.makedirs(self._init_args.config['episodic_nncfcfg'], exist_ok=True)

            try:
                from torch.utils.tensorboard import SummaryWriter
                self.tb_writer = SummaryWriter(self.dump_dir)
                # log compression config to tensorboard
                self.tb_writer.add_text(
                    'AutoQ/run_config',
                    json.dumps(self._init_args.config['compression'],
                               indent=4, sort_keys=False).replace("\n", "\n\n"),
                    0)
            except ModuleNotFoundError:
                logger.warning("Tensorboard installation not found! Install tensorboard Python package "
                               "in order for AutoQ tensorboard statistics data to be dumped")

        start_ts = datetime.now()

        from nncf.automl.environment.quantization_env import QuantizationEnvParams
        env_params = QuantizationEnvParams(compression_ratio=self._params.compression_ratio,
            eval_subset_ratio=self._params.eval_subset_ratio,
            skip_constraint=self._params.skip_constraint,
            finetune=self._params.finetune,
            bits=self._params.bits,
            dump_init_precision_data=self._dump_autoq_data,
            log_dir=Path(DEBUG_LOG_DIR) / Path("autoq"))

        # Instantiate Quantization Environment
        env = QuantizationEnv(
            self._model,
            self.quantization_controller,
            self._hw_precision_constraints,
            self._init_args.data_loader,
            self._init_args.eval_fn,
            hw_config_type=self._hw_cfg_type,
            params=env_params)

        nb_state = len(env.state_list)
        nb_action = 1

        # Instantiate Automation Agent
        agent = DDPG(nb_state, nb_action, self._iter_number, hparam_override=self._ddpg_hparams_override)

        if self._dump_autoq_data and self.tb_writer is not None:
            self.tb_writer.add_text('AutoQ/state_embedding', env.master_df[env.state_list].to_markdown())

        best_policy, best_reward = self._search(agent, env)

        end_ts = datetime.now()

        final_qid_vs_qconfig_map = env.select_config_for_actions(best_policy)

        final_quantizer_setup = self.quantization_controller.get_quantizer_setup_for_current_state()
        for qp_id, qconf in final_qid_vs_qconfig_map.items():
            final_quantizer_setup.quantization_points[qp_id].qconfig = qconf

        logger.info('[AutoQ] best_reward: {}'.format(best_reward))
        logger.info('[AutoQ] best_policy: {}'.format(best_policy))
        logger.info("[AutoQ] Search Complete")
        logger.info("[AutoQ] Elapsed time of AutoQ Precision Initialization: {}".format(end_ts - start_ts))
        return final_quantizer_setup
Example #7
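Expects the DDPG constructor to raise an appropriate exception (TypeError, ZeroDivisionError, or RuntimeError) when the state or action dimensions are invalid.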
def test_create_ddpg_with_invalid_input(num_state, num_action):
    with pytest.raises((TypeError, ZeroDivisionError, RuntimeError)):
        DDPG(num_state, num_action, {})
Example #8
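A smoke test: constructing a DDPG agent with valid state and action dimensions must not raise.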
def test_create_ddpg_with_valid_input(num_state, num_action):
    DDPG(num_state, num_action)