예제 #1
0
    def __init__(
        self,
        agent: Union[ActorSpec, CriticSpec],
        env: EnvironmentSpec,
        db_server: DBSpec = None,
        exploration_handler: ExplorationHandler = None,
        logdir: str = None,
        id: int = 0,
        mode: str = "infer",
        buffer_size: int = int(1e4),
        weights_sync_period: int = 1,
        seeds: List = None,
        episode_limit: int = None,
        force_store: bool = False,
        gc_period: int = 10,
    ):
        """
        Configure the sampler and build its episode runner.

        Args:
            agent: actor or critic handed to the episode runner
            env: environment specification; its ``discrete_actions``
                flag selects the weight mode ("critic" or "actor")
            db_server: stored as ``self.db_server``
            exploration_handler: stored as-is
            logdir: forwarded to ``self._prepare_logger``
            id: sampler index; also offsets the seed (``42 + id``)
            mode: ``"infer"`` makes the episode runner deterministic;
                also forwarded to ``self._prepare_logger``
            buffer_size: capacity passed to the episode runner
            weights_sync_period: stored as-is
            seeds: stored as-is
            episode_limit: replaced by ``_BIG_NUM`` when falsy
            force_store: stored as ``self._force_store``
            gc_period: stored as ``self._gc_period``
        """
        # identity, device and run mode
        self._device = UtilsFactory.prepare_device()
        self._sampler_id = id
        self._seed = 42 + id
        self.seeds = seeds
        self._infer = (mode == "infer")

        # logging
        self._prepare_logger(logdir, mode)

        # environment, agent, exploration handler and the episode runner
        self.episode_index = 0
        self.exploration_handler = exploration_handler
        self.agent = agent
        self.env = env
        runner_kwargs = dict(
            env=self.env,
            agent=self.agent,
            device=self._device,
            capacity=buffer_size,
            deterministic=self._infer,
        )
        self.episode_runner = EpisodeRunner(**runner_kwargs)

        # synchronization configuration
        self.db_server = db_server
        self.weights_sync_period = weights_sync_period
        self.episode_limit = episode_limit if episode_limit else _BIG_NUM
        self._force_store = force_store
        if env.discrete_actions:
            self._sampler_weight_mode = "critic"
        else:
            self._sampler_weight_mode = "actor"
        self._gc_period = gc_period
예제 #2
0
    def __init__(
        self,
        critic: CriticSpec,
        gamma: float,
        n_step: int,
        critic_loss_params: Dict = None,
        critic_optimizer_params: Dict = None,
        critic_scheduler_params: Dict = None,
        critic_grad_clip_params: Dict = None,
        critic_tau: float = 1.0,
        **kwargs
    ):
        """
        Set up a critic, its target copy and the training utilities.

        Args:
            critic: critic model; moved to the prepared device
            gamma: stored as ``self._gamma``
            n_step: stored as ``self._n_step``
            critic_loss_params: passed to
                ``get_agent_stuff_from_params`` as ``loss_params``
            critic_optimizer_params: passed as ``optimizer_params``
            critic_scheduler_params: passed as ``scheduler_params``
            critic_grad_clip_params: passed as ``grad_clip_params``
            critic_tau: stored as ``self.critic_tau``
            **kwargs: forwarded untouched to ``self._init``
        """
        device = UtilsFactory.prepare_device()
        self._device = device

        # the target critic is a deep copy taken after the move to device
        self.critic = critic.to(device)
        critic_clone = copy.deepcopy(critic)
        self.target_critic = critic_clone.to(device)

        # criterion, optimizer, scheduler and grad clipping in one call
        prepared = get_agent_stuff_from_params(
            agent=self.critic,
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params,
        )
        # each helper output is stored under the matching attribute
        self._critic_loss_params, self.critic_criterion = \
            prepared["loss_params"], prepared["criterion"]
        self._critic_optimizer_params, self.critic_optimizer = \
            prepared["optimizer_params"], prepared["optimizer"]
        self._critic_scheduler_params, self.critic_scheduler = \
            prepared["scheduler_params"], prepared["scheduler"]
        self._critic_grad_clip_params, self.critic_grad_clip_fn = \
            prepared["grad_clip_params"], prepared["grad_clip_fn"]

        # other hyperparameters
        self._gamma = gamma
        self._n_step = n_step
        self.critic_tau = critic_tau

        # algorithm-specific initialization
        self._init(**kwargs)
예제 #3
0
    def __init__(self,
                 actor: ActorSpec,
                 gamma: float,
                 n_step: int,
                 actor_loss_params: Dict = None,
                 actor_optimizer_params: Dict = None,
                 actor_scheduler_params: Dict = None,
                 actor_grad_clip_params: Dict = None,
                 **kwargs):
        """
        Set up an actor and its training utilities.

        Args:
            actor: actor model; moved to the prepared device
            gamma: stored as ``self._gamma``
            n_step: stored as ``self._n_step``
            actor_loss_params: passed to
                ``get_agent_stuff_from_params`` as ``loss_params``
            actor_optimizer_params: passed as ``optimizer_params``
            actor_scheduler_params: passed as ``scheduler_params``
            actor_grad_clip_params: passed as ``grad_clip_params``
            **kwargs: forwarded untouched to ``self._init``
        """
        device = UtilsFactory.prepare_device()
        self._device = device
        self.actor = actor.to(device)

        # criterion, optimizer, scheduler and grad clipping in one call
        prepared = get_agent_stuff_from_params(
            agent=self.actor,
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params,
        )
        # each helper output is stored under the matching attribute
        self._actor_loss_params, self.actor_criterion = \
            prepared["loss_params"], prepared["criterion"]
        self._actor_optimizer_params, self.actor_optimizer = \
            prepared["optimizer_params"], prepared["optimizer"]
        self._actor_scheduler_params, self.actor_scheduler = \
            prepared["scheduler_params"], prepared["scheduler"]
        self._actor_grad_clip_params, self.actor_grad_clip_fn = \
            prepared["grad_clip_params"], prepared["grad_clip_fn"]

        # other hyperparameters
        self._gamma = gamma
        self._n_step = n_step

        # algorithm-specific initialization
        self._init(**kwargs)
예제 #4
0
    def __init__(self,
                 actor,
                 env,
                 id,
                 logdir=None,
                 redis_server=None,
                 redis_prefix=None,
                 buffer_size=int(1e4),
                 history_len=1,
                 weights_sync_period=1,
                 mode="infer",
                 resume=None,
                 action_noise_prob=0,
                 action_noise_t=1,
                 random_process=None,
                 param_noise_prob=0,
                 param_noise_d=0.2,
                 param_noise_steps=1000,
                 seeds=None,
                 action_clip=(-1, 1),
                 episode_limit=None,
                 force_store=False,
                 min_episode_steps=None,
                 min_episode_reward=None):
        """
        Seed the process, copy the actor and allocate the sampler buffer.

        Args:
            actor: actor model; a deep copy is moved to the prepared
                device, the passed object itself is not stored
            env: environment; must expose ``observation_shape`` and
                ``action_shape`` for the buffer
            id: sampler index; the global seed is ``42 + id``
            logdir: when given, a ``SummaryWriter`` is created in
                a fresh sub-directory of it; otherwise no logger
            redis_server: stored as-is
            redis_prefix: stored; ``None``/empty becomes ``""``
            buffer_size: capacity of the ``SamplerBuffer``
            history_len: stored as-is
            weights_sync_period: stored as-is
            mode: ``"infer"`` turns off action and parameter noise
            resume: stored as-is
            action_noise_prob: stored (forced to 0 in infer mode)
            action_noise_t: stored as-is
            random_process: stored; defaults to ``RandomProcess()``
            param_noise_prob: stored (forced to 0 in infer mode)
            param_noise_d: stored as-is
            param_noise_steps: stored as-is
            seeds: stored as-is; must be ``None`` whenever an
                episode threshold is given
            action_clip: stored as-is
            episode_limit: falsy values become ``2**32 - 2``
            force_store: stored as-is
            min_episode_steps: episode-length threshold;
                ``None`` means ``-int(1e6)``
            min_episode_reward: episode-reward threshold;
                ``None`` means ``-int(1e6)``

        Raises:
            AssertionError: if ``seeds`` is combined with
                ``min_episode_steps`` or ``min_episode_reward``
        """

        # seed everything before any model copy is created
        self._seed = 42 + id
        set_global_seeds(self._seed)

        self._sampler_id = id
        self._device = UtilsFactory.prepare_device()
        self.actor = copy.deepcopy(actor).to(self._device)
        self.env = env
        self.redis_server = redis_server
        self.redis_prefix = redis_prefix or ""
        self.resume = resume
        self.episode_limit = episode_limit or int(2**32 - 2)
        self.force_store = force_store
        self.hard_seeds = set()

        # episode thresholds and fixed seeds are mutually exclusive
        min_episode_flag_ = \
            min_episode_steps is None and min_episode_reward is None
        assert min_episode_flag_ or seeds is None

        # Only a missing threshold falls back to the sentinel: an explicit 0
        # is a valid threshold (the assert above already treats it as "set"),
        # so it must not be swallowed by a truthiness check.
        self.min_episode_steps = \
            -int(1e6) if min_episode_steps is None else min_episode_steps
        self.min_episode_reward = \
            -int(1e6) if min_episode_reward is None else min_episode_reward

        self.history_len = history_len
        self.buffer_size = buffer_size
        self.weights_sync_period = weights_sync_period
        self.episode_index = 0
        self.action_clip = action_clip

        self.infer = mode == "infer"
        self.seeds = seeds

        self.action_noise_prob = action_noise_prob
        self.action_noise_t = action_noise_t
        self.random_process = random_process or RandomProcess()

        self.param_noise_prob = param_noise_prob
        self.param_noise_d = param_noise_d
        self.param_noise_steps = param_noise_steps

        # no exploration noise of any kind during inference
        if self.infer:
            self.action_noise_prob = 0
            self.param_noise_prob = 0

        if logdir is not None:
            # NOTE(review): "%M" appears twice in the format; left unchanged
            # so that log directory names stay the same as before.
            current_date = datetime.now().strftime("%y-%m-%d-%H-%M-%S-%M-%f")
            logpath = f"{logdir}/sampler-{mode}-{id}-{current_date}"
            os.makedirs(logpath, exist_ok=True)
            self.logger = SummaryWriter(logpath)
        else:
            self.logger = None

        self.buffer = SamplerBuffer(
            capacity=self.buffer_size,
            observation_shape=self.env.observation_shape,
            action_shape=self.env.action_shape)
예제 #5
0
    def __init__(
        self,
        actor: ActorSpec,
        critic: CriticSpec,
        gamma: float,
        n_step: int,
        actor_loss_params: Dict = None,
        critic_loss_params: Dict = None,
        actor_optimizer_params: Dict = None,
        critic_optimizer_params: Dict = None,
        actor_scheduler_params: Dict = None,
        critic_scheduler_params: Dict = None,
        actor_grad_clip_params: Dict = None,
        critic_grad_clip_params: Dict = None,
        actor_tau=1.0,
        critic_tau=1.0,
        action_boundaries=None,
        **kwargs
    ):
        """
        Set up actor, critic, their target copies and training utilities.

        Args:
            actor: actor model; moved to the prepared device
            critic: critic model; moved to the prepared device
            gamma: stored as ``self._gamma``
            n_step: stored as ``self._n_step``
            actor_loss_params: ``loss_params`` for the actor helper call
            critic_loss_params: ``loss_params`` for the critic helper call
            actor_optimizer_params: ``optimizer_params`` for the actor
            critic_optimizer_params: ``optimizer_params`` for the critic
            actor_scheduler_params: ``scheduler_params`` for the actor
            critic_scheduler_params: ``scheduler_params`` for the critic
            actor_grad_clip_params: ``grad_clip_params`` for the actor
            critic_grad_clip_params: ``grad_clip_params`` for the critic
            actor_tau: stored as ``self._actor_tau``
            critic_tau: stored as ``self._critic_tau``
            action_boundaries: pair of (min, max) action bounds;
                ``self._action_boundaries`` is assigned only when
                this argument is not ``None``
            **kwargs: forwarded untouched to ``self._init``

        Raises:
            AssertionError: if ``action_boundaries`` is given and does
                not have exactly two elements
        """
        device = UtilsFactory.prepare_device()
        self._device = device

        # online models first, then target copies made from them
        self.actor = actor.to(device)
        self.critic = critic.to(device)
        self.target_actor = copy.deepcopy(actor).to(device)
        self.target_critic = copy.deepcopy(critic).to(device)

        # actor: criterion, optimizer, scheduler, grad clipping
        actor_parts = get_agent_stuff_from_params(
            agent=self.actor,
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params,
        )
        self._actor_loss_params, self.actor_criterion = \
            actor_parts["loss_params"], actor_parts["criterion"]
        self._actor_optimizer_params, self.actor_optimizer = \
            actor_parts["optimizer_params"], actor_parts["optimizer"]
        self._actor_scheduler_params, self.actor_scheduler = \
            actor_parts["scheduler_params"], actor_parts["scheduler"]
        self._actor_grad_clip_params, self.actor_grad_clip_fn = \
            actor_parts["grad_clip_params"], actor_parts["grad_clip_fn"]

        # critic: criterion, optimizer, scheduler, grad clipping
        critic_parts = get_agent_stuff_from_params(
            agent=self.critic,
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params,
        )
        self._critic_loss_params, self.critic_criterion = \
            critic_parts["loss_params"], critic_parts["criterion"]
        self._critic_optimizer_params, self.critic_optimizer = \
            critic_parts["optimizer_params"], critic_parts["optimizer"]
        self._critic_scheduler_params, self.critic_scheduler = \
            critic_parts["scheduler_params"], critic_parts["scheduler"]
        self._critic_grad_clip_params, self.critic_grad_clip_fn = \
            critic_parts["grad_clip_params"], critic_parts["grad_clip_fn"]

        # other hyperparameters
        self._gamma = gamma
        self._n_step = n_step
        self._actor_tau, self._critic_tau = actor_tau, critic_tau

        boundaries_given = action_boundaries is not None
        if boundaries_given:
            assert len(action_boundaries) == 2, \
                "Should be min and max action boundaries"
            self._action_boundaries = action_boundaries

        # algorithm-specific initialization
        self._init(**kwargs)
예제 #6
0
    def __init__(self,
                 actor: ActorSpec,
                 critic: CriticSpec,
                 gamma: float,
                 n_step: int,
                 actor_loss_params: Dict = None,
                 critic_loss_params: Dict = None,
                 actor_optimizer_params: Dict = None,
                 critic_optimizer_params: Dict = None,
                 actor_scheduler_params: Dict = None,
                 critic_scheduler_params: Dict = None,
                 actor_grad_clip_params: Dict = None,
                 critic_grad_clip_params: Dict = None,
                 **kwargs):
        """
        Set up actor, critic and their training utilities (no targets).

        Args:
            actor: actor model; moved to the prepared device
            critic: critic model; moved to the prepared device
            gamma: stored as ``self._gamma``
            n_step: must be 1; stored as ``self._n_step``
            actor_loss_params: ``loss_params`` for the actor helper call
            critic_loss_params: ``loss_params`` for the critic helper call
            actor_optimizer_params: ``optimizer_params`` for the actor
            critic_optimizer_params: ``optimizer_params`` for the critic
            actor_scheduler_params: ``scheduler_params`` for the actor
            critic_scheduler_params: ``scheduler_params`` for the critic
            actor_grad_clip_params: ``grad_clip_params`` for the actor
            critic_grad_clip_params: ``grad_clip_params`` for the critic
            **kwargs: forwarded untouched to ``self._init``

        Raises:
            AssertionError: if ``n_step`` is not 1
        """
        device = UtilsFactory.prepare_device()
        self._device = device
        self.actor = actor.to(device)
        self.critic = critic.to(device)

        # actor: criterion, optimizer, scheduler, grad clipping
        actor_parts = get_agent_stuff_from_params(
            agent=self.actor,
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params,
        )
        self._actor_loss_params, self.actor_criterion = \
            actor_parts["loss_params"], actor_parts["criterion"]
        self._actor_optimizer_params, self.actor_optimizer = \
            actor_parts["optimizer_params"], actor_parts["optimizer"]
        self._actor_scheduler_params, self.actor_scheduler = \
            actor_parts["scheduler_params"], actor_parts["scheduler"]
        self._actor_grad_clip_params, self.actor_grad_clip_fn = \
            actor_parts["grad_clip_params"], actor_parts["grad_clip_fn"]

        # critic: criterion, optimizer, scheduler, grad clipping
        critic_parts = get_agent_stuff_from_params(
            agent=self.critic,
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params,
        )
        self._critic_loss_params, self.critic_criterion = \
            critic_parts["loss_params"], critic_parts["criterion"]
        self._critic_optimizer_params, self.critic_optimizer = \
            critic_parts["optimizer_params"], critic_parts["optimizer"]
        self._critic_scheduler_params, self.critic_scheduler = \
            critic_parts["scheduler_params"], critic_parts["scheduler"]
        self._critic_grad_clip_params, self.critic_grad_clip_fn = \
            critic_parts["grad_clip_params"], critic_parts["grad_clip_fn"]

        # other hyperparameters; multi-step returns are rejected up front
        assert n_step == 1, "For now, on-policy setup works only with n-step=1"
        self._n_step, self._gamma = n_step, gamma

        # algorithm-specific initialization
        self._init(**kwargs)
예제 #7
0
    def __init__(
        self,
        actor,
        critic,
        gamma,
        n_step,
        actor_optimizer_params,
        critic_optimizer_params,
        actor_grad_clip_params=None,
        critic_grad_clip_params=None,
        actor_loss_params=None,
        critic_loss_params=None,
        actor_scheduler_params=None,
        critic_scheduler_params=None,
        resume=None,
        load_optimizer=True,
        actor_tau=1.0,
        critic_tau=1.0,
        min_action=-1.0,
        max_action=1.0,
        **kwargs
    ):
        """
        Build actor/critic, target copies and all training utilities.

        Optimizers, schedulers, grad clippers and criterions are created
        through the ``OPTIMIZERS``, ``SCHEDULERS``, ``GRAD_CLIPPERS`` and
        ``CRITERIONS`` registries from the corresponding ``*_params``.

        Args:
            actor: actor model; moved to the prepared device
            critic: critic model; moved to the prepared device
            gamma: stored as ``self.gamma``
            n_step: stored as ``self.n_step``
            actor_optimizer_params: kwargs for the actor optimizer
            critic_optimizer_params: kwargs for the critic optimizer
            actor_grad_clip_params: kwargs for the actor grad clipper;
                falsy values become ``{}``
            critic_grad_clip_params: same, for the critic
            actor_loss_params: kwargs for the actor criterion;
                stored unchanged (may stay ``None``)
            critic_loss_params: same, for the critic
            actor_scheduler_params: kwargs for the actor scheduler;
                falsy values become ``{}``
            critic_scheduler_params: same, for the critic
            resume: when not ``None``, passed to
                ``self.load_checkpoint`` after ``self._init``
            load_optimizer: forwarded to ``self.load_checkpoint``
            actor_tau: stored as-is
            critic_tau: stored as-is
            min_action: stored as-is
            max_action: stored as-is
            **kwargs: forwarded untouched to ``self._init``
        """
        device = UtilsFactory.prepare_device()
        self._device = device

        # online models first, then target copies made from them
        self.actor = actor.to(device)
        self.critic = critic.to(device)
        self.target_actor = copy.deepcopy(actor).to(device)
        self.target_critic = copy.deepcopy(critic).to(device)

        # optimizers
        actor_weights = prepare_optimizable_params(self.actor)
        self.actor_optimizer = OPTIMIZERS.get_from_params(
            **actor_optimizer_params, params=actor_weights
        )
        critic_weights = prepare_optimizable_params(self.critic)
        self.critic_optimizer = OPTIMIZERS.get_from_params(
            **critic_optimizer_params, params=critic_weights
        )
        self.actor_optimizer_params = actor_optimizer_params
        self.critic_optimizer_params = critic_optimizer_params

        # schedulers; missing params are normalized to empty dicts
        if not actor_scheduler_params:
            actor_scheduler_params = {}
        if not critic_scheduler_params:
            critic_scheduler_params = {}
        self.actor_scheduler = SCHEDULERS.get_from_params(
            **actor_scheduler_params, optimizer=self.actor_optimizer
        )
        self.critic_scheduler = SCHEDULERS.get_from_params(
            **critic_scheduler_params, optimizer=self.critic_optimizer
        )
        self.actor_scheduler_params = actor_scheduler_params
        self.critic_scheduler_params = critic_scheduler_params

        self.gamma = gamma
        self.n_step = n_step

        # gradient clipping; missing params are normalized to empty dicts
        if not actor_grad_clip_params:
            actor_grad_clip_params = {}
        if not critic_grad_clip_params:
            critic_grad_clip_params = {}
        self.actor_grad_clip_fn = GRAD_CLIPPERS.get_from_params(
            **actor_grad_clip_params
        )
        self.critic_grad_clip_fn = GRAD_CLIPPERS.get_from_params(
            **critic_grad_clip_params
        )
        self.actor_grad_clip_params = actor_grad_clip_params
        self.critic_grad_clip_params = critic_grad_clip_params

        # criterions; the stored loss params keep the caller's raw value
        actor_criterion_kwargs = actor_loss_params or {}
        critic_criterion_kwargs = critic_loss_params or {}
        self.actor_criterion = CRITERIONS.get_from_params(
            **actor_criterion_kwargs
        )
        self.critic_criterion = CRITERIONS.get_from_params(
            **critic_criterion_kwargs
        )
        self.actor_loss_params = actor_loss_params
        self.critic_loss_params = critic_loss_params

        self.actor_tau, self.critic_tau = actor_tau, critic_tau
        self.min_action, self.max_action = min_action, max_action

        # algorithm-specific initialization, then optional restore
        self._init(**kwargs)

        if resume is None:
            return
        self.load_checkpoint(resume, load_optimizer=load_optimizer)
예제 #8
0
    def __init__(self,
                 actor,
                 critic,
                 gamma,
                 n_step,
                 actor_optimizer_params,
                 critic_optimizer_params,
                 actor_grad_clip_params=None,
                 critic_grad_clip_params=None,
                 actor_loss_params=None,
                 critic_loss_params=None,
                 actor_scheduler_params=None,
                 critic_scheduler_params=None,
                 resume=None,
                 load_optimizer=True,
                 actor_tau=1.0,
                 critic_tau=1.0,
                 min_action=-1.0,
                 max_action=1.0,
                 **kwargs):
        """
        Build actor/critic, target copies and all training utilities.

        Optimizers, schedulers, grad clippers and criterions are created
        through ``Registry`` from the corresponding ``*_params``.

        Args:
            actor: actor model; moved to the prepared device
            critic: critic model; moved to the prepared device
            gamma: stored as ``self.gamma``
            n_step: stored as ``self.n_step``
            actor_optimizer_params: kwargs for the actor optimizer
            critic_optimizer_params: kwargs for the critic optimizer
            actor_grad_clip_params: kwargs for the actor grad clipper;
                falsy values become ``{}``
            critic_grad_clip_params: same, for the critic
            actor_loss_params: kwargs for the actor criterion;
                stored unchanged (may stay ``None``)
            critic_loss_params: same, for the critic
            actor_scheduler_params: kwargs for the actor scheduler;
                falsy values become ``{}``
            critic_scheduler_params: same, for the critic
            resume: when not ``None``, passed to
                ``self.load_checkpoint`` after ``self._init``
            load_optimizer: forwarded to ``self.load_checkpoint``
            actor_tau: stored as-is
            critic_tau: stored as-is
            min_action: stored as-is
            max_action: stored as-is
            **kwargs: forwarded untouched to ``self._init``
        """
        # imported here, not at module level, to avoid a circular import
        from catalyst.contrib.registry import Registry

        device = UtilsFactory.prepare_device()
        self._device = device

        # online models first, then target copies made from them
        self.actor = actor.to(device)
        self.critic = critic.to(device)
        self.target_actor = copy.deepcopy(actor).to(device)
        self.target_critic = copy.deepcopy(critic).to(device)

        # optimizers
        self.actor_optimizer = Registry.get_optimizer(
            self.actor, **actor_optimizer_params)
        self.critic_optimizer = Registry.get_optimizer(
            self.critic, **critic_optimizer_params)
        self.actor_optimizer_params = actor_optimizer_params
        self.critic_optimizer_params = critic_optimizer_params

        # schedulers; missing params are normalized to empty dicts
        if not actor_scheduler_params:
            actor_scheduler_params = {}
        if not critic_scheduler_params:
            critic_scheduler_params = {}
        self.actor_scheduler = Registry.get_scheduler(
            self.actor_optimizer, **actor_scheduler_params)
        self.critic_scheduler = Registry.get_scheduler(
            self.critic_optimizer, **critic_scheduler_params)
        self.actor_scheduler_params = actor_scheduler_params
        self.critic_scheduler_params = critic_scheduler_params

        self.gamma = gamma
        self.n_step = n_step

        # gradient clipping; missing params are normalized to empty dicts
        if not actor_grad_clip_params:
            actor_grad_clip_params = {}
        if not critic_grad_clip_params:
            critic_grad_clip_params = {}
        self.actor_grad_clip_fn = Registry.get_grad_clip_fn(
            **actor_grad_clip_params)
        self.critic_grad_clip_fn = Registry.get_grad_clip_fn(
            **critic_grad_clip_params)
        self.actor_grad_clip_params = actor_grad_clip_params
        self.critic_grad_clip_params = critic_grad_clip_params

        # criterions; the stored loss params keep the caller's raw value
        actor_criterion_kwargs = actor_loss_params or {}
        critic_criterion_kwargs = critic_loss_params or {}
        self.actor_criterion = Registry.get_criterion(
            **actor_criterion_kwargs)
        self.critic_criterion = Registry.get_criterion(
            **critic_criterion_kwargs)
        self.actor_loss_params = actor_loss_params
        self.critic_loss_params = critic_loss_params

        self.actor_tau, self.critic_tau = actor_tau, critic_tau
        self.min_action, self.max_action = min_action, max_action

        # algorithm-specific initialization, then optional restore
        self._init(**kwargs)

        if resume is None:
            return
        self.load_checkpoint(resume, load_optimizer=load_optimizer)