def prepare_for_trainer(cls, config):
    """Build the keyword arguments a trainer needs from ``config``.

    Creates one actor and ``n_critics`` critics through
    ``Registry.get_agent``, instantiates ``cls`` with them and returns the
    algorithm together with the shapes and hyper-parameters read from
    ``config["shared"]``.

    Args:
        cls: callable used to construct the algorithm; it receives the
            remaining ``config["algorithm"]`` entries plus ``actor``,
            ``critic``, ``critics``, ``n_step`` and ``gamma``.
        config: mapping with ``"shared"``, ``"actor"``, ``"critic"`` and
            ``"algorithm"`` sections. It is not modified.

    Returns:
        dict with keys ``"algorithm"``, ``"state_shape"``,
        ``"action_shape"``, ``"n_step"``, ``"gamma"`` and ``"history_len"``.

    Raises:
        KeyError: if a required section or ``"shared"`` entry is missing.
    """
    import copy

    # Imported lazily to avoid a circular import with the registry module.
    from catalyst.contrib.registry import Registry

    # Deep copy: the sections below are popped from, and a shallow
    # ``config.copy()`` would strip those keys from the caller's nested
    # dicts, breaking any later reuse of the same config.
    config_ = copy.deepcopy(config)
    shared = config_["shared"]

    history_len = shared["history_len"]
    n_step = shared["n_step"]
    gamma = shared["gamma"]

    # Shape/size arguments passed to every agent (actor and critics alike).
    agent_shapes = {
        "state_shape": (history_len, shared["state_size"]),
        "action_size": shared["action_size"],
    }

    actor_params = config_["actor"]
    actor_fn = actor_params.pop("agent", None)
    actor = Registry.get_agent(agent=actor_fn, **agent_shapes, **actor_params)

    critic_params = config_["critic"]
    critic_fn = critic_params.pop("agent", None)
    critic = Registry.get_agent(
        agent=critic_fn, **agent_shapes, **critic_params
    )

    algorithm_params = config_["algorithm"]
    n_critics = algorithm_params.pop("n_critics", 2)
    # ``critic`` above counts as the first critic, hence ``n_critics - 1``.
    critics = [
        Registry.get_agent(agent=critic_fn, **agent_shapes, **critic_params)
        for _ in range(n_critics - 1)
    ]

    algorithm = cls(
        **algorithm_params,
        actor=actor,
        critic=critic,
        critics=critics,
        n_step=n_step,
        gamma=gamma,
    )

    return {
        "algorithm": algorithm,
        "state_shape": (shared["state_size"],),
        "action_shape": (shared["action_size"],),
        "n_step": n_step,
        "gamma": gamma,
        "history_len": history_len,
    }
def prepare_for_sampler(cls, config):
    """Build the keyword arguments a sampler needs from ``config``.

    Creates the actor through ``Registry.get_agent`` using the shapes in
    ``config["shared"]`` and the remaining ``config["actor"]`` entries.

    Args:
        cls: unused here; present because this is called on the class.
        config: mapping with ``"shared"`` and ``"actor"`` sections. It is
            not modified.

    Returns:
        dict with keys ``"actor"`` and ``"history_len"``.

    Raises:
        KeyError: if a required section or ``"shared"`` entry is missing.
    """
    import copy

    # Imported lazily to avoid a circular import with the registry module.
    from catalyst.contrib.registry import Registry

    # Deep copy: ``"agent"`` is popped below, and a shallow ``config.copy()``
    # would remove it from the caller's nested ``"actor"`` dict as well.
    config_ = copy.deepcopy(config)
    shared = config_["shared"]
    history_len = shared["history_len"]

    actor_params = config_["actor"]
    actor_fn = actor_params.pop("agent", None)
    actor = Registry.get_agent(
        agent=actor_fn,
        state_shape=(history_len, shared["state_size"]),
        action_size=shared["action_size"],
        **actor_params,
    )

    return {"actor": actor, "history_len": history_len}