def __init__(self, name, action_space, cfg, shared_objects={}):
    self.name = name
    self.pidx = 0
    self.actions_no = len(action_space)
    self.target_update_freq = 400
    self.batch_size = batch_size = cfg.general.batch_size
    self.update_freq = batch_size
    self.hist_len = 1
    self.cuda = cfg.general.use_cuda
    self.torch_obs = True
    self.dtype = TorchTypes(self.cuda)
    self.clip_grad = 0.25
    self.hidden_size = 256
    self.max_reward = 0

    # Policy network and a target copy of the configured model
    self.net = net = Policy(cfg, self.hidden_size)
    if self.cuda:
        net.cuda()
    self.target = get_model(cfg.model.name)(cfg.model)
    if self.cuda:
        self.net.cuda()
        self.target.cuda()

    # Per-game exploration schedule for the batch of games
    self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                   batch_size)

    self.batch_games = 0
    self.step_cnt = 0
    self.saved_transitions = []
    self._o, self._a, self._v = None, None, None
    self.live_idx = torch.linspace(0, batch_size - 1, batch_size) \
        .type(self.dtype.LongTensor)

    self.lr = .001
    self.gamma = .99
    self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
    self.optimizer.zero_grad()

    self.hidden_state = self.net.init_hidden(self.batch_size)
    self.obs_select = torch.LongTensor(
        [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15, 16, 17]
    ).type(self.dtype.LongTensor)
def __init__(self, name, action_space, cfg, shared_objects={}):
    super(BetaDQNBatchAgent, self).__init__()
    self.name = name
    self.pidx = 0
    self.actions_no = len(action_space)
    self.target_update_freq = cfg.training.target_update
    self.batch_size = batch_size = cfg.general.batch_size
    self.update_freq = batch_size
    self.hist_len = cfg.model.hist_len
    self.cuda = cfg.general.use_cuda
    self.torch_obs = True
    self.dtype = TorchTypes(self.cuda)

    # Reuse a shared model if one is provided, otherwise build a new one
    if "model" in shared_objects:
        self.net = net = shared_objects["model"]
        self.print_info("Training some shared model.")
    else:
        self.net = net = get_model(cfg.model.name)(cfg.model)
        if self.cuda:
            net.cuda()

    # Target network used by the (double) DQN update
    self.target = get_model(cfg.model.name)(cfg.model)
    if self.cuda:
        self.net.cuda()
        self.target.cuda()

    self.behaviour = DeterministicPolicy(self.net)
    self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                   batch_size)
    self.ddqn = cfg.agent.ddqn
    self.algorithm = DQNPolicyImprovement(self.net, self.target, cfg,
                                          self.ddqn)

    self.batch = []
    self.history = []
    self.batch_games = 0
    self.step_cnt = 0
    self._o = None
    self._a = None
    self.live_idx = torch.linspace(0, batch_size - 1, batch_size) \
        .type(self.dtype.LongTensor)
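# --- Hedged sketch, not part of the agent above ----------------------------
# The constructor keeps a separate `target` copy of the online network and a
# `target_update_freq`. A conventional hard sync of that target is sketched
# below under that assumption; the project's actual update may instead live
# inside DQNPolicyImprovement, so treat this only as an illustration.
def _sync_target_sketch(net, target, step_cnt, target_update_freq):
    # Copy the online parameters into the target network every N steps.
    if step_cnt % target_update_freq == 0:
        target.load_state_dict(net.state_dict())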
def __init__(self, name, action_space, cfg, shared_objects={}):
    super(Village_ActorCritic, self).__init__()
    self.name = name
    self.pidx = 0
    self.actions_no = len(action_space)
    self.target_update_freq = 400
    self.batch_size = batch_size = cfg.general.batch_size
    self.update_freq = batch_size
    self.hist_len = 1
    self.cuda = cfg.general.use_cuda
    self.torch_obs = True
    self.dtype = TorchTypes(self.cuda)

    # Policy network and a target copy of the configured model
    # TODO move to
    self.net = net = Policy(cfg)
    if self.cuda:
        net.cuda()
    self.target = get_model(cfg.model.name)(cfg.model)
    if self.cuda:
        self.net.cuda()
        self.target.cuda()

    self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                   batch_size)

    self.batch_games = 0
    self.step_cnt = 0
    self.saved_transitions = []
    self._o, self._a, self._v = None, None, None
    self.live_idx = torch.linspace(0, batch_size - 1, batch_size) \
        .type(self.dtype.LongTensor)

    self.lr = .001
    self.gamma = cfg.agent.gamma
    self.optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
    self.optimizer.zero_grad()
def __init__(self, name, action_space, cfg, shared_objects={}):
    super(EpisodicRDQNAgent, self).__init__()
    self.name = name
    self.actions_no = len(action_space)
    self.batch_size = batch_size = cfg.general.batch_size
    self.cuda = cfg.general.use_cuda
    self.dtype = TorchTypes(self.cuda)

    # Reuse a shared model if one is provided, otherwise build a new one
    if "model" in shared_objects:
        self.net = net = shared_objects["model"]
        self.print_info("Training some shared model.")
    else:
        self.net = net = get_model(cfg.model.name)(cfg.model)
    if self.cuda:
        net.cuda()

    # Build the optimizer named in the config with its keyword arguments
    Optimizer = getattr(optim, cfg.training.algorithm)
    optim_args = vars(cfg.training.algorithm_args)
    self.optimizer = Optimizer(net.parameters(), **optim_args)

    self.losses = []
    self.exploration_strategy = get_batch_schedule(cfg.agent.exploration,
                                                   batch_size)

    self.last_q = None
    self.last_a = None
    self.last_aux = None
    self.live_idx = torch.linspace(0, batch_size - 1, batch_size) \
        .type(self.dtype.LongTensor)
    self.prev_state = {}
    self.crt_step = -1

    # Per-task weighting for the auxiliary losses
    self.loss_coeff = {k: v for [k, v] in cfg.model.auxiliary_tasks}
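# --- Hedged sketch, not part of the agent above ----------------------------
# Illustration of the optimizer construction used in the constructor:
# cfg.training.algorithm names a class in torch.optim and
# cfg.training.algorithm_args holds its keyword arguments. The values below
# are assumptions for the example, not the project's configuration.
from argparse import Namespace
import torch.nn as nn
import torch.optim as optim

_net = nn.Linear(4, 2)                   # stand-in model
_algorithm = "Adam"                      # e.g. cfg.training.algorithm
_algorithm_args = Namespace(lr=1e-3)     # e.g. cfg.training.algorithm_args

_Optimizer = getattr(optim, _algorithm)
_optimizer = _Optimizer(_net.parameters(), **vars(_algorithm_args))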
import torch

from methods import get_batch_schedule

# Split a batch of 20 games across three exploration schedules:
# two constant partitions and one linearly annealed partition.
partition = [["constant", 0.2, [.5]],
             ["constant", 0.2, [0.1]],
             ["linear", 0.6, [1.0, 0.005, 100]]]

sch = get_batch_schedule(partition, 20)

for x in sch:
    print(x)
    # Sample which games explore at this step and show their indices.
    print(torch.bernoulli(x).long().nonzero().squeeze(1))
    input("Push!")
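# --- Hedged sketch, not part of the script above ----------------------------
# An assumed reading of the partition format consumed by get_batch_schedule:
# each entry is [kind, fraction_of_batch, args], and the schedule yields one
# epsilon tensor per step with an entry for every game in the batch. The real
# implementation lives in `methods`; this sketch only mirrors how the script
# above consumes it.
import torch


def sketch_batch_schedule(partition, batch_size):
    sizes = [int(round(frac * batch_size)) for _, frac, _ in partition]
    step = 0
    while True:
        eps = []
        for (kind, _, args), n in zip(partition, sizes):
            if kind == "constant":
                eps.extend([args[0]] * n)
            elif kind == "linear":
                # assumed args: [start, end, annealing_steps]
                start, end, steps = args
                t = min(step, steps) / float(steps)
                eps.extend([start + (end - start) * t] * n)
        yield torch.FloatTensor(eps)
        step += 1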