Beispiel #1
0
 def __init__(
         self,
         capacity=500000,
         warmup_steps=50000,
         n_frames=4,
         n_steps=3,
         n_atoms=21,
         v_min=-1,
         v_max=0,
         alpha=.6,
         beta=.4,
         gamma=.99,
         hidden_size=512,
         device='cuda',
         batch_size=48,
         lr=0.0000625 * 2,
         lr_decay=0.985,
         beta_converged=4000000,
         update_target_net_every=16000,
         train_every=4,
         frame_skip=4,
 ):
     """Set up the replay memory, both networks, the optimizer and counters.

     Args:
         capacity: First argument handed to ``MemoryBuffer``.
         warmup_steps: Stored as ``self.warmup_steps``; not used here.
         n_frames: Passed to ``MemoryBuffer`` and to both ``Network``s.
         n_steps: Passed to ``MemoryBuffer`` and stored as ``self.n_steps``.
         n_atoms: Number of points in ``self.support``; also passed to both
             ``Network``s. Must be > 1, otherwise ``delta_z`` divides by 0.
         v_min: Lower end of ``self.support``.
         v_max: Upper end of ``self.support``.
         alpha: Passed to ``MemoryBuffer`` (presumably the prioritisation
             exponent -- confirm against ``MemoryBuffer``).
         beta: Passed to ``MemoryBuffer`` and stored as ``self.beta``.
         gamma: Passed to ``MemoryBuffer`` and stored as ``self.gamma``.
         hidden_size: Passed to both ``Network``s.
         device: Torch device for the networks and all tensors made here.
         batch_size: Stored, and used to size ``_offset`` and ``_m``.
         lr: Initial learning rate of the Adam optimizer.
         lr_decay: First argument of ``ExpLrDecay`` for the LR schedule.
         beta_converged: Stored as ``self.beta_converged``; not used here
             (presumably the step at which beta stops annealing -- confirm).
         update_target_net_every: Stored only; not used here.
         train_every: Stored only; not used here.
         frame_skip: Stored only; not used here.
     """
     # Plain hyperparameters that are only remembered for later use.
     self.batch_size = batch_size
     self.warmup_steps = warmup_steps
     self.train_every = train_every
     self.frame_skip = frame_skip
     self.update_target_net_every = update_target_net_every
     self.beta_converged = beta_converged
     self.beta = beta
     self.gamma = gamma
     self.n_steps = n_steps
     self.n_atoms = n_atoms
     self.v_min = v_min
     self.v_max = v_max

     self.memory_buffer = MemoryBuffer(
         capacity, n_frames, n_steps,
         SuperHexagonInterface.frame_size,
         SuperHexagonInterface.frame_size_cropped,
         alpha, beta, gamma,
         device=device,
     )

     def build_network():
         # Both networks share the exact same architecture arguments.
         model = Network(n_frames, SuperHexagonInterface.n_actions,
                         n_atoms, hidden_size)
         return model.to(device)

     self.net = build_network()
     self.target_net = build_network()
     # The target network starts as an exact copy of the online network.
     self.target_net.load_state_dict(self.net.state_dict())

     # Only the online network's parameters are optimised.
     optimizer = torch.optim.Adam(self.net.parameters(), lr=lr, eps=1.5e-4)
     self.optimizer = optimizer
     self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
         optimizer, ExpLrDecay(lr_decay, min_factor=.1))

     # n_atoms evenly spaced values on [v_min, v_max]; delta_z is their
     # spacing.
     self.delta_z = (v_max - v_min) / (n_atoms - 1)
     self.support = torch.linspace(
         v_min, v_max, n_atoms, dtype=torch.float, device=device)

     # Progress counters and history lists, all starting out empty.
     self.iteration = 0
     self._longest_run = 0
     self.list_steps_alive = []
     self.losses = []
     self.kls = []
     self.times = []

     # Column vector [0, n_atoms, 2 * n_atoms, ...] with one row per batch
     # entry -- presumably row offsets into a flattened
     # (batch_size * n_atoms) buffer; confirm against the training step.
     flat_size = batch_size * n_atoms
     row_starts = torch.arange(0, flat_size, n_atoms, device=device)
     self._offset = row_starts.view(-1, 1)
     # Uninitialised (batch_size, n_atoms) tensor allocated once up front.
     self._m = torch.empty((batch_size, n_atoms), device=device)