def __init__(
        self,
        capacity=500000,
        warmup_steps=50000,
        n_frames=4,
        n_steps=3,
        n_atoms=21,
        v_min=-1,
        v_max=0,
        alpha=.6,
        beta=.4,
        gamma=.99,
        hidden_size=512,
        device='cuda',
        batch_size=48,
        lr=0.0000625 * 2,
        lr_decay=0.985,
        beta_converged=4000000,
        update_target_net_every=16000,
        train_every=4,
        frame_skip=4):
    """Build the replay memory, online/target networks, optimizer and bookkeeping state.

    Args:
        capacity: Forwarded to ``MemoryBuffer`` as its first argument.
        warmup_steps: Stored on the instance (presumably the number of steps
            collected before training starts -- confirm against the caller).
        n_frames: Forwarded to ``MemoryBuffer`` and to both ``Network`` instances.
        n_steps: Forwarded to ``MemoryBuffer`` and stored on the instance.
        n_atoms: Number of points in ``self.support``; also forwarded to ``Network``.
        v_min: First value of ``self.support``.
        v_max: Last value of ``self.support``.
        alpha: Forwarded to ``MemoryBuffer``.
        beta: Forwarded to ``MemoryBuffer`` and stored on the instance.
        gamma: Forwarded to ``MemoryBuffer`` and stored on the instance.
        hidden_size: Forwarded to both ``Network`` instances.
        device: Torch device on which the networks and helper tensors are placed.
        batch_size: Stored on the instance; also fixes the first dimension of
            the ``_offset`` and ``_m`` scratch tensors.
        lr: Learning rate handed to Adam.
        lr_decay: First argument of ``ExpLrDecay``, which drives the ``LambdaLR`` scheduler.
        beta_converged: Stored on the instance (presumably the step at which
            ``beta`` finishes annealing -- confirm against the training loop).
        update_target_net_every: Stored on the instance.
        train_every: Stored on the instance.
        frame_skip: Stored on the instance.
    """
    interface = SuperHexagonInterface

    def build_network():
        # The online and the target network share one architecture.
        return Network(n_frames, interface.n_actions, n_atoms, hidden_size).to(device)

    # --- replay memory -------------------------------------------------
    self.memory_buffer = MemoryBuffer(
        capacity,
        n_frames,
        n_steps,
        interface.frame_size,
        interface.frame_size_cropped,
        alpha,
        beta,
        gamma,
        device=device,
    )

    # --- networks: the target net starts as an exact copy of the online net
    self.net = build_network()
    self.target_net = build_network()
    online_weights = self.net.state_dict()
    self.target_net.load_state_dict(online_weights)

    # --- optimisation --------------------------------------------------
    self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr, eps=1.5e-4)
    lr_lambda = ExpLrDecay(lr_decay, min_factor=.1)
    self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda)

    # --- schedule / training hyperparameters ---------------------------
    self.warmup_steps = warmup_steps
    self.train_every = train_every
    self.update_target_net_every = update_target_net_every
    self.frame_skip = frame_skip
    self.batch_size = batch_size
    self.beta_converged = beta_converged
    self.beta = beta
    self.gamma = gamma
    self.n_steps = n_steps

    # --- value support: n_atoms evenly spaced points on [v_min, v_max] --
    self.n_atoms = n_atoms
    self.v_min = v_min
    self.v_max = v_max
    self.delta_z = (v_max - v_min) / (n_atoms - 1)
    self.support = torch.linspace(v_min, v_max, n_atoms, dtype=torch.float, device=device)

    # --- running statistics --------------------------------------------
    self.iteration = 0
    self._longest_run = 0
    self.list_steps_alive = []
    self.losses = []
    self.kls = []
    self.times = []

    # --- preallocated scratch tensors ----------------------------------
    # Column vector [0, n_atoms, 2 * n_atoms, ...] with one entry per batch row.
    row_starts = torch.arange(0, batch_size * n_atoms, n_atoms, device=device)
    self._offset = row_starts.view(-1, 1)
    # Uninitialised (batch_size, n_atoms) buffer.
    self._m = torch.empty((batch_size, n_atoms), device=device)