def test_init_memory(self):
    model = Sequential()
    when(model).compile(loss='mean_squared_error', optimizer='Adam').thenReturn()
    environment = mock(
        {
            'observation_space': Discrete(8),
            'action_space': Discrete(3)
        },
        FL)
    when(environment).reset().thenReturn(0)
    # when(environment).step(...).thenReturn((1, 10, True))
    expect(environment, times=2).step(...).thenReturn((1, 10, True))
    configuration = mock(
        {
            'model': model,
            'memory_size': 2,
            'nb_iterations': 0,
            'training_params': [],
            'plot_training': False
        },
        Configuration)
    test_Agent = DQN(environment, configuration)
    verify(environment, times=2).step(...)
def __init__(
        self,
        state_size,
        n_actions,
        args,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    super().__init__(state_size, n_actions, args, device=device)
    # Target network starts as a copy of the policy network and is kept in eval mode;
    # it is only refreshed periodically from the policy network's weights.
    self.target_net = DQN(state_size, n_actions, layers=self.layers).to(self.device)
    self.target_net.load_state_dict(self.policy_net.state_dict())
    self.target_net.eval()
def __init__(
        self,
        state_size,
        n_actions,
        args,
        device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    self.device = device

    # Exploration / exploitation params.
    self.steps_done = 0
    self.eps_threshold = 1
    self.eps_start = args.eps_start
    self.eps_end = args.eps_end
    self.eps_decay = args.eps_decay

    # RL params.
    self.target_update = args.target_update
    self.discount = args.discount

    # Env params.
    self.n_actions = n_actions
    self.state_size = state_size

    # Deep Q-network params.
    self.layers = args.layers
    self.batch_size = args.batch_size
    self.policy_net = DQN(state_size, n_actions, layers=self.layers).to(self.device).float()
    self.target_net = None
    self.grad_clip = args.grad_clip

    # Use elif here: a plain second `if` would make the `else` branch raise
    # NotImplementedError even when 'adam' was selected.
    if str(args.optimizer).lower() == 'adam':
        self.optimizer = optim.Adam(self.policy_net.parameters())
    elif str(args.optimizer).lower() == 'rmsprop':
        self.optimizer = optim.RMSprop(self.policy_net.parameters())
    else:
        raise NotImplementedError

    self.memory = ReplayMemory(args.replay_size)

    # Performance buffers.
    self.rewards_list = []
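# The eps_start / eps_end / eps_decay / steps_done fields initialised above are
# typically consumed by an epsilon-greedy action selector with exponential decay.
# The sketch below is illustrative only: the function name, signature, and decay
# formula are assumptions, not this project's actual implementation.
import math
import random

import torch


def select_action(policy_net, state, n_actions, steps_done,
                  eps_start, eps_end, eps_decay, device="cpu"):
    # Anneal epsilon from eps_start towards eps_end as steps_done grows.
    eps_threshold = eps_end + (eps_start - eps_end) * math.exp(-steps_done / eps_decay)
    if random.random() > eps_threshold:
        # Exploit: pick the action with the highest predicted Q-value.
        with torch.no_grad():
            return policy_net(state.to(device)).max(1)[1].view(1, 1)
    # Explore: pick a uniformly random action.
    return torch.tensor([[random.randrange(n_actions)]], device=device, dtype=torch.long)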