def dfs_search(target=None):
    """Run one depth-first search on a fresh environment and time it.

    Args:
        target: Unused by this function; kept so existing callers that pass
            it keep working.

    Returns:
        A 4-tuple ``(success, elapsed_ms, initial_and_target_state, action)``:
        the value returned by ``dfs.dfs(env)``, the elapsed time in
        milliseconds, the state read from the environment before searching,
        and ``dfs.action`` after the search.
    """
    env = Env()
    dfs = DFS()
    initial_and_target_state = env.get_current_state()

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring short durations.
    start_time = time.perf_counter() * 1000
    success = dfs.dfs(env)
    print(dfs.action)
    end_time = time.perf_counter() * 1000

    elapsed_ms = end_time - start_time
    print('time: {} ms'.format(elapsed_ms))
    return success, elapsed_ms, initial_and_target_state, dfs.action
def test(self, test_case_count=200, load_dir=None):
    """Evaluate ``self.target_net`` greedily on freshly created environments.

    For each test case a new ``Env`` is created and the network picks the
    arg-max action for at most 4 steps. A case counts as a success when its
    accumulated reward is positive. Accuracy (and, if any case succeeded,
    the average episode length) is printed.

    Args:
        test_case_count: Number of environments to evaluate.
        load_dir: Optional path to a saved ``state_dict`` to load into
            ``self.target_net`` first. When it is ``None`` and the measured
            accuracy beats ``self.max_acc``, the current weights are saved
            to ``models/dqn.pkl`` and ``self.max_acc`` is updated.
    """
    self.target_net = self.target_net.eval()
    if load_dir is not None:
        self.target_net.load_state_dict(torch.load(load_dir))

    count = 0
    total_length = 0
    for _ in tqdm(range(test_case_count)):
        env = Env()
        s = env.get_current_state()
        ep_r = 0
        for i in range(4):
            # Add a leading batch dimension: the network gets one sample.
            x = torch.unsqueeze(torch.FloatTensor(s), 0)
            # Pure inference, so skip building the autograd graph.
            with torch.no_grad():
                root_result, leaf_result = self.target_net(x)
            root_action = torch.argmax(root_result).item()

            if root_action != 3:
                leaf_action = torch.argmax(leaf_result[root_action]).item()
                s_, r, done = env.step(root_action, leaf_action)
            else:
                # Root action 3 takes a (source, target) pair: the first half
                # of the columns scores the source, the second half the
                # target.
                # NOTE(review): argmax is taken over the flattened slice, so
                # this presumably relies on a batch size of 1 -- confirm.
                find_path_result = leaf_result[3]
                half = find_path_result.shape[1] // 2
                find_path_source = torch.argmax(
                    find_path_result[:, :half]).item()
                find_path_target = torch.argmax(
                    find_path_result[:, half:]).item()
                s_, r, done = env.step(
                    root_action, (find_path_source, find_path_target))

            ep_r += r
            s = s_
            if done:
                if ep_r > 0:
                    total_length += i
                break

        # NOTE(review): an episode with positive reward that never reaches
        # `done` within 4 steps is counted here but adds nothing to
        # total_length, which lowers the reported average -- confirm intent.
        if ep_r > 0:
            count += 1

    # Guard against an empty run instead of raising ZeroDivisionError.
    acc = float(count) / test_case_count if test_case_count else 0.0
    if acc > self.max_acc and load_dir is None:
        torch.save(self.target_net.state_dict(), 'models/dqn.pkl')
        self.max_acc = acc
    print("acc is: ", acc)
    if count > 0:
        # `i` is a 0-based step index, so the tally is one short per
        # episode; add 1 to report the length in steps.
        print("length is: ", float(total_length) / count + 1)