class dqn():
    """Thin inference wrapper around a trained DQN policy checkpoint.

    Reads the network hyper-parameters from the ``default.json`` sitting
    next to the checkpoint, rebuilds the ``Net`` model on that config's
    device, and restores the saved weights so :meth:`next_node` can
    greedily pick the next graph node.
    """

    def __init__(self, env, policy):
        """
        Args:
            env: environment exposing ``map.get_node_num()``,
                ``observation_space`` and ``action_space``
                (project type — not validated here).
            policy: filesystem path to a ``torch.save``-d state dict.
        """
        self.N = env.map.get_node_num()
        self.env = env
        config_path = join(dirname(policy), "default.json")
        args, _ = load_args(config_path)

        # Gym spaces expose either a shape (Box) or a size n (Discrete);
        # whichever is set wins.
        state_shape = env.observation_space.shape or env.observation_space.n
        action_shape = env.action_space.shape or env.action_space.n
        self.net = Net(args.layer_num, state_shape, action_shape,
                       args.device).to(args.device)

        # map_location lets a checkpoint saved on one device (e.g. a GPU)
        # be restored on another (e.g. a CPU-only evaluation machine).
        state_dict = torch.load(policy, map_location=args.device)
        self.net.load_state_dict(state_dict)

    def next_node(self, obs):
        """Return the index of the highest-Q action for observation ``obs``."""
        # Pure inference: no_grad avoids building an autograd graph.
        with torch.no_grad():
            out, _ = self.net(np.array([obs]))
        out = out.detach().cpu()[0]
        return np.argmax(out)
# NOTE(review): garbled, line-collapsed fragment of a larger evaluation
# script. The leading tokens ("'.')[0] + '_camera' + '.world'") are the
# tail of a statement whose start lies outside this chunk — presumably
# building a '*_camera.world' file name from a path; confirm against the
# full file before editing.
# Visible steps, in order: (1) copy checkpoint entries whose key starts
# with 'model.' into state_dict with that 6-char prefix stripped;
# (2) build the wrapped 'jackal_navigation-v0' env via wrapper_dict;
# (3) rebuild Net from the space shapes and load the filtered weights;
# (4) define min/max ranges for local-planner parameters (range_dict);
# (5) init accumulators rs/cs, the (params, 1)-shaped pms array from
# env_config['param_init'], and a success counter — presumably consumed
# by loop code after this chunk.
'.')[0] + '_camera' + '.world' state_dict = {} state_dict_raw = torch.load(model_path) for key in state_dict_raw.keys(): if key.split('.')[0] == 'model': state_dict[key[6:]] = state_dict_raw[key] env = wrapper_dict[wrapper_config['wrapper']](gym.make('jackal_navigation-v0', **env_config), wrapper_config['wrapper_args']) state_shape = env.observation_space.shape or env.observation_space.n action_shape = env.action_space.shape or env.action_space.n model = Net(training_config['layer_num'], state_shape, action_shape, config['device']).to(config['device']) model.load_state_dict(state_dict) range_dict = { 'max_vel_x': [0.1, 2], 'max_vel_theta': [0.314, 3.14], 'vx_samples': [1, 12], 'vtheta_samples': [1, 40], 'path_distance_bias': [0.1, 1.5], 'goal_distance_bias': [0.1, 2] } rs = [] cs = [] pms = np.array(env_config['param_init']) pms = np.expand_dims(pms, -1) succeed = 0