Code Example #1
def make_proba_distribution(action_space: gym.spaces.Space,
                            use_sde: bool = False,
                            dist_kwargs: Optional[Dict[str, Any]] = None) -> Distribution:
    """
    Return an instance of Distribution for the correct type of action space

    :param action_space: (gym.spaces.Space) the input action space
    :param use_sde: (bool) Force the use of StateDependentNoiseDistribution
        instead of DiagGaussianDistribution
    :param dist_kwargs: (Optional[Dict[str, Any]]) Keyword arguments to pass to the probability distribution
    :return: (Distribution) the appropriate Distribution object
    """
    if dist_kwargs is None:
        dist_kwargs = {}

    if isinstance(action_space, spaces.Box):
        assert len(action_space.shape) == 1, "Error: the action space must be a vector"
        if use_sde:
            return StateDependentNoiseDistribution(get_action_dim(action_space), **dist_kwargs)
        return DiagGaussianDistribution(get_action_dim(action_space), **dist_kwargs)
    elif isinstance(action_space, spaces.Discrete):
        return CategoricalDistribution(action_space.n, **dist_kwargs)
    # elif isinstance(action_space, spaces.MultiDiscrete):
    #     return MultiCategoricalDistribution(action_space.nvec, **dist_kwargs)
    # elif isinstance(action_space, spaces.MultiBinary):
    #     return BernoulliDistribution(action_space.n, **dist_kwargs)
    else:
        raise NotImplementedError("Error: probability distribution, not implemented for action space "
                                  f"of type {type(action_space)}."
                                  " Must be of type Gym Spaces: Box, Discrete, MultiDiscrete or MultiBinary.")
Code Example #2
File: policies.py Project: wenry55/stable-baselines3
    def __init__(self,
                 observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        super(Critic, self).__init__(observation_space,
                                     action_space,
                                     features_extractor=features_extractor,
                                     normalize_images=normalize_images,
                                     device=device)

        action_dim = get_action_dim(self.action_space)

        q1_net = create_mlp(features_dim + action_dim, 1, net_arch,
                            activation_fn)
        self.q1_net = nn.Sequential(*q1_net)

        q2_net = create_mlp(features_dim + action_dim, 1, net_arch,
                            activation_fn)
        self.q2_net = nn.Sequential(*q2_net)

        self.q_networks = [self.q1_net, self.q2_net]
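For context, a small standalone sketch (the dimensions are illustrative assumptions, not values from the project) of how twin Q-networks built this way consume a (features, action) pair: the action vector, whose width comes from get_action_dim, is concatenated with the state features before entering each MLP.

# Hedged standalone sketch; features_dim/action_dim/net_arch are made-up sizes.
import torch as th
import torch.nn as nn
from stable_baselines3.common.torch_layers import create_mlp

features_dim, action_dim, net_arch = 64, 6, [256, 256]
q1_net = nn.Sequential(*create_mlp(features_dim + action_dim, 1, net_arch, nn.ReLU))
q2_net = nn.Sequential(*create_mlp(features_dim + action_dim, 1, net_arch, nn.ReLU))

features = th.zeros(32, features_dim)  # batch of extracted state features
actions = th.zeros(32, action_dim)     # batch of actions, width = get_action_dim(...)
qvalue_input = th.cat([features, actions], dim=1)
q_values = tuple(q_net(qvalue_input) for q_net in (q1_net, q2_net))  # two (32, 1) tensors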
Code Example #3
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
    ):
        super(Actor, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            squash_output=True,
        )

        # Save arguments to re-create object at loading
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn

        self.action_dim = get_action_dim(self.action_space)
        latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
        self.latent_pi = nn.Sequential(*latent_pi_net)
        last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

        self.mu = nn.Linear(last_layer_dim, self.action_dim)
Code Example #4
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        n_quantiles: int = 25,
        n_critics: int = 2,
        share_features_extractor: bool = True,
    ):
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        action_dim = get_action_dim(self.action_space)

        self.share_features_extractor = share_features_extractor
        self.q_networks = []
        self.n_quantiles = n_quantiles
        self.n_critics = n_critics
        self.quantiles_total = n_quantiles * n_critics

        for i in range(n_critics):
            qf_net = create_mlp(features_dim + action_dim, n_quantiles, net_arch, activation_fn)
            qf_net = nn.Sequential(*qf_net)
            self.add_module(f"qf{i}", qf_net)
            self.q_networks.append(qf_net)
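A hedged, standalone sketch of what the loop above builds (all sizes are illustrative): each qf{i} head emits n_quantiles values per sample, so stacking the heads yields n_critics * n_quantiles = quantiles_total quantile estimates.

# Hedged standalone sketch; all sizes below are illustrative assumptions.
import torch as th
import torch.nn as nn
from stable_baselines3.common.torch_layers import create_mlp

features_dim, action_dim, n_quantiles, n_critics = 64, 6, 25, 2
q_networks = [
    nn.Sequential(*create_mlp(features_dim + action_dim, n_quantiles, [256, 256], nn.ReLU))
    for _ in range(n_critics)
]

qvalue_input = th.cat([th.zeros(32, features_dim), th.zeros(32, action_dim)], dim=1)
quantiles = th.stack([qf(qvalue_input) for qf in q_networks], dim=1)
print(quantiles.shape)  # torch.Size([32, 2, 25]); 2 * 25 = 50 quantiles per sample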
Code Example #5
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        n_critics: int = 2,
        film_critic: bool = False,
        num_env_params: int = 0,
        share_features_extractor: bool = True,
    ):
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        action_dim = get_action_dim(self.action_space)

        self.share_features_extractor = share_features_extractor
        self.n_critics = n_critics
        self.q_networks = []
        self.film = film_critic
        for idx in range(n_critics):
            q_net = create_mlp(features_dim + action_dim, 1, net_arch,
                               activation_fn)
            q_net = nn.Sequential(*q_net)
            self.add_module(f"qf{idx}", q_net)
            self.q_networks.append(q_net)
Code Example #6
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        device: Union[th.device, str] = "auto",
        n_critics: int = 2,
    ):
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            device=device,
        )

        action_dim = get_action_dim(self.action_space)

        self.n_critics = n_critics
        self.q_networks = []
        for idx in range(n_critics):
            q_net = create_mlp(features_dim + action_dim, 1, net_arch,
                               activation_fn)
            q_net = nn.Sequential(*q_net)
            self.add_module(f"qf{idx}", q_net)
            self.q_networks.append(q_net)
Code Example #7
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        features_extractor: nn.Module,
        features_dim: int,
        latent_dim: int,
        hidden_dim: int,
        net_arch: Optional[List[int]] = None,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
    ):
        super(Controller, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )

        if net_arch is None:
            net_arch = [64]

        self.net_arch = net_arch
        self.activation_fn = activation_fn
        self.features_extractor = features_extractor
        self.features_dim = features_dim
        self.latent_dim = latent_dim
        self.hidden_dim = hidden_dim
        self.normalize_images = normalize_images
        action_dim = get_action_dim(self.action_space)
        self.fc = nn.Linear(self.latent_dim + self.hidden_dim, action_dim)
Code Example #8
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
    ):
        super(Actor, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            squash_output=True,
        )

        self.features_extractor = features_extractor
        self.normalize_images = normalize_images
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn

        action_dim = get_action_dim(self.action_space)
        actor_net = create_mlp(features_dim, action_dim, net_arch, activation_fn, squash_output=True)
        # Deterministic action
        self.mu = nn.Sequential(*actor_net)
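A hedged, standalone sketch of the deterministic mapping this constructor sets up (the space and layer sizes are illustrative): the MLP ends in an action_dim-wide layer, and squash_output=True appends a Tanh so actions land in [-1, 1].

# Hedged standalone sketch; the Box space and layer sizes are assumptions.
import torch as th
import torch.nn as nn
from gym import spaces
from stable_baselines3.common.preprocessing import get_action_dim
from stable_baselines3.common.torch_layers import create_mlp

action_space = spaces.Box(low=-1.0, high=1.0, shape=(6,))
features_dim = 64
mu = nn.Sequential(*create_mlp(features_dim, get_action_dim(action_space),
                               [400, 300], nn.ReLU, squash_output=True))

features = th.zeros(1, features_dim)  # extracted observation features
action = mu(features)                 # shape (1, 6), every entry in [-1, 1]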
Code Example #9
    def _initialize(self):
        self.dense1 = tf.keras.layers.Dense(
            400,
            input_shape=(get_flattened_obs_dim(TaskEnv.observation_space) +
                         get_action_dim(TaskEnv.action_space), ),
            activation='relu')
        self.dense2 = tf.keras.layers.Dense(300, activation='relu')
        self.dense3 = tf.keras.layers.Dense(1)
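The input_shape above is simply the flattened observation size plus the flattened action size. A hedged, standalone sketch with illustrative spaces (TaskEnv is the example's own class; any object exposing observation_space and action_space behaves the same):

# Hedged standalone sketch; the two Box spaces are illustrative assumptions.
from gym import spaces
from stable_baselines3.common.preprocessing import get_action_dim, get_flattened_obs_dim

observation_space = spaces.Box(low=-1.0, high=1.0, shape=(3,))
action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))

input_dim = get_flattened_obs_dim(observation_space) + get_action_dim(action_space)
print(input_dim)  # 3 + 1 = 4 units feeding the first Dense layer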
Code Example #10
    def __init__(self, observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 use_sde: bool = False,
                 log_std_init: float = -3,
                 full_std: bool = True,
                 sde_net_arch: Optional[List[int]] = None,
                 use_expln: bool = False,
                 clip_mean: float = 2.0,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        super(Actor, self).__init__(observation_space, action_space,
                                    features_extractor=features_extractor,
                                    normalize_images=normalize_images,
                                    device=device,
                                    squash_output=True)

        # Save arguments to re-create object at loading
        self.use_sde = use_sde
        self.sde_features_extractor = None
        self.sde_net_arch = sde_net_arch
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn
        self.log_std_init = log_std_init
        self.sde_net_arch = sde_net_arch
        self.use_expln = use_expln
        self.full_std = full_std
        self.clip_mean = clip_mean

        action_dim = get_action_dim(self.action_space)
        latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
        self.latent_pi = nn.Sequential(*latent_pi_net)
        last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

        if self.use_sde:
            latent_sde_dim = last_layer_dim
            # Separate feature extractor for gSDE
            if sde_net_arch is not None:
                self.sde_features_extractor, latent_sde_dim = create_sde_features_extractor(features_dim, sde_net_arch,
                                                                                            activation_fn)

            self.action_dist = StateDependentNoiseDistribution(action_dim, full_std=full_std, use_expln=use_expln,
                                                               learn_features=True, squash_output=True)
            self.mu, self.log_std = self.action_dist.proba_distribution_net(latent_dim=last_layer_dim,
                                                                            latent_sde_dim=latent_sde_dim,
                                                                            log_std_init=log_std_init)
            # Avoid numerical issues by limiting the mean of the Gaussian
            # to be in [-clip_mean, clip_mean]
            if clip_mean > 0.0:
                self.mu = nn.Sequential(self.mu, nn.Hardtanh(min_val=-clip_mean, max_val=clip_mean))
        else:
            self.action_dist = SquashedDiagGaussianDistribution(action_dim)
            self.mu = nn.Linear(last_layer_dim, action_dim)
            self.log_std = nn.Linear(last_layer_dim, action_dim)
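A hedged, standalone sketch of the non-gSDE branch above (sizes are illustrative): mu and log_std each output action_dim values, which parameterize a SquashedDiagGaussianDistribution built from get_action_dim(action_space).

# Hedged standalone sketch; the Box space and latent size are assumptions.
import torch as th
import torch.nn as nn
from gym import spaces
from stable_baselines3.common.distributions import SquashedDiagGaussianDistribution
from stable_baselines3.common.preprocessing import get_action_dim

action_space = spaces.Box(low=-1.0, high=1.0, shape=(3,))
action_dim = get_action_dim(action_space)
last_layer_dim = 256

mu = nn.Linear(last_layer_dim, action_dim)
log_std = nn.Linear(last_layer_dim, action_dim)
action_dist = SquashedDiagGaussianDistribution(action_dim)

latent_pi = th.zeros(1, last_layer_dim)  # stand-in for the latent_pi output
actions = action_dist.actions_from_params(mu(latent_pi), log_std(latent_pi))  # in [-1, 1]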
Code Example #11
def process(file, case_number, runs, verbose):
    print(f"File: {file}")
    agent = load_agent(file)
    name = f"{file.split('/')[-1].split('.')[0]}"
    print(f"Name: {name}")
    agent.name = name
    env = Monitor(get_env(case_number))
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    print(f"  --> Testing...")
    test_agent(agent, env, runs, verbose)
Code Example #12
def create_agent(env, name, case_number, layers, verbose):
    # return DQN.load("./saved_agents/d1a_DQN_0.zip", env=env)
    agent = A2C(
        policy=MlpPolicy,
        env=env,
        learning_rate=1.0e-3,
        n_steps=env.unwrapped.envs[0].unwrapped._max_steps,
        gamma=1.0,
        gae_lambda=1.0,
        ent_coef=0.0,
        vf_coef=0.5,
        max_grad_norm=0.5,
        rms_prop_eps=1e-5,
        use_rms_prop=True,
        use_sde=False,
        sde_sample_freq=-1,
        normalize_advantage=False,
        tensorboard_log="./tblog",
        create_eval_env=True,
        policy_kwargs=dict(
            net_arch=[dict(vf=layers, pi=layers)],
            activation_fn=th.nn.LeakyReLU,
            ortho_init=True,
            log_std_init=1.0,
            full_std=True,
            sde_net_arch=None,
            use_expln=False,
            squash_output=True,
            features_extractor_class=FlattenExtractor,
            features_extractor_kwargs=dict(),
            normalize_images=False,
            optimizer_class=th.optim.Adam,
            optimizer_kwargs=dict(
                betas=(0.9, 0.999),
                eps=1e-8,
                weight_decay=0,
                amsgrad=False,
            ),
        ),
        verbose=verbose,
        seed=case_number,
        device="cpu",
        _init_setup_model=True,
    )
    agent.name = name
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    writer = SummaryWriter(log_dir=agent.tensorboard_log + "/" + agent.name +
                           "_1")
    writer.add_graph(agent.policy,
                     th.as_tensor(np.zeros((1, 8))).to(agent.policy.device))
    writer.close()
    return agent
Code Example #13
    def __init__(self):
        super().__init__()
        self._initialize()

        # Initialize parameters by calling the network once on dummy zero inputs
        self.call(
            tf.constant(
                np.zeros(
                    shape=(1,
                           get_flattened_obs_dim(TaskEnv.observation_space)))),
            tf.constant(
                np.zeros(shape=(1, get_action_dim(TaskEnv.action_space)))))
        self.loss_function_id = CRITIC_LOSS
Code Example #14
File: buffers.py Project: wenry55/stable-baselines3
    def __init__(self,
                 buffer_size: int,
                 observation_space: spaces.Space,
                 action_space: spaces.Space,
                 device: Union[th.device, str] = 'cpu',
                 n_envs: int = 1):
        super(BaseBuffer, self).__init__()
        self.buffer_size = buffer_size
        self.observation_space = observation_space
        self.action_space = action_space
        self.obs_shape = get_obs_shape(observation_space)
        self.action_dim = get_action_dim(action_space)
        self.pos = 0
        self.full = False
        self.device = device
        self.n_envs = n_envs
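A hedged, standalone sketch of what the two helpers return for typical Box spaces (the shapes are illustrative), which is how a buffer like the one above sizes its storage arrays:

# Hedged standalone sketch; both spaces are illustrative assumptions.
from gym import spaces
from stable_baselines3.common.preprocessing import get_action_dim, get_obs_shape

observation_space = spaces.Box(low=0, high=255, shape=(4, 84, 84))
action_space = spaces.Box(low=-1.0, high=1.0, shape=(6,))

print(get_obs_shape(observation_space))  # (4, 84, 84): kept as a tuple for array allocation
print(get_action_dim(action_space))      # 6: flattened action width
# A buffer would then allocate e.g. observations of shape
# (buffer_size, n_envs, *obs_shape) and actions of shape (buffer_size, n_envs, action_dim).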
Code Example #15
File: policies.py Project: 00schen/stable-baselines3
    def __init__(self,
                 observation_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 net_arch: List[int],
                 features_extractor: nn.Module,
                 features_dim: int,
                 activation_fn: Type[nn.Module] = nn.ReLU,
                 use_sde: bool = False,
                 log_std_init: float = -3,
                 full_std: bool = True,
                 sde_net_arch: Optional[List[int]] = None,
                 use_expln: bool = False,
                 clip_mean: float = 2.0,
                 normalize_images: bool = True,
                 device: Union[th.device, str] = 'auto'):
        super(Actor, self).__init__(observation_space,
                                    action_space,
                                    features_extractor=features_extractor,
                                    normalize_images=normalize_images,
                                    device=device,
                                    squash_output=True)

        # Save arguments to re-create object at loading
        self.use_sde = use_sde
        self.sde_features_extractor = None
        self.sde_net_arch = sde_net_arch
        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn
        self.log_std_init = log_std_init
        self.sde_net_arch = sde_net_arch
        self.use_expln = use_expln
        self.full_std = full_std
        self.clip_mean = clip_mean

        action_dim = get_action_dim(self.action_space)
        latent_pi_net = create_mlp(features_dim, -1, net_arch, activation_fn)
        self.latent_pi = nn.Sequential(*latent_pi_net)
        last_layer_dim = net_arch[-1] if len(net_arch) > 0 else features_dim

        self.mu = nn.Linear(last_layer_dim, action_dim)
        self.log_std = nn.Linear(last_layer_dim, action_dim)
Code Example #16
    def __init__(
        self,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        gae_lambda: float = 1,
        gamma: float = 0.99,
        num_trajectories: int = 20,  # TODO: put as a parameter
    ):
        self.observation_space = observation_space
        self.action_space = action_space
        self.obs_shape = get_obs_shape(observation_space)
        self.action_dim = get_action_dim(action_space)
        self.full = False
        self.device = device
        self.gae_lambda = gae_lambda
        self.gamma = gamma
        self.num_trajectories = num_trajectories
        self.traj_idx = 0
        self.live_agents: Dict[int, int] = {}  # env agent-id -> buffer-unique id
        self.trajectories: Dict[int, TrajectoryBufferSamples] = {}  # buffer-unique id -> trajectory
        self.num_done_trajectories = 0
Code Example #17
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        normalize_images: bool = True,
        share_features_extractor: bool = True,
        action_dist_num=32,
    ):
        super().__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
        )
        self.share_features_extractor = share_features_extractor
        self.n_cos = 64
        self.action_dist_num = action_dist_num
        self.first_hidden_size = net_arch[0]

        self.pis = th.FloatTensor([
            np.pi * i for i in range(1, self.n_cos + 1)
        ]).view(1, 1, self.n_cos).to(self.device)

        action_dim = get_action_dim(self.action_space)

        net = create_mlp(features_dim + action_dim, 1, net_arch, activation_fn)
        self.net_head = nn.Sequential(*net[0:2])
        self.net_cos_embedding = nn.Sequential(
            nn.Linear(self.n_cos, self.first_hidden_size), activation_fn())
        self.net_out = nn.Sequential(*net[2:])

        self.add_module("net_head", self.net_head)
        self.add_module("net_cos_embedding", self.net_cos_embedding)
        self.add_module("net_out", self.net_out)
Code Example #18
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        net_arch: List[int],
        features_extractor: nn.Module,
        features_dim: int,
        activation_fn: Type[nn.Module] = nn.ReLU,
        mask_policy: int = 0,
        normalize_images: bool = True,
    ):
        super(Actor, self).__init__(
            observation_space,
            action_space,
            features_extractor=features_extractor,
            normalize_images=normalize_images,
            squash_output=True,
        )

        self.net_arch = net_arch
        self.features_dim = features_dim
        self.activation_fn = activation_fn

        action_dim = get_action_dim(self.action_space)
        actor_net = create_mlp(features_dim,
                               action_dim,
                               net_arch,
                               activation_fn,
                               squash_output=True)
        # Deterministic action
        self.mu = nn.Sequential(*actor_net)

        policy_mask = [True] * features_dim
        for i in range(1, mask_policy + 1):
            policy_mask[-i] = False
        self.policy_mask = th.Tensor(policy_mask)
Code Example #19
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env = NormalizeObservationSpace(env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)

    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(list(map(lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[0].getDate()),
                             hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(), hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)
    rp = a * (1.0 - e)
    v = np.array(list(map(lambda sc_state: sc_state.getPVCoordinates().getVelocity().toArray(), hist_sc_state)))
    h = np.array(list(map(lambda sc_state: sc_state.getPVCoordinates().getMomentum().toArray(), hist_sc_state)))
    angle_f_v = list(map(lambda q:
                         np.degrees(np.arccos(
                             np.dot(q[0], q[1]) / np.linalg.norm(q[0]) / (np.linalg.norm(q[1]) + 1e-10)
                         )),
                         zip(v, hist_action)))
    hist_action_plane = list(map(lambda q: q[1] - np.dot(q[1], q[0]) * q[0] / (np.linalg.norm(q[0]) ** 2),
                                 zip(h, hist_action)))
    angle_fp_v = list(map(lambda q:
                          np.degrees(np.arccos(
                              np.dot(q[0], q[1] * [1, 1, 0]) / np.linalg.norm(q[0]) / (
                                      np.linalg.norm(q[1] * [1, 1, 0]) + 1e-10)
                          )),
                          zip(v, hist_action_plane)))

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    axs.plot(time, ra, "k")
    plt.tight_layout()
    fig.savefig("plan_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    axs.plot(time, rp, "k")
    plt.tight_layout()
    fig.savefig("plan_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    axs.plot(time, mass, "k")
    plt.tight_layout()
    fig.savefig("plan_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action, "k")
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("plan_action.pdf", format="pdf")
    plt.close(fig)
Code Example #20
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0')
    env.unwrapped._ref_sv[2] = 0.0
    env.unwrapped._ref_sv[3] = 0.0
    env.unwrapped._ref_sv[4] = 0.0
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)

    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    x = np.array(
        list(
            map(
                lambda sc_state: sc_state.getPVCoordinates().getPosition().
                getX(), hist_sc_state))) / 1000.0  # Convert to km
    y = np.array(
        list(
            map(
                lambda sc_state: sc_state.getPVCoordinates().getPosition().
                getY(), hist_sc_state))) / 1000.0  # Convert to km

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2.unwrapped._ref_sv[0] = 11000000.0 / 1.05
    env2.unwrapped._ref_sv[1] = 0.05
    env2.unwrapped._ref_sv[2] = 0.0
    env2.unwrapped._ref_sv[3] = 0.0
    env2.unwrapped._ref_sv[4] = 0.0
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)

    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    x2 = np.array(
        list(
            map(
                lambda sc_state: sc_state.getPVCoordinates().getPosition().
                getX(), hist_sc_state2))) / 1000.0  # Convert to km
    y2 = np.array(
        list(
            map(
                lambda sc_state: sc_state.getPVCoordinates().getPosition().
                getY(), hist_sc_state2))) / 1000.0  # Convert to km

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.set_xlim(-12000, 12000)
    axs.set_ylim(-12000, 12000)
    axs.grid(False)
    axs.plot(x, y, "k", zorder=2)
    l2, = axs.plot(x2, y2, zorder=1)
    l2.set_color("#777777")
    axs.legend(["Before", "After"],
               loc='upper right',
               frameon=False,
               bbox_to_anchor=(0.0, 1.0))
    im = mpimg.imread('earth.png')
    plt.imshow(im, extent=[-6400, 6400, -6400, 6400], interpolation="none")
    axs.set_aspect('equal')
    plt.text(11000, 0, "Pericenter")
    plt.text(-18500, 0, "Apocenter")
    plt.axis('off')
    plt.tight_layout()
    fig.savefig("orbit.pdf", format="pdf")
    plt.close(fig)
Code Example #21
def process(file):
    env = gym.make('PerigeeRaising-Continuous3D-v0',
                   use_perturbations=True,
                   perturb_action=True)
    env = NormalizeObservationSpace(
        env, lambda o: o / env.unwrapped.observation_space.high)
    env = Monitor(env)
    env.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env, n_eval_episodes=1)

    hist_sc_state = env.unwrapped.hist_sc_state
    hist_action = env.unwrapped.hist_action
    time = np.array(
        list(
            map(
                lambda sc_state: sc_state.getDate().durationFrom(hist_sc_state[
                    0].getDate()),
                hist_sc_state))) / 3600.0  # Convert to hours
    a = np.array(list(map(lambda sc_state: sc_state.getA(),
                          hist_sc_state))) / 1000.0  # Convert to km
    e = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state)))
    mass = np.array(
        list(map(lambda sc_state: sc_state.getMass(), hist_sc_state)))
    ra = a * (1.0 + e)
    rp = a * (1.0 - e)

    env2 = gym.make('PerigeeRaising-Continuous3D-v0')
    env2 = NormalizeObservationSpace(
        env2, lambda o: o / env2.unwrapped.observation_space.high)
    env2 = Monitor(env2)
    env2.seed(42)
    agent = A2C.load(file)
    agent.policy.action_dist = SquashedDiagGaussianDistribution(
        get_action_dim(env.action_space))
    evaluate_policy(agent, env2, n_eval_episodes=1)

    hist_sc_state2 = env2.unwrapped.hist_sc_state
    hist_action2 = env2.unwrapped.hist_action
    time2 = np.array(
        list(
            map(
                lambda sc_state: sc_state.getDate().durationFrom(
                    hist_sc_state2[0].getDate()),
                hist_sc_state2))) / 3600.0  # Convert to hours
    a2 = np.array(list(map(lambda sc_state: sc_state.getA(),
                           hist_sc_state2))) / 1000.0  # Convert to km
    e2 = np.array(list(map(lambda sc_state: sc_state.getE(), hist_sc_state2)))
    mass2 = np.array(
        list(map(lambda sc_state: sc_state.getMass(), hist_sc_state2)))
    ra2 = a2 * (1.0 + e2)
    rp2 = a2 * (1.0 - e2)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=ra[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(ra[0] - 20.0, ra[0] + 20.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("ra (km)")
    l2, = axs.plot(time2, ra2, "--")
    l2.set_color("#777777")
    axs.plot(time, ra, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_ra.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=rp[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(rp[0] - 5.0, rp[0] + 35.0)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("rp (km)")
    l2, = axs.plot(time2, rp2, "--")
    l2.set_color("#777777")
    axs.plot(time, rp, "k")
    axs.legend(["Planned", "Real"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_rp.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain', useOffset=mass[0])
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(mass[0] - 0.04, mass[0])
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("mass (kg)")
    l2, = axs.plot(time2, mass2, "--")
    l2.set_color("#777777")
    axs.plot(time, mass, "k")
    axs.legend(["Planned", "Real"], loc='upper right')
    plt.tight_layout()
    fig.savefig("real_m.pdf", format="pdf")
    plt.close(fig)

    fig, axs = plt.subplots(1, 1, figsize=(4.8, 3.0))
    axs.ticklabel_format(axis='y', style='plain')
    axs.set_xlim(time[0], time[-1])
    axs.set_ylim(-1.3, 1.3)
    axs.grid(True)
    axs.set_xlabel("time (h)")
    axs.set_ylabel("action")
    l1, l2, l3 = axs.plot(time[0:-1], hist_action)
    l1.set_color("#000000")
    l2.set_color("#777777")
    l3.set_color("#BBBBBB")
    axs.legend(["Act1", "Act2", "Act3"], loc='upper left')
    plt.tight_layout()
    fig.savefig("real_action.pdf", format="pdf")
    plt.close(fig)