Example #1
    def __init__(self):
        super(PredictActionsCartpoleEnv, self).__init__()
        self.cartpole = CartPoleEnv()

        self.observation_space = self.cartpole.observation_space
        self.action_space = spaces.Tuple(
            (self.cartpole.action_space, ) * (NUM_PREDICTED_ACTIONS + 1))
Example #2
    def __init__(self):
        super(PredictObsCartpoleEnv, self).__init__()
        self.cartpole = CartPoleEnv()

        self.observation_space = self.cartpole.observation_space
        self.action_space = spaces.Tuple((self.cartpole.action_space, ) +
                                         (self.cartpole.observation_space, ) *
                                         (NUM_PREDICTED_OBSERVATIONS))
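These two wrappers give CartPole an augmented action space: the agent submits its usual CartPole action together with predictions of its next few actions (or of the upcoming observations), packed into a gym spaces.Tuple. A minimal usage sketch, assuming the wrapper delegates reset to self.cartpole (that method is not shown in the excerpt):

# Hypothetical usage sketch; the reset delegation is an assumption, not shown above.
env = PredictObsCartpoleEnv()
obs = env.reset()
# A Tuple action bundles the real CartPole action with the predicted observations.
action = env.action_space.sample()  # (cartpole_action, pred_obs_1, ..., pred_obs_N)
real_action, predicted_obs = action[0], action[1:]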
Example #3
def cartpole_env(env_id=1, **kwargs):
    return GymEnvWrapper(CartPoleEnv(**kwargs), act_null_value=0)
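A short usage sketch for this factory, assuming GymEnvWrapper is rlpyt's rlpyt.envs.gym.GymEnvWrapper (the act_null_value argument suggests that); the exact return type of step() is part of that assumption:

# Hypothetical usage sketch, assuming rlpyt's GymEnvWrapper.
env = cartpole_env()
obs = env.reset()
env_step = env.step(env.action_space.sample())  # rlpyt-style wrappers return an EnvStep namedtuple
print(env_step.reward, env_step.done)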
Example #4
import gym
import numpy as np
import math
from gym.envs.classic_control.cartpole import CartPoleEnv

from envs.task import Task

# Grab the CartPole termination thresholds from a throwaway instance.
_env = CartPoleEnv()
_X_THRESHOLD = _env.x_threshold
_THETA_THRESHOLD = _env.theta_threshold_radians
del _env


class CartPoleBalanceTask(Task):
    def __call__(self, states, actions, next_states):
        next_dones = GYMMB_ContinuousCartPole.is_done(next_states)
        dones = GYMMB_ContinuousCartPole.is_done(states)
        rewards = 1.0 - next_dones.float() * dones.float()  # always 1.0 unless both the current and next state exceed the is_done termination criteria
        return rewards


class CartPoleSpeedyBalanceTask(Task):
    def __call__(self, states, actions, next_states):
        rewards = states[:, 1].abs()  # reward equals the absolute cart velocity, so there are no free points for standing still
        return rewards


class GYMMB_ContinuousCartPole(CartPoleEnv):
    """
    A continuous version.
    Observation space:
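The excerpt ends mid-docstring and does not show GYMMB_ContinuousCartPole.is_done, which both task classes above call on a batch of states. A minimal sketch of what such a static method might look like, assuming states is a torch tensor of shape (batch, 4) holding [x, x_dot, theta, theta_dot] (hypothetical, not the author's exact code):

    # Hypothetical sketch of the termination test used by the tasks above.
    @staticmethod
    def is_done(states):
        x, theta = states[:, 0], states[:, 2]  # cart position and pole angle
        return (x.abs() > _X_THRESHOLD) | (theta.abs() > _THETA_THRESHOLD)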
Example #5
        logging.debug("actions: %s\n%s", action_tensor.shape, action_tensor)
        logging.debug("log_prob: %s\n%s", log_prob_tensor.shape,
                      log_prob_tensor)
        logging.debug("log_prob test: %s\n%s", prob_tensor.shape,
                      torch.log(prob_tensor))
        logging.debug("rewards: %s\n%s", rewards_tensor.shape, rewards_tensor)
        logging.debug("loss: %s\n%s", loss.shape, loss)


if __name__ == "__main__":

    # Create the environment. Note that gym.make wraps the actual environment
    # in a wrapper that ends an episode based on a time limit.
    # env = gym.make("CartPole-v0")  # state = [x, x_dot, theta, theta_dot], actions = [left_or_right] 0 = left 1 = right

    env = CartPoleEnv()  # instantiate the env directly, without the time-limit wrapper

    # get the state and action sizes from the environment
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    # instantiate the agent
    agent = PolicyGradientAgent(state_size, action_size)

    done = True
    last_render_time = time.time()
    episode_count = -1
    # run episodes forever
    while True:

        # if the previous episode is done