Example #1
def custom_env(env_config):
    """Create an env stub for policy training.

    Only the action_space and observation_space attributes need to be
    defined; no other env methods (e.g. step(), reset()) are required.
    agent_action_space and agent_obs_space are assumed to be defined at
    module level in the original file.
    """
    env = gym.Env()
    env.action_space = agent_action_space
    env.observation_space = agent_obs_space
    return env
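Since the stub only has to expose its two space attributes, exercising it is straightforward. A minimal usage sketch, assuming the custom_env definition above and substituting illustrative spaces for agent_action_space and agent_obs_space (the values below are placeholders, not taken from the original module):

import gym
import numpy as np

# Illustrative stand-ins for the module-level spaces used by custom_env.
agent_action_space = gym.spaces.Discrete(4)
agent_obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(8,), dtype=np.float32)

stub = custom_env({})  # env_config is accepted but not used by the stub
assert stub.action_space.contains(stub.action_space.sample())
assert stub.observation_space.contains(stub.observation_space.sample())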
Example #2
def setUp(self) -> None:
     self.obs_idx = 0
     self.dummy_env = gym.Env()
     self.dummy_env.observation_space = gym.spaces.Box(low=0,
                                                       high=255,
                                                       shape=(84, 84, 3),
                                                       dtype=np.uint8)
     self.obs_buffer = ObservationBufferWrapper(self.dummy_env,
                                                obs_buffer_depth=3)
     self.visualize_buffer()
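ObservationBufferWrapper itself is not shown on this page. A minimal sketch of what such an observation-buffering (frame-stacking) wrapper typically looks like, assuming it returns the last obs_buffer_depth observations concatenated along the last axis; this is an assumption for illustration, not the original implementation:

import collections

import gym
import numpy as np


class SimpleObservationBuffer(gym.ObservationWrapper):
    """Return the last `obs_buffer_depth` observations stacked on the last axis."""

    def __init__(self, env, obs_buffer_depth=3):
        super().__init__(env)
        self._depth = obs_buffer_depth
        self._frames = collections.deque(maxlen=obs_buffer_depth)
        low = np.repeat(env.observation_space.low, obs_buffer_depth, axis=-1)
        high = np.repeat(env.observation_space.high, obs_buffer_depth, axis=-1)
        self.observation_space = gym.spaces.Box(
            low=low, high=high, dtype=env.observation_space.dtype)

    def reset(self, **kwargs):
        obs = self.env.reset(**kwargs)
        for _ in range(self._depth):
            self._frames.append(obs)  # pre-fill the buffer with the first frame
        return self.observation(obs)

    def observation(self, obs):
        self._frames.append(obs)
        return np.concatenate(list(self._frames), axis=-1)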
Example #3
    def test_load_cell_raises_exception_on_non_loadable_cell(self):
        save_function = mock.MagicMock()
        load_function = mock.MagicMock()

        core_env = gym.Env()
        env = save_loadable.SaveLoadableWrapper(core_env, save_function,
                                                load_function)

        test_cell = base_returning_info.BaseReturningInfo()
        policy = load_policy.LoadPolicy(env)
        with self.assertRaisesRegex(
                TypeError, "LoadableCellInfo.*got.*BaseReturningInfo"):
            policy.return_to_cell(test_cell)
Example #4
def test(rank, params, shared_model):
    torch.manual_seed(params.seed + rank)  # seed this process differently so the test agent is desynchronized from the training agents
    env = gym.make(params.env_name)  # create the environment; a video recorder wrapper (e.g. gym.wrappers.Monitor) can be added here
    env.seed(params.seed + rank)  # seed the environment with the same shifted seed
    model = AC_netwok(env.observation_space.shape[0],
                      env.action_space)  # creating one model
    model.eval()  # putting the model in "eval" mode because it won't be trained
    state = env.reset()  # getting the input images as numpy arrays
    state = torch.from_numpy(state)  # converting them into torch tensors
    reward_sum = 0  # initializing the sum of rewards to 0
    done = True  # initializing done to True
    start_time = time.time()  # getting the starting time to measure the computation time
    actions = deque(maxlen=100)  # cf https://pymotw.com/2/collections/deque.html
    episode_length = 0  # initializing the episode length to 0
    while True:  # repeat
        episode_length += 1  # incrementing the episode length by one
        if done:  # synchronizing with the shared model (same as train.py)
            model.load_state_dict(shared_model.state_dict())
            cx = Variable(torch.zeros(1, 256), volatile=True)
            hx = Variable(torch.zeros(1, 256), volatile=True)
        else:
            cx = Variable(cx.data, volatile=True)
            hx = Variable(hx.data, volatile=True)
        value, action_value, (hx, cx) = model(
            (Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
        prob = F.softmax(action_value, dim=1)
        action = prob.max(1)[1].data.numpy()  # the test agent does not explore, it directly plays the best action
        state, reward, done, _ = env.step(action[0, 0])  # done = done or episode_length >= params.max_episode_length
        reward_sum += reward
        if done:  # printing the results at the end of each episode
            print("Time {}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss",
                              time.gmtime(time.time() - start_time)),
                reward_sum, episode_length))
            reward_sum = 0  # reinitializing the sum of rewards
            episode_length = 0  # reinitializing the episode length
            actions.clear()  # reinitializing the actions
            state = env.reset()  # reinitializing the environment
            time.sleep(60)  # one-minute break to let the other (training) agents practice
        state = torch.from_numpy(state)  # new state and we continue
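The snippet above targets the pre-0.4 PyTorch API (Variable and the removed volatile flag) and the classic 4-tuple gym step. As a point of reference, a minimal sketch of the same greedy inference step written against current PyTorch, assuming a model with the same (state, (hx, cx)) call signature; this is an illustration, not part of the original code:

import torch
import torch.nn.functional as F


def greedy_action(model, state, hx, cx):
    """Pick the highest-probability action without tracking gradients."""
    with torch.no_grad():  # replaces the removed volatile=True flag
        value, action_value, (hx, cx) = model((state.unsqueeze(0), (hx, cx)))
        prob = F.softmax(action_value, dim=1)
        action = prob.argmax(dim=1).item()
    return action, hx, cx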
Example #5
    def test_gym_loads_cell_using_saved_snapshot(self):
        save_function = mock.MagicMock()
        load_function = mock.MagicMock()

        core_env = gym.Env()
        env = save_loadable.SaveLoadableWrapper(core_env, save_function,
                                                load_function)

        cell = loadable_cell_info.LoadableCellInfo(
            snapshot_data="snapshot_data")

        policy = load_policy.LoadPolicy(env)
        policy.return_to_cell(cell)

        load_function.assert_called_once_with(core_env, "snapshot_data")
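save_loadable.SaveLoadableWrapper and load_policy.LoadPolicy are not reproduced on this page. A hypothetical minimal pair that is consistent with what the tests in Examples #3 and #5 assert (the cell type is validated, and load_function is called with the wrapped env and the cell's snapshot); the class bodies below are an assumption, not the package's actual implementation:

# Hypothetical sketch inferred only from the assertions in Examples #3 and #5.
class LoadableCellInfo:
    def __init__(self, snapshot_data):
        self.snapshot_data = snapshot_data


class SaveLoadableWrapper:
    def __init__(self, env, save_function, load_function):
        self.env = env
        self.save_function = save_function
        self.load_function = load_function


class LoadPolicy:
    def __init__(self, env):
        self._env = env

    def return_to_cell(self, cell):
        if not isinstance(cell, LoadableCellInfo):
            raise TypeError("expected LoadableCellInfo, got {}".format(
                type(cell).__name__))
        # Restore the wrapped env from the snapshot stored in the cell.
        self._env.load_function(self._env.env, cell.snapshot_data)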
Example #6
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import subprocess
import logging
# See user manual https://usermanual.wiki/Document/pybullet20quickstart20guide.479068914.pdf
import pybullet as p
import pybullet_data
from pkg_resources import parse_version

from servorobots.components.dc_motor import GearedDcMotor
from servorobots.components.dc_motor import TimestampInput

logger = logging.getLogger(__name__)
gym.Env()


class qt:
    """Quaternion helpers; quaternions are (x, y, z, w) tuples."""

    @staticmethod
    def q_mult(q1, q2):
        # Hamilton product of two quaternions.
        x1, y1, z1, w1 = q1
        x2, y2, z2, w2 = q2
        w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
        x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
        y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2
        z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2
        return x, y, z, w

    @staticmethod
    def q_conjugate(q):
        # Conjugate: negate the vector part, keep the scalar part.
        x, y, z, w = q
        return (-x, -y, -z, w)
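A short usage sketch showing how the two helpers combine to rotate a vector via the standard q * v * conjugate(q) identity, with the vector embedded as a pure quaternion; the numbers are illustrative only:

import math

# 90-degree rotation about the z axis, in the (x, y, z, w) ordering used above.
angle = math.pi / 2
q = (0.0, 0.0, math.sin(angle / 2), math.cos(angle / 2))

v = (1.0, 0.0, 0.0, 0.0)  # the vector (1, 0, 0) as a pure quaternion
rotated = qt.q_mult(qt.q_mult(q, v), qt.q_conjugate(q))
print(rotated[:3])  # approximately (0.0, 1.0, 0.0)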
Example #7
if __name__ == "__main__":
    action_space = gym.spaces.Dict({
        "attack": gym.spaces.Discrete(2),
        "back": gym.spaces.Discrete(2),
        "forward": gym.spaces.Discrete(2),
        "camera": gym.spaces.Box(low=-180, high=180, shape=(2,),
                                 dtype=np.float32),
    })
    action_space = translate_action_space(action_space)
    print(repr(action_space))

    env = gym.Env()
    env.action_space = action_space
    env2 = DictToMultiDiscreteActionWrapper(env)
    try:
        # gym.Env itself has no step() implementation, so this call ends in
        # NotImplementedError, which is deliberately ignored.
        env2.step(env2.action_space.sample())
    except NotImplementedError:
        pass

    env = gym.Env()
    env.action_space = action_space
    env2 = DictToDiscreteActionWrapper(env)
    try:
        env2.step(env2.action_space.sample())
    except NotImplementedError:
        pass
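translate_action_space, DictToMultiDiscreteActionWrapper and DictToDiscreteActionWrapper are defined elsewhere in the source module. To illustrate the idea, a minimal sketch of flattening the Discrete entries of a Dict space into a MultiDiscrete (the continuous camera Box would additionally have to be discretized, which is omitted here); this sketch is an assumption about the approach, not the module's code:

import gym


def dict_to_multidiscrete(dict_space):
    """Collect the size of every Discrete entry of a Dict space, in key order."""
    sizes = [space.n for space in dict_space.spaces.values()
             if isinstance(space, gym.spaces.Discrete)]
    return gym.spaces.MultiDiscrete(sizes)


discrete_only = gym.spaces.Dict({
    "attack": gym.spaces.Discrete(2),
    "back": gym.spaces.Discrete(2),
    "forward": gym.spaces.Discrete(2),
})
print(dict_to_multidiscrete(discrete_only))  # MultiDiscrete([2 2 2])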
Example #8
import gym
env = gym.make("CartPole-v1")  # environments are created with gym.make() and a registered id; gym.Env is only the abstract base class
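For completeness, a minimal interaction loop with the environment created above, assuming the classic gym API (reset() returning only the observation and step() returning a 4-tuple), which is the API the other examples on this page use:

obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()  # random policy, purely for illustration
    obs, reward, done, info = env.step(action)
    if done:  # the episode ended, start a new one
        obs = env.reset()
env.close()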