Example #1
def test_boxed(name):
    env = make(name)
    initial = env.reset()
    env.render()
    height, width, num_layers = env.observation_space.shape
    if name == ENV_NAMES["boxed-large"]:
        assert height == (16 + 1 + 3)
        assert width == 8
        assert num_layers == 5
    if "Versus" in name:
        width = (width - 1) // 2
    final, _, _, _ = env.step(width * 2)  # Drop straight down.
    env.render()
    print(initial)
    print(final)

    for i in range(num_layers):
        # Check that the deal fell to the bottom.
        assert (initial[2, 0, i] == final[height - 1, 0, i])
        assert (initial[2, 1, i] == final[height - 1, 1, i])

        # Check that the deals advanced.
        assert (initial[0, 0, i] == final[1, 0, i])
        assert (initial[0, 1, i] == final[1, 1, i])
        assert (initial[1, 0, i] == final[2, 0, i])
        assert (initial[1, 1, i] == final[2, 1, i])
Example #2
def read_data(ndata=1000, nviz=0):
    X_data = np.zeros((ndata, 51, 12))
    Y_data = np.zeros((ndata, 50, 6))
    for n in range(0, ndata):
        try:
            state = np.load("data_airplane/short_state_%d.npz" % (n + 1))
            control = np.load("data_airplane/short_control_%d.npz" % (n + 1))
        except Exception:  # skip missing or unreadable trajectory files
            continue
        X_data[n, :, :] = state
        Y_data[n, :, :] = control

    if nviz > 0:
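        # Replay nviz randomly chosen recorded trajectories in the environment for visual inspection.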
        x_goal = np.zeros(12)
        x_goal[0] = 25
        x_goal[2] = 50
        env = make("AirPlane-v0", g=1., dt=0.1)
        for n in range(nviz):
            idx = np.random.randint(0, ndata)
            print("data %d" % idx)
            x_0 = X_data[idx, 0, :]
            env.x_0 = x_0
            env.env.x_0 = x_0
            env.reset()
            u = Y_data[idx, :, :]
            for k in range(u.shape[0]):
                env.step(u[k, :])
            env.render(x_goals=[x_goal])

    return X_data, Y_data
Example #3
def test_env(name):
    env = make(name)
    ob_space = env.observation_space
    act_space = env.action_space
    ob = env.reset()
    msg = 'Reset observation: {!r} not in space'.format(ob)
    assert ob_space.contains(ob), msg
    a = act_space.sample()
    observation, reward, done, _info = env.step(a)
    msg = 'Step observation: {!r} not in space'.format(observation)
    assert ob_space.contains(observation), msg
    if hasattr(env.unwrapped, "permute_observation"):
        permuted = env.unwrapped.permute_observation(observation)
        print(permuted[0] == observation[0])
        assert ob_space.contains(permuted), msg
    assert np.isscalar(reward), "{} is not a scalar for {}".format(reward, env)
    assert isinstance(done, bool), "Expected {} to be a boolean".format(done)

    for mode in env.metadata.get('render.modes', []):
        env.render(mode=mode)
    env.close()

    # Make sure we can render the environment after close.
    for mode in env.metadata.get('render.modes', []):
        env.render(mode=mode)
    env.close()
Example #4
def test_tree_search():
    env = make("PdPEndless4-v0")

    def deep_agent():
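        # Greedy three-ply lookahead: sum the scores of child, grandchild and
        # great-grandchild nodes and keep the root action with the best total.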
        root = env.unwrapped.get_root()
        best_score = 0
        best_action = np.random.randint(2, env.action_space.n)
        for action, (child, score) in enumerate(root.get_children()):
            for grand_child, child_score in child.get_children():
                for _, grand_child_score in grand_child.get_children():
                    total = score + child_score + grand_child_score
                    if total > best_score:
                        best_action = action
                        best_score = total
        return best_action

    def agent():
        root = env.unwrapped.get_root()
        best_score = 0
        best_action = np.random.randint(2, env.action_space.n)
        for action, (child, score) in enumerate(root.get_children()):
            for grand_child, child_score in child.get_children():
                total = score + child_score
                if total > best_score:
                    best_action = action
                    best_score = total
        return best_action

    env.reset()
    for _ in range(4):
        env.step(1)
    for _ in range(5):
        env.step(agent())
        env.render(mode="human")
Example #5
    def _launch(self):
        """Launch the environment."""

        self.env = make("SEIR-MA-PYMARL-v0")
        districts_group_ids = [self.env.unwrapped.district_idx(name) for name in self.district_names]
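        # Stack wrappers on the base env: normalise observations and rewards, then
        # restrict observations, actions and rewards to the selected district group.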
        self.env = NormalizedObservationWrapper(self.env)
        self.env = NormalizedRewardWrapper(self.env)
        self.env = MultiAgentSelectObservation(self.env, districts_group_ids, maac=False)
        self.env = MultiAgentSelectAction(self.env, districts_group_ids, 1, maac=False)
        self.env = MultiAgentSelectReward(self.env, districts_group_ids)

        self.env.seed(self._seed)
Example #6
def test_random_rollout(name):
    env = make(name)
    agent = lambda ob: env.action_space.sample()  # noqa: E731
    ob = env.reset()
    for _ in range(100):
        assert env.observation_space.contains(ob)
        a = agent(ob)
        assert env.action_space.contains(a)
        (ob, _reward, done, _info) = env.step(a)
        env.render(mode="human")
        if done:
            break
Example #7
def test_read_record():
    record = """[[
    1, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    0, 0, 1, 0, 0, 0,
    0, 0, 2, 0, 0, 0
    ],[
    0, 3, 2, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    3, 0, 0, 0, 0, 0,
    3, 0, 0, 0, 0, 0
    ],[
    0, 1, 2, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ],[
    0, 0, 0, 2, 0, 0,
    0, 0, 0, 4, 0, 0
    ],[
    0, 0, 0, 4, 0, 0,
    0, 0, 0, 4, 0, 0
    ],[
    0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 1, 0
    ],[
    0, 0, 0, 0, 1, 0,
    0, 0, 0, 0, 4, 0
    ],[
    0, 0, 0, 0, 0, 2,
    0, 0, 0, 0, 0, 2
    ],[
    0, 0, 0, 0, 0, 2,
    0, 0, 0, 0, 0, 1
    ],[
    3, 4, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0
    ]]"""
    stream = StringIO()
    stream.write(record)
    stream.seek(0)
    env = make(ENV_NAMES["tsu"])
    for observation, reward, done, info in env.read_record(stream):
        info["state"].render()
Example #8
def test_tree(name):
    env = make(name)
    agent = lambda ob: env.action_space.sample()  # noqa: E731
    observation = env.reset()
    for _ in range(12):
        env.step(1)
    for _ in range(50):
        assert env.observation_space.contains(observation)
        action = agent(observation)
        assert env.action_space.contains(action)
        (observation, reward, done, _info) = env.step(action)
        assert env.reward_range[0] <= reward <= env.reward_range[-1]
        env.render(mode="human")
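        # Every reward in the env's search tree should fall within the declared reward range.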
        rewards = env.unwrapped.get_tree(include_observations=False)
        print(rewards)
        for reward in rewards:
            assert env.reward_range[0] <= reward <= env.reward_range[-1]
        if done:
            break
Example #9
def test_tree_search(name):
    env = make(name)

    def deep_agent():
        root = env.unwrapped.get_root()
        best_score = 0
        best_action = np.random.randint(0, env.action_space.n)
        for action, (child, score) in enumerate(root.get_children(True)):
            if child is None:
                continue
            for grand_child, child_score in child.get_children():
                for _, grand_child_score in grand_child.get_children():
                    total = score + child_score + grand_child_score
                    if total > best_score:
                        best_action = action
                        best_score = total
        return best_action

    def agent():
        root = env.unwrapped.get_root()
        best_score = 0
        best_action = np.random.randint(0, env.action_space.n)
        for action, (child, score) in enumerate(root.get_children(True)):
            if child is None:
                continue
            for _, child_score in child.get_children():
                total = score + child_score
                if total > best_score:
                    best_action = action
                    best_score = total
        return best_action

    env.reset()
    total_reward = 0
    for _ in range(10):
        _, reward, done, _ = env.step(agent())
        if done:
            break
        total_reward += reward
        env.render(mode="human")
        print_up(1)
        print("Reward =", total_reward)
Example #10
    # x_goal = [0., 0., 0., 0.]
    # ilqr_actions = control.run_ilqr(Q, R, Qf, x_goal, n_step_ilqr)
    # lqr_actions = control.run_lqr(Q, R, x_goal, n_step_lqr, ilqr_actions[-1])
    # print(env.x)
    # env.render()

    n_step_lqr, n_step_ilqr = 200, 50
    Q = np.eye(6, 6) * 10
    Qf = np.eye(6, 6) * 1000
    R = np.eye(3, 3)
    x_goal = [0., 0., 0., 0., 0., 0.]

    x_0 = [5.396, 0.368, 3.022, -8.616, 2.409, 8.418]
    x_mid = [1.5, 1., 1.5, 1., 1., 1.]
    x_mid2 = [0.5, 0.5, 0.5, 0.0, 0.0, -0.]
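    # Swing up in three iLQR stages (x_mid, x_mid2, then x_goal), re-seeding the
    # environment's initial state from its current state between stages.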
    env = make("Pendulum3-v0", dt=0.02, x_0=x_0)
    env.reset()

    u_init = np.zeros((3, n_step_ilqr))
    u_init[0, :n_step_ilqr] = np.linspace(1, 0, n_step_ilqr)
    control = ControllerTriplePendulum(env, use_sympy=False)
    ilqr_actions_1 = control.run_ilqr(Q, R, Qf, x_mid, n_step_ilqr, u_init=u_init)

    env.env.x_0 = env.x
    ilqr_actions_2 = control.run_ilqr(Q, R, Qf, x_mid2, n_step_ilqr)  # , u_init=u_init*0.2)

    env.env.x_0 = env.x
    ilqr_actions_3 = control.run_ilqr(Q, R, Qf, x_goal, n_step_ilqr)  # , u_init=u_init*0.2)

    try:
        lqr_actions = control.run_lqr(Q, R, x_goal, n_step_lqr, ilqr_actions_3[-1])
Example #11
    #
    # # choose a random initial state
    # ns = bh.env.n_state
    # x_0 = [1., 0.5]
    # # x_0 = np.random.random(ns)
    # # x_0[0:ns//2] *= 2 * np.pi
    # # x_0[ns//2:] *= 2 * np.pi
    # # x_0[ns//2:] -= 2 * np.pi
    # #
    # print(x_0)
    #
    # # run the model.
    # bh.run_model(x_0, 1500)

    # make the environment
    env = make("Pendulum2-v0", dt=0.1)
    env.reset()

    # call the class
    bh = BehaviorClonning(env,
                          ControllerDoublePendulum,
                          200,
                          folder_name="data2/",
                          n_step_ilqr=150,
                          n_step_lqr=250)

    generate_data, train = False, False
    if generate_data:
        # generate data.
        bh.generate_data()
Example #12
import gym
from gym.envs.registration import register, make
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
ENV_NAME = 'TradebotEnvironment-v0'

if ENV_NAME in gym.envs.registry.env_specs:
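    # Drop the stale registration so the same ID can be registered again in this session.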
    del gym.envs.registry.env_specs[ENV_NAME]

register(
    id=ENV_NAME,
    entry_point='trade_env:TradebotEnvironment',
    max_episode_steps=10000,
)

env = make(ENV_NAME)

input_file = 'data/MSFT_1d_test.csv'
output_file = 'data/MSFT_1d_test_fe.csv'
w_file_name = 'data/MSFT_1d.h5f'

feature_extractor = FeatureExtractor(input_file, output_file)
feature_extractor.extract()

feature_list = feature_extractor.get_feature_names()

trade_cost = 0.03
env.init_file(output_file, feature_list, trade_cost, False)

model = create_model(env)
memory = SequentialMemory(limit=5000, window_length=1)
Example #13
              - T1 / inertia + self.c * u1 / inertia
        eq2 = 2 * u1_prime * cos(th1 - th2) + 2 * u2_prime + u3_prime * cos(th2 - th3) \
              - 2 * u1 ** 2 * sin(th1 - th2) + u3 ** 2 * sin(th2 - th3) \
              - self.g / self.L * sin(th2) * 2 \
              - T2 / inertia + self.c * u2 / inertia
        eq3 = u1_prime * cos(th1 - th3) + u2_prime * cos(th2 - th3) + u3_prime \
              - u1 ** 2 * sin(th1 - th3) - u2 ** 2 * sin(th2 - th3) \
              - self.g / self.L * sin(th3) \
              - T3 / inertia + self.c * u3 / inertia

        return eq1, eq2, eq3


from gym.envs.registration import make
if __name__ == '__main__':
    p3 = Pendulum3Env(x_0=[3., 1., 3., 1., 0., 1.], dt=0.05)
    p3.reset()
    for _ in range(500):
        p3.step([
            np.random.random() - 0.8,
            np.random.random() - 0.5,
            np.random.random() - 0.5
        ])
    p3.render()

    env = make("Pendulum2-v0", dt=0.01, x_0=[1., 1., 0., 1.])
    env.reset()
    for _ in range(500):
        env.step([0., 0.])
    env.render()
Example #14
import numpy as np
from util import ControllerDoublePendulum
from gym.envs.registration import make

env = make("Pendulum2-v0", x_0=[np.pi * 0.9, -np.pi * 0.8, 0., -1.], dt=0.01)
env.reset()
control = ControllerDoublePendulum(env)
n_step_lqr, n_step_ilqr = 250, 150
Q = np.eye(4, 4)
Q[1, 1] = 0
Q[2, 2] = 0
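# Zero diagonal weights drop those state components from the running cost; the
# terminal cost Qf below still penalises the full state.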
Qf = np.eye(4, 4) * 1000
R = np.eye(2, 2)
x_goal = [0., 0., 0., 0.]

ilqr_actions = control.run_ilqr(Q, R, Qf, x_goal, n_step_ilqr)
lqr_actions = control.run_lqr(Q, R, x_goal, n_step_lqr, ilqr_actions[-1])
print(env.x)
env.render()
Example #15
import numpy as np
from gym.envs.registration import make

n_step_lqr, n_step_ilqr = 1500, 30
Q = np.eye(12, 12) * 10
Qf = np.eye(12, 12) * 1000
R = np.eye(6, 6)
x_goal = [40., 0., 40., 0., 0., 0., 0., .5, .5, 0., 0., 0.]
x_0 = [0., 0., 0., 2., -0., 0., .5, 0., 0., 0., 0., 0.]
x_med = [10., 0., 10., 0., -0., 0., 0., 0., 0., 0., 0., 0.]
env = make("AirPlane-v0", dt=0.01, x_0=x_0, g=1.)
env.reset()

from util import ControllerAirPlane

control = ControllerAirPlane(env)
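# Stage the flight: iLQR drives the plane to the intermediate state x_med, then LQR
# tracks x_goal starting from the final iLQR action.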
ilqr_actions = control.run_ilqr(Q, R, Qf, x_med, n_step_ilqr)
control.run_lqr(Q, R, x_goal, n_step_lqr, ilqr_actions[-1])

env.render(skip=5)
#env.animate(file_name="ap.gif", x_goal=x_goal)
print([h['x'] for h in env.history])
                   "North Devon", "South Hams", "Teignbridge", "Torridge", "West Devon"]

N_WEEKS = 43
GRANULARITY = Granularity.WEEK

register(id="SEIRmulti-v0",
         entry_point="epcontrol.seir_environment:SEIREnvironment",
         max_episode_steps=N_WEEKS * (7 if GRANULARITY == Granularity.DAY else 1),
         kwargs=dict(grouped_census=pd.read_csv(args.census, index_col=0),
                     flux=Flux.Table(args.flux),
                     r0=args.R0,
                     n_weeks=N_WEEKS,
                     step_granularity=GRANULARITY,
                     model_seed=args.district_name,
                     budget_per_district_in_weeks=args.budget_in_weeks))
env = make("SEIRmulti-v0")
DISTRICTS_GROUP_IDS = [env.unwrapped.district_idx(name) for name in DISTRICTS_GROUP]

def evaluate(env, model: PPO2, districts_ids, num_steps):
    obs = env.reset()
    sus_before = districts_susceptibles(env, districts_ids)
    for _ in range(num_steps):
        action, _states = model.predict(obs)
        obs, _, _, _ = env.step(action)
    sus_after = districts_susceptibles(env, districts_ids)
    attack_rate = 1.0 - (sus_after / sus_before)

    assert total_school_closures(env) <= len(districts_ids)*args.budget_in_weeks

    return attack_rate
Example #17
EPISODES = 10

if ENV_NAME in gym.envs.registry.env_specs:
    del gym.envs.registry.env_specs[ENV_NAME]

# registration
register(
    id=ENV_NAME,
    entry_point='bitmexLib:bitmexEnvironment',
    max_episode_steps=10000,
)
theta_space = np.linspace(-1, 1, 10)
theta_dot_space = np.linspace(0, 10000, 10)


env = make(ENV_NAME)  # make the environment

file_name = '..\\data\\small_batch.csv'
feature_list = ["O_Scaled", "H_Scaled", "L_Scaled", "C_Scaled", "V_Scaled", "VWap_Scaled",
                "EMA5_Scaled", "EMA10_Scaled", "EMA20_Scaled", "EMA50_Scaled", "EMA100_Scaled", "EMA200_Scaled",
                "trend_ichimoku_a_Scaled", "trend_ichimoku_b_Scaled", "momentum_rsi_Scaled", "momentum_mfi_Scaled",
                "volatility_bbh_Scaled", "volatility_bbl_Scaled", "volatility_bbm_Scaled", "volatility_bbhi_Scaled", "volatility_bbli_Scaled"
                ]
trade_cost = 0


env.init_file(file_name, feature_list, trade_cost)

obv = env.reset()

# print(getstate(obv))
Example #18
def make_env(task='reach',
             gripper='parallel_jaw',
             num_block=5,
             render=False,
             binary_reward=True,
             grip_informed_goal=False,
             task_decomposition=False,
             joint_control=False,
             max_episode_steps=50,
             distance_threshold=0.05,
             primitive=None,
             image_observation=False,
             depth_image=False,
             goal_image=False,
             point_cloud=False,
             state_noise=False,
             visualize_target=True,
             camera_setup=None,
             observation_cam_id=None,
             goal_cam_id=0,
             use_curriculum=False,
             num_goals_to_generate=1e6):
    if observation_cam_id is None:
        observation_cam_id = [0]
    tasks = [
        'push', 'reach', 'slide', 'pick_and_place', 'block_stack',
        'block_rearrange', 'chest_pick_and_place', 'chest_push',
        'primitive_push_assemble', 'primitive_push_reach', 'insertion'
    ]
    grippers = ['robotiq85', 'parallel_jaw']
    assert gripper in grippers, 'invalid gripper: {}, only support: {}'.format(
        gripper, grippers)
    if task == 'reach':
        task_tag = 'Reach'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_single_step_envs:KukaReachEnv'
    elif task == 'push':
        task_tag = 'Push'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_single_step_envs:KukaPushEnv'
    elif task == 'pick_and_place':
        task_tag = 'PickAndPlace'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_single_step_envs:KukaPickAndPlaceEnv'
    elif task == 'slide':
        task_tag = 'Slide'
        assert not image_observation, "slide task doesn't support image observation well."
        image_observation = depth_image = goal_image = False
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_single_step_envs:KukaSlideEnv'
    elif task == 'block_stack':
        task_tag = 'BlockStack'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_multi_step_envs:KukaBlockStackEnv'
    elif task == 'block_rearrange':
        task_tag = 'BlockRearrangeEnv'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_multi_step_envs:KukaBlockRearrangeEnv'
    elif task == 'chest_pick_and_place':
        task_tag = 'ChestPickAndPlace'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_multi_step_envs:KukaChestPickAndPlaceEnv'
    elif task == 'chest_push':
        task_tag = 'ChestPush'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_multi_step_envs:KukaChestPushEnv'
    elif task == 'primitive_push_assemble':
        task_tag = 'ShapeAssemble'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_shape_assemble_envs:KukaPushAssembleEnv'
    elif task == 'primitive_push_reach':
        task_tag = 'PrimPushReach'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_shape_assemble_envs:KukaPushReachEnv'
    elif task == 'insertion':
        task_tag = 'Insertion'
        entry = 'pybullet_multigoal_gym.envs.task_envs.kuka_insertion_envs:KukaInsertionEnv'
    else:
        raise ValueError('invalid task name: {}, only support: {}'.format(
            task, tasks))
    env_id = 'Kuka' + task_tag
    if gripper == 'parallel_jaw':
        env_id += 'ParallelGrip'
    else:
        env_id += 'Robotiq85Grip'
    if render:
        env_id += 'Render'
    if binary_reward:
        env_id += 'SparseReward'
    else:
        env_id += 'DenseReward'
    if joint_control:
        env_id += 'JointCtrl'
    if image_observation:
        if depth_image:
            env_id += 'DepthImgObs'
        else:
            env_id += 'ImgObs'
        if goal_image:
            env_id += 'ImgGoal'
        if camera_setup is not None:
            assert len(observation_cam_id) <= len(
                camera_setup) + 1, 'invalid observation camera id list'
            assert goal_cam_id <= len(
                camera_setup) - 1, 'invalid goal camera id'
            print('Received {} cameras, cam {} for observation, cam {} for goal image'
                  .format(len(camera_setup), observation_cam_id, goal_cam_id))
        else:
            print('Using default camera for observation and goal image')
    env_id += '-v0'
    print('Task id: %s' % env_id)
    if env_id not in registry.env_specs:
        # register and make env instance
        if task in ['push', 'reach', 'slide', 'pick_and_place']:
            register(
                id=env_id,
                entry_point=entry,
                kwargs={
                    'render': render,
                    'binary_reward': binary_reward,
                    'joint_control': joint_control,
                    'distance_threshold': distance_threshold,
                    'image_observation': image_observation,
                    'depth_image': depth_image,
                    'goal_image': goal_image,
                    'visualize_target': visualize_target,
                    'camera_setup': camera_setup,
                    'observation_cam_id': observation_cam_id,
                    'goal_cam_id': goal_cam_id,
                    'gripper_type': gripper,
                },
                max_episode_steps=max_episode_steps,
            )
        elif task in [
                'block_stack', 'block_rearrange', 'chest_pick_and_place',
                'chest_push'
        ]:
            assert num_block <= 5, "only support up to 5 blocks"
            register(
                id=env_id,
                entry_point=entry,
                kwargs={
                    'render': render,
                    'binary_reward': binary_reward,
                    'joint_control': joint_control,
                    'distance_threshold': distance_threshold,
                    'task_decomposition': task_decomposition,
                    'image_observation': image_observation,
                    'depth_image': depth_image,
                    'goal_image': goal_image,
                    'visualize_target': visualize_target,
                    'camera_setup': camera_setup,
                    'observation_cam_id': observation_cam_id,
                    'goal_cam_id': goal_cam_id,
                    'gripper_type': gripper,
                    'grip_informed_goal': grip_informed_goal,
                    'num_block': num_block,
                    'use_curriculum': use_curriculum,
                    'num_goals_to_generate': int(num_goals_to_generate)
                },
                max_episode_steps=max_episode_steps,
            )
        elif task in ['primitive_push_assemble', 'primitive_push_reach']:
            assert primitive in ['discrete_push', 'continuous_push']
            register(
                id=env_id,
                entry_point=entry,
                kwargs={
                    'render': render,
                    'binary_reward': binary_reward,
                    'distance_threshold': distance_threshold,
                    'image_observation': image_observation,
                    'depth_image': depth_image,
                    'pcd': point_cloud,
                    'goal_image': goal_image,
                    'visualize_target': visualize_target,
                    'camera_setup': camera_setup,
                    'observation_cam_id': observation_cam_id,
                    'goal_cam_id': goal_cam_id,
                    'gripper_type': gripper,
                    'primitive': primitive
                },
                max_episode_steps=max_episode_steps,
            )
        else:
            assert task in ['insertion']
            register(
                id=env_id,
                entry_point=entry,
                kwargs={
                    'render': render,
                    'binary_reward': binary_reward,
                    'distance_threshold': distance_threshold,
                    'image_observation': image_observation,
                    'depth_image': depth_image,
                    'pcd': point_cloud,
                    'goal_image': goal_image,
                    'state_noise': state_noise,
                    'visualize_target': visualize_target,
                    'camera_setup': camera_setup,
                    'observation_cam_id': observation_cam_id,
                    'goal_cam_id': goal_cam_id,
                    'gripper_type': gripper,
                },
                max_episode_steps=max_episode_steps,
            )

    return make(env_id)
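A minimal usage sketch for the factory above; the package name pybullet_multigoal_gym and the alias pmg are assumptions inferred from the entry-point paths, and the parameter values are illustrative:

import pybullet_multigoal_gym as pmg  # assumed import name (see entry_point paths above)

# Build a sparse-reward pick-and-place env and take one random step.
env = pmg.make_env(task='pick_and_place', gripper='parallel_jaw',
                   render=False, binary_reward=True, max_episode_steps=50)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()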
Example #19
for game in ['FlappyBird']:
    nondeterministic = False
    register(
        id='{}-v1'.format(game),
        entry_point='gym_ple_custom.PLEEnv:PLEEnv',
        kwargs={
            'game_name': game,
            'display_screen': False,
            'rng': random.randint(0, 999999999)
        },
        tags={'wrapper_config.TimeLimit.max_episode_steps': 10000},
        nondeterministic=nondeterministic,
    )

X = Discrete_DQN_Agent(
    env=make('FlappyBird-v1'),
    model_function=model_function,
    save_dir='../weights/FlappyBird/',
    max_epos=100000,
    action_size=2,
    state_size=100,
    second_size=50,
    dim=2,
    frames_input=4,
    lr=1e-5,
    model_name='NoPooling',
    load_weights=True,
    epos_snap=450,
    meta_data_types_to_save=get_meta_data_types_to_save(),
    meta_data_types_functions=get_meta_data_types_functions(),
    epos_data_types_to_save=['QValues'],
Example #20
    outcome = Outcome.PEAK_DAY

register(id="SEIRsingle-v0",
         entry_point="epcontrol.seir_environment:SEIREnvironment",
         max_episode_steps=n_weeks *
         (7 if granularity == Granularity.DAY else 1),
         kwargs=dict(grouped_census=grouped_census,
                     flux=flux,
                     r0=args.R0,
                     n_weeks=n_weeks,
                     step_granularity=granularity,
                     outcome=outcome,
                     model_seed=args.district_name,
                     budget_per_district_in_weeks=args.budget_in_weeks))

env = make("SEIRsingle-v0")
env = NormalizedObservationWrapper(env)
if args.outcome == "ar":
    env = NormalizedRewardWrapper(env)
logger.configure(folder=args.monitor_path, format_strs=["csv"])

env = DummyVecEnv([lambda: env])

print(f"tensorboard --logdir {args.monitor_path}")

layers = [args.n_hidden_units] * args.n_hidden_layers

model = PPO2(MlpPolicy,
             env,
             verbose=0,
             tensorboard_log=args.monitor_path,
Example #21
    #
    # # choose a random initial state
    # x_0 = np.random.random(4)
    # x_0[0:2] *= 2 * np.pi
    # x_0[2:] *= 8 * np.pi
    # x_0[2:] -= 4 * np.pi
    #
    # print(x_0)
    #
    # # run the model.
    # bh.run_model(x_0, 1500)
    #
    # bh.env.save_history()

    # make the environment
    env = make("Pendulum3-v0", dt=0.02)
    env.reset()

    # call the class
    x_range = [(0, 2 * np.pi)] * 3 + [(-9, 9)] * 3
    Q = np.eye(6, 6) * 10
    bh3 = BehaviorClonning3(env, ControllerTriplePendulum, 1000, folder_name="data3/", Q=Q, n_step_lqr=200, x_range=x_range)

    generate_data, train = False, False
    if generate_data:
        # generate data.
        for _ in range(100):
            try:
                bh3.generate_data()
            except Exception:  # fall back to a fresh environment if data generation fails
                env = make("Pendulum3-v0", dt=0.02)