Code example #1
import time

from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy


def main():
    param_noise = None

    env1 = tm700GymEnv2(renders=False, isDiscrete=False)
    model = DDPG(MlpPolicy,
                 env1,
                 verbose=1,
                 param_noise=param_noise,
                 random_exploration=0.1)
    # model = DQN(MlpPolicy, env1, verbose=1, exploration_fraction=0.3)

    start = time.time()
    model.learn(total_timesteps=1000000)
    print("Saving model")
    model.save("tm_test_model_randomblocksrotated.pkl")

    print('total time', time.time() - start)


if __name__ == '__main__':
    main()
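
The script above disables parameter noise and relies on random_exploration for exploration. A common alternative with stable-baselines v2's DDPG is temporally correlated action noise; the following is a minimal sketch of that variant, not the project's own training setup (note that the noise classes moved from stable_baselines.ddpg.noise to stable_baselines.common.noise in later releases):

import numpy as np

from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise

env1 = tm700GymEnv2(renders=False, isDiscrete=False)
n_actions = env1.action_space.shape[-1]
# Temporally correlated noise added to the deterministic policy's actions
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                            sigma=0.1 * np.ones(n_actions))
model = DDPG(MlpPolicy, env1, verbose=1,
             param_noise=None, action_noise=action_noise)
model.learn(total_timesteps=1000000)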
Code example #2
import os, sys, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(os.path.dirname(currentdir))
sys.path.insert(0, parentdir)
import gym
from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2
from stable_baselines import DQN, PPO2, DDPG
from pybullet_envs.baselines.train_tm700_multivec import evaluate, record_video

#################### PARAMETERS

savedmodel = "tm_test_model_randomblocks.pkl"
env = tm700GymEnv2(renders=True, isDiscrete=False)
model = DDPG.load(savedmodel, env=env)

########## run simulation

def runsimulation(model, env, iterations):
    obs = env.reset()
    time_step_counter = 0
    while time_step_counter < iterations:
        action, _ = model.predict(obs)
        obs, rewards, dones, _ = env.step(action)  # Assumption: eval conducted on single env only!
        time_step_counter += 1

        # time.sleep(0.1)
        if dones:
            # Episode finished: reset and keep stepping until the budget runs out
            obs = env.reset()
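
A minimal driver for runsimulation, reusing the model and env loaded at the top of this example (the iteration count is arbitrary):

if __name__ == '__main__':
    runsimulation(model, env, iterations=1000)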
Code example #3
    """
    Utility function for multiprocessed env.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess
    """

    def _init():
        env = gym.make(env_id)
        # Important: use a different seed for each environment
        env.seed(seed + rank)
        return env

    set_global_seeds(seed)
    return _init

if __name__ == '__main__':
    # tm700GymEnv2 is a custom env, so build it directly instead of via gym.make()
    env_fn = lambda: tm700GymEnv2(renders=True, isDiscrete=True)
    # The different numbers of processes that will be compared
    PROCESSES_TO_TEST = [1, 2, 4, 8, 16]
    # RL algorithms can often be unstable, so we run several experiments (see https://arxiv.org/abs/1709.06560)
    NUM_EXPERIMENTS = 3
    TRAIN_STEPS = 5000
    # Number of episodes for evaluation
    EVAL_EPS = 20
    ALGO = PPO2

    # We will create one environment to evaluate the agent on
    eval_env = DummyVecEnv([env_fn])
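
The upstream stable-baselines multiprocessing tutorial that this snippet follows goes on to time training for each process count. The sketch below fills in that loop under the same assumptions, using a rank-seeded factory that builds tm700GymEnv2 directly instead of gym.make (evaluation with the imported evaluate helper is omitted because its signature is not shown here):

import time
import numpy as np
from stable_baselines.common.vec_env import SubprocVecEnv

def make_custom_env(rank, seed=0):
    # Same pattern as make_env above, but constructing the custom env directly;
    # assumes tm700GymEnv2 implements seed(), as gym.Env subclasses do
    def _init():
        env = tm700GymEnv2(renders=False, isDiscrete=True)
        env.seed(seed + rank)
        return env
    return _init

for n_procs in PROCESSES_TO_TEST:
    if n_procs == 1:
        # A single process does not need the subprocess machinery
        train_env = DummyVecEnv([make_custom_env(0)])
    else:
        train_env = SubprocVecEnv([make_custom_env(i) for i in range(n_procs)])
    times = []
    for _ in range(NUM_EXPERIMENTS):
        start = time.time()
        model = ALGO('MlpPolicy', train_env, verbose=0)
        model.learn(total_timesteps=TRAIN_STEPS)
        times.append(time.time() - start)
    train_env.close()
    print(n_procs, 'processes:', np.mean(times), 'seconds per experiment')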
Code example #4
File: helpers.py  Project: huetufemchopf/bullet3
# from stable_baselines.common.env_checker import check_env
from pybullet_envs.bullet.tm700GymEnv_TEST import tm700GymEnv2

env = tm700GymEnv2()
# It will check your custom environment and output additional warnings if needed
# check_env(env)

obs = env.reset()
n_steps = 10
for _ in range(n_steps):
    # Random action
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    if done:
        # Restart the rollout if the episode terminates early
        obs = env.reset()
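
If check_env is not available in the installed stable-baselines version (hence the commented-out import), a few manual assertions catch the most common custom-env mistakes. A minimal sketch, assuming the standard gym.spaces API:

obs = env.reset()
# reset() must return an observation inside the declared observation space
assert env.observation_space.contains(obs), "reset() observation out of bounds"
obs, reward, done, info = env.step(env.action_space.sample())
assert env.observation_space.contains(obs), "step() observation out of bounds"
assert isinstance(info, dict), "step() must return an info dict"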