Example 1
    def test():

        env = RunEnv(visualize=False)

        observation_d = env.reset(project=False)
        observation = process_obs_dict(observation_d)

        total_reward = 0
        steps = 0

        while True:

            #a = AGENT OUTPUT
            a, q = agent.act(observation)

            observation_d, reward, done, info = env.step(a, project=False)
            observation = process_obs_dict(observation_d)

            total_reward += reward
            steps += 1

            #print(observation)

            print(steps, 'total reward:', total_reward)

            if done:

                break

        print('finished testing!')
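Examples 1 and 2 call a process_obs_dict helper that is not included in the snippets: it turns the state-description dict returned by env.reset(project=False) and env.step(a, project=False) into a flat feature vector for the agent. A minimal sketch of such a helper, assuming a simple recursive flattening (the original projects' feature selection and ordering may differ):

import numpy as np

def process_obs_dict(obs_dict):
    # Hypothetical helper: recursively flatten the osim-rl state dict
    # (nested dicts and lists of floats) into a single 1-D list.
    features = []

    def _flatten(value):
        if isinstance(value, dict):
            for key in sorted(value):       # fixed key order keeps the vector stable
                _flatten(value[key])
        elif isinstance(value, (list, tuple, np.ndarray)):
            for item in value:
                _flatten(item)
        else:
            features.append(float(value))

    _flatten(obs_dict)
    return features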
Example 2
	def test(frameskip = 1, vis = False):

		env = RunEnv(visualize=vis)
		#env.change_model(model='2D', prosthetic=True, difficulty=0, seed=None)

		observation_d = env.reset(project = False)
		#observation = process_obs_dict(observation_d)


		total_reward = 0
		steps = 0

		while True:

			#a = AGENT OUTPUT
			observation = process_obs_dict(observation_d)
			a, q = agent.act(observation)

			for _ in range(frameskip):

				observation_d, reward, done, info = env.step(a, project = False)
				#observation = process_obs_dict(observation_d)

				total_reward += reward
				steps += 1

				# stop repeating the action once the episode has ended
				if done:
					break

			#print(observation)

			print(steps, 'total reward:', total_reward)

			if done:

				break

		print('finished testing!')
Example 3
def play_multi_episode(agent, episode_num=2, vis=False, seed=0):
    np.random.seed(seed)
    env = ProstheticsEnv(visualize=vis)
    env.change_model(model='3D', difficulty=1, prosthetic=True, seed=seed)
    env = TestReward(env)
    env = FrameSkip(env, 4)
    env = ActionScale(env)
    env = PelvisBasedObs(env)

    all_reward = []

    for e in range(episode_num):
        t = time.time()
        episode_reward = 0.0
        obs = env.reset(project=False)
        step = 0
        while True:
            step += 1

            batch_obs = np.expand_dims(obs, axis=0)

            action = agent.ensemble_predict(batch_obs)
            action = np.squeeze(action, axis=0)
            obs, reward, done, info = env.step(action, project=False)
            episode_reward += reward
            logger.info("[step/{}]reward:{}".format(step, reward))
            if done:
                break
        all_reward.append(episode_reward)
        t = time.time() - t
        logger.info(
            "[episode/{}] time: {} episode_reward:{} mean_reward:{}".format(
                e, t, episode_reward, np.mean(all_reward)))
    logger.info("Mean reward:{}".format(np.mean(all_reward)))
Example 4
    def _f():

        #if seed%2==0:
        env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
        env.change_model(model='2D', difficulty=2, prosthetic=False, seed=seed)
        #else:

        #env=ProstheticsEnv(visualize=False,integrator_accuracy = 3e-3)
        #env.change_model(model = '3D', difficulty = 0, prosthetic = True, seed=seed)
        '''
        if seed>=4 and seed<6: 
            env=ProstheticsEnv(visualize=False,integrator_accuracy = 3e-3)
            env.change_model(model = '2D', difficulty = 1, prosthetic = True, seed=seed)
        
        if seed>=6 and seed<8:
            env=ProstheticsEnv(visualize=False,integrator_accuracy = 3e-3)
            env.change_model(model = '3D', difficulty = 0, prosthetic = True, seed=seed)
        if seed>=8 and seed<10:
            env=ProstheticsEnv(visualize=False,integrator_accuracy = 3e-3)
            env.change_model(model = '3D', difficulty = 0, prosthetic = True, seed=seed)
        if seed>=10 and seed<12 :
            env=ProstheticsEnv(visualize=False,integrator_accuracy = 3e-3)
            env.change_model(model = '3D', difficulty = 0, prosthetic = True, seed=seed)
        #env.seed(seed)
        '''
        return env
Example 5
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_name', type=str, default='desc_200_1_1')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--discount', type=float, default=0.99)
    parser.add_argument('--n_iter', '-n', type=int, default=1000000)
    parser.add_argument('--batch_size', '-b', type=int, default=500)
    parser.add_argument('--ep_len', '-ep', type=float, default=-1.)
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-6)
    parser.add_argument('--reward_to_go', '-rtg', action='store_true', default=True)
    parser.add_argument('--test', '-t', action='store_true', default=False)
    parser.add_argument('--dont_normalize_advantages', '-dna', action='store_true', default=True)
    parser.add_argument('--nn_baseline', '-bl', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--n_experiments', '-e', type=int, default=1)
    parser.add_argument('--n_layers', '-l', type=int, default=3)
    parser.add_argument('--size', '-s', type=int, default=150)
    parser.add_argument('--disc_levels', '-dl', type=int, default=100)
    args = parser.parse_args()

    print('test: ', args.test)
    if not (os.path.exists('data')):
        os.makedirs('data')
    logdir = args.exp_name+'_'+time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join('data', logdir)
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    env = ProstheticsEnv(visualize=False, integrator_accuracy=1e-4)
    env.change_model(model='3D', difficulty=2, prosthetic=True, seed=0)
    print('ac_dim: ', env.action_space.shape)
    print('obs_dim: ', env.observation_space.shape)
    print('normalize: ', not (args.dont_normalize_advantages))

    max_path_length = args.ep_len if args.ep_len > 0 else None
    train_PG(
        exp_name=args.exp_name,
        env_name=env,
        n_iter=args.n_iter,
        gamma=args.discount,
        min_timesteps_per_batch=args.batch_size,
        max_path_length=max_path_length,
        learning_rate=args.learning_rate,
        reward_to_go=args.reward_to_go,
        animate=args.render,
        logdir=os.path.join(logdir, '%d' % 0),
        seed=0,
        normalize_advantages=not (args.dont_normalize_advantages),
        nn_baseline=args.nn_baseline,
        n_layers=args.n_layers,
        size=args.size,
        test=args.test,
        disc_levels = args.disc_levels
    )
Example 6
    def __init__(self,
                 reward_scale=0.1,
                 frame_skip=1,
                 visualize=False,
                 reinit_random_action_every=1,
                 randomized_start=False,
                 max_episode_length=300,
                 death_penalty=0.0,
                 living_bonus=0.0,
                 crossing_legs_penalty=0.0,
                 bending_knees_bonus=0.0,
                 left_knee_bonus=0.,
                 right_knee_bonus=0.,
                 max_reward=10.0,
                 activations_penalty=0.,
                 bonus_for_knee_angles_scale=0.,
                 bonus_for_knee_angles_angle=0.):

        self.reinit_random_action_every = reinit_random_action_every
        self.visualize = visualize
        self.randomized_start = randomized_start
        self.env = ProstheticsEnv(visualize=visualize,
                                  integrator_accuracy=1e-3)
        self.env.change_model(model="3D",
                              prosthetic=True,
                              difficulty=1,
                              seed=np.random.randint(200))

        self.frame_skip = frame_skip
        self.observation_shapes = [(345, )]
        self.action_size = 19
        self.max_ep_length = max_episode_length - 2
        self.activations_penalty = activations_penalty
        self.bonus_for_knee_angles_scale = bonus_for_knee_angles_scale
        self.bonus_for_knee_angles_angle = bonus_for_knee_angles_angle

        self.observation_space = Box(low=self.env.observation_space.low[0],
                                     high=self.env.observation_space.high[0],
                                     shape=(344, ))
        self.action_space = Box(low=self.env.action_space.low[0],
                                high=self.env.action_space.high[0],
                                shape=(19, ))

        # reward shaping
        self.reward_scale = reward_scale
        self.death_penalty = np.abs(death_penalty)
        self.living_bonus = living_bonus
        self.cross_legs_coef = crossing_legs_penalty
        self.bending_knees_coef = bending_knees_bonus
        self.left_knee_bonus = left_knee_bonus
        self.right_knee_bonus = right_knee_bonus
        self.max_reward = max_reward

        self.episodes = 1
        self.ep2reload = 10
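Example 6 only shows the wrapper's constructor; the shaping coefficients it stores are presumably applied inside the wrapper's step() method, which is not part of the snippet. A hypothetical sketch of how such terms are often combined into a shaped reward (the measured terms and the exact formula are assumptions, not the original code):

    def shape_reward(self, env_reward, died, crossing_legs_term, knee_bend_term):
        # Illustrative only: combine the coefficients stored in __init__.
        reward = min(env_reward, self.max_reward)        # clip the raw reward
        reward += self.living_bonus                      # per-step bonus for staying alive
        if died:
            reward -= self.death_penalty                 # penalty on early termination
        reward += self.cross_legs_coef * crossing_legs_term
        reward += self.bending_knees_coef * knee_bend_term
        return reward * self.reward_scale                # global scaling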
Example 7
    def respawn(self):
        """Method to respawn the env (hard reset)

            Parameters:
                None

            Returns:
                None
        """

        self.env = ProstheticsEnv(visualize=False, difficulty=self.difficulty)
Example 8
File: ppo.py Project: XDang13/CGSL
    def start_sim_evaluate(self):
        env = ProstheticsEnv(visualize=False)
        total_reward = 0
        observation = env.reset()
        for t in range(1000):
            state = observation
            action = self.select_action_evaluate(state)
            observation,reward,done,_ = env.step(action)
            total_reward += reward
            if done:
                break   

        return total_reward
Example 9
def make_env(test, render=False):
    env = ProstheticsEnv(visualize=render)
    # Use different random seeds for train and test envs
    env_seed = 2**32 - 1 - seed if test else seed
    env.seed(env_seed)
    #if args.monitor:
    #env = gym.wrappers.Monitor(env, args.outdir)
    if isinstance(env.action_space, spaces.Box):
        misc.env_modifiers.make_action_filtered(env, clip_action_filter)
    if not test:
        misc.env_modifiers.make_reward_filtered(env, reward_filter)
    if render and not test:
        misc.env_modifiers.make_rendered(env)
    return env
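make_env in Example 9 (and again in Example 30) relies on clip_action_filter and reward_filter defined elsewhere in the surrounding training script. Plausible minimal definitions, assuming the usual pattern of clipping actions into the [0, 1] bounds of ProstheticsEnv and rescaling rewards during training only:

import numpy as np

def clip_action_filter(a):
    # clip raw agent actions into the Box bounds (ProstheticsEnv muscle
    # activations live in [0, 1])
    return np.clip(a, 0.0, 1.0)

def reward_filter(r):
    # example scaling applied to training rewards; the test env keeps raw rewards
    return r * 0.01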
Example 10
    def __init__(self, server_ip='localhost', server_port=5007):
        if args.ident is not None:
            self.worker_id = args.ident
        else:
            self.worker_id = np.random.randint(int(1e18))

        self.address = '{}:{}'.format(server_ip, server_port)

        random_seed = int(self.worker_id % int(1e9))
        np.random.seed(random_seed)

        env = ProstheticsEnv(visualize=False, seed=random_seed)
        env.change_model(model='3D',
                         difficulty=1,
                         prosthetic=True,
                         seed=random_seed)
        env.spec.timestep_limit = MAXTIME_LIMIT
        env = CustomR2Env(env)

        if args.reward_type == 'RunFastest':
            env = RunFastestReward(env)
        elif args.reward_type == 'FixedTargetSpeed':
            env = FixedTargetSpeedReward(env, args.target_v,
                                         args.act_penalty_lowerbound,
                                         args.act_penalty_coeff,
                                         args.vel_penalty_coeff)
        elif args.reward_type == 'Round2':
            env = Round2Reward(env, args.act_penalty_lowerbound,
                               args.act_penalty_coeff, args.vel_penalty_coeff)
        else:
            assert False, 'Not supported reward type!'

        env = FrameSkip(env, 4)
        env = ActionScale(env)
        self.env = PelvisBasedObs(env)
Example 11
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    logger.configure(dir='/home/vaisakhs_shaj/Desktop/DeepReinforcementLearning/5_Deep_Deterministic_Policy_Gradients/LOGS/OSIM')
    # Create envs.
    env = ProstheticsEnv(visualize=True)
    env.change_model(model = '2D', difficulty = 0, prosthetic = True, seed=seed)
        #env.seed(seed)
    #env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))

    
    eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(2e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 2000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Disable logging for rank != 0 to avoid noise.
    if rank == 0:
        start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
        action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
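The noise_type argument parsed above is a comma-separated list of name_stddev entries (plus the literal 'none'), e.g. 'adaptive-param_0.2,ou_0.2'. A small self-contained illustration of the grammar the loop accepts:

# same parsing as in run(): the stddev follows an underscore
for spec in 'adaptive-param_0.2,ou_0.2,none'.split(','):
    spec = spec.strip()
    if spec == 'none':
        print('no exploration noise')
    else:
        name, stddev = spec.split('_')
        print('noise:', name, 'stddev:', float(stddev))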
Example 12
    def __init__(self, num_steps, max_frames):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if self.use_cuda else "cpu")
        self.env = ProstheticsEnv(visualize=False)

        self.num_steps = num_steps
        self.max_frames = max_frames

        # num_inputs = self.envs.observation_space.shape[0]  # 158
        num_inputs = 160
        num_outputs = self.env.action_space.shape[0]  # 19
        hidden_size = 256

        self.model = A2CWorker(num_inputs, num_outputs,
                               hidden_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters())
Example 13
    def __init__(self,
                 env_seed,
                 env_params=None,
                 policy_params=None,
                 deltas=None,
                 rollout_length=1000,
                 delta_std=0.02):

        # initialize OpenAI environment for each worker
        if (env_params["type"] == "MuJoCo"):
            self.env = gym.make(env_params["name"])
            self.env.seed(env_seed)
        elif (env_params["type"] == "Prosthetics"):
            self.env = ProstheticsEnv(visualize=False,
                                      difficulty=get_difficulty(
                                          env_params['round']))
            self.env = ObsProcessWrapper(self.env, True, 1)

        # each worker gets access to the shared noise table
        # with independent random streams for sampling
        # from the shared noise table.
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)
        self.policy_params = policy_params

        if policy_params['type'] == 'Linear':
            self.policy = LinearPolicy(policy_params)
        else:
            self.policy = MLPPolicy("policy", policy_params["ob_dim"], policy_params["ac_dim"], \
                                    policy_params["layer_norm"], tf.nn.selu, policy_params["layer_depth"], \
                                    policy_params["layer_width"], None)

        self.delta_std = delta_std
        self.rollout_length = rollout_length
        return
Example 14
def main(saved_model=None):
    parser = argparse.ArgumentParser(description='Test or submit agent.')
    parser.add_argument('-t', '--test', action='store_true', default=True, help='test agent locally')
    parser.add_argument('-s', '--submit', action='store_true', default=True, help='submit agent to crowdAI server')
    parser.add_argument('-v', '--visualize', action='store_true', default=False, help='render the environment locally')
    args = parser.parse_args()

    # Create environment

    env = ProstheticsEnv(visualize=args.visualize)
    env = Env_With_Dict_Observation(env)
    env = Dict_To_List(env)
    env = Env_With_JSONable_Actions(env)

    # Specify Agent

    #agent = SpecifiedAgent(env.observation_space, env.action_space)
    agent = PPOAgent(env.observation_space, env.action_space)
    
    if saved_model:
        agent.load(saved_model)

    train(agent, env)

    if args.test:
        test(agent, env)

    if args.submit:
        submit(agent)
Example 15
def main(saved_model=None):

    parser = argparse.ArgumentParser(description='Submit agent')
    parser.add_argument('-v',
                        '--visualize',
                        action='store_true',
                        default=False,
                        help='render the environment locally')
    args = parser.parse_args()

    # Create environment

    env = ProstheticsEnv(visualize=args.visualize)
    env = Env_With_Dict_Observation(env)
    env = Dict_To_List(env)
    env = Env_With_JSONable_Actions(env)

    # Specify Agent

    agent = SpecifiedAgent(env.observation_space, env.action_space)

    if saved_model:
        agent.load(saved_model)

    submit(agent)
Example 16
    def __init__(self, env_name='HalfCheetah-v1',
                 policy_params=None,
                 num_workers=32, 
                 num_deltas=320, 
                 deltas_used=320,
                 delta_std=0.02, 
                 logdir=None, 
                 rollout_length=1000,
                 step_size=0.01,
                 shift='constant zero',
                 params=None,
                 seed=123):

        logz.configure_output_dir(logdir)
        logz.save_params(params)
        
        env = ProstheticsEnv(visualize=False)
        
        self.timesteps = 0
        self.action_size = env.action_space.shape[0]
        self.ob_size = 160
        self.num_deltas = num_deltas
        self.deltas_used = deltas_used
        self.rollout_length = rollout_length
        self.step_size = step_size
        self.delta_std = delta_std
        self.logdir = logdir
        self.shift = shift
        self.params = params
        self.max_past_avg_reward = float('-inf')
        self.num_episodes_used = float('inf')

        
        # create shared table for storing noise
        print("Creating deltas table.")
        deltas_id = create_shared_noise.remote()
        self.deltas = SharedNoiseTable(ray.get(deltas_id), seed = seed + 3)
        print('Created deltas table.')

        # initialize workers with different random seeds
        print('Initializing workers.') 
        self.num_workers = num_workers
        self.workers = [Worker.remote(seed + 7 * i,
                                      env_name=env_name,
                                      policy_params=policy_params,
                                      deltas=deltas_id,
                                      rollout_length=rollout_length,
                                      delta_std=delta_std) for i in range(num_workers)]


        # initialize policy 
        if policy_params['type'] == 'linear':
            self.policy = LinearPolicy(policy_params)
            self.w_policy = self.policy.get_weights()
        else:
            raise NotImplementedError
            
        # initialize optimization algorithm
        self.optimizer = optimizers.SGD(self.w_policy, self.step_size)        
        print("Initialization of ARS complete.")
Example 17
File: ppo.py Project: XDang13/CGSL
    def start_sim(self):
        torch.manual_seed(random.randint(1,100))
        env = ProstheticsEnv(visualize=False)
        memory = []
        observation = env.reset()
        for t in range(1000):
            state = observation
            action,log_prob = self.select_action(state)
            observation,reward,done,_ = env.step(action)
            e = [state,action,reward,log_prob]
            memory.append(e)
            if done:
                for i in range(len(memory)):
                    memory[i][2] -= self.punish
                break   

        return memory
Example 18
def env_creator(env_config):
    # env = ProstheticsEnv(False, integrator_accuracy=3e-2)
    # env = ProstheticsEnv(True)
    env = ProstheticsEnv(False)
    print(env.action_space)
    print(env.action_space.low)
    # env.action_space = gym.spaces.Tuple([gym.spaces.Discrete(11) for _ in range(19)])
    return env
Example 19
    def reset(self):
        self.time_step = 0

        if self.episodes % self.ep2reload == 0:
            self.env = ProstheticsEnv(visualize=self.visualize,
                                      integrator_accuracy=1e-3)
            seed = random.randrange(SEED_RANGE)
            set_global_seeds(seed)
            self.env.change_model(model=self.model,
                                  prosthetic=True,
                                  difficulty=1,
                                  seed=seed)

        state_desc = self.env.reset(project=False)
        if self.randomized_start:
            state = get_simbody_state(state_desc)

            amplitude = random.gauss(0.8, 0.05)
            direction = random.choice([-1., 1])
            amplitude_knee = random.gauss(-1.2, 0.05)
            state[4] = 0.8
            state[6] = amplitude * direction  # right leg
            state[9] = amplitude * direction * (-1.)  # left leg
            state[13] = amplitude_knee if direction == 1. else 0  # right knee
            state[14] = amplitude_knee if direction == -1. else 0  # left knee

            # noise = np.random.normal(scale=0.1, size=72)
            # noise[3:6] = 0
            # noise[6] = np.random.uniform(-1., 1., size=1)
            # noise[9] = np.random.uniform(-1., 1., size=1)
            # noise[13] = -np.random.uniform(0., 1., size=1)  # knee_r
            # noise[14] = -np.random.uniform(0., 1., size=1)  # knee_l
            # state = (np.array(state) + noise).tolist()

            simbody_state = self.env.osim_model.get_state()
            obj = simbody_state.getY()
            for i in range(72):
                obj[i] = state[i]
            self.env.osim_model.set_state(simbody_state)

        observation = preprocess_obs_round2(state_desc)
        if self.observe_time:
            observation.append(-1.0)

        return observation
Example 20
def play_multi_episode(submit_model, episode_num=2, vis=False, seed=0):
    np.random.seed(seed)
    env = ProstheticsEnv(visualize=vis)
    env.change_model(model='3D', difficulty=1, prosthetic=True, seed=seed)
    env = ForwardReward(env)
    env = FrameSkip(env, 4)
    env = ActionScale(env)
    env = PelvisBasedObs(env)
    all_reward = []
    all_shaping_reward = 0
    last_frames_count = 0

    for e in range(episode_num):
        t = time.time()
        episode_reward = 0.0
        episode_shaping_reward = 0.0
        observation = env.reset(project=False)
        target_change_times = 0
        step = 0
        loss = []
        while True:
            step += 1
            action = submit_model.pred_batch(observation, target_change_times)
            observation, reward, done, info = env.step(action, project=False)
            step_frames = info['frame_count'] - last_frames_count
            last_frames_count = info['frame_count']
            episode_reward += reward
            # we place it here to drop the first step after the target change
            if target_change_times >= 1:
                loss.append(10 * step_frames - reward)
            if info['target_changed']:
                target_change_times = min(target_change_times + 1, 3)
            logger.info("[step/{}]reward:{}  info:{}".format(
                step, reward, info))
            episode_shaping_reward += info['shaping_reward']
            if done:
                break
        all_reward.append(episode_reward)
        all_shaping_reward += episode_shaping_reward
        t = time.time() - t
        logger.info(
            "[episode/{}] time: {} episode_reward:{} change_loss:{} after_change_loss:{} mean_reward:{}"
            .format(e, t, episode_reward, np.sum(loss[:15]), np.sum(loss[15:]),
                    np.mean(all_reward)))
    logger.info("Mean reward:{}".format(np.mean(all_reward)))
Example 21
    def test_activations(self):
        env = ProstheticsEnv(
            visualize=False,
            integrator_accuracy=1e-1)  # we quickly want to see what happens
        env.reset()
        state_checkpoint = env.osim_model.get_state()  # store state

        for i in range(5):
            env.step(env.action_space.high)  # execute step with static action
        obs1 = env.get_observation()

        env.osim_model.set_state(state_checkpoint)  # restore state
        for i in range(5):
            env.step(env.action_space.high)
        obs2 = env.get_observation()

        dist = np.sum((np.array(obs1) - np.array(obs2))**2)
        self.assertTrue(dist < 0.05)
Example 22
    def test1(self):
        
        env = ProstheticsEnv(visualize=True)
        observation = env.reset()

        simbody_state = env.osim_model.get_state()
        print(simbody_state.getNumSubsystems())
        print(simbody_state.getY())
        oldy = simbody_state.updY()
        for i in range(len(oldy)):
            oldy[i] += 0.2
        print(simbody_state.getY())
        env.osim_model.set_state(simbody_state)

        action = env.action_space.sample()

        for i in range(50):
            env.step(action)
Example 23
def create_env(env_config):
    env = ProstheticsEnv(**env_config)
    env = ProstheticsEnvWrapper(
        env,
        do_norm=True,
        state_norm_file=state_norm_file,
        custom_reward=False,
        downsample_factor=downsample_factor,
        reduce_action=True,
        test_mode=True)
    return env
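create_env in Example 23 expands env_config directly into ProstheticsEnv keyword arguments. Judging from the constructor arguments used in the other examples, a call might look like this (the values are illustrative only):

env = create_env({'visualize': False,
                  'integrator_accuracy': 5e-5,
                  'difficulty': 1,
                  'seed': 0})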
Example 24
    def __init__(self,
                 visualize=True,
                 integrator_accuracy=5e-5,
                 difficulty=0,
                 seed=0,
                 feature_mapper=None):
        # call the base-class initializers directly on self instead of
        # building a throwaway ProstheticsEnv instance
        ProstheticsEnv.__init__(self,
                                visualize=visualize,
                                integrator_accuracy=integrator_accuracy,
                                difficulty=difficulty,
                                seed=seed)
        GAILEnv.__init__(self, feature_mapper)
Example 25
def main():
    env = ProstheticsEnv(visualize=True, difficulty=1)
    agent = DDPG(env)
#     env.monitor.start('experiments/' + ENV_NAME,force=True)
    
    # Playing Episodes
    for episode in range(EPISODES):
        state = env.reset()
        #print "episode:",episode
        # Train
        for step in range(env.spec.timestep_limit):
            action = agent.noise_action(state)
            next_state,reward,done,_ = env.step(action)
            agent.perceive(state,action,reward,next_state,done)
            state = next_state
            if done:
                break
                
        # Testing:
        if episode % 100 == 0 and episode > 100:
            total_reward = 0
            for i in range(TEST):
                state = env.reset()
                for j in range(env.spec.timestep_limit):
                    #env.render()
                    action = agent.action(state) # direct action for test
                    state,reward,done,_ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward/TEST
            print ('episode: ',episode,'Evaluation Average Reward:',ave_reward)
Example 26
    def __init__(self, env_seed,
                 env_name='',
                 policy_params = None,
                 deltas=None,
                 rollout_length=1000,
                 delta_std=0.02):

        # initialize OpenAI environment for each worker
        self.env = ProstheticsEnv(visualize=False)
        self.env.seed(env_seed)

        # each worker gets access to the shared noise table
        # with independent random streams for sampling
        # from the shared noise table. 
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)
        self.policy_params = policy_params
        if policy_params['type'] == 'linear':
            self.policy = LinearPolicy(policy_params)
        else:
            raise NotImplementedError
            
        self.delta_std = delta_std
        self.rollout_length = rollout_length
Example 27
        def _thunk():
            env = ProstheticsEnv(visualize=False)
            env.seed(seed + 10000 * mpi_rank +
                     rank if seed is not None else None)
            env = ForceDictObservation(env)
            env = DictToListFull(env)
            env = JSONable(env)

            env = Monitor(
                env,
                logger.get_dir()
                and os.path.join(logger.get_dir(),
                                 str(mpi_rank) + '.' + str(rank)))

            if reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 28
class FixedActionAgent(object):
    def __init__(self):
        self.agent = AgentWorker()
        self.env = ProstheticsEnv(visualize=False)

    def run(self):
        try:
            observation = self.env.reset()
            total_reward = 0.0

            for i in range(200):
                action = self.agent.get_action(observation)
                observation, reward, done, info = self.env.step(action)
                total_reward += reward
                if done:
                    break
            print('Total reward %f' % total_reward)
            return {'status': 'DONE', 'total reward': total_reward}
        except Exception as e:
            raise e

    def get_action(self, observation):
        action = self.agent.get_action(observation)
        return action
Example 29
def runAgent(args):
    agent = args[0]
    scoreList = args[1]
    step = args[2]

    # skip if task already done by agent
    if agent.taskDone():
        print('Agent #' + str(agent.getAgentNum()) + ' can skip.')
        scoreList.append((agent.getUid(), agent.getOutcomes()))
        return

    env = ProstheticsEnv(visualize=False)

    score = 0

    state = env.reset(project=False)
    state = obsTrans(state)
    state.extend([0] * 19)
    curAction = [0] * 19
    for i in range(300):  # frame loop
        act = agent.act(state)
        if not isinstance(act, list):
            act = [0] * 19
            print(
                'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHHHHHHHHHHHHHHHHHHHHHHHhhhhhhhhhhhhhhh!!!!!!!!'
            )
        for i in range(19):
            if act[i] > 0.333:
                curAction[i] += step
            elif act[i] < -0.333:
                curAction[i] -= step

            if curAction[i] < 0:
                curAction[i] = 0
            elif curAction[i] > 1:
                curAction[i] = 1

        # feedback from env
        state, reward, isDone, debug = env.step(curAction, project=False)
        state = obsTrans(state)
        state.extend(
            curAction)  # feedback action, because sequence is important

        score += reward  # accumulate reward in score
        if isDone:
            break  # end early if losing state

    print('Agent #' + str(agent.getAgentNum()) + ' | Score: ' + str(score))

    env.close()
    agent.reward(score)
    scoreList.append((agent.getUid(), agent.getOutcomes()))
Example 30
def make_env(test, render=False):

    env = ProstheticsEnv(visualize=render)
    env.change_model(model='3D', prosthetic=True, difficulty=0, seed=None)
    # Use different random seeds for train and test envs
    env_seed = 2**32 - 1 - seed if test else seed
    env.seed(env_seed)
    if isinstance(env.action_space, spaces.Box):
        misc.env_modifiers.make_action_filtered(env, clip_action_filter)
    if not test:
        misc.env_modifiers.make_reward_filtered(env, reward_filter)
    if render and not test:
        misc.env_modifiers.make_rendered(env)
    return env