def test():
    env = RunEnv(visualize=False)
    observation_d = env.reset(project=False)
    observation = process_obs_dict(observation_d)
    total_reward = 0
    steps = 0
    while True:
        # a = AGENT OUTPUT
        a, q = agent.act(observation)
        observation_d, reward, done, info = env.step(a, project=False)
        observation = process_obs_dict(observation_d)
        total_reward += reward
        steps += 1
        # print(observation)
        print(steps, 'total reward:', total_reward)
        if done:
            break
    print('finished testing!')
def test(frameskip=1, vis=False):
    env = RunEnv(visualize=vis)
    # env.change_model(model='2D', prosthetic=True, difficulty=0, seed=None)
    observation_d = env.reset(project=False)
    total_reward = 0
    steps = 0
    while True:
        # a = AGENT OUTPUT
        observation = process_obs_dict(observation_d)
        a, q = agent.act(observation)
        for _ in range(frameskip):
            observation_d, reward, done, info = env.step(a, project=False)
            total_reward += reward
            steps += 1
            # print(observation)
            print(steps, 'total reward:', total_reward)
            if done:
                break  # stop repeating the action once the episode ends
        if done:
            break
    print('finished testing!')
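# `process_obs_dict` is used above but not defined in this snippet. A minimal
# sketch, assuming it only flattens the nested observation dict returned by
# env.reset(project=False) / env.step(..., project=False) into a flat list of
# floats; the original author's key ordering and feature selection are unknown.
def process_obs_dict(obs_dict):
    flat = []

    def _walk(node):
        if isinstance(node, dict):
            for key in sorted(node):  # sorted keys give a stable ordering
                _walk(node[key])
        elif isinstance(node, (list, tuple)):
            for item in node:
                _walk(item)
        else:
            flat.append(float(node))

    _walk(obs_dict)
    return flat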
def play_multi_episode(agent, episode_num=2, vis=False, seed=0):
    np.random.seed(seed)
    env = ProstheticsEnv(visualize=vis)
    env.change_model(model='3D', difficulty=1, prosthetic=True, seed=seed)
    env = TestReward(env)
    env = FrameSkip(env, 4)
    env = ActionScale(env)
    env = PelvisBasedObs(env)
    all_reward = []
    for e in range(episode_num):
        t = time.time()
        episode_reward = 0.0
        obs = env.reset(project=False)
        step = 0
        while True:
            step += 1
            batch_obs = np.expand_dims(obs, axis=0)
            action = agent.ensemble_predict(batch_obs)
            action = np.squeeze(action, axis=0)
            obs, reward, done, info = env.step(action, project=False)
            episode_reward += reward
            logger.info("[step/{}]reward:{}".format(step, reward))
            if done:
                break
        all_reward.append(episode_reward)
        t = time.time() - t
        logger.info(
            "[episode/{}] time: {} episode_reward:{} mean_reward:{}".format(
                e, t, episode_reward, np.mean(all_reward)))
    logger.info("Mean reward:{}".format(np.mean(all_reward)))
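# `FrameSkip` (also used in later snippets) comes from the surrounding project
# and is not defined here. A minimal sketch of a FrameSkip-style wrapper,
# assuming it repeats each action k times and sums the intermediate rewards;
# the actual implementation may differ.
import gym


class FrameSkip(gym.Wrapper):
    def __init__(self, env, k=4):
        super(FrameSkip, self).__init__(env)
        self.k = k

    def step(self, action, **kwargs):
        total_reward = 0.0
        obs, done, info = None, False, {}
        for _ in range(self.k):
            obs, reward, done, info = self.env.step(action, **kwargs)
            total_reward += reward
            if done:  # do not step past the end of the episode
                break
        return obs, total_reward, done, info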
def _f():
    # if seed % 2 == 0:
    env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
    env.change_model(model='2D', difficulty=2, prosthetic=False, seed=seed)
    # else:
    #     env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
    #     env.change_model(model='3D', difficulty=0, prosthetic=True, seed=seed)
    '''
    if seed >= 4 and seed < 6:
        env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
        env.change_model(model='2D', difficulty=1, prosthetic=True, seed=seed)
    if seed >= 6 and seed < 8:
        env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
        env.change_model(model='3D', difficulty=0, prosthetic=True, seed=seed)
    if seed >= 8 and seed < 10:
        env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
        env.change_model(model='3D', difficulty=0, prosthetic=True, seed=seed)
    if seed >= 10 and seed < 12:
        env = ProstheticsEnv(visualize=False, integrator_accuracy=3e-3)
        env.change_model(model='3D', difficulty=0, prosthetic=True, seed=seed)
    # env.seed(seed)
    '''
    return env
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_name', type=str, default='desc_200_1_1')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--discount', type=float, default=0.99)
    parser.add_argument('--n_iter', '-n', type=int, default=1000000)
    parser.add_argument('--batch_size', '-b', type=int, default=500)
    parser.add_argument('--ep_len', '-ep', type=float, default=-1.)
    parser.add_argument('--learning_rate', '-lr', type=float, default=5e-6)
    parser.add_argument('--reward_to_go', '-rtg', action='store_true', default=True)
    parser.add_argument('--test', '-t', action='store_true', default=False)
    parser.add_argument('--dont_normalize_advantages', '-dna', action='store_true', default=True)
    parser.add_argument('--nn_baseline', '-bl', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--n_experiments', '-e', type=int, default=1)
    parser.add_argument('--n_layers', '-l', type=int, default=3)
    parser.add_argument('--size', '-s', type=int, default=150)
    parser.add_argument('--disc_levels', '-dl', type=int, default=100)
    args = parser.parse_args()

    print('test: ', args.test)
    if not os.path.exists('data'):
        os.makedirs('data')
    logdir = args.exp_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join('data', logdir)
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    env = ProstheticsEnv(visualize=False, integrator_accuracy=1e-4)
    env.change_model(model='3D', difficulty=2, prosthetic=True, seed=0)
    print('ac_dim: ', env.action_space.shape)
    print('obs_dim: ', env.observation_space.shape)
    print('normalize: ', not args.dont_normalize_advantages)

    max_path_length = args.ep_len if args.ep_len > 0 else None
    train_PG(
        exp_name=args.exp_name,
        env_name=env,
        n_iter=args.n_iter,
        gamma=args.discount,
        min_timesteps_per_batch=args.batch_size,
        max_path_length=max_path_length,
        learning_rate=args.learning_rate,
        reward_to_go=args.reward_to_go,
        animate=args.render,
        logdir=os.path.join(logdir, '%d' % 0),
        seed=0,
        normalize_advantages=not args.dont_normalize_advantages,
        nn_baseline=args.nn_baseline,
        n_layers=args.n_layers,
        size=args.size,
        test=args.test,
        disc_levels=args.disc_levels,
    )
def __init__(self,
             reward_scale=0.1,
             frame_skip=1,
             visualize=False,
             reinit_random_action_every=1,
             randomized_start=False,
             max_episode_length=300,
             death_penalty=0.0,
             living_bonus=0.0,
             crossing_legs_penalty=0.0,
             bending_knees_bonus=0.0,
             left_knee_bonus=0.,
             right_knee_bonus=0.,
             max_reward=10.0,
             activations_penalty=0.,
             bonus_for_knee_angles_scale=0.,
             bonus_for_knee_angles_angle=0.):
    self.reinit_random_action_every = reinit_random_action_every
    self.visualize = visualize
    self.randomized_start = randomized_start
    self.env = ProstheticsEnv(visualize=visualize, integrator_accuracy=1e-3)
    self.env.change_model(model="3D", prosthetic=True, difficulty=1,
                          seed=np.random.randint(200))
    self.frame_skip = frame_skip
    self.observation_shapes = [(345, )]
    self.action_size = 19
    self.max_ep_length = max_episode_length - 2
    self.activations_penalty = activations_penalty
    self.bonus_for_knee_angles_scale = bonus_for_knee_angles_scale
    self.bonus_for_knee_angles_angle = bonus_for_knee_angles_angle

    self.observation_space = Box(low=self.env.observation_space.low[0],
                                 high=self.env.observation_space.high[0],
                                 shape=(344, ))
    self.action_space = Box(low=self.env.action_space.low[0],
                            high=self.env.action_space.high[0],
                            shape=(19, ))

    # reward shaping
    self.reward_scale = reward_scale
    self.death_penalty = np.abs(death_penalty)
    self.living_bonus = living_bonus
    self.cross_legs_coef = crossing_legs_penalty
    self.bending_knees_coef = bending_knees_bonus
    self.left_knee_bonus = left_knee_bonus
    self.right_knee_bonus = right_knee_bonus
    self.max_reward = max_reward

    self.episodes = 1
    self.ep2reload = 10
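# The enclosing class name is not shown in this snippet; assuming it is called
# ProstheticsEnvWrap, a typical instantiation with mild reward shaping might
# look like the following (all values are illustrative, not the original
# author's):
env = ProstheticsEnvWrap(
    reward_scale=0.1,
    frame_skip=2,
    max_episode_length=300,
    death_penalty=10.0,
    living_bonus=0.1,
    crossing_legs_penalty=5.0,
)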
def respawn(self):
    """Respawn the env (hard reset).

    Parameters: None
    Returns: None
    """
    self.env = ProstheticsEnv(visualize=False, difficulty=self.difficulty)
def start_sim_evaluate(self):
    env = ProstheticsEnv(visualize=False)
    total_reward = 0
    observation = env.reset()
    for t in range(1000):
        state = observation
        action = self.select_action_evaluate(state)
        observation, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    return total_reward
def make_env(test, render=False):
    env = ProstheticsEnv(visualize=render)
    # Use different random seeds for train and test envs
    env_seed = 2**32 - 1 - seed if test else seed
    env.seed(env_seed)
    # if args.monitor:
    #     env = gym.wrappers.Monitor(env, args.outdir)
    if isinstance(env.action_space, spaces.Box):
        misc.env_modifiers.make_action_filtered(env, clip_action_filter)
    if not test:
        misc.env_modifiers.make_reward_filtered(env, reward_filter)
    if render and not test:
        misc.env_modifiers.make_rendered(env)
    return env
def __init__(self, server_ip='localhost', server_port=5007):
    if args.ident is not None:
        self.worker_id = args.ident
    else:
        self.worker_id = np.random.randint(int(1e18))
    self.address = '{}:{}'.format(server_ip, server_port)
    random_seed = int(self.worker_id % int(1e9))
    np.random.seed(random_seed)

    env = ProstheticsEnv(visualize=False, seed=random_seed)
    env.change_model(model='3D', difficulty=1, prosthetic=True, seed=random_seed)
    env.spec.timestep_limit = MAXTIME_LIMIT
    env = CustomR2Env(env)
    if args.reward_type == 'RunFastest':
        env = RunFastestReward(env)
    elif args.reward_type == 'FixedTargetSpeed':
        env = FixedTargetSpeedReward(env, args.target_v,
                                     args.act_penalty_lowerbound,
                                     args.act_penalty_coeff,
                                     args.vel_penalty_coeff)
    elif args.reward_type == 'Round2':
        env = Round2Reward(env, args.act_penalty_lowerbound,
                           args.act_penalty_coeff, args.vel_penalty_coeff)
    else:
        assert False, 'Not supported reward type!'
    env = FrameSkip(env, 4)
    env = ActionScale(env)
    self.env = PelvisBasedObs(env)
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    logger.configure(dir='/home/vaisakhs_shaj/Desktop/DeepReinforcementLearning/5_Deep_Deterministic_Policy_Gradients/LOGS/OSIM')

    # Create envs.
    env = ProstheticsEnv(visualize=True)
    env.change_model(model='2D', difficulty=0, prosthetic=True, seed=seed)
    # env.seed(seed)
    # env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(2e6), action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible.
    seed = seed + 2000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Disable logging for rank != 0 to avoid noise.
    if rank == 0:
        start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def __init__(self, num_steps, max_frames):
    self.use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if self.use_cuda else "cpu")
    self.env = ProstheticsEnv(visualize=False)
    self.num_steps = num_steps
    self.max_frames = max_frames
    # num_inputs = self.envs.observation_space.shape[0]  # 158
    num_inputs = 160
    num_outputs = self.env.action_space.shape[0]  # 19
    hidden_size = 256
    self.model = A2CWorker(num_inputs, num_outputs, hidden_size).to(self.device)
    self.optimizer = optim.Adam(self.model.parameters())
def __init__(self, env_seed, env_params=None, policy_params=None,
             deltas=None, rollout_length=1000, delta_std=0.02):
    # initialize OpenAI environment for each worker
    if env_params["type"] == "MuJoCo":
        self.env = gym.make(env_params["name"])
        self.env.seed(env_seed)
    elif env_params["type"] == "Prosthetics":
        self.env = ProstheticsEnv(
            visualize=False,
            difficulty=get_difficulty(env_params['round']))
        self.env = ObsProcessWrapper(self.env, True, 1)

    # each worker gets access to the shared noise table
    # with independent random streams for sampling
    # from the shared noise table.
    self.deltas = SharedNoiseTable(deltas, env_seed + 7)
    self.policy_params = policy_params
    if policy_params['type'] == 'Linear':
        self.policy = LinearPolicy(policy_params)
    else:
        self.policy = MLPPolicy("policy", policy_params["ob_dim"],
                                policy_params["ac_dim"],
                                policy_params["layer_norm"], tf.nn.selu,
                                policy_params["layer_depth"],
                                policy_params["layer_width"], None)
    self.delta_std = delta_std
    self.rollout_length = rollout_length
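# `get_difficulty` is not defined in this snippet. A plausible sketch, assuming
# round 1 of the challenge corresponds to ProstheticsEnv difficulty 0 (fixed
# target velocity) and round 2 to difficulty 1 (changing target velocity):
def get_difficulty(round_number):
    return 0 if int(round_number) == 1 else 1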
def main(saved_model=None):
    parser = argparse.ArgumentParser(description='Test or submit agent.')
    # store_true (not the original action='store') so -t works as a boolean flag
    parser.add_argument('-t', '--test', action='store_true', default=True,
                        help='test agent locally')
    parser.add_argument('-s', '--submit', action='store_true', default=True,
                        help='submit agent to crowdAI server')
    parser.add_argument('-v', '--visualize', action='store_true', default=False,
                        help='render the environment locally')
    args = parser.parse_args()

    # Create environment
    env = ProstheticsEnv(visualize=args.visualize)
    env = Env_With_Dict_Observation(env)
    env = Dict_To_List(env)
    env = Env_With_JSONable_Actions(env)

    # Specify Agent
    # agent = SpecifiedAgent(env.observation_space, env.action_space)
    agent = PPOAgent(env.observation_space, env.action_space)
    if saved_model:
        agent.load(saved_model)

    train(agent, env)
    if args.test:
        test(agent, env)
    if args.submit:
        submit(agent)
def main(saved_model=None):
    parser = argparse.ArgumentParser(description='Submit agent')
    parser.add_argument('-v', '--visualize', action='store_true', default=False,
                        help='render the environment locally')
    args = parser.parse_args()

    # Create environment
    env = ProstheticsEnv(visualize=args.visualize)
    env = Env_With_Dict_Observation(env)
    env = Dict_To_List(env)
    env = Env_With_JSONable_Actions(env)

    # Specify Agent
    agent = SpecifiedAgent(env.observation_space, env.action_space)
    if saved_model:
        agent.load(saved_model)

    submit(agent)
def __init__(self, env_name='HalfCheetah-v1', policy_params=None,
             num_workers=32, num_deltas=320, deltas_used=320,
             delta_std=0.02, logdir=None, rollout_length=1000,
             step_size=0.01, shift='constant zero', params=None, seed=123):

    logz.configure_output_dir(logdir)
    logz.save_params(params)

    env = ProstheticsEnv(visualize=False)
    self.timesteps = 0
    self.action_size = env.action_space.shape[0]
    self.ob_size = 160
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.step_size = step_size
    self.delta_std = delta_std
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # create shared table for storing noise
    print("Creating deltas table.")
    deltas_id = create_shared_noise.remote()
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [Worker.remote(seed + 7 * i,
                                  env_name=env_name,
                                  policy_params=policy_params,
                                  deltas=deltas_id,
                                  rollout_length=rollout_length,
                                  delta_std=delta_std)
                    for i in range(num_workers)]

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")
def start_sim(self):
    torch.manual_seed(random.randint(1, 100))
    env = ProstheticsEnv(visualize=False)
    memory = []
    observation = env.reset()
    for t in range(1000):
        state = observation
        action, log_prob = self.select_action(state)
        observation, reward, done, _ = env.step(action)
        e = [state, action, reward, log_prob]
        memory.append(e)
        if done:
            for i in range(len(memory)):
                memory[i][2] -= self.punish
            break
    return memory
def env_creator(env_config):
    # env = ProstheticsEnv(False, integrator_accuracy=3e-2)
    # env = ProstheticsEnv(True)
    env = ProstheticsEnv(False)
    print(env.action_space)
    print(env.action_space.low)
    # env.action_space = gym.spaces.Tuple([gym.spaces.Discrete(11) for _ in range(19)])
    return env
def reset(self):
    self.time_step = 0

    if self.episodes % self.ep2reload == 0:
        self.env = ProstheticsEnv(visualize=self.visualize,
                                  integrator_accuracy=1e-3)
        seed = random.randrange(SEED_RANGE)
        set_global_seeds(seed)
        self.env.change_model(model=self.model, prosthetic=True,
                              difficulty=1, seed=seed)

    state_desc = self.env.reset(project=False)
    if self.randomized_start:
        state = get_simbody_state(state_desc)

        amplitude = random.gauss(0.8, 0.05)
        direction = random.choice([-1., 1.])
        amplitude_knee = random.gauss(-1.2, 0.05)
        state[4] = 0.8
        state[6] = amplitude * direction  # right leg
        state[9] = amplitude * direction * (-1.)  # left leg
        state[13] = amplitude_knee if direction == 1. else 0  # right knee
        state[14] = amplitude_knee if direction == -1. else 0  # left knee

        # noise = np.random.normal(scale=0.1, size=72)
        # noise[3:6] = 0
        # noise[6] = np.random.uniform(-1., 1., size=1)
        # noise[9] = np.random.uniform(-1., 1., size=1)
        # noise[13] = -np.random.uniform(0., 1., size=1)  # knee_r
        # noise[14] = -np.random.uniform(0., 1., size=1)  # knee_l
        # state = (np.array(state) + noise).tolist()

        simbody_state = self.env.osim_model.get_state()
        obj = simbody_state.getY()
        for i in range(72):
            obj[i] = state[i]
        self.env.osim_model.set_state(simbody_state)

    observation = preprocess_obs_round2(state_desc)
    if self.observe_time:
        observation.append(-1.0)

    return observation
def play_multi_episode(submit_model, episode_num=2, vis=False, seed=0):
    np.random.seed(seed)
    env = ProstheticsEnv(visualize=vis)
    env.change_model(model='3D', difficulty=1, prosthetic=True, seed=seed)
    env = ForwardReward(env)
    env = FrameSkip(env, 4)
    env = ActionScale(env)
    env = PelvisBasedObs(env)

    all_reward = []
    all_shaping_reward = 0
    last_frames_count = 0

    for e in range(episode_num):
        t = time.time()
        episode_reward = 0.0
        episode_shaping_reward = 0.0
        observation = env.reset(project=False)
        target_change_times = 0
        step = 0
        loss = []
        while True:
            step += 1
            action = submit_model.pred_batch(observation, target_change_times)
            observation, reward, done, info = env.step(action, project=False)
            step_frames = info['frame_count'] - last_frames_count
            last_frames_count = info['frame_count']
            episode_reward += reward
            # we place it here to drop the first step after changing
            if target_change_times >= 1:
                loss.append(10 * step_frames - reward)
            if info['target_changed']:
                target_change_times = min(target_change_times + 1, 3)
            logger.info("[step/{}]reward:{} info:{}".format(step, reward, info))
            episode_shaping_reward += info['shaping_reward']
            if done:
                break
        all_reward.append(episode_reward)
        all_shaping_reward += episode_shaping_reward
        t = time.time() - t
        logger.info(
            "[episode/{}] time: {} episode_reward:{} change_loss:{} after_change_loss:{} mean_reward:{}"
            .format(e, t, episode_reward, np.sum(loss[:15]), np.sum(loss[15:]),
                    np.mean(all_reward)))
    logger.info("Mean reward:{}".format(np.mean(all_reward)))
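# `ActionScale` (used in several snippets above) is assumed to map the agent's
# actions from [-1, 1] into the [0, 1] muscle-activation range that osim-rl
# expects; a minimal sketch under that assumption:
import gym
import numpy as np


class ActionScale(gym.Wrapper):
    def step(self, action, **kwargs):
        action = (np.asarray(action) + 1.0) / 2.0  # [-1, 1] -> [0, 1]
        action = np.clip(action, 0.0, 1.0)
        return self.env.step(action, **kwargs)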
def test_activations(self):
    env = ProstheticsEnv(visualize=False,
                         integrator_accuracy=1e-1)  # we quickly want to see what happens
    env.reset()
    state_checkpoint = env.osim_model.get_state()  # store state
    for i in range(5):
        env.step(env.action_space.high)  # execute step with static action
    obs1 = env.get_observation()

    env.osim_model.set_state(state_checkpoint)  # restore state
    for i in range(5):
        env.step(env.action_space.high)
    obs2 = env.get_observation()

    dist = np.sum((np.array(obs1) - np.array(obs2))**2)
    self.assertTrue(dist < 0.05)
def test1(self):
    env = ProstheticsEnv(visualize=True)
    observation = env.reset()

    simbody_state = env.osim_model.get_state()
    print(simbody_state.getNumSubsystems())
    print(simbody_state.getY())
    oldy = simbody_state.updY()
    for i in range(len(oldy)):
        oldy[i] += 0.2
    print(simbody_state.getY())
    env.osim_model.set_state(simbody_state)

    action = env.action_space.sample()
    for i in range(50):
        env.step(action)
def create_env(env_config):
    env = ProstheticsEnv(**env_config)
    env = ProstheticsEnvWrapper(
        env,
        do_norm=True,
        state_norm_file=state_norm_file,
        custom_reward=False,
        downsample_factor=downsample_factor,
        reduce_action=True,
        test_mode=True)
    return env
def __init__(self, visualize=True, integrator_accuracy=5e-5, difficulty=0,
             seed=0, feature_mapper=None):
    # Call the parent initializer on this instance; the original
    # `ProstheticsEnv().__init__(self, ...)` constructed a throwaway env
    # and never initialized `self`.
    ProstheticsEnv.__init__(self, visualize=visualize,
                            integrator_accuracy=integrator_accuracy,
                            difficulty=difficulty, seed=seed)
    GAILEnv.__init__(self, feature_mapper)
def main():
    env = ProstheticsEnv(visualize=True, difficulty=1)
    agent = DDPG(env)
    # env.monitor.start('experiments/' + ENV_NAME, force=True)

    # Playing Episodes
    for episode in range(EPISODES):
        state = env.reset()
        # print("episode:", episode)
        # Train
        for step in range(env.spec.timestep_limit):
            action = agent.noise_action(state)
            next_state, reward, done, _ = env.step(action)
            agent.perceive(state, action, reward, next_state, done)
            state = next_state
            if done:
                break
        # Testing:
        if episode % 100 == 0 and episode > 100:
            total_reward = 0
            for i in range(TEST):
                state = env.reset()
                for j in range(env.spec.timestep_limit):
                    # env.render()
                    action = agent.action(state)  # direct action for test
                    state, reward, done, _ = env.step(action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST
            print('episode: ', episode, 'Evaluation Average Reward:', ave_reward)
def __init__(self, env_seed, env_name='', policy_params=None,
             deltas=None, rollout_length=1000, delta_std=0.02):
    # initialize OpenAI environment for each worker
    self.env = ProstheticsEnv(visualize=False)
    self.env.seed(env_seed)

    # each worker gets access to the shared noise table
    # with independent random streams for sampling
    # from the shared noise table.
    self.deltas = SharedNoiseTable(deltas, env_seed + 7)
    self.policy_params = policy_params
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
    else:
        raise NotImplementedError

    self.delta_std = delta_std
    self.rollout_length = rollout_length
def _thunk():
    env = ProstheticsEnv(visualize=False)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = ForceDictObservation(env)
    env = DictToListFull(env)
    env = JSONable(env)
    env = Monitor(env,
                  logger.get_dir() and
                  os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)))
    if reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
class FixedActionAgent(object):

    def __init__(self):
        self.agent = AgentWorker()
        self.env = ProstheticsEnv(visualize=False)

    def run(self):
        try:
            observation = self.env.reset()
            total_reward = 0.0
            for i in range(200):
                action = self.agent.get_action(observation)
                observation, reward, done, info = self.env.step(action)
                total_reward += reward
                if done:
                    break
            print('Total reward %f' % total_reward)
            return {'status': 'DONE', 'total reward': total_reward}
        except Exception as e:
            raise e

    def get_action(self, observation):
        action = self.agent.get_action(observation)
        return action
def runAgent(args):
    agent = args[0]
    scoreList = args[1]
    step = args[2]

    # skip if task already done by agent
    if agent.taskDone():
        print('Agent #' + str(agent.getAgentNum()) + ' can skip.')
        scoreList.append((agent.getUid(), agent.getOutcomes()))
        return

    env = ProstheticsEnv(visualize=False)
    score = 0
    state = env.reset(project=False)
    state = obsTrans(state)
    state.extend([0] * 19)
    curAction = [0] * 19
    for i in range(300):  # frame loop
        act = agent.act(state)
        if not isinstance(act, list):
            act = [0] * 19
            print('Agent returned a non-list action; falling back to zeros!')
        # inner loop renamed to `j` to avoid shadowing the frame-loop variable
        for j in range(19):
            if act[j] > 0.333:
                curAction[j] += step
            elif act[j] < -0.333:
                curAction[j] -= step
            if curAction[j] < 0:
                curAction[j] = 0
            elif curAction[j] > 1:
                curAction[j] = 1

        # feedback from env
        state, reward, isDone, debug = env.step(curAction, project=False)
        state = obsTrans(state)
        state.extend(curAction)  # feed back the action, because sequence is important
        score += reward  # accumulate reward in score
        if isDone:
            break  # end early if losing state

    print('Agent #' + str(agent.getAgentNum()) + ' | Score: ' + str(score))
    env.close()
    agent.reward(score)
    scoreList.append((agent.getUid(), agent.getOutcomes()))
def make_env(test, render=False):
    env = ProstheticsEnv(visualize=render)
    env.change_model(model='3D', prosthetic=True, difficulty=0, seed=None)
    # Use different random seeds for train and test envs
    env_seed = 2**32 - 1 - seed if test else seed
    env.seed(env_seed)
    if isinstance(env.action_space, spaces.Box):
        misc.env_modifiers.make_action_filtered(env, clip_action_filter)
    if not test:
        misc.env_modifiers.make_reward_filtered(env, reward_filter)
    if render and not test:
        misc.env_modifiers.make_rendered(env)
    return env
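# Hypothetical usage of make_env: the module-level `seed`, `clip_action_filter`
# and `reward_filter` must be defined by the surrounding script. Train and test
# envs end up with different seeds (seed vs 2**32 - 1 - seed).
train_env = make_env(test=False)
eval_env = make_env(test=True)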