def run(args):
    """Load the pretrained agent and evaluate it in the Unity Reacher environment.

    Starts the Unity environment, prints basic information about it (number
    of agents, action size, state size, first agent's state), builds an
    ``Agent``, loads weights from ``./pretrained/`` and hands everything to
    ``eval``. The environment is always closed before returning, including
    when any of those steps raises.

    Args:
        args: Object with a ``machine`` attribute. ``"Mac"`` selects
            ``./Reacher.app``; any other value selects
            ``./Reacher_Linux_NoVis/Reacher.x86_64``.
    """
    if args.machine == "Mac":
        env = UnityEnvironment(file_name='./Reacher.app', seed=1)
    else:
        env = UnityEnvironment(
            file_name='./Reacher_Linux_NoVis/Reacher.x86_64', seed=1)

    # From here on the environment is open, so guard the rest with
    # try/finally: a failure while resetting, loading or evaluating must not
    # leave the environment open.
    try:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print("using device", device)

        # get the default brain
        brain_name = env.brain_names[0]
        brain = env.brains[brain_name]

        # reset the environment
        env_info = env.reset(train_mode=False)[brain_name]

        # number of agents
        num_agents = len(env_info.agents)
        print('Number of agents:', num_agents)

        # size of each action
        action_size = brain.vector_action_space_size
        print('Size of each action:', action_size)

        # examine the state space
        states = env_info.vector_observations
        state_size = states.shape[1]
        print('There are {} agents. Each observes a state with length: {}'.format(
            states.shape[0], state_size))
        print('The state for the first agent looks like:', states[0])

        # ---- agent setup and evaluation ----
        # NOTE(review): a_dim/s_dim are hard-coded instead of using
        # action_size/state_size computed above; presumably they match the
        # checkpoint stored in ./pretrained/ -- confirm before changing.
        agent = Agent(a_dim=4, s_dim=33, clip_value=1, device=device)  # continuous action clip
        agent.load("./pretrained/")

        # NOTE(review): `eval` is called with (env, agent, brain_name), so it
        # is presumably a project-level function shadowing the builtin.
        eval(env, agent, brain_name)
    finally:
        env.close()
NOISE_C = 1.1 first_ep = 0 with tf.device('/GPU:0'): env = Environment("data/u20.txt", SEED) # env = gym.wrappers.Monitor(e.env, 'video/', video_callable=lambda episode_id: True,force = True) # video = VideoRecorder(env, "video.mp4" state_shape = env.state_shape action_len = env.action_shape[0] action_scale = None NOISE = 0.6 # np.random.seed(SEED) agent = Agent(state_shape, action_len, action_scale) if continued: agent.load(path) agent.summary() for episode in range(first_ep, EPISODES): state = env.reset() state = np.reshape(state, state_shape) score = 0 # print(state) # done = False noise = np.random.normal(NOISE, NOISE / 2, 2) / (1 + pow(NOISE_C, episode + 10)) for st in range(MAX_STEPS): # while not done : # env.render() # video.capture_frame() action = agent.act(state)