scopes = [network_scope, scene_scope, task_scope]

for i_episode in range(NUM_EVAL_EPISODES):
    env.reset()
    terminal = False
    ep_reward = 0
    ep_collision = 0
    ep_t = 0

    while not terminal:
        usf_s_g = global_network.run_usf(sess, env.s_t, env.target, scopes)
        pi_values = global_network.run_policy(
            sess, env.s_t, env.target, usf_s_g, scopes)
        action = sample_action(pi_values)
        env.step(action)
        env.update()

        terminal = env.terminal
        if ep_t == 500:
            break
        if env.collided:
            ep_collision += 1
        ep_reward += env.reward
        ep_t += 1

    ep_lengths.append(ep_t)
    ep_rewards.append(ep_reward)
    ep_collisions.append(ep_collision)

    if VERBOSE:
        print("episode #{} ends after {} steps".format(
            i_episode, ep_t))  # format arguments inferred; the line is truncated in the original
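# The loops in this file call sample_action(pi_values), but its definition is not
# part of this excerpt. Below is a minimal sketch of such a helper, assuming
# pi_values is a 1-D array of action probabilities returned by run_policy; the
# repository's actual helper may be implemented differently.
import numpy as np

def sample_action(pi_values):
    """Sample an action index from a probability distribution over actions."""
    pi_values = np.asarray(pi_values, dtype=np.float64)
    pi_values /= pi_values.sum()  # re-normalize to guard against rounding drift
    return int(np.random.choice(len(pi_values), p=pi_values))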
for i_episode in range(NNN):
    optimal = env.reset()
    terminal = False
    ep_reward = 0
    ep_collision = 0
    ep_t = 0
    f_path.write(str(counter * NNN + i_episode) + ': [')

    while not terminal:
        # viewer.imshow(env.observation)
        # time.sleep(0.3)
        # print(env.isCheckpoint)
        pi_values = global_network.run_policy(
            sess, env.s_t, env.s_position, env.checkpoint, env.s_a_t,
            env.s_c_t, env.isCheckpoint, env.s_aux_cl, scopes)
        # print(pi_values)
        action = sample_action(pi_values)
        env.step(action)
        env.update()

        terminal = env.terminal
        if ep_t == 500:
            break
        if env.collided:
            ep_collision += 1
        ep_reward += env.reward
        ep_t += 1
        # if not terminal:
        #     f_path.write('[' + str(int(env.x * 2)) + ', ' + str(int(env.z * 2)) + '], ')
        # else:
h = target[3] * 300
xa = int((x + w) / 2)
xm = int(x - xa)
ya = int((y + h) / 2)
ym = int(y - ya)
# print(env.observation)
env.observation[ym:ym + 2, xm:xa] = 255
env.observation[ya:ya + 2, xm:xa] = 255
env.observation[ym:ya, xm:xm + 2] = 255
env.observation[ym:ya, xa:xa + 2] = 255
'''  # apparently closes a triple-quoted block opened earlier, so the bounding-box overlay above is disabled
# viewer.imshow(env.observation)
# time.sleep(0.5)
pi_values = global_network.run_policy(
    sess, env.s_t, env.s_position, env.checkpoint, scopes)
action = sample_action(pi_values)
env.step(action)
env.update()

terminal = env.terminal
if ep_t == 500:
    break
if env.collided:
    ep_collision += 1
ep_reward += env.reward
ep_t += 1

if not terminal:
    f.write('[' + str(int(env.x * 2)) + ', ' +
            str(int(env.z * 2)) + '], ')
else:
    f.write('[' + str(int(env.x * 2)) + ', ' +
            str(int(env.z * 2)) + ']')  # closing bracket assumed; the line is truncated in the original
def main():
    # disable all v2 behavior (assumes TensorFlow is imported as
    # `import tensorflow.compat.v1 as tf`)
    tf.disable_v2_behavior()
    tf.disable_eager_execution()

    device = "/cpu:0"  # use CPU for display tool
    network_scope = TASK_TYPE  # always 'navigation'
    list_of_tasks = TASK_LIST
    scene_scopes = list_of_tasks.keys()

    global_network = ActorCriticFFNetwork(action_size=ACTION_SIZE,
                                           device=device,
                                           network_scope=network_scope,
                                           scene_scopes=scene_scopes)

    sess = tf.Session()
    init = tf.global_variables_initializer()
    sess.run(init)

    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state(CHECKPOINT_DIR)

    # restore weights if a checkpoint was saved from past training
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("checkpoint loaded: {}".format(checkpoint.model_checkpoint_path))
    else:
        print("Could not find old checkpoint")

    scene_stats = dict()
    for scene_scope in scene_scopes:
        # TODO: remove hard-coded scene
        scene_scope = "FloorPlan402"
        scene_stats[scene_scope] = []

        for task_scope in list_of_tasks[scene_scope]:  # tasks are positions
            # env = ai2thor.controller.Controller(scene="FloorPlan227", gridSize=0.25, width=1000, height=1000)
            with open(GOAL_FILE, 'r') as f:
                GOAL_DATA = json.load(f)
            GOAL_POS = GOAL_DATA["agent_position"]

            env = RLController({
                'scene': scene_scope,
                'terminal_state_id': int(task_scope),
                'goal_pos': GOAL_POS,
                'goal_image_fpath': "data/FP402_goal_towel.png"
            })
            env.docker_enabled = True

            ep_rewards = []
            ep_lengths = []
            ep_collisions = []

            scopes = [network_scope, scene_scope]

            for i_episode in range(NUM_EVAL_EPISODES):
                env.reset()
                terminal = False
                ep_reward = 0
                ep_collision = 0
                ep_t = 0

                while not terminal:
                    # mirrors the actions taken in the paper
                    # NOTE: rearranged to mirror the order in scene_loader
                    list_of_actions = [
                        "MoveAhead", "RotateRight", "RotateLeft", "MoveBack"
                    ]
                    pi_values = global_network.run_policy(
                        sess, env.curr_state, env.target, scopes)
                    # the sampled action is an integer index, so list_of_actions
                    # must be in the correct order
                    action = sample_action(pi_values)
                    print("Ep_t: {} \n\tCollided?: {} \n\tAction: {} \n\tValue: {} \n\tAll Action Values: {}"
                          .format(ep_t, env.collided, list_of_actions[action],
                                  pi_values[action], pi_values))
                    env.step(list_of_actions[action])
                    env.update()

                    terminal = env.terminal
                    if ep_t == 10000:
                        break
                    if env.collided:
                        ep_collision += 1
                    ep_reward += env.reward
                    ep_t += 1

                ep_lengths.append(ep_t)
                ep_rewards.append(ep_reward)
                ep_collisions.append(ep_collision)

            print('evaluation: %s %s' % (scene_scope, task_scope))
            print('mean episode reward: %.2f' % np.mean(ep_rewards))
            print('mean episode length: %.2f' % np.mean(ep_lengths))
            print('mean episode collision: %.2f' % np.mean(ep_collisions))
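# Entry point: the excerpt defines main() but does not show how the script is
# invoked; the standard guard below is an assumption, not confirmed by the original.
if __name__ == '__main__':
    main()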