def create_saliency(model_idx, sess):
    """Build a single-env acting model and a gradient-saliency helper for it.

    The saliency object is wired to one policy logit of the first batch entry;
    which logit is chosen at run time through the returned int32 placeholder.
    All global variables are initialized and the saved parameters of the
    ``'model'`` scope are then loaded into ``sess``.

    Args:
        model_idx: Index into the module-level ``models`` collection. The
            entry is set to ``None`` when parameter loading raises
            ``AssertionError``.
        sess: TensorFlow session used to build, initialize and load the model.

    Returns:
        ``(agent, gradient_saliency, action_selector)`` on success, or a list
        of three ``None`` values when loading raised ``AssertionError``.
    """
    default_graph = tf.get_default_graph()

    single_env = wrappers.add_final_wrappers(utils.make_general_env(1))
    agent = create_act_model(sess, single_env, 1)

    # Placeholder that selects which action logit the gradients are taken of.
    action_selector = tf.placeholder(tf.int32)
    selected_logit = agent.pd.logits[0][action_selector]
    gradient_saliency = saliency.GradientSaliency(
        default_graph, sess, selected_logit, agent.X)

    sess.run(tf.compat.v1.global_variables_initializer())

    # NOTE: restoring a specific file via setup_utils.restore_file(models[model_idx])
    # is disabled here; parameters are loaded from whatever is currently restored.
    try:
        params_loaded = utils.load_params_for_scope(sess, 'model')
    except AssertionError:
        # Mark this model as unusable so callers can skip it.
        models[model_idx] = None
        return [None] * 3
    else:
        if not params_loaded:
            print('NO SAVED PARAMS LOADED')

    return agent, gradient_saliency, action_selector
def test(sess, load_path, env, should_render=False, rep_count=Config.REP):
    """Roll out the restored policy and, in evaluation mode, report test metrics.

    Evaluation mode is active when ``Config.TRAIN_EVAL`` or
    ``Config.TEST_EVAL`` is set. In that mode rendering is disabled, the
    caller-supplied ``env`` is used, and the loop stops once ``rep_count``
    episodes have been counted for every environment. Otherwise a fresh
    single environment is created and the loop runs until interrupted.

    Args:
        sess: TensorFlow session holding the model.
        load_path: Path handed to ``load_model``.
        env: Vectorized environment used in evaluation mode.
        should_render: Whether to display the rollout (ignored in evaluation
            mode).
        rep_count: Number of finished episodes counted per environment.

    Returns:
        A dict with ``'steps_elapsed'`` and, in evaluation mode, ``'scores'``,
        ``'testset_size'``, ``'test_rew_mean'`` and ``'test_succ_rate'``.
    """
    size = MPI.COMM_WORLD.Get_size()
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL

    if should_eval:
        should_render = False
    else:
        env = utils.make_general_env(1)

    # NOTE(review): a caller-supplied env is wrapped here as well — confirm the
    # caller does not pass an env that already carries the final wrappers.
    env = wrappers.add_final_wrappers(env)

    if should_render:
        from gym.envs.classic_control import rendering

    nenvs = env.num_envs

    # The original passed an undefined name `filename`; the path to load is the
    # `load_path` argument.
    load_model(sess, load_path)
    agent = create_act_model(sess, env, nenvs)

    sess.run(tf.global_variables_initializer())
    loaded_params = utils.load_params_for_scope(sess, 'model')

    if not loaded_params:
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    t_step = 0

    if should_render:
        viewer = rendering.SimpleImageViewer()

    should_render_obs = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        # High-res mode renders through the env itself instead of the viewer.
        if should_render and not should_render_obs:
            env.render()

    maybe_render()

    scores = np.array([0] * nenvs)
    score_counts = np.array([0] * nenvs)
    curr_rews = np.zeros((nenvs, 3))

    def should_continue():
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs

        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    while should_continue():
        action, values, state, _ = agent.step(obs, state, done)
        obs, rew, done, info = env.step(action)

        if should_render and should_render_obs:
            if np.shape(obs)[-1] % 3 == 0:
                ob_frame = obs[0, :, :, -3:]
            else:
                # Replicate the last channel three times to get an RGB frame.
                ob_frame = obs[0, :, :, -1]
                ob_frame = np.stack([ob_frame] * 3, axis=2)
            viewer.imshow(ob_frame)

        curr_rews[:, 0] += rew

        for i, d in enumerate(done):
            if d:
                # Only the first `rep_count` finished episodes per env count.
                if score_counts[i] < rep_count:
                    score_counts[i] += 1

                    if 'episode' in info[i]:
                        scores[i] += info[i].get('episode')['r']

        if t_step % 100 == 0:
            mpi_print('t', t_step, values[0], done[0], rew[0], curr_rews[0], np.shape(obs))

        maybe_render(info[0])

        t_step += 1

        if should_render:
            time.sleep(.02)

        if done[0]:
            if should_render:
                mpi_print('ep_rew', curr_rews)

            curr_rews[:] = 0

    # NOTE(review): `steps_elapsed` is not assigned anywhere in this function;
    # it must be provided at module level — confirm, otherwise this raises
    # NameError.
    result = {
        'steps_elapsed': steps_elapsed,
    }

    if should_eval:
        testset_size = rep_count * nenvs
        mean_score = np.sum(scores) / testset_size
        succ_rate = np.sum(scores == 10.0) / testset_size
        max_idx = np.argmax(scores)
        mpi_print('max idx', max_idx)
        mpi_print('steps_elapsed', steps_elapsed)

        if size > 1:
            mean_score = utils.mpi_average([mean_score])
            # The original printed an undefined `mpi_mean_score`; the averaged
            # value lives in `mean_score`.
            mpi_print('mpi_mean', mean_score)

        wandb.log({'Test_Rew_mean': mean_score, 'Test_Succ_rate': succ_rate})
        result['scores'] = scores
        result['testset_size'] = testset_size
        result['test_rew_mean'] = mean_score
        result['test_succ_rate'] = succ_rate

    return result
def enjoy_env_sess(sess):
    """Roll out the restored ``'model'`` policy for evaluation or for viewing.

    When ``Config.TRAIN_EVAL`` or ``Config.TEST_EVAL`` is set, the policy runs
    on ``Config.NUM_EVAL`` environments without rendering until ``Config.REP``
    episodes have been counted per environment. Otherwise a single
    environment is rendered and the loop runs until interrupted.

    Args:
        sess: TensorFlow session used to build and run the acting model.

    Returns:
        The mean accumulated episode return divided by ``Config.REP`` in
        evaluation mode, ``0`` otherwise.
    """
    evaluating = Config.TRAIN_EVAL or Config.TEST_EVAL
    episodes_per_env = Config.REP
    show_window = not evaluating

    env = utils.make_general_env(Config.NUM_EVAL if evaluating else 1)
    env = wrappers.add_final_wrappers(env)

    if show_window:
        from gym.envs.classic_control import rendering

    num_envs = env.num_envs

    agent = create_act_model(sess, env, num_envs)

    sess.run(tf.global_variables_initializer())
    if not utils.load_params_for_scope(sess, 'model'):
        print('NO SAVED PARAMS LOADED')

    obs = env.reset()
    step_idx = 0

    if show_window:
        viewer = rendering.SimpleImageViewer()

    # Low-res mode shows the observation itself; high-res mode asks the env
    # to render.
    show_observation = not Config.IS_HIGH_RES

    def maybe_render(info=None):
        if show_window and not show_observation:
            env.render()

    def keep_running():
        # Outside evaluation mode the rollout never stops on its own.
        if not evaluating:
            return True
        return np.sum(episode_counts) < episodes_per_env * num_envs

    maybe_render()

    scores = np.array([0] * num_envs)
    episode_counts = np.array([0] * num_envs)
    running_rews = np.zeros((num_envs, 3))

    state = agent.initial_state
    done = np.zeros(num_envs)

    while keep_running():
        action, values, state, _ = agent.step(obs, state, done)
        obs, rew, done, info = env.step(action)

        if show_window and show_observation:
            if np.shape(obs)[-1] % 3 != 0:
                # Replicate the last channel three times to get an RGB frame.
                gray = obs[0, :, :, -1]
                frame = np.stack([gray] * 3, axis=2)
            else:
                frame = obs[0, :, :, -3:]
            viewer.imshow(frame)

        running_rews[:, 0] += rew

        for env_idx, finished in enumerate(done):
            if not finished:
                continue
            # Only the first `episodes_per_env` episodes of each env count.
            if episode_counts[env_idx] < episodes_per_env:
                episode_counts[env_idx] += 1

                if 'episode' in info[env_idx]:
                    scores[env_idx] += info[env_idx].get('episode')['r']

        if step_idx % 100 == 0:
            mpi_print('t', step_idx, values[0], done[0], rew[0], running_rews[0], np.shape(obs))

        maybe_render(info[0])

        step_idx += 1

        if show_window:
            time.sleep(.02)

        if done[0]:
            if show_window:
                mpi_print('ep_rew', running_rews)

            running_rews[:] = 0

    if not evaluating:
        return 0

    mean_score = np.mean(scores) / episodes_per_env
    best_env = np.argmax(scores)
    mpi_print('scores', scores / episodes_per_env)
    print('mean_score', mean_score)
    mpi_print('max idx', best_env)

    # The cross-process average is only reported; the local mean is returned.
    averaged = utils.mpi_average([mean_score])
    mpi_print('mpi_mean', averaged)

    return mean_score
def enjoy_env_sess(sess, DIR_NAME):
    """Run the restored policy on VAE-encoded observations and record its score.

    Observations are scaled to ``[0, 1]``, encoded with a ``ConvVAE`` loaded
    from ``Config.VAE_PATH`` and the resulting latents are fed to the acting
    model. The result is appended as one line to
    ``<DIR_NAME>/<Config.RESTORE_ID>.txt``.

    Evaluation mode (``Config.TRAIN_EVAL`` or ``Config.TEST_EVAL``) runs
    without rendering until ``Config.REP`` episodes are counted per
    environment; otherwise a single rendered environment runs until
    interrupted.

    Args:
        sess: TensorFlow session used to build and run the acting model.
        DIR_NAME: Directory that holds the result text file.

    Returns:
        The mean accumulated episode return divided by ``Config.REP`` in
        evaluation mode, ``0`` otherwise.
    """
    should_render = True
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    rep_count = Config.REP

    file_name = '%s/%s.txt' % (DIR_NAME, Config.RESTORE_ID)

    # The file is still opened up front (so a bad path fails before the
    # rollout), but the context manager guarantees it is closed even when the
    # rollout raises or is interrupted.
    with open(file_name, 'a') as f_io:
        if should_eval:
            if Config.TEST_NUM_EVAL > -1:
                env = utils.make_general_env(Config.TEST_NUM_EVAL)
            else:
                env = utils.make_general_env(Config.NUM_EVAL)
            should_render = False
        else:
            env = utils.make_general_env(1)

        env = wrappers.add_final_wrappers(env)

        if should_render:
            from gym.envs.classic_control import rendering

        nenvs = env.num_envs

        vae = ConvVAE(z_size=Config.VAE_Z_SIZE, batch_size=nenvs, is_training=False,
                      reuse=False, gpu_mode=True, use_coord_conv=True)
        agent = create_act_model(sess, env, nenvs, Config.VAE_Z_SIZE)
        num_actions = env.action_space.n

        # Separate initializer for the 'randcnn' variables; it is run again
        # after the saved parameters have been loaded.
        init_rand = tf.variables_initializer(
            [v for v in tf.global_variables() if 'randcnn' in v.name])
        sess.run(tf.compat.v1.global_variables_initializer())

        # NOTE(review): these sampling ops are built but never run in this
        # function; kept so the constructed graph is unchanged.
        soft_numpy = tf.placeholder(tf.float32, [nenvs, num_actions], name='soft_numpy')
        dist = tfp.distributions.Categorical(probs=soft_numpy)
        sampled_action = dist.sample()

        loaded_params = utils.load_params_for_scope(sess, 'model')
        vae.load_json_full(Config.VAE_PATH)

        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

        obs = env.reset()
        t_step = 0

        if should_render:
            viewer = rendering.SimpleImageViewer()

        should_render_obs = not Config.IS_HIGH_RES

        def maybe_render(info=None):
            if should_render and not should_render_obs:
                env.render()

        maybe_render()

        scores = np.array([0] * nenvs)
        score_counts = np.array([0] * nenvs)
        curr_rews = np.zeros((nenvs, 3))

        def should_continue():
            if should_eval:
                return np.sum(score_counts) < rep_count * nenvs

            return True

        state = agent.initial_state
        done = np.zeros(nenvs)

        # Take one random step before the policy starts acting, so the first
        # policy input is the observation after that step.
        actions = [env.action_space.sample() for _ in range(nenvs)]
        actions = np.array(actions)
        obs, _, _, _ = env.step(actions)

        sess.run(init_rand)
        while should_continue():
            # The encoder receives float32 pixels scaled into [0, 1].
            encoder_in = obs.astype(np.float32) / 255.0
            batch_z = vae.encode(encoder_in)

            action, values, state, _ = agent.step(batch_z, state, done)
            obs, rew, done, info = env.step(action)

            if should_render and should_render_obs:
                if np.shape(obs)[-1] % 3 == 0:
                    ob_frame = obs[0, :, :, -3:]
                else:
                    ob_frame = obs[0, :, :, -1]
                    ob_frame = np.stack([ob_frame] * 3, axis=2)
                viewer.imshow(ob_frame)

            curr_rews[:, 0] += rew

            for i, d in enumerate(done):
                if d:
                    # Only the first `rep_count` episodes of each env count.
                    if score_counts[i] < rep_count:
                        score_counts[i] += 1

                        if 'episode' in info[i]:
                            scores[i] += info[i].get('episode')['r']

            maybe_render(info[0])

            t_step += 1

            if should_render:
                time.sleep(.02)

            if done[0]:
                if should_render:
                    mpi_print('ep_rew', curr_rews)

                curr_rews[:] = 0

        result = 0

        if should_eval:
            result = np.mean(scores) / rep_count

        f_io.write("{}\n".format(result))

    return result
def main(sess):
    """Collect 400 steps of observations and save them as a compressed ``.npz``.

    Actions come from the restored acting model when ``Config.RESTORE_ID`` is
    non-empty, and are sampled uniformly from the action space otherwise.
    Only the observations are written to disk.

    Args:
        sess: TensorFlow session used to build and restore the acting model
            (only touched when a policy is used).

    Returns:
        The path of the written ``.npz`` file.
    """
    rank = MPI.COMM_WORLD.Get_rank()

    seed = int(time.time()) % 10000

    if Config.EXTRACT_SEED != -1:
        seed = Config.EXTRACT_SEED
    if Config.EXTRACT_RANK != -1:
        rank = Config.EXTRACT_RANK

    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    use_policy = (Config.RESTORE_ID != '')

    nenvs = Config.NUM_ENVS
    env = utils.make_general_env(nenvs, seed=rank)

    agent = None
    if use_policy:
        agent = create_act_model(sess, env, nenvs)
        sess.run(tf.compat.v1.global_variables_initializer())
        loaded_params = utils.load_params_for_scope(sess, 'model')
        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

    # make directory
    DIR_NAME = './VAE/records/'
    os.makedirs(DIR_NAME, exist_ok=True)

    # set file name
    filename = DIR_NAME + "/" + Config.get_save_file() + "_" + str(seed * 100 + rank) + ".npz"

    with tf.compat.v1.Session(config=config):
        env = wrappers.add_final_wrappers(env)
        nenv = env.num_envs if hasattr(env, 'num_envs') else 1

        obs = np.zeros((nenv,) + env.observation_space.shape,
                       dtype=env.observation_space.dtype.name)
        obs[:] = env.reset()
        dones = [False for _ in range(nenv)]

        # remove noisy inputs: take one random step before recording
        actions = [env.action_space.sample() for _ in range(nenv)]
        actions = np.array(actions)
        obs[:], rewards, dones, _ = env.step(actions)

        # Without a policy there is no agent, hence no recurrent state either
        # (the original dereferenced `agent` unconditionally).
        state = agent.initial_state if use_policy else None

        mb_obs, mb_rewards, mb_actions, mb_next_obs, mb_dones = [], [], [], [], []

        for _ in range(400):
            if use_policy:
                # NOTE(review): the state returned by agent.step is discarded,
                # so every step is fed the initial state — confirm intended.
                actions, _, _, _ = agent.step(obs, state, dones)
            else:
                actions = [env.action_space.sample() for _ in range(nenv)]
            actions = np.array(actions)

            # `obs` is updated in place below, so store a copy.
            mb_obs.append(obs.copy())
            mb_actions.append(actions)
            mb_dones.append(dones)

            # Take actions in env and look at the results
            obs[:], rewards, dones, _ = env.step(actions)
            mb_next_obs.append(obs.copy())
            mb_rewards.append(rewards)

        # batch of steps to batch of rollouts
        mb_obs = np.asarray(mb_obs, dtype=obs.dtype)
        mb_next_obs = np.asarray(mb_next_obs, dtype=obs.dtype)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
        mb_actions = np.asarray(mb_actions)
        # `np.bool` was removed in NumPy 1.24; the builtin is the replacement.
        mb_dones = np.asarray(mb_dones, dtype=bool)

        # Only observations are saved; the remaining arrays are prepared for
        # the (currently disabled) full-transition record.
        np.savez_compressed(filename, obs=mb_obs)

    return filename
def enjoy_env_sess(sess, checkpoint, overlap):
    """Evaluate one saved checkpoint on 20 environments and log the result.

    The checkpoint file is restored through ``setup_utils.restore_file`` with
    base name ``'<8 * checkpoint>M'``, or ``None`` when ``checkpoint`` is 32.
    The policy then runs until 50 episodes have been counted for every
    environment. Mean score and success rate (share of episodes with return
    ``10.0``) are logged to wandb.

    Args:
        sess: TensorFlow session used to build and run the acting model.
        checkpoint: Checkpoint index. ``0`` short-circuits to zero scores.
        overlap: Forwarded to ``setup_utils.restore_file`` as
            ``overlap_config``.

    Returns:
        ``(mean_score, succ_rate)``. Both are ``0.0`` for checkpoint ``0`` and
        when no saved parameters could be loaded.
    """
    mpi_print('test levels seed', Config.SET_SEED)
    mpi_print('test levels ', Config.NUM_LEVELS)
    rep_count = 50

    env = utils.make_general_env(20)
    env = wrappers.add_final_wrappers(env)
    nenvs = env.num_envs

    sess.run(tf.global_variables_initializer())
    # Result unused; call retained in case it has side effects — TODO confirm
    # and remove.
    Config.get_args_dict()
    agent = create_act_model(sess, env, nenvs)

    # Defined up front so every exit path below can use them (the original
    # read them before assignment on the early-return paths).
    steps_elapsed = checkpoint * 8000000
    mean_score = 0.0
    succ_rate = 0.0

    # The zero-checkpoint case must be tested first: in the original it sat
    # behind `checkpoint != 32` and could never be reached.
    if checkpoint == 0:
        wandb.log({
            'Rew_mean': mean_score,
            'Succ_rate': succ_rate,
            'Step_elapsed': steps_elapsed
        })
        return mean_score, succ_rate

    # The file name is derived from Config.RESTORE_ID inside restore_file.
    base_name = str(8 * checkpoint) + 'M' if checkpoint != 32 else None

    sess.run(tf.global_variables_initializer())

    load_file = setup_utils.restore_file(Config.RESTORE_ID,
                                         overlap_config=overlap,
                                         base_name=base_name)
    is_loaded = utils.load_params_for_scope(sess, 'model')
    if not is_loaded:
        mpi_print('NO SAVED PARAMS LOADED')
        return mean_score, succ_rate

    obs = env.reset()

    scores = np.zeros((nenvs, rep_count))
    eplens = np.zeros((nenvs, rep_count))
    score_counts = np.array([0] * nenvs)

    def should_continue():
        return np.sum(score_counts) < rep_count * nenvs

    state = agent.initial_state
    done = np.zeros(nenvs)

    def rollout(obs, state, done):
        """rollout for rep * nenv times and return scores"""
        t = 0
        # NOTE(review): `count` is one scalar shared by all environments, so
        # the `rews` column and the `eplens` slot follow whichever env
        # finished last — confirm this is intended.
        count = 0
        rews = np.zeros((nenvs, rep_count))
        while should_continue():
            action, values, state, _ = agent.step(obs, state, done)
            obs, rew, done, info = env.step(action)
            rews[:, count] += rew
            t += 1

            for i, d in enumerate(done):
                if d:
                    eplens[i][count] = t
                    if score_counts[i] < rep_count:
                        score_counts[i] += 1
                        count = score_counts[i] - 1
                        # aux score
                        if 'episode' in info[i]:
                            scores[i][count] = info[i].get('episode')['r']

        return scores, rews, eplens

    mpi_print(load_file)
    scores, rews, eplens = rollout(obs, state, done)

    size = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    testset_size = rep_count * nenvs

    if size == 1:
        if rank == 0:
            utils.save_pickle(scores, Config.LOGDIR + 'scores')
            mean_score = np.sum(scores) / testset_size
            succ_rate = np.sum(scores == 10.0) / testset_size
            mpi_print('cpus ', size)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is
            # rep_counts * nenvs; each one has a new seed (maybe counted)
            mpi_print('succ_rate', succ_rate)
            mpi_print('steps_elapsed:', steps_elapsed)
            mpi_print('mean score', mean_score)
            wandb.log({
                'Rew_mean': mean_score,
                'Succ_rate': succ_rate,
                'Step_elapsed': steps_elapsed
            })
    else:
        # The original wrote `np.sum(scores=10.0)`, which raises TypeError;
        # the success count is the number of returns equal to 10.0.
        succ = np.sum(scores == 10.0) / testset_size
        succ_rate = utils.mpi_average([succ])
        mean_score_tmp = np.sum(scores) / testset_size
        mean_score = utils.mpi_average([mean_score_tmp])
        if rank == 0:
            mpi_print('testset size', rep_count * nenvs * size)
            mpi_print('load file name', load_file)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is
            # rep_counts * nenvs; each one has a new seed (maybe counted)
            mpi_print('succ_rate', succ_rate)
            mpi_print('mean score', mean_score)
            wandb.log({'Rew_mean': mean_score, 'Succ_rate': succ_rate})

    return mean_score, succ_rate
def enjoy_env_sess(sess, DIR_NAME):
    """Run the restored policy with sampled or e-greedy actions and record its score.

    For the two special ``Config.USE_LSTM`` codes (8425 and 1081) the action
    returned by ``agent.step`` is replaced by a uniformly random action with
    probability 0.1. For any other value the action is sampled from the
    distribution returned by ``agent.get_softmax``. The result is appended as
    one line to ``<DIR_NAME>/<Config.RESTORE_ID>.txt``.

    Evaluation mode (``Config.TRAIN_EVAL`` or ``Config.TEST_EVAL``) runs
    without rendering until ``Config.REP`` episodes are counted per
    environment; otherwise a single rendered environment runs until
    interrupted.

    Args:
        sess: TensorFlow session used to build and run the acting model.
        DIR_NAME: Directory that holds the result text file.

    Returns:
        The mean accumulated episode return divided by ``Config.REP`` in
        evaluation mode, ``0`` otherwise.
    """
    should_render = True
    should_eval = Config.TRAIN_EVAL or Config.TEST_EVAL
    rep_count = Config.REP
    mpi_print = utils.mpi_print

    file_name = '%s/%s.txt' % (DIR_NAME, Config.RESTORE_ID)

    # The file is still opened up front (so a bad path fails before the
    # rollout), but the context manager guarantees it is closed even when the
    # rollout raises or is interrupted.
    with open(file_name, 'a') as f_io:
        if should_eval:
            if Config.TEST_NUM_EVAL > -1:
                env = utils.make_general_env(Config.TEST_NUM_EVAL)
            else:
                env = utils.make_general_env(Config.NUM_EVAL)
            should_render = False
        else:
            env = utils.make_general_env(1)

        env = wrappers.add_final_wrappers(env)

        if should_render:
            from gym.envs.classic_control import rendering

        nenvs = env.num_envs

        agent = create_act_model(sess, env, nenvs)
        num_actions = env.action_space.n

        # Separate initializer for the 'randcnn' variables; it is run again
        # after the saved parameters have been loaded.
        init_rand = tf.variables_initializer(
            [v for v in tf.global_variables() if 'randcnn' in v.name])
        sess.run(tf.compat.v1.global_variables_initializer())

        # Ops that draw one action per env from externally supplied
        # probabilities.
        soft_numpy = tf.placeholder(tf.float32, [nenvs, num_actions], name='soft_numpy')
        dist = tfp.distributions.Categorical(probs=soft_numpy)
        sampled_action = dist.sample()

        loaded_params = utils.load_params_for_scope(sess, 'model')

        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

        obs = env.reset()
        t_step = 0

        if should_render:
            viewer = rendering.SimpleImageViewer()

        should_render_obs = not Config.IS_HIGH_RES

        def maybe_render(info=None):
            if should_render and not should_render_obs:
                env.render()

        maybe_render()

        scores = np.array([0] * nenvs)
        score_counts = np.array([0] * nenvs)
        curr_rews = np.zeros((nenvs, 3))

        def should_continue():
            if should_eval:
                return np.sum(score_counts) < rep_count * nenvs

            return True

        state = agent.initial_state
        done = np.zeros(nenvs)

        sess.run(init_rand)
        while should_continue():
            if Config.USE_LSTM == 8425 or Config.USE_LSTM == 1081:
                q_actions, values, state, _ = agent.step(obs, state, done)
                # e-greedy: 1 where a random action replaces the agent's one.
                # The original called `.astype(np.int)` and dropped the result;
                # `np.int` no longer exists in NumPy >= 1.24, so the cast is
                # done with the builtin and actually assigned.
                greedy_flag = (np.random.rand(q_actions.shape[0]) < 0.1).astype(int)
                random_actions = np.random.randint(0, num_actions, size=q_actions.shape[0])
                action = random_actions * greedy_flag + (1 - greedy_flag) * q_actions
            else:
                total_soft = agent.get_softmax(obs, state, done)
                action = sess.run([sampled_action], {soft_numpy: total_soft})
                action = action[0]

            obs, rew, done, info = env.step(action)

            if should_render and should_render_obs:
                if np.shape(obs)[-1] % 3 == 0:
                    ob_frame = obs[0, :, :, -3:]
                else:
                    ob_frame = obs[0, :, :, -1]
                    ob_frame = np.stack([ob_frame] * 3, axis=2)
                viewer.imshow(ob_frame)

            curr_rews[:, 0] += rew

            for i, d in enumerate(done):
                if d:
                    # Only the first `rep_count` episodes of each env count.
                    if score_counts[i] < rep_count:
                        score_counts[i] += 1

                        if 'episode' in info[i]:
                            scores[i] += info[i].get('episode')['r']

            maybe_render(info[0])

            t_step += 1

            if should_render:
                time.sleep(.02)

            if done[0]:
                if should_render:
                    mpi_print('ep_rew', curr_rews)

                curr_rews[:] = 0

        result = 0

        if should_eval:
            result = np.mean(scores) / rep_count

        f_io.write("{}\n".format(result))

    return result