def build_agent(env, env_id, agent_configs):
    """Construct an AWRAgent for *env*, layering env-specific overrides.

    Args:
        env: gym-style environment instance passed through to AWRAgent.
        env_id: string key used to look up per-environment overrides in
            the AWR_CONFIGS registry.
        agent_configs: base keyword-config dict supplied by the caller.
            Previously this argument was silently discarded (it was
            reassigned to {} on entry); it is now honored as the base
            config, with AWR_CONFIGS[env_id] entries overriding it.

    Returns:
        A newly constructed awr_agent.AWRAgent bound to a fresh
        tf.Graph / tf.Session pair.
    """
    # Copy so the env-specific update never mutates the caller's dict.
    agent_configs = dict(agent_configs)
    if env_id in AWR_CONFIGS:
        agent_configs.update(AWR_CONFIGS[env_id])
    # Each agent gets its own graph + session so multiple agents can
    # coexist in one process without variable-name collisions.
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    agent = awr_agent.AWRAgent(env=env, sess=sess, **agent_configs)
    return agent
def build_agent(env):
    """Build an AWRAgent for *env*.

    The environment id is read from the module-level ``arg_parser``;
    if a matching entry exists in ``awr_configs.AWR_CONFIGS`` it
    supplies the agent's keyword configuration, otherwise the agent
    is constructed with its defaults.

    Returns:
        An awr_agent.AWRAgent backed by a fresh tf.Graph/tf.Session.
    """
    env_id = arg_parser.env
    # Registered per-env config, or empty dict for agent defaults.
    agent_configs = awr_configs.AWR_CONFIGS.get(env_id, {})
    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    return awr_agent.AWRAgent(env=env, sess=sess, **agent_configs)
.format(data_std, data_std), "rb")), args.constraint) terminals = [] for el in datas: terminal = np.ones(len(el[0])) terminal[-1] = 0 terminals.append(terminal) datas2 = [] for el, terminal in zip(datas, terminals): datas2.append((el[0], el[1], el[2], terminal)) env = gym.make("Reacher-v2") graph = tf.Graph() sess = tf.Session(graph=graph) agent = awr_agent.AWRAgent(env=env, sess=sess, **configs) agent.load_model( "../output/Reacher-v2_{}_offline/model.ckpt".format(data_std)) qnn = Q(agent.get_state_size(), agent.get_action_size(), 0.001).cuda() eval_std = args.eval_std # For each, try different eval_std num_epochs = args.n_epochs FQE = FittedQEvaluation(qnn) policy = Policy(agent, eval_std) FQE.fit_Q(policy, datas2, num_epochs, agent._discount) vals0 = [] for _ in tqdm(range(100)): path = rollout_path(agent, eval_std)