Example #1
import numpy as np
import torch
import matplotlib.pyplot as plt

# NECAgent comes from this repository's own modules.
def run_evaluation(config, path, episodes=50):
    env = config["env"]
    agent = NECAgent(config)
    agent.nec_net.load_state_dict(torch.load(path))
    env.eval()
    agent.eval()

    rewards = []

    for ep in range(1, episodes + 1):
        obs, reward_sum = env.reset(), 0

        while True:
            env.render(mode='rgb_array')
            obs = torch.from_numpy(np.float32(obs))
            action = agent.step(obs)
            next_obs, reward, done, info = env.step(action)
            reward_sum += reward
            obs = next_obs

            if done:
                if config['env_name'].startswith('CartPole'):
                    # CartPole also grants +1 on the terminating step; exclude it from the total
                    reward_sum -= reward

                rewards.append(reward_sum)
                break

    plt.plot(range(1, episodes + 1), rewards)
    plt.savefig(f"eval_{config['exp_name']}.png")
Example #2
from itertools import count

from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from tensorboard_logger import configure, log_value

# Likely sources of the helpers above; DQN and NECAgent are repo-local.
def main(env_id, embedding_size):
    env = wrap_deepmind(make_atari(env_id), scale=True)
    embedding_model = DQN(embedding_size)
    agent = NECAgent(env, embedding_model)

    # subprocess.Popen(["tensorboard", "--logdir", "runs"])
    configure("runs/pong-run")

    for t in count():
        if t == 0:
            reward = agent.warmup()
        else:
            reward = agent.episode()
        print("Episode {}\nTotal Reward: {}".format(t, reward))
        log_value('score', reward, t)
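The commented-out subprocess.Popen call above shows how these logs are meant to be viewed; the equivalent shell command is tensorboard --logdir runs, which picks up the event files that configure("runs/pong-run") writes.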
Example #3
import os

import numpy as np
import torch
from tqdm import tqdm

# NECAgent comes from this repository's own modules.
def run_training(config, return_agent=False):
    env = config["env"]
    env.train()
    agent = NECAgent(config)

    done = True
    epsilon = 1
    for t in tqdm(range(1, config["max_steps"] + 1)):
        if done:
            obs, done = env.reset(), False
            agent.new_episode()

        if config["epsilon_anneal_start"] < t <= config["epsilon_anneal_end"]:
            epsilon -= ((config["initial_epsilon"] - config["final_epsilon"])
                        / (config["epsilon_anneal_end"] - config["epsilon_anneal_start"]))
            agent.set_epsilon(epsilon)

        # env.render()
        if isinstance(obs, np.ndarray):
            obs = torch.from_numpy(np.float32(obs))
        action = agent.step(obs.to(config['device']))
        next_obs, reward, done, info = env.step(action)
        solved = agent.update((reward, done))

        if solved:
            return

        obs = next_obs

        if t >= config["start_learning_step"]:
            if t % config["replay_frequency"] == 0:
                agent.optimize()

            if t % config["eval_frequency"] == 0:
                # agent.eval()
                # # evaluate agent here #
                path = f'{os.getcwd()}/pong/trained_agents/nec_{agent.exp_name}_{t // config["eval_frequency"]}.pth'
                torch.save(agent.nec_net.state_dict(), path)
                # agent.train()

    if return_agent:
        return agent
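None of these snippets show the config dict itself. Below is a minimal sketch built only from the keys that run_training and run_evaluation above actually read; the wrapper class (borrowed from Example #6), experiment name, device string, and every numeric value are assumptions, not values from the source.

import gym
import torch

# Hypothetical config; the keys mirror the lookups in run_training / run_evaluation above
config = {
    "env": CartPoleWrapper(gym.make("CartPole-v1")),  # placeholder wrapper, as in Example #6
    "env_name": "CartPole-v1",
    "exp_name": "nec_cartpole",            # appears in checkpoint and plot filenames
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "max_steps": 100000,
    "initial_epsilon": 1.0,
    "final_epsilon": 0.01,
    "epsilon_anneal_start": 5000,
    "epsilon_anneal_end": 25000,
    "start_learning_step": 1000,
    "replay_frequency": 4,
    "eval_frequency": 10000,
}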
Example #4
def main():
    env = PongWrapper(gym.make('Pong-v0'))
    embedding_model = AtariDQN(5)
    agent = NECAgent(env, embedding_model)
    agent.train()
Example #5
def main():
    env = CartPoleWrapper(gym.make('CartPole-v0'))
    embedding_model = CartPoleDQN(5)
    agent = NECAgent(env, embedding_model)
    agent.train()
Example #6
def main():
    env = CartPoleWrapper(gym.make('CartPole-v1'))
    embedding_model = DQN(5)
    agent = NECAgent(env, embedding_model, test_period=25)
    agent.train()
Example #7
            if self.df.loc[self.cur, 'EMPI'] == self.df.loc[self.cur + 1, 'EMPI']:
                # same patient: advance to the next record in the trajectory
                next_state = self.df.loc[self.cur + 1, feature_fields].values
                reward = reward + intermediate_reward(self.cur_state, next_state)
                done = 0
                self.cur += 1
            else:
                # trajectory is finished
                next_state = np.zeros(len(self.cur_state))
                done = 1
        self.cur_state = next_state

        return next_state, reward, done, action

env = Pseudo_env(df)
embedding_model = Embed(len(feature_fields), 32)
agent = NECAgent(env, embedding_model, batch_size=32, sgd_lr=1e-5)

for t in count():
    if t < 100:
        reward = agent.warmup()
    else:
        reward = agent.episode()
    print("Episode {}\nTotal Reward: {}".format(t, reward))


test_df = pd.read_csv('HFpEF data/aim3data_test_set.csv')
a = test_df.copy()
num = np.size(a, 0)
patient_num = np.size(pd.unique(a['EMPI']))

from torch import Tensor