Ejemplo n.º 1
0
def main(env_id, embedding_size):
    """Train an NEC agent on an Atari environment, logging every episode.

    Builds the environment with ``make_atari`` wrapped by ``wrap_deepmind``
    (``scale=True``), creates a ``DQN`` embedding model of the requested
    size, and hands both to ``NECAgent``.  Logging is configured to write
    to ``runs/pong-run``.

    The loop iterates over ``count()`` and has no exit condition: the very
    first iteration calls ``agent.warmup()``, every later one calls
    ``agent.episode()``.  Each result is printed and logged under the
    ``'score'`` tag with the iteration index as the step.

    Args:
        env_id: Environment identifier forwarded to ``make_atari``.
        embedding_size: Size argument forwarded to ``DQN``.
    """
    agent = NECAgent(
        wrap_deepmind(make_atari(env_id), scale=True),
        DQN(embedding_size),
    )

    # The in-process TensorBoard launch is disabled; to inspect the logs run
    # `tensorboard --logdir runs` separately.
    configure("runs/pong-run")

    for episode_idx in count():
        # Only iteration 0 is a warm-up; all others are regular episodes.
        run_once = agent.warmup if episode_idx == 0 else agent.episode
        score = run_once()
        print("Episode {}\nTotal Reward: {}".format(episode_idx, score))
        log_value('score', score, episode_idx)
Ejemplo n.º 2
0
                # trajectory is finished
                next_state = np.zeros(len(self.cur_state))
                done = 1
        self.cur_state=next_state
        
        return next_state,reward,done,action

# Build the environment from `df`, a 32-dimensional embedding over the
# feature fields, and the NEC agent that trains on them.
env = Pseudo_env(df)
embedding_model = Embed(len(feature_fields), 32)
agent = NECAgent(env, embedding_model, batch_size=32, sgd_lr=1e-5)

# The first 100 iterations are warm-up runs; every later one is a regular
# episode.
# NOTE(review): the loop has no exit condition (assuming `count` is
# itertools.count), so the statements below it only run once the loop is
# interrupted -- confirm this is intended.
for t in count():
    reward = agent.warmup() if t < 100 else agent.episode()
    print("Episode {}\nTotal Reward: {}".format(t, reward))


# Load the test set and work on a copy so `test_df` itself stays unmodified.
test_df = pd.read_csv('HFpEF data/aim3data_test_set.csv')
a = test_df.copy()

# Number of rows in the test set.
num = a.shape[0]
# Number of distinct values in the 'EMPI' column
# (presumably one EMPI per patient -- verify against the data dictionary).
patient_num = pd.unique(a['EMPI']).size

from torch import Tensor
from torch.autograd import Variable

# Switch the embedding model to evaluation mode before processing the test
# data (presumably a torch.nn.Module, where eval() disables training-only
# behaviour such as dropout -- TODO confirm against the Embed definition).
embedding_model.eval()

import pickle
# Path of the test-state file; presumably read or written with pickle
# further down -- the usage is not visible here.
filename = 'evaluate data/aim3state_test.data'