def build_a2c_net_from_file(cfg, use_grad_cam=True):
    """Build a network via ``common.getNet`` and restore it from ``cfg.A2C_FN``.

    Args:
        cfg: Configuration object handed to ``common.getNet``; its ``A2C_FN``
            attribute is used as the checkpoint path.
        use_grad_cam: Value stored on the returned network's ``grad_cam``
            attribute.

    Returns:
        The network, switched to eval mode, with the checkpoint's state dict
        loaded.
    """

    def _keep_storage(storage, loc):
        # Hand every deserialized storage back unchanged instead of remapping
        # it to the location recorded in the checkpoint.
        return storage

    checkpoint_path = cfg.A2C_FN
    model = common.getNet(cfg)
    model.load_state_dict(
        torch.load(checkpoint_path, map_location=_keep_storage))
    model.eval()
    model.grad_cam = use_grad_cam
    return model
def build_a2c_nets_from_files(cfg, use_grad_cam=True):
    """Build one network per rollout-policy checkpoint listed in ``cfg``.

    A fresh network is created with ``common.getNet(cfg)`` for each of
    ``cfg.I2A_RP_FN1`` .. ``cfg.I2A_RP_FN4``, its state dict is loaded from
    that file, and it is put into eval mode.

    Args:
        cfg: Configuration object handed to ``common.getNet``; must expose the
            four ``I2A_RP_FN*`` checkpoint paths.
        use_grad_cam: Value stored on every returned network's ``grad_cam``
            attribute.

    Returns:
        list: The loaded networks, in the same order as the checkpoint paths.
    """
    net_files = [
        cfg.I2A_RP_FN1,
        cfg.I2A_RP_FN2,
        cfg.I2A_RP_FN3,
        cfg.I2A_RP_FN4,
    ]
    nets = []
    # Iterate the paths themselves rather than a hard-coded index range, so
    # the loop always matches the list above.
    for net_file in net_files:
        net = common.getNet(cfg)
        # The callable hands each deserialized storage back unchanged instead
        # of remapping it to the location recorded in the checkpoint.
        net.load_state_dict(
            torch.load(net_file, map_location=lambda storage, loc: storage))
        net.eval()
        net.grad_cam = use_grad_cam
        nets.append(net)
    return nets
# The save location is the summary writer's log directory.
saves_path = writer.logdir

# config.NUM_ENVS environments plus one separate test environment.
envs = [common.makeCustomizedGridEnv(config) for _ in range(config.NUM_ENVS)]
test_env = common.makeCustomizedGridEnv(config)

# Seed the main environments first, then the test environment, with the same
# seed value.
for _seed_targets in (envs, [test_env]):
    common.set_seed(config.SEED, envs=_seed_targets)

# Observation shape and action count are read from the first environment.
obs_shape, act_n = envs[0].observation_space.shape, envs[0].action_space.n

# Policy network; its string form is recorded on the config.
# (An earlier common.AtariA2C(obs_shape, act_n).to(device) variant is disabled.)
net_policy = common.getNet(config)
config.A2CNET = str(net_policy)

# Environment model, moved to the target device.
# (Loading its weights from config.EM_FILE_NAME is currently disabled.)
net_em = models.environment_model.EnvironmentModel(
    obs_shape, act_n, config).to(device)
config.EM_NET = str(net_em)

# I2A network built on top of the environment model and the policy network.
# (Loading a saved I2A checkpoint and printing the networks are disabled.)
net_i2a = i2a_model_no_LSTM.I2A_FC(
    obs_shape, act_n, net_em, net_policy, config)
net_i2a = net_i2a.to(device)
config.I2A_NET = str(net_i2a)
config.ROLLOUT_ENCODER = str(net_i2a.encoder)
help="learning rate")

# Figure for plotting; the second value returned by subplots() is discarded.
fig, _ = plt.subplots()

# Build the experiment configuration from the argument parser.
config = ExperimentCfg()
config.make_test_env_config(parser)
device = torch.device(config.DEVICE)
env = common.makeCustomizedGridEnv(config)
# NOTE(review): the device read from the config is immediately replaced by a
# hard-coded CUDA device, and the config is updated to match — confirm this
# override is intentional.
device = torch.device("cuda")
config.DEVICE = 'cuda'
obs_shape = env.observation_space.shape
act_n = env.action_space.n

# Create the network and restore its weights from config.A2C_FN.
# NOTE(review): the visible code never moves `net` to `device`; presumably
# common.getNet places it according to config.DEVICE — TODO confirm.
net = common.getNet(config)
net.load_state_dict(
    torch.load(config.A2C_FN, map_location=lambda storage, loc: storage))

# The agent feeds only the first element of the network's output to the
# action selector (presumably the policy logits, given apply_softmax=True —
# verify against the network's forward()).
agent = ptan.agent.PolicyAgent(
    lambda x: net(x)[0],
    action_selector=ptan.actions.ProbabilityActionSelector(),
    apply_softmax=True,
    device=device)

# Initial state and running totals for the evaluation loop below.
state = env.reset()
total_rw = 0
total_steps = 0
episodes = config.EPISODES
for i in tqdm(range(episodes)):
"--INPUT", default=False, required=False, help="")

# Build the experiment configuration from the argument parser.
config = ExperimentCfg()
config.make_replay_config(parser)
device = torch.device(config.DEVICE)
env = common.makeCustomizedGridEnv(config)
# NOTE(review): the device read from the config is immediately replaced by a
# hard-coded CUDA device — confirm this override is intentional.
device = torch.device("cuda")
print(config.REPLACEMENT)
obs_shape = env.observation_space.shape
act_n = env.action_space.n

# Load the A2C policy used for the EM training.
# NOTE(review): getNet is called here with (device, config) — confirm this
# matches the signature of common.getNet.
net = common.getNet(device, config)
# Weights are restored only when a checkpoint path is configured.
# NOTE(review): only the load_state_dict call is taken to be guarded by this
# check — confirm against the intended layout.
if (config.A2C_FN != ""):
    net.load_state_dict(
        torch.load(config.A2C_FN, map_location=lambda storage, loc: storage))

# The agent feeds only the first element of the network's output to the
# action selector (presumably the policy logits, given apply_softmax=True —
# verify against the network's forward()).
agent = ptan.agent.PolicyAgent(
    lambda x: net(x)[0],
    action_selector=ptan.actions.ProbabilityActionSelector(),
    apply_softmax=True,
    device=device)

# Environment model: restore its weights from config.EM_FN, then move it to
# the target device.
net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
net_em.load_state_dict(
    torch.load(config.EM_FN, map_location=lambda storage, loc: storage))
net_em = net_em.to(device)
negativeReward=-5, positiveReward=1)
    return env


# Experiment configuration with the frame size fixed to 14.
config = ExperimentCfg()
config.FRAME_SIZE = 14
device = torch.device(config.DEVICE)
env = makeCustomizedGridEnv()
# NOTE(review): the device read from the config is immediately replaced by a
# hard-coded CPU device — confirm this override is intentional.
device = torch.device('cpu')
obs_shape = env.observation_space.shape
act_n = env.action_space.n

# A2C network restored from a hard-coded checkpoint and put into eval mode.
# NOTE(review): this path is machine-specific and starts with a doubled slash,
# unlike the environment-model path below — confirm.
net_a2c = common.getNet(config)
net_a2c.load_state_dict(
    torch.load(
        "//home/valy/OneDrive/repos/I2A-all/master/no-repl/5_14/Jan31_15-57-28_valy_a2c_14_5_0.0008_0.0001_False/best_0004.000_6000.dat",
        map_location=lambda storage, loc: storage))
net_a2c.eval()

# Environment model restored from a hard-coded checkpoint, moved to the
# target device and put into eval mode.
net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
net_em.load_state_dict(
    torch.load(
        "/home/valy/OneDrive/repos/I2A-all/master/no-repl/5_14/Jan31_16-40-11_valy_em_14_5_0.0008_0.0001_False/best_9.1968e-08_223449.dat",
        map_location=lambda storage, loc: storage))
net_em = net_em.to(device)
net_em.eval()

# A second network from the same factory; no weights are loaded into it in
# the visible code.
net_distilled_policy = common.getNet(config)
# Command-line options, registered in the listed order.
# NOTE(review): --PLOT and --INPUT have no type/action, so any value given on
# the command line arrives as a non-empty string — confirm how the config
# interprets them.
_cli_options = (
    (("-e", "--EPISODES"),
     dict(default=5000, type=int, required=False, help="")),
    (("-p", "--PLOT"), dict(default=False, required=False, help="")),
    (("-in", "--INPUT"), dict(default=False, required=False, help="")),
    (("-lr",), dict(required=True, type=float, help="learning rate")),
)
for _flags, _options in _cli_options:
    parser.add_argument(*_flags, **_options)

# Build the experiment configuration from the argument parser.
config = ExperimentCfg()
config.make_i2a_replay_config(parser)

device = torch.device(config.DEVICE)
env = common.makeCustomizedGridEnv(config)
# NOTE(review): the device read from the config is immediately replaced by a
# hard-coded CUDA device — confirm this override is intentional.
device = torch.device("cuda")

obs_shape = env.observation_space.shape
act_n = env.action_space.n


def _load_checkpoint(checkpoint_path):
    """Deserialize a checkpoint, handing each storage back unchanged."""
    return torch.load(checkpoint_path,
                      map_location=lambda storage, loc: storage)


# A2C network: restore weights, switch to eval mode, move to the device.
net_a2c = common.getNet(config)
net_a2c.load_state_dict(_load_checkpoint(config.A2C_FN))
net_a2c.eval()
net_a2c.to(device)

# Environment model: restore weights, move to the device, switch to eval mode.
# (A former `if(config.IS_I2A):` guard around this section is disabled.)
net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
net_em.load_state_dict(_load_checkpoint(config.EM_FN))
net_em = net_em.to(device)
net_em.eval()

# I2A network built from the environment model and the A2C network, then
# restored from config.I2A_FN and switched to eval mode.
# (A former `net = net_i2a` alias at the end is disabled.)
net_i2a = i2a_model.I2A(obs_shape, act_n, net_em, net_a2c, config)
net_i2a = net_i2a.to(device)
net_i2a.load_state_dict(_load_checkpoint(config.I2A_FN))
net_i2a.eval()