Esempio n. 1
0
def build_a2c_net_from_file(cfg, use_grad_cam=True):
    """Create a network via ``common.getNet`` and restore it from ``cfg.A2C_FN``.

    Args:
        cfg: Experiment configuration. ``cfg.A2C_FN`` is read as the
            checkpoint path and the whole object is passed to
            ``common.getNet``.
        use_grad_cam: Value stored on the returned network as its
            ``grad_cam`` attribute.

    Returns:
        The network with the checkpoint's state dict loaded, switched to
        evaluation mode.
    """

    def _keep_storage(storage, loc):
        # Returning the storage unchanged leaves every tensor where
        # torch.load first deserialized it.
        return storage

    checkpoint_path = cfg.A2C_FN
    model = common.getNet(cfg)

    weights = torch.load(checkpoint_path, map_location=_keep_storage)
    model.load_state_dict(weights)

    model.eval()
    model.grad_cam = use_grad_cam
    return model
Esempio n. 2
0
def build_a2c_nets_from_files(cfg, use_grad_cam=True):
    """Build one network per checkpoint listed in ``cfg.I2A_RP_FN1..4``.

    Each network is created with ``common.getNet(cfg)``, restored from its
    checkpoint file, switched to evaluation mode and given a ``grad_cam``
    attribute.

    Args:
        cfg: Experiment configuration. The attributes ``I2A_RP_FN1`` to
            ``I2A_RP_FN4`` are read as checkpoint paths and the whole
            object is passed to ``common.getNet``.
        use_grad_cam: Value stored on every returned network as its
            ``grad_cam`` attribute.

    Returns:
        A list of networks, in the same order as the checkpoint paths.
    """
    net_files = [
        cfg.I2A_RP_FN1, cfg.I2A_RP_FN2, cfg.I2A_RP_FN3, cfg.I2A_RP_FN4
    ]
    nets = []

    # Iterate over the paths themselves so the loop can never drift out of
    # sync with the length of ``net_files``.
    for net_file in net_files:
        net = common.getNet(cfg)
        net.load_state_dict(
            torch.load(net_file,
                       map_location=lambda storage, loc: storage))
        net.eval()
        net.grad_cam = use_grad_cam
        nets.append(net)

    return nets
Esempio n. 3
0
    # Setup fragment: builds the environments and the three networks (policy,
    # environment model, I2A) and records their string representations on
    # ``config``. ``writer``, ``config`` and ``device`` are defined outside
    # this fragment.
    saves_path = writer.logdir

    # One environment per parallel slot, plus a separate one for testing.
    envs = [
        common.makeCustomizedGridEnv(config) for _ in range(config.NUM_ENVS)
    ]
    test_env = common.makeCustomizedGridEnv(config)

    # Seed the training environments and the test environment with the same
    # config.SEED (original author's note: set_seed also seeds torch
    # operations).
    common.set_seed(config.SEED, envs=envs)
    common.set_seed(config.SEED, envs=[test_env])

    # Observation shape and number of discrete actions, taken from the first
    # environment.
    obs_shape = envs[0].observation_space.shape
    act_n = envs[0].action_space.n

    #    net_policy = common.AtariA2C(obs_shape, act_n).to(device)
    net_policy = common.getNet(config)
    # Keep the textual description of the network on the config object.
    config.A2CNET = str(net_policy)

    net_em = models.environment_model.EnvironmentModel(obs_shape, act_n,
                                                       config)
    # net_em.load_state_dict(torch.load(config.EM_FILE_NAME, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)
    config.EM_NET = str(net_em)

    # The I2A network is constructed from the environment model and the
    # policy network created above.
    net_i2a = i2a_model_no_LSTM.I2A_FC(obs_shape, act_n, net_em, net_policy,
                                       config).to(device)
    config.I2A_NET = str(net_i2a)
    config.ROLLOUT_ENCODER = str(net_i2a.encoder)
    #    net_i2a.load_state_dict(torch.load("saves/03_i2a_test/best_pong_-018.667_1300.dat", map_location=lambda storage, loc: storage))
    #     print(net_policy)
    #     print(net_em)
Esempio n. 4
0
                        help="learning rate")

    # Setup fragment: parses the test configuration, builds the environment,
    # restores the network from config.A2C_FN and wraps it in a ptan agent.
    # ``parser`` is defined outside this fragment.
    fig, _ = plt.subplots()

    config = ExperimentCfg()
    config.make_test_env_config(parser)
    device = torch.device(config.DEVICE)

    env = common.makeCustomizedGridEnv(config)
    # NOTE(review): the device derived from the parsed config above is
    # overwritten here with a hard-coded "cuda", and config.DEVICE is forced
    # to match. Confirm this override is intended.
    device = torch.device("cuda")
    config.DEVICE = 'cuda'

    obs_shape = env.observation_space.shape
    act_n = env.action_space.n

    net = common.getNet(config)
    # The identity map_location returns each storage unchanged while the
    # checkpoint is deserialized.
    net.load_state_dict(
        torch.load(config.A2C_FN, map_location=lambda storage, loc: storage))

    # The agent only uses the first element of the network's output
    # (presumably the policy logits, since apply_softmax=True -- TODO confirm).
    agent = ptan.agent.PolicyAgent(
        lambda x: net(x)[0],
        action_selector=ptan.actions.ProbabilityActionSelector(),
        apply_softmax=True,
        device=device)

    state = env.reset()

    # Running totals and the episode count taken from the config.
    total_rw = 0
    total_steps = 0
    episodes = config.EPISODES
    for i in tqdm(range(episodes)):
Esempio n. 5
0
                        "--INPUT",
                        default=False,
                        required=False,
                        help="")

    # Setup fragment: parses the replay configuration, builds the
    # environment, optionally restores the A2C network, wraps it in a ptan
    # agent and restores the environment model from config.EM_FN.
    # ``parser`` is defined outside this fragment.
    config = ExperimentCfg()
    config.make_replay_config(parser)
    device = torch.device(config.DEVICE)

    env = common.makeCustomizedGridEnv(config)
    # NOTE(review): the device derived from the parsed config above is
    # overwritten with a hard-coded "cuda"; config.DEVICE is not updated
    # here. Confirm this override is intended.
    device = torch.device("cuda")
    print(config.REPLACEMENT)
    obs_shape = env.observation_space.shape
    act_n = env.action_space.n
    # Load the A2C policy used for the EM training.
    # NOTE(review): getNet is called with (device, config) here -- verify
    # this matches the signature of common.getNet in this version of the
    # project.
    net = common.getNet(device, config)
    # Weights are restored only when a checkpoint path was provided.
    if (config.A2C_FN != ""):
        net.load_state_dict(
            torch.load(config.A2C_FN,
                       map_location=lambda storage, loc: storage))

    # The agent only uses the first element of the network's output
    # (presumably the policy logits, since apply_softmax=True -- TODO confirm).
    agent = ptan.agent.PolicyAgent(
        lambda x: net(x)[0],
        action_selector=ptan.actions.ProbabilityActionSelector(),
        apply_softmax=True,
        device=device)

    # Environment model: build, restore from config.EM_FN, then move to
    # ``device``.
    net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
    net_em.load_state_dict(
        torch.load(config.EM_FN, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)
Esempio n. 6
0
                   negativeReward=-5,
                   positiveReward=1)
    return env


# Script-level setup: builds a config with FRAME_SIZE forced to 14, creates
# the environment, and restores the A2C network and the environment model
# from hard-coded checkpoint files.
config = ExperimentCfg()
config.FRAME_SIZE = 14
device = torch.device(config.DEVICE)

# Called without arguments and without the ``common.`` prefix (presumably the
# helper defined just above in this script -- TODO confirm).
env = makeCustomizedGridEnv()
# NOTE(review): the device derived from config.DEVICE above is overwritten
# with a hard-coded "cpu". Confirm this override is intended.
device = torch.device('cpu')

obs_shape = env.observation_space.shape
act_n = env.action_space.n

# NOTE(review): both checkpoint paths below are absolute paths on one
# developer's machine, and the first starts with a doubled slash. Consider
# taking them from the config or the command line.
net_a2c = common.getNet(config)
net_a2c.load_state_dict(
    torch.load(
        "//home/valy/OneDrive/repos/I2A-all/master/no-repl/5_14/Jan31_15-57-28_valy_a2c_14_5_0.0008_0.0001_False/best_0004.000_6000.dat",
        map_location=lambda storage, loc: storage))
net_a2c.eval()

# Environment model: build, restore, move to ``device``, switch to eval mode.
net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
net_em.load_state_dict(
    torch.load(
        "/home/valy/OneDrive/repos/I2A-all/master/no-repl/5_14/Jan31_16-40-11_valy_em_14_5_0.0008_0.0001_False/best_9.1968e-08_223449.dat",
        map_location=lambda storage, loc: storage))
net_em = net_em.to(device)
net_em.eval()

# A second network created by the same factory as net_a2c; no checkpoint is
# loaded into it within this fragment.
net_distilled_policy = common.getNet(config)
Esempio n. 7
0
    # Setup fragment: registers the remaining command-line options, parses
    # the I2A replay configuration, and restores the A2C network, the
    # environment model and the I2A network from their checkpoint files.
    # ``parser`` is defined outside this fragment.
    parser.add_argument("-e", "--EPISODES", default=5000, type=int,required=False, help="")
    parser.add_argument("-p", "--PLOT", default=False, required=False, help="")
    parser.add_argument("-in", "--INPUT", default=False, required=False, help="")
    parser.add_argument("-lr", required=True, type=float, help="learning rate")
    
    config = ExperimentCfg()
    config.make_i2a_replay_config(parser)
    device = torch.device(config.DEVICE)

    env = common.makeCustomizedGridEnv(config)
    # NOTE(review): the device derived from the parsed config above is
    # overwritten with a hard-coded "cuda". Confirm this override is intended.
    device = torch.device("cuda")

    obs_shape = env.observation_space.shape
    act_n = env.action_space.n
    
    # A2C network: build, restore from config.A2C_FN, switch to eval mode,
    # move to ``device``.
    net_a2c = common.getNet(config)
    net_a2c.load_state_dict(torch.load(config.A2C_FN, map_location=lambda storage, loc: storage))
    net_a2c.eval()
    net_a2c.to(device)

    # if(config.IS_I2A):
    # Environment model: build, restore from config.EM_FN, move to
    # ``device``, switch to eval mode.
    net_em = environment_model.EnvironmentModel(obs_shape, act_n, config)
    net_em.load_state_dict(torch.load(config.EM_FN, map_location=lambda storage, loc: storage))
    net_em = net_em.to(device)
    net_em.eval()

    # The I2A network is constructed from the two networks restored above
    # and then restored itself from config.I2A_FN.
    net_i2a = i2a_model.I2A(obs_shape, act_n, net_em, net_a2c, config).to(device)
    net_i2a.load_state_dict(torch.load(config.I2A_FN, map_location=lambda storage, loc: storage))
    net_i2a.eval()
    # net = net_i2a