Code Example #1
import torch
import torch.nn as nn

# `Actor` is defined elsewhere in the original project.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

EPS = 1e-8  # small constant for numerical stability
TEST_EP = 10

# PPO-penalty hyperparameters
KL_TARGET = 0.01
LAM = 0.5

# PPO-clip hyperparameter
EPSILON = 0.2  # clipping range for the surrogate objective

RENDER = False
PLOT_RESULT = False
ARG_NAME = 'PPO'
METHOD = ['penalty', 'clip'][0]  # index 0 selects 'penalty', index 1 selects 'clip'

# Underlying controller candidates (two pretrained Actor checkpoints)
model_1 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_1.load_state_dict(torch.load("./actor5000_1.pth"))
model_1.eval()

model_2 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_2.load_state_dict(torch.load("./actor4850_1.pth"))
model_2.eval()
WEIGHT = 1


class AddBias(nn.Module):
    def __init__(self, bias):
        super(AddBias, self).__init__()
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # Standard completion of this helper (the excerpt cuts off after
        # __init__): broadcast the learned bias and add it to the input.
        if x.dim() == 2:
            bias = self._bias.t().view(1, -1)
        else:
            bias = self._bias.t().view(1, -1, 1, 1)
        return x + bias
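
The constants above configure two PPO variants, but the excerpt never shows how they enter the objective. The sketch below is a minimal illustration, not the original script's loss code; `ratio`, `advantage`, and `kl` are hypothetical inputs standing in for the probability ratio pi_new(a|s)/pi_old(a|s), an advantage estimate, and the KL divergence between the old and new policies.

import torch

# Mirror the constants from the snippet above.
KL_TARGET, LAM, EPSILON = 0.01, 0.5, 0.2
METHOD = 'penalty'  # or 'clip'

def ppo_loss(ratio, advantage, kl=None):
    if METHOD == 'penalty':
        # PPO-penalty: surrogate gain minus a KL penalty weighted by LAM.
        return -(ratio * advantage).mean() + LAM * kl.mean()
    # PPO-clip: clip the ratio into [1 - EPSILON, 1 + EPSILON] and take the
    # pessimistic (element-wise minimum) of the two surrogates.
    clipped = torch.clamp(ratio, 1.0 - EPSILON, 1.0 + EPSILON)
    return -torch.min(ratio * advantage, clipped * advantage).mean()

In the penalty variant, LAM is usually adapted between updates, for example halved when the measured KL falls below KL_TARGET / 1.5 and doubled when it exceeds KL_TARGET * 1.5, which is what KL_TARGET is for. The AddBias class above is the usual helper for holding the learnable log standard deviation of a Gaussian policy head.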
Code Example #2
import math
import torch
import torch.autograd as autograd

# `ReplayBuffer`, `Actor`, `Individualtanh`, `Agent`, and the EXP1 flag are
# defined elsewhere in the original project.
USE_CUDA = torch.cuda.is_available()

# Note: autograd.Variable is deprecated; in modern PyTorch plain tensors suffice.
Variable = lambda *args, **kwargs: (autograd.Variable(*args, **kwargs).cuda()
                                    if USE_CUDA else autograd.Variable(*args, **kwargs))
batch_size = 128
gamma = 0.99
epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 3000
replay_buffer = ReplayBuffer(int(5e3))
# Exponentially anneal the exploration rate from epsilon_start to epsilon_final.
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_1 = Actor(state_size=2,
                action_size=1,
                seed=0,
                fc1_units=25,
                fc2_units=None).to(device)
model_1.load_state_dict(torch.load("./models/actor_2800.pth"))
model_1.eval()

model_2 = Actor(state_size=2, action_size=1, seed=0, fc1_units=25).to(device)
if EXP1:
    model_2.load_state_dict(torch.load("./0731actors/actor_2400.pth"))
else:
    model_2.load_state_dict(torch.load("./0801actors/actor_1400.pth"))
model_2.eval()

Individual = Individualtanh(state_size=2, action_size=1, seed=0).to(device)

agent = Agent(state_size=2,
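
The excerpt breaks off mid-construction of Agent and never shows the action selection that consumes epsilon_by_frame. Below is a minimal epsilon-greedy sketch under assumed names: q_network, state, and num_actions are hypothetical stand-ins, since the agent's real interface is not shown.

import math
import random
import torch

# Mirror the schedule defined in the excerpt above.
epsilon_start, epsilon_final, epsilon_decay = 1.0, 0.01, 3000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

def select_action(q_network, state, frame_idx, num_actions):
    # Hypothetical epsilon-greedy selection driven by the decay schedule.
    epsilon = epsilon_by_frame(frame_idx)
    if random.random() > epsilon:
        with torch.no_grad():
            q_values = q_network(state.unsqueeze(0))  # add a batch dimension
        return int(q_values.argmax(dim=1).item())     # exploit: greedy action
    return random.randrange(num_actions)              # explore: random action

The replay_buffer, batch_size, and gamma constants would then feed the usual DQN-style TD update, which the excerpt also omits.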
Code Example #3
    # `torch`, `time`, `NormalizedEnv`, `InvPendulumEnv`, and `Actor` are
    # imported/defined earlier in the original file.
    num_warmup = 1000
    num_train = 200000
    num_eval = 0
    buffer_length = 600000

    # env = NormalizedEnv(gym.make('Pendulum-v0'))
    GODOT_BIN_PATH = "InvPendulum/InvPendulum.x86_64"
    env_abs_path = "InvPendulum/InvPendulum.pck"
    env = NormalizedEnv(
        InvPendulumEnv(exec_path=GODOT_BIN_PATH,
                       env_path=env_abs_path,
                       render=True))

    num_states = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    policy = Actor(num_states, num_actions)
    policy.load_state_dict(torch.load('./policy.pkl'))

    state = env.reset()
    state = state.to(dtype=torch.float32)  # ensure float32 for tracing below

    # Trace the policy with a concrete example input and export it as TorchScript.
    traced_policy = torch.jit.trace(policy, state)
    print(traced_policy.graph)
    print(traced_policy.code)
    traced_policy.save('ddpg_policy.jit')

    for step in range(1000):

        action = policy(state)  # run the policy; the rest of the loop body is truncated in the excerpt
        time.sleep(0.02)
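
Once saved, the traced policy can be reloaded without the Actor class definition. The sketch below uses the ddpg_policy.jit file produced above; the input here is a placeholder, and its shape must match the state tensor used for tracing.

import torch

# Reload the TorchScript module; no Python class definition is required,
# because the traced graph is self-contained.
loaded_policy = torch.jit.load('ddpg_policy.jit')
loaded_policy.eval()

with torch.no_grad():
    # Placeholder observation: replace 4 with env.observation_space.shape[0].
    example_state = torch.zeros(4, dtype=torch.float32)
    action = loaded_policy(example_state)

print(action)

The same file can also be loaded from C++ via torch::jit::load, which is the usual motivation for exporting a policy this way.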