Ejemplo n.º 1
0
# Independent deep copy of the critic; handed to the agent under the 'target' key.
criticTargetNet = deepcopy(criticNet)

# One Adam optimizer per network, each with its own learning rate from config.
actorOptimizer = optim.Adam(actorNet.parameters(),
                            lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(),
                             lr=config['criticLearningRate'])

# DDPGAgent receives the networks and optimizers grouped in dicts.
actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}
agent = DDPGAgent(config,
                  actorNets,
                  criticNets,
                  env,
                  optimizers,
                  torch.nn.MSELoss(reduction='mean'),
                  N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

# Deserialize onto the CPU so a checkpoint written during a GPU run can still
# be opened on a machine without CUDA; load_state_dict() below copies the
# values into the actor's existing parameters.
checkpoint = torch.load('../../Log/Epoch15000_checkpoint.pt',
                        map_location='cpu')

# Only the actor weights are restored from the checkpoint here.
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

# Configuration overrides applied after the agent has been built.
# NOTE(review): whether the agent/env observe these late changes depends on
# whether they hold a reference to `config` or a copy -- confirm.
config['randomMoveFlag'] = True
config['dynamicInitialStateFlag'] = False
config['dynamicTargetFlag'] = False
# 'currentState' is assigned exactly once: an earlier value that was
# overwritten on the very next line (and so never took effect) was removed.
config['currentState'] = [0, 0, 1, 1, 0, 0]
config['targetState'] = [15, 15, 25]
Ejemplo n.º 2
0
# Target actor: an independent deep copy of the online actor.
actorTargetNet = deepcopy(actorNet)

# The critic's first argument is n_feature + N_A.
# NOTE(review): presumably state features plus action dimension -- confirm
# against the Critic definition.
criticNet = Critic(
    netParameter['n_feature'] + N_A,
    netParameter['n_hidden'],
)

# Target critic: an independent deep copy of the online critic.
criticTargetNet = deepcopy(criticNet)

# Each network gets its own Adam optimizer and learning rate.
actorOptimizer = optim.Adam(
    actorNet.parameters(),
    lr=config['actorLearningRate'],
)
criticOptimizer = optim.Adam(
    criticNet.parameters(),
    lr=config['criticLearningRate'],
)

# Bundle networks and optimizers into the dicts DDPGAgent is called with.
actorNets = dict(actor=actorNet, target=actorTargetNet)
criticNets = dict(critic=criticNet, target=criticTargetNet)
optimizers = dict(actor=actorOptimizer, critic=criticOptimizer)

agent = DDPGAgent(
    config,
    actorNets,
    criticNets,
    env,
    optimizers,
    torch.nn.MSELoss(reduction='mean'),
    N_A,
)

# Switch for the grid evaluation below; it is disabled here, so the body of
# the `if` is skipped.
plotPolicyFlag = False
# Number of sample points along each axis of the grid.
N = 100
if plotPolicyFlag:
    # NOTE(review): phi is not used in the visible lines; presumably an
    # orientation angle consumed further down -- confirm.
    phi = 0.0

    # N evenly spaced samples over [-10, 10] on each axis.
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    # N x N result grid, zero-initialised (filled outside the visible lines).
    policy = np.zeros((N, N))

    # Second N x N result grid, zero-initialised.
    value = np.zeros((N, N))
    # Visit every (x, y) pair; i and j are the corresponding grid indices.
    for i, x in enumerate(xSet):
        for j, y in enumerate(ySet):
            # x, y is the target position, (0, 0, 0) is the particle configuration
            distance = np.array([x, y])