# Driver script: build a DQN agent, dump its policy before training, then
# train and test it.

# Adam optimizer over the policy network's parameters; the learning rate is
# read from config['learningRate'].
optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])

# The loss is constructed with reduction='none', i.e. it yields per-element
# squared errors instead of a single averaged scalar.
# NOTE(review): presumably DQNAgent reduces/weights the losses itself — confirm.
agent = DQNAgent(config, policyNet, targetNet, env, optimizer,
                 torch.nn.MSELoss(reduction='none'), N_A)

# Query the agent's policy (before agent.train() is called) on 100 evenly
# spaced one-element states in [-1, 1].
xSet = np.linspace(-1, 1, 100)
# Same shape and dtype as xSet (float); values are written as integers below.
policy = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    # Each state is wrapped in a length-1 array before being passed in.
    policy[i] = agent.getPolicy(np.array([x]))

# One policy value per line, formatted with '%d'.
np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

#agent.perform_random_exploration(10)
agent.train()
#storeMemory = ReplayMemory(100000)
# NOTE(review): the argument 100 is presumably the number of test
# episodes/steps — confirm against DQNAgent.testPolicyNet.
agent.testPolicyNet(100)
#storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    """Return a hand-written action for a state, based on its first entry.

    Only ``state[0]`` is inspected, with a dead band of +/-0.1 around zero:

    * ``state[0] > 0.1``  -> action 2 (move towards negative)
    * ``state[0] < -0.1`` -> action 1 (move towards positive)
    * otherwise           -> action 0 (do not move)

    The boundary values 0.1 and -0.1 themselves fall in the "do not move"
    band because both comparisons are strict.
    """
    position = state[0]
    if position > 0.1:
        # Too far on the positive side: push back towards negative.
        return 2
    if position < -0.1:
        # Too far on the negative side: push back towards positive.
        return 1
    # Inside the dead band: stay put.
    return 0
# Example no. 2
# 0
                       'phiIdx' + str(phiIdx) + '.txt',
                       policy,
                       fmt='%d',
                       delimiter='\t')
            # Write `value` as tab-delimited text with three decimal places.
            # The file name is built from config['mapName'] and phiIdx.
            np.savetxt('DynamicMazeValueAfterTrain' + config['mapName'] +
                       'phiIdx' + str(phiIdx) + '.txt',
                       value,
                       fmt='%.3f',
                       delimiter='\t')

    # Save the policy network weights and the optimizer state together as a
    # single checkpoint dict at the path given by config['saveModelFile'].
    # The two keys are the same ones read back when the checkpoint is loaded.
    torch.save(
        {
            'model_state_dict': agent.policyNet.state_dict(),
            'optimizer_state_dict': agent.optimizer.state_dict(),
        }, config['saveModelFile'])

# Test phase: restore the saved checkpoint, run the policy network and record
# the resulting trajectories to a text file.
if testFlag:
    # Forced to True here, so the load branch below always runs in test mode
    # regardless of what the config originally contained.
    config['loadExistingModel'] = True

    if config['loadExistingModel']:
        # Restore both the network weights and the optimizer state from the
        # checkpoint stored at config['saveModelFile'].
        checkpoint = torch.load(config['saveModelFile'])
        agent.policyNet.load_state_dict(checkpoint['model_state_dict'])
        agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    recorder = TrajRecorder()
    # NOTE(review): presumably switches the environment's agent to stochastic
    # movement for testing — confirm what 'stochMoveFlag' controls.
    agent.env.agent.config['stochMoveFlag'] = True
    # NOTE(review): 100 is presumably the number of test episodes — confirm.
    agent.testPolicyNet(100, recorder)
    # Output file name is the map name followed by 'TestTraj.txt'.
    recorder.write_to_file(config['mapName'] + 'TestTraj.txt')

#plotPolicy(policy, N_A)
                 optimizer,
                 torch.nn.MSELoss(reduction='none'),
                 N_A,
                 config=config)

# Query the agent's policy (before agent.train() is called) on 100 evenly
# spaced one-element states in [-1, 1].
xSet = np.linspace(-1, 1, 100)
# Same shape and dtype as xSet (float); values are written as integers below.
policy = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    # Each state is wrapped in a length-1 array before being passed in.
    policy[i] = agent.getPolicy(np.array([x]))

# One policy value per line, formatted with '%d'.
np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

#agent.perform_random_exploration(10)
agent.train()
# A ReplayMemory is handed to testPolicyNet and then dumped to text.
# NOTE(review): 100000 is presumably the memory capacity and 100 the number of
# test episodes — confirm against ReplayMemory / DQNAgent.testPolicyNet.
storeMemory = ReplayMemory(100000)
agent.testPolicyNet(100, storeMemory)
storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    """Map a state to a fixed, rule-based action using its first component.

    The rule is a three-way threshold on ``state[0]``:

    * greater than 0.1  -> 2 (move towards negative)
    * less than -0.1    -> 1 (move towards positive)
    * anything else     -> 0 (do not move)

    Both comparisons are strict, so exactly 0.1 and exactly -0.1 yield 0.
    """
    first = state[0]
    # Check the positive threshold first, then the negative one; everything
    # left over is inside the dead band and maps to "do not move".
    return 2 if first > 0.1 else (1 if first < -0.1 else 0)