actorTargetNet = deepcopy(actorNet)
criticNet = Critic(netParameter['n_feature'] + N_A,
                   netParameter['n_hidden'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)

plotPolicyFlag = True
N = 100

if plotPolicyFlag:
    phi = 0.0
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))
    value = np.zeros((N, N))
    for i, x in enumerate(xSet):
        for j, y in enumerate(ySet):
            # (x, y) is the target position; (0, 0, 0) is the particle configuration
            distance = np.array([x, y])
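            # The loop body is truncated here in the source. A minimal sketch of
            # how the grid evaluation might continue; the state layout (phi plus
            # the 2-D distance) and the direct actorNet/criticNet calls are
            # assumptions, not this repository's confirmed API:
            state = torch.tensor(np.concatenate(([phi], distance)),
                                 dtype=torch.float32).unsqueeze(0)
            with torch.no_grad():
                action = agent.actorNet(state)                          # deterministic policy a = mu(s)
                q = agent.criticNet(torch.cat((state, action), dim=1))  # Q(s, a) from the state-action concat
            policy[i, j] = action[0, 0].item()
            value[i, j] = q.item()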
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

checkpoint = torch.load('../../Log/Epoch15000_checkpoint.pt')
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

config['randomMoveFlag'] = True
config['dynamicInitialStateFlag'] = False
config['dynamicTargetFlag'] = False
# note: the second assignment below overrides the first
config['currentState'] = [-20, -20, 1, 1, 0, 0]
config['currentState'] = [0, 0, 1, 1, 0, 0]
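# For reference, a checkpoint like the one loaded above could have been written
# by the training script with a matching torch.save call; the key set shown is
# an assumption beyond 'actorNet_state_dict', the only key read here:
torch.save({'actorNet_state_dict': agent.actorNet.state_dict(),
            'criticNet_state_dict': agent.criticNet.state_dict()},
           '../../Log/Epoch15000_checkpoint.pt')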
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

if config['loadExistingModel']:
    checkpoint = torch.load(config['loadExistingModelCheckPoint'])
    # initialize both the online and the target networks from the same weights
    agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.actorNet_target.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.criticNet.load_state_dict(checkpoint['criticNet_state_dict'])
    agent.criticNet_target.load_state_dict(checkpoint['criticNet_state_dict'])

plotPolicyFlag = False
N = 100

if plotPolicyFlag:
                 netParameter['n_output'])
actorTargetNet = deepcopy(actorNet)
criticNet = Critic(netParameter['n_feature'] + N_A,
                   netParameter['n_hidden'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)

checkpoint = torch.load('Log/Finalepoch20000_checkpoint.pt')
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

plotPolicyFlag = True
N = 100

if plotPolicyFlag:
    phi = 0.0
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policyX = np.zeros((N, N))
actorTargetNet = deepcopy(actorNet)
criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor)

checkpoint = torch.load('Log/Finalepoch2500_checkpoint.pt')
agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])

config['dynamicInitialStateFlag'] = False
config['dynamicTargetFlag'] = False
config['currentState'] = [15, 15]
config['targetState'] = [10, 15]
config['filetag'] = 'test'
config['trajOutputInterval'] = 10
config['trajOutputFlag'] = True
config['customExploreFlag'] = False

with open('config_test.json', 'w') as f:
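    # The source breaks off inside this `with` block. A plausible, assumed
    # continuation that persists the modified test configuration (json.dump
    # and an `import json` at the top of the script are guesses, not
    # confirmed by the source):
    json.dump(config, f, indent=4)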
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

plotPolicyFlag = False
N = 100

if plotPolicyFlag:
    phi = 0.0
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))
    value = np.zeros((N, N))
actorTargetNet = deepcopy(actorNet)
criticNet = Critic(netParameter['n_feature'] + N_A,
                   netParameter['n_hidden'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A)

plotPolicyFlag = False
N = 100

if plotPolicyFlag:
    phi = 0.0
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))
    value = np.zeros((N, N))
    for i, x in enumerate(xSet):
        for j, y in enumerate(ySet):
            # (x, y) is the target position; (0, 0, 0) is the particle configuration
            distance = np.array([x, y])
actorTargetNet = deepcopy(actorNet)
criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'],
                          config['n_channels'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor)

if config['loadCheckpointFlag']:
    agent.load_checkpoint(config['loadCheckpointPrefix'])

plotPolicyFlag = False
N = 100

if plotPolicyFlag:
    phi = 0.0
    xSet = np.linspace(-10, 10, N)
    ySet = np.linspace(-10, 10, N)
    policy = np.zeros((N, N))
actorTargetNet = deepcopy(actorNet)
criticNet = CriticConvNet(netParameter['n_feature'],
                          netParameter['n_hidden'],
                          netParameter['n_output'])
criticTargetNet = deepcopy(criticNet)

actorOptimizer = optim.Adam(actorNet.parameters(), lr=config['actorLearningRate'])
criticOptimizer = optim.Adam(criticNet.parameters(), lr=config['criticLearningRate'])

actorNets = {'actor': actorNet, 'target': actorTargetNet}
criticNets = {'critic': criticNet, 'target': criticTargetNet}
optimizers = {'actor': actorOptimizer, 'critic': criticOptimizer}

agent = DDPGAgent(config, actorNets, criticNets, env, optimizers,
                  torch.nn.MSELoss(reduction='mean'), N_A,
                  stateProcessor=stateProcessor,
                  experienceProcessor=experienceProcessor)

if config['loadExistingModel']:
    checkpoint = torch.load(config['loadExistingModelCheckPoint'])
    # initialize both the online and the target networks from the same weights
    agent.actorNet.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.actorNet_target.load_state_dict(checkpoint['actorNet_state_dict'])
    agent.criticNet.load_state_dict(checkpoint['criticNet_state_dict'])
    agent.criticNet_target.load_state_dict(checkpoint['criticNet_state_dict'])

if config['loadCheckpointFlag']:
    agent.load_checkpoint(config['loadCheckpointPrefix'])
    # re-bind the freshly constructed optimizers after the checkpoint restore
    agent.actor_optimizer = optimizers['actor']
    agent.critic_optimizer = optimizers['critic']
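# The target networks synchronized above are the ones DDPG nudges toward the
# online networks between updates. A generic Polyak (soft) update sketch for
# illustration -- this helper and the tau value are not this repository's API:
def soft_update(target, source, tau=0.005):
    """Blend source parameters into target: theta_t <- tau*theta_s + (1 - tau)*theta_t."""
    for t_param, s_param in zip(target.parameters(), source.parameters()):
        t_param.data.copy_(tau * s_param.data + (1.0 - tau) * t_param.data)

# e.g. after each training step:
# soft_update(agent.actorNet_target, agent.actorNet)
# soft_update(agent.criticNet_target, agent.criticNet)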