# --- Agent construction ------------------------------------------------------
# Adam updates the parameters of policyNet only; the step size is read from
# config['learningRate'].
optimizer = optim.Adam(
    policyNet.parameters(),
    lr=config['learningRate'],
)

# reduction='none' makes MSELoss return the un-reduced, element-wise loss.
agent = DQNAgent(
    config,
    policyNet,
    targetNet,
    env,
    optimizer,
    torch.nn.MSELoss(reduction='none'),
    N_A,
)

# --- Snapshot of the policy before training ----------------------------------
# Query the agent at 100 evenly spaced points in [-1, 1], one single-element
# state at a time, and store the answer for each point.
xSet = np.linspace(-1, 1, 100)
policy = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    queryState = np.array([x])
    policy[i] = agent.getPolicy(queryState)

# One integer-formatted entry per line.
np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

# --- Training and evaluation -------------------------------------------------
# Disabled: agent.perform_random_exploration(10)
agent.train()
# Disabled: storeMemory = ReplayMemory(100000)
agent.testPolicyNet(100)
# Disabled: storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    """Rule-based policy that looks only at the first state component.

    Returns:
        2 when state[0] > 0.1   (move towards negative),
        1 when state[0] < -0.1  (move towards positive),
        0 otherwise             (do not move).
    """
    position = state[0]
    if position > 0.1:
        return 2
    if position < -0.1:
        return 1
    return 0
# NOTE(review): this excerpt begins in the middle of a call whose start is not
# visible (presumably an np.savetxt of `policy`, mirroring the `value` save
# that follows -- confirm). The original line breaks and indentation are lost,
# so which statements belong to each `if` body cannot be determined from here;
# the code is therefore left exactly as found.
#
# Visible statements, in order:
#   * np.savetxt writes `value` to
#     'DynamicMazeValueAfterTrain' + config['mapName'] + 'phiIdx' + str(phiIdx) + '.txt'
#     with fmt='%.3f' and a tab delimiter.
#   * torch.save writes a dict holding agent.policyNet.state_dict() and
#     agent.optimizer.state_dict() to config['saveModelFile'].
#   * `if testFlag:` sets config['loadExistingModel'] = True.
#   * `if config['loadExistingModel']:` reads the checkpoint back with
#     torch.load and restores both state dicts.
#   * A TrajRecorder is created, agent.env.agent.config['stochMoveFlag'] is set
#     to True, agent.testPolicyNet(100, recorder) is called, and the recorder is
#     written to config['mapName'] + 'TestTraj.txt'.
#
# NOTE(review): torch.load unpickles the file it reads; only load checkpoints
# from a trusted source (consider weights_only=True where the installed
# PyTorch supports it).
'phiIdx' + str(phiIdx) + '.txt', policy, fmt='%d', delimiter='\t') np.savetxt('DynamicMazeValueAfterTrain' + config['mapName'] + 'phiIdx' + str(phiIdx) + '.txt', value, fmt='%.3f', delimiter='\t') torch.save( { 'model_state_dict': agent.policyNet.state_dict(), 'optimizer_state_dict': agent.optimizer.state_dict(), }, config['saveModelFile']) if testFlag: config['loadExistingModel'] = True if config['loadExistingModel']: checkpoint = torch.load(config['saveModelFile']) agent.policyNet.load_state_dict(checkpoint['model_state_dict']) agent.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) recorder = TrajRecorder() agent.env.agent.config['stochMoveFlag'] = True agent.testPolicyNet(100, recorder) recorder.write_to_file(config['mapName'] + 'TestTraj.txt') #plotPolicy(policy, N_A)
optimizer, torch.nn.MSELoss(reduction='none'), N_A, config=config) xSet = np.linspace(-1, 1, 100) policy = np.zeros_like(xSet) for i, x in enumerate(xSet): policy[i] = agent.getPolicy(np.array([x])) np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d') #agent.perform_random_exploration(10) agent.train() storeMemory = ReplayMemory(100000) agent.testPolicyNet(100, storeMemory) storeMemory.write_to_text('testPolicyMemory.txt') def customPolicy(state): x = state[0] # move towards negative if x > 0.1: action = 2 # move towards positive elif x < -0.1: action = 1 # do not move else: action = 0 return action