# NOTE(review): this chunk arrived collapsed onto a single physical line; the
# structure below is reconstructed. It reads like the interior of a grid sweep:
# i, j, phiIdx, distance, phi, dx, sensorInfo, value are presumably bound by
# enclosing loops/statements outside this chunk — confirm against the full file.
dy = distance[0] * math.sin(phi) - distance[1] * math.cos(phi)
# Clip the target displacement to targetClipLength while preserving direction.
if math.hypot(dx, dy) > agent.env.agent.targetClipLength:
    angle = math.atan2(dy, dx)
    dx = agent.env.agent.targetClipLength * math.cos(angle)
    dy = agent.env.agent.targetClipLength * math.sin(angle)
# Alternative clipping via a map function, kept from the original for reference:
# dx = agent.env.agent.targetClipMap(dx) if dx > 0 else -agent.env.agent.targetClipMap(-dx)
# dy = agent.env.agent.targetClipMap(dy) if dy > 0 else -agent.env.agent.targetClipMap(-dy)
state = {'sensor': sensorInfo, 'target': np.array([dx, dy])}
# Greedy action at this grid cell, and the Q-value the policy net assigns to it.
policy[i, j] = agent.getPolicy(state)
Qvalue = agent.policyNet(stateProcessor([state], config['device'])[0])
value[i, j] = Qvalue[0, policy[i, j]].cpu().item()
# Dump the pre-training policy and value grids for this heading index.
np.savetxt('DynamicMazePolicyBeforeTrain' + config['mapName'] + 'phiIdx' + str(phiIdx) + '.txt',
           policy, fmt='%d', delimiter='\t')
np.savetxt('DynamicMazeValueBeforeTrain' + config['mapName'] + 'phiIdx' + str(phiIdx) + '.txt',
           value, fmt='%.3f', delimiter='\t')
# plotPolicy(policy, N_A)
# NOTE(review): this chunk arrived collapsed onto a single physical line; the
# line structure below is reconstructed. Stabilizer task: build policy/target
# nets, train a DQN agent, dumping the greedy policy before training.
policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])
targetNet = deepcopy(policyNet)
optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])
agent = DQNAgent(config, policyNet, targetNet, env, optimizer,
                 torch.nn.MSELoss(reduction='none'), N_A)

# Sample the greedy policy over 100 1-D states in [-1, 1] before training.
xSet = np.linspace(-1, 1, 100)
policy = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    policy[i] = agent.getPolicy(np.array([x]))
np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

# agent.perform_random_exploration(10)
agent.train()
# storeMemory = ReplayMemory(100000)
agent.testPolicyNet(100)
# storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    """Hand-written fallback policy: drive the 1-D state toward zero.

    NOTE(review): this definition is truncated at the chunk boundary — the
    remaining branches / return live past the end of this chunk.
    """
    x = state[0]
    # move towards negative
    if x > 0.1:
        action = 2
# NOTE(review): this chunk arrived collapsed onto a single physical line; the
# line structure below is reconstructed. Double-Q simple-maze task: dump the
# greedy policy over the maze grid before training, then train.
agent = DQNAgent(policyNet, targetNet, env, optimizer,
                 torch.nn.MSELoss(), N_S, N_A, config=config)

# Greedy action per free cell; walls (env.map == 0) are marked -1.
policy = deepcopy(env.map)
for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.map[i, j] == 0:
            policy[i, j] = -1
        else:
            policy[i, j] = agent.getPolicy(np.array([i, j]))
np.savetxt('DoubleQSimpleMazePolicyBeforeTrain' + mapName + '.txt',
           policy, fmt='%d', delimiter='\t')
plotPolicy(policy, N_A)

agent.train()

# After-training policy sweep — truncated at the chunk boundary (the else
# branch filling in trained actions lies past the end of this chunk).
policy = deepcopy(env.map)
for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.map[i, j] == 0:
            policy[i, j] = -1
# NOTE(review): this chunk arrived collapsed onto a single physical line; the
# line structure below is reconstructed. Dynamic-maze task: dump the greedy
# policy over the map grid before training, then train.
agent = DQNAgent(policyNet, targetNet, env, optimizer,
                 torch.nn.MSELoss(), N_A, config=config)

# Greedy action per free cell; obstacles (env.mapMat == 1) are marked -1.
# States are the agent's sensor reading at each grid position.
policy = deepcopy(env.mapMat)
for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.mapMat[i, j] == 1:
            policy[i, j] = -1
        else:
            sensorInfo = env.agent.getSensorInfoFromPos(np.array([i, j]))
            policy[i, j] = agent.getPolicy(sensorInfo)
np.savetxt('DynamicMazePolicyBeforeTrain' + mapName + '.txt',
           policy, fmt='%d', delimiter='\t')
# # plotPolicy(policy, N_A)

agent.train()

# After-training policy sweep.
for i in range(policy.shape[0]):
    for j in range(policy.shape[1]):
        if env.mapMat[i, j] == 1:
            policy[i, j] = -1
        else:
            # TODO(review): original chunk is truncated mid-statement here
            # (bare `else:`); the trained-policy fill-in continues past this
            # chunk. `pass` added only so the fragment parses.
            pass