def stateProcessor(state, device='cpu'):
    # assumed wrapper: flag the non-terminal entries, then batch their sensor readings into one tensor
    nonFinalMask = torch.tensor([item is not None for item in state], device=device, dtype=torch.bool)
    sensorList = [item['state'] for item in state if item is not None]
    nonFinalState = torch.tensor(sensorList, dtype=torch.float32, device=device)
    return nonFinalState, nonFinalMask
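
# DQN on the cooperative maze environment, reusing the state processor above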

env = CooperativeSimpleMazeTwoD(config=config)
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [100]
netParameter['n_output'] = N_A


policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])
targetNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])
optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])

agent = DQNAgent(config, policyNet, targetNet, env, optimizer, torch.nn.MSELoss(reduction='none'), N_A, stateProcessor=stateProcessor)

agent.train()





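# Double-Q DQN on a single maze map, with trajectory logging enabled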
nTraj = 100
config['logFlag'] = True
config['logFileName'] = 'SimpleMazeLog/DoubleQtraj' + mapName
config['logFrequency'] = 50
config['netUpdateOption'] = 'doubleQ'

env = SimpleMazeTwoD(mapName)
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [100]
netParameter['n_output'] = N_A

policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])

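# print the freshly initialized weights; the target net starts as an exact copy of them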
print(policyNet.state_dict())

targetNet = deepcopy(policyNet)

optimizer = optim.Adam(policyNet.parameters(), lr=config['learningRate'])

agent = DQNAgent(policyNet,
                 targetNet,
                 env,
                 optimizer,
                 torch.nn.MSELoss(),
                 N_S,
                 N_A,
                 config)  # trailing argument assumed; the original excerpt breaks off mid-call
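

# Actor-critic with separate actor and critic networks on CartPole-v0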
config['device'] = 'cpu'

# Get the environment and extract the number of actions.
# env = CartPoleEnvCustom()
trainEnv = gym.make("CartPole-v0")
testEnv = gym.make("CartPole-v0")
N_S = trainEnv.observation_space.shape[0]
N_A = trainEnv.action_space.n

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [40, 40]
netParameter['n_output'] = N_A

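# the actor outputs log-probabilities over the actions; the critic outputs a single state value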
actorNet = MultiLayerNetLogSoftmax(netParameter['n_feature'],
                                   netParameter['n_hidden'],
                                   N_A)

criticNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    1)

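# separate optimizers, one for the actor and one for the critic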
optimizer1 = optim.Adam(actorNet.parameters(), lr=config['learningRate'])
optimizer2 = optim.Adam(criticNet.parameters(), lr=config['learningRate'])

agent = ActorCriticTwoNet(actorNet, criticNet, [trainEnv, testEnv], [optimizer1, optimizer2], torch.nn.MSELoss(), N_A, config)


agent.train()

agent.test(100)
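

# Stacked DQN on the cooperative maze; the state-processor excerpt below also batches terminal states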
        finalState = torch.tensor(sensorList, device=device, dtype=torch.float32)

        return nonFinalState, nonFinalMask, finalState, finalMask

env = CooperativeSimpleMazeTwoD(config)
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [128]
netParameter['n_output'] = N_A

nPeriods = config['numStages']

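# one policy/target network pair is built for each of the nPeriods stages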
policyNets = [MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output']) for _ in range(nPeriods)]

targetNets = [MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output']) for _ in range(nPeriods)]
optimizers = [optim.Adam(net.parameters(), lr=config['learningRate']) for net in policyNets]


agent = StackedDQNAgent(config, policyNets, targetNets, env, optimizers, torch.nn.MSELoss(reduction='none'), N_A, stateProcessor=stateProcessor)


policyFlag = True

if policyFlag:
    for n in range(nPeriods):
        # per-stage policy extraction body omitted in the original excerpt
        pass


env = CooperativeSimpleMazeTwoD(config)
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [128]
netParameter['n_output'] = N_A

nPeriods = config['numStages']

policyNets = [
    MultiLayerNetRegression(netParameter['n_feature'],
                            netParameter['n_hidden'], netParameter['n_output'])
    for _ in range(nPeriods)
]

targetNets = [
    MultiLayerNetRegression(netParameter['n_feature'],
                            netParameter['n_hidden'], netParameter['n_output'])
    for _ in range(nPeriods)
]
optimizers = [
    optim.Adam(net.parameters(), lr=config['learningRate'])
    for net in policyNets
]

agent = StackedDQNAgent(config, policyNets, targetNets, env, optimizers,
                        torch.nn.MSELoss(reduction='none'), N_A,
                        stateProcessor=stateProcessor)
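

# Multiprocess DQN with a shared optimizer on the one-dimensional stabilizer task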
config['trainBatchSize'] = 32
config['gamma'] = 0.9
config['learningRate'] = 0.001
config['netGradClip'] = 1
config['logFlag'] = False
config['logFrequency'] = 100
config['priorityMemoryOption'] = False
config['netUpdateOption'] = 'doubleQ'
config['netUpdateFrequency'] = 1
config['priorityMemory_absErrUpper'] = 5
config['numWorkers'] = 4

env = StablizerOneD()
N_S = env.stateDim
N_A = env.nbActions

netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [4]
netParameter['n_output'] = N_A

policyNet = MultiLayerNetRegression(netParameter['n_feature'],
                                    netParameter['n_hidden'],
                                    netParameter['n_output'])

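# SharedAdam presumably keeps its optimizer state in shared memory so that worker processes can update a common policy net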
optimizer = SharedAdam(policyNet.parameters(), lr=1.0)

agent = DQNA3CMaster(config, policyNet, env, optimizer,
                     torch.nn.MSELoss(reduction='none'), N_A)

agent.test_multiProcess()

# Example 7: multi-agent DQN on the two-agent cooperative transport task
config['mapHeight'] = 6
config['numAgents'] = 2

env = TwoAgentCooperativeTransport(config)

N_S = env.stateDim
N_A = env.nbActions

numAgents = env.numAgents
netParameter = dict()
netParameter['n_feature'] = N_S
netParameter['n_hidden'] = [128]
netParameter['n_output'] = N_A

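# one Q-network (with a matching target net) per agent, sized to that agent's own state and action dimensions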
policyNets = [
    MultiLayerNetRegression(N_S[n], netParameter['n_hidden'], N_A[n])
    for n in range(numAgents)
]

targetNets = [
    MultiLayerNetRegression(N_S[n], netParameter['n_hidden'], N_A[n])
    for n in range(numAgents)
]

optimizers = [
    optim.Adam(net.parameters(), lr=config['learningRate'])
    for net in policyNets
]

agent = MADQNAgent(config, policyNets, targetNets, env, optimizers,
                   torch.nn.MSELoss(reduction='none'), N_A)