criticOptimizerTwo = optim.Adam(criticNetTwo.parameters(),
                                    lr=config['criticLearningRate'])

    # Bundle the actor network and its target copy under the keys handed to
    # TD3MultiStageUnit below.
    actorNets = {'actor': actorNet, 'target': actorTargetNet}
    # Two critics, each paired with its own target network.
    criticNets = {
        'criticOne': criticNet,
        'criticTwo': criticNetTwo,
        'targetOne': criticTargetNet,
        'targetTwo': criticTargetNetTwo
    }
    # One optimizer for the actor and one per critic; no optimizer is
    # registered for any of the target networks.
    optimizers = {
        'actor': actorOptimizer,
        'criticOne': criticOptimizer,
        'criticTwo': criticOptimizerTwo
    }
    # Build the agent from the bundles above with a mean-reduced MSE loss.
    # NOTE(review): N_A is passed whole as the last argument here, while
    # other code in this file indexes it (N_A[1]) -- confirm which form the
    # constructor expects.
    agent = TD3MultiStageUnit(config, actorNets, criticNets, env, optimizers,
                              torch.nn.MSELoss(reduction='mean'), N_A)
    # Collect the agent; `agents` is what MultiStageStackedController
    # receives further down.
    agents.append(agent)

# Hand the collected agents to a single stacked controller.
controller = MultiStageStackedController(config, agents, env)

# Manual switch: set to True to load saved weights into controller.agents[1].
loadCheckPoint = False
if loadCheckPoint:
    checkpoint = torch.load('Log/Stage1Finalepoch8000_checkpoint.pt')
    # Each row is (attribute on the agent, key in the checkpoint). Every
    # online network and its target counterpart are restored from the same
    # saved state dict, in the order listed.
    # NOTE(review): only the actor and the first critic pair are restored;
    # nothing is loaded for a second critic -- confirm this is intended.
    for netAttrName, stateDictKey in (
            ('actorNet', 'actorNet_state_dict'),
            ('actorNet_target', 'actorNet_state_dict'),
            ('criticNetOne', 'criticNetOne_state_dict'),
            ('criticNet_targetOne', 'criticNetOne_state_dict'),
    ):
        getattr(controller.agents[1], netAttrName).load_state_dict(
            checkpoint[stateDictKey])
# Group the networks and optimizers under the string keys that are passed
# to TD3MultiStageUnit.
actorNets = dict(actor=actorNet, target=actorTargetNet)
criticNets = dict(
    criticOne=criticNet,
    criticTwo=criticNetTwo,
    targetOne=criticTargetNet,
    targetTwo=criticTargetNetTwo,
)
optimizers = dict(
    actor=actorOptimizer,
    criticOne=criticOptimizer,
    criticTwo=criticOptimizerTwo,
)

# The last argument is the value currently stored in
# netParameter['n_output'], read before that entry is reassigned below.
agent = TD3MultiStageUnit(
    config,
    actorNets,
    criticNets,
    env,
    optimizers,
    torch.nn.MSELoss(reduction='mean'),
    netParameter['n_output'],
)
agents.append(agent)

# Reconfigure the shared network parameters: output size taken from N_A[1]
# and a single hidden layer of 128 units.
netParameter['n_output'] = N_A[1]
netParameter['n_hidden'] = [128]

# The policy and target networks are constructed from identical arguments,
# so gather them once and build the policy network first.
regressionNetArgs = (
    netParameter['n_feature'],
    netParameter['n_hidden'],
    netParameter['n_output'],
)
policyNet = MultiLayerNetRegression(*regressionNetArgs)
targetNet = MultiLayerNetRegression(*regressionNetArgs)

# Only the policy network's parameters are given to the optimizer.
optimizer = optim.Adam(
    policyNet.parameters(),
    lr=config['learningRate'],
)