    def __init__(self, alpha, beta, input_dims, tau, env, brain_name, gamma=.99,
                 n_actions=2, mem_capacity=1e6, layer1_size=400,
                 layer2_size=300, batch_size=64, multiagent=False,
                 n_agents=None, game_name='Rollerball'):

        # Initialize memory
        self.batch_size = batch_size
        self.memory = ReplayBuffer(int(mem_capacity))  # cast in case capacity is passed as a float (e.g. 1e6)
        
        # Initialize noise
        self.noise = OUActionNoise(np.zeros(n_actions))

        # Setup device used for torch computations
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # Create actor, critic, and target networks
        self.actor = ActorNet(alpha, input_dims, layer1_size, layer2_size,
                              n_actions,
                              name='actor_' + game_name + '_ddpg_model').to(self.device)
        self.target_actor = ActorNet(alpha, input_dims, layer1_size,
                                     layer2_size, n_actions).to(self.device)

        self.critic = CriticNet(beta, input_dims, layer1_size, layer2_size,
                                n_actions,
                                name='critic_' + game_name + '_ddpg_model').to(self.device)
        self.target_critic = CriticNet(beta, input_dims, layer1_size,
                                       layer2_size, n_actions).to(self.device)
        
        # Initialize target nets to be identical to actor and critic networks
        self.init_networks()

        # Target networks set to eval, since they are not 
        # trained but simply updated with the target_network_update function
        self.target_actor.eval()
        self.target_critic.eval()

        # Set global parameters
        self.gamma = gamma
        self.env = env
        self.tau = tau
        self.state_space = input_dims
        self.action_space = n_actions
        self.multiagent = multiagent
        self.brain_name = brain_name
        self.n_agents = n_agents if self.multiagent else None

        # Plotter object for showing live training graphs and saving them
        self.plotter = RLPlots('ddpg_training')
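
    # Illustrative sketch of the init_networks() call used above; the repo's
    # actual implementation is not shown in this excerpt. It assumes the
    # standard DDPG initialization, where each target network starts as an
    # exact copy of its online counterpart.
    def init_networks(self):
        # Hard-copy the online weights into the target networks
        self.target_actor.load_state_dict(self.actor.state_dict())
        self.target_critic.load_state_dict(self.critic.state_dict())
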
    def __init__(self,
                 alpha,
                 beta,
                 tau,
                 gamma,
                 state_space,
                 l1_size,
                 l2_size,
                 l3_size,
                 l4_size,
                 action_space,
                 env,
                 brain_name,
                 multibrain,
                 version,
                 mem_capacity=1e6,
                 batch_size=128,
                 multiagent=False,
                 n_agents=None,
                 eval=False):

        # Initialize memory
        self.batch_size = batch_size
        self.memory = ReplayBuffer(int(mem_capacity))  # cast in case capacity is passed as a float (e.g. 1e6)

        # Initialize noise
        # In case of a multiagent environment, create a separate noise object for each agent
        self.noise = [OUActionNoise(np.zeros(action_space)) for i in range(n_agents)] if multiagent else \
                    OUActionNoise(np.zeros(action_space))

        # Setup device used for torch computations
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')

        # Create actor, critic, and target networks
        self.actor = ActorNet(alpha,
                              state_space,
                              l1_size,
                              l2_size,
                              l3_size,
                              l4_size,
                              action_space,
                              name='actor_' + version + '_ddpg_model').to(
                                  self.device)
        self.target_actor = ActorNet(alpha, state_space, l1_size, l2_size,
                                     l3_size, l4_size,
                                     action_space).to(self.device)

        self.critic = CriticNet(beta,
                                state_space,
                                l1_size,
                                l2_size,
                                l3_size,
                                l4_size,
                                action_space,
                                name='critic_' + version + '_ddpg_model').to(
                                    self.device)
        self.target_critic = CriticNet(beta, state_space, l1_size, l2_size,
                                       l3_size, l4_size,
                                       action_space).to(self.device)

        # Initialize target nets to be identical to actor and critic networks
        self.init_networks()

        # Target networks set to eval, since they are not
        # trained but simply updated with the target_network_update function
        self.target_actor.eval()
        self.target_critic.eval()

        # Set global parameters
        self.gamma = gamma
        self.env = env
        self.tau = tau
        self.eval = eval
        self.state_space = state_space
        self.action_space = action_space
        self.multiagent = multiagent
        self.multibrain = multibrain
        self.brain_name = brain_name
        self.n_agents = n_agents if self.multiagent else None

        # Initialize plotter for showing live training graphs and saving them
        self.plotter = RLPlots('ddpg_training')
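
    # Illustrative sketch of the target_network_update() routine referenced in
    # the comments above; the repo's implementation is not shown here. It
    # assumes the usual DDPG soft update with rate tau:
    #   theta_target <- tau * theta + (1 - tau) * theta_target
    def target_network_update(self):
        for target_param, param in zip(self.target_actor.parameters(),
                                       self.actor.parameters()):
            target_param.data.copy_(self.tau * param.data +
                                    (1.0 - self.tau) * target_param.data)
        for target_param, param in zip(self.target_critic.parameters(),
                                       self.critic.parameters()):
            target_param.data.copy_(self.tau * param.data +
                                    (1.0 - self.tau) * target_param.data)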