def __init__(self, input_space, action_space, args):
    self.use_expert = args.use_expert
    self.gamma = args.gamma
    self.tau = args.tau
    self.alpha = args.alpha
    self.action_range = [action_space.low, action_space.high]
    self.policy_type = args.policy
    self.target_update_interval = args.target_update_interval
    self.automatic_entropy_tuning = args.automatic_entropy_tuning

    # self.device = torch.device("cuda" if args.cuda else "cpu")
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    self.critic = QNetwork(input_space, action_space.shape[0], args.hidden_size).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)

    self.critic_target = QNetwork(input_space, action_space.shape[0], args.hidden_size).to(self.device)
    hard_update(self.critic_target, self.critic)

    if self.policy_type == "Gaussian":
        # Target entropy = -dim(A) (e.g. -6 for HalfCheetah-v2), as given in the paper
        if self.automatic_entropy_tuning is True:
            self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
            self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
            self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

        self.policy = GaussianPolicy(input_space, action_space.shape[0], args.hidden_size, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
    else:
        raise ValueError("Policy type not supported yet.")
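# Note: hard_update is called above but not defined in any of these snippets. The
# following is a minimal sketch of the helper these SAC-style constructors appear to
# rely on (copy the source network's weights into the freshly created target network);
# it is an assumption about the helper, not the original repository's code.
def hard_update(target, source):
    """Copy every parameter of `source` into `target` (used to initialize target critics)."""
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)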
def __init__(self, state_size, action_size, buffer_size=int(1e5), batch_size=64,
             learning_rate=5e-4, gamma=0.99, tau=0.001):
    self.state_size = state_size
    self.action_size = action_size
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.gamma = gamma
    self.tau = tau

    self.q_network = QNetwork(self.state_size, self.action_size)
    self.q_network_target = QNetwork(self.state_size, self.action_size)
    self.optimizer = optim.Adam(self.q_network.parameters(), lr=self.learning_rate)

    self.replay_buffer = ReplayBuffer(self.buffer_size)
def __init__(self, state_size: int, action_size: int, seed: int):
    """
    Initialize Agent.

    :param state_size: dimension of each state
    :param action_size: dimension of each action
    :param seed: random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = seed

    # QNetwork
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimzer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplyBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step for updating
    self.t_step = 0
def __init__(self, state_size, action_size, seed, lr_decay=0.9999):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        lr_decay (float): multiplicative factor of learning rate decay
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    print("Running on: " + str(device))

    # Q-Network
    hidden_layers = [128, 32]

    if USE_DUELING_NETWORK:
        hidden_state_value = [64, 32]

        self.qnetwork_local = DuelingQNetwork(state_size, action_size, seed, hidden_layers, hidden_state_value).to(device)
        self.qnetwork_target = DuelingQNetwork(state_size, action_size, seed, hidden_layers, hidden_state_value).to(device)
        self.qnetwork_target.eval()
    else:
        self.qnetwork_local = QNetwork(state_size, action_size, seed, hidden_layers).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed, hidden_layers).to(device)
        self.qnetwork_target.eval()

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
    self.lr_scheduler = optim.lr_scheduler.ExponentialLR(self.optimizer, lr_decay)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed, device)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
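# Note: ReplayBuffer is constructed throughout these snippets as
# ReplayBuffer(action_size, buffer_size, batch_size, seed) but never shown. Below is a
# minimal, self-contained sketch matching that signature; the field names and the
# sample() return format are assumptions for illustration, not the original class.
import random
from collections import deque, namedtuple

import numpy as np
import torch

class ReplayBuffer:
    """Fixed-size buffer that stores experience tuples and samples random minibatches."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience",
                                     field_names=["state", "action", "reward", "next_state", "done"])
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Store one transition."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw a random minibatch and stack it into torch tensors."""
        experiences = random.sample(self.memory, k=self.batch_size)
        states = torch.from_numpy(np.vstack([e.state for e in experiences])).float()
        actions = torch.from_numpy(np.vstack([e.action for e in experiences])).long()
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences])).float()
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences])).float()
        dones = torch.from_numpy(np.vstack([e.done for e in experiences]).astype(np.uint8)).float()
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)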
def __init__(self, num_inputs, action_space,
             device, hidden_size, lr, gamma, tau, alpha):
    self.gamma = gamma
    self.tau = tau
    self.alpha = alpha
    self.device = device

    self.critic = QNetwork(num_inputs, action_space.shape[0], hidden_size).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=lr)

    self.critic_target = QNetwork(num_inputs, action_space.shape[0], hidden_size).to(self.device)
    hard_update(self.critic_target, self.critic)

    # Target entropy = -dim(A) (e.g. -6 for HalfCheetah-v2), as given in the paper
    self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
    self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
    self.alpha_optim = Adam([self.log_alpha], lr=lr)

    self.policy = GaussianPolicy(num_inputs, action_space.shape[0],
                                 hidden_size, action_space).to(self.device)
    self.policy_optim = Adam(self.policy.parameters(), lr=lr)
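# Note: the target_entropy / log_alpha / alpha_optim trio set up above is normally
# consumed by a temperature update like the method-style sketch below (the standard SAC
# loss alpha_loss = -E[log_alpha * (log_pi + target_entropy)]). This is an illustration
# of how those attributes are typically used, not code taken from the same repository.
def update_temperature(self, log_pi):
    """One gradient step on the entropy temperature alpha, given log-probs of sampled actions."""
    alpha_loss = -(self.log_alpha * (log_pi + self.target_entropy).detach()).mean()
    self.alpha_optim.zero_grad()
    alpha_loss.backward()
    self.alpha_optim.step()
    # Use the updated temperature in subsequent actor/critic updates
    self.alpha = self.log_alpha.exp().item()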
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    self.noise = OUNoise(action_size, seed)

    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
def __init__(self, state_size, action_size, buffer_size, batch_size, gamma, tau,
             learning_rate, update_every, device, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        buffer_size (int): replay buffer size
        batch_size (int): minibatch size
        gamma (float): discount factor
        tau (float): used for soft update of target parameters
        learning_rate (float): learning rate
        update_every (int): how many steps between network updates
        device (torch.device): pytorch device
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau
    self.update_every = update_every
    self.device = device
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=learning_rate)

    # Replay memory
    self.memory = ReplayBuffer(action_size, buffer_size, batch_size, device, seed)

    # Initialize time step (for updating every self.update_every steps)
    self.t_step = 0
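# Note: tau is documented above as the soft-update rate, but the update helper itself is
# not included in these snippets. A minimal sketch of the usual Polyak soft update (an
# assumption about how these agents apply tau, not the original helper):
def soft_update(local_model, target_model, tau):
    """Blend target parameters toward local ones: theta_target <- tau*theta_local + (1-tau)*theta_target."""
    for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
        target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)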
def __init__(self, state_size, action_size, seed, dqn_type="double", dueling=True):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        dqn_type: can be simple, double, dual
        dueling (bool): if True, use the dueling Q-network architecture
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    self.dqn_type = dqn_type

    # Q-Network
    if dueling:
        self.qnetwork_local = DuelQNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = DuelQNetwork(state_size, action_size, seed).to(device)
    else:
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
    # self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, 10, 2)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, n_state, n_actions, n_hidden=32, n_layers=2, seed=333, snapshotfile="snapshot.pth"):
    """
    Initialize the agent.

    Args:
        n_state (int): Number of features that represent the state
        n_actions (int): Number of actions available to agent
        n_hidden (int): Number of units in hidden neural net layers
        n_layers (int): Number of layers for neural network
        seed (int): Set the random seed (for reproducibility)
        snapshotfile (str): Filepath to use for saving weights
    """
    self.n_state = n_state
    self.n_actions = n_actions
    self.seed = random.seed(seed)
    self.snapshotfile = snapshotfile

    # Deep Q-Network
    # NOTE: the n_hidden argument is not used here; both networks are built with 64 hidden units
    self.qnetwork_local = QNetwork(n_state, n_actions, seed, n_hidden=64).to(device)
    self.qnetwork_target = QNetwork(n_state, n_actions, seed, n_hidden=64).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
    self.loss_func = torch.nn.MSELoss(reduce=True)

    # Experience Replay Memory
    self.memory = ReplayBuffer(n_actions, EXPERIENCE_MEMORY_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0

    # TODO: have the is_training attribute control eval and train
    # mode in the pytorch network
    self.is_training = True
def __init__(self, num_inputs, action_space, args):
    self.gamma = args.gamma
    self.tau = args.tau
    self.alpha = args.alpha
    self.policy_type = args.policy
    self.target_update_interval = args.target_update_interval
    self.automatic_entropy_tuning = args.automatic_entropy_tuning

    self.device = torch.device("cuda" if args.cuda else "cpu")

    self.critic = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)
    self.critic_target = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)

    # Safety critic and its own optimizer
    self.safe_critic = SafeQNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)
    self.safe_critic_optim = Adam(self.safe_critic.parameters(), lr=args.lr)
    self.safe_critic_target = SafeQNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)

    hard_update(self.critic_target, self.critic)
    hard_update(self.safe_critic_target, self.safe_critic)

    if self.policy_type == "Gaussian":
        # Target entropy = -dim(A) (e.g. -6 for HalfCheetah-v2), as given in the paper
        if self.automatic_entropy_tuning is True:
            self.target_entropy = -torch.prod(torch.Tensor(action_space.shape).to(self.device)).item()
            self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
            self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

        self.policy = GaussianPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
    else:
        self.alpha = 0
        self.automatic_entropy_tuning = False
        self.policy = DeterministicPolicy(num_inputs, action_space.shape[0], args.hidden_size, action_space).to(self.device)
        self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
def __init__(self, state_size, action_size, seed, use_dueling=False, use_double=False):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
        use_dueling (bool): if 'True' use dueling agent
        use_double (bool): if 'True' use double DDQN agent
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    self.use_dueling = use_dueling
    self.use_double = use_double

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed, use_dueling=use_dueling).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed, use_dueling=use_dueling).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.q_optimizer = optim.RMSprop(self.qnetwork_local.parameters(), lr=LR, eps=1e-5)

    # Successor Representation Network
    self.snetwork = SRNetwork().to(device)
    self.snetwork_target = SRNetwork().to(device)
    self.s_optimizer = optim.RMSprop(self.snetwork.parameters(), lr=LR, eps=1e-5)

    self.pnetwork = PredNetwork(state_size, 1).to(device)
    self.p_optimizer = optim.RMSprop(self.pnetwork.parameters(), lr=LR, eps=1e-5)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, state_size, action_size, seed=0, use_dueling=False, use_double=False, fc1=64, fc2=64):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    print('device is {}'.format(device))

    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed, fc1_units=fc1, fc2_units=fc2).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed, fc1_units=fc1, fc2_units=fc2).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, sess, state_size, action_size, seed, arguments):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.sess = sess
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    self.learning_rate = arguments['lr']
    self.gamma = arguments['gamma']
    self.update_every = arguments['update_every']
    self.tau = arguments['tau']
    self.history_size = arguments['history_size']
    self.buffer_size = arguments['buffer_size']
    self.batch_size = arguments['batch_size']

    # Q-Network
    self.qnetwork_local = QNetwork('local_q', state_size, action_size, self.history_size)
    self.qnetwork_target = QNetwork('target_q', state_size, action_size, self.history_size)

    copy_ops = []
    for local_w, target_w in zip(self.qnetwork_local.variables, self.qnetwork_target.variables):
        copy_op = tf.assign(local_w, local_w * self.tau + (1.0 - self.tau) * target_w)
        copy_ops.append(copy_op)
    self.copy_ops = tf.group(*copy_ops, name='copy_op')

    # Replay memory
    self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)

    # Initialize time step (for updating every self.update_every steps)
    self.t_step = 0
def __init__(self, state_size, action_size, buffer_size=BUFFER_SIZE, batch_size=BATCH_SIZE,
             gamma=GAMMA, tau=TAU, lr=LR, update_every=UPDATE_EVERY, seed=0):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        buffer_size (int): replay buffer size
        batch_size (int): batch size
        gamma (float): time discount
        tau (float): target network soft update rate
        lr (float): learning rate
        update_every (int): learn every this number of steps
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.buffer_size = buffer_size
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.update_every = update_every
    self.seed = seed
    self.rng = np.random.RandomState(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)

    # Replay memory
    self.memory = ReplayBuffer(action_size, self.buffer_size, self.batch_size, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, n_states, n_actions, hidden_dim, lr, device):
    """Agent class that chooses actions and trains.

    Args:
        n_states (int): input dimension
        n_actions (int): output dimension
        hidden_dim (int): hidden dimension
    """
    self.device = device

    # NOTE: the hidden_dim argument is not used here; both networks are built with hidden_dim=16
    self.q_local = QNetwork(n_states, n_actions, hidden_dim=16).to(self.device)
    self.q_target = QNetwork(n_states, n_actions, hidden_dim=16).to(self.device)

    self.mse_loss = torch.nn.MSELoss()
    self.optim = optim.Adam(self.q_local.parameters(), lr=lr)

    self.n_states = n_states
    self.n_actions = n_actions

    # ReplayMemory: trajectory is saved here
    self.replay_memory = ReplayMemory(10000)
def _create_nn(self, nn_type, state_size, action_size, seed, device):
    if nn_type == 'noisydueling':
        self._sample_noise = True
        return NoisyDuelingQNetwork(state_size, action_size, seed, device=device).to(device)
    elif nn_type == 'dueling':
        return DuelingQNetwork(state_size, action_size, seed).to(device)
    elif nn_type == 'q':
        return QNetwork(state_size, action_size, seed).to(device)
    else:
        raise Exception(
            "Unknown NN type - must be one of 'noisydueling', 'dueling' or 'q'")
def __init__(self, state_size, action_size, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)   # Q(S, A; w)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)  # fixed target Q(S', a; w-)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)  # Replay memory
    self.t_step = 0  # count time steps
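# Note: the comments above refer to the online network Q(S, A; w) and the fixed target
# network Q(S', a; w-). The learning step those comments imply typically looks like the
# sketch below (standard DQN TD target with an MSE loss); the function and variable names
# here are illustrative, not part of the original agent.
import torch
import torch.nn.functional as F

def dqn_learn_step(qnetwork_local, qnetwork_target, optimizer, gamma, batch):
    """One gradient step on the online network from a sampled minibatch of transitions."""
    states, actions, rewards, next_states, dones = batch
    # Max predicted Q value for the next states, from the fixed target network
    q_targets_next = qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
    # TD target: r + gamma * max_a Q(S', a; w-) * (1 - done)
    q_targets = rewards + gamma * q_targets_next * (1 - dones)
    # Q values from the online network for the actions actually taken
    q_expected = qnetwork_local(states).gather(1, actions)
    loss = F.mse_loss(q_expected, q_targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()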
def __init__(self, state_size, action_size, double_dqn=True):
    self.state_size = state_size
    self.action_size = action_size
    self.double_dqn = double_dqn

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size).to(device)
    self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
    self.optimizer = torch.optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE)
    self.t_step = 0
def __init__(self, state_size, action_size, update_type='dqn', seed=0):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    self.update_type = update_type

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size)
    self.qnetwork_target = QNetwork(state_size, action_size)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, state_size, action_size, num_agents, double_dqn=False):
    self.action_size = action_size
    self.double_dqn = double_dqn

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size).to(device)
    self.qnetwork_target = copy.deepcopy(self.qnetwork_local)
    self.optimizer = torch.optim.Adam(self.qnetwork_local.parameters(), lr=LR)
    self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=4000, gamma=0.98, last_epoch=-1)

    # Replay memory
    self.memory = ReplayBuffer(BUFFER_SIZE)

    self.num_agents = num_agents
    self.t_step = 0
def __init__(self, state_size, action_size, seed, enable_curiosity):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    self.enable_curiosity = enable_curiosity

    # Q-Network
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)

    # Curiosity elements
    self.fwd_model = FwdModel(state_size, action_size, seed).to(device)
    self.inverse_model = InverseModel(state_size, action_size, seed).to(device)

    # Optimizer over the Q-network and both curiosity models
    params_to_opt = (list(self.qnetwork_local.parameters())
                     + list(self.fwd_model.parameters())
                     + list(self.inverse_model.parameters()))
    self.optimizer = optim.Adam(params_to_opt, lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
    self.loss_list = []
def __init__(self, state_size, action_size, seed=0, lr=1e-3, update_every=4,
             batch_size=4, buffer_size=64, gamma=0.0994, tau=1e-3, model_path="model.pth"):
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("=== AGENT ===")
    print(f"Created agent on device: {self.device}")

    self.model_path = model_path
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)
    self.update_every = update_every
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau

    # Network variables
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(self.device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(self.device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)
    self.load()

    # Control variables
    self.memory = ReplayBuffer(action_size, buffer_size, self.batch_size, seed, self.device)
    self.t_step = 0
def __init__(self, state_size, action_size, seed, network_type=None):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Specify network
    # Vanilla Q-Network
    if network_type is None:
        print("Using double network")
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    # Dueling Q-Network
    elif network_type == 'Dueling':
        print("Using double Dueling network")
        self.qnetwork_local = Dueling_QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = Dueling_QNetwork(state_size, action_size, seed).to(device)

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, env_name="BreakoutDeterministic-v4", gamma=0.99, batch_size=32,
             lr=0.00025, update_period=4, target_update_period=10000, n_frames=4):
    self.env_name = env_name
    self.gamma = gamma
    self.batch_size = batch_size

    # Linearly anneal epsilon from 1.0 to 0.1 over the first one million steps
    self.epsilon_scheduler = (
        lambda steps: max(1.0 - 0.9 * steps / 1000000, 0.1))

    self.update_period = update_period
    self.target_update_period = target_update_period

    env = gym.make(self.env_name)
    self.action_space = env.action_space.n

    self.qnet = QNetwork(self.action_space)
    self.target_qnet = QNetwork(self.action_space)
    self.optimizer = Adam(lr=lr, epsilon=0.01 / self.batch_size)

    self.n_frames = n_frames
    self.use_reward_clipping = True
    self.huber_loss = tf.keras.losses.Huber()
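# A quick check of the values the epsilon_scheduler lambda above produces:
#   epsilon_scheduler(0)         -> 1.0
#   epsilon_scheduler(500_000)   -> 0.55
#   epsilon_scheduler(1_000_000) -> 0.1
#   epsilon_scheduler(5_000_000) -> 0.1   (clamped at the 0.1 floor)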
def __init__(self, state_size, action_size, duel, fc1_units, fc2_units, seed):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        duel (bool): whether to use the dueling architecture
        fc1_units : number of nodes in the first hidden layer
        fc2_units : number of nodes in the second hidden layer
        seed (int): random seed
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Choose between the regular Q-Network and the dueling architecture
    #if duel:
    #    self.qnetwork_local = Duel_QNetwork(state_size, action_size, fc1_units, fc2_units, seed).to(device)
    #    self.qnetwork_target = Duel_QNetwork(state_size, action_size, fc1_units, fc2_units, seed).to(device)
    #else:
    #    self.qnetwork_local = QNetwork(state_size, action_size, fc1_units, fc2_units, seed).to(device)
    #    self.qnetwork_target = QNetwork(state_size, action_size, fc1_units, fc2_units, seed).to(device)
    self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
    self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Visualize network
    print(self.qnetwork_local)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0
def __init__(self, num_inputs, action_space, args):
    #self.n_flow = args.n_flows
    #assert self.n_flow == 0
    self.num_inputs = num_inputs
    #self.flow_family = args.flow_family
    self.num_layers = args.num_layers
    self.args = args

    self.gamma = args.gamma
    self.tau = args.tau
    self.alpha = args.alpha

    self.target_update_interval = args.target_update_interval
    self.automatic_entropy_tuning = args.automatic_entropy_tuning

    self.device = torch.device("cuda" if args.cuda else "cpu")

    self.critic = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=args.lr)

    self.critic_target = QNetwork(num_inputs, action_space.shape[0], args.hidden_size).to(self.device)
    hard_update(self.critic_target, self.critic)

    if self.automatic_entropy_tuning:
        self.target_entropy = -torch.prod(
            torch.Tensor(action_space.shape).to(self.device)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = Adam([self.log_alpha], lr=args.lr)

    self.policy = GaussianPolicy(num_inputs, action_space.shape[0], args.hidden_size,
                                 self.num_layers, args).to(self.device)
    self.policy_optim = Adam(self.policy.parameters(), lr=args.lr)
def __init__(self, state_size, action_size, seed, lr, buffer_size, batch_size,
             update_step, gamma, tau, dual_network=False):
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    if not dual_network:
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
    else:
        self.qnetwork_local = Dual_QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = Dual_QNetwork(state_size, action_size, seed).to(device)

    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=lr)

    # Replay memory
    self.memory = ReplayBuffer(action_size, buffer_size, batch_size, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0

    self.update_step = update_step
    self.batch_size = batch_size
    self.gamma = gamma
    self.tau = tau
def __init__(self, action_size, frame_history=4, seed=42):
    """Initialize an Agent object.

    Params
    ======
        action_size (int): Dimension of each action
        frame_history (int): Number of consecutive frames that make up each state
        seed (int): Random seed
    """
    self.action_size = action_size
    self.seed = random.seed(seed)

    # Q-Network
    self.qnetwork_local = QNetwork(action_size, frame_history, seed).to(device)
    self.qnetwork_target = QNetwork(action_size, frame_history, seed).to(device)
    self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

    # Replay memory
    self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, frame_history, seed)

    # Initialize time step (for updating every UPDATE_EVERY steps)
    self.t_step = 0