# Imports assumed from usage elsewhere in the repo: torch is aliased as `tor`.
import torch as tor
import torch.nn as nn


def __init__(self, architecture, weight_init=gauss_weights_init(0, 0.02),
             activation_functions=None):
    super(ActorCriticPPO, self).__init__()
    if len(architecture) < 2:
        raise Exception(
            "Architecture needs at least two numbers to create network")
    # assert architecture[-1] % 2 == 1, "Last layer has to represent 2*actions_space for the Gaussian + 1 for value"

    self.activation_functions = activation_functions
    self.layer_list = []
    self.layer_list_val = []

    # Learnable log standard deviation for the Gaussian policy.
    self.siglog = tor.zeros(1, requires_grad=True)
    self.siglog = nn.Parameter(self.siglog)

    # Policy head: one Linear layer per consecutive pair in `architecture`.
    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])

    # Value head: same trunk, but the final layer maps to a single value.
    for i in range(len(architecture) - 2):
        self.layer_list_val.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc_val" + str(i), self.layer_list_val[-1])
    self.layer_list_val.append(nn.Linear(architecture[-2], 1))
    setattr(self, "fc_val" + str(len(architecture) - 2), self.layer_list_val[-1])

    self.apply(weight_init)
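# `gauss_weights_init` is used as the default initializer throughout but is
# not defined in this section. A minimal sketch of what it plausibly looks
# like, inferred from its use with nn.Module.apply: a closure returning a
# per-module init function that draws Linear weights from N(mu, sigma).
def gauss_weights_init(mu, sigma):
    def init(m):
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(mu, sigma)
    return init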
def __init__(self, architecture, weight_init=gauss_weights_init(0, 0.02),
             activation_functions=None):
    super(NeuralNet, self).__init__()
    self.activation_functions = activation_functions
    self.layer_list = []
    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])
    # self.last_linear = nn.Linear(architecture[-1], 1)
    self.apply(weight_init)
def __init__(self, architecture, weight_init=gauss_weights_init(0, 0.02),
             activation_functions=None):
    super(SimpleNetwork, self).__init__()
    if len(architecture) < 2:
        raise Exception("Architecture needs at least two numbers to create network")
    self.activation_functions = activation_functions
    self.layer_list = []
    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])
    self.apply(weight_init)
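# A hypothetical forward pass for the MLP classes above (not the repo's own
# implementation), assuming `activation_functions` holds one callable per
# Linear layer in `layer_list`:
def forward_sketch(net, x):
    for layer, act in zip(net.layer_list, net.activation_functions):
        x = act(layer(x))
    return x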
def __init__(self, architecture, num_grus=2,
             weight_init=gauss_weights_init(0, 0.02), activation_functions=None):
    """
    The first entry of `architecture` sets the GRU input and hidden size.
    """
    super(RecurrentActorCriticPPO, self).__init__()
    if len(architecture) < 2:
        raise Exception(
            "Architecture needs at least two numbers to create network")
    # assert architecture[-1] % 2 == 1, "Last layer has to represent 2*actions_space for the Gaussian + 1 for value"

    self.activation_functions = activation_functions
    self.layer_list = []
    self.layer_list_val = []

    # Separate recurrent trunks for the value and policy heads.
    gru_size = architecture[0]
    # architecture = architecture[1:]
    self.value_gru = nn.GRU(input_size=gru_size, hidden_size=gru_size,
                            num_layers=num_grus, batch_first=True)
    self.policy_gru = nn.GRU(input_size=gru_size, hidden_size=gru_size,
                             num_layers=num_grus, batch_first=True)

    # Learnable log standard deviation for the Gaussian policy.
    self.siglog = tor.zeros(1, requires_grad=True)
    self.siglog = nn.Parameter(self.siglog)

    # Policy head: one Linear layer per consecutive pair in `architecture`.
    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])

    # Value head: same trunk, but the final layer maps to a single value.
    for i in range(len(architecture) - 2):
        self.layer_list_val.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc_val" + str(i), self.layer_list_val[-1])
    self.layer_list_val.append(nn.Linear(architecture[-2], 1))
    setattr(self, "fc_val" + str(len(architecture) - 2), self.layer_list_val[-1])

    self.apply(weight_init)
def __init__(self, architecture, weight_init=gauss_weights_init(0, 0.02),
             activation_functions=None):
    super(RandSigmaPPONetwork, self).__init__()
    if len(architecture) < 2:
        raise Exception(
            "Architecture needs at least two numbers to create network")
    # assert architecture[-1] % 2 == 1, "Last layer has to represent 2*actions_space for the Gaussian + 1 for value"

    self.activation_functions = activation_functions
    self.layer_list = []
    # `sigma_log_bounds` is expected to be defined on the class; the log
    # standard deviation starts at its lower bound.
    self.sigma_log = self.sigma_log_bounds[0]

    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])

    self.apply(weight_init)
def __init__(self, architecture, recurrent_layers=2,
             weight_init=gauss_weights_init(0, 0.02), activation_functions=None,
             bidirectional=True, recurr_type=nn.GRU):
    """
    The first entry of `architecture` sets the recurrent unit's input and
    hidden size.
    """
    super(RecurrentNet, self).__init__()
    if len(architecture) < 2:
        raise Exception(
            "Architecture needs at least two numbers to create network")
    # assert architecture[-1] % 2 == 1, "Last layer has to represent 2*actions_space for the Gaussian + 1 for value"

    self.activation_functions = activation_functions
    self.layer_list = []
    self.layer_list_val = []
    self.recurrent = True

    recurr_size = architecture[0]
    # architecture = architecture[1:]
    # Note: with bidirectional=True the recurrent unit outputs
    # 2 * recurr_size features per step, which the first Linear layer
    # (input width architecture[0]) does not account for by itself.
    self.recurrent_unit = recurr_type(input_size=recurr_size,
                                      hidden_size=recurr_size,
                                      num_layers=recurrent_layers,
                                      batch_first=True,
                                      bidirectional=bidirectional)

    for i in range(len(architecture) - 1):
        self.layer_list.append(nn.Linear(architecture[i], architecture[i + 1]))
        setattr(self, "fc" + str(i), self.layer_list[-1])

    # Last hidden state, populated during the forward pass.
    self.h_n = None
    self.apply(weight_init)
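# Illustrative shape check (assumed sizes, not from the source): with
# bidirectional=True a GRU emits num_directions * hidden_size features per
# step, so downstream Linear layers sized to architecture[0] would need the
# output halved or projected first.
gru = nn.GRU(input_size=16, hidden_size=16, num_layers=2,
             batch_first=True, bidirectional=True)
out, h_n = gru(tor.randn(4, 10, 16))
print(out.shape)  # torch.Size([4, 10, 32]) -- 2 * hidden_size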
"""
Implementation of the hindsight policy gradient paper:
https://arxiv.org/pdf/1711.06006.pdf.
"""
from collections import deque

import numpy as np
from torch.optim import Adam

# BitFlippingEnv, PolicyAHG and gauss_weights_init come from the surrounding repo.

# Training parameters
num_bits = 8
num_episodes = 10000
episode_length = 16
mvr_tracker = deque(maxlen=400)

env = BitFlippingEnv(num_bits)
# Policy input is the state concatenated with the goal (num_bits * 2);
# output covers num_bits + 1 actions.
policy = PolicyAHG(num_bits * 2, num_bits + 1)

# Initialization of weights
policy.apply(gauss_weights_init(0, 0.02))
policy.zero_grad()
optimizer = Adam(policy.parameters(), lr=0.001)

# Keeps track of the current episode
episode_steps = [0] * episode_length

for i in range(num_episodes):
    # The reward accumulated in the episode
    acc_reward = 0
    acc_distance = 0
    actions = []

    goal_occurances = {}
    goal_occurances[tuple(env.goal)] = 1

    state_and_goal = np.zeros((1, num_bits * 2))
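    # Hypothetical continuation (not from the source): the (1, num_bits * 2)
    # buffer suggests the policy input packs the current bit string next to
    # the goal, e.g.:
    #   state_and_goal[0, :num_bits] = state
    #   state_and_goal[0, num_bits:] = env.goal
    # where `state` would come from env.reset()/env.step(); `env.goal` is the
    # target bit string held by BitFlippingEnv.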