# Imports assumed by the build_model variants below (standalone Keras 2.x API).
from keras import layers, models, optimizers, regularizers, initializers
from keras import backend as K


def build_model(self):
    l2_regularization_kernel = 1e-5

    # Input layer
    input_states = layers.Input(shape=(self.state_size,), name='input_states')

    # Hidden layers
    model = layers.Dense(units=300,
                         kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input_states)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    model = layers.Dense(units=400,
                         kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    model = layers.Dense(units=200,
                         kernel_regularizer=regularizers.l2(l2_regularization_kernel))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    # Output layer - a fully connected layer with tanh activation
    output = layers.Dense(units=self.action_size,
                          activation='tanh',
                          kernel_regularizer=regularizers.l2(l2_regularization_kernel),
                          kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
                          name='output_actions')(model)

    # Keras model
    self.model = models.Model(inputs=input_states, outputs=output)

    # Define loss and optimizer: the actor is trained to maximize Q,
    # so the loss is the negative action-value gradient times the actions.
    action_gradients = layers.Input(shape=(self.action_size,))
    loss = K.mean(-action_gradients * output)

    optimizer = optimizers.Adam(lr=1e-4)
    update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=update_operation)
def build_model(self):
    l2_kernel_regularization = 1e-5

    # Define input layers
    input_states = layers.Input(shape=(self.state_size,), name='input_states')
    input_actions = layers.Input(shape=(self.action_size,), name='input_actions')

    # Hidden layers for states
    model_states = layers.Dense(units=32,
                                kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)

    model_states = layers.Dense(units=64,
                                kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model_states)
    model_states = layers.BatchNormalization()(model_states)
    model_states = layers.LeakyReLU(1e-2)(model_states)

    # Hidden layers for actions
    model_actions = layers.Dense(units=64,
                                 kernel_regularizer=regularizers.l2(l2_kernel_regularization))(input_actions)
    model_actions = layers.BatchNormalization()(model_actions)
    model_actions = layers.LeakyReLU(1e-2)(model_actions)

    # Both pathways merge here
    model = layers.add([model_states, model_actions])

    # Fully connected and batch normalization
    model = layers.Dense(units=32,
                         kernel_regularizer=regularizers.l2(l2_kernel_regularization))(model)
    model = layers.BatchNormalization()(model)
    model = layers.LeakyReLU(1e-2)(model)

    # Q values / output layer
    Q_values = layers.Dense(units=1,
                            activation=None,
                            kernel_regularizer=regularizers.l2(l2_kernel_regularization),
                            kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3),
                            name='output_Q_values')(model)

    # Wrap the Keras model
    self.model = models.Model(inputs=[input_states, input_actions], outputs=Q_values)

    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Gradient of Q values w.r.t. the actions, fetched by the actor during training
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
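# A minimal sketch of how the actor's `train_fn` and the critic's
# `get_action_gradients` defined above are typically wired together in a DDPG
# learning step. The names `actor`, `critic`, `states`, and `actions` are
# assumptions for illustration, not part of the code above: `actor` and
# `critic` are instances whose build_model calls created those functions, and
# `states`/`actions` are numpy batches sampled from a replay buffer.
import numpy as np

def ddpg_actor_update(actor, critic, states, actions):
    # Ask the critic for dQ/da at the sampled (state, action) pairs;
    # learning_phase = 0 keeps BatchNormalization in inference mode.
    action_gradients = np.reshape(
        critic.get_action_gradients([states, actions, 0]),
        (-1, actions.shape[-1]))
    # Train the actor to follow those gradients (learning_phase = 1).
    actor.train_fn([states, action_gradients, 1])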
def build_model(self): """Build an actor (policy) network that maps states -> actions.""" # Define input layer (states) states = layers.Input(shape=(self.state_size, ), name='states') '''# Add hidden layers net = layers.Dense(units=32, activation='relu')(states) net = layers.Dense(units=64, activation='relu')(net) net = layers.Dense(units=32, activation='relu')(net) # Try different layer sizes, activations, add batch normalization, regularizers, etc. # Add final output layer with sigmoid activation raw_actions = layers.Dense(units=self.action_size, activation='sigmoid', name='raw_actions')(net) ''' ################################### # Add hidden layers net = layers.Dense(units=400, kernel_regularizer=regularizers.l2(1e-6))(states) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) net = layers.Dense(units=300, kernel_regularizer=regularizers.l2(1e-6))(net) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) # Add final output layer with sigmoid activation raw_actions = layers.Dense( units=self.action_size, activation='sigmoid', name='raw_actions', kernel_initializer=initializers.RandomUniform(minval=-0.003, maxval=0.003))(net) ####################################### # Scale [0, 1] output for each action dimension to proper range actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, name='actions')(raw_actions) # Create Keras model self.model = models.Model(inputs=states, outputs=actions) # Define loss function using action value (Q value) gradients action_gradients = layers.Input(shape=(self.action_size, )) loss = K.mean(-action_gradients * actions) # Incorporate any additional losses here (e.g. from regularizers) # Define optimizer and training function optimizer = optimizers.Adam(lr=1e-6) updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=updates_op)
def res_block(inputs, size):
    kernel_l2_reg = 1e-3

    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3))(inputs)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    net = layers.Dense(size,
                       activation=None,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg),
                       kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Residual (skip) connection: add the block input back onto its output
    net = layers.add([inputs, net])
    return net
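# A small sketch of how res_block can be stacked into a residual MLP.
# Because of the layers.add([inputs, net]) skip connection, each block must
# keep the same width as its input, so `size` has to match the incoming
# tensor's dimensionality. The input shape and widths below are examples only,
# not values from the project.
from keras import layers, models

inputs = layers.Input(shape=(12,))
net = layers.Dense(64)(inputs)      # project to the block width first
for _ in range(3):
    net = res_block(net, 64)        # three residual blocks of width 64
model = models.Model(inputs=inputs, outputs=net)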
def build_model(self): """Build a critic (value) network that maps (state, action) pairs -> Q-values.""" # Define input layers states = layers.Input(shape=(self.state_size, ), name='states') actions = layers.Input(shape=(self.action_size, ), name='actions') # Add hidden layer(s) for state pathway net_states = layers.Dense( units=300, kernel_regularizer=regularizers.l2(0.00001))(states) net_states = layers.BatchNormalization()(net_states) net_states = layers.LeakyReLU(0.01)(net_states) net_states = layers.Dense( units=400, kernel_regularizer=regularizers.l2(0.00001))(net_states) net_states = layers.BatchNormalization()(net_states) net_states = layers.LeakyReLU(0.01)(net_states) # Add hidden layer(s) for action pathway net_actions = layers.Dense( units=400, kernel_regularizer=regularizers.l2(0.00001))(actions) net_actions = layers.BatchNormalization()(net_actions) net_actions = layers.LeakyReLU(0.01)(net_actions) # Try different layer sizes, activations, add batch normalization, regularizers, etc. # Combine state and action pathways net = layers.Add()([net_states, net_actions]) net = layers.Activation('relu')(net) # Add more layers to the combined network if needed net = layers.Dense(units=256, kernel_regularizer=regularizers.l2(0.00001))(net) net = layers.BatchNormalization()(net) net = layers.Activation(activation='relu')(net) # Add final output layer to prduce action values (Q values) Q_values = layers.Dense(units=1, activation=None, name='q_values', kernel_initializer=initializers.RandomUniform( minval=-3e-4, maxval=3e-4))(net) # Create Keras model self.model = models.Model(inputs=[states, actions], outputs=Q_values) # Define optimizer and compile model for training with built-in loss function optimizer = optimizers.Adam() self.model.compile(optimizer=optimizer, loss='mse') # Compute action gradients (derivative of Q values w.r.t. to actions) action_gradients = K.gradients(Q_values, actions) # Define an additional function to fetch action gradients (to be used by actor model) self.get_action_gradients = K.function( inputs=[*self.model.input, K.learning_phase()], outputs=action_gradients)
def build_model(self): """Build an actor (policy) network that maps states -> actions.""" # Define input layer (states) states = layers.Input(shape=(self.state_size, ), name='states') #--------- copy from DDPG quadcopter ----------- net = layers.Dense(units=400)(states) # net = layers.BatchNormalization()(net) net = layers.Activation("relu")(net) net = layers.Dense(units=200)(net) # net = layers.BatchNormalization()(net) net = layers.Activation("relu")(net) actions = layers.Dense(units=self.action_size, activation='softmax', name='actions', kernel_initializer=initializers.RandomUniform( minval=-1, maxval=1))(net) # actions = layers.Dense(units=self.action_size, activation='sigmoid', name='actions', # kernel_initializer=initializers.RandomUniform(minval=-0.001, maxval=0.001))(net) # Add hidden layers # net = layers.Dense(units=16,activation=activations.sigmoid)(states) # net = layers.BatchNormalization()(net) # net = layers.Dense(units=16,activation=activations.sigmoid)(net) # net = layers.BatchNormalization()(net) # net = layers.Dense(units=128,activation=activations.relu)(net) # net = layers.BatchNormalization()(net) # Add final output layer with sigmoid activation # actions = layers.Dense(units=self.action_size, activation='linear', # sigmoid # name='raw_actions' )(net) # Scale [0, 1] output for each action dimension to proper range # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, # name='actions')(raw_actions) # Create Keras model self.model = models.Model(inputs=states, outputs=actions) action_gradients = layers.Input(shape=(self.action_size, )) loss = K.mean(-action_gradients * actions) # Define optimizer and training function optimizer = optimizers.Adam(lr=.0001) updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=updates_op)
def build_model(self):
    # Define input layers
    input_states = layers.Input(shape=(self.state_size,), name='input_states')
    input_actions = layers.Input(shape=(self.action_size,), name='input_actions')

    # --------- copied from the DDPG quadcopter project ---------
    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(units=400)(input_states)
    # net_states = layers.BatchNormalization()(net_states)
    net_states = layers.Activation("relu")(net_states)

    net_states = layers.Dense(units=300)(net_states)
    net_states = layers.Activation("relu")(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(units=300)(input_actions)
    net_actions = layers.Activation("relu")(net_actions)
    # net_actions = layers.Dense(units=250, kernel_regularizer=regularizers.l2(1e-7))(net_actions)
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Activation("relu")(net_actions)

    # Combine state and action pathways
    net = layers.Add()([net_states, net_actions])
    net = layers.Activation('relu')(net)

    net = layers.Dense(units=200,
                       kernel_initializer=initializers.RandomUniform(minval=-0.5, maxval=0.5))(net)
    net = layers.Activation('relu')(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1, name='q_values')(net)

    # ---------------- Hidden layers for states ----------------
    # model_states = layers.Dense(units=32, activation=activations.sigmoid)(input_states)
    # # model_states = layers.BatchNormalization()(model_states)
    # model_states = layers.Dense(units=16, activation=activations.sigmoid)(model_states)
    # # model_states = layers.BatchNormalization()(model_states)
    # # model_states = layers.Dense(units=64)(model_states)
    # # model_states = layers.BatchNormalization()(model_states)
    #
    # # ---------------- Hidden layers for actions ----------------
    # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(input_actions)
    # # model_actions = layers.BatchNormalization()(model_actions)
    # model_actions = layers.Dense(units=16, activation=activations.sigmoid)(model_actions)
    # # model_actions = layers.BatchNormalization()(model_actions)
    #
    # # Both models merge here
    # model = layers.add([model_states, model_actions])
    #
    # # Fully connected and batch normalization
    # model = layers.Dense(units=8, activation=activations.sigmoid)(model)
    # # model = layers.BatchNormalization()(model)
    # # model = layers.Dense(units=64, activation=activations.relu)(model)
    # # model = layers.BatchNormalization()(model)
    #
    # # Q values / output layer
    # Q_values = layers.Dense(units=1, name='Q_s_a')(model)
    # # model = layers.BatchNormalization()(model)

    # Wrap the Keras model
    self.model = models.Model(inputs=[input_states, input_actions], outputs=Q_values)

    optimizer = optimizers.Adam(lr=0.0001)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Gradient of Q values w.r.t. the actions, fetched by the actor during training
    action_gradients = K.gradients(Q_values, input_actions)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def build_model(self):
    """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
    kernel_l2_reg = 1e-5

    # Dense options tried during tuning:
    # units=200,
    # activation='relu',
    # activation=None,
    # activity_regularizer=regularizers.l2(0.01),
    # kernel_regularizer=regularizers.l2(kernel_l2_reg),
    # bias_initializer=initializers.Constant(1e-2),
    # use_bias=True
    # use_bias=False

    # Define input layers
    states = layers.Input(shape=(self.state_size,), name='states')
    actions = layers.Input(shape=(self.action_size,), name='actions')

    # size_repeat = 30
    # state_size = size_repeat*self.state_size
    # action_size = size_repeat*self.action_size
    # block_size = size_repeat*self.state_size + size_repeat*self.action_size
    # print("Critic block size = {}".format(block_size))
    #
    # net_states = layers.concatenate(size_repeat * [states])
    # net_states = layers.BatchNormalization()(net_states)
    # net_states = layers.Dropout(0.2)(net_states)
    #
    # net_actions = layers.concatenate(size_repeat * [actions])
    # net_actions = layers.BatchNormalization()(net_actions)
    # net_actions = layers.Dropout(0.2)(net_actions)
    #
    # # State pathway
    # for _ in range(3):
    #     net_states = res_block(net_states, state_size)
    #
    # # Action pathway
    # for _ in range(2):
    #     net_actions = res_block(net_actions, action_size)
    #
    # # Merge state and action pathways
    # net = layers.concatenate([net_states, net_actions])
    #
    # # Final blocks
    # for _ in range(3):
    #     net = res_block(net, block_size)

    # Add hidden layer(s) for state pathway
    net_states = layers.Dense(units=300,
                              kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    net_states = layers.Dense(units=400,
                              kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
    net_states = layers.BatchNormalization()(net_states)
    net_states = layers.LeakyReLU(1e-2)(net_states)

    # Add hidden layer(s) for action pathway
    net_actions = layers.Dense(units=400,
                               kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
    net_actions = layers.BatchNormalization()(net_actions)
    net_actions = layers.LeakyReLU(1e-2)(net_actions)

    # Merge state and action pathways
    net = layers.add([net_states, net_actions])

    net = layers.Dense(units=200,
                       kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Add final output layer to produce action values (Q values)
    Q_values = layers.Dense(units=1,
                            activation=None,
                            kernel_regularizer=regularizers.l2(kernel_l2_reg),
                            kernel_initializer=initializers.RandomUniform(minval=-5e-3, maxval=5e-3),
                            # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
                            name='q_values')(net)

    # Create Keras model
    self.model = models.Model(inputs=[states, actions], outputs=Q_values)

    # Define optimizer and compile model for training with built-in loss function
    optimizer = optimizers.Adam(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss='mse')

    # Compute action gradients (derivative of Q values w.r.t. actions)
    action_gradients = K.gradients(Q_values, actions)

    # Define an additional function to fetch action gradients (to be used by actor model)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def build_model(self): kernel_l2_reg = 1e-5 """Build an actor (policy) network that maps states -> actions.""" # Define input layer (states) states = layers.Input(shape=(self.state_size, ), name='states') # size_repeat = 30 # block_size = size_repeat*self.state_size # print("Actor block size = {}".format(block_size)) # # net = layers.concatenate([states]*size_repeat) # # net = layers.Dense(block_size, # # # kernel_initializer=initializers.RandomNormal(mean=1.0, stddev=0.1), # # # bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01), # # activation=None, # # use_bias=False)(states) # net = layers.BatchNormalization()(net) # net = layers.Dropout(0.2)(net) # # net = layers.LeakyReLU(1e-2)(net) # # for _ in range(5): # net = res_block(net, block_size) # Add hidden layers net = layers.Dense( units=300, kernel_regularizer=regularizers.l2(kernel_l2_reg))(states) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) net = layers.Dense( units=400, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) net = layers.Dense( units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net) net = layers.BatchNormalization()(net) net = layers.LeakyReLU(1e-2)(net) # Try different layer sizes, activations, add batch normalization, regularizers, etc. # # Add final output layer with sigmoid activation # raw_actions = layers.Dense(units=self.action_size, # activation='sigmoid', # # kernel_regularizer=regularizers.l2(kernel_l2_reg), # kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), # # bias_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), # name='raw_actions')(net) # # # Scale [0, 1] output for each action dimension to proper range # actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low, name='actions')(raw_actions) actions = layers.Dense( units=self.action_size, activation='tanh', kernel_regularizer=regularizers.l2(kernel_l2_reg), kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3), name='actions')(net) # Create Keras model self.model = models.Model(inputs=states, outputs=actions) # Define loss function using action value (Q value) gradients action_gradients = layers.Input(shape=(self.action_size, )) loss = K.mean(-action_gradients * actions) # Incorporate any additional losses here (e.g. from regularizers) # Define optimizer and training function optimizer = optimizers.Adam(lr=1e-4) updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss) self.train_fn = K.function( inputs=[self.model.input, action_gradients, K.learning_phase()], outputs=[], updates=updates_op)