def build(self, input_shape):
    """Create the trainable weights for the configured ``self.mode``.

    * ``mode is None``: a single zero-initialised ``kernel``.
    * ``mode == 'dense'``: a ``kernel`` of shape ``(num_w, num_miss)`` and a
      ``bias`` of shape ``(num_miss,)``, where ``num_miss`` is
      ``sum(self.miss_col)`` and ``num_w`` is the remaining column count.

    Raises:
        NameError: if ``self.mode`` is neither ``None`` nor ``'dense'``.
    """
    if self.mode is None:
        # NOTE(review): `input_shape` is wrapped as-is in a 1-tuple, so the
        # requested shape is nested if `input_shape` is itself a tuple --
        # confirm this is what the caller expects.
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape, ),
                                      initializer=initializers.Zeros(),
                                      trainable=True)
    elif self.mode == 'dense':
        num_miss = sum(self.miss_col)
        num_w = input_shape[1] - num_miss
        self.kernel = self.add_weight(
            name='kernel',
            shape=(num_w, num_miss),
            initializer=initializers.RandomUniform(),
            trainable=True)
        # The bias used to be registered as 'kernel' too; two weights with
        # the same name in one layer are ambiguous when weights are
        # saved or inspected by name.
        self.bias = self.add_weight(
            name='bias',
            shape=(num_miss, ),
            initializer=initializers.RandomUniform(),
            trainable=True)
    else:
        raise NameError('There is no such mode')
    self.output_dim = input_shape
    super(missValueLayer, self).build(input_shape)
def get_embed(n_vocab=None, n_units=None, glove=False):
    """Return an ``Embedding`` layer, either random or GloVe-backed.

    Args:
        n_vocab: Vocabulary size for the randomly initialised embedding.
        n_units: Embedding width for the randomly initialised embedding.
        glove: When true, ignore ``n_vocab``/``n_units`` and build a frozen
            embedding from the ``glove_common_42_vecs`` dataset stored in
            ``data/glovePrepped.h5``.

    Returns:
        The constructed ``Embedding`` layer.
    """
    if not glove:
        return Embedding(n_vocab,
                         n_units,
                         embeddings_initializer=initializers.RandomUniform(
                             minval=-0.05, maxval=0.05, seed=0))

    # Open read-only and close the handle as soon as the vectors are copied
    # into memory; the file used to be left open for the process lifetime.
    with h5py.File("data/glovePrepped.h5", "r") as gf:
        gloveVecs_42 = np.array(gf['glove_common_42_vecs'])

    vocabSize = gloveVecs_42.shape[0]
    gloveSize = gloveVecs_42.shape[-1]
    glove_embed42 = Embedding(
        vocabSize,
        gloveSize,
        weights=[gloveVecs_42],
        trainable=False,
        embeddings_initializer=initializers.RandomUniform(minval=-0.05,
                                                          maxval=0.05,
                                                          seed=0))
    return glove_embed42
def __init__(self, state_size, action_size, action_low, action_high):
    """Store the problem dimensions, prepare initializers and build the model.

    Params
    ======
        state_size (int): Dimension of each state
        action_size (int): Dimension of each action
        action_low (array): Min value of each action dimension
        action_high (array): Max value of each action dimension
    """
    self.state_size, self.action_size = state_size, action_size
    self.action_low, self.action_high = action_low, action_high
    self.action_range = self.action_high - self.action_low

    # Uniform bounds: 1/sqrt(400) and 1/sqrt(300) for the two hidden
    # initializers, and a fixed 3e-3 for the final one.
    bound_h1 = 1 / sqrt(400)
    bound_h2 = 1 / sqrt(300)
    bound_final = 3e-3
    self.initializer_h1 = initializers.RandomUniform(minval=-bound_h1,
                                                     maxval=bound_h1)
    self.initializer_h2 = initializers.RandomUniform(minval=-bound_h2,
                                                     maxval=bound_h2)
    self.initializer_final = initializers.RandomUniform(minval=-bound_final,
                                                        maxval=bound_final)

    self.build_model()
def build(self, input_shape):
    """Create the attention weights ``Wh``, ``Wv``, ``w`` and optional bias.

    ``input_shape`` is indexed as ``input_shape[0][2]`` and
    ``input_shape[1][2]``, i.e. it holds the shapes of two inputs and the
    size at index 2 of each is used. All weights are drawn uniformly from
    [-0.1, 0.1].
    """
    dim_h = input_shape[0][2]
    dim_v = input_shape[1][2]

    self.Wh = self.add_weight(name='kernel',
                              shape=(dim_h, dim_h),
                              initializer=initializers.RandomUniform(
                                  minval=-0.1, maxval=0.1),
                              trainable=True)
    self.Wv = self.add_weight(name='vweight',
                              shape=(dim_v, dim_v),
                              initializer=initializers.RandomUniform(
                                  minval=-0.1, maxval=0.1),
                              trainable=True)
    # `w` used to be registered as 'vweight' as well, clashing with `Wv`;
    # give it its own name so the layer's weights are distinguishable.
    self.w = self.add_weight(name='w',
                             shape=(dim_h + dim_v, 1),
                             initializer=initializers.RandomUniform(
                                 minval=-0.1, maxval=0.1),
                             trainable=True)
    if self.use_bias:
        self.bias = self.add_weight(name='bias',
                                    shape=(1, ),
                                    initializer=initializers.RandomUniform(
                                        minval=-0.1, maxval=0.1),
                                    trainable=True)
    else:
        self.bias = None
    super(AtaeAttention, self).build(input_shape)
def build_model(self):
    """Build an actor (policy) network that maps states -> actions.

    Two regularised Dense/BatchNorm/ReLU blocks (400 and 300 units) feed a
    tanh output layer. The tanh output is rescaled to
    ``[action_low, action_low + action_range]``. Sets ``self.model`` and
    ``self.train_fn``; the latter applies one Adam step given states,
    action gradients and the learning phase.
    """
    # Define input layer (states)
    states = layers.Input(shape=(self.state_size, ), name='states')

    # Hidden layers
    net = layers.Dense(units=400,
                       kernel_initializer=initializers.lecun_uniform(),
                       bias_initializer=initializers.lecun_uniform(),
                       kernel_regularizer=regularizers.l2(self.L2),
                       bias_regularizer=regularizers.l1())(states)
    net = layers.BatchNormalization()(net)
    net = layers.Activation('relu')(net)
    net = layers.Dense(units=300,
                       kernel_initializer=initializers.lecun_uniform(),
                       bias_initializer=initializers.lecun_uniform(),
                       kernel_regularizer=regularizers.l2(self.L2),
                       bias_regularizer=regularizers.l1())(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation('relu')(net)

    # Final output layer with tanh activation: values lie in [-1, 1]
    raw_actions = layers.Dense(
        units=self.action_size,
        activation='tanh',
        name='raw_actions',
        kernel_initializer=initializers.RandomUniform(-0.003, 0.003),
        bias_initializer=initializers.RandomUniform(-0.003, 0.003))(net)

    # Map [-1, 1] onto [action_low, action_low + action_range]. The previous
    # `x * range + low` formula assumed a [0, 1] output and therefore let
    # actions drop as far as `action_low - action_range`.
    actions = layers.Lambda(
        lambda x: (((x + 1) / 2) * self.action_range) + self.action_low,
        name='actions')(raw_actions)

    # Create Keras model
    self.model = models.Model(inputs=states, outputs=actions)

    # Define loss function using action value (Q value) gradients
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)

    # Define optimizer and training function
    optimizer = optimizers.Adam(lr=self.lr)
    updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                       loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients,
                K.learning_phase()],
        outputs=[],
        updates=updates_op)
def build_model(self):
    """Build an actor policy network that maps state -> action.

    Three Dense(64/128/64) blocks, each L2-regularised (0.01) and followed
    by batch normalisation and ReLU, feed a sigmoid output layer whose
    kernel is initialised uniformly in (-3e-3, 3e-3) and whose bias is
    initialised uniformly in (-3e-4, 3e-4). The sigmoid output is scaled by
    ``action_range`` and shifted by ``action_low``. Sets ``self.model`` and
    ``self.train_fn`` (Adam, lr=0.0001).
    """
    state_input = layers.Input(shape=(self.state_size,), name='states')

    # Hidden stack: Dense -> BatchNorm -> ReLU for each width.
    hidden = state_input
    for width in (64, 128, 64):
        hidden = layers.Dense(
            units=width,
            kernel_regularizer=regularizers.l2(0.01))(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation('relu')(hidden)

    # Output layer: sigmoid, with small uniform initial weights and biases.
    kernel_init = initializers.RandomUniform(minval=-3e-3, maxval=3e-3)
    bias_init = initializers.RandomUniform(minval=-3e-4, maxval=3e-4)
    raw_actions = layers.Dense(units=self.action_size,
                               activation='sigmoid',
                               name='raw_actions',
                               kernel_initializer=kernel_init,
                               bias_initializer=bias_init)(hidden)

    # Scale the [0, 1] output of each action dimension to its proper range.
    actions = layers.Lambda(
        lambda x: (x * self.action_range) + self.action_low,
        name='actions')(raw_actions)

    self.model = models.Model(inputs=state_input, outputs=actions)

    # Loss: mean of the negated product of the externally supplied action
    # gradients and the actions.
    action_gradients = layers.Input(shape=(self.action_size,))
    loss = K.mean(-action_gradients * actions)

    # Training function: one Adam update over the model's trainable weights.
    adam = optimizers.Adam(lr=0.0001)
    updates_op = adam.get_updates(params=self.model.trainable_weights,
                                  loss=loss)
    train_inputs = [self.model.input, action_gradients, K.learning_phase()]
    self.train_fn = K.function(inputs=train_inputs,
                               outputs=[],
                               updates=updates_op)
def build(self):
    """Build the kernel-pooling matching model described by ``self.config``.

    Query and document token ids share one embedding; their normalised dot
    product is passed through ``kernel_num`` Gaussian kernels, summed over
    the document axis, log1p-transformed, summed over the query axis and
    stacked into a feature vector that feeds the final Dense layer.

    Note that ``kernel_num`` must be greater than 1, since the kernel
    centres divide by ``kernel_num - 1``.

    Returns:
        The Keras ``Model`` taking ``[query, doc]`` inputs.

    Raises:
        ValueError: if ``config['target_mode']`` is not one of
            ``'classification'``, ``'regression'`` or ``'ranking'``.
    """
    def Kernel_layer(mu, sigma):
        # Factory so every layer closes over its own (mu, sigma) pair.
        def kernel(x):
            return K.tf.exp(-0.5 * (x - mu) * (x - mu) / sigma / sigma)

        return Activation(kernel)

    query = Input(name='query', shape=(self.config['text1_maxlen'], ))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'], ))
    show_layer_info('Input', doc)

    embedding = Embedding(self.config['vocab_size'],
                          self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.config['train_embed'])
    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)

    mm = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed])
    show_layer_info('Dot', mm)

    KM = []
    for i in range(self.config['kernel_num']):
        mu = 1. / (self.config['kernel_num'] - 1) + (2. * i) / (
            self.config['kernel_num'] - 1) - 1.0
        sigma = self.config['sigma']
        if mu > 1.0:
            # The last centre overshoots 1.0: clamp it and switch to the
            # dedicated `exact_sigma` width.
            sigma = self.config['exact_sigma']
            mu = 1.0
        mm_exp = Kernel_layer(mu, sigma)(mm)
        show_layer_info('Exponent of mm:', mm_exp)
        mm_doc_sum = Lambda(lambda x: K.tf.reduce_sum(x, 2))(mm_exp)
        show_layer_info('Sum of document', mm_doc_sum)
        mm_log = Activation(K.tf.log1p)(mm_doc_sum)
        show_layer_info('Logarithm of sum', mm_log)
        mm_sum = Lambda(lambda x: K.tf.reduce_sum(x, 1))(mm_log)
        show_layer_info('Sum of all exponent', mm_sum)
        KM.append(mm_sum)

    Phi = Lambda(lambda x: K.tf.stack(x, 1))(KM)
    show_layer_info('Stack', Phi)

    target_mode = self.config['target_mode']
    if target_mode == 'classification':
        out_ = Dense(2,
                     activation='softmax',
                     kernel_initializer=initializers.RandomUniform(
                         minval=-0.014, maxval=0.014),
                     bias_initializer='zeros')(Phi)
    elif target_mode in ['regression', 'ranking']:
        out_ = Dense(1,
                     kernel_initializer=initializers.RandomUniform(
                         minval=-0.014, maxval=0.014),
                     bias_initializer='zeros')(Phi)
    else:
        # Previously `out_` stayed unbound and the next line failed with an
        # opaque UnboundLocalError.
        raise ValueError(
            'Unsupported target_mode: %r' % (target_mode, ))
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc], outputs=[out_])
    return model
def build_model(self):
    """Build the actor network, plot it, and create its training function.

    Two Dense/BatchNorm/ReLU/Dropout blocks (400 and 300 units) are followed
    by a uniformly initialised Dense layer, batch normalisation and a tanh.
    The tanh output is mapped from [-1, 1] to
    ``[action_low, action_low + action_range]``. Sets ``self.model`` and
    ``self.train_fn`` and writes the architecture diagram to ``actor.png``.
    """
    state_input = layers.Input(shape=(self.state_size, ), name='states')

    # Hidden blocks: Dense -> BatchNorm -> ReLU -> Dropout.
    hidden = state_input
    for width in (400, 300):
        hidden = layers.Dense(units=width)(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation('relu')(hidden)
        hidden = layers.Dropout(self.dropoutRate)(hidden)

    # Output head: kernel drawn from [-initLimit, initLimit], bias from
    # [0, initLimit], both with a fixed seed.
    kernel_init = initializers.RandomUniform(minval=-self.initLimit,
                                             maxval=self.initLimit,
                                             seed=0)
    bias_init = initializers.RandomUniform(minval=0,
                                           maxval=self.initLimit,
                                           seed=0)
    head = layers.Dense(units=self.action_size,
                        kernel_initializer=kernel_init,
                        bias_initializer=bias_init)(hidden)
    head = layers.BatchNormalization()(head)
    raw_actions = layers.Activation('tanh', name='raw_actions')(head)

    # Shift tanh's [-1, 1] to [0, 1], then scale to the action range.
    actions = layers.Lambda(
        lambda x: (((x + 1) / 2) * self.action_range) + self.action_low,
        name='actions')(raw_actions)

    self.model = models.Model(inputs=state_input, outputs=actions)

    # Save the model architecture to an image.
    plot_model(self.model, to_file='actor.png', show_shapes=True)

    # Loss built from the action-value (Q value) gradients fed in at
    # training time.
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)

    adam = optimizers.Adam(lr=self.learningRate)
    updates_op = adam.get_updates(params=self.model.trainable_weights,
                                  loss=loss)
    train_inputs = [self.model.input, action_gradients, K.learning_phase()]
    self.train_fn = K.function(inputs=train_inputs,
                               outputs=[],
                               updates=updates_op)
def create_actor(in_shape, n_actions=1):
    """Build the actor as a small ``Sequential`` network.

    Layout: Dense(32) -> ELU -> Dense(64) -> ELU -> Dense(n_actions, tanh).
    Every Dense layer shares one kernel initializer (uniform in
    [-0.003, 0.003]) and one bias initializer (uniform in
    [-0.0003, 0.0003]). The model summary is printed.

    Returns:
        Tuple ``(actor, actor.input, actor.trainable_weights)``.
    """
    kernel_init = initializers.RandomUniform(minval=-0.003,
                                             maxval=0.003,
                                             seed=None)
    bias_init = initializers.RandomUniform(minval=-0.0003,
                                           maxval=0.0003,
                                           seed=None)

    network = Sequential()
    # Layer names skip L2/L5 because the batch-norm layers that used those
    # names are disabled.
    network.add(
        Dense(32,
              input_shape=in_shape,
              kernel_initializer=kernel_init,
              bias_initializer=bias_init,
              name='L1'))
    network.add(Activation("elu", name='L3'))
    network.add(
        Dense(64,
              kernel_initializer=kernel_init,
              bias_initializer=bias_init,
              name='L4'))
    network.add(Activation("elu", name='L6'))
    network.add(
        Dense(n_actions,
              activation="tanh",
              kernel_initializer=kernel_init,
              bias_initializer=bias_init,
              name='L10'))

    network.summary()
    return network, network.input, network.trainable_weights
def build_model(self, lr_critic):
    """Build a critic (value) network mapping (state, action) -> Q-value.

    The state goes through a 400-unit ReLU layer, is concatenated with the
    action, goes through a 300-unit ReLU layer and ends in a single linear
    unit initialised uniformly in [-3e-3, 3e-3].

    Args:
        lr_critic: Learning rate handed to the Adam optimizer.

    Sets ``self.model`` (compiled with MSE loss) and
    ``self.get_action_gradients``, a backend function returning the
    gradient of the Q-values with respect to the action input.
    """
    state_in = layers.Input(shape=(self.state_size, ), name='states')
    action_in = layers.Input(shape=(self.action_size, ), name='actions')

    # State pathway, then merge with the raw actions.
    hidden = layers.Dense(units=400, activation='relu')(state_in)
    hidden = layers.Concatenate()([hidden, action_in])
    hidden = layers.Dense(units=300, activation='relu')(hidden)

    # Final output layer producing action values (Q values).
    q_kernel_init = initializers.RandomUniform(minval=-3e-3, maxval=3e-3)
    q_bias_init = initializers.RandomUniform(minval=-3e-3, maxval=3e-3)
    q_out = layers.Dense(units=1,
                         name='q_values',
                         kernel_initializer=q_kernel_init,
                         bias_initializer=q_bias_init)(hidden)

    self.model = models.Model(inputs=[state_in, action_in], outputs=q_out)
    self.model.compile(optimizer=optimizers.Adam(lr=lr_critic), loss='mse')

    # Derivative of the Q values w.r.t. the actions, exposed as a function
    # so the actor model can fetch it.
    action_gradients = K.gradients(q_out, action_in)
    self.get_action_gradients = K.function(
        inputs=[*self.model.input, K.learning_phase()],
        outputs=action_gradients)
def build_model(self, lr_actor):
    """Build an actor (policy) network that maps states -> actions.

    Two ReLU layers (400 and 300 units) feed a tanh output layer whose
    kernel and bias are initialised uniformly in [-3e-3, 3e-3]. The tanh
    output is used directly as the action.

    Args:
        lr_actor: Learning rate handed to the Adam optimizer.

    Sets ``self.model`` and ``self.train_fn``.
    """
    state_in = layers.Input(shape=(self.state_size, ), name='states')

    hidden = state_in
    for width in (400, 300):
        hidden = layers.Dense(units=width, activation='relu')(hidden)

    # Final output layer.
    out_kernel_init = initializers.RandomUniform(minval=-3e-3, maxval=3e-3)
    out_bias_init = initializers.RandomUniform(minval=-3e-3, maxval=3e-3)
    actions = layers.Dense(units=self.action_size,
                           activation='tanh',
                           name='raw_actions',
                           kernel_initializer=out_kernel_init,
                           bias_initializer=out_bias_init)(hidden)

    self.model = models.Model(inputs=state_in, outputs=actions)

    # Loss built from action-value (Q value) gradients supplied at train
    # time.
    action_gradients = layers.Input(shape=(self.action_size, ))
    loss = K.mean(-action_gradients * actions)

    adam = optimizers.Adam(lr=lr_actor)
    updates_op = adam.get_updates(params=self.model.trainable_weights,
                                  loss=loss)
    train_inputs = [self.model.input, action_gradients, K.learning_phase()]
    self.train_fn = K.function(inputs=train_inputs,
                               outputs=[],
                               updates=updates_op)
def build(self, input_shape):
    """Create the ``(input_shape[2], 1)`` kernel and the optional bias.

    Both weights are trainable and drawn uniformly from [-0.1, 0.1];
    ``self.bias`` is ``None`` when ``self.use_bias`` is false.
    """
    feature_dim = input_shape[2]
    self.kernel = self.add_weight(
        name='kernel',
        shape=(feature_dim, 1),
        initializer=initializers.RandomUniform(minval=-0.1, maxval=0.1),
        trainable=True)

    self.bias = None
    if self.use_bias:
        self.bias = self.add_weight(
            name='bias',
            shape=(1, ),
            initializer=initializers.RandomUniform(minval=-0.1, maxval=0.1),
            trainable=True)

    super(LocationAttentionLayer, self).build(input_shape)
def build_model(self):
    """Build an LSTM critic mapping (state, action) pairs -> Q-values.

    States pass through LSTM(16) -> BatchNorm -> ReLU -> LSTM(32), both
    returning sequences; actions pass through a single LSTM(32). The two
    branches are added, rectified and projected to one unit. Sets
    ``self.model`` (Adam lr=0.001, MSE) and ``self.get_action_gradients``.
    """
    state_in = Input((self.state_size, 1), name='states')
    action_in = Input((self.action_size, 1), name='actions')

    # State branch.
    state_branch = LSTM(16,
                        kernel_regularizer=regularizers.l2(1e-6),
                        return_sequences=True)(state_in)
    state_branch = layers.BatchNormalization()(state_branch)
    state_branch = layers.Activation("relu")(state_branch)
    state_branch = LSTM(32,
                        kernel_regularizer=regularizers.l2(1e-6),
                        return_sequences=True)(state_branch)

    # Action branch.
    action_branch = LSTM(32,
                         kernel_regularizer=regularizers.l2(1e-6))(action_in)

    # NOTE(review): the state branch returns sequences while the action
    # branch does not, so `Add` receives tensors of different rank --
    # confirm this merge is intended.
    merged = layers.Add()([state_branch, action_branch])
    merged = layers.Activation('relu')(merged)

    q_out = layers.Dense(units=1,
                         name='q_values',
                         kernel_initializer=initializers.RandomUniform(
                             minval=-0.003, maxval=0.003))(merged)

    self.model = models.Model(inputs=[state_in, action_in], outputs=q_out)
    self.model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mse')

    # Gradient of the Q-values w.r.t. the action input, exposed as a
    # callable backend function.
    action_gradients = K.gradients(q_out, action_in)
    gradient_inputs = [*self.model.input, K.learning_phase()]
    self.get_action_gradients = K.function(inputs=gradient_inputs,
                                           outputs=action_gradients)
def __init__(self, output_dim,regularizer=None, constraint=None, **kwargs):
    """Store the layer configuration and create a seeded uniform initializer.

    Args:
        output_dim: Stored on the instance as ``self.output_dim``.
        regularizer: Stored as ``self.regularizer``.
        constraint: Stored as ``self.constraint``.
        **kwargs: Forwarded to the parent class constructor.
    """
    self.output_dim = output_dim
    # Seed must be between 0 and 2**32 - 1. Request int64 explicitly:
    # where NumPy's default integer is 32-bit, `randint(2**32 - 1)` raises
    # because the bound does not fit. `int()` yields a plain Python int.
    self.combine_weights_seed = int(
        np.random.randint(2**32 - 1, dtype=np.int64))
    self.initializer = initializers.RandomUniform(
        minval=0, maxval=1, seed=self.combine_weights_seed)
    self.regularizer = regularizer
    self.constraint = constraint
    super(SmartInput, self).__init__(**kwargs)
def build(self, input_shape):
    """Create the convolution kernel, the channel selector and optional bias.

    The kernel has shape ``kernel_size + (input_shape[3], filters)``. The
    channel selector has one non-negative entry per filter, initialised
    uniformly in ``[0, filters]`` with ``self.channel_selector_seed``.
    """
    n_filters = self.filters
    kernel_shape = self.kernel_size + (input_shape[3], n_filters)

    self.kernel = self.add_weight(name='kernel_smart',
                                  shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  constraint=self.kernel_constraint,
                                  regularizer=self.kernel_regularizer,
                                  trainable=True)

    selector_init = initializers.RandomUniform(
        minval=0, maxval=n_filters, seed=self.channel_selector_seed)
    self.channel_selector = self.add_weight(shape=(n_filters,),
                                            initializer=selector_init,
                                            name='selector',
                                            regularizer=None,
                                            constraint=NonNeg())

    if self.use_bias:
        self.bias = self.add_weight(shape=(n_filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)

    # The parent build must still run after the weights are created.
    super(SmartConv2D, self).build(input_shape)
def build_dense_layer(self, input, n, init_offset, activation=None, use_batch_norm=False, use_dropout=False, dropout_rate=0.2):
    """Apply a fully connected layer plus optional extras to ``input``.

    Args:
        input: Tensor the Dense layer is applied to.
        n: Number of units.
        init_offset: Weights are drawn uniformly from
            ``[-init_offset, init_offset]``.
        activation: Optional activation applied after the Dense/BatchNorm.
        use_batch_norm: If true, the Dense layer has no bias and is followed
            by batch normalisation.
        use_dropout: If true, append a Dropout layer.
        dropout_rate: Rate for that Dropout layer.

    Returns:
        The output tensor of the assembled stack.
    """
    uniform = initializers.RandomUniform(-init_offset, init_offset)

    if not use_batch_norm:
        tensor = layers.Dense(units=n,
                              kernel_initializer=uniform,
                              bias_initializer=uniform)(input)
    else:
        # Bias is skipped here because batch normalisation follows.
        pre_norm = layers.Dense(units=n,
                                kernel_initializer=uniform,
                                use_bias=False)(input)
        tensor = layers.BatchNormalization()(pre_norm)

    if activation is not None:
        tensor = layers.Activation(activation)(tensor)

    if use_dropout:
        tensor = layers.Dropout(dropout_rate)(tensor)

    return tensor
def __init__(self, kernel_size=(3, 3), strides=(1, 1), padding='same', depth_multiplier=1, data_format=None, activation=None, use_bias=True, depthwise_initializer=initializers.RandomUniform(minval=0.5, maxval=5., seed=None), bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, **kwargs):
    """Configure the parent convolution and the depthwise-specific settings.

    The parent constructor is called with ``filters=None`` and the
    convolution options; the depthwise initializer, regularizer and
    constraint, and the bias initializer, are then resolved through the
    Keras ``get`` helpers and stored on the instance.
    """
    parent_options = dict(
        filters=None,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        activation=activation,
        use_bias=use_bias,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        bias_constraint=bias_constraint,
    )
    # Double unpacking keeps the TypeError on a duplicated keyword.
    super(Gaussian_filter, self).__init__(**parent_options, **kwargs)

    self.depth_multiplier = depth_multiplier
    self.depthwise_initializer = initializers.get(depthwise_initializer)
    self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
    self.depthwise_constraint = constraints.get(depthwise_constraint)
    self.bias_initializer = initializers.get(bias_initializer)
def _build_model(self):
    """Build the state -> scaled-action network.

    Seven linear Dense layers (32, 64, 128, 128, 128, 64, 32 units) are
    followed by one ReLU and a sigmoid output layer whose kernel is
    initialised uniformly in [-0.001, 0.001]. The sigmoid output is scaled
    by ``action_range`` and shifted by ``action_low``.

    Returns:
        Tuple ``(model, scaled_actions)``: the Keras model and its output
        tensor.
    """
    state_in = layers.Input(shape=(self.state_size, ), name='states')

    # Widen to 128 units, keep three 128-unit layers, then narrow again.
    hidden = state_in
    for width in (32, 64, 128, 128, 128, 64, 32):
        hidden = layers.Dense(units=width, activation=None)(hidden)

    # A single non-linearity before the output layer.
    hidden = layers.Activation('relu')(hidden)

    out_init = initializers.RandomUniform(minval=-0.001, maxval=0.001)
    raw_actions = layers.Dense(units=self.action_size,
                               activation='sigmoid',
                               name='raw_actions',
                               kernel_initializer=out_init)(hidden)

    # Scale the action output per dimension.
    scaled_actions = layers.Lambda(
        lambda x: (x * self.action_range) + self.action_low,
        name='actions')(raw_actions)

    actor_model = models.Model(inputs=state_in, outputs=scaled_actions)
    return actor_model, scaled_actions
def build(self, input_shape=None):
    """Build the wrapped layer and attach the dropout regularisation loss.

    Creates the trainable ``p_logit`` weight on the wrapped layer, derives
    the dropout probability ``self.p`` from it via a sigmoid, and adds a
    loss made of a kernel term and a dropout-entropy term to the wrapped
    layer. Only 2-D input shapes are accepted.
    """
    self.input_spec = InputSpec(shape=input_shape)

    wrapped = self.layer
    if not wrapped.built:
        wrapped.build(input_shape)
        wrapped.built = True

    # The parent build has to run before any new loss is added.
    super(ConcreteDropout, self).build()

    # Dropout probability, stored as a logit.
    logit_init = initializers.RandomUniform(self.init_min, self.init_max)
    self.p_logit = wrapped.add_weight(name='p_logit',
                                      shape=(1,),
                                      initializer=logit_init,
                                      trainable=True)
    self.p = K.sigmoid(self.p_logit[0])

    # Regulariser terms.
    assert len(input_shape) == 2, 'this wrapper only supports Dense layers'
    input_dim = np.prod(input_shape[-1])  # only the last dim is dropped
    weight = wrapped.kernel

    kernel_regularizer = (self.weight_regularizer *
                          K.sum(K.square(weight)) / (1. - self.p))

    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * input_dim

    total_regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    wrapped.add_loss(total_regularizer)
def build(self, input_shape):
    """Create the mu/sigma weight and bias variables of the noisy layer.

    With ``input_dim = input_shape[-1]``, mu variables are drawn uniformly
    from ``[-1/sqrt(input_dim), 1/sqrt(input_dim)]`` and sigma variables
    are set to the constant ``0.5/sqrt(input_dim)`` (see section 3.2 of
    Fortunato et al.).
    """
    self.input_dim = input_shape[-1]

    root_inputs = self.input_dim ** 0.5
    mu_bound = 1 / root_inputs
    self.sigma_initializer = initializers.Constant(value=.5 / root_inputs)
    self.mu_initializer = initializers.RandomUniform(minval=-mu_bound,
                                                     maxval=mu_bound)

    weight_shape = (self.input_dim, self.units)
    bias_shape = (self.units, )

    self.mu_weight = self.add_weight(shape=weight_shape,
                                     initializer=self.mu_initializer,
                                     name='mu_weights',
                                     constraint=self.kernel_constraint,
                                     regularizer=self.kernel_regularizer)
    self.sigma_weight = self.add_weight(shape=weight_shape,
                                        initializer=self.sigma_initializer,
                                        name='sigma_weights',
                                        constraint=self.kernel_constraint,
                                        regularizer=self.kernel_regularizer)
    self.mu_bias = self.add_weight(shape=bias_shape,
                                   initializer=self.mu_initializer,
                                   name='mu_bias',
                                   constraint=self.bias_constraint,
                                   regularizer=self.bias_regularizer)
    self.sigma_bias = self.add_weight(shape=bias_shape,
                                      initializer=self.sigma_initializer,
                                      name='sigma_bias',
                                      constraint=self.bias_constraint,
                                      regularizer=self.bias_regularizer)

    super(NoisyNetDense, self).build(input_shape=input_shape)
def compile_model(self, learning_rate):
    """Assemble and compile the embedding-based Keras model.

    A shared non-negative, L1-regularised embedding ``self.W`` (``n`` rows,
    ``k`` columns, initialised uniformly in [0, 1]) is looked up for one
    index ``i`` and for ``n_pairs`` indices ``ij``; both lookups feed the
    ``PredictedIJ`` layer. The model is stored as ``self.keras_model`` and
    compiled with Adam, MSE loss and temporal sample weights.

    Args:
        learning_rate: Learning rate for the Adam optimizer.
    """
    single_index = layers.Input(shape=(1, ), dtype='int32')
    pair_indices = layers.Input(shape=(self.n_pairs, ), dtype='int32')

    self.W = layers.Embedding(
        self.n,
        self.k,
        embeddings_constraint=constraints.NonNeg(),
        embeddings_initializer=initializers.RandomUniform(minval=0,
                                                          maxval=1),
        embeddings_regularizer=regularizers.l1(1e-3))

    # Remove the length-1 axis left by the single-index lookup.
    drop_axis = layers.Lambda(lambda x: backend.squeeze(x, axis=1))
    w_i = drop_axis(self.W(single_index))
    w_j = self.W(pair_indices)

    prediction = PredictedIJ(self.k, name='predicted_ij')([w_i, w_j])

    self.keras_model = models.Model(
        inputs=[single_index, pair_indices],
        outputs=prediction,
    )
    self.keras_model.compile(optimizers.Adam(lr=learning_rate),
                             loss='mse',
                             sample_weight_mode='temporal')
def build_controller(self):
    """Build the controller's computational graph and return the model.

    For each of ``N_SUBPOL`` sub-policies a CuDNN LSTM reads the previous
    step's input and feeds ``N_OPS`` groups of three softmax heads (type,
    probability, magnitude). The concatenated heads, passed through
    ``expand_dims``, become the next sub-policy's input. If no
    ``model.json`` exists under ``LOG_DIR/controller_model`` the model
    architecture is written there as JSON.
    """
    with tf.variable_scope("Controller"):
        input_layer = layers.Input(shape = INPUT_SHAPE)
        initializer = initializers.RandomUniform(minval=-0.1, maxval=0.1, seed=None)

        step_input = input_layer
        output_list = []
        for subpol_idx in range(N_SUBPOL):
            hidden = layers.CuDNNLSTM(
                units = N_UNITS,
                kernel_initializer = initializer)(step_input)

            subpol_outputs = []
            for op_idx in range(N_OPS):
                name = "subpol_{}_operation_{}".format(subpol_idx + 1,
                                                       op_idx + 1)
                # One softmax head each for type, probability, magnitude.
                for head_size, suffix in ((N_TYPES, '_type'),
                                          (N_PROBS, '_prob'),
                                          (N_MAG, '_magn')):
                    subpol_outputs.append(
                        layers.Dense(head_size,
                                     activation ='softmax',
                                     name = name + suffix,
                                     kernel_initializer = initializer)(hidden))

            output_list.extend(subpol_outputs)
            # The merged heads drive the next sub-policy's LSTM.
            merged = layers.Concatenate()(subpol_outputs)
            step_input = layers.Lambda(expand_dims)(merged)

        model = models.Model(input_layer, output_list)

        # Persist the architecture once; an existing file is left alone.
        json_path = os.path.join(LOG_DIR, "controller_model", "model.json")
        if not os.path.isfile(json_path):
            model_json = model.to_json()
            with open(json_path, "w") as json_file:
                json_file.write(model_json)

        return model
def get_kernel_init(type, param=None, seed=None):
    """Return the Keras kernel initializer selected by ``type``.

    Args:
        type: One of ``'glorot_uniform'``, ``'VarianceScaling'``,
            ``'RandomNormal'``, ``'TruncatedNormal'``, ``'RandomUniform'``.
        param: Standard deviation (normal variants) or half-width (uniform
            variant). When ``None`` a per-type default is used: 0.04,
            0.045 and 0.055 respectively.
        seed: Seed forwarded to the initializer.

    Returns:
        The initializer instance, or ``None`` for an unrecognised ``type``.
    """
    if type == 'glorot_uniform':
        return initializers.glorot_uniform(seed=seed)

    if type == 'VarianceScaling':
        return initializers.VarianceScaling(seed=seed)

    if type == 'RandomNormal':
        stddev = 0.04 if param is None else param
        return initializers.RandomNormal(mean=0.0, stddev=stddev, seed=seed)

    if type == 'TruncatedNormal':
        # 0.045 was found best for non-normalized coordinates
        # (0.09 for normalized ones).
        stddev = 0.045 if param is None else param
        return initializers.TruncatedNormal(mean=0.0,
                                            stddev=stddev,
                                            seed=seed)

    if type == 'RandomUniform':
        # 0.055 was found best for non-normalized coordinates.
        half_width = 0.055 if param is None else param
        return initializers.RandomUniform(minval=-half_width,
                                          maxval=half_width,
                                          seed=seed)

    return None
def create_model():
    """Build the 3-D convolutional binary classifier.

    The network takes inputs of shape ``(30, 240, 320, 3)``. Five Conv3D
    layers with (3, 3, 1) kernels (4, 8, 16, 32, 64 filters) are
    interleaved with (2, 2, 1) max pooling and 25% dropout, then flattened
    into Dense(256, relu) and a single sigmoid unit. The model is returned
    uncompiled.
    """
    first_conv_init = initializers.RandomUniform(minval=-0.05,
                                                 maxval=0.05,
                                                 seed=None)
    stack = [
        Conv3D(4,
               kernel_size=(3, 3, 1),
               activation='relu',
               padding='same',
               input_shape=(30, 240, 320, 3),
               kernel_initializer=first_conv_init),
        Conv3D(8, kernel_size=(3, 3, 1), activation='relu'),
        MaxPooling3D(pool_size=(2, 2, 1)),
        Dropout(0.25),
        Conv3D(16, kernel_size=(3, 3, 1), activation='relu'),
        Conv3D(32, kernel_size=(3, 3, 1), activation='relu'),
        MaxPooling3D(pool_size=(2, 2, 1)),
        Dropout(0.25),
        Conv3D(64, kernel_size=(3, 3, 1), activation='relu'),
        MaxPooling3D(pool_size=(2, 2, 1)),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    return model
def build(self, input_shape):
    """Create the depthwise/pointwise kernels and the optional bias.

    When ``self.H`` or ``self.kernel_lr_multiplier`` equals ``'Glorot'``
    it is replaced by a value derived from the fan-in and fan-out of the
    convolution. The depthwise kernel is initialised uniformly in
    ``[-H, H]`` and clipped to that interval.

    Raises:
        ValueError: if the channel dimension of ``input_shape`` is ``None``.
    """
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')

    input_dim = input_shape[channel_axis]

    # Fan-in / fan-out used by both 'Glorot' substitutions below.
    base = self.kernel_size[0] * self.kernel_size[1]
    nb_input = int(input_dim * base)
    nb_output = int(self.filters * base)

    if self.H == 'Glorot':
        self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output)))

    if self.kernel_lr_multiplier == 'Glorot':
        self.kernel_lr_multiplier = np.float32(
            1. / np.sqrt(1.5 / (nb_input + nb_output)))

    self.depthwise_constraint = Clip(-self.H, self.H)
    self.depthwise_initializer = initializers.RandomUniform(-self.H, self.H)

    depthwise_kernel_shape = self.kernel_size + (input_dim,
                                                 self.depth_multiplier)
    pointwise_kernel_shape = (1, ) * 2 + (self.depth_multiplier * input_dim,
                                          self.filters)

    self.depthwise_kernel = self.add_weight(
        shape=depthwise_kernel_shape,
        initializer=self.depthwise_initializer,
        name='depthwise_kernel',
        regularizer=self.depthwise_regularizer,
        constraint=self.depthwise_constraint)

    self.pointwise_kernel = self.add_weight(
        shape=pointwise_kernel_shape,
        initializer=self.pointwise_initializer,
        name='pointwise_kernel',
        regularizer=self.pointwise_regularizer,
        constraint=self.pointwise_constraint)

    if self.use_bias:
        self.lr_multipliers = [
            self.kernel_lr_multiplier, self.bias_lr_multiplier
        ]
        # Fixed: the shape was passed positionally (colliding with the
        # `name` keyword), the initializer attribute was misspelled
        # `bias_initializers`, and the size came from `self.output_dim`
        # instead of the filter count used by the rest of this method.
        self.bias = self.add_weight(shape=(self.filters, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None

    # Set input spec.
    self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
    self.built = True
def _create_actor_model(self):
    """Create the actor model, a policy mapping the current state to an action.

    The observation is batch-normalised, passed through Dense(400, relu),
    batch-normalised again, passed through Dense(300, relu) and projected
    to a tanh output initialised uniformly in [-0.003, 0.003]. The output
    is multiplied by ``self.action_space.high``. The model is compiled with
    Adam (lr=0.001) and MSE, and plotted to ``actor_model.png``.

    Returns:
        Tuple ``(stateInput, model)``.
    """
    n_actions = self.action_space.shape[0]

    stateInput = Input(shape=self.observation_space.shape)
    normed_state = BatchNormalization()(stateInput)
    hidden_1 = Dense(400, activation='relu')(normed_state)
    normed_hidden_1 = BatchNormalization()(hidden_1)
    hidden_2 = Dense(300, activation='relu')(normed_hidden_1)

    # Small initial output weights are very important here.
    output_init = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    raw_action = Dense(n_actions,
                       activation='tanh',
                       kernel_initializer=output_init)(hidden_2)
    scaled_action = Lambda(lambda x: x * self.action_space.high)(raw_action)

    model = Model(inputs=stateInput, outputs=scaled_action)
    model.compile(optimizer=Adam(lr=0.001), loss='mse')
    plot_model(model,
               to_file='actor_model.png',
               show_shapes=True,
               show_layer_names=True)
    return stateInput, model
def create_model(self):
    """Build the LSTM controller with three softmax heads per operation.

    Implementation note: Keras requires an input, so an input is created
    and later fed zeros -- ugly, but equivalent to disabling those weights.
    Because the LSTM emits one output per input step, it cannot produce
    more than ``SUBPOLICIES`` outputs; this is not desirable, since the
    paper produces 25 subpolicies in the end.

    Returns:
        A Keras model from the ``(SUBPOLICIES, 1)`` input to
        ``3 * SUBPOLICY_OPS`` softmax outputs named ``op<N>-t``,
        ``op<N>-p`` and ``op<N>-m``.
    """
    controller_input = layers.Input(shape=(SUBPOLICIES, 1))
    recurrent_init = initializers.RandomUniform(-0.1, 0.1)
    sequence = layers.LSTM(LSTM_UNITS,
                           recurrent_initializer=recurrent_init,
                           return_sequences=True,
                           name='controller')(controller_input)

    heads = []
    for op_index in range(SUBPOLICY_OPS):
        prefix = 'op%d-' % (op_index + 1)
        # Type, probability and magnitude heads, in that order.
        for size, tag in ((OP_TYPES, 't'), (OP_PROBS, 'p'),
                          (OP_MAGNITUDES, 'm')):
            heads.append(
                layers.Dense(size, activation='softmax',
                             name=prefix + tag)(sequence))

    return models.Model(controller_input, heads)
def lstmmodel(input_nodes, lstm_node, hidden_layer, output_nodes, lr=0.001, pca_variance=None, std_pca=None, dropout=True):
    """Build and compile an LSTM -> Dense(tanh) -> [Dropout] -> Dense model.

    Args:
        input_nodes: Input shape handed to the LSTM layer.
        lstm_node: Number of LSTM units.
        hidden_layer: Number of units in the tanh Dense layer.
        output_nodes: Number of units in the linear output layer.
        lr: Learning rate for the Adam optimizer.
        pca_variance: Forwarded to ``getPcaStdLoss``.
        std_pca: Forwarded to ``getPcaStdLoss``.
        dropout: When true, insert a 30% Dropout before the output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # All initializers share one fixed seed. The unused `randomUniform`
    # initializer that used to be created here has been dropped.
    xavier = initializers.glorot_normal(seed=9001)
    randomNormal = initializers.random_normal(stddev=0.01, seed=9001)

    model = Sequential()
    model.add(
        LSTM(
            lstm_node,
            input_shape=input_nodes,
            kernel_initializer=randomNormal,
            recurrent_initializer=randomNormal,
            unit_forget_bias=True,
        ))
    model.add(Dense(hidden_layer, kernel_initializer=xavier,
                    activation='tanh'))
    if dropout:
        model.add(Dropout(0.3))
    model.add(Dense(output_nodes, kernel_initializer=xavier))

    # compile
    adam = optimizers.adam(lr=lr)
    model.compile(loss=getPcaStdLoss(pca_variance, std_pca), optimizer=adam)
    return model
def build(self, input_shape):
    """Create the clipped kernel and the optional bias of the dense layer.

    When ``self.H`` or ``self.kernel_lr_multiplier`` equals ``'Glorot'``
    it is replaced by a value derived from ``input_dim + units``. The
    kernel is initialised uniformly in ``[-H, H]`` and constrained to that
    interval by ``Clip``.
    """
    assert len(input_shape) >= 2
    input_dim = input_shape[1]

    if self.H == 'Glorot':
        self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units)))
    if self.kernel_lr_multiplier == 'Glorot':
        self.kernel_lr_multiplier = np.float32(
            1. / np.sqrt(1.5 / (input_dim + self.units)))

    self.kernel_constraint = Clip(-self.H, self.H)
    self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
    self.kernel = self.add_weight(shape=(input_dim, self.units),
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    if self.use_bias:
        self.lr_multipliers = [
            self.kernel_lr_multiplier, self.bias_lr_multiplier
        ]
        # Size the bias with `self.units`, the attribute already used for
        # the kernel's output dimension; the former `self.output_dim` is
        # not referenced anywhere else in this method.
        self.bias = self.add_weight(shape=(self.units, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None
    self.built = True
def build_initializer(type, kerasDefaults, seed=None, constant=0.):
    """Return the Keras initializer selected by ``type``.

    Args:
        type: One of ``'constant'``, ``'uniform'``, ``'normal'``,
            ``'glorot_uniform'``, ``'lecun_uniform'``, ``'lecun_normal'``,
            ``'he_normal'``.
        kerasDefaults: Mapping read for ``'minval_uniform'`` and
            ``'maxval_uniform'`` (uniform) or ``'mean_normal'`` and
            ``'stddev_normal'`` (normal).
        seed: Seed forwarded to the random initializers.
        constant: Value used by the ``'constant'`` initializer.

    Returns:
        The initializer instance, or ``None`` for an unrecognised ``type``
        (``'glorot_normal'`` is not handled because it is not generally
        available).
    """
    if type == 'constant':
        return initializers.Constant(value=constant)

    if type == 'uniform':
        low = kerasDefaults['minval_uniform']
        high = kerasDefaults['maxval_uniform']
        return initializers.RandomUniform(minval=low, maxval=high, seed=seed)

    if type == 'normal':
        mean = kerasDefaults['mean_normal']
        stddev = kerasDefaults['stddev_normal']
        return initializers.RandomNormal(mean=mean, stddev=stddev, seed=seed)

    if type == 'glorot_uniform':
        return initializers.glorot_uniform(seed=seed)

    if type == 'lecun_uniform':
        return initializers.lecun_uniform(seed=seed)

    if type == 'lecun_normal':
        return initializers.lecun_normal(seed=seed)

    if type == 'he_normal':
        return initializers.he_normal(seed=seed)

    return None