def _create_mlp():
    global seed, input_dim, hidden_1_dim, hidden_2_dim, output_dim
    # Uniform initializers for the weights; biases start at zero
    alpha = 1. / np.sqrt(hidden_1_dim)
    initializer_1 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    alpha = 1. / np.sqrt(hidden_2_dim)
    initializer_2 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    alpha = 1. / np.sqrt(output_dim)
    initializer_3 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    model = keras.Sequential([
        keras.Input(shape=input_dim),
        layers.Dense(hidden_1_dim, use_bias=True, activation="relu",
                     kernel_initializer=initializer_1, bias_initializer='zeros'),
        layers.Dense(hidden_2_dim, use_bias=True, activation="relu",
                     kernel_initializer=initializer_2, bias_initializer='zeros'),
        layers.Dense(output_dim, use_bias=True, activation="linear",
                     kernel_initializer=initializer_3, bias_initializer='zeros'),
    ])
    model.compile(loss='mean_absolute_error',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum))
    return model
def __init__(self, state_dim, action_dim, action_bound, name='actor'):
    super(ActorNetwork, self).__init__()
    self.dimensions1 = state_dim
    self.dimensions2 = 1080
    self.dimensions3 = 400
    self.dimensions4 = 128
    self.num_actions = action_bound
    self.filepath = "/home/asohail2/ros/src/rl_navigation/src/models/"
    self.model_name = name
    self.checkpoint_file = os.path.join(self.filepath, self.model_name + '_ddpg_saved_model')
    # The activation layers get their own attribute names (full_conN_act);
    # reusing self.full_conN would overwrite the Dense layers defined just above.
    self.full_con1 = Dense(self.dimensions1)
    self.full_con1_bn = BatchNormalization(scale=True, center=True, epsilon=1e-5)
    self.full_con1_act = Activation("relu")
    self.full_con2 = Dense(self.dimensions2)
    self.full_con2_bn = BatchNormalization(scale=True, center=True, epsilon=1e-5)
    self.full_con2_act = Activation("relu")
    self.full_con3 = Dense(self.dimensions3)
    self.full_con3_bn = BatchNormalization(scale=True, center=True, epsilon=1e-5)
    self.full_con3_act = Activation("relu")
    self.full_con4 = Dense(self.dimensions4)
    self.full_con4_bn = BatchNormalization(scale=True, center=True, epsilon=1e-5)
    self.full_con4_act = Activation("relu")
    self.speed = Dense(1, activation="tanh",
                       bias_initializer=initializers.RandomUniform(-0.003, 0.003),
                       kernel_initializer=initializers.RandomUniform(-0.003, 0.003))
    self.angular = Dense(1, activation="tanh",
                         bias_initializer=initializers.RandomUniform(-0.003, 0.003),
                         kernel_initializer=initializers.RandomUniform(-0.003, 0.003))
def create_mlp():
    global seed, input_dim, hidden_1_dim, hidden_2_dim, output_dim
    # Uniform initializers for the weights; biases start at zero
    alpha = 1. / np.sqrt(hidden_1_dim)
    initializer_1 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    alpha = 1. / np.sqrt(hidden_2_dim)
    initializer_2 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    alpha = 1. / np.sqrt(output_dim)
    initializer_3 = initializers.RandomUniform(minval=-alpha, maxval=alpha, seed=seed)
    model = keras.Sequential([
        keras.Input(shape=input_dim),
        layers.Dense(hidden_1_dim, use_bias=True, activation="relu",
                     kernel_initializer=initializer_1, bias_initializer='zeros'),
        layers.Dense(hidden_2_dim, use_bias=True, activation="relu",
                     kernel_initializer=initializer_2, bias_initializer='zeros'),
        layers.Dense(output_dim, use_bias=True, activation="softmax",
                     kernel_initializer=initializer_3, bias_initializer='zeros'),
    ])
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    return model
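# A minimal usage sketch for create_mlp above (not part of the original source),
# assuming hypothetical values for the module-level globals it reads (seed,
# dimensions, learning_rate, momentum) and a synthetic batch of data.
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers, initializers

seed, learning_rate, momentum = 42, 0.01, 0.9
input_dim, hidden_1_dim, hidden_2_dim, output_dim = (784,), 128, 64, 10

model = create_mlp()
x = np.random.rand(32, 784).astype("float32")  # dummy feature batch
y = np.random.randint(0, 10, size=(32,))       # integer labels for sparse CE
model.fit(x, y, epochs=1, verbose=0)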
def __init__(self, n_states, n_actions, hidden_neurons_1, hidden_neurons_2,
             network_name, save_dir='tmp/SAC', init_value=3e-3):
    super(CriticNetwork, self).__init__()
    self.network_name = network_name
    self.checkpoint_dir = save_dir
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    self.checkpoint_file = os.path.join(self.checkpoint_dir, network_name + '_SAC')
    self.fc1 = Dense(units=hidden_neurons_1, activation='relu',
                     input_shape=(n_states + n_actions,))
    self.fc2 = Dense(units=hidden_neurons_2, activation='relu')
    self.q = Dense(
        units=1,
        kernel_initializer=initializers.RandomUniform(minval=-init_value,
                                                      maxval=init_value),  # changed
        bias_initializer=initializers.RandomUniform(minval=-init_value,
                                                    maxval=init_value))
def build_model(hparams):
    input_layer = Input(shape=(hparams["max_sequence_length"],))
    embedding_layer_static = get_w2v('').get_keras_embedding(train_embeddings=False)(input_layer)
    embedding_layer = get_w2v('').get_keras_embedding(train_embeddings=True)(input_layer)

    # One conv/pool branch per kernel size, applied to both the static and the
    # trainable embedding channels, then concatenated.
    submodels = []
    kernel_sizes = hparams['kernel_sizes'].split('-')
    for ks in kernel_sizes:
        conv_1_d = Conv1D(
            activation='relu',
            filters=hparams["filters"],
            kernel_size=int(ks),
            kernel_constraint=max_norm(hparams["max_norm_value"]))
        conv_layer_static = conv_1_d(embedding_layer_static)
        conv_layer = conv_1_d(embedding_layer)
        max_pooling_static = GlobalMaxPooling1D()(conv_layer_static)
        max_pooling = GlobalMaxPooling1D()(conv_layer)
        concatenate_layer = Concatenate()([max_pooling_static, max_pooling])
        submodels.append(concatenate_layer)

    concat = Concatenate()(submodels)
    dropout_layer_1 = Dropout(hparams['dropout_ratio'])(concat)
    hidden_layer = Dense(
        hparams['hidden_size'],
        activation='relu',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(len(kernel_sizes) * 2 * hparams['filters']),
            maxval=1 / np.sqrt(len(kernel_sizes) * 2 * hparams['filters'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_1)
    dropout_layer_2 = Dropout(hparams['dropout_ratio'])(hidden_layer)
    output_layer = Dense(
        2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(hparams['hidden_size']),
            maxval=1 / np.sqrt(hparams['hidden_size'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_2)

    model = Model(inputs=[input_layer], outputs=[output_layer])
    model.compile(
        loss=dice_loss,
        optimizer=Adam(learning_rate=hparams["learning_rate"]),
        metrics=[f1_score])

    from keras.utils.vis_utils import plot_model
    plot_model(model, "model_cnn_multichannel.png", show_layer_names=False)
    return model
def build_model(hparams):
    if hparams['word_embedding'] == 'w2v':
        input_layer = Input(shape=(hparams['max_sequence_length'],))
        embedding_layer = get_w2v('').get_keras_embedding(
            train_embeddings=hparams['train_embeddings'])(input_layer)
    if hparams['word_embedding'] == 'elmo':
        input_layer = Input(shape=(hparams['max_sequence_length'], 1024,))
        embedding_layer = input_layer

    # One conv/pool branch per kernel size, concatenated afterwards.
    submodels = []
    kernel_sizes = hparams['kernel_sizes'].split('-')
    for ks in kernel_sizes:
        conv_layer = Conv1D(
            activation='relu',
            filters=hparams['filters'],
            kernel_size=int(ks),
            kernel_constraint=max_norm(hparams['max_norm_value']))(embedding_layer)
        max_pooling = GlobalMaxPooling1D()(conv_layer)
        submodels.append(max_pooling)
    concat = Concatenate()(submodels)

    dropout_layer_1 = Dropout(hparams['dropout_ratio'])(concat)
    hidden_layer = Dense(
        hparams['hidden_size'],
        activation='relu',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(len(kernel_sizes) * hparams['filters']),
            maxval=1 / np.sqrt(len(kernel_sizes) * hparams['filters'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_1)
    # the second dropout consumes the hidden layer (not concat), so the hidden
    # layer actually feeds the classifier, matching the sibling builders
    dropout_layer_2 = Dropout(hparams['dropout_ratio'])(hidden_layer)
    output_layer = Dense(
        2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(hparams['hidden_size']),
            maxval=1 / np.sqrt(hparams['hidden_size'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_2)

    model = Model(inputs=[input_layer], outputs=[output_layer])
    model.compile(
        loss=metric.dice_loss,
        optimizer=Adam(learning_rate=hparams['learning_rate']),
        metrics=[f1_score])
    return model
def build(self, input_shape):
    dtype = dtypes.as_dtype(self.dtype or k.floatx())
    if not (dtype.is_floating or dtype.is_complex):
        raise TypeError('Unable to build `NoisyDense` layer with non-floating '
                        'point dtype %s' % (dtype,))
    input_shape = tensor_shape.TensorShape(input_shape)
    if tensor_shape.dimension_value(input_shape[-1]) is None:
        raise ValueError('The last dimension of the inputs to `NoisyDense` '
                         'should be defined. Found `None`.')
    last_dim = tensor_shape.dimension_value(input_shape[-1])
    self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})

    if self.std_func is None:
        std = math.sqrt(3 / last_dim)
    else:
        std = self.std_func(last_dim)
    if self.sigma_func is not None:
        sigma_init = self.sigma_func(self.sigma_init, last_dim)
    else:
        sigma_init = self.sigma_init

    self.mu_weights = self.add_weight(
        'mu_weights',
        shape=[last_dim, self.units],
        initializer=initializers.RandomUniform(minval=-std, maxval=std),
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    self.sigma_weights = self.add_weight(
        'sigma_weights',
        shape=[last_dim, self.units],
        initializer=initializers.Constant(value=sigma_init),
        dtype=self.dtype,
        trainable=True)
    if self.use_bias:
        self.mu_bias = self.add_weight(
            'mu_bias',
            shape=[self.units, ],
            initializer=initializers.RandomUniform(minval=-std, maxval=std),
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
            dtype=self.dtype,
            trainable=True)
        self.sigma_bias = self.add_weight(
            'sigma_bias',
            shape=[self.units, ],
            initializer=initializers.Constant(value=sigma_init),
            dtype=self.dtype,
            trainable=True)
    self.built = True
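# A minimal sketch (not from the original source) of how a forward pass could
# combine the mu/sigma parameters built above, following the independent-Gaussian
# noise scheme of the NoisyNet paper (Fortunato et al.); the layer's real call()
# may differ, e.g. it may use factorised noise instead.
import tensorflow as tf

def call(self, inputs):
    # sample fresh noise for the weights on every forward pass
    eps_w = tf.random.normal(tf.shape(self.mu_weights), dtype=self.dtype)
    kernel = self.mu_weights + self.sigma_weights * eps_w  # noisy weights
    outputs = tf.matmul(inputs, kernel)
    if self.use_bias:
        eps_b = tf.random.normal(tf.shape(self.mu_bias), dtype=self.dtype)
        outputs = outputs + self.mu_bias + self.sigma_bias * eps_b  # noisy bias
    return outputs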
def __init__(self, env):
    super().__init__()
    self.obs_norm = kl.BatchNormalization()
    self.act_norm = kl.BatchNormalization()
    self.concat = kl.Concatenate(axis=-1)
    self.hidden1 = kl.Dense(units=400, activation='relu')
    self.hidden2 = kl.Dense(units=300, activation='relu')
    self.last_layer = kl.Dense(
        units=1,
        kernel_initializer=ki.RandomUniform(-3e-3, 3e-3),
        bias_initializer=ki.RandomUniform(-3e-3, 3e-3),
        activation='linear')
    self.reshape = kl.Reshape(tuple())  # squeeze the Q-value to a scalar per sample
def build_model(hparams):
    if hparams['word_embedding'] == 'w2v':
        input_layer = Input(shape=(hparams['max_sequence_length'],))
        embedding_layer = get_w2v('').get_keras_embedding(
            train_embeddings=hparams['train_embeddings'])(input_layer)
    if hparams['word_embedding'] == 'elmo':
        input_layer = Input(shape=(hparams['max_sequence_length'], 1024,))
        embedding_layer = input_layer
    if hparams["word_embedding"] == "random":
        input_layer = Input(shape=(hparams['max_sequence_length'],))
        embedding_layer = Embedding(
            hparams["dictionary_len"] + 2,
            hparams["embedding_size"],
            input_length=hparams["max_sequence_length"],
            embeddings_initializer=initializers.RandomNormal(
                mean=0., stddev=2 / hparams["max_sequence_length"]))(input_layer)

    flatten_layer = Flatten()(embedding_layer)
    dropout_layer_1 = Dropout(hparams["dropout_ratio"])(flatten_layer)
    hidden_layer = Dense(
        hparams["hidden_size"],
        activation='relu',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(hparams["embedding_size"] * hparams["max_sequence_length"]),
            maxval=1 / np.sqrt(hparams["embedding_size"] * hparams["max_sequence_length"])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_1)
    dropout_layer_2 = Dropout(hparams["dropout_ratio"])(hidden_layer)
    output_layer = Dense(
        2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(hparams["hidden_size"]),
            maxval=1 / np.sqrt(hparams["hidden_size"])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_2)

    model = Model(inputs=[input_layer], outputs=[output_layer])
    model.compile(
        loss=metric.dice_loss,
        optimizer=Adam(learning_rate=hparams["learning_rate"]),
        metrics=[f1_score])
    return model
def build_model(hparams):
    bert_model = TFAutoModel.from_pretrained(hparams["bert_file_name"])
    bert_model.trainable = True
    if hparams['trainable_bert'] is not None:
        bert_model.trainable = hparams['trainable_bert']
    input_layer_ids = Input(shape=(hparams['max_sequence_length'],), dtype='int64')
    input_layer_masks = Input(shape=(hparams['max_sequence_length'],), dtype='int64')
    bert_output = bert_model([input_layer_ids, input_layer_masks])
    bert_output = bert_output[1]  # pooled output
    classifier = Dense(
        units=2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(bert_output.shape[1]),
            maxval=1 / np.sqrt(bert_output.shape[1])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(bert_output)
    model = Model(inputs=[input_layer_ids, input_layer_masks], outputs=classifier)
    model.compile(
        loss=dice_loss,
        optimizer=Adam(learning_rate=hparams["learning_rate"]),
        metrics=[f1_score])
    plot_model(model, "model_bert.png", show_layer_names=False)
    return model
def _build_model(self):
    # Deep Q-network mapping state_size |S| to action_size |A|.
    # Keep adding depth until the loss stops exploding while Q keeps increasing.
    hidden_dim = 8
    # Ensure reproducibility
    alpha = 1. / np.sqrt(hidden_dim)
    initializer = initializers.RandomUniform(minval=-alpha, maxval=alpha,
                                             seed=self.random_state)
    # seven identical hidden layers, followed by a linear Q-value head
    hidden = [layers.Dense(hidden_dim, use_bias=True, activation="relu",
                           bias_initializer='zeros', kernel_initializer=initializer)
              for _ in range(7)]
    model = keras.Sequential(
        [keras.Input(shape=self.state_size)] + hidden +
        [layers.Dense(self.action_size, activation='linear')])
    model.compile(loss='mse',
                  optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate))
    return model
def create_model(self):
    # Implementation note: Keras requires an input. I create an input and then feed
    # zeros to the network. Ugly, but it's the same as disabling those weights.
    # Furthermore, Keras LSTM input=output, so we cannot produce more than SUBPOLICIES
    # outputs. This is not desirable, since the paper produces 25 subpolicies in the
    # end.
    input_layer = layers.Input(shape=(SUBPOLICIES, 1))
    init = initializers.RandomUniform(-0.1, 0.1)  # uniformly distributed random values
    lstm_layer = layers.LSTM(
        LSTM_UNITS,                  # output dimensionality
        recurrent_initializer=init,  # initializer for the recurrent weights
        return_sequences=True,       # return the full output sequence
        name='controller')(input_layer)
    outputs = []
    for i in range(SUBPOLICY_OPS):
        name = 'op%d-' % (i + 1)
        # three softmax heads per operation: type, probability and magnitude
        outputs += [
            layers.Dense(OP_TYPES, activation='softmax', name=name + 't')(lstm_layer),
            layers.Dense(OP_PROBS, activation='softmax', name=name + 'p')(lstm_layer),
            layers.Dense(OP_MAGNITUDES, activation='softmax', name=name + 'm')(lstm_layer),
        ]
    return models.Model(input_layer, outputs)
def build(self, input_shape):
    self.input_spec = InputSpec(shape=input_shape)
    if not self.layer.built:
        self.layer.build(input_shape)
        self.layer.built = True
    super(ConcreteDropout, self).build()

    # initialise p
    self.p_logit = self.layer.add_weight(
        name="p_logit",
        shape=(1,),
        initializer=initializers.RandomUniform(self.init_min, self.init_max),
        trainable=True)
    self.p = K.sigmoid(self.p_logit[0])

    # Initialise regulariser / prior KL term
    input_dim = np.prod(input_shape[1:])  # We drop only last dim
    weight = self.layer.kernel
    kernel_regularizer = self.weight_regularizer * K.sum(K.square(weight)) / (1.0 - self.p)
    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * input_dim
    regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    self.layer.add_loss(regularizer)
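# A hypothetical usage sketch for the ConcreteDropout wrapper whose build() is
# shown above; the constructor signature (wrapped layer plus regularizer weights
# and p_logit init range) is assumed from the attributes build() references, not
# taken from the original source.
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

inputs = Input(shape=(20,))
x = ConcreteDropout(Dense(64, activation='relu'),
                    weight_regularizer=1e-6, dropout_regularizer=1e-5,
                    init_min=0.1, init_max=0.1)(inputs)
model = Model(inputs, Dense(1)(x))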
def get_quantized_initializer(w_initializer, w_range):
    """Gets the initializer and scales it by the range."""
    if isinstance(w_initializer, six.string_types):
        if w_initializer == "he_normal":
            return initializers.VarianceScaling(scale=2 * w_range, mode="fan_in",
                                                distribution="normal", seed=None)
        elif w_initializer == "he_uniform":
            return initializers.VarianceScaling(scale=2 * w_range, mode="fan_in",
                                                distribution="uniform", seed=None)
        elif w_initializer == "glorot_normal":
            return initializers.VarianceScaling(scale=w_range, mode="fan_avg",
                                                distribution="normal", seed=None)
        elif w_initializer == "glorot_uniform":
            return initializers.VarianceScaling(scale=w_range, mode="fan_avg",
                                                distribution="uniform", seed=None)
        elif w_initializer == "random_uniform":
            return initializers.RandomUniform(-w_range, w_range)
    return w_initializer
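# A short usage sketch (not from the original source): resolve a named Keras
# initializer scaled by a weight range, then hand it to a layer.
from tensorflow.keras.layers import Dense

init = get_quantized_initializer("he_uniform", w_range=0.5)
layer = Dense(32, kernel_initializer=init)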
def build(self, input_shape):
    """
    This method must be defined for any custom layer; here you define the
    trainable parameters.

    input_shape: a tensor that automatically captures the dimensions of the
        input, provided by TensorFlow.
    """
    # get the number of parameters (waveguides)
    self.num_wg = input_shape.as_list()[-1]

    # define the trainable parameters representing the zero-voltage Hamiltonian
    self.H0 = self.add_weight(
        name="H0",
        shape=tf.TensorShape((self.num_wg, self.num_wg)),
        initializer=initializers.RandomUniform(minval=0, maxval=100, seed=30),
        trainable=True)

    # define an operator that makes the lower-triangular part pure imaginary
    self.complex_operator = -1j * np.ones(
        (self.num_wg, self.num_wg)) * np.tri(self.num_wg, self.num_wg, -1) + np.ones(
        (self.num_wg, self.num_wg)) * np.tri(self.num_wg, self.num_wg, 0).T

    # this has to be called for any tensorflow custom layer
    super(Param_to_Ham_Layer, self).build(input_shape)
def test_rotate(knowledge_graph):
    margin = 2.34
    norm_order = 1.234
    # this test creates a random untrained model and predicts every possible edge
    # in the graph, and compares that to a direct implementation of the scoring
    # method in the paper
    gen = KGTripleGenerator(knowledge_graph, 3)

    # use a random initializer with a large range, so that any differences are obvious
    init = initializers.RandomUniform(-1, 1)
    rotate_model = RotatE(
        gen, 5, margin=margin, norm_order=norm_order, embeddings_initializer=init
    )

    x_inp, x_out = rotate_model.in_out_tensors()
    model = Model(x_inp, x_out)
    model.compile(loss=tf_losses.BinaryCrossentropy(from_logits=True))

    every_edge = itertools.product(
        knowledge_graph.nodes(),
        knowledge_graph._edges.types.pandas_index,
        knowledge_graph.nodes(),
    )
    df = triple_df(*every_edge)

    # check the model can be trained on a few (uneven) batches
    model.fit(
        gen.flow(df.iloc[:7], negative_samples=2),
        validation_data=gen.flow(df.iloc[7:14], negative_samples=3),
    )

    # compute the exact values based on the model by extracting the embeddings for
    # each element and computing the RotatE score margin - ||e_s * w_r - e_o|| directly
    s_idx = knowledge_graph.node_ids_to_ilocs(df.source)
    r_idx = knowledge_graph._edges.types.to_iloc(df.label)
    o_idx = knowledge_graph.node_ids_to_ilocs(df.target)

    nodes, edge_types = rotate_model.embeddings()
    # the rows correspond to the embeddings for the given edge, so we can do bulk operations
    e_s = nodes[s_idx, :]
    w_r = edge_types[r_idx, :]
    e_o = nodes[o_idx, :]

    # every edge-type embedding should be a unit rotation
    np.testing.assert_allclose(np.abs(w_r), 1)

    actual = margin - np.linalg.norm(e_s * w_r - e_o, ord=norm_order, axis=1)

    # predict every edge using the model
    prediction = model.predict(gen.flow(df))

    # (use an absolute tolerance to allow for catastrophic cancellation around very small values)
    np.testing.assert_allclose(prediction[:, 0], actual, rtol=1e-3, atol=1e-14)

    # the model is stateful (i.e. it holds the weights permanently) so the
    # predictions with a second 'build' should be the same as the original one
    model2 = Model(*rotate_model.in_out_tensors())
    prediction2 = model2.predict(gen.flow(df))
    np.testing.assert_array_equal(prediction, prediction2)
def build(self, input_shape):
    assert len(input_shape) >= 2
    input_dim = input_shape[1]

    if self.H == 'Glorot':
        self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units)))
        #print('Glorot H: {}'.format(self.H))
    if self.kernel_lr_multiplier == 'Glorot':
        self.kernel_lr_multiplier = np.float32(
            1. / np.sqrt(1.5 / (input_dim + self.units)))
        #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier))

    self.kernel_constraint = Clip(-self.H, self.H)
    self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
    self.kernel = self.add_weight(shape=(input_dim, self.units),
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    if self.use_bias:
        self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier]
        # bias shape matches the layer's output width (self.units)
        self.bias = self.add_weight(shape=(self.units,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.kernel_lr_multiplier]
        self.bias = None

    self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
    self.built = True
def create_mlp():
    global seed, input_dim, hidden_1_dim, hidden_2_dim, output_dim
    # Convert the input shape tuple into a single number
    input_dim_1d = np.prod(input_dim)
    # Uniform initializer for the conv weights
    initializer = initializers.RandomUniform(minval=-1, maxval=1, seed=seed)
    model = keras.Sequential([
        keras.Input(shape=input_dim),
        layers.Conv2D(filter_1_dim, kernel_size=(3, 3), padding='valid', activation="relu",
                      kernel_initializer=initializer, bias_initializer='zeros'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(filter_2_dim, kernel_size=(3, 3), padding='valid', activation="relu",
                      kernel_initializer=initializer, bias_initializer='zeros'),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(rate=0.2, seed=seed),
        layers.Dense(input_dim_1d, use_bias=True, activation='relu'),
        layers.Dropout(rate=0.2, seed=seed),
        layers.Dense(input_dim_1d, use_bias=True, activation='relu'),
        layers.Dropout(rate=0.2, seed=seed),
        layers.Dense(output_dim, use_bias=True, activation='softmax'),
    ])
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum),
                  metrics=['accuracy', 'sparse_categorical_crossentropy'])
    return model
def iris_2_layers(name):
    initializer = initializers.RandomUniform(minval=-2, maxval=2, seed=None)
    model = Sequential(name=name)
    model.add(Dense(3, input_dim=2, activation='softmax', kernel_initializer=initializer))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def build(self, input_shape=None):
    self.input_spec = InputSpec(shape=input_shape)
    if not self.layer.built:
        self.layer.build(input_shape)
        self.layer.built = True
    # this is very weird.. we must call super before we add new losses
    super(SpatialConcreteDropout, self).build()

    # initialise p
    self.p_logit = self.layer.add_weight(
        name='p_logit',
        shape=(1,),
        initializer=initializers.RandomUniform(self.init_min, self.init_max),
        trainable=True)
    self.p = K.sigmoid(self.p_logit[0])

    # initialise regulariser / prior KL term
    assert len(input_shape) == 4, 'this wrapper only supports Conv2D layers'
    if self.data_format == 'channels_first':
        input_dim = input_shape[1]  # we drop only channels
    else:
        input_dim = input_shape[3]

    weight = self.layer.kernel
    kernel_regularizer = self.weight_regularizer * K.sum(K.square(weight)) / (1. - self.p)
    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * int(input_dim)
    regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    self.layer.add_loss(regularizer)
def build(self, input_shape=None):
    self.input_spec = InputSpec(shape=input_shape)
    if not self.layer.built:
        self.layer.build(input_shape)
        self.layer.built = True
    # this is very weird.. we must call super before we add new losses
    super(ConcreteDropout, self).build()

    # initialise p
    self.p_logit = self.layer.add_weight(
        name='p_logit',
        shape=(1,),
        initializer=initializers.RandomUniform(self.init_min, self.init_max),
        trainable=True)
    self.p = K.sigmoid(self.p_logit[0])

    # initialise regulariser / prior KL term
    assert len(input_shape) == 2, 'this wrapper only supports Dense layers'
    input_dim = np.prod(input_shape[-1]).value  # we drop only last dim (TF1 Dimension)
    weight = self.layer.kernel
    kernel_regularizer = self.weight_regularizer * K.sum(K.square(weight)) / (1. - self.p)
    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * input_dim
    regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    self.layer.add_loss(regularizer)
def build_model(hparams):
    input_layer_dynamic = Input(shape=(hparams['max_sequence_length'],), name='w2v_input')
    input_layer_static = Input(shape=(hparams['max_sequence_length'], hparams['embedding_size']),
                               name='ELMo_input')
    embedding_layer = get_w2v('').get_keras_embedding(train_embeddings=True)(input_layer_dynamic)

    submodels = []
    submodels.extend(build_submodels(hparams['kernel_sizes'], hparams['filters'],
                                     hparams['max_norm_value'], embedding_layer))
    submodels.extend(build_submodels(hparams['kernel_sizes'], hparams['filters'],
                                     hparams['max_norm_value'], input_layer_static))
    concat = Concatenate()(submodels)

    dropout_layer_1 = Dropout(hparams['dropout_ratio'])(concat)
    hidden_layer = Dense(
        hparams['hidden_size'],
        activation='relu',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(2 * len(hparams['kernel_sizes']) * hparams['filters']),
            maxval=1 / np.sqrt(2 * len(hparams['kernel_sizes']) * hparams['filters'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_1)
    dropout_layer_2 = Dropout(hparams['dropout_ratio'])(hidden_layer)
    output_layer = Dense(
        2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(hparams['hidden_size']),
            maxval=1 / np.sqrt(hparams['hidden_size'])),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization']))(dropout_layer_2)

    model = Model(inputs=[input_layer_dynamic, input_layer_static], outputs=output_layer)
    model.compile(
        loss=metric.dice_loss,
        optimizer=Adam(learning_rate=hparams['learning_rate']),
        metrics=[f1_score])
    # model.summary()
    return model
def __init__(self, env):
    super().__init__()
    # action dimension is the size of the action vector, not its rank
    self.act_dim = env.action_space.shape[0]
    self.act_high = env.action_space.high
    self.act_low = env.action_space.low
    self.norm = kl.BatchNormalization()
    self.hidden1 = kl.Dense(units=400, activation='relu')
    self.hidden2 = kl.Dense(units=300, activation='relu')
    self.last_layer = kl.Dense(
        units=self.act_dim,
        kernel_initializer=ki.RandomUniform(-3e-3, 3e-3),
        bias_initializer=ki.RandomUniform(-3e-3, 3e-3),
        activation='tanh')
    # map the tanh output from [-1, 1] onto [act_low, act_high]
    self.transform = kl.Lambda(lambda x: (x + 1.) * (self.act_high - self.act_low) / 2.
                               + self.act_low)
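# A quick numeric check (not from the original source) that the Lambda transform
# above maps the tanh range [-1, 1] onto [act_low, act_high]: x = -1 gives
# act_low, x = +1 gives act_low + (act_high - act_low) = act_high.
import numpy as np

act_low, act_high = np.array([-2.0]), np.array([2.0])
for x in (-1.0, 0.0, 1.0):
    print((x + 1.) * (act_high - act_low) / 2. + act_low)  # [-2.], [0.], [2.]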
def __init__(self,
             units,
             activation=None,
             is_base_trainable=True,
             is_diag_trainable=True,
             use_bias=True,
             base_initializer='optimized_uniform',
             diag_initializer='optimized_uniform',
             bias_initializer='zeros',
             base_regularizer=None,
             diag_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             base_constraint=None,
             diag_constraint=None,
             bias_constraint=None,
             **kwargs):
    super(LeanSpectral, self).__init__(activity_regularizer=activity_regularizer, **kwargs)

    self.units = int(units) if not isinstance(units, int) else units
    self.activation = activations.get(activation)
    self.is_base_trainable = is_base_trainable
    self.is_diag_trainable = is_diag_trainable
    self.use_bias = use_bias

    # 'optimized_uniform' initializers optimized by Buffoni and Giambagli
    # (string comparison must use ==, not the identity operator `is`)
    if base_initializer == 'optimized_uniform':
        self.base_initializer = initializers.RandomUniform(-0.2, 0.2)
    else:
        self.base_initializer = initializers.get(base_initializer)
    if diag_initializer == 'optimized_uniform':
        self.diag_initializer = initializers.RandomUniform(-0.5, 0.5)
    else:
        self.diag_initializer = initializers.get(diag_initializer)
    self.bias_initializer = initializers.get(bias_initializer)

    self.base_regularizer = regularizers.get(base_regularizer)
    self.diag_regularizer = regularizers.get(diag_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)

    self.base_constraint = constraints.get(base_constraint)
    self.diag_constraint = constraints.get(diag_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
def __init__(self,
             t_left_initializer='zeros',
             a_left_initializer=initializers.RandomUniform(minval=0, maxval=1),
             t_right_initializer=initializers.RandomUniform(minval=0, maxval=5),
             a_right_initializer='ones',
             shared_axes=None,
             **kwargs):
    super(SReLU, self).__init__(**kwargs)
    self.supports_masking = True
    self.t_left_initializer = initializers.get(t_left_initializer)
    self.a_left_initializer = initializers.get(a_left_initializer)
    self.t_right_initializer = initializers.get(t_right_initializer)
    self.a_right_initializer = initializers.get(a_right_initializer)
    if shared_axes is None:
        self.shared_axes = None
    elif not isinstance(shared_axes, (list, tuple)):
        self.shared_axes = [shared_axes]
    else:
        self.shared_axes = list(shared_axes)
def test_complex(knowledge_graph, sample_strategy):
    # this test creates a random untrained model and predicts every possible edge
    # in the graph, and compares that to a direct implementation of the scoring
    # method in the paper
    gen = KGTripleGenerator(knowledge_graph, 3)

    # use a random initializer with a large positive range, so that any differences are obvious
    init = initializers.RandomUniform(-1, 1)
    complex_model = ComplEx(gen, 5, embeddings_initializer=init)

    x_inp, x_out = complex_model.in_out_tensors()
    model = Model(x_inp, x_out)

    if sample_strategy == "uniform":
        loss = tf_losses.BinaryCrossentropy(from_logits=True)
    else:
        loss = sg_losses.SelfAdversarialNegativeSampling()
    model.compile(loss=loss)

    every_edge = itertools.product(
        knowledge_graph.nodes(),
        knowledge_graph._edges.types.pandas_index,
        knowledge_graph.nodes(),
    )
    df = triple_df(*every_edge)

    # check the model can be trained on a few (uneven) batches
    model.fit(
        gen.flow(df.iloc[:7], negative_samples=2, sample_strategy=sample_strategy),
        validation_data=gen.flow(
            df.iloc[7:14], negative_samples=3, sample_strategy=sample_strategy
        ),
    )

    # compute the exact values based on the model by extracting the embeddings for
    # each element and doing the Re(<e_s, w_r, conj(e_o)>) inner product
    s_idx = knowledge_graph.node_ids_to_ilocs(df.source)
    r_idx = knowledge_graph._edges.types.to_iloc(df.label)
    o_idx = knowledge_graph.node_ids_to_ilocs(df.target)

    nodes, edge_types = complex_model.embeddings()
    # the rows correspond to the embeddings for the given edge, so we can do bulk operations
    e_s = nodes[s_idx, :]
    w_r = edge_types[r_idx, :]
    e_o = nodes[o_idx, :]
    actual = (e_s * w_r * e_o.conj()).sum(axis=1).real

    # predict every edge using the model
    prediction = model.predict(gen.flow(df))

    # (use an absolute tolerance to allow for catastrophic cancellation around very small values)
    np.testing.assert_allclose(prediction[:, 0], actual, rtol=1e-3, atol=1e-6)

    # the model is stateful (i.e. it holds the weights permanently) so the
    # predictions with a second 'build' should be the same as the original one
    model2 = Model(*complex_model.in_out_tensors())
    prediction2 = model2.predict(gen.flow(df))
    np.testing.assert_array_equal(prediction, prediction2)
def build_model(self):
    """Build an actor (policy) network that maps states -> actions."""
    # Define input layer (states)
    states = layers.Input(shape=(self.state_size,), name='states')
    l2_reg = 1e-2

    # Add hidden layers; each layer must consume the previous one (`net`),
    # otherwise only the last Dense branch would be connected to the output.
    net = layers.Dense(units=400, kernel_regularizer=regularizers.l2(l2_reg))(states)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.Dense(units=200, kernel_regularizer=regularizers.l2(l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)
    net = layers.Dense(units=128, kernel_regularizer=regularizers.l2(l2_reg))(net)
    net = layers.BatchNormalization()(net)
    net = layers.LeakyReLU(1e-2)(net)

    # Try different layer sizes, activations, add batch normalization, regularizers, etc.

    # Add final output layer with sigmoid activation
    #raw_actions = layers.Dense(units=self.action_size, activation='tanh',
    #    kernel_regularizer=regularizers.l2(1e-5),
    #    kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
    #    name='raw_actions')(net)
    raw_actions = layers.Dense(
        units=self.action_size,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
        name='raw_actions')(net)

    # Scale [0, 1] output for each action dimension to proper range
    actions = layers.Lambda(lambda x: (x * self.action_range) + self.action_low,
                            name='actions')(raw_actions)

    # Create Keras model
    self.model = models.Model(inputs=states, outputs=actions)

    # Define loss function using action value (Q value) gradients
    action_gradients = layers.Input(shape=(self.action_size,))
    loss = K.mean(-action_gradients * actions)

    # Incorporate any additional losses here (e.g. from regularizers)

    # Define optimizer and training function
    optimizer = optimizers.Adam()
    updates_op = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
    self.train_fn = K.function(
        inputs=[self.model.input, action_gradients, K.learning_phase()],
        outputs=[],
        updates=updates_op)
def build(self, input_shape):
    assert len(input_shape) == 2
    input_dim = input_shape[1]
    self.gamma_elm = self.add_weight(
        name='gamma_elm',
        shape=(1,),
        initializer=initializers.RandomUniform(-2, -1))
    super(GaussianKernel2, self).build(input_shape)  # Be sure to call this somewhere!
def _buildLayer(self, layerDef):
    _layer = None
    _lname = layerDef['name']
    _ltype = layerDef['type']

    if _ltype == 'fc':
        # units for the layer, defaulting to the output shape if this is the last layer
        _lUnits = layerDef.get('units', self._outputShape[0])
        # activation fcn for the layer, defaulting to linear if none given
        _lActivation = layerDef.get('activation', 'linear')
        # whether or not to use bias
        _lUseBias = layerDef.get('useBias', True)
        # which initializer to use
        _lInitializerId = layerDef.get('initializer', 'glorot_uniform')
        _lInitializerArgs = layerDef.get('initializerArgs', {})
        if _lInitializerId == 'uniform':
            _lInitializer = initializers.RandomUniform(
                minval=_lInitializerArgs.get('min', -0.05),
                maxval=_lInitializerArgs.get('max', 0.05),
                seed=_lInitializerArgs.get('seed', None))
        elif _lInitializerId == 'normal':
            _lInitializer = initializers.RandomNormal(
                mean=_lInitializerArgs.get('mean', 0.0),
                stddev=_lInitializerArgs.get('stddev', 0.05),
                seed=_lInitializerArgs.get('seed', None))
        else:
            _lInitializer = initializers.glorot_uniform(
                seed=_lInitializerArgs.get('seed', None))
        # create the dense|fully-connected layer
        if len(self._backbone) < 1:
            # first layer, from inputs to hidden (or perhaps output) units
            _layer = layers.Dense(units=_lUnits,
                                  input_shape=self._inputShape,
                                  activation=_lActivation,
                                  use_bias=_lUseBias,
                                  kernel_initializer=_lInitializer)
        else:
            # intermediate layer, with input shape to be inferred by keras
            _layer = layers.Dense(units=_lUnits,
                                  activation=_lActivation,
                                  use_bias=_lUseBias,
                                  kernel_initializer=_lInitializer)
    elif _ltype == 'flatten':
        # create a flatten layer (flatten n-dim cnn output volume to vector)
        pass
    elif _ltype == 'conv2d':
        # create a conv2d layer
        pass

    return _layer
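# A usage sketch (not from the original source) showing the layer-definition dict
# shape that _buildLayer expects, based on the keys it reads above; `network` is a
# hypothetical instance of the containing class.
layer_def = {
    'name': 'fc1',
    'type': 'fc',
    'units': 64,
    'activation': 'relu',
    'useBias': True,
    'initializer': 'uniform',
    'initializerArgs': {'min': -0.1, 'max': 0.1, 'seed': 7},
}
# dense = network._buildLayer(layer_def)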
def build_initializer(type, kerasDefaults, seed=None, constant=0.):
    """Set the initializer to the appropriate Keras initializer function based on
    the input string and learning rate. Other required values are set to the
    Keras default values.

    Parameters
    ----------
    type : string
        String to choose the initializer. Options recognized: 'constant',
        'uniform', 'normal', 'glorot_normal', 'glorot_uniform',
        'lecun_uniform', 'he_normal'. See the Keras documentation for a full
        description of the options.
    kerasDefaults : dict
        Default parameter values to ensure consistency between frameworks.
    seed : integer
        Random number seed.
    constant : float
        Constant value (for the constant initializer only).

    Return
    ----------
    The appropriate Keras initializer function.
    """
    if type == 'constant':
        return initializers.Constant(value=constant)
    elif type == 'uniform':
        return initializers.RandomUniform(minval=kerasDefaults['minval_uniform'],
                                          maxval=kerasDefaults['maxval_uniform'],
                                          seed=seed)
    elif type == 'normal':
        return initializers.RandomNormal(mean=kerasDefaults['mean_normal'],
                                         stddev=kerasDefaults['stddev_normal'],
                                         seed=seed)
    elif type == 'glorot_normal':
        # aka Xavier normal initializer. keras default
        return initializers.glorot_normal(seed=seed)
    elif type == 'glorot_uniform':
        return initializers.glorot_uniform(seed=seed)
    elif type == 'lecun_uniform':
        return initializers.lecun_uniform(seed=seed)
    elif type == 'he_normal':
        return initializers.he_normal(seed=seed)
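# A brief usage sketch (not from the original source), assuming a kerasDefaults
# dict containing the keys the function reads for the chosen option.
keras_defaults = {'minval_uniform': -0.05, 'maxval_uniform': 0.05,
                  'mean_normal': 0.0, 'stddev_normal': 0.05}
init = build_initializer('uniform', keras_defaults, seed=42)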