def test_optimizer(optimizer):
    """Tests compiling of single optimizer with options.

    Since there can only ever be a single optimizer, there is no
    ("name", optimizer, "output") option. Only optimizer classes will be
    compiled with custom options, all others (class names, function names)
    should pass through untouched.
    """
    # Single output
    X, y = make_classification()
    est = KerasClassifier(
        model=get_model,
        optimizer=optimizer,
        optimizer__learning_rate=0.15,
        optimizer__momentum=0.5,
        loss="binary_crossentropy",
    )
    est.fit(X, y)
    est_opt = est.model_.optimizer
    if not isinstance(optimizer, str):
        assert float(est_opt.momentum.value()) == pytest.approx(0.5)
        assert float(est_opt.learning_rate) == pytest.approx(0.15, abs=1e-6)
    else:
        # BUG FIX: the original wrote a bare `==` expression whose result was
        # discarded, so the string-optimizer branch verified nothing.
        assert est_opt.__class__ == optimizers_module.get(optimizer).__class__
def compile(self, optimizer, quadrature_size=20, num_samples=None):
    """Configure the optimizer and quadrature size for this model.

    ``num_samples`` (Monte Carlo estimation of the ELL) is not implemented,
    so any non-None value raises ``NotImplementedError``.
    """
    if num_samples is not None:
        raise NotImplementedError(
            "Monte Carlo estimation of ELL not yet supported!")
    # Independent assignments; resolve the optimizer through the registry.
    self.quadrature_size = quadrature_size
    self.optimizer = optimizers.get(optimizer)
def compile(self, optimizer, num_samples=None):
    """Resolve and store the optimizer; record ``num_samples``."""
    # String identifiers go through the Keras registry; anything else is
    # assumed to already be an optimizer instance and stored as-is.
    self.optimizer = (optimizers.get(optimizer)
                      if isinstance(optimizer, str) else optimizer)
    # TODO: issue warning that `num_samples` currently has no effect.
    self.num_samples = num_samples
def compile(self, optimizer_gen='rmsprop', optimizer_disc='rmsprop',
            loss_gen=None, loss_disc=None, metrics_gen=None,
            metrics_disc=None):
    """Resolve the generator/discriminator optimizers, wrap losses and
    metrics in Keras containers, and build the metrics formatter."""
    self.gen_optimizer = optimizers.get(optimizer_gen)
    self.disc_optimizer = optimizers.get(optimizer_disc)

    # Falsy (None) losses/metrics are stored as None rather than wrapped.
    if loss_gen:
        self.gen_losses_container = compile_utils.LossesContainer(loss_gen)
    else:
        self.gen_losses_container = None
    if loss_disc:
        self.disc_losses_container = compile_utils.LossesContainer(loss_disc)
    else:
        self.disc_losses_container = None
    if metrics_gen:
        self.gen_metrics_container = compile_utils.MetricsContainer(metrics_gen)
    else:
        self.gen_metrics_container = None
    if metrics_disc:
        self.disc_metrics_container = compile_utils.MetricsContainer(metrics_disc)
    else:
        self.disc_metrics_container = None

    self.m_formatter = Metrics_Formatter(
        gen_loss_name=_get_tag_name(self.gen_losses_container),
        disc_loss_name=_get_tag_name(self.disc_losses_container),
        gen_metric_name=_get_tag_name(self.gen_metrics_container),
        disc_metric_name=_get_tag_name(self.disc_metrics_container),
        num_format='.03f'
    )
def __init__(self, optimizer, multipliers, **kwargs):
    """Initialize the optimizer wrapper.

    :param optimizer: The original optimizer.
    :param multipliers: A dict representing the multipliers. The key is the
        prefix of the weight to be multiplied.
    :param kwargs: Arguments for parent class.
    """
    super(LRMultiplier, self).__init__(**kwargs)
    self.multipliers = multipliers
    self.optimizer = optimizers.get(optimizer)
    # Mirror the wrapped optimizer's learning rate.
    self.lr = self.optimizer.lr
    self.updates = []
    self.weights = []
def get_model(input_shape=(256, 256, 3)):
    """Build and compile a single-branch U-Net with a 1x1 sigmoid head."""
    image_input = layers.Input(shape=input_shape)  # 256
    features = u_net_block(image_input)
    mask = layers.Conv2D(1, (1, 1), activation='sigmoid')(features)
    net = models.Model(inputs=[image_input], outputs=[mask])
    net.compile(
        optimizer=optimizers.get(OPTIMIZER),
        loss=losses.get(LOSS),
        metrics=[metrics.get(m) for m in METRICS])
    return net
def __init__(self,
             units,
             learning_rate=0.01,
             online=True,
             n_passes=1,
             return_hidden=True,
             use_bias=True,
             visible_activation='sigmoid',
             hidden_activation='sigmoid',
             kernel_initializer='glorot_uniform',
             bias_initializer='glorot_uniform',
             kernel_regularizer='l2',
             bias_regularizer='l2',
             activity_regularizer='l2',
             kernel_constraint=constraints.MinMaxNorm(
                 min_value=-1.0, max_value=1.0, rate=1.0, axis=-1),
             bias_constraint=constraints.MinMaxNorm(
                 min_value=-1.0, max_value=1.0, rate=1.0, axis=-1),
             optimizer='Adam',
             **kwargs):
    """Configure the online Boltzmann cell layer.

    Activations, initializers, regularizers, constraints and the optimizer
    are all resolved through their respective Keras registries, so either
    string identifiers or concrete objects may be passed.
    """
    # Keras convention: translate `input_dim` into `input_shape` for the base.
    if 'input_shape' not in kwargs and 'input_dim' in kwargs:
        kwargs['input_shape'] = (kwargs.pop('input_dim'),)
    super(OnlineBolzmannCell, self).__init__(
        activity_regularizer=regularizers.get(activity_regularizer), **kwargs)

    # Plain configuration values.
    self.units = units
    self.learning_rate = learning_rate
    self.online = online
    self.n_passes = n_passes
    self.return_hidden = return_hidden
    self.use_bias = use_bias

    # Registry-resolved components.
    self.visible_activation = activations.get(visible_activation)
    self.hidden_activation = activations.get(hidden_activation)
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    # The layer drives its own optimizer at the configured learning rate.
    self.optimizer = optimizers.get(optimizer)
    self.optimizer.learning_rate = self.learning_rate

    self.supports_masking = True
    self.input_spec = InputSpec(min_ndim=2)
def get_optimizer(name: str,
                  learning_rate: float,
                  steps_per_epoch: int = None,
                  learning_rate_schedule: str = None,
                  optimizer_kwargs=None):
    """Build an optimizer by name with a learning rate or schedule.

    Keras optimizer names are tried first; otherwise ``NON_KERAS_OPTIMIZERS``
    is consulted. Raises ``ValueError`` for an unknown name.
    """
    if not optimizer_kwargs:
        optimizer_kwargs = {}
    name = name.lower()  # idiom fix: method call instead of str.lower(name)
    rate_or_schedule = _get_learning_rate_schedule(learning_rate,
                                                   learning_rate_schedule,
                                                   steps_per_epoch)
    try:
        # BUG FIX: the original re-invoked __init__ on the already-constructed
        # instance returned by optimizers.get(), which can leave stale state
        # from the first construction. Instantiate the class freshly instead.
        opt_cls = type(optimizers.get(name))
        return opt_cls(rate_or_schedule, **optimizer_kwargs)
    except ValueError:
        pass
    if name in NON_KERAS_OPTIMIZERS:
        return NON_KERAS_OPTIMIZERS[name](rate_or_schedule, **optimizer_kwargs)
    raise ValueError(f'Unknown optimizer {name}.')
def get_siamese_model(input_shape=(256, 256, 3)):
    """Build and compile a two-branch (siamese) U-Net segmentation model."""
    image_input = layers.Input(shape=input_shape)  # 256
    branch_a = u_net_block(image_input)
    branch_b = u_net_block(image_input)
    merged = layers.concatenate([branch_a, branch_b], axis=-1)
    mask = layers.Conv2D(1, (1, 1), activation='sigmoid')(merged)
    net = models.Model(inputs=[image_input], outputs=[mask])
    net.compile(
        optimizer=optimizers.get(OPTIMIZER),
        loss=losses.get(LOSS),
        metrics=[metrics.get(m) for m in METRICS])
    return net
def clone_optimizer(optimizer):
    """Return a fresh copy of ``optimizer``.

    Strings are resolved through the Keras registry; optimizer instances are
    round-tripped through their config so the clone shares no mutable state
    with the original.
    """
    # Idiom fix: isinstance instead of `type(...) is str` (also accepts
    # str subclasses), and dict(...) instead of a manual comprehension.
    if isinstance(optimizer, str):
        return optimizers.get(optimizer)

    # Requires Keras 1.0.7 since get_config has breaking changes.
    config = {
        'class_name': optimizer.__class__.__name__,
        'config': dict(optimizer.get_config()),
    }
    if hasattr(optimizers, 'optimizer_from_config'):
        # COMPATIBILITY: Keras < 2.0
        return optimizers.optimizer_from_config(config)
    return optimizers.deserialize(config)
def __init__(self, optimizer, multipliers, **kwargs):
    """Initialize the optimizer wrapper.

    :param optimizer: The original optimizer.
    :param multipliers: A dict representing the multipliers. The key is the
        prefix of the weight to be multiplied.
    :param kwargs: Arguments for parent class.
    """
    # BUG FIX: the original called super(...).__init__(self, **kwargs),
    # passing `self` twice (implicitly via the bound super object and again
    # explicitly), which would hand the instance itself to the parent as its
    # first positional argument.
    super(LRMultiplier, self).__init__(**kwargs)
    self.optimizer = optimizers.get(optimizer)
    self.multipliers = multipliers
    # Newer Keras optimizers expose `learning_rate`; older ones expose `lr`.
    if hasattr(self.optimizer, 'learning_rate'):
        self.lr_attr = 'learning_rate'
    else:
        self.lr_attr = 'lr'
def get_optimizer(optimizer):
    """Overwrite keras default parameters for optimizer.

    Parameters
    ----------
    optimizer : dict
        Dictionary containing information on the optimizer.

    Returns
    -------
    tf.keras.optimizers
        Optimizer.
    """
    lr = optimizer.get("learning_rate")
    name = optimizer.get("optimizer_name")
    keras_optimizer = optimizers.get(name)
    # Override the registry default with the configured learning rate.
    keras_optimizer.learning_rate = lr
    return keras_optimizer
def create_model(input_shape=10, output_shape=10, layers=(50,),
                 activation='relu', optimizer='adam', learning_rate=0.001,
                 dropout=True, decay=0.9, decay_steps=10000, batch_norm=True):
    """Build and compile a configurable MLP with exponential LR decay.

    Each entry in ``layers`` adds a Dense block (optionally with batch norm
    and dropout); a final Dense produces ``output_shape`` values.

    BUG FIX: the default for ``layers`` was the mutable list ``[50]``; an
    immutable tuple ``(50,)`` avoids the shared-mutable-default pitfall while
    behaving identically for iteration.
    """
    model = Sequential(name=str(time.time()))
    model.add(Input(input_shape))
    for width in layers:
        model.add(Dense(width))
        if batch_norm:
            model.add(BatchNormalization())
        model.add(Activation(activation=activation))
        if dropout:
            model.add(Dropout(0.5))
    model.add(Dense(output_shape))
    # Compile model
    lr_schedule = ExponentialDecay(initial_learning_rate=learning_rate,
                                   decay_steps=decay_steps,
                                   decay_rate=decay)
    opt = optimizers.get({
        'class_name': optimizer,
        'config': {'learning_rate': lr_schedule}
    })
    model.compile(loss='mse', optimizer=opt, metrics=[r2_metric])
    return model
def _build_model(self, units, output_shape, input_shape=None, backbone=None):
    """Assemble and compile a two-layer head, optionally on top of a backbone.

    Exactly one of ``input_shape`` (standalone model) or ``backbone``
    (pre-built feature extractor) must be provided.
    """
    if input_shape is None and backbone is None:
        raise ValueError(
            'Either backbone or input_shape should be specified')

    if backbone is None:
        # Standalone: the hidden layer needs the explicit input shape.
        stack = [
            Dense(units,
                  input_shape=input_shape,
                  activation=self.activation,
                  kernel_regularizer=self.regularization),
        ]
    else:
        stack = [
            backbone,
            Dense(units,
                  activation=self.activation,
                  kernel_regularizer=self.regularization),
        ]
    stack.append(Dense(output_shape[0], activation=self.output_activation))

    model = Sequential(stack)
    model.compile(
        loss=self.loss,
        optimizer=optimizers.get(self.optimizer).from_config(
            self.optimizer_params))
    return model
def get_optimizer_class(
    optimizer: Union[str, optimizers_mod.Optimizer, Type[optimizers_mod.Optimizer]]
) -> optimizers_mod.Optimizer:
    """Return the optimizer *class* for a name, instance, or class specifier."""
    # optimizers.get returns instances instead of classes, so resolve first
    # and then take the runtime type.
    instance = optimizers_mod.get(optimizer)
    return type(instance)
inducing_index_points_initial = (inducing_index_points_initializer( shape=(num_inducing_points, num_features))) # %% vgp = gpflow.models.SVGP( likelihood=gpflow.likelihoods.Bernoulli(invlink=tf.sigmoid), inducing_variable=inducing_index_points_initial, kernel=kernel_cls(), num_data=len(X_train), whiten=whiten, ) # %% optimizer = optimizers.get(optimizer_name) # %% @tf.function def train_on_batch(X_batch, y_batch): with tf.GradientTape(watch_accessed_variables=False) as tape: tape.watch(vgp.trainable_variables) loss = vgp.training_loss((X_batch, y_batch)) gradients = tape.gradient(loss, vgp.trainable_variables) optimizer.apply_gradients(zip(gradients, vgp.trainable_variables)) # %%
def compile(self, optimizer, metrics=[]):
    """Compile the actor/critic pair for training.

    Accepts either a single optimizer (cloned for the critic) or a
    two-element list/tuple of (actor_optimizer, critic_optimizer).
    ``metrics`` may likewise be a pair of (actor_metrics, critic_metrics).

    NOTE(review): ``metrics=[]`` is a mutable default and ``metrics += ...``
    mutates the caller's list — consider a None default.
    """
    metrics += [mean_q]
    # Split (or clone) the optimizer(s) for actor and critic.
    if type(optimizer) in (list, tuple):
        if len(optimizer) != 2:
            raise ValueError(
                'More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.'
            )
        actor_optimizer, critic_optimizer = optimizer
    else:
        actor_optimizer = optimizer
        critic_optimizer = clone_optimizer(optimizer)
    # String identifiers are resolved through the Keras registry.
    if type(actor_optimizer) is str:
        actor_optimizer = optimizers.get(actor_optimizer)
    if type(critic_optimizer) is str:
        critic_optimizer = optimizers.get(critic_optimizer)
    # Actor and critic must not share one optimizer instance.
    assert actor_optimizer != critic_optimizer

    # A pair of sequences means separate metrics for actor and critic.
    if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(
            metrics[1], '__len__'):
        actor_metrics, critic_metrics = metrics
    else:
        actor_metrics = critic_metrics = metrics

    def clipped_error(y_true, y_pred):
        # Huber loss clipped at self.delta_clip, averaged over the last axis.
        loss = K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)
        return loss

    ## Compile target networks. We only use them in feed-forward mode, hence we can pass any
    ## optimizer and loss since we never use it anyway.
    self.target_actor.compile(optimizer='adam', loss='binary_crossentropy')
    self.target_critic.compile(optimizer='adam', loss='binary_crossentropy')
    self.actor.compile(loss='binary_crossentropy',
                       metrics=['binary_accuracy'],
                       optimizer='adam')

    # Compile the critic.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        critic_updates = get_soft_target_model_updates(
            self.target_critic, self.critic, self.target_model_update)
        critic_optimizer = AdditionalUpdatesOptimizer(
            'critic', critic_optimizer, critic_updates)
    self.critic.compile(optimizer=critic_optimizer,
                        loss=clipped_error,
                        metrics=critic_metrics)

    # compile actor
    self.actor_optimizer = actor_optimizer
    actor_optimizer = self.actor_optimizer
    # Feed the actor's output (and its input) into the critic so the actor's
    # loss is the negated critic value.
    combined_inputs = [self.actor.outputs[0], self.actor.inputs[0]]
    combined_output = self.critic(combined_inputs)
    updates = actor_optimizer.get_updates(
        params=self.actor.trainable_weights,
        loss=-K.mean(combined_output) +
        self.actor.losses)  # Actor is to maximize critic output
    if self.target_model_update < 1.:
        # Include soft target model updates.
        updates += get_soft_target_model_updates(self.target_actor,
                                                 self.actor,
                                                 self.target_model_update)
    updates += self.actor.updates  # include other updates of the actor, e.g. for BN
    self.actor_train_fn = K.function(self.actor.inputs,
                                     self.actor.outputs,
                                     updates=updates)

    # compile perturbed actor (parameter-space exploration noise)
    if self.param_noise is not None:
        print('Compile perturbed_actor')
        self.perturbed_actor.compile(optimizer='adam',
                                     loss='binary_crossentropy')
        self.perturbed_actor_optimizer = clone_optimizer(optimizer)
        combined_inputs = [
            self.perturbed_actor.outputs[0], self.perturbed_actor.inputs[0]
        ]
        combined_output = self.critic(combined_inputs)
        updates = self.perturbed_actor_optimizer.get_updates(
            params=self.perturbed_actor.trainable_weights,
            loss=-K.mean(combined_output) + self.perturbed_actor.losses)
        updates += self.perturbed_actor.updates  # include other updates of the actor, e.g. for BN
        self.perturbed_actor_train_fn = K.function(
            self.perturbed_actor.inputs,
            self.perturbed_actor.outputs,
            updates=updates)

    self.compiled = True
def compile(self, optimizer, metrics=[]):
    """Compile actor, critic and their target networks for DDPG training.

    ``optimizer`` may be a single optimizer (cloned for the critic) or a
    two-element (actor, critic) pair; ``metrics`` may likewise be a pair.

    NOTE(review): ``metrics=[]`` is a mutable default and ``metrics += ...``
    mutates the caller's list — consider a None default.
    """
    metrics += [mean_q]
    # Split (or clone) the optimizer(s) for actor and critic.
    if type(optimizer) in (list, tuple):
        if len(optimizer) != 2:
            raise ValueError('More than two optimizers provided. Please only provide a maximum of two optimizers, the first one for the actor and the second one for the critic.')
        actor_optimizer, critic_optimizer = optimizer
    else:
        actor_optimizer = optimizer
        critic_optimizer = clone_optimizer(optimizer)
    # String identifiers are resolved through the Keras registry.
    if type(actor_optimizer) is str:
        actor_optimizer = optimizers.get(actor_optimizer)
    if type(critic_optimizer) is str:
        critic_optimizer = optimizers.get(critic_optimizer)
    assert actor_optimizer != critic_optimizer

    # A pair of sequences means separate metrics for actor and critic.
    if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(metrics[1], '__len__'):
        actor_metrics, critic_metrics = metrics
    else:
        actor_metrics = critic_metrics = metrics

    def clipped_error(y_true, y_pred):
        # Huber loss clipped at self.delta_clip, averaged over the last axis.
        return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)

    # Compile target networks. We only use them in feed-forward mode, hence we can pass any
    # optimizer and loss since we never use it anyway.
    self.target_actor = clone_model(self.actor, self.custom_model_objects)
    self.target_actor.compile(optimizer='sgd', loss='mse')
    self.target_critic = clone_model(self.critic, self.custom_model_objects)
    self.target_critic.compile(optimizer='sgd', loss='mse')

    # We also compile the actor. We never optimize the actor using Keras but instead compute
    # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
    # we also compile it with any optimizer (it is never used for training).
    self.actor.compile(optimizer='sgd', loss='mse')

    # Compile the critic.
    if self.target_model_update < 1.:
        # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
        critic_updates = get_soft_target_model_updates(self.target_critic, self.critic, self.target_model_update)
        critic_optimizer = AdditionalUpdatesOptimizer(critic_optimizer, critic_updates)
    self.critic.compile(optimizer=critic_optimizer, loss=clipped_error, metrics=critic_metrics)

    # Combine actor and critic so that we can get the policy gradient.
    # Assuming critic's state inputs are the same as actor's.
    combined_inputs = []
    state_inputs = []
    for i in self.critic.input:
        if i == self.critic_action_input:
            # Placeholder slot: filled below with the actor's output.
            combined_inputs.append([])
        else:
            combined_inputs.append(i)
            state_inputs.append(i)
    combined_inputs[self.critic_action_input_idx] = self.actor(state_inputs)
    combined_output = self.critic(combined_inputs)

    # Policy gradient: ascend the critic's value, i.e. minimize its negation.
    updates = actor_optimizer.get_updates(
        params=self.actor.trainable_weights, loss=-K.mean(combined_output))
    if self.target_model_update < 1.:
        # Include soft target model updates.
        updates += get_soft_target_model_updates(self.target_actor, self.actor, self.target_model_update)
    updates += self.actor.updates  # include other updates of the actor, e.g. for BN

    # Finally, combine it all into a callable function.
    if K.backend() == 'tensorflow':
        self.actor_train_fn = K.function(state_inputs + [K.learning_phase()],
                                         [self.actor(state_inputs)], updates=updates)
    else:
        if self.uses_learning_phase:
            state_inputs += [K.learning_phase()]
        self.actor_train_fn = K.function(state_inputs, [self.actor(state_inputs)], updates=updates)
    self.actor_optimizer = actor_optimizer

    self.compiled = True
def _train_model(self, train_x, train_y, valid_x, valid_y, test_x, test_y, labels):
    """Train the task's model, logging params/metrics to mlflow.

    Returns a ``(model_checkpoint_path, metrics)`` tuple where metrics come
    from the best epoch recorded by the mlflow checkpoint callback.
    """
    # Optional on-the-fly data augmentation for the training set.
    if self.augmentation:
        train_generator = build_augmentation_generator(
            train_x,
            train_y,
            batch_size=self.batch_size,
            random_seed=self.random_seed,
            **self.augmentation,
        )
    else:
        train_generator = None

    # TODO: doesn't callback have other ways to catch exception
    # inside of model training loop?
    with MLflowCheckpoint(test_x, test_y, self.metrics) as mlflow_logger:
        # TODO: add batch normalization and dropout
        # NOTE(review): input shape is hard-coded to 28x28x1 (MNIST-like) —
        # confirm against the datasets this task is used with.
        model = self.model(input_shape=(28, 28, 1))
        if model is None:
            raise Exception('Model should be defined')
        # TODO: it is solution to save weight only
        #
        # tensorflow:This model was compiled with a Keras optimizer
        # (<tensorflow.python.keras.optimizers.Adam object at 0x7fe86828bf28>)
        # but is being saved in TensorFlow format with `save_weights`.
        # The model's weights will be saved, but unlike with TensorFlow optimizers
        # in the TensorFlow format the optimizer's state will not be saved.
        #
        # Consider using a TensorFlow optimizer from `tf.train`.
        #
        # name_to_optimizer = {
        #     'adam': tf.train.AdamOptimizer,
        # }
        # optimizer = name_to_optimizer[self.optimizer](**self.optimizer_props)
        # print('tf.train[self.optimizer]', tf.train[self.optimizer])
        #
        # but when we need to save not only weights tf.train doesn't work properly
        #
        # WARNING:tensorflow:TensorFlow optimizers do not make it possible to access optimizer attributes
        # or optimizer state after instantiation. As a result, we cannot save the optimizer as part of
        # the model save file.You will have to compile your model again after loading it.
        # Prefer using a Keras optimizer instead (see keras.io/optimizers).

        # we are getting instance of optimizer here
        optimizer = optimizers.get(self.optimizer)
        # so to create new with our settings we should use `from_config`
        optimizer = optimizer.from_config(self.optimizer_props)
        model.compile(optimizer=optimizer,
                      loss=self.loss,
                      metrics=[self.metrics])

        model_name = encode_task_to_filename(self)
        # log model params to mlflow
        mlflow.log_param('model_name', model_name)
        # for the moment mlflow doesn't support nested params
        # so we need to flatten them
        params = get_params_of_task(self)
        mlflow.log_params(flatten(params))
        with self.output()['params'].open('w') as f:
            yaml.dump(params, f, default_flow_style=False)
        mlflow.log_param('model.num_of_params', model.count_params())
        if self.verbose > 0:
            model.summary()

        tf_log_dir = os.path.join(self.tf_log_dir, model_name)
        # remove previous log to prevent duplication
        # once I found way to resume training it could make sense to preserve it
        # TODO: sadly sometime it doesn't because TensorFlow Board could cache
        # logs and if you don't refresh tfb after logs were deleted you will get old logs
        shutil.rmtree(tf_log_dir, ignore_errors=True)
        # so temporal solution - add random tail at the end of path
        # it would force invalidate logs for tfb
        tf_log_dir = os.path.join(tf_log_dir, str(int(time.time())))

        start = time.time()
        # create needed dirs to store model checkpoint
        output_model = self.output()['model']
        output_model.makedirs()
        model_checkpoint_path = f'{output_model.path}_checkpoint'
        callbecks = [
            EarlyStopping(monitor='val_loss', patience=2),
            # isn't clear where to store and how would it work with self.output()['model']
            ModelCheckpoint(
                filepath=model_checkpoint_path,
                save_best_only=True,
                # FIXME:
                # the goal of that saving that we can continue train from this point
                # but it doesn't work properly because TF doesn't allow Keras optimizer
                # save_weights_only=True,
                save_weights_only=False,
            ),
            TensorBoard(
                log_dir=tf_log_dir,
                write_images=True,
            ),
            # TODO: how can I use it?
            # LearningRateScheduler
            # should be optional
            # ReduceLROnPlateau
            mlflow_logger
        ]
        if self.log_confusion_matrix:
            callbecks.append(LogConfusionMatrix(valid_x, valid_y, labels))

        fit_args = dict(epochs=self.epoch,
                        verbose=self.verbose,
                        validation_data=(valid_x, valid_y),
                        callbacks=callbecks)
        # Use the augmentation generator when configured, otherwise plain fit.
        if train_generator:
            model.fit_generator(train_generator, **fit_args)
        else:
            model.fit(train_x, train_y, batch_size=self.batch_size, **fit_args)

        training_time = time.time() - start
        mlflow.log_metric('train_time.total', training_time)
        metrics = mlflow_logger.get_best_metrics()
        return model_checkpoint_path, metrics