def test_get_config(self):
    """Round-trip a Lookahead optimizer through serialize/deserialize and
    check that the custom hyperparameters survive in the config."""
    original = Lookahead("adam", sync_period=10, slow_step_size=0.4)
    restored = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(original))
    restored_config = restored.get_config()
    self.assertEqual(restored_config["sync_period"], 10)
    self.assertEqual(restored_config["slow_step_size"], 0.4)
def test_get_config(self):
    """Serialization round-trip for Lookahead hyperparameters.

    Currently skipped pending an upstream TensorFlow fix.
    """
    self.skipTest('Wait #33614 to be fixed')
    optimizer = Lookahead('adam', sync_period=10, slow_step_size=0.4)
    serialized = tf.keras.optimizers.serialize(optimizer)
    optimizer = tf.keras.optimizers.deserialize(serialized)
    cfg = optimizer.get_config()
    self.assertEqual(cfg['sync_period'], 10)
    self.assertEqual(cfg['slow_step_size'], 0.4)
def test_fit_simple_linear_model_mixed_precision():
    """Train a Lookahead-wrapped SGD model under the mixed_float16 policy
    and check the prediction error stays inside the expected band."""
    gpu_present = test_utils.is_gpu_available()
    if gpu_present and LooseVersion(tf.__version__) <= "2.2.0":
        pytest.xfail(
            "See https://github.com/tensorflow/tensorflow/issues/39775")

    np.random.seed(0x2019)
    tf.random.set_seed(0x2019)
    features = np.random.standard_normal((10000, 3))
    true_weights = np.random.standard_normal((3, 1))
    noise = np.random.standard_normal((10000, 1)) * 1e-4
    targets = np.dot(features, true_weights) + noise

    try:
        # Build and compile under mixed precision; the policy is restored in
        # the finally block so other tests are unaffected.
        tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(input_shape=(3, ), units=1))
        model.compile(Lookahead("sgd"), loss="mse")
    finally:
        tf.keras.mixed_precision.experimental.set_policy("float32")

    model.fit(features, targets, epochs=3)

    eval_x = np.random.standard_normal((100, 3))
    eval_y = np.dot(eval_x, true_weights)
    max_abs_diff = np.max(np.abs(model.predict(eval_x) - eval_y))
    # Loose band: close enough to the true weights, but not so close that
    # float16 rounding would have been bypassed.
    assert max_abs_diff < 2.3e-3
    assert max_abs_diff >= 1e-3
def build_ranger_optimizer(radam_options=None, lookahead_options=None):
    """Build a Ranger optimizer (RectifiedAdam wrapped in Lookahead).

    Args:
        radam_options: optional dict of keyword arguments forwarded to
            ``RectifiedAdam``. Defaults to no extra options.
        lookahead_options: optional dict of keyword arguments forwarded to
            ``Lookahead``. Defaults to ``{"sync_period": 6,
            "slow_step_size": 0.5}``.

    Returns:
        A ``Lookahead`` optimizer wrapping a ``RectifiedAdam`` instance.
    """
    # Fix: the original used mutable default arguments ({} and a dict
    # literal), which are created once and shared across all calls — a
    # caller mutating them would silently change subsequent defaults.
    if radam_options is None:
        radam_options = {}
    if lookahead_options is None:
        lookahead_options = {"sync_period": 6, "slow_step_size": 0.5}
    radam = RectifiedAdam(**radam_options)
    ranger = Lookahead(radam, **lookahead_options)
    return ranger
def test_model_dynamic_lr():
    """Setting ``opt.lr`` after a gradient update must be reflected in the
    wrapped optimizer's learning-rate variable."""
    gradient = tf.Variable([[0.1]])
    dense = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
        use_bias=False,
    )
    model = tf.keras.Sequential([dense])
    model.build(input_shape=[1, 1])

    opt = Lookahead("adam", sync_period=10, slow_step_size=0.4)
    _ = opt.apply_gradients(list(zip([gradient], model.variables)))

    # Default Adam learning rate is 1e-3; then change it dynamically.
    np.testing.assert_allclose(opt.lr.read_value(), 1e-3)
    opt.lr = 1e-4
    np.testing.assert_allclose(opt.lr.read_value(), 1e-4)
def test_model_dynamic_lr(self):
    """Learning rate defaults to 1e-3 and is settable via ``opt.lr``
    (TF1-compat execution style)."""
    grad_value = tf.Variable([[0.1]])
    dense = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.Constant([[1.0]]),
        use_bias=False)
    model = tf.keras.Sequential([dense])
    model.build(input_shape=[1, 1])

    opt = Lookahead('adam', sync_period=10, slow_step_size=0.4)
    update_op = opt.apply_gradients(list(zip([grad_value], model.variables)))

    # TF1-style: initialize variables first, then run the update op.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.evaluate(update_op)

    self.assertAllClose(opt.lr.read_value(), 1e-3)
    opt.lr = 1e-4
    self.assertAllClose(opt.lr.read_value(), 1e-4)
def test_dense_sample_with_lookahead():
    """Dense-update regression test for Ranger (RectifiedAdam + Lookahead).

    Expected values are obtained from the original implementation of Ranger.
    """
    ranger = Lookahead(
        RectifiedAdam(lr=1e-3, beta_1=0.95),
        sync_period=6,
        slow_step_size=0.45,
    )
    run_dense_sample(
        iterations=100,
        expected=[[0.993126, 1.992901], [2.993283, 3.993261]],
        optimizer=ranger,
    )
def test_sparse_sample_with_lookahead():
    """Sparse-update regression test for Ranger (RectifiedAdam + Lookahead).

    Expected values are obtained from the previous implementation of Ranger.
    """
    ranger = Lookahead(
        RectifiedAdam(lr=1e-3, beta_1=0.95),
        sync_period=6,
        slow_step_size=0.45,
    )
    run_sparse_sample(
        iterations=150,
        expected=[[0.988156, 2.0], [3.0, 3.988291]],
        optimizer=ranger,
    )
def test_sparse_exact_ratio(self):
    """After one sync period the slow weights must equal
    ``val + slow_step_size * (fast - val)`` for each sync period/ratio."""
    for sync_period in [5, 10, 100]:
        for slow_ratio in [0.3, 0.7]:
            plain_adam = tf.keras.optimizers.get('adam')
            vals, quick_vars = self.run_sparse_sample(sync_period, plain_adam)
            wrapped = Lookahead(
                'adam', sync_period=sync_period, slow_step_size=slow_ratio)
            _, slow_vars = self.run_sparse_sample(sync_period, wrapped)
            for val, quick, slow in zip(vals, quick_vars, slow_vars):
                interpolated = val + (quick - val) * slow_ratio
                self.assertAllClose(interpolated, slow)
def test_sparse_sample_with_lookahead():
    """Sparse-update regression test for AdaBelief wrapped in Lookahead.

    Expected values are obtained from the previous implementation of Ranger.
    """
    wrapped = Lookahead(
        AdaBelief(lr=1e-3, beta_1=0.95, rectify=False),
        sync_period=6,
        slow_step_size=0.45,
    )
    run_sparse_sample(
        iterations=150,
        expected=[[0.8114481, 2.0], [3.0, 3.8114486]],
        optimizer=wrapped,
    )
def test_dense_sample_with_lookahead():
    """Dense-update regression test for AdaBelief wrapped in Lookahead.

    Expected values are obtained from the original implementation of Ranger.
    """
    wrapped = Lookahead(
        AdaBelief(lr=1e-3, beta_1=0.95, rectify=False),
        sync_period=6,
        slow_step_size=0.45,
    )
    run_dense_sample(
        iterations=100,
        expected=[[0.88910455, 1.889104], [2.8891046, 3.8891046]],
        optimizer=wrapped,
    )
def test_sparse_exact_ratio():
    """Slow weights after a sync period must interpolate between the
    initial value and the fast weights with ratio ``slow_step_size``."""
    for sync_period in [5, 10, 100]:
        for alpha in [0.3, 0.7]:
            baseline = tf.keras.optimizers.get("adam")
            vals, quick_vars = run_sparse_sample(sync_period, baseline)

            lookahead = Lookahead(
                "adam", sync_period=sync_period, slow_step_size=alpha)
            _, slow_vars = run_sparse_sample(sync_period, lookahead)

            for val, quick, slow in zip(vals, quick_vars, slow_vars):
                interpolated = val + (quick - val) * alpha
                np.testing.assert_allclose(
                    interpolated.numpy(), slow.numpy(), rtol=1e-06, atol=1e-06
                )
def compile_model(self, optimizer='SGD', learning_rate=0.01, momentum=0.00,
                  loss='binary_crossentropy', warmup_proportion=0.1,
                  total_steps=10000, min_lr=1e-5,
                  measurment_metric_name='accuracy'):
    """Select an optimizer and metric by name, then compile the model.

    Parameters:
        optimizer: either the name of an optimizer ('SGD', 'ADAM',
            'RMSPROP', 'RADAM', 'LOOKAHEAD', case-insensitive) or an
            already-constructed optimizer instance used as-is.
        learning_rate / momentum: forwarded to the chosen optimizer.
        loss: Keras loss identifier passed to ``compile``.
        warmup_proportion / total_steps / min_lr: RectifiedAdam options
            (total_steps/min_lr are currently commented out below).
        measurment_metric_name: metric selector; only 'accuracy' and
            'categorical_accuracy' are recognised.

    NOTE(review): this method appears to contain two merged revisions —
    see the two ``super().compile`` calls below; the second overrides the
    first. Confirm which one is intended before relying on the metrics or
    optimizer actually used.
    """
    # Store the configuration on the instance for later inspection/logging.
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.warmup_proportion = warmup_proportion
    self.total_steps = total_steps
    self.min_lr = min_lr
    self.measurment_metric_name = measurment_metric_name
    if type(optimizer) == str:
        # Assign Optimizers based on selection.
        # NOTE(review): prefer isinstance(optimizer, str) over type() ==.
        optimizerType = optimizer.upper()
        if optimizerType == 'SGD':
            optimizerClass = SGD(learning_rate=learning_rate,
                                 momentum=momentum)
        elif optimizerType == 'ADAM':
            optimizerClass = Adam(learning_rate=learning_rate)
        elif optimizerType == 'RMSPROP':
            optimizerClass = RMSprop(learning_rate=learning_rate)
        elif optimizerType == 'RADAM':
            # NOTE(review): passing min_lr as `decay` looks suspicious —
            # confirm whether total_steps/min_lr (commented out) were meant.
            optimizerClass = RectifiedAdam(learning_rate=learning_rate,
                                           warmup_proportion=warmup_proportion,
                                           decay=min_lr)
            # total_steps=total_steps,min_lr=min_lr)
        elif optimizerType == 'LOOKAHEAD':
            # Ranger-style setup: RectifiedAdam wrapped in Lookahead.
            optimizerClass = RectifiedAdam(learning_rate=learning_rate,
                                           warmup_proportion=warmup_proportion,
                                           decay=min_lr)
            # total_steps=total_steps,min_lr=min_lr)
            optimizerClass = Lookahead(optimizerClass)
        else:
            # NOTE(review): this branch leaves optimizerClass undefined and
            # the later compile call would raise NameError — verify.
            print("Optimizer is NOT in string list.")
    else:
        # Caller supplied an optimizer object directly.
        optimizerClass = optimizer
        optimizerType = "ManualFunc"
    # Assign measure matrics based on selection:
    # NOTE(review): measurment_metric is assigned but never passed to
    # compile (compile uses the metric *name* string) — confirm intent.
    measurment_metric = ''
    if measurment_metric_name.lower() == 'accuracy':
        measurment_metric = Accuracy
        self.measurment_metric_name = 'accuracy'
    elif measurment_metric_name.lower() == 'categorical_accuracy':
        measurment_metric = [CategoricalAccuracy]
        self.measurment_metric_name = 'categorical_accuracy'
    # elif measurment_metric_name.lower() == 'iou':
    #     measurment_metric = [iou]
    #     self.measurment_metric_name = 'iou'
    # elif measurment_metric_name.lower() == 'iou_thresholded':
    #     measurment_metric = [iou_thresholded]
    #     self.measurment_metric_name = 'iou_thresholded'
    super().compile(optimizer=optimizerClass, loss=loss,
                    metrics=self.measurment_metric_name)
    # NOTE(review): second compile call overrides the one above and uses the
    # raw `optimizer` argument plus hard-coded metrics — likely leftover
    # from an earlier revision; confirm which call should remain.
    super().compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    self.optimizerType = optimizerType
    print("Model was complied. optimizer: %s, learning_rate: %s, momentum: %s" % (
        self.optimizerType, self.learning_rate, self.momentum))
def test_dense_sample_with_lookahead(self):
    """Dense-update regression test for Ranger.

    Expected values are obtained from the original implementation of Ranger.
    """
    ranger = Lookahead(
        RectifiedAdam(lr=1e-3, beta_1=0.95),
        sync_period=6,
        slow_step_size=0.45,
    )
    self.run_dense_sample(
        iterations=1000,
        expected=[[0.7985, 1.7983], [2.7987, 3.7986]],
        optimizer=ranger,
    )
def test_sparse_sample_with_lookahead(self):
    """Sparse-update regression test for Ranger.

    Expected values are obtained from the original implementation of
    Ranger. Dense results should be: [0.6417, 1.6415], [2.6419, 3.6418].
    """
    ranger = Lookahead(
        RectifiedAdam(lr=1e-3, beta_1=0.95),
        sync_period=6,
        slow_step_size=0.45,
    )
    self.run_sparse_sample(
        iterations=1500,
        expected=[[0.6417, 2.0], [3.0, 3.6418]],
        optimizer=ranger,
    )
def test_fit_simple_linear_model_mixed_precision():
    """Fit a single-layer linear model with Lookahead("sgd") and check the
    prediction error.

    NOTE(review): despite the name, this variant never enables a mixed
    precision policy — confirm against the test it was derived from.
    """
    np.random.seed(0x2019)
    tf.random.set_seed(0x2019)
    inputs = np.random.standard_normal((10000, 3))
    weights = np.random.standard_normal((3, 1))
    labels = np.dot(inputs, weights) + np.random.standard_normal((10000, 1)) * 1e-4

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(input_shape=(3, ), units=1))
    model.compile(Lookahead("sgd"), loss="mse")
    model.fit(inputs, labels, epochs=3)

    eval_inputs = np.random.standard_normal((100, 3))
    eval_labels = np.dot(eval_inputs, weights)
    predicted = model.predict(eval_inputs)
    assert np.max(np.abs(predicted - eval_labels)) < 2.3e-3
def test_fit_simple_linear_model(self):
    """Lookahead('adam') should recover a noisy linear model to high
    accuracy after three epochs."""
    np.random.seed(0x2019)
    inputs = np.random.standard_normal((100000, 3))
    weights = np.random.standard_normal((3, 1))
    noise = np.random.standard_normal((100000, 1)) * 1e-4
    labels = np.dot(inputs, weights) + noise

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(input_shape=(3,), units=1))
    model.compile(Lookahead('adam'), loss='mse')
    model.fit(inputs, labels, epochs=3)

    eval_inputs = np.random.standard_normal((100, 3))
    eval_labels = np.dot(eval_inputs, weights)
    predicted = model.predict(eval_inputs)
    self.assertLess(np.max(np.abs(predicted - eval_labels)), 1e-4)
def get_optimizer(optimizer_param: dict):
    """Build a Keras optimizer from a parameter dict.

    Expected keys: ``name``, ``lr``, ``clipnorm``, ``clipvalue``,
    ``momentum`` (for the SGD variants), ``lookahead`` and ``sync_period``.

    Returns:
        The configured optimizer, wrapped in ``Lookahead`` when
        ``optimizer_param['lookahead']`` is truthy.

    Raises:
        KeyError: if ``optimizer_param['name']`` is not a known optimizer
            (unchanged from the original behavior).
    """
    optimizer_name = optimizer_param['name'].lower()
    lr = optimizer_param['lr']
    kwargs = {}
    # A value of 0 means "disabled" for gradient clipping in this config.
    if optimizer_param['clipnorm'] != 0:
        kwargs['clipnorm'] = optimizer_param['clipnorm']
    if optimizer_param['clipvalue'] != 0:
        kwargs['clipvalue'] = optimizer_param['clipvalue']
    # Fix: the original instantiated EVERY optimizer in the dict just to
    # select one. Factories keep construction lazy — only the requested
    # optimizer is built.
    optimizer_factories = {
        'adadelta': lambda: Adadelta(lr, **kwargs),
        'adagrad': lambda: Adagrad(lr, **kwargs),
        'adam': lambda: Adam(lr, **kwargs),
        'adam_amsgrad': lambda: Adam(lr, amsgrad=True, **kwargs),
        'sgd': lambda: SGD(lr, **kwargs),
        'sgd_momentum': lambda: SGD(
            lr, momentum=optimizer_param['momentum'], **kwargs),
        'sgd_nesterov': lambda: SGD(
            lr, momentum=optimizer_param['momentum'], nesterov=True, **kwargs),
        'nadam': lambda: Nadam(lr, **kwargs),
        'rmsprop': lambda: RMSprop(lr, **kwargs),
        'radam': lambda: RectifiedAdam(lr, **kwargs),
    }
    optimizer = optimizer_factories[optimizer_name]()
    if optimizer_param['lookahead']:
        optimizer = Lookahead(optimizer=optimizer,
                              sync_period=optimizer_param['sync_period'])
    return optimizer
def wrap(opt):
    """Wrap *opt* with Lookahead slow weights, then track a MovingAverage
    of the resulting optimizer's weights."""
    return MovingAverage(Lookahead(opt))
def test_get_config():
    """sync_period and slow_step_size must survive a serialize/deserialize
    round trip."""
    optimizer = Lookahead("adam", sync_period=10, slow_step_size=0.4)
    serialized = tf.keras.optimizers.serialize(optimizer)
    restored = tf.keras.optimizers.deserialize(serialized)
    cfg = restored.get_config()
    assert cfg["sync_period"] == 10
    assert cfg["slow_step_size"] == 0.4
def fit(self, X, y, **kwargs):
    """ Fit the seq2seq model to convert sequences one to another.

    Each sequence is unicode text composed from the tokens. Tokens are
    separated by spaces.

    The Rectified Adam with Lookahead algorithm is used for training. To
    avoid overfitting, you must use an early stopping criterion. This
    criterion is included automatically if evaluation set is defined. You
    can do this in one of two ways:

    1) set a `validation_split` parameter of this object, and in this case
    evaluation set will be selected as a corresponded part of training set
    proportionally to the `validation_split` value;

    2) set an `eval_set` argument of this method, and then evaluation set
    is defined entirely by this argument.

    :param X: input texts for training.
    :param y: target texts for training.
    :param eval_set: optional argument containing input and target texts
        for evaluation during an early-stopping.

    :return self
    """
    # --- Input validation -------------------------------------------------
    self.check_params(**self.get_params(deep=False))
    self.check_X(X, 'X')
    self.check_X(y, 'y')
    if len(X) != len(y):
        raise ValueError(f'`X` does not correspond to `y`! {len(X)} != {len(y)}.')
    # Resolve the evaluation set: either an explicit `eval_set` kwarg, or a
    # tail split of the training data controlled by `validation_split`.
    if 'eval_set' in kwargs:
        if (not isinstance(kwargs['eval_set'], tuple)) and (not isinstance(kwargs['eval_set'], list)):
            raise ValueError(f'`eval_set` must be `{type((1, 2))}` or `{type([1, 2])}`, '
                             f'not `{type(kwargs["eval_set"])}`!')
        if len(kwargs['eval_set']) != 2:
            raise ValueError(f'`eval_set` must be a two-element sequence! '
                             f'{len(kwargs["eval_set"])} != 2')
        self.check_X(kwargs['eval_set'][0], 'X_eval_set')
        self.check_X(kwargs['eval_set'][1], 'y_eval_set')
        if len(kwargs['eval_set'][0]) != len(kwargs['eval_set'][1]):
            raise ValueError(f'`X_eval_set` does not correspond to `y_eval_set`! '
                             f'{len(kwargs["eval_set"][0])} != {len(kwargs["eval_set"][1])}.')
        X_eval_set = kwargs['eval_set'][0]
        y_eval_set = kwargs['eval_set'][1]
    else:
        if self.validation_split is None:
            X_eval_set = None
            y_eval_set = None
        else:
            n_eval_set = int(round(len(X) * self.validation_split))
            if n_eval_set < 1:
                raise ValueError('`validation_split` is too small! There are no samples for evaluation!')
            if n_eval_set >= len(X):
                raise ValueError('`validation_split` is too large! There are no samples for training!')
            # NOTE(review): the `-1` end bound drops the final sample from
            # the evaluation split — confirm whether this off-by-one is
            # intentional.
            X_eval_set = X[-n_eval_set:-1]
            y_eval_set = y[-n_eval_set:-1]
            X = X[:-n_eval_set]
            y = y[:-n_eval_set]
    # --- Vocabulary and sequence-length statistics (training data) --------
    input_characters = set()
    target_characters = set()
    max_encoder_seq_length = 0
    max_decoder_seq_length = 0
    for sample_ind in range(len(X)):
        prep = self.tokenize_text(X[sample_ind], self.lowercase)
        n = len(prep)
        if n == 0:
            raise ValueError(f'Sample {sample_ind} of `X` is wrong! This sample is empty.')
        if n > max_encoder_seq_length:
            max_encoder_seq_length = n
        input_characters |= set(prep)
        prep = self.tokenize_text(y[sample_ind], self.lowercase)
        n = len(prep)
        if n == 0:
            raise ValueError(f'Sample {sample_ind} of `y` is wrong! This sample is empty.')
        # +2 reserves room for the start ('\t') and end ('\n') markers.
        if (n + 2) > max_decoder_seq_length:
            max_decoder_seq_length = n + 2
        target_characters |= set(prep)
    if len(input_characters) == 0:
        raise ValueError('`X` is empty!')
    if len(target_characters) == 0:
        raise ValueError('`y` is empty!')
    # --- Vocabulary and sequence-length statistics (evaluation data) ------
    input_characters_ = set()
    target_characters_ = set()
    if (X_eval_set is not None) and (y_eval_set is not None):
        for sample_ind in range(len(X_eval_set)):
            prep = self.tokenize_text(X_eval_set[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `X_eval_set` is wrong! This sample is empty.')
            if n > max_encoder_seq_length:
                max_encoder_seq_length = n
            input_characters_ |= set(prep)
            prep = self.tokenize_text(y_eval_set[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `y_eval_set` is wrong! This sample is empty.')
            if (n + 2) > max_decoder_seq_length:
                max_decoder_seq_length = n + 2
            target_characters_ |= set(prep)
        if len(input_characters_) == 0:
            raise ValueError('`X_eval_set` is empty!')
        if len(target_characters_) == 0:
            raise ValueError('`y_eval_set` is empty!')
    # Merge training and evaluation vocabularies; '\t'/'\n' act as the
    # decoder start/end-of-sequence tokens.
    input_characters = sorted(list(input_characters | input_characters_))
    target_characters = sorted(list(target_characters | target_characters_ | {'\t', '\n'}))
    if self.verbose:
        print('')
        print(f'Number of samples for training: {len(X)}.')
        if X_eval_set is not None:
            print(f'Number of samples for evaluation and early stopping: {len(X_eval_set)}.')
        print(f'Number of unique input tokens: {len(input_characters)}.')
        print(f'Number of unique output tokens: {len(target_characters)}.')
        print(f'Max sequence length for inputs: {max_encoder_seq_length}.')
        print(f'Max sequence length for outputs: {max_decoder_seq_length}.')
        print('')
    # Token -> index lookup tables, stored as fitted attributes.
    self.input_token_index_ = dict([(char, i) for i, char in enumerate(input_characters)])
    self.target_token_index_ = dict([(char, i) for i, char in enumerate(target_characters)])
    self.max_encoder_seq_length_ = max_encoder_seq_length
    self.max_decoder_seq_length_ = max_decoder_seq_length
    # --- Build the training-time encoder-decoder graph --------------------
    K.clear_session()
    encoder_inputs = Input(shape=(None, len(self.input_token_index_)),
                           name='EncoderInputs')
    encoder_mask = Masking(name='EncoderMask', mask_value=0.0)(encoder_inputs)
    encoder = LSTM(
        self.latent_dim,
        return_sequences=False, return_state=True,
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
        name='EncoderLSTM'
    )
    encoder_outputs, state_h, state_c = encoder(encoder_mask)
    # Only the final encoder states are passed to the decoder.
    encoder_states = [state_h, state_c]
    decoder_inputs = Input(shape=(None, len(self.target_token_index_)),
                           name='DecoderInputs')
    decoder_mask = Masking(name='DecoderMask', mask_value=0.0)(decoder_inputs)
    decoder_lstm = LSTM(
        self.latent_dim,
        return_sequences=True, return_state=True,
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
        name='DecoderLSTM'
    )
    decoder_outputs, _, _ = decoder_lstm(decoder_mask,
                                         initial_state=encoder_states)
    decoder_dense = Dense(
        len(self.target_token_index_), activation='softmax',
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        name='DecoderOutput'
    )
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs,
                  name='Seq2SeqModel')
    # Ranger-style optimizer: RectifiedAdam wrapped in Lookahead.
    radam = RectifiedAdam(learning_rate=self.lr, weight_decay=self.weight_decay)
    optimizer = Lookahead(radam, sync_period=6, slow_step_size=0.5)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')
    if self.verbose:
        model.summary(positions=[0.23, 0.77, 0.85, 1.0])
        print('')
    # --- Data generators and callbacks -------------------------------------
    training_set_generator = TextPairSequence(
        input_texts=X, target_texts=y,
        batch_size=self.batch_size,
        max_encoder_seq_length=max_encoder_seq_length,
        max_decoder_seq_length=max_decoder_seq_length,
        input_token_index=self.input_token_index_,
        target_token_index=self.target_token_index_,
        lowercase=self.lowercase
    )
    if (X_eval_set is not None) and (y_eval_set is not None):
        evaluation_set_generator = TextPairSequence(
            input_texts=X_eval_set, target_texts=y_eval_set,
            batch_size=self.batch_size,
            max_encoder_seq_length=max_encoder_seq_length,
            max_decoder_seq_length=max_decoder_seq_length,
            input_token_index=self.input_token_index_,
            target_token_index=self.target_token_index_,
            lowercase=self.lowercase
        )
        callbacks = [
            EarlyStopping(patience=5, verbose=(1 if self.verbose else 0),
                          monitor='val_loss')
        ]
    else:
        evaluation_set_generator = None
        callbacks = []
    # Best weights are checkpointed to a temporary file which is always
    # removed afterwards (see finally).
    tmp_weights_name = self.get_temp_name()
    try:
        callbacks.append(
            ModelCheckpoint(filepath=tmp_weights_name,
                            verbose=(1 if self.verbose else 0),
                            save_best_only=True, save_weights_only=True,
                            monitor='loss' if evaluation_set_generator is None else 'val_loss')
        )
        model.fit_generator(
            generator=training_set_generator,
            epochs=self.epochs, verbose=(1 if self.verbose else 0),
            shuffle=True,
            validation_data=evaluation_set_generator,
            callbacks=callbacks
        )
        if os.path.isfile(tmp_weights_name):
            model.load_weights(tmp_weights_name)
    finally:
        if os.path.isfile(tmp_weights_name):
            os.remove(tmp_weights_name)
    # --- Build the inference-time encoder/decoder models -------------------
    self.encoder_model_ = Model(encoder_inputs, encoder_states)
    decoder_state_input_h = Input(shape=(self.latent_dim,))
    decoder_state_input_c = Input(shape=(self.latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_mask, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    self.decoder_model_ = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)
    # Reverse lookup (index -> token) used when decoding predictions.
    self.reverse_target_char_index_ = dict(
        (i, char) for char, i in self.target_token_index_.items())
    return self
def build_optimizer():
    """Return a Ranger optimizer: RectifiedAdam at lr=1e-3 wrapped in
    Lookahead with sync_period=6 and slow_step_size=0.5."""
    inner = RectifiedAdam(1e-3)
    return Lookahead(inner, sync_period=6, slow_step_size=0.5)
def __init__(self, data=(), nb_class=16, opts='adam',
             lossfunc='sparse_categorical_crossentropy', model='dnn_1',
             batch_size=64, val_data=(), learning_rate=0.001, savepath=None,
             callback_type=None, epoch=100, restore=False, logger=None):
    """Configure, build, compile and immediately train a model.

    Parameters:
        data / val_data: (x, y) pairs for training and validation.
        nb_class: number of output classes.
        opts: optimizer selector string (see the optimizer chain below).
        lossfunc: loss selector string.
        model: architecture selector string.
        savepath: root directory for checkpoints/tensorboard/model.json.
        callback_type: container of callback selector strings.
        restore: when True, resume from the last checkpoint.
        logger: logger used for tensorboard/callback messages.

    NOTE(review): this constructor performs the full training run as a side
    effect; several latent bugs are flagged inline below.
    """
    # Store the raw configuration on the instance.
    self.model = model
    self.x_train, self.y_train = data
    self.nb_class = nb_class
    self.opts = opts
    self.lossfunc = lossfunc
    self.batch_size = batch_size
    self.x_val, self.y_val = val_data
    self.learning_rate = learning_rate
    self.savepath = savepath
    self.callback_type = callback_type
    self.epoch = epoch
    self.restore = restore
    input_shape = self.x_train.shape[1]
    nb_class = self.nb_class
    # model choose
    if self.model == 'dnn_1':
        model = dnn_1(input_shape, nb_class)
    elif self.model == 'bilstm':
        model = bilstm(input_shape, nb_class)
    elif self.model == 'advanced_dnn':
        model = sample_dnn(input_shape, nb_class)
    elif self.model == 'rwn':
        model = RWN(self, input_shape)
    elif self.model == 'semi_gan':
        model = SemiGan(self, input_shape)
    else:
        # NOTE(review): unknown names leave `model` bound to the selector
        # string, so model.compile below would fail — verify.
        pass
    # loss funtion choose
    if self.lossfunc == 'focal_loss':
        lossfunc = focal_loss(alpha=1)
    elif self.lossfunc == 'binary_crossentropy':
        lossfunc = 'binary_crossentropy'
    elif self.lossfunc == 'hinge':
        lossfunc = 'hinge'
    elif self.lossfunc == 'squared_hinge':
        lossfunc = 'squared_hinge'
    elif self.lossfunc == 'categorical_crossentropy':
        lossfunc = 'categorical_crossentropy'
    elif self.lossfunc == 'sparse_categorical_crossentropy':
        lossfunc = 'sparse_categorical_crossentropy'
    elif self.lossfunc == 'kulback_leibler_divergence':
        # NOTE(review): Keras spells this 'kullback_leibler_divergence'
        # (double l) — this branch would pass an unknown loss name.
        lossfunc = 'kulback_leibler_divergence'
    else:
        pass
    # optimizer choose
    if self.opts == 'adam':
        opts = Adam(lr=self.learning_rate, decay=1e-6)
    elif self.opts == 'rmsprop':
        opts = RMSprop(lr=self.learning_rate, epsilon=1.0)
    elif self.opts == 'adagrad':
        # NOTE(review): Adam has no `adagrad` kwarg — this raises TypeError.
        # It also shadows the Adagrad branch further down (same condition,
        # so that branch is unreachable).
        opts = Adam(lr=self.learning_rate, decay=1e-6, adagrad=True)
    elif self.opts == 'sgd':
        # NOTE(review): `nestrov` is a typo for `nesterov` — TypeError.
        opts = SGD(lr=self.learning_rate, momentum=0.9, decay=1e-6,
                   nestrov=True)
    elif self.opts == 'amsgrad':
        opts = Adam(lr=self.learning_rate, decay=1e-6, amsgrad=True)
    elif self.opts == 'adagrad':
        # NOTE(review): unreachable — duplicated condition above.
        opts = Adagrad(lr=self.learning_rate, decay=1e-6)
    elif self.opts == 'nadam':
        opts = Nadam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999,
                     epsilon=None, schedule_decay=0.004)
    elif self.opts == 'adadelta':
        opts = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    elif self.opts == 'radam':
        # NOTE(review): ignores self.learning_rate (hard-coded 1e-3).
        opts = RectifiedAdam(lr=1e-3, total_steps=self.epoch,
                             warmup_proportion=0.1, min_lr=1e-6)
    elif self.opts == 'lookahead':
        # Ranger-style: RectifiedAdam wrapped in Lookahead slow weights.
        radam = RectifiedAdam()
        opts = Lookahead(radam, sync_period=6, slow_step_size=0.5)
    elif self.opts == 'lazyadam':
        opts = LazyAdam(lr=self.learning_rate)
    elif self.opts == 'conditionalgradient':
        opts = ConditionalGradient(lr=0.99949, lambda_=203)
    else:
        pass
    print("loss func is {}".format(lossfunc))
    auc = tf.keras.metrics.AUC()
    recall = tf.keras.metrics.Recall()
    precision = tf.keras.metrics.Precision()
    model.compile(optimizer=opts, loss=lossfunc,
                  metrics=['acc', auc, recall, precision])
    # x_train = self.x_train
    # x_val = self.x_val
    # y_train = self.y_train
    # y_val = self.y_val
    x_train = np.asarray(self.x_train)
    x_val = np.asarray(self.x_val)
    y_train = np.asarray(self.y_train)
    y_val = np.asarray(self.y_val)
    MODEL_SAVE_FOLDER_PATH = os.path.join(self.savepath, self.model)
    if not os.path.exists(MODEL_SAVE_FOLDER_PATH):
        os.mkdir(MODEL_SAVE_FOLDER_PATH)

    def lrdropping(self):
        # Step-decay schedule: multiply by `drop` every `epochs_drop`.
        # NOTE(review): `epoch` here is neither a parameter nor a local —
        # it resolves to the outer constructor argument, so the schedule
        # never actually varies per training epoch. LearningRateScheduler
        # also passes the epoch as the first positional arg (bound to
        # `self` here) — confirm the intended signature.
        initial_lrate = self.learning_rate
        drop = 0.9
        epochs_drop = 3.0
        lrate = initial_lrate * math.pow(
            drop, math.floor((1 + epoch) / epochs_drop))
        return lrate

    callbacks = []
    if 'checkpoint' in self.callback_type:
        checkpoint = ModelCheckpoint(os.path.join(
            MODEL_SAVE_FOLDER_PATH, 'checkpoint-{epoch:02d}.h5'),
            monitor='val_auc', save_best_only=False, mode='max')
        callbacks.append(checkpoint)
    else:
        pass
    # NOTE(review): 'elarystopping' is a typo ('earlystopping') that callers
    # must reproduce to enable this callback.
    if 'elarystopping' in self.callback_type:
        earlystopping = EarlyStopping(monitor='val_auc', patience=5,
                                      verbose=1, mode='max')
        callbacks.append(earlystopping)
    else:
        pass
    if 'tensorboard' in self.callback_type:
        logger.info("tensorboard path : {}".format(MODEL_SAVE_FOLDER_PATH))
        tensorboard = TensorBoard(log_dir=os.path.join(
            "tensorboard", MODEL_SAVE_FOLDER_PATH), histogram_freq=0,
            write_graph=True, write_images=True)
        tensorboard.set_model(model)
        # NOTE(review): train_summary_writer is created but never used.
        train_summary_writer = tf.summary.create_file_writer(
            os.path.join("tensorboard", MODEL_SAVE_FOLDER_PATH))
        callbacks.append(tensorboard)
    else:
        pass
    if 'rateschedule' in self.callback_type:
        lrd = LearningRateScheduler(lrdropping)
        callbacks.append(lrd)
    else:
        pass
    if 'interval_check' in self.callback_type:
        inter = IntervalEvaluation(
            validation_data=(x_val, y_val), interval=1,
            savedir=os.path.join(self.savepath, self.model),
            file='Evaluation_{}.csv'.format(self.model), logger=logger)
        callbacks.append(inter)
    else:
        pass
    if self.opts == 'conditionalgradient':
        def frobenius_norm(m):
            # Frobenius norm over a list of weight tensors.
            total_reduce_sum = 0
            for i in range(len(m)):
                total_reduce_sum = total_reduce_sum + tf.math.reduce_sum(
                    m[i]**2)
            norm = total_reduce_sum**0.5
            return norm
        CG_frobenius_norm_of_weight = []
        # NOTE(review): `.np()` is likely meant to be `.numpy()`, and the
        # LambdaCallback `CG_get_weight_norm` is created but never used —
        # `cgnorm` below is the plain list, which is not a Keras callback;
        # appending it to `callbacks` will fail inside fit. Confirm.
        CG_get_weight_norm = LambdaCallback(
            on_epoch_end=lambda batch, logs: CG_frobenius_norm_of_weight.
            append(frobenius_norm(model.trainable_weights).np()))
        cgnorm = CG_frobenius_norm_of_weight
        callbacks.append(cgnorm)
    else:
        pass
    # Persist the architecture alongside the checkpoints.
    model_json = model.to_json()
    with open(os.path.join(MODEL_SAVE_FOLDER_PATH, 'model.json'),
              'w') as json_file:
        json_file.write(model_json)
    if self.restore:
        chkp = last_checkpoint(MODEL_SAVE_FOLDER_PATH)
        # NOTE(review): Keras API is `load_weights` (plural) — this raises
        # AttributeError. Also init_epoch is computed but never passed to
        # model.fit (no initial_epoch argument below).
        model.load_weight(chkp)
        init_epoch = int(os.path.basename(chkp).split('-')[1])
        print("================== restore checkpoint ==================")
    else:
        init_epoch = 0
        print("================== restore failed ==================")
    # Balance classes according to their frequency in the training labels.
    cw = class_weight.compute_class_weight(class_weight='balanced',
                                           classes=np.unique(y_train),
                                           y=y_train)
    logger.info("callback is {}".format(callbacks))
    hist = model.fit(x=x_train, y=y_train, epochs=self.epoch, verbose=1,
                     validation_data=(x_val, y_val), shuffle=True,
                     callbacks=callbacks, class_weight=cw)
def create_model(X_train, y_train, X_valid, y_valid, X_test, y_test):
    """ Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.

    NOTE(review): this is a hyperas template — the ``{{...}}`` expressions
    are substituted by hyperas before execution and are not valid plain
    Python; do not run this file directly.
    """
    # Hyperparameter search space (filled in by hyperas).
    dp = {{uniform(0, 0.5)}}
    N_CLASSES = y_train.shape[1]
    nb_features = X_train.shape[1]
    sequence_input = tf.keras.layers.Input(shape=nb_features, dtype='float32')
    dense2 = tf.keras.layers.Dense(64, activation='relu')(
        sequence_input)  # Does using 2*32 layers make sense ?
    drop2 = tf.keras.layers.Dropout(dp)(dense2)
    dense3 = tf.keras.layers.Dense(32, activation='relu')(drop2)
    drop3 = tf.keras.layers.Dropout(dp)(dense3)
    dense4 = tf.keras.layers.Dense(16, activation='relu')(drop3)
    drop4 = tf.keras.layers.Dropout(dp)(dense4)
    predictions = tf.keras.layers.Dense(N_CLASSES, activation='softmax')(drop4)
    model = tf.keras.Model(sequence_input, predictions)
    #==================================================
    # Specifying the optimizer
    #==================================================
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
    check = ModelCheckpoint(filepath='w_cbf_hyperopt.hdf5',
                            verbose=1, save_best_only=True)
    # Either plain Adam or Ranger (RectifiedAdam + Lookahead).
    optim_ch = {{choice(['adam', 'ranger'])}}
    lr = {{uniform(1e-3, 1e-2)}}
    if optim_ch == 'adam':
        optim = tf.keras.optimizers.Adam(lr=lr)
    else:
        sync_period = {{choice([2, 6, 10])}}
        slow_step_size = {{normal(0.5, 0.1)}}
        rad = RectifiedAdam(lr=lr)
        optim = Lookahead(rad, sync_period=sync_period,
                          slow_step_size=slow_step_size)
    batch_size = {{choice([64 * 4, 64 * 8])}}
    STEP_SIZE_TRAIN = (len(X_train) // batch_size) + 1
    STEP_SIZE_VALID = 1
    # Class-balanced loss hyperparameters.
    beta = {{choice([0.9, 0.99, 0.999, 0.9999, 0.99993])}}
    gamma = {{uniform(1, 2.2)}}
    print('gamma value is :', gamma)
    sample_per_class = np.sum(y_train, axis=0)
    model.compile(loss=[CB_loss(sample_per_class, beta=beta, gamma=gamma)],
                  metrics=['accuracy'], optimizer=optim)
    result = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                       steps_per_epoch=STEP_SIZE_TRAIN,
                       validation_steps=STEP_SIZE_VALID,
                       epochs=60, shuffle=True, verbose=2,
                       callbacks=[check, es])
    # Get the highest validation accuracy of the training epochs
    loss_acc = np.amin(result.history['val_loss'])
    print('Min loss of epoch:', loss_acc)
    # Restore the best checkpointed weights before returning the model.
    model.load_weights('w_cbf_hyperopt.hdf5')
    return {'loss': loss_acc, 'status': STATUS_OK, 'model': model}
def test_serialization():
    """A deserialized Lookahead optimizer must report the same config as
    the instance it was serialized from."""
    original = Lookahead("adam", sync_period=10, slow_step_size=0.4)
    round_tripped = tf.keras.optimizers.deserialize(
        tf.keras.optimizers.serialize(original))
    assert round_tripped.get_config() == original.get_config()
########################################################################## # On the fly prediction ########################################################################## os.chdir('C:/Users/rfuchs/Documents/GitHub/phyto_curves_reco') from pred_functions import pred_n_count import tensorflow as tf from tensorflow_addons.optimizers import Lookahead, RectifiedAdam from time import time from copy import deepcopy from losses import categorical_focal_loss # Model and nomenclature model = tf.keras.models.load_model('trained_models/hyperopt_model_focal2', compile = False) model.compile(optimizer=Lookahead(RectifiedAdam(lr = 0.003589101299926042), sync_period = 10, slow_step_size = 0.20736365316666247), loss = categorical_focal_loss(gamma = 2.199584705628343, alpha = 0.25)) tn = pd.read_csv('train_test_nomenclature.csv') tn.columns = ['Particle_class', 'label'] phyto_ts = pd.DataFrame(columns = ['picoeucaryote', 'synechococcus', 'nanoeucaryote', 'cryptophyte', \ 'unassigned particle', 'airbubble', 'microphytoplancton', 'prochlorococcus', 'date']) phyto_ts_proba = deepcopy(phyto_ts) # Extracted from X_test thrs = [0.8158158158158159, 0.7297297297297297, 0.5085085085085085, 0.3963963963963964, 0.8378378378378378, \
def create_model(X_train, y_train, X_valid, y_valid, X_test, y_test):
    """ Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Return value has to be a valid python dictionary with two customary keys:
        - loss: Specify a numeric evaluation metric to be minimized
        - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
        - model: specify the model just created so that we can later use it again.

    NOTE(review): this is a hyperas template — the ``{{...}}`` expressions
    are substituted by hyperas before execution and are not valid plain
    Python; do not run this file directly.
    """
    # Hyperparameter search space (filled in by hyperas).
    dp = {{uniform(0, 0.5)}}
    N_CLASSES = y_train.shape[1]
    max_len = X_train.shape[1]
    nb_curves = X_train.shape[2]
    sequence_input = tf.keras.layers.Input(shape=(max_len, nb_curves),
                                           dtype='float32')
    # A 1D convolution with 128 output channels: Extract features from the curves
    x = tf.keras.layers.Conv1D(64, 5, activation='relu')(sequence_input)
    x = tf.keras.layers.Conv1D(32, 5, activation='relu')(x)
    x = tf.keras.layers.Conv1D(16, 5, activation='relu')(x)
    # Average those features
    average = tf.keras.layers.GlobalAveragePooling1D()(x)
    dense2 = tf.keras.layers.Dense(32, activation='relu')(
        average)  # Does using 2*32 layers make sense ?
    drop2 = tf.keras.layers.Dropout(dp)(dense2)
    dense3 = tf.keras.layers.Dense(32, activation='relu')(drop2)
    drop3 = tf.keras.layers.Dropout(dp)(dense3)
    dense4 = tf.keras.layers.Dense(16, activation='relu')(drop3)
    drop4 = tf.keras.layers.Dropout(dp)(dense4)
    predictions = tf.keras.layers.Dense(N_CLASSES, activation='softmax')(drop4)
    model = tf.keras.Model(sequence_input, predictions)
    #==================================================
    # Specifying the optimizer
    #==================================================
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=15)
    check = ModelCheckpoint(filepath='w_categ_hyperopt.hdf5',
                            verbose=1, save_best_only=True)
    # Either plain Adam or Ranger (RectifiedAdam + Lookahead).
    optim_ch = {{choice(['adam', 'ranger'])}}
    lr = {{uniform(1e-3, 1e-2)}}
    if optim_ch == 'adam':
        optim = tf.keras.optimizers.Adam(lr=lr)
    else:
        sync_period = {{choice([2, 6, 10])}}
        slow_step_size = {{normal(0.5, 0.1)}}
        rad = RectifiedAdam(lr=lr)
        optim = Lookahead(rad, sync_period=sync_period,
                          slow_step_size=slow_step_size)
    # Defining the weights: Take the average over SSLAMM data
    weights = {{choice(['regular', 'sqrt'])}}
    if weights == 'regular':
        # Inverse-frequency class weights, normalized to sum to 1.
        w = 1 / np.sum(y_train, axis=0)
        w = w / w.sum()
    else:
        # Softer weighting: inverse square-root of the class frequencies.
        w = 1 / np.sqrt(np.sum(y_train, axis=0))
        w = w / w.sum()
    w = dict(zip(range(N_CLASSES), w))
    batch_size = {{choice([64 * 4, 64 * 8])}}
    STEP_SIZE_TRAIN = (len(X_train) // batch_size) + 1
    STEP_SIZE_VALID = 1
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer=optim)
    result = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),
                       steps_per_epoch=STEP_SIZE_TRAIN,
                       validation_steps=STEP_SIZE_VALID,
                       epochs=60, class_weight=w, shuffle=True, verbose=2,
                       callbacks=[check, es])
    # Get the highest validation accuracy of the training epochs
    loss_acc = np.amin(result.history['val_loss'])
    print('Min loss of epoch:', loss_acc)
    # Restore the best checkpointed weights before returning the model.
    model.load_weights('w_categ_hyperopt.hdf5')
    return {'loss': loss_acc, 'status': STATUS_OK, 'model': model}