import keras
from keras import losses  # assumed home of `losses.get`; swap in the project-local module if different


def train(tensor, model, model_config, callbacks):
    # `io_local` and `constants` are project-local modules.
    if isinstance(model_config["loss"], list):
        loss = [losses.get(l) for l in model_config["loss"]]
    else:
        loss = losses.get(model_config["loss"])
    optimizer = model_config["optimizer"]
    x = io_local.get_array(tensor, model_config["x"])
    y = io_local.get_array(tensor, model_config["y"])
    model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])
    history = model.fit(
        x=x,
        y=y,
        epochs=constants.TRAIN_EPOCHS,
        batch_size=constants.TRAIN_BATCH_SIZE,
        # Fraction of the data held out for validation.
        validation_split=constants.VAL_SPLIT,
        callbacks=callbacks,
    )
    keras.backend.get_session().close()
    return history
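
# A minimal usage sketch for train() above, not the project's real setup: it
# assumes `io_local.get_array(tensor, key)` does a plain key lookup and that
# `constants` defines TRAIN_EPOCHS / TRAIN_BATCH_SIZE / VAL_SPLIT. The demo
# model, data, and config keys are illustrative assumptions.
import numpy as np
from keras import layers, models

demo_tensor = {
    "features": np.random.rand(100, 8).astype("float32"),
    "labels": np.random.randint(0, 2, size=(100, 1)).astype("float32"),
}
demo_config = {
    "loss": "binary_crossentropy",
    "optimizer": "adam",
    "x": "features",
    "y": "labels",
}
demo_model = models.Sequential([
    layers.Input(shape=(8,)),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])
demo_history = train(demo_tensor, demo_model, demo_config, callbacks=[])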
def build(self, loss='mse', optimizer='gd') -> None:
    """Iterates through self.model, initializing all the layers' variables.

    Arguments:
        loss : lo.Base_Loss/str : The loss function this model will use to
            calculate how well the model is doing.
        optimizer : op.Base_Optimizer/str : The optimizer function this model
            will use to optimize the variables of each layer.
    """
    self.loss = lo.get(loss)
    self.optimizer = op.get(optimizer)
    for layer in self.model:
        layer.build()
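
# lo.get() and op.get() above imply the usual name-or-instance resolver
# pattern: a string is looked up in a registry and instantiated, while an
# already-built instance passes through untouched. A minimal sketch of such a
# resolver; the registry contents are assumptions, and the real lo/op modules
# may differ.
def get(identifier, registry):
    """Resolve a string name via `registry`, or pass an instance through."""
    if isinstance(identifier, str):
        try:
            return registry[identifier.lower()]()
        except KeyError:
            raise ValueError(f"Unknown identifier: {identifier!r}")
    return identifier

# Hypothetical usage, assuming lo keeps something like
# _LOSSES = {'mse': MSE_Loss, ...}:
#   get('mse', _LOSSES)      -> MSE_Loss()
#   get(MSE_Loss(), _LOSSES) -> the same instance, unchanged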
def run(trial_dir: str):
  """Run the experiment."""
  tf.random.set_seed(FLAGS.seed)
  np.random.seed(FLAGS.seed)
  strategy = ub.strategy_utils.get_strategy(FLAGS.tpu, FLAGS.use_tpu)
  with strategy.scope():
    # Setup CIFAR-10 tf.data.Dataset splits.
    dataset_builder = ub.datasets.Cifar10Dataset(
        batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        validation_percent=0.1)  # Use 5000 validation images.
    train_dataset = ub.utils.build_dataset(
        dataset_builder, strategy, 'train', as_tuple=True)
    val_dataset = ub.utils.build_dataset(
        dataset_builder, strategy, 'validation', as_tuple=True)
    test_dataset = ub.utils.build_dataset(
        dataset_builder, strategy, 'test', as_tuple=True)

    # Setup optimizer.
    _check_batch_replica_divisible(FLAGS.batch_size, strategy)
    _check_batch_replica_divisible(FLAGS.eval_batch_size, strategy)
    optimizer = ub.optimizers.get(
        optimizer_name=FLAGS.optimizer,
        learning_rate_schedule='constant',
        learning_rate=FLAGS.learning_rate,
        weight_decay=FLAGS.weight_decay)

    # Setup model.
    # TODO(shreyaspadhy): How does one get the number of classes in dataset
    model = models_lib.create_model(
        batch_size=FLAGS.batch_size,
        l2_weight=None,
        num_classes=10,
        distance_logits=FLAGS.distance_logits)
    loss_fn = loss_lib.get(
        loss_name=FLAGS.loss_name,
        from_logits=True,
        dm_alpha=FLAGS.dm_alpha,
        focal_gamma=FLAGS.focal_gamma)
    model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=['sparse_categorical_accuracy'])

    # Train and eval.
    steps_per_epoch = (
        dataset_builder.info['num_train_examples'] // FLAGS.batch_size)
    validation_steps = (
        dataset_builder.info['num_validation_examples'] //
        FLAGS.eval_batch_size)
    history = model.fit(
        x=train_dataset,
        batch_size=FLAGS.batch_size,
        epochs=FLAGS.train_steps // steps_per_epoch,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_dataset,
        validation_steps=validation_steps,
        validation_freq=FLAGS.eval_frequency,
        shuffle=False)
    logging.info(history)

    test_steps = (
        dataset_builder.info['num_test_examples'] // FLAGS.eval_batch_size)
    test_result = model.evaluate(
        x=test_dataset, batch_size=FLAGS.eval_batch_size, steps=test_steps)
    logging.info(test_result)

    # Save a checkpoint after training.
    if trial_dir:
      model.save_weights(
          os.path.join(trial_dir, 'model.ckpt-{}'.format(FLAGS.train_steps)))
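
# _check_batch_replica_divisible() is called in both run() functions but not
# defined in this snippet. A plausible sketch, assuming its job is to reject a
# global batch size that cannot be split evenly across the distribution
# strategy's replicas:
import tensorflow as tf

def _check_batch_replica_divisible(batch_size: int,
                                   strategy: tf.distribute.Strategy):
  """Raises if `batch_size` does not divide evenly across replicas."""
  num_replicas = strategy.num_replicas_in_sync
  if batch_size % num_replicas != 0:
    raise ValueError(
        'Batch size must be divisible by the number of replicas: '
        '{} vs {}.'.format(batch_size, num_replicas))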
def run(trial_dir: str, flag_string: Optional[str]):
  """Run the experiment.

  Args:
    trial_dir: Path to the directory to write checkpoints to and read them
      from.
    flag_string: Optional string used to record what flags the job was run
      with.
  """
  tf.random.set_seed(FLAGS.seed)
  np.random.seed(FLAGS.seed)

  if not FLAGS.eval_frequency:
    FLAGS.eval_frequency = FLAGS.log_frequency
  if FLAGS.eval_frequency % FLAGS.log_frequency != 0:
    raise ValueError(
        'log_frequency ({}) must evenly divide eval_frequency '
        '({}).'.format(FLAGS.log_frequency, FLAGS.eval_frequency))

  strategy = ub.strategy_utils.get_strategy(FLAGS.tpu, FLAGS.use_tpu)
  with strategy.scope():
    _maybe_setup_trial_dir(strategy, trial_dir, flag_string)

    # TODO(znado): pass all dataset and model kwargs.
    train_dataset_builder = ub.datasets.get(
        dataset_name=FLAGS.dataset_name,
        split='train',
        validation_percent=FLAGS.validation_percent,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size)
    if FLAGS.validation_percent > 0:
      validation_dataset_builder = ub.datasets.get(
          dataset_name=FLAGS.dataset_name,
          split='validation',
          validation_percent=FLAGS.validation_percent)
    else:
      validation_dataset_builder = None
    test_dataset_builder = ub.datasets.get(
        dataset_name=FLAGS.dataset_name, split='test')

    model = models_lib.create_model(
        batch_size=FLAGS.batch_size,
        num_classes=10,
        distance_logits=FLAGS.distance_logits)
    loss_fn = loss_lib.get(
        FLAGS.loss_name, from_logits=True, dm_alpha=FLAGS.dm_alpha)

    if FLAGS.mode == 'eval':
      _check_batch_replica_divisible(FLAGS.eval_batch_size, strategy)
      eval_lib.run_eval_loop(
          validation_dataset_builder=validation_dataset_builder,
          test_dataset_builder=test_dataset_builder,
          batch_size=FLAGS.eval_batch_size,
          model=model,
          loss_fn=loss_fn,
          trial_dir=trial_dir,
          train_steps=FLAGS.train_steps,
          strategy=strategy,
          metric_names=['accuracy', 'loss'],
          checkpoint_step=FLAGS.checkpoint_step)
      return

    _check_batch_replica_divisible(FLAGS.batch_size, strategy)
    if FLAGS.mode == 'train_and_eval':
      _check_batch_replica_divisible(FLAGS.eval_batch_size, strategy)

    steps_per_epoch = train_dataset_builder.num_examples // FLAGS.batch_size
    optimizer_kwargs = {
        k[len('optimizer_hparams_'):]: FLAGS[k].value
        for k in FLAGS
        if k.startswith('optimizer_hparams_')
    }
    optimizer = ub.optimizers.get(
        optimizer_name=FLAGS.optimizer,
        learning_rate_schedule=FLAGS.learning_rate_schedule,
        learning_rate=FLAGS.learning_rate,
        weight_decay=FLAGS.weight_decay,
        steps_per_epoch=steps_per_epoch,
        **optimizer_kwargs)

    train_lib.run_train_loop(
        train_dataset_builder=train_dataset_builder,
        validation_dataset_builder=validation_dataset_builder,
        test_dataset_builder=test_dataset_builder,
        batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        eval_frequency=FLAGS.eval_frequency,
        log_frequency=FLAGS.log_frequency,
        trial_dir=trial_dir,
        train_steps=FLAGS.train_steps,
        mode=FLAGS.mode,
        strategy=strategy,
        metric_names=['accuracy', 'loss'])
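
# The optimizer_kwargs comprehension above relies on a flag-name prefix
# convention: any absl flag named `optimizer_hparams_<name>` is forwarded to
# ub.optimizers.get() as the keyword argument `<name>`. A sketch of how such
# flags might be declared; the specific hyperparameters here are assumptions,
# not flags the project necessarily defines.
from absl import flags

flags.DEFINE_float('optimizer_hparams_momentum', 0.9,
                   'Forwarded to the optimizer as `momentum`.')
flags.DEFINE_float('optimizer_hparams_beta_1', 0.9,
                   'Forwarded to the optimizer as `beta_1`.')

# After flag parsing, the comprehension strips the prefix and collects e.g.
# {'momentum': 0.9, 'beta_1': 0.9}, which is then splatted into
# ub.optimizers.get(**optimizer_kwargs). This lets new optimizer
# hyperparameters be added as flags without touching run().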