def __init__(self, context: det.TrialContext) -> None:
    """Stash the Determined context, configs, and loss for this trial."""
    self.context = context
    self.data_config = context.get_data_config()
    self.hparams = context.get_hparams()
    # Plain (unsmoothed) cross-entropy objective.
    self.criterion = torch.nn.functional.cross_entropy
    # Most recently seen epoch; consulted only when logging.
    self._last_epoch = -1
def __init__(self, context: det.TrialContext) -> None:
    """Set up the trial with a label-smoothing cross-entropy criterion."""
    self.context = context
    self.data_config = context.get_data_config()
    # Both smoothing knobs come from the experiment hyperparameters.
    num_classes = context.get_hparam("num_classes")
    smoothing_rate = context.get_hparam("label_smoothing_rate")
    self.criterion = CrossEntropyLabelSmooth(num_classes, smoothing_rate)
    self.last_epoch_idx = -1
def from_trial(
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Construct a TFKerasTrialController from a user-defined TFKerasTrial.

    Validates and narrows the trial/context types, configures the TF
    session, adapts the user's data loaders into per-slot-sized batches,
    compiles the user's model, and wires everything into a controller.
    """
    # Narrow the generic det types to their tf.keras-specific subclasses.
    check.is_instance(
        context, keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext")
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial,
                      "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)
    session = TFKerasTrialController._configure_session(
        env, hvd_config, trial.session_config())
    # Wrap the user's data loaders so they yield per-slot-sized batches.
    training_data = keras._adapt_data_from_data_loader(
        input_data=trial.build_training_data_loader(),
        batch_size=context.get_per_slot_batch_size(),
    )
    validation_data = keras._adapt_data_from_data_loader(
        input_data=trial.build_validation_data_loader(),
        batch_size=context.get_per_slot_batch_size(),
    )
    # build_model() is expected to call wrap_model(...) and model.compile(...),
    # which populate context.model and context.compile_args (checked below).
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")
    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    compile_args = cast(inspect.BoundArguments, context.compile_args)
    TFKerasTrialController.compile_model(context=context,
                                         compile_args=compile_args,
                                         env=env,
                                         hvd_config=hvd_config)
    tf_keras_callbacks = trial.keras_callbacks()
    return TFKerasTrialController(
        context.model,
        session,
        keras.TFKerasTrainConfig(training_data, validation_data, tf_keras_callbacks),
        context,
        env,
        workloads,
        load_path,
        rendezvous_info,
        hvd_config,
    )
def __init__(self, context: det.TrialContext) -> None:
    """Initialize the trial and seed its metric record."""
    self.context = context
    self.data_config = context.get_data_config()
    self.hparams = context.get_hparams()
    # Standard (unsmoothed) cross-entropy objective.
    self.criterion = torch.nn.functional.cross_entropy
    # Most recently processed epoch; used for logging only.
    self._last_epoch = -1
    # Metric record, seeded with worst-case values.
    self.results = {
        "loss": float("inf"),
        "top1_accuracy": 0,
        "top5_accuracy": 0,
    }
def from_trial(
    cls: Type["TFKerasTrialController"],
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: Optional[workload.Stream] = None,
) -> det.TrialController:
    """Build a trial controller from a user's TFKerasTrial subclass.

    Narrows the trial/context types, configures the TF session, adapts
    the data loaders, compiles the model, and returns the controller.
    """
    check.is_instance(
        context, keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext"
    )
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial, "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)

    # Keras only supports the horovod backend for distributed training.
    session = cls._configure_session(
        env, trial.session_config(), use_horovod=context.distributed.size > 1
    )

    # Both loaders are batched to the per-slot batch size.
    per_slot_batch = context.get_per_slot_batch_size()
    train_data = keras._adapt_data_from_data_loader(
        input_data=trial.build_training_data_loader(), batch_size=per_slot_batch
    )
    val_data = keras._adapt_data_from_data_loader(
        input_data=trial.build_validation_data_loader(), batch_size=per_slot_batch
    )

    # build_model() must call wrap_model(...) and model.compile(...), which
    # record context.model and context.compile_args (checked below).
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")
    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    bound_compile_args = cast(inspect.BoundArguments, context.compile_args)
    cls.compile_model(context=context, compile_args=bound_compile_args, env=env)

    user_callbacks = trial.keras_callbacks()
    return cls(
        context.model,
        session,
        keras.TFKerasTrainConfig(train_data, val_data, user_callbacks),
        trial,
        context,
        env,
        workloads,
    )
def __init__(self, context: det.TrialContext) -> None:
    """Download the corpus and set up language-model training state."""
    self.context = context
    self.data_config = context.get_data_config()
    self.hparams = AttrDict(context.get_hparams())
    # Download path comes straight from the data config; as written, every
    # rank uses the same directory.  NOTE(review): an earlier comment claimed
    # per-rank directories — confirm whether concurrent ranks can race here.
    self.download_directory = self.data_config["data_download_dir"]
    data.download_data(self.download_directory)
    self.corpus = data_util.Corpus(self.download_directory)
    self.ntokens = len(self.corpus.dictionary)
    self.hidden = None
    # Eval history; training switches to ASGD once validation perplexity
    # stops improving.
    self._last_loss = None
    self._eval_history = []
    self._last_epoch = -1
def __init__(self, context: det.TrialContext) -> None:
    """Assemble the mmdetection model, optimizer, and LR schedule."""
    self.context = context
    self.hparams = context.get_hparams()
    self.data_config = context.get_data_config()

    # Load the mmdet config file and overlay dataset settings from the
    # experiment's data config.
    self.cfg = Config.fromfile(self.hparams["config_file"])
    self.cfg.data.train.ann_file = self.data_config["train_ann_file"]
    self.cfg.data.val.ann_file = self.data_config["val_ann_file"]
    self.cfg.data.val.test_mode = True
    self.cfg.data.workers_per_gpu = self.data_config["workers_per_gpu"]
    if self.data_config["backend"] in ["gcs", "fake"]:
        sub_backend(self.data_config["backend"], self.cfg)
    print(self.cfg)

    self.model = self.context.wrap_model(
        build_detector(self.cfg.model,
                       train_cfg=self.cfg.train_cfg,
                       test_cfg=self.cfg.test_cfg))
    self.optimizer = self.context.wrap_optimizer(
        build_optimizer(self.model, self.cfg.optimizer))

    # Multi-step LR schedule wrapped with a warmup phase; stepped manually
    # by the training loop.
    warmup_multistep = WarmupWrapper(MultiStepLR)(
        self.hparams["warmup"],          # warmup schedule
        self.hparams["warmup_iters"],    # warmup_iters
        self.hparams["warmup_ratio"],    # warmup_ratio
        self.optimizer,
        [self.hparams["step1"], self.hparams["step2"]],  # milestones
        self.hparams["gamma"],           # gamma
    )
    self.scheduler = self.context.wrap_lr_scheduler(
        warmup_multistep, step_mode=LRScheduler.StepMode.MANUAL_STEP)

    # NOTE(review): in the original the conditional sat *inside* the lambda,
    # so clip_grads_fn is always a callable that silently does nothing when
    # clipping is disabled.  That semantics is preserved here.
    def _maybe_clip(params):
        if self.hparams["clip_grads"]:
            return torch.nn.utils.clip_grad_norm_(
                params, self.hparams["clip_grads_norm"])
        return None

    self.clip_grads_fn = _maybe_clip
def __init__(self, context: det.TrialContext) -> None:
    """Build the two-headed convolutional network.

    A shared convolutional trunk feeds two heads: a 10-way digit
    classifier and a single-probability binary classifier.
    """
    super().__init__()
    # Hyperparameters that influence the model architecture.
    self.n_filters1 = context.get_hparam("n_filters1")
    self.n_filters2 = context.get_hparam("n_filters2")
    self.dropout = context.get_hparam("dropout")

    # Shared trunk.  The Linear in-features of 16 * n_filters2 assumes the
    # conv/pool stack reduces the input to a 4x4 spatial map (true for
    # 28x28 single-channel images) — TODO confirm against the data loader.
    self.model = nn.Sequential(
        nn.Conv2d(1, self.n_filters1, kernel_size=5),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Conv2d(self.n_filters1, self.n_filters2, kernel_size=5),
        nn.MaxPool2d(2),
        nn.ReLU(),
        Flatten(),
        nn.Linear(16 * self.n_filters2, 50),
        nn.ReLU(),
        # NOTE(review): input here is 2-D (N, 50); nn.Dropout is the
        # conventional choice over Dropout2d — kept as-is to preserve the
        # original regularization behavior.
        nn.Dropout2d(self.dropout),
    )  # type: nn.Sequential

    # Digit head.  BUG FIX: the softmax must normalize over the class
    # dimension (dim=1) of the (batch, 10) logits, not over the batch
    # dimension (dim=0) as originally written.
    self.digit = nn.Sequential(nn.Linear(50, 10), nn.Softmax(dim=1))

    # Binary head: one sigmoid probability per example, squeezed to (N,).
    self.binary = nn.Sequential(nn.Linear(50, 1), nn.Sigmoid(), Squeeze())
def __init__(self, context: det.TrialContext) -> None:
    """Configure per-phase episode sizes for few-shot training/eval."""
    self.context = context
    self.data_config = context.get_data_config()

    # Episode composition, keyed by phase.
    phases = ("train", "val")
    self.num_classes = {p: context.get_hparam(f"num_classes_{p}") for p in phases}
    self.num_support = {p: context.get_hparam(f"num_support_{p}") for p in phases}
    self.num_query = {
        "train": context.get_hparam("num_query_train"),
        # Use all available examples for val at meta-test time.
        "val": None,
    }
    self.get_train_valid_splits()
def from_trial(
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Create a TFKerasTrialController from a user-defined TFKerasTrial.

    Type-checks and narrows the trial/context, configures the TF session,
    adapts the user's data loaders into batched keras data, swaps in a
    multi-GPU model/optimizer pair, compiles the model, and returns the
    controller.
    """
    check.is_instance(
        context,
        keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext",
    )
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial,
                      "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)
    session = TFKerasTrialController._configure_session(
        env, hvd_config, trial.session_config())
    # Training batches drop leftovers so every step sees a full batch;
    # validation keeps them so every example is evaluated.
    training_x, training_y, training_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_training_data_loader())
    training_data = keras._adapt_keras_data(
        x=training_x,
        y=training_y,
        sample_weight=training_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=True,
    )
    val_x, val_y, val_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_validation_data_loader())
    validation_data = keras._adapt_keras_data(
        x=val_x,
        y=val_y,
        sample_weight=val_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=False,
    )
    # build_model() is expected to call wrap_model(...) and model.compile(...),
    # which populate context.model and context.compile_args (checked below).
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")
    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    compile_args = cast(inspect.BoundArguments, context.compile_args)
    # Replace the model and the bound optimizer argument with multi-GPU-aware
    # versions before compiling; profiler output goes to the DeterminedProfiler
    # filename.
    (
        context.model,
        compile_args.arguments["optimizer"],
    ) = keras._get_multi_gpu_model_and_optimizer(
        pre_compiled_model=context.model,
        optimizer=compile_args.arguments["optimizer"],
        env=env,
        hvd_config=hvd_config,
        profile_frequency=env.experiment_config.profile_frequency(),
        profile_filename=DeterminedProfiler.OUTPUT_FILENAME,
    )
    # On TF >= 2.0 under horovod, compile with
    # experimental_run_tf_function=False so gradients go through
    # optimizer.get_gradients() (see the log message below).
    if hvd_config.use and version.parse(
            tf.__version__) >= version.parse("2.0.0"):
        logging.info(
            "Calling `model.compile(...)` with `experimental_run_tf_function=False` to ensure "
            "TensorFlow calls `optimizer.get_gradients()` to compute gradients."
        )
        context.model.compile(*compile_args.args, **compile_args.kwargs,
                              experimental_run_tf_function=False)
    else:
        context.model.compile(*compile_args.args, **compile_args.kwargs)
    tf_keras_callbacks = trial.keras_callbacks()
    return TFKerasTrialController(
        context.model,
        session,
        keras.TFKerasTrainConfig(training_data, validation_data, tf_keras_callbacks),
        context,
        env,
        workloads,
        load_path,
        rendezvous_info,
        hvd_config,
    )
def from_trial(
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Turn a user-defined TFKerasTrial into a running trial controller.

    Narrows the trial/context types, configures the TF session, adapts
    both data loaders, compiles the user's model, and wires everything
    into a TFKerasTrialController.
    """
    check.is_instance(
        context, keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext")
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial, "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)

    session = TFKerasTrialController._configure_session(
        env, hvd_config, trial.session_config())

    train_loader = trial.build_training_data_loader()
    val_loader = trial.build_validation_data_loader()

    # build_model() must call wrap_model(...), which records context.model.
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")

    # fit()-style knobs (multiprocessing, workers, queue size) are carried on
    # the context; leftovers are dropped only for training batches.
    def _adapt(loader, drop_leftovers):
        x, y, sample_weight = keras._get_x_y_and_sample_weight(input_data=loader)
        return keras._adapt_keras_data(
            x=x,
            y=y,
            sample_weight=sample_weight,
            batch_size=context.get_per_slot_batch_size(),
            use_multiprocessing=context._fit_use_multiprocessing,
            workers=context._fit_workers,
            max_queue_size=context._fit_max_queue_size,
            drop_leftovers=drop_leftovers,
        )

    training_data = _adapt(train_loader, True)
    validation_data = _adapt(val_loader, False)

    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    bound_compile_args = cast(inspect.BoundArguments, context.compile_args)
    TFKerasTrialController.compile_model(
        context=context, compile_args=bound_compile_args, env=env,
        hvd_config=hvd_config)

    user_callbacks = trial.keras_callbacks()
    return TFKerasTrialController(
        context.model,
        session,
        keras.TFKerasTrainConfig(training_data, validation_data, user_callbacks),
        context,
        env,
        workloads,
        load_path,
        rendezvous_info,
        hvd_config,
    )