def __init__(self, metric_names: Tuple[str] or str, conditions: Tuple[str] or str, model: nn.Module,
             save_dir: str, prefix: str = "", keep_best_only: bool = True, mode="and", stage='eval'):
    super().__init__(ctype="saver")
    self.__metric_names = wrap_tuple(metric_names)
    self.__conditions = wrap_tuple(conditions)
    self.__prefix = prefix if prefix else "model"
    self.__save_dir = save_dir
    self.__keep_best_only = keep_best_only
    self.__prev_model_path = ""
    self.__mode = mode
    self.__stage = wrap_tuple(stage)

    if self.__mode == "avg" and len(self.__conditions) > 1:
        if len(set(self.__conditions)) > 1:
            raise ValueError("All conditions must be the same in '{}' mode, but got {}".format(
                self.__mode, self.__conditions))

    if not exists(self.__save_dir):
        print("Directory {} to save models not found. Creating it.".format(self.__save_dir))
        mkdir(self.__save_dir)

    if len(self.__metric_names) != len(self.__conditions):
        raise ValueError("The number of metric names and conditions must match, "
                         "but got {} != {}".format(len(self.__metric_names), len(self.__conditions)))

    self.__best_metrics = dict()
    for i, metric_name in enumerate(self.__metric_names):
        cond = self.__conditions[i].lower()
        if cond in ["min", "max"]:
            self.__best_metrics[metric_name] = dict()
            self.__best_metrics[metric_name]["value"] = float('Inf') if cond == "min" else float('-Inf')
            self.__best_metrics[metric_name]["cond"] = cond
        else:
            raise ValueError("Values of conditions must be either min or max, but got {}".format(cond))

    self.__model = model
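# A hedged usage sketch for the saver constructor above. The class name "ModelSaver" and the surrounding
# objects (my_model) are assumptions for illustration; only the constructor arguments come from the code above.
# saver = ModelSaver(metric_names=("loss/eval", "accuracy/eval"),
#                    conditions=("min", "max"),          # save when eval loss decreases AND accuracy increases
#                    model=my_model,
#                    save_dir="./snapshots",
#                    prefix="resnet18",
#                    keep_best_only=True)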
def __init__(self, writer, optimizers, names, log_dir: str = None, tag: str = '', name='batch_lr_logger'):
    super().__init__(name=name)
    self.__log_dir = log_dir
    self.__tag = tag
    self.__summary_writer = writer
    self.__optims = wrap_tuple(optimizers)
    self.__names = wrap_tuple(names)
    if len(self.__optims) != len(self.__names):
        raise ValueError('The number of optimizers and names must match, but found {} and {}'.format(
            len(self.__optims), len(self.__names)))
    self.__single = len(self.__names) == 1
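# A hedged usage sketch for the learning-rate logger above. The class name "BatchLRLogger" and the optimizer
# variables are invented for illustration; the writer is expected to be a TensorBoard-style SummaryWriter.
# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter(log_dir='./runs')
# lr_logger = BatchLRLogger(writer=writer,
#                           optimizers=(g_optimizer, d_optimizer),
#                           names=('G', 'D'),
#                           tag='lr')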
def __init__(self, modules: Tuple[Module] or Module, mode: str = "norm", **kwargs):
    super().__init__(ctype="tuner")
    if mode not in self.modes:
        raise ValueError("Mode must be one of {}, but got '{}'".format(self.modes, mode))
    self.__mode: str = mode
    self.__modules: Tuple[Module] = wrap_tuple(modules)
    self.__kwargs = kwargs
def __init__(self, modules: Module or Tuple[Module]):
    super().__init__(ctype="freezer")
    self.__modules: Tuple[Module] = wrap_tuple(modules)
def __init__(self, modules: Module or Tuple[Module]):
    super().__init__()
    self.__modules: Tuple[Module] = wrap_tuple(modules)
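# wrap_tuple is used throughout the constructors above to normalise "single object or tuple" arguments.
# A minimal sketch of its assumed behaviour (the real helper lives elsewhere in the package and may also
# handle other container types):
def _wrap_tuple_sketch(x):
    """Return x unchanged if it is already a tuple, otherwise wrap it in a 1-tuple."""
    if isinstance(x, tuple):
        return x
    return (x,)

# e.g. _wrap_tuple_sketch('eval') == ('eval',), while _wrap_tuple_sketch(('train', 'eval')) is returned as-is.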
def __init__(self, data_provider: DataProvider, data_sampling_config: dict, device: torch.device,
             strategy_config: dict = None, callbacks: Tuple[Callback] or Callback or None = None,
             n_epochs: int or None = 10, n_train_batches: int or None = None,
             train_batchs_choice: str = 'max', sessions: Dict[str, Session] or Session = None,
             distributed=False, use_apex=False):
    """Constructor of Strategy.

    Parameters
    ----------
    data_provider : DataProvider
        Abstracts the access to all data for all sessions.
    data_sampling_config : dict
        Holds the configuration of how the data is sampled.
    device : torch.device
        Device on which the sessions are executed.
    strategy_config : dict
        Holds the training configuration for the multi-model steppers. This is the core of the
        multi-model strategy.
    callbacks : Tuple[Callback] or Callback or None
        Callbacks to be executed inside the strategy main loop.
    n_epochs : int or None
        Number of epochs to be trained.
    n_train_batches : int or None
        Number of training batches. Can be derived from the batch size and the total data size.
    train_batchs_choice : str
        Either 'max' or 'min': whether the number of batches per stage is the maximum or the minimum
        over that stage's loaders.
    sessions : Dict[str, Session] or Session
        Dict of Session objects. The strategy iterates through this dict by name and executes each session
        with the proper callbacks and strategy configuration.
    distributed : bool
        Whether the strategy runs in distributed mode.
    use_apex : bool
        Whether NVIDIA apex is used; if so, only the first GPU (or CPU) process gets the default
        progress bar callback.

    Raises
    ------
    ValueError
        If `data_provider`, `data_sampling_config`, `strategy_config` or `device` is None.
    """
    if data_provider is None or data_sampling_config is None or strategy_config is None or device is None:
        raise ValueError('data_provider, data_sampling_config, strategy_config and device cannot be None')

    # self.__stage_names contains the learning stages, e.g. 'train' and 'eval'. If you want to do only
    # training but no validation, go to the strategy.yml and remove 'eval' from 'stage_names'.
    self.__stage_names = wrap_tuple(strategy_config['stage_names'])
    # self.__model_names contains the names of the trainable models. A trainable model may contain
    # multiple NN models.
    # TODO: Is it necessary to be a set instead of a tuple?
    self.__model_names_by_stage = dict()
    self.__model_names = []
    for stage in self.__stage_names:
        self.__model_names_by_stage[stage] = wrap_tuple(data_sampling_config[stage].data_provider.keys())
        self.__model_names += self.__model_names_by_stage[stage]
    self.__model_names = tuple(set(self.__model_names))
    self.__train_starts_at_epoch = strategy_config['train_starts_at_epoch']
    self.__n_epochs = n_epochs
    self.__data_provider = data_provider
    self.__callbacks = wrap_tuple(callbacks)
    # self.__accumulate_grad is retrieved from the strategy config yml file. It contains a boolean value for
    # each stepper in the sessions tuple. This is a mandatory field in the yml file.
    self.__accumulate_grad = strategy_config['accumulate_grad']
    self.__accumulate_grad_in_iter = strategy_config['accumulate_grad_in_iter']
    self.__data_sampling_config = data_sampling_config
    sessions = wrap_tuple(sessions)
    # self.__num_samples_by_stage is a dictionary holding the number of batches per stage.
    self.__num_samples_by_stage = dict()
    # self.__data_key_by_stage is a dictionary of dictionaries. The key of the outer dictionary is the stage
    # name and the key of the inner dictionary is the trainable model name; the final value is the data key
    # used to retrieve data from the data_provider.
    self.__data_key_by_stage = dict()
    # self.__target_key_by_stage is similar to __data_key_by_stage; as the name suggests, the final value is
    # the target key used to retrieve the target from the data_provider.
    self.__target_key_by_stage = dict()
    self.__num_batches_by_stage = dict()
    self.__distributed = distributed
    self.__use_apex = use_apex
    for stage in self.__stage_names:
        self.__num_batches_by_stage[stage] = None
        self.__data_key_by_stage[stage] = dict()
        self.__num_samples_by_stage[stage] = dict()
        self.__target_key_by_stage[stage] = dict()
        model_names = self.__model_names_by_stage[stage]
        # Iterate through each trainable model stepper. Note that model_name is the name of the stepper,
        # which might contain multiple NN models.
        for model_name in model_names:
            # n_samples_dict maps each loader name to the number of batches drawn from it per iteration.
            n_samples_dict = dict()
            data_keys = []
            target_keys = []
            if model_name in self.__data_sampling_config[stage]['data_provider']:
                data_loader_names = self.__data_sampling_config[stage]['data_provider'][model_name]
            else:
                continue
            for loader_name in data_loader_names:
                n_samples_dict[loader_name] = data_loader_names[loader_name]['batches_per_iter']
                n_batches = len(self.__data_provider.get_loader_by_name(loader_name))
                data_keys.append(data_loader_names[loader_name]['data_key'])
                target_keys.append(data_loader_names[loader_name]['target_key'])

                if self.__num_batches_by_stage[stage] is None:
                    self.__num_batches_by_stage[stage] = n_batches
                elif train_batchs_choice == 'max' and self.__num_batches_by_stage[stage] < n_batches:
                    self.__num_batches_by_stage[stage] = n_batches
                elif train_batchs_choice == 'min' and self.__num_batches_by_stage[stage] > n_batches:
                    self.__num_batches_by_stage[stage] = n_batches

            self.__data_key_by_stage[stage][model_name] = tuple(data_keys)
            self.__target_key_by_stage[stage][model_name] = tuple(target_keys)
            self.__num_samples_by_stage[stage][model_name] = n_samples_dict

    if n_train_batches is not None and n_train_batches > 0:
        self.__num_batches_by_stage['train'] = n_train_batches

    self.__device = device
    self.__sessions = dict()
    # Save the sessions into a dictionary keyed by model stepper name for easy access.
    optimizers = dict()
    for name in self.__model_names:
        self.__sessions[name] = sessions[name]
        if self.__sessions[name].data_provider is None:
            self.__sessions[name].data_provider = self.__data_provider
        if len(sessions) == 1:
            optimizers = self.__sessions[name].optimizer
        else:
            optimizers[name] = self.__sessions[name].optimizer

    if self.__use_apex:
        from collagen.parallel._apex import first_gpu_or_cpu_in_use
        if first_gpu_or_cpu_in_use(self.__device):
            self.__default_strategy_callback = (ProgressbarLogger(update_freq=1, optimizers=optimizers),)
        else:
            self.__default_strategy_callback = ()
    else:
        self.__default_strategy_callback = (ProgressbarLogger(update_freq=1, optimizers=optimizers),)

    self.__callbacks += self.__default_strategy_callback
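# A sketch of the configuration shapes the Strategy constructor above reads. All concrete names
# ("gan_stepper", "train_gan_loader", "eval_gan_loader") are invented for illustration; only the keys
# ('stage_names', 'train_starts_at_epoch', 'accumulate_grad', 'accumulate_grad_in_iter', 'data_provider',
# 'batches_per_iter', 'data_key', 'target_key') are the ones the constructor actually accesses. In the real
# project these configs presumably come from YAML and support both attribute and item access; plain dicts
# are shown here only to illustrate the structure.
strategy_config_sketch = {
    'stage_names': ('train', 'eval'),
    'train_starts_at_epoch': 0,
    'accumulate_grad': False,
    'accumulate_grad_in_iter': True,
}
data_sampling_config_sketch = {
    'train': {
        'data_provider': {
            'gan_stepper': {                  # trainable model (stepper) name
                'train_gan_loader': {         # loader name registered in the DataProvider
                    'batches_per_iter': 1,
                    'data_key': 'img',
                    'target_key': 'target',
                },
            },
        },
    },
    'eval': {
        'data_provider': {
            'gan_stepper': {
                'eval_gan_loader': {
                    'batches_per_iter': 1,
                    'data_key': 'img',
                    'target_key': 'target',
                },
            },
        },
    },
}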
def add_eval_callbacks(self, cbs):
    self.__val_callbacks += wrap_tuple(cbs)
def add_train_callbacks(self, cbs):
    self.__train_callbacks += wrap_tuple(cbs)
def eval(self, data_key: Tuple[str] or str = 'img', accumulate_grad_in_iter=None, accumulate_grad=None,
         target_key: Tuple[str] or str = 'target', cast_target=None):
    """Runs the stepper in `eval` mode for as many iterations as given in the validation / test loader.

    This method does not return anything; it stores everything in the callbacks.
    The callbacks here are called before and after every minibatch.

    Parameters
    ----------
    data_key : Tuple[str] or str
        Key of the dictionary which corresponds to the data. Sometimes (e.g. in Siamese modelzoo),
        we need two items, thus we might need multiple keys.
    accumulate_grad_in_iter : None
        Ignored; gradient accumulation does not apply in eval mode.
    accumulate_grad : None
        Ignored; gradient accumulation does not apply in eval mode.
    target_key : Tuple[str] or str
        Key of the dictionary which corresponds to the target. In case of modelzoo with e.g. multiple heads
        and heterogeneous outputs, it could be useful to use multiple keys.
    cast_target : None or str
        Performs type casting for the target.
    """
    # Gradient accumulation is irrelevant in eval mode, so these arguments are discarded.
    accumulate_grad_in_iter = None
    accumulate_grad = None
    data_key = wrap_tuple(data_key)
    target_key = wrap_tuple(target_key)
    for ind, loader_name in enumerate(self.__val_loader_names):
        cur_loader_state = self.__data_provider.state_dict()[loader_name]
        n_iter = len(cur_loader_state["samples"])
        # All but the last minibatch of the loader are processed in this loop.
        for i in range(n_iter - 1):
            batch = cur_loader_state["samples"][i]
            input_data = self._parse_data(batch, data_key[ind])
            target = self._parse_data(batch, target_key[ind])
            for cb in self.__val_callbacks:
                cb.on_minibatch_begin(loader_name=loader_name,
                                      batches_count=self.__eval_batches_count,
                                      batch=batch,
                                      input=input_data,
                                      target=target,
                                      data_key=data_key[ind],
                                      target_key=target_key[ind],
                                      stepper=self.__stepper)
            loss, eval_result = self.__stepper.eval_step(input_data,
                                                         target,
                                                         return_out=True,
                                                         callbacks=self.__val_callbacks)
            self.__eval_batches_count += 1
            for cb in self.__val_callbacks:
                cb.on_minibatch_end(loader_name=loader_name,
                                    batches_count=self.__eval_batches_count,
                                    loss=loss,
                                    input=input_data,
                                    output=eval_result,
                                    target=target,
                                    data_key=data_key[ind],
                                    target_key=target_key[ind],
                                    stepper=self.__stepper)

        # The last minibatch of the loader is processed outside the loop.
        batch = cur_loader_state["samples"][n_iter - 1]
        input_data = self._parse_data(batch, data_key[ind])
        target = self._parse_data(batch, target_key[ind])
        for cb in self.__val_callbacks:
            cb.on_minibatch_begin(loader_name=loader_name,
                                  batches_count=self.__eval_batches_count,
                                  batch=batch,
                                  input=input_data,
                                  target=target,
                                  data_key=data_key[ind],
                                  target_key=target_key[ind],
                                  stepper=self.__stepper)
        loss, eval_result = self.__stepper.eval_step(input_data,
                                                     target,
                                                     return_out=True,
                                                     callbacks=self.__val_callbacks)
        self.__eval_batches_count += 1
        for cb in self.__val_callbacks:
            cb.on_minibatch_end(loader_name=loader_name,
                                batches_count=self.__eval_batches_count,
                                loss=loss,
                                input=input_data,
                                output=eval_result,
                                target=target,
                                data_key=data_key[ind],
                                target_key=target_key[ind],
                                stepper=self.__stepper)
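# A hedged usage sketch: data_key and target_key are indexed per validation loader above, so when a session
# evaluates over several loaders they are expected to be tuples aligned with the loader order. The session
# object and the key names below are invented for illustration.
# session.eval(data_key=('img', 'img'),
#              target_key=('target', 'soft_target'))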
def train(self, data_key: Tuple[str] or str = 'img', target_key: Tuple[str] or str = 'target',
          accumulate_grad_in_iter: bool = True, accumulate_grad=False, cast_target=None):
    """Runs the stepper in train mode for as many iterations as given in the train loader.

    This method does not return anything; it stores everything in the callbacks.

    Parameters
    ----------
    data_key : Tuple[str] or str
        Key of the dictionary which corresponds to the data. Sometimes (e.g. in Siamese modelzoo),
        we need two items, thus we might need multiple keys.
    target_key : Tuple[str] or str
        Key of the dictionary which corresponds to the target. In case of modelzoo with e.g. multiple heads
        and heterogeneous outputs, it could be useful to use multiple keys.
    accumulate_grad_in_iter : bool
        Whether to accumulate gradients across the minibatches of one iteration and step the optimizer
        only on the last one.
    accumulate_grad : bool
        Whether to accumulate gradients.
    cast_target : None or str
        Performs type casting for the target.
    """
    # TODO: Check default of accumulate_grad_in_iter
    accumulate_grad_in_iter = True if accumulate_grad_in_iter is None else accumulate_grad_in_iter
    accumulate_grad = False if accumulate_grad is None else accumulate_grad
    data_key = wrap_tuple(data_key)
    target_key = wrap_tuple(target_key)
    for ind, loader_name in enumerate(self.__train_loader_names):
        cur_loader_state = self.__data_provider.state_dict()[loader_name]
        n_iter = len(cur_loader_state["samples"])
        # All but the last minibatch of the loader are processed in this loop.
        for i in range(n_iter - 1):
            batch = cur_loader_state["samples"][i]
            input_data = self._parse_data(batch, data_key[ind])
            target = self._parse_data(batch, target_key[ind])
            for cb in self.__train_callbacks:
                cb.on_minibatch_begin(loader_name=loader_name,
                                      batches_count=self.__train_batches_count,
                                      batch=batch,
                                      input=input_data,
                                      target=target,
                                      data_key=data_key[ind],
                                      target_key=target_key[ind],
                                      stepper=self.__stepper)
            first_minibatch = self.check_first_minibatch(loader_i=ind, minibatch_i=i)
            last_minibatch = self.check_last_minibatch(n_loaders=len(self.__train_loader_names),
                                                       loader_i=ind,
                                                       n_minibatches=n_iter,
                                                       minibatch_i=i)
            # Do not zero the gradients while accumulating; step the optimizer only when required.
            no_zero_grad = accumulate_grad or (not first_minibatch and accumulate_grad_in_iter)
            with_step = last_minibatch or not accumulate_grad_in_iter
            loss, train_result = self.__stepper.train_step(input_data,
                                                           target,
                                                           retain_graph=accumulate_grad_in_iter,
                                                           accumulate_grad=no_zero_grad,
                                                           with_step=with_step,
                                                           return_out=True,
                                                           callbacks=self.__train_callbacks)
            self.__train_batches_count += 1
            for cb in self.__train_callbacks:
                cb.on_minibatch_end(loader_name=loader_name,
                                    batches_count=self.__train_batches_count,
                                    loss=loss,
                                    input=input_data,
                                    output=train_result,
                                    target=target,
                                    data_key=data_key[ind],
                                    target_key=target_key[ind],
                                    stepper=self.__stepper)

        # The last minibatch of the loader is processed outside the loop.
        batch = cur_loader_state["samples"][n_iter - 1]
        input_data = self._parse_data(batch, data_key[ind])
        target = self._parse_data(batch, target_key[ind])
        for cb in self.__train_callbacks:
            cb.on_minibatch_begin(loader_name=loader_name,
                                  batches_count=self.__train_batches_count,
                                  batch=batch,
                                  input=input_data,
                                  target=target,
                                  data_key=data_key[ind],
                                  target_key=target_key[ind],
                                  stepper=self.__stepper)
        first_minibatch = self.check_first_minibatch(loader_i=ind, minibatch_i=n_iter - 1)
        last_minibatch = self.check_last_minibatch(n_loaders=len(self.__train_loader_names),
                                                   loader_i=ind,
                                                   n_minibatches=n_iter,
                                                   minibatch_i=n_iter - 1)
        no_zero_grad = accumulate_grad or (not first_minibatch and accumulate_grad_in_iter)
        with_step = last_minibatch or not accumulate_grad_in_iter
        loss, train_result = self.__stepper.train_step(input_data,
                                                       target,
                                                       return_out=True,
                                                       accumulate_grad=no_zero_grad,
                                                       with_step=with_step,
                                                       callbacks=self.__train_callbacks)
        self.__train_batches_count += 1
        for cb in self.__train_callbacks:
            cb.on_minibatch_end(loader_name=loader_name,
                                batches_count=self.__train_batches_count,
                                loss=loss,
                                input=input_data,
                                output=train_result,
                                target=target,
                                data_key=data_key[ind],
                                target_key=target_key[ind],
                                stepper=self.__stepper)
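# A small, self-contained sketch of the gradient-accumulation flags computed in train() above:
# no_zero_grad decides whether gradients are NOT zeroed before the step (i.e. accumulated), and with_step
# decides whether the optimizer steps after this minibatch. The helper name is invented for illustration.
def _grad_flags_sketch(first_minibatch: bool, last_minibatch: bool,
                       accumulate_grad: bool, accumulate_grad_in_iter: bool):
    no_zero_grad = accumulate_grad or (not first_minibatch and accumulate_grad_in_iter)
    with_step = last_minibatch or not accumulate_grad_in_iter
    return no_zero_grad, with_step

# With accumulate_grad_in_iter=True and accumulate_grad=False, gradients are zeroed only on the first
# minibatch of the iteration and the optimizer steps only on the last one:
assert _grad_flags_sketch(True, False, False, True) == (False, False)   # first minibatch: zero grad, no step
assert _grad_flags_sketch(False, False, False, True) == (True, False)   # middle minibatch: accumulate, no step
assert _grad_flags_sketch(False, True, False, True) == (True, True)     # last minibatch: accumulate, then step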