Example #1
    def __init__(self,
                 metric_names: Union[Tuple[str], str],
                 conditions: Union[Tuple[str], str],
                 model: nn.Module,
                 save_dir: str,
                 prefix: str = "",
                 keep_best_only: bool = True,
                 mode: str = "and",
                 stage: str = 'eval'):
        super().__init__(ctype="saver")
        self.__metric_names = wrap_tuple(metric_names)
        self.__conditions = wrap_tuple(conditions)
        self.__prefix = prefix if prefix else "model"
        self.__save_dir = save_dir
        self.__keep_best_only = keep_best_only
        self.__prev_model_path = ""
        self.__mode = mode
        self.__stage = wrap_tuple(stage)

        if self.__mode == "avg" and len(self.__conditions) > 1:
            if len(set(self.__conditions)) > 1:
                raise ValueError(
                    "All conditions must be the same in {} mode, but got {}".
                    format(self.__mode, self.__conditions))

        if not exists(self.__save_dir):
            print("Save directory {} not found. Creating it.".format(
                self.__save_dir))
            mkdir(self.__save_dir)

        if len(self.__metric_names) != len(self.__conditions):
            raise ValueError(
                "Metric names and conditions must have the same length, "
                "but got {} != {}".format(len(self.__metric_names),
                                          len(self.__conditions)))

        self.__best_metrics = dict()
        for i, metric_name in enumerate(self.__metric_names):
            cond = self.__conditions[i].lower()
            if cond in ["min", "max"]:
                self.__best_metrics[metric_name] = dict()
                self.__best_metrics[metric_name]["value"] = float(
                    'Inf') if cond == "min" else float('-Inf')
                self.__best_metrics[metric_name]["cond"] = cond
            else:
                raise ValueError(
                    'Values of conditions must be either min or max, but got {}'
                    .format(cond))

        self.__model = model
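Every constructor in these examples normalizes its arguments through `wrap_tuple`, which lets callers pass either a single object or a tuple. A minimal sketch of what such a helper plausibly does (an assumption about its behavior, not the library's verbatim implementation):

def wrap_tuple(obj):
    # Assumed behavior: leave tuples untouched, wrap anything else into a
    # one-element tuple so downstream code can always iterate.
    if isinstance(obj, tuple):
        return obj
    return (obj,)

Under this assumption, `wrap_tuple('loss')` yields `('loss',)`, while `wrap_tuple(('loss', 'acc'))` is returned unchanged.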
Example #2
    def __init__(self,
                 writer,
                 optimizers,
                 names,
                 log_dir: Optional[str] = None,
                 tag: str = '',
                 name: str = 'batch_lr_logger'):
        super().__init__(name=name)
        self.__log_dir = log_dir
        self.__tag = tag
        self.__summary_writer = writer
        self.__optims = wrap_tuple(optimizers)
        self.__names = wrap_tuple(names)
        if len(self.__optims) != len(self.__names):
            raise ValueError(
                'The number of optimizers and names must match, but got {} and {}'
                .format(len(self.__optims), len(self.__names)))
        self.__single = len(self.__names) == 1
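A minimal usage sketch for this logger. The class name `BatchLRLogger` is hypothetical (only the constructor is shown above); `SummaryWriter` and the optimizer are standard PyTorch:

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/demo')
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# A single optimizer/name pair; wrap_tuple spares us from wrapping them ourselves.
lr_logger = BatchLRLogger(writer, optimizer, names='adam', tag='lr')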
Example #3
    def __init__(self,
                 modules: Union[Tuple[Module], Module],
                 mode: str = "norm",
                 **kwargs):
        super().__init__(ctype="tuner")
        if mode not in self.modes:
            raise ValueError("Mode must be in {}".format(self.modes))

        self.__mode: str = mode
        self.__modules: Tuple[Module] = wrap_tuple(modules)
        self.__kwargs = kwargs
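Only the constructor is shown, so what the tuner actually does with the stored modules is not visible here. As one plausible illustration of a "norm" mode, a gradient-norm clipping sketch (an assumption, not the library's verified behavior):

import torch

def tune_norm(modules, max_norm: float = 1.0):
    # Clip the gradient norm of each module's parameters in place.
    for module in modules:
        torch.nn.utils.clip_grad_norm_(module.parameters(), max_norm)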
Example #4
    def __init__(self, modules: Union[Module, Tuple[Module]]):
        super().__init__(ctype="freezer")
        self.__modules: Tuple[Module] = wrap_tuple(modules)
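The constructor only stores the modules; the freezing itself presumably happens later in the callback's lifecycle. A sketch of what freezing typically means in PyTorch (an assumption, not this library's verified implementation):

import torch.nn as nn

def freeze(modules):
    # Disable gradient tracking so the optimizer no longer updates
    # the parameters of these modules.
    for module in modules:
        for param in module.parameters():
            param.requires_grad = False

freeze((nn.Linear(4, 2),))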
Example #5
    def __init__(self, modules: Union[Module, Tuple[Module]]):
        super().__init__()
        self.__modules: Tuple[Module] = wrap_tuple(modules)
Example #6
    def __init__(self,
                 data_provider: DataProvider,
                 data_sampling_config: dict,
                 device: torch.device,
                 strategy_config: Optional[dict] = None,
                 callbacks: Union[Tuple[Callback], Callback, None] = None,
                 n_epochs: Optional[int] = 10,
                 n_train_batches: Optional[int] = None,
                 train_batchs_choice: str = 'max',
                 sessions: Union[Dict[str, Session], Session, None] = None,
                 distributed: bool = False,
                 use_apex: bool = False):
        """Constructor of Strategy
        Parameters
        ----------
        data_provider: DataProvider
            Abstracts the access to all data for all sessions.
        data_sampling_config: dict
            Holds the configuration about how the data would be sampled
        strategy_config: dict
            Holds training configuration for multi-model-stepper. This is the core of Multi-model Strategy
        callbacks: Tuple[Callback]
            callback tuples to be executed inside the strategy main loop
        n_epochs: int
            Number of epochs to be trained.
        n_train_batches: int
            Number of training batches. Can be figured out from batch size and total data size
        sessions: Dict[str, Session]
            Dict of Session objects. Strategy will iterate through this dict by name and execute them with proper
            callbacks and strategy configuration.
        Raises
        -------
        ValueError:
            The constructor will raise ValueError if data_provider or data_sampling_config or strategy_config or device
            is None

        """
        if data_provider is None or data_sampling_config is None or strategy_config is None or device is None:
            raise ValueError(
                'data_provider or data_sampling_config or strategy_config or device cannot be None'
            )
        # self.__stage_names contains the learning stages, e.g. 'train' and 'eval'. To train without
        # validation, remove 'eval' from 'stage_names' in strategy.yml.
        self.__stage_names = wrap_tuple(strategy_config['stage_names'])
        # self.__model_names_by_stage maps each stage to the names of its trainable models.
        # A trainable model may contain multiple NN models.

        # TODO: Is it necessary to be a set instead of tuple?
        self.__model_names_by_stage = dict()
        self.__model_names = []
        for stage in self.__stage_names:
            self.__model_names_by_stage[stage] = wrap_tuple(
                data_sampling_config[stage].data_provider.keys())
            self.__model_names += self.__model_names_by_stage[stage]
        self.__model_names = tuple(set(self.__model_names))

        self.__train_starts_at_epoch = strategy_config['train_starts_at_epoch']
        self.__n_epochs = n_epochs
        self.__data_provider = data_provider
        self.__callbacks = wrap_tuple(callbacks)
        # self.__accumulate_grad is retrieved from the strategy config yml file. It contains a boolean
        # value for each stepper in the sessions tuple. This is a mandatory field in the yml file.
        self.__accumulate_grad = strategy_config['accumulate_grad']
        self.__accumulate_grad_in_iter = strategy_config[
            'accumulate_grad_in_iter']
        self.__data_sampling_config = data_sampling_config

        sessions = wrap_tuple(sessions)

        # self.__num_samples_by_stage is a dictionary holding the number of batches per stage
        self.__num_samples_by_stage = dict()
        # self.__data_key_by_stage is a dictionary of dictionaries. The key to the outer dictionary is
        # the stage name, the key to the nested dictionary is the trainable model name, and the final
        # value is the data key used to retrieve data from the data_provider
        self.__data_key_by_stage = dict()
        # self.__target_key_by_stage is analogous to __data_key_by_stage; as the name suggests, the
        # final value is the target key used to retrieve the target from the data_provider
        self.__target_key_by_stage = dict()
        self.__num_batches_by_stage = dict()
        self.__distributed = distributed
        self.__use_apex = use_apex
        for stage in self.__stage_names:
            self.__num_batches_by_stage[stage] = None
            self.__data_key_by_stage[stage] = dict()
            self.__num_samples_by_stage[stage] = dict()
            self.__target_key_by_stage[stage] = dict()

            model_names = self.__model_names_by_stage[stage]

            # iterate through each trainable model stepper
            for model_name in model_names:
                # n_samples_dict is a local dict mapping each data loader name to its
                # batches_per_iter value for this stepper.
                # Be aware that model_name is the name of the stepper, which may contain
                # multiple NN models.
                n_samples_dict = dict()
                data_keys = []
                target_keys = []
                if model_name not in self.__data_sampling_config[stage][
                        'data_provider']:
                    continue
                data_loader_names = self.__data_sampling_config[stage][
                    'data_provider'][model_name]

                for loader_name in data_loader_names:
                    n_samples_dict[loader_name] = data_loader_names[
                        loader_name]['batches_per_iter']
                    n_batches = len(
                        self.__data_provider.get_loader_by_name(loader_name))
                    data_keys.append(
                        data_loader_names[loader_name]['data_key'])
                    target_keys.append(
                        data_loader_names[loader_name]['target_key'])

                    if self.__num_batches_by_stage[stage] is None:
                        self.__num_batches_by_stage[stage] = n_batches
                    elif (train_batchs_choice == 'max'
                          and self.__num_batches_by_stage[stage] < n_batches):
                        self.__num_batches_by_stage[stage] = n_batches
                    elif train_batchs_choice == 'min' and self.__num_batches_by_stage[
                            stage] > n_batches:
                        self.__num_batches_by_stage[stage] = n_batches

                self.__data_key_by_stage[stage][model_name] = tuple(data_keys)
                self.__target_key_by_stage[stage][model_name] = tuple(
                    target_keys)

                self.__num_samples_by_stage[stage][model_name] = n_samples_dict

        if n_train_batches is not None and n_train_batches > 0:
            self.__num_batches_by_stage['train'] = n_train_batches

        self.__device = device
        self.__sessions = dict()

        # save the sessions into dictionary according to model stepper name for easy access
        optimizers = dict()
        for name in self.__model_names:
            self.__sessions[name] = sessions[name]
            if self.__sessions[name].data_provider is None:
                self.__sessions[name].data_provider = self.__data_provider

            if len(sessions) == 1:
                optimizers = self.__sessions[name].optimizer
            else:
                optimizers[name] = self.__sessions[name].optimizer

        if self.__use_apex:
            from collagen.parallel._apex import first_gpu_or_cpu_in_use
            if first_gpu_or_cpu_in_use(self.__device):
                self.__default_strategy_callback = (ProgressbarLogger(
                    update_freq=1, optimizers=optimizers), )
            else:
                self.__default_strategy_callback = ()
        else:
            self.__default_strategy_callback = (ProgressbarLogger(
                update_freq=1, optimizers=optimizers), )

        self.__callbacks += self.__default_strategy_callback
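The nested configuration shape this constructor expects can be reconstructed from the keys it reads. A sketch with illustrative names and values; note that `data_sampling_config[stage].data_provider` is read via attribute access while `...[stage]['data_provider']` uses item access, so the config is presumably an attribute-accessible dict (e.g. an EasyDict-style object) rather than a plain dict:

strategy_config = {
    'stage_names': ('train', 'eval'),
    'train_starts_at_epoch': 0,
    # One flag per stepper; mandatory fields according to the comments above.
    'accumulate_grad': (False,),
    'accumulate_grad_in_iter': (True,),
}

data_sampling_config = {
    'train': {
        'data_provider': {
            'my_stepper': {            # stepper (trainable model) name; illustrative
                'my_loader': {         # data loader name; illustrative
                    'batches_per_iter': 1,
                    'data_key': 'img',
                    'target_key': 'target',
                },
            },
        },
    },
}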
Example #7
    def add_eval_callbacks(self, cbs):
        self.__val_callbacks += wrap_tuple(cbs)
Example #8
    def add_train_callbacks(self, cbs):
        self.__train_callbacks += wrap_tuple(cbs)
Example #9
    def eval(self, data_key: Union[Tuple[str], str] = 'img', accumulate_grad_in_iter=None, accumulate_grad=None,
             target_key: Union[Tuple[str], str] = 'target', cast_target=None):
        """
        Runs stepper in `eval` mode as many iterations as given in the validation / test loader.

        This method does not return anything a stores everything in the callbacks.
        The callbacks here are called before the minibatch and after minibatch

        Parameters
        ----------
        data_key : Tuple[str] or str
            Key of the dictionary, which corresponds to the data. Sometimes (e.g. in Siamese modelzoo),
            we need two items thus we might need multiple keys.
        target_key : Tuple[str] or str
            Key of the dictionary, which corresponds to the target. In case of modelzoo with e.g. multiple
            heads and heterogeneous outputs, it could be useful to use multiple keys.
        cast_target : None or str
            Performs type casting for target

        """

        accumulate_grad_in_iter = None
        accumulate_grad = None

        data_key = wrap_tuple(data_key)
        target_key = wrap_tuple(target_key)

        for ind, loader_name in enumerate(self.__val_loader_names):
            cur_loader_state = self.__data_provider.state_dict()[loader_name]
            n_iter = len(cur_loader_state["samples"])

            for i in range(n_iter):
                batch = cur_loader_state["samples"][i]
                input_data = self._parse_data(batch, data_key[ind])
                target = self._parse_data(batch, target_key[ind])

                for cb in self.__val_callbacks:
                    cb.on_minibatch_begin(loader_name=loader_name,
                                          batches_count=self.__eval_batches_count,
                                          batch=batch,
                                          input=input_data,
                                          target=target,
                                          data_key=data_key[ind],
                                          target_key=target_key[ind],
                                          stepper=self.__stepper)

                loss, eval_result = self.__stepper.eval_step(input_data,
                                                             target,
                                                             return_out=True,
                                                             callbacks=self.__val_callbacks)
                self.__eval_batches_count += 1

                for cb in self.__val_callbacks:
                    cb.on_minibatch_end(loader_name=loader_name,
                                        batches_count=self.__eval_batches_count,
                                        loss=loss,
                                        input=input_data,
                                        output=eval_result,
                                        target=target,
                                        data_key=data_key[ind],
                                        target_key=target_key[ind],
                                        stepper=self.__stepper)
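Both `eval` above and `train` below pull tensors out of each minibatch through `_parse_data`. A plausible sketch of that helper, inferred purely from its call sites (hypothetical, not the library's actual code):

def _parse_data(self, batch, data_key):
    # Each minibatch is a dict-like sample; select the entry for the
    # requested key. The real implementation may also move the tensor
    # to the stepper's device and apply casting.
    return batch[data_key]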
Example #10
    def train(self, data_key: Union[Tuple[str], str] = 'img', target_key: Union[Tuple[str], str] = 'target',
              accumulate_grad_in_iter: bool = True, accumulate_grad: bool = False, cast_target=None):
        """
        Runs stepper in train mode as many iterations as given in the train loader.

        This method does not return anything a stores everything in the callbacks.

        Parameters
        ----------
        data_key : Tuple[str] or str
            Key of the dictionary, which corresponds to the data. Sometimes (e.g. in Siamese modelzoo),
            we need two items thus we might need multiple keys.
        target_key : Tuple[str] or str
            Key of the dictionary, which corresponds to the target. In case of modelzoo with e.g. multiple
            heads and heterogeneous outputs, it could be useful to use multiple keys.
        accumulate_grad : bool
            Whether to accumulate gradient.
        cast_target : None or str
            Performs type casting for target

        """

        #TODO: Check default of accumulate_grad_in_iter
        accumulate_grad_in_iter = True if accumulate_grad_in_iter is None else accumulate_grad_in_iter
        accumulate_grad = False if accumulate_grad is None else accumulate_grad

        data_key = wrap_tuple(data_key)
        target_key = wrap_tuple(target_key)

        for ind, loader_name in enumerate(self.__train_loader_names):
            cur_loader_state = self.__data_provider.state_dict()[loader_name]
            n_iter = len(cur_loader_state["samples"])

            for i in range(n_iter):
                batch = cur_loader_state["samples"][i]
                input_data = self._parse_data(batch, data_key[ind])
                target = self._parse_data(batch, target_key[ind])

                for cb in self.__train_callbacks:
                    cb.on_minibatch_begin(loader_name=loader_name,
                                          batches_count=self.__train_batches_count,
                                          batch=batch,
                                          input=input_data,
                                          target=target,
                                          data_key=data_key[ind],
                                          target_key=target_key[ind],
                                          stepper=self.__stepper)

                first_minibatch = self.check_first_minibatch(loader_i=ind, minibatch_i=i)
                last_minibatch = self.check_last_minibatch(n_loaders=len(self.__train_loader_names), loader_i=ind,
                                                           n_minibatches=n_iter, minibatch_i=i)
                # Keep existing gradients when accumulating across iterations, or for every
                # minibatch after the first when accumulating within the iteration.
                no_zero_grad = accumulate_grad or (not first_minibatch and accumulate_grad_in_iter)
                # Step the optimizer on the last minibatch, or on every minibatch when not
                # accumulating within the iteration.
                with_step = last_minibatch or not accumulate_grad_in_iter
                # The graph is retained only while further minibatches of this loader follow.
                retain_graph = accumulate_grad_in_iter and i < n_iter - 1
                loss, train_result = self.__stepper.train_step(input_data,
                                                               target, retain_graph=retain_graph,
                                                               accumulate_grad=no_zero_grad,
                                                               with_step=with_step,
                                                               return_out=True, callbacks=self.__train_callbacks)
                self.__train_batches_count += 1

                for cb in self.__train_callbacks:
                    cb.on_minibatch_end(loader_name=loader_name,
                                        batches_count=self.__train_batches_count,
                                        loss=loss,
                                        input=input_data,
                                        output=train_result,
                                        target=target,
                                        data_key=data_key[ind],
                                        target_key=target_key[ind],
                                        stepper=self.__stepper)
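Plausible implementations of the two helpers used above, inferred from their call sites (assumptions, not the library's actual code). The first and last minibatch of an iteration determine when gradients are zeroed and when the optimizer steps:

def check_first_minibatch(self, loader_i: int, minibatch_i: int) -> bool:
    # The very first minibatch of the first loader in this iteration.
    return loader_i == 0 and minibatch_i == 0

def check_last_minibatch(self, n_loaders: int, loader_i: int,
                         n_minibatches: int, minibatch_i: int) -> bool:
    # The very last minibatch of the last loader in this iteration.
    return loader_i == n_loaders - 1 and minibatch_i == n_minibatches - 1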