Example #1
 def __init__(self):
     self.name = ''
     self.data_format = General.data_format
     self._modules = Config()
     self._parameters = OrderedDict()
     self._weights_buffer = OrderedDict()
     self._init_configs()
Example #2
 def __init__(self):
     self.parent_scope_name = ''
     self._scope_name = ''
     self._modules = Config()
     self._training = True
     self.enable_scope_name = enable_scope_name
     self.data_format = General.data_format
Example #3
def _set_startup(args):
    if args.startup in ['benchmark', 'b']:
        cfg = Config(args.config_file)
        config = deepcopy(cfg)
        if 'benchmark' in cfg.keys():
            benchmark_config = cfg.pop('benchmark')
            config = update_dict(benchmark_config, cfg)
    else:
        config = Config(args.config_file)
    return config
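A minimal usage sketch for _set_startup: assuming a YAML config file with a top-level benchmark section (the file name below is hypothetical), the benchmark keys are merged over the remaining top-level keys via update_dict; any other startup value loads the file into a Config unchanged.

    from types import SimpleNamespace

    # Hypothetical arguments; in the project these come from argparse.
    args = SimpleNamespace(startup="benchmark", config_file="./benchmark.yml")
    config = _set_startup(args)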
Example #4
 def __init__(self):
     self.parent_scope_name = ''
     self._scope_name = ''
     self._modules = Config()
     self._training = True
     self.enable_scope_name = enable_scope_name
     self.data_format = General.data_format
     self.pretrained_model_file = None
     self._is_load_pretrained = False
     self.load_pretrained_type = None
     self._trainable = True
     self.pretrained_prefix = None
Example #5
 def __init__(self, config=None):
     """Initialize."""
     self.is_multi_opt = False
     if config is not None:
         self.config = Config(config)
     raw_config = self.config.to_dict()
     raw_config.type = self.config.type
     map_dict = OptimMappingDict
     self.map_config = ConfigBackendMapping(
         map_dict.type_mapping_dict,
         map_dict.params_mapping_dict).backend_mapping(raw_config)
     self.optim_cls = ClassFactory.get_cls(ClassType.OPTIMIZER,
                                           self.map_config.type)
Example #6
 def __init__(self, **desc):
     """Initialize."""
     super(SimpleCnn, self).__init__()
     desc = Config(**desc)
     self.num_class = desc.num_class
     self.fp16 = desc.get('fp16', False)
     self.channels = desc.channels
     self.conv1 = ops.Conv2d(3, 32, padding=1, kernel_size=3)
     self.pool1 = ops.MaxPool2d(2, stride=2)
     self.blocks = self._blocks(self.channels, desc.blocks)
     self.pool2 = ops.MaxPool2d(2, stride=2)
     self.conv2 = ops.Conv2d(self.channels, 64, padding=1, kernel_size=3)
     self.global_conv = ops.Conv2d(64, 64, kernel_size=8)
     self.view = ops.View()
     self.fc = ops.Linear(64, self.num_class)
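A hedged instantiation sketch for SimpleCnn; the desc keys mirror the attributes read in __init__ above, and the concrete values (including what _blocks expects for blocks) are only illustrative.

    # Hypothetical description values.
    model = SimpleCnn(num_class=10, channels=32, blocks=2, fp16=False)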
Example #7
 def __init__(self, search_space=None, **kwargs):
     super(SpNasCodec, self).__init__(search_space, **kwargs)
     config_template_file = search_space.config_template_file
     assert config_template_file is not None
     self.config_template = Config(config_template_file)
     if 'epoch' in search_space.keys():
         self.config_template['total_epochs'] = search_space.epoch
Example #8
File: args.py Project: ylfzr/vega
def _parse_args(sections, desc):
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("-backend", "--general.backend", default="pytorch", type=str,
                        help="pytorch|tensorflow|mindspore")
    if "cluster" in sections:
        parser.add_argument("-devices_per_trainer", "--general.worker.devices_per_trainer", default=None, type=int)
        parser.add_argument("-master_ip", "--general.cluster.master_ip", default=None, type=str)
        parser.add_argument("-listen_port", "--general.cluster.listen_port", default=8000, type=int)
        parser.add_argument("-slaves", "--general.cluster.slaves", default=[],
                            action='store', dest='general.cluster.slaves', type=str, nargs='*',
                            help="slave IP list")
    parser.add_argument("-dataset", "--dataset.type", required=True, type=str, help="dataset name.")
    parser.add_argument("-data_path", "--dataset.common.data_path", type=str, help="dataset path.")
    parser.add_argument("-batch_size", "--dataset.common.batch_size", default=256, type=int)
    if "model" in sections:
        parser.add_argument("-model_desc", "--model.model_desc", type=str)
        parser.add_argument("-model_file", "--model.pretrained_model_file", type=str)
    if "trainer" in sections:
        parser.add_argument("-epochs", "--trainer.epochs", type=int)
    if "fine_tune" in sections:
        parser.add_argument("-task_type", "--task_type", default="classification", type=str,
                            help="classification|detection|segmentation|super_resolution")
        parser.add_argument("-num_classes", "--trainer.num_classes", type=int)
    parser.add_argument("-evaluator", "--evaluator", default=[],
                        action='store', dest='evaluator', type=str, nargs='*',
                        help="evaluator list, eg. -evaluator GpuEvaluator DavinciMobileEvaluator")
    args = vars(parser.parse_args())
    args = {key: value for key, value in args.items() if args[key]}
    tree = Config(build_tree(args))
    return tree
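The dotted destination names above (e.g. general.backend, dataset.common.data_path) are expanded into a nested dictionary before being wrapped in Config. The project's build_tree is not shown here; a rough sketch of the expansion it is assumed to perform:

    def build_tree(flat_args):
        """Expand {'general.backend': 'pytorch'} into {'general': {'backend': 'pytorch'}} (sketch only)."""
        tree = {}
        for dotted_key, value in flat_args.items():
            node = tree
            *parents, leaf = dotted_key.split(".")
            for part in parents:
                node = node.setdefault(part, {})
            node[leaf] = value
        return tree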
Example #9
File: args.py Project: ylfzr/vega
def _set_config(args, step_name, step_type):
    """Fully train."""
    # general
    General.step_name = step_name
    if hasattr(args, "general"):
        General.from_json(args.general)
    # pipeline
    PipelineConfig.steps = [step_name]
    # pipestep
    PipeStepConfig.type = step_type
    # model
    if hasattr(args, "model"):
        if hasattr(args.model, "model_desc"):
            args.model.model_desc = Config(args.model.model_desc)
        PipeStepConfig.model.from_json(args.model)
    # dataset
    if hasattr(args, "dataset"):
        PipeStepConfig.dataset.from_json(args.dataset)
    # trainer
    if hasattr(args, "trainer"):
        TrainerConfig.from_json(args.trainer)
    # evaluator
    if hasattr(args, "evaluator"):
        # PipeStepConfig.evaluator._type_name = args.evaluator
        if "GpuEvaluator" in args.evaluator:
            PipeStepConfig.evaluator_enable = True
            PipeStepConfig.evaluator.gpu_evaluator_enable = True
        if "DavinciMobileEvaluator" in args.evaluator:
            PipeStepConfig.evaluator_enable = True
            PipeStepConfig.evaluator.davinci_mobile_evaluator_enable = True
Example #10
 def _create_examples(self, lines, set_type):
     """Create examples for the training, dev and test sets."""
     examples = []
     for (i, line) in enumerate(lines):
         if i == 0:
             continue
         guid = "%s-%s" % (set_type, i)
         text_a = line[3]
         text_b = line[4]
         label = None if set_type == "test" else line[0]
         examples.append(
             Config(
                 dict(guid=guid, text_a=text_a, text_b=text_b,
                      label=label)))
     return examples
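A hedged sketch of the rows _create_examples expects: the first row is treated as a header and skipped, column 3 holds text_a, column 4 holds text_b, and column 0 holds the label (ignored for the test set). The processor instance and row contents are illustrative.

    lines = [
        ["label", "id_a", "id_b", "sentence1", "sentence2"],        # header row, skipped
        ["1", "q1", "q2", "The cat sat.", "A cat was sitting."],
    ]
    examples = processor._create_examples(lines, set_type="train")
    # examples[0].text_a == "The cat sat.", examples[0].label == "1"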
Example #11
 def __new__(cls, *args, **kwargs):
     """Record params."""
     desc = {}
     params_sig = sig(cls.__init__).parameters
     param_names = list(params_sig.keys())
     if len(param_names) > len(args):
         # no variadic parameters, so positional args map to parameter names
         for idx, arg in enumerate(args):
             arg_name = param_names[idx + 1]
             desc[arg_name] = arg
     if kwargs:
         desc.update(kwargs)
     instance = super(Serializable, cls).__new__(cls)
     instance._deep_level = 0
     instance._target_level = None
     instance.desc = Config(desc)
     return instance
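A hedged illustration of the recording behaviour with a hypothetical subclass: positional arguments are mapped to parameter names (skipping self) and keyword arguments are merged on top, so desc captures the full construction signature.

    class Conv(Serializable):
        def __init__(self, in_channels, out_channels, kernel_size=3):
            super(Conv, self).__init__()

    layer = Conv(3, 16, kernel_size=5)
    # layer.desc is expected to equal
    # Config({'in_channels': 3, 'out_channels': 16, 'kernel_size': 5})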
Example #12
class LrScheduler(object):
    """Register and call LrScheduler class."""

    config = LrSchedulerConfig()

    def __init__(self, config=None):
        """Initialize."""
        # register pytorch optim as default
        if config:
            self.config = Config(config)
            raw_config = deepcopy(self.config)
        else:
            self.config = LrScheduler.config
            raw_config = self.config.to_dict()
        raw_config.type = self.config.type
        map_dict = LrSchedulerMappingDict()

        self.map_config = ConfigBackendMapping(
            map_dict.type_mapping_dict,
            map_dict.params_mapping_dict).backend_mapping(raw_config)
        self._cls = ClassFactory.get_cls(ClassType.LR_SCHEDULER,
                                         self.map_config.type)

    def __call__(self, optimizer=None, epochs=None, steps=None):
        """Call lr scheduler class."""
        params = self.map_config.get("params", {})
        logging.debug("Call LrScheduler. name={}, params={}".format(
            self._cls.__name__, params))

        setattr(self._cls, "by_epoch", True)
        if hasattr(self.config, "by_epoch"):
            setattr(self._cls, "by_epoch", self.config.by_epoch)

        try:
            if params:
                return self._cls(optimizer, **params)
            else:
                return self._cls(optimizer)
        except Exception as ex:
            logging.error(
                "Failed to call LrScheduler name={}, params={}".format(
                    self._cls.__name__, params))
            raise ex
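A hedged usage sketch for LrScheduler: the "MultiStepLR" type and its params are only illustrative and must correspond to a scheduler registered under ClassType.LR_SCHEDULER, and optimizer is an already-built optimizer instance.

    scheduler = LrScheduler({"type": "MultiStepLR",
                             "params": {"milestones": [30, 60], "gamma": 0.1}})
    lr_scheduler = scheduler(optimizer=optimizer, epochs=100)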
Example #13
 def _init_hps(self, hps=None):
     """Load hps from file."""
     # load config
     if hps is not None:
         pass
     elif self.config.hps_file is not None:
         desc_file = self.config.hps_file.replace("{local_base_path}",
                                                  self.local_base_path)
         hps = Config(desc_file)
         if "trainer" in hps:
             if "epochs" in hps["trainer"]:
                 hps["trainer"].pop("epochs")
             if "checkpoint_path" in hps["trainer"]:
                 hps["trainer"].pop("checkpoint_path")
     elif self.config.hps_folder is not None:
         folder = self.config.hps_folder.replace("{local_base_path}",
                                                 self.local_base_path)
         pattern = FileOps.join_path(folder, "hps_*.json")
         desc_file = glob.glob(pattern)[0]
         hps = Config(desc_file)
         if "trainer" in hps:
             if "epochs" in hps["trainer"]:
                 hps["trainer"].pop("epochs")
             if "checkpoint_path" in hps["trainer"]:
                 hps["trainer"].pop("checkpoint_path")
     # merge config
     if not self.hps:
         self.hps = hps
     elif hps:
         hps.from_dict(self.hps)
         self.hps = hps
     # set config
     if self.hps and self.hps.get('trainer'):
         self.config.from_dict(self.hps.get('trainer'))
         self.load_checkpoint = self.config.load_checkpoint
     self.epochs = self.config.epochs
Example #14
 def __init__(self, config=None):
     """Initialize."""
     self.is_multi_opt = True
     if config is not None:
         self.config = Config(config)
     self._opts = OrderedDict()
Example #15
class TrainerBase(DistributedWorker):
    """Trainer base class."""

    config = TrainerConfig()

    def __init__(self,
                 model=None,
                 id=None,
                 hps=None,
                 load_ckpt_flag=False,
                 model_desc=None,
                 lazy_build=True,
                 **kwargs):
        super().__init__()

        self.worker_type = WorkerTypes.TRAINER
        TrainerBase.__worker_id__ += 1
        if id is not None:
            self._worker_id = id
        else:
            self._worker_id = TrainerBase.__worker_id__

        # Data member list of Trainer
        self.is_chief = True
        self.use_cuda = self.config.cuda
        self.epochs = self.config.epochs
        self.do_validation = True
        self.auto_save_ckpt = True
        self.auto_save_perf = True
        self.skip_train = False
        self.valid_interval = self.config.valid_interval
        self.hps = hps
        self.model = model
        self.model_desc = model_desc
        self.optimizer = None
        self.lr_scheduler = None
        self.loss = None
        self.use_syncbn = self.config.syncbn
        self.use_amp = self.config.amp
        self.train_metrics = None
        self.valid_metrics = None
        self.call_metrics_on_train = self.config.call_metrics_on_train
        self.train_verbose = self.config.train_verbose
        self.valid_verbose = self.config.valid_verbose
        self.train_report_steps = self.config.train_report_steps
        self.valid_report_steps = self.config.valid_report_steps
        self.train_loader = None
        self.valid_loader = None
        self.train_step = None
        self.valid_step = None
        self.make_batch = None
        self.model_fn = None
        self.train_input_fn = None
        self.valid_input_fn = None
        self.callbacks = None
        self.performance = None
        self.runtime = None
        self.load_checkpoint = False
        self._resume_training = False
        self._start_epoch = 0
        self.visual_data = {}
        self.load_ckpt_flag = load_ckpt_flag
        self.distributed = self.config.distributed
        # Used by TimmTrainerCallbacks since it builds its trainer in
        # the before_train callback
        self.lazy_built = self.config.lazy_built
        # Indicate whether the necessary components of a trainer
        # have been built for running
        self._world_size = 1
        self._rank_id = 0
        self._local_rank_id = 0
        self.config.kwargs = kwargs
        self.checkpoint_file_name = 'checkpoint.pth'
        self.model_pickle_file_name = 'model.pkl'
        worker_path = self.get_local_worker_path()
        self.model_path = FileOps.join_path(worker_path,
                                            self.model_pickle_file_name)
        self.checkpoint_file = FileOps.join_path(worker_path,
                                                 self.checkpoint_file_name)
        self.weights_file = FileOps.join_path(
            worker_path, "model_{}.pth".format(self.worker_id))
        self.loss_input = kwargs.get('loss_input', None)
        self.gpu_nums = kwargs.get('gpu_nums', 1)
        self.use_unsupervised_pretrain = self.config.use_unsupervised_pretrain
        if TrainerConfig.model_desc is None:
            TrainerConfig.model_desc = model_desc
        if not lazy_build:
            self.init_trainer()

    def init_trainer(self):
        """Init Trainer."""
        init_log(level=General.logger.level,
                 log_file="worker_{}.log".format(self.worker_id),
                 log_path=self.get_local_worker_path())
        self._set_default_funcs()
        self._set_condition()
        self._init_callbacks()
        self.callbacks.init_trainer()

        self.init_train_op()

    def train_process(self):
        """Whole train process of the TrainWorker specified in config.

        After training, the model and validation results are saved to local_worker_path and s3_path.
        """
        init_log(level=General.logger.level,
                 log_file="worker_{}.log".format(self.worker_id),
                 log_path=self.local_log_path)
        self._set_default_funcs()
        self._set_condition()
        self._init_callbacks()
        self.callbacks.init_trainer()
        if not self.lazy_built:
            self.build()
        self._train_loop()

    def build(self):
        """Build the trainer by assembling the necessary components."""
        logging.debug("Trainer Config: {}".format(self.config))
        self._init_hps()
        self.do_validation = self.config.with_valid
        self.use_syncbn = self.config.syncbn
        if self.use_syncbn and zeus.is_torch_backend():
            import apex
            self.model = apex.parallel.convert_syncbn_model(self.model)
        self.train_loader = self._init_dataloader(mode='train')
        self.valid_loader = self._init_dataloader(mode='val')
        self.batch_num_train = len(self.train_loader)
        self.batch_num_valid = len(self.valid_loader)

    def train(self, inputs, labels):
        """Train model."""
        pass

    def predict(self, input):
        """Inference model."""
        pass

    def save(self, file_name):
        """Save model."""
        pass

    def load(self, model_name, by_name):
        """Load model."""
        pass

    def set_weights(self, weights):
        """Set weight with memory tensor."""
        pass

    def get_weights(self):
        """Get the weights."""
        pass

    def init_train_op(self):
        """Init Train Op."""
        pass

    def _train_epoch(self):
        pass

    def _valid_epoch(self):
        pass

    def _set_default_funcs(self):
        pass

    def _set_condition(self):
        pass

    def _init_tf_estimator(self):
        pass

    def _init_horovod_setting(self):
        """Init horovod setting."""
        self.is_chief = True

    def _init_hps(self, hps=None):
        """Load hps from file."""
        if hps is not None:
            self.hps = hps
        elif self.config.hps_file is not None:
            desc_file = self.config.hps_file.replace("{local_base_path}",
                                                     self.local_base_path)
            self.hps = Config(desc_file)
        elif self.config.hps_folder is not None:
            folder = self.config.hps_folder.replace("{local_base_path}",
                                                    self.local_base_path)
            pattern = FileOps.join_path(folder, "desc_*.json")
            desc_file = glob.glob(pattern)[0]
            self.hps = Config(desc_file)
        if self.hps and self.hps.get('trainer'):
            self.config.from_json(self.hps.get('trainer'))
            self.load_checkpoint = self.config.load_checkpoint
        self.epochs = self.config.epochs

    def _init_minimize_op(self, loss, global_step, var_list=None):
        """Init loss minimize operation, include loss scale method."""
        loss_scale = self.config.loss_scale if self.use_amp else 1.
        if loss_scale != 1:
            scaled_grad_vars = self.optimizer.compute_gradients(
                loss * loss_scale, var_list=var_list)
            unscaled_grad_vars = []
            for grad, var in scaled_grad_vars:
                unscaled_grad_vars.append((grad, var) if grad is None else (
                    grad / loss_scale, var))
            minimize_op = self.optimizer.apply_gradients(
                unscaled_grad_vars, global_step)
        else:
            grad_vars = self.optimizer.compute_gradients(loss,
                                                         var_list=var_list)
            minimize_op = self.optimizer.apply_gradients(
                grad_vars, global_step)
        return minimize_op

    def _init_metrics(self, metrics=None):
        """Init metrics."""
        if metrics is not None:
            return metrics
        else:
            if zeus.is_torch_backend():
                from zeus.metrics.pytorch.metrics import Metrics
            elif zeus.is_tf_backend():
                from zeus.metrics.tensorflow.metrics import Metrics
            elif zeus.is_ms_backend():
                from zeus.metrics.mindspore.metrics import Metrics
            return Metrics()

    def _init_dataloader(self, mode, loader=None, transforms=None):
        """Init dataloader."""
        if loader is not None:
            return loader
        if mode == "train" and self.hps is not None and self.hps.get(
                "dataset") is not None:
            if self.hps.get("dataset") and self.hps.get("dataset").get('type'):
                dataset_cls = ClassFactory.get_cls(
                    ClassType.DATASET,
                    self.hps.get("dataset").get('type'))
            else:
                dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
            dataset = dataset_cls(mode=mode, hps=self.hps.get("dataset"))
        elif self.hps:
            if self.hps.get("dataset") and self.hps.get("dataset").get('type'):
                dataset_cls = ClassFactory.get_cls(
                    ClassType.DATASET,
                    self.hps.get("dataset").get('type'))
                dataset = dataset_cls(mode=mode, hps=self.hps.get("dataset"))
            else:
                dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
                dataset = dataset_cls(mode=mode)
        else:
            dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
            dataset = dataset_cls(mode=mode)
        if transforms is not None:
            dataset.transforms = transforms
        if self.distributed and mode == "train":
            dataset.set_distributed(self._world_size, self._rank_id)
        # adapt the dataset to specific backend
        dataloader = Adapter(dataset).loader
        return dataloader

    def _train_loop(self):
        """Do the training with data, callbacks and step functions etc."""
        # Allow user to build trainer in before_train() callback, but they
        # should set lazy_built in configuration file to True
        self.callbacks.before_train()
        if self.skip_train:
            return

        if self.use_unsupervised_pretrain and zeus.is_torch_backend():
            from .trainer.simclr.transforms import TransformsSimCLR
            from .trainer.simclr.train import simclr_train
            train_loader = self._init_dataloader(mode="train",
                                                 transforms=TransformsSimCLR())
            self.model = simclr_train(self.model, train_loader)

        repeat_time = 1 if zeus.is_ms_backend() else self.epochs
        for epoch in range(self._start_epoch, repeat_time):
            epoch_logs = {'train_num_batches': self.batch_num_train}
            if self.do_validation:
                epoch_logs.update({'valid_num_batches': self.batch_num_valid})
            self.callbacks.before_epoch(epoch, epoch_logs)
            self._train_epoch()
            if self.do_validation and self._should_run_validation(epoch):
                self._valid_epoch()
            self.callbacks.after_epoch(epoch)
        self.callbacks.after_train()
        if self.distributed:
            self._shutdown_distributed()

    def _should_run_validation(self, epoch):
        # A valid_interval of zero means the trainer does not run _valid_loop;
        # the user may provide _valid_loop in other callbacks instead.
        if self.valid_interval == 0:
            return False
        else:
            return epoch % self.valid_interval == 0 or (epoch +
                                                        1) == self.epochs

    def _init_callbacks(self):
        disables = []
        customs = self.config.callbacks or []
        if customs and not isinstance(customs, list):
            customs = [customs]
        if not self.config.model_statistics:
            disables.append('ModelStatistics')
        self.callbacks = CallbackList(customs, disables)
        self.callbacks.set_trainer(self)

    def _metric_average(self, val, name):
        """Do metric average.

        :param val: input value
        :param name: metric name
        :return:
        """
        import torch
        import horovod.torch as hvd
        tensor = torch.tensor(val)
        avg_tensor = hvd.allreduce(tensor, name=name)
        return avg_tensor.item()

    def _backup(self):
        """Backup result worker folder."""
        if self.need_backup is True and self.backup_base_path is not None:
            backup_worker_path = FileOps.join_path(self.backup_base_path,
                                                   self.get_worker_subpath())
            FileOps.copy_folder(
                self.get_local_worker_path(self.step_name, self.worker_id),
                backup_worker_path)

    def _shutdown_distributed(self):
        if zeus.is_npu_device() and self.distributed:
            self.sess.run(self.npu_shutdown)
            self.sess.close()
Example #16
    def convert_examples_to_features(self, examples, label_list,
                                     max_seq_length, tokenizer):
        """Load a data file into a list of `InputBatch`s."""
        label_map = {label: i for i, label in enumerate(label_list)}
        features = []
        for (ex_index, example) in enumerate(examples):
            tokens_a = tokenizer.tokenize(example.text_a)

            tokens_b = None
            if example.text_b:
                tokens_b = tokenizer.tokenize(example.text_b)
                # Modifies `tokens_a` and `tokens_b` in place so that the total
                # length is less than the specified length.
                # Account for [CLS], [SEP], [SEP] with "- 3"
                _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
            else:
                # Account for [CLS] and [SEP] with "- 2"
                if len(tokens_a) > max_seq_length - 2:
                    tokens_a = tokens_a[:(max_seq_length - 2)]

            # The convention in BERT is:
            # (a) For sequence pairs:
            #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
            #  type_ids: 0   0  0    0    0     0       0 0    1  1  1  1   1 1
            # (b) For single sequences:
            #  tokens:   [CLS] the dog is hairy . [SEP]
            #  type_ids: 0   0   0   0  0     0 0
            #
            # Where "type_ids" are used to indicate whether this is the first
            # sequence or the second sequence. The embedding vectors for `type=0` and
            # `type=1` were learned during pre-training and are added to the wordpiece
            # embedding vector (and position vector). This is not *strictly* necessary
            # since the [SEP] token unambiguously separates the sequences, but it makes
            # it easier for the model to learn the concept of sequences.
            #
            # For classification tasks, the first vector (corresponding to [CLS]) is
            # used as the "sentence vector". Note that this only makes sense because
            # the entire model is fine-tuned.
            tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
            segment_ids = [0] * len(tokens)

            if tokens_b:
                tokens += tokens_b + ["[SEP]"]
                segment_ids += [1] * (len(tokens_b) + 1)

            input_ids = tokenizer.convert_tokens_to_ids(tokens)

            # The mask has 1 for real tokens and 0 for padding tokens. Only real
            # tokens are attended to.
            input_mask = [1] * len(input_ids)

            # Zero-pad up to the sequence length.
            padding = [0] * (max_seq_length - len(input_ids))
            input_ids += padding
            input_mask += padding
            segment_ids += padding

            assert len(input_ids) == max_seq_length
            assert len(input_mask) == max_seq_length
            assert len(segment_ids) == max_seq_length

            label_id = label_map[example.label]
            if ex_index < 5:
                logging.info("*** Example ***")
                logging.info("guid: %s" % (example.guid))
                logging.info("tokens: %s" % " ".join([str(x) for x in tokens]))
                logging.info("input_ids: %s" %
                             " ".join([str(x) for x in input_ids]))
                logging.info("input_mask: %s" %
                             " ".join([str(x) for x in input_mask]))
                logging.info("segment_ids: %s" %
                             " ".join([str(x) for x in segment_ids]))
                logging.info("label: %s (id = %d)" % (example.label, label_id))

            features.append(
                Config(
                    dict(input_ids=input_ids,
                         input_mask=input_mask,
                         segment_ids=segment_ids,
                         label_id=label_id)))
        return features
Example #17
class Module(object):
    """Base Module to adapter tf Module."""

    def __init__(self):
        self.name = ''
        self.data_format = General.data_format
        self._modules = Config()
        self._parameters = OrderedDict()
        self._weights_buffer = OrderedDict()
        self._init_configs()

    def _init_configs(self):
        self._training = True
        self._trainable = True
        self.weight_file = None
        self.from_weight_type = None
        self._is_load_pretrained = False
        self._is_adaptive_weight = False
        self.exclude_weight_prefix = None

    def add_module(self, name, model):
        """Add models into self._models."""
        setattr(self, str(name), model)

    def build(self):
        """Build model or params."""
        pass

    def named_modules(self):
        """Return names spaces."""
        self._apply_names()
        _modules = []
        for module in self.children():
            _modules.append((module.name, module))
            _modules.extend(module.named_modules())
        return _modules

    def named_children(self):
        """Return names children."""
        return [(name, module) for name, module in self._modules.items()]

    def children(self):
        """Get child models of current Module."""
        for model in self._modules.values():
            yield model

    def load_checkpoint(self, weight_file):
        """Load weight state dict from last checkpoint file."""
        if not weight_file:
            return
        logging.info("Load checkpoint form file ({}).".format(weight_file))
        # model_file = tf.train.latest_checkpoint(weight_file)
        reader = tf.train.NewCheckpointReader(weight_file)
        variables = reader.get_variable_to_shape_map()
        states = {v: reader.get_tensor(v) for v in variables}
        self.load_checkpoint_from_numpy(states)

    def load_checkpoint_from_numpy(self, states):
        """Load checkpoint from numpy."""
        states = self._exclude_checkpoint_by_prefix(states)
        for name, module in self.named_modules():
            child_state = [(k, v) for k, v in states.items() if k.startswith(module.name + '/')]
            for k, v in child_state:
                module.set_weights(k, v)

    def _exclude_checkpoint_by_prefix(self, states):
        if self.exclude_weight_prefix:
            if not isinstance(self.exclude_weight_prefix, list):
                self.exclude_weight_prefix = [self.exclude_weight_prefix]
            for prefix in self.exclude_weight_prefix:
                states = {k: v for k, v in states.items() if not k.startswith(prefix)}
        return states

    def set_weights(self, name, value):
        """Set weights into weights buffer."""
        self._weights_buffer[name] = value

    @property
    def training(self):
        """Get training flag."""
        return self._training

    @training.setter
    def training(self, value):
        """Set training flag."""
        self._training = value
        for module in self.children():
            module.training = value

    @property
    def is_adaptive_weight(self):
        """Get _is_adaptive_weight flag."""
        return self._is_adaptive_weight

    @is_adaptive_weight.setter
    def is_adaptive_weight(self, value):
        """Set _is_adaptive_weight flag."""
        self._is_adaptive_weight = value
        for module in self.children():
            module.is_adaptive_weight = value

    def freeze(self):
        """Set training flag."""
        self._trainable = False
        for module in self.children():
            module.freeze()

    def __setattr__(self, key, value):
        """Set name to modules."""
        super().__setattr__(key, value)
        if isinstance(value, Module):
            self._modules[key] = value

    def set_parameters(self, name, value):
        """Set Parameters."""
        self._parameters[name] = value
        setattr(self, name, value)
        return self.name

    def get_weights(self, name=None):
        """Get weights by name."""
        if self._weights_buffer:
            return self._weights_buffer
        return tf.get_default_graph().get_tensor_by_name('{}:0'.format(name))

    def get_all_weights(self):
        """Get all weights."""
        all_weights = OrderedDict()
        for child in self.children():
            all_weights.update(child._weights_buffer)
            if isinstance(child, Module):
                all_weights.update(child.get_all_weights())
        return all_weights

    def get_weight_ops(self, name):
        """Get weight ops."""
        all_weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        weight_ops = [t for t in all_weight if not t.name.startswith(name)]
        return weight_ops

    def call(self, inputs, *args, **kwarg):
        """Call inputs."""
        output = inputs
        for model in self.children():
            output = model(output)
        return output

    def adaptive_weight(self, inputs):
        """Adaptive weight."""
        return {}

    def _apply_names(self, parent_name=''):
        """Apply names spaces."""
        for scope_name, module in self._modules.items():
            scope_name = '{}.{}'.format(parent_name, scope_name) if parent_name else scope_name
            module.name = module.name or scope_name + '/' + module.__class__.__name__
            module._apply_names(scope_name)

    def _apply_parameters(self):
        """Apply names spaces."""
        for name, params in self._parameters.items():
            setattr(self, name, tf.Variable(params, name='{}.{}'.format(self.name, name) if self.name else name))

    def __call__(self, inputs, *args, **kwargs):
        """Call call function."""
        self.build()
        self._apply_parameters()
        self._apply_names()
        for module in self.children():
            module._is_load_pretrained = True
        out = self.call(inputs, *args, **kwargs)
        self._apply_weights(inputs)
        return out

    def _apply_weights(self, inputs):
        if not self._weights_buffer:
            return
        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        if self.is_adaptive_weight:
            self._weights_buffer.update(self.adaptive_weight(inputs))
        values = [(var, self._weights_buffer.get(var.name.replace(':0', ''))) for var in variables if
                  var.name.replace(':0', '') in self._weights_buffer]
        for v, weight in values:
            v._initializer_op = state_ops.assign(v, weight)
        self._weights_buffer.clear()

    def modules(self):
        """Get the current modules."""
        if self._modules.values():
            return self._modules.values()
        else:
            return [self]
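A minimal subclass sketch for the tf-adapter Module above; the ops layer names follow the other examples on this page and the sizes are illustrative. The default call() chains the children in registration order, so a network whose data flow is not purely sequential would override call().

    class TinyNet(Module):
        def __init__(self):
            super(TinyNet, self).__init__()
            self.conv1 = ops.Conv2d(3, 16, kernel_size=3, padding=1)
            self.pool1 = ops.MaxPool2d(2, stride=2)
            self.fc = ops.Linear(16, 10)

    net = TinyNet()
    # named_children() -> [('conv1', ...), ('pool1', ...), ('fc', ...)]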
Example #18
class Optimizer(object):
    """Register and call Optimizer class."""

    config = OptimConfig()

    def __new__(cls, *args, **kwargs):
        """Create optimizer or multi-optimizer class."""
        if isinstance(cls.config.to_dict(), list):
            t_cls = ClassFactory.get_cls(ClassType.OPTIMIZER,
                                         'MultiOptimizers')
            return super().__new__(t_cls)
        return super().__new__(cls)

    def __init__(self, config=None):
        """Initialize."""
        self.is_multi_opt = False
        if config is not None:
            self.config = Config(config)
        raw_config = self.config.to_dict()
        raw_config.type = self.config.type
        map_dict = OptimMappingDict
        self.map_config = ConfigBackendMapping(
            map_dict.type_mapping_dict,
            map_dict.params_mapping_dict).backend_mapping(raw_config)
        self.optim_cls = ClassFactory.get_cls(ClassType.OPTIMIZER,
                                              self.map_config.type)

    def __call__(self, model=None, distributed=False, **kwargs):
        """Call Optimizer class.

        :param model: model, used in torch case
        :param distributed: use distributed
        :return: optimizer
        """
        params = self.map_config.get("params", {})
        logging.debug("Call Optimizer. name={}, params={}".format(
            self.optim_cls.__name__, params))
        optimizer = None
        try:
            if zeus.is_torch_backend():
                learnable_params = [
                    param for param in model.parameters()
                    if param.requires_grad
                ]
                optimizer = self.optim_cls(learnable_params, **params)
                if distributed:
                    optimizer = self.set_distributed(optimizer, model)
            elif zeus.is_tf_backend():
                optimizer = dynamic_optimizer(self.optim_cls, **params)
            elif zeus.is_ms_backend():
                if "dynamic_lr" in kwargs:
                    params.update({"learning_rate": kwargs["dynamic_lr"]})
                learnable_params = [
                    param for param in model.trainable_params()
                    if param.requires_grad
                ]
                optimizer = self.optim_cls(learnable_params, **params)
            return optimizer
        except Exception as ex:
            logging.error("Failed to call Optimizer name={}, params={}".format(
                self.optim_cls.__name__, params))
            raise ex

    @classmethod
    def set_distributed(cls, optimizer, model=None):
        """Set distributed optimizer."""
        if zeus.is_torch_backend():
            optimizer = hvd.DistributedOptimizer(
                optimizer,
                named_parameters=model.named_parameters(),
                compression=hvd.Compression.none)
        elif zeus.is_tf_backend():
            optim_class = hvd.DistributedOptimizer if zeus.is_gpu_device(
            ) else NPUDistributedOptimizer
            optimizer = dynamic_distributed_optimizer(optim_class, optimizer)
        return optimizer
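A hedged usage sketch for the PyTorch backend: the "SGD" type and its params are only illustrative and must match an optimizer registered under ClassType.OPTIMIZER, and model is an existing torch module.

    optim = Optimizer({"type": "SGD", "params": {"lr": 0.01, "momentum": 0.9}})
    optimizer = optim(model=model)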
Example #19
class Module(object):
    """Base Module to adapter tf Module."""

    data_format = 'channels_first'

    def __init__(self):
        self.parent_scope_name = ''
        self._scope_name = ''
        self._modules = Config()
        self._training = True
        self.enable_scope_name = enable_scope_name
        self.data_format = General.data_format
        self.pretrained_model_file = None
        self._is_load_pretrained = False
        self.load_pretrained_type = None
        self._trainable = True
        self.pretrained_prefix = None

    def add_module(self, name, model):
        """Add models into self._models."""
        setattr(self, str(name), model)

    def named_modules(self):
        """Return names spaces."""
        _names_modules = []
        for model in self.children():
            if isinstance(model, Module):
                _names_modules.append((model._scope_name, model))
                child_modules = model.named_modules()
                _names_modules.extend(child_modules)
        return _names_modules

    def named_children(self):
        """Return names children."""
        return [(name, module) for name, module in self._modules.items()]

    def children(self):
        """Get child models of current Module."""
        for model in self._modules.values():
            if isinstance(model, Module):
                model._scope_name = "{}.{}".format(
                    self._scope_name, model.parent_scope_name) if self._scope_name else model.parent_scope_name
            yield model

    def pretrained(self, pretrained_model_file=None):
        """Load Pretrained weights."""
        if self._is_load_pretrained:
            return []
        assign_vars = []
        checkpoint_path = pretrained_model_file or self.pretrained_model_file
        if not checkpoint_path:
            return assign_vars
        pretrained_prefix = self.pretrained_prefix or {self._scope_name: self._scope_name}
        if self.load_pretrained_type == 'pytorch':
            assign_vars = assign_pytorch_weights(checkpoint_path, pretrained_prefix)
        else:
            tf.train.init_from_checkpoint(checkpoint_path, pretrained_prefix)
        self._is_load_pretrained = True
        return assign_vars

    @property
    def training(self):
        """Get training flag."""
        return self._training

    @training.setter
    def training(self, value):
        """Set training flag."""
        self._training = value
        for module in self.children():
            module.training = value

    @property
    def freeze(self):
        """Get training flag."""
        return self.freeze

    @freeze.setter
    def freeze(self, value):
        """Set training flag."""
        self._trainable = not value
        for module in self.children():
            module.freeze = value

    def __setattr__(self, key, value):
        """Set name to modules."""
        self.__dict__[key] = value
        # super().__setattr__(key, value)
        if isinstance(value, Module):
            if self.enable_scope_name:
                value.parent_scope_name = key
            self._modules[key] = value

    def __getattribute__(self, name):
        """Get modules by name."""
        value = object.__getattribute__(self, name)
        if isinstance(value, Module) and self.enable_scope_name:
            value._scope_name = "{}.{}".format(
                self._scope_name, value.parent_scope_name) if self._scope_name else value.parent_scope_name
        return value

    def set_parameters(self, name, value):
        """Set Parameters."""
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            setattr(self, name, tf.get_variable(name, initializer=value))

    def get_weights(self, name):
        """Get weights by name."""
        return tf.get_default_graph().get_tensor_by_name('{}:0'.format(name))

    def get_weight_ops(self, name):
        """Get weight ops."""
        all_weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        weight_ops = [t for t in all_weight if not t.name.startswith(name)]
        return weight_ops

    def call(self, inputs, *args, **kwarg):
        """Call inputs."""
        output = inputs
        for model in self.children():
            output = model(output)
        return output

    def __call__(self, inputs, *args, **kwargs):
        """Call call function."""
        return self.call(inputs, *args, **kwargs)

    def modules(self):
        """Get the current modules."""
        if self._modules.values():
            return self._modules.values()
        else:
            return [self]