Example #1
0
    def __init__(self, search_space,
                 arch_embedder_type="lstm", arch_embedder_cfg=None,
                 mlp_hiddens=(200, 200, 200), mlp_dropout=0.1,
                 optimizer={
                     "type": "Adam",
                     "lr": 0.001
                 }, scheduler=None,
                 compare_loss_type="margin_linear",
                 compare_margin=0.01,
                 pairing_method="concat",
                 diff_only=False,
                 train_use_sigmoid=False,
                 sorting_residue_worse_thresh=100,
                 sorting_residue_better_thresh=100,
                 max_grad_norm=None,
                 schedule_cfg=None):
        """Pairwise architecture comparator.

        Embeds two architectures with an `ArchEmbedder`, pairs the two
        embeddings (concatenation or difference), and scores the pair
        with an MLP.
        """
        # [optional] arch reconstruction loss (arch_decoder_type/cfg)
        super(PairwiseComparator, self).__init__(schedule_cfg)
        nn.Module.__init__(self)

        # validate and store the configuration
        expect(compare_loss_type in {"binary_cross_entropy", "margin_linear"},
               "comparing loss type {} not supported".format(compare_loss_type),
               ConfigException)
        expect(pairing_method in {"concat", "diff"},
               "pairing method {} not supported".format(pairing_method),
               ConfigException)
        self.compare_loss_type = compare_loss_type
        self.compare_margin = compare_margin
        self.pairing_method = pairing_method
        self.sorting_residue_worse_thresh = sorting_residue_worse_thresh
        self.sorting_residue_better_thresh = sorting_residue_better_thresh
        self.max_grad_norm = max_grad_norm
        self.train_use_sigmoid = train_use_sigmoid
        self.diff_only = diff_only

        # construct the architecture embedder
        self.search_space = search_space
        ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
        self.arch_embedder = ae_cls(self.search_space, **(arch_embedder_cfg or {}))

        # MLP input width: a single embedding when only the diff is used,
        # otherwise the two embeddings side by side
        if diff_only and pairing_method == "diff":
            in_dim = self.arch_embedder.out_dim
        else:
            in_dim = 2 * self.arch_embedder.out_dim
        self.embedding_dim = in_dim

        # construct MLP from (paired) embedding to a scalar score
        layers = []
        for width in mlp_hiddens:
            layers.append(nn.Sequential(
                nn.Linear(in_dim, width),
                nn.ReLU(inplace=False),
                nn.Dropout(p=mlp_dropout)))
            in_dim = width
        layers.append(nn.Linear(in_dim, 1))
        self.mlp = nn.Sequential(*layers)

        # init optimizer and scheduler
        self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
        self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
Example #2
0
    def __init__(self,
                 search_space,
                 arch_embedder_type="lstm",
                 arch_embedder_cfg=None,
                 mlp_hiddens=(200, 200, 200),
                 mlp_dropout=0.1,
                 optimizer={
                     "type": "Adam",
                     "lr": 0.001
                 },
                 scheduler=None,
                 compare_loss_type="margin_linear",
                 compare_margin=0.01,
                 margin_l2=False,
                 use_incorrect_list_only=False,
                 tanh_score=None,
                 max_grad_norm=None,
                 schedule_cfg=None):
        """Pointwise architecture scorer.

        Embeds a single architecture with an `ArchEmbedder` and maps the
        embedding to a scalar score through an MLP.
        """
        # [optional] arch reconstruction loss (arch_decoder_type/cfg)
        super(PointwiseComparator, self).__init__(schedule_cfg)
        nn.Module.__init__(self)

        # validate and store the configuration
        expect(
            compare_loss_type in {"binary_cross_entropy", "margin_linear"},
            "comparing loss type {} not supported".format(compare_loss_type),
            ConfigException)
        self.compare_loss_type = compare_loss_type
        self.compare_margin = compare_margin
        self.margin_l2 = margin_l2
        self.max_grad_norm = max_grad_norm
        # for update_argsort listwise only
        self.use_incorrect_list_only = use_incorrect_list_only
        self.tanh_score = tanh_score

        # construct the architecture embedder
        self.search_space = search_space
        ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
        self.arch_embedder = ae_cls(self.search_space,
                                    **(arch_embedder_cfg or {}))

        self.embedding_dim = self.arch_embedder.out_dim

        # construct MLP from embedding to a scalar score
        in_dim = self.embedding_dim
        layers = []
        for width in mlp_hiddens:
            layers.append(
                nn.Sequential(nn.Linear(in_dim, width),
                              nn.ReLU(inplace=False),
                              nn.Dropout(p=mlp_dropout)))
            in_dim = width
        layers.append(nn.Linear(in_dim, 1))
        self.mlp = nn.Sequential(*layers)

        # init optimizer and scheduler
        self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
        self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
Example #3
0
    def __init__(
            self,  #pylint: disable=dangerous-default-value
            controller,
            evaluator,
            rollout_type="discrete",
            epochs=200,
            test_every=10,

            # optimizer and scheduler
            controller_optimizer={
                "type": "Adam",
                "lr": 0.001
            },
            controller_scheduler=None,

            # number of rollout/arch samples
            controller_samples=1,
            derive_samples=8,

            # >1 only work for differentiable rollout now
            rollout_batch_size=1,

            # alternative training config
            evaluator_steps=None,
            controller_steps=None,
            controller_train_every=1,
            controller_train_begin=1,
            interleave_controller_every=None,
            schedule_cfg=None):
        """
        Args:
            controller_steps (int): If None, (not explicitly given), assume every epoch consume
                one pass of the controller queue.
            interleave_controller_every (int): Interleave controller update steps every
                `interleave_controller_every` steps. If None, do not interleave, which means
                controller will only be updated after one epoch of mepa update.
        """
        super(SimpleTrainer, self).__init__(controller, evaluator,
                                            rollout_type, schedule_cfg)

        # trainer/controller/evaluator must all operate on the same rollout type
        expect(self.rollout_type == self.controller.rollout_type == \
               self.evaluator.rollout_type,
               "the rollout type of trainer/controller/evaluator must match, "
               "check the configuration. ({}/{}/{})".format(
                   self.rollout_type, self.controller.rollout_type,
                   self.evaluator.rollout_type), ConfigException)

        # configurations
        self.epochs = epochs
        self.test_every = test_every

        self.controller_samples = controller_samples
        self.derive_samples = derive_samples

        self.rollout_batch_size = rollout_batch_size

        self.evaluator_steps = evaluator_steps
        self.controller_steps = controller_steps
        self.controller_train_every = controller_train_every
        self.controller_train_begin = controller_train_begin
        self.interleave_controller_every = interleave_controller_every

        # prepare `self.controller_steps`
        suggested = self.evaluator.suggested_controller_steps_per_epoch()
        if self.controller_steps is None:
            # if `controller_steps` not specified, use the suggested value by calling
            # `evaluator.suggested_controller_steps_per_epoch`
            expect(
                suggested is not None,
                "Cannot infer `controller_steps`! Neither `controller_steps` is given in "
                "configuration, nor does the evaluator return"
                " a suggested `controller_steps`.", ConfigException)
            self.controller_steps = suggested
        else:  # `controller_steps` is provided, check if it matches with the suggested value
            if suggested is not None and suggested != self.controller_steps:
                self.logger.warning(
                    "The suggested `controller_steps` (%3d) from "
                    "`evaluator.suggested_controller_steps_per_epoch()` differs "
                    "from the config setting (%3d).", suggested,
                    self.controller_steps)

        # prepare `self.evaluator_steps`
        # in interleave mode, an explicitly-given `evaluator_steps` must equal
        # `controller_steps * interleave_controller_every`
        expect(self.interleave_controller_every is None or (
            self.evaluator_steps is None or self.evaluator_steps == \
            self.controller_steps * self.interleave_controller_every),
               "`evaluator_steps` must not be given or must match with "
               "`controller_steps * interleave_controller_every` in interleave mode",
               ConfigException)

        suggested = self.evaluator.suggested_evaluator_steps_per_epoch()
        if self.evaluator_steps is None:
            if self.interleave_controller_every is None:
                # if `evaluator_steps` is not explicitly given, and not in interleave mode,
                # use the suggested value from `evaluator.suggested_evaluator_steps_per_epoch()`
                self.evaluator_steps = suggested
            else:
                # in interleave mode
                self.evaluator_steps = self.controller_steps * self.interleave_controller_every
        elif self.interleave_controller_every is None:
            # `evaluator_steps` is provided, check if it matches with the suggested value
            if suggested is not None and suggested != self.evaluator_steps:
                self.logger.warning(
                    "The suggested `evaluator_steps` (%3d) from "
                    "`evaluator.suggested_evaluator_steps_per_epoch()` differs "
                    "from the config setting (%3d).", suggested,
                    self.evaluator_steps)

        # init controller optimizer and scheduler
        # (only when the controller is itself a torch module with parameters)
        self.controller_scheduler = None
        self.controller_optimizer = None
        if isinstance(self.controller, torch.nn.Module):
            self.controller_optimizer = utils.init_optimizer(
                self.controller.parameters(), controller_optimizer)
            self.controller_scheduler = utils.init_scheduler(
                self.controller_optimizer, controller_scheduler)

        # states and other help attributes
        self.last_epoch = 0
        self.epoch = 0
Example #4
0
    def __init__(
        self,
        dataset,
        weights_manager,
        objective,
        rollout_type="discrete",
        batch_size=128,
        eval_optimizer={
            "type": "SGD",
            "lr": 0.01,
            "momentum": 0.9,
            "weight_decay": 1e-4,
        },
        eval_scheduler={
            "type": "CosineWithRestarts",
            "t_0": 10,
            "eta_min": 0.0001,
            "factor": 2.0,
        },
        schedule_every_batch=False,
        load_optimizer=True,
        load_scheduler=True,
        strict_load_weights_manager=True,
        eval_samples=1,
        disable_step_current=False,
        evaluate_with_whole_queue=False,
        data_portion=(0.5, 0.5),
        shuffle_data_before_split=False,  # by default not shuffle data before train-val split
        shuffle_indice_file=None,
        shuffle_data_before_split_seed=None,
        workers_per_queue=2,
        # only work for differentiable controller now
        rollout_batch_size=1,
        # only for rnn data
        bptt_steps=35,
        multiprocess=False,
        schedule_cfg=None,
    ):
        """Shared-weight evaluator: evaluates rollouts with a supernet
        (weights manager), training it with the configured optimizer/scheduler.
        """
        super(SharedweightEvaluator, self).__init__(
            dataset, weights_manager, objective, rollout_type, schedule_cfg
        )

        # check rollout type
        if self.rollout_type != "compare":
            expect(
                self.rollout_type == self.weights_manager.rollout_type,
                "the rollout type of evaluator/weights_manager must match, "
                "check the configuration. ({}/{})".format(
                    self.rollout_type, self.weights_manager.rollout_type
                ),
                ConfigException,
            )
        else:
            # Do not check for now
            pass

        self._data_type = self.dataset.data_type()
        self._device = self.weights_manager.device
        self.multiprocess = multiprocess

        # configs
        self.batch_size = batch_size
        self.evaluate_with_whole_queue = evaluate_with_whole_queue
        self.disable_step_current = disable_step_current
        self.data_portion = data_portion
        self.workers_per_queue = workers_per_queue
        self.shuffle_data_before_split = shuffle_data_before_split
        self.shuffle_indice_file = shuffle_indice_file
        self.shuffle_data_before_split_seed = shuffle_data_before_split_seed
        self.eval_samples = eval_samples
        self.rollout_batch_size = rollout_batch_size
        self.schedule_every_batch = schedule_every_batch
        self.load_optimizer = load_optimizer
        self.load_scheduler = load_scheduler
        self.strict_load_weights_manager = strict_load_weights_manager

        # rnn specific configs
        self.bptt_steps = bptt_steps

        # initialize optimizers and schedulers
        # do some checks
        expect(
            len(data_portion) in {2, 3},
            "`data_portion` should have length 2/3.",
            ConfigException,
        )

        self.eval_optimizer = utils.init_optimizer(
            self.weights_manager.parameters(), eval_optimizer
        )
        self.eval_scheduler = utils.init_scheduler(self.eval_optimizer, eval_scheduler)

        # for performance when doing 1-sample ENAS in `update_evaluator`
        if not self.disable_step_current and self.eval_samples == 1:
            # Will call `step_current_gradients` of weights manager
            self.logger.info(
                "As `eval_samples==1` and `disable_step_current` is not set, "
                "to speed up, will accumulate supernet gradients in-place and call "
                "`super_net.step_current_gradients`."
            )
            self.eval_step_current = True
        else:
            self.eval_step_current = False

        # initialize the data queues
        self._init_data_queues_and_hidden(self._data_type, data_portion)

        # to make pylint happy, actual initialization in _init_criterions method
        self._dataset_related_attrs = None
        self._criterions_related_attrs = None
        self._all_perf_names = None
        self._reward_func = None
        self._reward_kwargs = None
        self._scalar_reward_func = None
        self._perf_names = None
        self._eval_loss_func = None
        self._report_loss_funcs = None
        # initialize reward criterions used by `get_rollout_reward`
        self._init_criterions(self.rollout_type)

        # for report loss
        self.epoch_average_meters = defaultdict(utils.AverageMeter)

        # evaluator update steps
        self.step = 0

        self.plateau_scheduler_loss = []