def __init__(self, search_space, arch_embedder_type="lstm", arch_embedder_cfg=None,
             mlp_hiddens=(200, 200, 200), mlp_dropout=0.1,
             optimizer={"type": "Adam", "lr": 0.001}, scheduler=None,
             compare_loss_type="margin_linear", compare_margin=0.01,
             pairing_method="concat", diff_only=False, train_use_sigmoid=False,
             sorting_residue_worse_thresh=100, sorting_residue_better_thresh=100,
             max_grad_norm=None, schedule_cfg=None):
    # [optional] arch reconstruction loss (arch_decoder_type/cfg)
    super(PairwiseComparator, self).__init__(schedule_cfg)
    nn.Module.__init__(self)

    # configs
    expect(compare_loss_type in {"binary_cross_entropy", "margin_linear"},
           "comparing loss type {} not supported".format(compare_loss_type),
           ConfigException)
    self.compare_loss_type = compare_loss_type
    self.compare_margin = compare_margin
    expect(pairing_method in {"concat", "diff"},
           "pairing method {} not supported".format(pairing_method),
           ConfigException)
    self.pairing_method = pairing_method
    self.sorting_residue_worse_thresh = sorting_residue_worse_thresh
    self.sorting_residue_better_thresh = sorting_residue_better_thresh
    self.max_grad_norm = max_grad_norm
    self.train_use_sigmoid = train_use_sigmoid
    self.diff_only = diff_only

    self.search_space = search_space
    ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
    self.arch_embedder = ae_cls(self.search_space, **(arch_embedder_cfg or {}))

    dim = self.embedding_dim = self.arch_embedder.out_dim \
        if (diff_only and pairing_method == "diff") \
        else 2 * self.arch_embedder.out_dim

    # construct MLP from embedding to score
    self.mlp = []
    for hidden_size in mlp_hiddens:
        self.mlp.append(nn.Sequential(
            nn.Linear(dim, hidden_size),
            nn.ReLU(inplace=False),
            nn.Dropout(p=mlp_dropout)))
        dim = hidden_size
    self.mlp.append(nn.Linear(dim, 1))
    self.mlp = nn.Sequential(*self.mlp)

    # init optimizer and scheduler
    self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
    self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
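# A minimal sketch (not part of the original source) of how the pairwise scoring and the
# "margin_linear" comparison loss configured above could look. The helper names
# `pairwise_score_sketch` / `margin_linear_loss_sketch` and the exact "diff" pairing layout
# are assumptions; the real comparator builds the pair embedding from `self.arch_embedder`
# outputs and scores it with `self.mlp`.
import torch
import torch.nn.functional as F

def pairwise_score_sketch(emb_1, emb_2, mlp, pairing_method="concat",
                          diff_only=False, train_use_sigmoid=False):
    # emb_1 / emb_2: (batch_size, out_dim) embeddings of the two architectures in a pair
    if pairing_method == "diff":
        pair = emb_2 - emb_1 if diff_only else torch.cat([emb_1, emb_2 - emb_1], dim=-1)
    else:  # "concat"
        pair = torch.cat([emb_1, emb_2], dim=-1)
    score = mlp(pair).squeeze(-1)  # higher score => predict arch 2 is better than arch 1
    return torch.sigmoid(score) if train_use_sigmoid else score

def margin_linear_loss_sketch(score, better_label, compare_margin=0.01):
    # better_label: 1.0 if arch 2 is truly better than arch 1, else 0.0
    signed = (2.0 * better_label - 1.0) * score    # positive when the prediction agrees
    return F.relu(compare_margin - signed).mean()  # linear hinge around the margin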
def __init__(self, search_space, arch_embedder_type="lstm", arch_embedder_cfg=None,
             mlp_hiddens=(200, 200, 200), mlp_dropout=0.1,
             optimizer={"type": "Adam", "lr": 0.001}, scheduler=None,
             compare_loss_type="margin_linear", compare_margin=0.01,
             margin_l2=False, use_incorrect_list_only=False, tanh_score=None,
             max_grad_norm=None, schedule_cfg=None):
    # [optional] arch reconstruction loss (arch_decoder_type/cfg)
    super(PointwiseComparator, self).__init__(schedule_cfg)
    nn.Module.__init__(self)

    # configs
    expect(compare_loss_type in {"binary_cross_entropy", "margin_linear"},
           "comparing loss type {} not supported".format(compare_loss_type),
           ConfigException)
    self.compare_loss_type = compare_loss_type
    self.compare_margin = compare_margin
    self.margin_l2 = margin_l2
    self.max_grad_norm = max_grad_norm
    # for update_argsort listwise only
    self.use_incorrect_list_only = use_incorrect_list_only
    self.tanh_score = tanh_score

    self.search_space = search_space
    ae_cls = ArchEmbedder.get_class_(arch_embedder_type)
    self.arch_embedder = ae_cls(self.search_space, **(arch_embedder_cfg or {}))

    dim = self.embedding_dim = self.arch_embedder.out_dim

    # construct MLP from embedding to score
    self.mlp = []
    for hidden_size in mlp_hiddens:
        self.mlp.append(nn.Sequential(
            nn.Linear(dim, hidden_size),
            nn.ReLU(inplace=False),
            nn.Dropout(p=mlp_dropout)))
        dim = hidden_size
    self.mlp.append(nn.Linear(dim, 1))
    self.mlp = nn.Sequential(*self.mlp)

    # init optimizer and scheduler
    self.optimizer = utils.init_optimizer(self.parameters(), optimizer)
    self.scheduler = utils.init_scheduler(self.optimizer, scheduler)
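# A minimal sketch (not part of the original source) of the pointwise scoring path that the
# constructor above sets up: embed an architecture batch, score it with the MLP head, and
# optionally squash the score with `tanh_score`. The helper name `pointwise_score_sketch`
# is hypothetical; in the real class the embedder and MLP are `self.arch_embedder` and `self.mlp`.
import torch

def pointwise_score_sketch(archs, arch_embedder, mlp, tanh_score=None):
    emb = arch_embedder(archs)       # (batch_size, embedding_dim)
    score = mlp(emb).squeeze(-1)     # (batch_size,)
    if tanh_score is not None:
        # scale scores into the open interval (-tanh_score, tanh_score)
        score = tanh_score * torch.tanh(score)
    return score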
def __init__(self,  #pylint: disable=dangerous-default-value
             controller, evaluator, rollout_type="discrete",
             epochs=200, test_every=10,
             # optimizer and scheduler
             controller_optimizer={"type": "Adam", "lr": 0.001},
             controller_scheduler=None,
             # number of rollout/arch samples
             controller_samples=1, derive_samples=8,
             # >1 only works for differentiable rollout now
             rollout_batch_size=1,
             # alternative training config
             evaluator_steps=None, controller_steps=None,
             controller_train_every=1, controller_train_begin=1,
             interleave_controller_every=None,
             schedule_cfg=None):
    """
    Args:
        controller_steps (int): If None (not explicitly given), assume every epoch
            consumes one pass of the controller queue.
        interleave_controller_every (int): Interleave controller update steps every
            `interleave_controller_every` steps. If None, do not interleave, which means
            the controller will only be updated after one epoch of mepa update.
    """
    super(SimpleTrainer, self).__init__(controller, evaluator, rollout_type, schedule_cfg)

    expect(self.rollout_type == self.controller.rollout_type ==
           self.evaluator.rollout_type,
           "the rollout type of trainer/controller/evaluator must match, "
           "check the configuration. ({}/{}/{})".format(
               self.rollout_type, self.controller.rollout_type,
               self.evaluator.rollout_type), ConfigException)

    # configurations
    self.epochs = epochs
    self.test_every = test_every
    self.controller_samples = controller_samples
    self.derive_samples = derive_samples
    self.rollout_batch_size = rollout_batch_size
    self.evaluator_steps = evaluator_steps
    self.controller_steps = controller_steps
    self.controller_train_every = controller_train_every
    self.controller_train_begin = controller_train_begin
    self.interleave_controller_every = interleave_controller_every

    # prepare `self.controller_steps`
    suggested = self.evaluator.suggested_controller_steps_per_epoch()
    if self.controller_steps is None:
        # if `controller_steps` is not specified, use the value suggested by
        # `evaluator.suggested_controller_steps_per_epoch`
        expect(suggested is not None,
               "Cannot infer `controller_steps`! Neither is `controller_steps` given in "
               "the configuration, nor does the evaluator return "
               "a suggested `controller_steps`.", ConfigException)
        self.controller_steps = suggested
    else:
        # `controller_steps` is provided, check whether it matches the suggested value
        if suggested is not None and not suggested == self.controller_steps:
            self.logger.warning(
                "The suggested `controller_steps` (%3d) from "
                "`evaluator.suggested_controller_steps_per_epoch()` differs "
                "from the config setting (%3d).",
                suggested, self.controller_steps)

    # prepare `self.evaluator_steps`
    expect(self.interleave_controller_every is None or (
        self.evaluator_steps is None or self.evaluator_steps ==
        self.controller_steps * self.interleave_controller_every),
           "`evaluator_steps` must not be given or must match "
           "`controller_steps * interleave_controller_every` in interleave mode",
           ConfigException)
    suggested = self.evaluator.suggested_evaluator_steps_per_epoch()
    if self.evaluator_steps is None:
        if self.interleave_controller_every is None:
            # if `evaluator_steps` is not explicitly given, and not in interleave mode,
            # use the value suggested by `evaluator.suggested_evaluator_steps_per_epoch()`
            self.evaluator_steps = suggested
        else:
            # in interleave mode
            self.evaluator_steps = self.controller_steps * self.interleave_controller_every
    elif self.interleave_controller_every is None:
        # `evaluator_steps` is provided, check whether it matches the suggested value
        if suggested is not None and not suggested == self.evaluator_steps:
            self.logger.warning(
                "The suggested `evaluator_steps` (%3d) from "
                "`evaluator.suggested_evaluator_steps_per_epoch()` differs "
                "from the config setting (%3d).",
                suggested, self.evaluator_steps)

    # init controller optimizer and scheduler
    self.controller_scheduler = None
    self.controller_optimizer = None
    if isinstance(self.controller, torch.nn.Module):
        self.controller_optimizer = utils.init_optimizer(
            self.controller.parameters(), controller_optimizer)
        self.controller_scheduler = utils.init_scheduler(
            self.controller_optimizer, controller_scheduler)

    # states and other helper attributes
    self.last_epoch = 0
    self.epoch = 0
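# A minimal sketch (not part of the original source) of the step bookkeeping prepared above.
# In interleave mode, one controller update runs after every `interleave_controller_every`
# evaluator updates, so one epoch performs
# `evaluator_steps == controller_steps * interleave_controller_every` evaluator steps.
# The helper name `interleave_schedule_sketch` is hypothetical.
def interleave_schedule_sketch(controller_steps, interleave_controller_every):
    evaluator_steps = controller_steps * interleave_controller_every
    # True at the evaluator steps immediately followed by a controller update
    return [step % interleave_controller_every == 0
            for step in range(1, evaluator_steps + 1)]

# e.g. controller_steps=2, interleave_controller_every=3 gives
# [False, False, True, False, False, True]: 6 evaluator steps, 2 interleaved controller updates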
def __init__(
        self,
        dataset,
        weights_manager,
        objective,
        rollout_type="discrete",
        batch_size=128,
        eval_optimizer={
            "type": "SGD",
            "lr": 0.01,
            "momentum": 0.9,
            "weight_decay": 1e-4,
        },
        eval_scheduler={
            "type": "CosineWithRestarts",
            "t_0": 10,
            "eta_min": 0.0001,
            "factor": 2.0,
        },
        schedule_every_batch=False,
        load_optimizer=True,
        load_scheduler=True,
        strict_load_weights_manager=True,
        eval_samples=1,
        disable_step_current=False,
        evaluate_with_whole_queue=False,
        data_portion=(0.5, 0.5),
        shuffle_data_before_split=False,  # by default, do not shuffle data before the train-val split
        shuffle_indice_file=None,
        shuffle_data_before_split_seed=None,
        workers_per_queue=2,
        # only works for differentiable controller now
        rollout_batch_size=1,
        # only for rnn data
        bptt_steps=35,
        multiprocess=False,
        schedule_cfg=None,
):
    super(SharedweightEvaluator, self).__init__(
        dataset, weights_manager, objective, rollout_type, schedule_cfg
    )

    # check rollout type
    if self.rollout_type != "compare":
        expect(
            self.rollout_type == self.weights_manager.rollout_type,
            "the rollout type of evaluator/weights_manager must match, "
            "check the configuration. ({}/{})".format(
                self.rollout_type, self.weights_manager.rollout_type
            ),
            ConfigException,
        )
    else:
        # do not check for now
        pass

    self._data_type = self.dataset.data_type()
    self._device = self.weights_manager.device
    self.multiprocess = multiprocess

    # configs
    self.batch_size = batch_size
    self.evaluate_with_whole_queue = evaluate_with_whole_queue
    self.disable_step_current = disable_step_current
    self.data_portion = data_portion
    self.workers_per_queue = workers_per_queue
    self.shuffle_data_before_split = shuffle_data_before_split
    self.shuffle_indice_file = shuffle_indice_file
    self.shuffle_data_before_split_seed = shuffle_data_before_split_seed
    self.eval_samples = eval_samples
    self.rollout_batch_size = rollout_batch_size
    self.schedule_every_batch = schedule_every_batch
    self.load_optimizer = load_optimizer
    self.load_scheduler = load_scheduler
    self.strict_load_weights_manager = strict_load_weights_manager

    # rnn specific configs
    self.bptt_steps = bptt_steps

    # initialize optimizers and schedulers
    # do some checks
    expect(
        len(data_portion) in {2, 3},
        "`data_portion` should have length 2/3.",
        ConfigException,
    )
    self.eval_optimizer = utils.init_optimizer(
        self.weights_manager.parameters(), eval_optimizer
    )
    self.eval_scheduler = utils.init_scheduler(self.eval_optimizer, eval_scheduler)

    # for performance when doing 1-sample ENAS in `update_evaluator`
    if not self.disable_step_current and self.eval_samples == 1:
        # will call `step_current_gradients` of the weights manager
        self.logger.info(
            "As `eval_samples == 1` and `disable_step_current` is not set, "
            "to speed up, will accumulate supernet gradients in-place and call "
            "`super_net.step_current_gradients`."
        )
        self.eval_step_current = True
    else:
        self.eval_step_current = False

    # initialize the data queues
    self._init_data_queues_and_hidden(self._data_type, data_portion)

    # to make pylint happy; actual initialization is done in `_init_criterions`
    self._dataset_related_attrs = None
    self._criterions_related_attrs = None
    self._all_perf_names = None
    self._reward_func = None
    self._reward_kwargs = None
    self._scalar_reward_func = None
    self._perf_names = None
    self._eval_loss_func = None
    self._report_loss_funcs = None

    # initialize reward criterions used by `get_rollout_reward`
    self._init_criterions(self.rollout_type)

    # for report loss
    self.epoch_average_meters = defaultdict(utils.AverageMeter)

    # evaluator update steps
    self.step = 0
    self.plateau_scheduler_loss = []
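# A minimal sketch (not part of the original source) of what `data_portion`,
# `shuffle_data_before_split`, and `shuffle_data_before_split_seed` imply for the index split
# behind the data queues set up by `_init_data_queues_and_hidden`. The helper name
# `split_indices_sketch` is hypothetical.
import numpy as np

def split_indices_sketch(num_samples, data_portion=(0.5, 0.5),
                         shuffle_before_split=False, seed=None):
    indices = np.arange(num_samples)
    if shuffle_before_split:
        # a fixed seed makes the train/val split reproducible across runs
        np.random.RandomState(seed).shuffle(indices)
    split_points = np.cumsum([int(p * num_samples) for p in data_portion])[:-1]
    # e.g. data_portion=(0.5, 0.5): one half backs evaluator (supernet) updates,
    # the other half backs controller updates / rollout evaluation
    return np.split(indices, split_points)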