def _train_desc(self, model_desc: ModelDesc, conf_train: Config) -> MetricsStats:
    """Train given description"""
    # region conf vars
    conf_trainer = conf_train['trainer']
    conf_loader = conf_train['loader']
    trainer_title = conf_trainer['title']
    epochs = conf_trainer['epochs']
    drop_path_prob = conf_trainer['drop_path_prob']
    # endregion

    logger.pushd(trainer_title)

    if epochs == 0:
        # nothing to pretrain, save time
        metrics_stats = MetricsStats(model_desc, None, None)
    else:
        model = nas_utils.model_from_desc(model_desc,
                                          droppath=drop_path_prob > 0.0,
                                          affine=True)

        # get data
        train_dl, val_dl = self.get_data(conf_loader)
        assert train_dl is not None

        trainer = Trainer(conf_trainer, model, checkpoint=None)
        train_metrics = trainer.fit(train_dl, val_dl)

        metrics_stats = Search._create_metrics_stats(model, train_metrics,
                                                     self.finalizers)

    logger.popd()
    return metrics_stats
def eval_arch(conf_eval: Config, cell_builder: Optional[CellBuilder]):
    logger.pushd('eval_arch')

    # region conf vars
    conf_loader = conf_eval['loader']
    model_filename = conf_eval['model_filename']
    metric_filename = conf_eval['metric_filename']
    conf_checkpoint = conf_eval['checkpoint']
    resume = conf_eval['resume']
    conf_train = conf_eval['trainer']
    # endregion

    if cell_builder:
        cell_builder.register_ops()

    model = create_model(conf_eval)

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)
    assert train_dl is not None and test_dl is not None

    checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)
    trainer = Trainer(conf_train, model, checkpoint)
    train_metrics = trainer.fit(train_dl, test_dl)
    train_metrics.save(metric_filename)

    # save model
    if model_filename:
        model_filename = utils.full_path(model_filename)
        ml_utils.save_model(model, model_filename)
        logger.info({'model_save_path': model_filename})

    logger.popd()
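# A minimal, hypothetical driver sketch for eval_arch() above -- an assumption for
# illustration, not the original entry point. It reuses the common_init(...) and
# conf['nas']['eval'] pattern seen in main() at the end of these snippets; the
# config file path and passing cell_builder=None are placeholders.
conf = common_init(config_filepath='confs/algos/darts.yaml')  # assumed config path
eval_arch(conf['nas']['eval'], cell_builder=None)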
def train_test() -> Metrics:
    conf = common.get_conf()
    conf_eval = conf['nas']['eval']

    # region conf vars
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']
    # endregion

    conf_trainer['validation']['freq'] = 1
    conf_trainer['epochs'] = 1
    conf_loader['train_batch'] = 128
    conf_loader['test_batch'] = 4096
    conf_loader['cutout'] = 0
    conf_trainer['drop_path_prob'] = 0.0
    conf_trainer['grad_clip'] = 0.0
    conf_trainer['aux_weight'] = 0.0

    Net = cifar10_models.resnet34
    model = Net().to(torch.device('cuda'))

    # get data
    data_loaders = data.get_data(conf_loader)
    assert data_loaders.train_dl is not None and data_loaders.test_dl is not None

    trainer = Trainer(conf_trainer, model, None)
    trainer.fit(data_loaders)
    met = trainer.get_metrics()
    return met
def train_test(conf_eval: Config):
    logger = get_logger()

    # region conf vars
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']
    # endregion

    conf_trainer['validation']['freq'] = 1
    conf_trainer['epochs'] = 10
    conf_loader['train_batch'] = 128
    conf_loader['test_batch'] = 4096
    conf_loader['cutout'] = 0
    conf_trainer['drop_path_prob'] = 0.0
    conf_trainer['grad_clip'] = 0.0
    conf_trainer['aux_weight'] = 0.0

    device = torch.device(conf_eval['device'])

    Net = cifar10_models.resnet34
    model = Net().to(device)

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)
    assert train_dl is not None and test_dl is not None

    trainer = Trainer(conf_trainer, model, device, None, False)
    trainer.fit(train_dl, test_dl)
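# A hedged usage sketch for the train_test(conf_eval) variant above, following the
# same entry-point pattern as the other snippets (common_init + conf['nas']['eval']).
# The config file path is an illustrative assumption; this variant also expects a
# 'device' key under conf_eval, which the sketch assumes the config provides.
conf = common_init(config_filepath='confs/algos/resnet.yaml')  # assumed config path
train_test(conf['nas']['eval'])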
def train_model_desc(self, model_desc: ModelDesc, conf_train: Config)\
        -> Optional[ModelMetrics]:
    """Train given description"""
    # region conf vars
    conf_trainer = conf_train['trainer']
    conf_loader = conf_train['loader']
    trainer_title = conf_trainer['title']
    epochs = conf_trainer['epochs']
    drop_path_prob = conf_trainer['drop_path_prob']
    # endregion

    # if epochs == 0 then nothing to train, so save time
    if epochs <= 0:
        return None

    logger.pushd(trainer_title)

    model = Model(model_desc, droppath=drop_path_prob > 0.0, affine=True)

    # get data
    data_loaders = self.get_data(conf_loader)

    trainer = Trainer(conf_trainer, model, checkpoint=None)
    train_metrics = trainer.fit(data_loaders)

    logger.popd()

    return ModelMetrics(model, train_metrics)
def train_model(self, conf_train: Config, model: nn.Module,
                checkpoint: Optional[CheckPoint]) -> Metrics:
    conf_loader = conf_train['loader']
    conf_trainer = conf_train['trainer']

    # get data
    train_dl, test_dl = self.get_data(conf_loader)

    trainer = Trainer(conf_trainer, model, checkpoint)
    train_metrics = trainer.fit(train_dl, test_dl)
    return train_metrics
def train_test(conf_eval: Config):
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']

    # create model
    Net = cifar10_models.resnet34
    model = Net().to(torch.device('cuda', 0))

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)

    # train!
    trainer = Trainer(conf_trainer, model)
    trainer.fit(train_dl, test_dl)
def main():
    # 6, 7, 9, 10, 16
    # model = model_builder.build(model_builder.EXAMPLE_DESC_MATRIX, model_builder.EXAMPLE_VERTEX_OPS)

    nsds = Nasbench101Dataset('~/dataroot/nasbench_ds/nasbench_full.pkl')

    conf = common_init(config_filepath='confs/algos/nasbench101.yaml')
    conf_eval = conf['nas']['eval']
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']

    model = nsds.create_model(5)  # 401277 is same model as example

    data_loaders = data.get_data(conf_loader)

    trainer = Trainer(conf_trainer, model)
    trainer.fit(data_loaders)
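# Standard script guard for main() above; the original snippet does not show one,
# so this is an assumed addition for completeness.
if __name__ == '__main__':
    main()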