Beispiel #1
0
    def _train_desc(self, model_desc:ModelDesc, conf_train:Config)->MetricsStats:
        """Train the model built from `model_desc` and return its metrics stats.

        Reads the 'trainer' and 'loader' sections of `conf_train`. When the
        configured number of epochs is 0, no model is built and no data is
        loaded; a `MetricsStats(model_desc, None, None)` is returned instead.

        Args:
            model_desc: description passed to `nas_utils.model_from_desc`.
            conf_train: config providing the 'trainer' and 'loader' sections.

        Returns:
            The `MetricsStats` produced by `Search._create_metrics_stats` for
            the trained model, or the placeholder described above when
            epochs == 0.
        """
        # region conf vars
        conf_trainer = conf_train['trainer']
        conf_loader = conf_train['loader']
        trainer_title = conf_trainer['title']
        epochs = conf_trainer['epochs']
        drop_path_prob = conf_trainer['drop_path_prob']
        # endregion

        logger.pushd(trainer_title)
        try:
            if epochs == 0:
                # nothing to pretrain, save time
                return MetricsStats(model_desc, None, None)

            # droppath is switched on only for a strictly positive probability
            model = nas_utils.model_from_desc(model_desc,
                                              droppath=drop_path_prob>0.0,
                                              affine=True)

            # get data
            train_dl, val_dl = self.get_data(conf_loader)
            assert train_dl is not None

            trainer = Trainer(conf_trainer, model, checkpoint=None)
            train_metrics = trainer.fit(train_dl, val_dl)

            return Search._create_metrics_stats(model, train_metrics, self.finalizers)
        finally:
            # balance the pushd above even when building/training raises
            logger.popd()
Beispiel #2
0
def eval_arch(conf_eval: Config, cell_builder: Optional[CellBuilder]):
    """Create the model described by `conf_eval`, train it and save the results.

    Steps, all wrapped between `logger.pushd('eval_arch')` and a matching
    `logger.popd()`:
      1. register ops through `cell_builder` when one is supplied,
      2. build the model with `create_model(conf_eval)`,
      3. fit a `Trainer` on the train/test loaders from `data.get_data`,
      4. save the training metrics to `conf_eval['metric_filename']`,
      5. save the model when `conf_eval['model_filename']` is truthy.

    Args:
        conf_eval: config providing 'loader', 'model_filename',
            'metric_filename', 'checkpoint', 'resume' and 'trainer'.
        cell_builder: optional builder whose `register_ops()` is called
            before the model is created.
    """
    logger.pushd('eval_arch')
    try:
        # region conf vars
        conf_loader = conf_eval['loader']
        model_filename = conf_eval['model_filename']
        metric_filename = conf_eval['metric_filename']
        conf_checkpoint = conf_eval['checkpoint']
        resume = conf_eval['resume']
        conf_train = conf_eval['trainer']
        # endregion

        if cell_builder:
            cell_builder.register_ops()

        model = create_model(conf_eval)

        # get data; the middle element of the returned triple is not used here
        train_dl, _, test_dl = data.get_data(conf_loader)
        assert train_dl is not None and test_dl is not None

        checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)
        trainer = Trainer(conf_train, model, checkpoint)
        train_metrics = trainer.fit(train_dl, test_dl)
        train_metrics.save(metric_filename)

        # save model
        if model_filename:
            model_filename = utils.full_path(model_filename)
            ml_utils.save_model(model, model_filename)

        # logged unconditionally: holds the configured (falsy) value when
        # nothing was saved
        logger.info({'model_save_path': model_filename})
    finally:
        # balance the pushd above even when training or saving raises
        logger.popd()
Beispiel #3
0
def train_test()->Metrics:
    """Run a single-epoch training of `cifar10_models.resnet34` on CUDA.

    The 'nas' -> 'eval' section of the common config is used, with several
    trainer and loader settings overridden below before training starts.

    Returns:
        Whatever `Trainer.get_metrics()` reports after `fit` completes.
    """
    eval_conf = common.get_conf()['nas']['eval']

    # region conf vars
    loader_conf = eval_conf['loader']
    trainer_conf = eval_conf['trainer']
    # endregion

    # fixed overrides applied on top of the loaded config
    trainer_conf['validation']['freq'] = 1
    trainer_conf['epochs'] = 1
    loader_conf['train_batch'] = 128
    loader_conf['test_batch'] = 4096
    loader_conf['cutout'] = 0
    trainer_conf['drop_path_prob'] = 0.0
    trainer_conf['grad_clip'] = 0.0
    trainer_conf['aux_weight'] = 0.0

    cuda_device = torch.device('cuda')
    net = cifar10_models.resnet34().to(cuda_device)

    # get data
    loaders = data.get_data(loader_conf)
    assert loaders.train_dl is not None and loaders.test_dl is not None

    runner = Trainer(trainer_conf, net, None)
    runner.fit(loaders)
    return runner.get_metrics()
Beispiel #4
0
def train_test(conf_eval: Config):
    """Train `cifar10_models.resnet34` for 10 epochs on `conf_eval['device']`.

    Several trainer and loader settings in `conf_eval` are overridden in
    place before the `Trainer` is created.

    Args:
        conf_eval: config providing 'loader', 'trainer' and 'device'.
    """
    # the returned logger is not used in this function; the call is retained
    get_logger()

    # region conf vars
    loader_conf = conf_eval['loader']
    trainer_conf = conf_eval['trainer']
    # endregion

    # fixed overrides applied on top of the supplied config
    trainer_conf['validation']['freq'] = 1
    trainer_conf['epochs'] = 10
    loader_conf['train_batch'] = 128
    loader_conf['test_batch'] = 4096
    loader_conf['cutout'] = 0
    trainer_conf['drop_path_prob'] = 0.0
    trainer_conf['grad_clip'] = 0.0
    trainer_conf['aux_weight'] = 0.0

    target_device = torch.device(conf_eval['device'])
    net = cifar10_models.resnet34().to(target_device)

    # get data; the middle element of the returned triple is not used here
    train_loader, _, test_loader = data.get_data(loader_conf)
    assert train_loader is not None and test_loader is not None

    Trainer(trainer_conf, net, target_device, None, False).fit(train_loader, test_loader)
Beispiel #5
0
    def train_model_desc(self, model_desc:ModelDesc, conf_train:Config)\
            ->Optional[ModelMetrics]:
        """Build a `Model` from `model_desc`, train it and return the result.

        Args:
            model_desc: description the `Model` is constructed from.
            conf_train: config providing the 'trainer' and 'loader' sections.

        Returns:
            `ModelMetrics(model, train_metrics)` after training, or `None`
            when the configured number of epochs is zero or negative (in
            which case nothing is built, loaded or logged).
        """

        # region conf vars
        conf_trainer = conf_train['trainer']
        conf_loader = conf_train['loader']
        trainer_title = conf_trainer['title']
        epochs = conf_trainer['epochs']
        drop_path_prob = conf_trainer['drop_path_prob']
        # endregion

        # if epochs <= 0 then nothing to train, so save time
        if epochs <= 0:
            return None

        logger.pushd(trainer_title)
        try:
            # droppath is switched on only for a strictly positive probability
            model = Model(model_desc, droppath=drop_path_prob > 0.0, affine=True)

            # get data
            data_loaders = self.get_data(conf_loader)

            trainer = Trainer(conf_trainer, model, checkpoint=None)
            train_metrics = trainer.fit(data_loaders)
        finally:
            # balance the pushd above even when building/training raises
            logger.popd()

        return ModelMetrics(model, train_metrics)
Beispiel #6
0
    def train_model(self, conf_train: Config, model: nn.Module,
                    checkpoint: Optional[CheckPoint]) -> Metrics:
        """Fit `model` with a `Trainer` and return the metrics from `fit`.

        Args:
            conf_train: config providing the 'loader' and 'trainer' sections.
            model: module handed to the `Trainer`.
            checkpoint: checkpoint handed to the `Trainer`; may be None.

        Returns:
            The value returned by `Trainer.fit` for the train/test loaders
            obtained from `self.get_data`.
        """
        loader_conf, trainer_conf = conf_train['loader'], conf_train['trainer']

        # get data
        train_loader, test_loader = self.get_data(loader_conf)

        return Trainer(trainer_conf, model, checkpoint).fit(train_loader, test_loader)
Beispiel #7
0
def train_test(conf_eval: Config):
    """Train `cifar10_models.resnet34` on CUDA device 0 using `conf_eval`.

    Args:
        conf_eval: config providing the 'loader' and 'trainer' sections.
    """
    loader_conf, trainer_conf = conf_eval['loader'], conf_eval['trainer']

    # create model
    gpu0 = torch.device('cuda', 0)
    net = cifar10_models.resnet34().to(gpu0)

    # get data; the middle element of the returned triple is not used here
    train_loader, _, test_loader = data.get_data(loader_conf)

    # train!
    Trainer(trainer_conf, net).fit(train_loader, test_loader)
Beispiel #8
0
def main():
    """Create model #5 from the Nasbench101 dataset and train it.

    The dataset is read from '~/dataroot/nasbench_ds/nasbench_full.pkl' and
    the configuration from 'confs/algos/nasbench101.yaml'; the 'nas' ->
    'eval' section supplies the loader and trainer settings.
    """
    # NOTE(review): the original carried a bare list "6, 7, 9, 10, 16" here,
    # presumably model indices of interest; confirm before relying on it.

    # alternative kept from the original for reference:
    #   model_builder.build(model_builder.EXAMPLE_DESC_MATRIX, model_builder.EXAMPLE_VERTEX_OPS)
    dataset = Nasbench101Dataset('~/dataroot/nasbench_ds/nasbench_full.pkl')
    eval_conf = common_init(config_filepath='confs/algos/nasbench101.yaml')['nas']['eval']
    loader_conf, trainer_conf = eval_conf['loader'], eval_conf['trainer']

    net = dataset.create_model(5)  # 401277 is same model as example

    loaders = data.get_data(loader_conf)

    Trainer(trainer_conf, net).fit(loaders)