Example 1
def eval_arch(conf_eval: Config, cell_builder: Optional[CellBuilder]):
    logger.pushd('eval_arch')

    # region conf vars
    conf_loader = conf_eval['loader']
    model_filename = conf_eval['model_filename']
    metric_filename = conf_eval['metric_filename']
    conf_checkpoint = conf_eval['checkpoint']
    resume = conf_eval['resume']
    conf_train = conf_eval['trainer']
    # endregion

    if cell_builder:
        cell_builder.register_ops()

    model = create_model(conf_eval)

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)
    assert train_dl is not None and test_dl is not None

    checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)
    trainer = Trainer(conf_train, model, checkpoint)
    train_metrics = trainer.fit(train_dl, test_dl)
    train_metrics.save(metric_filename)

    # save model
    if model_filename:
        model_filename = utils.full_path(model_filename)
        ml_utils.save_model(model, model_filename)

    logger.info({'model_save_path': model_filename})

    logger.popd()
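
For reference, a minimal driver for eval_arch might look like the sketch below. The config path mirrors the later examples and cell_builder=None skips op registration; both are assumptions for illustration, not part of the original.

from archai.common.common import common_init

# Hypothetical driver (config path assumed); passing None skips cell_builder.register_ops()
conf = common_init(config_filepath='confs/algos/darts.yaml')
eval_arch(conf['nas']['eval'], cell_builder=None)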
Example 2
def train_test() -> Metrics:
    conf = common.get_conf()
    conf_eval = conf['nas']['eval']

    # region conf vars
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']
    # endregion

    conf_trainer['validation']['freq'] = 1
    conf_trainer['epochs'] = 1
    conf_loader['train_batch'] = 128
    conf_loader['test_batch'] = 4096
    conf_loader['cutout'] = 0
    conf_trainer['drop_path_prob'] = 0.0
    conf_trainer['grad_clip'] = 0.0
    conf_trainer['aux_weight'] = 0.0

    Net = cifar10_models.resnet34
    model = Net().to(torch.device('cuda'))

    # get data
    data_loaders = data.get_data(conf_loader)
    assert data_loaders.train_dl is not None and data_loaders.test_dl is not None

    trainer = Trainer(conf_trainer, model, None)
    trainer.fit(data_loaders)
    met = trainer.get_metrics()
    return met
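
Since train_test() reads the shared config via common.get_conf(), it assumes common_init has already been called. A minimal driver might look like this sketch (the config path is an assumption):

from archai.common.common import common_init

common_init(config_filepath='confs/algos/darts.yaml')  # populates common.get_conf()
metrics = train_test()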
Example 3
    def get_data(self, conf_loader: Config) -> Tuple[Optional[DataLoader], Optional[DataLoader]]:
        # first get from cache
        train_ds, val_ds = self._data_cache.get(id(conf_loader), (None, None))
        # if not found in cache then create
        if train_ds is None:
            train_ds, val_ds, _ = data.get_data(conf_loader)
            self._data_cache[id(conf_loader)] = (train_ds, val_ds)
        return train_ds, val_ds
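
Note that keying the cache on id(conf_loader) caches per Config object, not per value: a hit requires passing the very same object again. A hypothetical caller (searcher is an assumed instance of the enclosing class):

import copy

loaders_a = searcher.get_data(conf_loader)   # cache miss: datasets are loaded
loaders_b = searcher.get_data(conf_loader)   # cache hit: same object, same id()
conf_copy = copy.deepcopy(conf_loader)
loaders_c = searcher.get_data(conf_copy)     # miss again: equal contents, different id()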
Example 4
def train_test(conf_eval: Config):
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']

    # create model
    Net = cifar10_models.resnet34
    model = Net().to(torch.device('cuda', 0))

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)

    # train!
    trainer = Trainer(conf_trainer, model)
    trainer.fit(train_dl, test_dl)
Example 5
    def get_data(self, conf_loader: Config) -> Tuple[Optional[DataLoader], Optional[DataLoader]]:

        # this dict caches the dataset objects per dataset config so we don't have to reload
        # the reason we do dynamic attribute is so that any dependent methods
        # can do ray.remote
        if not hasattr(self, '_data_cache'):
            self._data_cache = {}

        # first get from cache
        train_ds, val_ds = self._data_cache.get(id(conf_loader), (None, None))
        # if not found in cache then create
        if train_ds is None:
            train_ds, val_ds, _ = data.get_data(conf_loader)
            self._data_cache[id(conf_loader)] = (train_ds, val_ds)
        return train_ds, val_ds
Example 6
    def get_data(self, conf_loader: Config) -> data.DataLoaders:

        # this dict caches the dataset objects per dataset config so we don't have to reload
        # the reason we do dynamic attribute is so that any dependent methods
        # can do ray.remote
        if not hasattr(self, '_data_cache'):
            self._data_cache: Dict[int, data.DataLoaders] = {}

        # first get from cache
        if id(conf_loader) in self._data_cache:
            data_loaders = self._data_cache[id(conf_loader)]
        else:
            data_loaders = data.get_data(conf_loader)
            self._data_cache[id(conf_loader)] = data_loaders

        return data_loaders
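
Examples 3 and 5 return a (train, val) tuple, while this version returns a data.DataLoaders object, so callers access loaders as attributes. A minimal usage sketch (obj is an assumed instance of the enclosing class):

data_loaders = obj.get_data(conf_loader)
assert data_loaders.train_dl is not None
x, y = next(iter(data_loaders.train_dl))  # sanity-check that the loader yields batches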
Example 7
def main():
    #6, 7, 9, 10, 16

    #model = model_builder.build(model_builder.EXAMPLE_DESC_MATRIX, model_builder.EXAMPLE_VERTEX_OPS)
    nsds = Nasbench101Dataset('~/dataroot/nasbench_ds/nasbench_full.pkl')
    conf = common_init(config_filepath='confs/algos/nasbench101.yaml')
    conf_eval = conf['nas']['eval']
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']

    model = nsds.create_model(5)  # 401277 is same model as example

    data_loaders = data.get_data(conf_loader)

    trainer = Trainer(conf_trainer, model)
    trainer.fit(data_loaders)
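
The leading comment (6, 7, 9, 10, 16) reads like a list of candidate model indices. If so, a sweep over them inside main() might look like this sketch; that interpretation of the comment is an assumption:

# Hypothetical sweep, assuming the commented numbers are Nasbench101Dataset model indices.
for idx in (6, 7, 9, 10, 16):
    model = nsds.create_model(idx)
    trainer = Trainer(conf_trainer, model)
    trainer.fit(data_loaders)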
Example 8
    def finalize_model(self,
                       model: Model,
                       to_cpu=True,
                       restore_device=True) -> ModelDesc:

        logger.pushd('finalize')

        # get config and train data loader
        # TODO: confirm this is correct in case you get silent bugs
        conf = get_conf()
        conf_loader = conf['nas']['search']['loader']
        train_dl, val_dl, test_dl = get_data(conf_loader)

        # wrap all cells in the model
        self._divnas_cells: Dict[int, Divnas_Cell] = {}
        for _, cell in enumerate(model.cells):
            divnas_cell = Divnas_Cell(cell)
            self._divnas_cells[id(cell)] = divnas_cell

        # go through all edges in the DAG and if they are of divop
        # type then set them to collect activations
        sigma = conf['nas']['search']['divnas']['sigma']
        for _, dcell in enumerate(self._divnas_cells.values()):
            dcell.collect_activations(DivOp, sigma)

        # now we need to run one evaluation epoch to collect activations
        # we do it on cpu otherwise we might run into memory issues
        # later we can redo the whole logic in pytorch itself
        # at the end of this each node in a cell will have the covariance
        # matrix of all incoming edges' ops
        model = model.cpu()
        model.eval()
        with torch.no_grad():
            for _ in range(1):  # a single evaluation epoch suffices to collect activations
                for x, _ in train_dl:
                    model(x)  # forward pass only; the outputs are discarded
                    # now you can go through and update the
                    # node covariances in every cell
                    for dcell in self._divnas_cells.values():
                        dcell.update_covs()

        logger.popd()

        return super().finalize_model(model, to_cpu, restore_device)
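
update_covs() is opaque here, but the surrounding comments describe accumulating, per node, a covariance matrix over the activations of incoming edges. A running covariance can be maintained across mini-batches from first and second moments, along the lines of this hypothetical sketch (not the actual Divnas_Cell implementation):

import numpy as np

class RunningCov:
    """Accumulates a covariance matrix of activations across mini-batches."""
    def __init__(self, dim: int):
        self.n = 0
        self.sum = np.zeros(dim)
        self.outer = np.zeros((dim, dim))

    def update(self, acts: np.ndarray):
        # acts: (batch, dim) matrix of flattened activations
        self.n += acts.shape[0]
        self.sum += acts.sum(axis=0)
        self.outer += acts.T @ acts

    def cov(self) -> np.ndarray:
        # population covariance: E[x x^T] - mean mean^T
        mean = self.sum / self.n
        return self.outer / self.n - np.outer(mean, mean)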
Example 9
def train_test(conf_eval: Config):
    # region conf vars
    conf_loader = conf_eval['loader']
    conf_trainer = conf_eval['trainer']
    # endregion

    conf_trainer['validation']['freq'] = 1
    conf_trainer['epochs'] = 10
    conf_loader['train_batch'] = 128
    conf_loader['test_batch'] = 4096
    conf_loader['cutout'] = 0
    conf_trainer['drop_path_prob'] = 0.0
    conf_trainer['grad_clip'] = 0.0
    conf_trainer['aux_weight'] = 0.0

    Net = cifar10_models.resnet34
    model = Net().to(torch.device('cuda', 0))

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)
    assert train_dl is not None and test_dl is not None

    trainer = Trainer(conf_trainer, model, None)
    trainer.fit(train_dl, test_dl)
Example 10
def imagenet_test():
    conf = Config('confs/algos/darts.yaml;confs/datasets/imagenet.yaml')
    conf_loader = conf['nas']['eval']['loader']
    dl_train, *_ = data.get_data(conf_loader)
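
A quick sanity check on the resulting loader, assuming it yields (input, target) batches as torchvision loaders do:

x, y = next(iter(dl_train))
print(x.shape, y.shape)  # e.g. torch.Size([batch, 3, 224, 224]) for ImageNet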
Example 11
import logging

from archai.datasets import data
from archai.common import utils
from archai.common.timing import MeasureTime, print_all_timings, print_timing, get_timing
from archai.common.common import logger, common_init

conf = common_init(config_filepath='confs/algos/darts.yaml',
                   param_args=['--common.experiment_name', 'restnet_test'])

conf_eval = conf['nas']['eval']
conf_loader = conf_eval['loader']
conf_loader['train_batch'] = 512
conf_loader['test_batch'] = 4096
conf_loader['cutout'] = 0
train_dl, _, test_dl = data.get_data(conf_loader)


@MeasureTime
def iter_dl(dl):
    dummy = 0.0
    for x, y in dl:
        x = x.cuda()
        y = y.cuda()
        dummy += len(x)
    # dummy += len(y)
    return dummy


logging.info(f'batch_count={len(train_dl)}')
Example 12
import logging

from archai.datasets import data
from archai.common import utils
from archai.common.timing import MeasureTime, print_all_timings, print_timing, get_timing
from archai.common.common import logger, common_init

conf = common_init(config_filepath='confs/algos/darts.yaml',
                   param_args=['--common.experiment_name', 'restnet_test'])

conf_eval = conf['nas']['eval']
conf_loader = conf_eval['loader']
conf_loader['train_batch'] = 512
conf_loader['test_batch'] = 4096
conf_loader['cutout'] = 0
data_loaders = data.get_data(conf_loader)
assert data_loaders.train_dl is not None


@MeasureTime
def iter_dl(dl):
    dummy = 0.0
    for x, y in dl:
        x = x.cuda()
        y = y.cuda()
        dummy += len(x)
    # dummy += len(y)
    return dummy


logging.info(f'batch_count={len(data_loaders.train_dl)}')
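
As in Example 11, the script defines iter_dl but never invokes it; presumably a timing run and report follow, along these lines (an assumption):

iter_dl(data_loaders.train_dl)
print_all_timings()  # imported above from archai.common.timing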