Example 1
def download():
    DOWNLOAD_URL = 'http://vision.stanford.edu/lijiali/event_dataset/event_dataset.rar'

    # make sport8 directory
    sport8 = utils.full_path(os.path.join(dataroot, 'sport8'))
    meta = utils.full_path(os.path.join(sport8, 'meta'))

    os.makedirs(sport8, exist_ok=True)
    os.makedirs(meta, exist_ok=True)

    dir_downloads = utils.dir_downloads()
    filename = os.path.basename(DOWNLOAD_URL)
    archive = os.path.join(dir_downloads, filename)
    if not os.path.isfile(archive):
        tvutils.download_url(DOWNLOAD_URL, dir_downloads, filename)
    print(f"Extracting {archive} to {sport8}")
    pyunpack.Archive(archive).extractall(sport8)

    # download the csv files for the train and test split
    # from 'NAS Evaluation is Frustrating' repo
    # note that download_url doesn't work in vscode debug mode
    test_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_test.csv'
    train_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_train.csv'

    tvutils.download_url(test_file_url, meta, filename=None, md5=None)
    tvutils.download_url(train_file_url, meta, filename=None, md5=None)

    return sport8, meta
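
All of these examples center on `utils.full_path` from `archai.common.utils`. As a minimal sketch of what such a helper typically does (an assumption for illustration, not the library's actual source), it expands environment variables and `~`, normalizes the path to an absolute one, and optionally creates the directory:

import os

def full_path(path: str, create: bool = False) -> str:
    # expand env vars first so '$expdir/somefile.txt' style inputs work,
    # then expand '~' and make the result absolute
    assert path
    path = os.path.abspath(os.path.expanduser(os.path.expandvars(path)))
    if create:
        os.makedirs(path, exist_ok=True)
    return path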
Example 2
def main():
    in_dataset_file = utils.full_path(
        '~/dataroot/nasbench_ds/nasbench_full.tfrecord.pkl')
    out_dataset_file = utils.full_path(
        '~/dataroot/nasbench_ds/nasbench_full.pkl')

    stats: Dict[str, dict] = {}

    with open(in_dataset_file, 'rb') as f:
        records = pickle.load(f)

    for module_hash, epochs, raw_adjacency, raw_operations, raw_metrics in records:
        dim = int(np.sqrt(len(raw_adjacency)))
        adjacency = np.array([int(e) for e in list(raw_adjacency)],
                             dtype=np.int8)
        adjacency = np.reshape(adjacency, (dim, dim))
        operations = raw_operations.split(',')
        metrics = model_metrics_pb2.ModelMetrics.FromString(
            base64.b64decode(raw_metrics))

        if module_hash not in stats:
            stats[module_hash] = {
                'module_hash': module_hash,
                'module_adjacency': adjacency,
                'module_operations': operations,
                'trainable_parameters': metrics.trainable_parameters,
                'total_time': metrics.total_time,
                'metrics': {}
            }

        entry = stats[module_hash]
        assert entry['module_hash'] == module_hash
        # `==` on numpy arrays is elementwise, which is ambiguous in an
        # assert; compare with np.array_equal instead
        assert np.array_equal(entry['module_adjacency'], adjacency)
        assert entry['module_operations'] == operations
        assert entry['trainable_parameters'] == metrics.trainable_parameters

        if epochs not in entry['metrics']:
            entry['metrics'][epochs] = []
        entry['metrics'][epochs].append(
            [eval_to_dict(e) for e in metrics.evaluation_data])

    dataset = sorted(stats.values(),
                     key=lambda d: np.mean(
                         [r[-1]['test_accuracy'] for r in d['metrics'][108]]))
    for i, d in enumerate(dataset):
        d['rank'] = i

    odict = OrderedDict((d['module_hash'], d) for d in dataset)

    with open(out_dataset_file, 'wb') as f:
        pickle.dump(odict, f)
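
The conversion above calls an `eval_to_dict` helper that is not shown. A plausible implementation, assuming the `EvaluationData` message from NASBench-101's `model_metrics.proto` (the field names are an assumption based on that schema):

def eval_to_dict(e) -> dict:
    # flatten one EvaluationData protobuf message into a plain dict;
    # the final entry's 'test_accuracy' is what the ranking sort key reads
    return {
        'current_epoch': e.current_epoch,
        'training_time': e.training_time,
        'train_accuracy': e.train_accuracy,
        'validation_accuracy': e.validation_accuracy,
        'test_accuracy': e.test_accuracy,
    }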
Example 3
def main():
    default_dir = r'D:\GitHubSrc\archaiphilly\phillytools\nasbench_darts_lr0.025_wd3_b128'

    parser = argparse.ArgumentParser(description='Pytorch cifar training')
    parser.add_argument('--in-dir', default=default_dir)
    parser.add_argument('--out-dir', default=default_dir)

    args = parser.parse_args()

    parsed_metrics = delimited_text.DelimitedText()

    in_dir = pathlib.Path(utils.full_path(args.in_dir))
    assert in_dir.exists(), f'Does not exist: {in_dir}'
    metrics_filepaths = in_dir.rglob('metrics*.tsv')

    for metrics_filepath in metrics_filepaths:
        text = metrics_filepath.read_text()
        parsed_metrics.add_from_text(text, has_header=True)

    assert len(parsed_metrics) >= 1

    model_nums = [int(r) for r in parsed_metrics.get_col('model_name')]
    nasbench_acc = [
        statistics.mean(literal_eval(r))
        for r in parsed_metrics.get_col('nasbenc101_test_acc')
    ]
    retrain_acc = [float(r) for r in parsed_metrics.get_col('test_acc')]

    stats = list(zip(model_nums, nasbench_acc, retrain_acc))
    stats.sort(key=lambda t: t[0])

    retrain_ranks = utils.get_ranks(stats, key=lambda t: t[2])
    stats = list(
        (i, rr, *t) for i, (t, rr) in enumerate(zip(stats, retrain_ranks)))

    corr = scipy.stats.pearsonr([t[0] for t in stats], [t[1] for t in stats])

    out_metrics = delimited_text.DelimitedText()
    out_metrics.add_from_cols_list(stats,
                                   header=[
                                       'nasbench_rank', 'rerank', 'model_num',
                                       'nasbench_acc', 'retrain_acc'
                                   ])

    rerank_filepath = os.path.join(utils.full_path(args.out_dir),
                                   'reranking.tsv')
    out_metrics.save(rerank_filepath)

    corr_filepath = os.path.join(utils.full_path(args.out_dir), 'corr.txt')
    utils.write_string(corr_filepath, str(corr))
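
This example leans on `utils.get_ranks`, which is not shown. A minimal sketch of such a helper (hypothetical; the real signature may differ) assigns each item the rank of its key, 0 for the smallest. Note also that a Pearson correlation computed over two rank sequences, as done above, is by definition Spearman's rank correlation.

def get_ranks(items, key=lambda v: v):
    # hypothetical helper: rank of each item in its original position
    # (0 = smallest key), computed by argsorting the keys
    order = sorted(range(len(items)), key=lambda i: key(items[i]))
    ranks = [0] * len(items)
    for rank, i in enumerate(order):
        ranks[i] = rank
    return ranks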
Example 4
    def _copy_final_desc(self, search_conf) -> Tuple[Config, Config]:
        # get desc file path that search has produced
        search_desc_filename = search_conf['nas']['search'][
            'final_desc_filename']
        search_desc_filepath = utils.full_path(search_desc_filename)
        assert search_desc_filepath and os.path.exists(search_desc_filepath)

        # get file path that eval would need
        eval_conf = self._init('eval')
        eval_desc_filename = eval_conf['nas']['eval']['final_desc_filename']
        eval_desc_filepath = utils.full_path(eval_desc_filename)
        assert eval_desc_filepath
        shutil.copy2(search_desc_filepath, eval_desc_filepath)

        return search_conf, eval_conf
Example 5
    def _draw_model(self) -> None:
        if not self._plotsdir:
            return
        train_metrics = self.get_metrics()
        if train_metrics:
            best_train, best_val, best_test = \
                train_metrics.run_metrics.best_epoch()
            cur_index = train_metrics.cur_epoch().index
            # if test is available and is best for this epoch then mark it as best
            is_best = best_test and best_test.index == cur_index
            # if val is available and is best for this epoch then mark it as best
            is_best = is_best or (best_val and best_val.index == cur_index)
            # if neither val nor test is available then fall back to train metrics
            is_best = is_best or best_train.index == cur_index
            if is_best:
                # log model_desc as an image
                plot_filepath = utils.full_path(
                    os.path.join(self._plotsdir, f"EP{cur_index:03d}"),
                    create=True)
                draw_model_desc(self.model.finalize(),
                                filepath=plot_filepath,
                                caption=f"Epoch {cur_index}")
Example 6
def eval_arch(conf_eval: Config, cell_builder: Optional[CellBuilder]):
    logger.pushd('eval_arch')

    # region conf vars
    conf_loader = conf_eval['loader']
    model_filename = conf_eval['model_filename']
    metric_filename = conf_eval['metric_filename']
    conf_checkpoint = conf_eval['checkpoint']
    resume = conf_eval['resume']
    conf_train = conf_eval['trainer']
    # endregion

    if cell_builder:
        cell_builder.register_ops()

    model = create_model(conf_eval)

    # get data
    train_dl, _, test_dl = data.get_data(conf_loader)
    assert train_dl is not None and test_dl is not None

    checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)
    trainer = Trainer(conf_train, model, checkpoint)
    train_metrics = trainer.fit(train_dl, test_dl)
    train_metrics.save(metric_filename)

    # save model
    if model_filename:
        model_filename = utils.full_path(model_filename)
        ml_utils.save_model(model, model_filename)

    logger.info({'model_save_path': model_filename})

    logger.popd()
Example 7
def untar_dataset(conf_name: str, pt_data_dir: str, conf_data: Config,
                  dataroot: str) -> None:
    if 'storage_name' not in conf_data or not conf_data['storage_name']:
        print(
            f'data config {conf_name} ignored because storage_name key was not found or not set'
        )
        return

    print(f'Untarring for data config: {conf_name}')

    storage_name = conf_data['storage_name']
    tar_filepath = os.path.join(pt_data_dir, storage_name + '.tar')
    if not os.path.isfile(tar_filepath):
        raise RuntimeError(
            f'Tar file for dataset at {tar_filepath} was not found')

    tar_size = pathlib.Path(tar_filepath).stat().st_size
    print('tar_filepath:', tar_filepath, 'tar_size:', tar_size)

    local_dataroot = utils.full_path(dataroot)
    print('local_dataroot:', local_dataroot)
    _create_ram_disk(tar_size, local_dataroot)
    # os.makedirs(local_dataroot, exist_ok=True)

    utils.exec_shell_command(f'tar -xf "{tar_filepath}" -C "{local_dataroot}"')

    print(
        f'dataset extracted from {tar_filepath} to {local_dataroot} successfully')
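
The `_create_ram_disk` helper used above is not shown. A plausible sketch, assuming a Linux tmpfs mount sized slightly above the tar file (requires root privileges; purely illustrative):

def _create_ram_disk(req_bytes: int, path: str) -> None:
    # mount a tmpfs ramdisk large enough for the extracted dataset;
    # pad the tar size since extraction usually needs extra space
    os.makedirs(path, exist_ok=True)
    size_mb = int(req_bytes * 1.2 // (1024 * 1024)) + 1
    utils.exec_shell_command(
        f'sudo mount -t tmpfs -o size={size_mb}m tmpfs "{path}"')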
Example 8
    def copy_search_to_eval(self)->None:
        # do not cache conf_search or conf_eval as they may have values that
        # need env var expansion.

        # get desc file path that search has produced
        conf_search = self.get_conf(True)['nas']['search']
        search_desc_filename = conf_search['final_desc_filename']
        search_desc_filepath = utils.full_path(search_desc_filename)
        assert search_desc_filepath and os.path.exists(search_desc_filepath)

        # get file path that eval would need
        conf_eval = self.get_conf(False)['nas']['eval']
        eval_desc_filename = conf_eval['final_desc_filename']
        eval_desc_filepath = utils.full_path(eval_desc_filename)
        assert eval_desc_filepath
        utils.copy_file(search_desc_filepath, eval_desc_filepath)
Example 9
    def evaluate(self, conf_eval: Config,
                 model_desc_builder: ModelDescBuilder) -> EvalResult:
        logger.pushd('eval_arch')

        # region conf vars
        conf_checkpoint = conf_eval['checkpoint']
        resume = conf_eval['resume']

        model_filename = conf_eval['model_filename']
        metric_filename = conf_eval['metric_filename']
        # endregion

        model = self.create_model(conf_eval, model_desc_builder)

        checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)
        train_metrics = self.train_model(conf_eval, model, checkpoint)
        train_metrics.save(metric_filename)

        # save model
        if model_filename:
            model_filename = utils.full_path(model_filename)
            ml_utils.save_model(model, model_filename)

        logger.info({'model_save_path': model_filename})

        logger.popd()

        return EvalResult(train_metrics)
Example 10
def main():
    parser = argparse.ArgumentParser(description='Archai data install')
    parser.add_argument('--dataroot',
                        type=str,
                        default='~/dataroot',
                        help='path to dataroot on local drive')
    parser.add_argument(
        '--dataset',
        type=str,
        default='cifar10',
        help='name of the dataset; confs/datasets/<name>.yaml should exist '
             'and name the folder or tar file the dataset resides in')
    args, extra_args = parser.parse_known_args()

    pt_data_dir = os.environ.get('PT_DATA_DIR', '')
    if not pt_data_dir:
        raise RuntimeError(
            'This script needs PT_DATA_DIR environment variable with path to dataroot on cloud drive'
        )
    pt_data_dir = utils.full_path(pt_data_dir)
    print('pt_data_dir:', pt_data_dir)

    conf_data_filepath = f'confs/datasets/{args.dataset}.yaml'
    print('conf_data_filepath:', conf_data_filepath)

    conf = Config(config_filepath=conf_data_filepath)
    for dataset_key in ['dataset', 'dataset_search', 'dataset_eval']:
        if dataset_key in conf:
            conf_data = conf[dataset_key]
            untar_dataset(dataset_key, pt_data_dir, conf_data, args.dataroot)
Example 11
    def _save_trained(self, reductions:int, cells:int, nodes:int,
                      search_iter:int,
                      metrics_stats:MetricsStats)->None:
        """Save the model and metric info into a log file"""

        # construct path where we will save
        subdir = utils.full_path(self.metrics_dir.format(**vars()), create=True)

        # save metrics info
        metrics_stats_filepath = os.path.join(subdir, 'metrics_stats.yaml')
        if metrics_stats_filepath:
            with open(metrics_stats_filepath, 'w') as f:
                yaml.dump(metrics_stats, f)

        # save just metrics separately
        metrics_filepath = os.path.join(subdir, 'metrics.yaml')
        if metrics_filepath:
            with open(metrics_filepath, 'w') as f:
                yaml.dump(metrics_stats.train_metrics, f)

        logger.info({'metrics_stats_filepath': metrics_stats_filepath,
                     'metrics_filepath': metrics_filepath})

        # append key info in root pareto data
        if self._parito_filepath:
            train_top1 = val_top1 = train_epoch = val_epoch = math.nan
            # extract metrics
            if metrics_stats.train_metrics:
                best_metrics = metrics_stats.train_metrics.run_metrics.best_epoch()
                train_top1 = best_metrics[0].top1.avg
                train_epoch = best_metrics[0].index
                if len(best_metrics) > 1 and best_metrics[1]:
                    val_top1 = best_metrics[1].top1.avg
                    val_epoch = best_metrics[1].index

            # extract model stats
            if metrics_stats.model_stats:
                flops = metrics_stats.model_stats.Flops
                parameters = metrics_stats.model_stats.parameters
                inference_memory = metrics_stats.model_stats.inference_memory
                inference_duration = metrics_stats.model_stats.duration
            else:
                flops = parameters = inference_memory = inference_duration = math.nan

            utils.append_csv_file(self._parito_filepath, [
                ('reductions', reductions),
                ('cells', cells),
                ('nodes', nodes),
                ('search_iter', search_iter),
                ('train_top1', train_top1),
                ('train_epoch', train_epoch),
                ('val_top1', val_top1),
                ('val_epoch', val_epoch),
                ('flops', flops),
                ('params', parameters),
                ('inference_memory', inference_memory),
                ('inference_duration', inference_duration)
                ])
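
This example and the next both append (name, value) pairs to a running summary via `utils.append_csv_file`. A minimal sketch of such a helper (an assumption about its behavior): emit the header once when the file is empty, then append one data row per call.

import csv
import os

def append_csv_file(filepath: str, row: list) -> None:
    # 'row' is a list of (column_name, value) pairs; write the header
    # only when the file does not exist yet or is empty
    write_header = (not os.path.isfile(filepath)
                    or os.path.getsize(filepath) == 0)
    with open(filepath, 'a', newline='') as f:
        writer = csv.writer(f)
        if write_header:
            writer.writerow([name for name, _ in row])
        writer.writerow([val for _, val in row])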
Example 12
    def save_trained(self, conf_search: Config, reductions: int, cells: int,
                     nodes: int, model_metrics: ModelMetrics) -> None:
        """Save the model and metric info into a log file"""

        metrics_dir = conf_search['metrics_dir']

        # construct path where we will save
        subdir = utils.full_path(metrics_dir.format(**vars()), create=True)

        model_stats = nas_utils.get_model_stats(model_metrics.model)

        # save model_stats in its own file
        model_stats_filepath = os.path.join(subdir, 'model_stats.yaml')
        if model_stats_filepath:
            with open(model_stats_filepath, 'w') as f:
                yaml.dump(model_stats, f)

        # save just metrics separately for convenience
        metrics_filepath = os.path.join(subdir, 'metrics.yaml')
        if metrics_filepath:
            with open(metrics_filepath, 'w') as f:
                yaml.dump(model_stats.metrics, f)

        logger.info({
            'model_stats_filepath': model_stats_filepath,
            'metrics_filepath': metrics_filepath
        })

        # append key info in root pareto data
        if self._summary_filepath:
            train_top1 = val_top1 = train_epoch = val_epoch = math.nan
            # extract metrics
            if model_metrics.metrics:
                best_metrics = model_metrics.metrics.run_metrics.best_epoch()
                train_top1 = best_metrics[0].top1.avg
                train_epoch = best_metrics[0].index
                if best_metrics[1]:
                    val_top1 = best_metrics[1].top1.avg if len(
                        best_metrics) > 1 else math.nan
                    val_epoch = best_metrics[1].index if len(
                        best_metrics) > 1 else math.nan

            # extract model stats
            flops = model_stats.Flops
            parameters = model_stats.parameters
            inference_memory = model_stats.inference_memory
            inference_duration = model_stats.duration

            utils.append_csv_file(self._summary_filepath,
                                  [('reductions', reductions),
                                   ('cells', cells), ('nodes', nodes),
                                   ('train_top1', train_top1),
                                   ('train_epoch', train_epoch),
                                   ('val_top1', val_top1),
                                   ('val_epoch', val_epoch), ('flops', flops),
                                   ('params', parameters),
                                   ('inference_memory', inference_memory),
                                   ('inference_duration', inference_duration)])
Example 13
def main():
    in_dataset_file = utils.full_path(
        '~/dataroot/nasbench_ds/nasbench_full.tfrecord.pkl')
    out_dataset_file = utils.full_path(
        '~/dataroot/nasbench_ds/nasbench101_sample.tfrecord.pkl')

    with open(in_dataset_file, 'rb') as f:
        records = pickle.load(f)

    sampled_indices = set()
    adj_samples = 1000
    for i in [0, 4000, 40000, len(records) - 1 - adj_samples + 1]:
        sampled_indices = sampled_indices.union(
            [i + k for k in range(adj_samples)])

    sampled_hashes = set(records[i][0] for i in sorted(list(sampled_indices)))
    sampled = [r for r in records if r[0] in sampled_hashes]
    with open(out_dataset_file, 'wb') as f:
        pickle.dump(sampled, f)
Example 14
    def copy_search_to_eval(self) -> None:
        # get folder of model gallery that search has produced
        conf_search = self.get_conf(True)['nas']['search']
        search_desc_foldername = conf_search['final_desc_foldername']
        search_desc_folderpath = utils.full_path(search_desc_foldername)
        assert search_desc_foldername and os.path.exists(
            search_desc_folderpath)

        # get folder path that eval would need
        conf_eval = self.get_conf(False)['nas']['eval']
        eval_desc_foldername = conf_eval['final_desc_foldername']
        eval_desc_folderpath = utils.full_path(eval_desc_foldername)
        assert eval_desc_folderpath
        # only later versions of shutil.copytree support the dirs_exist_ok
        # option, so be robust to a pre-existing directory
        if os.path.exists(eval_desc_folderpath):
            shutil.rmtree(eval_desc_folderpath)
        utils.copy_dir(search_desc_folderpath,
                       eval_desc_folderpath,
                       use_shutil=True)
Example 15
    def __init__(self, dataset_file, seed=None):
        self.config = config.build_config()
        random.seed(seed)

        dataset_file = utils.full_path(dataset_file)
        logging.info(f'Loading dataset from file "{dataset_file}"...')
        start = time.time()

        with open(dataset_file, 'rb') as f:
            self.data: OrderedDict[str, dict] = pickle.load(f)
        self.module_hashes = list(self.data.keys())

        elapsed = time.time() - start
        logging.info('Loaded dataset in %.1f seconds' % elapsed)
Example 16
    def save(self, filename: str, save_trainables=False) -> Optional[str]:
        if filename:
            filename = utils.full_path(filename)

            if save_trainables:
                state_dict = self.state_dict()
                pt_filepath = ModelDesc._pt_filepath(filename)
                torch.save(state_dict, pt_filepath)

            # save yaml
            cloned = self.clone()
            cloned.clear_trainables()
            utils.write_string(filename, yaml.dump(cloned))

        return filename
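
`ModelDesc.save` above (and `ModelDesc.load` in Example 19) relies on a `_pt_filepath` helper to place the weights file next to the YAML description. A plausible one-liner, assuming a simple suffix-swap naming convention:

import pathlib

# inside class ModelDesc:
@staticmethod
def _pt_filepath(desc_filepath: str) -> str:
    # e.g. 'final_model_desc.yaml' -> 'final_model_desc.pth', next to the yaml
    return str(pathlib.Path(desc_filepath).with_suffix('.pth'))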
Example 17
def main():
    parser = argparse.ArgumentParser(description='Visualize model description')
    parser.add_argument('-f',
                        '--model-desc-file',
                        type=str,
                        default='models/final_model_desc5.yaml',
                        help='Model desc file')
    args, extra_args = parser.parse_known_args()

    model_desc_filepath = utils.full_path(args.model_desc_file)
    model_desc = ModelDesc.load(model_desc_filepath)

    out_file = pathlib.Path(model_desc_filepath).with_suffix('')

    draw_model_desc(model_desc, str(out_file))
Example 18
    def evaluate(self, conf_eval: Config,
                 model_desc_builder: ModelDescBuilder) -> EvalResult:
        """Takes a folder of model descriptions output by search process and
        trains them in a distributed manner using ray with 1 gpu"""

        logger.pushd('evaluate')

        final_desc_foldername: str = conf_eval['final_desc_foldername']

        # get list of model descs in the gallery folder
        final_desc_folderpath = utils.full_path(final_desc_foldername)
        # glob already returns paths prefixed with the folder, so no extra join is needed
        files = [f for f in glob.glob(
                     os.path.join(final_desc_folderpath, 'model_desc_*.yaml'))
                 if os.path.isfile(f)]
        logger.info({'model_desc_files': len(files)})

        # to avoid all workers downloading the dataset individually, do it beforehand
        self._ensure_dataset_download(conf_eval)

        future_ids = []
        for model_desc_filename in files:
            future_id = EvaluaterPetridish._train_dist.remote(
                self, conf_eval, model_desc_builder, model_desc_filename,
                common.get_state())
            future_ids.append(future_id)

        # wait for all eval jobs to be finished
        ready_refs, remaining_refs = ray.wait(future_ids,
                                              num_returns=len(future_ids))

        # plot pareto curve of gallery of models
        hull_points = [ray.get(ready_ref) for ready_ref in ready_refs]
        save_hull(hull_points, common.get_expdir())
        plot_pool(hull_points, common.get_expdir())

        best_point = max(hull_points, key=lambda p: p.metrics.best_val_top1())
        logger.info({
            'best_val_top1': best_point.metrics.best_val_top1(),
            'best_MAdd': best_point.model_stats.MAdd
        })

        logger.popd()

        return EvalResult(best_point.metrics)
Example 19
    def load(filename:str, load_trainables=False)->'ModelDesc':
        filename = utils.full_path(filename)
        if not filename or not os.path.exists(filename):
            raise RuntimeError("Model description file is not found."
                "Typically this file should be generated from the search."
                "Please copy this file to '{}'".format(filename))

        logger.info({'final_desc_filename': filename})
        with open(filename, 'r') as f:
            model_desc = yaml.load(f, Loader=yaml.Loader)

        if load_trainables:
            # look for pth file that should have pytorch parameters state_dict
            pt_filepath = ModelDesc._pt_filepath(filename)
            if os.path.exists(pt_filepath):
                state_dict = torch.load(pt_filepath, map_location=torch.device('cpu'))
                model_desc.load_state_dict(state_dict)
            # else no need to restore weights

        return model_desc
Example 20
    def __init__(self, conf_search:Config, cell_builder:Optional[CellBuilder],
                 trainer_class:TArchTrainer, finalizers:Finalizers) -> None:
        # region config vars
        conf_checkpoint = conf_search['checkpoint']
        resume = conf_search['resume']
        self.conf_model_desc = conf_search['model_desc']
        self.conf_loader = conf_search['loader']
        self.conf_train = conf_search['trainer']
        self.final_desc_filename = conf_search['final_desc_filename']
        self.full_desc_filename = conf_search['full_desc_filename']
        self.metrics_dir = conf_search['metrics_dir']
        self.conf_presearch_train = conf_search['seed_train']
        self.conf_postsearch_train = conf_search['post_train']
        conf_pareto = conf_search['pareto']
        self.base_cells = self.conf_model_desc['n_cells']
        self.max_cells = conf_pareto['max_cells']
        self.base_reductions = self.conf_model_desc['n_reductions']
        self.max_reductions = conf_pareto['max_reductions']
        self.base_nodes = self.conf_model_desc['n_nodes']
        self.max_nodes = conf_pareto['max_nodes']
        self.search_iters = conf_search['search_iters']
        self.pareto_enabled = conf_pareto['enabled']
        pareto_summary_filename = conf_pareto['summary_filename']
        # endregion

        self.cell_builder = cell_builder
        self.trainer_class = trainer_class
        self.finalizers = finalizers
        self._data_cache = {}
        self._parito_filepath = utils.full_path(pareto_summary_filename)
        self._checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)

        logger.info({'pareto_enabled': self.pareto_enabled,
                     'base_reductions': self.base_reductions,
                     'base_cells': self.base_cells,
                     'base_nodes': self.base_nodes,
                     'max_reductions': self.max_reductions,
                     'max_cells': self.max_cells,
                     'max_nodes': self.max_nodes
                     })
Example 21
    def restore_checkpoint(self, conf_search:Config, macro_combinations)\
            ->Tuple[int, Optional[SearchResult]]:

        conf_pareto = conf_search['pareto']
        pareto_summary_filename = conf_pareto['summary_filename']

        summary_filepath = utils.full_path(pareto_summary_filename)

        # if checkpoint is available then restart from last combination we were running
        checkpoint_avail = self._checkpoint is not None
        resumed, state = False, None
        start_macro_i, best_result = 0, None
        if checkpoint_avail:
            state = self._checkpoint.get('search', None)
            if state is not None:
                start_macro_i = state['start_macro_i']
                assert start_macro_i >= 0 and start_macro_i < len(
                    macro_combinations)

                best_result = yaml.load(state['best_result'],
                                        Loader=yaml.Loader)

                start_macro_i += 1  # resume after the last checkpoint
                resumed = True

        if not resumed:
            # erase stale summary file left over from a previous run
            utils.zero_file(summary_filepath)

        logger.warn({
            'resumed': resumed,
            'checkpoint_avail': checkpoint_avail,
            'checkpoint_val': state is not None,
            'start_macro_i': start_macro_i,
            'total_macro_combinations': len(macro_combinations)
        })
        return start_macro_i, best_result
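
`utils.zero_file` above presumably truncates the summary file so a fresh run does not append to stale rows. A one-line sketch of such a helper (an assumption):

def zero_file(filepath: str) -> None:
    # create the file if missing and truncate it to zero length
    open(filepath, 'w').close()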
Example 22
def get_filepath(suffix):
    conf = common_init(config_filepath='confs/algos/darts.yaml',
        param_args=['--common.experiment_name', 'test_basename' + f'_{suffix}'
                    ])
    return utils.full_path(os.path.join('$expdir', 'somefile.txt'))
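
The `$expdir` here is presumably exported as an environment variable by `common_init`, which lets `full_path` expand it. A hypothetical usage check, assuming that behavior:

# assuming common_init puts 'expdir' into os.environ, the returned
# path is absolute and fully expanded
filepath = get_filepath('demo')
assert os.path.isabs(filepath) and '$' not in filepath
assert filepath.endswith('somefile.txt')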
Example 23
import tensorflow as tf
import json, base64
import numpy as np
#import model_metrics_pb2
import pickle

from archai.common import utils

dataset_file = utils.full_path(
    '~/dataroot/nasbench_ds/nasbench_only108.tfrecord')

records = []
for serialized_row in tf.python_io.tf_record_iterator(dataset_file):
    module_hash, epochs, raw_adjacency, raw_operations, raw_metrics = (
        json.loads(serialized_row.decode('utf-8')))
    #   dim = int(np.sqrt(len(raw_adjacency)))
    #   adjacency = np.array([int(e) for e in list(raw_adjacency)], dtype=np.int8)
    #   adjacency = np.reshape(adjacency, (dim, dim))
    #   operations = raw_operations.split(',')
    #   metrics = base64.b64decode(raw_metrics)
    records.append(
        (module_hash, epochs, raw_adjacency, raw_operations, raw_metrics))

with open(dataset_file + '.pkl', 'wb') as f:
    pickle.dump(records, f)
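
`tf.python_io.tf_record_iterator` is TF1-only API. Under TensorFlow 2 the same read loop can be written with `tf.data.TFRecordDataset` (a sketch; behavior is otherwise identical):

# TF2 equivalent of the read loop above
records = []
for serialized_row in tf.data.TFRecordDataset(dataset_file).as_numpy_iterator():
    records.append(tuple(json.loads(serialized_row.decode('utf-8'))))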
Example 24
def main():
    # accept search and eval scripts to run
    # config file can be supplied using --config
    parser = argparse.ArgumentParser(description='NAS E2E Runs')
    parser.add_argument('--search-script',
                        type=str,
                        default='scripts/darts/cifar_search.py',
                        help='Search script to run')
    parser.add_argument('--eval-script',
                        type=str,
                        default='scripts/darts/cifar_eval.py',
                        help='Eval script to run')
    parser.add_argument('--exp_prefix',
                        type=str,
                        default='darts',
                        help='Experiment prefix to use')
    args, extra_args = parser.parse_known_args()

    # load config to get some of the settings like logdir
    conf = common_init(use_args=True)
    logdir = get_conf_common()['logdir']
    assert logdir

    # get script, resume flag and experiment dir for search
    search_script = args.search_script
    resume = conf['nas']['search']['resume']
    search_script = utils.full_path(search_script.strip())
    experiment_name = args.exp_prefix + '_' + Path(search_script).stem
    experiment_dir = os.path.join(logdir, experiment_name)

    # see if search has already produced the output
    final_desc_filepath = os.path.join(
        experiment_dir, conf['nas']['search']['final_desc_filename'])
    if not resume or not os.path.exists(final_desc_filepath):
        print(f'Starting {search_script}...')
        result = subprocess.run([
            'python', search_script, '--config', conf.config_filepath,
            '--config-defaults', conf.config_defaults_filepath,
            '--common.experiment_name', experiment_name
        ])
        print(f'Script {search_script} returned {result.returncode}')
        if result.returncode != 0:
            exit(result.returncode)
    else:
        print(
            f'Search is skipped because file {final_desc_filepath} already exists'
        )

    # get script, resume flag and experiment dir for eval
    eval_script = args.eval_script
    resume = conf['nas']['eval']['resume']
    eval_script = utils.full_path(eval_script.strip())
    experiment_name = args.exp_prefix + '_' + Path(eval_script).stem
    experiment_dir = os.path.join(logdir, experiment_name)

    # if eval has already produced the output, skip eval run
    model_filepath = os.path.join(experiment_dir,
                                  conf['nas']['eval']['save_filename'])
    if not resume or not os.path.exists(model_filepath):
        # copy output of search to eval folder
        # TODO: take final_desc_filename from eval config
        os.makedirs(experiment_dir, exist_ok=True)
        shutil.copy2(final_desc_filepath, experiment_dir)

        print(f'Starting {eval_script}...')
        result = subprocess.run([
            'python', eval_script, '--config', conf.config_filepath,
            '--config-defaults', conf.config_defaults_filepath,
            '--common.experiment_name', experiment_name
        ])
        print(f'Script {eval_script} returned {result.returncode}')
        if result.returncode != 0:
            exit(result.returncode)
    else:
        print(f'Eval is skipped because file {model_filepath} already exists')
    print('Search and eval done.')
    exit(0)
Example 25
    def __init__(self, conf_dataset: Config):
        super().__init__(conf_dataset)
        self._dataroot = utils.full_path(conf_dataset['dataroot'])
Example 26
def main():
    parser = argparse.ArgumentParser(description='Report creator')
    parser.add_argument(
        '--results-dir',
        '-d',
        type=str,
        #default=r'D:\GitHubSrc\archaiphilly\phillytools\darts_baseline_20200411',
        default=r'~/logdir/report_test',
        help='folder with experiment results from pt')
    parser.add_argument('--out-dir',
                        '-o',
                        type=str,
                        default=r'~/logdir/reports',
                        help='folder to output reports')
    args, extra_args = parser.parse_known_args()

    # root dir where all results are stored
    results_dir = pathlib.Path(utils.full_path(args.results_dir))
    print(f'results_dir: {results_dir}')

    # extract experiment name, which is the top-level directory
    exp_name = results_dir.parts[-1]

    # create results dir for experiment
    out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
    print(f'out_dir: {out_dir}')
    os.makedirs(out_dir, exist_ok=True)

    # get list of all structured logs for each job
    logs = {}
    job_count = 0
    for job_dir in results_dir.iterdir():
        job_count += 1
        for subdir in job_dir.iterdir():
            if not subdir.is_dir():
                continue
            # currently we expect each job to be an ExperimentRunner job,
            # which should have _search or _eval folders
            if subdir.stem.endswith('_search'):
                sub_job = 'search'
            elif subdir.stem.endswith('_eval'):
                sub_job = 'eval'
            else:
                raise RuntimeError(
                    f'Sub directory "{subdir}" in job "{job_dir}" must '
                    'end with either _search or _eval which '
                    'should be the case if ExperimentRunner was used.')

            logs_filepath = os.path.join(str(subdir), 'logs.yaml')
            if os.path.isfile(logs_filepath):
                fix_yaml(logs_filepath)
                with open(logs_filepath, 'r') as f:
                    key = job_dir.name + ':' + sub_job
                    logs[key] = yaml.load(f, Loader=yaml.Loader)

    # create list of epoch nodes having same path in the logs
    grouped_logs = group_multi_runs(logs)
    collated_grouped_logs = collect_epoch_nodes(grouped_logs)
    summary_text, details_text = '', ''

    for log_key, grouped_log in collated_grouped_logs.items():
        # for each path for epochs nodes, compute stats
        for node_path, logs_epochs_nodes in grouped_log.items():
            collated_epoch_stats = get_epoch_stats(node_path,
                                                   logs_epochs_nodes)
            summary_text += get_summary_text(log_key, out_dir, node_path,
                                             collated_epoch_stats,
                                             len(logs_epochs_nodes))
            details_text += get_details_text(log_key, out_dir, node_path,
                                             collated_epoch_stats,
                                             len(logs_epochs_nodes))

    write_report('summary.md', **vars())
    write_report('details.md', **vars())
Example 27
    def _train_dist(evaluater: Evaluater, conf_eval: Config,
                    model_desc_builder: ModelDescBuilder,
                    model_desc_filename: str, common_state) -> ConvexHullPoint:
        """Train given a model"""

        common.init_from(common_state)

        # region config vars
        conf_model_desc = conf_eval['model_desc']
        max_cells = conf_model_desc['n_cells']

        conf_checkpoint = conf_eval['checkpoint']
        resume = conf_eval['resume']

        conf_petridish = conf_eval['petridish']
        cell_count_scale = conf_petridish['cell_count_scale']
        # endregion

        # register ops since we are now in a different process
        model_desc_builder.pre_build(conf_model_desc)

        model_filename = utils.append_to_filename(model_desc_filename,
                                                  '_model', '.pt')
        full_desc_filename = utils.append_to_filename(model_desc_filename,
                                                      '_full', '.yaml')
        metrics_filename = utils.append_to_filename(model_desc_filename,
                                                    '_metrics', '.yaml')
        model_stats_filename = utils.append_to_filename(
            model_desc_filename, '_model_stats', '.yaml')

        # create checkpoint for this specific model desc by changing the config
        checkpoint = None
        if conf_checkpoint is not None:
            # use splitext so dots elsewhere in the path are left intact
            conf_checkpoint['filename'] = os.path.splitext(
                model_filename)[0] + '_checkpoint.pth'
            checkpoint = nas_utils.create_checkpoint(conf_checkpoint, resume)

            if checkpoint is not None and resume:
                if 'metrics_stats' in checkpoint:
                    # return the output we had recorded in the checkpoint
                    convex_hull_point = checkpoint['metrics_stats']
                    return convex_hull_point

        # template model is what we used during the search
        template_model_desc = ModelDesc.load(model_desc_filename)

        # we first scale this model by number of cells, keeping reductions same as in search
        n_cells = math.ceil(
            len(template_model_desc.cell_descs()) * cell_count_scale)
        n_cells = min(n_cells, max_cells)

        conf_model_desc = copy.deepcopy(conf_model_desc)
        conf_model_desc['n_cells'] = n_cells
        conf_model_desc[
            'n_reductions'] = n_reductions = template_model_desc.cell_type_count(
                CellType.Reduction)

        model_desc = model_desc_builder.build(conf_model_desc,
                                              template=template_model_desc)
        # save desc for reference
        model_desc.save(full_desc_filename)

        model = evaluater.model_from_desc(model_desc)

        train_metrics = evaluater.train_model(conf_eval, model, checkpoint)
        train_metrics.save(metrics_filename)

        # get metrics_stats
        model_stats = nas_utils.get_model_stats(model)
        # save metrics_stats
        with open(model_stats_filename, 'w') as f:
            yaml.dump(model_stats, f)

        # save model
        if model_filename:
            model_filename = utils.full_path(model_filename)
            ml_utils.save_model(model, model_filename)
            # TODO: Causes logging error at random times. Commenting out as stop-gap fix.
            # logger.info({'model_save_path': model_filename})

        hull_point = ConvexHullPoint(
            JobStage.EVAL_TRAINED,
            0,
            0,
            model_desc,
            (n_cells, n_reductions, len(model_desc.cell_descs()[0].nodes())),
            metrics=train_metrics,
            model_stats=model_stats)

        if checkpoint:
            checkpoint.new()
            checkpoint['metrics_stats'] = hull_point
            checkpoint.commit()

        return hull_point
Example 28
def main():
    parser = argparse.ArgumentParser(description='Pytorch cifar training')
    parser.add_argument('--experiment-name', '-n', default='train_pytorch')
    parser.add_argument('--experiment-description',
                        '-d',
                        default='Train cifar using pure PyTorch code')
    parser.add_argument('--epochs', '-e', type=int, default=108)
    parser.add_argument('--model-name', '-m', default='5')
    parser.add_argument(
        '--device',
        default='',
        help='"cuda" or "cpu" or "" in which case use cuda if available')
    parser.add_argument('--train-batch-size', '-b', type=int, default=256)
    parser.add_argument('--test-batch-size', type=int, default=256)
    parser.add_argument('--seed', '-s', type=float, default=42)
    parser.add_argument('--half',
                        type=lambda x: x.lower() == 'true',
                        nargs='?',
                        const=True,
                        default=False)
    parser.add_argument('--cutout', type=int, default=0)
    parser.add_argument('--grad-clip', type=float, default=5.0)

    parser.add_argument(
        '--datadir',
        default='',
        help='where to find dataset files, default is ~/torchvision_data_dir')
    parser.add_argument('--outdir',
                        default='',
                        help='where to put results, default is ~/logdir')

    parser.add_argument(
        '--loader-workers',
        type=int,
        default=-1,
        help='number of thread/workers for data loader (-1 means auto)')

    args = parser.parse_args()

    if not args.datadir:
        args.datadir = common.default_dataroot()
    nsds_dir = args.datadir
    if os.environ.get('PT_DATA_DIR', ''):
        nsds_dir = os.environ.get('PT_DATA_DIR')
    if not args.outdir:
        args.outdir = os.environ.get('PT_OUTPUT_DIR', '')
        if not args.outdir:
            args.outdir = os.path.join('~/logdir', 'nasbench101',
                                       args.experiment_name)
    assert isinstance(nsds_dir, str)

    expdir = utils.full_path(args.outdir)
    os.makedirs(expdir, exist_ok=True)

    utils.setup_cuda(args.seed)
    datadir = utils.full_path(args.datadir)
    os.makedirs(datadir, exist_ok=True)

    utils.create_logger(filepath=os.path.join(expdir, 'logs.log'))

    # log config for reference
    logging.info(
        f'exp_name="{args.experiment_name}", exp_desc="{args.experiment_description}"'
    )
    logging.info(
        f'model_name="{args.model_name}", seed={args.seed}, epochs={args.epochs}'
    )
    logging.info(f'half={args.half}, cutout={args.cutout}')
    logging.info(f'datadir="{datadir}"')
    logging.info(f'expdir="{expdir}"')
    logging.info(f'train_batch_size={args.train_batch_size}')

    if args.device:
        device = torch.device(args.device)
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    nsds = Nasbench101Dataset(
        os.path.join(nsds_dir, 'nasbench_ds', 'nasbench_full.pkl'))

    # load data just before training starts so any earlier errors surface without delay
    train_dl, val_dl, test_dl = get_data(
        datadir=datadir,
        train_batch_size=args.train_batch_size,
        test_batch_size=args.test_batch_size,
        train_num_workers=args.loader_workers,
        test_num_workers=args.loader_workers,
        cutout=args.cutout)

    model_id = int(args.model_name)  # 5, 401, 4001, 40001, 400001
    epochs = args.epochs

    net = create_model(nsds, model_id, device, args.half)
    crit = create_crit(device, args.half)
    optim, sched, sched_on_epoch = optim_sched_darts(
        net, epochs)  # optim_sched_darts optim_sched_paper

    train_metrics = train(epochs,
                          train_dl,
                          val_dl,
                          net,
                          device,
                          crit,
                          optim,
                          sched,
                          sched_on_epoch,
                          args.half,
                          False,
                          grad_clip=args.grad_clip)
    test_acc = test(net, test_dl, device, args.half)
    log_metrics(expdir, f'metrics_{model_id}', train_metrics, test_acc, args,
                nsds, model_id)