def __init__(self, data_dir, multi_fidelity=False, log_scale=True, negative=True, seed=None):
    optim = 0.04944576819737756  # lowest mean validation error in NAS-Bench-101
    if log_scale:
        optim = np.log(optim)
    if negative:
        optim = -optim
    super(NAS101Cifar10, self).__init__(dim=None, optimum_location=None,
                                        optimal_val=optim, bounds=None)
    self.seed = seed
    self.multi_fidelity = multi_fidelity
    self.log_scale = log_scale
    if self.multi_fidelity:
        self.dataset = api.NASBench(os.path.join(data_dir, 'nasbench_full.tfrecord'), seed=0)
    else:
        self.dataset = api.NASBench(os.path.join(data_dir, 'nasbench_only108.tfrecord'))
    self.X = []
    self.y_valid = []
    self.y_test = []
    self.costs = []
    self.model_spec_list = []
    self.negative = negative
def _init_nasbench(self):
    # The arch -> performance dataset.
    self.base_dir = os.path.join(utils.get_awnas_dir("AWNAS_DATA", "data"), "nasbench-101")
    if self.multi_fidelity:
        self.nasbench = api.NASBench(os.path.join(self.base_dir, "nasbench_full.tfrecord"))
    else:
        self.nasbench = api.NASBench(os.path.join(self.base_dir, "nasbench_only108.tfrecord"))
def __init__(self, benchmark_name):
    self.benchmark_name = benchmark_name
    if benchmark_name.startswith('nasbench101'):
        if benchmark_name == 'nasbench101_full':
            self.bench = api.NASBench(
                "/home/zengxia6/research/NAS/nasbench_data/nasbench_full.tfrecord")
        else:
            self.bench = api.NASBench(
                "/home/zengxia6/research/NAS/nasbench_data/nasbench_only108.tfrecord")
def random_models(argv):
    del argv  # Unused
    LIMIT = 100
    OUTPUT = './experiments/random_sampling.json'

    # Load the data from file (this will take some time).
    nasbench = api.NASBench(NASBENCH_TFRECORD)
    npEnc = api._NumpyEncoder()
    for index, unique_hash in enumerate(nasbench.hash_iterator()):
        if index >= LIMIT:
            break
        fixed_metrics, computed_metrics = nasbench.get_metrics_from_hash(unique_hash)
        model_spec = api.ModelSpec(matrix=fixed_metrics['module_adjacency'],
                                   ops=fixed_metrics['module_operations'])
        random_estimate = nasbench.random_estimate(model_spec, MODEL_DIR)
        data = nasbench.query(model_spec)
        merge = {**fixed_metrics, **computed_metrics}
        merge['module_adjacency'] = npEnc.default(fixed_metrics['module_adjacency'])
        merge['random_sampling_time'] = random_estimate['prediction_time']
        merge['random_samples'] = random_estimate['evaluation_results']
        merge['train_accuracy'] = data['train_accuracy']
        merge['test_accuracy'] = data['test_accuracy']
        merge['validation_accuracy'] = data['validation_accuracy']
        print(index, merge)
        with open(OUTPUT, 'a') as f:
            f.write(json.dumps(merge) + '\n')
def single_model(argv):
    del argv  # Unused
    # Load the data from file (this will take some time).
    nasbench = api.NASBench(NASBENCH_TFRECORD)

    # Create an Inception-like module (5x5 convolution replaced with two 3x3
    # convolutions).
    model_spec = api.ModelSpec(
        # Adjacency matrix of the module
        matrix=[[0, 1, 1, 1, 0, 1, 0],   # input layer
                [0, 0, 0, 0, 0, 0, 1],   # 1x1 conv
                [0, 0, 0, 0, 0, 0, 1],   # 3x3 conv
                [0, 0, 0, 0, 1, 0, 0],   # 5x5 conv (replaced by two 3x3's)
                [0, 0, 0, 0, 0, 0, 1],   # 5x5 conv (replaced by two 3x3's)
                [0, 0, 0, 0, 0, 0, 1],   # 3x3 max-pool
                [0, 0, 0, 0, 0, 0, 0]],  # output layer
        # Operations at the vertices of the module, matches order of matrix.
        ops=[INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT])

    random_estimate = nasbench.random_estimate(model_spec, MODEL_DIR)
    print(random_estimate)

    # Query this model from the dataset; returns a dictionary containing the
    # metrics associated with this model.
    # print('Querying an Inception-like model.')
    data = nasbench.query(model_spec)
    print(data)
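# The op-name constants used above (INPUT, CONV1X1, ...) are the labels
# defined by the NAS-Bench-101 codebase; a minimal sketch of the definitions
# this snippet assumes:
INPUT = 'input'
OUTPUT = 'output'
CONV1X1 = 'conv1x1-bn-relu'
CONV3X3 = 'conv3x3-bn-relu'
MAXPOOL3X3 = 'maxpool3x3'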
def __init__(self, dim=-1, minimize=True, filename="nasbench_only108.tfrecord"):
    """
    Parameters
    ----------
    dim : int, default -1
        The dimension of the problem. In this function, this is a dummy
        variable, i.e. not used.
    filename : str, default "nasbench_only108.tfrecord"
        Either "nasbench_full.tfrecord" or "nasbench_only108.tfrecord".
    """
    super(NasBench101, self).__init__(MATRIX_ELEMENTS + VERTICES - 2, minimize=minimize)
    self._categories[MATRIX_ELEMENTS:] = 3
    data_dir = "{}/data".format(os.path.dirname(os.path.abspath(__file__)))
    if not os.path.exists("{}/{}".format(data_dir, filename)):
        os.makedirs(data_dir, exist_ok=True)
        print("downloading data now...")
        subprocess.run(
            "wget -P {} https://storage.googleapis.com/nasbench/{}".format(data_dir, filename),
            shell=True)
    self.nasbench = api.NASBench('{}/{}'.format(data_dir, filename))
    self.estimated_wall_clock_time = 0
    self.y_star_valid = 0.04944576819737756  # lowest mean validation error
    self.y_star_test = 0.056824247042338016  # lowest mean test error
def main(*args, **kwargs):
    nasbench = nasbench_api.NASBench(FLAGS.path_to_nasbench)
    module = nasbench.fixed_statistics[FLAGS.hash_key]
    spec = model_spec.ModelSpec(module['module_adjacency'], module['module_operations'])

    config = nasbench_config.build_config()
    for flag in FLAGS.flags_by_module_dict()[args[0][0]]:
        config[flag.name] = flag.value
    config['use_tpu'] = False
    config['use_KD'] = False
    config['intermediate_evaluations'] = ['1.0']

    trainset_multiplier = FLAGS.trainset_part_percentage / 100.0
    config['num_train'] = int(config['num_train'] * trainset_multiplier)
    config['num_train_eval'] = int(config['num_train_eval'] * trainset_multiplier)
    config['num_augment'] = int(config['num_augment'] * trainset_multiplier)

    logging.info("Prepare KD dataset")
    dataset_files = FLAGS.train_data_files + [
        FLAGS.valid_data_file, FLAGS.test_data_file, FLAGS.sample_data_file
    ]
    prepare_kd_dataset(spec, config, FLAGS.save_path, dataset_files,
                       FLAGS.new_dataset_path, FLAGS.trainset_part_percentage)
def __init__(self, search_space, dataset='cifar10', nasbench_folder='./',
             index_hash_folder='./', loaded_nasbench=None):
    self.search_space = search_space
    self.dataset = dataset
    """
    Some of the path-based encoding methods require a hash map from path
    indices to cell architectures. We have created a pickle file which
    contains this hash map, located at
    https://drive.google.com/file/d/1yMRFxT6u3ZyfiWUPhtQ_B9FbuGN3X-Nf/view?usp=sharing
    """
    self.index_hash = pickle.load(
        open(os.path.expanduser(index_hash_folder + 'index_hash.pkl'), 'rb'))

    # Instructions for installing nasbench-101 and nas-bench-201 are in the readme.
    if loaded_nasbench:
        self.nasbench = loaded_nasbench
    elif search_space == 'nasbench':
        self.nasbench = api.NASBench(nasbench_folder + 'nasbench_only108.tfrecord')
    elif search_space == 'nasbench_201':
        self.nasbench = API(
            os.path.expanduser('~/nas-bench-201/NAS-Bench-201-v1_0-e61699.pth'))
    elif search_space != 'darts':
        print(search_space, 'is not a valid search space')
        sys.exit()
def __init__(self, data_dir, multi_fidelity=False):
    self.multi_fidelity = multi_fidelity
    if self.multi_fidelity:
        self.dataset = api.NASBench(os.path.join(data_dir, 'nasbench_full.tfrecord'))
    else:
        self.dataset = api.NASBench(os.path.join(data_dir, 'nasbench_only108.tfrecord'))
    self.X = []
    self.y_valid = []
    self.y_test = []
    self.costs = []
    self.y_star_valid = 0.04944576819737756  # lowest mean validation error
    self.y_star_test = 0.056824247042338016  # lowest mean test error
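# The y_star constants above are the benchmark's best achievable mean errors;
# search methods typically report their distance to them. A minimal sketch
# (the function name is ours, not part of the original code):
def valid_regret(validation_accuracy, y_star_valid=0.04944576819737756):
    # Immediate validation regret: observed validation error minus the
    # lowest mean validation error in NAS-Bench-101.
    return (1.0 - validation_accuracy) - y_star_valid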
def setUpClass(cls):
    with initialize(config_path="../configs"):
        cfg = compose(config_name="test")
        cls.edit_distance = cfg.edit_distance
        cls.graph_modify_ratio = cfg.graph_modify_ratio
        cls.TESTCASE_COUNT = cfg.TESTCASE_COUNT
        stats_cfg = cfg.stats
        cls.SAMPLES_PER_CLASS = cls.TESTCASE_COUNT
        cls.min_accuracy = stats_cfg.min_accuracy
        cls.min_diff = stats_cfg.min_diff
        dataset = PretrainNASBench(
            engine=api101.NASBench(cfg.dataset_path),
            model_spec=api101.ModelSpec,
            samples_per_class=cls.SAMPLES_PER_CLASS,
            max_seq_len=cfg.max_seq_len,
            graph_modify_ratio=cls.graph_modify_ratio)
        cls.graph_modifier = dataset.graph_modifier
        cls.testcases = []
        # hash_iterator() returns a view; materialize it so random.sample
        # also works on Python >= 3.11.
        for key in random.sample(list(dataset.engine.hash_iterator()), cls.TESTCASE_COUNT):
            arch = dataset.engine.get_modelspec_by_hash(key)
            matrix, ops = arch.matrix, arch.ops
            cls.testcases.append((matrix, ops))
def get_engine_modelspec(name, path):
    if name == 'nasbench101':
        return api101.NASBench(path), api101.ModelSpec
    elif name == 'nasbench201':
        return api201.NASBench201API(path), api201.ModelSpec
    else:
        raise ValueError('Invalid name')
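# Hypothetical usage of get_engine_modelspec; the path below is a
# placeholder, not a file shipped with this code:
engine, spec_cls = get_engine_modelspec('nasbench101', 'data/nasbench_only108.tfrecord')
print(len(list(engine.hash_iterator())))  # number of unique architectures in the benchmark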
def gen_json_file():
    nasbench = api.NASBench(NASBENCH_TFRECORD)
    nas_gen = gen_data_point(nasbench)
    data_dict = OrderedDict()
    for data_point in nas_gen:
        data_dict.update(data_point)
    with open('data/data.json', 'w') as outfile:
        json.dump(data_dict, outfile)
def __init__(self):
    if not os.path.isfile(NASBENCH_TFRECORD):
        print('Downloading NASBench-101 Data.')
        pdownload(file_url, NASBENCH_TFRECORD)
        print('Downloaded')
    self.dataset = api.NASBench(NASBENCH_TFRECORD)
    self.checked_models = {}
def main(argv):
    with open(
            'C:/Users/Dawei/OneDrive/USYD-GTW/Project-NAS Bench/nasbench/printed_example/res_full.txt',
            "w+") as f:
        del argv  # Unused
        # Load the data from file (this will take some time).
        nasbench = api.NASBench(NASBENCH_TFRECORD)

        # Create an Inception-like module (5x5 convolution replaced with two
        # 3x3 convolutions).
        model_spec = api.ModelSpec(
            # Adjacency matrix of the module
            matrix=[[0, 1, 1, 1, 0, 1, 0],   # input layer
                    [0, 0, 0, 0, 0, 0, 1],   # 1x1 conv
                    [0, 0, 0, 0, 0, 0, 1],   # 3x3 conv
                    [0, 0, 0, 0, 1, 0, 0],   # 5x5 conv (replaced by two 3x3's)
                    [0, 0, 0, 0, 0, 0, 1],   # 5x5 conv (replaced by two 3x3's)
                    [0, 0, 0, 0, 0, 0, 1],   # 3x3 max-pool
                    [0, 0, 0, 0, 0, 0, 0]],  # output layer
            # Operations at the vertices of the module, matches order of matrix.
            ops=[INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT])

        # Query this model from the dataset; returns a dictionary containing
        # the metrics associated with this model.
        print('Querying an Inception-like model.', file=f)
        data = nasbench.query(model_spec)
        print(data, file=f)
        print(nasbench.get_budget_counters(), file=f)  # prints (total time, total epochs)

        # Get all metrics (all epoch lengths, all repeats) associated with this
        # model_spec. This should be used for dataset analysis and NOT for
        # benchmarking algorithms (does not increment budget counters).
        print('\nGetting all metrics for the same Inception-like model.', file=f)
        fixed_metrics, computed_metrics = nasbench.get_metrics_from_spec(model_spec)
        print(fixed_metrics, file=f)
        for epochs in nasbench.valid_epochs:
            for repeat_index in range(len(computed_metrics[epochs])):
                data_point = computed_metrics[epochs][repeat_index]
                print('Epochs trained %d, repeat number: %d'
                      % (epochs, repeat_index + 1), file=f)
                print(data_point, file=f)

        # Iterate through unique models in the dataset. Models are uniquely
        # identified by a hash.
        print('\nIterating over unique models in the dataset.', file=f)
        for unique_hash in nasbench.hash_iterator():
            fixed_metrics, computed_metrics = nasbench.get_metrics_from_hash(unique_hash)
            print(fixed_metrics, file=f)
def get_dict(name, data_path, nasbench_file, num_samples, num_operations,
             val_acc_threshold=0., seed=1234, **kwargs):
    nasbench_file_abs = os.path.join(data_path, nasbench_file)
    print(f'Loading nasbench101: {nasbench_file_abs}')
    nasbench = api.NASBench(nasbench_file_abs, num_samples=num_samples, seed=seed)

    archs = []
    seqs = []
    valid_accs = []
    all_keys = list(nasbench.hash_iterator())

    dataset_dicts = []
    min_val_acc = float('inf')
    max_val_acc = 0
    min_data = max_data = None
    for idx, key in enumerate(all_keys):
        fixed_stat, computed_stat = nasbench.get_metrics_from_hash(key)
        if len(fixed_stat['module_operations']) not in num_operations:
            continue
        arch = api.ModelSpec(matrix=fixed_stat['module_adjacency'],
                             ops=fixed_stat['module_operations'])
        data = nasbench.query(arch)
        if data['validation_accuracy'] < val_acc_threshold:
            continue
        if min_val_acc > data['validation_accuracy']:
            min_val_acc = data['validation_accuracy']
            min_data = data
        if max_val_acc < data['validation_accuracy']:
            max_val_acc = data['validation_accuracy']
            max_data = data
        data["id"] = idx
        dataset_dicts.append(data)

    meta_dict = {}
    meta_dict['num_samples'] = len(dataset_dicts)
    meta_dict['num_operations'] = num_operations
    meta_dict['min_val_acc'] = min_val_acc
    meta_dict['max_val_acc'] = max_val_acc
    meta_dict['min_data'] = min_data
    meta_dict['max_data'] = max_data
    print(f'min_val_acc: {min_val_acc}, max_val_acc: {max_val_acc}')
    if name not in MetadataCatalog.list():
        MetadataCatalog.get(name).set(**meta_dict)
    return dataset_dicts
def load(use_pickle=True, full=False):
    fname = NASBENCH_TFRECORD_FULL if full else NASBENCH_TFRECORD_PARTIAL
    start = time.time()
    if use_pickle:
        if not os.path.isfile(fname % 'dat'):
            nasbench = api.NASBench(fname % 'tfrecord')
            pickle.dump(nasbench, open(fname % 'dat', "wb"))
        nasbench = pickle.load(open(fname % 'dat', "rb"))
    else:
        nasbench = api.NASBench(fname % 'tfrecord')
    end = time.time()
    print("Data loaded in %s seconds" % (end - start))
    return nasbench
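# Hypothetical usage of load(); this assumes NASBENCH_TFRECORD_PARTIAL is a
# printf-style template such as 'nasbench_only108.%s'. The first call parses
# the tfrecord and writes a pickle cache; later calls deserialize the pickle,
# which is typically much faster than re-parsing the tfrecord.
nasbench = load(use_pickle=True, full=False)  # slow: parses tfrecord, writes .dat
nasbench = load(use_pickle=True, full=False)  # fast: reads the pickled .dat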
def __init__(self, data_dir):
    self.dataset = api.NASBench(os.path.join(data_dir, 'nasbench_full.tfrecord'))
    self.X = []
    self.y_valid = []
    self.y_test = []
    self.costs = []
    self.y_star_valid = 0.04944576819737756  # lowest mean validation error
    self.y_star_test = 0.056824247042338016  # lowest mean test error
def __init__(self, path):
    super(NAS, self).__init__()
    PATH = path
    self.OPS = [INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT]
    self.nasbench = api.NASBench(PATH)
    self.size = 7
    self.var_er = None
    self.D = 21
def __init__(self, data_folder=default_data_folder, index_hash_folder='./', mf=False):
    self.mf = mf
    self.dataset = 'cifar10'
    """
    For NAS encodings experiments, some of the path-based encodings currently
    require a hash map from path indices to cell architectures. We have
    created a pickle file which contains the hash map, located at
    https://drive.google.com/file/d/1yMRFxT6u3ZyfiWUPhtQ_B9FbuGN3X-Nf/view?usp=sharing
    """
    self.index_hash = None
    index_hash_path = os.path.expanduser(index_hash_folder + 'index_hash.pkl')
    if os.path.isfile(index_hash_path):
        self.index_hash = pickle.load(open(index_hash_path, 'rb'))

    if not self.mf:
        self.nasbench = api.NASBench(
            os.path.expanduser(data_folder + 'nasbench_only108.tfrecord'))
    else:
        self.nasbench = api.NASBench(
            os.path.expanduser(data_folder + 'nasbench_full.tfrecord'))
def load(self) -> Any:
    """Loads data from the data directory defined in config_file.data_directory."""
    self.logger.debug('NasBench101DataManager: Starting to load data')
    t = time()
    self.download()

    from nasbench import api
    data = api.NASBench(str(self.save_dir / self.fname))
    self.logger.info(
        f'NasBench101DataManager: Data successfully loaded after {time() - t:.2f}s')
    return data
def Eval_nasbench1shot1(theta, search_space, logger):
    nasbench1shot1_path = 'benchmark/nasbench_full.tfrecord'
    nasbench = api.NASBench(nasbench1shot1_path)

    current_best = np.argmax(theta, axis=1)
    config = ConfigSpace.Configuration(
        search_space.search_space.get_configuration_space(), vector=current_best)
    adjacency_matrix, node_list = \
        search_space.search_space.convert_config_to_nasbench_format(config)
    if search_space.search_space.search_space_number == 3:
        node_list = [INPUT, *node_list, OUTPUT]
    else:
        node_list = [INPUT, *node_list, CONV1X1, OUTPUT]
    # np.int was removed in NumPy >= 1.24; the builtin int is equivalent here.
    adjacency_list = adjacency_matrix.astype(int).tolist()
    model_spec = api.ModelSpec(matrix=adjacency_list, ops=node_list)

    nasbench_data = nasbench.query(model_spec, epochs=108)
    logger.info("test accuracy = {}".format(nasbench_data['test_accuracy']))
def __init__(self, records_path='../../nasbench/nasbench_full.tfrecord'):
    super().__init__()
    self.api = api.NASBench(records_path)
    self.precise_epochs = None
    self.epochs = None
    self.total_training_time = 0
    # Best-known cell used as a reference: adjacency matrix and operation list.
    self.best_m = np.array([[0, 1, 1, 0, 0, 1, 1],
                            [0, 0, 0, 1, 0, 0, 0],
                            [0, 0, 0, 0, 0, 1, 0],
                            [0, 0, 0, 0, 1, 0, 0],
                            [0, 0, 0, 0, 0, 1, 0],
                            [0, 0, 0, 0, 0, 0, 1],
                            [0, 0, 0, 0, 0, 0, 0]], dtype=np.int32)
    self.best_op = ['input', 'conv3x3-bn-relu', 'conv1x1-bn-relu', 'maxpool3x3',
                    'conv3x3-bn-relu', 'conv3x3-bn-relu', 'output']
    self.best_nasbench = ChromosomeNASBench(self.best_op, self.best_m)
    self.best_mean_acc = self.get_test_mean_acc(self.best_nasbench)
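# A minimal sketch of how a mean test accuracy like best_mean_acc can be
# computed with the raw nasbench API, independent of the ChromosomeNASBench
# wrapper above (the helper name is ours):
import numpy as np
from nasbench import api

def mean_test_acc(nasbench, matrix, ops, epochs=108):
    # get_metrics_from_spec returns all training repeats without incrementing
    # the benchmark's budget counters.
    spec = api.ModelSpec(matrix=matrix, ops=ops)
    _, computed = nasbench.get_metrics_from_spec(spec)
    return np.mean([r['final_test_accuracy'] for r in computed[epochs]])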
def main(*args, **kwargs):
    nasbench = nasbench_api.NASBench(FLAGS.path_to_nasbench)
    module = nasbench.fixed_statistics[FLAGS.hash_key]
    spec = model_spec.ModelSpec(module['module_adjacency'], module['module_operations'])

    config = nasbench_config.build_config()
    for flag in FLAGS.flags_by_module_dict()[args[0][0]]:
        config[flag.name] = flag.value
    config['use_tpu'] = False
    config['use_KD'] = False
    config['intermediate_evaluations'] = ['1.0']

    trainset_multiplier = FLAGS.trainset_part_percentage / 100.0
    config['num_train'] = int(config['num_train'] * trainset_multiplier)
    config['num_train_eval'] = int(config['num_train_eval'] * trainset_multiplier)
    config['num_augment'] = int(config['num_augment'] * trainset_multiplier)

    logging.info("Train and evaluate with config\n{}\n and spec\n{}".format(config, spec))
    train(spec, config, FLAGS.save_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='Path to the file to be converted, e.g., nasbench_full.tfrecord')
    args = parser.parse_args()

    nasbench = api.NASBench(args.input_file)
    db = load_benchmark('nasbench101')
    with db:
        db.create_tables([Nb101TrialConfig, Nb101TrialStats, Nb101IntermediateStats])
        for hashval in tqdm(nasbench.hash_iterator(), desc='Dumping data into database'):
            metadata, metrics = nasbench.get_metrics_from_hash(hashval)
            num_vertices, architecture = nasbench_format_to_architecture_repr(
                metadata['module_adjacency'], metadata['module_operations'])
            assert hashval == hash_module(architecture, num_vertices)
            for epochs in [4, 12, 36, 108]:
                trial_config = Nb101TrialConfig.create(
                    arch=architecture, num_vertices=num_vertices,
                    hash=hashval, num_epochs=epochs)
                for seed in range(3):
                    cur = metrics[epochs][seed]
                    trial = Nb101TrialStats.create(
                        config=trial_config,
                        train_acc=cur['final_train_accuracy'] * 100,
                        valid_acc=cur['final_validation_accuracy'] * 100,
                        test_acc=cur['final_test_accuracy'] * 100,
                        parameters=metadata['trainable_parameters'] / 1e6,
                        training_time=cur['final_training_time'] * 60)
                    for t in ['halfway', 'final']:
                        Nb101IntermediateStats.create(
                            trial=trial,
                            current_epoch=epochs // 2 if t == 'halfway' else epochs,
                            training_time=cur[t + '_training_time'],
                            train_acc=cur[t + '_train_accuracy'] * 100,
                            valid_acc=cur[t + '_validation_accuracy'] * 100,
                            test_acc=cur[t + '_test_accuracy'] * 100)
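# The converter above is a command-line script iterating over the three
# training repeats NAS-Bench-101 stores per (architecture, epoch budget).
# A hedged sketch of the entry point it implies (the script name in the
# comment is an assumption):
if __name__ == '__main__':
    main()  # e.g. `python convert_nasbench101.py nasbench_full.tfrecord`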
def __init__(self, seed):
    # Set random seed to check reproducibility of results.
    self.seed = seed
    np.random.seed(seed)
    # Load the data from file (this will take some time).
    self.nasbench = api.NASBench('./models/nasbench_only108.tfrecord', seed=seed)

    # The lines below just construct the proper pandas column structure.
    cell = self.random_cell()
    model_spec = api.ModelSpec(cell['matrix'], cell['ops'])
    data = self.nasbench.query(model_spec)
    md5hash = calculate_hash(cell)
    data.pop('module_adjacency')
    data.pop('module_operations')
    data['hash'] = md5hash
    self.df = pd.DataFrame.from_records([data], index='hash')
    self.df.drop(self.df.index, inplace=True)
    # Clear the budget spent on this initial query, which was needed only to
    # capture column names.
    self.reset_budget()
def __init__(self, search_space, dataset='cifar10', nasbench_folder='./', loaded_nasbench=None):
    self.search_space = search_space
    self.dataset = dataset
    if loaded_nasbench:
        self.nasbench = loaded_nasbench
    elif search_space == 'nasbench':
        self.nasbench = api.NASBench(nasbench_folder + 'nasbench_only108.tfrecord')
    elif search_space == 'nasbench_201':
        self.nasbench = API(
            os.path.expanduser('~/nas-bench-201/NAS-Bench-201-v1_0-e61699.pth'))
    elif search_space != 'darts':
        print(search_space, 'is not a valid search space')
        sys.exit()
def get_model_info(nb101_dataset, cache_dir):
    if cache_dir is not None:
        nbmodels_cache = pathlib.Path(cache_dir) / 'nb101_models_info.pickle'
        if nbmodels_cache.exists():
            with nbmodels_cache.open('rb') as f:
                return pickle.load(f)
    if nb101_dataset:
        import nasbench.api as nbapi
        nasbench = nbapi.NASBench(nb101_dataset)
        model_info = {
            model_hash: (stats['module_adjacency'], stats['module_operations'])
            for model_hash, stats in nasbench.fixed_statistics.items()
        }
        if cache_dir is not None:
            with nbmodels_cache.open('wb') as f:
                pickle.dump(model_info, f)
        return model_info
    return None
def main():
    nasbench = api.NASBench('/Users/hua/Documents/datasets/nasbench_full.tfrecord')
    max_num_blocks = 5
    ops = ['conv1x1-bn-relu', 'conv3x3-bn-relu', 'maxpool3x3']
    num_ops = len(ops)
    top_k = 256
    lstm_units = 100
    embed_size = 100
    pnas = PNAS(nasbench, max_num_blocks, ops, top_k, lstm_units, embed_size,
                epochs=50, batch_size=16)
    pnas.train()
    return pnas.get_top_architectures(num_blocks=5, top_k=10)
parser.add_argument('--folder', default=None, type=str, nargs='?',
                    help='name of folder where files will be dumped')
parser.add_argument('--version', default=None, type=str, nargs='?',
                    help='version of DEHB to run')
args = parser.parse_args()
args.verbose = args.verbose == 'True'
args.fix_seed = args.fix_seed == 'True'

nasbench = api.NASBench(args.data_dir)

if args.search_space is None:
    spaces = [1, 2, 3]
else:
    spaces = [int(args.search_space)]

for space in spaces:
    print('##### Search Space {} #####'.format(space))
    search_space = eval('SearchSpace{}()'.format(space))
    y_star_valid, y_star_test, inc_config = (search_space.valid_min_error,
                                             search_space.test_min_error, None)
    min_budget, max_budget = (4, 108)  # derived for Cifar-X from NAS-Bench-101

    # Parameter space to be used by DE
    cs = b.get_configuration_space()
    dimensions = len(cs.get_hyperparameters())
elif benchmark == '1shot1':
    assert benchmark_type in ['1', '2', '3']
    sys.path.append(os.path.join(os.getcwd(), '../nasbench/'))
    sys.path.append(os.path.join(os.getcwd(), '../nasbench-1shot1/'))
    from nasbench import api
    from nasbench_analysis.search_spaces.search_space_1 import SearchSpace1
    from nasbench_analysis.search_spaces.search_space_2 import SearchSpace2
    from nasbench_analysis.search_spaces.search_space_3 import SearchSpace3

    nasbench = api.NASBench(
        os.path.join(os.getcwd(),
                     "../nasbench-1shot1/nasbench_analysis/"
                     "nasbench_data/108_e/"
                     "nasbench_only108.tfrecord"))
    search_space = eval('SearchSpace{}()'.format(benchmark_type))

    def f(config, budget=None):
        if budget is not None:
            fitness, cost = search_space.objective_function(nasbench, config,
                                                            budget=int(budget))
        else:
            fitness, cost = search_space.objective_function(nasbench, config)
        fitness = 1 - fitness
        return fitness, cost

    cs = search_space.get_configuration_space()