class BayesianSearcher(Searcher): """ Class to search for neural architectures using Bayesian search strategy. Attribute: optimizer: An instance of BayesianOptimizer. t_min: A float. The minimum temperature during simulated annealing. """ def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose, trainer_args=None, default_model_len=None, default_model_width=None, t_min=None): super(BayesianSearcher, self).__init__(n_output_node, input_shape, path, metric, loss, generators, verbose, trainer_args, default_model_len, default_model_width) if t_min is None: t_min = Constant.T_MIN self.optimizer = BayesianOptimizer(self, t_min, metric) def generate(self, multiprocessing_queue): """Generate the next neural architecture. Args: multiprocessing_queue: the Queue for multiprocessing return value. Returns: list of 2-element tuples: generated_graph and other_info, for bayesian searcher the length of list is 1. generated_graph: An instance of Graph. other_info: Anything to be saved in the training queue together with the architecture. """ remaining_time = self._timeout - time.time() generated_graph, new_father_id = self.optimizer.generate(self.descriptors, remaining_time, multiprocessing_queue) if new_father_id is None: new_father_id = 0 generated_graph = self.generators[0](self.n_classes, self.input_shape). \ generate(self.default_model_len, self.default_model_width) return [(generated_graph, new_father_id)] def update(self, other_info, model_id, graph, metric_value): """ Update the controller with evaluation result of a neural architecture. Args: other_info: Anything. In our case it is the father ID in the search tree. model_id: An integer. graph: An instance of Graph. The trained neural architecture. metric_value: The final evaluated metric value. """ father_id = other_info self.optimizer.fit([graph.extract_descriptor()], [metric_value]) self.optimizer.add_child(father_id, model_id)
class Searcher: """Class to search for neural architectures. This class generate new architectures, call the trainer to train it, and update the Bayesian optimizer. Attributes: n_classes: Number of classes in the target classification task. input_shape: Arbitrary, although all dimensions in the input shaped must be fixed. Use the keyword argument `input_shape` (tuple of integers, does not include the batch axis) when using this layer as the first layer in a model. verbose: Verbosity mode. history: A list that stores the performance of model. Each element in it is a dictionary of 'model_id', 'loss', and 'metric_value'. path: A string. The path to the directory for saving the searcher. metric: An instance of the Metric subclasses. loss: A function taking two parameters, the predictions and the ground truth. generators: A list of generators used to initialize the search. model_count: An integer. the total number of neural networks in the current searcher. descriptors: A dictionary of all the neural network architectures searched. trainer_args: A dictionary. The params for the constructor of ModelTrainer. default_model_len: An integer. Number of convolutional layers in the initial architecture. default_model_width: An integer. The number of filters in each layer in the initial architecture. training_queue: A list of the generated architectures to be trained. x_queue: A list of trained architectures not updated to the gpr. y_queue: A list of trained architecture performances not updated to the gpr. beta: A float. The beta in the UCB acquisition function. t_min: A float. The minimum temperature during simulated annealing. bo: An instance of BayesianOptimizer. """ def __init__(self, n_output_node, input_shape, path, metric, loss, generators, verbose, trainer_args=None, default_model_len=Constant.MODEL_LEN, default_model_width=Constant.MODEL_WIDTH, beta=Constant.BETA, t_min=None): """Initialize the Searcher. Args: n_output_node: An integer, the number of classes. input_shape: A tuple. e.g. (28, 28, 1). path: A string. The path to the directory to save the searcher. metric: An instance of the Metric subclasses. loss: A function taking two parameters, the predictions and the ground truth. generators: A list of generators used to initialize the search. verbose: A boolean. Whether to output the intermediate information to stdout. trainer_args: A dictionary. The params for the constructor of ModelTrainer. default_model_len: An integer. Number of convolutional layers in the initial architecture. default_model_width: An integer. The number of filters in each layer in the initial architecture. beta: A float. The beta in the UCB acquisition function. t_min: A float. The minimum temperature during simulated annealing. """ if trainer_args is None: trainer_args = {} self.n_classes = n_output_node self.input_shape = input_shape self.verbose = verbose self.history = [] self.path = path self.metric = metric self.loss = loss self.generators = generators self.model_count = 0 self.descriptors = [] self.trainer_args = trainer_args self.default_model_len = default_model_len self.default_model_width = default_model_width if 'max_iter_num' not in self.trainer_args: self.trainer_args['max_iter_num'] = Constant.SEARCH_MAX_ITER self.training_queue = [] self.x_queue = [] self.y_queue = [] if t_min is None: t_min = Constant.T_MIN self.bo = BayesianOptimizer(self, t_min, metric, beta) def load_model_by_id(self, model_id): return pickle_from_file( os.path.join(self.path, str(model_id) + '.graph')) def load_best_model(self): return self.load_model_by_id(self.get_best_model_id()) def get_metric_value_by_id(self, model_id): for item in self.history: if item['model_id'] == model_id: return item['metric_value'] return None def get_best_model_id(self): if self.metric.higher_better(): return max(self.history, key=lambda x: x['metric_value'])['model_id'] return min(self.history, key=lambda x: x['metric_value'])['model_id'] def replace_model(self, graph, model_id): pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.graph')) def add_model(self, metric_value, loss, graph, model_id): """Append the information of evaluated architecture to history.""" if self.verbose: print('\nSaving model.') pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.graph')) ret = { 'model_id': model_id, 'loss': loss, 'metric_value': metric_value } self.history.append(ret) # Update best_model text file if model_id == self.get_best_model_id(): file = open(os.path.join(self.path, 'best_model.txt'), 'w') file.write('best model: ' + str(model_id)) file.close() if self.verbose: idx = ['model_id', 'loss', 'metric_value'] header = ['Model ID', 'Loss', 'Metric Value'] line = '|'.join(x.center(24) for x in header) print('+' + '-' * len(line) + '+') print('|' + line + '|') if self.history: r = self.history[-1] print('+' + '-' * len(line) + '+') line = '|'.join(str(r[x]).center(24) for x in idx) print('|' + line + '|') print('+' + '-' * len(line) + '+') return ret def init_search(self): """Call the generators to generate the initial architectures for the search.""" if self.verbose: print('\nInitializing search.') for generator in self.generators: graph = generator(self.n_classes, self.input_shape). \ generate(self.default_model_len, self.default_model_width) model_id = self.model_count self.model_count += 1 self.training_queue.append((graph, -1, model_id)) self.descriptors.append(graph.extract_descriptor()) if self.verbose: print('Initialization finished.') def search(self, train_data, test_data, timeout=60 * 60 * 24): """Run the search loop of training, generating and updating once. The function will run the training and generate in parallel. Then it will update the controller. The training is just pop out a graph from the training_queue and train it. The generate will call teh self.generate function. The update will call the self.update function. Args: train_data: An instance of DataLoader. test_data: An instance of Dataloader. timeout: An integer, time limit in seconds. """ start_time = time.time() torch.cuda.empty_cache() if not self.history: self.init_search() # Start the new process for training. graph, other_info, model_id = self.training_queue.pop(0) if self.verbose: print('\n') print('+' + '-' * 46 + '+') print('|' + 'Training model {}'.format(model_id).center(46) + '|') print('+' + '-' * 46 + '+') # Temporary solution to support GOOGLE Colab if get_system() == Constant.SYS_GOOGLE_COLAB: ctx = mp.get_context('fork') else: ctx = mp.get_context('spawn') q = ctx.Queue() p = ctx.Process(target=train, args=(q, graph, train_data, test_data, self.trainer_args, self.metric, self.loss, self.verbose, self.path)) try: p.start() # Do the search in current thread. searched = False generated_graph = None generated_other_info = None if not self.training_queue: searched = True remaining_time = timeout - (time.time() - start_time) generated_other_info, generated_graph = self.generate( remaining_time, q) new_model_id = self.model_count self.model_count += 1 self.training_queue.append( (generated_graph, generated_other_info, new_model_id)) self.descriptors.append(generated_graph.extract_descriptor()) remaining_time = timeout - (time.time() - start_time) if remaining_time <= 0: raise TimeoutError metric_value, loss, graph = q.get(timeout=remaining_time) if self.verbose and searched: verbose_print(generated_other_info, generated_graph) self.add_model(metric_value, loss, graph, model_id) self.update(other_info, graph, metric_value, model_id) self.export_json(os.path.join(self.path, 'history.json')) except (TimeoutError, queue.Empty) as e: raise TimeoutError from e except RuntimeError as e: if not re.search('out of memory', str(e)): raise e if self.verbose: print( '\nCurrent model size is too big. Discontinuing training this model to search for other models.' ) Constant.MAX_MODEL_SIZE = graph.size() - 1 return finally: # terminate and join the subprocess to prevent any resource leak p.terminate() p.join() def update(self, other_info, graph, metric_value, model_id): """ Update the controller with evaluation result of a neural architecture. Args: other_info: Anything. In our case it is the father ID in the search tree. graph: An instance of Graph. The trained neural architecture. metric_value: The final evaluated metric value. model_id: An integer. """ father_id = other_info self.bo.fit([graph.extract_descriptor()], [metric_value]) self.bo.add_child(father_id, model_id) def generate(self, remaining_time, multiprocessing_queue): """Generate the next neural architecture. Args: remaining_time: The remaining time in seconds. multiprocessing_queue: the Queue for multiprocessing return value. Returns: other_info: Anything to be saved in the training queue together with the architecture. generated_graph: An instance of Graph. """ generated_graph, new_father_id = self.bo.generate( self.descriptors, remaining_time, multiprocessing_queue) if new_father_id is None: new_father_id = 0 generated_graph = self.generators[0](self.n_classes, self.input_shape). \ generate(self.default_model_len, self.default_model_width) return new_father_id, generated_graph def export_json(self, path): """Export a json file of the search process.""" data = dict() networks = [] for model_id in range(self.model_count - len(self.training_queue)): networks.append( self.load_model_by_id(model_id).extract_descriptor().to_json()) tree = self.bo.search_tree.get_dict() # Saving the data to file. # data['networks'] = networks data['tree'] = tree import json with open(path, 'w') as fp: json.dump(data, fp)