def __init__(self, n_classes, input_shape, path, metric, verbose, trainer_args=None,
             default_model_len=Constant.MODEL_LEN, default_model_width=Constant.MODEL_WIDTH,
             beta=Constant.BETA, kernel_lambda=Constant.KERNEL_LAMBDA, t_min=None):
    """Initialize the BayesianSearcher.

    Args:
        n_classes: An integer, the number of classes.
        input_shape: A tuple. e.g. (28, 28, 1).
        path: A string. The path to the directory to save the searcher.
        metric: Stored as ``self.metric``; presumably the metric used to rank
            trained models — NOTE(review): semantics defined by callers, confirm.
        verbose: A boolean. Whether to output the intermediate information to stdout.
        trainer_args: A dictionary. The params for the constructor of ModelTrainer.
        default_model_len: An integer. Number of convolutional layers in the
            initial architecture.
        default_model_width: An integer. The number of filters in each layer in
            the initial architecture.
        beta: A float. The beta in the UCB acquisition function.
        kernel_lambda: A float. The balance factor in the neural network kernel.
        t_min: A float. The minimum temperature during simulated annealing.
    """
    # Never mutate a shared default: build a fresh dict when none was given.
    trainer_args = {} if trainer_args is None else trainer_args

    # Problem description.
    self.n_classes = n_classes
    self.input_shape = input_shape
    self.metric = metric
    self.verbose = verbose
    self.path = path

    # Search bookkeeping.
    self.history = []
    self.model_count = 0
    self.descriptors = []
    self.training_queue = []
    self.x_queue = []
    self.y_queue = []

    # Trainer configuration; cap iterations unless the caller set a limit.
    self.trainer_args = trainer_args
    self.trainer_args.setdefault('max_iter_num', Constant.SEARCH_MAX_ITER)
    self.default_model_len = default_model_len
    self.default_model_width = default_model_width

    # Bayesian-optimization machinery.
    self.gpr = IncrementalGaussianProcess(kernel_lambda)
    self.search_tree = SearchTree()
    self.beta = beta
    self.t_min = Constant.T_MIN if t_min is None else t_min
def __init__(self, n_classes, input_shape, path, verbose, trainer_args=None,
             default_model_len=constant.MODEL_LEN, default_model_width=constant.MODEL_WIDTH,
             beta=constant.BETA, kernel_lambda=constant.KERNEL_LAMBDA, t_min=constant.T_MIN):
    """Initialize the searcher with Bayesian-optimization state.

    Delegates the common bookkeeping to the parent searcher, then sets up
    the Gaussian-process regressor, the search tree, and the buffers used
    to seed the GP before its first fit.

    Args:
        n_classes: An integer, the number of classes.
        input_shape: A tuple, e.g. (28, 28, 1).
        path: A string. The directory to save the searcher into.
        verbose: A boolean. Whether to print intermediate information.
        trainer_args: A dictionary of params for the ModelTrainer constructor.
        default_model_len: An integer. Layers in the initial architecture.
        default_model_width: An integer. Filters per layer in the initial architecture.
        beta: A float. The beta in the UCB acquisition function.
        kernel_lambda: A float. The balance factor in the neural network kernel.
        t_min: A float. The minimum temperature during simulated annealing.
    """
    super().__init__(n_classes, input_shape, path, verbose,
                     trainer_args, default_model_len, default_model_width)
    # Acquisition-function parameters.
    self.beta = beta
    self.t_min = t_min
    # GP surrogate plus the data accumulated before its first fit.
    self.gpr = IncrementalGaussianProcess(kernel_lambda)
    self.init_gpr_x = []
    self.init_gpr_y = []
    # Populated lazily on the first search step.
    self.init_search_queue = None
    self.search_tree = SearchTree()
class BayesianSearcher:
    """Base class of all searcher class

    This class is the base class of all searcher class,
    every searcher class can override its search function
    to implements its strategy

    Attributes:
        n_classes: number of classification
        input_shape: Arbitrary, although all dimensions in the input shaped must be fixed.
            Use the keyword argument input_shape (tuple of integers, does not include the batch axis)
            when using this layer as the first layer in a model.
        verbose: verbosity mode
        history: A list that stores the performance of model. Each element in it is a dictionary of
            'model_id', 'loss', and 'accuracy'.
        path: A string. The path to the directory for saving the searcher.
        model_count: An integer. the total number of neural networks in the current searcher.
        descriptors: A list of descriptors of all the neural network architectures searched.
        trainer_args: A dictionary. The params for the constructor of ModelTrainer.
        default_model_len: An integer. Number of convolutional layers in the initial architecture.
        default_model_width: An integer. The number of filters in each layer in the initial architecture.
        gpr: A GaussianProcessRegressor for bayesian optimization.
        search_tree: The data structure for storing all the searched architectures in tree structure.
        training_queue: A list of the generated architectures to be trained.
        x_queue: A list of trained architectures not updated to the gpr.
        y_queue: A list of trained architecture performances not updated to the gpr.
        beta: A float. The beta in the UCB acquisition function.
        t_min: A float. The minimum temperature during simulated annealing.
    """

    def __init__(self, n_classes, input_shape, path, verbose, trainer_args=None,
                 default_model_len=Constant.MODEL_LEN, default_model_width=Constant.MODEL_WIDTH,
                 beta=Constant.BETA, kernel_lambda=Constant.KERNEL_LAMBDA, t_min=None):
        """Initialize the searcher.

        Args:
            n_classes: An integer, the number of classes.
            input_shape: A tuple. e.g. (28, 28, 1).
            path: A string. The path to the directory to save the searcher.
            verbose: A boolean. Whether to output the intermediate information to stdout.
            trainer_args: A dictionary. The params for the constructor of ModelTrainer.
            default_model_len: An integer. Number of convolutional layers in the initial architecture.
            default_model_width: An integer. The number of filters in each layer in the initial architecture.
            beta: A float. The beta in the UCB acquisition function.
            kernel_lambda: A float. The balance factor in the neural network kernel.
            t_min: A float. The minimum temperature during simulated annealing.
        """
        if trainer_args is None:
            trainer_args = {}
        self.n_classes = n_classes
        self.input_shape = input_shape
        self.verbose = verbose
        self.history = []
        self.path = path
        self.model_count = 0
        self.descriptors = []
        self.trainer_args = trainer_args
        self.default_model_len = default_model_len
        self.default_model_width = default_model_width
        # Cap the training iterations unless the caller set an explicit limit.
        if 'max_iter_num' not in self.trainer_args:
            self.trainer_args['max_iter_num'] = Constant.SEARCH_MAX_ITER
        self.gpr = IncrementalGaussianProcess(kernel_lambda)
        self.search_tree = SearchTree()
        self.training_queue = []
        self.x_queue = []
        self.y_queue = []
        self.beta = beta
        if t_min is None:
            t_min = Constant.T_MIN
        self.t_min = t_min

    def load_model_by_id(self, model_id):
        """Unpickle and return the graph stored for ``model_id``."""
        return pickle_from_file(os.path.join(self.path, str(model_id) + '.h5'))

    def load_best_model(self):
        """Return the graph of the model with the highest recorded accuracy."""
        return self.load_model_by_id(self.get_best_model_id())

    def get_accuracy_by_id(self, model_id):
        """Return the recorded accuracy for ``model_id``, or None if unknown."""
        for item in self.history:
            if item['model_id'] == model_id:
                return item['accuracy']
        return None

    def get_best_model_id(self):
        """Return the model id with the maximum accuracy in the history.

        Raises ValueError (from max) when the history is empty.
        """
        return max(self.history, key=lambda x: x['accuracy'])['model_id']

    def replace_model(self, graph, model_id):
        """Overwrite the stored graph for ``model_id`` on disk."""
        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))

    def add_model(self, accuracy, loss, graph, model_id):
        """Persist a trained model and record its performance.

        Appends the result to ``history``, refreshes ``best_model.txt`` when
        this model is the new best, and queues the descriptor/accuracy pair
        for the next GP fit.

        Returns:
            The history entry dict with 'model_id', 'loss' and 'accuracy'.
        """
        if self.verbose:
            print('Saving model.')
        pickle_to_file(graph, os.path.join(self.path, str(model_id) + '.h5'))
        # Update best_model text file
        if self.verbose:
            print('Model ID:', model_id)
            print('Loss:', loss)
            print('Accuracy', accuracy)
        ret = {'model_id': model_id, 'loss': loss, 'accuracy': accuracy}
        self.history.append(ret)
        if model_id == self.get_best_model_id():
            # Use a context manager so the handle is closed even if write fails
            # (original opened/closed manually, leaking on error).
            with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
                file.write('best model: ' + str(model_id))
        descriptor = graph.extract_descriptor()
        self.x_queue.append(descriptor)
        self.y_queue.append(accuracy)
        return ret

    def init_search(self):
        """Seed the training queue with the default architecture and its transforms."""
        if self.verbose:
            print('Initializing search.')
        graph = DefaultClassifierGenerator(self.n_classes,
                                           self.input_shape).generate(self.default_model_len,
                                                                      self.default_model_width)
        model_id = self.model_count
        self.model_count += 1
        # father_id of -1 marks the root of the search tree.
        self.training_queue.append((graph, -1, model_id))
        self.descriptors.append(graph.extract_descriptor())

        for child_graph in default_transform(graph):
            child_id = self.model_count
            self.model_count += 1
            self.training_queue.append((child_graph, model_id, child_id))
            self.descriptors.append(child_graph.extract_descriptor())
        if self.verbose:
            print('Initialization finished.')

    def search(self, train_data, test_data):
        """Run one search step: train the next queued model while generating a new one.

        Training happens in a spawned child process; acquisition-function
        maximization runs concurrently in the current thread.
        """
        torch.cuda.empty_cache()
        if not self.history:
            self.init_search()

        # Start the new process for training.
        graph, father_id, model_id = self.training_queue.pop(0)
        if self.verbose:
            print('Training model ', model_id)
        # 'spawn' is required so CUDA state is not inherited by the worker.
        multiprocessing.set_start_method('spawn', force=True)
        pool = multiprocessing.Pool(1)
        train_results = pool.map_async(train, [(graph, train_data, test_data, self.trainer_args,
                                                os.path.join(self.path, str(model_id) + '.png'),
                                                self.verbose)])

        # Do the search in current thread.
        if not self.training_queue:
            new_graph, new_father_id = self.maximize_acq()
            new_model_id = self.model_count
            self.model_count += 1
            self.training_queue.append((new_graph, new_father_id, new_model_id))
            # Reuse the computed descriptor (original extracted it twice and
            # left an unused local).
            descriptor = new_graph.extract_descriptor()
            self.descriptors.append(descriptor)

        accuracy, loss, graph = train_results.get()[0]
        pool.terminate()
        pool.join()
        self.add_model(accuracy, loss, graph, model_id)
        self.search_tree.add_child(father_id, model_id)
        self.gpr.fit(self.x_queue, self.y_queue)
        self.x_queue = []
        self.y_queue = []
        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        self.export_json(os.path.join(self.path, 'history.json'))

    def maximize_acq(self):
        """Search for the architecture maximizing the acquisition function.

        Uses simulated annealing over a priority queue of candidate graphs:
        at each step a candidate is accepted with probability
        exp((accuracy - max_acq) / t) and its network-morphism transforms are
        scored and enqueued.

        Returns:
            A (graph, father_id) pair: the morphed parent graph and the id of
            the parent model it was derived from.
        """
        model_ids = self.search_tree.adj_list.keys()
        target_graph = None
        father_id = None
        # Work on a copy so speculative candidates don't pollute the
        # searcher's record of actually-trained descriptors.
        descriptors = deepcopy(self.descriptors)

        # Initialize the priority queue.
        pq = PriorityQueue()
        temp_list = []
        for model_id in model_ids:
            accuracy = self.get_accuracy_by_id(model_id)
            temp_list.append((accuracy, model_id))
        temp_list = sorted(temp_list)
        for accuracy, model_id in temp_list:
            graph = self.load_model_by_id(model_id)
            graph.clear_operation_history()
            pq.put(Elem(accuracy, model_id, graph))

        t = 1.0
        t_min = self.t_min
        alpha = 0.9
        max_acq = -1
        while not pq.empty() and t > t_min:
            elem = pq.get()
            # Clamp the exponent at 709 — the largest value math.exp accepts
            # before overflowing a double.
            temp_exp = min((elem.accuracy - max_acq) / t, 709.0)
            ap = math.exp(temp_exp)
            if ap > random.uniform(0, 1):
                graphs = transform(elem.graph)
                for temp_graph in graphs:
                    if contain(descriptors, temp_graph.extract_descriptor()):
                        continue
                    temp_acq_value = self.acq(temp_graph)
                    pq.put(Elem(temp_acq_value, elem.father_id, temp_graph))
                    descriptors.append(temp_graph.extract_descriptor())
                    if temp_acq_value > max_acq:
                        max_acq = temp_acq_value
                        father_id = elem.father_id
                        target_graph = temp_graph
            t *= alpha

        # NOTE(review): if no candidate is ever accepted, father_id stays None
        # and load_model_by_id will fail — confirm callers guarantee progress.
        nm_graph = self.load_model_by_id(father_id)
        if self.verbose:
            print('Father ID: ', father_id)
            print(target_graph.operation_history)
        # Replay the recorded morphism operations onto the freshly-loaded parent.
        for args in target_graph.operation_history:
            getattr(nm_graph, args[0])(*list(args[1:]))
        return nm_graph, father_id

    def acq(self, graph):
        """Upper-confidence-bound acquisition value: mean + beta * std."""
        mean, std = self.gpr.predict(np.array([graph.extract_descriptor()]))
        return mean + self.beta * std

    def export_json(self, path):
        """Write the searched network descriptors and the search tree to ``path`` as JSON."""
        data = dict()

        networks = []
        # Only models already popped from the training queue have files on disk.
        for model_id in range(self.model_count - len(self.training_queue)):
            networks.append(self.load_model_by_id(model_id).extract_descriptor().to_json())

        tree = self.search_tree.get_dict()

        # Saving the data to file.
        data['networks'] = networks
        data['tree'] = tree
        import json
        with open(path, 'w') as fp:
            json.dump(data, fp)
class BayesianSearcher(Searcher):
    """Searcher that picks the next architecture by Bayesian optimization.

    A Gaussian process is first-fitted on the initial models and then
    incrementally updated; new architectures are chosen by maximizing a
    UCB acquisition function via simulated annealing over network morphisms.
    """

    def __init__(self, n_classes, input_shape, path, verbose, trainer_args=None,
                 default_model_len=constant.MODEL_LEN, default_model_width=constant.MODEL_WIDTH,
                 beta=constant.BETA, kernel_lambda=constant.KERNEL_LAMBDA, t_min=constant.T_MIN):
        """Set up GP state on top of the base Searcher.

        Args:
            n_classes: An integer, the number of classes.
            input_shape: A tuple, e.g. (28, 28, 1).
            path: A string. Directory to save the searcher into.
            verbose: A boolean. Whether to print intermediate information.
            trainer_args: A dictionary of params for the ModelTrainer constructor.
            default_model_len: An integer. Layers in the initial architecture.
            default_model_width: An integer. Filters per layer in the initial architecture.
            beta: A float. The beta in the UCB acquisition function.
            kernel_lambda: A float. The balance factor in the neural network kernel.
            t_min: A float. The minimum temperature during simulated annealing.
        """
        super().__init__(n_classes, input_shape, path, verbose,
                         trainer_args, default_model_len, default_model_width)
        self.gpr = IncrementalGaussianProcess(kernel_lambda)
        self.search_tree = SearchTree()
        self.init_search_queue = None
        # Descriptor/accuracy pairs accumulated before the GP's first fit.
        self.init_gpr_x = []
        self.init_gpr_y = []
        self.beta = beta
        self.t_min = t_min

    def search(self, x_train, y_train, x_test, y_test):
        """Run one search step: bootstrap, drain the init queue, or optimize.

        First call trains the default architecture; subsequent calls drain
        the (currently disabled) init queue, then switch to GP-driven
        acquisition maximization. State is checkpointed to disk after each step.
        """
        if not self.history:
            # Bootstrap: train the default architecture as the root model.
            model = DefaultClassifierGenerator(self.n_classes,
                                               self.input_shape).generate(self.default_model_len,
                                                                          self.default_model_width)
            history_item = self.add_model(model, x_train, y_train, x_test, y_test)
            self.search_tree.add_child(-1, history_item['model_id'])

            graph = Graph(model)
            self.init_search_queue = []
            # for child_graph in transform(graph):
            #     self.init_search_queue.append((child_graph, history_item['model_id']))
            self.init_gpr_x.append(graph.extract_descriptor())
            self.init_gpr_y.append(history_item['accuracy'])
            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            return

        if self.init_search_queue:
            graph, father_id = self.init_search_queue.pop()
            model = graph.produce_model()
            history_item = self.add_model(model, x_train, y_train, x_test, y_test)
            self.search_tree.add_child(father_id, history_item['model_id'])
            self.init_gpr_x.append(graph.extract_descriptor())
            self.init_gpr_y.append(history_item['accuracy'])
            pickle_to_file(self, os.path.join(self.path, 'searcher'))
            return

        if not self.init_search_queue and not self.gpr.first_fitted:
            self.gpr.first_fit(self.init_gpr_x, self.init_gpr_y)

        new_model, father_id = self.maximize_acq()
        history_item = self.add_model(new_model, x_train, y_train, x_test, y_test)
        self.search_tree.add_child(father_id, history_item['model_id'])
        self.gpr.incremental_fit(Graph(new_model).extract_descriptor(),
                                 history_item['accuracy'])
        pickle_to_file(self, os.path.join(self.path, 'searcher'))

    def maximize_acq(self):
        """Search for the architecture maximizing the acquisition function.

        Simulated annealing over a priority queue of candidate graphs; each
        accepted candidate's morphism transforms are scored and enqueued.

        Returns:
            A (model, father_id) pair: the produced Keras model and the id of
            the parent model it was morphed from.
        """
        model_ids = self.search_tree.adj_list.keys()
        target_graph = None
        father_id = None
        descriptors = self.descriptors
        pq = PriorityQueue()
        temp_list = []
        for model_id in model_ids:
            accuracy = self.get_accuracy_by_id(model_id)
            temp_list.append((accuracy, model_id))
        temp_list = sorted(temp_list)
        if len(temp_list) > 5:
            # NOTE(review): [:-5] keeps everything EXCEPT the 5 highest-accuracy
            # entries (list is sorted ascending). If the intent was "keep the
            # top 5", this should be [-5:] — confirm before changing.
            temp_list = temp_list[:-5]
        for accuracy, model_id in temp_list:
            model = self.load_model_by_id(model_id)
            graph = Graph(model, False)
            pq.put(Elem(accuracy, model_id, graph))

        t = 1.0
        t_min = self.t_min
        alpha = 0.9
        max_acq = -1
        while not pq.empty() and t > t_min:
            elem = pq.get()
            # Clamp the exponent at 709 (largest argument math.exp accepts for
            # a double) — the unclamped form could raise OverflowError as t
            # shrinks; matches the clamping used by the other searcher variant.
            ap = math.exp(min((elem.accuracy - max_acq) / t, 709.0))
            if ap > random.uniform(0, 1):
                graphs = transform(elem.graph)
                graphs = list(filter(lambda x: x.extract_descriptor() not in descriptors,
                                     graphs))
                if not graphs:
                    # No novel transforms for this element; note this skips the
                    # cooling step (t *= alpha), as in the original.
                    continue
                for temp_graph in graphs:
                    temp_acq_value = self.acq(temp_graph)
                    pq.put(Elem(temp_acq_value, elem.father_id, temp_graph))
                    descriptors[temp_graph.extract_descriptor()] = True
                    if temp_acq_value > max_acq:
                        max_acq = temp_acq_value
                        father_id = elem.father_id
                        target_graph = temp_graph
            t *= alpha

        model = self.load_model_by_id(father_id)
        nm_graph = Graph(model, True)
        if self.verbose:
            print('Father ID: ', father_id)
            print(target_graph.operation_history)
        # Replay the recorded morphism operations onto the reloaded parent.
        for args in target_graph.operation_history:
            getattr(nm_graph, args[0])(*list(args[1:]))
        return nm_graph.produce_model(), father_id

    def acq(self, graph):
        """Upper-confidence-bound acquisition value: mean + beta * std."""
        mean, std = self.gpr.predict(np.array([graph.extract_descriptor()]))
        return mean + self.beta * std
def __init__(self, n_classes, input_shape, path, verbose):
    """Initialize the searcher.

    Delegates common bookkeeping to the parent class, then creates the
    Gaussian-process surrogate and the search tree used to track
    parent/child relations between searched architectures.

    Args:
        n_classes: An integer, the number of classes.
        input_shape: A tuple, e.g. (28, 28, 1).
        path: A string. Directory to save the searcher into.
        verbose: A boolean. Whether to print intermediate information.
    """
    super().__init__(n_classes, input_shape, path, verbose)
    self.search_tree = SearchTree()
    self.gpr = IncrementalGaussianProcess()
class BayesianSearcher(Searcher):
    """Searcher that selects the next architecture with a Gaussian process.

    The GP is first-fitted on the default model and incrementally updated
    after each training step; candidates come from network-morphism
    transforms of the current leaf models.
    """

    def __init__(self, n_classes, input_shape, path, verbose):
        """Set up GP and search-tree state on top of the base Searcher.

        Args:
            n_classes: An integer, the number of classes.
            input_shape: A tuple, e.g. (28, 28, 1).
            path: A string. Directory to save the searcher into.
            verbose: A boolean. Whether to print intermediate information.
        """
        super().__init__(n_classes, input_shape, path, verbose)
        self.gpr = IncrementalGaussianProcess()
        self.search_tree = SearchTree()

    def search(self, x_train, y_train, x_test, y_test):
        """Run one search step and checkpoint the searcher to disk.

        First call trains the default architecture; later calls morph the
        best candidate found by maximizing the acquisition over leaf models.
        """
        if not self.history:
            model = DefaultClassifierGenerator(self.n_classes,
                                               self.input_shape).generate()
            history_item = self.add_model(model, x_train, y_train, x_test, y_test)
            self.search_tree.add_child(-1, history_item['model_id'])

            self.gpr.first_fit(Graph(model).extract_descriptor(),
                               history_item['accuracy'])
            # Use a context manager so the checkpoint file handle is closed
            # (original passed an unclosed open() straight to pickle.dump).
            with open(os.path.join(self.path, 'searcher'), 'wb') as searcher_file:
                pickle.dump(self, searcher_file)
            # Free the Keras graph/session between steps.
            del model
            backend.clear_session()
        else:
            model_ids = self.search_tree.get_leaves()
            new_model, father_id = self.maximize_acq(model_ids)

            history_item = self.add_model(new_model, x_train, y_train, x_test, y_test)
            self.search_tree.add_child(father_id, history_item['model_id'])
            self.gpr.incremental_fit(Graph(new_model).extract_descriptor(),
                                     history_item['accuracy'])
            with open(os.path.join(self.path, 'searcher'), 'wb') as searcher_file:
                pickle.dump(self, searcher_file)
            del new_model
            backend.clear_session()

    def maximize_acq(self, model_ids):
        """Greedy two-phase maximization of the acquisition function.

        Exploration scores one round of transforms of every given model;
        exploitation repeatedly transforms the best graph found so far.

        Args:
            model_ids: Iterable of leaf model ids to explore from.

        Returns:
            A (model, father_id) pair: the produced model and the id of the
            parent it was morphed from.
        """
        overall_max_acq_value = -1
        father_id = None
        target_graph = None

        # exploration
        for model_id in model_ids:
            model = self.load_model_by_id(model_id)
            graph = Graph(to_stub_model(model))
            graph.clear_operation_history()
            graphs = transform(graph)
            for temp_graph in graphs:
                temp_acq_value = self._acq(temp_graph)
                if temp_acq_value > overall_max_acq_value:
                    overall_max_acq_value = temp_acq_value
                    father_id = model_id
                    target_graph = temp_graph

        # exploitation: hill-climb from the best exploration candidate.
        for _ in range(constant.ACQ_EXPLOITATION_DEPTH):
            graphs = transform(target_graph)
            for temp_graph in graphs:
                temp_acq_value = self._acq(temp_graph)
                if temp_acq_value > overall_max_acq_value:
                    overall_max_acq_value = temp_acq_value
                    target_graph = temp_graph

        model = self.load_model_by_id(father_id)
        nm_graph = NetworkMorphismGraph(model)
        # Replay the recorded morphism operations onto the reloaded parent.
        for args in target_graph.operation_history:
            getattr(nm_graph, args[0])(*list(args[1:]))
        return nm_graph.produce_model(), father_id

    def _acq(self, graph):
        """Acquisition value of ``graph``: the GP's predicted mean."""
        return self.gpr.predict(np.array([graph.extract_descriptor()]))[0]