def search(self, x_train, y_train, x_test, y_test):
    """Override the parent's search function. The first model is produced by the default classifier generator."""
    if not self.history:
        # First iteration: train the default model.
        model = DefaultClassifierGenerator(self.n_classes,
                                           self.input_shape).generate()
        self.add_model(model, x_train, y_train, x_test, y_test)
        pickle_to_file(self, os.path.join(self.path, 'searcher'))
    else:
        # Transform the current best model, then replay each transformed
        # graph's operation history onto a weighted copy of the graph.
        model = self.load_best_model()
        new_graphs = transform(Graph(model, False))
        new_models = []
        for graph in new_graphs:
            nm_graph = Graph(model, True)
            for args in graph.operation_history:
                getattr(nm_graph, args[0])(*list(args[1:]))
            new_models.append(nm_graph.produce_model())
        new_models = self._remove_duplicate(new_models)

        for model in new_models:
            if self.model_count < constant.MAX_MODEL_NUM:
                self.add_model(model, x_train, y_train, x_test, y_test)
                pickle_to_file(self, os.path.join(self.path, 'searcher'))

    backend.clear_session()
    return self.load_best_model()
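# _remove_duplicate (used above) is not shown in this section. A minimal
# sketch of what it could look like (an assumption, not the verified
# original): drop models whose extracted config already appears in
# self.history_configs, which add_model populates further below.
def _remove_duplicate(self, models):
    return [model for model in models
            if extract_config(model) not in self.history_configs]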
def maximize_acq(self, model_ids):
    overall_max_acq_value = -1
    father_id = None
    target_graph = None

    # Exploration: score every neighbour of every candidate model.
    for model_id in model_ids:
        model = self.load_model_by_id(model_id)
        graph = Graph(to_stub_model(model))
        graph.clear_operation_history()
        graphs = transform(graph)
        for temp_graph in graphs:
            temp_acq_value = self._acq(temp_graph)
            if temp_acq_value > overall_max_acq_value:
                overall_max_acq_value = temp_acq_value
                father_id = model_id
                target_graph = temp_graph

    # Exploitation: keep transforming the current best graph.
    for i in range(constant.ACQ_EXPLOITATION_DEPTH):
        graphs = transform(target_graph)
        for temp_graph in graphs:
            temp_acq_value = self._acq(temp_graph)
            if temp_acq_value > overall_max_acq_value:
                overall_max_acq_value = temp_acq_value
                target_graph = temp_graph

    # Replay the winning graph's operation history onto a network-morphism
    # copy of its father model.
    model = self.load_model_by_id(father_id)
    nm_graph = NetworkMorphismGraph(model)
    for args in target_graph.operation_history:
        getattr(nm_graph, args[0])(*list(args[1:]))
    return nm_graph.produce_model(), father_id
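# _acq (used above) is the acquisition function over graphs and is not
# shown in this section. A plausible sketch (an assumption, not the
# verified original) is an upper confidence bound on the Gaussian process
# regressor's predicted accuracy. Both self.beta (the exploration weight)
# and the return_std flag (which scikit-learn's GaussianProcessRegressor
# supports, but the project's IncrementalGaussianProcess may not) are
# assumptions here.
def _acq(self, graph):
    mean, std = self.gpr.predict(np.array([graph.extract_descriptor()]),
                                 return_std=True)
    return mean[0] + self.beta * std[0]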
def test_gpr():
    gpr = IncrementalGaussianProcess(1.0)
    gpr.first_fit([Graph(get_add_skip_model()).extract_descriptor()], [0.5])
    assert gpr.first_fitted

    gpr.incremental_fit(Graph(get_concat_skip_model()).extract_descriptor(), 0.6)
    assert abs(gpr.predict(np.array([Graph(get_concat_skip_model()).extract_descriptor()]))[0] - 0.6) < 1e-4
def cross_validate(self, x_all, y_all, n_splits, trainer_args=None):
    """Do n_splits-fold cross-validation on the input data."""
    if trainer_args is None:
        trainer_args = {}
    if constant.LIMIT_MEMORY:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        init = tf.global_variables_initializer()
        sess.run(init)
        backend.set_session(sess)
    # random_state only takes effect when shuffle=True, so it is omitted here.
    k_fold = StratifiedKFold(n_splits=n_splits, shuffle=False)
    ret = []
    y_raw_all = y_all
    y_all = self.y_encoder.transform(y_all)
    model = self.load_searcher().load_best_model()
    for train, test in k_fold.split(x_all, y_raw_all):
        graph = Graph(model, False)
        backend.clear_session()
        model = graph.produce_model()
        ModelTrainer(model,
                     x_all[train], y_all[train],
                     x_all[test], y_all[test],
                     False).train_model(**trainer_args)
        scores = model.evaluate(x_all[test], y_all[test], verbose=self.verbose)
        if self.verbose:
            print('Score:', scores[1])
        ret.append(scores[1] * 100)
    return np.array(ret)
def search(self, x_train, y_train, x_test, y_test):
    if not self.history:
        model = DefaultClassifierGenerator(self.n_classes,
                                           self.input_shape).generate()
        history_item = self.add_model(model, x_train, y_train, x_test, y_test)
        self.search_tree.add_child(-1, history_item['model_id'])

        self.gpr.first_fit(Graph(model).extract_descriptor(),
                           history_item['accuracy'])
        with open(os.path.join(self.path, 'searcher'), 'wb') as f:
            pickle.dump(self, f)
        del model
        backend.clear_session()
    else:
        model_ids = self.search_tree.get_leaves()
        new_model, father_id = self.maximize_acq(model_ids)

        history_item = self.add_model(new_model, x_train, y_train, x_test, y_test)
        self.search_tree.add_child(father_id, history_item['model_id'])
        self.gpr.incremental_fit(Graph(new_model).extract_descriptor(),
                                 history_item['accuracy'])
        with open(os.path.join(self.path, 'searcher'), 'wb') as f:
            pickle.dump(self, f)
        del new_model
        backend.clear_session()
def copy_conv_model(model):
    """Return a copy of a convolutional model.

    Args:
        model: The model to copy.

    Returns:
        The copied model.
    """
    graph = Graph(model)
    return graph.produce_model()
def maximize_acq(self):
    model_ids = self.search_tree.adj_list.keys()
    target_graph = None
    father_id = None
    descriptors = self.descriptors
    pq = PriorityQueue()

    # Seed the queue with the five most accurate models.
    temp_list = []
    for model_id in model_ids:
        accuracy = self.get_accuracy_by_id(model_id)
        temp_list.append((accuracy, model_id))
    temp_list = sorted(temp_list)
    if len(temp_list) > 5:
        temp_list = temp_list[-5:]
    for accuracy, model_id in temp_list:
        model = self.load_model_by_id(model_id)
        graph = Graph(model, False)
        pq.put(Elem(accuracy, model_id, graph))

    # Simulated annealing over the space of graph transformations.
    t = 1.0
    t_min = self.t_min
    alpha = 0.9
    max_acq = -1
    while not pq.empty() and t > t_min:
        elem = pq.get()
        ap = math.exp((elem.accuracy - max_acq) / t)
        if ap > random.uniform(0, 1):
            graphs = transform(elem.graph)
            # Skip graphs whose descriptors have already been seen.
            graphs = list(filter(lambda x: x.extract_descriptor() not in descriptors,
                                 graphs))
            if not graphs:
                continue
            for temp_graph in graphs:
                temp_acq_value = self.acq(temp_graph)
                pq.put(Elem(temp_acq_value, elem.father_id, temp_graph))
                descriptors[temp_graph.extract_descriptor()] = True
                if temp_acq_value > max_acq:
                    max_acq = temp_acq_value
                    father_id = elem.father_id
                    target_graph = temp_graph
        t *= alpha

    # Replay the winning transformations onto a weighted copy of the father.
    model = self.load_model_by_id(father_id)
    nm_graph = Graph(model, True)
    if self.verbose:
        print('Father ID: ', father_id)
        print(target_graph.operation_history)
    for args in target_graph.operation_history:
        getattr(nm_graph, args[0])(*list(args[1:]))
    return nm_graph.produce_model(), father_id
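# The PriorityQueue above relies on Elem being orderable. A minimal sketch
# of such a wrapper (the field names mirror the usage above, but the
# definition itself is an assumption, not the verified original): entries
# compare by their stored accuracy/acquisition value, so pq.get() always
# pops the entry with the smallest value first.
from functools import total_ordering

@total_ordering
class Elem:
    def __init__(self, accuracy, father_id, graph):
        self.accuracy = accuracy
        self.father_id = father_id
        self.graph = graph

    def __eq__(self, other):
        return self.accuracy == other.accuracy

    def __lt__(self, other):
        return self.accuracy < other.accuracy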
def to_deeper_model(model):
    """Return a deeper model.

    Args:
        model: The model from which to derive the deeper model.

    Returns:
        The deeper model.
    """
    graph = Graph(model)
    weighted_layers = list(filter(lambda x: isinstance(x, tuple(WEIGHTED_LAYER_FUNC_LIST)),
                                  model.layers))[:-1]
    target = weighted_layers[randint(0, len(weighted_layers) - 1)]
    if is_conv_layer(target):
        # Deepen a conv layer with a new conv layer of random odd kernel size.
        return graph.to_conv_deeper_model(target, randint(1, 2) * 2 + 1)
    return graph.to_dense_deeper_model(target)
def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDTH):
    pool = self._get_pool_layer_func()
    conv = get_conv_layer_func(len(self._get_shape(3)))
    ave = get_ave_layer_func(len(self._get_shape(3)))

    pooling_len = int(model_len / 4)
    model = StubModel()
    model.input_shape = self.input_shape
    model.inputs = [0]
    model.layers.append(StubInput())
    for i in range(model_len):
        model.layers += [StubActivation('relu'),
                         StubConv(model_width, kernel_size=3, func=conv),
                         StubBatchNormalization(),
                         StubDropout(constant.CONV_DROPOUT_RATE)]
        # Pool every pooling_len blocks, except after the last block.
        if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
            model.layers.append(StubPooling(func=pool))

    model.layers.append(StubGlobalPooling(ave))
    model.layers.append(StubDense(self.n_classes, activation='softmax'))
    model.outputs = [len(model.layers)]
    # Wire the layers sequentially by node id.
    for index, layer in enumerate(model.layers):
        layer.input = index
        layer.output = index + 1
    return Graph(model, False)
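# Example usage, assuming this generate() belongs to
# DefaultClassifierGenerator as instantiated in the search() functions
# above (the constructor arguments shown are illustrative):
graph = DefaultClassifierGenerator(n_classes=10, input_shape=(28, 28, 1)).generate()
model = graph.produce_model()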
def to_wider_model(model):
    """Return a wider model.

    Args:
        model: The model from which to derive the wider model.

    Returns:
        The wider model.
    """
    graph = Graph(model)
    weighted_layers = list(filter(lambda x: isinstance(x, tuple(WEIGHTED_LAYER_FUNC_LIST)),
                                  model.layers))[:-1]
    target = weighted_layers[randint(0, len(weighted_layers) - 1)]
    if is_conv_layer(target):
        n_add = randint(1, 4 * target.filters)
    else:
        n_add = randint(1, 4 * target.units)
    return graph.to_wider_model(target, n_add)
def train(args):
    graph, x_train, y_train, x_test, y_test, trainer_args, path = args
    model = graph.produce_model()
    # if path is not None:
    #     plot_model(model, to_file=path, show_shapes=True)
    loss, accuracy = ModelTrainer(model,
                                  x_train, y_train,
                                  x_test, y_test,
                                  False).train_model(**trainer_args)
    return accuracy, loss, Graph(model, True)
def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain=False):
    if trainer_args is None:
        trainer_args = {}
    y_train = self.y_encoder.transform(y_train)
    y_test = self.y_encoder.transform(y_test)

    searcher = self.load_searcher()
    model = searcher.load_best_model()
    if retrain:
        model = Graph(model, False).produce_model()
    ModelTrainer(model,
                 x_train, y_train,
                 x_test, y_test,
                 True).train_model(**trainer_args)
    searcher.replace_model(model, searcher.get_best_model_id())
def to_skip_connection_model(model):
    """Return a skip-connected model.

    Args:
        model: The model from which to derive the skip-connected model.

    Returns:
        The skip-connected model.
    """
    graph = Graph(model)
    weighted_layers = list(filter(lambda x: is_conv_layer(x), model.layers))
    index_a = randint(0, len(weighted_layers) - 1)
    index_b = randint(0, len(weighted_layers) - 1)
    if index_a > index_b:
        index_a, index_b = index_b, index_a
    a = weighted_layers[index_a]
    b = weighted_layers[index_b]
    if a.input.shape == b.output.shape:
        return graph.to_add_skip_model(a, b)
    elif random() < 0.5:
        return graph.to_add_skip_model(a, b)
    else:
        return graph.to_concat_skip_model(a, b)
def add_model(self, model, x_train, y_train, x_test, y_test):
    """Add one model, which will be trained, to the history list.

    Returns:
        The history item: a dict with model_id, loss, and accuracy.
    """
    loss, accuracy = ModelTrainer(model,
                                  x_train, y_train,
                                  x_test, y_test,
                                  False).train_model(**self.trainer_args)
    # Reward skip connections with a small accuracy bonus, capped at 1.
    accuracy += 0.005 * len(Graph(model, False).extract_descriptor().skip_connections)
    accuracy = min(accuracy, 1)

    model.save(os.path.join(self.path, str(self.model_count) + '.h5'))
    plot_model(model,
               to_file=os.path.join(self.path, str(self.model_count) + '.png'),
               show_shapes=True)
    model_id = self.model_count
    ret = {'model_id': model_id, 'loss': loss, 'accuracy': accuracy}
    self.history.append(ret)
    self.history_configs.append(extract_config(model))
    self.model_count += 1
    self.descriptors[Graph(model, False).extract_descriptor()] = True

    # Update the best_model text file.
    if model_id == self.get_best_model_id():
        with open(os.path.join(self.path, 'best_model.txt'), 'w') as file:
            file.write('best model: ' + str(model_id))

    if self.verbose:
        print('Model ID:', model_id)
        print('Loss:', loss)
        print('Accuracy:', accuracy)
    return ret
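# get_best_model_id (called above) is not shown in this section. Given the
# history entries created here, a minimal sketch (an assumption, not the
# verified original) returns the id of the highest-accuracy entry:
def get_best_model_id(self):
    return max(self.history, key=lambda x: x['accuracy'])['model_id']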
def search(self, x_train, y_train, x_test, y_test):
    if not self.history:
        model = DefaultClassifierGenerator(self.n_classes,
                                           self.input_shape).generate(self.default_model_len,
                                                                      self.default_model_width)
        history_item = self.add_model(model, x_train, y_train, x_test, y_test)
        self.search_tree.add_child(-1, history_item['model_id'])

        graph = Graph(model)
        self.init_search_queue = []
        # for child_graph in transform(graph):
        #     self.init_search_queue.append((child_graph, history_item['model_id']))
        self.init_gpr_x.append(graph.extract_descriptor())
        self.init_gpr_y.append(history_item['accuracy'])
        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        return

    if self.init_search_queue:
        graph, father_id = self.init_search_queue.pop()
        model = graph.produce_model()
        history_item = self.add_model(model, x_train, y_train, x_test, y_test)
        self.search_tree.add_child(father_id, history_item['model_id'])

        self.init_gpr_x.append(graph.extract_descriptor())
        self.init_gpr_y.append(history_item['accuracy'])
        pickle_to_file(self, os.path.join(self.path, 'searcher'))
        return

    if not self.init_search_queue and not self.gpr.first_fitted:
        self.gpr.first_fit(self.init_gpr_x, self.init_gpr_y)

    new_model, father_id = self.maximize_acq()
    history_item = self.add_model(new_model, x_train, y_train, x_test, y_test)
    self.search_tree.add_child(father_id, history_item['model_id'])

    self.gpr.incremental_fit(Graph(new_model).extract_descriptor(),
                             history_item['accuracy'])
    pickle_to_file(self, os.path.join(self.path, 'searcher'))
def test_deeper():
    model = to_deeper_graph(Graph(get_conv_dense_model(), False))
    assert isinstance(model, Graph)
def test_transform():
    models = transform(Graph(get_pooling_model(), False))
    assert len(models) == constant.N_NEIGHBOURS
def get_pooling_model():
    graph = Graph((5, 5, 3), False)
    output_node_id = 0
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    output_node_id = graph.add_layer(StubPooling(2), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.CONV_DROPOUT_RATE), output_node_id)

    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5),
                                     output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(5, 5), output_node_id)
    output_node_id = graph.add_layer(StubDropout(constant.DENSE_DROPOUT_RATE), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)

    graph.produce_model().set_weight_to_graph()
    return graph
def test_edit_distance():
    descriptor1 = Graph(get_add_skip_model()).extract_descriptor()
    descriptor2 = Graph(get_concat_skip_model()).extract_descriptor()
    assert edit_distance(descriptor1, descriptor2) == 2.0
def generate(self, model_len=Constant.MODEL_LEN, model_width=Constant.MODEL_WIDTH):
    pooling_len = int(model_len / 4)
    graph = Graph(self.input_shape, False)
    temp_input_channel = self.input_shape[-1]
    output_node_id = 0
    for i in range(model_len):
        output_node_id = graph.add_layer(StubReLU(), output_node_id)
        output_node_id = graph.add_layer(StubConv(temp_input_channel, model_width, kernel_size=3),
                                         output_node_id)
        output_node_id = graph.add_layer(StubBatchNormalization(model_width), output_node_id)
        temp_input_channel = model_width
        if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
            output_node_id = graph.add_layer(StubPooling(), output_node_id)

    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubDropout(Constant.CONV_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], model_width),
                                     output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(model_width, self.n_classes), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)
    return graph
def test_legal_graph():
    graph = Graph(get_pooling_model(), False)
    graph.to_add_skip_model(1, 5)
    assert legal_graph(graph)
    graph.to_add_skip_model(1, 5)
    assert not legal_graph(graph)
def test_deeper():
    model = to_deeper_graph(Graph(to_stub_model(get_conv_dense_model())))
    assert isinstance(model, Graph)
def test_wider():
    model = to_wider_graph(Graph(get_pooling_model(), False))
    assert isinstance(model, Graph)
def generate(self, model_len=Constant.MODEL_LEN, model_width=Constant.MODEL_WIDTH):
    pooling_len = int(model_len / 4)
    graph = Graph(self.input_shape, False)
    temp_input_channel = self.input_shape[-1]
    output_node_id = 0
    for i in range(model_len):
        output_node_id = graph.add_layer(StubReLU(), output_node_id)
        output_node_id = graph.add_layer(StubConv(temp_input_channel, model_width, kernel_size=3),
                                         output_node_id)
        output_node_id = graph.add_layer(StubBatchNormalization(model_width), output_node_id)
        temp_input_channel = model_width
        if pooling_len == 0 or ((i + 1) % pooling_len == 0 and i != model_len - 1):
            output_node_id = graph.add_layer(StubPooling(), output_node_id)

    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubDropout(Constant.CONV_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], model_width),
                                     output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    graph.add_layer(StubDense(model_width, self.n_output_node), output_node_id)
    return graph
def test_legal_graph2():
    graph = Graph(get_pooling_model(), False)
    graph.to_concat_skip_model(2, 6)
    assert legal_graph(graph)
    graph.to_concat_skip_model(2, 6)
    assert not legal_graph(graph)
def test_transform():
    models = transform(Graph(get_pooling_model(), False))
    assert len(models) == 12
def test_transform():
    models = transform(Graph(to_stub_model(get_pooling_model())))
    assert len(models) == constant.N_NEIGHBORS
def get_add_skip_model():
    graph = Graph((32, 32, 3), False)
    output_node_id = 0
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)

    # First add-skip connection.
    temp_node_id = output_node_id
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    temp_node_id = graph.add_layer(StubReLU(), temp_node_id)
    temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id)
    temp_node_id = graph.add_layer(StubBatchNormalization(3), temp_node_id)
    output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id])

    # Second add-skip connection.
    temp_node_id = output_node_id
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    temp_node_id = graph.add_layer(StubReLU(), temp_node_id)
    temp_node_id = graph.add_layer(StubConv(3, 3, 1), temp_node_id)
    temp_node_id = graph.add_layer(StubBatchNormalization(3), temp_node_id)
    output_node_id = graph.add_layer(StubAdd(), [output_node_id, temp_node_id])

    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubConv(3, 3, 3), output_node_id)
    output_node_id = graph.add_layer(StubBatchNormalization(3), output_node_id)
    output_node_id = graph.add_layer(StubFlatten(), output_node_id)
    output_node_id = graph.add_layer(StubDropout(Constant.CONV_DROPOUT_RATE), output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(graph.node_list[output_node_id].shape[0], 5),
                                     output_node_id)
    output_node_id = graph.add_layer(StubReLU(), output_node_id)
    output_node_id = graph.add_layer(StubDense(5, 5), output_node_id)
    graph.add_layer(StubSoftmax(), output_node_id)

    graph.produce_model().set_weight_to_graph()
    return graph
def test_wider():
    model = to_wider_graph(Graph(to_stub_model(get_pooling_model())))
    assert isinstance(model, Graph)
def test_skip():
    model = to_skip_connection_graph(Graph(get_pooling_model(), False))
    assert isinstance(model, Graph)