def crossover(self, parent_1, parent_2, crossover_index=-1):
    """Single-point crossover between two parent ``Sequential`` models.

    Strips the last two modules (the appended flatten/output head) from each
    parent, swaps the remaining layer lists at ``crossover_index``, and builds
    child models from the recombined lists.  When no valid recombination is
    found within ``self.max_crossover_attempts`` tries, the parents are
    returned unchanged.

    Args:
        parent_1: first parent model (``nn.Sequential``-like).
        parent_2: second parent model (``nn.Sequential``-like).
        crossover_index: cut position; ``-1`` (default) draws a fresh random
            index on every attempt.

    Returns:
        A 2-tuple of models.  Each element is a newly built child when its
        recombined layer list is valid, otherwise the corresponding parent.
    """
    # Drop the trailing flatten + output modules; crossover recombines only
    # the architecture body.
    parent_1_co = parent_1[:-2]
    parent_2_co = parent_2[:-2]
    if self.weight_inheritance:
        parent_1_state_dict = parent_1.state_dict()
        parent_2_state_dict = parent_2.state_dict()
    parent_1_layer_list = sequential_to_layer_list(parent_1_co)
    parent_2_layer_list = sequential_to_layer_list(parent_2_co)
    # Remember whether the caller fixed the cut point.  Only auto mode may
    # re-draw a new index on a failed attempt.
    auto_index = crossover_index == -1
    valid_1, valid_2 = False, False
    n_attempts = 0
    while not valid_1 and not valid_2:
        if auto_index:
            crossover_index = random.randrange(1, len(parent_1_co))
        child_1_layer_list = (parent_1_layer_list[:crossover_index] +
                              parent_2_layer_list[crossover_index:])
        child_2_layer_list = (parent_2_layer_list[:crossover_index] +
                              parent_1_layer_list[crossover_index:])
        valid_1 = calculate_activation_sizes(
            child_1_layer_list, self.X_train.shape[1:]) != -1
        valid_2 = calculate_activation_sizes(
            child_2_layer_list, self.X_train.shape[1:]) != -1
        # BUG FIX: the attempt counter was never incremented, so the
        # max-attempts guard below could never fire and an unlucky run
        # looped forever.
        n_attempts += 1
        if n_attempts > self.max_crossover_attempts:
            break
        # BUG FIX: a caller-fixed index can never change between
        # iterations, so retrying the same invalid cut is pointless.
        if not auto_index and not valid_1 and not valid_2:
            break
    output_size = len(np.unique(self.y_train))
    if valid_1 and valid_2:
        child_1 = generate_sequential(child_1_layer_list,
                                      self.X_train.shape[1:],
                                      output_size,
                                      append_flatten=True)
        child_2 = generate_sequential(child_2_layer_list,
                                      self.X_train.shape[1:],
                                      output_size,
                                      append_flatten=True)
        if self.weight_inheritance:
            load_state(child_1, parent_1_state_dict, parent_2_state_dict,
                       crossover_index)
            load_state(child_2, parent_2_state_dict, parent_1_state_dict,
                       crossover_index)
        return child_1, child_2
    elif valid_1 and not valid_2:
        child_1 = generate_sequential(child_1_layer_list,
                                      self.X_train.shape[1:],
                                      output_size,
                                      append_flatten=True)
        if self.weight_inheritance:
            load_state(child_1, parent_1_state_dict, parent_2_state_dict,
                       crossover_index)
        return child_1, parent_2
    elif valid_2 and not valid_1:
        child_2 = generate_sequential(child_2_layer_list,
                                      self.X_train.shape[1:],
                                      output_size,
                                      append_flatten=True)
        if self.weight_inheritance:
            load_state(child_2, parent_2_state_dict, parent_1_state_dict,
                       crossover_index)
        return parent_1, child_2
    else:
        logging.debug(
            f'Failed crossover attempt after {self.max_crossover_attempts} tries'
        )
        return parent_1, parent_2
def get_best_individual(self):
    """Build and return a model for the GA's current best individual.

    Raises:
        Exception: if the GA has not produced a generation yet.
    """
    if len(self.ga.current_generation) == 0:
        raise Exception(
            'Cannot return best individual before running the GA')
    output_size = len(np.unique(self.y_train))
    # best_individual() yields a (fitness, genes) pair; index 1 is the genes.
    best_genes = self.ga.best_individual()[1]
    return generate_sequential(best_genes,
                               self.X_train.shape[1:],
                               output_size,
                               append_flatten=False)
def test_sequential_generation(self):
    """A generated Sequential has one module per layer plus two appended ones."""
    input_shape = [3, 100, 100]
    random_layers = model_generation.generate_random_model(
        10, (100, 100), AVAILABLE_MODULES, MAX_N_KERNELS,
        MAX_CONV_KERNEL_SIZE, MAX_CONV_STRIDE, MAX_CONV_DILATION,
        MAX_CONV_OUT_CHANNELS, MAX_POOLING_KERNEL_SIZE, MAX_POOLING_STRIDE)
    model = generate_sequential(random_layers,
                                input_shape=input_shape,
                                output_size=10)
    # generate_sequential appends two extra modules (flatten + output head).
    self.assertEqual(len(model), len(random_layers) + 2)
def create_individual(self, data):
    """Create one random candidate model for the GA population.

    The ``data`` argument is required by the GA framework's callback
    signature but is not used here.
    """
    in_shape = self.X_train.shape[1:]
    layer_list = generate_random_model(
        self.model_n_layers, in_shape, self.available_modules,
        self.max_n_kernels, self.max_conv_kernel_size,
        self.max_conv_stride, self.max_conv_dilation,
        self.max_conv_out_channels, self.max_pooling_kernel_size,
        self.max_pooling_stride)
    return generate_sequential(layer_list,
                               input_shape=in_shape,
                               output_size=len(np.unique(self.y_train)))
def test_sequential_to_layer_list(self):
    """Round-tripping layers through a Sequential preserves layer types."""
    input_shape = [3, 100, 100]
    original_layers = model_generation.generate_random_model(
        30, (100, 100), AVAILABLE_MODULES, MAX_N_KERNELS,
        MAX_CONV_KERNEL_SIZE, MAX_CONV_STRIDE, MAX_CONV_DILATION,
        MAX_CONV_OUT_CHANNELS, MAX_POOLING_KERNEL_SIZE, MAX_POOLING_STRIDE)
    model = generate_sequential(original_layers,
                                input_shape=input_shape,
                                output_size=10)
    recovered_layers = sequential_to_layer_list(model)
    for recovered, original in zip(recovered_layers, original_layers):
        self.assertEqual(type(recovered), type(original))
def test_score_model(self):
    """Smoke-test scoring of a random model on a synthetic dataset."""
    input_shape = [1, 20]
    layer_list = model_generation.generate_random_model(
        10, input_shape, AVAILABLE_MODULES, MAX_N_KERNELS,
        MAX_CONV_KERNEL_SIZE, MAX_CONV_STRIDE, MAX_CONV_DILATION,
        MAX_CONV_OUT_CHANNELS, MAX_POOLING_KERNEL_SIZE, MAX_POOLING_STRIDE)
    X, y = make_classification(100)
    # Add channel and trailing width axes: (N, 1, 20, 1).
    X = X[:, None, :, None]
    model = generate_sequential(layer_list,
                                input_shape=input_shape,
                                output_size=10)
    # 50/50 train/validation split, batch size 32, one epoch.
    score = get_model_score(model, X[:50], y[:50], X[50:], y[50:], 32, 1)
    print(score)
def mutate(self, individual):
    """Replace one randomly chosen layer of ``individual`` with a freshly
    randomized layer, re-drawing until the architecture stays valid.

    Returns the rebuilt model; weights are re-loaded from the original
    when weight inheritance is enabled.
    """
    if self.weight_inheritance:
        state_dict = individual.state_dict()
    layer_list = sequential_to_layer_list(individual)
    # NOTE(review): the index ranges over every module, so the appended
    # flatten/output modules may be replaced too — confirm this is intended
    # (crossover excludes them via parent[:-2]).
    mutate_index = random.randrange(len(individual))
    valid_model = False
    # A valid replacement layer will eventually be drawn, so looping
    # without a cap is acceptable here.
    while not valid_model:
        layer_list[mutate_index] = random_initialize_layer(
            random.choice(self.available_modules), self.X_train.shape[1:],
            self.max_n_kernels, self.max_conv_kernel_size,
            self.max_conv_stride, self.max_conv_dilation,
            self.max_conv_out_channels, self.max_pooling_kernel_size,
            self.max_pooling_stride)
        valid_model = calculate_activation_sizes(
            layer_list, self.X_train.shape[1:]) != -1
    individual = generate_sequential(layer_list,
                                     self.X_train.shape[1:],
                                     len(np.unique(self.y_train)),
                                     append_flatten=False)
    if self.weight_inheritance:
        # Same state dict on both sides with index 0: re-load the original
        # weights into the mutated architecture where they still fit.
        load_state(individual, state_dict, state_dict, 0)
    return individual
def test_weight_inheritance(self):
    """Crossover children must inherit weights from the matching parents."""
    X, y = make_classification(100, n_features=100)
    # Add channel and trailing width axes: (N, 1, 100, 1).
    X = X[:, None, :, None]
    ea = EasyNASGA(X, y, population_size=2, generations=1,
                   weight_inheritance=True)
    input_shape = [1, 100, 1]

    def make_layers(kernel_height):
        # Three identical, deterministically sized conv layers.
        return [
            init_conv_layer(input_shape, n_kernels=2,
                            conv_kernel_size=(kernel_height, 1),
                            conv_stride=(1, 1), conv_dilation=(1, 1),
                            conv_out_channels=10, random_sizes=False)
            for _ in range(3)
        ]

    # Build both layer lists before the models, preserving RNG order.
    layers_a = make_layers(10)
    layers_b = make_layers(20)
    seq_1 = generate_sequential(layers_a, input_shape=input_shape,
                                output_size=10)
    seq_2 = generate_sequential(layers_b, input_shape=input_shape,
                                output_size=10)
    cut_point = 1
    child_1, child_2 = ea.crossover(seq_1, seq_2, cut_point)
    seq_1_child = generate_sequential(child_1, input_shape=input_shape,
                                      output_size=10, append_flatten=False)
    seq_2_child = generate_sequential(child_2, input_shape=input_shape,
                                      output_size=10, append_flatten=False)
    # Before the cut each child matches its primary parent; after the cut
    # it matches the other parent.
    assert (seq_1_child._modules['0'].weight == seq_1._modules['0'].weight
            ).all()
    assert (seq_1_child._modules['2'].weight == seq_2._modules['2'].weight
            ).all()
    assert (seq_2_child._modules['0'].weight == seq_2._modules['0'].weight
            ).all()
    assert (seq_2_child._modules['2'].weight == seq_1._modules['2'].weight
            ).all()