import random

import tensorflow
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (AveragePooling2D, Conv2D, Dense, Dropout,
                                     Flatten, MaxPooling2D)


def fitness(indiv):
    optimizer_dict = indiv['optimizer']
    # TODO if possible, get computation time and add it as a penalty
    if optimizer_dict['optimizer_type'] == 'adam':
        # lr_schedule = tensorflow.keras.optimizers.schedules.ExponentialDecay(
        #     initial_learning_rate=optimizer_dict['lr'],
        #     decay_steps=optimizer_dict['decay_steps'],
        #     decay_rate=optimizer_dict['decay'],
        #     staircase=optimizer_dict['staircase'])
        # opt = tensorflow.keras.optimizers.Adam(learning_rate=lr_schedule,
        #                                        beta_1=optimizer_dict['b1'],
        #                                        beta_2=optimizer_dict['b2'],
        #                                        epsilon=optimizer_dict['epsilon'])
        opt = tensorflow.keras.optimizers.Adam(learning_rate=optimizer_dict['lr'],
                                               beta_1=optimizer_dict['b1'],
                                               beta_2=optimizer_dict['b2'],
                                               epsilon=optimizer_dict['epsilon'])
    elif optimizer_dict['optimizer_type'] == 'sga':
        lr_schedule = tensorflow.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate=optimizer_dict['lr'],
            decay_steps=optimizer_dict['decay_steps'],
            decay_rate=optimizer_dict['decay'],
            staircase=optimizer_dict['staircase'])
        opt = tensorflow.keras.optimizers.SGD(learning_rate=lr_schedule,
                                              momentum=optimizer_dict['momentum'],
                                              nesterov=optimizer_dict['nesterov'])
    try:
        model = Sequential.from_config(indiv['architecture'])
        print(model.layers)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        # data_split = (x_train, x_val, y_train, y_val) is assumed to be defined
        # elsewhere in this module
        model.fit(data_split[0], data_split[2], batch_size=32, epochs=20, verbose=False)
        fit = model.evaluate(data_split[1], data_split[3])[1]
    except tensorflow.errors.ResourceExhaustedError:
        # Architectures too large for GPU memory get the worst possible fitness
        print("Went OOM")
        print(indiv['architecture'])
        fit = 0
    print(fit)
    try:
        del model
    except UnboundLocalError:  # model was never created (e.g. OOM during from_config)
        pass
    # Free graph/session state so repeated evaluations don't leak memory
    tensorflow.keras.backend.clear_session()
    tensorflow.compat.v1.reset_default_graph()
    return (fit,)
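
# A minimal sketch of how fitness() could plug into DEAP, assumed here only from
# the one-element fitness tuple it returns; the creator/toolbox names below are
# illustrative assumptions, not definitions used elsewhere in this file.
from deap import base, creator

creator.create("FitnessMax", base.Fitness, weights=(1.0,))  # maximize val accuracy
creator.create("Individual", dict, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("evaluate", fitness)
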
def mutate_architecture(indiv):
    model = Sequential.from_config(indiv['architecture'])
    model.summary()
    rand_val = random.random()
    # TODO double check that this works
    flatten_index = next(index for index, layer in enumerate(model.layers)
                         if isinstance(layer, Flatten))
    if rand_val < 0.025:  # insert a random layer (2.5% chance)
        model = insert_new_layer(model, flatten_index)
    elif rand_val < 0.05:  # insert a copied existing layer (2.5% chance)
        model = insert_new_layer(model, flatten_index, is_copy=True)
    elif rand_val < 0.1:  # remove a random layer (5% chance)
        remove_random_layer(model, flatten_index)
    elif rand_val < 0.125:  # move an existing layer (2.5% chance)
        model = insert_new_layer(model, flatten_index, is_copy=True,
                                 remove_original=True)
    # TODO decide if there should be an 'else:' here, so layer parameters are
    # only tweaked if none of the above happened
    # Structural mutations may have shifted the Flatten layer, so re-locate it
    flatten_index = next(index for index, layer in enumerate(model.layers)
                         if isinstance(layer, Flatten))
    for i, layer in enumerate(model.layers[:-1]):
        # Last layer (dense with 2 nodes) should never be mutated
        if i < flatten_index:
            if isinstance(layer, Conv2D):
                mutate_conv(layer)
            elif isinstance(layer, (MaxPooling2D, AveragePooling2D)):
                mutate_pool(layer)
        elif i > flatten_index:
            if isinstance(layer, Dense):
                mutate_dense(layer)
            elif isinstance(layer, Dropout):
                mutate_dropout(layer)
    model.summary()
    indiv['architecture'] = model.get_config()
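
# Hypothetical sketches of two of the parameter-tweak helpers referenced above;
# the names mutate_dense/mutate_dropout come from mutate_architecture, but their
# bodies here are illustrative assumptions. Each nudges a layer attribute in
# place, which model.get_config() then serializes back into the individual.
def mutate_dense(layer):
    # Perturb the unit count by up to ±25%, never dropping below one unit
    layer.units = max(1, round(layer.units * random.uniform(0.75, 1.25)))


def mutate_dropout(layer):
    # Shift the drop rate by a small random step, clamped to [0.0, 0.9]
    layer.rate = min(0.9, max(0.0, layer.rate + random.uniform(-0.1, 0.1)))
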
def architecture_crossover(indiv1, indiv2):
    new_model = Sequential()
    model1, model2 = (Sequential.from_config(indiv1['architecture']),
                      Sequential.from_config(indiv2['architecture']))
    i1, i2 = 1, 1  # Ignore the first layer
    while i1 < len(model1.layers) and i2 < len(model2.layers):
        layer1, layer2 = model1.layers[i1], model2.layers[i2]
        # When reaching the end of one (or both) parents
        if i1 == len(model1.layers) - 1:
            if random.random() < 0.5:
                # Add the final Dense layer and break out of the loop
                new_model.add(layer1)
                break
            else:
                # Add the other parent's layer and continue (doesn't increment i1)
                new_model.add(layer2)
                i2 += 1
                continue
            # Note: even if both layers are the final Dense, this still ends the loop
        elif i2 == len(model2.layers) - 1:
            if random.random() < 0.5:
                # Add the final Dense layer and break out of the loop
                new_model.add(layer2)
                break
            else:
                # Add the other parent's layer and continue (doesn't increment i2)
                new_model.add(layer1)
                i1 += 1
                continue
            # Note: even if both layers are the final Dense, this still ends the loop
        # TODO maybe will not work as is because output shapes will change in
        # new_model, and we are simply copy-pasting layers with their existing
        # input shapes. A potential solution would be to recreate each layer
        # with the same parameters.
        if not isinstance(layer1, type(layer2)):  # Different layer types
            if isinstance(layer1, Flatten):
                if random.random() < 0.5:
                    # TODO Might not be best; will favor shorter children
                    # because of the 50% chance of ending the conv section here
                    new_model.add(Flatten())
                    while not isinstance(model2.layers[i2], Flatten):
                        i2 += 1  # skip ahead until reaching model2's Flatten layer
                    i1 += 1  # step both parents past their Flatten layers
                    i2 += 1
                else:
                    new_model.add(layer2)
                    i2 += 1
                continue  # Don't increment i1 here, or it'd move onto the Dense/Dropout section
            elif isinstance(layer2, Flatten):
                if random.random() < 0.5:
                    # TODO Might not be best; will favor shorter children
                    # because of the 50% chance of ending the conv section here
                    new_model.add(Flatten())
                    while not isinstance(model1.layers[i1], Flatten):
                        i1 += 1  # skip ahead until reaching model1's Flatten layer
                    i1 += 1  # step both parents past their Flatten layers
                    i2 += 1
                else:
                    new_model.add(layer1)
                    i1 += 1
                continue  # Don't increment i2 here, or it'd move onto the Dense/Dropout section
            else:
                new_model.add(random.choice([layer1, layer2]))
        else:  # Same layer types
            # TODO to save computation, only need to check one of the layers
            # (both are the same type at this point)
            if all(isinstance(x, Conv2D) for x in (layer1, layer2)):  # Both convolutional
                new_model.add(crossover_convolutional(layer1, layer2))
            elif all(isinstance(x, (MaxPooling2D, AveragePooling2D))
                     for x in (layer1, layer2)):  # Both pooling
                layer_type = random.choice([type(layer1), type(layer2)])
                new_model.add(crossover_pooling(layer_type, layer1, layer2))
            elif all(isinstance(x, Dense) for x in (layer1, layer2)):  # Both Dense
                new_model.add(Dense(round((layer1.units + layer2.units) / 2)))
            elif all(isinstance(x, Dropout) for x in (layer1, layer2)):  # Both Dropout
                new_model.add(Dropout((layer1.rate + layer2.rate) / 2))
            elif all(isinstance(x, Flatten) for x in (layer1, layer2)):
                new_model.add(Flatten())
            else:
                print("An unexpected edge case has occurred: "
                      "no recombination possible for this layer")
        i1 += 1
        i2 += 1
    return new_model
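
# Hypothetical sketch of the crossover_pooling helper called above (only its
# signature appears in this file; the body is an illustrative assumption). It
# builds a fresh layer of the chosen pooling type with an averaged pool size,
# which also sidesteps the copied-input-shape problem flagged in the TODO,
# since the parent layer objects themselves are never reused.
def crossover_pooling(layer_type, layer1, layer2):
    # Average the parents' pool sizes element-wise, e.g. (2, 2) and (4, 4) -> (3, 3)
    new_size = tuple(round((a + b) / 2)
                     for a, b in zip(layer1.pool_size, layer2.pool_size))
    return layer_type(pool_size=new_size)
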