Example #1
import tensorflow
from tensorflow.keras.models import Sequential


def fitness(indiv):
    optimizer_dict = indiv['optimizer']
    # TODO: if possible, get computation time and add a penalty
    if optimizer_dict['optimizer_type'] == 'adam':
        # lr_schedule = tensorflow.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=optimizer_dict['lr'],
        #                                                                      decay_steps=optimizer_dict['decay_steps'],
        #                                                                      decay_rate=optimizer_dict['decay'],
        #                                                                      staircase=optimizer_dict['staircase'])
        # opt = tensorflow.keras.optimizers.Adam(learning_rate=lr_schedule, beta_1=optimizer_dict['b1'],
        #                                        beta_2=optimizer_dict['b2'],
        #                                        epsilon=optimizer_dict['epsilon'])
        opt = tensorflow.keras.optimizers.Adam(learning_rate=optimizer_dict['lr'], beta_1=optimizer_dict['b1'],
                                               beta_2=optimizer_dict['b2'],
                                               epsilon=optimizer_dict['epsilon'])
    elif optimizer_dict['optimizer_type'] == 'sga':
        lr_schedule = tensorflow.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=optimizer_dict['lr'],
                                                                             decay_steps=optimizer_dict['decay_steps'],
                                                                             decay_rate=optimizer_dict['decay'],
                                                                             staircase=optimizer_dict['staircase'])
        opt = tensorflow.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=optimizer_dict['momentum'],
                                              nesterov=optimizer_dict['nesterov'])
    else:
        # Guard against unknown optimizer types; opt would otherwise be unbound below
        raise ValueError(f"Unknown optimizer_type: {optimizer_dict['optimizer_type']}")

    try:
        # Rebuild the Keras model from the stored architecture config
        model = Sequential.from_config(indiv['architecture'])
        print(model.layers)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

        # data_split is assumed to be a module-level (x_train, x_val, y_train, y_val) split
        model.fit(data_split[0], data_split[2], batch_size=32, epochs=20, verbose=False)
        fit = model.evaluate(data_split[1], data_split[3])[1]  # index 1 = accuracy
    except tensorflow.errors.ResourceExhaustedError:
        print("Went OOM")
        print(indiv['architecture'])
        fit = 0
    print(fit)

    # Free model memory between evaluations so repeated fitness calls don't leak
    try:
        del model
    except UnboundLocalError:  # model may be unbound if OOM happened before it was created
        pass
    tensorflow.keras.backend.clear_session()
    tensorflow.compat.v1.reset_default_graph()

    return (fit,)
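The one-element tuple returned here matches the single-objective fitness convention of the DEAP library. Purely as a hedged sketch (DEAP and the registration below are assumptions, not shown in the original code), the function might be wired into an evolutionary loop like this:

from deap import base, creator

# Hypothetical DEAP setup; the dict-based individual mirrors indiv['optimizer'] /
# indiv['architecture'] usage above, but is an assumption.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", dict, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("evaluate", fitness)

# Later, inside the evolutionary loop:
# indiv.fitness.values = toolbox.evaluate(indiv)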
Example #2
import random

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, AveragePooling2D,
                                     Dense, Dropout, Flatten)


def mutate_architecture(indiv):
    model = Sequential.from_config(indiv['architecture'])
    model.summary()  # summary() prints itself; wrapping it in print() would also print None
    rand_val = random.random()

    # TODO double check that this works
    # Note: next() raises StopIteration if the model has no Flatten layer
    flatten_index = next(index for index, layer in enumerate(model.layers)
                         if isinstance(layer, Flatten))
    if rand_val < 0.025:  # insert a random layer (2.5% chance; '< 1' was a leftover debug value)
        model = insert_new_layer(model, flatten_index)
    elif rand_val < 0.05:  # insert a copied existing layer (2.5% chance)
        model = insert_new_layer(model, flatten_index, is_copy=True)
    elif rand_val < 0.1:  # remove a random layer (5% chance)
        model = remove_random_layer(model, flatten_index)
    elif rand_val < 0.125:  # move an existing layer (2.5% chance)
        model = insert_new_layer(model,
                                 flatten_index,
                                 is_copy=True,
                                 remove_original=True)
    # TODO decide if there should be an 'else:' here, so layer parameters are only tweaked if none of the above happened
    # The last layer (Dense with 2 nodes) should never be mutated
    for i, layer in enumerate(model.layers[:-1]):
        if i < flatten_index:
            if isinstance(layer, Conv2D):
                mutate_conv(layer)
            elif isinstance(layer, (MaxPooling2D, AveragePooling2D)):
                mutate_pool(layer)
        elif i > flatten_index:
            if isinstance(layer, Dense):
                mutate_dense(layer)
            elif isinstance(layer, Dropout):
                mutate_dropout(layer)
    model.summary()
    indiv['architecture'] = model.get_config()
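None of the mutation helpers (insert_new_layer, remove_random_layer, mutate_conv, mutate_pool, mutate_dense, mutate_dropout) are shown in the listing. As a minimal sketch of what the simplest one might look like, assuming the parameter-tweaking helpers mutate layer attributes in place (the uniform perturbation and clamp range below are invented for illustration):

import random

# Hypothetical sketch of the unshown mutate_dropout helper; the
# perturbation scheme and [0, 0.9] clamp are assumptions.
def mutate_dropout(layer, scale=0.1):
    # Nudge the dropout rate by a small uniform step, then clamp it
    new_rate = layer.rate + random.uniform(-scale, scale)
    layer.rate = min(max(new_rate, 0.0), 0.9)

Because get_config() serializes the current rate attribute, an in-place tweak like this survives the round-trip back into indiv['architecture'].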
def architecture_crossover(indiv1, indiv2):
    new_model = Sequential()
    model1 = Sequential.from_config(indiv1['architecture'])
    model2 = Sequential.from_config(indiv2['architecture'])
    i1, i2 = 1, 1  # Ignore the first layer

    while i1 < len(model1.layers) and i2 < len(model2.layers):
        layer1, layer2 = model1.layers[i1], model2.layers[i2]  # index into the list, not .index()

        # When reaching the end of one (or both) models
        if i1 == len(model1.layers) - 1:
            if random.random() < 0.5:  # Add the final Dense layer and break out of the loop
                new_model.add(layer1)
                break
            else:  # Add the other layer and increment it, but don't increment i1
                new_model.add(layer2)
                i2 += 1
                continue  # Note: even if both layers are final Dense, this still causes the loop to end
        elif i2 == len(model2.layers) - 1:
            if random.random() < 0.5:  # Add the final Dense layer and break out of the loop
                new_model.add(layer2)
                break
            else:  # Add the other layer and increment it, but don't increment i2
                new_model.add(layer1)
                i1 += 1
                continue  # Note: even if both layers are final Dense, this still causes the loop to end

        # TODO maybe will not work as-is because output shapes will change in new_model, and
        #  we are simply copy-pasting layers with their existing input shapes.
        #  A potential solution would be to recreate each layer with the same parameters
        if not isinstance(layer1, type(layer2)):  # If different layer types
            if isinstance(layer1, Flatten):
                # TODO Might not be best; will favor shorter children bc 50% chance of ending
                if random.random() < 0.5:
                    new_model.add(Flatten())
                    # Increment i2 until reaching model2's Flatten layer
                    while not isinstance(model2.layers[i2], Flatten):
                        i2 += 1
                else:
                    new_model.add(layer2)
                    i2 += 1
                    continue  # Don't want i1 to increment, because then it'd move onto Dense/Dropout sections
            elif isinstance(layer2, Flatten):
                # TODO Might not be best; will favor shorter children bc 50% chance of ending
                if random.random() < 0.5:
                    new_model.add(Flatten())
                    # Increment i1 until reaching model1's Flatten layer
                    while not isinstance(model1.layers[i1], Flatten):
                        i1 += 1
                else:
                    new_model.add(layer1)
                    i1 += 1
                    continue  # Don't want i2 to increment, because then it'd move onto Dense/Dropout sections
            else:
                new_model.add(random.choice([layer1, layer2]))
        else:  # If same layer types
            # TODO to save computation, only need to check one of the layers (because they are the same type)
            if all(isinstance(x, Conv2D) for x in (layer1, layer2)):  # If both are convolutional
                new_model.add(crossover_convolutional(layer1, layer2))
            elif all(isinstance(x, (MaxPooling2D, AveragePooling2D))
                     for x in (layer1, layer2)):  # If both are pooling
                layer_type = random.choice([type(layer1), type(layer2)])
                new_model.add(crossover_pooling(layer_type, layer1, layer2))
            elif all(isinstance(x, Dense) for x in (layer1, layer2)):  # If both are Dense
                new_model.add(Dense(round((layer1.units + layer2.units) / 2)))
            elif all(isinstance(x, Dropout) for x in (layer1, layer2)):  # If both are Dropout
                new_model.add(Dropout((layer1.rate + layer2.rate) / 2))
            elif all(isinstance(x, Flatten) for x in (layer1, layer2)):
                new_model.add(Flatten())
            else:
                print("An unexpected edge case has occurred: no recombination possible for this layer")

        i1 += 1
        i2 += 1

    return new_model  # Return outside the while loop so every layer pair is processed
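A hypothetical call site, assuming indiv1 and indiv2 are dict individuals shaped like those in the examples above (inheriting the optimizer settings from the first parent is an assumption, not part of the original code):

# Hypothetical usage sketch
child_model = architecture_crossover(indiv1, indiv2)

child = dict(indiv1)  # assumption: copy optimizer settings from the first parent
child['architecture'] = child_model.get_config()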