Example #1
def mutate_network(n_mutations, model, model_idx, batch, max_n_params, times):
    """
    Mutate the initial network n_mutations times.

    Args:
        n_mutations: number of mutations to apply
        model: initial model to mutate
        model_idx: index of the model
        batch: first minibatch of the training set
        max_n_params: parameter-count threshold that stops further mutation
        times: a list tracking overall processing time, mutation time and
            training time (in that order)
    Returns:
        The updated times and the mutated model.
    """

    # Do not mutate the first child. The mutations might make it worse, so keep
    # one unchanged copy. If it wins the given round, it might be mutated later.
    if model_idx == 0:
        return times, model

    for _ in range(n_mutations):

        time_mut_s = time.time()

        # Relative probabilities over the six mutation operators (the last one
        # is disabled), as in the examples below
        mutations_probs = np.array([1, 1, 1, 1, 1, 0])

        # Given a model, mutate it and return the new model together with the
        # applied mutation type and its parameters
        model, mutation_type, params = network_operators.MutateNetwork(
            model, batch, mutation_probs=mutations_probs)

        times[1] += time.time() - time_mut_s

        # Check the number of parameters reached. If it exceeds the threshold,
        # stop mutating.
        pytorch_total_params = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)

        if pytorch_total_params > max_n_params:
            break

    return times, model
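
For context, here is a minimal usage sketch of mutate_network. It is a hypothetical driver, not part of the source: it assumes the same module-level globals the examples below rely on (batch, max_n_params) and mirrors how Example #2 builds a child from the parent's descriptor and initial weights.

# Hypothetical driver for mutate_network (assumptions: ConvNet, parent, batch,
# max_n_params and init_weights_path are defined as in Example #2 below).
pytorch_model = ConvNet(parent['model_descriptor'])
pytorch_model.cuda()
pytorch_model.load_state_dict(torch.load(init_weights_path), strict=False)
child = {
    'pytorch_model': pytorch_model,
    'model_descriptor': copy.deepcopy(parent['model_descriptor']),
    'topo_ordering': pytorch_model.topo_ordering
}
times = [0, 0, 0]  # overall, mutations, training
times, child = mutate_network(n_mutations=5, model=child, model_idx=1,
                              batch=batch, max_n_params=max_n_params,
                              times=times)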
Example #2
def EvalNextGen(n_models, n_mutations, n_epochs_total, initial_model, savepath,
                folder_out):
    """
    generate and train children, update best model

    n_models = number of child models
    n_mutations = number of mutations/network operators to be applied per model_descriptor
    n_epochs_total = number of epochs for training in total
    initial model = current best model_descriptor
    savepath = where to save stuff
    folder_out = where to save the general files for one run
    """

    # epochs for training each model
    n_epochs_each = int(np.floor(n_epochs_total / n_models))

    print('Train all models for', n_epochs_each, 'epochs.')

    init_weights_path = savepath + 'ini_weights'
    torch.save(initial_model['pytorch_model'].state_dict(), init_weights_path)

    performance = np.zeros(shape=(n_models, ))
    descriptors = []

    for model_idx in range(0, n_models):
        print('model idx ' + str(model_idx))

        # save some data
        time_overall_s = time.time()

        pytorch_model = ConvNet(initial_model['model_descriptor'])
        pytorch_model.cuda()
        pytorch_model.load_state_dict(torch.load(init_weights_path),
                                      strict=False)

        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(initial_model['model_descriptor']),
            'topo_ordering': pytorch_model.topo_ordering
        }

        descriptors.append(model['model_descriptor'])
        mutations_applied = []
        # overall , mutations, training
        times = [0, 0, 0]

        # apply operators
        for i in range(n_mutations):

            time_mut_s = time.time()

            # we don't mutate the first child!
            if model_idx != 0:

                # relative probabilities over the six mutation operators
                # (the last one is disabled)
                mutations_probs = np.array([1, 1, 1, 1, 1, 0])
                model, mutation_type, params = network_operators.MutateNetwork(
                    model, batch, mutation_probs=mutations_probs)
                mutations_applied.append(mutation_type)

                time_mut_e = time.time()
                times[1] = times[1] + (time_mut_e - time_mut_s)

                # stop mutating once the number of trainable parameters
                # exceeds the threshold
                pytorch_total_params = sum(
                    p.numel() for p in model['pytorch_model'].parameters()
                    if p.requires_grad)

                if pytorch_total_params > max_n_params:
                    break

        time_train_s = time.time()

        # train the child
        model['pytorch_model'].fit(trainloader, epochs=n_epochs_each)
        time_train_e = time.time()
        times[2] = times[2] + (time_train_e - time_train_s)

        # evaluate the child
        performance[model_idx] = model['pytorch_model'].evaluate(validloader)

        pytorch_total_params_child = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)
        with open(folder_out + "performance.txt", "a+") as f_out:
            f_out.write('child ' + str(model_idx) + ' performance ' +
                        str(performance[model_idx]) + ' num params ' +
                        str(pytorch_total_params_child) + '\n')
        torch.save(model['pytorch_model'].state_dict(),
                   savepath + 'model_' + str(model_idx))

        descriptors[model_idx] = copy.deepcopy(model['model_descriptor'])

        time_overall_e = time.time()
        times[0] = times[0] + (time_overall_e - time_overall_s)

        np.savetxt(savepath + 'model_' + str(model_idx) + '_times', times)
        with open(savepath + 'model_' + str(model_idx) +
                  '_model_descriptor.txt', 'w') as descriptor_file:
            for layer in model['model_descriptor']['layers']:
                descriptor_file.write(str(layer) + "\n")

        # delete the model (attempt to free memory)
        del model['pytorch_model']
        del model
        torch.cuda.empty_cache()

    # update the current model to the best-performing child
    winner_idx = np.argsort(performance)[-1]

    if performance[winner_idx] > 0:

        print('Winner model index: ' + str(winner_idx))
        print("winner's performance", performance[winner_idx])
        pytorch_model = ConvNet(descriptors[winner_idx])
        pytorch_model.cuda()

        pytorch_model.load_state_dict(torch.load(savepath + 'model_' +
                                                 str(winner_idx)),
                                      strict=False)
        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(descriptors[winner_idx]),
            'topo_ordering': pytorch_model.topo_ordering
        }

    else:
        print('no trainable models found')
        model = initial_model

    with open(folder_out + "performance.txt", "a+") as f_out:
        f_out.write("****************************\n")

    return model, performance[winner_idx]
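
For context, a minimal sketch of how EvalNextGen might be called across generations. The outer loop and n_generations are hypothetical, not from the source; only the function's signature and return values are taken from the code above.

# Hypothetical outer loop: reuse the winner of each round as the next parent.
best_model = initial_model
for generation in range(n_generations):  # n_generations is an assumed setting
    gen_savepath = savepath + 'gen_' + str(generation) + '_'
    best_model, best_perf = EvalNextGen(n_models, n_mutations, n_epochs_total,
                                        best_model, gen_savepath, folder_out)
    print('generation', generation, 'best performance', best_perf)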
Example #3
def SpecialChild(n_models, n_mutations, n_epochs_total, initial_model,
                 savepath, folder_out):
    """
    generate and train children, update best model

    n_models = number of child models
    n_mutations = number of mutations/network operators to be applied per model_descriptor
    n_epochs_total = number of epochs for training in total
    initial model = current best model_descriptor
    savepath = where to save stuff
    folder_out = where to save the general files for one run
    """

    # epochs for training each model
    n_epochs_each = int(n_epochs_total)

    print('Train all models for', n_epochs_each, 'epochs.')

    init_weights_path = savepath + 'ini_weights'
    torch.save(initial_model['pytorch_model'].state_dict(), init_weights_path)

    performance = np.zeros(shape=(n_models, ))
    descriptors = []

    # total training budget (in epochs) across all successive-halving rounds
    sch_epochs = 0
    for s in range(0, int(np.log2(n_models)) + 1):
        sch_epochs += n_epochs_total * (2**s)
    print("total number of epochs: ", sch_epochs)

    # convert the budget from epochs to optimizer steps for the scheduler
    n_minibatches = len(trainloader)

    sch_epochs = sch_epochs * n_minibatches
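    # e.g. with n_models = 8 and n_epochs_total = 1 (hypothetical settings),
    # the loop above yields 1 + 2 + 4 + 8 = 15 epochs, i.e.
    # 15 * n_minibatches optimizer steps; this is exactly the budget the
    # winning child consumes across all halving rounds, so the cosine
    # schedule below anneals to eta_min at the end of training.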

    for model_idx in range(0, n_models):
        print('\nmodel idx ' + str(model_idx))

        # save some data
        time_overall_s = time.time()

        pytorch_model = ConvNet(initial_model['model_descriptor'])
        pytorch_model.cuda()
        pytorch_model.load_state_dict(torch.load(init_weights_path),
                                      strict=False)

        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(initial_model['model_descriptor']),
            'topo_ordering': pytorch_model.topo_ordering
        }
        # attach a cosine-annealing schedule spanning the whole SH budget
        model['pytorch_model'].scheduler = optim.lr_scheduler.CosineAnnealingLR(
            model['pytorch_model'].optimizer,
            T_max=sch_epochs,
            eta_min=0.0,
            last_epoch=-1)

        descriptors.append(model['model_descriptor'])
        mutations_applied = []
        # overall , mutations, training
        times = [0, 0, 0]

        # apply operators
        for i in range(n_mutations):

            time_mut_s = time.time()

            # we don't mutate the first child!
            if model_idx != 0:

                # relative probabilities over the six mutation operators
                # (the last one is disabled)
                mutations_probs = np.array([1, 1, 1, 1, 1, 0])
                model, mutation_type, params = network_operators.MutateNetwork(
                    model, batch, mutation_probs=mutations_probs)
                mutations_applied.append(mutation_type)
                time_mut_e = time.time()

                times[1] = times[1] + (time_mut_e - time_mut_s)

                # stop mutating once the number of trainable parameters
                # exceeds the threshold
                pytorch_total_params = sum(
                    p.numel() for p in model['pytorch_model'].parameters()
                    if p.requires_grad)

                if pytorch_total_params > max_n_params:
                    break

        # initial short training of the children
        time_train_s = time.time()
        model['pytorch_model'].fit_with_sch(trainloader, epochs=n_epochs_each)

        time_train_e = time.time()
        times[2] = times[2] + (time_train_e - time_train_s)

        performance[model_idx] = model['pytorch_model'].evaluate(validloader)

        pytorch_total_params_child = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)
        with open(folder_out + "performance.txt", "a+") as f_out:
            f_out.write('child ' + str(model_idx) + ' performance ' +
                        str(performance[model_idx]) + ' num params ' +
                        str(pytorch_total_params_child) + '\n')

        torch.save(model['pytorch_model'].state_dict(),
                   savepath + 'model_' + str(model_idx))
        torch.save(model['pytorch_model'].scheduler.state_dict(),
                   savepath + 'model_' + str(model_idx) + '_scheduler')
        torch.save(model['pytorch_model'].optimizer.state_dict(),
                   savepath + 'model_' + str(model_idx) + '_optimizer')

        descriptors[model_idx] = copy.deepcopy(model['model_descriptor'])

        time_overall_e = time.time()

        times[0] = times[0] + (time_overall_e - time_overall_s)

        np.savetxt(savepath + 'model_' + str(model_idx) + '_times', times)

        with open(savepath + 'model_' + str(model_idx) +
                  '_model_descriptor.txt', 'w') as descriptor_file:
            for layer in model['model_descriptor']['layers']:
                descriptor_file.write(str(layer) + "\n")
        del model['pytorch_model']
        del model
        torch.cuda.empty_cache()

    # continue with the successive-halving (SH) rounds
    sorted_children = np.argsort(performance)

    n_children = len(sorted_children)
    n_epochs_train_children = n_epochs_each

    while n_children > 1:

        # keep the better half of the children (argsort is ascending, so the
        # best performers are at the end)
        best_children = sorted_children[(n_children // 2):]
        # increase the training budget for them
        n_epochs_train_children = n_epochs_train_children * 2
        print("\nbest_children", best_children)
        print("n_epochs_train_children", n_epochs_train_children)

        for child in best_children:
            print("child ", child)
            # load the child parameters
            pytorch_model = ConvNet(descriptors[child])
            pytorch_model.cuda()
            pytorch_model.load_state_dict(torch.load(savepath + 'model_' +
                                                     str(child)),
                                          strict=False)
            pytorch_model.scheduler.load_state_dict(
                torch.load(savepath + 'model_' + str(child) + '_scheduler'))
            pytorch_model.optimizer.load_state_dict(
                torch.load(savepath + 'model_' + str(child) + '_optimizer'))
            model = {
                'pytorch_model': pytorch_model,
                'model_descriptor': copy.deepcopy(descriptors[child]),
                'topo_ordering': pytorch_model.topo_ordering
            }

            # train a child
            model['pytorch_model'].fit_with_sch(trainloader,
                                                epochs=n_epochs_train_children)

            # evaluate a child
            performance[child] = model['pytorch_model'].evaluate(validloader)
            pytorch_total_params_child = sum(
                p.numel() for p in model['pytorch_model'].parameters()
                if p.requires_grad)
            with open(folder_out + "performance.txt", "a+") as f_out:
                f_out.write('child ' + str(child) + ' performance ' +
                            str(performance[child]) + ' num params ' +
                            str(pytorch_total_params_child) + '\n')

            # update a child model
            torch.save(model['pytorch_model'].state_dict(),
                       savepath + 'model_' + str(child))
            torch.save(model['pytorch_model'].scheduler.state_dict(),
                       savepath + 'model_' + str(child) + '_scheduler')
            torch.save(model['pytorch_model'].optimizer.state_dict(),
                       savepath + 'model_' + str(child) + '_optimizer')

            # delete the model (attempt to clean the memory)
            del model['pytorch_model']
            del model
            torch.cuda.empty_cache()

        print("\nperformance ", performance)
        # re-rank only the surviving children, preserving ascending order
        sorted_children = [t for t in np.argsort(performance)
                           if t in best_children]

        print("sorted_children", sorted_children)
        n_children = len(sorted_children)

    print("it should be the winner model ", sorted_children[0])
    print("it should be the best performance", performance[sorted_children[0]])

    # load the best child
    the_best_child = sorted_children[0]

    pytorch_model = ConvNet(descriptors[the_best_child])
    pytorch_model.cuda()
    pytorch_model.load_state_dict(torch.load(savepath + 'model_' +
                                             str(the_best_child)),
                                  strict=False)
    model = {
        'pytorch_model': pytorch_model,
        'model_descriptor': copy.deepcopy(descriptors[the_best_child]),
        'topo_ordering': pytorch_model.topo_ordering
    }

    with open(folder_out + "performance.txt", "a+") as f_out:
        f_out.write("****************************\n")

    return model, performance[sorted_children[0]]
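
To make the successive-halving budget concrete, here is a short worked trace under hypothetical settings (n_models = 4, n_epochs_total = 2). The numbers follow directly from the loops above, and the winner's total matches the sch_epochs value used as T_max for the cosine schedule.

# Standalone trace of the halving schedule (hypothetical settings, not from
# the source): each round keeps the better half and doubles the budget.
n_models, n_epochs_total = 4, 2
budget, n, winner_total = int(n_epochs_total), n_models, 0
while n >= 1:
    print(n, 'children train for', budget, 'epochs')  # 4x2, 2x4, 1x8
    winner_total += budget
    budget, n = budget * 2, n // 2
print('winner total:', winner_total)  # 14 == sch_epochs = 2 * (1 + 2 + 4)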