def mutate_network(n_mutations, model, model_idx, batch, max_n_params, times,
                   mutations_probs=None):
    """Apply up to ``n_mutations`` network operators to ``model``.

    The first child (``model_idx == 0``) is never mutated: the mutations
    might make it worse, so it is kept as is. If it wins the current round
    it may still be mutated in a later one.

    Args:
        n_mutations: maximum number of mutations to apply.
        model: model dict to mutate; must provide ``model['pytorch_model']``
            with a ``parameters()`` method.
        model_idx: index of the child; index 0 is left untouched.
        batch: first minibatch of the training set, forwarded to
            ``network_operators.MutateNetwork``.
        max_n_params: trainable-parameter threshold; mutation stops as soon
            as the model exceeds it.
        times: list ``[overall, mutation, training]`` of accumulated
            seconds. Only ``times[1]`` is updated, in place.
        mutations_probs: optional weights forwarded to
            ``network_operators.MutateNetwork`` as ``mutation_probs``.
            Defaults to ``np.array([1, 1, 1, 1, 1, 0])``, the weights
            ``EvalNextGen`` and ``SpecialChild`` use.

    Returns:
        Tuple ``(times, model)`` -- note the order: the updated timing list
        first, then the (possibly mutated) model.
    """
    if mutations_probs is None:
        mutations_probs = np.array([1, 1, 1, 1, 1, 0])

    for _ in range(n_mutations):
        time_mut_s = time.time()

        if model_idx != 0:
            # MutateNetwork returns [model, mutation_type, params]; only the
            # new model is needed here. Assigning the whole list to `model`
            # would break the parameter count below.
            model, _mutation_type, _params = network_operators.MutateNetwork(
                model, batch, mutation_probs=mutations_probs)

        times[1] = times[1] + (time.time() - time_mut_s)

        # Stop mutating once the trainable-parameter budget is exceeded.
        pytorch_total_params = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)
        if pytorch_total_params > max_n_params:
            break

    return times, model
def EvalNextGen(n_models, n_mutations, n_epochs_total, initial_model, savepath, folder_out):
    """Generate, train and evaluate child models; return the best one.

    Every child starts from ``initial_model``: a new ``ConvNet`` is built
    from its descriptor and the weights are reloaded from a checkpoint
    written to ``savepath + 'ini_weights'``. Child 0 is kept unmutated;
    every other child gets up to ``n_mutations`` mutations, stopping early
    once its trainable-parameter count exceeds ``max_n_params``. Each child
    is then trained, evaluated, and its weights, timings and layer
    descriptor are written under ``savepath``.

    Besides its arguments the function reads ``batch``, ``max_n_params``,
    ``trainloader``, ``validloader``, ``ConvNet`` and ``network_operators``,
    none of which are defined here (expected at module level). Models are
    moved to the GPU with ``.cuda()``.

    Args:
        n_models: number of child models.
        n_mutations: maximum number of mutations/network operators applied
            per child.
        n_epochs_total: total epoch budget; each child is trained for
            ``floor(n_epochs_total / n_models)`` epochs.
        initial_model: current best model dict with keys ``'pytorch_model'``
            and ``'model_descriptor'``.
        savepath: path prefix for per-child checkpoints and logs.
        folder_out: path prefix for the run-wide ``performance.txt``.

    Returns:
        Tuple ``(model, performance)``. ``model`` is a dict with keys
        ``'pytorch_model'``, ``'model_descriptor'`` and ``'topo_ordering'``
        for the best-scoring child, and ``performance`` is its score. If no
        child scored above 0, ``initial_model`` is returned unchanged
        together with that non-positive best score.
    """
    # epochs for training each model
    n_epochs_each = int(np.floor(n_epochs_total / n_models))
    print('Train all models for', int(n_epochs_each), 'epochs.')

    # Checkpoint the parent once so every child starts from the same weights.
    init_weights_path = savepath + 'ini_weights'
    torch.save(initial_model['pytorch_model'].state_dict(), init_weights_path)

    performance = np.zeros(shape=(n_models, ))
    descriptors = []

    for model_idx in range(0, n_models):
        print('model idx' + str(model_idx))

        time_overall_s = time.time()

        pytorch_model = ConvNet(initial_model['model_descriptor'])
        pytorch_model.cuda()
        pytorch_model.load_state_dict(torch.load(init_weights_path), strict=False)

        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(initial_model['model_descriptor']),
            'topo_ordering': pytorch_model.topo_ordering
        }
        descriptors.append(model['model_descriptor'])

        # accumulated seconds: overall, mutations, training
        times = [0, 0, 0]

        # apply operators
        for _ in range(n_mutations):
            time_mut_s = time.time()

            # we don't mutate the first child!
            if model_idx != 0:
                mutations_probs = np.array([1, 1, 1, 1, 1, 0])
                model, _mutation_type, _params = network_operators.MutateNetwork(
                    model, batch, mutation_probs=mutations_probs)

            time_mut_e = time.time()
            times[1] = times[1] + (time_mut_e - time_mut_s)

            # Stop mutating once the trainable-parameter budget is exceeded.
            pytorch_total_params = sum(
                p.numel() for p in model['pytorch_model'].parameters()
                if p.requires_grad)
            if pytorch_total_params > max_n_params:
                break

        # train the child
        time_train_s = time.time()
        model['pytorch_model'].fit(trainloader, epochs=n_epochs_each)
        time_train_e = time.time()
        times[2] = times[2] + (time_train_e - time_train_s)

        # evaluate the child
        performance[model_idx] = model['pytorch_model'].evaluate(validloader)

        pytorch_total_params_child = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)

        with open(folder_out + "performance.txt", "a+") as f_out:
            f_out.write('child ' + str(model_idx) + ' performance ' +
                        str(performance[model_idx]) + ' num params ' +
                        str(pytorch_total_params_child) + '\n')

        torch.save(model['pytorch_model'].state_dict(),
                   savepath + 'model_' + str(model_idx))
        # The descriptor may have been replaced by the mutations above, so
        # store the final one for rebuilding the winner later.
        descriptors[model_idx] = copy.deepcopy(model['model_descriptor'])

        time_overall_e = time.time()
        times[0] = times[0] + (time_overall_e - time_overall_s)
        np.savetxt(savepath + 'model_' + str(model_idx) + '_times', times)

        # Context manager so the handle is closed even if a write fails.
        descriptor_path = savepath + 'model_' + str(model_idx) + '_model_descriptor.txt'
        with open(descriptor_path, 'w') as descriptor_file:
            for layer in model['model_descriptor']['layers']:
                descriptor_file.write(str(layer) + "\n")

        # delete the model (attempt to clean the memory)
        del model['pytorch_model']
        del model
        torch.cuda.empty_cache()

    # update the current model to be the best model
    winner_idx = np.argsort(performance)[-1]

    if performance[winner_idx] > 0:
        print('Winner model index:' + str(winner_idx))
        print("winner's performance", performance[winner_idx])
        pytorch_model = ConvNet(descriptors[winner_idx])
        pytorch_model.cuda()
        pytorch_model.load_state_dict(
            torch.load(savepath + 'model_' + str(winner_idx)), strict=False)
        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(descriptors[winner_idx]),
            'topo_ordering': pytorch_model.topo_ordering
        }
    else:
        # No child scored above zero: keep the parent.
        print('no trainable models found ')
        model = initial_model

    with open(folder_out + "performance.txt", "a+") as f_out:
        f_out.write("****************************\n")

    return model, performance[winner_idx]
def SpecialChild(n_models, n_mutations, n_epochs_total, initial_model, savepath, folder_out):
    """Generate children, then train them in halving rounds; return the best.

    Phase 1: every child is built from ``initial_model`` (weights reloaded
    from ``savepath + 'ini_weights'``), gets a ``CosineAnnealingLR``
    scheduler, is mutated (child 0 is kept unmutated; mutation stops once
    the trainable-parameter count exceeds ``max_n_params``), trained for
    ``int(n_epochs_total)`` epochs with ``fit_with_sch`` and evaluated.
    Weights, scheduler state and optimizer state are checkpointed under
    ``savepath``.

    Phase 2 (the "SH steps"): while more than one child remains, the
    better-scoring half (upper half of the argsort of ``performance``) is
    reloaded from its checkpoints, trained for twice as many epochs as in
    the previous round, re-evaluated and re-checkpointed. The single
    remaining child is the winner.

    Besides its arguments the function reads ``batch``, ``max_n_params``,
    ``trainloader``, ``validloader``, ``ConvNet`` and ``network_operators``,
    none of which are defined here (expected at module level). Models are
    moved to the GPU with ``.cuda()``.

    Args:
        n_models: number of child models.
        n_mutations: maximum number of mutations/network operators applied
            per child.
        n_epochs_total: number of epochs of the initial training of each
            child; doubled in every later round.
        initial_model: current best model dict with keys ``'pytorch_model'``
            and ``'model_descriptor'``.
        savepath: path prefix for per-child checkpoints and logs.
        folder_out: path prefix for the run-wide ``performance.txt``.

    Returns:
        Tuple ``(model, performance)``: a dict with keys ``'pytorch_model'``,
        ``'model_descriptor'`` and ``'topo_ordering'`` for the surviving
        child, and its latest evaluation score.
    """
    # epochs for the initial training of each model
    n_epochs_each = int(n_epochs_total)
    print('Train all models for', int(n_epochs_each), 'epochs.')

    # Checkpoint the parent once so every child starts from the same weights.
    init_weights_path = savepath + 'ini_weights'
    torch.save(initial_model['pytorch_model'].state_dict(), init_weights_path)

    performance = np.zeros(shape=(n_models, ))
    descriptors = []

    # Scheduler horizon: epochs summed over all rounds (the budget doubles
    # each round), later converted to minibatch steps.
    sch_epochs = 0
    for s in range(0, int(np.log2(n_models)) + 1):
        sch_epochs += n_epochs_total * (2**(s))
    print("number of steps: ", sch_epochs)

    # how many steps per epoch
    # NOTE(review): presumably fit_with_sch steps the scheduler once per
    # minibatch -- confirm against its implementation.
    n_minibatches = len(trainloader)
    sch_epochs = sch_epochs * n_minibatches

    for model_idx in range(0, n_models):
        print('\nmodel idx ' + str(model_idx))

        time_overall_s = time.time()

        pytorch_model = ConvNet(initial_model['model_descriptor'])
        pytorch_model.cuda()
        pytorch_model.load_state_dict(torch.load(init_weights_path), strict=False)

        model = {
            'pytorch_model': pytorch_model,
            'model_descriptor': copy.deepcopy(initial_model['model_descriptor']),
            'topo_ordering': pytorch_model.topo_ordering
        }

        # set scheduler
        model['pytorch_model'].scheduler = optim.lr_scheduler.CosineAnnealingLR(
            model['pytorch_model'].optimizer,
            T_max=sch_epochs,
            eta_min=0.0,
            last_epoch=-1)

        descriptors.append(model['model_descriptor'])

        # accumulated seconds: overall, mutations, training
        times = [0, 0, 0]

        # apply operators
        for _ in range(n_mutations):
            time_mut_s = time.time()

            # we don't mutate the first child!
            if model_idx != 0:
                mutations_probs = np.array([1, 1, 1, 1, 1, 0])
                model, _mutation_type, _params = network_operators.MutateNetwork(
                    model, batch, mutation_probs=mutations_probs)

            time_mut_e = time.time()
            times[1] = times[1] + (time_mut_e - time_mut_s)

            # Stop mutating once the trainable-parameter budget is exceeded.
            pytorch_total_params = sum(
                p.numel() for p in model['pytorch_model'].parameters()
                if p.requires_grad)
            if pytorch_total_params > max_n_params:
                break

        # initial short training of the children
        time_train_s = time.time()
        model['pytorch_model'].fit_with_sch(trainloader, epochs=n_epochs_each)
        time_train_e = time.time()
        times[2] = times[2] + (time_train_e - time_train_s)

        performance[model_idx] = model['pytorch_model'].evaluate(validloader)

        pytorch_total_params_child = sum(
            p.numel() for p in model['pytorch_model'].parameters()
            if p.requires_grad)

        with open(folder_out + "performance.txt", "a+") as f_out:
            f_out.write('child ' + str(model_idx) + ' performance ' +
                        str(performance[model_idx]) + ' num params ' +
                        str(pytorch_total_params_child) + '\n')

        # Checkpoint weights plus scheduler/optimizer state so training can
        # resume in the later rounds.
        torch.save(model['pytorch_model'].state_dict(),
                   savepath + 'model_' + str(model_idx))
        torch.save(model['pytorch_model'].scheduler.state_dict(),
                   savepath + 'model_' + str(model_idx) + '_scheduler')
        torch.save(model['pytorch_model'].optimizer.state_dict(),
                   savepath + 'model_' + str(model_idx) + '_optimizer')

        # The descriptor may have been replaced by the mutations above, so
        # store the final one for rebuilding the child later.
        descriptors[model_idx] = copy.deepcopy(model['model_descriptor'])

        time_overall_e = time.time()
        times[0] = times[0] + (time_overall_e - time_overall_s)
        np.savetxt(savepath + 'model_' + str(model_idx) + '_times', times)

        # Context manager so the handle is closed even if a write fails.
        descriptor_path = savepath + 'model_' + str(model_idx) + '_model_descriptor.txt'
        with open(descriptor_path, 'w') as descriptor_file:
            for layer in model['model_descriptor']['layers']:
                descriptor_file.write(str(layer) + "\n")

        # delete the model (attempt to clean the memory)
        del model['pytorch_model']
        del model
        torch.cuda.empty_cache()

    # continue SH steps
    sorted_children = np.argsort(performance)  # ascending: best child last
    n_children = len(sorted_children)
    n_epochs_train_children = n_epochs_each

    while n_children > 1:
        # pick the best half of the children
        best_children = sorted_children[(n_children // 2):]
        # increase the training budget for them
        n_epochs_train_children = n_epochs_train_children * 2

        print("\nbest_children", best_children)
        print("n_epochs_train_children", n_epochs_train_children)

        for child in best_children:
            print("child ", child)

            # load the child parameters
            pytorch_model = ConvNet(descriptors[child])
            pytorch_model.cuda()
            pytorch_model.load_state_dict(
                torch.load(savepath + 'model_' + str(child)), strict=False)
            # NOTE(review): this restores state into whatever scheduler and
            # optimizer ConvNet builds by default, not a freshly constructed
            # CosineAnnealingLR -- confirm the saved state carries over as
            # intended.
            pytorch_model.scheduler.load_state_dict(
                torch.load(savepath + 'model_' + str(child) + '_scheduler'))
            pytorch_model.optimizer.load_state_dict(
                torch.load(savepath + 'model_' + str(child) + '_optimizer'))

            model = {
                'pytorch_model': pytorch_model,
                'model_descriptor': copy.deepcopy(descriptors[child]),
                'topo_ordering': pytorch_model.topo_ordering
            }

            # train a child
            model['pytorch_model'].fit_with_sch(trainloader,
                                                epochs=n_epochs_train_children)

            # evaluate a child
            performance[child] = model['pytorch_model'].evaluate(validloader)

            pytorch_total_params_child = sum(
                p.numel() for p in model['pytorch_model'].parameters()
                if p.requires_grad)

            with open(folder_out + "performance.txt", "a+") as f_out:
                f_out.write('child ' + str(child) + ' performance ' +
                            str(performance[child]) + ' num params ' +
                            str(pytorch_total_params_child) + '\n')

            # update a child model
            torch.save(model['pytorch_model'].state_dict(),
                       savepath + 'model_' + str(child))
            torch.save(model['pytorch_model'].scheduler.state_dict(),
                       savepath + 'model_' + str(child) + '_scheduler')
            torch.save(model['pytorch_model'].optimizer.state_dict(),
                       savepath + 'model_' + str(child) + '_optimizer')

            # delete the model (attempt to clean the memory)
            del model['pytorch_model']
            del model
            torch.cuda.empty_cache()

        print("\nperformance ", performance)

        # Re-rank only the children trained in this round; eliminated
        # children keep their stale scores and must not re-enter.
        temp_children_array = np.argsort(performance)
        sorted_children = [t for t in temp_children_array if t in best_children]

        print("sorted_children", sorted_children)
        n_children = len(sorted_children)

    # Exactly one child is left at this point.
    print("it should be the winner model ", sorted_children[0])
    print("it should be the best performance", performance[sorted_children[0]])

    # load the best child
    the_best_child = sorted_children[0]
    pytorch_model = ConvNet(descriptors[the_best_child])
    pytorch_model.cuda()
    pytorch_model.load_state_dict(
        torch.load(savepath + 'model_' + str(the_best_child)), strict=False)
    model = {
        'pytorch_model': pytorch_model,
        'model_descriptor': copy.deepcopy(descriptors[the_best_child]),
        'topo_ordering': pytorch_model.topo_ordering
    }

    with open(folder_out + "performance.txt", "a+") as f_out:
        f_out.write("****************************\n")

    return model, performance[sorted_children[0]]