def get_model_of_target_size(blueprint: BlueprintGenome, sample_map: Dict[int, int], original_model_size, input_size: List[int], target_size=-1) -> Network:
    """Build a Network whose trainable-parameter count approximates target_size.

    Starts from a square-root heuristic for the feature multiplier, refines it
    via refine_feature_multiplication_guess, then constructs and returns the
    resulting model (moved to the configured device).
    """
    if target_size == -1:
        target_size = config.target_network_size

    # Parameter count scales roughly quadratically with the feature multiplier,
    # so sqrt of the size ratio is a sensible first guess.
    initial_guess = pow(target_size / original_model_size, 0.5)
    refined_mult = refine_feature_multiplication_guess(initial_guess, blueprint, sample_map, input_size, target_size)

    model: Network = Network(blueprint, input_size, feature_multiplier=refined_mult,
                             sample_map=sample_map, allow_module_map_ignores=False).to(config.get_device())
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("targeting size using feature mult:", refined_mult, "original size:", original_model_size,
          "normalised size:", model_size, "target:", target_size,
          "change ratio:", (model_size / original_model_size),
          "target ratio:", (model_size / target_size))
    return model
def evaluate(model: Network, n_epochs, training_target=-1, attempt=0, reporter=None) -> Union[float, str]:
    """Trains model on training data, test on testing and returns test acc.

    Returns either the best observed test accuracy (float), or the RETRY
    sentinel when fetch_training_instruction decides the run should restart.
    `training_target` feeds the instruction logic; `attempt` is carried for
    callers but unused here.
    """
    # BUG FIX: `reporter` previously defaulted to `ReporterSet()` — a mutable
    # default argument created once at definition time and shared by every
    # call. Build a fresh ReporterSet per call instead.
    if reporter is None:
        reporter = ReporterSet()

    if config.dummy_run:
        # dummy mode: optionally sleep to simulate work, return a fake accuracy
        if config.dummy_time > 0:
            time.sleep(config.dummy_time)
        return random.random()

    aug = None if not config.evolve_da else model.blueprint.get_da().to_phenotype()
    train_loader = load_data(load_transform(aug), 'train')
    test_loader = load_data(load_transform(), 'test')
    device = config.get_device()

    training_results = TrainingResults()
    for epoch in range(n_epochs):
        reporter.on_start_epoch(model, epoch)
        loss = train_epoch(model, train_loader, aug, device, reporter)
        training_results.add_loss(loss)

        acc = -1
        # Intermediate accuracy is only worth computing when some stopping /
        # retry / lr-drop policy will actually consume it.
        needs_intermediate_acc = config.ft_auto_stop_count != -1 or config.ft_retries or config.ft_allow_lr_drops
        test_intermediate_accuracy = config.fully_train and epoch % config.fully_train_accuracy_test_period == 0 \
            and needs_intermediate_acc
        if test_intermediate_accuracy:
            acc = test_nn(model, test_loader)
            training_results.add_accuracy(acc, epoch)

        reporter.on_end_epoch(model, epoch, loss, acc)

        TRAINING_INSTRUCTION = fetch_training_instruction(training_results, training_target)
        if TRAINING_INSTRUCTION == CONTINUE:
            continue
        if TRAINING_INSTRUCTION == RETRY:
            return RETRY
        if TRAINING_INSTRUCTION == STOP:
            if len(training_results.accuracies) > 0:
                return training_results.get_max_acc()
            else:
                break  # exits for, runs final acc test, returns
        if TRAINING_INSTRUCTION == DROP_LR:
            model.drop_lr()

    print("doing final acc test")
    final_test_acc = test_nn(model, test_loader)
    return max(final_test_acc, training_results.get_max_acc())
def test_nn(model: Network, test_loader: DataLoader):
    """Evaluate *model* on *test_loader* and return the mean per-batch accuracy.

    Runs in eval mode with gradients disabled. NOTE(review): this averages the
    per-batch accuracy_score values, so an undersized final batch is weighted
    the same as full batches.
    """
    model.eval()
    count = 0
    total_acc = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(config.get_device()), targets.to(config.get_device())

            output = model(inputs)
            # exp of the (presumably log-softmax) output, then argmax per row
            softmax = torch.exp(output).cpu()
            prob = list(softmax.numpy())
            predictions = np.argmax(prob, axis=1)

            acc = accuracy_score(targets.cpu(), predictions)
            total_acc += acc
            # BUG FIX: was `count = batch_idx`, which under-counts by one and
            # raises ZeroDivisionError for a single-batch loader.
            count += 1
    return total_acc / count
def evaluate_blueprint(blueprint: BlueprintGenome, input_size: List[int], num_epochs, feature_multiplier: float = 1) -> BlueprintGenome:
    """
    Parses the blueprint into its phenotype NN
    Handles the assignment of the single/multi obj finesses to the blueprint in parallel
    """
    device = config.get_device()
    model: Network = Network(blueprint, input_size, feature_multiplier=feature_multiplier).to(device)
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    sample_map = model.sample_map

    # Optionally rebuild the model so its parameter count matches the target size.
    if config.target_network_size != -1:
        model = get_model_of_target_size(blueprint, sample_map, model_size, input_size)
        model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)

    if model_size > config.max_model_params:
        # too large to train — score it zero
        print(f"dropped model which was too large with {model_size} params. Max is: {config.max_model_params}")
        accuracy = 0
    else:
        accuracy = evaluate(model, num_epochs)

    if accuracy == "retry":
        raise Exception("no retries in evolution")

    blueprint.update_best_sample_map(sample_map, accuracy)
    blueprint.report_fitness([accuracy, model_size])
    parse_number = blueprint.n_evaluations

    print(f'Blueprint - {blueprint.id:^5} - accuracy: {accuracy * 100:05.2f}% (proc {mp.current_process().name})')

    plot_prefix = "g" + str(singleton.instance.generation_number) + "_" + str(blueprint.id)
    if config.plot_every_genotype:
        blueprint.visualize(parse_number=parse_number, prefix=plot_prefix)
    if config.plot_every_phenotype:
        model.visualize(parse_number=parse_number, prefix=plot_prefix)

    return blueprint
def evaluate(model: Network, n_epochs, training_target=-1, attempt=0) -> Union[float, str]:
    """trains model on training data, test on testing and returns test acc

    Fully-train variant: resumes from model.ft_epoch, caps epochs at
    config.fully_train_max_epochs, and stops early when accuracy plateaus or
    when should_retry_training asks for a restart (returns the RETRY sentinel).
    """
    if config.dummy_run:
        # dummy mode: optionally simulate work, then return a fake accuracy
        if config.dummy_time > 0:
            time.sleep(config.dummy_time)
        return random.random()

    aug = None if not config.evolve_da else model.blueprint.get_da().to_phenotype()
    train_loader = load_data(load_transform(aug), 'train')
    # test loader is only needed up-front for intermediate accuracy checks;
    # otherwise it is created lazily after the loop
    test_loader = load_data(load_transform(), 'test') if config.fully_train else None
    device = config.get_device()
    # resume from the epoch this model last reached in fully-train
    start = model.ft_epoch
    if config.fully_train:
        n_epochs = config.fully_train_max_epochs  # max number of epochs for a fully train

    max_acc = 0
    # number of accuracy checks since max_acc last improved
    max_acc_age = 0
    for epoch in range(start, n_epochs):
        loss = train_epoch(model, train_loader, aug, device)

        acc = -1
        test_intermediate_accuracy = config.fully_train and epoch % config.fully_train_accuracy_test_period == 0
        if test_intermediate_accuracy:
            acc = test_nn(model, test_loader)
            if acc > max_acc:
                max_acc = acc
                max_acc_age = 0

            if should_retry_training(max_acc, training_target, epoch):
                # the training is not making target, start again
                # this means that the network is not doing as well as its duplicate in evolution
                return RETRY

            # wait 2 accuracy checks, if the max acc has not increased - this network has finished training
            # NOTE(review): the age check runs BEFORE the increment below, so an
            # improvement leaves age at 1 and two further flat checks trigger the stop
            if max_acc_age >= 2:
                print("training has plateaued stopping")
                return max_acc

            max_acc_age += 1

        if config.fully_train:
            _fully_train_logging(model, loss, epoch, attempt, acc)

    # lazily create the test loader if intermediate checks never needed it
    test_loader = load_data(load_transform(), 'test') if test_loader is None else test_loader
    final_test_acc = test_nn(model, test_loader)
    return max(final_test_acc, max_acc)
def _load_model(dummy_bp: BlueprintGenome, run: Run, gen_num: int, in_size) -> Network:
    """Rebuild the network for `dummy_bp` at generation `gen_num` and load its saved weights."""
    if not config.resume_fully_train:
        raise Exception('Calling resume training, but config.resume_fully_train is false')

    # point the generation singleton at the generation being resumed
    S.instance = run.generations[gen_num]

    net = Network(dummy_bp, in_size,
                  sample_map=dummy_bp.best_module_sample_map,
                  allow_module_map_ignores=False)
    model: Network = net.to(config.get_device())
    model.load()
    return model
def create_layer(self, in_shape: list, feature_multiplier=None):
    """Register one incoming shape; once all expected inputs have arrived,
    compute and return this aggregation layer's output shape.

    Returns None while inputs are still outstanding. `feature_multiplier` is
    accepted for interface compatibility but unused here.
    """
    self.n_inputs_received += 1
    self.inputs.append(torch.zeros(in_shape).to(config.get_device()))

    received, expected = self.n_inputs_received, self.n_inputs_expected
    if received > expected:
        raise Exception('Received more inputs than expected')
    if received < expected:
        # still waiting on more inputs
        return

    # All inputs present: pass them through the merge to discover the out shape
    self.out_shape = list(merge(self.inputs, self).size())
    self.reset()
    return self.out_shape
def _create_model(run: Run, blueprint: BlueprintGenome, gen_num, in_size, target_feature_multiplier) -> Network:
    """Reconstruct the phenotype network for `blueprint` from generation `gen_num`,
    optionally rescaled by `target_feature_multiplier`, and log its details."""
    S.instance = run.generations[gen_num]
    modules = run.get_modules_for_blueprint(blueprint)

    model: Network = Network(blueprint, in_size,
                             sample_map=blueprint.best_module_sample_map,
                             allow_module_map_ignores=False,
                             feature_multiplier=1,
                             target_feature_multiplier=target_feature_multiplier).to(config.get_device())
    model_size = sum(p.numel() for p in model.parameters() if p.requires_grad)
    sample_map = model.sample_map

    if target_feature_multiplier != 1:
        # rebuild at the scaled parameter budget
        model = get_model_of_target_size(blueprint, sample_map, model_size, in_size,
                                         target_size=model_size * target_feature_multiplier)
        model.target_feature_multiplier = target_feature_multiplier

    species_used = list({blueprint.nodes[node_id].species_id
                         for node_id in blueprint.get_fully_connected_node_ids()
                         if type(blueprint.nodes[node_id]) == BlueprintNode})
    print("Blueprint: {}\nModules: {}\nSample map: {}\n Species used: {}".format(
        blueprint, modules, blueprint.best_module_sample_map, species_used))
    print("Training model which scored: {} in evolution , with {} parameters with feature mult: {}\n"
          .format(blueprint.max_acc, model.size(), target_feature_multiplier))

    return model
def homogenise_channel(conv_inputs: List[tensor], agg_layer: AggregationLayer) -> List[tensor]:
    """This will only be used when merging using a lossy strategy"""
    if not agg_layer.channel_resizers:
        # Lazily create one 1x1 conv per input, each mapping that input's
        # channel count to the (rounded) mean channel count of all inputs.
        channels = [list(conv_input.size())[1] for conv_input in conv_inputs]
        target_size = round(sum(channels) / len(conv_inputs))
        for channel in channels:
            agg_layer.channel_resizers.append(nn.Conv2d(channel, target_size, 1))
        agg_layer.channel_resizers.to(config.get_device())

    # Resize each input in place via its matching 1x1 conv
    for i, conv_input in enumerate(conv_inputs):
        conv_inputs[i] = agg_layer.channel_resizers[i](conv_input)
    return conv_inputs
def refine_feature_multiplication_guess(feature_mulitplication_guess, blueprint: BlueprintGenome, sample_map: Dict[int, int], input_size: List[int], target_size, remaining_tries=5, best_guess=-1, best_target_ratio=-1):
    """method to iteratively refine the FM guess to get closest to target size"""
    if remaining_tries == 0:
        return best_guess

    # Materialise a candidate network at the current guess and measure it.
    candidate = Network(blueprint, input_size,
                        feature_multiplier=feature_mulitplication_guess,
                        sample_map=sample_map,
                        allow_module_map_ignores=False).to(config.get_device())
    candidate_size = sum(p.numel() for p in candidate.parameters() if p.requires_grad)

    # Ratio of 1 is ideal; >1 means the FM must shrink, <1 means it must grow.
    target_ratio = candidate_size / target_size

    first_evaluation = best_guess == -1
    if first_evaluation or abs(1 - target_ratio) < abs(1 - best_target_ratio):
        best_guess, best_target_ratio = feature_mulitplication_guess, target_ratio

    # Step size shrinks as the remaining tries run out.
    adjustment_factor = 0.2 + (remaining_tries / 20)
    next_guess = best_guess / pow(best_target_ratio, adjustment_factor)
    return refine_feature_multiplication_guess(next_guess, blueprint, sample_map, input_size, target_size,
                                               remaining_tries=remaining_tries - 1,
                                               best_guess=best_guess,
                                               best_target_ratio=best_target_ratio)