def _run_lipizzaner(config):
    """Run one full Lipizzaner experiment with the given settings dict.

    Configures logging/output directories, runs the trainer, then waits for
    the master to collect results (or for termination) before resetting the
    client back to an idle, locked state.
    """
    LogHelper.log_only_flask_warnings()
    cc = ConfigurationContainer.instance()
    cc.settings = config
    # Remember the base directory so it can be restored after the experiment.
    output_base_dir = cc.output_dir
    ClientAPI._set_output_dir(cc)
    if 'logging' in cc.settings['general'] and cc.settings['general']['logging']['enabled']:
        LogHelper.setup(cc.settings['general']['logging']['log_level'], cc.output_dir)
        ClientAPI._logger.info('Distributed training recognized, set log directory to {}'.format(cc.output_dir))
    try:
        lipizzaner = Lipizzaner()
        lipizzaner.run(cc.settings['trainer']['n_iterations'], ClientAPI._stop_event)
        ClientAPI.is_finished = True
        # Wait until master finishes experiment, i.e. collects results, or experiment is terminated
        or_event(ClientAPI._finish_event, ClientAPI._stop_event).wait()
    except Exception as ex:
        ClientAPI.is_finished = True
        ClientAPI._logger.critical('An unhandled error occured while running Lipizzaner: {}'.format(ex))
        # Flask 1.0.2 does not print the stack trace of exceptions anymore.
        # BUG FIX: the original passed traceback.print_stack() to format(), which
        # prints to stderr and returns None — the log line just said "None".
        # format_exc() returns the formatted traceback of the active exception.
        ClientAPI._logger.critical('An unhandled error occured while running Lipizzaner: {}'.format(traceback.format_exc()))
        traceback.print_exc()
        raise ex
    finally:
        ClientAPI.is_busy = False
        ClientAPI._logger.info('Finished experiment, waiting for new requests.')
        cc.output_dir = output_base_dir
        ConcurrentPopulations.instance().lock()
def __init__(self, dataloader, network_factory, population_size=10, tournament_size=2, mutation_probability=0.9,
             n_replacements=1, sigma=0.25, alpha=0.25, default_adam_learning_rate=0.001, calc_mixture=False,
             mixture_sigma=0.01, score_sample_size=10000, discriminator_skip_each_nth_step=0, enable_selection=True):
    """Initialize the trainer: populations, neighbourhood, DB logging and optional score calculation.

    Keyword defaults act as fallbacks; each setting is overridden by the
    corresponding key in ``self.settings`` (the trainer section of the
    configuration) when present.
    """
    super().__init__(dataloader, network_factory, population_size, tournament_size, mutation_probability,
                     n_replacements, sigma, alpha)
    self.batch_number = 0
    # Configuration values override the constructor defaults.
    self._default_adam_learning_rate = self.settings.get(
        'default_adam_learning_rate', default_adam_learning_rate)
    self._discriminator_skip_each_nth_step = self.settings.get(
        'discriminator_skip_each_nth_step', discriminator_skip_each_nth_step)
    self._enable_selection = self.settings.get('enable_selection', enable_selection)
    self.mixture_sigma = self.settings.get('mixture_sigma', mixture_sigma)
    self.neighbourhood = Neighbourhood.instance()
    # Tag every individual with its cell number and an index, e.g. "3/G0" / "3/D1".
    for i, individual in enumerate(self.population_gen.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/G{}'.format(self.neighbourhood.cell_number, i)
    for i, individual in enumerate(self.population_dis.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/D{}'.format(self.neighbourhood.cell_number, i)
    # Publish the populations through the shared singleton and open the lock
    # so other threads (e.g. the client API) may read them.
    self.concurrent_populations = ConcurrentPopulations.instance()
    self.concurrent_populations.generator = self.population_gen
    self.concurrent_populations.discriminator = self.population_dis
    self.concurrent_populations.unlock()
    experiment_id = ConfigurationContainer.instance(
    ).settings['general']['logging'].get('experiment_id', None)
    self.db_logger = DbLogger(current_experiment=experiment_id)
    # Score calculation (e.g. FID/inception) is optional; when disabled the
    # score stays at a neutral 0.
    if 'score' in self.settings and self.settings['score'].get(
            'enabled', calc_mixture):
        self.score_calc = ScoreCalculatorFactory.create()
        self.score_sample_size = self.settings['score'].get(
            'sample_size', score_sample_size)
        # Reversed calculators treat lower as better, so start from +inf.
        self.score = float(
            'inf') if self.score_calc.is_reversed else float('-inf')
    else:
        self.score_calc = None
        self.score = 0
def _task_generators_best(self, data):
    """Return the best (lowest-fitness) generator serialized as a one-element list.

    Returns an empty list when no generator population exists yet.
    """
    generator_pop = ConcurrentPopulations.instance().generator
    if generator_pop is None:
        return []
    best = min(generator_pop.individuals, key=lambda ind: ind.fitness)
    return [individual_to_dict(best)]
def get_best_generator():
    """HTTP handler: serve the best (lowest-fitness) generator as a JSON list.

    Responds with an empty JSON list when no generator population exists.
    """
    generator_pop = ConcurrentPopulations.instance().generator
    if generator_pop is None:
        payload = []
    else:
        best = min(generator_pop.individuals, key=lambda ind: ind.fitness)
        payload = [ClientAPI._individual_to_json(best)]
    return Response(response=json.dumps(payload), status=200, mimetype="application/json")
def get_discriminators():
    """HTTP handler: serve every discriminator individual as JSON.

    Serialization happens under the population lock so the individuals are
    not mutated while being read.
    """
    populations = ConcurrentPopulations.instance()
    populations.lock()
    discriminator_pop = populations.discriminator
    if discriminator_pop is None:
        payload = []
    else:
        payload = [ClientAPI._individual_to_json(ind) for ind in discriminator_pop.individuals]
    populations.unlock()
    return Response(response=json.dumps(payload), status=200, mimetype="application/json")
def _run_lipizzaner(config):
    """Run one Lipizzaner experiment (MPI client) with the given settings dict.

    Loads the grid topology, runs the trainer, waits for the master to collect
    results or for termination, then restores the output directory and relocks
    the populations.
    """
    cc = ConfigurationContainer.instance()
    cc.settings = config

    grid = Grid.instance()
    grid.load_grid()

    # Keep the base directory so it can be restored once the run is over.
    previous_output_dir = cc.output_dir
    LipizzanerMpiClient._set_output_dir(cc)

    general_settings = cc.settings['general']
    if 'logging' in general_settings and general_settings['logging']['enabled']:
        LogHelper.setup(general_settings['logging']['log_level'], cc.output_dir)
        LipizzanerMpiClient._logger.info(
            'Distributed training recognized, set log directory to {}'.format(cc.output_dir))
    try:
        Lipizzaner().run(cc.settings['trainer']['n_iterations'],
                         LipizzanerMpiClient._stop_event)
        LipizzanerMpiClient.is_finished = True
        # Wait until master finishes experiment, i.e. collects results, or experiment is terminated
        or_event(LipizzanerMpiClient._finish_event,
                 LipizzanerMpiClient._stop_event).wait()
    except Exception as ex:
        LipizzanerMpiClient.is_finished = True
        LipizzanerMpiClient._logger.critical(
            'An unhandled error occured while running Lipizzaner: {}'.format(ex))
        traceback.print_exc()
        raise ex
    finally:
        LipizzanerMpiClient.is_busy = False
        LipizzanerMpiClient._logger.info('Finished experiment, waiting for new requests.')
        cc.output_dir = previous_output_dir
        ConcurrentPopulations.instance().lock()
def _task_discriminators(self, data):
    """Return all discriminator individuals as dicts, read under the population lock."""
    populations = ConcurrentPopulations.instance()
    populations.lock()
    discriminator_pop = populations.discriminator
    if discriminator_pop is None:
        serialized = []
    else:
        serialized = [individual_to_dict(ind) for ind in discriminator_pop.individuals]
    populations.unlock()
    return serialized
def __init__(self):
    """Read the grid dimensions from configuration and set up shared state.

    Mixture weights start as None and the grid as an empty array; both are
    filled in later.
    """
    self.cc = ConfigurationContainer.instance()

    grid_settings = self.cc.settings["general"]["distribution"]["grid"]
    self.grid_x = grid_settings["x_size"]
    self.grid_y = grid_settings["y_size"]
    self.grid_size = self.grid_x * self.grid_y

    # Modifications from Neighbourhood __init__
    self.concurrent_populations = ConcurrentPopulations.instance()
    self.node_client = CommsManager.instance()
    self.grid = np.array([])
    self._mixture_weights_generators = None
    self._mixture_weights_discriminators = None
def __init__(self):
    """Build this node's view of the topology and initialize mixture weights."""
    self.cc = ConfigurationContainer.instance()
    self.concurrent_populations = ConcurrentPopulations.instance()

    settings = self.cc.settings
    dataloader = self.cc.create_instance(settings['dataloader']['dataset_name'])
    network_factory = self.cc.create_instance(settings['network']['name'],
                                              dataloader.n_input_neurons)
    self.node_client = NodeClient(network_factory)

    self.grid_size, self.grid_position, self.local_node = self._load_topology_details()
    self.cell_number = self._load_cell_number()
    self.neighbours = self._adjacent_cells()
    # The local node is part of its own neighbourhood.
    self.all_nodes = self.neighbours + [self.local_node]

    self.mixture_weights_generators = self._init_mixture_weights()
    self.mixture_weights_discriminators = self._init_mixture_weights()
def _task_discriminators_best(self, data):
    """Return the best (lowest-fitness) discriminator as a one-element list.

    The population lock is taken only while training is still in progress;
    after the run has finished the populations are read without locking.
    """
    populations = ConcurrentPopulations.instance()
    still_running = not LipizzanerMpiClient.is_finished
    if still_running:
        populations.lock()
    discriminator_pop = populations.discriminator
    if discriminator_pop is None:
        serialized = []
    else:
        best = min(discriminator_pop.individuals, key=lambda ind: ind.fitness)
        serialized = [individual_to_dict(best)]
    if still_running:
        populations.unlock()
    return serialized
def __init__(self):
    """Build this node's topology view; discriminator mixture weights only for disc-mixture trainers."""
    self.cc = ConfigurationContainer.instance()
    self.concurrent_populations = ConcurrentPopulations.instance()

    dataloader = self.cc.create_instance(self.cc.settings['dataloader']['dataset_name'])
    network_factory = self.cc.create_instance(self.cc.settings['network']['name'],
                                              dataloader.n_input_neurons)
    # NodeClient handles communication with the other clients' APIs.
    self.node_client = NodeClient(network_factory)

    self.grid_size, self.grid_position, self.local_node = self._load_topology_details()
    self.cell_number = self._load_cell_number()
    self.neighbours = self._adjacent_cells()
    self.all_nodes = self.neighbours + [self.local_node]

    # Initial weights are uniform: 1 / len(all_nodes) for every node.
    self.mixture_weights_generators = self._init_mixture_weights()
    trainer_name = self.cc.settings['trainer']['name']
    if trainer_name in ('with_disc_mixture_wgan', 'with_disc_mixture_gan'):
        self.mixture_weights_discriminators = self._init_mixture_weights()
    else:
        self.mixture_weights_discriminators = None
def listen(self, port):
    """Start the client API server on the given port (blocking).

    Populations are locked before serving so requests cannot read them
    until a trainer publishes and unlocks them.
    """
    ClientEnvironment.port = port
    ConcurrentPopulations.instance().lock()
    self.app.run(host="0.0.0.0", port=port, threaded=True)
def __init__(
    self,
    dataloader,
    network_factory,
    population_size=10,
    tournament_size=2,
    mutation_probability=0.9,
    n_replacements=1,
    sigma=0.25,
    alpha=0.25,
    default_adam_learning_rate=0.001,
    calc_mixture=False,
    mixture_sigma=0.01,
    score_sample_size=10000,
    discriminator_skip_each_nth_step=0,
    enable_selection=True,
    fitness_sample_size=10000,
    calculate_net_weights_dist=False,
    fitness_mode="worst",
    es_score_sample_size=10000,
    es_random_init=False,
    checkpoint_period=0,
):
    """Initialize the trainer: populations, fitness settings, scoring and mixture optimization.

    Constructor defaults are fallbacks; every value is overridden by the
    corresponding key in ``self.settings`` (the trainer section of the
    configuration) when present. Raises ``KeyError`` when the configuration
    has no 'fitness' section, and ``NotImplementedError`` for an invalid
    ``fitness_mode``.
    """
    super().__init__(
        dataloader,
        network_factory,
        population_size,
        tournament_size,
        mutation_probability,
        n_replacements,
        sigma,
        alpha,
    )
    self.batch_number = 0
    self.cc = ConfigurationContainer.instance()
    self._default_adam_learning_rate = self.settings.get(
        "default_adam_learning_rate", default_adam_learning_rate)
    self._discriminator_skip_each_nth_step = self.settings.get(
        "discriminator_skip_each_nth_step",
        discriminator_skip_each_nth_step,
    )
    self._enable_selection = self.settings.get("enable_selection", enable_selection)
    self.mixture_sigma = self.settings.get("mixture_sigma", mixture_sigma)
    self.neighbourhood = Neighbourhood.instance()
    # Tag every individual as "<cell>/G<i>" or "<cell>/D<i>"; generators and
    # discriminators may use distinct learning rates if configured.
    for i, individual in enumerate(self.population_gen.individuals):
        individual.learning_rate = self.settings.get(
            "default_g_adam_learning_rate",
            self._default_adam_learning_rate,
        )
        individual.id = "{}/G{}".format(self.neighbourhood.cell_number, i)
    for i, individual in enumerate(self.population_dis.individuals):
        individual.learning_rate = self.settings.get(
            "default_d_adam_learning_rate",
            self._default_adam_learning_rate,
        )
        individual.id = "{}/D{}".format(self.neighbourhood.cell_number, i)
    # Publish the populations through the shared singleton and unlock them
    # for readers (e.g. the client API).
    self.concurrent_populations = ConcurrentPopulations.instance()
    self.concurrent_populations.generator = self.population_gen
    self.concurrent_populations.discriminator = self.population_dis
    self.concurrent_populations.unlock()
    experiment_id = self.cc.settings["general"]["logging"].get(
        "experiment_id", None)
    self.db_logger = DbLogger(current_experiment=experiment_id)
    if "fitness" in self.settings:
        self.fitness_sample_size = self.settings["fitness"].get(
            "fitness_sample_size", fitness_sample_size)
        self.fitness_loaded = self.dataloader.load()
        self.fitness_iterator = iter(
            self.fitness_loaded)  # Create iterator for fitness loader
        # Determine how to aggregate fitness calculated among neighbourhood
        # NOTE(review): the parameter default is "worst" but the valid set is
        # ["worse", "best", "average"] — relying on the default would raise
        # NotImplementedError. Confirm which spelling is intended.
        self.fitness_mode = self.settings["fitness"].get(
            "fitness_mode", fitness_mode)
        if self.fitness_mode not in ["worse", "best", "average"]:
            raise NotImplementedError(
                "Invalid argument for fitness_mode: {}".format(
                    self.fitness_mode))
    else:
        # TODO: Add code for safe implementation & error handling
        raise KeyError(
            "Fitness section must be defined in configuration file")
    n_iterations = self.cc.settings["trainer"].get("n_iterations", 0)
    # NOTE(review): this branch reads self.settings["score"] unconditionally,
    # but it is also entered when only "optimize_mixture" is configured —
    # that combination would raise KeyError. Verify against callers.
    if ("score" in self.settings and self.settings["score"].get(
            "enabled", calc_mixture)) or "optimize_mixture" in self.settings:
        self.score_calc = ScoreCalculatorFactory.create()
        self.score_sample_size = self.settings["score"].get(
            "sample_size", score_sample_size)
        # Reversed calculators treat lower as better, so start from +inf.
        self.score = float(
            "inf") if self.score_calc.is_reversed else float("-inf")
        self.mixture_generator_samples_mode = self.cc.settings["trainer"][
            "mixture_generator_samples_mode"]
    else:
        self.score_sample_size = score_sample_size
        self.score_calc = None
        self.score = 0
    # Evolution-strategy mixture-weight optimization; values from the
    # "optimize_mixture" section override the ES-related defaults above.
    if "optimize_mixture" in self.settings:
        self.optimize_weights_at_the_end = self.settings[
            "optimize_mixture"].get("enabled", True)
        self.score_sample_size = self.settings["optimize_mixture"].get(
            "sample_size", es_score_sample_size)
        self.es_generations = self.settings["optimize_mixture"].get(
            "es_generations", n_iterations)
        self.es_random_init = self.settings["optimize_mixture"].get(
            "es_random_init", es_random_init)
        self.mixture_sigma = self.settings["optimize_mixture"].get(
            "mixture_sigma", mixture_sigma)
        self.mixture_generator_samples_mode = self.cc.settings["trainer"][
            "mixture_generator_samples_mode"]
    else:
        self.optimize_weights_at_the_end = True
        self.score_sample_size = es_score_sample_size
        self.es_generations = n_iterations
        self.es_random_init = es_random_init
        self.mixture_sigma = mixture_sigma
        self.mixture_generator_samples_mode = self.cc.settings["trainer"][
            "mixture_generator_samples_mode"]
    # NOTE(review): "paramenter" is a typo in this runtime assertion message.
    assert 0 <= checkpoint_period <= n_iterations, (
        "Checkpoint period paramenter (checkpoint_period) should be "
        "between 0 and the number of iterations (n_iterations).")
    self.checkpoint_period = self.cc.settings["general"].get(
        "checkpoint_period", checkpoint_period)
def __init__(self, dataloader, network_factory, population_size=10, tournament_size=2, mutation_probability=0.9,
             n_replacements=1, sigma=0.25, alpha=0.25, default_adam_learning_rate=0.001, calc_mixture=False,
             mixture_sigma=0.01, score_sample_size=10000, discriminator_skip_each_nth_step=0, enable_selection=True,
             fitness_sample_size=10000, calculate_net_weights_dist=False, fitness_mode='worst'):
    """Initialize the trainer: populations, Grid neighbourhood, DB logging, scoring and fitness settings.

    Constructor defaults are fallbacks; each value is overridden by the
    corresponding key in ``self.settings`` when present. Raises ``KeyError``
    when the configuration has no 'fitness' section, ``NotImplementedError``
    for an invalid ``fitness_mode``.
    """
    super().__init__(dataloader, network_factory, population_size, tournament_size, mutation_probability,
                     n_replacements, sigma, alpha)
    self.batch_number = 0
    self.cc = ConfigurationContainer.instance()
    self._default_adam_learning_rate = self.settings.get(
        'default_adam_learning_rate', default_adam_learning_rate)
    self._discriminator_skip_each_nth_step = self.settings.get(
        'discriminator_skip_each_nth_step', discriminator_skip_each_nth_step)
    self._enable_selection = self.settings.get('enable_selection', enable_selection)
    self.mixture_sigma = self.settings.get('mixture_sigma', mixture_sigma)
    # This variant uses the Grid singleton as the neighbourhood abstraction.
    self.neighbourhood = Grid.instance()
    # Tag every individual as "<cell>/G<i>" or "<cell>/D<i>".
    for i, individual in enumerate(self.population_gen.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/G{}'.format(self.neighbourhood.cell_number, i)
    for i, individual in enumerate(self.population_dis.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/D{}'.format(self.neighbourhood.cell_number, i)
    # TRACE: A lock is created for setting the population? Uses a multithreading lock.
    # TODO: still need to check whether the singleton is thread safe.
    self.concurrent_populations = ConcurrentPopulations.instance()
    self.concurrent_populations.generator = self.population_gen
    self.concurrent_populations.discriminator = self.population_dis
    if self.concurrent_populations.locked():
        self.concurrent_populations.unlock()
    experiment_id = self.cc.settings['general']['logging'].get(
        'experiment_id', None)
    self.db_logger = DbLogger(current_experiment=experiment_id)
    # Optional score calculation; score stays neutral (0) when disabled.
    if 'score' in self.settings and self.settings['score'].get(
            'enabled', calc_mixture):
        self.score_calc = ScoreCalculatorFactory.create()
        self.score_sample_size = self.settings['score'].get(
            'sample_size', score_sample_size)
        # Reversed calculators treat lower as better, so start from +inf.
        self.score = float(
            'inf') if self.score_calc.is_reversed else float('-inf')
    else:
        self.score_calc = None
        self.score = 0
    if 'fitness' in self.settings:
        self.fitness_sample_size = self.settings['fitness'].get(
            'fitness_sample_size', fitness_sample_size)
        self.fitness_loaded = self.dataloader.load()
        self.fitness_iterator = iter(
            self.fitness_loaded)  # Create iterator for fitness loader
        # Determine how to aggregate fitness calculated among neighbourhood
        # NOTE(review): the parameter default is 'worst' but the valid set is
        # ['worse', 'best', 'average'] — relying on the default would raise
        # NotImplementedError. Confirm which spelling is intended.
        self.fitness_mode = self.settings['fitness'].get(
            'fitness_mode', fitness_mode)
        if self.fitness_mode not in ['worse', 'best', 'average']:
            raise NotImplementedError(
                "Invalid argument for fitness_mode: {}".format(
                    self.fitness_mode))
    else:
        # TODO: Add code for safe implementation & error handling
        raise KeyError(
            "Fitness section must be defined in configuration file")
def listen(self, port):
    """Start the client API server on the given port (blocking).

    Populations are locked before serving so requests cannot read them until
    a trainer publishes and unlocks them; JSON responses are emitted compact
    (no pretty-printing).
    """
    ClientEnvironment.port = port
    ConcurrentPopulations.instance().lock()
    self.app.config['JSONIFY_PRETTYPRINT_REGULAR'] = False
    self.app.run(host="0.0.0.0", port=port, threaded=True)
def __init__(self, dataloader, network_factory, population_size=10, tournament_size=2, mutation_probability=0.9,
             n_replacements=1, sigma=0.25, alpha=0.25, default_adam_learning_rate=0.001, calc_mixture=False,
             mixture_sigma=0.01, score_sample_size=10000, discriminator_skip_each_nth_step=0, enable_selection=True,
             fitness_sample_size=10000, calculate_net_weights_dist=False, fitness_mode='worst', es_generations=10,
             es_score_sample_size=10000, es_random_init=False, checkpoint_period=0):
    """Initialize the trainer: populations, fitness settings, scoring and optional ES mixture optimization.

    Constructor defaults are fallbacks; each value is overridden by the
    corresponding key in ``self.settings`` when present. Raises ``KeyError``
    when the configuration has no 'fitness' section, ``NotImplementedError``
    for an invalid ``fitness_mode``, and asserts that ``checkpoint_period``
    lies within [0, n_iterations].
    """
    super().__init__(dataloader, network_factory, population_size, tournament_size, mutation_probability,
                     n_replacements, sigma, alpha)
    self.batch_number = 0
    self.cc = ConfigurationContainer.instance()
    self._default_adam_learning_rate = self.settings.get(
        'default_adam_learning_rate', default_adam_learning_rate)
    self._discriminator_skip_each_nth_step = self.settings.get(
        'discriminator_skip_each_nth_step', discriminator_skip_each_nth_step)
    self._enable_selection = self.settings.get('enable_selection', enable_selection)
    self.mixture_sigma = self.settings.get('mixture_sigma', mixture_sigma)
    self.neighbourhood = Neighbourhood.instance()
    # Tag every individual as "<cell>/G<i>" or "<cell>/D<i>".
    for i, individual in enumerate(self.population_gen.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/G{}'.format(self.neighbourhood.cell_number, i)
    for i, individual in enumerate(self.population_dis.individuals):
        individual.learning_rate = self._default_adam_learning_rate
        individual.id = '{}/D{}'.format(self.neighbourhood.cell_number, i)
    # Publish populations through the shared singleton and unlock for readers.
    self.concurrent_populations = ConcurrentPopulations.instance()
    self.concurrent_populations.generator = self.population_gen
    self.concurrent_populations.discriminator = self.population_dis
    self.concurrent_populations.unlock()
    experiment_id = self.cc.settings['general']['logging'].get(
        'experiment_id', None)
    self.db_logger = DbLogger(current_experiment=experiment_id)
    if 'fitness' in self.settings:
        self.fitness_sample_size = self.settings['fitness'].get(
            'fitness_sample_size', fitness_sample_size)
        self.fitness_loaded = self.dataloader.load()
        self.fitness_iterator = iter(
            self.fitness_loaded)  # Create iterator for fitness loader
        # Determine how to aggregate fitness calculated among neighbourhood
        # NOTE(review): the parameter default is 'worst' but the valid set is
        # ['worse', 'best', 'average'] — relying on the default would raise
        # NotImplementedError. Confirm which spelling is intended.
        self.fitness_mode = self.settings['fitness'].get(
            'fitness_mode', fitness_mode)
        if self.fitness_mode not in ['worse', 'best', 'average']:
            raise NotImplementedError(
                "Invalid argument for fitness_mode: {}".format(
                    self.fitness_mode))
    else:
        # TODO: Add code for safe implementation & error handling
        raise KeyError(
            "Fitness section must be defined in configuration file")
    # Score setup: explicit 'score' section wins; otherwise an
    # 'optimize_mixture' section still requires a score calculator.
    if 'score' in self.settings and self.settings['score'].get(
            'enabled', calc_mixture):
        self.score_calc = ScoreCalculatorFactory.create()
        self.score_sample_size = self.settings['score'].get(
            'sample_size', score_sample_size)
        # Reversed calculators treat lower as better, so start from +inf.
        self.score = float(
            'inf') if self.score_calc.is_reversed else float('-inf')
        self.mixture_generator_samples_mode = self.cc.settings['trainer'][
            'mixture_generator_samples_mode']
    elif 'optimize_mixture' in self.settings:
        self.score_calc = ScoreCalculatorFactory.create()
        self.score = float(
            'inf') if self.score_calc.is_reversed else float('-inf')
    else:
        self.score_sample_size = score_sample_size
        self.score_calc = None
        self.score = 0
    # Evolution-strategy mixture-weight optimization (post-training).
    if 'optimize_mixture' in self.settings:
        self.optimize_weights_at_the_end = True
        self.score_sample_size = self.settings['optimize_mixture'].get(
            'sample_size', es_score_sample_size)
        self.es_generations = self.settings['optimize_mixture'].get(
            'es_generations', es_generations)
        self.es_random_init = self.settings['optimize_mixture'].get(
            'es_random_init', es_random_init)
        self.mixture_sigma = self.settings['optimize_mixture'].get(
            'mixture_sigma', mixture_sigma)
        self.mixture_generator_samples_mode = self.cc.settings['trainer'][
            'mixture_generator_samples_mode']
    else:
        self.optimize_weights_at_the_end = False
    n_iterations = self.cc.settings['trainer'].get('n_iterations', 0)
    # NOTE(review): "paramenter" is a typo in this runtime assertion message.
    assert 0 <= checkpoint_period <= n_iterations, 'Checkpoint period paramenter (checkpoint_period) should be ' \
                                                   'between 0 and the number of iterations (n_iterations).'
    self.checkpoint_period = self.cc.settings['general'].get(
        'checkpoint_period', checkpoint_period)