コード例 #1
0
class Heartbeat(Thread):
    """Background thread that periodically polls the status of all clients.

    Once ``run`` has returned, ``success`` is ``True`` if every monitored
    client reported ``finished``, ``False`` if the run was aborted because of
    dead clients, and ``None`` if the thread was stopped through the event
    before either condition was met.
    """

    def __init__(self, event, kill_clients_on_disconnect):
        """
        :param event: Object whose ``wait(timeout)`` paces the polling loop;
            the loop ends as soon as ``wait`` returns a truthy value.
        :param kill_clients_on_disconnect: When truthy, the first poll that
            finds a dead client stops the experiments and ends the thread.
        """
        Thread.__init__(self)
        self.kill_clients_on_disconnect = kill_clients_on_disconnect
        self.stopped = event
        self.success = None
        self.node_client = NodeClient(None)

    def run(self):
        """Poll the clients every HEARTBEAT_FREQUENCY_SEC until stopped or done."""
        while not self.stopped.wait(HEARTBEAT_FREQUENCY_SEC):
            client_statuses = self.node_client.get_client_statuses()
            # A client only counts as alive while it is both reachable and busy.
            alive_clients = [c for c in client_statuses if c['alive'] and c['busy']]
            dead_clients = [c for c in client_statuses if not (c['alive'] and c['busy'])]

            if dead_clients and self.kill_clients_on_disconnect:
                # Separate the addresses with ', ' so that individual hosts stay
                # readable (joining with '.' fused IP addresses together).
                printable_names = ', '.join(c['address'] for c in dead_clients)
                _logger.critical('Heartbeat: One or more clients ({}) are not alive anymore; '
                                 'exiting others as well.'.format(printable_names))

                self.node_client.stop_running_experiments(dead_clients)
                self.success = False
                return

            # NOTE(review): all() of an empty list is True, so this branch also
            # fires when no client is alive and killing is disabled - confirm
            # that this is the intended outcome.
            if all(c['finished'] for c in alive_clients):
                _logger.info('Heartbeat: All clients finished their experiments.')
                self.success = True
                return
コード例 #2
0
    def __init__(self):
        """Set up configuration access, the node client, grid topology and mixture weights."""
        self.cc = ConfigurationContainer.instance()
        self.concurrent_populations = ConcurrentPopulations.instance()

        # The network factory is built from the dataloader's input size.
        dataset_name = self.cc.settings['dataloader']['dataset_name']
        dataloader = self.cc.create_instance(dataset_name)
        network_name = self.cc.settings['network']['name']
        network_factory = self.cc.create_instance(network_name,
                                                  dataloader.n_input_neurons)
        self.node_client = NodeClient(network_factory)

        # Topology: grid size, own (x, y) position and own node entry.
        topology = self._load_topology_details()
        self.grid_size, self.grid_position, self.local_node = topology
        self.cell_number = self._load_cell_number()
        self.neighbours = self._adjacent_cells()
        self.all_nodes = self.neighbours + [self.local_node]

        # Generators and discriminators each get their own weight mapping.
        self.mixture_weights_generators = self._init_mixture_weights()
        self.mixture_weights_discriminators = self._init_mixture_weights()
コード例 #3
0
    def _terminate(self, stop_clients=True, return_code=-1):
        """Shut down the heartbeat thread and (optionally) the clients, then exit.

        :param stop_clients: When truthy, ask the clients to stop their running
            experiments through a fresh ``NodeClient``.
        :param return_code: Exit status handed to ``sys.exit``.
        :raises SystemExit: Always, unless the database bookkeeping in the
            ``finally`` clause itself raises first.
        """
        # ``exit`` is a helper injected by the ``site`` module for interactive
        # sessions and is absent under ``python -S``; ``sys.exit`` raises the
        # same SystemExit and is always available.
        import sys

        try:
            if self.heartbeat_thread:
                self._logger.info('Stopping heartbeat...')
                self.heartbeat_thread.stopped.set()
                self.heartbeat_thread.join()

            if stop_clients:
                self._logger.info('Stopping clients...')
                node_client = NodeClient(None)
                node_client.stop_running_experiments()
        finally:
            # Runs even if stopping the heartbeat or the clients failed, so the
            # experiment is still marked as finished before the process exits.
            db_logger = DbLogger()
            if db_logger.is_enabled and self.experiment_id is not None:
                db_logger.finish_experiment(self.experiment_id)

            sys.exit(return_code)
コード例 #4
0
    def __init__(self):
        """Set up configuration access, the node client, grid topology and mixture weights."""
        self.cc = ConfigurationContainer.instance()
        self.concurrent_populations = ConcurrentPopulations.instance()

        dataset_name = self.cc.settings['dataloader']['dataset_name']
        dataloader = self.cc.create_instance(dataset_name)
        network_name = self.cc.settings['network']['name']
        network_factory = self.cc.create_instance(network_name,
                                                  dataloader.n_input_neurons)
        # TRACE: the node client handles communication with the APIs of the other clients
        self.node_client = NodeClient(network_factory)

        topology = self._load_topology_details()
        self.grid_size, self.grid_position, self.local_node = topology
        self.cell_number = self._load_cell_number()
        self.neighbours = self._adjacent_cells()
        self.all_nodes = self.neighbours + [self.local_node]

        # TRACE: initial weights are generated for every node in all_nodes,
        # each one being 1 / (number of nodes in all_nodes)
        self.mixture_weights_generators = self._init_mixture_weights()

        # Discriminator weights are only kept for the trainers that mix discriminators.
        trainer_name = self.cc.settings['trainer']['name']
        uses_disc_mixture = trainer_name in ('with_disc_mixture_wgan',
                                             'with_disc_mixture_gan')
        self.mixture_weights_discriminators = (
            self._init_mixture_weights() if uses_disc_mixture else None)
コード例 #5
0
    def _gather_results(self):
        """Collect the final populations of all clients, persist them and log scores.

        For every result returned by ``NodeClient.gather_results`` this method
        saves the generator and discriminator state dicts into the node's
        output directory, appends the generator mixture weights to
        ``mixture.yml``, stores sample images, optionally calculates a score
        and optionally records the results through ``DbLogger``. A failure for
        one node is logged and does not stop the processing of the others.
        """
        self._logger.info('Collecting results from clients...')

        # Initialize node client
        dataloader = self.cc.create_instance(
            self.cc.settings['dataloader']['dataset_name'])
        network_factory = self.cc.create_instance(
            self.cc.settings['network']['name'], dataloader.n_input_neurons)
        node_client = NodeClient(network_factory)
        db_logger = DbLogger()

        # NOTE(review): 120 is presumably a timeout in seconds - confirm
        # against NodeClient.gather_results.
        results = node_client.gather_results(
            self.cc.settings['general']['distribution']['client_nodes'], 120)

        scores = []
        for (node, generator_pop, discriminator_pop, weights_generator,
             weights_discriminator) in results:
            node_name = '{}:{}'.format(node['address'], node['port'])
            try:
                output_dir = self.get_and_create_output_dir(node)

                for generator in generator_pop.individuals:
                    # ':' is replaced so the source can be used in a file name.
                    source = generator.source.replace(':', '-')
                    filename = '{}{}.pkl'.format(GENERATOR_PREFIX, source)
                    # Save under the same name that is recorded in mixture.yml
                    # below, mirroring the discriminator branch.
                    torch.save(generator.genome.net.state_dict(),
                               os.path.join(output_dir, filename))

                    # Append mode: one line per generator accumulates in the file.
                    with open(os.path.join(output_dir, 'mixture.yml'),
                              "a") as file:
                        file.write('{}: {}\n'.format(
                            filename, weights_generator[generator.source]))

                for discriminator in discriminator_pop.individuals:
                    source = discriminator.source.replace(':', '-')
                    filename = '{}{}.pkl'.format(DISCRIMINATOR_PREFIX, source)
                    torch.save(discriminator.genome.net.state_dict(),
                               os.path.join(output_dir, filename))

                # Save images
                dataset = MixedGeneratorDataset(
                    generator_pop, weights_generator,
                    self.cc.settings['master']['score_sample_size'],
                    self.cc.settings['trainer']
                    ['mixture_generator_samples_mode'])
                image_paths = self.save_samples(dataset, output_dir,
                                                dataloader)
                self._logger.info(
                    'Saved mixture result images of client {} to target directory {}.'
                    .format(node_name, output_dir))

                # Calculate inception or FID score; -inf is what gets stored
                # when score calculation is disabled.
                score = float('-inf')
                if self.cc.settings['master']['calculate_score']:
                    calc = ScoreCalculatorFactory.create()
                    self._logger.info('Score calculator: {}'.format(
                        type(calc).__name__))
                    self._logger.info(
                        'Calculating score score of {}. Depending on the type, this may take very long.'
                        .format(node_name))

                    score = calc.calculate(dataset)
                    self._logger.info(
                        'Node {} with weights {} yielded a score of {}'.format(
                            node_name, weights_generator, score))
                    scores.append((node, score))

                if db_logger.is_enabled and self.experiment_id is not None:
                    db_logger.add_experiment_results(self.experiment_id,
                                                     node_name, image_paths,
                                                     score)
            except Exception as ex:
                # Best effort per node: log the failure and continue with the rest.
                self._logger.error(
                    'An error occured while trying to gather results from {}: {}'
                    .format(node_name, ex))
                traceback.print_exc()

        if self.cc.settings['master']['calculate_score'] and scores:
            # The calculator's is_reversed flag sets the sort direction; the
            # last entry of the sorted list is reported as the best node.
            best_node = sorted(
                scores,
                key=lambda x: x[1],
                reverse=ScoreCalculatorFactory.create().is_reversed)[-1]
            self._logger.info('Best result: {}:{} = {}'.format(
                best_node[0]['address'], best_node[0]['port'], best_node[1]))
コード例 #6
0
class Neighbourhood:
    """View of the local cell and its adjacent cells on the square client grid.

    The configured client nodes are laid out row by row on a ``dim x dim``
    grid; the list index of a node maps to the position ``x = idx % dim``,
    ``y = idx // dim``.
    """

    def __init__(self):
        """Load configuration, create the node client and resolve the grid topology."""
        self.cc = ConfigurationContainer.instance()
        self.concurrent_populations = ConcurrentPopulations.instance()

        dataloader = self.cc.create_instance(
            self.cc.settings['dataloader']['dataset_name'])
        network_factory = self.cc.create_instance(
            self.cc.settings['network']['name'], dataloader.n_input_neurons)
        self.node_client = NodeClient(network_factory)

        self.grid_size, self.grid_position, self.local_node = self._load_topology_details(
        )
        self.cell_number = self._load_cell_number()
        self.neighbours = self._adjacent_cells()
        self.all_nodes = self.neighbours + [self.local_node]

        self.mixture_weights_generators = self._init_mixture_weights()
        # Discriminator weights are only kept for the discriminator-mixture trainers.
        if self.cc.settings['trainer']['name'] == 'with_disc_mixture_wgan' \
            or self.cc.settings['trainer']['name'] == 'with_disc_mixture_gan':
            self.mixture_weights_discriminators = self._init_mixture_weights()
        else:
            self.mixture_weights_discriminators = None

    @property
    def local_generators(self):
        """Local generator population with each individual's source set to this node."""
        # Return local individuals for now, possibility to split up gens and discs later
        return self._set_source(self.concurrent_populations.generator)

    @property
    def local_discriminators(self):
        """Local discriminator population with each individual's source set to this node."""
        # Return local individuals for now, possibility to split up gens and discs later
        return self._set_source(self.concurrent_populations.discriminator)

    @property
    def all_generators(self):
        """Population of the neighbours' generators followed by the local ones."""
        neighbour_individuals = self.node_client.get_all_generators(
            self.neighbours)
        local_population = self.local_generators

        return Population(individuals=neighbour_individuals +
                          local_population.individuals,
                          default_fitness=local_population.default_fitness,
                          population_type=TYPE_GENERATOR)

    @property
    def best_generators(self):
        """Population of the neighbours' best generators plus the best local one.

        The local individual with the smallest ``fitness`` value is taken as
        the best local one.
        """
        best_neighbour_individuals = self.node_client.get_best_generators(
            self.neighbours)
        local_population = self.local_generators
        best_local_individual = sorted(local_population.individuals,
                                       key=lambda x: x.fitness)[0]

        return Population(individuals=best_neighbour_individuals +
                          [best_local_individual],
                          default_fitness=local_population.default_fitness,
                          population_type=TYPE_GENERATOR)

    @property
    def all_discriminators(self):
        """Population of the neighbours' discriminators followed by the local ones."""
        neighbour_individuals = self.node_client.get_all_discriminators(
            self.neighbours)
        local_population = self.local_discriminators

        return Population(individuals=neighbour_individuals +
                          local_population.individuals,
                          default_fitness=local_population.default_fitness,
                          population_type=TYPE_DISCRIMINATOR)

    @property
    def all_generator_parameters(self):
        """Encoded parameters of the local generators followed by the neighbours' ones."""
        neighbour_generators = self.node_client.load_generators_from_api(
            self.neighbours)
        local_parameters = [
            i.genome.encoded_parameters
            for i in self.local_generators.individuals
        ]
        return local_parameters + [
            n['parameters'] for n in neighbour_generators
        ]

    @property
    def all_discriminator_parameters(self):
        """Encoded parameters of the local discriminators followed by the neighbours' ones."""
        neighbour_discriminators = self.node_client.load_discriminators_from_api(
            self.neighbours)
        local_parameters = [
            i.genome.encoded_parameters
            for i in self.local_discriminators.individuals
        ]
        return local_parameters + [
            n['parameters'] for n in neighbour_discriminators
        ]

    @property
    def best_generator_parameters(self):
        """Best generators loaded through the API for the neighbours and the local node."""
        return self.node_client.load_best_generators_from_api(
            self.neighbours + [self.local_node])

    @property
    def best_discriminator_parameters(self):
        """Best discriminators loaded through the API for the neighbours and the local node."""
        return self.node_client.load_best_discriminators_from_api(
            self.neighbours + [self.local_node])

    def _load_topology_details(self):
        """Locate the local node among the configured client nodes.

        :return: Tuple ``(number_of_nodes, (x, y), local_node)``.
        :raises Exception: If the node count is neither 1 nor a square number,
            or if not exactly one configured node matches this host and port.
        """
        client_nodes = self._all_nodes_on_grid()

        if len(client_nodes) != 1 and not is_square(len(client_nodes)):
            raise Exception(
                'Provide either one client node, or a square number of cells (to create a square grid).'
            )

        local_port = ClientEnvironment.port
        matching_nodes = [
            node for node in client_nodes if is_local_host(node['address'])
            and int(node['port']) == local_port
        ]

        if len(matching_nodes) == 1:
            dim = int(round(sqrt(len(client_nodes))))
            idx = client_nodes.index(matching_nodes[0])
            # Row-major layout: x is the column, y is the row.
            x = idx % dim
            y = idx // dim
            return len(client_nodes), (x, y), matching_nodes[0]
        else:
            raise Exception(
                'This host is not specified as client in the configuration file, '
                'or too many clients match the condition.')

    def _load_cell_number(self):
        """Return the row-major index ``y * dim + x`` of the local cell."""
        x, y = self.grid_position
        # Round the root the same way the other grid helpers do.
        return y * int(round(sqrt(self.grid_size))) + x

    def _adjacent_cells(self):
        """Return the nodes left, right, above and below the local cell.

        The grid wraps around at its borders. Duplicate positions (which occur
        on small grids) are returned once. A grid of size 1 has no neighbours.

        :return: List of node dicts in row-major grid order.
        """
        if self.grid_size == 1:
            return []

        # _all_nodes_on_grid already assigns the 'id' of every node.
        nodes = self._all_nodes_on_grid()

        dim = int(round(sqrt(len(nodes))))
        x, y = self.grid_position
        # After the reshape, the node at grid position (x, y) is nodes[y, x].
        nodes = np.reshape(nodes, (-1, dim))

        def neighbours(x, y):
            indices = np.array([(x - 1, y), (x, y - 1), (x + 1, y),
                                (x, y + 1)])
            # Start at 0 when x or y is out of bounds
            indices[indices >= dim] = 0
            indices[indices == -1] = dim - 1
            # Remove duplicates (needed for smaller grids), and convert to (x,y) tuples
            return np.array([tuple(row) for row in np.unique(indices, axis=0)])

        mask = np.zeros((dim, dim))
        # The mask must be indexed [row, column] == [y, x] to match the node
        # array; indexing it as [x, y] would select the neighbours of the
        # transposed grid position instead.
        columns, rows = neighbours(x, y).T
        mask[rows, columns] = 1

        return nodes[mask == 1].tolist()

    def _all_nodes_on_grid(self):
        """Return the configured client nodes, setting ``'id'`` ("address:port") on each.

        The node dicts from the settings are modified in place.
        """
        nodes = self.cc.settings['general']['distribution']['client_nodes']
        for node in nodes:
            node['id'] = '{}:{}'.format(node['address'], node['port'])
        return nodes

    def _set_source(self, population):
        """Set ``source`` of every individual to the local "address:port" and return the population."""
        for individual in population.individuals:
            individual.source = '{}:{}'.format(self.local_node['address'],
                                               self.local_node['port'])
        return population

    def _init_mixture_weights(self):
        """Return an ``OrderedDict`` giving every node id in ``all_nodes`` the same weight."""
        node_ids = [node['id'] for node in self.all_nodes]
        default_weight = 1 / len(node_ids)
        # Warning: Feature of order preservation in Dict is used in the mixture_weight
        #          initialized here because further code involves converting it to list
        # According to https://stackoverflow.com/a/39980548, it's still preferable/safer
        # to use OrderedDict over Dict in Python 3.6
        return OrderedDict({n_id: default_weight for n_id in node_ids})
コード例 #7
0
 def __init__(self, event, kill_clients_on_disconnect):
     """Initialise the thread, its node client and its bookkeeping attributes.

     :param event: Stored as ``stopped``.
     :param kill_clients_on_disconnect: Stored unchanged under the same name.
     """
     Thread.__init__(self)
     self.node_client = NodeClient(None)
     # No outcome is known yet.
     self.success = None
     self.stopped = event
     self.kill_clients_on_disconnect = kill_clients_on_disconnect