コード例 #1
0
    def __init__(self):
        """Set up the master, launch the experiments and wait for the heartbeat.

        The constructor starts communications, seeds the random generators
        via ``set_random_seed``, installs ``self._sigint`` as the SIGINT
        handler, calls ``self._start_experiments()`` and then blocks on the
        heartbeat thread. Once that thread has finished, its ``success``
        flag decides whether results are gathered and ``_terminate`` is
        called with return code 0, or ``_terminate`` is called with -1.
        """
        self.cc = ConfigurationContainer.instance()
        self.comms = CommsManager.instance()
        self.comms.start_comms()
        # TODO: the size check is still missing
        self.topology = TopologyManager.instance()
        self.grid = GridManager.instance()

        self.experiment_id = None

        seed = self.cc.settings['general']['seed']
        set_random_seed(seed,
                        self.cc.settings['trainer']['params']['score']['cuda'])
        self._logger.info("Seed used in master: {}".format(seed))

        self.heartbeat_event = Event()
        self.heartbeat_thread = Heartbeat(self.heartbeat_event, False)

        signal.signal(signal.SIGINT, self._sigint)

        self._start_experiments()

        self.heartbeat_thread.start()
        self._logger.info("Started heartbeat")
        # Block until the heartbeat thread has finished.
        self.heartbeat_thread.join()

        if self.heartbeat_thread.success:
            self._logger.info("Heartbeat stopped with success")
            self._gather_results()
            self._terminate(stop_clients=True, return_code=0)
        else:
            # A falsy ``success`` (including None) takes the error path.
            self._logger.info("Heartbeat stopped with error")
            self._terminate(stop_clients=False, return_code=-1)
コード例 #2
0
    def __init__(self):
        """Start communications, reset the client status flags and run.

        ``is_busy`` and ``is_finished`` are set on the
        ``LipizzanerMpiClient`` class itself, not on this instance.
        The constructor ends by calling ``self.run()``.
        """
        comms = CommsManager.instance()
        self.comms = comms
        comms.start_comms()
        self.grid = Grid.instance()

        # Class-level flags, both reset to False before running.
        LipizzanerMpiClient.is_busy = LipizzanerMpiClient.is_finished = False

        self.run()
コード例 #3
0
    def __init__(self):
        """Read the grid dimensions from the settings and initialise state.

        ``grid_size`` is the product of the configured ``x_size`` and
        ``y_size``. The grid itself starts as an empty NumPy array and both
        mixture-weight attributes start as ``None``.
        """
        self.cc = ConfigurationContainer.instance()
        grid_settings = self.cc.settings["general"]["distribution"]["grid"]
        self.grid_x = grid_settings["x_size"]
        self.grid_y = grid_settings["y_size"]
        self.grid_size = self.grid_x * self.grid_y

        # Modifications from Neighbourhood __init__
        self.concurrent_populations = ConcurrentPopulations.instance()
        self.node_client = CommsManager.instance()
        self.grid = np.array([])

        self._mixture_weights_generators = None
        self._mixture_weights_discriminators = None
コード例 #4
0
    def _gather_results(source):
        """Collect the grid's best results and send them to ``source``.

        The result dictionary is built, and ``_finish_event`` is set, while
        holding ``LipizzanerMpiClient._lock``. The lock is released in a
        ``finally`` clause so that an exception raised while reading the
        grid or the settings cannot leave it held. The results are sent
        with ``CommsManager.isend`` after the lock has been released.

        :param source: destination passed through to ``comms.isend``.
        """
        lock = LipizzanerMpiClient._lock
        lock.acquire()
        try:
            neighbourhood = Grid.instance()
            cc = ConfigurationContainer.instance()
            results = {
                'generators': neighbourhood.best_generator_parameters,
                'discriminators': neighbourhood.best_discriminator_parameters,
                'weights_generators': neighbourhood.mixture_weights_generators
            }
            # Only these two trainers report discriminator mixture weights;
            # every other trainer gets a 0.0 placeholder.
            if cc.settings['trainer']['name'] in ('with_disc_mixture_wgan',
                                                  'with_disc_mixture_gan'):
                results['weights_discriminators'] = \
                    neighbourhood.mixture_weights_discriminators
            else:
                results['weights_discriminators'] = 0.0
            LipizzanerMpiClient._finish_event.set()
        finally:
            lock.release()

        comms = CommsManager.instance()
        comms.isend(results, source)
コード例 #5
0
    def __init__(self):
        """Build the processing-unit bookkeeping from ``comms.nodes_info``.

        Each entry of ``nodes_info`` is identified by its position in the
        sequence. The index equal to ``comms.root`` goes into ``active_pu``
        with role ``"root"``; every other index goes into ``inactive_pu``
        with role ``"none"``. ``node_topology`` maps each entry's ``"node"``
        value to the set of indices that share it.
        """
        comms = CommsManager.instance()

        self.active_pu = set()
        self.inactive_pu = set()
        self.offline_pu = set()

        self.pu_info = {}
        self.node_topology = {}
        for index, info in enumerate(comms.nodes_info):
            # pu_info load
            is_root = index == comms.root
            target = self.active_pu if is_root else self.inactive_pu
            target.add(index)
            self.pu_info[index] = {
                "info": info,
                "role": "root" if is_root else "none",
            }

            # node_topology
            self.node_topology.setdefault(info["node"], set()).add(index)
コード例 #6
0
ファイル: heartbeat_mpi.py プロジェクト: emiperez95/hpcFinal
 def __init__(self, event, kill_clients_on_disconnect):
     """Initialise the thread base class and store the given arguments.

     :param event: kept on the instance as ``self.stopped``.
     :param kill_clients_on_disconnect: kept on the instance unchanged.

     ``success`` starts as ``None``.
     """
     Thread.__init__(self)
     self.stopped, self.success = event, None
     self.kill_clients_on_disconnect = kill_clients_on_disconnect
     self.node_client = CommsManager.instance()
コード例 #7
0
 def get_worker_pu(self):
     """Return ``active_pu`` without the communication manager's root."""
     active = self.active_pu
     root_only = {CommsManager.instance().root}
     return active - root_only