Exemple #1
0
    def init_lmon(self, argv):
        """Bring up the LaunchMON back-end and cache its basic properties.

        argv is the argument list handed to the back-end's init call,
        together with its length.

        On success the rank, size, master flag, proctab size and proctab are
        stored on self, _init_mpiranks() is run and True is returned.  If any
        of the LaunchMON calls raises lmon.LMONException, the LaunchMON error
        and a traceback are printed and False is returned.

        """
        backend = LMON_be()
        self.lmon = backend
        try:
            backend.init(len(argv), argv)
            # The pack/unpack callables are taken from the lmon module.
            backend.regPackForBeToFe(lmon.pack)
            backend.regUnpackForFeToBe(lmon.unpack)
            backend.handshake(None)
            backend.ready(None)
            self.lmon_rank = backend.getMyRank()
            self.lmon_size = backend.getSize()
            self.lmon_master = backend.amIMaster()
            self.proctab_size = backend.getMyProctabSize()
            # getMyProctab hands back a pair; only the first item is kept.
            self.proctab, _ignored = backend.getMyProctab(self.proctab_size)
        except lmon.LMONException as lmon_error:
            lmon_error.print_lmon_error()
            traceback.print_exc()
            return False
        else:
            self._init_mpiranks()
            return True
Exemple #2
0
    def init_lmon(self, argv):
        """Set up LaunchMON communication for this back-end.

        argv is the set of arguments to initialize with; both the list and
        its length are passed to the back-end's init call.

        Returns True once every LaunchMON call has succeeded and
        _init_mpiranks() has run.  Returns False, after printing the LaunchMON
        error and a traceback, if lmon.LMONException is raised along the way.

        """
        lmon_be = LMON_be()
        self.lmon = lmon_be
        try:
            lmon_be.init(len(argv), argv)
            lmon_be.regPackForBeToFe(lmon.pack)
            lmon_be.regUnpackForFeToBe(lmon.unpack)
            lmon_be.handshake(None)
            lmon_be.ready(None)
            # Cache what the back-end reports about itself on this object.
            self.lmon_rank = lmon_be.getMyRank()
            self.lmon_size = lmon_be.getSize()
            self.lmon_master = lmon_be.amIMaster()
            self.proctab_size = lmon_be.getMyProctabSize()
            # The second element of the returned pair is not used.
            self.proctab, _discarded = lmon_be.getMyProctab(self.proctab_size)
        except lmon.LMONException as failure:
            failure.print_lmon_error()
            traceback.print_exc()
            return False
        else:
            self._init_mpiranks()
            return True
Exemple #3
0
class CommunicatorBE(Communicator):
    """Communicator for the back-end.

    init_lmon() must succeed before init_mrnet() is called: init_mrnet()
    reads self.lmon and self.lmon_master, which init_lmon() sets.

    """

    def __init__(self, locking=False):
        """Forward the locking flag to the Communicator base initializer."""
        Communicator.__init__(self, locking)

    def init_lmon(self, argv):
        """Initialize LaunchMON communication.

        argv is the set of arguments to initialize with.

        On success, stores lmon_rank, lmon_size, lmon_master, proctab_size
        and proctab on self, calls _init_mpiranks() and returns True.
        Returns False (after printing the LaunchMON error and a traceback)
        if any LaunchMON call raises lmon.LMONException.

        """
        self.lmon = LMON_be()
        try:
            self.lmon.init(len(argv), argv)
            self.lmon.regPackForBeToFe(lmon.pack)
            self.lmon.regUnpackForFeToBe(lmon.unpack)
            self.lmon.handshake(None)
            self.lmon.ready(None)
            self.lmon_rank = self.lmon.getMyRank()
            self.lmon_size = self.lmon.getSize()
            self.lmon_master = self.lmon.amIMaster()
            self.proctab_size = self.lmon.getMyProctabSize()
            # Only the first element of the returned pair is needed.
            self.proctab, _unused = self.lmon.getMyProctab(self.proctab_size)
        except lmon.LMONException as e:
            e.print_lmon_error()
            traceback.print_exc()
            return False
        self._init_mpiranks()
        return True

    def _wait_for_hello(self):
        """Wait until we receive a HELLO message on MRnet from the front-end.

        The stream the HELLO arrived on is remembered as
        self.mrnet_frontend_stream.  If the first message received is not a
        HELLO, a message is printed and the process exits with status 1.

        """
        msg, stream = self.recv(ret_stream=True)
        if msg.msg_type != HELLO_MSG:
            # Parenthesized single-argument form: prints the same text under
            # Python 2 and is also valid syntax under Python 3.
            print("First message is not hello!")
            sys.exit(1)
        self.mrnet_frontend_stream = stream

    def _init_mrnet_streams(self):
        """Initialize basic MRNet streams (all start out as None)."""
        self.broadcast_communicator = None
        self.mrnet_broadcast_stream = None
        self.mrnet_frontend_stream = None  # Filled in by _wait_for_hello.

    def init_mrnet(self):
        """Initialize MRNet.

        The LaunchMON master receives the topology from the front-end as
        user data and scatters it; every back-end (master included) gets its
        own node info from that scatter.  The node info is then used to
        build the argument list for the MRNet back-end network, after which
        the shared MRNet setup runs and we wait for the front-end's HELLO.

        Returns True on success, or False (after printing the LaunchMON
        error and a traceback) if a LaunchMON call raises
        lmon.LMONException.

        """
        # Presently uses a node info size of 256.
        node_info_size = 256
        try:
            if self.lmon_master:
                # Receive topology information from front-end.
                node_info = self.lmon.recvUsrData(
                    gdbconf.topology_transmit_size)
            else:
                # Non-masters pass None and only receive their share.
                node_info = None
            # Scatter topology information to the back-ends.
            local_node_info = self.lmon.scatter(node_info, node_info_size)
        except lmon.LMONException as e:
            e.print_lmon_error()
            traceback.print_exc()
            return False
        # Construct MRNet arguments and create network.
        argv = [sys.argv[0],  # Program name.
                str(local_node_info.host),  # Comm node host.
                str(local_node_info.port),  # Comm node port.
                str(local_node_info.mrnrank),  # Comm node rank.
                socket.getfqdn(),  # My host.
                str(local_node_info.be_rank)]  # My rank.
        # Derive the argument count from the list so the two cannot drift
        # apart if the list is ever edited.
        self.mrnet = MRN.Network.CreateNetworkBE(len(argv), argv)
        self._init_shared_mrnet()
        self._wait_for_hello()
        return True

    def shutdown(self):
        """Shut down the communication infrastructure.

        Polls every 0.1 seconds until the front-end stream reports closed,
        then waits for MRNet shutdown, drops the network object, finalizes
        LaunchMON and sets self.been_shutdown to True.

        """
        while not self.mrnet_frontend_stream.is_Closed():
            time.sleep(0.1)
        self.mrnet.waitfor_ShutDown()
        del self.mrnet
        self.lmon.finalize()
        self.been_shutdown = True
Exemple #4
0
class CommunicatorBE(Communicator):
    """Communicator for the back-end.

    Call order matters: init_mrnet() uses self.lmon and self.lmon_master,
    both of which are only set by init_lmon().

    """

    def __init__(self, locking=False):
        """Initialize the base Communicator with the given locking flag."""
        Communicator.__init__(self, locking)

    def init_lmon(self, argv):
        """Initialize LaunchMON communication.

        argv is the set of arguments to initialize with.

        Returns True after all LaunchMON calls succeeded and
        _init_mpiranks() has run; the rank, size, master flag, proctab size
        and proctab are then available on self.  Returns False if
        lmon.LMONException is raised; the error and a traceback are printed
        first.

        """
        self.lmon = LMON_be()
        try:
            self.lmon.init(len(argv), argv)
            self.lmon.regPackForBeToFe(lmon.pack)
            self.lmon.regUnpackForFeToBe(lmon.unpack)
            self.lmon.handshake(None)
            self.lmon.ready(None)
            self.lmon_rank = self.lmon.getMyRank()
            self.lmon_size = self.lmon.getSize()
            self.lmon_master = self.lmon.amIMaster()
            self.proctab_size = self.lmon.getMyProctabSize()
            # The second item of the returned pair is deliberately dropped.
            self.proctab, _unused = self.lmon.getMyProctab(self.proctab_size)
        except lmon.LMONException as e:
            e.print_lmon_error()
            traceback.print_exc()
            return False
        self._init_mpiranks()
        return True

    def _wait_for_hello(self):
        """Wait until we receive a HELLO message on MRnet from the front-end.

        Stores the stream that carried the HELLO in
        self.mrnet_frontend_stream.  Prints a message and exits the process
        with status 1 if the first message is of any other type.

        """
        msg, stream = self.recv(ret_stream=True)
        if msg.msg_type != HELLO_MSG:
            # Written as a call with one argument so the output is the same
            # on Python 2 while the module also parses on Python 3.
            print("First message is not hello!")
            sys.exit(1)
        self.mrnet_frontend_stream = stream

    def _init_mrnet_streams(self):
        """Initialize basic MRNet streams by resetting them to None."""
        self.broadcast_communicator = None
        self.mrnet_broadcast_stream = None
        self.mrnet_frontend_stream = None  # Filled in by _wait_for_hello.

    def init_mrnet(self):
        """Initialize MRNet.

        Steps:
          1. The LaunchMON master receives the topology from the front-end.
          2. A LaunchMON scatter gives each back-end its own node info.
          3. The node info is turned into the MRNet back-end argument list
             and the network is created.
          4. Shared MRNet setup runs, then we block until the front-end's
             HELLO arrives.

        Returns True on success.  Returns False, after printing the
        LaunchMON error and a traceback, if step 1 or 2 raises
        lmon.LMONException.

        """
        # Presently uses a node info size of 256.
        node_info_size = 256
        try:
            # Only the master has topology data to contribute; everyone else
            # passes None and just receives its share of the scatter.
            node_info = (
                self.lmon.recvUsrData(gdbconf.topology_transmit_size)
                if self.lmon_master else None)
            local_node_info = self.lmon.scatter(node_info, node_info_size)
        except lmon.LMONException as e:
            e.print_lmon_error()
            traceback.print_exc()
            return False
        # Construct MRNet arguments and create network.
        argv = [
            sys.argv[0],  # Program name.
            str(local_node_info.host),  # Comm node host.
            str(local_node_info.port),  # Comm node port.
            str(local_node_info.mrnrank),  # Comm node rank.
            socket.getfqdn(),  # My host.
            str(local_node_info.be_rank),  # My rank.
        ]
        # Pass the real length rather than a literal so the count always
        # matches the list above.
        self.mrnet = MRN.Network.CreateNetworkBE(len(argv), argv)
        self._init_shared_mrnet()
        self._wait_for_hello()
        return True

    def shutdown(self):
        """Shut down the communication infrastructure.

        Sleeps in 0.1 second steps until the front-end stream reports
        closed, waits for MRNet to shut down, deletes the network object,
        finalizes LaunchMON and finally sets self.been_shutdown to True.

        """
        while not self.mrnet_frontend_stream.is_Closed():
            time.sleep(0.1)
        self.mrnet.waitfor_ShutDown()
        del self.mrnet
        self.lmon.finalize()
        self.been_shutdown = True