Ejemplo n.º 1
0
def wait_for_shutdown(process_list):
    """
    Wait until every process in ``process_list`` has terminated.

    Each process is polled once per pass; between passes the function sleeps
    for one second. Finished processes are removed from ``process_list`` in
    place, so the caller's list is empty when this function returns. Once all
    processes are gone, every entry of the module-level ``io_target_list`` is
    closed.

    :param process_list: mutable list of process objects exposing ``poll()``.
        A ``None`` result is treated as "still running", anything else as the
        exit status of the process.
    :return: list with the exit code of every process, in the order in which
        the processes were observed to have finished.
    """
    logger = Logger()
    exit_codes = []
    while process_list:
        finished = []
        for p in process_list:
            returncode = p.poll()
            #logger.debug("Got return code: %s" % returncode)

            if returncode is None:  # still alive
                continue

            exit_codes.append(returncode)
            finished.append(p)
            remaining = len(process_list) - len(finished)

            if returncode == 0:  # exited correctly
                logger.debug(
                    "A process exited with a status of 0. And we have %i left."
                    % remaining)
            else:  # error code
                logger.debug(
                    "A process exited with return code %d. And we have %i left."
                    % (returncode, remaining))

        # Remove outside the iteration over the list: mutating a list while
        # looping over it makes the loop skip elements.
        for p in finished:
            process_list.remove(p)

        # Only pause while there is still something to wait for, so shutdown
        # is not delayed by a second after the last process has gone.
        if process_list:
            time.sleep(1)

    # Target list is empty unless the option process_io=localfile is specified, in
    # which case we close the filedescriptors of all the log files made
    for t in io_target_list:
        t.close()

    return exit_codes
Ejemplo n.º 2
0
    def __init__(self):
        """
        Initializes the MPI environment. This will give each process a separate
        rank in the MPI_COMM_WORLD communicator along with the total number of
        processes in the communicator. Both attributes can be read just after
        startup::

            from mpi import MPI

            mpi = MPI()
            rank = mpi.MPI_COMM_WORLD.rank()
            size = mpi.MPI_COMM_WORLD.size()

            print "Proc %d of %d started" % (rank, size)

            mpi.finalize()

        Side effects visible in this method: depending on the ``process_io``
        option ``sys.stdout`` / ``sys.stderr`` are replaced, ``sys.argv`` is
        rewritten to contain only the arguments after ``--``, and the thread
        is started via ``self.start()`` before the constructor returns.

        Raises ``MPIException`` if ``process_io`` is ``"remotefile"`` and the
        per-rank I/O file cannot be opened for writing.
        """
        # Imported once at function scope. A function-level import makes
        # ``os`` a local name for the whole function, so importing it only
        # inside one branch leaves it unbound (UnboundLocalError) in the
        # relative-logdir handling further down when that branch is not taken.
        import os

        Thread.__init__(self)

        self.name = "MPI" # Thread name

        # Startup time. Used in Wtime() implementation.
        self.startup_timestamp = time.time()

        # Event for handling thread packing.
        self.packing = threading.Event()

        # Data structures for jobs.
        # The locks are for guarding the data structures
        # The events are for signalling change in data structures

        # Pending requests are receive requests where the data may or may not have arrived
        self.pending_requests = []
        self.pending_requests_lock = threading.Lock()
        self.pending_requests_has_work = threading.Event()

        # Raw data are messages that have arrived but not been unpickled yet
        self.raw_data_queue = []
        self.raw_data_lock = threading.Lock()
        self.raw_data_has_work = threading.Event()

        # Received data are messages that have arrived and are unpickled
        # (ie. ready for matching with a posted recv request)
        # There are no events as this is handled through the "pending_request_" event.
        self.received_data = []
        self.received_data_lock = threading.Lock()

        # General event to wake up main mpi thread
        self.has_work_event = threading.Event()

        # Shutdown signals
        self.shutdown_event = threading.Event() # MPI finalize has been called, shutdown in progress

        # Lock and counter for enumerating request ids
        self.current_request_id_lock = threading.Lock()
        self.current_request_id = 0

        # Pending system commands. These will be executed at first chance we have (we
        # need access to the user code). We also have a lock around the list, to ensure
        # proper access.
        self.pending_systems_commands = []
        self.pending_systems_commands_lock = threading.Lock()

        # Unstarted collective requests.
        self.unstarted_collective_requests = []
        self.unstarted_collective_requests_lock = threading.Lock()
        self.unstarted_collective_requests_has_work = threading.Event()

        # When the collective requests are started they are moved to this queue until
        # they are finished.
        self.pending_collective_requests = []

        self.received_collective_data_lock = threading.Lock()
        self.received_collective_data = []
        self.pending_collective_requests_has_work = threading.Event()

        # The settings module. This will be handled properly by the
        # function ``generate_settings``.
        self.settings = None
        self.config_callbacks = []

        # Append callbacks
        from mpi.settings import standard_callbacks
        self.config_callbacks.extend(standard_callbacks)

        options = self.parse_options()

        # TODO: See if logger initialisations below here shouldn't be refactored into one

        # Decide how to deal with I/O
        if options.process_io == "remotefile":
            # Initialise the logger
            logger = Logger(os.path.join(options.logdir,"remotelog"), "proc-%d" % options.rank, options.debug, options.verbosity, True)
            filename = constants.DEFAULT_LOGDIR+'mpi.local.rank%s.log' % options.rank

            logger.debug("Opening file for I/O: %s" % filename)
            try:
                output = open(filename, "w")
            except (IOError, OSError):
                # Only I/O failures are translated; a bare ``except`` would
                # also swallow KeyboardInterrupt and SystemExit.
                raise MPIException("File for I/O not writeable - check that this path exists and is writeable:\n%s" % constants.DEFAULT_LOGDIR)

            # The file is deliberately left open: it replaces the standard
            # streams of this process.
            sys.stdout = output
            sys.stderr = output
        elif options.process_io == "none":
            # Initialise the logger
            logger = Logger(options.logdir+"mpi", "proc-%d" % options.rank, options.debug, options.verbosity, True)
            logger.debug("Closing stdout")
            sys.stdout = None
        else:
            # Initialise the logger
            logger = Logger(options.logdir+"mpi", "proc-%d" % options.rank, options.debug, options.verbosity, options.quiet)

        # TODO: Put this info under settings when they start to work properly
        #       Also we should check that the path here is accessible and valid
        # if filepath starts with something else than / it is a relative path and we assume it relative to pupympi dir
        if not options.logdir.startswith('/'):
            _BASE = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
            self.logdir = os.path.join(_BASE,options.logdir)
        else:
            self.logdir = options.logdir

        # Parse and save settings.
        self.generate_settings(options.settings)

        # Attributes for the security component.
        self.disable_utilities = options.disable_utilities
        self.security_component = None

        # First check for required Python version
        self._version_check()

        # Check for yappi support
        self._yappi_enabled = False
        if options.yappi:
            try:
                import yappi
                self._yappi_enabled = True
                self._yappi_sorttype = yappi.SORTTYPE_NCALL

                if options.yappi_sorttype:
                    # Option value -> name of the yappi constant. The constant
                    # is looked up lazily so only the selected one is accessed.
                    sorttype_names = {
                        'name': 'SORTTYPE_NAME',
                        'ncall': 'SORTTYPE_NCALL',
                        'ttotal': 'SORTTYPE_TTOTAL',
                        'tsub': 'SORTTYPE_TSUB',
                        'tavg': 'SORTTYPE_TAVG',
                    }
                    sorttype_attr = sorttype_names.get(options.yappi_sorttype)
                    if sorttype_attr is None:
                        logger.warn("Unknown yappi sorttype '%s' - defaulting to ncall." % options.yappi_sorttype)
                    else:
                        self._yappi_sorttype = getattr(yappi, sorttype_attr)

            except ImportError:
                logger.warn("Yappi is not supported on this system. Statistics will not be logged.")
                self._yappi_enabled = False

        # Start built-in profiling facility
        self._profiler_enabled = False
        if options.enable_profiling:
            if self._yappi_enabled:
                logger.warn("Running yappi and pupyprof simultaneously is unpossible. Pupyprof has been disabled.")
            else:
                try:
                    # ``pupyprof`` is only bound when this import succeeds; the
                    # later ``pupyprof.start()`` is guarded by
                    # ``self._profiler_enabled`` for that reason.
                    import pupyprof
                    self._profiler_enabled = True
                except ImportError:
                    logger.warn("Pupyprof is not supported on this system. Tracefile will not be generated")
                    self._profiler_enabled = False

        # Set a resume parameter indicating if we are resuming a packed job.
        # This will be changed (maybe) in the network startup.
        self.resume = False

        # Enable a register for the users to put values in. This register can be read
        # with the readregister.py script found in bin/utils/
        self.user_register = {}

        # Place to keep functions needed when packing / unpacking the running MPI
        # instance. The best place to start is migrate.py
        self.migrate_onpack = None

        self.network = Network(self, options)

        # Create the initial global Group, and assign the network all_procs as members
        world_Group = Group(options.rank)
        world_Group.members = self.network.all_procs

        # Create the initial communicator MPI_COMM_WORLD. It is initialized with
        # the rank of the process that holds it and size.
        # The members are filled out after the network is initialized.
        self.communicators = {}

        self.MPI_COMM_WORLD = Communicator(self, options.rank, options.size, self.network, world_Group, comm_root=None)

        # Tell the network about the global MPI_COMM_WORLD, and let it start to
        # listen on the corresponding network channels
        self.network.MPI_COMM_WORLD = self.MPI_COMM_WORLD

        # Change the contents of sys.argv runtime, so the user processes
        # can't see all the mpi specific parameters we start with.
        # Everything after the "--" separator belongs to the user program.
        user_options = [sys.argv[0]]
        user_options.extend(sys.argv[sys.argv.index("--")+1:])
        sys.argv = user_options

        # Set up the global mpi constants
        constants.MPI_GROUP_EMPTY = Group()

        self.daemon = True

        resumer = None
        if self.resume:
            resumer = self.resume_packed_state()

        self.start()

        # Make every node connect to each other if settings specify it
        if not options.disable_full_network_startup:
            self.network.start_full_network()

        self.initinfo = (self.MPI_COMM_WORLD, self.MPI_COMM_WORLD.rank(), self.MPI_COMM_WORLD.size())

        # Set a static attribute on the class so we know it is initialised.
        self.__class__._initialized = True

        if self._profiler_enabled:
            pupyprof.start()

        if self.resume and resumer:
            resumer(self)
Ejemplo n.º 3
0
def terminate_children():
    """
    Call ``terminate()`` on every entry of the module-level ``process_list``.

    A debug line naming the process is logged before each termination request.
    """
    for child in process_list:
        # A logger is obtained per child, right before that child is terminated.
        Logger().debug("Killing %s" % child)
        child.terminate()