Example #1
def __init__(self, log_root=None, acc=None,
             pg=None, lg=None,
             job_dur=30,
             num_nodes=5,
             run_proxy=False,
             mon_host=default_aws_mon_host,
             mon_port=default_aws_mon_port,
             logv=1,
             facility=socket.gethostname().split('-')[0],
             zerorun=False,
             max_threads=0,
             sleepncopy=False,
             num_islands=1,
             all_nics=False,
             check_with_session=False):
    self._config = ConfigFactory.create_config(facility=facility)
    self._acc = self._config.getpar('acc') if (acc is None) else acc
    self._log_root = self._config.getpar('log_root') if (log_root is None) else log_root
    self._num_nodes = num_nodes
    self._job_dur = job_dur
    self._lg = lg
    self._pg = pg
    self._graph_vis = False
    self._run_proxy = run_proxy
    self._mon_host = mon_host
    self._mon_port = mon_port
    self._pip_name = utils.fname_to_pipname(lg or pg) if lg or pg else 'None'
    self._logv = logv
    self._zerorun = zerorun
    self._max_threads = max_threads
    self._sleepncopy = sleepncopy
    self._num_islands = num_islands
    self._all_nics = all_nics
    self._check_with_session = check_with_session
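
This constructor simply caches the deployment options (graphs, monitoring host/port, node and island counts, etc.) and loads a facility-specific configuration. The default facility is derived from the local hostname; a minimal, runnable sketch of that derivation follows (the hostname shown in the comment is illustrative, not taken from a real cluster):

import socket

# How the 'facility' default above is computed: take the hostname's leading
# token, i.e. everything before the first '-'.
hostname = socket.gethostname()     # e.g. 'galaxy-ingest01' (illustrative)
facility = hostname.split('-')[0]   # -> 'galaxy' on that example host
print(facility)
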
Example #2
def dlg_unroll_and_partition(parser, args):

    _add_logging_options(parser)
    _add_output_options(parser)
    apps = _add_unroll_options(parser)
    _add_partition_options(parser)
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    pip_name = utils.fname_to_pipname(opts.lg_path)
    pgt = unroll(opts.lg_path,
                 opts.oid_prefix,
                 zerorun=opts.zerorun,
                 app=apps[opts.app])
    dump(partition(pgt, pip_name, opts.partitions, opts.islands, opts.algo))
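
dlg_unroll_and_partition derives a pipeline name from the logical-graph file name, unrolls the logical graph into a physical graph template, partitions it, and hands the result to the configured output. Below is a hedged, self-contained stand-in for utils.fname_to_pipname as it is used here; the real dfms.utils implementation may differ, and the path is illustrative:

import os.path

# Assumed behaviour of fname_to_pipname: keep only the base file name,
# without directory or extension.
def fname_to_pipname(fname):
    return os.path.splitext(os.path.basename(fname))[0]

print(fname_to_pipname('/graphs/lofar_std.json'))   # -> 'lofar_std'
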
Example #3
def dlg_partition(parser, args):

    _add_logging_options(parser)
    _add_output_options(parser)
    _add_partition_options(parser)
    parser.add_option(
        '-P',
        '--physical-graph-template',
        action='store',
        dest='pgt_path',
        type='string',
        help='Path to the Physical Graph Template (default: stdin)',
        default='-')
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    pip_name = utils.fname_to_pipname(opts.pgt_path)
    with _open_i(opts.pgt_path) as fi:
        pgt = json.load(fi)
    dump(partition(pgt, pip_name, opts.partitions, opts.islands, opts.algo))
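
dlg_partition reads an already-unrolled physical graph template either from a file or, when the path is '-', from stdin. A minimal sketch of that convention, assuming _open_i behaves roughly like the stand-in below (the real helper is not shown in this example):

import contextlib
import json
import sys

# Hedged stand-in for _open_i: '-' selects stdin, anything else is opened as a file.
def open_i(path):
    if path == '-':
        return contextlib.nullcontext(sys.stdin)
    return open(path)

# Usage mirroring the example above (kept commented out so the sketch does not
# block waiting on stdin):
#   with open_i('-') as fi:
#       pgt = json.load(fi)
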
Example #4
def main():

    parser = optparse.OptionParser()
    parser.add_option("-l",
                      "--log_dir",
                      action="store",
                      type="string",
                      dest="log_dir",
                      help="Log directory (required)")
    # if this parameter is present, it means we want to get monitored
    parser.add_option("-m",
                      "--monitor_host",
                      action="store",
                      type="string",
                      dest="monitor_host",
                      help="Monitor host IP (optional)")
    parser.add_option("-o",
                      "--monitor_port",
                      action="store",
                      type="int",
                      dest="monitor_port",
                      help="The port to bind dfms monitor",
                      default=dfms_proxy.default_dfms_monitor_port)
    parser.add_option("-v",
                      "--verbose-level",
                      action="store",
                      type="int",
                      dest="verbose_level",
                      help="Verbosity level (1-3) of the DIM/NM logging",
                      default=1)
    parser.add_option(
        "-z",
        "--zerorun",
        action="store_true",
        dest="zerorun",
        help="Generate a physical graph that takes no time to run",
        default=False)
    parser.add_option(
        "--app",
        action="store",
        type="int",
        dest="app",
        help="The app to use in the PG. 1=SleepApp (default), 2=SleepAndCopy",
        default=0)

    parser.add_option(
        "-t",
        "--max-threads",
        action="store",
        type="int",
        dest="max_threads",
        help=
        "Max thread pool size used for executing drops. 0 (default) means no pool.",
        default=0)

    parser.add_option("-L",
                      "--logical-graph",
                      action="store",
                      type="string",
                      dest="logical_graph",
                      help="The filename of the logical graph to deploy",
                      default=None)
    parser.add_option(
        "-P",
        "--physical-graph",
        action="store",
        type="string",
        dest="physical_graph",
        help="The filename of the physical graph (template) to deploy",
        default=None)

    parser.add_option('-s',
                      '--num_islands',
                      action='store',
                      type='int',
                      dest='num_islands',
                      default=1,
                      help='The number of Data Islands')

    parser.add_option('-d',
                      '--dump',
                      action='store_true',
                      dest='dump',
                      help='Dump the graph execution status to a file under the log directory',
                      default=False)

    parser.add_option("-c",
                      "--loc",
                      action="store",
                      type="string",
                      dest="loc",
                      help="deployment location (e.g. 'Pawsey' or 'Tianhe2')",
                      default="Pawsey")

    parser.add_option("-u",
                      "--all_nics",
                      action="store_true",
                      dest="all_nics",
                      help="Listen on all NICs for a node manager",
                      default=False)

    parser.add_option('--check-interfaces',
                      action='store_true',
                      dest='check_interfaces',
                      help='Run a small network interfaces test and exit',
                      default=False)
    parser.add_option(
        "-S",
        "--check_with_session",
        action="store_true",
        dest="check_with_session",
        help=
        "Check for node managers' availability by creating/destroy a session",
        default=False)

    (options, _) = parser.parse_args()

    if options.check_interfaces:
        print("From netifaces: %s" % utils.get_local_ip_addr())
        print("From ifconfig: %s" % get_ip())
        sys.exit(0)

    if bool(options.logical_graph) == bool(options.physical_graph):
        parser.error(
            "Either a logical graph or a physical graph filename (but not both) must be specified"
        )
    for p in (options.logical_graph, options.physical_graph):
        if p and not os.path.exists(p):
            parser.error("Cannot locate graph file at '{0}'".format(p))

    if (options.monitor_host is not None and options.num_islands > 1):
        parser.error("We do not support proxy monitor multiple islands yet")

    logv = max(min(3, options.verbose_level), 1)

    from mpi4py import MPI  # @UnresolvedImport
    comm = MPI.COMM_WORLD  # @UndefinedVariable
    num_procs = comm.Get_size()
    rank = comm.Get_rank()

    log_dir = "{0}/{1}".format(options.log_dir, rank)
    os.makedirs(log_dir)
    logfile = log_dir + "/start_dfms_cluster.log"
    FORMAT = "%(asctime)-15s [%(levelname)5.5s] [%(threadName)15.15s] %(name)s#%(funcName)s:%(lineno)s %(message)s"
    logging.basicConfig(filename=logfile, level=logging.DEBUG, format=FORMAT)

    if (num_procs > 1 and options.monitor_host is not None):
        logger.info("Trying to start dfms_cluster with proxy")
        run_proxy = True
        threshold = 2
    else:
        logger.info("Trying to start dfms_cluster without proxy")
        run_proxy = False
        threshold = 1

    if (num_procs == threshold):
        logger.warning("No MPI processes left for running Drop Managers")
        run_node_mgr = False
    else:
        run_node_mgr = True

    # attach rank information at the end of the IP address for multi-island runs
    rank_str = '' if options.num_islands == 1 else ',%s' % rank
    public_ip = get_ip(options.loc)
    ip_adds = '{0}{1}'.format(public_ip, rank_str)
    origin_ip = ip_adds.split(',')[0]
    ip_adds = comm.gather(ip_adds, root=0)

    proxy_ip = None
    if run_proxy:
        # send island/master manager's IP address to the dfms proxy
        # also let island manager know dfms proxy's IP
        if rank == 0:
            mgr_ip = origin_ip
            comm.send(mgr_ip, dest=1)
            proxy_ip = comm.recv(source=1)
        elif rank == 1:
            mgr_ip = comm.recv(source=0)
            proxy_ip = origin_ip
            comm.send(proxy_ip, dest=0)

    set_env(rank)
    if (options.num_islands == 1):
        if (rank != 0):
            if (run_proxy and rank == 1):
                # Wait until the Island Manager is open
                if utils.portIsOpen(mgr_ip, ISLAND_DEFAULT_REST_PORT, 100):
                    start_dfms_proxy(options.loc, mgr_ip,
                                     ISLAND_DEFAULT_REST_PORT,
                                     options.monitor_host,
                                     options.monitor_port)
                else:
                    logger.warning(
                        "Couldn't connect to the main drop manager, proxy not started"
                    )
            elif (run_node_mgr):
                logger.info(
                    "Starting node manager on host {0}".format(origin_ip))
                start_node_mgr(log_dir,
                               logv=logv,
                               max_threads=options.max_threads,
                               host=None if options.all_nics else origin_ip)
        else:

            # 'no_nms' are known not to be NMs
            no_nms = [origin_ip, 'None']
            if proxy_ip:
                no_nms += [proxy_ip]
            node_mgrs = [ip for ip in ip_adds if ip not in no_nms]

            # unroll the graph first (if any) while starting node managers on other nodes
            pgt = None
            if options.logical_graph or options.physical_graph:
                pip_name = utils.fname_to_pipname(options.logical_graph
                                                  or options.physical_graph)
                if options.logical_graph:
                    unrolled = tool.unroll(options.logical_graph, '1',
                                           options.zerorun, apps[options.app])
                    pgt = tool.partition(unrolled, pip_name, len(node_mgrs),
                                         options.num_islands, 'metis')
                    del unrolled
                else:
                    with open(options.physical_graph) as pg_file:
                        pgt = json.load(pg_file)

            # Check which NMs are up and use only those from now on
            node_mgrs = check_hosts(
                node_mgrs,
                NODE_DEFAULT_REST_PORT,
                check_with_session=options.check_with_session,
                timeout=MM_WAIT_TIME)

            # We have a PGT, let's map it and submit it
            if pgt:
                pg = tool.resource_map(pgt, [origin_ip] + node_mgrs, pip_name,
                                       options.num_islands)
                del pgt

                def submit_and_monitor():
                    host, port = 'localhost', ISLAND_DEFAULT_REST_PORT
                    tool.submit(host, port, pg)
                    if options.dump:
                        dump_path = '{0}/monitor'.format(log_dir)
                        monitor_graph(host, port, dump_path)

                threading.Thread(target=submit_and_monitor).start()

            # Start the DIM
            logger.info("Starting island manager on host %s", origin_ip)
            start_dim(node_mgrs, log_dir, logv=logv)

    elif (options.num_islands > 1):
        if (rank == 0):
            # master manager
            # 1. use ip_adds to produce the physical graph
            ip_list = []
            ip_rank_dict = dict()  # k - ip, v - MPI rank
            for ipr in ip_adds:
                iprs = ipr.split(',')
                ip = iprs[0]
                r = iprs[1]
                if (ip == origin_ip or 'None' == ip):
                    continue
                ip_list.append(ip)
                ip_rank_dict[ip] = int(r)

            if (len(ip_list) <= options.num_islands):
                raise Exception(
                    "Insufficient nodes available for node managers")

            # 2. broadcast the DIM ranks to all nodes so they know who the DIMs are
            dim_ranks = []
            dim_ip_list = ip_list[0:options.num_islands]
            logger.info("A list of DIM IPs: {0}".format(dim_ip_list))
            for dim_ip in dim_ip_list:
                dim_ranks.append(ip_rank_dict[dim_ip])
            dim_ranks = comm.bcast(dim_ranks, root=0)

            # 3. unroll the graph while waiting for the node managers to start
            pip_name = utils.fname_to_pipname(options.logical_graph
                                              or options.physical_graph)
            if options.logical_graph:
                unrolled = tool.unroll(options.logical_graph, '1',
                                       options.zerorun, apps[options.app])
                pgt = tool.partition(unrolled, pip_name,
                                     len(ip_list) - 1, options.num_islands,
                                     'metis')
                del unrolled
            else:
                with open(options.physical_graph) as pg_file:
                    pgt = json.load(pg_file)

            #logger.info("Waiting all node managers to start in %f seconds", MM_WAIT_TIME)
            node_mgrs = check_hosts(
                ip_list[options.num_islands:],
                NODE_DEFAULT_REST_PORT,
                check_with_session=options.check_with_session,
                timeout=MM_WAIT_TIME)

            # 4. produce the physical graph based on the node managers that are
            # already running (we have to assume the island managers will come
            # up without problems later on)
            logger.info("Master Manager producing the physical graph")
            pg = tool.resource_map(pgt, dim_ip_list + node_mgrs, pip_name,
                                   options.num_islands)

            # 5. parse the pg_spec to get the mapping from islands to node list
            dim_rank_nodes_dict = collections.defaultdict(set)
            for drop in pg:
                dim_ip = drop['island']
                # if (not dim_ip in dim_ip_list):
                #     raise Exception("'{0}' node is not in island list {1}".format(dim_ip, dim_ip_list))
                r = ip_rank_dict[dim_ip]
                n = drop['node']
                dim_rank_nodes_dict[r].add(n)

            # 6. send a node list to each DIM so that it can start
            for dim_ip in dim_ip_list:
                r = ip_rank_dict[dim_ip]
                logger.debug("Sending node list to rank {0}".format(r))
                #TODO this should be in a thread since it is blocking!
                comm.send(list(dim_rank_nodes_dict[r]), dest=r)

            # 7. make sure all DIMs are up and running
            dim_ips_up = check_hosts(dim_ip_list,
                                     ISLAND_DEFAULT_REST_PORT,
                                     timeout=MM_WAIT_TIME,
                                     retry=10)
            if len(dim_ips_up) < len(dim_ip_list):
                logger.warning("Not all DIMs were up and running: %d/%d",
                               len(dim_ips_up), len(dim_ip_list))

            # 8. submit the graph in a thread (wait for mm to start)
            def submit():
                if not check_host('localhost',
                                  MASTER_DEFAULT_REST_PORT,
                                  timeout=GRAPH_SUBMIT_WAIT_TIME):
                    logger.warning(
                        "Master Manager didn't come up in %d seconds",
                        GRAPH_SUBMIT_WAIT_TIME)
                tool.submit('localhost', MASTER_DEFAULT_REST_PORT, pg)

            threading.Thread(target=submit).start()

            # 9. start the dlgMM using the islands' IP addresses (this will block)
            start_mm(dim_ip_list, log_dir, logv=logv)

        else:
            dim_ranks = None
            dim_ranks = comm.bcast(dim_ranks, root=0)
            logger.debug("Receiving dim_ranks = {0}, my rank is {1}".format(
                dim_ranks, rank))
            if (rank in dim_ranks):
                logger.debug(
                    "Rank {0} is a DIM preparing for receiving".format(rank))
                # island manager
                # get a list of nodes that are its children from rank 0 (MM)
                nm_list = comm.recv(source=0)
                # no need to wait for node managers since the master manager
                # has already made sure they are up and running
                logger.debug("nm_list for DIM {0} is {1}".format(
                    rank, nm_list))
                start_dim(nm_list, log_dir, logv=logv)
            else:
                # node manager
                logger.info(
                    "Starting node manager on host {0}".format(origin_ip))
                start_node_mgr(log_dir,
                               logv=logv,
                               max_threads=options.max_threads,
                               host=None if options.all_nics else origin_ip)
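
main() is meant to be launched once per MPI rank: rank 0 becomes the island (or master) manager, rank 1 runs the optional dfms proxy when a monitor host is given, and the remaining ranks start node managers. A hedged usage sketch (the script name is inferred from the log-file name above; the process count, log path, graph file and monitor host are illustrative):

    mpirun -np 6 python start_dfms_cluster.py -l /scratch/dfms_logs -L lofar_std.json -m mon.example.org -v 2
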
Example #5
def dlg_map(parser, args):

    from dfms.manager import constants

    _add_logging_options(parser)
    _add_output_options(parser)
    parser.add_option('-H',
                      '--host',
                      action='store',
                      dest='host',
                      help='The host we connect to to deploy the graph',
                      default='localhost')
    parser.add_option("-p",
                      "--port",
                      action="store",
                      type="int",
                      dest='port',
                      help='The port we connect to to deploy the graph',
                      default=constants.ISLAND_DEFAULT_REST_PORT)
    parser.add_option(
        '-P',
        '--physical-graph-template',
        action='store',
        dest='pgt_path',
        type='string',
        help='Path to the Physical Graph to submit (default: stdin)',
        default='-')
    parser.add_option(
        "-N",
        "--nodes",
        action="store",
        dest="nodes",
        help=
        "The nodes where the Physical Graph will be distributed, comma-separated",
        default=None)
    parser.add_option("-i",
                      "--islands",
                      action="store",
                      type="int",
                      dest="islands",
                      help="Number of islands to use during the partitioning",
                      default=1)
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    from dfms.manager.client import CompositeManagerClient

    if opts.nodes:
        nodes = [n for n in opts.nodes.split(',') if n]
    else:
        client = CompositeManagerClient(opts.host, opts.port, timeout=10)
        nodes = client.nodes()

    n_nodes = len(nodes)
    if n_nodes <= opts.islands:
        raise Exception(
            "#nodes (%d) should be bigger than number of islands (%d)" %
            (n_nodes, opts.islands))

    with _open_i(opts.pgt_path) as f:
        pgt = json.load(f)

    pip_name = utils.fname_to_pipname(opts.pgt_path)
    dump(resource_map(pgt, nodes, pip_name, opts.islands))
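
When --nodes is given, dlg_map splits the comma-separated list itself; otherwise it asks the running manager for its nodes via CompositeManagerClient.nodes(). A runnable sketch of the first path and of the islands sanity check, with the client call replaced by a hard-coded option value (all addresses are illustrative):

# Hedged sketch of the node selection and sanity check above.
nodes_opt = '10.0.0.2,10.0.0.3,10.0.0.4'          # stands in for opts.nodes
nodes = [n for n in nodes_opt.split(',') if n]    # drop empty entries
islands = 1                                       # stands in for opts.islands
if len(nodes) <= islands:
    raise Exception("#nodes (%d) should be bigger than number of islands (%d)"
                    % (len(nodes), islands))
print(nodes)
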