def __init__(self, log_root=None, acc=None, pg=None, lg=None, job_dur=30,
             num_nodes=5, run_proxy=False, mon_host=default_aws_mon_host,
             mon_port=default_aws_mon_port, logv=1,
             facility=socket.gethostname().split('-')[0], zerorun=False,
             max_threads=0, sleepncopy=False, num_islands=1, all_nics=False,
             check_with_session=False):
    self._config = ConfigFactory.create_config(facility=facility)
    # Fall back to the facility-specific configuration for unset values
    self._acc = self._config.getpar('acc') if (acc is None) else acc
    self._log_root = self._config.getpar('log_root') if (log_root is None) else log_root
    self._num_nodes = num_nodes
    self._job_dur = job_dur
    self._lg = lg
    self._pg = pg
    self._graph_vis = False
    self._run_proxy = run_proxy
    self._mon_host = mon_host
    self._mon_port = mon_port
    self._pip_name = utils.fname_to_pipname(lg or pg) if lg or pg else 'None'
    self._logv = logv
    self._zerorun = zerorun
    self._max_threads = max_threads
    self._sleepncopy = sleepncopy
    self._num_islands = num_islands
    self._all_nics = all_nics
    self._check_with_session = check_with_session
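# Usage sketch (hedged): the enclosing class is not shown in this excerpt, so
# `PawseyDeployment` below is a hypothetical name standing in for it. The point
# is that only `lg` (or `pg`) normally needs to be given; `acc` and `log_root`
# fall back to the facility configuration resolved by ConfigFactory, and the
# facility itself defaults to the hostname prefix (e.g. 'magnus' on 'magnus-1'):
#
#   deployer = PawseyDeployment(lg='/path/to/logical_graph.json',
#                               num_nodes=10, run_proxy=True)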
def dlg_unroll_and_partition(parser, args):
    _add_logging_options(parser)
    _add_output_options(parser)
    apps = _add_unroll_options(parser)
    _add_partition_options(parser)
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    pip_name = utils.fname_to_pipname(opts.lg_path)
    pgt = unroll(opts.lg_path, opts.oid_prefix, zerorun=opts.zerorun,
                 app=apps[opts.app])
    dump(partition(pgt, pip_name, opts.partitions, opts.islands, opts.algo))
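# Hedged example: driving dlg_unroll_and_partition programmatically. The exact
# flag strings are registered by the _add_*_options helpers, which are not
# shown in this excerpt, so the flags below are assumptions modelled on the
# dest names used above (lg_path, partitions, islands, algo):
#
#   import optparse
#   parser = optparse.OptionParser()
#   dlg_unroll_and_partition(parser, ['-L', 'graph.json', '-N', '8',
#                                     '-i', '1', '-a', 'metis'])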
def dlg_partition(parser, args):
    _add_logging_options(parser)
    _add_output_options(parser)
    _add_partition_options(parser)
    parser.add_option('-P', '--physical-graph-template', action='store',
                      dest='pgt_path', type='string',
                      help='Path to the Physical Graph Template (default: stdin)',
                      default='-')
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    pip_name = utils.fname_to_pipname(opts.pgt_path)
    with _open_i(opts.pgt_path) as fi:
        pgt = json.load(fi)
    dump(partition(pgt, pip_name, opts.partitions, opts.islands, opts.algo))
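# Hedged sketch: because pgt_path defaults to '-', dlg_partition reads the PGT
# from stdin when no -P is given, which allows chaining with an unroll step.
# Assuming the dlg CLI dispatcher exposes these functions as sub-commands
# (the dispatch table is not shown here):
#
#   dlg unroll -L graph.json | dlg partition ... > graph.pgt.json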
def main():

    parser = optparse.OptionParser()
    parser.add_option("-l", "--log_dir", action="store", type="string",
                      dest="log_dir", help="Log directory (required)")
    # if this parameter is present, it means we want to get monitored
    parser.add_option("-m", "--monitor_host", action="store", type="string",
                      dest="monitor_host", help="Monitor host IP (optional)")
    parser.add_option("-o", "--monitor_port", action="store", type="int",
                      dest="monitor_port",
                      help="The port on which the dfms monitor listens",
                      default=dfms_proxy.default_dfms_monitor_port)
    parser.add_option("-v", "--verbose-level", action="store", type="int",
                      dest="verbose_level",
                      help="Verbosity level (1-3) of the DIM/NM logging",
                      default=1)
    parser.add_option("-z", "--zerorun", action="store_true", dest="zerorun",
                      help="Generate a physical graph that takes no time to run",
                      default=False)
    parser.add_option("--app", action="store", type="int", dest="app",
                      help="The app to use in the PG. 0=SleepApp (default), 1=SleepAndCopy",
                      default=0)
    parser.add_option("-t", "--max-threads", action="store", type="int",
                      dest="max_threads",
                      help="Max thread pool size used for executing drops. "
                           "0 (default) means no pool.",
                      default=0)
    parser.add_option("-L", "--logical-graph", action="store", type="string",
                      dest="logical_graph",
                      help="The filename of the logical graph to deploy",
                      default=None)
    parser.add_option("-P", "--physical-graph", action="store", type="string",
                      dest="physical_graph",
                      help="The filename of the physical graph (template) to deploy",
                      default=None)
    parser.add_option('-s', '--num_islands', action='store', type='int',
                      dest='num_islands', default=1,
                      help='The number of Data Islands')
    parser.add_option('-d', '--dump', action='store_true', dest='dump',
                      help='Dump the graph runtime status to a monitor file under the log directory',
                      default=False)
    parser.add_option("-c", "--loc", action="store", type="string", dest="loc",
                      help="Deployment location (e.g. 'Pawsey' or 'Tianhe2')",
                      default="Pawsey")
    parser.add_option("-u", "--all_nics", action="store_true", dest="all_nics",
                      help="Listen on all NICs for a node manager",
                      default=False)
    parser.add_option('--check-interfaces', action='store_true',
                      dest='check_interfaces',
                      help='Run a small network interfaces test and exit',
                      default=False)
    parser.add_option("-S", "--check_with_session", action="store_true",
                      dest="check_with_session",
                      help="Check for node managers' availability by creating/destroying a session",
                      default=False)

    (options, _) = parser.parse_args()

    if options.check_interfaces:
        print("From netifaces: %s" % utils.get_local_ip_addr())
        print("From ifconfig: %s" % get_ip())
        sys.exit(0)

    if options.logical_graph and options.physical_graph:
        parser.error("Only one of logical graph or physical graph filename can be specified")

    for p in (options.logical_graph, options.physical_graph):
        if p and not os.path.exists(p):
            parser.error("Cannot locate graph file at '{0}'".format(p))

    if options.monitor_host is not None and options.num_islands > 1:
        parser.error("Proxy monitoring of multiple islands is not yet supported")

    logv = max(min(3, options.verbose_level), 1)

    from mpi4py import MPI  # @UnresolvedImport
    comm = MPI.COMM_WORLD  # @UndefinedVariable
    num_procs = comm.Get_size()
    rank = comm.Get_rank()

    log_dir = "{0}/{1}".format(options.log_dir, rank)
    os.makedirs(log_dir)
    logfile = log_dir + "/start_dfms_cluster.log"
    FORMAT = "%(asctime)-15s [%(levelname)5.5s] [%(threadName)15.15s] %(name)s#%(funcName)s:%(lineno)s %(message)s"
    logging.basicConfig(filename=logfile, level=logging.DEBUG, format=FORMAT)

    if num_procs > 1 and options.monitor_host is not None:
        logger.info("Trying to start dfms_cluster with proxy")
        run_proxy = True
        threshold = 2
    else:
        logger.info("Trying to start dfms_cluster without proxy")
        run_proxy = False
        threshold = 1

    if num_procs == threshold:
        logger.warning("No MPI processes left for running Drop Managers")
        run_node_mgr = False
    else:
        run_node_mgr = True

    # attach rank information at the end of IP address for multi-islands
    rank_str = '' if options.num_islands == 1 else ',%s' % rank
    public_ip = get_ip(options.loc)
    ip_adds = '{0}{1}'.format(public_ip, rank_str)
    origin_ip = ip_adds.split(',')[0]
    ip_adds = comm.gather(ip_adds, root=0)

    proxy_ip = None
    if run_proxy:
        # send island/master manager's IP address to the dfms proxy,
        # and also let the island manager know the dfms proxy's IP
        if rank == 0:
            mgr_ip = origin_ip
            comm.send(mgr_ip, dest=1)
            proxy_ip = comm.recv(source=1)
        elif rank == 1:
            mgr_ip = comm.recv(source=0)
            proxy_ip = origin_ip
            comm.send(proxy_ip, dest=0)

    set_env(rank)

    if options.num_islands == 1:
        if rank != 0:
            if run_proxy and rank == 1:
                # Wait until the Island Manager is open
                if utils.portIsOpen(mgr_ip, ISLAND_DEFAULT_REST_PORT, 100):
                    start_dfms_proxy(options.loc, mgr_ip,
                                     ISLAND_DEFAULT_REST_PORT,
                                     options.monitor_host, options.monitor_port)
                else:
                    logger.warning("Couldn't connect to the main drop manager, "
                                   "proxy not started")
            elif run_node_mgr:
                logger.info("Starting node manager on host {0}".format(origin_ip))
                start_node_mgr(log_dir, logv=logv,
                               max_threads=options.max_threads,
                               host=None if options.all_nics else origin_ip)
        else:
            # 'no_nms' are known not to be NMs
            no_nms = [origin_ip, 'None']
            if proxy_ip:
                no_nms += [proxy_ip]
            node_mgrs = [ip for ip in ip_adds if ip not in no_nms]

            # unroll the graph first (if any) while starting node managers on
            # the other nodes
            pgt = None
            if options.logical_graph or options.physical_graph:
                pip_name = utils.fname_to_pipname(options.logical_graph or
                                                  options.physical_graph)
                if options.logical_graph:
                    unrolled = tool.unroll(options.logical_graph, '1',
                                           options.zerorun, apps[options.app])
                    pgt = tool.partition(unrolled, pip_name, len(node_mgrs),
                                         options.num_islands, 'metis')
                    del unrolled
                else:
                    pgt = json.loads(options.physical_graph)

            # Check which NMs are up and use only those from now on
            node_mgrs = check_hosts(node_mgrs, NODE_DEFAULT_REST_PORT,
                                    check_with_session=options.check_with_session,
                                    timeout=MM_WAIT_TIME)

            # We have a PGT, let's map it and submit it
            if pgt:
                pg = tool.resource_map(pgt, [origin_ip] + node_mgrs, pip_name,
                                       options.num_islands)
                del pgt

                def submit_and_monitor():
                    host, port = 'localhost', ISLAND_DEFAULT_REST_PORT
                    tool.submit(host, port, pg)
                    if options.dump:
                        dump_path = '{0}/monitor'.format(log_dir)
                        monitor_graph(host, port, dump_path)

                threading.Thread(target=submit_and_monitor).start()

            # Start the DIM
            logger.info("Starting island manager on host %s", origin_ip)
            start_dim(node_mgrs, log_dir, logv=logv)

    elif options.num_islands > 1:
        if rank == 0:
            # master manager
            # 1. use ip_adds to produce the physical graph
            ip_list = []
            ip_rank_dict = dict()  # k - ip, v - MPI rank
            for ipr in ip_adds:
                iprs = ipr.split(',')
                ip = iprs[0]
                r = iprs[1]
                if ip == origin_ip or 'None' == ip:
                    continue
                ip_list.append(ip)
                ip_rank_dict[ip] = int(r)

            if len(ip_list) <= options.num_islands:
                raise Exception("Insufficient nodes available for node managers")

            # 2. broadcast the DIM ranks to all nodes to let them know who the DIMs are
            dim_ranks = []
            dim_ip_list = ip_list[0:options.num_islands]
            logger.info("A list of DIM IPs: {0}".format(dim_ip_list))
            for dim_ip in dim_ip_list:
                dim_ranks.append(ip_rank_dict[dim_ip])
            dim_ranks = comm.bcast(dim_ranks, root=0)

            # 3. unroll the graph while waiting for node managers to start
            pip_name = utils.fname_to_pipname(options.logical_graph or
                                              options.physical_graph)
            if options.logical_graph:
                unrolled = tool.unroll(options.logical_graph, '1',
                                       options.zerorun, apps[options.app])
                pgt = tool.partition(unrolled, pip_name, len(ip_list) - 1,
                                     options.num_islands, 'metis')
                del unrolled
            else:
                pgt = json.loads(options.physical_graph)

            #logger.info("Waiting all node managers to start in %f seconds", MM_WAIT_TIME)
            node_mgrs = check_hosts(ip_list[options.num_islands:],
                                    NODE_DEFAULT_REST_PORT,
                                    check_with_session=options.check_with_session,
                                    timeout=MM_WAIT_TIME)

            # 4. produce the physical graph based on the node managers that are
            # already running (we have to assume the island managers will run
            # smoothly in the future)
            logger.info("Master Manager producing the physical graph")
            pg = tool.resource_map(pgt, dim_ip_list + node_mgrs, pip_name,
                                   options.num_islands)

            # 5. parse the pg_spec to get the mapping from islands to node list
            dim_rank_nodes_dict = collections.defaultdict(set)
            for drop in pg:
                dim_ip = drop['island']
                # if (not dim_ip in dim_ip_list):
                #     raise Exception("'{0}' node is not in island list {1}".format(dim_ip, dim_ip_list))
                r = ip_rank_dict[dim_ip]
                n = drop['node']
                dim_rank_nodes_dict[r].add(n)

            # 6. send a node list to each DIM so that it can start
            for dim_ip in dim_ip_list:
                r = ip_rank_dict[dim_ip]
                logger.debug("Sending node list to rank {0}".format(r))
                # TODO this should be in a thread since it is blocking!
                comm.send(list(dim_rank_nodes_dict[r]), dest=r)

            # 7. make sure all DIMs are up and running
            dim_ips_up = check_hosts(dim_ip_list, ISLAND_DEFAULT_REST_PORT,
                                     timeout=MM_WAIT_TIME, retry=10)
            if len(dim_ips_up) < len(dim_ip_list):
                logger.warning("Not all DIMs were up and running: %d/%d",
                               len(dim_ips_up), len(dim_ip_list))

            # 8. submit the graph in a thread (wait for the MM to start)
            def submit():
                if not check_host('localhost', MASTER_DEFAULT_REST_PORT,
                                  timeout=GRAPH_SUBMIT_WAIT_TIME):
                    logger.warning("Master Manager didn't come up in %d seconds",
                                   GRAPH_SUBMIT_WAIT_TIME)
                tool.submit('localhost', MASTER_DEFAULT_REST_PORT, pg)
            threading.Thread(target=submit).start()

            # 9. start the dlgMM using the island IP addresses (this will block)
            start_mm(dim_ip_list, log_dir, logv=logv)

        else:
            dim_ranks = comm.bcast(None, root=0)
            logger.debug("Receiving dim_ranks = {0}, my rank is {1}".format(dim_ranks, rank))
            if rank in dim_ranks:
                logger.debug("Rank {0} is a DIM preparing for receiving".format(rank))
                # island manager
                # get a list of nodes that are its children from rank 0 (MM)
                nm_list = comm.recv(source=0)
                # no need to wait for node managers since the master manager
                # has already made sure they are up and running
                logger.debug("nm_list for DIM {0} is {1}".format(rank, nm_list))
                start_dim(nm_list, log_dir, logv=logv)
            else:
                # node manager
                logger.info("Starting node manager on host {0}".format(origin_ip))
                start_node_mgr(log_dir, logv=logv,
                               max_threads=options.max_threads,
                               host=None if options.all_nics else origin_ip)
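# Hedged launch sketch: main() expects one MPI process per node. When a
# monitor host is given, rank 0 runs the island/master manager, rank 1 the
# dfms proxy, and the remaining ranks the node managers. Assuming this file is
# start_dfms_cluster.py (as the log file name above suggests) and an
# mpi4py-capable interpreter; the flags are the ones defined in main():
#
#   mpirun -np 8 python start_dfms_cluster.py -l /scratch/logs \
#          -L graph.json -m monitor.example.com -v 2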
def dlg_map(parser, args):
    from dfms.manager import constants

    _add_logging_options(parser)
    _add_output_options(parser)
    parser.add_option('-H', '--host', action='store', dest='host',
                      help='The host to which we connect to deploy the graph',
                      default='localhost')
    parser.add_option("-p", "--port", action="store", type="int", dest='port',
                      help='The port to which we connect to deploy the graph',
                      default=constants.ISLAND_DEFAULT_REST_PORT)
    parser.add_option('-P', '--physical-graph-template', action='store',
                      dest='pgt_path', type='string',
                      help='Path to the Physical Graph to submit (default: stdin)',
                      default='-')
    parser.add_option("-N", "--nodes", action="store", dest="nodes",
                      help="The nodes where the Physical Graph will be distributed, comma-separated",
                      default=None)
    parser.add_option("-i", "--islands", action="store", type="int",
                      dest="islands", default=1,
                      help="Number of islands to use during the partitioning")
    (opts, args) = parser.parse_args(args)
    _setup_logging(opts)
    dump = _setup_output(opts)

    from dfms.manager.client import CompositeManagerClient

    if opts.nodes:
        nodes = [n for n in opts.nodes.split(',') if n]
    else:
        client = CompositeManagerClient(opts.host, opts.port, timeout=10)
        nodes = client.nodes()

    n_nodes = len(nodes)
    if n_nodes <= opts.islands:
        raise Exception("#nodes (%d) should be bigger than the number of islands (%d)" %
                        (n_nodes, opts.islands))

    with _open_i(opts.pgt_path) as f:
        pgt = json.load(f)
    pip_name = utils.fname_to_pipname(opts.pgt_path)
    dump(resource_map(pgt, nodes, pip_name, opts.islands))
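# Hedged example: mapping a partitioned PGT onto the nodes of a live island
# manager. The -H, -P and -i flags are the ones defined directly above, and
# reading from stdin works because pgt_path defaults to '-'. Assuming the CLI
# dispatcher exposes this function as `dlg map` (the dispatch table is not
# shown here):
#
#   cat graph.pgt.json | dlg map -H dim-host.example.com -i 2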