Exemple #1
0
def mpdsigjob():
    mpd_set_my_id('mpdsigjob_')
    if len(argv) < 3 or argv[1] == '-h' or argv[1] == '--help':
        usage()
    username = mpd_get_my_username()
    if environ.has_key('UNIX_SOCKET'):
        conFD = int(environ['UNIX_SOCKET'])
        conSocket = fromfd(conFD, AF_UNIX, SOCK_STREAM)
        close(conFD)
    else:
        if environ.has_key('MPD_CON_EXT'):
            conExt = '_' + environ['MPD_CON_EXT']
        else:
            conExt = ''
        consoleName = '/tmp/mpd2.console_' + username + conExt
        conSocket = socket(AF_UNIX, SOCK_STREAM)  # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'mpdsigjob: cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
            print 'you can start an mpd with the "mpd" command; to get help, run:'
            print '    mpd -h'
            exit(-1)
            # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) )
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket, msgToSend)
Exemple #2
0
def mpdkilljob():
    import sys    # to get access to excepthook in next line
    sys.excepthook = mpd_uncaught_except_tb
    if len(sys.argv) < 2  or  sys.argv[1] == '-h'  or  sys.argv[1] == '--help':
        usage()
    signal.signal(signal.SIGINT, sig_handler)
    mpd_set_my_id(myid='mpdkilljob')
    mpdid = ''
    if sys.argv[1] == '-a':
        jobalias = sys.argv[2]
        jobnum = '0'
    else:
        jobalias = ''
        jobid = sys.argv[1]
        sjobid = jobid.split('@')
        jobnum = sjobid[0]
        if len(sjobid) > 1:
            mpdid = sjobid[1]

    parmdb = MPDParmDB(orderedSources=['cmdline','xml','env','rcfile','thispgm'])
    parmsToOverride = {
                        'MPD_USE_ROOT_MPD'            :  0,
                        'MPD_SECRETWORD'              :  '',
                      }
    for (k,v) in parmsToOverride.items():
        parmdb[('thispgm',k)] = v
    parmdb.get_parms_from_env(parmsToOverride)
    parmdb.get_parms_from_rcfile(parmsToOverride)
    if (hasattr(os,'getuid')  and  os.getuid() == 0)  or  parmdb['MPD_USE_ROOT_MPD']:
        fullDirName = os.path.abspath(os.path.split(sys.argv[0])[0])  # normalize
        mpdroot = os.path.join(fullDirName,'mpdroot')
        conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
    else:
        conSock = MPDConClientSock(secretword=parmdb['MPD_SECRETWORD'])

    msgToSend = { 'cmd':'mpdkilljob', 'jobnum' : jobnum, 'mpdid' : mpdid,
                  'jobalias' : jobalias, 'username' : mpd_get_my_username() }
    conSock.send_dict_msg(msgToSend)
    msg = conSock.recv_dict_msg(timeout=5.0)
    if not msg:
        mpd_print(1,'no msg recvd from mpd before timeout')
        sys.exit(-1)
    if msg['cmd'] != 'mpdkilljob_ack':
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
        else:
            print 'unexpected message from mpd: %s' % (msg)
        sys.exit(-1)
    if not msg['handled']:
        print 'job not found'
        sys.exit(-1)
    conSock.close()
Exemple #3
0
def mpdcleanup():
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    cleanCmd = 'rm -f '
    hostsFile = ''
    try:
        (opts, args) = getopt(argv[1:], 'hf:r:u:c:',
                              ['help', 'file=', 'rsh=', 'user='******'clean='])
    except:
        usage()
        mpd_raise('invalid arg(s) specified')
    else:
        for opt in opts:
            if opt[0] == '-r' or opt[0] == '--rsh':
                rshCmd = opt[1]
            elif opt[0] == '-u' or opt[0] == '--user':
                user = opt[1]
            elif opt[0] == '-f' or opt[0] == '--file':
                hostsFile = opt[1]
            elif opt[0] == '-h' or opt[0] == '--help':
                usage()
            elif opt[0] == '-c' or opt[0] == '--clean':
                cleanCmd = opt[1]
    if args:
        usage()
        mpd_raise('invalid arg(s) specified: ' + ' '.join(args))

    if environ.has_key('MPD_CON_EXT'):
        conExt = '_' + environ['MPD_CON_EXT']
    else:
        conExt = ''
    cleanFile = '/tmp/mpd2.console_' + user + conExt
    system('%s %s' % (cleanCmd, cleanFile))
    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    if hostsFile:
        try:
            f = open(hostsFile, 'r')
        except:
            print 'Not cleaning up on remote hosts; file %s not found' % hostsFile
            exit(0)
        hosts = f.readlines()
        for host in hosts:
            host = host.strip()
            if host[0] != '#':
                cmd = '%s %s -n %s %s %s &' % (rshCmd, xOpt, host, cleanCmd,
                                               cleanFile)
                # print 'cmd=:%s:' % (cmd)
                system(cmd)
def mpdkilljob():
    import sys    # to get access to excepthook in next line
    sys.excepthook = mpd_uncaught_except_tb
    if len(sys.argv) < 2  or  sys.argv[1] == '-h'  or  sys.argv[1] == '--help':
        usage()
    signal.signal(signal.SIGINT, sig_handler)
    mpd_set_my_id(myid='mpdkilljob')
    mpdid = ''
    if sys.argv[1] == '-a':
        jobalias = sys.argv[2]
        jobnum = '0'
    else:
        jobalias = ''
        jobid = sys.argv[1]
        sjobid = jobid.split('@')
        jobnum = sjobid[0]
        if len(sjobid) > 1:
            mpdid = sjobid[1]

    parmdb = MPDParmDB(orderedSources=['cmdline','xml','env','rcfile','thispgm'])
    parmsToOverride = {
                        'MPD_USE_ROOT_MPD'            :  0,
                        'MPD_SECRETWORD'              :  '',
                      }
    for (k,v) in parmsToOverride.items():
        parmdb[('thispgm',k)] = v
    parmdb.get_parms_from_env(parmsToOverride)
    parmdb.get_parms_from_rcfile(parmsToOverride)
    if (hasattr(os,'getuid')  and  os.getuid() == 0)  or  parmdb['MPD_USE_ROOT_MPD']:
        fullDirName = os.path.abspath(os.path.split(sys.argv[0])[0])  # normalize
        mpdroot = os.path.join(fullDirName,'mpdroot')
        conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
    else:
        conSock = MPDConClientSock(secretword=parmdb['MPD_SECRETWORD'])

    msgToSend = { 'cmd':'mpdkilljob', 'jobnum' : jobnum, 'mpdid' : mpdid,
                  'jobalias' : jobalias, 'username' : mpd_get_my_username() }
    conSock.send_dict_msg(msgToSend)
    msg = conSock.recv_dict_msg(timeout=5.0)
    if not msg:
        mpd_print(1,'no msg recvd from mpd before timeout')
        sys.exit(-1)
    if msg['cmd'] != 'mpdkilljob_ack':
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
        else:
            print 'unexpected message from mpd: %s' % (msg)
        sys.exit(-1)
    if not msg['handled']:
        print 'job not found'
        sys.exit(-1)
    conSock.close()
def mpdboot():
    global myHost, fullDirName, rshCmd, user, mpdCmd, debug, verbose
    myHost = gethostname()
    mpd_set_my_id('mpdboot_%s' % (myHost) )
    fullDirName  = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName,'mpd.py')
    hostsFilename = 'mpd.hosts'
    totalnumToStart = 1    # may get chgd below
    debug = 0
    verbose = 0
    localConArg  = ''
    remoteConArg = ''
    oneMPDPerHost = 1
    myNcpus = 1
    myIfhn = ''
    chkupIndicator = 0  # 1 -> chk and start ; 2 -> just chk
    maxUnderOneRoot = 4
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'

    argidx = 1    # skip arg 0
    while argidx < len(argv):
        if   argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-r':    # or --rsh=
            rshCmd = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':    # or --user=
            user = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':    # or --mpd=
            mpdCmd = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':    # or --file=
            hostsFilename = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ncpus'):
            splitArg = argv[argidx].split('=')
            try:
                myNcpus = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ifhn'):
            splitArg = argv[argidx].split('=')
            myIfhn = splitArg[1]
            myHost = splitArg[1]
            argidx += 1
        elif argv[argidx] == '-n':    # or --totalnum=
            totalnumToStart = int(argv[argidx+1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalnumToStart = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--maxbranch'):
            splitArg = argv[argidx].split('=')
            try:
                maxUnderOneRoot = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbose = 1
            argidx += 1
        elif argv[argidx] == '-c' or argv[argidx] == '--chkup':
            chkupIndicator = 1
            argidx += 1
        elif argv[argidx] == '--chkuponly':
            chkupIndicator = 2
            argidx += 1
        elif argv[argidx] == '-1':
            oneMPDPerHost = 0
            argidx += 1
        elif argv[argidx] == '--loccons':
            localConArg  = '-n'
            argidx += 1
        elif argv[argidx] == '--remcons':
            remoteConArg = '-n'
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()

    # Fix for tt#662, make sure the config file is available to avoid some very
    # confusing error messages.  We don't actually need these values here.
    parmdb = MPDParmDB()
    parmdb.get_parms_from_rcfile(parmsToOverride={}, errIfMissingFile=1)

    if debug:
        print 'debug: starting'

    lines = []
    if totalnumToStart > 1:
        try:
            f = open(hostsFilename,'r')
            for line in f:
                lines.append(line)
        except:
            print 'unable to open (or read) hostsfile %s' % (hostsFilename)
            exit(-1)
    hostsAndInfo = [ {'host' : myHost, 'ncpus' : myNcpus, 'ifhn' : myIfhn} ]
    for line in lines:
        line = line.strip()
        if not line  or  line[0] == '#':
            continue
        splitLine = re.split(r'\s+',line)
        host = splitLine[0]
        ncpus = 1  # default
        if ':' in host:
            (host,ncpus) = host.split(':',1)
            ncpus = int(ncpus)
        ifhn = ''  # default
        for kv in splitLine[1:]:
            (k,v) = kv.split('=',1)
            if k == 'ifhn':
                ifhn = v
        hostsAndInfo.append( {'host' : host, 'ncpus' : ncpus, 'ifhn' : ifhn} )
    cachedIPs = {}
    if oneMPDPerHost  and  totalnumToStart > 1:
        oldHostsAndInfo = hostsAndInfo[:]
        hostsAndInfo = []
        for hostAndInfo in oldHostsAndInfo:
            oldhost = hostAndInfo['host']
            try:
                ips = gethostbyname_ex(oldhost)[2]    # may fail if invalid host
            except:
                print 'unable to obtain IP for host:', oldhost
                continue
            uips = {}    # unique ips
            for ip in ips:
                uips[ip] = 1
            keep = 1
            for ip in uips.keys():
                if cachedIPs.has_key(ip):
                    keep = 0
                    break
            if keep:
                hostsAndInfo.append(hostAndInfo)
                cachedIPs.update(uips)
    if len(hostsAndInfo) < totalnumToStart:    # one is local
        print 'totalnum=%d  numhosts=%d' % (totalnumToStart,len(hostsAndInfo))
        print 'there are not enough hosts on which to start all processes'
        exit(-1)
    if chkupIndicator:
        hostsToCheck = [ hai['host'] for hai in hostsAndInfo[1:totalnumToStart] ]
        (upList,dnList) = chkupdn(hostsToCheck)
        if dnList:
            print "these hosts are down; exiting"
            print dnList
            exit(-1)
        print "there are %d hosts up (counting local)" % (len(upList)+1)
        if chkupIndicator == 2:  # do the chkup and quit
            exit(0)

    try:
        # stop current (if any) mpds; ignore the output
        getoutput('%s/mpdallexit.py' % (fullDirName))
        if verbose or debug:
            print 'running mpdallexit on %s' % (myHost)
    except:
        pass

    if environ.has_key('MPD_TMPDIR'):
        tmpdir = environ['MPD_TMPDIR']
    else:
        tmpdir = ''
    if myIfhn:
        ifhn = '--ifhn=%s' % (myIfhn)
    else:
        ifhn = ''
    hostsAndInfo[0]['entry_host'] = ''
    hostsAndInfo[0]['entry_port'] = ''
    mpdArgs = '%s %s --ncpus=%d' % (localConArg,ifhn,myNcpus)
    if tmpdir:
        mpdArgs += ' --tmpdir=%s' % (tmpdir)
    (mpdPID,mpdFD) = launch_one_mpd(0,0,mpdArgs,hostsAndInfo)
    fd2idx = {mpdFD : 0}

    handle_mpd_output(mpdFD,fd2idx,hostsAndInfo)

    try:
        from os import sysconf
        maxfds = sysconf('SC_OPEN_MAX')
    except:
        maxfds = 1024
    maxAtOnce = min(128,maxfds-8)  # -8  for stdeout, etc. + a few more for padding

    hostsSeen = { myHost : 1 }
    fdsToSelect = []
    numStarted = 1  # local already going
    numStarting = 0
    numUnderCurrRoot = 0
    possRoots = []
    currRoot = 0
    idxToStart = 1  # local mpd already going
    while numStarted < totalnumToStart:
        if  numStarting < maxAtOnce  and  idxToStart < totalnumToStart:
            if numUnderCurrRoot < maxUnderOneRoot:
                entryHost = hostsAndInfo[currRoot]['host']
                entryPort = hostsAndInfo[currRoot]['list_port']
                hostsAndInfo[idxToStart]['entry_host'] = entryHost
                hostsAndInfo[idxToStart]['entry_port'] = entryPort
                if hostsSeen.has_key(hostsAndInfo[idxToStart]['host']):
                    remoteConArg = '-n'
                myNcpus = hostsAndInfo[idxToStart]['ncpus']
                ifhn = hostsAndInfo[idxToStart]['ifhn']
                if ifhn:
                    ifhn = '--ifhn=%s' % (ifhn)
                mpdArgs = '%s -h %s -p %s %s --ncpus=%d' % (remoteConArg,entryHost,entryPort,ifhn,myNcpus)
                if tmpdir:
                    mpdArgs += ' --tmpdir=%s' % (tmpdir)
                (mpdPID,mpdFD) = launch_one_mpd(idxToStart,currRoot,mpdArgs,hostsAndInfo)
                numStarting += 1
                numUnderCurrRoot += 1
                hostsAndInfo[idxToStart]['pid'] = mpdPID
                hostsSeen[hostsAndInfo[idxToStart]['host']] = 1
                fd2idx[mpdFD] = idxToStart
                fdsToSelect.append(mpdFD)
                idxToStart += 1
            else:
                if possRoots:
                    currRoot = possRoots.pop()
                    numUnderCurrRoot = 0
            selectTime = 0.01
        else:
            selectTime = 0.1
        try:
            (readyFDs,unused1,unused2) = select(fdsToSelect,[],[],selectTime)
        except error, errmsg:
            mpd_print(1,'mpdboot: select failed: errmsg=:%s:' % (errmsg) )
            exit(-1)
        for fd in readyFDs:
            handle_mpd_output(fd,fd2idx,hostsAndInfo)
            numStarted += 1
            numStarting -= 1
            possRoots.append(fd2idx[fd])
            fdsToSelect.remove(fd)
            fd.close()
Exemple #6
0
def mpdboot():
    global myHost, fullDirName, rshCmd, user, mpdCmd, debug, verbose
    myHost = gethostname()
    mpd_set_my_id('mpdboot_%s' % (myHost) )
    fullDirName  = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName,'mpd.py')
    hostsFilename = 'mpd.hosts'
    totalnumToStart = 0
    debug = 0
    verbose = 0
    chkupIndicator = 0  # 1 -> chk and start ; 2 -> just chk
    maxUnderOneRoot = 4
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'
    if environ.has_key('MPD_TMPDIR'):
        tmpdir = environ['MPD_TMPDIR']
    else:
        tmpdir = ''

    argidx = 1    # skip arg 0
    while argidx < len(argv):
        if   argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-r':    # or --rsh=
            rshCmd = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':    # or --user=
            user = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':    # or --mpd=
            mpdCmd = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':    # or --file=
            hostsFilename = argv[argidx+1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-n':    # or --totalnum=
            totalnumToStart = int(argv[argidx+1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalnumToStart = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--maxbranch'):
            splitArg = argv[argidx].split('=')
            try:
                maxUnderOneRoot = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbose = 1
            argidx += 1
        elif argv[argidx] == '-c' or argv[argidx] == '--chkup':
            chkupIndicator = 1
            argidx += 1
        elif argv[argidx] == '--chkuponly':
            chkupIndicator = 2
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()
    if debug:
        print 'debug: starting'

    lines = []
    try:
        f = open(hostsFilename,'r')
        for line in f:
            if not line  or  line[0] == '#':
                continue
            lines.append(line)
    except:
        print 'unable to open (or read) hostsfile %s' % (hostsFilename)
        exit(-1)
    if totalnumToStart == 0:
        totalnumToStart = len(lines)
    numRead = 0
    hostsAndInfo = []
    for line in lines:
        line = line.strip()
        splitLine = re.split(r'\s+',line)
        host = splitLine[0]
        ncpus = 1  # default
        ifhn = ''  # default
        cons = ''  # default
        for kv in splitLine[1:]:
            (k,v) = kv.split('=',1)
            if k == 'ifhn':
                ifhn = v
            elif k == 'ncpus':
                ncpus = int(v)
            elif k == 'cons':
                cons = v
            else:
                print "unrecognized key:", k
                exit(-1)
        hostsAndInfo.append( {'host' : host, 'ifhn' : ifhn, 'ncpus' : ncpus, 'cons' : cons} )
        numRead += 1
        if numRead >= totalnumToStart:
            break
    if len(hostsAndInfo) < totalnumToStart:    # one is local
        print 'totalnum=%d  numhosts=%d' % (totalnumToStart,len(hostsAndInfo))
        print 'there are not enough hosts on which to start all processes'
        exit(-1)
    if chkupIndicator:
        hostsToCheck = [ hai['host'] for hai in hostsAndInfo ]
        (upList,dnList) = chkupdn(hostsToCheck)
        if dnList:
            print "these hosts are down; exiting"
            print dnList
            exit(-1)
        print "there are %d hosts up" % (len(upList))
        if chkupIndicator == 2:  # do the chkup and quit
            exit(0)

    try:
        from os import sysconf
        maxfds = sysconf('SC_OPEN_MAX')
    except:
        maxfds = 1024
    maxAtOnce = min(128,maxfds-8)  # -8  for stdout, etc. + a few more for padding

    fd2idx = {}
    hostsSeen = {}
    fdsToSelect = []
    numStarted = 0
    numStarting = 0
    numUnderCurrRoot = 0
    possRoots = []
    currRoot = 0
    idxToStart = 0
    while numStarted < totalnumToStart:
        if  numStarting < maxAtOnce  and  idxToStart < totalnumToStart:
            if numUnderCurrRoot < maxUnderOneRoot:
                if idxToStart == 0:
                    entryHost = ''
                    entryPort = ''
                else:
                    entryHost = hostsAndInfo[currRoot]['host']
                    entryPort = hostsAndInfo[currRoot]['list_port']
                hostsAndInfo[idxToStart]['entry_host'] = entryHost
                hostsAndInfo[idxToStart]['entry_port'] = entryPort
                if entryHost:
                    entryHost = '-h ' + entryHost
                    entryPort = '-p ' + str(entryPort)
                ifhn = hostsAndInfo[idxToStart]['ifhn']
                ncpus = hostsAndInfo[idxToStart]['ncpus']
                cons = hostsAndInfo[idxToStart]['cons']
                if ifhn:
                    ifhn = '--ifhn=%s' % (ifhn)
                if ncpus:
                    ncpus = '--ncpus=%s' % (ncpus)
                if cons == 'n':
                    cons = '-n'
                mpdArgs = '%s %s %s %s %s ' % (cons,entryHost,entryPort,ifhn,ncpus)
                if tmpdir:
                    mpdArgs += ' --tmpdir=%s' % (tmpdir)
                (mpdPID,mpdFD) = launch_one_mpd(idxToStart,currRoot,mpdArgs,hostsAndInfo)
                hostsAndInfo[idxToStart]['pid'] = mpdPID
                hostsSeen[hostsAndInfo[idxToStart]['host']] = 1
                fd2idx[mpdFD] = idxToStart
                if idxToStart == 0:
                    handle_mpd_output(mpdFD,fd2idx,hostsAndInfo)
                    numStarted += 1
                else:
                    numUnderCurrRoot += 1
                    fdsToSelect.append(mpdFD)
                    numStarting += 1
                idxToStart += 1
            else:
                if possRoots:
                    currRoot = possRoots.pop()
                    numUnderCurrRoot = 0
            selectTime = 0.01
        else:
            selectTime = 0.1
        try:
            (readyFDs,unused1,unused2) = select(fdsToSelect,[],[],selectTime)
        except error, errmsg:
            mpd_print(1,'mpdboot: select failed: errmsg=:%s:' % (errmsg) )
            exit(-1)
        for fd in readyFDs:
            handle_mpd_output(fd,fd2idx,hostsAndInfo)
            numStarted += 1
            numStarting -= 1
            possRoots.append(fd2idx[fd])
            fdsToSelect.remove(fd)
            fd.close()
Exemple #7
0
def mpdboot():
    global myHost, fullDirName, rshCmd, user, mpdCmd, debug, verbose
    myHost = gethostname()
    mpd_set_my_id('mpdboot_%s' % (myHost))
    fullDirName = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName, 'mpd.py')
    hostsFilename = 'mpd.hosts'
    totalnumToStart = 0
    debug = 0
    verbose = 0
    chkupIndicator = 0  # 1 -> chk and start ; 2 -> just chk
    maxUnderOneRoot = 4
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'
    if environ.has_key('MPD_TMPDIR'):
        tmpdir = environ['MPD_TMPDIR']
    else:
        tmpdir = ''

    argidx = 1  # skip arg 0
    while argidx < len(argv):
        if argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-r':  # or --rsh=
            rshCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':  # or --user=
            user = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':  # or --mpd=
            mpdCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':  # or --file=
            hostsFilename = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-n':  # or --totalnum=
            totalnumToStart = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalnumToStart = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--maxbranch'):
            splitArg = argv[argidx].split('=')
            try:
                maxUnderOneRoot = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbose = 1
            argidx += 1
        elif argv[argidx] == '-c' or argv[argidx] == '--chkup':
            chkupIndicator = 1
            argidx += 1
        elif argv[argidx] == '--chkuponly':
            chkupIndicator = 2
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()
    if debug:
        print 'debug: starting'

    lines = []
    try:
        f = open(hostsFilename, 'r')
        for line in f:
            if not line or line[0] == '#':
                continue
            lines.append(line)
    except:
        print 'unable to open (or read) hostsfile %s' % (hostsFilename)
        exit(-1)
    if totalnumToStart == 0:
        totalnumToStart = len(lines)
    numRead = 0
    hostsAndInfo = []
    for line in lines:
        line = line.strip()
        splitLine = re.split(r'\s+', line)
        host = splitLine[0]
        ncpus = 1  # default
        ifhn = ''  # default
        cons = ''  # default
        for kv in splitLine[1:]:
            (k, v) = kv.split('=', 1)
            if k == 'ifhn':
                ifhn = v
            elif k == 'ncpus':
                ncpus = int(v)
            elif k == 'cons':
                cons = v
            else:
                print "unrecognized key:", k
                exit(-1)
        hostsAndInfo.append({
            'host': host,
            'ifhn': ifhn,
            'ncpus': ncpus,
            'cons': cons
        })
        numRead += 1
        if numRead >= totalnumToStart:
            break
    if len(hostsAndInfo) < totalnumToStart:  # one is local
        print 'totalnum=%d  numhosts=%d' % (totalnumToStart, len(hostsAndInfo))
        print 'there are not enough hosts on which to start all processes'
        exit(-1)
    if chkupIndicator:
        hostsToCheck = [hai['host'] for hai in hostsAndInfo]
        (upList, dnList) = chkupdn(hostsToCheck)
        if dnList:
            print "these hosts are down; exiting"
            print dnList
            exit(-1)
        print "there are %d hosts up" % (len(upList))
        if chkupIndicator == 2:  # do the chkup and quit
            exit(0)

    try:
        from os import sysconf
        maxfds = sysconf('SC_OPEN_MAX')
    except:
        maxfds = 1024
    maxAtOnce = min(128, maxfds -
                    8)  # -8  for stdout, etc. + a few more for padding

    fd2idx = {}
    hostsSeen = {}
    fdsToSelect = []
    numStarted = 0
    numStarting = 0
    numUnderCurrRoot = 0
    possRoots = []
    currRoot = 0
    idxToStart = 0
    while numStarted < totalnumToStart:
        if numStarting < maxAtOnce and idxToStart < totalnumToStart:
            if numUnderCurrRoot < maxUnderOneRoot:
                if idxToStart == 0:
                    entryHost = ''
                    entryPort = ''
                else:
                    entryHost = hostsAndInfo[currRoot]['host']
                    entryPort = hostsAndInfo[currRoot]['list_port']
                hostsAndInfo[idxToStart]['entry_host'] = entryHost
                hostsAndInfo[idxToStart]['entry_port'] = entryPort
                if entryHost:
                    entryHost = '-h ' + entryHost
                    entryPort = '-p ' + str(entryPort)
                ifhn = hostsAndInfo[idxToStart]['ifhn']
                ncpus = hostsAndInfo[idxToStart]['ncpus']
                cons = hostsAndInfo[idxToStart]['cons']
                if ifhn:
                    ifhn = '--ifhn=%s' % (ifhn)
                if ncpus:
                    ncpus = '--ncpus=%s' % (ncpus)
                if cons == 'n':
                    cons = '-n'
                mpdArgs = '%s %s %s %s %s ' % (cons, entryHost, entryPort,
                                               ifhn, ncpus)
                if tmpdir:
                    mpdArgs += ' --tmpdir=%s' % (tmpdir)
                (mpdPID, mpdFD) = launch_one_mpd(idxToStart, currRoot, mpdArgs,
                                                 hostsAndInfo)
                hostsAndInfo[idxToStart]['pid'] = mpdPID
                hostsSeen[hostsAndInfo[idxToStart]['host']] = 1
                fd2idx[mpdFD] = idxToStart
                if idxToStart == 0:
                    handle_mpd_output(mpdFD, fd2idx, hostsAndInfo)
                    numStarted += 1
                else:
                    numUnderCurrRoot += 1
                    fdsToSelect.append(mpdFD)
                    numStarting += 1
                idxToStart += 1
            else:
                if possRoots:
                    currRoot = possRoots.pop()
                    numUnderCurrRoot = 0
            selectTime = 0.01
        else:
            selectTime = 0.1
        try:
            (readyFDs, unused1, unused2) = select(fdsToSelect, [], [],
                                                  selectTime)
        except error, errmsg:
            mpd_print(1, 'mpdboot: select failed: errmsg=:%s:' % (errmsg))
            exit(-1)
        for fd in readyFDs:
            handle_mpd_output(fd, fd2idx, hostsAndInfo)
            numStarted += 1
            numStarting -= 1
            possRoots.append(fd2idx[fd])
            fdsToSelect.remove(fd)
            fd.close()
Exemple #8
0
def mpdkmpds():
    """Run a kill command for mpd on every host named in a hosts file.

    Options are read from sys.argv:
      -h, --help         call usage()
      -v, --verbose      print each remote command before running it
      -f, --file=FILE    hosts file, one host name per line
      -r, --rsh=CMD      remote-shell command (default 'ssh')
      -u, --user=USER    parsed, but not used by this function
      -k, --kill=CMD     command run on each host (default 'pkill -9 -f mpd')
      -n NUM             only consider the first NUM lines of the hosts file
      -c ARG             accepted by the option string, but ignored

    Blank lines and lines starting with '#' in the hosts file are skipped.
    The first host whose address list equals this machine's is handled last,
    so the local processes are killed only after the remote commands have
    been issued.  Nothing is done when no hosts file is given.
    """
    rshCmd = 'ssh'
    user = mpd_get_my_username()  # may be overridden by -u; unused below
    killCmd = 'pkill -9 -f mpd'  # perhaps '~/bin/kj mpd'  (in quotes)
    hostsFile = ''
    verbose = 0
    numFromHostsFile = 0  # 0 means "use every line of the hosts file"
    # Pre-bind so the code below is well defined even if usage() returns.
    opts = []
    args = []
    try:
        (opts, args) = getopt(sys.argv[1:], 'hvf:r:u:c:k:n:',
                              ['help', 'verbose', 'file=', 'rsh=', 'user=',
                               'kill='])
    except Exception:
        print('invalid arg(s) specified')
        usage()
    for opt in opts:
        if opt[0] == '-r' or opt[0] == '--rsh':
            rshCmd = opt[1]
        elif opt[0] == '-u' or opt[0] == '--user':
            user = opt[1]
        elif opt[0] == '-f' or opt[0] == '--file':
            hostsFile = opt[1]
        elif opt[0] == '-h' or opt[0] == '--help':
            usage()
        elif opt[0] == '-v' or opt[0] == '--verbose':
            verbose = 1
        elif opt[0] == '-n':
            numFromHostsFile = int(opt[1])
        elif opt[0] == '-k' or opt[0] == '--kill':
            killCmd = opt[1]
    if args:
        print('invalid arg(s) specified: ' + ' '.join(args))
        usage()

    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    try:
        localIP = socket.gethostbyname_ex(socket.gethostname())[2]
    except Exception:
        localIP = 'unknownlocal'
    if not hostsFile:
        return

    try:
        f = open(hostsFile, 'r')
    except (IOError, OSError):
        print('Not killing mpd up on remote hosts; file %s not found' %
              hostsFile)
        sys.exit(0)
    try:
        lines = f.readlines()
    finally:
        f.close()
    if numFromHostsFile:
        lines = lines[0:numFromHostsFile]

    # Split the hosts into "run now" and "run last" instead of appending to
    # the list that is being iterated.
    remoteHosts = []
    postponedHosts = []
    for line in lines:
        host = line.strip()
        if not host or host[0] == '#':  # blank line or comment
            continue
        try:
            remoteIP = socket.gethostbyname_ex(host)[2]
        except Exception:
            remoteIP = 'unknownremote'
        if localIP == remoteIP:  # postpone local machine until last
            postponedHosts.append(host)
            localIP = 0  # don't do it again
            continue
        remoteHosts.append(host)

    for host in remoteHosts + postponedHosts:
        if killCmd:
            # NOTE(review): the command line is assembled from the hosts file
            # and -k/-r values and handed to a shell; only use trusted input.
            cmd = "%s %s -n %s \"/bin/sh -c '%s' &\"" % (rshCmd, xOpt,
                                                         host, killCmd)
            if verbose:
                print("cmd=:%s:" % (cmd))
            os.system(cmd)
Exemple #9
0
def mpdboot():
    """Start an mpd on this host and, via child mpdboot processes, on others.

    Steps, in order:
      1. parse argv (user options plus the internal -zentry/-zrank/-zhosts
         options that this function itself passes to the children it starts);
      2. when this is the top-level invocation, build the host list from the
         hosts file (the local host is always entry 0);
      3. start a local mpd, read its port from the first line of its output
         and ping it;
      4. launch mpdboot through the remote shell on the left and right child
         ranks returned by mpd_get_ranks_in_binary_tree (a negative rank means
         "no child");
      5. relay the children's output until they close it.

    Sets the module globals myHost, fullDirName, topMPDBoot and user.
    Problems are reported through usage(), err_exit(), mpd_raise() or exit(-1).
    """
    global myHost, fullDirName, topMPDBoot, user
    mpd_set_my_id('mpdboot_rank_notset')
    # Directory of this script; mpd.py and mpdboot.py are looked up there.
    fullDirName = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName, 'mpd.py')
    mpdbootCmd = path.join(fullDirName, 'mpdboot.py')
    hostsFilename = 'mpd.hosts'
    totalNum = 1  # may get chgd below
    debug = 0
    verbosity = 0
    localConsoleArg = ''
    remoteConsoleArg = ''
    myConsoleVal = ''
    oneMPDPerHost = 1
    entryHost = ''
    entryPort = ''
    topMPDBoot = 1
    myHost = gethostname()
    myNcpus = 1
    myIfhn = ''
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'

    # ---- command-line parsing -------------------------------------------
    # Options taking a separate value advance argidx by 2, the rest by 1.
    argidx = 1  # skip arg 0
    while argidx < len(argv):
        if argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-zentry':  # entry host and port
            if ':' not in argv[argidx + 1]:
                print 'invalid pair of entry host and entry port for -zentry option'
                usage()
            (entryHost, entryPort) = argv[argidx + 1].split(':')
            try:
                # Result is unused; the lookup only validates the host name.
                ip = gethostbyname_ex(entryHost)[2]  # may fail if invalid host
            except:
                print 'invalid entry host ', entryHost
                stdout.flush()
                usage()
            if not entryPort.isdigit():
                print 'invalid (nonumeric) entry port ', entryPort
                stdout.flush()
                usage()
            # NOTE(review): the next two self-assignments are no-ops.
            entryHost = entryHost
            entryPort = entryPort
            argidx += 2
        elif argv[argidx] == '-zrank':
            # A rank passed on the command line marks this as a child
            # invocation rather than the top-level mpdboot.
            topMPDBoot = 0
            myBootRank = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx] == '-zhosts':
            # Comma-separated list of host:ncpus:ifhn triples.
            zhosts = argv[argidx + 1]
            zhosts = zhosts.split(',')
            hostsAndInfo = []
            for zhost in zhosts:
                (host, ncpus, ifhn) = zhost.split(':')
                hostsAndInfo.append({
                    'host': host,
                    'ncpus': ncpus,
                    'ifhn': ifhn
                })
            argidx += 2
        elif argv[argidx] == '-r':  # or --rsh=
            rshCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':  # or --user=
            user = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':  # or --mpd=
            mpdCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':  # or --file=
            hostsFilename = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ncpus'):
            splitArg = argv[argidx].split('=')
            try:
                myNcpus = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ifhn'):
            # NOTE(review): unlike the sibling branches there is no try/except
            # here, so '--ifhn' without '=value' raises IndexError.
            splitArg = argv[argidx].split('=')
            myIfhn = splitArg[1]
            myHost = splitArg[1]
            argidx += 1
        elif argv[argidx] == '-n':  # or --totalnum=
            totalNum = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalNum = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbosity = 1
            argidx += 1
        elif argv[argidx] == '-1':
            # Turns off the duplicate-host filtering done below.
            oneMPDPerHost = 0
            argidx += 1
        elif argv[argidx] == '--loccons':
            localConsoleArg = '--loccons'
            argidx += 1
        elif argv[argidx] == '--remcons':
            remoteConsoleArg = '--remcons'
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()

    # ---- build the host list (top-level invocation only) ------------------
    if topMPDBoot:
        lines = []
        # The hosts file is only read when more than the local mpd is wanted.
        if totalNum > 1:
            try:
                f = open(hostsFilename, 'r')
                for line in f:
                    lines.append(line)
            except:
                print 'unable to open (or read) hostsfile %s' % (hostsFilename)
                exit(-1)
        # Entry 0 is always the local host.
        hostsAndInfo = [{'host': myHost, 'ncpus': myNcpus, 'ifhn': myIfhn}]
        # Each line: host[:ncpus] followed by optional key=value fields, of
        # which only ifhn is used.
        for line in lines:
            line = line.strip()
            if not line or line[0] == '#':
                continue
            splitLine = re.split(r'\s+', line)
            host = splitLine[0]
            ncpus = 1  # default
            if ':' in host:
                (host, ncpus) = host.split(':', 1)
                ncpus = int(ncpus)
            ifhn = ''  # default
            for kv in splitLine[1:]:
                (k, v) = kv.split('=', 1)
                if k == 'ifhn':
                    ifhn = v
            hostsAndInfo.append({'host': host, 'ncpus': ncpus, 'ifhn': ifhn})
        # Keep only the first entry for each distinct host, as judged by
        # mpd_same_ips.
        if oneMPDPerHost and totalNum > 1:
            oldHosts = hostsAndInfo[:]
            hostsAndInfo = []
            for x in oldHosts:
                keep = 1
                for y in hostsAndInfo:
                    if mpd_same_ips(x['host'], y['host']):
                        keep = 0
                        break
                if keep:
                    hostsAndInfo.append(x)
        if len(hostsAndInfo) < totalNum:  # one is local
            print 'totalNum=%d  num hosts=%d' % (totalNum, len(hostsAndInfo))
            print 'there are not enough hosts on which to start all processes'
            exit(-1)
        myBootRank = 0
        if localConsoleArg:
            myConsoleVal = '-n'
    else:
        if remoteConsoleArg:
            myConsoleVal = '-n'
    # If a lower-ranked entry is on this same host, pass '-n' to the local mpd
    # and do not run mpdallexit.py.
    anMPDalreadyHere = 0
    for i in range(myBootRank):
        if mpd_same_ips(hostsAndInfo[i]['host'],
                        myHost):  # if one before me on this host
            myConsoleVal = '-n'
            anMPDalreadyHere = 1
            break
    if not anMPDalreadyHere:
        try:
            system('%s/mpdallexit.py > /dev/null' %
                   (fullDirName))  # stop any current mpds
        except:
            pass

    mpd_set_my_id('mpdboot_%s_%d' % (myHost, myBootRank))
    if debug:
        mpd_print(1, 'starting')
    (parent, lchild,
     rchild) = mpd_get_ranks_in_binary_tree(myBootRank, totalNum)
    if debug:
        mpd_print(1, 'p=%d l=%d r=%d' % (parent, lchild, rchild))

    # ---- start and verify the local mpd -----------------------------------
    # An interface given on the command line takes precedence over the one
    # recorded for this rank in the host list.
    if myIfhn:
        ifhnVal = '--if %s' % (myIfhn)
    elif hostsAndInfo[myBootRank]['ifhn']:
        ifhnVal = '--if %s' % (hostsAndInfo[myBootRank]['ifhn'])
    else:
        ifhnVal = ''
    if entryHost:
        cmd = '%s %s -h %s -p %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,entryHost,entryPort,myNcpus,ifhnVal)
    else:
        cmd = '%s %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,myNcpus,ifhnVal)
    if verbosity:
        mpd_print(1, 'starting local mpd on %s' % (myHost))
    if debug:
        mpd_print(1, 'cmd to run local mpd = :%s:' % (cmd))

    if not access(mpdCmd, X_OK):
        err_exit('cannot access mpd cmd :%s:' % (mpdCmd))
    locMPD = Popen4(cmd, 0)
    locMPDFD = locMPD.fromchild
    # The first line of the mpd's output is expected to be its port number.
    locMPDPort = locMPDFD.readline().strip()
    if locMPDPort.isdigit():
        # can't do this until he's already in his ring
        locMPDSocket = mpd_get_inet_socket_and_connect(myHost, int(locMPDPort))
        if locMPDSocket:
            # NOTE(review): msgToSend is never used; the message actually sent
            # is the inline dict below, which carries myHost as 'host'.
            msgToSend = {
                'cmd': 'ping',
                'host': 'ping',
                'port': 0
            }  # dummy host & port
            mpd_send_one_msg(locMPDSocket, {
                'cmd': 'ping',
                'host': myHost,
                'port': 0
            })
            msg = mpd_recv_one_msg(locMPDSocket)  # RMB: WITH TIMEOUT ??
            if not msg or not msg.has_key('cmd') or msg['cmd'] != 'ping_ack':
                err_exit(
                    '%d: unable to ping local mpd; invalid msg from mpd :%s:' %
                    (myBootRank, msg))
            locMPDSocket.close()
        else:
            err_exit('failed to connect to mpd')
    else:
        err_exit('%d: invalid port from mpd %s' %
                 (myBootRank, str(locMPDPort)))

    # Without an explicit -zentry, the local mpd becomes the entry point that
    # is handed to the children.
    if not entryHost:
        entryHost = myHost
        entryPort = locMPDPort

    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    # ---- launch child mpdboots --------------------------------------------
    # lfd/rfd stay 0 when the corresponding child is not started.
    lfd = 0
    rfd = 0
    fdsToSelect = []
    if debug:
        debugArg = '-d'
    else:
        debugArg = ''
    if verbosity:
        verboseArg = '-v'
    else:
        verboseArg = ''
    if lchild >= 0:
        # The whole host list is forwarded so the child can index it by rank.
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[lchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[lchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[lchild]['host'], mpdbootCmd,
               hostsAndInfo[lchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, lchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[lchild]))
        if debug:
            mpd_print(1, 'cmd to run lchild boot = :%s:' % (cmd))
        lchildMPDBoot = Popen4(cmd, 0)
        lfd = lchildMPDBoot.fromchild
        fdsToSelect.append(lfd)
    # Same launch sequence as for the left child, using rchild.
    if rchild >= 0:
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[rchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[rchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[rchild]['host'], mpdbootCmd,
               hostsAndInfo[rchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, rchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[rchild]))
        if debug:
            mpd_print(1, 'cmd to run rchild boot = :%s:' % (cmd))
        rchildMPDBoot = Popen4(cmd, 0)
        rfd = rchildMPDBoot.fromchild
        fdsToSelect.append(rfd)

    # ---- relay child output until both children close it ------------------
    # A line containing 'RC=MPDBOOT_ERREXIT' is passed on through err_exit();
    # any other line is printed, preceded once per child by an
    # "error trying to start" header when not running verbosely.  An empty
    # read means end-of-file: the descriptor is closed and dropped.
    lfd_first_line = 1
    rfd_first_line = 1
    while fdsToSelect:
        try:
            (readyFDs, unused1, unused2) = select(fdsToSelect, [], [], 0.1)
        except error, errmsg:
            mpd_raise('mpdboot: select failed: errmsg=:%s:' % (errmsg))
        if lfd and lfd in readyFDs:
            line = lfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and lfd_first_line:
                        lfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (lchild, hostsAndInfo[lchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                lfd.close()
                fdsToSelect.remove(lfd)
        if rfd and rfd in readyFDs:
            line = rfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and rfd_first_line:
                        rfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (rchild, hostsAndInfo[rchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                rfd.close()
                fdsToSelect.remove(rfd)
def mpdsigjob():
    """Ask the local mpd to deliver a signal to a job and report the outcome.

    Command line (read from argv):
        sigtype (-a jobalias | -j jobnum[@mpdid]) [-s | -g]

    sigtype may carry a leading '-' and/or 'SIG' prefix; it must be either a
    signal number below signal.NSIG or a name for which the signal module
    defines 'SIG<name>'.  -s selects a single process, -g (the default) the
    group; the choice is sent to the mpd as 's_or_g'.

    Exits with -1 when the signal is invalid, when no reply arrives within
    the timeout, when the reply is not 'mpdsigjob_ack', or when the mpd
    reports that the job was not handled.
    """
    import sys    # to get access to excepthook in next line
    sys.excepthook = mpd_uncaught_except_tb
    if len(argv) < 3  or  argv[1] == '-h'  or  argv[1] == '--help':
        usage()
    signal(SIGINT, sig_handler)
    mpd_set_my_id(myid='mpdsigjob')
    sigtype = argv[1]
    if sigtype.startswith('-'):
        sigtype = sigtype[1:]
    if sigtype.startswith('SIG'):
        sigtype = sigtype[3:]
    import signal as tmpsig  # just to get valid SIG's
    if sigtype.isdigit():
        # NSIG is one more than the highest signal number, so NSIG itself
        # is already out of range.
        if int(sigtype) >= tmpsig.NSIG:
            print('invalid signum: %s' % (sigtype))
            exit(-1)
    else:
        if ('SIG' + sigtype) not in tmpsig.__dict__:
            print('invalid sig type: %s' % (sigtype))
            exit(-1)
    jobalias = ''
    jobnum = ''
    mpdid = ''
    single_or_group = 'g'
    i = 2
    while i < len(argv):
        if argv[i] == '-a':
            if jobnum:      # should not have both alias and jobid
                print('** cannot specify both jobalias and jobid')
                usage()
            if i + 1 >= len(argv):  # option given without its value
                print('** missing value for arg: %s' % (argv[i]))
                usage()
            jobalias = argv[i+1]
            i += 1
            jobnum = '0'
        elif argv[i] == '-j':
            if jobalias:    # should not have both alias and jobid
                print('** cannot specify both jobalias and jobid')
                usage()
            if i + 1 >= len(argv):  # option given without its value
                print('** missing value for arg: %s' % (argv[i]))
                usage()
            jobid = argv[i+1]
            i += 1
            sjobid = jobid.split('@')
            jobnum = sjobid[0]
            if len(sjobid) > 1:
                mpdid = sjobid[1]
        elif argv[i] == '-s':
            single_or_group = 's'
        elif argv[i] == '-g':
            single_or_group = 'g'
        else:
            print('** unrecognized arg: %s' % (argv[i]))
            usage()
        i += 1

    # Defaults registered under 'thispgm' can be overridden from the
    # environment and the rc file.
    parmdb = MPDParmDB(orderedSources=['cmdline','xml','env','rcfile','thispgm'])
    parmsToOverride = {
                        'MPD_USE_ROOT_MPD'            :  0,
                        'MPD_SECRETWORD'              :  '',
                      }
    for (k,v) in parmsToOverride.items():
        parmdb[('thispgm',k)] = v
    parmdb.get_parms_from_env(parmsToOverride)
    parmdb.get_parms_from_rcfile(parmsToOverride)
    if getuid() == 0  or  parmdb['MPD_USE_ROOT_MPD']:
        fullDirName = path.abspath(path.split(argv[0])[0])  # normalize
        mpdroot = path.join(fullDirName,'mpdroot')
        conSock = MPDConClientSock(mpdroot=mpdroot,secretword=parmdb['MPD_SECRETWORD'])
    else:
        conSock = MPDConClientSock(secretword=parmdb['MPD_SECRETWORD'])

    msgToSend = {'cmd' : 'mpdsigjob', 'sigtype': sigtype, 'jobnum' : jobnum,
                 'mpdid' : mpdid, 'jobalias' : jobalias, 's_or_g' : single_or_group,
                 'username' : mpd_get_my_username() }
    conSock.send_dict_msg(msgToSend)
    msg = conSock.recv_dict_msg(timeout=5.0)
    if not msg:
        mpd_print(1,'no msg recvd from mpd before timeout')
        # Stop here: indexing an empty reply below would raise instead of
        # reporting the timeout cleanly.
        exit(-1)
    if msg['cmd'] != 'mpdsigjob_ack':
        if msg['cmd'] == 'already_have_a_console':
            mpd_print(1,'mpd already has a console (e.g. for long ringtest); try later')
        else:
            mpd_print(1,'unexpected message from mpd: %s' % (msg) )
        exit(-1)
    if not msg['handled']:
        print('job not found')
        exit(-1)
    conSock.close()
Exemple #11
0
def mpdlistjobs():
    mpd_set_my_id('mpdlistjobs_')
    username = mpd_get_my_username()
    uname = ''
    jobid = ''
    sjobid = ''
    jobalias = ''
    sssPrintFormat = 0
    if len(argv) > 1:
        aidx = 1
        while aidx < len(argv):
            if argv[aidx] == '-h' or argv[aidx] == '--help':
                usage()
            if argv[aidx] == '-u':  # or --user=
                uname = argv[aidx + 1]
                aidx += 2
            elif argv[aidx].startswith('--user'):
                splitArg = argv[aidx].split('=')
                try:
                    uname = splitArg[1]
                except:
                    print 'mpdlistjobs: invalid argument:', argv[aidx]
                    usage()
                aidx += 1
            elif argv[aidx] == '-j':  # or --jobid=
                jobid = argv[aidx + 1]
                aidx += 2
                sjobid = jobid.split('@')  # jobnum and originating host
            elif argv[aidx].startswith('--jobid'):
                splitArg = argv[aidx].split('=')
                try:
                    jobid = splitArg[1]
                    sjobid = jobid.split('@')  # jobnum and originating host
                except:
                    print 'mpdlistjobs: invalid argument:', argv[aidx]
                    usage()
                aidx += 1
            elif argv[aidx] == '-a':  # or --alias=
                jobalias = argv[aidx + 1]
                aidx += 2
            elif argv[aidx].startswith('--alias'):
                splitArg = argv[aidx].split('=')
                try:
                    jobalias = splitArg[1]
                except:
                    print 'mpdlistjobs: invalid argument:', argv[aidx]
                    usage()
                aidx += 1
            elif argv[aidx] == '--sss':
                sssPrintFormat = 1
                aidx += 1
            else:
                print 'unrecognized arg: %s' % argv[aidx]
                exit(-1)
    if environ.has_key('UNIX_SOCKET'):
        conFD = int(environ['UNIX_SOCKET'])
        conSocket = fromfd(conFD, AF_UNIX, SOCK_STREAM)
        close(conFD)
    else:
        if environ.has_key('MPD_CON_EXT'):
            conExt = '_' + environ['MPD_CON_EXT']
        else:
            conExt = ''
        consoleName = '/tmp/mpd2.console_' + username + conExt
        conSocket = socket(AF_UNIX, SOCK_STREAM)  # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'mpdlistjobs: cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
            print 'you can start an mpd with the "mpd" command; to get help, run:'
            print '    mpd -h'
            exit(-1)
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket, msgToSend)
def mpdkmpds():
    """Run a kill command for mpd on every host named in a hosts file.

    Options are read from sys.argv:
      -h, --help         call usage()
      -v, --verbose      print each remote command before running it
      -f, --file=FILE    hosts file, one host name per line
      -r, --rsh=CMD      remote-shell command (default 'ssh')
      -u, --user=USER    parsed, but not used by this function
      -k, --kill=CMD     command run on each host (default 'pkill -9 -f mpd')
      -n NUM             only consider the first NUM lines of the hosts file
      -c ARG             accepted by the option string, but ignored

    Blank lines and lines starting with '#' in the hosts file are skipped.
    The first host whose address list equals this machine's is handled last,
    so the local processes are killed only after the remote commands have
    been issued.  Nothing is done when no hosts file is given.
    """
    rshCmd    = 'ssh'
    user      = mpd_get_my_username()  # may be overridden by -u; unused below
    killCmd   = 'pkill -9 -f mpd'  # perhaps '~/bin/kj mpd'  (in quotes)
    hostsFile = ''
    verbose = 0
    numFromHostsFile = 0  # 0 means "use every line of the hosts file"
    # Pre-bind so the code below is well defined even if usage() returns.
    opts = []
    args = []
    try:
        (opts, args) = getopt(sys.argv[1:], 'hvf:r:u:c:k:n:',
                              ['help', 'verbose', 'file=', 'rsh=', 'user=',
                               'kill='])
    except Exception:
        print('invalid arg(s) specified')
        usage()
    for opt in opts:
        if opt[0] == '-r' or opt[0] == '--rsh':
            rshCmd = opt[1]
        elif opt[0] == '-u' or opt[0] == '--user':
            user   = opt[1]
        elif opt[0] == '-f' or opt[0] == '--file':
            hostsFile = opt[1]
        elif opt[0] == '-h' or opt[0] == '--help':
            usage()
        elif opt[0] == '-v' or opt[0] == '--verbose':
            verbose = 1
        elif opt[0] == '-n':
            numFromHostsFile = int(opt[1])
        elif opt[0] == '-k' or opt[0] == '--kill':
            killCmd = opt[1]
    if args:
        print('invalid arg(s) specified: ' + ' '.join(args))
        usage()

    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    try:
        localIP = socket.gethostbyname_ex(socket.gethostname())[2]
    except Exception:
        localIP = 'unknownlocal'
    if not hostsFile:
        return

    try:
        f = open(hostsFile,'r')
    except (IOError, OSError):
        print('Not killing mpd up on remote hosts; file %s not found' % hostsFile)
        sys.exit(0)
    try:
        lines = f.readlines()
    finally:
        f.close()
    if numFromHostsFile:
        lines = lines[0:numFromHostsFile]

    # Split the hosts into "run now" and "run last" instead of appending to
    # the list that is being iterated.
    remoteHosts = []
    postponedHosts = []
    for line in lines:
        host = line.strip()
        if not host or host[0] == '#':  # blank line or comment
            continue
        try:
            remoteIP = socket.gethostbyname_ex(host)[2]
        except Exception:
            remoteIP = 'unknownremote'
        if localIP == remoteIP:  # postpone local machine until last
            postponedHosts.append(host)
            localIP = 0  # don't do it again
            continue
        remoteHosts.append(host)

    for host in remoteHosts + postponedHosts:
        if killCmd:
            # NOTE(review): the command line is assembled from the hosts file
            # and -k/-r values and handed to a shell; only use trusted input.
            cmd = "%s %s -n %s \"/bin/sh -c '%s' &\"" % (rshCmd, xOpt, host, killCmd)
            if verbose:
                print("cmd=:%s:" % (cmd))
            os.system(cmd)
def mpdsigjob():
    """Ask the local mpd to deliver a signal to a job and report the outcome.

    Command line (read from argv):
        sigtype (-a jobalias | -j jobnum[@mpdid]) [-s | -g]

    sigtype may carry a leading '-' and/or 'SIG' prefix; it must be either a
    signal number below signal.NSIG or a name for which the signal module
    defines 'SIG<name>'.  -s selects a single process, -g (the default) the
    group; the choice is sent to the mpd as 's_or_g'.

    Exits with -1 when the signal is invalid, when no reply arrives within
    the timeout, when the reply is not 'mpdsigjob_ack', or when the mpd
    reports that the job was not handled.
    """
    import sys  # to get access to excepthook in next line
    sys.excepthook = mpd_uncaught_except_tb
    if len(argv) < 3 or argv[1] == '-h' or argv[1] == '--help':
        usage()
    signal(SIGINT, sig_handler)
    mpd_set_my_id(myid='mpdsigjob')
    sigtype = argv[1]
    if sigtype.startswith('-'):
        sigtype = sigtype[1:]
    if sigtype.startswith('SIG'):
        sigtype = sigtype[3:]
    import signal as tmpsig  # just to get valid SIG's
    if sigtype.isdigit():
        # NSIG is one more than the highest signal number, so NSIG itself
        # is already out of range.
        if int(sigtype) >= tmpsig.NSIG:
            print('invalid signum: %s' % (sigtype))
            exit(-1)
    else:
        if ('SIG' + sigtype) not in tmpsig.__dict__:
            print('invalid sig type: %s' % (sigtype))
            exit(-1)
    jobalias = ''
    jobnum = ''
    mpdid = ''
    single_or_group = 'g'
    i = 2
    while i < len(argv):
        if argv[i] == '-a':
            if jobnum:  # should not have both alias and jobid
                print('** cannot specify both jobalias and jobid')
                usage()
            if i + 1 >= len(argv):  # option given without its value
                print('** missing value for arg: %s' % (argv[i]))
                usage()
            jobalias = argv[i + 1]
            i += 1
            jobnum = '0'
        elif argv[i] == '-j':
            if jobalias:  # should not have both alias and jobid
                print('** cannot specify both jobalias and jobid')
                usage()
            if i + 1 >= len(argv):  # option given without its value
                print('** missing value for arg: %s' % (argv[i]))
                usage()
            jobid = argv[i + 1]
            i += 1
            sjobid = jobid.split('@')
            jobnum = sjobid[0]
            if len(sjobid) > 1:
                mpdid = sjobid[1]
        elif argv[i] == '-s':
            single_or_group = 's'
        elif argv[i] == '-g':
            single_or_group = 'g'
        else:
            print('** unrecognized arg: %s' % (argv[i]))
            usage()
        i += 1

    # Defaults registered under 'thispgm' can be overridden from the
    # environment and the rc file.
    parmdb = MPDParmDB(
        orderedSources=['cmdline', 'xml', 'env', 'rcfile', 'thispgm'])
    parmsToOverride = {
        'MPD_USE_ROOT_MPD': 0,
        'MPD_SECRETWORD': '',
    }
    for (k, v) in parmsToOverride.items():
        parmdb[('thispgm', k)] = v
    parmdb.get_parms_from_env(parmsToOverride)
    parmdb.get_parms_from_rcfile(parmsToOverride)
    if getuid() == 0 or parmdb['MPD_USE_ROOT_MPD']:
        fullDirName = path.abspath(path.split(argv[0])[0])  # normalize
        mpdroot = path.join(fullDirName, 'mpdroot')
        conSock = MPDConClientSock(mpdroot=mpdroot,
                                   secretword=parmdb['MPD_SECRETWORD'])
    else:
        conSock = MPDConClientSock(secretword=parmdb['MPD_SECRETWORD'])

    msgToSend = {
        'cmd': 'mpdsigjob',
        'sigtype': sigtype,
        'jobnum': jobnum,
        'mpdid': mpdid,
        'jobalias': jobalias,
        's_or_g': single_or_group,
        'username': mpd_get_my_username()
    }
    conSock.send_dict_msg(msgToSend)
    msg = conSock.recv_dict_msg(timeout=5.0)
    if not msg:
        mpd_print(1, 'no msg recvd from mpd before timeout')
        # Stop here: indexing an empty reply below would raise instead of
        # reporting the timeout cleanly.
        exit(-1)
    if msg['cmd'] != 'mpdsigjob_ack':
        if msg['cmd'] == 'already_have_a_console':
            mpd_print(
                1,
                'mpd already has a console (e.g. for long ringtest); try later'
            )
        else:
            mpd_print(1, 'unexpected message from mpd: %s' % (msg))
        exit(-1)
    if not msg['handled']:
        print('job not found')
        exit(-1)
    conSock.close()
Exemple #14
0
def mpdrun():
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \
           try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP, myIfhn

    mpd_set_my_id('mpdrun_' + ` getpid() `)
    pgm = ''
    mship = ''
    rship = ''
    nprocs = 0
    jobAlias = ''
    argsFilename = ''
    outExitCodesFilename = ''
    outXmlFile = ''
    outXmlDoc = ''
    outXmlEC = ''
    delArgsFile = 0
    try0Locally = 1
    lineLabels = 0
    stdinGoesToWho = '0'
    mergingOutput = 0
    hostList = []
    gdb = 0
    gdbAttachJobid = ''
    singinitPID = 0
    singinitPORT = 0
    doingBNR = 0
    totalview = 0
    myHost = gethostname()  # default; may be chgd by -if arg
    myIfhn = ''
    known_rlimit_types = [
        'core', 'cpu', 'fsize', 'data', 'stack', 'rss', 'nproc', 'nofile',
        'ofile', 'memlock', 'as', 'vmem'
    ]
    username = mpd_get_my_username()
    cwd = path.abspath(getcwd())
    recvTimeout = 20

    execs = {}
    users = {}
    cwds = {}
    paths = {}
    args = {}
    envvars = {}
    limits = {}
    hosts = {}

    get_args_from_cmdline(
    )  # verify args as much as possible before connecting to mpd

    (listenSocket, listenPort) = mpd_get_inet_listen_socket('', 0)
    signal(SIGALRM, sig_handler)
    if environ.has_key('MPDRUN_TIMEOUT'):
        jobTimeout = int(environ['MPDRUN_TIMEOUT'])
    elif environ.has_key('MPIEXEC_TIMEOUT'):
        jobTimeout = int(environ['MPIEXEC_TIMEOUT'])
    else:
        jobTimeout = 0
    if environ.has_key('MPIEXEC_BNR'):
        doingBNR = 1
    if environ.has_key('UNIX_SOCKET'):
        conFD = int(environ['UNIX_SOCKET'])
        conSocket = fromfd(conFD, AF_UNIX, SOCK_STREAM)
        close(conFD)
    else:
        if environ.has_key('MPD_CON_EXT'):
            conExt = '_' + environ['MPD_CON_EXT']
        else:
            conExt = ''
        consoleName = '/tmp/mpd2.console_' + username + conExt
        conSocket = socket(AF_UNIX, SOCK_STREAM)  # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
            print 'you can start an mpd with the "mpd" command; to get help, run:'
            print '    mpd -h'
            myExitStatus = -1  # used in main
            exit(myExitStatus)  # really forces jump back into main
            # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) )
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket, msgToSend)
        msgToSend = {'cmd': 'get_mpd_version'}
        mpd_send_one_msg(conSocket, msgToSend)
        msg = recv_one_msg_with_timeout(conSocket, recvTimeout)
        if not msg:
            mpd_raise('no msg recvd from mpd during version check')
        elif msg['cmd'] != 'mpd_version_response':
            mpd_raise('unexpected msg from mpd :%s:' % (msg))
        if msg['mpd_version'] != mpd_version:
            mpd_raise('mpd version %s does not match mine %s' %
                      (msg['mpd_version'], mpd_version))
def mpdcleanup():
    """Remove the mpd console file on the local host and on listed hosts.

    Options are read from sys.argv:
        -h, --help       call usage()
        -v, --verbose    echo every shell command before it is run
        -f, --file FILE  hosts file: one host per line; blank lines and
                         lines starting with '#' are ignored
        -r, --rsh CMD    remote shell command (default 'ssh'; '-x' is
                         added to the command line only for 'ssh')
        -u, --user USER  user name used to build the console file name
        -n NUM           only use the first NUM lines of the hosts file
        -c, --clean CMD  command used to remove the file (default 'rm -f ')
        -k, --kill CMD   additional command run on each host after cleaning

    The file removed is <tmpdir>/mpd2.console_<user><ext>, where <tmpdir>
    is $MPD_TMPDIR (default '/tmp') and <ext> is '_' + $MPD_CON_EXT when
    that variable is set.  Remote hosts are handled first, through
    os.system() in the background; the local host is handled last.

    usage() is presumably expected to terminate the program; the code
    below nevertheless stays well defined if it returns.
    """
    # print(...) is always given one pre-formatted string so that the output
    # is the same whether print is a statement or a function.
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    killCmd = ''  # perhaps '~/bin/kj mpd'  (in quotes)
    cleanCmd = 'rm -f '
    hostsFile = ''
    verbose = 0
    numFromHostsFile = 0  # 0 -> use every line of the hosts file
    opts, args = [], []   # defined even when getopt fails
    try:
        (opts, args) = getopt(sys.argv[1:], 'hvf:r:u:c:k:n:',
                              ['help', 'verbose', 'file=', 'rsh=', 'user=',
                               'clean=', 'kill='])
    except Exception:
        print('invalid arg(s) specified')
        usage()
    for (optName, optValue) in opts:
        if optName in ('-r', '--rsh'):
            rshCmd = optValue
        elif optName in ('-u', '--user'):
            user = optValue
        elif optName in ('-f', '--file'):
            hostsFile = optValue
        elif optName in ('-h', '--help'):
            usage()
        elif optName in ('-v', '--verbose'):
            verbose = 1
        elif optName == '-n':
            try:
                numFromHostsFile = int(optValue)
            except ValueError:
                print('invalid arg(s) specified: -n ' + optValue)
                usage()
        elif optName in ('-c', '--clean'):
            cleanCmd = optValue
        elif optName in ('-k', '--kill'):
            killCmd = optValue
    if args:
        print('invalid arg(s) specified: ' + ' '.join(args))
        usage()

    def runCmd(cmd):
        # echo (when verbose) and run one shell command
        if verbose:
            print('cmd=:%s:' % (cmd))
        os.system(cmd)

    conExt = ''
    if 'MPD_CON_EXT' in os.environ:
        conExt = '_' + os.environ['MPD_CON_EXT']
    tmpdir = os.environ.get('MPD_TMPDIR', '/tmp')
    cleanFile = tmpdir + '/mpd2.console_' + user + conExt
    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''
    # The two fallback strings differ on purpose: two failed lookups must
    # never compare equal, or a remote host would be skipped as "local".
    try:
        localIP = socket.gethostbyname_ex(socket.gethostname())[2]
    except Exception:
        localIP = 'unknownlocal'

    if hostsFile:
        try:
            f = open(hostsFile, 'r')
        except (IOError, OSError):
            print('Not cleaning up on remote hosts; file %s not found' % hostsFile)
            # NOTE(review): this exits before the local cleanup below is
            # reached; kept as in the original -- confirm that is intended.
            sys.exit(0)
        try:
            hosts = f.readlines()
        finally:
            f.close()
        if numFromHostsFile:
            # counts raw lines, i.e. before blank/comment lines are dropped
            hosts = hosts[0:numFromHostsFile]
        for host in hosts:
            host = host.strip()
            if not host or host[0] == '#':
                continue
            try:
                remoteIP = socket.gethostbyname_ex(host)[2]
            except Exception:
                remoteIP = 'unknownremote'
            if localIP == remoteIP:  # local machine handled last below loop
                continue
            runCmd('%s %s -n %s %s %s &' % (rshCmd, xOpt, host, cleanCmd, cleanFile))
            if killCmd:
                runCmd("%s %s -n %s \"/bin/sh -c '%s' &\"" % (rshCmd, xOpt, host, killCmd))

    ## clean up local machine last
    runCmd('%s %s' % (cleanCmd, cleanFile))
    if killCmd:
        runCmd(killCmd)
Exemple #16
0
def mpdboot():
    """Start one mpd locally, then further mpds on hosts from a hosts file.

    Options are read from argv:
        -h, --help                 call usage()
        -r CMD, --rsh=CMD          remote shell command (default 'ssh')
        -u USER, --user=USER       user name (default mpd_get_my_username())
        -m CMD, --mpd=CMD          mpd command (default mpd.py next to argv[0])
        -f FILE, --file=FILE       hosts file (default 'mpd.hosts')
        -n NUM, --totalnum=NUM     total number of mpds to start (default 1)
        --ncpus=NUM                ncpus value passed to the local mpd
        --ifhn=NAME                interface host name for the local mpd;
                                   also replaces the local host name
        --maxbranch=NUM            max mpds started under one root (default 4)
        -d, --debug / -v, --verbose / -s, --shell
        -c, --chkup                check that the hosts are up, then start
        --chkuponly                only check that the hosts are up
        -1                         allow more than one mpd per host
        --loccons / --remcons      pass '-n' to the local / remote mpds

    Hosts file lines have the form 'host[:ncpus] [key=value ...]'; only the
    'ifhn' key is used.  Blank lines and lines starting with '#' are skipped.

    The local mpd is launched first.  Each later mpd is given the host and
    'list_port' of a "root" mpd that is already running as its entry point;
    once maxUnderOneRoot mpds were started under a root, another running
    mpd is taken from possRoots as the next root.

    usage() is presumably expected to terminate the program.
    """
    # print(...) is always given one pre-formatted string so that the output
    # is the same whether print is a statement or a function.
    global myHost, fullDirName, rshCmd, user, mpdCmd, debug, verbose
    myHost = gethostname()
    mpd_set_my_id('mpdboot_%s' % (myHost))
    fullDirName = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName, 'mpd.py')
    hostsFilename = 'mpd.hosts'
    totalnumToStart = 1  # may get chgd below
    debug = 0
    verbose = 0
    localConArg = ''
    remoteConArg = ''
    oneMPDPerHost = 1
    myNcpus = 1
    myIfhn = ''
    chkupIndicator = 0  # 1 -> chk and start ; 2 -> just chk
    maxUnderOneRoot = 4
    # NOTE(review): 'shell' is set here and by -s/--shell but is not read
    # anywhere else in this function; kept so the option stays accepted.
    try:
        shell = path.split(environ['SHELL'])[-1]
    except KeyError:
        shell = 'csh'

    # short options whose value is the next argument -> equivalent long option
    shortWithValue = {'-r': '--rsh', '-u': '--user', '-m': '--mpd',
                      '-f': '--file', '-n': '--totalnum'}
    # long options written as --name=value; matched by prefix
    longWithValue = ('--rsh', '--user', '--mpd', '--file', '--ncpus',
                     '--ifhn', '--totalnum', '--maxbranch')
    intValued = ('--ncpus', '--totalnum', '--maxbranch')

    argidx = 1  # skip arg 0
    while argidx < len(argv):
        arg = argv[argidx]
        argidx += 1
        if arg == '-h' or arg == '--help':
            usage()
            continue
        optName = None
        optValue = None  # stays None when the value is missing or invalid
        if arg in shortWithValue:
            optName = shortWithValue[arg]
            if argidx < len(argv):
                optValue = argv[argidx]
                argidx += 1
        else:
            for prefix in longWithValue:
                if arg.startswith(prefix):
                    optName = prefix
                    if '=' in arg:
                        # split once only, so the value may itself hold '='
                        optValue = arg.split('=', 1)[1]
                    break
        if optName is not None:
            if optValue is not None and optName in intValued:
                try:
                    optValue = int(optValue)
                except ValueError:
                    optValue = None
            if optValue is None:
                print('mpdboot: invalid argument: %s' % (arg))
                usage()
            elif optName == '--rsh':
                rshCmd = optValue
            elif optName == '--user':
                user = optValue
            elif optName == '--mpd':
                mpdCmd = optValue
            elif optName == '--file':
                hostsFilename = optValue
            elif optName == '--ncpus':
                myNcpus = optValue
            elif optName == '--ifhn':
                myIfhn = optValue
                myHost = optValue
            elif optName == '--totalnum':
                totalnumToStart = optValue
            elif optName == '--maxbranch':
                maxUnderOneRoot = optValue
        elif arg == '-d' or arg == '--debug':
            debug = 1
        elif arg == '-s' or arg == '--shell':
            shell = 'bourne'
        elif arg == '-v' or arg == '--verbose':
            verbose = 1
        elif arg == '-c' or arg == '--chkup':
            chkupIndicator = 1
        elif arg == '--chkuponly':
            chkupIndicator = 2
        elif arg == '-1':
            oneMPDPerHost = 0
        elif arg == '--loccons':
            localConArg = '-n'
        elif arg == '--remcons':
            remoteConArg = '-n'
        else:
            print('mpdboot: unrecognized argument: %s' % (arg))
            usage()
    if debug:
        print('debug: starting')

    # the hosts file is only consulted when more than the local mpd is wanted
    lines = []
    if totalnumToStart > 1:
        try:
            f = open(hostsFilename, 'r')
            try:
                lines = f.readlines()
            finally:
                f.close()
        except (IOError, OSError):
            print('unable to open (or read) hostsfile %s' % (hostsFilename))
            exit(-1)
    # entry 0 always describes the local host
    hostsAndInfo = [{'host': myHost, 'ncpus': myNcpus, 'ifhn': myIfhn}]
    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        fields = line.split()
        host = fields[0]
        ncpus = 1  # default
        ifhn = ''  # default
        try:
            if ':' in host:
                (host, ncpus) = host.split(':', 1)
                ncpus = int(ncpus)
            for kv in fields[1:]:
                (k, v) = kv.split('=', 1)
                if k == 'ifhn':
                    ifhn = v
        except ValueError:
            # non-integer ncpus, or a trailing token that is not key=value
            print('mpdboot: invalid line in hostsfile %s: %s' % (hostsFilename, line))
            exit(-1)
        hostsAndInfo.append({'host': host, 'ncpus': ncpus, 'ifhn': ifhn})
    if oneMPDPerHost and totalnumToStart > 1:
        # keep only the first entry for each distinct host
        oldHosts = hostsAndInfo[:]
        hostsAndInfo = []
        for x in oldHosts:
            for y in hostsAndInfo:
                if mpd_same_ips(x['host'], y['host']):
                    break
            else:
                hostsAndInfo.append(x)
    if len(hostsAndInfo) < totalnumToStart:  # one is local
        print('totalnum=%d  numhosts=%d' % (totalnumToStart, len(hostsAndInfo)))
        print('there are not enough hosts on which to start all processes')
        exit(-1)
    if chkupIndicator:
        hostsToCheck = [hai['host'] for hai in hostsAndInfo[1:totalnumToStart]]
        (upList, dnList) = chkupdn(hostsToCheck)
        if dnList:
            print("these hosts are down; exiting")
            print(dnList)
            exit(-1)
        print("there are %d hosts up (counting local)" % (len(upList) + 1))
        if chkupIndicator == 2:  # do the chkup and quit
            exit(0)

    try:
        # stop current (if any) mpds; ignore the output
        getoutput('%s/mpdallexit.py' % (fullDirName))
        if verbose or debug:
            print('running mpdallexit on %s' % (myHost))
    except Exception:
        # best effort only: failing to stop old mpds must not abort the boot
        pass

    if myIfhn:
        ifhn = '--ifhn=%s' % (myIfhn)
    else:
        ifhn = ''
    hostsAndInfo[0]['entry_host'] = ''
    hostsAndInfo[0]['entry_port'] = ''
    mpdArgs = '%s %s --ncpus=%d' % (localConArg, ifhn, myNcpus)
    (mpdPID, mpdFD) = launch_one_mpd(0, 0, mpdArgs, hostsAndInfo)
    fd2idx = {mpdFD: 0}

    # presumably records the local mpd's 'list_port' in hostsAndInfo[0],
    # which the loop below reads -- verify against handle_mpd_output
    handle_mpd_output(mpdFD, fd2idx, hostsAndInfo)

    try:
        from os import sysconf
        maxfds = sysconf('SC_OPEN_MAX')
    except Exception:
        maxfds = 1024
    maxAtOnce = min(128, maxfds -
                    8)  # -8  for stdeout, etc. + a few more for padding

    hostsSeen = {myHost: 1}
    fdsToSelect = []
    numStarted = 1  # local already going
    numStarting = 0
    numUnderCurrRoot = 0
    possRoots = []
    currRoot = 0
    idxToStart = 1  # local mpd already going
    while numStarted < totalnumToStart:
        if numStarting < maxAtOnce and idxToStart < totalnumToStart:
            if numUnderCurrRoot < maxUnderOneRoot:
                entryHost = hostsAndInfo[currRoot]['host']
                entryPort = hostsAndInfo[currRoot]['list_port']
                hostsAndInfo[idxToStart]['entry_host'] = entryHost
                hostsAndInfo[idxToStart]['entry_port'] = entryPort
                # NOTE(review): once set here, '-n' also applies to every
                # host started afterwards; kept as in the original.
                if hostsAndInfo[idxToStart]['host'] in hostsSeen:
                    remoteConArg = '-n'
                myNcpus = hostsAndInfo[idxToStart]['ncpus']
                ifhn = hostsAndInfo[idxToStart]['ifhn']
                if ifhn:
                    ifhn = '--ifhn=%s' % (ifhn)
                mpdArgs = '%s -h %s -p %s %s --ncpus=%d' % (
                    remoteConArg, entryHost, entryPort, ifhn, myNcpus)
                (mpdPID, mpdFD) = launch_one_mpd(idxToStart, currRoot, mpdArgs,
                                                 hostsAndInfo)
                numStarting += 1
                numUnderCurrRoot += 1
                hostsAndInfo[idxToStart]['pid'] = mpdPID
                hostsSeen[hostsAndInfo[idxToStart]['host']] = 1
                fd2idx[mpdFD] = idxToStart
                fdsToSelect.append(mpdFD)
                idxToStart += 1
            else:
                # current root is full; move on to another running mpd, if any
                if possRoots:
                    currRoot = possRoots.pop()
                    numUnderCurrRoot = 0
            selectTime = 0.01
        else:
            selectTime = 0.1
        try:
            (readyFDs, unused1, unused2) = select(fdsToSelect, [], [],
                                                  selectTime)
        except error as errmsg:
            mpd_print(1, 'mpdboot: select failed: errmsg=:%s:' % (errmsg))
            exit(-1)
        for fd in readyFDs:
            # an mpd that produced output counts as started and may now
            # serve as a root for later mpds
            handle_mpd_output(fd, fd2idx, hostsAndInfo)
            numStarted += 1
            numStarting -= 1
            possRoots.append(fd2idx[fd])
            fdsToSelect.remove(fd)
            fd.close()
def mpdcleanup():
    """Remove the mpd console file on the local host and on listed hosts.

    Options are read from sys.argv:
        -h, --help       call usage()
        -v, --verbose    echo every shell command before it is run
        -f, --file FILE  hosts file: one host per line; blank lines and
                         lines starting with '#' are ignored
        -r, --rsh CMD    remote shell command (default 'ssh'; '-x' is
                         added to the command line only for 'ssh')
        -u, --user USER  user name used to build the console file name
        -n NUM           only use the first NUM lines of the hosts file
        -c, --clean CMD  command used to remove the file (default 'rm -f ')
        -k, --kill CMD   additional command run on each host after cleaning

    The file removed is <tmpdir>/mpd2.console_<user><ext>, where <tmpdir>
    is $MPD_TMPDIR (default '/tmp') and <ext> is '_' + $MPD_CON_EXT when
    that variable is set.  Remote hosts are handled first, through
    os.system() in the background; the local host is handled last.

    usage() is presumably expected to terminate the program; the code
    below nevertheless stays well defined if it returns.
    """
    # print(...) is always given one pre-formatted string so that the output
    # is the same whether print is a statement or a function.
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    killCmd = ''  # perhaps '~/bin/kj mpd'  (in quotes)
    cleanCmd = 'rm -f '
    hostsFile = ''
    verbose = 0
    numFromHostsFile = 0  # 0 -> use every line of the hosts file
    opts, args = [], []   # defined even when getopt fails
    try:
        (opts, args) = getopt(sys.argv[1:], 'hvf:r:u:c:k:n:',
                              ['help', 'verbose', 'file=', 'rsh=', 'user=',
                               'clean=', 'kill='])
    except Exception:
        print('invalid arg(s) specified')
        usage()
    for (optName, optValue) in opts:
        if optName in ('-r', '--rsh'):
            rshCmd = optValue
        elif optName in ('-u', '--user'):
            user = optValue
        elif optName in ('-f', '--file'):
            hostsFile = optValue
        elif optName in ('-h', '--help'):
            usage()
        elif optName in ('-v', '--verbose'):
            verbose = 1
        elif optName == '-n':
            try:
                numFromHostsFile = int(optValue)
            except ValueError:
                print('invalid arg(s) specified: -n ' + optValue)
                usage()
        elif optName in ('-c', '--clean'):
            cleanCmd = optValue
        elif optName in ('-k', '--kill'):
            killCmd = optValue
    if args:
        print('invalid arg(s) specified: ' + ' '.join(args))
        usage()

    def runCmd(cmd):
        # echo (when verbose) and run one shell command
        if verbose:
            print('cmd=:%s:' % (cmd))
        os.system(cmd)

    conExt = ''
    if 'MPD_CON_EXT' in os.environ:
        conExt = '_' + os.environ['MPD_CON_EXT']
    tmpdir = os.environ.get('MPD_TMPDIR', '/tmp')
    cleanFile = tmpdir + '/mpd2.console_' + user + conExt
    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''
    # The two fallback strings differ on purpose: two failed lookups must
    # never compare equal, or a remote host would be skipped as "local".
    try:
        localIP = socket.gethostbyname_ex(socket.gethostname())[2]
    except Exception:
        localIP = 'unknownlocal'

    if hostsFile:
        try:
            f = open(hostsFile, 'r')
        except (IOError, OSError):
            print('Not cleaning up on remote hosts; file %s not found' % hostsFile)
            # NOTE(review): this exits before the local cleanup below is
            # reached; kept as in the original -- confirm that is intended.
            sys.exit(0)
        try:
            hosts = f.readlines()
        finally:
            f.close()
        if numFromHostsFile:
            # counts raw lines, i.e. before blank/comment lines are dropped
            hosts = hosts[0:numFromHostsFile]
        for host in hosts:
            host = host.strip()
            if not host or host[0] == '#':
                continue
            try:
                remoteIP = socket.gethostbyname_ex(host)[2]
            except Exception:
                remoteIP = 'unknownremote'
            if localIP == remoteIP:  # local machine handled last below loop
                continue
            runCmd('%s %s -n %s %s %s &' % (rshCmd, xOpt, host, cleanCmd, cleanFile))
            if killCmd:
                runCmd("%s %s -n %s \"/bin/sh -c '%s' &\"" % (rshCmd, xOpt, host, killCmd))

    ## clean up local machine last
    runCmd('%s %s' % (cleanCmd, cleanFile))
    if killCmd:
        runCmd(killCmd)