Beispiel #1
0
def get_vals_for_attach():
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \
           try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP

    sjobid = gdbAttachJobid.split('@')  # jobnum and originating host
    msgToSend = {'cmd': 'mpdlistjobs'}
    mpd_send_one_msg(conSocket, msgToSend)
    msg = recv_one_msg_with_timeout(conSocket, 5)
    if not msg:
        mpd_raise('no msg recvd from mpd before timeout')
    if msg['cmd'] != 'local_mpdid':  # get full id of local mpd for filters later
        mpd_raise(
            'did not recv local_mpdid msg from local mpd; instead, recvd: %s' %
            msg)
    else:
        if len(sjobid) == 1:
            sjobid.append(msg['id'])
    got_info = 0
    while 1:
        msg = mpd_recv_one_msg(conSocket)
        if not msg.has_key('cmd'):
            print 'mpdlistjobs: INVALID msg=:%s:' % (msg)
            exit(-1)
        if msg['cmd'] == 'mpdlistjobs_info':
            got_info = 1
            smjobid = msg['jobid'].split(
                '  ')  # jobnum, mpdid, and alias (if present)
            if sjobid[0] == smjobid[0] and sjobid[1] == smjobid[
                    1]:  # jobnum and mpdid
                rank = int(msg['rank'])
                users[(rank, rank)] = msg['username']
                hosts[(rank, rank)] = msg['host']
                execs[(rank, rank)] = msg['pgm']
                cwds[(rank, rank)] = cwd
                paths[(rank, rank)] = environ['PATH']
                args[(rank, rank)] = [msg['clipid']]
                envvars[(rank, rank)] = {}
                limits[(rank, rank)] = {}
        elif msg['cmd'] == 'mpdlistjobs_trailer':
            if not got_info:
                print 'no info on this jobid; probably invalid'
                exit(-1)
            break
        else:
            print 'invaild msg from mpd :%s:' % (msg)
            exit(-1)
    nprocs = len(execs.keys())  # all dicts are the same len here
Beispiel #2
0
def recv_one_msg_with_timeout(sock, timeout):
    """Receive one message from sock while an alarm of `timeout` is armed.

    The value returned by alarm(timeout) is saved and passed back to
    alarm() once the receive is over, so an alarm the caller had pending is
    re-armed.  The restore runs in a `finally` clause: if the receive raises,
    the temporary alarm is not left armed and the caller's alarm is not lost.

    Returns whatever mpd_recv_one_msg(sock) returns; per the original note,
    that call fails WITHOUT a msg if sigalrm occurs.
    """
    # NOTE(review): alarm is presumably signal.alarm, whose argument and
    # return value are in seconds -- confirm against the file's imports.
    oldTimeout = alarm(timeout)
    try:
        msg = mpd_recv_one_msg(sock)  # fails WITHOUT a msg if sigalrm occurs
    finally:
        alarm(oldTimeout)
    return msg
Beispiel #3
0
        conSocket = socket(AF_UNIX, SOCK_STREAM)  # note: UNIX socket
        try:
            conSocket.connect(consoleName)
        except Exception, errmsg:
            print 'mpdringtest: cannot connect to local mpd (%s); possible causes:' % consoleName
            print '    1. no mpd running on this host'
            print '    2. mpd is running but was started without a "console" (-n option)'
            print 'you can start an mpd with the "mpd" command; to get help, run:'
            print '    mpd -h'
            exit(-1)
        msgToSend = 'realusername=%s\n' % username
        mpd_send_one_line(conSocket, msgToSend)
    msgToSend = {'cmd': 'mpdringtest', 'numloops': numLoops}
    starttime = time()
    mpd_send_one_msg(conSocket, msgToSend)
    msg = mpd_recv_one_msg(conSocket)
    etime = time() - starttime
    if not msg:
        print 'mpdringtest terminated early'
    elif msg['cmd'] != 'mpdringtest_done':
        if msg['cmd'] == 'already_have_a_console':
            print 'mpd already has a console (e.g. for long ringtest); try later'
        else:
            print 'unexpected message from mpd: %s' % (msg)
    else:
        print 'time for %d loops =' % numLoops, etime, 'seconds'


def sigint_handler(signum, frame):
    """Signal handler that terminates the program by calling exit(-1).

    signum and frame are the standard handler arguments; neither is used.
    """
    status = -1
    exit(status)
Beispiel #4
0
def mpdboot():
    global myHost, fullDirName, topMPDBoot, user
    mpd_set_my_id('mpdboot_rank_notset')
    fullDirName = path.abspath(path.split(argv[0])[0])
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    mpdCmd = path.join(fullDirName, 'mpd.py')
    mpdbootCmd = path.join(fullDirName, 'mpdboot.py')
    hostsFilename = 'mpd.hosts'
    totalNum = 1  # may get chgd below
    debug = 0
    verbosity = 0
    localConsoleArg = ''
    remoteConsoleArg = ''
    myConsoleVal = ''
    oneMPDPerHost = 1
    entryHost = ''
    entryPort = ''
    topMPDBoot = 1
    myHost = gethostname()
    myNcpus = 1
    myIfhn = ''
    try:
        shell = path.split(environ['SHELL'])[-1]
    except:
        shell = 'csh'

    argidx = 1  # skip arg 0
    while argidx < len(argv):
        if argv[argidx] == '-h' or argv[argidx] == '--help':
            usage()
        elif argv[argidx] == '-zentry':  # entry host and port
            if ':' not in argv[argidx + 1]:
                print 'invalid pair of entry host and entry port for -zentry option'
                usage()
            (entryHost, entryPort) = argv[argidx + 1].split(':')
            try:
                ip = gethostbyname_ex(entryHost)[2]  # may fail if invalid host
            except:
                print 'invalid entry host ', entryHost
                stdout.flush()
                usage()
            if not entryPort.isdigit():
                print 'invalid (nonumeric) entry port ', entryPort
                stdout.flush()
                usage()
            entryHost = entryHost
            entryPort = entryPort
            argidx += 2
        elif argv[argidx] == '-zrank':
            topMPDBoot = 0
            myBootRank = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx] == '-zhosts':
            zhosts = argv[argidx + 1]
            zhosts = zhosts.split(',')
            hostsAndInfo = []
            for zhost in zhosts:
                (host, ncpus, ifhn) = zhost.split(':')
                hostsAndInfo.append({
                    'host': host,
                    'ncpus': ncpus,
                    'ifhn': ifhn
                })
            argidx += 2
        elif argv[argidx] == '-r':  # or --rsh=
            rshCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--rsh'):
            splitArg = argv[argidx].split('=')
            try:
                rshCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-u':  # or --user=
            user = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--user'):
            splitArg = argv[argidx].split('=')
            try:
                user = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-m':  # or --mpd=
            mpdCmd = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--mpd'):
            splitArg = argv[argidx].split('=')
            try:
                mpdCmd = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-f':  # or --file=
            hostsFilename = argv[argidx + 1]
            argidx += 2
        elif argv[argidx].startswith('--file'):
            splitArg = argv[argidx].split('=')
            try:
                hostsFilename = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ncpus'):
            splitArg = argv[argidx].split('=')
            try:
                myNcpus = splitArg[1]
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx].startswith('--ifhn'):
            splitArg = argv[argidx].split('=')
            myIfhn = splitArg[1]
            myHost = splitArg[1]
            argidx += 1
        elif argv[argidx] == '-n':  # or --totalnum=
            totalNum = int(argv[argidx + 1])
            argidx += 2
        elif argv[argidx].startswith('--totalnum'):
            splitArg = argv[argidx].split('=')
            try:
                totalNum = int(splitArg[1])
            except:
                print 'mpdboot: invalid argument:', argv[argidx]
                usage()
            argidx += 1
        elif argv[argidx] == '-d' or argv[argidx] == '--debug':
            debug = 1
            argidx += 1
        elif argv[argidx] == '-s' or argv[argidx] == '--shell':
            shell = 'bourne'
            argidx += 1
        elif argv[argidx] == '-v' or argv[argidx] == '--verbose':
            verbosity = 1
            argidx += 1
        elif argv[argidx] == '-1':
            oneMPDPerHost = 0
            argidx += 1
        elif argv[argidx] == '--loccons':
            localConsoleArg = '--loccons'
            argidx += 1
        elif argv[argidx] == '--remcons':
            remoteConsoleArg = '--remcons'
            argidx += 1
        else:
            print 'mpdboot: unrecognized argument:', argv[argidx]
            usage()

    if topMPDBoot:
        lines = []
        if totalNum > 1:
            try:
                f = open(hostsFilename, 'r')
                for line in f:
                    lines.append(line)
            except:
                print 'unable to open (or read) hostsfile %s' % (hostsFilename)
                exit(-1)
        hostsAndInfo = [{'host': myHost, 'ncpus': myNcpus, 'ifhn': myIfhn}]
        for line in lines:
            line = line.strip()
            if not line or line[0] == '#':
                continue
            splitLine = re.split(r'\s+', line)
            host = splitLine[0]
            ncpus = 1  # default
            if ':' in host:
                (host, ncpus) = host.split(':', 1)
                ncpus = int(ncpus)
            ifhn = ''  # default
            for kv in splitLine[1:]:
                (k, v) = kv.split('=', 1)
                if k == 'ifhn':
                    ifhn = v
            hostsAndInfo.append({'host': host, 'ncpus': ncpus, 'ifhn': ifhn})
        if oneMPDPerHost and totalNum > 1:
            oldHosts = hostsAndInfo[:]
            hostsAndInfo = []
            for x in oldHosts:
                keep = 1
                for y in hostsAndInfo:
                    if mpd_same_ips(x['host'], y['host']):
                        keep = 0
                        break
                if keep:
                    hostsAndInfo.append(x)
        if len(hostsAndInfo) < totalNum:  # one is local
            print 'totalNum=%d  num hosts=%d' % (totalNum, len(hostsAndInfo))
            print 'there are not enough hosts on which to start all processes'
            exit(-1)
        myBootRank = 0
        if localConsoleArg:
            myConsoleVal = '-n'
    else:
        if remoteConsoleArg:
            myConsoleVal = '-n'
    anMPDalreadyHere = 0
    for i in range(myBootRank):
        if mpd_same_ips(hostsAndInfo[i]['host'],
                        myHost):  # if one before me on this host
            myConsoleVal = '-n'
            anMPDalreadyHere = 1
            break
    if not anMPDalreadyHere:
        try:
            system('%s/mpdallexit.py > /dev/null' %
                   (fullDirName))  # stop any current mpds
        except:
            pass

    mpd_set_my_id('mpdboot_%s_%d' % (myHost, myBootRank))
    if debug:
        mpd_print(1, 'starting')
    (parent, lchild,
     rchild) = mpd_get_ranks_in_binary_tree(myBootRank, totalNum)
    if debug:
        mpd_print(1, 'p=%d l=%d r=%d' % (parent, lchild, rchild))

    if myIfhn:
        ifhnVal = '--if %s' % (myIfhn)
    elif hostsAndInfo[myBootRank]['ifhn']:
        ifhnVal = '--if %s' % (hostsAndInfo[myBootRank]['ifhn'])
    else:
        ifhnVal = ''
    if entryHost:
        cmd = '%s %s -h %s -p %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,entryHost,entryPort,myNcpus,ifhnVal)
    else:
        cmd = '%s %s -d -e --ncpus %s %s' % \
       (mpdCmd,myConsoleVal,myNcpus,ifhnVal)
    if verbosity:
        mpd_print(1, 'starting local mpd on %s' % (myHost))
    if debug:
        mpd_print(1, 'cmd to run local mpd = :%s:' % (cmd))

    if not access(mpdCmd, X_OK):
        err_exit('cannot access mpd cmd :%s:' % (mpdCmd))
    locMPD = Popen4(cmd, 0)
    locMPDFD = locMPD.fromchild
    locMPDPort = locMPDFD.readline().strip()
    if locMPDPort.isdigit():
        # can't do this until he's already in his ring
        locMPDSocket = mpd_get_inet_socket_and_connect(myHost, int(locMPDPort))
        if locMPDSocket:
            msgToSend = {
                'cmd': 'ping',
                'host': 'ping',
                'port': 0
            }  # dummy host & port
            mpd_send_one_msg(locMPDSocket, {
                'cmd': 'ping',
                'host': myHost,
                'port': 0
            })
            msg = mpd_recv_one_msg(locMPDSocket)  # RMB: WITH TIMEOUT ??
            if not msg or not msg.has_key('cmd') or msg['cmd'] != 'ping_ack':
                err_exit(
                    '%d: unable to ping local mpd; invalid msg from mpd :%s:' %
                    (myBootRank, msg))
            locMPDSocket.close()
        else:
            err_exit('failed to connect to mpd')
    else:
        err_exit('%d: invalid port from mpd %s' %
                 (myBootRank, str(locMPDPort)))

    if not entryHost:
        entryHost = myHost
        entryPort = locMPDPort

    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    lfd = 0
    rfd = 0
    fdsToSelect = []
    if debug:
        debugArg = '-d'
    else:
        debugArg = ''
    if verbosity:
        verboseArg = '-v'
    else:
        verboseArg = ''
    if lchild >= 0:
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[lchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[lchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[lchild]['host'], mpdbootCmd,
               hostsAndInfo[lchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, lchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[lchild]))
        if debug:
            mpd_print(1, 'cmd to run lchild boot = :%s:' % (cmd))
        lchildMPDBoot = Popen4(cmd, 0)
        lfd = lchildMPDBoot.fromchild
        fdsToSelect.append(lfd)
    if rchild >= 0:
        zhosts = [
            "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn'])
            for h in hostsAndInfo
        ]
        if hostsAndInfo[rchild]['ifhn']:
            ifhnVal = '--ifhn=%s' % (hostsAndInfo[rchild]['ifhn'])
        else:
            ifhnVal = ''
        cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \
              (rshCmd, xOpt, hostsAndInfo[rchild]['host'], mpdbootCmd,
               hostsAndInfo[rchild]['ncpus'],ifhnVal,
        rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost,
        entryPort, rchild,
        ','.join(zhosts) )
        if verbosity:
            mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[rchild]))
        if debug:
            mpd_print(1, 'cmd to run rchild boot = :%s:' % (cmd))
        rchildMPDBoot = Popen4(cmd, 0)
        rfd = rchildMPDBoot.fromchild
        fdsToSelect.append(rfd)

    lfd_first_line = 1
    rfd_first_line = 1
    while fdsToSelect:
        try:
            (readyFDs, unused1, unused2) = select(fdsToSelect, [], [], 0.1)
        except error, errmsg:
            mpd_raise('mpdboot: select failed: errmsg=:%s:' % (errmsg))
        if lfd and lfd in readyFDs:
            line = lfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and lfd_first_line:
                        lfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (lchild, hostsAndInfo[lchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                lfd.close()
                fdsToSelect.remove(lfd)
        if rfd and rfd in readyFDs:
            line = rfd.readline()
            if line:
                if line.find('RC=MPDBOOT_ERREXIT') >= 0:
                    err_exit('RC=MPDBOOT_ERREXIT')
                else:
                    if not verbosity and rfd_first_line:
                        rfd_first_line = 0
                        mpd_print(
                            1,
                            "error trying to start mpd(boot) at %d %s; output:"
                            % (rchild, hostsAndInfo[rchild]))
                    print '  ', line,
                    stdout.flush()
            else:
                rfd.close()
                fdsToSelect.remove(rfd)
Beispiel #5
0
                            print '        %s' % (host)
            elif msg['reason'] == 'invalid_username':
                print 'mpdrun: invalid username %s at host %s' % \
                      (msg['username'],msg['host'])
            else:
                print 'mpdrun: job failed; reason=:%s:' % (msg['reason'])
            myExitStatus = -1  # used in main
            exit(myExitStatus)  # really forces jump back into main
        else:
            mpd_raise('unexpected message from mpd: %s' % (msg))
    conSocket.close()
    if jobTimeout:
        alarm(jobTimeout)

    (manSocket, addr) = listenSocket.accept()
    msg = mpd_recv_one_msg(manSocket)
    if (not msg or not msg.has_key('cmd') or msg['cmd'] != 'man_checking_in'):
        mpd_raise('mpdrun: from man, invalid msg=:%s:' % (msg))
    msgToSend = {
        'cmd': 'ring_ncpus',
        'ring_ncpus': currRingNCPUs,
        'ringsize': currRingSize
    }
    mpd_send_one_msg(manSocket, msgToSend)
    msg = mpd_recv_one_msg(manSocket)
    if (not msg or not msg.has_key('cmd')):
        mpd_raise('mpdrun: from man, invalid msg=:%s:' % (msg))
    if (msg['cmd'] == 'job_started'):
        jobid = msg['jobid']
        if outXmlEC:
            outXmlEC.setAttribute('jobid', jobid.strip())