def mpdcleanup():
    """Remove a user's mpd console file locally and on the listed hosts.

    Options are read from ``argv``:

      -h, --help         call usage()
      -f, --file FILE    file naming remote hosts, one per line; lines
                         starting with '#' and blank lines are ignored
      -r, --rsh CMD      remote shell command (default 'ssh')
      -u, --user USER    user whose console file is removed (default: the
                         value returned by mpd_get_my_username())
      -c, --clean CMD    removal command (default 'rm -f ')

    The file removed is '/tmp/mpd2.console_<user>' with '_<MPD_CON_EXT>'
    appended when that environment variable is set.  The removal command is
    run locally through system() and then, for every host in the hosts
    file, in the background through the remote shell.

    If the hosts file cannot be opened a message is printed and the program
    exits with status 0; the local cleanup has already been done by then.
    """
    rshCmd = 'ssh'
    user = mpd_get_my_username()
    cleanCmd = 'rm -f '
    hostsFile = ''
    try:
        (opts, args) = getopt(argv[1:], 'hf:r:u:c:',
                              ['help', 'file=', 'rsh=', 'user=', 'clean='])
    except Exception:
        usage()
        mpd_raise('invalid arg(s) specified')
    else:
        for (flag, value) in opts:
            if flag in ('-r', '--rsh'):
                rshCmd = value
            elif flag in ('-u', '--user'):
                user = value
            elif flag in ('-f', '--file'):
                hostsFile = value
            elif flag in ('-h', '--help'):
                usage()
            elif flag in ('-c', '--clean'):
                cleanCmd = value
    if args:
        usage()
        mpd_raise('invalid arg(s) specified: ' + ' '.join(args))

    if 'MPD_CON_EXT' in environ:
        conExt = '_' + environ['MPD_CON_EXT']
    else:
        conExt = ''
    cleanFile = '/tmp/mpd2.console_' + user + conExt
    system('%s %s' % (cleanCmd, cleanFile))

    # ssh gets -x; any other remote shell gets no extra option.
    if rshCmd == 'ssh':
        xOpt = '-x'
    else:
        xOpt = ''

    if hostsFile:
        try:
            f = open(hostsFile, 'r')
        except (IOError, OSError):
            print('Not cleaning up on remote hosts; file %s not found' % hostsFile)
            exit(0)
        try:
            hosts = f.readlines()
        finally:
            f.close()
        for host in hosts:
            host = host.strip()
            # Skip blank lines (indexing host[0] would fail on them) and
            # '#' comment lines.
            if not host or host[0] == '#':
                continue
            cmd = '%s %s -n %s %s %s &' % (rshCmd, xOpt, host, cleanCmd, cleanFile)
            system(cmd)
def get_vals_for_attach():
    """Fill the per-rank job tables for the job named by gdbAttachJobid.

    gdbAttachJobid is split on '@' into a job number and, optionally, an
    mpd id; when the mpd id is missing, the id reported in the local mpd's
    'local_mpdid' reply is used.  An 'mpdlistjobs' request is sent on
    conSocket and every 'mpdlistjobs_info' reply whose job number and mpd
    id match adds one entry, keyed (rank, rank), to users, hosts, execs,
    cwds, paths, args, envvars and limits.  Reading stops at the
    'mpdlistjobs_trailer' reply, after which nprocs is set to the number of
    entries in execs.

    Exits with status -1 on a reply without a 'cmd' key, on an unexpected
    'cmd', or when the trailer arrives before any info reply.
    """
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile
    global try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    global singinitPID, singinitPORT, doingBNR, myHost, myIP

    wanted = gdbAttachJobid.split('@')  # jobnum and originating host
    mpd_send_one_msg(conSocket, {'cmd': 'mpdlistjobs'})
    reply = recv_one_msg_with_timeout(conSocket, 5)
    if not reply:
        mpd_raise('no msg recvd from mpd before timeout')
    if reply['cmd'] != 'local_mpdid':  # need full id of local mpd for the filter below
        mpd_raise(
            'did not recv local_mpdid msg from local mpd; instead, recvd: %s' % reply)
    elif len(wanted) == 1:
        wanted.append(reply['id'])

    seen_info = False
    while True:
        reply = mpd_recv_one_msg(conSocket)
        if 'cmd' not in reply:
            print('mpdlistjobs: INVALID msg=:%s:' % reply)
            exit(-1)
        kind = reply['cmd']
        if kind == 'mpdlistjobs_trailer':
            if not seen_info:
                print('no info on this jobid; probably invalid')
                exit(-1)
            break
        if kind != 'mpdlistjobs_info':
            print('invaild msg from mpd :%s:' % reply)
            exit(-1)
        seen_info = True
        listed = reply['jobid'].split(' ')  # jobnum, mpdid, and alias (if present)
        if wanted[0] != listed[0] or wanted[1] != listed[1]:
            continue  # some other job
        rank = int(reply['rank'])
        slot = (rank, rank)
        users[slot] = reply['username']
        hosts[slot] = reply['host']
        execs[slot] = reply['pgm']
        cwds[slot] = cwd
        paths[slot] = environ['PATH']
        args[slot] = [reply['clipid']]
        envvars[slot] = {}
        limits[slot] = {}
    nprocs = len(execs)  # all dicts are the same len here
print '** unrecognized arg: %s' % (argv[i]) usage() i += 1 msgToSend = { 'cmd': 'mpdsigjob', 'sigtype': sigtype, 'jobnum': jobnum, 'mpdid': mpdid, 'jobalias': jobalias, 's_or_g': single_or_group, 'username': username } mpd_send_one_msg(conSocket, msgToSend) msg = recv_one_msg_with_timeout(conSocket, 5) if not msg: mpd_raise('no msg recvd from mpd before timeout') if msg['cmd'] != 'mpdsigjob_ack': if msg['cmd'] == 'already_have_a_console': print 'mpd already has a console (e.g. for long ringtest); try later' else: print 'unexpected message from mpd: %s' % (msg) exit(-1) if not msg['handled']: print 'job not found' exit(-1) conSocket.close() def signal_handler(signum, frame): if signum == SIGALRM: pass
conSocket.connect(consoleName) except Exception, errmsg: print 'mpdexit: cannot connect to local mpd (%s); possible causes:' % consoleName print ' 1. no mpd running on this host' print ' 2. mpd is running but was started without a "console" (-n option)' print 'you can start an mpd with the "mpd" command; to get help, run:' print ' mpd -h' exit(-1) # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) ) msgToSend = 'realusername=%s\n' % username mpd_send_one_line(conSocket, msgToSend) msgToSend = {'cmd': 'mpdexit', 'mpdid': argv[1]} mpd_send_one_msg(conSocket, msgToSend) msg = recv_one_msg_with_timeout(conSocket, 5) if not msg: mpd_raise('no msg recvd from mpd before timeout') if not msg: mpd_raise('mpd unexpectedly closed connection') elif msg['cmd'] == 'already_have_a_console': mpd_raise( 'mpd already has a console (e.g. for long ringtest); try later') if not msg.has_key('cmd'): raise RuntimeError, 'mpdexit: INVALID msg=:%s:' % (msg) if msg['cmd'] != 'mpdexit_ack': print 'mpdexit failed; may have wrong mpdid' # print 'mpdexit done' def signal_handler(signum, frame): if signum == SIGALRM: pass
def mpdboot(): global myHost, fullDirName, topMPDBoot, user mpd_set_my_id('mpdboot_rank_notset') fullDirName = path.abspath(path.split(argv[0])[0]) rshCmd = 'ssh' user = mpd_get_my_username() mpdCmd = path.join(fullDirName, 'mpd.py') mpdbootCmd = path.join(fullDirName, 'mpdboot.py') hostsFilename = 'mpd.hosts' totalNum = 1 # may get chgd below debug = 0 verbosity = 0 localConsoleArg = '' remoteConsoleArg = '' myConsoleVal = '' oneMPDPerHost = 1 entryHost = '' entryPort = '' topMPDBoot = 1 myHost = gethostname() myNcpus = 1 myIfhn = '' try: shell = path.split(environ['SHELL'])[-1] except: shell = 'csh' argidx = 1 # skip arg 0 while argidx < len(argv): if argv[argidx] == '-h' or argv[argidx] == '--help': usage() elif argv[argidx] == '-zentry': # entry host and port if ':' not in argv[argidx + 1]: print 'invalid pair of entry host and entry port for -zentry option' usage() (entryHost, entryPort) = argv[argidx + 1].split(':') try: ip = gethostbyname_ex(entryHost)[2] # may fail if invalid host except: print 'invalid entry host ', entryHost stdout.flush() usage() if not entryPort.isdigit(): print 'invalid (nonumeric) entry port ', entryPort stdout.flush() usage() entryHost = entryHost entryPort = entryPort argidx += 2 elif argv[argidx] == '-zrank': topMPDBoot = 0 myBootRank = int(argv[argidx + 1]) argidx += 2 elif argv[argidx] == '-zhosts': zhosts = argv[argidx + 1] zhosts = zhosts.split(',') hostsAndInfo = [] for zhost in zhosts: (host, ncpus, ifhn) = zhost.split(':') hostsAndInfo.append({ 'host': host, 'ncpus': ncpus, 'ifhn': ifhn }) argidx += 2 elif argv[argidx] == '-r': # or --rsh= rshCmd = argv[argidx + 1] argidx += 2 elif argv[argidx].startswith('--rsh'): splitArg = argv[argidx].split('=') try: rshCmd = splitArg[1] except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx] == '-u': # or --user= user = argv[argidx + 1] argidx += 2 elif argv[argidx].startswith('--user'): splitArg = argv[argidx].split('=') try: user = splitArg[1] 
except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx] == '-m': # or --mpd= mpdCmd = argv[argidx + 1] argidx += 2 elif argv[argidx].startswith('--mpd'): splitArg = argv[argidx].split('=') try: mpdCmd = splitArg[1] except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx] == '-f': # or --file= hostsFilename = argv[argidx + 1] argidx += 2 elif argv[argidx].startswith('--file'): splitArg = argv[argidx].split('=') try: hostsFilename = splitArg[1] except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx].startswith('--ncpus'): splitArg = argv[argidx].split('=') try: myNcpus = splitArg[1] except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx].startswith('--ifhn'): splitArg = argv[argidx].split('=') myIfhn = splitArg[1] myHost = splitArg[1] argidx += 1 elif argv[argidx] == '-n': # or --totalnum= totalNum = int(argv[argidx + 1]) argidx += 2 elif argv[argidx].startswith('--totalnum'): splitArg = argv[argidx].split('=') try: totalNum = int(splitArg[1]) except: print 'mpdboot: invalid argument:', argv[argidx] usage() argidx += 1 elif argv[argidx] == '-d' or argv[argidx] == '--debug': debug = 1 argidx += 1 elif argv[argidx] == '-s' or argv[argidx] == '--shell': shell = 'bourne' argidx += 1 elif argv[argidx] == '-v' or argv[argidx] == '--verbose': verbosity = 1 argidx += 1 elif argv[argidx] == '-1': oneMPDPerHost = 0 argidx += 1 elif argv[argidx] == '--loccons': localConsoleArg = '--loccons' argidx += 1 elif argv[argidx] == '--remcons': remoteConsoleArg = '--remcons' argidx += 1 else: print 'mpdboot: unrecognized argument:', argv[argidx] usage() if topMPDBoot: lines = [] if totalNum > 1: try: f = open(hostsFilename, 'r') for line in f: lines.append(line) except: print 'unable to open (or read) hostsfile %s' % (hostsFilename) exit(-1) hostsAndInfo = [{'host': myHost, 'ncpus': myNcpus, 'ifhn': myIfhn}] for line in lines: line = 
line.strip() if not line or line[0] == '#': continue splitLine = re.split(r'\s+', line) host = splitLine[0] ncpus = 1 # default if ':' in host: (host, ncpus) = host.split(':', 1) ncpus = int(ncpus) ifhn = '' # default for kv in splitLine[1:]: (k, v) = kv.split('=', 1) if k == 'ifhn': ifhn = v hostsAndInfo.append({'host': host, 'ncpus': ncpus, 'ifhn': ifhn}) if oneMPDPerHost and totalNum > 1: oldHosts = hostsAndInfo[:] hostsAndInfo = [] for x in oldHosts: keep = 1 for y in hostsAndInfo: if mpd_same_ips(x['host'], y['host']): keep = 0 break if keep: hostsAndInfo.append(x) if len(hostsAndInfo) < totalNum: # one is local print 'totalNum=%d num hosts=%d' % (totalNum, len(hostsAndInfo)) print 'there are not enough hosts on which to start all processes' exit(-1) myBootRank = 0 if localConsoleArg: myConsoleVal = '-n' else: if remoteConsoleArg: myConsoleVal = '-n' anMPDalreadyHere = 0 for i in range(myBootRank): if mpd_same_ips(hostsAndInfo[i]['host'], myHost): # if one before me on this host myConsoleVal = '-n' anMPDalreadyHere = 1 break if not anMPDalreadyHere: try: system('%s/mpdallexit.py > /dev/null' % (fullDirName)) # stop any current mpds except: pass mpd_set_my_id('mpdboot_%s_%d' % (myHost, myBootRank)) if debug: mpd_print(1, 'starting') (parent, lchild, rchild) = mpd_get_ranks_in_binary_tree(myBootRank, totalNum) if debug: mpd_print(1, 'p=%d l=%d r=%d' % (parent, lchild, rchild)) if myIfhn: ifhnVal = '--if %s' % (myIfhn) elif hostsAndInfo[myBootRank]['ifhn']: ifhnVal = '--if %s' % (hostsAndInfo[myBootRank]['ifhn']) else: ifhnVal = '' if entryHost: cmd = '%s %s -h %s -p %s -d -e --ncpus %s %s' % \ (mpdCmd,myConsoleVal,entryHost,entryPort,myNcpus,ifhnVal) else: cmd = '%s %s -d -e --ncpus %s %s' % \ (mpdCmd,myConsoleVal,myNcpus,ifhnVal) if verbosity: mpd_print(1, 'starting local mpd on %s' % (myHost)) if debug: mpd_print(1, 'cmd to run local mpd = :%s:' % (cmd)) if not access(mpdCmd, X_OK): err_exit('cannot access mpd cmd :%s:' % (mpdCmd)) locMPD = Popen4(cmd, 0) 
locMPDFD = locMPD.fromchild locMPDPort = locMPDFD.readline().strip() if locMPDPort.isdigit(): # can't do this until he's already in his ring locMPDSocket = mpd_get_inet_socket_and_connect(myHost, int(locMPDPort)) if locMPDSocket: msgToSend = { 'cmd': 'ping', 'host': 'ping', 'port': 0 } # dummy host & port mpd_send_one_msg(locMPDSocket, { 'cmd': 'ping', 'host': myHost, 'port': 0 }) msg = mpd_recv_one_msg(locMPDSocket) # RMB: WITH TIMEOUT ?? if not msg or not msg.has_key('cmd') or msg['cmd'] != 'ping_ack': err_exit( '%d: unable to ping local mpd; invalid msg from mpd :%s:' % (myBootRank, msg)) locMPDSocket.close() else: err_exit('failed to connect to mpd') else: err_exit('%d: invalid port from mpd %s' % (myBootRank, str(locMPDPort))) if not entryHost: entryHost = myHost entryPort = locMPDPort if rshCmd == 'ssh': xOpt = '-x' else: xOpt = '' lfd = 0 rfd = 0 fdsToSelect = [] if debug: debugArg = '-d' else: debugArg = '' if verbosity: verboseArg = '-v' else: verboseArg = '' if lchild >= 0: zhosts = [ "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn']) for h in hostsAndInfo ] if hostsAndInfo[lchild]['ifhn']: ifhnVal = '--ifhn=%s' % (hostsAndInfo[lchild]['ifhn']) else: ifhnVal = '' cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m %s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \ (rshCmd, xOpt, hostsAndInfo[lchild]['host'], mpdbootCmd, hostsAndInfo[lchild]['ncpus'],ifhnVal, rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost, entryPort, lchild, ','.join(zhosts) ) if verbosity: mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[lchild])) if debug: mpd_print(1, 'cmd to run lchild boot = :%s:' % (cmd)) lchildMPDBoot = Popen4(cmd, 0) lfd = lchildMPDBoot.fromchild fdsToSelect.append(lfd) if rchild >= 0: zhosts = [ "%s:%s:%s" % (h['host'], h['ncpus'], h['ifhn']) for h in hostsAndInfo ] if hostsAndInfo[rchild]['ifhn']: ifhnVal = '--ifhn=%s' % (hostsAndInfo[rchild]['ifhn']) else: ifhnVal = '' cmd = "%s %s %s -n '%s --ncpus=%s %s -r %s -m 
%s -n %d %s %s %s -zentry %s:%s -zrank %s -zhosts %s </dev/null ' " % \ (rshCmd, xOpt, hostsAndInfo[rchild]['host'], mpdbootCmd, hostsAndInfo[rchild]['ncpus'],ifhnVal, rshCmd, mpdCmd, totalNum, debugArg, verboseArg, remoteConsoleArg, entryHost, entryPort, rchild, ','.join(zhosts) ) if verbosity: mpd_print(1, 'starting remote mpd on %s' % (hostsAndInfo[rchild])) if debug: mpd_print(1, 'cmd to run rchild boot = :%s:' % (cmd)) rchildMPDBoot = Popen4(cmd, 0) rfd = rchildMPDBoot.fromchild fdsToSelect.append(rfd) lfd_first_line = 1 rfd_first_line = 1 while fdsToSelect: try: (readyFDs, unused1, unused2) = select(fdsToSelect, [], [], 0.1) except error, errmsg: mpd_raise('mpdboot: select failed: errmsg=:%s:' % (errmsg)) if lfd and lfd in readyFDs: line = lfd.readline() if line: if line.find('RC=MPDBOOT_ERREXIT') >= 0: err_exit('RC=MPDBOOT_ERREXIT') else: if not verbosity and lfd_first_line: lfd_first_line = 0 mpd_print( 1, "error trying to start mpd(boot) at %d %s; output:" % (lchild, hostsAndInfo[lchild])) print ' ', line, stdout.flush() else: lfd.close() fdsToSelect.remove(lfd) if rfd and rfd in readyFDs: line = rfd.readline() if line: if line.find('RC=MPDBOOT_ERREXIT') >= 0: err_exit('RC=MPDBOOT_ERREXIT') else: if not verbosity and rfd_first_line: rfd_first_line = 0 mpd_print( 1, "error trying to start mpd(boot) at %d %s; output:" % (rchild, hostsAndInfo[rchild])) print ' ', line, stdout.flush() else: rfd.close() fdsToSelect.remove(rfd)
try: conSocket.connect(consoleName) except Exception, errmsg: print 'mpdlistjobs: cannot connect to local mpd (%s); possible causes:' % consoleName print ' 1. no mpd running on this host' print ' 2. mpd is running but was started without a "console" (-n option)' print 'you can start an mpd with the "mpd" command; to get help, run:' print ' mpd -h' exit(-1) msgToSend = 'realusername=%s\n' % username mpd_send_one_line(conSocket, msgToSend) msgToSend = {'cmd': 'mpdlistjobs'} mpd_send_one_msg(conSocket, msgToSend) msg = recv_one_msg_with_timeout(conSocket, 5) if not msg: mpd_raise('no msg recvd from mpd before timeout') if msg['cmd'] != 'local_mpdid': # get full id of local mpd for filters later mpd_raise( 'did not recv local_mpdid msg from local mpd; instead, recvd: %s' % msg) else: if len(sjobid) == 1: sjobid.append(msg['id']) while 1: msg = mpd_recv_one_msg(conSocket) if not msg.has_key('cmd'): raise RuntimeError, 'mpdlistjobs: INVALID msg=:%s:' % (msg) if msg['cmd'] == 'mpdlistjobs_info': smjobid = msg['jobid'].split( ' ') # jobnum, mpdid, and alias (if present) if len(smjobid) < 3:
def _abort_job_spec(message=None):
    """Report a fatal job-specification problem and leave via exit().

    Prints *message* when one is given, stores -1 in the global
    myExitStatus and calls exit() with that status.
    """
    global myExitStatus
    if message is not None:
        print(message)
    myExitStatus = -1  # used in main
    exit(myExitStatus)  # really forces jump back into main


def get_args_from_file():
    """Load the job description from the XML file named by argsFilename.

    The document element must be 'create-process-group' and must carry a
    'totalprocs' attribute, which becomes nprocs.  Optional attributes set
    try0Locally, lineLabels, myHost/myIfhn ('net_interface'), jobAlias
    ('pgid'), stdinGoesToWho, doingBNR, gdb and totalview.

    A 'host-spec' element contributes whitespace-separated host names,
    resolved to IP addresses and appended to hostList; with check="yes" the
    list is sent to the mpd on conSocket and any host reported back as not
    in the ring is fatal.

    Each 'process-spec' element covers a 'range' of ranks (default: all
    ranks) and fills one entry, keyed (loRange, hiRange), in execs, users,
    cwds, paths, hosts, args, limits and envvars.

    The file is deleted after reading when delArgsFile is set.  Every
    validation failure prints a message, sets myExitStatus to -1 and calls
    exit(); a missing 'process-spec' prints a message and calls usage().
    """
    global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile
    global try0Locally, lineLabels, jobAlias, mergingOutput, conSocket
    global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview
    global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid
    global execs, users, cwds, paths, args, envvars, limits, hosts, hostList
    # myIfhn must be declared here: without it the 'net_interface' value
    # was stored in a local and lost when the function returned.
    global singinitPID, singinitPORT, doingBNR, myHost, myIP, myIfhn

    try:
        argsFile = open(argsFilename, 'r')
    except (IOError, OSError):
        _abort_job_spec('could not open job specification file %s' % (argsFilename))
    try:
        file_contents = argsFile.read()
    finally:
        argsFile.close()
    if delArgsFile:
        unlink(argsFilename)
    try:
        from xml.dom.minidom import parseString  # import only if needed
    except ImportError:
        _abort_job_spec('need xml parser like xml.dom.minidom')
    parsedArgs = parseString(file_contents)
    if parsedArgs.documentElement.tagName != 'create-process-group':
        _abort_job_spec(
            'expecting create-process-group; got unrecognized doctype: %s' %
            (parsedArgs.documentElement.tagName))

    # ---- attributes of the create-process-group element ----
    createReq = parsedArgs.getElementsByTagName('create-process-group')[0]
    if createReq.hasAttribute('totalprocs'):
        nprocs = int(createReq.getAttribute('totalprocs'))
    else:
        _abort_job_spec('** totalprocs not specified in %s' % argsFilename)
    if createReq.hasAttribute('dont_try_0_locally'):
        try0Locally = 0
    if createReq.hasAttribute('output') and \
       createReq.getAttribute('output') == 'label':
        lineLabels = 1
    if createReq.hasAttribute('net_interface'):
        myHost = createReq.getAttribute('net_interface')
        myIfhn = myHost
    if createReq.hasAttribute('pgid'):  # our jobalias
        jobAlias = createReq.getAttribute('pgid')
    if createReq.hasAttribute('stdin_goes_to_who'):
        stdinGoesToWho = createReq.getAttribute('stdin_goes_to_who')
    if createReq.hasAttribute('doing_bnr'):
        doingBNR = int(createReq.getAttribute('doing_bnr'))
    if createReq.hasAttribute('gdb'):
        gdb = int(createReq.getAttribute('gdb'))
        if gdb:
            mergingOutput = 1  # implied
            lineLabels = 1  # implied
            stdinGoesToWho = 'all'  # chgd to 0 - nprocs-1 when nprocs avail
    if createReq.hasAttribute('tv'):
        totalview = int(createReq.getAttribute('tv'))

    # ---- host-spec: pool of hosts for the job ----
    hostSpec = createReq.getElementsByTagName('host-spec')
    if hostSpec:
        for node in hostSpec[0].childNodes:
            node = node.data.strip()
            for hostname in findall(r'\S+', node):
                if hostname:  # some may be the empty string
                    try:
                        ipaddr = gethostbyname_ex(hostname)[2][0]
                    except Exception:
                        _abort_job_spec(
                            'unable to determine IP info for host %s' % (hostname))
                    # Addresses starting with 127.0.0 are replaced by myHost.
                    if ipaddr.startswith('127.0.0'):
                        hostList.append(myHost)
                    else:
                        hostList.append(ipaddr)
    if hostSpec and hostSpec[0].hasAttribute('check'):
        hostSpecMode = hostSpec[0].getAttribute('check')
        if hostSpecMode == 'yes':
            msgToSend = {'cmd': 'verify_hosts_in_ring', 'host_list': hostList}
            mpd_send_one_msg(conSocket, msgToSend)
            msg = recv_one_msg_with_timeout(conSocket, 5)
            if not msg:
                mpd_raise('no msg recvd from mpd mpd during chk hosts up')
            elif msg['cmd'] != 'verify_hosts_in_ring_response':
                mpd_raise('unexpected msg from mpd :%s:' % (msg))
            if msg['host_list']:
                print('These hosts are not in the mpd ring:')
                for host in msg['host_list']:
                    if host[0].isdigit():
                        # Entry starts with a digit: try to show the name
                        # from a reverse lookup next to it.
                        try:
                            resolved = ' (%s)' % (gethostbyaddr(host)[0])
                        except Exception:
                            resolved = ''
                        print(' %s %s' % (host, resolved))
                    else:
                        print(' %s' % (host))
                _abort_job_spec()

    # ---- process-spec: one entry per range of ranks ----
    covered = [0] * nprocs
    procSpec = createReq.getElementsByTagName('process-spec')
    if not procSpec:
        print('No process-spec specified')
        usage()
    for p in procSpec:
        if p.hasAttribute('range'):
            therange = p.getAttribute('range')
            splitRange = therange.split('-')
            if len(splitRange) == 1:
                loRange = int(splitRange[0])
                hiRange = loRange
            else:
                (loRange, hiRange) = (int(splitRange[0]), int(splitRange[1]))
        else:
            (loRange, hiRange) = (0, nprocs - 1)
        span = (loRange, hiRange)
        for i in range(loRange, hiRange + 1):
            if i >= nprocs:
                _abort_job_spec(
                    '*** exiting; rank %d is greater than nprocs for args' % (i))
            if covered[i]:
                _abort_job_spec(
                    '*** exiting; rank %d is doubly used in proc specs' % (i))
            covered[i] = 1

        if p.hasAttribute('exec'):
            execs[span] = p.getAttribute('exec')
        else:
            _abort_job_spec(
                '*** exiting; range %d-%d has no exec' % (loRange, hiRange))

        if p.hasAttribute('user'):
            tempuser = p.getAttribute('user')
            try:
                pwent = getpwnam(tempuser)
            except KeyError:
                pwent = None
            if not pwent:
                _abort_job_spec('%s is an invalid username' % tempuser)
            # Another user's name is accepted only when running as uid 0.
            if tempuser == username or getuid() == 0:
                users[span] = tempuser
            else:
                _abort_job_spec(
                    '%s username does not match yours and you are not root'
                    % tempuser)
        else:
            users[span] = username

        if p.hasAttribute('cwd'):
            cwds[span] = p.getAttribute('cwd')
        else:
            cwds[span] = cwd
        if p.hasAttribute('path'):
            paths[span] = p.getAttribute('path')
        else:
            paths[span] = environ['PATH']

        if p.hasAttribute('host'):
            host = p.getAttribute('host')
            if host.startswith('_any_'):
                hosts[span] = host
            else:
                try:
                    hosts[span] = gethostbyname_ex(host)[2][0]
                except Exception:
                    _abort_job_spec('unable to do find info for host %s' % (host))
        elif hostList:
            hosts[span] = '_any_from_pool_'
        else:
            hosts[span] = '_any_'

        # <arg idx="k" value="..."> elements: idx is 1-based and values are
        # passed through unquote().
        argDict = {}
        argList = p.getElementsByTagName('arg')
        for argElem in argList:
            argDict[int(argElem.getAttribute('idx'))] = argElem.getAttribute('value')
        argVals = [0] * len(argList)
        for idx in argDict:
            argVals[idx - 1] = unquote(argDict[idx])
        args[span] = argVals

        # NOTE(review): known_rlimit_types is not defined in this function;
        # it must be available at module level -- confirm.
        limitDict = {}
        for limitElem in p.getElementsByTagName('limit'):
            limitType = limitElem.getAttribute('type')
            if limitType in known_rlimit_types:
                limitDict[limitType] = limitElem.getAttribute('value')
            else:
                _abort_job_spec('mpdrun: invalid type in limit: %s' % (limitType))
        limits[span] = limitDict

        envVals = {}
        for envVarElem in p.getElementsByTagName('env'):
            envVals[envVarElem.getAttribute('name')] = envVarElem.getAttribute('value')
        envvars[span] = envVals
def mpdrun(): global nprocs, pgm, pgmArgs, mship, rship, argsFilename, delArgsFile, \ try0Locally, lineLabels, jobAlias, mergingOutput, conSocket global stdinGoesToWho, myExitStatus, manSocket, jobid, username, cwd, totalview global outXmlDoc, outXmlEC, outXmlFile, linesPerRank, gdb, gdbAttachJobid global execs, users, cwds, paths, args, envvars, limits, hosts, hostList global singinitPID, singinitPORT, doingBNR, myHost, myIP, myIfhn mpd_set_my_id('mpdrun_' + ` getpid() `) pgm = '' mship = '' rship = '' nprocs = 0 jobAlias = '' argsFilename = '' outExitCodesFilename = '' outXmlFile = '' outXmlDoc = '' outXmlEC = '' delArgsFile = 0 try0Locally = 1 lineLabels = 0 stdinGoesToWho = '0' mergingOutput = 0 hostList = [] gdb = 0 gdbAttachJobid = '' singinitPID = 0 singinitPORT = 0 doingBNR = 0 totalview = 0 myHost = gethostname() # default; may be chgd by -if arg myIfhn = '' known_rlimit_types = [ 'core', 'cpu', 'fsize', 'data', 'stack', 'rss', 'nproc', 'nofile', 'ofile', 'memlock', 'as', 'vmem' ] username = mpd_get_my_username() cwd = path.abspath(getcwd()) recvTimeout = 20 execs = {} users = {} cwds = {} paths = {} args = {} envvars = {} limits = {} hosts = {} get_args_from_cmdline( ) # verify args as much as possible before connecting to mpd (listenSocket, listenPort) = mpd_get_inet_listen_socket('', 0) signal(SIGALRM, sig_handler) if environ.has_key('MPDRUN_TIMEOUT'): jobTimeout = int(environ['MPDRUN_TIMEOUT']) elif environ.has_key('MPIEXEC_TIMEOUT'): jobTimeout = int(environ['MPIEXEC_TIMEOUT']) else: jobTimeout = 0 if environ.has_key('MPIEXEC_BNR'): doingBNR = 1 if environ.has_key('UNIX_SOCKET'): conFD = int(environ['UNIX_SOCKET']) conSocket = fromfd(conFD, AF_UNIX, SOCK_STREAM) close(conFD) else: if environ.has_key('MPD_CON_EXT'): conExt = '_' + environ['MPD_CON_EXT'] else: conExt = '' consoleName = '/tmp/mpd2.console_' + username + conExt conSocket = socket(AF_UNIX, SOCK_STREAM) # note: UNIX socket try: conSocket.connect(consoleName) except Exception, errmsg: print 
'cannot connect to local mpd (%s); possible causes:' % consoleName print ' 1. no mpd running on this host' print ' 2. mpd is running but was started without a "console" (-n option)' print 'you can start an mpd with the "mpd" command; to get help, run:' print ' mpd -h' myExitStatus = -1 # used in main exit(myExitStatus) # really forces jump back into main # mpd_raise('cannot connect to local mpd; errmsg: %s' % (str(errmsg)) ) msgToSend = 'realusername=%s\n' % username mpd_send_one_line(conSocket, msgToSend) msgToSend = {'cmd': 'get_mpd_version'} mpd_send_one_msg(conSocket, msgToSend) msg = recv_one_msg_with_timeout(conSocket, recvTimeout) if not msg: mpd_raise('no msg recvd from mpd during version check') elif msg['cmd'] != 'mpd_version_response': mpd_raise('unexpected msg from mpd :%s:' % (msg)) if msg['mpd_version'] != mpd_version: mpd_raise('mpd version %s does not match mine %s' % (msg['mpd_version'], mpd_version))
else: pass # args already defined by get_args_from_file if mship: (mshipSocket, mshipPort) = mpd_get_inet_listen_socket('', 0) mshipPid = fork() if mshipPid == 0: conSocket.close() environ['MPDCP_AM_MSHIP'] = '1' environ['MPDCP_MSHIP_PORT'] = str(mshipPort) environ['MPDCP_MSHIP_FD'] = str(mshipSocket.fileno()) environ['MPDCP_MSHIP_NPROCS'] = str(nprocs) try: execvpe(mship, [mship], environ) except Exception, errmsg: mpd_raise('execvpe failed for copgm %s; errmsg=:%s:' % (mship, errmsg)) _exit(0) # do NOT do cleanup mshipSocket.close() else: mshipPid = 0 # make sure to do this after nprocs has its value linesPerRank = {} # keep this a dict instead of a list for i in range(nprocs): linesPerRank[i] = [] msgToSend = { 'cmd': 'mpdrun', 'conhost': myHost, 'conip': myIP,
conSocket.connect(consoleName) except Exception, errmsg: print 'mpdtrace: cannot connect to local mpd (%s); possible causes:' % consoleName print ' 1. no mpd running on this host' print ' 2. mpd is running but was started without a "console" (-n option)' print 'you can start an mpd with the "mpd" command; to get help, run:' print ' mpd -h' exit(-1) msgToSend = 'realusername=%s\n' % username mpd_send_one_line(conSocket, msgToSend) msgToSend = {'cmd': 'mpdtrace'} mpd_send_one_msg(conSocket, msgToSend) while 1: msg = recv_one_msg_with_timeout(conSocket, 5) if not msg: mpd_raise('no msg recvd from mpd before timeout') elif msg['cmd'] == 'already_have_a_console': mpd_raise( 'mpd already has a console (e.g. for long ringtest); try later' ) if not msg.has_key('cmd'): raise RuntimeError, 'mpdtrace: INVALID msg=:%s:' % (msg) if msg['cmd'] == 'mpdtrace_info': if len(argv) > 1 and argv[1] == '-l': print msg['id'] else: print sub(r'[\._].*', '', msg['id']) # strip off domain and port # printLine = msg['id'] + ': ' # printLine = printLine + 'lhs=' + msg['lhs'] + ' ' # printLine = printLine + 'rhs=' + msg['rhs'] + ' '