def testLookupSim5(self):
    """Look up every DOM in the simpleConfig simulation geometry.

    The simulated MBIDs are '53494d55' ("SIMU" in ASCII) followed by the
    two-digit hub offset and position; the DOM names follow the pattern
    SIM%04d with id = (hub - 1) * 64 + position, except the very first
    DOM which is named 'Nicholson_Baker'.
    """
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("simpleConfig", metaDir + "/config")

    entries = []
    for hub in range(1, 6):
        for pos in (1, 20, 40, 60):
            mbid = '53494d55%02d%02d' % (hub, pos)
            if hub == 1 and pos == 1:
                name = 'Nicholson_Baker'
            else:
                name = 'SIM%04d' % ((hub - 1) * 64 + pos)
            entries.append((mbid, name, 1000 + hub, pos))

    self.lookup(cfg, tuple(entries))
def testListsSpsIC40IT6(self):
    """Verify the component list produced by the sps-IC40-IT6 config.

    Checks both the number of components and that every component name
    is one of the expected ones.
    """
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("sps-IC40-IT6-AM-Revert-IceTop-V029",
                               metaDir + "/config")
    expected = ['amandaTrigger', 'eventBuilder', 'globalTrigger',
                'iceTopTrigger', 'inIceTrigger', 'secondaryBuilders',
                'stringHub#0', 'stringHub#21', 'stringHub#29',
                'stringHub#30', 'stringHub#38', 'stringHub#39',
                'stringHub#40', 'stringHub#44', 'stringHub#45',
                'stringHub#46', 'stringHub#47', 'stringHub#48',
                'stringHub#49', 'stringHub#50', 'stringHub#52',
                'stringHub#53', 'stringHub#54', 'stringHub#55',
                'stringHub#56', 'stringHub#57', 'stringHub#58',
                'stringHub#59', 'stringHub#60', 'stringHub#61',
                'stringHub#62', 'stringHub#63', 'stringHub#64',
                'stringHub#65', 'stringHub#66', 'stringHub#67',
                'stringHub#68', 'stringHub#69', 'stringHub#70',
                'stringHub#71', 'stringHub#72', 'stringHub#73',
                'stringHub#74', 'stringHub#75', 'stringHub#76',
                'stringHub#77', 'stringHub#78', 'stringHub#201',
                'stringHub#202', 'stringHub#203', 'stringHub#206']

    comps = cfg.components()
    self.assertEqual(len(expected), len(comps),
                     "Expected %d components (%s), not %d (%s)" %
                     (len(expected), str(expected), len(comps),
                      str(comps)))
    for c in comps:
        # Bug fix: the original used expected.index() wrapped in a bare
        # "except:", which would also swallow unrelated errors (e.g. an
        # AttributeError raised by fullName()) and misreport them as an
        # unexpected component.
        if c.fullName() not in expected:
            self.fail('Unexpected component "%s"' % c)
def testReplay(self):
    """Verify the component list produced by the replay-ic22-it4 config.

    Checks both the number of components and that every component name
    is one of the expected ones.
    """
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("replay-ic22-it4", metaDir + "/config")
    expected = ['eventBuilder', 'globalTrigger', 'iceTopTrigger',
                'inIceTrigger', 'replayHub#21', 'replayHub#29',
                'replayHub#30', 'replayHub#38', 'replayHub#39',
                'replayHub#40', 'replayHub#46', 'replayHub#47',
                'replayHub#48', 'replayHub#49', 'replayHub#50',
                'replayHub#56', 'replayHub#57', 'replayHub#58',
                'replayHub#59', 'replayHub#65', 'replayHub#66',
                'replayHub#67', 'replayHub#72', 'replayHub#73',
                'replayHub#74', 'replayHub#78', 'replayHub#201',
                'replayHub#202', 'replayHub#203', 'replayHub#204']

    comps = cfg.components()
    self.assertEqual(len(expected), len(comps),
                     "Expected %d components (%s), not %d (%s)" %
                     (len(expected), str(expected), len(comps),
                      str(comps)))
    for c in comps:
        # Bug fix: the original used expected.index() wrapped in a bare
        # "except:", which would also swallow unrelated errors and
        # misreport them as an unexpected component.
        if c.fullName() not in expected:
            self.fail('Unexpected component "%s"' % c)
def __checkCluster(self, clusterName, cfgName, expNodes, spadeDir,
                   logCopyDir):
    """Load run config cfgName, build a RunCluster for clusterName and
    verify that every node component matches one of expNodes (and
    vice versa), and that the SPADE/log-copy directories are correct.

    expNodes entries must provide matches(), markFound() and isFound().
    """
    cfg = DAQConfigParser.load(cfgName, RunClusterTest.CONFIG_DIR)
    cluster = RunCluster(cfg, clusterName, RunClusterTest.CONFIG_DIR)

    # assertEqual, not the deprecated assertEquals alias
    self.assertEqual(cluster.configName(), cfgName,
                     'Expected config name %s, not %s' %
                     (cfgName, cluster.configName()))

    # Every component on every node must appear in expNodes exactly once.
    for node in cluster.nodes():
        for comp in node.components():
            found = False
            for en in expNodes:
                if en.matches(node.hostName(), comp.name(), comp.id()):
                    found = True
                    en.markFound()
                    break
            if not found:
                self.fail('Did not expect %s component %s' %
                          (node.hostName(), str(comp)))

    # Conversely, every expected node must have been matched above.
    for en in expNodes:
        if not en.isFound():
            self.fail('Did not find expected component %s' % str(en))

    # (Removed an unused "hubList = cluster.getHubNodes()" local; the
    # result was never checked.)
    self.assertEqual(cluster.logDirForSpade(), spadeDir,
                     'SPADE log directory is "%s", not "%s"' %
                     (cluster.logDirForSpade(), spadeDir))
    self.assertEqual(cluster.logDirCopies(), logCopyDir,
                     'Log copy directory is "%s", not "%s"' %
                     (cluster.logDirCopies(), logCopyDir))
def testClusterFile(self):
    """Exercise cache-file writing for the simpleConfig cluster."""
    runConfig = DAQConfigParser.load("simpleConfig",
                                     RunClusterTest.CONFIG_DIR)
    runCluster = RunCluster(runConfig, "localhost",
                            RunClusterTest.CONFIG_DIR)
    runCluster.clearActiveConfig()
    # Write the cache file both without and with the flag set.
    for flag in (False, True):
        runCluster.writeCacheFile(flag)
def testLookupSpsIC40IT6(self):
    """Spot-check DOM lookups in the sps-IC40-IT6 configuration."""
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("sps-IC40-IT6-AM-Revert-IceTop-V029",
                               metaDir + "/config")

    # Each entry is (mainboard id, DOM name, string number, position).
    domData = (('737d355af587', 'Bat', 21, 1),
               ('499ccc773077', 'Werewolf', 66, 6),
               ('efc9607742b9', 'Big_Two_Card', 78, 60),
               ('1e5b72775d19', 'AMANDA_SYNC_DOM', 0, 91),
               ('1d165fc478ca', 'AMANDA_TRIG_DOM', 0, 92))

    self.lookup(cfg, domData)
def getClusterConfig(self):
    """Return the currently-active cluster configuration.

    Raises CnCServerException (wrapping the original traceback text)
    when no active configuration file can be found.
    """
    try:
        return DAQConfigParser.getClusterConfiguration(
            None, useActiveConfig=True,
            clusterDesc=self.__clusterDesc,
            configDir=self.__runConfigDir)
    except XMLFileNotFound:
        # Mention the cluster description in the error only when one
        # was actually specified.
        if self.__clusterDesc is not None:
            cdescStr = " for cluster \"%s\"" % self.__clusterDesc
        else:
            cdescStr = ""
        raise CnCServerException("Cannot find cluster configuration" +
                                 " %s: %s" % (cdescStr, exc_string()))
def testDumpDOMs(self):
    """Sanity-check every DOM in the sps-IC40-IT6 configuration.

    For each DOM: the mainboard ID must be a 12-character string with
    no leading space, getIDbyName() must round-trip the name back to
    the same MBID, and getIDbyStringPos() must return a well-formed ID.
    """
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("sps-IC40-IT6-AM-Revert-IceTop-V029",
                               metaDir + "/config")

    # Bug fix: 'n' was reset to zero inside the loop, so the count of
    # MBIDs with a leading zero never exceeded one; initialize it once.
    n = 0
    for d in cfg.getAllDOMs():
        mbid = str(d)
        if len(mbid) != 12 or mbid.startswith(" "):
            self.fail("DOM %s(%s) has bad MBID" % (mbid, d.name()))
        if mbid.startswith("0"):
            n += 1

        nmid = cfg.getIDbyName(d.name())
        if nmid != mbid:
            self.fail("Bad IDbyName value \"%s\" for \"%s\"" %
                      (nmid, mbid))

        newid = cfg.getIDbyStringPos(d.string(), d.pos())
        if newid.startswith(" ") or len(newid) != 12:
            self.fail("Bad IDbyStringPos value \"%s\" for \"%s\" %d" %
                      (newid, mbid, n))
def testListsSim5(self):
    """Verify the component list produced by the simpleConfig config.

    Checks both the number of components and that every component name
    is one of the expected ones.
    """
    metaDir = self.initPDAQHome()
    cfg = DAQConfigParser.load("simpleConfig", metaDir + "/config")
    expected = ['eventBuilder', 'globalTrigger', 'inIceTrigger',
                'secondaryBuilders', 'stringHub#1001', 'stringHub#1002',
                'stringHub#1003', 'stringHub#1004', 'stringHub#1005']

    comps = cfg.components()
    self.assertEqual(len(expected), len(comps),
                     "Expected %d components (%s), not %d (%s)" %
                     (len(expected), str(expected), len(comps),
                      str(comps)))
    for c in comps:
        # Bug fix: the original used expected.index() wrapped in a bare
        # "except:", which would also swallow unrelated errors and
        # misreport them as an unexpected component.
        if c.fullName() not in expected:
            self.fail('Unexpected component "%s"' % c)
def makeRunset(self, runConfigDir, runConfigName, timeout, logger,
               forceRestart=True, strict=True):
    "Build a runset from the specified run configuration"
    # Load and validate the run configuration file.
    logger.info("Loading run configuration \"%s\"" % runConfigName)
    runConfig = DAQConfigParser.load(runConfigName, runConfigDir, strict)
    logger.info("Loaded run configuration \"%s\"" % runConfigName)

    # Collect the full names of every component the config requires.
    nameList = []
    for c in runConfig.components():
        nameList.append(c.fullName())

    if nameList is None or len(nameList) == 0:
        raise CnCServerException("No components found in" +
                                 " run configuration \"%s\"" % runConfig)

    # Claim the required components.  On any failure, hand back whatever
    # was already collected before propagating the error.
    compList = []
    try:
        # waitList holds components that did not register within 'timeout'.
        waitList = self.__collectComponents(nameList, compList, logger,
                                            timeout)
        if waitList is not None:
            raise CnCServerException("Still waiting for " + str(waitList))
    except:
        self.__returnComponents(compList, logger)
        raise

    # Create and register the runset; if registration never completes,
    # the finally clause returns the components to the pool.
    setAdded = False
    try:
        try:
            runSet = self.createRunset(runConfig, compList, logger)
        except:
            runSet = None
            raise

        self.__addRunset(runSet)
        setAdded = True
    finally:
        if not setAdded:
            self.__returnComponents(compList, logger)
            runSet = None

    if runSet is not None:
        if self.__defaultDebugBits is not None:
            runSet.setDebugBits(self.__defaultDebugBits)

        # Wire up and configure the runset; on failure either return or
        # restart the runset (per forceRestart) before re-raising.
        try:
            connMap = runSet.buildConnectionMap()
            runSet.connect(connMap, logger)
            runSet.setOrder(connMap, logger)
            runSet.configure()
        except:
            if not forceRestart:
                self.returnRunset(runSet, logger)
            else:
                self.restartRunset(runSet, logger)
            raise

        # Log the final component membership of the new runset.
        setComps = []
        for c in runSet.components():
            setComps.append(c.fullName())
        logger.info("Built runset #%d: %s" % (runSet.id(), setComps))

    return runSet
def main():
    # Upload a DOM mainboard release file to every hub in the cluster:
    # copy the file out via scp, run UploadDOMs.py remotely, then clean
    # up the temporary copies.
    usage = "usage: %prog [options] <releasefile>"
    p = optparse.OptionParser(usage=usage)
    p.add_option("-c", "--config-name", type="string",
                 dest="clusterConfigName", action="store", default=None,
                 help="Cluster configuration name, subset of deployed" +
                 " configuration.")
    p.add_option("-v", "--verbose", dest="verbose",
                 action="store_true", default=False,
                 help="Be chatty")
    p.add_option("-f", "--skip-flash", dest="skipFlash",
                 action="store_true", default=False,
                 help="Don't actually write flash on DOMs -" +
                 " just 'practice' all other steps")
    p.add_option("-s", "--straggler-time", type="int",
                 dest="stragglerTime", action="store", default=240,
                 help="Time (seconds) to wait before reporting details" +
                 " of straggler DOMs (default: 240)")
    p.add_option("-w", "--watch-period", type="int", dest="watchPeriod",
                 action="store", default=15,
                 help="Interval (seconds) between status reports during" +
                 " upload (default: 15)")
    opt, args = p.parse_args()

    if len(args) < 1:
        p.error("An argument is required!")
        raise SystemExit

    releaseFile = args[0]

    # Make sure file exists
    if not os.path.exists(releaseFile):
        print "Release file %s doesn't exist!\n\n" % releaseFile
        print usage
        raise SystemExit

    clusterConfig = \
        DAQConfigParser.getClusterConfiguration(opt.clusterConfigName)
    hublist = clusterConfig.getHubNodes()

    # Copy phase - copy mainboard release.hex file to remote nodes
    copySet = ThreadSet(opt.verbose)
    # NOTE(review): remote name includes this PID, so concurrent
    # invocations won't clobber each other's file.
    remoteFile = "/tmp/release%d.hex" % os.getpid()
    for domhub in hublist:
        copySet.add("scp -q %s %s:%s" % (releaseFile, domhub, remoteFile))

    print "Copying %s to all hubs as %s..." % (releaseFile, remoteFile)
    copySet.start()
    try:
        copySet.wait()
    except KeyboardInterrupt:
        print "\nInterrupted."
        copySet.stop()
        raise SystemExit

    # Upload phase - upload release
    print "Uploading %s on all hubs..." % remoteFile
    uploader = HubThreadSet(opt.verbose, opt.watchPeriod,
                            opt.stragglerTime)
    for domhub in hublist:
        # Pass "-f" through to UploadDOMs.py when flashing is skipped.
        f = opt.skipFlash and "-f" or ""
        cmd = "ssh %s UploadDOMs.py %s -v %s" % (domhub, remoteFile, f)
        uploader.add(cmd, domhub)

    uploader.start()
    try:
        uploader.watch()
    except KeyboardInterrupt:
        # First interrupt: stop the watcher threads, then kill the
        # remote upload processes; a second interrupt abandons that too.
        print "Got keyboardInterrupt... stopping threads..."
        uploader.stop()
        try:
            uploader.wait()
            print "Killing remote upload processes..."
            killer = ThreadSet(opt.verbose)
            for domhub in hublist:
                killer.add("ssh %s killall -9 UploadDOMs.py" % domhub,
                           domhub)
            killer.start()
            killer.wait()
        except KeyboardInterrupt:
            pass

    # Cleanup phase - remove remote files from /tmp on hubs
    cleanUpSet = ThreadSet(opt.verbose)
    for domhub in hublist:
        cleanUpSet.add("ssh %s /bin/rm -f %s" % (domhub, remoteFile))

    print "Cleaning up %s on all hubs..." % remoteFile
    cleanUpSet.start()
    try:
        cleanUpSet.wait()
    except KeyboardInterrupt:
        print "\nInterrupted."
        cleanUpSet.stop()
        raise SystemExit

    print "\n\nDONE."
    print uploader.summary()
print >>sys.stderr, \ 'To force a restart, rerun with the --force option' raise SystemExit if opt.doList: DAQConfig.showList(None, None) raise SystemExit if not opt.skipKill: doCnC = True caughtException = False try: activeConfig = \ DAQConfigParser.getClusterConfiguration(None, False, True, opt.clusterDesc, configDir=configDir) doKill(doCnC, opt.dryRun, dashDir, opt.verbose, opt.quiet, activeConfig, opt.killWith9) except ClusterConfigException: caughtException = True except DAQConfigException: caughtException = True if caughtException and opt.killOnly: print >>sys.stderr, 'DAQ is not currently active' if opt.force: print >>sys.stderr, "Remember to run SpadeQueue.py to recover" + \ " any orphaned data" if not opt.killOnly:
print >>sys.stderr, "Warning: Running RemoveHubs.py on expcont" print >>sys.stderr, "-"*60 (forceCreate, runCfgName, cluCfgName, hubIdList) = parseArgs() configDir = os.path.join(metaDir, "config") newPath = DAQConfig.createOmitFileName(configDir, runCfgName, hubIdList) if os.path.exists(newPath): if forceCreate: print >>sys.stderr, "WARNING: Overwriting %s" % newPath else: print >>sys.stderr, "WARNING: %s already exists" % newPath print >>sys.stderr, "Specify --force to overwrite this file" raise SystemExit() runCfg = DAQConfigParser.load(runCfgName, configDir) if runCfg is not None: newCfg = runCfg.omit(hubIdList) if newCfg is not None: fd = open(newPath, "w") newCfg.write(fd) fd.close() print "Created %s" % newPath if cluCfgName is not None: cluPath = createClusterConfigName(cluCfgName, hubIdList) if os.path.exists(cluPath): if forceCreate: print >>sys.stderr, "WARNING: Overwriting %s" % cluPath else: print >>sys.stderr, "WARNING: %s already exists" % \
def main(): "Main program" usage = "%prog [options]" p = optparse.OptionParser(usage=usage) p.add_option("-c", "--config-name", type="string", dest="clusterConfigName", action="store", default=None, help="REQUIRED: Configuration name") p.add_option("-n", "--dry-run", dest="dryRun", action="store_true", default=False, help="Don't actually run DOMPrep - just print what" + \ " would be done") p.add_option("-l", "--list-configs", dest="doList", action="store_true", default=False, help="List available configs") opt, args = p.parse_args() config = DAQConfigParser.getClusterConfiguration(opt.clusterConfigName, opt.doList) if opt.doList: raise SystemExit # Get relevant hubs - if it has a stringhub component on it, run DOMPrep.py there. hublist = config.getHubNodes() cmds = ParallelShell(dryRun=opt.dryRun, timeout=45) ids = {} for hub in hublist: cmd = "ssh %s DOMPrep.py" % hub ids[hub] = (cmds.add(cmd)) cmds.start() cmds.wait() numPlugged = 0 numPowered = 0 numCommunicating = 0 numIceboot = 0 for hub in hublist: print "Hub %s: " % hub, result = cmds.getResult(ids[hub]) result = result.rstrip() print result # Parse template: # 2 pairs plugged, 2 powered; 4 DOMs communicating, 4 in iceboot match = re.search( r'(\d+) pairs plugged, (\d+) powered; (\d+) DOMs communicating, (\d+) in iceboot', result) if match: numPlugged += int(match.group(1)) numPowered += int(match.group(2)) numCommunicating += int(match.group(3)) numIceboot += int(match.group(4)) print "TOTAL: %d pairs plugged, %d pairs powered; %d DOMs communicating, %d in iceboot" \ % (numPlugged, numPowered, numCommunicating, numIceboot)
else: compData = ComponentData.createAll(opt.numHubs, not opt.realNames, includeTrackEngine=opt.incTrackEng) for cd in compData: if opt.evtBldr and cd.isComponent("eventBuilder"): cd.useRealComponent() elif opt.glblTrig and cd.isComponent("globalTrigger"): cd.useRealComponent() elif opt.iniceTrig and cd.isComponent("iniceTrigger"): cd.useRealComponent() elif opt.icetopTrig and cd.isComponent("icetopTrigger"): cd.useRealComponent() elif opt.trackEng and cd.isComponent("trackEngine"): cd.useRealComponent() DAQFakeRun.makeMockClusterConfig(opt.runCfgDir, compData, opt.numHubs) try: DAQConfigParser.getClusterConfiguration(None, useActiveConfig=True) except: DAQFakeRun.hackActiveConfig("sim-localhost") from DumpThreads import DumpThreadsOnSignal DumpThreadsOnSignal() # create run object and initial run number # runner = DAQFakeRun() runner.runAll(compData, opt.runNum, opt.numRuns, opt.duration, opt.runCfgDir, opt.forkClients)
print >> fd, "</icecube>" if __name__ == "__main__": clusterName = "sps" cfgList = [] usage = False # configExists has a keyword argument that builds a dir # like this. load( does not and wants a directory # to stay consistent use the same construction # in both places configDir = os.path.join(metaDir, "config") for arg in sys.argv[1:]: if not DAQConfigParser.configExists(arg, configDir=configDir): print >> sys.stderr, "Could not find run config: %s" % arg usage = True else: cfgList.append(arg) if usage: print >> sys.stderr, "Usage: %s runConfig" % sys.argv[0] raise SystemExit ccc = ClusterConfigCreator(clusterName) for cfgName in cfgList: runCfg = DAQConfigParser.load(cfgName, configDir) ccc.write(sys.stdout, runCfg)
def main():
    "Main program"
    # Build a version banner from SVN metadata for --version and usage.
    ver_info = "%(filename)s %(revision)s %(date)s %(time)s %(author)s " \
               "%(release)s %(repo_rev)s" % get_version_info(SVN_ID)
    usage = "%prog [options]\nversion: " + ver_info
    p = optparse.OptionParser(usage=usage, version=ver_info)
    p.add_option("-C", "--cluster-desc", type="string", dest="clusterDesc",
                 action="store", default=None,
                 help="Cluster description name")
    p.add_option("-c", "--config-name", type="string", dest="configName",
                 action="store", default=None,
                 help="REQUIRED: Configuration name")
    p.add_option("", "--delete", dest="delete",
                 action="store_true", default=True,
                 help="Run rsync's with --delete")
    p.add_option("", "--no-delete", dest="delete",
                 action="store_false", default=True,
                 help="Run rsync's without --delete")
    p.add_option("-l", "--list-configs", dest="doList",
                 action="store_true", default=False,
                 help="List available configs")
    p.add_option("-n", "--dry-run", dest="dryRun",
                 action="store_true", default=False,
                 help="Don't run rsyncs, just print as they would be run" +
                 " (disables quiet)")
    p.add_option("", "--deep-dry-run", dest="deepDryRun",
                 action="store_true", default=False,
                 help="Run rsync's with --dry-run (implies verbose and" +
                 " serial)")
    p.add_option("-p", "--parallel", dest="doParallel",
                 action="store_true", default=True,
                 help="Run rsyncs in parallel (default)")
    p.add_option("-q", "--quiet", dest="quiet",
                 action="store_true", default=False,
                 help="Run quietly")
    p.add_option("-s", "--serial", dest="doSerial",
                 action="store_true", default=False,
                 help="Run rsyncs serially (overrides parallel and unsets" +
                 " timeout)")
    p.add_option("-t", "--timeout", type="int", dest="timeout",
                 action="store", default=300,
                 help="Number of seconds before rsync is terminated")
    p.add_option("-v", "--verbose", dest="verbose",
                 action="store_true", default=False,
                 help="Be chatty")
    p.add_option("", "--undeploy", dest="undeploy",
                 action="store_true", default=False,
                 help="Remove entire ~pdaq/.m2 and ~pdaq/pDAQ_current dirs" +
                 " on remote nodes - use with caution!")
    p.add_option("", "--nice-adj", type="int", dest="niceAdj",
                 action="store", default=NICE_ADJ_DEFAULT,
                 help="Set nice adjustment for remote rsyncs" +
                 " [default=%default]")
    p.add_option("-E", "--express", dest="express",
                 action="store_true", default=EXPRESS_DEFAULT,
                 help="Express rsyncs, unsets and overrides any/all" +
                 " nice adjustments")
    opt, args = p.parse_args()

    ## Work through options implications ##
    # A deep-dry-run implies verbose and serial
    if opt.deepDryRun:
        opt.doSerial = True
        opt.verbose = True
        opt.quiet = False

    # Serial overrides parallel and unsets timout
    if opt.doSerial:
        opt.doParallel = False
        opt.timeout = None

    # dry-run implies we want to see what is happening
    if opt.dryRun:
        opt.quiet = False

    # Map quiet/verbose to a 3-value tracelevel
    traceLevel = 0
    if opt.quiet:
        traceLevel = -1
    if opt.verbose:
        traceLevel = 1
    if opt.quiet and opt.verbose:
        traceLevel = 0

    # How often to report count of processes waiting to finish
    monitorIval = None
    if traceLevel >= 0 and opt.timeout:
        monitorIval = max(opt.timeout * 0.01, 2)

    if opt.doList:
        DAQConfig.showList(None, None)
        raise SystemExit

    if not opt.configName:
        print >>sys.stderr, 'No configuration specified'
        p.print_help()
        raise SystemExit

    try:
        cdesc = opt.clusterDesc
        config = \
            DAQConfigParser.getClusterConfiguration(opt.configName, False,
                                                    clusterDesc=cdesc)
    except XMLFileNotFound:
        print >>sys.stderr, 'Configuration "%s" not found' % opt.configName
        p.print_help()
        raise SystemExit

    if traceLevel >= 0:
        # Summarize the cluster layout before deploying.
        if config.descName() is None:
            print "CLUSTER CONFIG: %s" % config.configName()
        else:
            print "CONFIG: %s" % config.configName()
            print "CLUSTER: %s" % config.descName()

        nodeList = config.nodes()
        nodeList.sort()

        print "NODES:"
        for node in nodeList:
            print " %s(%s)" % (node.hostName(), node.locName()),

            compList = node.components()
            compList.sort()

            for comp in compList:
                print comp.fullName(),
                if comp.isHub():
                    print "[%s]" % getHubType(comp.id()),
                print " ",
            print

    if not opt.dryRun:
        # Record the active config and capture the deployed SVN version.
        config.writeCacheFile()
        ver = store_svnversion()
        if traceLevel >= 0:
            print "VERSION: %s" % ver

    parallel = ParallelShell(parallel=opt.doParallel, dryRun=opt.dryRun,
                             verbose=(traceLevel > 0 or opt.dryRun),
                             trace=(traceLevel > 0), timeout=opt.timeout)

    # Deploy to every node, rsyncing SUBDIRS from the local metaDir.
    pdaqDir = replaceHome(os.environ["HOME"], metaDir)
    deploy(config, parallel, os.environ["HOME"], pdaqDir, SUBDIRS,
           opt.delete, opt.dryRun, opt.deepDryRun, opt.undeploy,
           traceLevel, monitorIval, opt.niceAdj, opt.express)
if clusterDesc is not None: raise Exception("Cannot specify multiple cluster descriptions") if len(name) > 2: clusterDesc = name[2:] else: grabDesc = True continue if os.path.basename(name) == 'default-dom-geometry.xml': # ignore continue nameList.append(name) for name in nameList: cfg = DAQConfigParser.load(name, configDir) try: runCluster = RunCluster(cfg, clusterDesc) except NotImplementedError, ue: print >>sys.stderr, 'For %s:' % name traceback.print_exc() continue except KeyboardInterrupt: break except: print >>sys.stderr, 'For %s:' % name traceback.print_exc() continue print 'RunCluster: %s (%s)' % \ (runCluster.configName(), runCluster.descName())