def test_proc_attr_order(self):
    """get_num_procs() has an order of preferred attributes.

    Walks ``condor_meter.PROC_ATTRS`` in reverse, adding one attribute at a
    time to the same job ad. After each addition, get_num_procs() must
    report the value of the attribute that was just added.
    """
    jobad = classad.ClassAd()
    for attr in reversed(condor_meter.PROC_ATTRS):
        # The attribute's position in PROC_ATTRS doubles as its marker value.
        val = condor_meter.PROC_ATTRS.index(attr)
        jobad[attr] = val
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(condor_meter.get_num_procs(jobad), val)
def test_proc_expr(self):
    """get_num_procs() should be able to handle attributes set to ClassAd
    expressions.

    Each attribute in ``condor_meter.PROC_ATTRS`` is set, on a fresh ad, to
    the expression ``2 + 2``; the reported processor count must be 4.
    """
    for attr in condor_meter.PROC_ATTRS:
        jobad = classad.ClassAd()
        jobad[attr] = classad.ExprTree('2 + 2')
        procs = condor_meter.get_num_procs(jobad)
        # assertEqual replaces the deprecated assertEquals alias
        # (removed from unittest in Python 3.12).
        self.assertEqual(procs, 4)
def test_proc_int(self):
    """The Processors field should always be an integer.

    Every attribute in ``condor_meter.PROC_ATTRS`` is set, on its own ad,
    to a non-numeric string; get_num_procs() must still return an int.
    """
    for name in condor_meter.PROC_ATTRS:
        broken_ad = classad.ClassAd()
        broken_ad[name] = 'broken attribute'
        self.assertIsInstance(condor_meter.get_num_procs(broken_ad), int)
def print_capabilities():
    """Write this plugin's capabilities ad to stdout.

    The ad is serialized with ``ClassAd.printOld()``.
    """
    capabilities_ad = classad.ClassAd(
        {
            "PluginType": "FileTransfer",
            "PluginVersion": "0.1",
            "MultipleFileSupport": True,
            "SupportedMethods": "htmap",
        }
    )
    sys.stdout.write(capabilities_ad.printOld())
def print_capabilities():
    """Write the gluster plugin's capabilities ad to stdout.

    The ad is serialized with ``ClassAd.printOld()``; its ``Version`` is
    taken from the module-level ``GLUSTER_PLUGIN_VERSION``.
    """
    capabilities_ad = classad.ClassAd(
        {
            'MultipleFileSupport': True,
            'PluginType': 'FileTransfer',
            'SupportedMethods': 'gluster',
            'Version': GLUSTER_PLUGIN_VERSION,
        }
    )
    sys.stdout.write(capabilities_ad.printOld())
def get_schedd():
    """Build an HTCondor schedd handle from the environment.

    The schedd is located through an ad whose ``MyAddress`` is the value of
    the ``HTCONDOR_ADDR`` environment variable (``None`` when it is unset).

    :returns: htcondor schedd object.
    """
    location_ad = classad.ClassAd()
    # Address of the remote scheduler, as exported by the environment.
    location_ad["MyAddress"] = os.environ.get("HTCONDOR_ADDR")
    return htcondor.Schedd(location_ad)
def test_pickle(self):
    """ClassAd and ExprTree objects survive a pickle round trip.

    The round-tripped objects are compared through their repr().
    """
    ad = classad.ClassAd({"one": 1})
    expr = classad.ExprTree("2+2")
    pad = pickle.dumps(ad)
    pexpr = pickle.dumps(expr)
    ad2 = pickle.loads(pad)
    expr2 = pickle.loads(pexpr)
    # repr() instead of calling __repr__ directly; assertEqual instead of
    # the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(repr(ad2), "[ one = 1 ]")
    self.assertEqual(repr(expr2), "2 + 2")
def condor_stats(timeout=TIMEOUT):
    """
    Return the Condor Schedd stats as a ClassAd instance.

    Runs ``condor_status -long -schedd`` through ``_run_condor_cmd`` and
    returns the first element of what that helper hands back. A placeholder
    Scheduler ad named 'Error communicating with the Condor Schedd.' is
    passed to the helper as a one-element list (presumably used as the
    result when the command fails -- confirm against _run_condor_cmd).
    """
    placeholder = classad.ClassAd(MyType='Scheduler',
                                  Name='Error communicating with the Condor Schedd.')
    command = [utils.which('condor_status'), '-long', '-schedd']
    schedd_ads = _run_condor_cmd(command, [placeholder], timeout=timeout)
    return schedd_ads[0]
def test_ad_assignment(self):
    """Reassigning one attribute with float, int and string values reads
    back each value in turn."""
    ad = classad.ClassAd()
    for value in (2.1, 2, "bar"):
        ad["foo"] = value
        self.assertEqual(ad["foo"], value)
    # NOTE(review): __setitem__ receives a single argument here, so the
    # TypeError may come from the missing value argument rather than from
    # ClassAd rejecting a dict -- confirm the intended check.
    self.assertRaises(TypeError, ad.__setitem__, {})
def test_temp_scope(self):
    """A scope passed to ExprTree.eval() applies to that call only.

    An expression taken from an ad evaluates against the ad by default,
    against an explicitly supplied scope when one is given, and against
    the ad again on the next call without a scope.
    """
    expr = classad.ExprTree("foo")
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual("bar", expr.eval({"foo": "bar"}))
    ad = classad.ClassAd({"foo": "baz", "test": classad.ExprTree("foo")})
    expr = ad["test"]
    self.assertEqual("baz", expr.eval())
    self.assertEqual("bar", expr.eval({"foo": "bar"}))
    self.assertEqual("bar", expr.eval({"foo": "bar"}))
    self.assertEqual("baz", expr.eval())
def match(target_classad, requirement_str):
    """
    Check whether a single target satisfies a given requirement.

    The requirement text is parsed into an expression and stored as the
    ``Requirements`` attribute of a fresh ad, which is then handed to
    ``target_classad.matches()``.

    :param classad target_classad: ad to test
    :param str requirement_str: ClassAd expression text
    :return bool: result of ``target_classad.matches()``
    """
    requirement_expr = classad.ExprTree(requirement_str)
    wrapper_ad = classad.ClassAd()
    wrapper_ad['Requirements'] = requirement_expr
    return target_classad.matches(wrapper_ad)
def get_urls(self, host, args):
    """Resolve the arguments of a gwdata:// URL into a list of URLs.

    :param host: passed through as ``host`` to ``gwdatafind.find_urls``.
    :param args: iterable of ``"key=value"`` strings. The keys
        ``observatory``, ``type``, ``s`` (start frame) and ``e`` (end frame)
        are required; any other key is ignored. When a key is repeated the
        last value wins.
    :returns: whatever ``gwdatafind.find_urls`` returns.

    If a required argument is missing, or the lookup raises, an error
    ClassAd is written to ``self.outfile_path`` and the process exits with
    status -1.
    """
    required = ("observatory", "type", "s", "e")

    def fail(message):
        # Write the error ad and terminate; the context manager guarantees
        # the file is flushed and closed before the process exits.
        outfile_dict = get_error_dict(message)
        with open(self.outfile_path, 'w') as outfile:
            outfile.write(str(classad.ClassAd(outfile_dict)))
        sys.exit(-1)

    # Parse the input arguments. partition() keeps any "=" that appears
    # inside a value and tolerates an argument with no "=" at all.
    params = {}
    for arg in args:
        attr, _, value = arg.partition("=")
        if attr in required:
            params[attr] = value

    # If any input arguments are missing, report error and exit
    if any(key not in params for key in required):
        fail(
            "gwdata:// urls must include arguments 'observatory', 'type', 's' (start frame), 'e' (end frame)"
        )

    # Retrieve the list of URLs
    try:
        urls = gwdatafind.find_urls(host=host,
                                    site=params["observatory"],
                                    frametype=params["type"],
                                    gpsstart=int(params["s"]),
                                    gpsend=int(params["e"]))
    except Exception as e:
        # Deliberately broad: any failure (bad frame number, network
        # error, ...) is reported through the output file.
        fail(f"Error retrieving gwdatafind URLs: {sys.exc_info()[0]} ({e})")

    return urls
def test_list_conversion(self):
    """A ClassAd list attribute converts to a Python list, and a registered
    Python function can be called from an expression with list arguments."""
    # NOTE(review): types.ListType / types.LongType exist only on Python 2
    # -- confirm the interpreter this test is meant to run under.
    converted = dict(classad.ClassAd("[a = {1,2,3}]"))
    values = converted["a"]
    self.assertIsInstance(values, types.ListType)
    self.assertIsInstance(values[0], types.LongType)

    def listAdd(a, b):
        return a + b

    classad.register(listAdd)
    summed = classad.ExprTree("listAdd({1,2}, {3,4})")
    self.assertEqual(summed[0], 1)
def getClusterAd(self):
    """
    _getClusterAd_

    Return common cluster classad

    scriptFile & Output/Error/Log filenames shortened to
    avoid condorg submission errors from >256 chars paths

    The ad is populated with fixed attributes, a few values read from this
    object (scriptFile, agent, acctGroup, acctGroupUser), and finally any
    attribute named in the SUBMIT_ATTRS / SUBMIT_EXPRS configuration
    parameters that is not already present in the ad.

    :return: the populated classad.ClassAd
    """
    ad = classad.ClassAd()

    # ad['universe'] = "vanilla"
    ad['ShouldTransferFiles'] = "YES"
    ad['WhenToTransferOutput'] = "ON_EXIT"
    ad['UserLogUseXML'] = True
    ad['JobNotification'] = 0
    ad['Cmd'] = self.scriptFile
    # Investigate whether we should pass the absolute path for Out and Err ads,
    # just as we did for UserLog. There may be issues, more info on WMCore #7362
    # Out/Err/UserLog are expressions, so the file names are built from
    # ClusterId and ProcId when the expression is evaluated.
    ad['Out'] = classad.ExprTree('strcat("condor.", ClusterId, ".", ProcId, ".out")')
    ad['Err'] = classad.ExprTree('strcat("condor.", ClusterId, ".", ProcId, ".err")')
    ad['UserLog'] = classad.ExprTree('strcat(Iwd, "/condor.", ClusterId, ".", ProcId, ".log")')

    ad['WMAgent_AgentName'] = self.agent

    # Falls back to 1200 when MachineAttrMaxHibernateTime0 is undefined.
    ad['JobLeaseDuration'] = classad.ExprTree('isUndefined(MachineAttrMaxHibernateTime0) ? 1200 : MachineAttrMaxHibernateTime0')

    # Remove jobs that have been in status 5 (Held, per the reason text
    # below) for more than 10 * 60 seconds.
    ad['PeriodicRemove'] = classad.ExprTree('( JobStatus =?= 5 ) && ( time() - EnteredCurrentStatus > 10 * 60 )')
    removeReasonExpr = 'PeriodicRemove ? "Job automatically removed for being in Held status" : ""'
    ad['PeriodicRemoveReason'] = classad.ExprTree(removeReasonExpr)

    # Required for global pool accounting
    ad['AcctGroup'] = self.acctGroup
    ad['AcctGroupUser'] = self.acctGroupUser
    ad['AccountingGroup'] = "%s.%s" % (self.acctGroup, self.acctGroupUser)

    # Customized classAds for this plugin
    ad['DESIRED_Archs'] = "INTEL,X86_64"

    ad['Rank'] = 0.0
    ad['TransferIn'] = False

    ad['JobMachineAttrs'] = "GLIDEIN_CMSSite"
    ad['JobAdInformationAttrs'] = ("JobStatus,QDate,EnteredCurrentStatus,JobStartDate,DESIRED_Sites,"
                                   "ExtDESIRED_Sites,WMAgent_JobID,MachineAttrGLIDEIN_CMSSite0")

    # TODO: remove when 8.5.7 is deployed (seems to be still needed as of 8.6.11 ...)
    # Copy into the ad every attribute listed in SUBMIT_ATTRS/SUBMIT_EXPRS
    # that has a configured value, unless it was already set above or is
    # explicitly skipped. Values are parsed as ClassAd expressions.
    paramsToAdd = htcondor.param['SUBMIT_ATTRS'].split() + htcondor.param['SUBMIT_EXPRS'].split()
    paramsToSkip = ['accounting_group', 'use_x509userproxy', 'PostJobPrio2', 'JobAdInformationAttrs']
    for param in paramsToAdd:
        if (param not in ad) and (param in htcondor.param) and (param not in paramsToSkip):
            ad[param] = classad.ExprTree(htcondor.param[param])
    return ad
def test_abstime(self):
    """absTime() evaluates to a datetime that round-trips through a ClassAd.

    Also checks that a ``datetime.now()`` stored in an ad reads back
    within 300 seconds of the current time.
    """
    expr = classad.ExprTree('absTime("2013-11-12T07:50:23")')
    dt = expr.eval()
    # Specific unittest assertions replace assertTrue(isinstance(...)) and
    # the deprecated assertEquals alias (removed in Python 3.12).
    self.assertIsInstance(dt, datetime.datetime)
    self.assertEqual(dt.year, 2013)
    self.assertEqual(dt.month, 11)
    self.assertEqual(dt.day, 12)
    self.assertEqual(dt.hour, 7)
    self.assertEqual(dt.minute, 50)
    self.assertEqual(dt.second, 23)

    ad = classad.ClassAd({"foo": dt})
    dt2 = ad["foo"]
    self.assertIsInstance(dt2, datetime.datetime)
    self.assertEqual(dt, dt2)

    ad = classad.ClassAd({"foo": datetime.datetime.now()})
    td = (datetime.datetime.now() - ad["foo"])
    self.assertEqual(td.days, 0)
    self.assertLess(td.seconds, 300)
def request_token(pool, resource, scopes=None, local_dir=None):
    """Request a token from the pool's collector and write it to disk.

    :param pool: collector as ``host`` or ``host:port``; ``DEFAULT_PORT`` is
        used when no port is given.
    :param resource: forwarded to ``request_token_and_wait_for_approval``
        and used in the token file name.
    :param scopes: requested scopes; ``DEFAULT_TOKEN_SCOPES`` when empty.
    :param local_dir: if set, user-facing messages show the token path
        under this directory instead of the real token directory.
    :returns: False when no token was obtained, True once the token has
        been written and its ownership adjusted.
    """
    if ":" not in pool:
        alias, port = pool, DEFAULT_PORT
    else:
        alias, port = pool.split(":")
    # First IPv4 result; its sockaddr is an (ip, port) pair.
    ip, port = socket.getaddrinfo(alias, int(port), socket.AF_INET)[0][4]

    scopes = scopes or DEFAULT_TOKEN_SCOPES

    my_address = "<{}:{}?alias={}>".format(ip, port, alias)
    coll_ad = classad.ClassAd({"MyAddress": my_address, "MyType": "Collector"})
    logger.debug("Constructed collector ad: {}".format(repr(coll_ad)))

    htcondor.param["SEC_TOKEN_DIRECTORY"] = "/etc/condor/tokens.d"

    token = request_token_and_wait_for_approval(resource, alias, coll_ad, scopes)
    if token is None:
        return False
    print("Token request approved!")

    token_name = "50-{}-{}-registration".format(alias, resource)
    token_path = os.path.join(htcondor.param["SEC_TOKEN_DIRECTORY"], token_name)

    # We tell users to run register.py through the container and volume mount
    # "$PWD/tokens" into /etc/condor/tokens.d so our messages need to reflect
    # the host dir whenever they specify --local-dir (SOFTWARE-4372)
    # '/' is an accepted path separator across operating systems
    msg_path = (os.path.join(local_dir, token_name).replace('\\', '/')
                if local_dir else token_path)

    logger.debug("Writing token to disk (in {})".format(msg_path))
    token.write(token_name)
    logger.debug("Wrote token to disk (at {})".format(msg_path))

    logger.debug("Correcting token file permissions...")
    shutil.chown(token_path, user=TOKEN_OWNER_USER, group=TOKEN_OWNER_GROUP)
    logger.debug("Corrected token file permissions...")

    print("Token was written to {}".format(msg_path))
    if not is_admin():
        print(NONROOT_TOKEN_MSG.format(path=msg_path, name=token_name))

    return True
def ad_to_json(ad):
    """Convert an ad into plain Python data via HTCondor's JSON output.

    Expression values are evaluated first; every other value is copied
    unchanged. The flattened ad is serialized with ``printJson()`` and
    parsed back with ``json.loads``.
    """
    flattened = classad.ClassAd()
    for name, value in ad.items():
        # Evaluate Condor expressions
        flattened[name] = value.eval() if isinstance(value, classad.ExprTree) else value

    # Unfortunately, classad.Value.Undefined is of type int,
    # and json.dumps() converts it to "2".
    # Use HTCondor JSON conversion, then back to a Python object instead.
    serialized = flattened.printJson()
    return json.loads(serialized)
def print_capabilities():
    """Write the example plugin's capabilities ad to stdout.

    The ad is serialized with ``ClassAd.printOld()``; its ``Version`` is
    taken from the module-level ``PLUGIN_VERSION``.
    """
    capabilities_ad = classad.ClassAd(
        {
            'MultipleFileSupport': True,
            'PluginType': 'FileTransfer',
            # SupportedMethods indicates which URL methods/types this plugin supports
            #CHANGE ME HERE
            'SupportedMethods': 'example',
            #END CHANGE
            'Version': PLUGIN_VERSION,
        }
    )
    sys.stdout.write(capabilities_ad.printOld())
def test_ad_iterator(self):
    """Iterating an ad, its items() and its values() yields both entries,
    with "bar" ahead of "foo"."""
    ad = classad.ClassAd()
    ad["foo"] = 1
    ad["bar"] = 2
    self.assertEqual(len(ad), 2)

    keys = list(ad)
    self.assertEqual(len(keys), 2)
    self.assertEqual(keys[1], "foo")
    self.assertEqual(keys[0], "bar")

    items = list(ad.items())
    self.assertEqual(items[1][1], 1)
    self.assertEqual(items[0][1], 2)

    values = list(ad.values())
    self.assertEqual(values[1], 1)
    self.assertEqual(values[0], 2)
def who(self) -> classad.ClassAd:
    """
    Return the result of ``condor_who -quick``, as a :class:`classad.ClassAd`.

    An empty ad is returned instead when the output cannot be parsed, or
    when the parsed ad has no ``MASTER`` attribute.
    """
    completed = self.run_command(["condor_who", "-quick"])

    try:
        who_ad = classad.parseOne(completed.stdout)
        # If there's no MASTER key in the parsed ad, it indicates
        # that we actually got the special post-shutdown message
        # from condor_who and should act like there's nothing there.
        has_master = "MASTER" in who_ad
    except Exception:
        return classad.ClassAd()

    return who_ad if has_master else classad.ClassAd()
def submitDirect(self, schedd, cmd, arg, info): #pylint: disable=R0201
    """
    Submit directly to the schedd using the HTCondor module

    The job ad is built in this process. The submit and spool calls run in
    a forked child, which reports back over a pipe: the literal string "OK"
    on success, or the formatted traceback on failure.

    :param schedd: schedd object; submit(), spool() and reschedule() are called on it
    :param cmd: value of the ad's Cmd attribute
    :param arg: value of the ad's Args attribute
    :param info: dict; the keys 'scratch', 'inputFilesString',
        'outputFilesString', 'remote_condor_setup' and 'userproxy' are read
        here (addCRABInfoToClassAd receives the whole dict)
    :raises Exception: when the child reports anything other than "OK"
    """
    dagAd = classad.ClassAd()
    addCRABInfoToClassAd(dagAd, info)

    # NOTE: Changes here must be synchronized with the job_submit in DagmanCreator.py in CAFTaskWorker
    dagAd["CRAB_Attempt"] = 0
    dagAd["JobUniverse"] = 12
    dagAd["HoldKillSig"] = "SIGUSR1"
    dagAd["Out"] = os.path.join(info['scratch'], "request.out")
    dagAd["Err"] = os.path.join(info['scratch'], "request.err")
    dagAd["Cmd"] = cmd
    dagAd['Args'] = arg
    dagAd["TransferInput"] = info['inputFilesString']
    dagAd["LeaveJobInQueue"] = classad.ExprTree("(JobStatus == 4) && ((StageOutFinish =?= UNDEFINED) || (StageOutFinish == 0))")
    dagAd["TransferOutput"] = info['outputFilesString']
    dagAd["OnExitRemove"] = classad.ExprTree("( ExitSignal =?= 11 || (ExitCode =!= UNDEFINED && ExitCode >=0 && ExitCode <= 2))")
    dagAd["OtherJobRemoveRequirements"] = classad.ExprTree("DAGManJobId =?= ClusterId")
    dagAd["RemoveKillSig"] = "SIGUSR1"
    dagAd["Environment"] = classad.ExprTree('strcat("PATH=/usr/bin:/bin CONDOR_ID=", ClusterId, ".", ProcId)')
    dagAd["RemoteCondorSetup"] = info['remote_condor_setup']
    dagAd["Requirements"] = classad.ExprTree('true || false')
    dagAd["TaskType"] = "ROOT"
    dagAd["X509UserProxy"] = info['userproxy']

    r, w = os.pipe()
    rpipe = os.fdopen(r, 'r')
    wpipe = os.fdopen(w, 'w')
    if os.fork() == 0:
        #pylint: disable=W0212
        # Child: submit, report through the pipe, and always leave via
        # os._exit so the parent's cleanup handlers never run here.
        try:
            rpipe.close()
            try:
                resultAds = []
                htcondor.SecMan().invalidateAllSessions()
                os.environ['X509_USER_PROXY'] = info['userproxy']
                schedd.submit(dagAd, 1, True, resultAds)
                schedd.spool(resultAds)
                wpipe.write("OK")
                wpipe.close()
                os._exit(0)
            except Exception: #pylint: disable=W0703
                wpipe.write(str(traceback.format_exc()))
                # os._exit() does not flush stdio buffers, so the pipe must
                # be closed explicitly or the parent never sees the traceback.
                wpipe.close()
        finally:
            os._exit(1)

    # Parent: drop the write end so read() sees EOF once the child is done.
    wpipe.close()
    try:
        results = rpipe.read()
    finally:
        rpipe.close()
    if results != "OK":
        raise Exception("Failure when submitting HTCondor task: %s" % results)

    schedd.reschedule()
def _submit_task(self, job_id, target):
    """Submit one job pinned to a given machine and record it as running.

    The cluster ad carries a copy of ``self.base_job_properties`` plus the
    current environment and a Requirements expression that adds
    ``TARGET.Machine == "<target>"`` to ``self.base_reqs``. The proc ad
    carries the per-job log/out/err paths and the job id as its argument.

    :param job_id: integer job number; selects the work sub-directory.
    :param target: machine name the job must run on.
    """
    cluster_ad = classad.ClassAd()
    proc_ad = classad.ClassAd()

    properties = self.base_job_properties.copy()
    properties['Environment'] = environ_to_condor()
    for name, value in properties.items():
        cluster_ad[name] = value

    machine_req = 'TARGET.Machine == "%s"' % target
    cluster_ad['Requirements'] = classad.ExprTree('%s && %s' % (self.base_reqs, machine_req))

    workdir_i = '%s/%i/' % (self.workdir, job_id)
    for attr, template in (('UserLog', '%s/log'), ('Out', '%s/out'), ('Err', '%s/err')):
        proc_ad[attr] = template % workdir_i
    proc_ad['Arguments'] = str(job_id)

    cluster_id = self.schedd.submitMany(cluster_ad, [(proc_ad, 1)])
    logger.info('Submitted job %i to cluster %i on %s.' % (job_id, cluster_id, target))

    # Bookkeeping: remember where the job went and mark the machine busy.
    self.running_clusters[cluster_id] = (target, job_id)
    self.available_machines[target] = False
def makeSortAds():
    """Build and print the "Sort Ads" route ad.

    The ad's Requirements select targets that have been neither routed nor
    sorted (and require sortStringSet to be usable). It copies
    DESIRED_Sites to Prev_DESIRED_Sites, sets DESIRED_Sites to the sorted
    previous value, and marks the job as sorted but not routed.
    """
    anAd = classad.ClassAd()
    anAd["GridResource"] = "condor localhost localhost"
    anAd["TargetUniverse"] = 5
    anAd["Name"] = "Sort Ads"
    anAd["Requirements"] = classad.ExprTree(
        "(sortStringSet(\"\") isnt error) && (target.HasBeenRouted is false) && (target.HasBeenSorted isnt true)"
    )
    anAd["copy_DESIRED_Sites"] = "Prev_DESIRED_Sites"
    anAd["eval_set_DESIRED_Sites"] = classad.ExprTree(
        "debug(sortStringSet(Prev_DESIRED_Sites))")
    anAd["set_HasBeenSorted"] = True
    anAd['set_HasBeenRouted'] = False
    # Previously the ad was built and silently discarded; emit it the same
    # way the other make*Ads helpers in this file do.
    print(anAd)
def testCollectorAdvertise(self):
    """An advertised generic ad can be queried back from the collector.

    The query projects only the ``Bar`` attribute, so ``Foo`` must be
    absent from the returned ad. The query is retried up to five times,
    one second apart, until the collector returns something.
    """
    self.launch_daemons(["COLLECTOR"])
    coll = htcondor.Collector()
    now = time.time()
    ad = classad.ClassAd('[MyType="GenericAd"; Name="Foo"; Foo=1; Bar=%f; Baz="foo"]' % now)
    coll.advertise([ad])
    for i in range(5):
        ads = coll.query(htcondor.AdTypes.Any, 'Name =?= "Foo"', ["Bar"])
        if ads:
            break
        time.sleep(1)
    # assertEqual replaces the deprecated assertEquals alias
    # (removed from unittest in Python 3.12).
    self.assertEqual(len(ads), 1)
    # "%f" keeps only six decimals, so the stored value can differ from
    # `now` in the last digits; compare with a matching tolerance.
    self.assertAlmostEqual(ads[0]["Bar"], now, places=5)
    self.assertNotIn("Foo", ads[0])
def collect_cache_stats(url, rootdir, cache_max_fs_fraction=1.0):
    """ Collect stats on the cache server

    :param url: xrootd URL of the server (scheme ``root`` or ``xroot``).
    :param rootdir: cache directory, handed to ``get_cache_info`` and
        ``scan_cache_dirs``.
    :param cache_max_fs_fraction: forwarded to ``get_cache_info``.
    :returns: a ``classad.ClassAd`` holding the server test results, cache
        info, per-VO stats under ``VO``, summed totals, and the time spent
        collecting.
    :raises Exception: when ``url`` is not an xrootd URL.
    """
    start_time = time.time()

    # urllib.parse.urlparse always returns an object with named fields, so
    # no tuple-index fallback is needed.
    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.scheme not in ('root', 'xroot'):
        raise Exception("URL '%s' is not an xrootd url" % url)
    hostname = parsed_url.netloc

    result = {
        'MyType': 'Machine',
        'Name': 'xrootd@%s' % hostname,
        'stats_time': int(start_time),
    }
    result.update(test_xrootd_server(url))
    result.update(get_cache_info(rootdir, cache_max_fs_fraction))

    stats_per_vo = scan_cache_dirs(rootdir)
    # add up the sizes
    totals = {}
    most_recent_access = 0
    result['VO'] = {}
    for vo, vostats in stats_per_vo.items():
        for k, v in vostats.items():
            if k == "most_recent_access_time":
                # Access times are not summed; keep the latest one.
                most_recent_access = max(most_recent_access, v)
            elif k in totals:
                totals[k] += v
            else:
                totals[k] = v
        result['VO'][vo] = vostats

    # used_bytes is reported under its own name rather than as a total_*.
    result['used_cache_bytes'] = totals.pop("used_bytes", 0)
    for k, v in totals.items():
        result["total_" + k] = v
    if most_recent_access > 0:
        result["most_recent_access_time"] = most_recent_access

    result['time_to_collect_stats'] = time.time() - start_time
    return classad.ClassAd(result)
def makeDrainAds(config=None):
    """Print one route ad per draining agent.

    :param config: mapping whose optional ``speed_drain`` entry lists the
        agents to drain. ``None`` (the default) or a mapping without that
        entry prints nothing.

    For each agent, the ad matches jobs with JobStatus == 1 whose
    GlobalJobId matches the agent name and whose JobPrio is below 200000,
    and sets their JobPrio to 200000. The same ad object is updated and
    printed for every agent.
    """
    anAd = classad.ClassAd()
    anAd["GridResource"] = "condor localhost localhost"
    anAd["TargetUniverse"] = 5
    set_To = 200000
    # Tolerate the default config=None instead of failing on None.get().
    draining_agents = (config or {}).get('speed_drain', [])
    for agent in draining_agents:
        anAd["Name"] = str("Drain agent %s" % agent)
        exp = 'regexp("%s",GlobalJobId) && JobStatus == 1 && JobPrio<%d' % (
            str(agent), set_To)
        anAd["Requirements"] = classad.ExprTree(str(exp))
        anAd["set_JobPrio"] = set_To
        anAd["set_HasBeenRouted"] = False
        # Call syntax prints identically on Python 2 and also parses on
        # Python 3.
        print(anAd)
def killJobs(self, ids):
    """Remove the given jobs of this workflow from the schedd.

    :param ids: list of job ids; matched against ``CRAB_Id`` in the
        removal constraint.
    :raises TaskWorkerException: when the subprocess reports anything
        other than "OK".
    """
    # Round-trip the ids through a ClassAd to obtain their ClassAd list
    # literal for the member() call in the constraint.
    ad = classad.ClassAd()
    ad['foo'] = ids
    const = "CRAB_ReqName =?= %s && member(CRAB_Id, %s)" % (
        HTCondorUtils.quote(self.workflow), repr(ad.lookup("foo")))
    with HTCondorUtils.AuthenticatedSubprocess(self.proxy) as (parent, rpipe):
        if not parent:
            self.schedd.act(htcondor.JobAction.Remove, const)
    results = rpipe.read()
    if results != "OK":
        # str() each id so non-string ids cannot raise TypeError here and
        # hide the scheduler's actual error.
        id_list = ", ".join(str(job_id) for job_id in ids)
        raise TaskWorkerException("The CRAB3 server backend could not kill jobs [%s]. because the Grid scheduler answered with an error\n" % id_list +
                                  "This is probably a temporary glitch, please try it again and contact an expert if the error persist\n" +
                                  "Error reason %s" % results)
def perform_submit(work_no):
    """Submit ``batch.py`` for one simulation directory.

    The job runs with ``sim_<work_no>`` (next to this file) as its initial
    working directory.

    :param work_no: value formatted into the ``sim_{}`` directory name.
    :returns: whatever ``htcondor.Schedd().submit()`` returns.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    job_description = {
        "Cmd": os.path.join(here, "batch.py"),
        "Iwd": os.path.join(here, "sim_{}".format(work_no)),
        "UserLog": "sim.log",
        "UserOutput": "sim.out",
        "TransferInput": "simdata",
        "Err": "sim.err",
        "Arguments": "simdata",
        "ShouldTransferFiles": "YES",
    }
    job_ad = classad.ClassAd(job_description)
    return htcondor.Schedd().submit(job_ad)
def submit_with_htcondor(self, job):
    '''
    === In development ===
    Submit a fixed job ad (executable ``condor/run_job``) to the local
    HTCondor scheduler using the HTCondor python bindings.

    The ``job`` argument is not used yet.
    '''
    import htcondor
    import classad
    # following
    # http://osgtech.blogspot.co.uk/2014/03/submitting-jobs-to-htcondor-using-python.html
    scheduler = htcondor.Schedd()
    description = classad.ClassAd({'executable': 'condor/run_job'})  # TODO
    scheduler.submit(description)
def makeReleaseAds(config):
    """
    Create a set of rules to release a task to match

    :param config: mapping whose optional ``release`` entry maps task
        names to values (the values are not used here).

    One ad is printed per task. It matches held jobs
    (``HasBeenSetHeld is true``) whose ``WMAgent_SubTaskName`` equals the
    task, copies DESIRED_Sites to Held_DESIRED_Sites, and clears both the
    routed and held flags.
    """
    for task, _where in config.get('release', {}).items():
        anAd = classad.ClassAd()
        anAd["Name"] = str("Releasing task %s" % (task))
        anAd["GridResource"] = "condor localhost localhost"
        # classad.quote() escapes the task name for use as a string literal.
        exp = '(HasBeenSetHeld is true) && (target.WMAgent_SubTaskName =?= %s)' % classad.quote(
            str(task))
        anAd["Requirements"] = classad.ExprTree(str(exp))
        anAd["copy_Held_DESIRED_Sites"] = "DESIRED_Sites"
        anAd["set_HasBeenRouted"] = False
        anAd["set_HasBeenSetHeld"] = False
        # Call syntax prints identically on Python 2 and also parses on
        # Python 3.
        print(anAd)