Example 1
def getCollectors(pool, pool1, main=False):
    """Get both collectors"""
    if main:
        coll = htcondor.Collector(pool.split(":")[0])
        coll1 = htcondor.Collector(pool1.split(":")[0])
        return coll, coll1
    else:
    coll = htcondor.Collector(pool)
        coll1 = htcondor.Collector(pool1)
        return coll, coll1
Example 2
 def __getcollector(self):
     self.log.debug('starting')
     if self.hostname:
         address = _address(self.hostname, self.port)
         collector = htcondor.Collector(address)
         self.log.debug('got remote collector')
     else:
         collector = htcondor.Collector()
         self.log.debug('got local collector')
     self.__validate_collector(collector)
     return collector
Example 3
    def testNegotiate(self):
        #htcondor.param['TOOL_DEBUG'] = 'D_FULLDEBUG'
        #os.environ['_condor_SCHEDD_DEBUG'] = 'D_FULLDEBUG, D_NETWORK'
        #htcondor.enable_debug()

        self.launch_daemons(["SCHEDD", "COLLECTOR", "STARTD"])
        output_file = os.path.join(testdir, "test.out")
        if os.path.exists(output_file):
            os.unlink(output_file)
        schedd = htcondor.Schedd()

        schedd.act(htcondor.JobAction.Remove, 'true')
        ad = classad.parseOne(open("tests/submit.ad"))
        ads = []
        cluster = schedd.submit(ad, 1, False, ads)

        # Get claim for startd
        claim_ads = []
        for i in range(10):
            startd_ads = htcondor.Collector().locateAll(
                htcondor.DaemonTypes.Startd)
            private_ads = htcondor.Collector().query(
                htcondor.AdTypes.StartdPrivate)
            if (len(startd_ads) != htcondor.param['NUM_CPUS']) or (
                    len(private_ads) != htcondor.param['NUM_CPUS']):
                time.sleep(1)
                continue
            break
        self.assertEqual(len(startd_ads), len(private_ads))
        self.assertEqual(len(startd_ads), htcondor.param['NUM_CPUS'])
        for ad in htcondor.Collector().locateAll(htcondor.DaemonTypes.Startd):
            for pvt_ad in private_ads:
                if pvt_ad.get('Name') == ad['Name']:
                    ad['ClaimId'] = pvt_ad['Capability']
                    claim_ads.append(ad)
        self.assertEqual(len(claim_ads), len(startd_ads))
        claim = claim_ads[0]

        me = "%s@%s" % (pwd.getpwuid(
            os.geteuid()).pw_name, htcondor.param['UID_DOMAIN'])
        with schedd.negotiate(me) as session:
            requests = list(session)
            self.assertEqual(len(requests), 1)
            request = requests[0]
            self.assertTrue(request.symmetricMatch(claim))
            session.sendClaim(claim['ClaimId'], claim, request)

        for i in range(60):
            ads = schedd.xquery("ClusterId == %d" % cluster, ["JobStatus"])
            ads = list(ads)
            if len(ads) == 0:
                break
            time.sleep(1)
        self.assertEqual(open(output_file).read(), "hello world\n")
Example 4
 def collector(self):
     """
     The :class:`htcondor.Collector` for the personal pool's collector.
     """
     with self.use_config():
         # This odd construction ensures that the Collector we return
         # doesn't just point to "the local collector" - that could be
         # overridden by changing CONDOR_CONFIG after the Collector
         # was initialized. Locating it first keeps the address stable.
         return htcondor.Collector(htcondor.Collector().locate(
             htcondor.DaemonTypes.Collector))
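A hedged usage sketch of the method above; `pool` stands in for an instance of the enclosing personal-pool class (an assumption), and `collector` is called as a plain method here although the original may declare it as a property:

# Hedged usage sketch; `pool` is an assumed instance of the class above.
coll = pool.collector()
coll_ad = coll.locate(htcondor.DaemonTypes.Collector)
print(coll_ad["MyAddress"])  # stays stable even if CONDOR_CONFIG changes later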
Example 5
 def __init__(self, collector=None):
     """Helper class to query HTCondor via python bindings."""
     if collector is None:
         self.collector = htcondor.Collector()
     else:
         self.collector = htcondor.Collector(collector)
     """Central collector."""
     self.schedds = [
         htcondor.Schedd(classAd)
         for classAd in self.collector.query(htcondor.AdTypes.Schedd)
     ]
     """List of schedd objects, retrieved from collector."""
 def waitRemoteDaemon(self, dtype, dname, pool=None, timeout=5):
     if pool:
         coll = htcondor.Collector(pool)
     else:
         coll = htcondor.Collector()
     for i in range(timeout):
         try:
             return coll.locate(dtype, dname)
         except Exception:
             pass
         time.sleep(1)
     return coll.locate(dtype, dname)
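A hedged usage sketch for the helper above; `CondorHelper` is a hypothetical name for the enclosing class, and the host names are placeholders:

# Hedged usage sketch; `CondorHelper` is a hypothetical name for the class above.
helper = CondorHelper()  # local pool; pass a collector address for a remote one
for schedd in helper.schedds:
    print(schedd.query('JobStatus == 1', ['ClusterId']))
# Wait up to 10 seconds for a remote schedd to register with a given pool.
ad = helper.waitRemoteDaemon(htcondor.DaemonTypes.Schedd,
                             'schedd01.example.org',
                             pool='pool.example.org', timeout=10)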
Example 7
def getSchedd(scheddurl, coll=""):
    if len(scheddurl) > 0:
        try:
            if len(coll) > 0:
                coll = htcondor.Collector(coll)
            else:
                coll = htcondor.Collector()  # defaults to local
            scheddAd = coll.locate(htcondor.DaemonTypes.Schedd, scheddurl)
            schedd = htcondor.Schedd(scheddAd)
        except Exception:
            print("Warning: could not locate schedd " + scheddurl)
            return None
    else:
        schedd = htcondor.Schedd()  # defaults to local
    return schedd
Example 8
def condor_submit_process(mp_queue, host, jdl_map_list):
    """
    Function run in a new process to submit jobs to condor
    """
    # initialization
    errStr = ''
    batchIDs_list = []
    # parse schedd and pool name
    condor_schedd, condor_pool = None, None
    if host in ('LOCAL', 'None'):
        tmpLog.debug(
            'submissionHost is {0}, treated as local schedd. Skipped'.format(
                host))
    else:
        try:
            condor_schedd, condor_pool = host.split(',')[0:2]
        except ValueError:
            tmpLog.error('Invalid submissionHost: {0} . Skipped'.format(host))
    # get schedd
    try:
        if condor_pool:
            collector = htcondor.Collector(condor_pool)
        else:
            collector = htcondor.Collector()
        if condor_schedd:
            scheddAd = collector.locate(htcondor.DaemonTypes.Schedd,
                                        condor_schedd)
        else:
            scheddAd = collector.locate(htcondor.DaemonTypes.Schedd)
        schedd = htcondor.Schedd(scheddAd)
    except Exception as e:
        errStr = 'create condor collector and schedd failed; {0}: {1}'.format(
            e.__class__.__name__, e)
    else:
        submit_obj = htcondor.Submit()
        try:
            with schedd.transaction() as txn:
                # TODO: Currently spool is not supported in htcondor.Submit ...
                submit_result = submit_obj.queue_with_itemdata(
                    txn, 1, iter(jdl_map_list))
                clusterid = submit_result.cluster()
                first_proc = submit_result.first_proc()
                num_proc = submit_result.num_procs()
                batchIDs_list.extend([
                    '{0}.{1}'.format(clusterid, procid)
                    for procid in range(first_proc, first_proc + num_proc)
                ])
        except RuntimeError as e:
            errStr = 'submission failed; {0}: {1}'.format(
                e.__class__.__name__, e)
    mp_queue.put((batchIDs_list, errStr))
Example 9
    def fetch_using_bindings(self, constraint=None, format_list=None):
        """Fetch the condor_q results using htcondor-python bindings

        Args:
            constraint (str): Constraints to be applied to the query
            format_list (list): Classad attr & type. [(attr1, 'i'), ('attr2', 's')]

        Returns (dict): Dict containing the results

        """
        global disk_cache
        results_dict = {}  # defined here in case of exception
        constraint = bindings_friendly_constraint(constraint)
        attrs = bindings_friendly_attrs(format_list)

        self.security_obj.save_state()
        try:
            self.security_obj.enforce_requests()
            htcondor_full_reload()
            if self.pool_name:
                collector = htcondor.Collector(str(self.pool_name))
            else:
                collector = htcondor.Collector()

            if self.schedd_name is None:
                schedd = htcondor.Schedd()
            else:
                schedd_ad = disk_cache.get(self.schedd_name + '.locate')
                if schedd_ad is None:
                    schedd_ad = collector.locate(htcondor.DaemonTypes.Schedd,
                                                 self.schedd_name)
                    disk_cache.save(self.schedd_name + '.locate', schedd_ad)
                schedd = htcondor.Schedd(schedd_ad)
            results = schedd.query(constraint, attrs)
            results_dict = list2dict(results, self.group_attribute)
        except Exception as ex:
            s = 'default'
            if self.schedd_name is not None:
                s = self.schedd_name
            p = 'default'
            if self.pool_name is not None:
                p = self.pool_name
            err_str = 'Error querying schedd %s in pool %s using python bindings: %s' % (
                s, p, ex)
            raise PBError(err_str) from ex
        finally:
            self.security_obj.restore_state()

        return results_dict
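A hedged usage sketch matching the documented Args; the instance name `monitor` and its construction are assumptions, only the method signature and the format_list shape come from the snippet:

# Hedged usage sketch; `monitor` is an assumed instance with pool_name,
# schedd_name, and security_obj attributes already configured.
results = monitor.fetch_using_bindings(
    constraint='JobStatus == 2',
    format_list=[('ClusterId', 'i'), ('Owner', 's')])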
Example 10
 def collector(self):
     """
     Returns a context manager that provides the
     :class:`htcondor.Collector` for the personal pool's collector.
     """
     with self.use_config():
         yield htcondor.Collector()
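Because this variant yields inside use_config(), it is presumably decorated with contextlib.contextmanager; a hedged usage sketch, with `pool` as an assumed instance of the enclosing class:

# Hedged usage sketch; `pool` is an assumed instance of the class above.
with pool.collector() as coll:
    master_ad = coll.locate(htcondor.DaemonTypes.Master)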
Example 11
 def getScheddObj(self, name):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     Still required for OLD tasks. Remove it later TODO
     """
     info = name.split("_")
     if len(info) > 3:
         name = info[2]
     else:
         name = self.getSchedd()
     if name == "localhost":
         schedd = htcondor.Schedd()
         with open(htcondor.param['SCHEDD_ADDRESS_FILE']) as fd:
             address = fd.read().split("\n")[0]
     else:
         info = name.split(":")
         pool = "localhost"
         if len(info) == 3:
             pool = info[1]
         htcondor.param['COLLECTOR_HOST'] = self.getCollector(pool)
         coll = htcondor.Collector()
         schedds = coll.query(htcondor.AdTypes.Schedd, 'regexp(%s, Name)' % HTCondorUtils.quote(info[0]))
         self.scheddAd = ""
         if not schedds:
             self.scheddAd = self.getCachedCollectorOutput(info[0])
         else:
             self.cacheCollectorOutput(info[0], schedds[0])
             self.scheddAd = self.getCachedCollectorOutput(info[0])
         address = self.scheddAd['MyAddress']
         schedd = htcondor.Schedd(self.scheddAd)
     return schedd, address
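A hedged usage sketch of the tuple this method returns, exercising the "localhost" branch shown above; `plugin` is an assumed instance of the enclosing class:

# Hedged usage sketch; `plugin` is an assumed instance of the class above.
schedd, address = plugin.getScheddObj("localhost")
print(address)  # sinful address read from SCHEDD_ADDRESS_FILE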
Example 12
def get_pool_status(pool,
                    retry_delay=30,
                    max_retries=4,
                    schedd_constraint=True,
                    negotiator_constraint=True):
    coll = htcondor.Collector(pool)
    if callable(schedd_constraint):
        schedd_constraint = schedd_constraint(coll)
    if callable(negotiator_constraint):
        negotiator_constraint = negotiator_constraint(coll)

    daemons = {
        "schedds": htcondor.DaemonTypes.Schedd,
        "collectors": htcondor.DaemonTypes.Collector,
        "negotiators": htcondor.DaemonTypes.Negotiator
    }

    data = {
        "schema": "daemon.name.measurement",
        "metrics": {},
    }
    for daemon_type, daemon in daemons.items():
        retries = 0
        while retries < max_retries:
            try:
                if daemon_type == "schedds":
                    ads = coll.query(htcondor.AdTypes.Schedd,
                                     schedd_constraint)
                elif daemon_type == 'negotiators':
                    ads = coll.query(htcondor.AdTypes.Negotiator,
                                     negotiator_constraint)
                else:
                    ads = coll.locateAll(daemon)
            except Exception as e:
                logger.warning(
                    "trouble getting pool {0} {1} status, retrying in {2}s: {3}"
                    .format(pool, daemon_type, retry_delay, e))
                ads = None
                retries += 1
                time.sleep(retry_delay)
            else:
                break
        if ads is None:
            logger.error(
                "trouble getting pool {0} {1} status, giving up.".format(
                    pool, daemon_type))
        else:
            for ad in ads:
                # quick hack to skip schedds starting up on worker nodes
                if ad['Name'].startswith('fnpc'):
                    logger.info('skipping worker node {}'.format(ad['Name']))
                    continue
                for k in ad:
                    if type(ad[k]) in (int, float):
                        metric = ".".join([
                            daemon_type, ad["Name"].replace(".", "_").replace(
                                "@", "-").replace(" ", "_"), k
                        ])
                        data["metrics"][metric] = ad[k]
    return [data]
Example 13
 def testLocate(self):
     self.launch_daemons(["COLLECTOR"])
     coll = htcondor.Collector()
     coll_ad = coll.locate(htcondor.DaemonTypes.Collector)
     self.assertTrue("MyAddress" in coll_ad)
     self.assertEqual(coll_ad["Name"].split(":")[-1],
                      os.environ["_condor_PORT"])
Example 14
    def __init__(self, resthost, jsonDoc, logger=None):
        if not logger:
            self.logger = logging.getLogger(__name__)
            handler = logging.StreamHandler(sys.stdout)
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger = logger

        self.jsonDoc = jsonDoc
        self.resthost = resthost
        self.pool = ''
        self.schedds = []
        self.resthost = "cmsweb.cern.ch:8443"
        self.crabserver = CRABRest(hostname=resthost,
                                   localcert='/data/certs/servicecert.pem',
                                   localkey='/data/certs/servicekey.pem',
                                   retry=10,
                                   userAgent='CRABTaskWorker')
        self.crabserver.setDbInstance(dbInstance='prod')
        # use child collector on port 9620 to get schedd attributes
        collName = "cmsgwms-collector-global.cern.ch:9620,cmsgwms-collector-global.fnal.gov:9620"
        self.coll = htcondor.Collector(collName)
Example 15
def get_schedds(args):
    """
    Return a list of schedd ads representing all the schedds in the pool.
    """
    collectors = args.collectors
    if collectors:
        collectors = collectors.split(",")
    else:
        collectors = []
        logging.warning("The list of Collectors to query is empty")

    schedd_ads = {}
    for host in collectors:
        coll = htcondor.Collector(host)
        try:
            schedds = coll.locateAll(htcondor.DaemonTypes.Schedd)
        except IOError:
            logging.exception(
                f"Error while getting Schedds from Collector {host}")
            continue

        for schedd in schedds:
            if args.schedds and not (schedd["Name"]
                                     in args.schedds.split(",")):
                continue
            schedd["MyPool"] = host
            try:
                schedd_ads[schedd["Name"]] = schedd
            except KeyError:
                pass

    schedd_ads = list(schedd_ads.values())
    random.shuffle(schedd_ads)

    return schedd_ads
Example 16
def get_factory_version(node_name):
    htcondor.reload_config()
    collector = htcondor.Collector(node_name)
    adtype = htcondor.AdTypes.Any
    constraint = 'MyType == "glidefactoryglobal"'
    results = collector.query(adtype, constraint, ['GlideinWMSVersion'])
    return results[0]['GlideinWMSVersion']
Example 17
    def test_logging(self):

        # Submitting some sleep jobs
        job = {"executable": "/bin/sleep",
               "arguments": "5m",
               "request_memory": "1024"}

        sub = htcondor.Submit(job)
        schedd = htcondor.Schedd()
        with schedd.transaction() as txn:
            sub.queue(txn, 1)

        # Waiting for the glideins to start
        time.sleep(60)

        coll = htcondor.Collector()
        startd = coll.locateAll(htcondor.DaemonTypes.Startd)[0]

        url = startd['PRESIGNED_GET_URL']
        log_filename = 'logfile.tar.gz'
        urllib.request.urlretrieve(url, log_filename)
        with tarfile.open(log_filename, 'r:gz') as tar:
            tar.extractall()
        logdir = glob.glob('log.*')[0]
        self.assertTrue(os.path.exists(os.path.join(logdir, 'MasterLog')),
                        msg='Failed to download logfile: {}'.format(url))
Example 18
    def getScheddObj(self, name):
        """
        Return a tuple (schedd, address) containing an object representing the
        remote schedd and its corresponding address.

        If address is None, then we are using the BossAir plugin.  Otherwise,
        the schedd object is of type htcondor.Schedd.
        """
        if not self.getRemoteCondor():
            if name == "localhost":
                schedd = htcondor.Schedd()
                with open(htcondor.param['SCHEDD_ADDRESS_FILE']) as fd:
                    address = fd.read().split("\n")[0]
            else:
                info = name.split(":")
                pool = "localhost"
                if len(info) == 2:
                    pool = info[1]
                coll = htcondor.Collector(self.getCollector(pool))
                scheddAd = coll.locate(htcondor.DaemonTypes.Schedd, info[0])
                address = scheddAd['MyAddress']
                schedd = htcondor.Schedd(scheddAd)
            return schedd, address
        else:
            return RemoteCondorPlugin.RemoteCondorPlugin(self.config, logger=self.logger), None
Example 19
 def get_schedd_ads(self):
     import htcondor
     self.get_collector_node_addresses()
     for node in self.collector_node_addresses:
         collector = htcondor.Collector(node)
         try:
             self.schedd_ads = collector.query(
                 htcondor.AdTypes.Schedd,
                 projection=[
                     'Name', 'MyAddress', 'MaxJobsRunning', 'ShadowsRunning',
                     'RecentDaemonCoreDutyCycle', 'TotalIdleJobs'
                 ],
                 constraint=self.schedd_constraints
             )
             if self.schedd_ads:
                 # As soon as schedd_ads are found in one collector node, use those
                 # This may not be the correct choice for some batch systems
                 break
         except Exception as e:
             logger.debug('Failed querying %s: %s', node, e)
             continue
     else:
         logger.error('Failed to collect any schedds from %s', self.collector_node_addresses)
         raise RuntimeError
     logger.debug('Found schedd ads %s', self.schedd_ads)
     return self.schedd_ads
Example 20
def main():
    overview_running = {}
    overview_pending = {}
    overview_other = {}
    overview_running48 = {}
    overview_numjobstart = {}
    overview_removereason = {}
    jobs_48 = {}
    jobs_maxwall = {}
    jobs_numjobstart = {}
    jobs_removereason = {}

    # global pool collector
    coll = htcondor.Collector(global_pool)
    schedd_ads = coll.query(htcondor.AdTypes.Schedd,
                            'CMSGWMS_Type=?="prodschedd"',
                            ['Name', 'MyAddress', 'ScheddIpAddr'])

    # all schedds
    for ad in schedd_ads:
        if ad["Name"] not in schedds:
            continue
        print "getting jobs from %s" % ad["Name"]
        #fill the overview
        get_overview(overview_running, overview_pending, overview_other,
                     overview_running48, overview_numjobstart,
                     overview_removereason, jobs_48, jobs_maxwall,
                     jobs_numjobstart, jobs_removereason, ad)

    print_results(overview_running, overview_pending, overview_running48,
                  overview_numjobstart, overview_removereason, jobs_48,
                  jobs_maxwall, jobs_numjobstart, jobs_removereason)
Example 21
def get_pool_resource_utilization(pool, retry_delay=30, max_retries=4, schedd_constraint=True):
    coll = htcondor.Collector(pool)
    retries = 0
    while retries < max_retries:
        try:
            schedd_ads = coll.query(htcondor.AdTypes.Schedd, schedd_constraint)
        except Exception:
            logger.warning("trouble getting pool {0} schedds, retrying in {1}s.".format(pool, retry_delay))
            retries += 1
            schedd_ads = None
            time.sleep(retry_delay)
        else:
            break

    if schedd_ads is None:
        logger.error("trouble getting pool {0} schedds, giving up.".format(pool))
        return {}

    memory_usage = 0
    disk_usage = 0
    for ad in schedd_ads:
        try:
            schedd = htcondor.Schedd(ad)
            results = schedd.query('jobstatus==2', ['ResidentSetSize_RAW', 'DiskUsage_RAW'])
        except Exception as e:
            logger.error(e)
        else:
            for r in results:
                memory_usage += r.get('ResidentSetSize_RAW',0)
                disk_usage += r.get('DiskUsage_RAW',0)
    return {
        "MemoryUsage": memory_usage / 1024,
        "DiskUsage": disk_usage,
    }
Example 22
def get_schedd(pool=None, schedd_name=None):
    if schedd_name:
        collector = htcondor.Collector(pool)
        return htcondor.Schedd(
            collector.locate(htcondor.DaemonTypes.Schedd, schedd_name))
    else:
        return htcondor.Schedd()
Example 23
def main():
    coll = htcondor.Collector()
    slots = coll.query(htcondor.AdTypes.Startd,
                       projection=["Name", "Activity", "State"])

    expected_machines = {
        "syr-compute-c0", "syr-compute-c1", "uc-compute-c0", "uc-compute-c1",
        "ucsd-compute-c0", "ucsd-compute-c1", "unl-compute-c0",
        "unl-compute-c1"
    }

    current_machines = {
        s["Name"]
        for s in slots if s["State"] == "Unclaimed" and s["Activity"] == "Idle"
    }
    if current_machines != expected_machines:
        print("Expected machines not found")
        return 1

    print("Expected machines found")
    slots.sort(key=lambda s: s["Name"])
    for s in slots:
        print(repr(s))

    return 0
Example 24
    def test_startd_checks(self):

        startd_resources = ['PYGLIDEIN_RESOURCE_GPU',
                            'PYGLIDEIN_RESOURCE_CVMFS',
                            'PYGLIDEIN_RESOURCE_GRIDFTP']
        startd_metrics = ['PYGLIDEIN_METRIC_TIME_PER_PHOTON']

        coll = htcondor.Collector()
        startd = coll.locateAll(htcondor.DaemonTypes.Startd)
        if len(startd) == 0:
            # Submitting some sleep jobs
            job = {"executable": "/bin/sleep",
                   "arguments": "5m",
                   "request_memory": "1024"}

            sub = htcondor.Submit(job)
            schedd = htcondor.Schedd()
            with schedd.transaction() as txn:
                sub.queue(txn, 1)

            # Waiting for the glideins to start
            time.sleep(60)

        startd = coll.locateAll(htcondor.DaemonTypes.Startd)[0]

        for resource in startd_resources:
            self.assertTrue(startd.get(resource, False),
                            msg='{} does not exist or equals False'.format(resource))

        for metric in startd_metrics:
            self.assertTrue(startd.get(metric, 0) > 0,
                            msg='{} does not exist or equals 0'.format(metric))
Example 25
 def __init__(self, pool="localhost"):
     self.pool = pool
     self.collector = htcondor.Collector(pool)
     self.bins = [(300, 'recent'), (3600, 'one_hour'),
                  (3600 * 4, 'four_hours'), (3600 * 8, 'eight_hours'),
                  (3600 * 24, 'one_day'), (3600 * 24 * 2, 'two_days'),
                  (3600 * 24 * 7, 'one_week')]
Example 26
    def isScheddOverloaded(self):
        """
        check whether job limit is reached in local schedd.
        Condition is check by following logic.
        ( ShadowsRunning > 9.700000000000000E-01 * MAX_RUNNING_JOBS) )
        || ( RecentDaemonCoreDutyCycle > 9.800000000000000E-01 )
        """
        try:
            scheddAd = self.coll.locate(htcondor.DaemonTypes.Schedd)
            q = self.coll.query(htcondor.AdTypes.Schedd,
                                'Name == "%s"' % scheddAd['Name'],
                                projection=['CurbMatchmaking'])[0]
            isOverloaded = q['CurbMatchmaking'].eval()
            return isOverloaded
        except Exception:
            # if there is an error, try to recreate the collector instance
            logging.info("Recreating Collector instance due to query error...")
            self.coll = htcondor.Collector()
        try:
            scheddAd = self.coll.locate(htcondor.DaemonTypes.Schedd)
            q = self.coll.query(htcondor.AdTypes.Schedd,
                                'Name == "%s"' % scheddAd['Name'],
                                projection=['CurbMatchmaking'])[0]
            isOverloaded = q['CurbMatchmaking'].eval()
        except Exception as ex:
            msg = "Condor failed to fetch schedd attributes."
            msg += "Error message: %s" % str(ex)
            logging.exception(msg)
            # since it failed, assume it's overloaded
            isOverloaded = True

        return isOverloaded
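The same check as a minimal standalone sketch against the local pool, mirroring the query above without the retry and recreation logic (assumes a reachable local collector):

# Minimal standalone sketch of the CurbMatchmaking check (no retry logic).
coll = htcondor.Collector()
schedd_name = coll.locate(htcondor.DaemonTypes.Schedd)['Name']
ad = coll.query(htcondor.AdTypes.Schedd,
                'Name == "%s"' % schedd_name,
                projection=['CurbMatchmaking'])[0]
is_overloaded = bool(ad['CurbMatchmaking'].eval())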
Example 27
def read_from_collector(address,
                        history=False,
                        constraint='true',
                        projection=None):
    """Connect to condor collectors and schedds to pull job ads directly.

    A generator that yields condor job dicts.

    Args:
        address (str): address of collector
        history (bool): read history (True) or active queue (default: False)
        constraint (str): classad expression to filter jobs
        projection (list): job attributes to project (default: all)
    """
    import htcondor
    projection = projection if projection is not None else []
    coll = htcondor.Collector(address)
    schedd_ads = coll.locateAll(htcondor.DaemonTypes.Schedd)
    for schedd_ad in schedd_ads:
        logging.info('getting job ads from %s', schedd_ad['Name'])
        schedd = htcondor.Schedd(schedd_ad)
        try:
            i = 0
            if history:
                start_dt = datetime.now() - timedelta(minutes=10)
                start_stamp = time.mktime(start_dt.timetuple())
                gen = schedd.history(
                    '(EnteredCurrentStatus >= {0}) && ({1})'.format(
                        start_stamp, constraint), projection, 10000)
            else:
                gen = schedd.query(constraint, projection)
            for i, entry in enumerate(gen):
                yield classad_to_dict(entry)
            logging.info('got %d entries', i)
        except Exception:
            logging.info('%s failed', schedd_ad['Name'], exc_info=True)
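A hedged usage sketch for the generator above; the collector address and projection are placeholders:

# Hedged usage sketch; the collector address is a placeholder.
for job in read_from_collector('collector.example.org', history=True,
                               projection=['ClusterId', 'ProcId', 'JobStatus']):
    print(job.get('ClusterId'), job.get('JobStatus'))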
Example 28
 def getScheddObj(self, name):
     """
     Return a tuple (schedd, address) containing an object representing the
     remote schedd and its corresponding address.
     """
     info = name.split("_")
     if len(info) > 3:
         name = info[2]
     else:
         name = self.getSchedd()
     if name == "localhost":
         schedd = htcondor.Schedd()
         with open(htcondor.param['SCHEDD_ADDRESS_FILE']) as fd:
             address = fd.read().split("\n")[0]
     else:
         info = name.split(":")
         pool = "localhost"
         if len(info) == 3:
             pool = info[1]
         htcondor.param['COLLECTOR_HOST'] = self.getCollector(pool)
         coll = htcondor.Collector()
         schedds = coll.query(htcondor.AdTypes.Schedd, 'regexp(%s, Name)' % HTCondorUtils.quote(info[0]))
         if not schedds:
             raise Exception("Unable to locate schedd %s" % info[0])
         self.scheddAd = schedds[0]
         address = self.scheddAd['MyAddress']
         schedd = htcondor.Schedd(self.scheddAd)
     return schedd, address
Example 29
def get_condor_daemons(daemon_type):
    collector = htcondor.Collector()
    try:
        daemons = collector.locateAll(daemon_type)
    except Exception:
        daemons = []
    return daemons
Example 30
def peek(params, parsers):
    """Peek into the crystal ball to see the future."""
    coll = htcondor.Collector()

    # Ignore dynamic slots, which are the ephemeral children of partitionable slots, and thus noise.
    # Partitionable slot definitions remain unaltered by the process of dynamic slot creation.
    try:
        content = coll.query(htcondor.AdTypes.Startd,
                             constraint='SlotType != "Dynamic"',
                             projection=QUERY_DATA)
    except htcondor.HTCondorLocateError as e:
        LOGGER.error(
            str(e) +
            "\n You seem to run HTCrystalBall on a system that has no htcondor pool.\n"
            "For information about htcondor pools, you can go to\n"
            "https://htcondor.readthedocs.io/en/latest/admin-manual/introduction-admin-manual.html"
        )
        sys.exit(0)

    examine.prepare(cpu=params.cpu,
                    gpu=params.gpu,
                    ram=params.ram,
                    disk=params.disk,
                    jobs=params.jobs,
                    job_duration=params.time,
                    maxnodes=params.maxnodes,
                    verbose=params.verbose,
                    content=content)
    sys.exit(0)