def query_jobs(self):
    '''
    Converts PBS jobs into cyclecloud.job.Job instances. It will also compress jobs that have the exact same requirements.

    Running jobs that span multiple vnodes are split into one record per vnode, and
    queued jobs with a multi-chunk select statement are split into one synthetic job
    per chunk (job_id suffixed with ".<n>"), so each resulting Job describes a single
    homogeneous node requirement.

    Returns a list of Job instances: running jobs first, then queued jobs (optionally
    compressed via compress_queued_jobs when pbspro.compress_jobs is enabled).
    '''
    scheduler_config = self.driver.scheduler_config()
    # copy the scheduler's resource list so the appends below don't mutate the config
    scheduler_resources = [] + scheduler_config["resources"]
    # special case for hostname so we can automatically place jobs onto the appropriate host
    scheduler_resources.append("hostname")
    scheduler_resources.append("instance_id")

    group_jobs = not self.disable_grouping

    running_autoscale_jobs = []
    idle_autoscale_jobs = []

    # get the raw string outputs first, and convert it second. This somewhat limits the
    # race condition of asking the status of the queue twice.
    running_raw_jobs_str, running_converter = self.driver.running_jobs()
    queued_raw_jobs_str, queued_converter = self.driver.queued_jobs()

    running_raw_jobs = running_converter(running_raw_jobs_str)
    queued_raw_jobs = queued_converter(queued_raw_jobs_str)

    raw_jobs = []

    # Split running jobs so there is one raw job per execution vnode.
    for raw_job in running_raw_jobs:
        # it is only running on a single node
        if '+' not in raw_job["exec_vnode"]:
            raw_jobs.append(raw_job)
            continue

        # multi-vnode job: emit one copy per vnode ("a+b+c" -> three records)
        for vnode in raw_job["exec_vnode"].split("+"):
            sub_raw_job = deepcopy(raw_job)
            sub_raw_job["exec_vnode"] = vnode
            raw_jobs.append(sub_raw_job)

    for raw_job in queued_raw_jobs:
        if not raw_job["resource_list"].get("select"):
            # no select statement - take the job as-is
            raw_jobs.append(raw_job)
        else:
            # pbspro, like many schedulers, allows a varying set requirements for nodes in a single submission.
            # we will break it apart here as if they had split them individually.
            place = raw_job["resource_list"].get("place")
            slot_type = raw_job["resource_list"].get("slot_type")
            chunks = pbscc.parse_select(raw_job)
            for n, chunk in enumerate(chunks):
                # only pay the penalty of copies when we actually have multi-chunk jobs
                sub_raw_job = deepcopy(raw_job)
                if len(chunks) > 1:
                    # give each chunk a unique, derived job id ("1234" -> "1234.0", "1234.1", ...)
                    sub_raw_job["job_id"] = "%s.%d" % (sub_raw_job["job_id"], n)

                # rebuild resource_list from this chunk only, carrying over place/slot_type
                sub_raw_job["resource_list"] = {}
                if place:
                    sub_raw_job["resource_list"]["place"] = place
                if slot_type:
                    sub_raw_job["resource_list"]["slot_type"] = slot_type
                sub_raw_job["resource_list"]["select"] = pbscc.format_select(chunk)

                # the leading number of a select chunk is the node count for that chunk
                chunk["nodect"] = int(chunk["select"])
                if "ncpus" not in chunk:
                    # PBS default when ncpus is omitted from the chunk
                    chunk["ncpus"] = "1"

                for key, value in chunk.iteritems():
                    if key not in ["select", "nodect"]:
                        try:
                            # chunk resources are per-node; scale to a job total here.
                            # (the per-Job loop further below divides totals back by nodect)
                            value = pbscc.parse_gb_size(key, value) * chunk["nodect"]
                        except InvalidSizeExpressionError:
                            # non-numeric resource (e.g. a string flag) - keep as-is
                            pass
                        sub_raw_job["resource_list"][key] = value

                sub_raw_job["nodect"] = sub_raw_job["resource_list"]["nodect"] = chunk["nodect"]
                raw_jobs.append(sub_raw_job)

    # NOTE(review): nothing below ever adds to this set, so the error loop near the
    # end is currently dead code - presumably a leftover collection point for
    # per-job warnings. Confirm before removing.
    warnings = set()

    # only queued/running/batch jobs are relevant for autoscale decisions
    raw_jobs = [x for x in raw_jobs if x["job_state"].upper() in [pbscc.JOB_STATE_QUEUED, pbscc.JOB_STATE_RUNNING, pbscc.JOB_STATE_BATCH]]

    for raw_job in raw_jobs:
        pbs_job = mockpbs.mock_job(raw_job)
        nodect = int(pbs_job.Resource_List["nodect"])

        if pbs_job["job_state"].upper() == pbscc.JOB_STATE_RUNNING:
            # update running job
            live_resources = pbscc.parse_exec_vnode(raw_job["exec_vnode"])
            for key, value in live_resources.iteritems():
                # live resources are calculated on a per node basis, but the Resource_List is based
                # on a total basis.
                # we will normalize this below
                if isinstance(value, numbers.Number):
                    pbs_job.Resource_List[key] = value * nodect
                else:
                    pbs_job.Resource_List[key] = value
            pbs_job["executing_hostname"] = live_resources["hostname"]

        is_array = bool(pbs_job.get("array", False))
        # integer (floor) division under Python 2: total ncpus spread across nodect nodes
        slots_per_job = int(pbs_job.Resource_List['ncpus']) / nodect

        slot_type = pbs_job.Resource_List["slot_type"]  # can be None, similar to {}.get("key"). It is a pbs class.
        pbscc.info("found slot_type %s." % slot_type)

        placement = pbscc.parse_place(pbs_job.Resource_List.get("place"))
        # Note: not sure we will ever support anything but group_id for autoscale purposes.
        # User could pick, say, group=host, which implies an SMP job, not a parallel job.
        if placement.get("grouping", "group=group_id") != "group=group_id":
            placement.pop("grouping")

        if placement.get("arrangement", "").lower() in ["scatter", "vscatter"]:
            pack = "scatter"
        else:
            pack = "pack"

        exclusive = placement.get("sharing", "").lower() in ["excl", "exclhost"]
        # we may need to support sharing at some point, but it seems that we can ignore it for now.
        _shared = placement.get("sharing") in ["sharing"]
        placeby = placement.get("grouping")

        autoscale_job = Job(name=pbs_job["job_id"],
                            nodearray=slot_type,
                            nodes=nodect,
                            packing_strategy=pack,
                            exclusive=exclusive,
                            resources={"ncpus": 0},
                            executing_hostname=pbs_job.get("executing_hostname"))
        if placeby:
            # "group=group_id" -> placeby attribute "group_id"
            autoscale_job.placeby = placeby.split("=", 1)[-1]

        if is_array:
            array_count = 0
            array_tasks = raw_job["array_state_count"]
            # Only grab the first two array task states (queued and running)
            for ajob in str(array_tasks).split(" ")[:2]:
                array_count += int(ajob.split(":")[1])
            # Multiply the number of cpus needed by number of tasks in the array
            if array_count != 0:
                slots_per_job *= array_count
        else:
            array_count = 1

        # If it's an MPI job and grouping is enabled
        # we want to use a grouped autoscale_job to get tightly coupled nodes
        if group_jobs and placement.get("grouping"):
            autoscale_job['grouped'] = True
            autoscale_job["nodes"] *= array_count
            autoscale_job.placeby_value = "single"
        elif is_array:
            autoscale_job["nodes"] *= array_count

        autoscale_job.ncpus += slots_per_job

        for attr, value in pbs_job.Resource_List.iteritems():
            if attr not in scheduler_resources:
                # if it isn't a scheduler level attribute, don't bother
                # considering it for autoscale as the scheduler won't respect it either.
                continue

            try:
                # Resource_List holds job totals; convert back to a per-node amount
                value = pbscc.parse_gb_size(attr, value)
                value = value / nodect
            except InvalidSizeExpressionError:
                # not a size - coerce boolean-looking strings, leave everything else alone
                if value.lower() in ["true", "false"]:
                    value = value.lower() == "true"

            autoscale_job.resources[attr] = value

        if raw_job["job_state"] == pbscc.JOB_STATE_QUEUED:
            idle_autoscale_jobs.append(autoscale_job)
        else:
            running_autoscale_jobs.append(autoscale_job)

    # see NOTE(review) above: warnings is never populated, so this loop never runs
    for warning in warnings:
        format_string, values = warning[0], warning[1:]
        pbscc.error(format_string % values)

    # leave an option for disabling this in case it causes issues.
    if self.cc_config.get("pbspro.compress_jobs", False):
        all_autoscale_jobs = running_autoscale_jobs + compress_queued_jobs(idle_autoscale_jobs)
    else:
        all_autoscale_jobs = running_autoscale_jobs + idle_autoscale_jobs

    return all_autoscale_jobs
def autoscale(self):
    '''
    The main loop described at the top of this class. Returns machine_requests, idle_machines and total_machines
    for ease of unit testing.

    One cycle does, in order:
      1. match jobs (from query_jobs) onto existing/new machines,
      2. request new machines via the CycleCloud autoscale API,
      3. shut down instances already flagged for termination,
      4. set idle machines offline based on two idle-time thresholds.
    '''
    pbscc.info("Begin autoscale cycle")
    nodearray_definitions = self.fetch_nodearray_definitions()

    pbsnodes_by_hostname, existing_machines, booting_instance_ids, instance_ids_to_shutdown = self.get_existing_machines(nodearray_definitions)

    start_enabled = "true" == str(self.cc_config.get("cyclecloud.cluster.autoscale.start_enabled", "true")).lower()
    if not start_enabled:
        pbscc.warn("cyclecloud.cluster.autoscale.start_enabled is false, new machines will not be allocated.")

    autoscaler = autoscalerlib.Autoscaler(nodearray_definitions, existing_machines, self.default_placement_attrs, start_enabled)

    # throttle how many jobs we attempt to match. When pbspro.compress_jobs is true (default) this shouldn't really be an issue
    # unless the user has over $pbspro.max_unmatched_jobs unique sets of requirements.
    max_unmatched_jobs = int(self.cc_config.get("pbspro.max_unmatched_jobs", 10000))
    unmatched_jobs = 0

    for job in self.query_jobs():
        if job.executing_hostname:
            # job is already running somewhere: pin it to that machine.
            # force=True because the scheduler already made this placement decision.
            try:
                autoscaler.get_machine(hostname=job.executing_hostname).add_job(job, force=True)
                continue
            except RuntimeError as e:
                # couldn't pin to the executing host; log and fall through to normal matching
                pbscc.error(str(e))
                pass

        if not autoscaler.add_job(job):
            unmatched_jobs += 1
            pbscc.info("Can not match job %s." % job.name)
            if max_unmatched_jobs > 0 and unmatched_jobs >= max_unmatched_jobs:
                pbscc.warn('Maximum number of unmatched jobs reached - %s. To configure this setting, change {"pbspro": "max_unmatched_jobs": N}} in %s' % (unmatched_jobs, pbscc.CONFIG_PATH))
                break

    machine_requests = autoscaler.get_new_machine_requests()
    idle_machines = autoscaler.get_idle_machines()

    # annotate each request set so new nodes boot with the right pbspro config
    autoscale_request = autoscale_util.create_autoscale_request(machine_requests)
    for request_set in autoscale_request["sets"]:
        configuration = request_set["nodeAttributes"]["Configuration"]
        if "pbspro" not in configuration:
            configuration["pbspro"] = {}
        configuration["pbspro"]["slot_type"] = request_set["nodearray"]
        # grouped == allocated inside a placement group (tightly coupled / MPI)
        if not request_set.get("placementGroupId"):
            configuration["pbspro"]["is_grouped"] = False
        else:
            configuration["pbspro"]["is_grouped"] = True

    autoscale_util.scale_up(self.clusters_api, autoscale_request)

    for r in machine_requests:
        if r.placeby_value:
            pbscc.info("Requesting %d %s machines in placement group %s for nodearray %s" % (r.instancecount, r.machinetype, r.placeby_value, r.nodearray))
        else:
            pbscc.info("Requesting %d %s machines in nodearray %s" % (r.instancecount, r.machinetype, r.nodearray))

    if pbscc.is_fine():
        pbscc.fine("New target state of the cluster, including booting nodes:")
        for m in autoscaler.machines:
            pbscc.fine("    %s" % str(m))

    if instance_ids_to_shutdown:
        # terminate in CycleCloud first, then remove the hosts from PBS
        pbscc.info("Shutting down instance ids %s" % instance_ids_to_shutdown.keys())
        self.clusters_api.shutdown(instance_ids_to_shutdown.keys())

        for hostname in instance_ids_to_shutdown.itervalues():
            pbscc.info("Deleting %s" % hostname)
            self.driver.delete_host(hostname)

    now = self.clock.time()

    stop_enabled = "true" == str(self.cc_config.get("cyclecloud.cluster.autoscale.stop_enabled", "true")).lower()

    if not stop_enabled:
        pbscc.warn("cyclecloud.cluster.autoscale.stop_enabled is false, idle machines will not be terminated")

    if stop_enabled:
        # two thresholds: a long one for nodes that have never run a job
        # (idle_time_before_jobs) and a short one for nodes that finished
        # their jobs (idle_time_after_jobs).
        idle_before_threshold = float(self.cc_config.get("cyclecloud.cluster.autoscale.idle_time_before_jobs", 3600))
        idle_after_threshold = float(self.cc_config.get("cyclecloud.cluster.autoscale.idle_time_after_jobs", 300))

        for m in idle_machines:
            # skip machines CycleCloud doesn't know about (e.g. already terminated)
            if m.get_attr("instance_id", "") not in booting_instance_ids:
                pbscc.debug("Could not find instance id in CycleCloud %s" % m.get_attr("instance_id", ""))
                continue

            pbsnode = pbsnodes_by_hostname.get(m.hostname)

            # the machine may not have converged yet, so
            if pbsnode:
                if "busy" in pbsnode["state"]:
                    if "down" in pbsnode["state"]:
                        # down-but-busy: warn, but still allow the idle checks below
                        pbscc.warn("WARNING: %s is down but busy with jobs %s", m.hostname, pbsnode.get("jobs", []))
                    else:
                        # genuinely busy - our idle determination was wrong, keep the node
                        pbscc.error("WARNING: Falsely determined that %s is idle!" % m.hostname)
                        continue

                last_state_change_time = pbsnode["last_state_change_time"]
                last_used_time = pbsnode.get("last_used_time")
                if last_used_time:
                    # last_used_time can be stale while a job is exiting, e.g. last_state_change_time could be < 5 minutes but
                    # somehow last_used_time > 5 minutes, causing us to prematurely terminate the node just because a job took a long time
                    # to exit.
                    last_used_time = max(last_state_change_time, last_used_time)
                else:
                    # never used: treat "now" as last use so only the longer
                    # idle_before_threshold (via last_state_change_time) applies
                    last_used_time = self.clock.time()

                if now - last_used_time > idle_after_threshold:
                    pbscc.info("Setting %s offline after %s seconds" % (m.hostname, now - last_used_time))
                    self.driver.set_offline(m.hostname)
                elif now - last_state_change_time > idle_before_threshold:
                    pbscc.info("Setting %s offline after %s seconds" % (m.hostname, now - last_state_change_time))
                    self.driver.set_offline(m.hostname)

    pbscc.info("End autoscale cycle")
    # returned for testing purposes
    return machine_requests, idle_machines, autoscaler.machines