コード例 #1
0
def _get_jobs():
    """Collect the queued SGE jobs as a list of autoscale job descriptions.

    Jobs whose state is exactly "h" or "e" are skipped, and so are jobs that
    have no entry in the mapping returned by get_sge_job_details().

    Returns:
        list of dict: one dict per remaining job with the keys 'name',
        'nodearray', 'request_cpus' and 'average_runtime'; multi-slot jobs
        additionally carry 'grouped': True when the
        'cyclecloud.cluster.autoscale.use_node_groups' setting is True.
    """
    details_by_number = get_sge_job_details()
    autoscale_jobs = []

    for queued in get_sge_jobs():

        # Ignore jobs in "held" or "error" states
        if queued["job_state"] in ("h", "e"):
            continue

        number = queued["job_number"]

        # Ignore jobs that are no longer in qstat output - added 20FEB2018 JLouie
        if number not in details_by_number:
            print("PATCHED: Job %s is no longer in qstat details.  Skipping..." % (
                number))
            continue

        detail = details_by_number[number]

        # The requested slot type (if any) selects the nodearray.
        if 'hard_resources' in detail:
            nodearray = detail["hard_resources"].get("slot_type", None)
        else:
            nodearray = None

        # Slots come from the parallel environment minimum; default to 1.
        has_pe_min = 'pe_range' in detail and 'min' in detail['pe_range']
        cpus = int(detail['pe_range']['min']) if has_pe_min else 1

        has_runtime = ('context' in detail
                       and 'average_runtime' in detail['context'])
        runtime = int(detail['context']['average_runtime']) if has_runtime else None

        request = {
            'name': number,
            'nodearray': nodearray,
            'request_cpus': cpus,
            'average_runtime': runtime,
        }

        # If it's an MPI job and grouping is enabled
        # we want to use a grouped request to get tightly coupled nodes
        if cpus > 1:
            if jetpack.config.get(
                    'cyclecloud.cluster.autoscale.use_node_groups') is True:
                request['grouped'] = True

        autoscale_jobs.append(request)

    return autoscale_jobs
コード例 #2
0
def sge_job_handler(job_details):
    """Work out which resource values should be set on a single SGE job.

    Args:
        job_details: mapping describing one job. The keys read here are
            'hard_resources', 'soft_resources', 'pe_range' and 'job_number'.

    Returns:
        dict: resource updates for the job. It may contain 'slot_type'
        (only when the job did not request one) and/or 'affinity_group',
        and may be empty when grouping applies but no free group was found.

    TODO: What are they really allowed to update? Just hard resources "-l <foo>=<bar>"???
    """
    def _requested(job, key):
        """ Look `key` up in the job's hard resources, then its soft resources """
        for section in ('hard_resources', 'soft_resources'):
            if section in job and key in job[section]:
                return job[section][key]
        return None

    updates = {}

    # Fall back to the 'execute' slot type when the job did not request one
    slot_type = _requested(job_details, 'slot_type')
    if slot_type is None:
        slot_type = updates['slot_type'] = 'execute'

    use_groups = jetpack.config.get(
        'cyclecloud.cluster.autoscale.use_node_groups')
    current_group = _requested(job_details, 'affinity_group')

    # A group has to be picked only if grouping is enabled, the job has not
    # chosen one itself, and it is an MPI job (it has a pe_range minimum)
    needs_group = (use_groups
                   and current_group is None
                   and 'pe_range' in job_details
                   and 'min' in job_details['pe_range'])
    if not needs_group:
        # We just use 'default' for the affinity group
        updates['affinity_group'] = current_group or 'default'
        return updates

    cores_wanted = job_details['pe_range']['min']

    # First use of the module-level cache: record which
    # (slot_type, affinity_group) pairs are already claimed by existing jobs
    if not jobs_by_affinity_group:
        for other_id, other in get_sge_job_details().iteritems():
            other_slot = _requested(other, 'slot_type')
            other_group = _requested(other, 'affinity_group')
            if other_slot and other_group:
                jobs_by_affinity_group[(other_slot, other_group)] = other_id

    # Collect every host affinity group that matches this job's slot type
    # and core count before trying to claim one
    complexes = get_host_complexes(
        ['slot_type', 'affinity_group', 'affinity_group_cores'])
    candidates = []
    for hc in complexes.itervalues():
        if hc['slot_type'] != slot_type:
            continue
        if hc['affinity_group'] in (None, 'default'):
            continue
        if int(float(hc['affinity_group_cores'] or 0)) != int(cores_wanted):
            continue
        candidates.append(hc['affinity_group'])

    # Claim the first candidate group that no other job holds yet
    for group in candidates:
        claim = (slot_type, group)
        if claim not in jobs_by_affinity_group:
            jobs_by_affinity_group[claim] = job_details['job_number']
            updates['affinity_group'] = group
            break

    return updates
コード例 #3
0
    return details


# Optional override: /opt/cycle/jetpack/config is added to the module search
# path and, if an `autoscale` module exposing `sge_job_handler` can be
# imported from anywhere on that path, the imported function replaces the
# handler of the same name defined earlier in this file.
try:
    sys.path.append("/opt/cycle/jetpack/config")
    from autoscale import sge_job_handler  # NOTE(review): an earlier note here said this import "should always fail" and referred to a placeholder module "blah"; the code imports from `autoscale` - confirm the intended module name
except ImportError:
    pass  # No override module available: the default function above will be used instead

if __name__ == "__main__":

    print "%s" % datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    pidfile = "/var/run/modify_jobs.pid"
    makePidfile(pidfile)

    job_details = get_sge_job_details()
    # Check for updates to the job

    resources = {}
    for job_detail in job_details.itervalues():
        updates = sge_job_handler(job_detail)
        updates_to_apply = {}
        # Check the updates that were sent back to make sure they aren't already set, no need to reset them
        for k, v in updates.iteritems():
            if k in job_detail['hard_resources'] and job_detail[
                    'hard_resources'][k] == v:
                pass
            else:
                updates_to_apply[k] = v

        if updates_to_apply:
コード例 #4
0
def _get_jobs():
    """Translate the queued SGE jobs into a list of autoscale request dicts.

    One request is produced for every job returned by get_sge_jobs(), except
    jobs whose state contains "h" or "e" and jobs that have no entry in the
    mapping returned by get_sge_job_details().

    A plain request holds 'Name' (the job's slot type) and 'TargetCoreCount'
    (the job's slot count). When the slot type is not "execute" and the job
    context provides 'image_id', the request instead describes a dynamic
    nodearray extending 'execute', optionally with 'MachineType' (context
    'machine_type') and per-node slot/core counts (context 'ppn'). A context
    'autostop' value adds the cyclecloud.cluster.autoscale.stop_enabled
    setting to the request's 'Configuration'.

    Each request is printed before it is added to the result.

    Returns:
        list of dict: the autoscale requests, in job order.
    """
    job_details = get_sge_job_details()
    jobs = get_sge_jobs()

    # NOTE(review): subscription_id and node_config are not used anywhere
    # below. The lookups are kept unchanged because dropping the metadata
    # request or the config read could alter behaviour callers rely on
    # (e.g. failing when the metadata endpoint is unreachable) - confirm and
    # remove if they are truly dead.
    metadata_url = "http://169.254.169.254/metadata/instance?api-version=2017-08-01"
    metadata_req = Request(metadata_url, headers={"Metadata" : True})
    metadata_response = urlopen(metadata_req)
    vm_metadata = json.load(metadata_response)

    subscription_id = vm_metadata["compute"]["subscriptionId"]

    node_config = jetpack.config.get()

    # Process all the jobs
    autoscale_requests = []
    for job in jobs:

        # Ignore jobs in "held" or "error" states
        if "h" in job["job_state"] or "e" in job["job_state"]:
            continue

        job_number = job["job_number"]

        # The job list and the job details are fetched by two separate calls,
        # so a job can be missing from the details. Skip it instead of
        # failing the whole pass with a KeyError.
        if job_number not in job_details:
            print("WARNING: Job %s is no longer in qstat details.  Skipping..." % (
                job_number,))
            continue

        detail = job_details[job_number]
        # Per-job context values; treated as empty when the job has none.
        context = detail['context'] if 'context' in detail else {}

        slot_type = None
        if 'hard_resources' in detail:
            slot_type = detail["hard_resources"].get("slot_type", None)

        slots_per_job = 1
        if 'pe_range' in detail and 'min' in detail['pe_range']:
            slots_per_job = int(detail['pe_range']['min'])

        autoscale_request = {
            'Name': slot_type,
            'TargetCoreCount': slots_per_job
        }

        if slot_type != "execute":
            image_resource_id = context['image_id'] if 'image_id' in context else None

            if image_resource_id is None:
                # Without an image the plain request built above is used.
                print("WARNING: Job %s has non-execute slottype, but application image name not provided. Autoscaling %s nodes." % (job_number, slot_type))
                print("WARNING: Job details %s" % detail)
            else:
                autoscale_request = {
                    'Name': slot_type,
                    'Extends': 'execute',
                    'ImageId': image_resource_id,
                    'TargetCoreCount': slots_per_job,
                    'Configuration': {
                        'gridengine': {
                            'slot_type' : slot_type
                        }
                    },
                    'Dynamic': True
                }

                if 'machine_type' in context:
                    autoscale_request["MachineType"] = context['machine_type']

                if 'ppn' in context:
                    ppn = context['ppn']
                    autoscale_request['Configuration']['gridengine']['slots'] = ppn
                    autoscale_request['CoreCount'] = ppn

        if 'autostop' in context:
            # cyclecloud.cluster.autoscale.stop_enabled
            # Only the literal 'false' (any letter case) disables auto-stop.
            stop_enabled = context['autostop'].lower() != 'false'

            configuration = autoscale_request.setdefault("Configuration", {})
            configuration["cyclecloud"] = {
                'cluster': {
                    'autoscale': {
                        'stop_enabled': stop_enabled
                    }
                }
            }

        print("Autoscale req: %s" % autoscale_request)
        autoscale_requests.append(autoscale_request)

    return autoscale_requests