コード例 #1
0
ファイル: planning.py プロジェクト: msimonin/execo
def show_resources(resources,
                   msg='Resources',
                   max_resources=None,
                   queues='default'):
    """Log a per-site / per-cluster summary of ``resources``.

    One line is produced per site that either appears in ``resources`` or
    owns at least one cluster appearing in ``resources``; a final line gives
    the ``'grid5000'`` entry when present, otherwise the sum of the cluster
    counts.  The whole text is emitted through a single ``logger.info`` call.

    :param resources: a dict whose keys are element names ('grid5000',
      site or cluster) and whose values are the counts to display

    :param msg: the title placed on the first line of the output

    :param max_resources: an optional dict with the same keys as
      ``resources``; when an element is found in it, ``/<value>`` is
      appended after the element

    :param queues: forwarded to ``get_site_clusters`` to select the
      clusters listed for each site
    """
    if not max_resources:
        max_resources = {}

    def _quota(element):
        # "/<max>" suffix when a maximum is known for the element, else ''.
        if element in max_resources:
            return '/' + str(max_resources[element])
        return ''

    total_hosts = 0
    parts = [style.log_header(msg), '\n']

    for site in get_g5k_sites():
        site_added = False
        if site in resources:
            parts.append(style.log_header(site).ljust(20) + ' '
                         + str(resources[site]) + _quota(site) + ' ')
            site_added = True

        clusters = list(get_site_clusters(site, queues=queues))
        # The site header is also needed when only some of its clusters are
        # requested.  This test does not depend on the cluster being
        # displayed, so it is evaluated once per site rather than once per
        # cluster.
        if (clusters and not site_added
                and set(get_site_clusters(site)).intersection(resources)):
            parts.append(style.log_header(site).ljust(20)
                         + _quota(site) + ' ')
            site_added = True

        for cluster in clusters:
            if cluster in resources:
                parts.append(style.emph(cluster) + ': '
                             + str(resources[cluster])
                             + _quota(cluster) + ' ')
                # Only cluster counts contribute to the total.
                total_hosts += resources[cluster]
        if site_added:
            parts.append('\n')

    if 'grid5000' in resources:
        parts.append(style.log_header('Grid5000').ljust(20)
                     + str(resources['grid5000']) + _quota('grid5000'))
    elif total_hosts > 0:
        parts.append(style.log_header('Total ').ljust(20) + str(total_hosts))
    logger.info(''.join(parts))
コード例 #2
0
ファイル: charter.py プロジェクト: mickours/execo
 def __site_charter_remaining(site, day, user = None):
     """Compute the charter quota left on each cluster of ``site`` for ``day``.

     Connects to the site's OAR database through a port forwarder, sums the
     usage of the user's jobs that intersect the charter period and stores
     ``max(0, quota - used)`` for every cluster in the ``remaining`` mapping
     carried by the current thread object.  Nothing is returned.

     :param site: site name, used to build the ``oardb.<site>.grid5000.fr``
       database host name

     :param day: day handed to ``get_oar_day_start_end`` to get the period
       for which jobs are fetched

     :param user: user whose jobs are counted; when empty, falls back to the
       ``api_username`` configuration entry, then to the ``LOGNAME``
       environment variable

     Any exception is caught and logged; none is propagated to the caller.
     """
     try:
         with G5kAutoPortForwarder(site,
                                   'oardb.' + site + '.grid5000.fr',
                                   g5k_configuration['oar_pgsql_ro_port']) as (host, port):
             start, end = get_oar_day_start_end(day)
             if not user:
                 user = g5k_configuration.get('api_username')
                 if not user:
                     user = os.environ['LOGNAME']
             conn = psycopg2.connect(host = host, port = port,
                                     user = g5k_configuration['oar_pgsql_ro_user'],
                                     password = g5k_configuration['oar_pgsql_ro_password'],
                                     database = g5k_configuration['oar_pgsql_ro_db'])
             try:
                 logger.trace("getting jobs for user %s on %s for %s" % (user, site, day))
                 OOC_total_site_used = 0
                 OOC_site_quota = 0
                 for cluster in get_site_clusters(site):
                     total_cluster_used = 0
                     for j in _get_jobs(conn, cluster, user, start, end):
                         # j[7] / j[8] / j[6] are logged as start, end and walltime.
                         logger.trace("%s:%s - job: start %s, end %s, walltime %s, %s" % (
                                 site, cluster, format_unixts(j[7]),
                                 format_unixts(j[8]), format_seconds(j[6]), j))
                         if _job_intersect_charter_period(j):
                             # NOTE(review): j[9] + j[10] are presumably two
                             # resource counts of the job -- confirm against
                             # the query in _get_jobs.
                             cluster_used = (j[9] + j[10]) * j[6]
                             logger.trace("%s:%s job %i intersects charter -> uses %is of cluster quota" % (
                                     site, cluster, j[0], cluster_used,))
                             total_cluster_used += cluster_used
                     # Quota: 2 hours (in seconds) per available core.
                     cluster_quota = _cluster_num_available_cores(conn, cluster) * 3600 * 2
                     logger.trace("%s:%s total cluster used = %i (%s), cluster quota = %i (%s)" % (
                             site, cluster,
                             total_cluster_used, format_seconds(total_cluster_used),
                             cluster_quota, format_seconds(cluster_quota)))
                     threading.currentThread().remaining[cluster] = max(0, cluster_quota - total_cluster_used)
                     OOC_total_site_used += total_cluster_used
                     OOC_site_quota += cluster_quota
                 # A site without clusters or without available cores has a
                 # zero quota; do not let the purely informative percentage
                 # raise ZeroDivisionError and be reported as a database error.
                 if OOC_site_quota > 0:
                     used_percent = int(float(OOC_total_site_used) / float(OOC_site_quota) * 100.0)
                 else:
                     used_percent = 0
                 logger.trace("%s to compare with outofchart: total used = %i (%s), %i%% of site quota = %i (%s)" % (
                         site,
                         OOC_total_site_used, format_seconds(OOC_total_site_used),
                         used_percent,
                         OOC_site_quota, format_seconds(OOC_site_quota)))
             finally:
                 conn.close()
     except Exception:
         logger.warn("error connecting to oar database / getting planning from " + site)
         logger.detail("exception:\n" + format_exc())
コード例 #3
0
ファイル: charter.py プロジェクト: msimonin/execo
 def __site_charter_remaining(site, day, user = None):
     """Store, for each cluster of ``site``, the charter quota still free on ``day``.

     The site's OAR database is reached through a port forwarder.  For every
     cluster, the usage of the user's jobs intersecting the charter period is
     summed and ``max(0, quota - used)`` is written into the ``remaining``
     mapping of the current thread object.  The function returns nothing.

     :param site: site name, used to build the ``oardb.<site>.grid5000.fr``
       database host name

     :param day: day handed to ``get_oar_day_start_end`` to get the period
       for which jobs are fetched

     :param user: user whose jobs are counted; when empty, falls back to the
       ``api_username`` configuration entry, then to the ``LOGNAME``
       environment variable

     Exceptions are caught and logged here; none reaches the caller.
     """
     try:
         with G5kAutoPortForwarder(site,
                                   'oardb.' + site + '.grid5000.fr',
                                   g5k_configuration['oar_pgsql_ro_port']) as (host, port):
             start, end = get_oar_day_start_end(day)
             if not user:
                 user = g5k_configuration.get('api_username')
                 if not user:
                     user = os.environ['LOGNAME']
             conn = psycopg2.connect(host = host, port = port,
                                     user = g5k_configuration['oar_pgsql_ro_user'],
                                     password = g5k_configuration['oar_pgsql_ro_password'],
                                     database = g5k_configuration['oar_pgsql_ro_db'])
             try:
                 logger.trace("getting jobs for user %s on %s for %s" % (user, site, day))
                 OOC_total_site_used = 0
                 OOC_site_quota = 0
                 for cluster in get_site_clusters(site):
                     total_cluster_used = 0
                     for j in _get_jobs(conn, cluster, user, start, end):
                         # Fields 7, 8 and 6 are logged as start, end and walltime.
                         logger.trace("%s:%s - job: start %s, end %s, walltime %s, %s" % (
                                 site, cluster, format_unixts(j[7]),
                                 format_unixts(j[8]), format_seconds(j[6]), j))
                         if _job_intersect_charter_period(j):
                             # NOTE(review): fields 9 and 10 look like two
                             # resource counts of the job -- verify against
                             # the query in _get_jobs.
                             cluster_used = (j[9] + j[10]) * j[6]
                             logger.trace("%s:%s job %i intersects charter -> uses %is of cluster quota" % (
                                     site, cluster, j[0], cluster_used,))
                             total_cluster_used += cluster_used
                     # Each available core grants 2 hours, expressed in seconds.
                     cluster_quota = _cluster_num_available_cores(conn, cluster) * 3600 * 2
                     logger.trace("%s:%s total cluster used = %i (%s), cluster quota = %i (%s)" % (
                             site, cluster,
                             total_cluster_used, format_seconds(total_cluster_used),
                             cluster_quota, format_seconds(cluster_quota)))
                     threading.currentThread().remaining[cluster] = max(0, cluster_quota - total_cluster_used)
                     OOC_total_site_used += total_cluster_used
                     OOC_site_quota += cluster_quota
                 # With a zero site quota (no cluster, or no available core)
                 # the ratio is undefined: report 0% instead of raising
                 # ZeroDivisionError, which the handler below would log as a
                 # database connection problem.
                 if OOC_site_quota > 0:
                     used_percent = int(float(OOC_total_site_used) / float(OOC_site_quota) * 100.0)
                 else:
                     used_percent = 0
                 logger.trace("%s to compare with outofchart: total used = %i (%s), %i%% of site quota = %i (%s)" % (
                         site,
                         OOC_total_site_used, format_seconds(OOC_total_site_used),
                         used_percent,
                         OOC_site_quota, format_seconds(OOC_site_quota)))
             finally:
                 conn.close()
     except Exception:
         logger.warn("error connecting to oar database / getting planning from " + site)
         logger.detail("exception:\n" + format_exc())
コード例 #4
0
ファイル: planning.py プロジェクト: msimonin/execo
def _set_colors():
    """Build the color table used for sites and clusters.

    Returns a dict mapping ``'busy'`` to a hex color string, every site name
    to an RGB tuple of floats taken from a fixed 10-entry palette, and every
    cluster name to a variation of its site color: the weakest channel of
    the site color is raised by 0.12 per cluster, in sorted cluster order.
    """
    colors = {'busy': '#666666'}
    palette = [(r / 255., g / 255., b / 255.) for r, g, b in
               [(255., 122., 122.), (255., 204., 122.), (255., 255., 122.),
                (255., 246., 153.), (204., 255., 122.), (122., 255., 122.),
                (122., 255., 255.), (122., 204., 255.), (204., 188., 255.),
                (255., 188., 255.)]]

    for i_site, site in enumerate(sorted(get_g5k_sites())):
        # Wrap around the palette so that an 11th site reuses the first
        # color instead of raising IndexError.
        site_color = palette[i_site % len(palette)]
        colors[site] = site_color
        # Index of the weakest channel (first one on ties).
        min_index = site_color.index(min(site_color))
        clusters = sorted(get_site_clusters(site, queues=None))
        for i_cluster, cluster in enumerate(clusters):
            color = list(site_color)
            # Clamp after the increment: channels must stay within [0, 1].
            color[min_index] = min(color[min_index] + i_cluster * 0.12, 1.)
            colors[cluster] = tuple(color)

    return colors
コード例 #5
0
def get_vms_slot(vms, elements, slots, excluded_elements=None):
    """Return the first slot offering enough RAM and CPU for the given VMs.

    :param vms: a non-empty list of dicts, each with ``'mem'`` and
      ``'n_cpu'`` entries

    :param elements: names of the elements to use: ``'grid5000'``, sites
      and/or clusters; ``'kavlan'`` additionally requests one KaVLAN

    :param slots: an iterable of slots; ``slot[0]`` is returned as the first
      value and ``slot[2]`` is a dict mapping element names to host counts

    :param excluded_elements: optional list of sites or clusters that must
      not be used; the list given by the caller is not modified

    Return ``(chosen_slot[0], distribute_hosts(...))`` for the first suitable
    slot, or ``(None, None)`` when no slot is large enough.  The ``slot[2]``
    dict of the chosen slot is decremented in place while hosts are picked.
    """
    chosen_slot = None
    mem = vms[0]['mem']
    cpu = vms[0]['n_cpu']
    req_ram = sum([vm['mem'] for vm in vms])
    # Only a third of the requested virtual CPUs is required from the hosts.
    req_cpu = sum([vm['n_cpu'] for vm in vms]) / 3
    logger.debug('RAM %s CPU %s', req_ram, req_cpu)

    # Work on a private list: the default is None, and the caller's list
    # must not grow as a side effect of the expansion below.
    excluded_elements = list(excluded_elements) if excluded_elements else []
    if excluded_elements:
        g5k_sites = get_g5k_sites()
        # Excluding a site excludes all of its clusters.
        for element in list(excluded_elements):
            if element in g5k_sites:
                excluded_elements += [
                    cluster for cluster in get_site_clusters(element)
                    if cluster not in excluded_elements
                ]

    if 'grid5000' in elements:
        clusters = [
            cluster for cluster in get_g5k_clusters()
            if cluster not in excluded_elements
            and get_cluster_site(cluster) not in excluded_elements
        ]
    else:
        g5k_clusters = get_g5k_clusters()
        g5k_sites = get_g5k_sites()
        clusters = [
            element for element in elements if element in g5k_clusters
            and element not in excluded_elements
        ]
        for element in elements:
            if element in g5k_sites:
                clusters += [
                    cluster for cluster in get_site_clusters(element)
                    if cluster not in clusters
                    and cluster not in excluded_elements
                ]

    for slot in slots:
        hosts = []
        for element in slot[2]:
            if str(element) in clusters:
                n_hosts = slot[2][element]
                # The first node of the cluster stands for each of its hosts.
                for _ in range(n_hosts):
                    hosts.append(Host(str(element + '-1.' +
                                          get_cluster_site(element) +
                                          '.grid5000.fr')))
        attr = get_CPU_RAM_FLOPS(hosts)['TOTAL']

        if attr['CPU'] > req_cpu and attr['RAM'] > req_ram:
            chosen_slot = slot
            break

    if chosen_slot is None:
        return None, None

    resources_needed = {}
    resources_available = chosen_slot[2]
    logger.debug('resources available' + pformat(resources_available))
    # Take hosts from the clusters in turn until both needs are covered.
    iter_clusters = cycle(clusters)
    while req_ram > 0 or req_cpu > 0:
        # Built-in next() works on both Python 2 and Python 3 iterators.
        cluster = next(iter_clusters)
        if resources_available[cluster] == 0:
            # Exhausted cluster: drop it and restart the rotation.
            clusters.remove(cluster)
            iter_clusters = cycle(clusters)
        else:
            host = cluster + '-1'
            attr = get_CPU_RAM_FLOPS([host])
            resources_available[cluster] -= 1
            req_ram -= float(attr[host]['RAM'] / mem) * mem
            req_cpu -= float(attr[host]['CPU'] / cpu) * cpu

            if cluster not in resources_needed:
                resources_needed[cluster] = 0
            resources_needed[cluster] += 1

    if 'kavlan' in elements:
        resources_needed['kavlan'] = 1

    logger.debug('resources needed' + pformat(resources_needed))
    return chosen_slot[0], distribute_hosts(chosen_slot[2], resources_needed,
                                            excluded_elements)
コード例 #6
0
ファイル: utils.py プロジェクト: badock/vm5k
def get_vms_slot(vms, elements, slots, excluded_elements=None):
    """Pick the first slot with enough RAM and CPU to host ``vms``.

    :param vms: a non-empty list of dicts, each with ``'mem'`` and
      ``'n_cpu'`` entries

    :param elements: names of the elements to use: ``'grid5000'``, sites
      and/or clusters; ``'kavlan'`` additionally requests one KaVLAN

    :param slots: an iterable of slots; ``slot[0]`` is returned as the first
      value and ``slot[2]`` is a dict mapping element names to host counts

    :param excluded_elements: optional list of sites or clusters that must
      not be used; the list given by the caller is left untouched

    Return ``(chosen_slot[0], distribute_hosts(...))`` for the first suitable
    slot, or ``(None, None)`` when no slot is large enough.  The ``slot[2]``
    dict of the chosen slot is decremented in place while hosts are picked.
    """
    chosen_slot = None
    mem = vms[0]['mem']
    cpu = vms[0]['n_cpu']
    req_ram = sum([vm['mem'] for vm in vms])
    # The hosts only have to provide a third of the requested virtual CPUs.
    req_cpu = sum([vm['n_cpu'] for vm in vms]) / 3
    logger.debug('RAM %s CPU %s', req_ram, req_cpu)

    # Private copy: the parameter defaults to None, and expanding excluded
    # sites into clusters must not alter the list owned by the caller.
    excluded_elements = list(excluded_elements) if excluded_elements else []
    if excluded_elements:
        g5k_sites = get_g5k_sites()
        for element in list(excluded_elements):
            if element in g5k_sites:
                # An excluded site excludes every one of its clusters.
                excluded_elements += [cluster for cluster
                                      in get_site_clusters(element)
                                      if cluster not in excluded_elements]

    if 'grid5000' in elements:
        clusters = [cluster for cluster in get_g5k_clusters()
                    if cluster not in excluded_elements
                    and get_cluster_site(cluster) not in excluded_elements]
    else:
        g5k_clusters = get_g5k_clusters()
        g5k_sites = get_g5k_sites()
        clusters = [element for element in elements
                    if element in g5k_clusters
                    and element not in excluded_elements]
        for element in elements:
            if element in g5k_sites:
                clusters += [cluster
                             for cluster in get_site_clusters(element)
                             if cluster not in clusters
                             and cluster not in excluded_elements]

    for slot in slots:
        hosts = []
        for element in slot[2]:
            if str(element) in clusters:
                n_hosts = slot[2][element]
                # Each host is represented by the first node of its cluster.
                for _ in range(n_hosts):
                    hosts.append(Host(str(element + '-1.' +
                                          get_cluster_site(element) +
                                          '.grid5000.fr')))
        attr = get_CPU_RAM_FLOPS(hosts)['TOTAL']

        if attr['CPU'] > req_cpu and attr['RAM'] > req_ram:
            chosen_slot = slot
            break

    if chosen_slot is None:
        return None, None

    resources_needed = {}
    resources_available = chosen_slot[2]
    logger.debug('resources available' + pformat(resources_available))
    # Round-robin over the clusters until RAM and CPU needs are both met.
    iter_clusters = cycle(clusters)
    while req_ram > 0 or req_cpu > 0:
        # Built-in next() is valid for Python 2 and Python 3 iterators alike.
        cluster = next(iter_clusters)
        if resources_available[cluster] == 0:
            # Nothing left on this cluster: remove it and restart the cycle.
            clusters.remove(cluster)
            iter_clusters = cycle(clusters)
        else:
            host = cluster + '-1'
            attr = get_CPU_RAM_FLOPS([host])
            resources_available[cluster] -= 1
            req_ram -= float(attr[host]['RAM'] / mem) * mem
            req_cpu -= float(attr[host]['CPU'] / cpu) * cpu

            if cluster not in resources_needed:
                resources_needed[cluster] = 0
            resources_needed[cluster] += 1

    if 'kavlan' in elements:
        resources_needed['kavlan'] = 1

    logger.debug('resources needed' + pformat(resources_needed))
    return chosen_slot[0], distribute_hosts(chosen_slot[2], resources_needed,
                                            excluded_elements)
コード例 #7
0
ファイル: planning.py プロジェクト: msimonin/execo
def get_planning(elements=['grid5000'],
                 vlan=False,
                 subnet=False,
                 storage=False,
                 out_of_chart=False,
                 starttime=None,
                 endtime=None,
                 ignore_besteffort=True,
                 queues='default'):
    """Retrieve the planning of the elements (site, cluster) and others resources.
    Element planning structure is ``{'busy': [(123456,123457), ... ], 'free': [(123457,123460), ... ]}.``

    :param elements: a list of Grid'5000 elements ('grid5000', <site>,
      <cluster>, <host>); the list is only read, never modified

    :param vlan: a boolean to ask for KaVLAN computation

    :param subnet: a boolean to ask for subnets computation

    :param storage: a boolean to ask for storage computation

    :param out_of_chart: if True, consider that days outside weekends are busy

    :param starttime: start of time period for which to compute the planning, defaults to now + 1 minute

    :param endtime: end of time period for which to compute the planning,
      defaults to 4 weeks and 1 minute after starttime

    :param ignore_besteffort: True by default, to consider the resources with besteffort jobs as available

    :param queues: list of oar queues for which to get the planning

    Return a dict whose keys are sites, whose values are dict whose keys
    are cluster, subnets, kavlan or storage,
    whose values are planning dicts, whose keys are hosts, subnet address range,
    vlan number or chunk id planning respectively.
    Return None (after logging an error) when no site matches ``elements``.
    """
    if not starttime:
        starttime = int(time() + timedelta_to_seconds(timedelta(minutes=1)))
    starttime = int(get_unixts(starttime))
    if not endtime:
        endtime = int(starttime +
                      timedelta_to_seconds(timedelta(weeks=4, minutes=1)))
    endtime = int(get_unixts(endtime))

    if 'grid5000' in elements:
        sites = elements = get_g5k_sites()
    else:
        # Fetch each reference list once instead of once per element.
        known_sites = get_g5k_sites()
        known_clusters = get_g5k_clusters(queues=queues)
        known_hosts = get_g5k_hosts()
        # An element may designate a site, a cluster or a host (long or
        # short name); the set removes sites reached several times.
        sites = list(set(
            [site for site in elements if site in known_sites]
            + [get_cluster_site(cluster) for cluster in elements
               if cluster in known_clusters]
            + [get_host_site(host) for host in elements
               if host in known_hosts
               or get_host_shortname(host) in known_hosts]))
    if len(sites) == 0:
        logger.error('Wrong elements given: %s' % (elements, ))
        return None

    # Empty skeleton, filled in place by the retrieval functions below.
    planning = {}
    for site in sites:
        planning[site] = {}
        for cluster in get_site_clusters(site, queues=queues):
            planning[site][cluster] = {}

    for site in sites:
        if vlan:
            planning[site].update({'vlans': {}})
        if subnet:
            planning[site].update({'subnets': {}})
        if storage:
            planning[site].update({'storage': {}})

    if _retrieve_method == 'API':
        _get_planning_API(planning, ignore_besteffort)
    elif _retrieve_method == 'PostgreSQL':
        _get_planning_PGSQL(planning, ignore_besteffort)

    if out_of_chart:
        _add_charter_to_planning(planning, starttime, endtime)

    # Normalize every busy list, then derive the free periods from it.
    for site_pl in planning.values():
        for res_pl in site_pl.values():
            for el_planning in res_pl.values():
                el_planning['busy'].sort()
                _merge_el_planning(el_planning['busy'])
                _trunc_el_planning(el_planning['busy'], starttime, endtime)
                _fill_el_planning_free(el_planning, starttime, endtime)

    # cleaning: iterate over the original while deleting from a deep copy,
    # keeping only the hosts that match one of the requested elements.
    real_planning = deepcopy(planning)
    for site, site_pl in planning.items():
        for cl, cl_pl in site_pl.items():
            if cl in ['vlans']:
                continue
            keep_cluster = False
            for h in cl_pl:
                if not (get_host_site(h) in elements
                        or get_host_cluster(h) in elements
                        or get_host_shortname(h) in elements or h in elements):
                    del real_planning[site][cl][h]
                else:
                    keep_cluster = True
            if not keep_cluster:
                del real_planning[site][cl]

    return real_planning
コード例 #8
0
ファイル: planning.py プロジェクト: msimonin/execo
def get_jobs_specs(resources, excluded_elements=None, name=None):
    """ Generate the several job specifications from the dict of resources and
    the blacklisted elements

    :param resources: a dict, whose keys are Grid'5000 element and values the
      corresponding number of n_nodes

    :param excluded_elements: a list of elements that won't be used; entries
      that are neither a cluster nor a site are treated as hosts

    :param name: the name of the jobs that will be given

    Return a list of ``(OarSubmission, site)`` tuples, at most one per site.
    """
    jobs_specs = []
    if excluded_elements is None:
        excluded_elements = []

    # Reference lists, fetched once instead of once per tested element.
    g5k_sites = get_g5k_sites()
    g5k_clusters = get_g5k_clusters(queues=None)

    # Creating the list of sites used
    sites = []
    real_resources = resources.copy()
    for resource in resources:
        if resource in g5k_sites and resource not in sites:
            sites.append(resource)
        if resource in g5k_clusters:
            if resource not in excluded_elements:
                site = get_cluster_site(resource)
                if site not in sites:
                    sites.append(site)
                # A site used only through its clusters gets no site-level
                # nodes.
                if site not in real_resources:
                    real_resources[site] = 0

    # Checking if we need a Kavlan, a KaVLAN global or none
    get_kavlan = 'kavlan' in resources
    if get_kavlan:
        kavlan = 'kavlan'
        n_sites = 0
        for resource in real_resources:
            if resource in sites:
                n_sites += 1
            if n_sites > 1:
                kavlan += '-global'
                break

    # Group the blacklisted hosts by site.  The membership test must use the
    # site of the host (not the literal string 'site'), otherwise every new
    # host replaces the ones already recorded for that site.
    blacklisted_hosts = {}
    for element in excluded_elements:
        if element not in g5k_clusters and element not in g5k_sites:
            site = get_host_site(element)
            blacklisted_hosts.setdefault(site, []).append(element)

    for site in sites:
        sub_resources = ''
        # Adding a KaVLAN if needed (only once over all the jobs)
        if get_kavlan:
            if not 'global' in kavlan:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False
            elif site in resources['kavlan']:
                sub_resources = "{type='" + kavlan + "'}/vlan=1+"
                get_kavlan = False

        base_sql = '{'
        end_sql = '}/'

        # Creating blacklist SQL string for hosts; every clause ends with
        # "and " so that further clauses can be appended directly.
        str_hosts = ''
        if site in blacklisted_hosts and len(blacklisted_hosts[site]) > 0:
            str_hosts = ''.join([
                "host not in ('" + get_host_longname(host) + "') and "
                for host in blacklisted_hosts[site]
            ])

        # Adding the clusters blacklist after the hosts blacklist
        str_clusters = str_hosts
        for cluster in get_site_clusters(site, queues=None):
            if cluster in resources and resources[cluster] > 0:
                if str_hosts == '':
                    sub_resources += "{cluster='" + cluster + "'}"
                else:
                    # NOTE(review): end_sql already ends with '/', so this
                    # branch yields "}//nodes=" -- confirm that OAR accepts
                    # the doubled slash.
                    sub_resources += base_sql + str_hosts + "cluster='" + \
                        cluster + "'" + end_sql
                sub_resources += "/nodes=" + str(resources[cluster]) + '+'
            if cluster in excluded_elements:
                str_clusters += "cluster not in ('" + cluster + "') and "

        # Generating the site blacklist string from host and cluster
        # blacklist; [:-4] drops the trailing "and ".
        str_site = ''
        if str_clusters != '':
            str_site = base_sql + str_clusters[:-4] + end_sql

        if real_resources[site] > 0:
            sub_resources += str_site + "nodes=" + str(real_resources[site]) +\
                '+'

        if sub_resources != '':
            # [:-1] drops the trailing '+' separator.
            jobs_specs.append((OarSubmission(resources=sub_resources[:-1],
                                             name=name), site))

    return jobs_specs