예제 #1
0
파일: deployment.py 프로젝트: badock/vm5k
 def _actions_hosts(self, action):
     """Report the outcome of an action to the host-state bookkeeping.

     Every process of ``action`` contributes its host to either the OK or
     the KO group depending on ``process.ok``; a warning is logged for each
     KO process.  Both groups are de-duplicated and handed over to
     ``self._update_hosts_state(hosts_ok, hosts_ko)``.
     """
     succeeded, failed = [], []
     for proc in action.processes:
         if not proc.ok:
             logger.warn('%s is KO', proc.host)
             failed.append(proc.host)
             continue
         succeeded.append(proc.host)
     # A host may run several processes: collapse duplicates before reporting.
     self._update_hosts_state(list(set(succeeded)), list(set(failed)))
예제 #2
0
 def _actions_hosts(self, action):
     """Split the hosts of ``action`` into OK / KO and update their state.

     Iterates over ``action.processes``; the host of a process whose ``ok``
     attribute is true goes to the OK list, any other host goes to the KO
     list (with a warning).  Duplicates are removed from both lists before
     they are passed to ``self._update_hosts_state``.
     """
     ok_hosts = []
     ko_hosts = []
     for process in action.processes:
         if process.ok:
             bucket = ok_hosts
         else:
             logger.warn('%s is KO', process.host)
             bucket = ko_hosts
         bucket.append(process.host)
     unique_ok = list(set(ok_hosts))
     unique_ko = list(set(ko_hosts))
     self._update_hosts_state(unique_ok, unique_ko)
예제 #3
0
 def _get_target_lc_and_port(self, equip, linecard_index, port_index, site):
     """Find the remote (linecard, port) a local port is connected to.

     The local port is ``<linecard_index>:<port_index>`` of ``equip`` on
     ``site``.  Two sources of information are used:

     * the optional ``'port'`` specification stored on the local port,
       parsed with ``_parse_port_port``;
     * the network description of the target equipment, searched by
       ``_get_target_linecard_and_port_from_api``.

     Without a ``'port'`` entry the API result is returned unchanged.
     Otherwise both results are computed and compared; when they differ a
     warning is logged and the port specification is preferred unless its
     linecard is ``None``.

     Returns:
         tuple: ``(linecard_index, port_index)`` on the target equipment.
     """
     port = self.data['network'][site][equip]['linecards'][linecard_index][
         'ports'][port_index]
     uid = _parse_port_uid(port['uid'])
     if 'port' not in port:
         return self._get_target_linecard_and_port_from_api(
             equip, linecard_index, port_index, site)

     target_lc1, target_port1 = _parse_port_port(port['port'])
     if target_lc1 is None:
         logger.warn(
             'unable to get from the port spec %s the target linecard of link between %s(%s:%s %s) and %s'
             % (port['port'], equip, linecard_index, port_index, port,
                uid))
     target_lc2, target_port2 = self._get_target_linecard_and_port_from_api(
         equip, linecard_index, port_index, site)
     if target_lc2 is None:
         logger.warn(
             'unable to get from %s the target linecard of link between %s(%s:%s %s) and %s'
             % (uid, equip, linecard_index, port_index, port, uid))

     if target_lc1 == target_lc2 and target_port1 == target_port2:
         # Both sources agree.  This case used to fall off the end of the
         # function and implicitly return None, which breaks callers that
         # unpack the result into (linecard, port).
         return (target_lc1, target_port1)

     # The sources disagree: trust the explicit port specification unless it
     # gave no linecard at all.
     if target_lc1 is not None:
         target_lc, target_port = target_lc1, target_port1
     else:
         target_lc, target_port = target_lc2, target_port2
     logger.warn(
         'mismatch between the linecards of link between %s(%s:%s %s) and %s: %s:%s vs %s:%s. Using the "less unlikely one" %s:%s'
         % (equip, linecard_index, port_index, port, uid,
            target_lc1, target_port1, target_lc2, target_port2,
            target_lc, target_port))
     return (target_lc, target_port)
예제 #4
0
def _parse_port_uid(uid):
    """Return the last space-separated token of ``uid``.

    Anything before the last space is treated as an unexpected prefix: it is
    dropped from the result and reported with a warning.
    """
    head, _sep, tail = uid.rpartition(' ')
    if head:
        logger.warn('uid %s prefixed with %s' % (tail, head))
    return tail
예제 #5
0
    def add_equip(self, equip, site):
        """Add a network equipment and the links of its ports to the graph.

        ``equip`` is looked up in ``self.data['network'][site]``; if it is not
        described there a warning is logged and nothing is added.  Otherwise
        the equipment node is created (unless already present) and every port
        of every linecard that carries a ``'uid'`` is examined in order to add
        edges towards hosts, other switches/routers or renater nodes.

        Neighbouring switches/routers are added by calling this method
        recursively, but only when ``equip`` itself was newly added by this
        call (``recurse``).

        Args:
            equip: uid of the equipment to add.
            site: site whose network description contains ``equip``.
        """
        if equip not in self.data['network'][site]:
            logger.warn('Equipment %s not described in API' % (equip, ))
            return
        data = self.data['network'][site][equip]
        # An equipment that is already in the graph is not recursed from
        # again (see the uses of `recurse` below).
        if self.has_node(equip):
            recurse = False
        else:
            logger.debug('Adding equipment %s', equip)
            self.add_node(equip,
                          kind=data['kind'],
                          backplane=data['backplane_bps'])
            recurse = True
        lc_data = data['linecards']
        multiple_linecards = self._equip_uses_multiple_linecards(equip, site)
        equip_bw = data['backplane_bps']
        for i_lc, lc in enumerate(lc_data):
            lc_node = _get_linecard_name(equip, i_lc)
            if 'ports' in lc:
                for i_port, port in enumerate(lc['ports']):
                    if 'uid' in port:
                        uid = _parse_port_uid(port['uid'])
                        # Ports pointing to something unknown to the API are
                        # skipped.  NOTE(review): do_once presumably emits the
                        # warning only once per key -- confirm against its
                        # definition.
                        if not self._is_in_api(site, uid):
                            do_once(
                                (site, uid), logger.warn,
                                'unable to get kind of %s in %s, is it in g5k api?'
                                % (uid, site))
                            continue
                        # Determine the kind of the target: `kind` comes from
                        # the port description, `kind2` from the target itself.
                        # When both exist and differ, `kind2` wins.
                        kind = port.get('kind')
                        kind2 = self._get_node_kind(site, uid)
                        if not kind:
                            kind = kind2
                            if kind != 'node':
                                do_once(
                                    (equip, i_lc, i_port), logger.warn,
                                    'missing kind in port %s:%s %s of %s, using %s from %s'
                                    % (i_lc, i_port, port, equip, kind, uid))
                        elif not kind2:
                            logger.warn('missing kind in %s' % (uid, ))
                        elif kind != kind2:
                            logger.warn(
                                'mismatching kind %s in port %s:%s %s of %s and kind %s from %s. Using %s'
                                % (kind, i_lc, i_port, port, equip, kind2, uid,
                                   kind2))
                            kind = kind2
                        if not kind:
                            # Only reported: processing of the port continues.
                            logger.error('unable to find kind of %s' % (uid, ))
                        # The rate of the port overrides the rate of its
                        # linecard when present.
                        port_bw = lc['rate'] if 'rate' not in port else port[
                            'rate']
                        if kind == 'virtual':
                            # in this situation, we don't know what
                            # kind is the target equipment, we need to
                            # discover it
                            if uid in self.data['network'][site]:
                                pass
                            elif uid in self.data['hosts']:
                                kind = 'virtual-node'
                                logger.warn(
                                    'virtual link from %s(%s:%s %s) to node %s'
                                    % (equip, i_lc, i_port, port, uid))
                            else:
                                pass
                        # Edges are only added towards targets that are
                        # already nodes of the graph.
                        if self.has_node(uid):
                            if kind in ['node', 'virtual-node']:
                                # One edge per adapter of the host that is
                                # plugged into this equipment.
                                for e in self.get_host_adapters(uid):
                                    if e['switch'] == equip:
                                        if multiple_linecards:
                                            # equip <-> linecard <-> host
                                            self._checked_add_linecard(
                                                lc_node,
                                                lc.get('backplane_bps',
                                                       data['backplane_bps']))
                                            self._checked_add_edge(
                                                equip,
                                                lc_node,
                                                _unique_link_key(
                                                    equip, lc_node),
                                                bandwidth=equip_bw,
                                                active=True)
                                            self._checked_add_edge(
                                                lc_node,
                                                uid,
                                                _unique_link_key(
                                                    lc_node,
                                                    uid + '-' + e['device']),
                                                bandwidth=port_bw,
                                                active=e['mounted'])
                                        else:
                                            # equip <-> host directly; the
                                            # link is capped by the slower of
                                            # port and backplane.
                                            self._checked_add_edge(
                                                equip,
                                                uid,
                                                _unique_link_key(
                                                    equip,
                                                    uid + '-' + e['device']),
                                                bandwidth=min(
                                                    port_bw, equip_bw),
                                                active=e['mounted'])
                            elif kind in ['switch', 'router'] and recurse:
                                if multiple_linecards:
                                    # Link starts from the local linecard node.
                                    self._checked_add_linecard(
                                        lc_node,
                                        lc.get('backplane_bps',
                                               data['backplane_bps']))
                                    self._checked_add_edge(equip,
                                                           lc_node,
                                                           _unique_link_key(
                                                               equip, lc_node),
                                                           bandwidth=equip_bw,
                                                           active=True)
                                    target_lc, target_port = self._get_target_lc_and_port(
                                        equip, i_lc, i_port, site)
                                    if not target_lc is None:
                                        # The far end is the target linecard
                                        # when the target equipment uses
                                        # several linecards, else the target
                                        # equipment itself.
                                        if self._equip_uses_multiple_linecards(
                                                uid, site):
                                            self._checked_add_edge(
                                                lc_node,
                                                _get_linecard_name(
                                                    uid, target_lc),
                                                _unique_link_key(
                                                    lc_node,
                                                    _get_linecard_name(
                                                        uid, target_lc)),
                                                bandwidth=port_bw,
                                                active=True)
                                        else:
                                            self._checked_add_edge(
                                                lc_node,
                                                uid,
                                                _unique_link_key(lc_node, uid),
                                                bandwidth=port_bw,
                                                active=True)
                                    else:
                                        logger.error(
                                            'unable to find the target linecard of link between %s(%s:%s %s) and %s. Skipping this link!'
                                            % (equip, i_lc, i_port, port, uid))
                                else:
                                    # Same as above, but the link starts from
                                    # the equipment node itself.
                                    target_lc, target_port = self._get_target_lc_and_port(
                                        equip, i_lc, i_port, site)
                                    if not target_lc is None:
                                        if self._equip_uses_multiple_linecards(
                                                uid, site):
                                            self._checked_add_edge(
                                                equip,
                                                _get_linecard_name(
                                                    uid, target_lc),
                                                _unique_link_key(
                                                    equip,
                                                    _get_linecard_name(
                                                        uid, target_lc)),
                                                bandwidth=min(
                                                    port_bw, equip_bw),
                                                active=True)
                                        else:
                                            self._checked_add_edge(
                                                equip,
                                                uid,
                                                _unique_link_key(equip, uid),
                                                bandwidth=min(
                                                    port_bw, equip_bw),
                                                active=True)
                                    else:
                                        logger.error(
                                            'unable to find the target linecard of link between %s(%s:%s %s) and %s. Skipping this link!'
                                            % (equip, i_lc, i_port, port, uid))
                        # Independently of the edges above: renater targets
                        # get their own node, other switches/routers are
                        # explored recursively.
                        if 'renater' in uid:
                            # if uid != 'renater-' + site:
                            #     logger.error('renater node in %s has name %s which is not of the form renater-%s. Forcing to renater-%s' % (site, uid, site, site))
                            #     uid = 'renater-' + site
                            self.add_node(uid, kind='renater')
                            if multiple_linecards:
                                self._checked_add_linecard(
                                    lc_node,
                                    lc.get('backplane_bps',
                                           data['backplane_bps']))
                                self._checked_add_edge(equip,
                                                       lc_node,
                                                       _unique_link_key(
                                                           equip, lc_node),
                                                       bandwidth=equip_bw,
                                                       active=True)
                                self._checked_add_edge(lc_node,
                                                       uid,
                                                       _unique_link_key(
                                                           lc_node, uid),
                                                       bandwidth=port_bw,
                                                       active=True)
                            else:
                                self._checked_add_edge(
                                    equip,
                                    uid,
                                    _unique_link_key(equip, uid),
                                    bandwidth=min(port_bw, equip_bw),
                                    active=True)

                        elif kind in ['switch', 'router']:
                            if multiple_linecards:
                                self._checked_add_linecard(
                                    lc_node,
                                    lc.get('backplane_bps',
                                           data['backplane_bps']))
                                self._checked_add_edge(equip,
                                                       lc_node,
                                                       _unique_link_key(
                                                           equip, lc_node),
                                                       bandwidth=equip_bw,
                                                       active=True)
                            # Only a freshly added equipment explores its
                            # neighbours.
                            if recurse:
                                self.add_equip(uid, site)
예제 #6
0
 def _get_target_linecard_and_port_from_api(self, equip, linecard_index,
                                            port_index, site):
     """Locate, on the target equipment, the port linked back to ``equip``.

     The local port is ``<linecard_index>:<port_index>`` of ``equip`` on
     ``site``; its ``'uid'`` names the target equipment.  The network
     description of that target is scanned for ports whose uid is
     ``equip``.  When several ports qualify, their own ``'port'``
     specification is used to keep only those pointing back to
     ``<linecard_index>:<port_index>``.

     Returns:
         tuple: ``(linecard_index, port_index)`` on the target equipment,
         or ``(None, None)`` when no port of the target refers to ``equip``.
         When the ambiguity cannot be resolved the first candidate is
         returned and a warning is logged.

     Raises:
         Exception: if the target equipment is not in the network
             description of ``site``.
     """
     port = self.data['network'][site][equip]['linecards'][linecard_index][
         'ports'][port_index]
     uid = _parse_port_uid(port['uid'])
     if uid not in self.data['network'][site]:
         raise Exception(
             'trying to find a linecard of equipment %s which is not in the network description of %s'
             % (uid, site))
     remote_linecards = self.data['network'][site][uid]['linecards']

     # Every port of the target equipment that points to `equip`.
     candidates = []
     for remote_lc_idx, remote_lc in enumerate(remote_linecards):
         if 'ports' not in remote_lc:
             continue
         for remote_port_idx, remote_port in enumerate(remote_lc['ports']):
             if 'uid' not in remote_port:
                 continue
             if _parse_port_uid(remote_port['uid']) == equip:
                 candidates.append((remote_lc_idx, remote_port_idx))

     if len(candidates) <= 1:
         matches = candidates
     else:
         # need to disambiguate
         matches = []
         for candidate in candidates:
             cand_lc, cand_port = candidate[0], candidate[1]
             candidate_data = remote_linecards[cand_lc]['ports'][cand_port]
             if 'port' not in candidate_data:
                 logger.warn(
                     'no "port" entry in api network/%s/%s/linecards[%s]/ports[%s]'
                     % (site, uid, cand_lc, cand_port))
                 continue
             back_lc, back_port = _parse_port_port(candidate_data['port'])
             if back_lc is None or back_port is None:
                 logger.warn(
                     'unable to parse port spec %s of port %s:%s on %s of link from between %s(%s:%s %s)'
                     % (candidate_data['port'], cand_lc, cand_port,
                        uid, equip, linecard_index, port_index, port))
             if back_lc == linecard_index and back_port == port_index:
                 matches.append(candidate)

     if matches:
         if len(matches) > 1:
             logger.warn(
                 'unable to disambiguate between multiple links to %s from %s(%s:%s %s). candidate linecards:ports are %s. Using the first possible one %s'
                 % (uid, equip, linecard_index, port_index, port, matches,
                    matches[0]))
         return matches[0]

     logger.warn(
         'unable to find the target linecard on %s of link from %s(%s:%s %s)'
         % (uid, equip, linecard_index, port_index, port))
     if not candidates:
         return (None, None)
     logger.warn(
         'there are %s candidates %s, use the first possible one: %s'
         % (len(candidates), candidates, candidates[0]))
     return candidates[0]
예제 #7
0
파일: planning.py 프로젝트: msimonin/execo
def _get_site_planning_PGSQL(site, site_planning, ignore_besteffort):
    """Fill ``site_planning`` for ``site`` from the OAR PostgreSQL database.

    Connects, through a ``G5kAutoPortForwarder``, to the read-only OAR
    database of the site and:

    1. registers every resource that is neither ``Dead`` nor in maintenance
       with empty ``'busy'`` / ``'free'`` lists: hosts under
       ``site_planning[cluster][host]``, vlans under
       ``site_planning['vlans']['kavlan-<id>']`` and subnets under
       ``site_planning['subnets'][<address>]``.  Clusters, ``'vlans'`` and
       ``'subnets'`` are only filled when they already exist as keys of
       ``site_planning``;
    2. appends a ``(start_time, end_time)`` tuple to the ``'busy'`` list of
       every resource used by a launching, running or waiting job.

    Args:
        site: name of the site to query.
        site_planning: dict updated in place.
        ignore_besteffort: when true, jobs of the ``besteffort`` queue are
            left out.

    Returns:
        None.  Any exception is logged and the current thread is flagged
        with ``broken = True`` instead of being propagated.
    """
    try:
        with G5kAutoPortForwarder(
                site, 'oardb.' + site + '.grid5000.fr',
                g5k_configuration['oar_pgsql_ro_port']) as (host, port):
            conn = psycopg2.connect(
                host=host,
                port=port,
                user=g5k_configuration['oar_pgsql_ro_user'],
                password=g5k_configuration['oar_pgsql_ro_password'],
                database=g5k_configuration['oar_pgsql_ro_db'])
            try:
                cur = conn.cursor()
                # Retrieving alive resources
                sql = """SELECT DISTINCT R.type, R.network_address, R.vlan, R.subnet_address
                    FROM resources R
                    WHERE state <> 'Dead' AND R.maintenance <> 'YES';"""

                cur.execute(sql)

                for res_type, address, vlan_id, subnet_address in cur.fetchall():
                    if res_type == "default":
                        cluster = get_host_cluster(address)
                        if cluster in site_planning:
                            site_planning[cluster][address] = {
                                'busy': [],
                                'free': []
                            }
                    if res_type in ['kavlan', 'kavlan-global'] \
                        and 'vlans' in site_planning:
                        site_planning['vlans']['kavlan-' + vlan_id] = {
                            'busy': [],
                            'free': []
                        }
                    # The planning key is 'subnets' (plural), as written just
                    # below; testing for 'subnet' never matched.
                    if res_type == "subnet" and 'subnets' in site_planning:
                        site_planning['subnets'][subnet_address] = {
                            'busy': [],
                            'free': []
                        }

                # Keep whitespace on both sides of the optional filter so it
                # never gets glued to the GROUP BY clause.
                besteffort_filter = (" AND queue_name<>'besteffort'"
                                     if ignore_besteffort else "")
                sql = (
                    """SELECT J.job_id, J.state, GJP.start_time AS start_time,
                GJP.start_time+MJD.moldable_walltime,
                array_agg(DISTINCT R.network_address) AS hosts,
                array_agg(DISTINCT R.vlan) AS vlan,
                array_agg(DISTINCT R.subnet_address) AS subnets
                FROM jobs J
                LEFT JOIN moldable_job_descriptions MJD
                    ON MJD.moldable_job_id=J.job_id
                LEFT JOIN gantt_jobs_predictions GJP
                    ON GJP.moldable_job_id=MJD.moldable_id
                INNER JOIN gantt_jobs_resources AR
                    ON AR.moldable_job_id=MJD.moldable_id
                LEFT JOIN resources R
                    ON AR.resource_id=R.resource_id
                WHERE ( J.state='Launching' OR J.state='Running' OR J.state='Waiting')
                """ + besteffort_filter + """
                GROUP BY J.job_id, GJP.start_time, MJD.moldable_walltime
                ORDER BY J.start_time""")

                cur.execute(sql)

                for job in cur.fetchall():
                    start_time, end_time = _fix_job(job[2], job[3])
                    job_hosts, job_vlans, job_subnets = job[4], job[5], job[6]
                    if len(job_hosts) > 0:
                        for node in job_hosts:
                            if node != '':
                                cluster = get_host_cluster(node)
                                if cluster in site_planning:
                                    if node in site_planning[cluster]:
                                        site_planning[cluster][node][
                                            'busy'].append(
                                                (start_time, end_time))
                    if job_vlans[0] and 'vlans' in site_planning:
                        for vlan in job_vlans:
                            if isinstance(vlan, str) and int(vlan) > 3:
                                # only routed vlan
                                site_planning['vlans']['kavlan-' +
                                                       vlan]['busy'].append(
                                                           (start_time,
                                                            end_time))

                    if job_subnets and 'subnets' in site_planning:
                        for subnet in job_subnets:
                            if not subnet:
                                # array_agg keeps NULLs; resources without a
                                # subnet address must not become a planning
                                # entry.
                                continue
                            # Create the entry on demand, like
                            # _get_site_planning_API does for subnets that
                            # were not registered beforehand.
                            slots = site_planning['subnets'].setdefault(
                                subnet, {'busy': [], 'free': []})
                            slots['busy'].append((start_time, end_time))
            finally:
                conn.close()
    except Exception:
        logger.warn(
            'error connecting to oar database / getting planning from ' + site)
        logger.detail("exception:\n" + format_exc())
        currentThread().broken = True
예제 #8
0
파일: planning.py 프로젝트: msimonin/execo
def _get_site_planning_API(site, site_planning, ignore_besteffort):
    """Fill ``site_planning`` for ``site`` using the REST API.

    Steps:

    1. every resource of type ``default`` that is neither ``Dead`` nor in
       maintenance is registered under ``site_planning[cluster][host]``
       with empty ``'busy'`` / ``'free'`` lists (only for clusters already
       present in ``site_planning``);
    2. when ``'vlans'`` is a key of ``site_planning`` it is reset and filled
       from ``_get_vlans_API(site)``;
    3. the details of every waiting / launching / running job are fetched,
       one thread per job, and ``(start_time, end_time)`` is appended to
       the ``'busy'`` list of the nodes, vlan and subnets it uses.

    Args:
        site: name of the site to query.
        site_planning: dict updated in place.
        ignore_besteffort: when true, jobs of the ``besteffort`` queue are
            left out.

    Returns:
        None.  Any exception is logged and the current thread is flagged
        with ``broken = True`` instead of being propagated.
    """
    try:
        alive_nodes = {
            str(node['network_address']) for node in get_resource_attributes(
                '/sites/' + site +
                '/internal/oarapi/resources/details.json?limit=2^30')['items']
            if node['type'] == 'default' and node['state'] != 'Dead'
            and node['maintenance'] != 'YES'
        }

        for host in alive_nodes:
            host_cluster = get_host_cluster(host)
            if host_cluster in site_planning:
                site_planning[host_cluster][host] = {'busy': [], 'free': []}
        if 'vlans' in site_planning:
            site_planning['vlans'] = {}
            for vlan in _get_vlans_API(site):
                site_planning['vlans'][vlan] = {'busy': [], 'free': []}
        # STORAGE AND SUBNETS MISSING
        # Retrieving jobs

        site_jobs = get_resource_attributes(
            '/sites/' + site +
            '/jobs?limit=1073741824&state=waiting,launching,running')['items']
        # Truthiness test, so that a None flag behaves as in
        # _get_site_planning_PGSQL (besteffort jobs are kept).
        jobs_links = [
            link['href'] for job in site_jobs for link in job['links']
            if link['rel'] == 'self' and
            (not ignore_besteffort or job['queue'] != 'besteffort')
        ]
        threads = []
        for link in jobs_links:
            t = Thread(target=_get_job_link_attr_API,
                       args=('/' + str(link).split('/', 2)[2], ))
            # Result slots, read back after join() below.
            t.broken = False
            t.attr = None
            t.ex = None
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
            if t.broken:
                raise t.ex
            attr = t.attr
            try:
                start_time = attr['started_at'] if attr[
                    'started_at'] != 0 else attr['scheduled_at']
                end_time = start_time + attr['walltime']
            except (KeyError, TypeError):
                # Job without usable timing information (missing field, None
                # value or no attributes at all): skip it.
                continue
            start_time, end_time = _fix_job(start_time, end_time)
            nodes = attr['assigned_nodes']
            for node in nodes:
                cluster = node.split('.', 1)[0].split('-')[0]
                if cluster in site_planning and node in site_planning[cluster]:
                    site_planning[cluster][node]['busy'].append(
                        (start_time, end_time))
            # Only the first vlan of the job is considered, and only when its
            # id is greater than 3.
            if 'vlans' in site_planning and 'vlans' in attr['resources_by_type'] \
                and int(attr['resources_by_type']['vlans'][0]) > 3:

                kavname = 'kavlan-' + str(
                    attr['resources_by_type']['vlans'][0])
                site_planning['vlans'][kavname]['busy'].append(
                    (start_time, end_time))
            if 'subnets' in site_planning and 'subnets' in attr[
                    'resources_by_type']:
                for subnet in attr['resources_by_type']['subnets']:
                    # Subnets are not registered beforehand: create on demand.
                    slots = site_planning['subnets'].setdefault(
                        subnet, {'busy': [], 'free': []})
                    slots['busy'].append((start_time, end_time))
            # STORAGE IS MISSING
    except Exception:
        logger.warn(
            'error connecting to oar database / getting planning from ' + site)
        logger.detail("exception:\n" + format_exc())
        currentThread().broken = True