Example #1
def do_pxe_discovery(cfg, handler, info, manual, nodename, policies):
    # use uuid based scheme in lieu of tls cert, ideally only
    # for stateless 'discovery' targets like pxe, where data does not
    # change
    uuidinfo = cfg.get_node_attributes(nodename, ['id.uuid', 'id.serial', 'id.model', 'net*.bootable'])
    if manual or policies & set(('open', 'pxe')):
        enrich_pxe_info(info)
        attribs = {}
        olduuid = uuidinfo.get(nodename, {}).get('id.uuid', None)
        uuid = info.get('uuid', None)
        if uuid and uuid != olduuid:
            attribs['id.uuid'] = info['uuid']
        sn = info.get('serialnumber', None)
        mn = info.get('modelnumber', None)
        if sn and sn != uuidinfo.get(nodename, {}).get('id.serial', None):
            attribs['id.serial'] = sn
        if mn and mn != uuidinfo.get(nodename, {}).get('id.model', None):
            attribs['id.model'] = mn
        for attrname in uuidinfo.get(nodename, {}):
            if attrname.endswith('.bootable') and uuidinfo[nodename][attrname].get('value', None):
                newattrname = attrname[:-8] + 'hwaddr'
                attribs[newattrname] = info['hwaddr']
        if attribs:
            cfg.set_node_attributes({nodename: attribs})
    if info['uuid'] in known_pxe_uuids:
        return True
    if uuid_is_valid(info['uuid']):
        known_pxe_uuids[info['uuid']] = nodename
    log.log({'info': 'Detected {0} ({1} with mac {2})'.format(
        nodename, handler.devname, info['hwaddr'])})
    return True
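
A minimal sketch of the uuid_is_valid() check used above, assuming its job is to reject empty, unparseable, and placeholder UUIDs that firmware sometimes reports (the real helper may differ):

import uuid

def uuid_is_valid(uuidstr):
    # Hypothetical validator: reject empty values, unparseable strings,
    # and the all-zero/all-ones placeholders seen from some firmware.
    if not uuidstr:
        return False
    try:
        parsed = uuid.UUID(uuidstr)
    except ValueError:
        return False
    return parsed.int not in (0, (1 << 128) - 1)
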
Example #2
def become_leader(connection):
    global currentleader
    global follower
    global retrythread
    log.log({
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    })
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    skipaddr = connection.getpeername()[0]
    myname = get_myname()
    skipem = set(cfm.cfgstreams)
    skipem.add(currentleader)
    skipem.add(skipaddr)
    for member in cfm.list_collective():
        dronecandidate = cfm.get_collective_member(member)['address']
        if dronecandidate in skipem or member == myname:
            continue
        eventlet.spawn_n(try_assimilate, dronecandidate)
    schedule_rebalance()
Example #3
def start_collective():
    global follower
    global retrythread
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    try:
        if cfm.cfgstreams:
            cfm.check_quorum()
            # Do not start if we have quorum and are leader
            return
    except exc.DegradedCollective:
        pass
    if leader_init.active:  # do not start trying to connect if we are
        # xmitting data to a follower
        return
    myname = get_myname()
    for member in sorted(list(cfm.list_collective())):
        if member == myname:
            continue
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        ldrcandidate = cfm.get_collective_member(member)['address']
        log.log({'info': 'Performing startup attempt to {0}'.format(
            ldrcandidate), 'subsystem': 'collective'})
        if connect_to_leader(name=myname, leader=ldrcandidate):
            break
    else:  # loop exhausted without a break: no leader reachable, retry later
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
Example #4
def run():
    setlimits()
    try:
        signal.signal(signal.SIGUSR1, dumptrace)
    except AttributeError:
        pass   # silly windows
    if havefcntl:
        _checkpidfile()
    conf.init_config()
    try:
        config = conf.get_config()
        _initsecurity(config)
    except Exception:
        sys.stderr.write("Error unlocking credential store\n")
        doexit()
        sys.exit(1)
    try:
        confluentcore.load_plugins()
    except Exception:
        doexit()
        raise
    try:
        log.log({'info': 'Confluent management service starting'}, flush=True)
    except (OSError, IOError) as e:
        print(repr(e))
        sys.exit(1)
    _daemonize()
    if havefcntl:
        _updatepidfile()
    signal.signal(signal.SIGINT, terminate)
    signal.signal(signal.SIGTERM, terminate)
    collective.startup()
    if dbgif:
        oumask = os.umask(0o077)
        try:
            os.remove('/var/run/confluent/dbg.sock')
        except OSError:
            pass  # We are not expecting the file to exist
        try:
            dbgsock = eventlet.listen("/var/run/confluent/dbg.sock",
                                       family=socket.AF_UNIX)
            eventlet.spawn_n(backdoor.backdoor_server, dbgsock)
        except AttributeError:
            pass  # Windows...
        os.umask(oumask)
    http_bind_host, http_bind_port = _get_connector_config('http')
    sock_bind_host, sock_bind_port = _get_connector_config('socket')
    webservice = httpapi.HttpApi(http_bind_host, http_bind_port)
    webservice.start()
    disco.start_detection()
    try:
        sockservice = sockapi.SockApi(sock_bind_host, sock_bind_port)
        sockservice.start()
    except NameError:
        pass
    atexit.register(doexit)
    eventlet.sleep(1)
    consoleserver.start_console_sessions()
    while 1:
        eventlet.sleep(100)
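
When dbgif is enabled, run() exposes an eventlet backdoor REPL on /var/run/confluent/dbg.sock. A hypothetical client sketch for poking at a live daemon, assuming the socket exists and is readable by the caller:

import socket

# The eventlet backdoor serves an interactive Python console over the
# unix socket, so any raw socket client can attach to it.
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
s.connect('/var/run/confluent/dbg.sock')
print(s.recv(4096).decode())  # interpreter banner and '>>> ' prompt
s.sendall(b'import gc; len(gc.get_objects())\n')
print(s.recv(4096).decode())
s.close()
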
Example #5
def _recheck_nodes_backend(nodeattribs, configmanager):
    global rechecker
    _map_unique_ids(nodeattribs)
    # for the nodes whose attributes have changed, consider them as potential
    # strangers
    for node in nodeattribs:
        if node in known_nodes:
            for somemac in known_nodes[node]:
                unknown_info[somemac] = known_nodes[node][somemac]
                unknown_info[somemac]['discostatus'] = 'unidentified'
    # Now we go through ones we did not find earlier
    for mac in list(unknown_info):
        try:
            _recheck_single_unknown(configmanager, mac)
        except Exception:
            traceback.print_exc()
            continue
    # now we go through ones that were identified, but could not pass
    # policy or hadn't been able to verify key
    for nodename in pending_nodes:
        info = pending_nodes[nodename]
        try:
            if info['handler'] is None:
                continue
            handler = info['handler'].NodeHandler(info, configmanager)
            discopool.spawn_n(eval_node, configmanager, handler, info,
                              nodename)
        except Exception:
            traceback.print_exc()
            log.log({
                'error':
                'Unexpected error during discovery of {0}, check debug '
                'logs'.format(nodename)
            })
Example #6
def _affluent_map_switch(args):
    switch, password, user, cfm = args
    kv = util.TLSCertVerifier(cfm, switch,
                              'pubkeys.tls_hardwaremanager').verify_cert
    wc = webclient.SecureHTTPConnection(switch,
                                        443,
                                        verifycallback=kv,
                                        timeout=5)
    wc.set_basic_credentials(user, password)
    macs = wc.grab_json_response('/affluent/macs/by-port')
    _macsbyswitch[switch] = macs

    for iface in macs:
        nummacs = len(macs[iface])
        for mac in macs[iface]:
            if mac in _macmap:
                _macmap[mac].append((switch, iface, nummacs))
            else:
                _macmap[mac] = [(switch, iface, nummacs)]
            nodename = _nodelookup(switch, iface)
            if nodename is not None:
                if mac in _nodesbymac and _nodesbymac[mac][0] != nodename:
                    # For example, listed on both a real edge port
                    # and by accident a trunk port
                    log.log({
                        'error':
                        '{0} and {1} described by ambiguous'
                        ' switch topology values'.format(
                            nodename, _nodesbymac[mac][0])
                    })
                    _nodesbymac[mac] = (None, None)
                else:
                    _nodesbymac[mac] = (nodename, nummacs)
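
The loops above assume /affluent/macs/by-port returns a mapping of interface name to the MAC addresses seen on that port; illustrative shape (values made up):

macs = {
    'Ethernet1': ['00:25:90:01:02:03'],
    'Ethernet7': ['00:25:90:0a:0b:0c',
                  '00:25:90:0d:0e:0f'],  # two macs: likely a trunk/uplink
}
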
Example #7
def follow_leader(remote, leader):
    global currentleader
    cleanexit = False
    try:
        cfm.follow_channel(remote)
    except greenlet.GreenletExit:
        cleanexit = True
    finally:
        if cleanexit:
            log.log({
                'info': 'Previous following cleanly closed',
                'subsystem': 'collective'
            })
            return
        log.log({
            'info':
            'Current leader ({0}) has disappeared, restarting '
            'collective membership'.format(leader),
            'subsystem':
            'collective'
        })
        # The leader has folded, time to startup again...
        cfm.stop_following()
        currentleader = None
        eventlet.spawn_n(start_collective)
Example #8
def _full_updatemacmap(configmanager):
    global vintage
    global _macmap
    global _nodesbymac
    global _switchportmap
    global _macsbyswitch
    global switchbackoff
    start = util.monotonic_time()
    with mapupdating:
        vintage = util.monotonic_time()
        # Clear all existing entries
        _macmap = {}
        _nodesbymac = {}
        _switchportmap = {}
        _macsbyswitch = {}
        if configmanager.tenant is not None:
            raise exc.ForbiddenRequest(
                'Network topology not available to tenants')
        # here's a list of switches... need to add nodes that are switches
        nodelocations = configmanager.get_node_attributes(
            configmanager.list_nodes(), ('net*.switch', 'net*.switchport'))
        switches = set([])
        for node in nodelocations:
            cfg = nodelocations[node]
            for attr in cfg:
                if not attr.endswith('.switch') or 'value' not in cfg[attr]:
                    continue
                curswitch = cfg[attr].get('value', None)
                if not curswitch:
                    continue
                switches.add(curswitch)
                switchportattr = attr + 'port'
                if switchportattr in cfg:
                    portname = cfg[switchportattr].get('value', '')
                    if not portname:
                        continue
                    if curswitch not in _switchportmap:
                        _switchportmap[curswitch] = {}
                    if portname in _switchportmap[curswitch]:
                        log.log({
                            'error':
                            'Duplicate switch topology config '
                            'for {0} and {1}'.format(
                                node, _switchportmap[curswitch][portname])
                        })
                        _switchportmap[curswitch][portname] = None
                    else:
                        _switchportmap[curswitch][portname] = node
        switchauth = get_switchcreds(configmanager, switches)
        pool = GreenPool(64)
        for ans in pool.imap(_map_switch, switchauth):
            vintage = util.monotonic_time()
            yield ans
    endtime = util.monotonic_time()
    duration = endtime - start
    duration = duration * 15  # wait 15 times as long as it takes to walk
    # avoid spending a large portion of the time hitting switches with snmp
    # requests
    if duration > switchbackoff:
        switchbackoff = duration
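
The 15x multiplier only ever raises switchbackoff. A hedged sketch of how a cache consumer might combine vintage and switchbackoff to decide whether a fresh walk is warranted (_rescan_due is hypothetical, not part of the module):

def _rescan_due():
    # A full switch walk is only worth triggering once the cached data is
    # older than the adaptive backoff window established above.
    return vintage is None or (
        util.monotonic_time() - vintage > switchbackoff)
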
Example #9
def get_nodename(cfg, handler, info):
    nodename = None
    maccount = None
    info['verified'] = False
    if not handler:
        return None, None
    if handler.https_supported:
        currcert = handler.https_cert
        if not currcert:
            info['discofailure'] = 'nohttps'
            return None, None
        currprint = util.get_fingerprint(currcert, 'sha256')
        nodename = nodes_by_fprint.get(currprint, None)
        if not nodename:
            # Try SHA512 as well
            currprint = util.get_fingerprint(currcert)
            nodename = nodes_by_fprint.get(currprint, None)
    if not nodename:
        curruuid = info.get('uuid', None)
        if uuid_is_valid(curruuid):
            nodename = nodes_by_uuid.get(curruuid, None)
            if nodename is None:
                _map_unique_ids()
                nodename = nodes_by_uuid.get(curruuid, None)
    if not nodename:
        # Ok, see if it is something with a chassis-uuid and discover by
        # chassis
        nodename = get_nodename_from_enclosures(cfg, info)
    if not nodename and handler.devname == 'SMM':
        nodename = get_nodename_from_chained_smms(cfg, handler, info)
    if not nodename:  # as a last resort, search switches for info
        # This is the slowest potential operation, so we hope for the
        # best to occur prior to this
        nodename, macinfo = macmap.find_nodeinfo_by_mac(info['hwaddr'], cfg)
        maccount = macinfo['maccount']
        if nodename:
            if handler.devname == 'SMM':
                nl = list(cfg.filter_node_attributes(
                            'enclosure.extends=' + nodename))
                if nl:
                    # We found an SMM, and it's in a chain per configuration
                    # we need to ask the switch for the fingerprint to see
                    # if we have a match or not
                    newnodename, v = get_chained_smm_name(nodename, cfg,
                                                          handler, nl)
                    if newnodename:
                        # while this started by switch, it was disambiguated
                        info['verified'] = v
                        return newnodename, None
        if (nodename and
                not handler.discoverable_by_switch(macinfo['maccount'])):
            if handler.devname == 'SMM':
                errorstr = 'Attempt to discover SMM by switch, but chained ' \
                           'topology or incorrect net attributes detected, ' \
                           'which is not compatible with switch discovery ' \
                           'of SMM, nodename would have been ' \
                           '{0}'.format(nodename)
                log.log({'error': errorstr})
                return None, None
    return nodename, maccount
Example #10
def update_macmap(configmanager):
    """Interrogate switches to build/update mac table

    Begin a rebuild process.  This process is a generator that will yield
    as each switch interrogation completes, allowing a caller to
    recheck the cache as results become available, rather
    than having to wait for the whole interrogation to complete.
    """
    global _macmap
    global _nodesbymac
    global _switchportmap
    # Clear all existing entries
    _macmap = {}
    _nodesbymac = {}
    _switchportmap = {}
    if configmanager.tenant is not None:
        raise exc.ForbiddenRequest('Network topology not available to tenants')
    nodelocations = configmanager.get_node_attributes(
        configmanager.list_nodes(),
        ('hardwaremanagement.switch', 'hardwaremanagement.switchport'))
    switches = set([])
    for node in nodelocations:
        cfg = nodelocations[node]
        if 'hardwaremanagement.switch' in cfg:
            curswitch = cfg['hardwaremanagement.switch']['value']
            switches.add(curswitch)
            if 'hardwaremanagement.switchport' in cfg:
                portname = cfg['hardwaremanagement.switchport']['value']
                if curswitch not in _switchportmap:
                    _switchportmap[curswitch] = {}
                if portname in _switchportmap[curswitch]:
                    log.log({
                        'warning':
                        'Duplicate switch topology config for '
                        '{0} and {1}'.format(
                            node, _switchportmap[curswitch][portname])
                    })
                _switchportmap[curswitch][portname] = node
    switchcfg = configmanager.get_node_attributes(
        switches,
        ('secret.hardwaremanagementuser', 'secret.hardwaremanagementpassword'),
        decrypt=True)
    switchauth = []
    for switch in switches:
        password = '******'
        user = None
        if (switch in switchcfg
                and 'secret.hardwaremanagementpassword' in switchcfg[switch]):
            password = switchcfg[switch]['secret.hardwaremanagementpassword'][
                'value']
            if 'secret.hardwaremanagementuser' in switchcfg[switch]:
                user = switchcfg[switch]['secret.hardwaremanagementuser'][
                    'value']
        switchauth.append((switch, password, user))
    pool = GreenPool()
    for res in pool.imap(_map_switch, switchauth):
        yield res
        print(repr(_macmap))
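
Since update_macmap is a generator, a caller can act on partial results as each switch finishes. A minimal consumption sketch (wait_for_mac and the early-exit policy are illustrative, not part of the module):

def wait_for_mac(configmanager, mac):
    # Drive the rebuild one switch at a time, checking the shared cache
    # after each interrogation instead of waiting for all of them.
    for _ in update_macmap(configmanager):
        if mac in _macmap:
            return _macmap[mac]  # list of (switch, port, maccount) entries
    return None
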
Example #12
def get_leader(connection):
    if currentleader is None or connection.getpeername()[0] == currentleader:
        if currentleader is None:
            msg = 'Becoming leader as no leader known'
        else:
            msg = 'Becoming leader because {0} attempted to connect and it ' \
                  'is current leader'.format(currentleader)
        log.log({'info': msg, 'subsystem': 'collective'})
        become_leader(connection)
    return currentleader
Example #13
def start_collective():
    global follower
    global retrythread
    global initting
    initting = True
    retrythread = None
    try:
        cfm.membership_callback = schedule_rebalance
        if follower is not None:
            initting = False
            return
        try:
            if cfm.cfgstreams:
                cfm.check_quorum()
                # Do not start if we have quorum and are leader
                return
        except exc.DegradedCollective:
            pass
        if leader_init.active:  # do not start trying to connect if we are
            # xmitting data to a follower
            return
        myname = get_myname()
        connecto = []
        for member in sorted(list(cfm.list_collective())):
            if member == myname:
                continue
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            ldrcandidate = cfm.get_collective_member(member)['address']
            connecto.append(ldrcandidate)
        conpool = greenpool.GreenPool(64)
        connections = conpool.imap(create_connection, connecto)
        for ent in connections:
            member, remote = ent
            if isinstance(remote, Exception):
                continue
            if follower is None:
                log.log({
                    'info':
                    'Performing startup attempt to {0}'.format(member),
                    'subsystem':
                    'collective'
                })
                if not connect_to_leader(
                        name=myname, leader=member, remote=remote):
                    remote.close()
            else:
                remote.close()
    except Exception:
        pass  # suppress startup errors; the finally clause schedules a retry
    finally:
        if retrythread is None and follower is None:
            retrythread = eventlet.spawn_after(5 + random.random(),
                                               start_collective)
        initting = False
Example #14
def _recheck_single_unknown(configmanager, mac):
    global rechecker
    global rechecktime
    info = unknown_info.get(mac, None)
    if not info:
        return
    if info['handler'] != pxeh and not info.get('addresses', None):
        #log.log({'info': 'Missing address information in ' + repr(info)})
        return
    handler = info['handler'].NodeHandler(info, configmanager)
    if handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log({
                'info':
                '{0} with hwaddr {1} is not reachable at {2}'
                ''.format(handler.devname, info['hwaddr'], handler.ipaddr)
            })
            # addresses data is bad, delete the offending ip
            info['addresses'] = [
                x for x in info.get('addresses', []) if x != handler.ipaddr
            ]
            # TODO(jjohnson2):  rescan due to bad peer addr data?
            # not just wait around for the next announce
            return
        log.log({
            'info':
            '{0} with hwaddr {1} at address {2} is not yet running '
            'https, will examine later'.format(handler.devname, info['hwaddr'],
                                               handler.ipaddr)
        })
        if rechecker is not None and rechecktime > util.monotonic_time() + 300:
            rechecker.cancel()
        # if cancel did not result in dead, then we are in progress
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 300
            rechecker = eventlet.spawn_after(300, _periodic_recheck,
                                             configmanager)
        return
    nodename = get_nodename(configmanager, handler, info)
    if nodename:
        if handler.https_supported:
            dp = configmanager.get_node_attributes(
                [nodename], ('pubkeys.tls_hardwaremanager', ))
            lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                              {}).get('value', None)
            if util.cert_matches(lastfp, handler.https_cert):
                info['nodename'] = nodename
                known_nodes[nodename][info['hwaddr']] = info
                info['discostatus'] = 'discovered'
                return  # already known, no need for more
        discopool.spawn_n(eval_node, configmanager, handler, info, nodename)
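
The rechecker handling above leans on eventlet timer semantics: cancel() only stops a spawn_after green thread that has not begun running, hence the follow-up check of rechecker.dead. A small illustration of the pattern:

import eventlet

timer = eventlet.spawn_after(300, lambda: None)
timer.cancel()   # prevents the callback only if it has not started yet
if timer.dead:   # cancelled (or already finished): schedule a replacement
    timer = eventlet.spawn_after(300, lambda: None)
# if not dead, the callback is already in progress
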
Example #15
def try_assimilate(drone):
    try:
        remote = connect_to_collective(None, drone)
    except socket.error:
        # Oh well, unable to connect, hopefully the rest will be
        # in order
        return
    tlvdata.send(remote, {'collective': {'operation': 'assimilate',
                                         'name': get_myname(),
                                         'txcount': cfm._txcount}})
    tlvdata.recv(remote)  # the banner
    tlvdata.recv(remote)  # authpassed... 0..
    answer = tlvdata.recv(remote)
    if not answer:
        log.log(
            {'error':
                 'No answer from {0} while trying to assimilate'.format(
                     drone),
            'subsystem': 'collective'})
        return
    if 'txcount' in answer:
        log.log({'info': 'Deferring to {0} due to transaction count'.format(
            drone), 'subsystem': 'collective'})
        connect_to_leader(None, None, leader=remote.getpeername()[0])
        return
    if 'error' in answer:
        log.log({
            'error': 'Error encountered while attempting to '
                     'assimilate {0}: {1}'.format(drone, answer['error']),
            'subsystem': 'collective'})
        return
    log.log({'info': 'Assimilated {0} into collective'.format(drone),
             'subsystem': 'collective'})
Example #16
def try_assimilate(drone, followcount, remote):
    global retrythread
    try:
        remote = connect_to_collective(None, drone, remote)
    except socket.error:
        # Oh well, unable to connect, hopefully the rest will be
        # in order
        return
    tlvdata.send(
        remote, {
            'collective': {
                'operation': 'assimilate',
                'name': get_myname(),
                'followcount': followcount,
                'txcount': cfm._txcount
            }
        })
    tlvdata.recv(remote)  # the banner
    tlvdata.recv(remote)  # authpassed... 0..
    answer = tlvdata.recv(remote)
    if not answer:
        log.log({
            'error':
            'No answer from {0} while trying to assimilate'.format(drone),
            'subsystem':
            'collective'
        })
        return True
    if 'txcount' in answer:
        log.log({
            'info':
            'Deferring to {0} due to target being a better leader'.format(
                drone),
            'subsystem':
            'collective'
        })
        retire_as_leader(drone)
        if not connect_to_leader(None, None, leader=remote.getpeername()[0]):
            if retrythread is None:
                retrythread = eventlet.spawn_after(random.random(),
                                                   start_collective)
        return False
    if 'leader' in answer:
        # Will wait for leader to see about assimilation
        return True
    if 'error' in answer:
        log.log({
            'error':
            'Error encountered while attempting to '
            'assimilate {0}: {1}'.format(drone, answer['error']),
            'subsystem':
            'collective'
        })
        return True
    log.log({
        'info': 'Assimilated {0} into collective'.format(drone),
        'subsystem': 'collective'
    })
    return True
Example #17
def _periodic_recheck(configmanager):
    global rechecker
    global rechecktime
    rechecker = None
    try:
        _recheck_nodes((), configmanager)
    except Exception:
        traceback.print_exc()
        log.log({'error': 'Unexpected error during discovery, check debug '
                          'logs'})
    # if rechecker is set, it means that an accelerated schedule
    # for rechecker was requested in the course of recheck_nodes
    if rechecker is None:
        rechecktime = util.monotonic_time() + 900
        rechecker = eventlet.spawn_after(900, _periodic_recheck,
                                         configmanager)
Example #18
def become_leader(connection):
    global currentleader
    global follower
    global retrythread
    global reassimilate
    log.log({
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    })
    if follower is not None:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread is not None:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    skipaddr = connection.getpeername()[0]
    if _assimilate_missing(skipaddr):
        schedule_rebalance()
        if reassimilate is not None:
            reassimilate.kill()
        reassimilate = eventlet.spawn(reassimilate_missing)
Example #19
def _map_switch(args):
    try:
        return _map_switch_backend(args)
    except (UnicodeError, socket.gaierror):
        log.log({'error': "Cannot resolve switch '{0}' to an address".format(
            args[0])})
    except exc.TargetEndpointUnreachable:
        log.log({'error': "Timeout or bad SNMPv1 community string trying to "
                          "reach switch '{0}'".format(args[0])})
    except exc.TargetEndpointBadCredentials:
        log.log({'error': "Bad SNMPv3 credentials for '{0}'".format(args[0])})
    except Exception:
        log.log({'error': 'Unexpected condition trying to reach switch "{0}",'
                          ' check trace log for more'.format(args[0])})
        log.logtrace()
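
_map_switch exists so that GreenPool.imap in the callers keeps iterating when one switch is broken: every failure class is logged and the wrapper falls through to an implicit None. Sketch of the effect at the call site:

from eventlet.greenpool import GreenPool

pool = GreenPool(64)
for ans in pool.imap(_map_switch, switchauth):
    if ans is None:
        continue  # that switch failed and was already logged above
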
Example #20
def follow_leader(remote, leader):
    global currentleader
    global retrythread
    global follower
    cleanexit = False
    newleader = None
    try:
        exitcause = cfm.follow_channel(remote)
        newleader = exitcause.get('newleader', None)
    except greenlet.GreenletExit:
        cleanexit = True
    finally:
        if cleanexit:
            log.log({
                'info': 'Previous following cleanly closed',
                'subsystem': 'collective'
            })
            return
        if newleader:
            log.log({
                'info':
                'Previous leader directed us to join new leader {}'.format(
                    newleader)
            })
            if connect_to_leader(None, get_myname(), newleader):
                return
        log.log({
            'info':
            'Current leader ({0}) has disappeared, restarting '
            'collective membership'.format(leader),
            'subsystem':
            'collective'
        })
        # The leader has folded, time to startup again...
        follower = None
        cfm.stop_following()
        currentleader = None
        if retrythread is None:  # start a recovery
            retrythread = eventlet.spawn_after(random.random(),
                                               start_collective)
Example #21
def _full_updatemacmap(configmanager):
    global vintage
    global _macmap
    global _nodesbymac
    global _switchportmap
    global _macsbyswitch
    with mapupdating:
        vintage = util.monotonic_time()
        # Clear all existing entries
        _macmap = {}
        _nodesbymac = {}
        _switchportmap = {}
        _macsbyswitch = {}
        if configmanager.tenant is not None:
            raise exc.ForbiddenRequest(
                'Network topology not available to tenants')
        nodelocations = configmanager.get_node_attributes(
            configmanager.list_nodes(), ('net*.switch', 'net*.switchport'))
        switches = set([])
        for node in nodelocations:
            cfg = nodelocations[node]
            for attr in cfg:
                if not attr.endswith('.switch') or 'value' not in cfg[attr]:
                    continue
                curswitch = cfg[attr].get('value', None)
                if not curswitch:
                    continue
                switches.add(curswitch)
                switchportattr = attr + 'port'
                if switchportattr in cfg:
                    portname = cfg[switchportattr].get('value', '')
                    if not portname:
                        continue
                    if curswitch not in _switchportmap:
                        _switchportmap[curswitch] = {}
                    if portname in _switchportmap[curswitch]:
                        log.log({
                            'error':
                            'Duplicate switch topology config '
                            'for {0} and {1}'.format(
                                node, _switchportmap[curswitch][portname])
                        })
                        _switchportmap[curswitch][portname] = None
                    else:
                        _switchportmap[curswitch][portname] = node
        switchcfg = configmanager.get_node_attributes(
            switches, ('secret.hardwaremanagementuser', 'secret.snmpcommunity',
                       'secret.hardwaremanagementpassword'),
            decrypt=True)
        switchauth = []
        for switch in switches:
            if not switch:
                continue
            switchparms = switchcfg.get(switch, {})
            user = None
            password = switchparms.get('secret.snmpcommunity',
                                       {}).get('value', None)
            if not password:
                password = switchparms.get('secret.hardwaremanagementpassword',
                                           {}).get('value', 'public')
                user = switchparms.get('secret.hardwaremanagementuser',
                                       {}).get('value', None)
            switchauth.append((switch, password, user))
        pool = GreenPool()
        for ans in pool.imap(_map_switch, switchauth):
            vintage = util.monotonic_time()
            yield ans
Example #22
def _map_switch_backend(args):
    """Manipulate portions of mac address map relevant to a given switch
    """

    # 1.3.6.1.2.1.17.7.1.2.2.1.2 - mactoindex (qbridge - preferred)
    #  if not, check for cisco and if cisco, build list of all relevant vlans:
    #  .1.3.6.1.4.1.9.9.46.1.6.1.1.5 - trunk port vlan map (cisco only)
    #  .1.3.6.1.4.1.9.9.68.1.2.2.1.2 - access port vlan map (cisco only)
    # if cisco, vlan community string indexed or snmpv3 context for:
    # 1.3.6.1.2.1.17.4.3.1.2 - mactoindex (bridge - low-end switches and cisco)
    #     .1.3.6.1.2.1.17.1.4.1.2 - bridge index to if index map
    # no vlan index or context for:
    #  .1.3.6.1.2.1.31.1.1.1.1 - ifName... but some switches don't do it
    #  .1.3.6.1.2.1.2.2.1.2 - ifDescr, usually useless, but a
    #   fallback if ifName is empty
    #
    global _macmap
    if len(args) == 3:
        switch, password, user = args
        if not user:
            user = None
    else:
        switch, password = args
        user = None
    haveqbridge = False
    mactobridge = {}
    conn = snmp.Session(switch, password, user)
    for vb in conn.walk('1.3.6.1.2.1.17.7.1.2.2.1.2'):
        haveqbridge = True
        oid, bridgeport = vb
        if not bridgeport:
            continue
        oid = str(oid).rsplit('.', 6)  # if 7, then oid[1] would be vlan id
        macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format(
            *([int(x) for x in oid[-6:]]))
        mactobridge[macaddr] = int(bridgeport)
    if not haveqbridge:
        for vb in conn.walk('1.3.6.1.2.1.17.4.3.1.2'):
            oid, bridgeport = vb
            if not bridgeport:
                continue
            oid = str(oid).rsplit('.', 6)
            macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format(
                *([int(x) for x in oid[-6:]]))
            mactobridge[macaddr] = int(bridgeport)
    bridgetoifmap = {}
    for vb in conn.walk('1.3.6.1.2.1.17.1.4.1.2'):
        bridgeport, ifidx = vb
        bridgeport = int(str(bridgeport).rsplit('.', 1)[1])
        try:
            bridgetoifmap[bridgeport] = int(ifidx)
        except ValueError:
            # ifidx might be '', skip in such a case
            continue
    ifnamemap = {}
    havenames = False
    for vb in conn.walk('1.3.6.1.2.1.31.1.1.1.1'):
        ifidx, ifname = vb
        if not ifname:
            continue
        havenames = True
        ifidx = int(str(ifidx).rsplit('.', 1)[1])
        ifnamemap[ifidx] = str(ifname)
    if not havenames:
        for vb in conn.walk('1.3.6.1.2.1.2.2.1.2'):
            ifidx, ifname = vb
            ifidx = int(str(ifidx).rsplit('.', 1)[1])
            ifnamemap[ifidx] = str(ifname)
    maccounts = {}
    bridgetoifvalid = False
    for mac in mactobridge:
        try:
            ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
            bridgetoifvalid = True
        except KeyError:
            continue
        if ifname not in maccounts:
            maccounts[ifname] = 1
        else:
            maccounts[ifname] += 1
    if not bridgetoifvalid:
        bridgetoifmap = {}
        # Not a single mac address resolved to an interface index, chances are
        # that the switch is broken, and the mactobridge is reporting ifidx
        # instead of bridge port index
        # try again, skipping the bridgetoifmap lookup
        for mac in mactobridge:
            try:
                ifname = ifnamemap[mactobridge[mac]]
                bridgetoifmap[mactobridge[mac]] = mactobridge[mac]
            except KeyError:
                continue
            if ifname not in maccounts:
                maccounts[ifname] = 1
            else:
                maccounts[ifname] += 1
    _macsbyswitch[switch] = {}
    for mac in mactobridge:
        # We want to merge it so that when a mac appears in multiple
        # places, it is captured.
        try:
            ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        except KeyError:
            continue
        if mac in _macmap:
            _macmap[mac].append((switch, ifname, maccounts[ifname]))
        else:
            _macmap[mac] = [(switch, ifname, maccounts[ifname])]
        if ifname in _macsbyswitch[switch]:
            _macsbyswitch[switch][ifname].append(mac)
        else:
            _macsbyswitch[switch][ifname] = [mac]
        nodename = _nodelookup(switch, ifname)
        if nodename is not None:
            if mac in _nodesbymac and _nodesbymac[mac] != nodename:
                # For example, listed on both a real edge port
                # and by accident a trunk port
                log.log({
                    'error':
                    '{0} and {1} described by ambiguous'
                    ' switch topology values'.format(nodename,
                                                     _nodesbymac[mac])
                })
                _nodesbymac[mac] = None
            else:
                _nodesbymac[mac] = nodename
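
The rsplit('.', 6) keeps the last six OID components, which are the MAC octets in decimal. A worked example of the decode used in both walks above:

oid = '1.3.6.1.2.1.17.7.1.2.2.1.2.1.0.37.144.1.2.3'
parts = oid.rsplit('.', 6)  # table prefix (+ vlan) followed by 6 octets
mac = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format(
    *[int(x) for x in parts[-6:]])
print(mac)  # 00:25:90:01:02:03
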
Example #23
def _map_switch_backend(args):
    """Manipulate portions of mac address map relevant to a given switch
    """

    # 1.3.6.1.2.1.17.7.1.2.2.1.2 - mactoindex (qbridge - preferred)
    #  if not, check for cisco and if cisco, build list of all relevant vlans:
    #  .1.3.6.1.4.1.9.9.46.1.6.1.1.5 - trunk port vlan map (cisco only)
    #  .1.3.6.1.4.1.9.9.68.1.2.2.1.2 - access port vlan map (cisco only)
    # if cisco, vlan community string indexed or snmpv3 context for:
    # 1.3.6.1.2.1.17.4.3.1.2 - mactoindex (bridge - low-end switches and cisco)
    #     .1.3.6.1.2.1.17.1.4.1.2 - bridge index to if index map
    # no vlan index or context for:
    #  .1.3.6.1.2.1.31.1.1.1.1 - ifName... but some switches don't do it
    #  .1.3.6.1.2.1.2.2.1.2 - ifDescr, usually useless, but a
    #   fallback if ifName is empty
    #
    global _macmap
    if len(args) == 4:
        switch, password, user, _ = args  # 4th arg is for affluent only
        if not user:
            user = None
    else:
        switch, password = args
        user = None
    if switch not in noaffluent:
        try:
            return _affluent_map_switch(args)
        except Exception:
            pass  # no affluent API on this switch; fall back to the SNMP walk
    mactobridge, ifnamemap, bridgetoifmap = _offload_map_switch(
        switch, password, user)
    maccounts = {}
    bridgetoifvalid = False
    for mac in mactobridge:
        try:
            ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
            bridgetoifvalid = True
        except KeyError:
            continue
        if ifname not in maccounts:
            maccounts[ifname] = 1
        else:
            maccounts[ifname] += 1
    if not bridgetoifvalid:
        bridgetoifmap = {}
        # Not a single mac address resolved to an interface index, chances are
        # that the switch is broken, and the mactobridge is reporting ifidx
        # instead of bridge port index
        # try again, skipping the bridgetoifmap lookup
        for mac in mactobridge:
            try:
                ifname = ifnamemap[mactobridge[mac]]
                bridgetoifmap[mactobridge[mac]] = mactobridge[mac]
            except KeyError:
                continue
            if ifname not in maccounts:
                maccounts[ifname] = 1
            else:
                maccounts[ifname] += 1
    newmacs = {}
    noaffluent.add(switch)
    for mac in mactobridge:
        # We want to merge it so that when a mac appears in multiple
        # places, it is captured.
        try:
            ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        except KeyError:
            continue
        if mac in _macmap:
            _macmap[mac].append((switch, ifname, maccounts[ifname]))
        else:
            _macmap[mac] = [(switch, ifname, maccounts[ifname])]
        if ifname in newmacs:
            newmacs[ifname].append(mac)
        else:
            newmacs[ifname] = [mac]
        nodename = _nodelookup(switch, ifname)
        if nodename is not None:
            if mac in _nodesbymac and _nodesbymac[mac][0] != nodename:
                # For example, listed on both a real edge port
                # and by accident a trunk port
                log.log({
                    'error':
                    '{0} and {1} described by ambiguous'
                    ' switch topology values'.format(nodename,
                                                     _nodesbymac[mac][0])
                })
                _nodesbymac[mac] = (None, None)
            else:
                _nodesbymac[mac] = (nodename, maccounts[ifname])
    _macsbyswitch[switch] = newmacs
Example #24
def _map_switch_backend(args):
    """Manipulate portions of mac address map relevant to a given switch
    """

    # 1.3.6.1.2.1.17.7.1.2.2.1.2 - mactoindex (qbridge - preferred)
    #  if not, check for cisco and if cisco, build list of all relevant vlans:
    #  .1.3.6.1.4.1.9.9.46.1.6.1.1.5 - trunk port vlan map (cisco only)
    #  .1.3.6.1.4.1.9.9.68.1.2.2.1.2 - access port vlan map (cisco only)
    # if cisco, vlan community string indexed or snmpv3 context for:
    # 1.3.6.1.2.1.17.4.3.1.2 - mactoindex (bridge - low-end switches and cisco)
    #     .1.3.6.1.2.1.17.1.4.1.2 - bridge index to if index map
    # no vlan index or context for:
    #  .1.3.6.1.2.1.31.1.1.1.1 - ifName... but some switches don't do it
    #  .1.3.6.1.2.1.2.2.1.2 - ifDescr, usually useless, but a
    #   fallback if ifName is empty
    #
    global _macmap
    switch, password, user = args
    haveqbridge = False
    mactobridge = {}
    conn = snmp.Session(switch, password, user)
    for vb in conn.walk('1.3.6.1.2.1.17.7.1.2.2.1.2'):
        haveqbridge = True
        oid, bridgeport = vb
        if not bridgeport:
            continue
        oid = str(oid).rsplit('.', 6)  # if 7, then oid[1] would be vlan id
        macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format(
            *([int(x) for x in oid[-6:]]))
        mactobridge[macaddr] = int(bridgeport)
    if not haveqbridge:
        raise exc.NotImplementedException('TODO: Bridge-MIB without QBRIDGE')
    bridgetoifmap = {}
    for vb in conn.walk('1.3.6.1.2.1.17.1.4.1.2'):
        bridgeport, ifidx = vb
        bridgeport = int(str(bridgeport).rsplit('.', 1)[1])
        bridgetoifmap[bridgeport] = int(ifidx)
    ifnamemap = {}
    havenames = False
    for vb in conn.walk('1.3.6.1.2.1.31.1.1.1.1'):
        ifidx, ifname = vb
        if not ifname:
            continue
        havenames = True
        ifidx = int(str(ifidx).rsplit('.', 1)[1])
        ifnamemap[ifidx] = str(ifname)
    if not havenames:
        for vb in conn.walk('1.3.6.1.2.1.2.2.1.2'):
            ifidx, ifname = vb
            ifidx = int(str(ifidx).rsplit('.', 1)[1])
            ifnamemap[ifidx] = str(ifname)
    maccounts = {}
    for mac in mactobridge:
        ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        if ifname not in maccounts:
            maccounts[ifname] = 1
        else:
            maccounts[ifname] += 1
    _macsbyswitch[switch] = {}
    for mac in mactobridge:
        # We want to merge it so that when a mac appears in multiple
        # places, it is captured.
        ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        if mac in _macmap:
            _macmap[mac].append((switch, ifname, maccounts[ifname]))
        else:
            _macmap[mac] = [(switch, ifname, maccounts[ifname])]
        if ifname in _macsbyswitch[switch]:
            _macsbyswitch[switch][ifname].append(mac)
        else:
            _macsbyswitch[switch][ifname] = [mac]
        nodename = _nodelookup(switch, ifname)
        if nodename is not None:
            if mac in _nodesbymac and _nodesbymac[mac] != nodename:
                log.log({
                    'warning':
                    '{0} and {1} described by ambiguous'
                    ' switch topology values'.format(nodename,
                                                     _nodesbymac[mac])
                })
            _nodesbymac[mac] = nodename
Example #25
def connect_to_leader(cert=None, name=None, leader=None):
    global currentleader
    global follower
    if leader is None:
        leader = currentleader
    log.log({
        'info': 'Attempting connection to leader {0}'.format(leader),
        'subsystem': 'collective'
    })
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error as e:
        log.log({
            'error':
            'Collective connection attempt to {0} failed: {1}'
            ''.format(leader, str(e)),
            'subsystem':
            'collective'
        })
        return False
    with connecting:
        with cfm._initlock:
            banner = tlvdata.recv(remote)  # the banner
            vers = banner.split()[2]
            if vers != b'v2':
                raise Exception(
                    'This instance only supports protocol 2, synchronize versions between collective members'
                )
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    log.log({
                        'info':
                        'Collective initialization in progress on '
                        '{0}'.format(leader),
                        'subsystem':
                        'collective'
                    })
                    return False
                if 'leader' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has redirected this '
                        'member to {1}'.format(leader, keydata['leader']),
                        'subsystem':
                        'collective'
                    })
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has inferior '
                        'transaction count, becoming leader'
                        ''.format(leader),
                        'subsystem':
                        'collective'
                    })
                    return become_leader(remote)
                return False
            if follower:
                follower.kill()
                cfm.stop_following()
                follower = None
            log.log({
                'info': 'Following leader {0}'.format(leader),
                'subsystem': 'collective'
            })
            colldata = tlvdata.recv(remote)
            # the protocol transmits global data, but for now we ignore it
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = b''
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                #for globvar in globaldata:
                #    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        #spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote, leader)
    return True
Example #26
def handle_connection(connection, cert, request, local=False):
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(
                    connection, {
                        'collective': {
                            'error':
                            'Collective mode not '
                            'enabled on this '
                            'system'
                        }
                    })
                return
            if follower:
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                if not (linfo
                        and util.cert_matches(linfo['fingerprint'], cert)):
                    remote.close()
                    tlvdata.send(connection, {
                        'error':
                        'Invalid certificate, '
                        'redo invitation process'
                    })
                    connection.close()
                    return
                tlvdata.recv(remote)  # ignore banner
                tlvdata.recv(remote)  # ignore authpassed: 0
                tlvdata.send(remote, {
                    'collective': {
                        'operation': 'getinfo',
                        'name': get_myname()
                    }
                })
                collinfo = tlvdata.recv(remote)
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                if (todelete == collinfo['leader']
                        or todelete in collinfo['active']):
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is still active, stop the confluent service to remove it'
                                .format(todelete)
                            }
                        })
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is not a recognized collective member'.
                                format(todelete)
                            }
                        })
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status':
                            'Successfully deleted {0}'.format(todelete)
                        }
                    })
                connection.close()
            return
        if 'invite' == operation:
            try:
                cfm.check_quorum()
            except exc.DegradedCollective:
                tlvdata.send(connection, {
                    'collective': {
                        'error': 'Collective does not have quorum'
                    }
                })
                return
            #TODO(jjohnson2): Cannot do the invitation if not the head node, the certificate hand-carrying
            #can't work in such a case.
            name = request['name']
            invitation = invites.create_server_invitation(name)
            tlvdata.send(connection,
                         {'collective': {
                             'invitation': invitation
                         }})
            connection.close()
        if 'join' == operation:
            invitation = request['invitation']
            try:
                invitation = base64.b64decode(invitation)
                name, invitation = invitation.split(b'@', 1)
                name = util.stringify(name)
            except Exception:
                tlvdata.send(
                    connection,
                    {'collective': {
                        'status': 'Invalid token format'
                    }})
                connection.close()
                return
            host = request['server']
            try:
                remote = socket.create_connection((host, 13001))
                # This isn't what it looks like.  We do CERT_NONE to disable
                # openssl verification, but then use the invitation as a
                # shared secret to validate the certs as part of the join
                # operation
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
            except Exception:
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status': 'Failed to connect to {0}'.format(host)
                        }
                    })
                connection.close()
                return
            mycert = util.get_certificate_from_file(
                '/etc/confluent/srvcert.pem')
            cert = remote.getpeercert(binary_form=True)
            proof = base64.b64encode(
                invites.create_client_proof(invitation, mycert, cert))
            tlvdata.recv(remote)  # ignore banner
            tlvdata.recv(remote)  # ignore authpassed: 0
            tlvdata.send(remote, {
                'collective': {
                    'operation': 'enroll',
                    'name': name,
                    'hmac': proof
                }
            })
            rsp = tlvdata.recv(remote)
            if 'error' in rsp:
                tlvdata.send(connection,
                             {'collective': {
                                 'status': rsp['error']
                             }})
                connection.close()
                return
            proof = rsp['collective']['approval']
            proof = base64.b64decode(proof)
            j = invites.check_server_proof(invitation, mycert, cert, proof)
            if not j:
                remote.close()
                tlvdata.send(connection,
                             {'collective': {
                                 'status': 'Bad server token'
                             }})
                connection.close()
                return
            tlvdata.send(connection, {'collective': {'status': 'Success'}})
            connection.close()
            currentleader = rsp['collective']['leader']
            with open('/etc/confluent/cfg/myname', 'w') as f:
                f.write(name)
            log.log({
                'info': 'Connecting to collective due to join',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader,
                             rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        #TODO(jjohnson2): error appropriately when asked to enroll, but the master is elsewhere
        mycert = util.get_certificate_from_file('/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert, cert,
                                           proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(
            connection, {
                'collective': {
                    'approval': myrsp,
                    'fingerprint': ldrfprint,
                    'leader': get_leader(connection)
                }
            })
    if 'assimilate' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(
                connection,
                {'error': 'Unrecognized leader, '
                 'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            tlvdata.send(
                connection, {
                    'error': 'Refusing to be assimilated by inferior '
                    'transaction count',
                    'txcount': cfm._txcount,
                })
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({
            'info': 'Connecting in response to assimilation',
            'subsystem': 'collective'
        })
        eventlet.spawn_n(connect_to_leader,
                         None,
                         None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {
                'error': 'Connecting right now',
                'backoff': True
            })
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(
                connection, {
                    'error': 'Cannot assimilate, our leader is '
                    'in another castle',
                    'leader': currentleader
                })
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            retire_as_leader()
            tlvdata.send(
                connection, {
                    'error': 'Client has higher transaction count, '
                    'should assimilate me, connecting..',
                    'txcount': cfm._txcount
                })
            log.log({
                'info': 'Connecting to leader due to superior '
                'transaction count',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {
                'txcount': cfm._txcount,
                'dbsize': len(cfgdata)
            })
            connection.sendall(cfgdata)
        #tlvdata.send(connection, {'tenants': 0}) # skip the tenants for now,
        # so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)
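The assimilate and connect handlers above arbitrate leadership purely by configuration transaction count: a peer that is behind is refused, and a peer that is ahead causes the current leader to retire and follow. A minimal sketch of that decision rule (the function name and return labels are illustrative, not part of confluent):

def arbitrate_txcount(mytxcount, peertxcount):
    # Condensed from the handlers above: an 'assimilate' request bearing
    # a lower txcount is refused, a 'connect' request bearing a higher
    # txcount makes us retire as leader and follow the peer, and equal
    # histories let the operation proceed normally.
    if peertxcount < mytxcount:
        return 'refuse'
    if peertxcount > mytxcount:
        return 'retire-and-follow'
    return 'proceed'

assert arbitrate_txcount(10, 5) == 'refuse'
assert arbitrate_txcount(5, 10) == 'retire-and-follow'
assert arbitrate_txcount(7, 7) == 'proceed'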
Example #27
0
def _map_switch_backend(args):
    """Manipulate portions of mac address map relevant to a given switch
    """

    # 1.3.6.1.2.1.17.7.1.2.2.1.2 - mactoindex (qbridge - preferred)
    #  if not, check for cisco and if cisco, build list of all relevant vlans:
    #  .1.3.6.1.4.1.9.9.46.1.6.1.1.5 - trunk port vlan map (cisco only)
    #  .1.3.6.1.4.1.9.9.68.1.2.2.1.2 - access port vlan map (cisco only)
    # if cisco, vlan community string indexed or snmpv3 context for:
    # 1.3.6.1.2.1.17.4.3.1.2 - mactoindex (bridge - low-end switches and cisco)
    #     .1.3.6.1.2.1.17.1.4.1.2 - bridge index to if index map
    # no vlan index or context for:
    #  .1.3.6.1.2.1.31.1.1.1.1 - ifName... but some switches don't do it
    #  .1.3.6.1.2.1.2.2.1.2 - ifDescr, usually useless, but a
    #   fallback if ifName is empty
    #
    global _macmap
    switch, password, user = args
    haveqbridge = False
    mactobridge = {}
    conn = snmp.Session(switch, password, user)
    for vb in conn.walk('1.3.6.1.2.1.17.7.1.2.2.1.2'):
        haveqbridge = True
        oid, bridgeport = vb
        if not bridgeport:
            continue
        oid = str(oid).rsplit('.', 6)  # if 7, then oid[1] would be vlan id
        macaddr = '{0:02x}:{1:02x}:{2:02x}:{3:02x}:{4:02x}:{5:02x}'.format(
            *([int(x) for x in oid[-6:]])
        )
        mactobridge[macaddr] = int(bridgeport)
    if not haveqbridge:
        raise exc.NotImplementedException('TODO: Bridge-MIB without QBRIDGE')
    bridgetoifmap = {}
    for vb in conn.walk('1.3.6.1.2.1.17.1.4.1.2'):
        bridgeport, ifidx = vb
        bridgeport = int(str(bridgeport).rsplit('.', 1)[1])
        bridgetoifmap[bridgeport] = int(ifidx)
    ifnamemap = {}
    havenames = False
    for vb in conn.walk('1.3.6.1.2.1.31.1.1.1.1'):
        ifidx, ifname = vb
        if not ifname:
            continue
        havenames = True
        ifidx = int(str(ifidx).rsplit('.', 1)[1])
        ifnamemap[ifidx] = str(ifname)
    if not havenames:
        for vb in conn.walk('1.3.6.1.2.1.2.2.1.2'):
            ifidx, ifname = vb
            ifidx = int(str(ifidx).rsplit('.', 1)[1])
            ifnamemap[ifidx] = str(ifname)
    maccounts = {}
    for mac in mactobridge:
        ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        if ifname not in maccounts:
            maccounts[ifname] = 1
        else:
            maccounts[ifname] += 1
    _macsbyswitch[switch] = {}
    for mac in mactobridge:
        # We want to merge it so that when a mac appears in multiple
        # places, it is captured.
        ifname = ifnamemap[bridgetoifmap[mactobridge[mac]]]
        if mac in _macmap:
            _macmap[mac].append((switch, ifname, maccounts[ifname]))
        else:
            _macmap[mac] = [(switch, ifname, maccounts[ifname])]
        if ifname in _macsbyswitch[switch]:
            _macsbyswitch[switch][ifname].append(mac)
        else:
            _macsbyswitch[switch][ifname] = [mac]
        nodename = _nodelookup(switch, ifname)
        if nodename is not None:
            if mac in _nodesbymac and _nodesbymac[mac] != nodename:
                log.log({'warning': '{0} and {1} described by ambiguous'
                                    ' switch topology values'.format(nodename,
                                                              _nodesbymac[mac]
                                                              )})
            _nodesbymac[mac] = nodename
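The walks above amount to a three-way join across SNMP tables: dot1qTpFdbPort (mac to bridge port), dot1dBasePortIfIndex (bridge port to ifIndex), and ifName (ifIndex to name). A self-contained sketch of that join, with canned walk results standing in for a live snmp.Session:

# Canned stand-ins for the three table walks performed above.
mactobridge = {'52:54:00:12:34:56': 3, '52:54:00:ab:cd:ef': 3}
bridgetoifmap = {3: 10007}
ifnamemap = {10007: 'Ethernet1/7'}

macsbyport = {}
for mac, bridgeport in mactobridge.items():
    ifname = ifnamemap[bridgetoifmap[bridgeport]]
    macsbyport.setdefault(ifname, []).append(mac)

# Multiple macs behind one port generally means the port faces another
# switch or a hypervisor rather than a single node, which is why the
# code above also tracks per-port mac counts.
print(macsbyport)  # {'Ethernet1/7': ['52:54:00:12:34:56', '52:54:00:ab:cd:ef']}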
Example #28
0
def check_reply(node, info, packet, sock, cfg, reqview):
    httpboot = info['architecture'] == 'uefi-httpboot'
    replen = 275  # default is going to be 286
    cfd = cfg.get_node_attributes(node, ('deployment.*'))
    profile = cfd.get(node, {}).get('deployment.pendingprofile',
                                    {}).get('value', None)
    myipn = info['netinfo']['recvip']
    myipn = socket.inet_aton(myipn)
    if not profile:
        return
    rqtype = packet[53][0]
    insecuremode = cfd.get(node, {}).get('deployment.useinsecureprotocols',
                                         {}).get('value', None)
    if not insecuremode:
        insecuremode = 'never'
    if insecuremode == 'never' and not httpboot:
        if rqtype == 1 and info['architecture']:
            log.log({
                'info':
                'Boot attempt by {0} detected in insecure mode, but '
                'insecure mode is disabled.  Set the attribute '
                '`deployment.useinsecureprotocols` to `firmware` or '
                '`always` to enable support, or use UEFI HTTP boot '
                'with HTTPS.'.format(node)
            })
        return
    reply = bytearray(512)
    repview = memoryview(reply)
    repview[:20] = iphdr
    repview[12:16] = myipn
    repview[20:28] = udphdr
    repview = repview[28:]
    repview[0:1] = b'\x02'
    repview[1:10] = reqview[1:10]  # duplicate txid, hwlen, and others
    repview[10:11] = b'\x80'  # always set broadcast
    repview[28:44] = reqview[28:44]  # copy chaddr field
    if httpboot:
        proto = 'https' if insecuremode == 'never' else 'http'
        bootfile = '{0}://{1}/confluent-public/os/{2}/boot.img'.format(
            proto, info['netinfo']['recvip'], profile)
        if not isinstance(bootfile, bytes):
            bootfile = bootfile.encode('utf8')
        repview[108:108 + len(bootfile)] = bootfile
    repview[20:24] = myipn
    gateway = None
    netmask = None
    niccfg = netutil.get_nic_config(cfg, node, ifidx=info['netinfo']['ifidx'])
    if niccfg.get('ipv4_broken', False):
        # Received a request over a nic with no ipv4 configured, ignore it
        return
    clipn = None
    if niccfg['ipv4_address']:
        clipn = socket.inet_aton(niccfg['ipv4_address'])
        repview[16:20] = clipn
        gateway = niccfg['ipv4_gateway']
        if gateway:
            gateway = socket.inet_aton(gateway)
        netmask = niccfg['prefix']
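        # prefix length to 32-bit mask: e.g. prefix 24 gives
        # (2**32 - 1) ^ (2**8 - 1) = 0xffffff00, i.e. 255.255.255.0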
        netmask = (2**32 - 1) ^ (2**(32 - netmask) - 1)
        netmask = struct.pack('!I', netmask)
    repview[236:240] = b'\x63\x82\x53\x63'  # DHCP magic cookie
    repview[240:242] = b'\x35\x01'  # option 53 (DHCP message type), length 1
    if rqtype == 1:  # if discover, then offer
        repview[242:243] = b'\x02'
    elif rqtype == 3:  # if request, then ack
        repview[242:243] = b'\x05'
    repview[243:245] = b'\x36\x04'  # DHCP server identifier
    repview[245:249] = myipn
    repview[249:255] = b'\x33\x04\x00\x00\x00\xf0'  # fixed short lease time
    repview[255:257] = b'\x61\x11'  # option 97 (client machine UUID), length 17
    repview[257:274] = packet[97]  # echo the requester's UUID option back
    # Note that sending PXEClient kicks off the proxyDHCP procedure, ignoring
    # boot filename and such in the DHCP packet
    # we will simply always do it to provide the boot payload in a consistent
    # manner to both dhcp-elsewhere and fixed ip clients
    if info['architecture'] == 'uefi-httpboot':
        repview[replen - 1:replen + 11] = b'\x3c\x0aHTTPClient'
        replen += 12
    else:
        repview[replen - 1:replen + 10] = b'\x3c\x09PXEClient'
        replen += 11
    hwlen = bytearray(reqview[2:3].tobytes())[0]
    fulladdr = repview[28:28 + hwlen].tobytes()
    myipbypeer[fulladdr] = myipn
    if hwlen == 8:  # omnipath may present a mangled proxydhcp request later
        shortaddr = bytearray(6)
        shortaddr[0] = 2
        shortaddr[1:] = fulladdr[3:]
        myipbypeer[bytes(shortaddr)] = myipn
    if netmask:
        repview[replen - 1:replen + 1] = b'\x01\x04'  # option 1 (subnet mask)
        repview[replen + 1:replen + 5] = netmask
        replen += 6
    if gateway:
        repview[replen - 1:replen + 1] = b'\x03\x04'  # option 3 (router)
        repview[replen + 1:replen + 5] = gateway
        replen += 6
    repview[replen -
            1:replen] = b'\xff'  # end of options, should always be last byte
    repview = memoryview(reply)
    pktlen = struct.pack('!H', replen + 28)  # ip+udp = 28
    repview[2:4] = pktlen
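    # IP and UDP checksums are ones-complement sums over 16-bit words
    # (RFC 791/768); only the varying fields are summed here against
    # the precomputed constant header sum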
    curripsum = ~(_ipsum(constiphdrsum + pktlen + myipn)) & 0xffff
    repview[10:12] = struct.pack('!H', curripsum)
    repview[24:26] = struct.pack('!H', replen + 8)
    datasum = _ipsum(b'\x00\x11' + repview[24:26].tobytes() +
                     repview[12:replen + 28].tobytes())
    datasum = ~datasum & 0xffff
    repview[26:28] = struct.pack('!H', datasum)
    if clipn:
        staticassigns[fulladdr] = (clipn, repview[:replen + 28].tobytes())
    elif fulladdr in staticassigns:
        del staticassigns[fulladdr]
    send_raw_packet(repview, replen + 28, reqview, info)
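check_reply assembles the reply as raw bytes, so every DHCP option is written by hand in tag/length/value form, with replen tracking one byte past the next free position. A simplified sketch of the same bookkeeping, including the prefix-to-netmask conversion used above (append_option is a hypothetical helper, not confluent code):

import socket
import struct

def append_option(buf, offset, tag, value):
    # One DHCP option is a tag byte, a length byte, then the value;
    # return the offset of the next free byte.
    buf[offset] = tag
    buf[offset + 1] = len(value)
    buf[offset + 2:offset + 2 + len(value)] = value
    return offset + 2 + len(value)

prefix = 24
netmask = struct.pack('!I', (2**32 - 1) ^ (2**(32 - prefix) - 1))
buf = bytearray(64)
end = append_option(buf, 0, 1, netmask)  # option 1: subnet mask
end = append_option(buf, end, 3, socket.inet_aton('10.0.0.1'))  # option 3: router
buf[end] = 0xff  # end-of-options marker, always the last byte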
Example #29
0
def discover_node(cfg, handler, info, nodename, manual):
    known_nodes[nodename][info['hwaddr']] = info
    if info['hwaddr'] in unknown_info:
        del unknown_info[info['hwaddr']]
    info['discostatus'] = 'identified'
    dp = cfg.get_node_attributes(
        [nodename], ('discovery.policy', 'pubkeys.tls_hardwaremanager'))
    policy = dp.get(nodename, {}).get('discovery.policy',
                                      {}).get('value', None)
    if policy is None:
        policy = ''
    policies = set(policy.split(','))
    lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                      {}).get('value', None)
    # TODO(jjohnson2): permissive requires we guarantee storage of
    # the pubkeys, which is deferred for a little bit
    # Also, 'secure', when we have the needed infrastructure done
    # in some product or another.
    curruuid = info.get('uuid', False)
    if 'pxe' in policies and info['handler'] == pxeh:
        return do_pxe_discovery(cfg, handler, info, manual, nodename, policies)
    elif ('permissive' in policies and handler.https_supported and lastfp
          and not util.cert_matches(lastfp, handler.https_cert)
          and not manual):
        info['discofailure'] = 'fingerprint'
        log.log({
            'info':
            'Detected replacement of {0} with existing '
            'fingerprint and permissive discovery policy, not '
            'doing discovery unless discovery.policy=open or '
            'pubkeys.tls_hardwaremanager attribute is cleared '
            'first'.format(nodename)
        })
        return False  # With a permissive policy, do not discover new
    elif policies & set(('open', 'permissive')) or manual:
        info['nodename'] = nodename
        if info['handler'] == pxeh:
            return do_pxe_discovery(cfg, handler, info, manual, nodename,
                                    policies)
        elif manual or not util.cert_matches(lastfp, handler.https_cert):
            # only 'discover' if it is not the same as last time
            try:
                handler.config(nodename)
            except Exception as e:
                info['discofailure'] = 'bug'
                if manual:
                    raise
                log.log({
                    'error':
                    'Error encountered trying to set up {0}, {1}'.format(
                        nodename, str(e))
                })
                traceback.print_exc()
                return False
            newnodeattribs = {}
            if 'uuid' in info:
                newnodeattribs['id.uuid'] = info['uuid']
            if 'serialnumber' in info:
                newnodeattribs['id.serial'] = info['serialnumber']
            if 'modelnumber' in info:
                newnodeattribs['id.model'] = info['modelnumber']
            if handler.https_cert:
                newnodeattribs['pubkeys.tls_hardwaremanager'] = \
                    util.get_fingerprint(handler.https_cert, 'sha256')
            if newnodeattribs:
                cfg.set_node_attributes({nodename: newnodeattribs})
            log.log({
                'info':
                'Discovered {0} ({1})'.format(nodename, handler.devname)
            })
        info['discostatus'] = 'discovered'
        for i in pending_by_uuid.get(curruuid, []):
            eventlet.spawn_n(_recheck_single_unknown_info, cfg, i)
        return True
    log.log({
        'info':
        'Detected {0}, but discovery.policy is not set to a '
        'value allowing discovery (open or permissive)'.format(nodename)
    })
    info['discofailure'] = 'policy'
    return False
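discovery.policy is stored as a comma-separated string and evaluated as a set, so compound values like 'open,pxe' work without special casing. A small illustration of the gating used above (allowed is a hypothetical helper, not a confluent function):

def allowed(policy_attr, manual=False):
    # Same shape as discover_node: split the attribute into a set and
    # intersect with the policies that permit full discovery.
    policies = set((policy_attr or '').split(','))
    return manual or bool(policies & set(('open', 'permissive')))

assert allowed('open')
assert allowed('permissive,pxe')
assert not allowed('')           # unset policy blocks discovery
assert allowed('', manual=True)  # manual requests override policy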
Example #30
0
def eval_node(cfg, handler, info, nodename, manual=False):
    try:
        handler.probe()  # unicast interrogation as possible to get more data
        # for now, we search switch only, ideally we search cmm, smm, and
        # switch concurrently
        # do some preconfig, for example, to bring a SMM online if applicable
        handler.preconfig()
    except Exception as e:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        errorstr = 'An error occurred during discovery, check the ' \
                   'trace and stderr logs, mac was {0} and ip was {1}' \
                   ', the node or the containing enclosure was {2}' \
                   ''.format(info['hwaddr'], handler.ipaddr, nodename)
        traceback.print_exc()
        if manual:
            raise exc.InvalidArgumentException(errorstr)
        log.log({'error': errorstr})
        return
    # first, if had a bay, it was in an enclosure.  If it was discovered by
    # switch, it is probably the enclosure manager and not
    # the node directly.  switch is ambiguous and we should leave it alone
    if 'enclosure.bay' in info and handler.is_enclosure:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        log.log({
            'error':
            'Something that is an enclosure reported a bay, '
            'not possible'
        })
        if manual:
            raise exc.InvalidArgumentException()
        return
    nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename))
    if not handler.is_enclosure and nl:
        # The specified node is an enclosure (has nodes mapped to it), but
        # what we are talking to is *not* an enclosure
        # might be ambiguous, need to match chassis-uuid as well..
        if 'enclosure.bay' not in info:
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            errorstr = '{2} with mac {0} is in {1}, but unable to ' \
                       'determine bay number'.format(info['hwaddr'],
                                                     nodename,
                                                     handler.ipaddr)
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            return
        enl = list(cfg.filter_node_attributes('enclosure.extends=' + nodename))
        if enl:
            # ambiguous SMM situation according to the configuration, we need
            # to match uuid
            encuuid = info['attributes'].get('chassis-uuid', None)
            if encuuid:
                enl = list(cfg.filter_node_attributes('id.uuid=' + encuuid))
                if len(enl) != 1:
                    # errorstr = 'No SMM by given UUID known, *yet*'
                    # if manual:
                    #     raise exc.InvalidArgumentException(errorstr)
                    # log.log({'error': errorstr})
                    if encuuid in pending_by_uuid:
                        pending_by_uuid[encuuid].add(info)
                    else:
                        pending_by_uuid[encuuid] = set([info])
                    return
                # We found the real smm, replace the list with the actual smm
                # to continue
                nl = list(
                    cfg.filter_node_attributes('enclosure.manager=' + enl[0]))
            else:
                errorstr = 'Chained SMM configuration with older XCC, ' \
                           'unable to perform zero power discovery'
                if manual:
                    raise exc.InvalidArgumentException(errorstr)
                log.log({'error': errorstr})
                return
        # search for nodes fitting our description using filters
        # lead with the most specific to have a small second pass
        nl = list(
            cfg.filter_node_attributes(
                'enclosure.bay={0}'.format(info['enclosure.bay']), nl))
        if len(nl) != 1:
            info['discofailure'] = 'ambigconfig'
            if len(nl):
                errorstr = 'The following nodes have duplicate ' \
                           'enclosure attributes: ' + ','.join(nl)
            else:
                errorstr = 'The {0} in enclosure {1} bay {2} does not ' \
                           'seem to be a defined node ({3})'.format(
                                        handler.devname, nodename,
                                        info['enclosure.bay'],
                                        handler.ipaddr,
                                    )
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            return
        nodename = nl[0]
        if not discover_node(cfg, handler, info, nodename, manual):
            # store it as pending, assuming blocked on enclosure
            # assurance...
            pending_nodes[nodename] = info
    else:
        # we can and did accurately discover by switch or in enclosure
        # but... is this really ok?  could be on an upstream port or
        # erroneously put in the enclosure with no nodes yet
        # so first, see if the candidate node is a chain host
        if info['maccount']:
            # discovery happened through switch
            nl = list(
                cfg.filter_node_attributes('enclosure.extends=' + nodename))
            if nl:
                # The candidate nodename is the head of a chain, we must
                # validate the smm certificate by the switch
                fprint = macmap.get_node_fingerprint(nodename, cfg)
                if not util.cert_matches(fprint, handler.https_cert):
                    info['discofailure'] = 'fingerprint'
                return
        if (info['maccount']
                and not handler.discoverable_by_switch(info['maccount'])):
            errorstr = 'The detected node {0} was detected using switch, ' \
                       'however the relevant port has too many macs learned ' \
                       'for this type of device ({1}) to be discovered by ' \
                       'switch.'.format(nodename, handler.devname)
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            return
        if not discover_node(cfg, handler, info, nodename, manual):
            pending_nodes[nodename] = info
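eval_node narrows candidates with chained attribute filters: first every node claiming enclosure.manager=<nodename>, then, within that list, the node with the matching enclosure.bay; exactly one survivor is required. A sketch of that two-stage narrowing against an in-memory table (filter_by is a hypothetical stand-in for cfg.filter_node_attributes):

nodes = {
    'n1': {'enclosure.manager': 'smm1', 'enclosure.bay': '1'},
    'n2': {'enclosure.manager': 'smm1', 'enclosure.bay': '2'},
    'n3': {'enclosure.manager': 'smm2', 'enclosure.bay': '1'},
}

def filter_by(expression, candidates=None):
    # Hypothetical stand-in for cfg.filter_node_attributes: match
    # nodes whose attribute equals the value in 'attr=value'.
    attr, _, value = expression.partition('=')
    pool = nodes if candidates is None else candidates
    return [n for n in pool if nodes[n].get(attr) == value]

nl = filter_by('enclosure.manager=smm1')  # lead with the broad filter
nl = filter_by('enclosure.bay=2', nl)     # then the most specific
assert nl == ['n2']  # exactly one match is required to proceed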
Example #31
0
def detected(info):
    global rechecker
    global rechecktime
    # later, manual and CMM discovery may act on SN and/or UUID
    for service in info['services']:
        if service in nodehandlers:
            if service not in known_services:
                known_services[service] = set([])
            handler = nodehandlers[service]
            info['handler'] = handler
            break
    else:  # no nodehandler, ignore for now
        return
    try:
        snum = info['attributes']['enclosure-serial-number'][0].strip()
        if snum:
            info['serialnumber'] = snum
            known_serials[info['serialnumber']] = info
    except (KeyError, IndexError):
        pass
    try:
        info['modelnumber'] = info['attributes'][
            'enclosure-machinetype-model'][0]
        known_services[service].add(info['modelnumber'])
    except (KeyError, IndexError):
        pass
    if info['hwaddr'] in known_info and 'addresses' in info:
        # we should tee these up for parsing when an enclosure comes up
        # also when switch config parameters change, should discard
        # and there's also if wiring is fixed...
        # of course could periodically revisit known_nodes
        # replace potentially stale address info
        #TODO(jjohnson2): remove this
        # temporary workaround for XCC not doing SLP DA over dedicated port
        # bz 93219, fix submitted, but not in builds yet
        # strictly speaking, a device legitimately going ipv4-only is
        # mistreated here, but that should be an edge case
        oldaddr = known_info[info['hwaddr']].get('addresses', [])
        for addr in info['addresses']:
            if addr[0].startswith('fe80::'):
                break
        else:
            for addr in oldaddr:
                if addr[0].startswith('fe80::'):
                    info['addresses'].append(addr)
        if known_info[info['hwaddr']].get('addresses',
                                          []) == info['addresses']:
            # if the ip addresses match, then assume no changes
            # now something resetting to defaults could, in theory
            # have the same address, but need to be reset
            # in that case, however, a user can clear pubkeys to force a check
            return
    known_info[info['hwaddr']] = info
    cfg = cfm.ConfigManager(None)
    if handler:
        handler = handler.NodeHandler(info, cfg)
        handler.scan()
    uuid = info.get('uuid', None)
    if uuid_is_valid(uuid):
        known_uuids[uuid][info['hwaddr']] = info
    if handler and handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log({
                'info':
                '{0} with hwaddr {1} is not reachable by https '
                'at address {2}'.format(handler.devname, info['hwaddr'],
                                        handler.ipaddr)
            })
            info['addresses'] = [
                x for x in info.get('addresses', []) if x != handler.ipaddr
            ]
            return
        log.log({
            'info':
            '{0} with hwaddr {1} at address {2} is not yet running '
            'https, will examine later'.format(handler.devname, info['hwaddr'],
                                               handler.ipaddr)
        })
        if rechecker is not None and rechecktime > util.monotonic_time() + 300:
            rechecker.cancel()
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 300
            rechecker = eventlet.spawn_after(300, _periodic_recheck, cfg)
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        #TODO, eventlet spawn after to recheck sooner, or somehow else
        # influence periodic recheck to shorten delay?
        return
    nodename, info['maccount'] = get_nodename(cfg, handler, info)
    if nodename and handler and handler.https_supported:
        dp = cfg.get_node_attributes([nodename],
                                     ('pubkeys.tls_hardwaremanager', ))
        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                          {}).get('value', None)
        if util.cert_matches(lastfp, handler.https_cert):
            info['nodename'] = nodename
            known_nodes[nodename][info['hwaddr']] = info
            info['discostatus'] = 'discovered'
            return  # already known, no need for more
    #TODO(jjohnson2): We might have to get UUID for certain searches...
    #for now defer probe until inside eval_node.  We might not have
    #a nodename without probe in the future.
    if nodename and handler:
        eval_node(cfg, handler, info, nodename)
    elif handler:
        log.log({
            'info':
            'Detected unknown {0} with hwaddr {1} at '
            'address {2}'.format(handler.devname, info['hwaddr'],
                                 handler.ipaddr)
        })
        info['discostatus'] = 'unidentified'
        unknown_info[info['hwaddr']] = info
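The address merge in detected leans on Python's for/else: the else branch runs only when no fe80:: entry was found in the fresh report, and then the previously learned link-local addresses are carried over. A distilled version of that merge:

def merge_addresses(newaddrs, oldaddrs):
    # for/else as used above: if the new report already carries a
    # link-local address, keep it as-is; otherwise inherit any old
    # link-local entries.
    for addr in newaddrs:
        if addr[0].startswith('fe80::'):
            break
    else:
        newaddrs = newaddrs + [a for a in oldaddrs
                               if a[0].startswith('fe80::')]
    return newaddrs

old = [('fe80::1%eth0', 443)]
new = [('10.1.2.3', 443)]
assert merge_addresses(new, old) == [('10.1.2.3', 443), ('fe80::1%eth0', 443)]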
Example #32
0
def eval_node(cfg, handler, info, nodename, manual=False):
    try:
        handler.probe()  # unicast interrogation as possible to get more data
        # for now, we search switch only, ideally we search cmm, smm, and
        # switch concurrently
        # do some preconfig, for example, to bring a SMM online if applicable
        handler.preconfig()
    except Exception as e:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        errorstr = 'An error occurred during discovery, check the ' \
                   'trace and stderr logs, mac was {0} and ip was {1}' \
                   ', the node or the containing enclosure was {2}' \
                   ''.format(info['hwaddr'], handler.ipaddr, nodename)
        traceback.print_exc()
        if manual:
            raise exc.InvalidArgumentException(errorstr)
        log.log({'error': errorstr})
        return
    # first, if had a bay, it was in an enclosure.  If it was discovered by
    # switch, it is probably the enclosure manager and not
    # the node directly.  switch is ambiguous and we should leave it alone
    if 'enclosure.bay' in info and handler.is_enclosure:
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        log.log({
            'error':
            'Something that is an enclosure reported a bay, '
            'not possible'
        })
        if manual:
            raise exc.InvalidArgumentException()
        return
    nl = list(cfg.filter_node_attributes('enclosure.manager=' + nodename))
    if not handler.is_enclosure and nl:
        # The specified node is an enclosure (has nodes mapped to it), but
        # what we are talking to is *not* an enclosure
        if 'enclosure.bay' not in info:
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            errorstr = '{2} with mac {0} is in {1}, but unable to ' \
                       'determine bay number'.format(info['hwaddr'],
                                                     nodename,
                                                     handler.ipaddr)
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            return
        # search for nodes fitting our description using filters
        # lead with the most specific to have a small second pass
        nl = cfg.filter_node_attributes(
            'enclosure.bay={0}'.format(info['enclosure.bay']), nl)
        nl = list(nl)
        if len(nl) != 1:
            info['discofailure'] = 'ambigconfig'
            if len(nl):
                errorstr = 'The following nodes have duplicate ' \
                           'enclosure attributes: ' + ','.join(nl)
            else:
                errorstr = 'The {0} in enclosure {1} bay {2} does not ' \
                           'seem to be a defined node ({3})'.format(
                                        handler.devname, nodename,
                                        info['enclosure.bay'],
                                        handler.ipaddr,
                                    )
            if manual:
                raise exc.InvalidArgumentException(errorstr)
            log.log({'error': errorstr})
            unknown_info[info['hwaddr']] = info
            info['discostatus'] = 'unidentified'
            return
        nodename = nl[0]
        if not discover_node(cfg, handler, info, nodename, manual):
            # store it as pending, assuming blocked on enclosure
            # assurance...
            pending_nodes[nodename] = info
    else:
        # we can and did accurately discover by switch or in enclosure
        if not discover_node(cfg, handler, info, nodename, manual):
            pending_nodes[nodename] = info