Example 1
def attachsession(self, session):
    self.clisession = session
    self.data_handler = session.data_handler
    termreq = {
        'proxyconsole': {
            'name': self.myname,
            'user': self.user,
            'tenant': self.cfm.tenant,
            'node': self.node,
            'skipreplay': self.skipreplay,
            'width': self.initsize[0],
            'height': self.initsize[1],
            #TODO(jjohnson2): declare myself as a proxy,
            #facilitate redirect rather than relay on manager change
        },
    }
    try:
        remote = socket.create_connection((self.managerinfo['address'], 13001))
        remote = ssl.wrap_socket(remote, cert_reqs=ssl.CERT_NONE,
                                 keyfile='/etc/confluent/privkey.pem',
                                 certfile='/etc/confluent/srvcert.pem')
        if not util.cert_matches(self.managerinfo['fingerprint'],
                                 remote.getpeercert(binary_form=True)):
            raise Exception('Invalid peer certificate')
    except Exception:
        eventlet.sleep(3)
        if self.clisession:
            self.clisession.detach()
        self.detachsession(None)
        return
    tlvdata.recv(remote)
    tlvdata.recv(remote)
    tlvdata.send(remote, termreq)
    self.remote = remote
    eventlet.spawn(self.relay_data)
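Every connection in these examples follows the same pinning pattern: establish TLS with CA verification disabled, then compare the peer's DER certificate against a stored fingerprint before trusting the link. A minimal self-contained sketch of that pattern, assuming the pinned value is a hex SHA-256 digest of the DER bytes (util.cert_matches is confluent's own helper and may use a different format):

import hashlib
import socket
import ssl

def connect_pinned(host, port, pinned_sha256_hex):
    # Disable CA verification; trust is established by the pin check below.
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    conn = ctx.wrap_socket(socket.create_connection((host, port)))
    der = conn.getpeercert(binary_form=True)
    if hashlib.sha256(der).hexdigest() != pinned_sha256_hex:
        conn.close()
        raise Exception('Peer certificate does not match pinned fingerprint')
    return conn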
Example 2
def handle_dispatch(connection, cert, dispatch, peername):
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    if not util.cert_matches(
            cfm.get_collective_member(peername)['fingerprint'], cert):
        connection.close()
        return
    if dispatch[0:2] != b'\x01\x03':  # magic value to indicate msgpack
        # We only support msgpack now
        # The magic should preclude any pickle, as the first byte can never be
        # under 0x20 or so.
        connection.close()
        return
    dispatch = msgpack.unpackb(dispatch[2:], raw=False)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    inputdata = msg.get_input_message(pathcomponents, operation, inputdata,
                                      nodes, dispatch['isnoderange'],
                                      configmanager)
    plugroute = routespec.routeinfo
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(nodes,
                                                 plugroute['pluginattrs'])
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            if hfunc in nodesbyhandler:
                nodesbyhandler[hfunc].append(node)
            else:
                nodesbyhandler[hfunc] = [node]
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(
                hfunc(nodes=nodesbyhandler[hfunc],
                      element=pathcomponents,
                      configmanager=configmanager,
                      inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res)
    except Exception as res:
        _forward_rsp(connection, res)
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
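The b'\x01\x03' prefix is a two-byte format tag, so a receiver can reject anything that is not the expected msgpack encoding before deserializing it. A round-trip sketch of that framing, assuming the msgpack package:

import msgpack

MAGIC = b'\x01\x03'  # marks the payload as msgpack

def pack_dispatch(payload):
    return MAGIC + msgpack.packb(payload, use_bin_type=False)

def unpack_dispatch(data):
    if data[0:2] != MAGIC:
        raise ValueError('refusing non-msgpack dispatch')
    return msgpack.unpackb(data[2:], raw=False)

req = {'operation': 'retrieve', 'nodes': ['n1', 'n2']}
assert unpack_dispatch(pack_dispatch(req)) == req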
Example 3
def get_chained_smm_name(nodename, cfg, handler, nl=None, checkswitch=True):
    # nodename is the head of the chain, cfg is a configmanager, handler
    # is the handler of the current candidate, nl is optional indication
    # of the next link in the chain, checkswitch can disable the switch
    # search if not indicated by current situation
    # returns the new name and whether it has been securely validated or not
    # first we check to see if directly connected
    mycert = handler.https_cert
    if checkswitch:
        fprints = macmap.get_node_fingerprints(nodename, cfg)
        for fprint in fprints:
            if util.cert_matches(fprint[0], mycert):
                # ok we have a direct match, it is this node
                return nodename, fprint[1]
    # ok, unable to get it, need to traverse the chain from the beginning
    if not nl:
        nl = list(cfg.filter_node_attributes(
            'enclosure.extends=' + nodename))
    while nl:
        if len(nl) != 1:
            raise exc.InvalidArgumentException('Multiple enclosures trying to '
                                               'extend a single enclosure')
        cd = cfg.get_node_attributes(nodename, ['hardwaremanagement.manager',
                                                'pubkeys.tls_hardwaremanager'])
        smmaddr = cd[nodename]['hardwaremanagement.manager']['value']
        pkey = cd[nodename].get('pubkeys.tls_hardwaremanager', {}).get(
            'value', None)
        if not pkey:
            # We cannot continue through a break in the chain
            return None, False
        if pkey:
            cv = util.TLSCertVerifier(
                cfg, nodename, 'pubkeys.tls_hardwaremanager').verify_cert
            for fprint in get_smm_neighbor_fingerprints(smmaddr, cv):
                if util.cert_matches(fprint, mycert):
                    # a trusted chain member vouched for the cert
                    # so it's validated
                    return nl[0], True
            # advance down the chain by one and try again
        nodename = nl[0]
        nl = list(cfg.filter_node_attributes(
            'enclosure.extends=' + nodename))
    return None, False
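The traversal above is a generic walk of a singly-linked chain expressed through enclosure.extends attributes, erroring if a link fans out. The bare pattern, with a hypothetical extends_by mapping standing in for cfg.filter_node_attributes (and an added cycle guard):

def walk_chain(head, extends_by):
    # extends_by: {node: [nodes declaring enclosure.extends=node]}
    node, seen = head, {head}
    while extends_by.get(node):
        nl = extends_by[node]
        if len(nl) != 1:
            raise ValueError('Multiple enclosures extend ' + node)
        node = nl[0]
        if node in seen:
            raise ValueError('Cycle in enclosure.extends chain')
        seen.add(node)
    return node

assert walk_chain('smm1', {'smm1': ['smm2'], 'smm2': ['smm3']}) == 'smm3'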
Example 4
def _recheck_single_unknown(configmanager, mac):
    global rechecker
    global rechecktime
    info = unknown_info.get(mac, None)
    if not info:
        return
    if info['handler'] != pxeh and not info.get('addresses', None):
        #log.log({'info': 'Missing address information in ' + repr(info)})
        return
    handler = info['handler'].NodeHandler(info, configmanager)
    if handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log({
                'info':
                '{0} with hwaddr {1} is not reachable at {2}'
                ''.format(handler.devname, info['hwaddr'], handler.ipaddr)
            })
            # addresses data is bad, delete the offending ip
            info['addresses'] = [
                x for x in info.get('addresses', []) if x != handler.ipaddr
            ]
            # TODO(jjohnson2):  rescan due to bad peer addr data?
            # not just wait around for the next announce
            return
        log.log({
            'info':
            '{0} with hwaddr {1} at address {2} is not yet running '
            'https, will examine later'.format(handler.devname, info['hwaddr'],
                                               handler.ipaddr)
        })
        if rechecker is not None and rechecktime > util.monotonic_time() + 300:
            rechecker.cancel()
        # if cancel did not result in dead, then we are in progress
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 300
            rechecker = eventlet.spawn_after(300, _periodic_recheck,
                                             configmanager)
        return
    nodename = get_nodename(configmanager, handler, info)
    if nodename:
        if handler.https_supported:
            dp = configmanager.get_node_attributes(
                [nodename], ('pubkeys.tls_hardwaremanager', ))
            lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                              {}).get('value', None)
            if util.cert_matches(lastfp, handler.https_cert):
                info['nodename'] = nodename
                known_nodes[nodename][info['hwaddr']] = info
                info['discostatus'] = 'discovered'
                return  # already known, no need for more
        discopool.spawn_n(eval_node, configmanager, handler, info, nodename)
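The rechecker/rechecktime globals implement a shared, debounced timer: a timer scheduled too far in the future is cancelled and re-armed, and a new one is only spawned when no live timer exists. The scheduling logic in isolation, assuming eventlet (the function names here are illustrative):

import eventlet
import time

rechecker = None
rechecktime = None

def schedule_recheck(callback, delay=300):
    global rechecker, rechecktime
    now = time.monotonic()
    if rechecker is not None and rechecktime > now + delay:
        rechecker.cancel()  # only succeeds if the timer has not fired yet
    # if cancel did not leave it dead, a recheck is already in progress
    if rechecker is None or rechecker.dead:
        rechecktime = now + delay
        rechecker = eventlet.spawn_after(delay, callback)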
Example 5
def connect_to_collective(cert, member, remote=None):
    if remote is None:
        _, remote = create_connection(member)
        if isinstance(remote, Exception):
            raise remote
    if cert:
        fprint = cert
    else:
        collent = cfm.get_collective_member_by_address(member)
        fprint = collent['fingerprint']
    if not util.cert_matches(fprint, remote.getpeercert(binary_form=True)):
        # probably Janeway up to something
        raise Exception("Certificate mismatch in the collective")
    return remote
Example 6
def handle_dispatch(connection, cert, dispatch, peername):
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    if not util.cert_matches(
            cfm.get_collective_member(peername)['fingerprint'], cert):
        connection.close()
        return
    pversion = 0
    if bytearray(dispatch)[0] == 0x80:
        pversion = bytearray(dispatch)[1]
    dispatch = pickle.loads(dispatch, **pargs)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    plugroute = routespec.routeinfo
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(nodes,
                                                 plugroute['pluginattrs'])
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            if hfunc in nodesbyhandler:
                nodesbyhandler[hfunc].append(node)
            else:
                nodesbyhandler[hfunc] = [node]
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(
                hfunc(nodes=nodesbyhandler[hfunc],
                      element=pathcomponents,
                      configmanager=configmanager,
                      inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res, pversion)
    except Exception as res:
        _forward_rsp(connection, res, pversion)
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
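The 0x80 check reads the pickle stream header: at protocol 2 and above, a pickle begins with the PROTO opcode (0x80) followed by the protocol number, which is what pversion captures. A quick illustration:

import pickle

blob = pickle.dumps({'operation': 'update'}, protocol=2)
assert bytearray(blob)[0] == 0x80  # PROTO opcode
assert bytearray(blob)[1] == 2     # protocol version byte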
Example 7
def connect_to_collective(cert, member):
    remote = socket.create_connection((member, 13001))
    # TLS cert validation is custom and will not pass normal CA vetting;
    # overriding that in the right place would take enormous effort, so just
    # defer validation until after connect
    remote = ssl.wrap_socket(remote, cert_reqs=ssl.CERT_NONE,
                             keyfile='/etc/confluent/privkey.pem',
                             certfile='/etc/confluent/srvcert.pem')
    if cert:
        fprint = cert
    else:
        collent = cfm.get_collective_member_by_address(member)
        fprint = collent['fingerprint']
    if not util.cert_matches(fprint, remote.getpeercert(binary_form=True)):
        # probably Janeway up to something
        raise Exception("Certificate mismatch in the collective")
    return remote
Example 8
def handle_connection(connection, cert, request, local=False):
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(
                    connection, {
                        'collective': {
                            'error':
                            'Collective mode not '
                            'enabled on this '
                            'system'
                        }
                    })
                return
            if follower:
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                if not (linfo
                        and util.cert_matches(linfo['fingerprint'], cert)):
                    remote.close()
                    tlvdata.send(connection, {
                        'error':
                        'Invalid certificate, '
                        'redo invitation process'
                    })
                    connection.close()
                    return
                tlvdata.recv(remote)  # ignore banner
                tlvdata.recv(remote)  # ignore authpassed: 0
                tlvdata.send(remote, {
                    'collective': {
                        'operation': 'getinfo',
                        'name': get_myname()
                    }
                })
                collinfo = tlvdata.recv(remote)
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                if (todelete == collinfo['leader']
                        or todelete in collinfo['active']):
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is still active, stop the confluent service to remove it'
                                .format(todelete)
                            }
                        })
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is not a recognized collective member'.
                                format(todelete)
                            }
                        })
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status':
                            'Successfully deleted {0}'.format(todelete)
                        }
                    })
                connection.close()
            return
        if 'invite' == operation:
            try:
                cfm.check_quorum()
            except exc.DegradedCollective:
                tlvdata.send(connection, {
                    'collective': {
                        'error': 'Collective does not have quorum'
                    }
                })
                return
            #TODO(jjohnson2): Cannot do the invitation if not the head node, the certificate hand-carrying
            #can't work in such a case.
            name = request['name']
            invitation = invites.create_server_invitation(name)
            tlvdata.send(connection,
                         {'collective': {
                             'invitation': invitation
                         }})
            connection.close()
        if 'join' == operation:
            invitation = request['invitation']
            try:
                invitation = base64.b64decode(invitation)
                name, invitation = invitation.split(b'@', 1)
                name = util.stringify(name)
            except Exception:
                tlvdata.send(
                    connection,
                    {'collective': {
                        'status': 'Invalid token format'
                    }})
                connection.close()
                return
            host = request['server']
            try:
                remote = socket.create_connection((host, 13001))
                # This isn't what it looks like.  We do CERT_NONE to disable
                # openssl verification, but then use the invitation as a
                # shared secret to validate the certs as part of the join
                # operation
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
            except Exception:
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status': 'Failed to connect to {0}'.format(host)
                        }
                    })
                connection.close()
                return
            mycert = util.get_certificate_from_file(
                '/etc/confluent/srvcert.pem')
            cert = remote.getpeercert(binary_form=True)
            proof = base64.b64encode(
                invites.create_client_proof(invitation, mycert, cert))
            tlvdata.recv(remote)  # ignore banner
            tlvdata.recv(remote)  # ignore authpassed: 0
            tlvdata.send(remote, {
                'collective': {
                    'operation': 'enroll',
                    'name': name,
                    'hmac': proof
                }
            })
            rsp = tlvdata.recv(remote)
            if 'error' in rsp:
                tlvdata.send(connection,
                             {'collective': {
                                 'status': rsp['error']
                             }})
                connection.close()
                return
            proof = rsp['collective']['approval']
            proof = base64.b64decode(proof)
            j = invites.check_server_proof(invitation, mycert, cert, proof)
            if not j:
                remote.close()
                tlvdata.send(connection,
                             {'collective': {
                                 'status': 'Bad server token'
                             }})
                connection.close()
                return
            tlvdata.send(connection, {'collective': {'status': 'Success'}})
            connection.close()
            currentleader = rsp['collective']['leader']
            f = open('/etc/confluent/cfg/myname', 'w')
            f.write(name)
            f.close()
            log.log({
                'info': 'Connecting to collective due to join',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader,
                             rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        #TODO(jjohnson2): error appropriately when asked to enroll, but the master is elsewhere
        mycert = util.get_certificate_from_file('/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert, cert,
                                           proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(
            connection, {
                'collective': {
                    'approval': myrsp,
                    'fingerprint': ldrfprint,
                    'leader': get_leader(connection)
                }
            })
    if 'assimilate' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(
                connection,
                {'error': 'Unrecognized leader, '
                 'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            tlvdata.send(
                connection, {
                'error': 'Refusing to be assimilated by inferior '
                'transaction count',
                    'txcount': cfm._txcount,
                })
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({
            'info': 'Connecting in response to assimilation',
            'subsystem': 'collective'
        })
        eventlet.spawn_n(connect_to_leader,
                         None,
                         None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {
                'error': 'Connecting right now',
                'backoff': True
            })
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(
                connection, {
                    'error': 'Cannot assimilate, our leader is '
                    'in another castle',
                    'leader': currentleader
                })
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            retire_as_leader()
            tlvdata.send(
                connection, {
                    'error': 'Client has higher transaction count, '
                    'should assimilate me, connecting..',
                    'txcount': cfm._txcount
                })
            log.log({
                'info': 'Connecting to leader due to superior '
                'transaction count',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {
                'txcount': cfm._txcount,
                'dbsize': len(cfgdata)
            })
            connection.sendall(cfgdata)
        #tlvdata.send(connection, {'tenants': 0}) # skip the tenants for now,
        # so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)
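The join/enroll exchange above never relies on a CA; instead each side proves knowledge of the invitation secret over both certificates (create_client_proof, check_server_proof, and friends live in confluent's invites module). A hedged sketch of one plausible construction, using HMAC over the concatenated DER certificates; the real scheme may differ:

import hashlib
import hmac

def create_proof(invitation, mycert, peercert):
    # illustrative only: bind both certs to the shared invitation secret
    return hmac.new(invitation, mycert + peercert, hashlib.sha256).digest()

def check_proof(invitation, peercert, mycert, proof):
    expected = hmac.new(invitation, peercert + mycert, hashlib.sha256).digest()
    return hmac.compare_digest(expected, proof)  # constant-time compare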
Example 9
def dispatch_request(nodes, manager, element, configmanager, inputdata,
                     operation, isnoderange):
    a = configmanager.get_collective_member(manager)
    try:
        remote = socket.create_connection((a['address'], 13001))
        remote.settimeout(180)
        remote = ssl.wrap_socket(remote,
                                 cert_reqs=ssl.CERT_NONE,
                                 keyfile='/etc/confluent/privkey.pem',
                                 certfile='/etc/confluent/srvcert.pem')
    except Exception:
        for node in nodes:
            if a:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} is unreachable'.format(a['name']))
            else:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    '"{0}" is not recognized as a collective member'.format(
                        manager))

        return
    if not util.cert_matches(a['fingerprint'],
                             remote.getpeercert(binary_form=True)):
        raise Exception("Invalid certificate on peer")
    banner = tlvdata.recv(remote)
    vers = banner.split()[2]
    pvers = 2  # default for v0 (or unrecognized) banners
    if vers == b'v1':
        pvers = 4
    if sys.version_info[0] < 3:
        pvers = 2
    tlvdata.recv(remote)
    myname = collective.get_myname()
    dreq = b'\x01\x03' + msgpack.packb(
        {
            'name': myname,
            'nodes': list(nodes),
            'path': element,
            'tenant': configmanager.tenant,
            'operation': operation,
            'inputdata': inputdata,
            'isnoderange': isnoderange
        },
        use_bin_type=False)
    tlvdata.send(remote, {'dispatch': {'name': myname, 'length': len(dreq)}})
    remote.sendall(dreq)
    while True:
        try:
            rlen = remote.recv(8)
        except Exception:
            for node in nodes:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} went unreachable'.format(a['name']))
            return
        while len(rlen) < 8:
            try:
                nlen = remote.recv(8 - len(rlen))
            except Exception:
                nlen = 0
            if not nlen:
                for node in nodes:
                    yield msg.ConfluentResourceUnavailable(
                        node, 'Collective member {0} went unreachable'.format(
                            a['name']))
                return
            rlen += nlen
        rlen = struct.unpack('!Q', rlen)[0]
        if rlen == 0:
            break
        try:
            rsp = remote.recv(rlen)
        except Exception:
            for node in nodes:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} went unreachable'.format(a['name']))
            return
        while len(rsp) < rlen:
            try:
                nrsp = remote.recv(rlen - len(rsp))
            except Exception:
                nrsp = 0
            if not nrsp:
                for node in nodes:
                    yield msg.ConfluentResourceUnavailable(
                        node, 'Collective member {0} went unreachable'.format(
                            a['name']))
                return
            rsp += nrsp
        try:
            rsp = msg.msg_deserialize(rsp)
        except Exception:
            rsp = exc.deserialize_exc(rsp)
        if isinstance(rsp, Exception):
            raise rsp
        if not rsp:
            raise Exception(
                'Error in cross-collective serialize/deserialize, see remote logs'
            )
        yield rsp
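All of the nested recv loops above implement one primitive: read exactly N bytes of a length-prefixed frame, treating a short read as a lost peer. Factored into a helper (the dispatch code inlines it so each failure can yield per-node errors):

import struct

def recv_exact(sock, count):
    # return exactly count bytes, or None if the peer disconnects
    buf = b''
    while len(buf) < count:
        chunk = sock.recv(count - len(buf))
        if not chunk:
            return None
        buf += chunk
    return buf

def recv_frame(sock):
    rlen = recv_exact(sock, 8)
    if rlen is None:
        return None
    length = struct.unpack('!Q', rlen)[0]  # 8-byte big-endian length
    if length == 0:
        return b''  # the all-zero trailer that ends a dispatch
    return recv_exact(sock, length)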
Example 10
def validate_cert(self, certificate):
    # broadly speaking, merely checks consistency moment to moment,
    # but if https_cert gets stricter, this check means something
    fprint = util.get_fingerprint(self.https_cert)
    return util.cert_matches(fprint, certificate)
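util.get_fingerprint and util.cert_matches are confluent helpers, but conceptually a fingerprint is just a digest of the DER certificate tagged with its algorithm. A hypothetical stand-in, assuming an 'algo$hexdigest' format:

import hashlib
import hmac

def get_fingerprint(cert_der, algo='sha256'):
    # hypothetical stand-in for util.get_fingerprint
    return algo + '$' + hashlib.new(algo, cert_der).hexdigest()

def cert_matches(fingerprint, cert_der):
    # hypothetical stand-in for util.cert_matches; tolerate missing values
    if not fingerprint or not cert_der:
        return False
    algo = fingerprint.split('$', 1)[0]
    return hmac.compare_digest(get_fingerprint(cert_der, algo), fingerprint)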
Example 11
def dispatch_request(nodes, manager, element, configmanager, inputdata,
                     operation):
    a = configmanager.get_collective_member(manager)
    try:
        remote = socket.create_connection((a['address'], 13001))
        remote.settimeout(90)
        remote = ssl.wrap_socket(remote,
                                 cert_reqs=ssl.CERT_NONE,
                                 keyfile='/etc/confluent/privkey.pem',
                                 certfile='/etc/confluent/srvcert.pem')
    except Exception:
        for node in nodes:
            if a:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} is unreachable'.format(a['name']))
            else:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    '"{0}" is not recognized as a collective member'.format(
                        manager))

        return
    if not util.cert_matches(a['fingerprint'],
                             remote.getpeercert(binary_form=True)):
        raise Exception("Invalid certificate on peer")
    tlvdata.recv(remote)
    tlvdata.recv(remote)
    myname = collective.get_myname()
    dreq = pickle.dumps({
        'name': myname,
        'nodes': list(nodes),
        'path': element,
        'tenant': configmanager.tenant,
        'operation': operation,
        'inputdata': inputdata
    })
    tlvdata.send(remote, {'dispatch': {'name': myname, 'length': len(dreq)}})
    remote.sendall(dreq)
    while True:
        try:
            rlen = remote.recv(8)
        except Exception:
            for node in nodes:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} went unreachable'.format(a['name']))
            return
        while len(rlen) < 8:
            try:
                nlen = remote.recv(8 - len(rlen))
            except Exception:
                nlen = 0
            if not nlen:
                for node in nodes:
                    yield msg.ConfluentResourceUnavailable(
                        node, 'Collective member {0} went unreachable'.format(
                            a['name']))
                return
            rlen += nlen
        rlen = struct.unpack('!Q', rlen)[0]
        if rlen == 0:
            break
        try:
            rsp = remote.recv(rlen)
        except Exception:
            for node in nodes:
                yield msg.ConfluentResourceUnavailable(
                    node,
                    'Collective member {0} went unreachable'.format(a['name']))
            return
        while len(rsp) < rlen:
            try:
                nrsp = remote.recv(rlen - len(rsp))
            except Exception:
                nrsp = 0
            if not nrsp:
                for node in nodes:
                    yield msg.ConfluentResourceUnavailable(
                        node, 'Collective member {0} went unreachable'.format(
                            a['name']))
                return
            rsp += nrsp
        rsp = pickle.loads(rsp)
        if isinstance(rsp, Exception):
            raise rsp
        yield rsp
Example 12
def discover_node(cfg, handler, info, nodename, manual):
    known_nodes[nodename][info['hwaddr']] = info
    if info['hwaddr'] in unknown_info:
        del unknown_info[info['hwaddr']]
    info['discostatus'] = 'identified'
    dp = cfg.get_node_attributes(
        [nodename], ('discovery.policy', 'pubkeys.tls_hardwaremanager'))
    policy = dp.get(nodename, {}).get('discovery.policy',
                                      {}).get('value', None)
    if policy is None:
        policy = ''
    policies = set(policy.split(','))
    lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                      {}).get('value', None)
    # TODO(jjohnson2): permissive requires we guarantee storage of
    # the pubkeys, which is deferred for a little bit
    # Also, 'secure', when we have the needed infrastructure done
    # in some product or another.
    curruuid = info.get('uuid', False)
    if 'pxe' in policies and info['handler'] == pxeh:
        return do_pxe_discovery(cfg, handler, info, manual, nodename, policies)
    elif ('permissive' in policies and handler.https_supported and lastfp
          and not util.cert_matches(lastfp, handler.https_cert)
          and not manual):
        info['discofailure'] = 'fingerprint'
        log.log({
            'info':
            'Detected replacement of {0} with existing '
            'fingerprint and permissive discovery policy, not '
            'doing discovery unless discovery.policy=open or '
            'pubkeys.tls_hardwaremanager attribute is cleared '
            'first'.format(nodename)
        })
        return False  # With a permissive policy, do not discover new
    elif policies & set(('open', 'permissive')) or manual:
        info['nodename'] = nodename
        if info['handler'] == pxeh:
            return do_pxe_discovery(cfg, handler, info, manual, nodename,
                                    policies)
        elif manual or not util.cert_matches(lastfp, handler.https_cert):
            # only 'discover' if it is not the same as last time
            try:
                handler.config(nodename)
            except Exception as e:
                info['discofailure'] = 'bug'
                if manual:
                    raise
                log.log({
                    'error':
                    'Error encountered trying to set up {0}, {1}'.format(
                        nodename, str(e))
                })
                traceback.print_exc()
                return False
            newnodeattribs = {}
            if 'uuid' in info:
                newnodeattribs['id.uuid'] = info['uuid']
            if 'serialnumber' in info:
                newnodeattribs['id.serial'] = info['serialnumber']
            if 'modelnumber' in info:
                newnodeattribs['id.model'] = info['modelnumber']
            if handler.https_cert:
                newnodeattribs['pubkeys.tls_hardwaremanager'] = \
                    util.get_fingerprint(handler.https_cert, 'sha256')
            if newnodeattribs:
                cfg.set_node_attributes({nodename: newnodeattribs})
            log.log({
                'info':
                'Discovered {0} ({1})'.format(nodename, handler.devname)
            })
        info['discostatus'] = 'discovered'
        for i in pending_by_uuid.get(curruuid, []):
            eventlet.spawn_n(_recheck_single_unknown_info, cfg, i)
        return True
    log.log({
        'info':
        'Detected {0}, but discovery.policy is not set to a '
        'value allowing discovery (open or permissive)'.format(nodename)
    })
    info['discofailure'] = 'policy'
    return False
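Stripped of the logging and PXE branches, the gate above reduces to set arithmetic on the comma-separated discovery.policy attribute:

def discovery_allowed(policy_value, manual=False):
    policies = set((policy_value or '').split(','))
    return manual or bool(policies & {'open', 'permissive'})

assert discovery_allowed('open,pxe')
assert not discovery_allowed(None)
assert discovery_allowed('', manual=True)  # manual always wins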
Example 13
def detected(info):
    global rechecker
    global rechecktime
    # later, manual and CMM discovery may act on SN and/or UUID
    for service in info['services']:
        if service in nodehandlers:
            if service not in known_services:
                known_services[service] = set([])
            handler = nodehandlers[service]
            info['handler'] = handler
            break
    else:  # no nodehandler, ignore for now
        return
    try:
        snum = info['attributes']['enclosure-serial-number'][0].strip()
        if snum:
            info['serialnumber'] = snum
            known_serials[info['serialnumber']] = info
    except (KeyError, IndexError):
        pass
    try:
        info['modelnumber'] = info['attributes'][
            'enclosure-machinetype-model'][0]
        known_services[service].add(info['modelnumber'])
    except (KeyError, IndexError):
        pass
    if info['hwaddr'] in known_info and 'addresses' in info:
        # we should tee these up for parsing when an enclosure comes up
        # also when switch config parameters change, should discard
        # and there's also if wiring is fixed...
        # of course could periodically revisit known_nodes
        # replace potentially stale address info
        #TODO(jjohnson2): remove this
        # temporary workaround for XCC not doing SLP DA over dedicated port
        # bz 93219, fix submitted, but not in builds yet
        # strictly speaking, going ipv4 only legitimately is mistreated here,
        # but that should be an edge case
        oldaddr = known_info[info['hwaddr']].get('addresses', [])
        for addr in info['addresses']:
            if addr[0].startswith('fe80::'):
                break
        else:
            for addr in oldaddr:
                if addr[0].startswith('fe80::'):
                    info['addresses'].append(addr)
        if known_info[info['hwaddr']].get('addresses',
                                          []) == info['addresses']:
            # if the ip addresses match, then assume no changes
            # now something resetting to defaults could, in theory
            # have the same address, but need to be reset
            # in that case, however, a user can clear pubkeys to force a check
            return
    known_info[info['hwaddr']] = info
    cfg = cfm.ConfigManager(None)
    if handler:
        handler = handler.NodeHandler(info, cfg)
        handler.scan()
    uuid = info.get('uuid', None)
    if uuid_is_valid(uuid):
        known_uuids[uuid][info['hwaddr']] = info
    if handler and handler.https_supported and not handler.https_cert:
        if handler.cert_fail_reason == 'unreachable':
            log.log({
                'info':
                '{0} with hwaddr {1} is not reachable by https '
                'at address {2}'.format(handler.devname, info['hwaddr'],
                                        handler.ipaddr)
            })
            info['addresses'] = [
                x for x in info.get('addresses', []) if x != handler.ipaddr
            ]
            return
        log.log({
            'info':
            '{0} with hwaddr {1} at address {2} is not yet running '
            'https, will examine later'.format(handler.devname, info['hwaddr'],
                                               handler.ipaddr)
        })
        if rechecker is not None and rechecktime > util.monotonic_time() + 300:
            rechecker.cancel()
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 300
            rechecker = eventlet.spawn_after(300, _periodic_recheck, cfg)
        unknown_info[info['hwaddr']] = info
        info['discostatus'] = 'unidentified'
        #TODO, eventlet spawn after to recheck sooner, or somehow else
        # influence periodic recheck to shorten delay?
        return
    nodename, info['maccount'] = get_nodename(cfg, handler, info)
    if nodename and handler and handler.https_supported:
        dp = cfg.get_node_attributes([nodename],
                                     ('pubkeys.tls_hardwaremanager', ))
        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                          {}).get('value', None)
        if util.cert_matches(lastfp, handler.https_cert):
            info['nodename'] = nodename
            known_nodes[nodename][info['hwaddr']] = info
            info['discostatus'] = 'discovered'
            return  # already known, no need for more
    #TODO(jjohnson2): We might have to get UUID for certain searches...
    #for now defer probe until inside eval_node.  We might not have
    #a nodename without probe in the future.
    if nodename and handler:
        eval_node(cfg, handler, info, nodename)
    elif handler:
        log.log({
            'info':
            'Detected unknown {0} with hwaddr {1} at '
            'address {2}'.format(handler.devname, info['hwaddr'],
                                 handler.ipaddr)
        })
        info['discostatus'] = 'unidentified'
        unknown_info[info['hwaddr']] = info
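The for/else block in the middle of detected() is the subtle part: the else arm runs only when no fe80:: entry is found in the new report, and then carries link-local addresses over from the previous sighting. Isolated, with hypothetical (address, port) tuples:

def merge_linklocal(newaddrs, oldaddrs):
    for addr in newaddrs:
        if addr[0].startswith('fe80::'):
            break  # new report already has a link-local address
    else:
        # no link-local seen; preserve any from the prior sighting
        for addr in oldaddrs:
            if addr[0].startswith('fe80::'):
                newaddrs.append(addr)
    return newaddrs

addrs = merge_linklocal([('10.0.0.5', 443)], [('fe80::1', 443)])
assert ('fe80::1', 443) in addrs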