def authorize(name, element, tenant=False, operation='create', skipuserobj=False):
    #TODO: actually use the element to ascertain if this user is good enough
    """Determine whether the given authenticated name is authorized.

    :param name: The shortname authenticated by the authentication scheme
    :param element: The path being examined.
    :param tenant: The tenant under which the account exists (defaults to
                   detect from name)
    :param operation: Defaults to checking for 'create' level access
    :param skipuserobj: When True, skip the user object lookup and return a
                        tuple with None in the user-object slot.

    returns None if authorization fails or a tuple of the user object
            and the relevant ConfigManager object for the context of the
            request.
    """
    valid_operations = ('create', 'start', 'update', 'retrieve', 'delete')
    if operation not in valid_operations:
        return None
    user, tenant = _get_usertenant(name, tenant)
    if tenant is not None and not configmanager.is_tenant(tenant):
        # Named tenant does not actually exist
        return None
    manager = configmanager.ConfigManager(tenant, username=user)
    if skipuserobj:
        return None, manager, user, tenant, skipuserobj
    userobj = manager.get_user(user)
    if not userobj:
        return None
    return userobj, manager, user, tenant, skipuserobj
def get_cluster_list(nodename=None, cfg=None):
    """Return (nodes, domain) describing the ssh trust cluster.

    If *nodename* carries an ``ssh.trustnodes`` attribute, that noderange
    defines the cluster; otherwise every configured node (plus collective
    managers and this host) is included.  *domain* is the first
    ``dns.domain`` value found among the nodes (or None).
    """
    if cfg is None:
        cfg = configmanager.ConfigManager(None)
    nodes = None
    if nodename is not None:
        # Honor an explicit trust list on the node, when present
        sshpeers = cfg.get_node_attributes(nodename, 'ssh.trustnodes')
        sshpeers = sshpeers.get(nodename, {}).get('ssh.trustnodes', {}).get(
            'value', None)
        if sshpeers:
            nodes = noderange.NodeRange(sshpeers, cfg).nodes
    autonodes = False
    if nodes is None:
        # No explicit trust list; fall back to all known nodes
        autonodes = True
        nodes = set(cfg.list_nodes())
    domain = None
    # Iterate over a snapshot since extra names are added to 'nodes' in-loop
    for node in list(util.natural_sort(nodes)):
        if domain is None:
            # Take the first dns.domain encountered as the cluster domain
            domaininfo = cfg.get_node_attributes(node, 'dns.domain')
            domain = domaininfo.get(node, {}).get('dns.domain', {}).get(
                'value', None)
        for extraname in get_extra_names(node, cfg):
            nodes.add(extraname)
    if autonodes:
        # Also trust the collective managers, with and without domain suffix
        for mgr in configmanager.list_collective():
            nodes.add(mgr)
            if domain and domain not in mgr:
                nodes.add('{0}.{1}'.format(mgr, domain))
    myname = collective.get_myname()
    nodes.add(myname)
    if domain and domain not in myname:
        nodes.add('{0}.{1}'.format(myname, domain))
    return nodes, domain
def proxydhcp():
    """Answer proxyDHCP (port 4011) requests from PXE/iPXE clients.

    Runs forever.  For each discovery from a node known by MAC or UUID,
    replies with a DHCP ACK pointing the client at the appropriate boot
    file: an HTTP boot.ipxe script URL for iPXE clients with a pending
    deployment profile, or the iPXE binary for plain BIOS/UEFI firmware.
    """
    net4011 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net4011.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4011.setsockopt(socket.IPPROTO_IP, IP_PKTINFO, 1)
    net4011.bind(('', 4011))
    cfg = cfm.ConfigManager(None)
    while True:
        ready = select.select([net4011], [], [], None)
        if not ready or not ready[0]:
            continue
        rq = bytearray(1024)
        rqv = memoryview(rq)
        nb, client = net4011.recvfrom_into(rq)
        if nb < 240:
            # Shorter than the BOOTP fixed header; not a DHCP packet
            continue
        rp = bytearray(1024)
        rpv = memoryview(rp)
        try:
            optidx = rq.index(b'\x63\x82\x53\x63') + 4
        except ValueError:
            # DHCP magic cookie absent
            continue
        opts, disco = opts_to_dict(rq, optidx, 3)
        # Fix: removed a stray no-op "disco['uuid']" expression here; it had
        # no effect when the key existed and raised an unhandled KeyError
        # (killing this service loop) whenever a client omitted the uuid.
        node = None
        if disco.get('hwaddr', None) in macmap:
            node = macmap[disco['hwaddr']]
        elif disco.get('uuid', None) in uuidmap:
            node = uuidmap[disco['uuid']]
        if not node:
            continue
        hwlen = rq[2]
        # Only answer peers whose hardware address maps to one of our IPs
        myipn = myipbypeer.get(rqv[28:28 + hwlen].tobytes(), None)
        if not myipn:
            continue
        if opts.get(77, None) == b'iPXE':
            cfd = cfg.get_node_attributes(node, ('deployment.*'))
            profile = cfd.get(node, {}).get(
                'deployment.pendingprofile', {}).get('value', None)
            if not profile:
                continue
            myip = socket.inet_ntoa(myipn)
            bootfile = 'http://{0}/confluent-public/os/{1}/boot.ipxe'.format(
                myip, profile).encode('utf8')
        elif disco['arch'] == 'uefi-x64':
            bootfile = b'confluent/x86_64/ipxe.efi'
        elif disco['arch'] == 'bios-x86':
            bootfile = b'confluent/x86_64/ipxe.kkpxe'
        else:
            # Fix: an unrecognized architecture previously fell through with
            # bootfile unbound (NameError) or stale from a prior iteration;
            # skip clients we have no boot file for.
            continue
        # Build the reply: echo the request header, flip op to BOOTREPLY,
        # then append options: 53 (ACK), 54 (server id), 97 (client uuid),
        # 60 (PXEClient class) and the end marker.
        rpv[:240] = rqv[:240].tobytes()
        rpv[0:1] = b'\x02'
        rpv[108:108 + len(bootfile)] = bootfile
        rpv[240:243] = b'\x35\x01\x05'
        rpv[243:249] = b'\x36\x04' + myipn
        rpv[20:24] = myipn
        # NOTE(review): assumes the client sent option 97 (uuid); a request
        # matched purely by hwaddr without option 97 would KeyError here --
        # TODO confirm against real clients
        rpv[249:268] = b'\x61\x11' + opts[97]
        rpv[268:280] = b'\x3c\x09PXEClient\xff'
        net4011.sendto(rpv[:281], client)
def handle_dispatch(connection, cert, dispatch, peername):
    """Execute a request dispatched to us by a collective peer.

    Verifies the peer certificate against the registered collective member
    fingerprint, decodes the msgpack-framed dispatch, routes each node to
    its plugin handler, streams every response back over *connection*, and
    finally sends an 8-byte zero sentinel marking end of responses.
    """
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    if not util.cert_matches(
            cfm.get_collective_member(peername)['fingerprint'], cert):
        connection.close()
        return
    if dispatch[0:2] != b'\x01\x03':  # magic value to indicate msgpack
        # We only support msgpack now
        # The magic should preclude any pickle, as the first byte can never
        # be under 0x20 or so.
        connection.close()
        return
    dispatch = msgpack.unpackb(dispatch[2:], raw=False)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    inputdata = msg.get_input_message(
        pathcomponents, operation, inputdata, nodes, dispatch['isnoderange'],
        configmanager)
    plugroute = routespec.routeinfo
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(
        nodes, plugroute['pluginattrs'])
    # Group nodes by the plugin handler their attributes select
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            if hfunc in nodesbyhandler:
                nodesbyhandler[hfunc].append(node)
            else:
                nodesbyhandler[hfunc] = [node]
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(hfunc(
                nodes=nodesbyhandler[hfunc], element=pathcomponents,
                configmanager=configmanager, inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res)
    except Exception as res:
        # Forward the exception itself so the requester can re-raise it
        _forward_rsp(connection, res)
    # Fix: sendall requires bytes in Python 3; the previous str literal
    # raised TypeError instead of terminating the response stream.
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
def sessionhdl(connection, authname, skipauth=False):
    """Serve one client connection for its whole lifetime.

    Authenticates the peer (unless *skipauth*, or *authname* already
    authorizes), then loops receiving TLV requests and handing them to
    process_request until the peer disconnects.  Errors are reported back
    to the client rather than tearing down the session.
    """
    # For now, trying to test the console stuff, so let's just do n4.
    authenticated = False
    authdata = None
    cfm = None
    if skipauth:
        # Trusted transport (e.g. local root); no credential exchange
        authenticated = True
        cfm = configmanager.ConfigManager(tenant=None, username=authname)
    elif authname:
        # Transport already authenticated the name; just authorize it
        authdata = auth.authorize(authname, element=None)
        if authdata is not None:
            cfm = authdata[1]
            authenticated = True
    send_data(connection, "Confluent -- v0 --")
    while not authenticated:  # prompt for name and passphrase
        send_data(connection, {'authpassed': 0})
        response = tlvdata.recv(connection)
        authname = response['username']
        passphrase = response['password']
        # note(jbjohnso): here, we need to authenticate, but not
        # authorize a user. When authorization starts understanding
        # element path, that authorization will need to be called
        # per request the user makes
        authdata = auth.check_user_passphrase(authname, passphrase)
        if authdata is None:
            # Record the failed attempt; loop back and prompt again
            auditlog.log(
                {'operation': 'connect', 'user': authname, 'allowed': False})
        else:
            authenticated = True
            cfm = authdata[1]
    send_data(connection, {'authpassed': 1})
    request = tlvdata.recv(connection)
    while request is not None:
        try:
            process_request(
                connection, request, cfm, authdata, authname, skipauth)
        except exc.ConfluentException as e:
            # Known error type: report it; log a stacktrace only for
            # unexpected (500) errors that aren't credential locks
            if ((not isinstance(e, exc.LockedCredentials)) and
                    e.apierrorcode == 500):
                tracelog.log(traceback.format_exc(),
                             ltype=log.DataTypes.event,
                             event=log.Events.stacktrace)
            send_data(connection, {'errorcode': e.apierrorcode,
                                   'error': e.apierrorstr,
                                   'detail': e.get_error_body()})
            send_data(connection, {'_requestdone': 1})
        except SystemExit:
            sys.exit(0)
        except:
            # Unknown failure: log it and report a generic 500 so one bad
            # request cannot take down the session
            tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event,
                         event=log.Events.stacktrace)
            send_data(connection, {'errorcode': 500,
                                   'error': 'Unexpected error'})
            send_data(connection, {'_requestdone': 1})
        request = tlvdata.recv(connection)
def check_managers():
    """Reassign nodes whose collective.manager is offline.

    Only runs the reassignment when this member is not a follower and the
    collective has quorum.  Nodes that declare collective.managercandidates
    are moved to the candidate (online, non-offline) manager currently
    managing the fewest nodes.  Clears the module-level failovercheck
    handle when done.
    """
    global failovercheck
    if not follower:
        try:
            cfm.check_quorum()
        except exc.DegradedCollective:
            # No quorum: do not shuffle managers based on partial knowledge
            failovercheck = None
            return
        c = cfm.ConfigManager(None)
        collinfo = {}
        populate_collinfo(collinfo)
        # availmanagers maps manager name -> number of nodes it manages;
        # populated lazily only if some node actually needs reassignment
        availmanagers = {}
        offlinemgrs = set(collinfo['offline'])
        # include '' so nodes with an empty manager value are excluded too
        offlinemgrs.add('')
        for offline in collinfo['offline']:
            # All nodes currently assigned to this offline manager
            nodes = noderange.NodeRange(
                'collective.manager=={}'.format(offline), c).nodes
            managercandidates = c.get_node_attributes(
                nodes, 'collective.managercandidates')
            # Cache candidate-noderange expansion per distinct range text
            expandednoderanges = {}
            for node in nodes:
                if node not in managercandidates:
                    continue
                targets = managercandidates[node].get(
                    'collective.managercandidates', {}).get('value', None)
                if not targets:
                    continue
                if not availmanagers:
                    # First node needing a move: count current load of each
                    # active manager and the leader
                    for active in collinfo['active']:
                        availmanagers[active] = len(
                            noderange.NodeRange(
                                'collective.manager=={}'.format(active),
                                c).nodes)
                    availmanagers[collinfo['leader']] = len(
                        noderange.NodeRange(
                            'collective.manager=={}'.format(
                                collinfo['leader']), c).nodes)
                if targets not in expandednoderanges:
                    expandednoderanges[targets] = set(
                        noderange.NodeRange(targets, c).nodes) - offlinemgrs
                # Least-loaded candidate first
                targets = sorted(expandednoderanges[targets],
                                 key=availmanagers.get)
                if not targets:
                    continue
                c.set_node_attributes(
                    {node: {'collective.manager': {'value': targets[0]}}})
                # Account for the new assignment in subsequent choices
                availmanagers[targets[0]] += 1
        _assimilate_missing()
    failovercheck = None
def start_proxy_term(connection, cert, request):
    """Proxy a console session requested by a collective peer.

    Verifies the presented certificate belongs to the named collective
    member, then wires the peer connection to a local console session for
    the requested node.
    """
    peercert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    member = configmanager.get_collective_member(request['name'])
    if not util.cert_matches(member['fingerprint'], peercert):
        # Certificate does not match the registered member; refuse service
        connection.close()
        return
    clientcon = ClientConsole(connection)
    tenantcfg = configmanager.ConfigManager(request['tenant'])
    session = consoleserver.ConsoleSession(
        node=request['node'], configmanager=tenantcfg,
        username=request['user'], datacallback=clientcon.sendall,
        skipreplay=request['skipreplay'])
    term_interact(None, None, clientcon, None, connection, session, None)
def handle_dispatch(connection, cert, dispatch, peername):
    """Execute a pickle-framed request dispatched by a collective peer.

    Verifies the peer certificate against the registered collective member
    fingerprint, unpickles the dispatch, routes each node to its plugin
    handler, streams every response back (tagged with the peer's pickle
    protocol version), and sends an 8-byte zero sentinel at the end.
    """
    cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    if not util.cert_matches(
            cfm.get_collective_member(peername)['fingerprint'], cert):
        connection.close()
        return
    pversion = 0
    # 0x80 leads a pickle protocol>=2 stream; second byte is the version
    if bytearray(dispatch)[0] == 0x80:
        pversion = bytearray(dispatch)[1]
    # SECURITY: pickle.loads executes arbitrary constructors from the peer
    # payload; this is only tolerable because the certificate check above
    # restricts senders to enrolled collective members. Prefer the msgpack
    # dispatch path for new code.
    dispatch = pickle.loads(dispatch, **pargs)
    configmanager = cfm.ConfigManager(dispatch['tenant'])
    nodes = dispatch['nodes']
    inputdata = dispatch['inputdata']
    operation = dispatch['operation']
    pathcomponents = dispatch['path']
    routespec = nested_lookup(noderesources, pathcomponents)
    plugroute = routespec.routeinfo
    plugpath = None
    nodesbyhandler = {}
    passvalues = []
    nodeattr = configmanager.get_node_attributes(
        nodes, plugroute['pluginattrs'])
    # Group nodes by the plugin handler their attributes select
    for node in nodes:
        for attrname in plugroute['pluginattrs']:
            if attrname in nodeattr[node]:
                plugpath = nodeattr[node][attrname]['value']
            elif 'default' in plugroute:
                plugpath = plugroute['default']
        if plugpath:
            try:
                hfunc = getattr(pluginmap[plugpath], operation)
            except KeyError:
                nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                continue
            if hfunc in nodesbyhandler:
                nodesbyhandler[hfunc].append(node)
            else:
                nodesbyhandler[hfunc] = [node]
    try:
        for hfunc in nodesbyhandler:
            passvalues.append(hfunc(
                nodes=nodesbyhandler[hfunc], element=pathcomponents,
                configmanager=configmanager, inputdata=inputdata))
        for res in itertools.chain(*passvalues):
            _forward_rsp(connection, res, pversion)
    except Exception as res:
        # Forward the exception itself so the requester can re-raise it
        _forward_rsp(connection, res, pversion)
    # Fix: sendall requires bytes in Python 3; the previous str literal
    # raised TypeError instead of terminating the response stream.
    connection.sendall(b'\x00\x00\x00\x00\x00\x00\x00\x00')
def authorize(name, element, tenant=False, operation='create', skipuserobj=False):
    #TODO: actually use the element to ascertain if this user is good enough
    """Determine whether the given authenticated name is authorized.

    :param name: The shortname authenticated by the authentication scheme
    :param element: The path being examined.
    :param tenant: The tenant under which the account exists (defaults to
                   detect from name)
    :param operation: Defaults to checking for 'create' level access

    returns False if authorization fails or a tuple of the user object
            and the relevant ConfigManager object for the context of the
            request.
    """
    # skipuserobj is a leftover from the now abandoned plan to use pam session
    # to do authorization and authentication. Now confluent always does
    # authorization even if pam does authentication.
    if operation not in ('create', 'start', 'update', 'retrieve', 'delete',
                         None):
        return False
    user, tenant = _get_usertenant(name, tenant)
    if tenant is not None and not configmanager.is_tenant(tenant):
        return False
    manager = configmanager.ConfigManager(tenant, username=user)
    userobj = manager.get_user(user)
    if not userobj:
        # No direct user entry; fall back to the first matching group entry
        for group in userutil.grouplist(user):
            userobj = manager.get_usergroup(group)
            if userobj:
                break
    if userobj:  # returning
        # Administrators bypass the rule tables entirely; others must match
        # an allow rule AND not match any deny rule for this operation.
        role = userobj.get('role', 'Administrator')
        if element and role != 'Administrator':
            for rule in _allowedbyrole.get(role, {}).get(operation, []):
                if fnmatch(element, rule):
                    break
            else:
                # for/else: no allow rule matched the element
                return False
            for rule in _deniedbyrole.get(role, {}).get(operation, []):
                if fnmatch(element, rule):
                    return False
        return userobj, manager, user, tenant, skipuserobj
    return False
def start_detection():
    """Begin background hardware discovery.

    Rebuilds the unique-id maps, registers configuration watchers so that
    attribute or node-collection changes re-trigger discovery, starts the
    SLP and PXE snoopers, and schedules the periodic recheck if one is not
    already pending.
    """
    global attribwatcher
    global rechecker
    _map_unique_ids()
    cfg = cfm.ConfigManager(None)
    allnodes = cfg.list_nodes()
    # Re-evaluate discovery whenever an attribute that affects how a node
    # is identified or managed changes.
    attribwatcher = cfg.watch_attributes(
        allnodes, ('discovery.policy', 'net*.switch',
                   'hardwaremanagement.manager', 'net*.switchport', 'id.uuid',
                   'pubkeys.tls_hardwaremanager'), _recheck_nodes)
    cfg.watch_nodecollection(newnodes)
    eventlet.spawn_n(slp.snoop, safe_detected)
    eventlet.spawn_n(pxe.snoop, safe_detected)
    if rechecker is None:
        # Removed an unused 'rechecktime' local that was computed here; the
        # 900 second delay below fully determines the recheck schedule.
        rechecker = eventlet.spawn_after(900, _periodic_recheck, cfg)
def _map_unique_ids(nodes=None):
    """Refresh the uuid->node and TLS-fingerprint->node lookup maps.

    With nodes=None the maps are rebuilt from every configured node; with
    an iterable of node names only those nodes' entries are refreshed
    (their stale entries are dropped first, others are preserved).
    """
    global nodes_by_uuid
    global nodes_by_fprint
    # Map current known ids based on uuid and fingperprints for fast lookup
    cfg = cfm.ConfigManager(None)
    if nodes is None:
        # Fix: the maps were previously cleared unconditionally, which
        # discarded entries for every node outside a partial update and
        # made the stale-entry cleanup below dead code.
        nodes_by_uuid = {}
        nodes_by_fprint = {}
        nodes = cfg.list_nodes()
    bigmap = cfg.get_node_attributes(nodes,
                                     ('id.uuid',
                                      'pubkeys.tls_hardwaremanager'))
    # Index existing map entries by node so stale ones can be removed
    uuid_by_nodes = {}
    fprint_by_nodes = {}
    for uuid in nodes_by_uuid:
        # Fix: uuid_is_valid() was called without its argument (TypeError)
        if not uuid_is_valid(uuid):
            continue
        node = nodes_by_uuid[uuid]
        if node in bigmap:
            uuid_by_nodes[node] = uuid
    for fprint in nodes_by_fprint:
        node = nodes_by_fprint[fprint]
        if node in bigmap:
            fprint_by_nodes[node] = fprint
    for node in bigmap:
        # Drop any stale mapping for this node before adding current values
        if node in uuid_by_nodes:
            del nodes_by_uuid[uuid_by_nodes[node]]
        if node in fprint_by_nodes:
            del nodes_by_fprint[fprint_by_nodes[node]]
        uuid = bigmap[node].get('id.uuid', {}).get('value', None)
        if uuid_is_valid(uuid):
            nodes_by_uuid[uuid] = node
        fprint = bigmap[node].get(
            'pubkeys.tls_hardwaremanager', {}).get('value', None)
        if fprint:
            nodes_by_fprint[fprint] = node
    # Preserve uuids learned from PXE snooping that config does not yet know
    for uuid in known_pxe_uuids:
        if uuid_is_valid(uuid) and uuid not in nodes_by_uuid:
            nodes_by_uuid[uuid] = known_pxe_uuids[uuid]
def check_user_passphrase(name, passphrase, element=None, tenant=False):
    """Check a login name and passphrase for authenticity and authorization

    The function combines authentication and authorization into one function.
    It is highly recommended for a session layer to provide some secure means
    of protecting a session once this function works once and calling
    authorize() in order to provide best performance regardless of
    circumstance.  The function makes effort to provide good performance in
    repeated invocation, but that facility will slow down to deter detected
    passphrase guessing activity when such activity is detected.

    :param name: The login name provided by client
    :param passphrase: The passphrase provided by client
    :param element: Optional specification of a particular destination
    :param tenant: Optional explicit indication of tenant (defaults to
                   embedded in name)
    """
    # The reason why tenant is 'False' instead of 'None':
    # None means explicitly not a tenant. False means check
    # the username for signs of being a tenant
    # If there is any sign of guessing on a user, all valid and
    # invalid attempts are equally slowed to no more than 20 per second
    # for that particular user.
    # similarly, guessing usernames is throttled to 20/sec
    user, tenant = _get_usertenant(name, tenant)
    while (user, tenant) in _passchecking:
        # Want to serialize passphrase checking activity
        # by a user, which might be malicious
        # would normally make an event and wait
        # but here there's no need for that
        eventlet.sleep(0.5)
    credobj = Credentials(user, passphrase)
    try:
        # Try PAM first; NameError means the PAM module is not available
        pammy = PAM.pam()
        pammy.start(_pamservice, user, credobj.pam_conv)
        pammy.authenticate()
        pammy.acct_mgmt()
        del pammy
        return authorize(user, element, tenant, skipuserobj=False)
    except NameError:
        pass
    except PAM.error:
        # PAM rejected the credentials; only final if PAM actually ran
        if credobj.haspam:
            return None
    if (user, tenant) in _passcache:
        if passphrase == _passcache[(user, tenant)]:
            return authorize(user, element, tenant)
        else:
            # In case of someone trying to guess,
            # while someone is legitimately logged in
            # invalidate cache and force the slower check
            del _passcache[(user, tenant)]
            return None
    cfm = configmanager.ConfigManager(tenant, username=user)
    ucfg = cfm.get_user(user)
    if ucfg is None or 'cryptpass' not in ucfg:
        eventlet.sleep(0.05)  # stall even on test for existence of a username
        return None
    # Mark this (user, tenant) as mid-check so concurrent attempts serialize
    _passchecking[(user, tenant)] = True
    # TODO(jbjohnso): WORKERPOOL
    # PBKDF2 is, by design, cpu intensive
    # throw it at the worker pool when implemented
    # maybe a distinct worker pool, wondering about starving out non-auth stuff
    # NOTE(review): 'crypt' here shadows the stdlib module name of the same
    # name within this function scope
    salt, crypt = ucfg['cryptpass']
    # execute inside tpool to get greenthreads to give it a special thread
    # world
    #TODO(jbjohnso): util function to generically offload a call
    #such a beast could be passed into pyghmi as a way for pyghmi to
    #magically get offload of the crypto functions without having
    #to explicitly get into the eventlet tpool game
    crypted = eventlet.tpool.execute(_do_pbkdf, passphrase, salt)
    del _passchecking[(user, tenant)]
    eventlet.sleep(0.05)  # either way, we want to stall so that client can't
    # determine failure because there is a delay, valid response will
    # delay as well
    if crypt == crypted:
        _passcache[(user, tenant)] = passphrase
        return authorize(user, element, tenant)
    return None
def connect_to_leader(cert=None, name=None, leader=None):
    """Connect to (and begin following) the collective leader.

    Performs the 'connect' handshake, handling redirects, backoff and the
    case where our transaction count outranks the prospective leader (in
    which case this member becomes leader instead).  On success the
    leader's key material and configuration database replace the local
    ones and a follower thread is spawned.  Returns True when following,
    False on failure/backoff; may raise on protocol or transfer errors.
    """
    global currentleader
    global follower
    if leader is None:
        leader = currentleader
    log.log({'info': 'Attempting connection to leader {0}'.format(leader),
             'subsystem': 'collective'})
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error as e:
        log.log({'error': 'Collective connection attempt to {0} failed: {1}'
                          ''.format(leader, str(e)),
                 'subsystem': 'collective'})
        return False
    with connecting:
        with cfm._initlock:
            banner = tlvdata.recv(remote)  # the banner
            vers = banner.split()[2]
            if vers != b'v2':
                raise Exception('This instance only supports protocol 2, synchronize versions between collective members')
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(remote, {'collective': {'operation': 'connect',
                                                 'name': name,
                                                 'txcount': cfm._txcount}})
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    # Leader is still initializing; caller should retry later
                    log.log({
                        'info': 'Collective initialization in progress on '
                                '{0}'.format(leader),
                        'subsystem': 'collective'})
                    return False
                if 'leader' in keydata:
                    # We contacted a non-leader; chase the referral
                    log.log({
                        'info': 'Prospective leader {0} has redirected this '
                                'member to {1}'.format(leader,
                                                       keydata['leader']),
                        'subsystem': 'collective'})
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    # Our database is newer; take over leadership
                    # (fix: removed a duplicated 'subsystem' key in this dict)
                    log.log({'info': 'Prospective leader {0} has inferior '
                                     'transaction count, becoming leader'
                                     ''.format(leader),
                             'subsystem': 'collective'})
                    return become_leader(remote)
                return False
            # Fix: an unconditional follower.kill() preceded this guarded
            # block, crashing with AttributeError whenever follower was None;
            # only the guarded teardown is kept.
            if follower:
                follower.kill()
                cfm.stop_following()
                follower = None
            log.log({'info': 'Following leader {0}'.format(leader),
                     'subsystem': 'collective'})
            colldata = tlvdata.recv(remote)
            # the protocol transmits global data, but for now we ignore it
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = b''
            # Pull the full configuration database from the leader
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                #for globvar in globaldata:
                #    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                # Roll back to the pre-transfer configuration on any failure
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
    #spawn this as a thread...
    follower = eventlet.spawn(follow_leader, remote, leader)
    return True
def handle_connection(connection, cert, request, local=False):
    """Service one collective-protocol request on *connection*.

    request['operation'] selects the action: 'show'/'delete' (certless,
    local-only administration), 'invite', 'join', 'enroll', 'assimilate',
    'getinfo' or 'connect'.  For certificate-bearing operations the peer
    cert is checked against the stored collective member fingerprint
    before anything trusted happens.
    """
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        # No client certificate: only local administrative queries allowed
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(connection,
                             {'collective': {'error': 'Collective mode not '
                                                      'enabled on this '
                                                      'system'}})
                return
            if follower:
                # We are a follower; ask the current leader for the info,
                # verifying its certificate against our stored fingerprint
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(
                    remote, cert_reqs=ssl.CERT_NONE,
                    keyfile='/etc/confluent/privkey.pem',
                    certfile='/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                if not (linfo and
                        util.cert_matches(linfo['fingerprint'], cert)):
                    remote.close()
                    tlvdata.send(connection,
                                 {'error': 'Invalid certificate, '
                                           'redo invitation process'})
                    connection.close()
                    return
                tlvdata.recv(remote)  # ignore banner
                tlvdata.recv(remote)  # ignore authpassed: 0
                tlvdata.send(remote,
                             {'collective': {'operation': 'getinfo',
                                             'name': get_myname()}})
                collinfo = tlvdata.recv(remote)
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                # Refuse to delete members that are still participating
                if (todelete == collinfo['leader'] or
                        todelete in collinfo['active']):
                    tlvdata.send(
                        connection,
                        {'collective':
                         {'error': '{0} is still active, stop the confluent service to remove it'.format(todelete)}})
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection,
                        {'collective':
                         {'error': '{0} is not a recognized collective member'.format(todelete)}})
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection,
                    {'collective':
                     {'status': 'Successfully deleted {0}'.format(todelete)}})
            connection.close()
            return
    if 'invite' == operation:
        # Generate an invitation token a prospective member can join with
        try:
            cfm.check_quorum()
        except exc.DegradedCollective:
            tlvdata.send(connection,
                         {'collective':
                          {'error': 'Collective does not have quorum'}})
            return
        #TODO(jjohnson2): Cannot do the invitation if not the head node, the certificate hand-carrying
        #can't work in such a case.
        name = request['name']
        invitation = invites.create_server_invitation(name)
        tlvdata.send(connection,
                     {'collective': {'invitation': invitation}})
        connection.close()
    if 'join' == operation:
        # Join an existing collective using an invitation token; the token
        # acts as a shared secret to mutually validate certificates
        invitation = request['invitation']
        try:
            invitation = base64.b64decode(invitation)
            name, invitation = invitation.split(b'@', 1)
            name = util.stringify(name)
        except Exception:
            tlvdata.send(connection,
                         {'collective': {'status': 'Invalid token format'}})
            connection.close()
            return
        host = request['server']
        try:
            remote = socket.create_connection((host, 13001))
            # This isn't what it looks like.  We do CERT_NONE to disable
            # openssl verification, but then use the invitation as a
            # shared secret to validate the certs as part of the join
            # operation
            remote = ssl.wrap_socket(remote, cert_reqs=ssl.CERT_NONE,
                                     keyfile='/etc/confluent/privkey.pem',
                                     certfile='/etc/confluent/srvcert.pem')
        except Exception:
            tlvdata.send(
                connection,
                {'collective':
                 {'status': 'Failed to connect to {0}'.format(host)}})
            connection.close()
            return
        mycert = util.get_certificate_from_file(
            '/etc/confluent/srvcert.pem')
        cert = remote.getpeercert(binary_form=True)
        proof = base64.b64encode(
            invites.create_client_proof(invitation, mycert, cert))
        tlvdata.recv(remote)  # ignore banner
        tlvdata.recv(remote)  # ignore authpassed: 0
        tlvdata.send(remote, {'collective': {'operation': 'enroll',
                                             'name': name, 'hmac': proof}})
        rsp = tlvdata.recv(remote)
        if 'error' in rsp:
            tlvdata.send(connection,
                         {'collective': {'status': rsp['error']}})
            connection.close()
            return
        proof = rsp['collective']['approval']
        proof = base64.b64decode(proof)
        # Verify the server's counter-proof before trusting it
        j = invites.check_server_proof(invitation, mycert, cert, proof)
        if not j:
            remote.close()
            tlvdata.send(connection,
                         {'collective': {'status': 'Bad server token'}})
            connection.close()
            return
        tlvdata.send(connection, {'collective': {'status': 'Success'}})
        connection.close()
        currentleader = rsp['collective']['leader']
        # Persist our collective member name
        f = open('/etc/confluent/cfg/myname', 'w')
        f.write(name)
        f.close()
        log.log({'info': 'Connecting to collective due to join',
                 'subsystem': 'collective'})
        eventlet.spawn_n(connect_to_leader,
                         rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        #TODO(jjohnson2): error appropriately when asked to enroll, but the master is elsewhere
        # Server side of 'join': validate the client's proof, register both
        # parties as members, and reply with our approval and leader info
        mycert = util.get_certificate_from_file(
            '/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert,
                                           cert, proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(connection,
                     {'collective': {'approval': myrsp,
                                     'fingerprint': ldrfprint,
                                     'leader': get_leader(connection)}})
    if 'assimilate' == operation:
        # A leader is asking us to follow it
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(connection,
                         {'error': 'Unrecognized leader, '
                                   'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(connection,
                         {'error': 'Invalid certificate, '
                                   'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            # NOTE(review): missing space in this user-facing message; the
            # adjacent literals concatenate to 'inferiortransaction count'
            tlvdata.send(connection,
                         {'error': 'Refusing to be assimilated by inferior'
                                   'transaction count',
                          'txcount': cfm._txcount,})
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({'info': 'Connecting in response to assimilation',
                 'subsystem': 'collective'})
        eventlet.spawn_n(connect_to_leader, None, None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        # Report collective membership status to a verified member
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo and
                util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(connection,
                         {'error': 'Invalid certificate, '
                                   'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        # A member wants to follow us as leader; verify it, then stream our
        # keys and configuration database and relay its slaved requests
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo and
                util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(connection,
                         {'error': 'Invalid certificate, '
                                   'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {'error': 'Connecting right now',
                                      'backoff': True})
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(connection,
                         {'error': 'Cannot assimilate, our leader is '
                                   'in another castle',
                          'leader': currentleader})
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            # Peer has newer data; step down and follow it instead
            # NOTE(review): 'tranasaction' typo in this user-facing message
            retire_as_leader()
            tlvdata.send(connection,
                         {'error': 'Client has higher tranasaction count, '
                                   'should assimilate me, connecting..',
                          'txcount': cfm._txcount})
            # NOTE(review): 'subsystem': collective is a bare name, not the
            # string 'collective'; looks like a latent NameError -- confirm
            log.log({'info': 'Connecting to leader due to superior '
                             'transaction count',
                     'subsystem': collective})
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {'txcount': cfm._txcount,
                                      'dbsize': len(cfgdata)})
            connection.sendall(cfgdata)
        #tlvdata.send(connection, {'tenants': 0}) # skip the tenants for now,
        # so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                                 # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)
def handle_request(env, start_response):
    """WSGI generator serving node self-service requests.

    Authenticates the calling node via the CONFLUENT_NODENAME and
    CONFLUENT_APIKEY headers (checked against crypted.selfapikey), then
    dispatches on PATH_INFO to deployment-config, ssh-cert-signing,
    node-list, and update-status handlers.

    :param env: WSGI environment dictionary
    :param start_response: WSGI response-start callable
    Yields the (already serialized) response body.
    """
    global currtz
    global keymap
    global currlocale
    global currtzvintage
    configmanager.check_quorum()
    nodename = env.get('HTTP_CONFLUENT_NODENAME', None)
    apikey = env.get('HTTP_CONFLUENT_APIKEY', None)
    if not (nodename and apikey):
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    cfg = configmanager.ConfigManager(None)
    eak = cfg.get_node_attributes(nodename, 'crypted.selfapikey').get(
        nodename, {}).get('crypted.selfapikey', {}).get('hashvalue', None)
    if not eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    # Recover the salt from the stored crypt hash and verify the API key
    salt = '$'.join(eak.split('$', 3)[:-1]) + '$'
    if crypt.crypt(apikey, salt) != eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    retype = env.get('HTTP_ACCEPT', 'application/yaml')
    isgeneric = False
    if retype == '*/*':
        isgeneric = True
        retype = 'application/yaml'
    if retype == 'application/yaml':
        dumper = yamldump
    elif retype == 'application/json':
        dumper = json.dumps
    else:
        start_response('406 Not supported', [])
        yield 'Unsupported content type in ACCEPT: ' + retype
        return
    # Fix: always bind reqbody so bodyless requests to routes that need a
    # body fail cleanly (404) instead of raising NameError.
    reqbody = None
    if env['REQUEST_METHOD'] not in (
            'HEAD', 'GET') and 'CONTENT_LENGTH' in env and int(
                env['CONTENT_LENGTH']) > 0:
        reqbody = env['wsgi.input'].read(int(env['CONTENT_LENGTH']))
    if env['PATH_INFO'] == '/self/deploycfg':
        if 'HTTP_CONFLUENT_MGTIFACE' in env:
            ncfg = netutil.get_nic_config(
                cfg, nodename, ifidx=env['HTTP_CONFLUENT_MGTIFACE'])
        else:
            # Derive the server address the node used to reach us
            myip = env.get('HTTP_X_FORWARDED_HOST', None)
            if ']' in myip:
                myip = myip.split(']', 1)[0]
            else:
                myip = myip.split(':', 1)[0]
            myip = myip.replace('[', '').replace(']', '')
            ncfg = netutil.get_nic_config(cfg, nodename, serverip=myip)
        if ncfg['prefix']:
            ncfg['ipv4_netmask'] = netutil.cidr_to_mask(ncfg['prefix'])
        deployinfo = cfg.get_node_attributes(
            nodename, ('deployment.*', 'console.method', 'crypted.*',
                       'dns.*'))
        deployinfo = deployinfo.get(nodename, {})
        profile = deployinfo.get('deployment.pendingprofile',
                                 {}).get('value', '')
        ncfg['encryptboot'] = deployinfo.get('deployment.encryptboot',
                                             {}).get('value', None)
        if ncfg['encryptboot'] in ('', 'none'):
            ncfg['encryptboot'] = None
        ncfg['profile'] = profile
        protocol = deployinfo.get('deployment.useinsecureprotocols',
                                  {}).get('value', 'never')
        ncfg['textconsole'] = bool(
            deployinfo.get('console.method', {}).get('value', None))
        if protocol == 'always':
            ncfg['protocol'] = 'http'
        else:
            ncfg['protocol'] = 'https'
        ncfg['rootpassword'] = deployinfo.get('crypted.rootpassword',
                                              {}).get('hashvalue', None)
        ncfg['grubpassword'] = deployinfo.get('crypted.grubpassword',
                                              {}).get('grubhashvalue', None)
        # Timezone/locale/keymap are expensive to probe; cache for 30s
        if currtzvintage and currtzvintage > (time.time() - 30.0):
            ncfg['timezone'] = currtz
        else:
            langinfo = subprocess.check_output(
                ['localectl', 'status']).split(b'\n')
            for line in langinfo:
                line = line.strip()
                if line.startswith(b'System Locale:'):
                    ccurrlocale = line.split(b'=')[-1]
                    if not ccurrlocale:
                        continue
                    if not isinstance(ccurrlocale, str):
                        ccurrlocale = ccurrlocale.decode('utf8')
                    if ccurrlocale == 'n/a':
                        continue
                    currlocale = ccurrlocale
                elif line.startswith(b'VC Keymap:'):
                    ckeymap = line.split(b':')[-1]
                    ckeymap = ckeymap.strip()
                    if not ckeymap:
                        continue
                    if not isinstance(ckeymap, str):
                        ckeymap = ckeymap.decode('utf8')
                    if ckeymap == 'n/a':
                        continue
                    keymap = ckeymap
            tdc = subprocess.check_output(['timedatectl']).split(b'\n')
            for ent in tdc:
                ent = ent.strip()
                if ent.startswith(b'Time zone:'):
                    currtz = ent.split(b': ', 1)[1].split(b'(', 1)[0].strip()
                    if not isinstance(currtz, str):
                        currtz = currtz.decode('utf8')
                    currtzvintage = time.time()
                    ncfg['timezone'] = currtz
                    break
        ncfg['locale'] = currlocale
        ncfg['keymap'] = keymap
        ncfg['nameservers'] = []
        for dns in deployinfo.get('dns.servers', {}).get('value',
                                                         '').split(','):
            ncfg['nameservers'].append(dns)
        dnsdomain = deployinfo.get('dns.domain', {}).get('value', None)
        ncfg['dnsdomain'] = dnsdomain
        start_response('200 OK', (('Content-Type', retype),))
        yield dumper(ncfg)
    elif env['PATH_INFO'] == '/self/sshcert' and reqbody:
        # reqbody is the public host key to sign; require it (fix: previously
        # an unbound reqbody would raise NameError on a bodyless request)
        if not sshutil.ca_exists():
            start_response('500 Unconfigured', ())
            yield 'CA is not configured on this system (run ...)'
            return
        dnsinfo = cfg.get_node_attributes(nodename, ('dns.*'))
        dnsinfo = dnsinfo.get(nodename, {}).get('dns.domain',
                                                {}).get('value', None)
        # Fix: dnsinfo may be None when dns.domain is unset; the original
        # 'dnsinfo in nodename' then raised TypeError
        if not dnsinfo or dnsinfo in nodename:
            dnsinfo = ''
        cert = sshutil.sign_host_key(reqbody, nodename, [dnsinfo])
        start_response('200 OK', (('Content-Type', 'text/plain'),))
        yield cert
    elif env['PATH_INFO'] == '/self/nodelist':
        nodes = set(cfg.list_nodes())
        domaininfo = cfg.get_node_attributes(nodes, 'dns.domain')
        # Fix: domain is used after the loop; initialize in case nodes is
        # empty (previously an unbound-local NameError)
        domain = None
        for node in list(util.natural_sort(nodes)):
            domain = domaininfo.get(node, {}).get('dns.domain',
                                                  {}).get('value', None)
            if domain and domain not in node:
                nodes.add('{0}.{1}'.format(node, domain))
        for mgr in configmanager.list_collective():
            nodes.add(mgr)
            if domain and domain not in mgr:
                nodes.add('{0}.{1}'.format(mgr, domain))
        myname = collective.get_myname()
        nodes.add(myname)
        if domain and domain not in myname:
            nodes.add('{0}.{1}'.format(myname, domain))
        if isgeneric:
            start_response('200 OK', (('Content-Type', 'text/plain'),))
            for node in util.natural_sort(nodes):
                yield node + '\n'
        else:
            start_response('200 OK', (('Content-Type', retype),))
            yield dumper(sorted(nodes))
    elif env['PATH_INFO'] == '/self/updatestatus' and reqbody:
        # Fix: require a body; yaml.safe_load(None) would have produced
        # None and crashed on the subscript below
        update = yaml.safe_load(reqbody)
        if update['status'] == 'staged':
            targattr = 'deployment.stagedprofile'
        elif update['status'] == 'complete':
            targattr = 'deployment.profile'
        else:
            raise Exception('Unknown update status request')
        currattr = cfg.get_node_attributes(nodename,
                                           'deployment.*').get(nodename, {})
        pending = None
        if targattr == 'deployment.profile':
            pending = currattr.get('deployment.stagedprofile',
                                   {}).get('value', '')
        if not pending:
            pending = currattr.get('deployment.pendingprofile',
                                   {}).get('value', '')
        updates = {}
        if pending:
            # Promote pending -> staged/complete and clear superseded stages
            updates['deployment.pendingprofile'] = {'value': ''}
            if targattr == 'deployment.profile':
                updates['deployment.stagedprofile'] = {'value': ''}
            currprof = currattr.get(targattr, {}).get('value', '')
            if currprof != pending:
                updates[targattr] = {'value': pending}
            cfg.set_node_attributes({nodename: updates})
            start_response('200 OK', (('Content-Type', 'text/plain'),))
            yield 'OK'
        else:
            start_response('500 Error', (('Content-Type', 'text/plain'),))
            yield 'No pending profile detected, unable to accept status update'
    else:
        start_response('404 Not Found', ())
        yield 'Not found'
def snoop(handler, byehandler=None, protocol=None, uuidlookup=None):
    """Watch for SSDP notify messages

    The handler shall be called on any service coming online.
    byehandler is called whenever a system advertises that it is departing.
    If no byehandler is specified, byebye messages are ignored.
    The handler is given (as possible), the mac address, a list of viable
    sockaddrs to reference the peer, and the notification type (e.g.
    'urn:dmtf-org:service:redfish-rest:1'

    :param handler: A handler for online notifications from network
    :param byehandler: Optional handler for devices going off the network
    :param protocol: Unused here; kept for interface compatibility
    :param uuidlookup: Optional callable mapping a uuid string to a node
        name; when provided, confluent M-SEARCH queries get a reply
    """
    # Normally, I like using v6/v4 agnostic socket. However, since we are
    # dabbling in multicast wizardry here, such sockets can cause big problems,
    # so we will have two distinct sockets
    tracelog = log.Logger('trace')
    known_peers = set([])
    net6 = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
    net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
    # Join the SSDP v6 multicast group on every interface
    for ifidx in util.list_interface_indexes():
        v6grp = ssdp6mcast + struct.pack('=I', ifidx)
        net6.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_JOIN_GROUP, v6grp)
    net6.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # Join the SSDP v4 multicast group on every local address
    for i4 in util.list_ips():
        ssdp4mcast = socket.inet_pton(socket.AF_INET, mcastv4addr) + \
            socket.inet_aton(i4['addr'])
        try:
            net4.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP,
                            ssdp4mcast)
        except socket.error as e:
            if e.errno != 98:
                # errno 98 can happen if aliased, skip for now
                raise
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.bind(('', 1900))
    net6.bind(('', 1900))
    peerbymacaddress = {}
    while True:
        try:
            newmacs = set([])
            machandlers = {}
            # Block up to 60s for traffic; after a burst, keep draining with
            # a short 0.2s timeout before dispatching handlers
            r, _, _ = select.select((net4, net6), (), (), 60)
            while r:
                for s in r:
                    (rsp, peer) = s.recvfrom(9000)
                    if rsp[:4] == b'PING':
                        continue
                    rsp = rsp.split(b'\r\n')
                    method, _, _ = rsp[0].split(b' ', 2)
                    if method == b'NOTIFY':
                        # Strip any IPv6 scope id before neighbor lookup
                        ip = peer[0].partition('%')[0]
                        if peer in known_peers:
                            continue
                        # Resolve the peer's mac via the neighbor table,
                        # refreshing once on a miss
                        if ip not in neighutil.neightable:
                            neighutil.update_neigh()
                        if ip not in neighutil.neightable:
                            continue
                        mac = neighutil.neightable[ip]
                        known_peers.add(peer)
                        newmacs.add(mac)
                        if mac in peerbymacaddress:
                            peerbymacaddress[mac]['addresses'].append(peer)
                        else:
                            peerbymacaddress[mac] = {
                                'hwaddr': mac,
                                'addresses': [peer],
                            }
                        peerdata = peerbymacaddress[mac]
                        # Parse SSDP headers; NTS decides alive vs byebye
                        for headline in rsp[1:]:
                            if not headline:
                                continue
                            headline = util.stringify(headline)
                            header, _, value = headline.partition(':')
                            header = header.strip()
                            value = value.strip()
                            if header == 'NT':
                                peerdata['service'] = value
                            elif header == 'NTS':
                                if value == 'ssdp:byebye':
                                    machandlers[mac] = byehandler
                                elif value == 'ssdp:alive':
                                    machandlers[mac] = None  # handler
                    elif method == b'M-SEARCH':
                        if not uuidlookup:
                            continue
                        #ip = peer[0].partition('%')[0]
                        for headline in rsp[1:]:
                            if not headline:
                                continue
                            headline = util.stringify(headline)
                            headline = headline.partition(':')
                            if len(headline) < 3:
                                continue
                            # Only answer confluent's own search target
                            if headline[0] == 'ST' and headline[-1].startswith(
                                    ' urn:xcat.org:service:confluent:'):
                                try:
                                    cfm.check_quorum()
                                except Exception:
                                    continue
                                for query in headline[-1].split('/'):
                                    if query.startswith('uuid='):
                                        curruuid = query.split('=',
                                                               1)[1].lower()
                                        node = uuidlookup(curruuid)
                                        if not node:
                                            break
                                        # Do not bother replying to a node that
                                        # we have no deployment activity
                                        # planned for
                                        cfg = cfm.ConfigManager(None)
                                        cfd = cfg.get_node_attributes(
                                            node, [
                                                'deployment.pendingprofile',
                                                'collective.managercandidates'
                                            ])
                                        if not cfd.get(node, {}).get(
                                                'deployment.pendingprofile',
                                                {}).get('value', None):
                                            break
                                        # Respect managercandidates: only
                                        # reply if we are an allowed manager
                                        candmgrs = cfd.get(node, {}).get(
                                            'collective.managercandidates',
                                            {}).get('value', None)
                                        if candmgrs:
                                            candmgrs = noderange.NodeRange(
                                                candmgrs, cfg).nodes
                                            if collective.get_myname(
                                            ) not in candmgrs:
                                                break
                                        currtime = time.time()
                                        seconds = int(currtime)
                                        msecs = int(currtime * 1000 % 1000)
                                        reply = 'HTTP/1.1 200 OK\r\nNODENAME: {0}\r\nCURRTIME: {1}\r\nCURRMSECS: {2}\r\n'.format(
                                            node, seconds, msecs)
                                        if '%' in peer[0]:
                                            # Link-local peer: advertise the
                                            # management interface we saw
                                            iface = peer[0].split('%', 1)[1]
                                            reply += 'MGTIFACE: {0}\r\n'.format(
                                                peer[0].split('%', 1)[1])
                                            ncfg = netutil.get_nic_config(
                                                cfg, node, ifidx=iface)
                                            if ncfg.get(
                                                    'matchesnodename', None):
                                                reply += 'DEFAULTNET: 1\r\n'
                                        elif not netutil.address_is_local(
                                                peer[0]):
                                            # Ignore off-link global peers
                                            continue
                                        if not isinstance(reply, bytes):
                                            reply = reply.encode('utf8')
                                        s.sendto(reply, peer)
                r, _, _ = select.select((net4, net6), (), (), 0.2)
            # Dispatch at most one handler per mac seen in this burst
            for mac in newmacs:
                thehandler = machandlers.get(mac, None)
                if thehandler:
                    thehandler(peerbymacaddress[mac])
        except Exception:
            # Keep the snoop loop alive; just record the stack trace
            tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event,
                         event=log.Events.stacktrace)
def snoop(handler, protocol=None):
    """Watch for DHCP discovery traffic on port 67 and report clients.

    Calls handler(info) with a dict describing each discovered client
    (hwaddr, uuid, architecture, services, netinfo) and may answer PXE
    discovery via consider_discover.

    :param handler: Callable invoked with the discovery info dict
    :param protocol: Unused here; kept for interface compatibility
    """
    #TODO(jjohnson2): ipv6 socket and multicast for DHCPv6, should that be
    #prominent
    #TODO(jjohnson2): enable unicast replies. This would suggest either
    # injection into the neigh table before OFFER or using SOCK_RAW.
    global attribwatcher
    cfg = cfm.ConfigManager(None)
    # Prime the node mapping and keep it current as attributes/nodes change
    remap_nodes(cfg.list_nodes(), cfg)
    attribwatcher = cfg.watch_attributes(cfg.list_nodes(),
                                         ('id.uuid', 'net.*hwaddr'),
                                         remap_nodes)
    cfg.watch_nodecollection(new_nodes)
    net4 = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    net4.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
    # IP_PKTINFO gives us the receiving interface index and addresses
    net4.setsockopt(socket.IPPROTO_IP, IP_PKTINFO, 1)
    net4.bind(('', 67))
    while True:
        # Just need some delay, picked a prime number so that overlap with other
        # timers might be reduced, though it really is probably nothing
        ready = select.select([net4], [], [], None)
        if not ready or not ready[0]:
            continue
        # Build a raw msghdr so recvmsg can hand us the IP_PKTINFO cmsg;
        # the plain socket API does not expose ancillary data here
        clientaddr = sockaddr_in()
        rawbuffer = bytearray(2048)
        data = pkttype.from_buffer(rawbuffer)
        msg = msghdr()
        cmsgarr = bytearray(cmsgsize)
        cmsg = cmsgtype.from_buffer(cmsgarr)
        iov = iovec()
        iov.iov_base = ctypes.addressof(data)
        iov.iov_len = 2048
        msg.msg_iov = ctypes.pointer(iov)
        msg.msg_iovlen = 1
        msg.msg_control = ctypes.addressof(cmsg)
        msg.msg_controllen = ctypes.sizeof(cmsg)
        msg.msg_name = ctypes.addressof(clientaddr)
        msg.msg_namelen = ctypes.sizeof(clientaddr)
        # We'll leave name and namelen blank for now
        i = recvmsg(net4.fileno(), ctypes.pointer(msg), 0)
        # if we have a small packet, just skip, it can't possible hold enough
        # data and avoids some downstream IndexErrors that would be messy
        # with try/except
        if i < 64:
            continue
        #peer = ipfromint(clientaddr.sin_addr.s_addr)
        # We don't need peer yet, generally it's 0.0.0.0
        _, level, typ = struct.unpack('QII', cmsgarr[:16])
        if level == socket.IPPROTO_IP and typ == IP_PKTINFO:
            idx, recv, targ = struct.unpack('III', cmsgarr[16:28])
            recv = ipfromint(recv)
            targ = ipfromint(targ)
        # peer is the source ip (in dhcpdiscover, 0.0.0.0)
        # recv is the 'ip' that recevied the packet, regardless of target
        # targ is the ip in the destination ip of the header.
        # idx is the ip link number of the receiving nic
        # For example, a DHCPDISCOVER will probably have:
        # peer of 0.0.0.0
        # targ of 255.255.255.255
        # recv of <actual ip address that could reply>
        # idx correlated to the nic
        rqv = memoryview(rawbuffer)
        rq = bytearray(rqv[:i])
        if rq[0] == 1:  # Boot request
            # Hardware address length sanity check (BOOTP hlen field)
            addrlen = rq[2]
            if addrlen > 16 or addrlen == 0:
                continue
            rawnetaddr = rq[28:28 + addrlen]
            netaddr = ':'.join(['{0:02x}'.format(x) for x in rawnetaddr])
            optidx = 0
            try:
                # Locate the DHCP magic cookie; options start right after
                optidx = rq.index(b'\x63\x82\x53\x63') + 4
            except ValueError:
                continue
            txid = rq[4:8]  # struct.unpack('!I', rq[4:8])[0]
            rqinfo, disco = opts_to_dict(rq, optidx)
            vivso = disco.get('vivso', None)
            if vivso:
                # Vendor-identifying option present (e.g. a BMC announcing
                # itself); report it and skip PXE handling
                # info['modelnumber'] = info['attributes']['enclosure-machinetype-model'][0]
                info = {
                    'hwaddr': netaddr,
                    'uuid': disco['uuid'],
                    'architecture': vivso.get('arch', ''),
                    'services': (vivso['service-type'],),
                    'netinfo': {
                        'ifidx': idx,
                        'recvip': recv,
                        'txid': txid
                    },
                    'attributes': {
                        'enclosure-machinetype-model': [vivso.get('machine',
                                                                  '')]
                    }
                }
                handler(info)
                #consider_discover(info, rqinfo, net4, cfg, rqv)
                continue
            # We will fill out service to have something to byte into,
            # but the nature of the beast is that we do not have peers,
            # so that will not be present for a pxe snoop
            info = {
                'hwaddr': netaddr,
                'uuid': disco['uuid'],
                'architecture': disco['arch'],
                'netinfo': {
                    'ifidx': idx,
                    'recvip': recv,
                    'txid': txid
                },
                'services': ('pxe-client',)
            }
            if disco['uuid']:
                #TODO(jjohnson2): need to explictly check for
                # discover, so that the parser can go ahead and
                # parse the options including uuid to enable
                # ACK
                handler(info)
            consider_discover(info, rqinfo, net4, cfg, rqv)
def __init__(self): self.cfm = cfm.ConfigManager(None)
def handle_request(env, start_response):
    """WSGI generator serving node self-service requests (extended variant).

    Authenticates the calling node via CONFLUENT_NODENAME/CONFLUENT_APIKEY
    headers against crypted.selfapikey (optionally disarming a one-shot
    deployment.apiarmed key), then dispatches on PATH_INFO: BMC config,
    deployment config, ssh cert signing, node list, remote BMC config,
    update status, API key save, ansible remote config, file sync, and
    script lists.

    :param env: WSGI environment dictionary
    :param start_response: WSGI response-start callable
    Yields the (already serialized) response body.
    """
    global currtz
    global keymap
    global currlocale
    global currtzvintage
    configmanager.check_quorum()
    nodename = env.get('HTTP_CONFLUENT_NODENAME', None)
    apikey = env.get('HTTP_CONFLUENT_APIKEY', None)
    if not (nodename and apikey):
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    cfg = configmanager.ConfigManager(None)
    ea = cfg.get_node_attributes(nodename, ['crypted.selfapikey',
                                            'deployment.apiarmed'])
    eak = ea.get(nodename, {}).get('crypted.selfapikey',
                                   {}).get('hashvalue', None)
    if not eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    # Recover the salt from the stored crypt hash and verify the API key
    salt = '$'.join(eak.split('$', 3)[:-1]) + '$'
    if crypt.crypt(apikey, salt) != eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    # One-shot arming: disarm after the first successful authentication
    if ea.get(nodename, {}).get('deployment.apiarmed',
                                {}).get('value', None) == 'once':
        cfg.set_node_attributes({nodename: {'deployment.apiarmed': ''}})
    retype = env.get('HTTP_ACCEPT', 'application/yaml')
    isgeneric = False
    if retype == '*/*':
        isgeneric = True
        retype = 'application/yaml'
    if retype == 'application/yaml':
        dumper = yamldump
    elif retype == 'application/json':
        dumper = json.dumps
    else:
        start_response('406 Not supported', [])
        yield 'Unsupported content type in ACCEPT: ' + retype
        return
    operation = env['REQUEST_METHOD']
    # Fix: always bind reqbody; route guards below test 'and reqbody' and
    # previously raised NameError for bodyless (GET/HEAD) requests
    reqbody = None
    if operation not in ('HEAD', 'GET') and 'CONTENT_LENGTH' in env and int(
            env['CONTENT_LENGTH']) > 0:
        reqbody = env['wsgi.input'].read(int(env['CONTENT_LENGTH']))
    if env['PATH_INFO'] == '/self/bmcconfig':
        hmattr = cfg.get_node_attributes(nodename, 'hardwaremanagement.*')
        hmattr = hmattr.get(nodename, {})
        res = {}
        port = hmattr.get('hardwaremanagement.port', {}).get('value', None)
        if port is not None:
            res['bmcport'] = port
        vlan = hmattr.get('hardwaremanagement.vlan', {}).get('value', None)
        if vlan is not None:
            res['bmcvlan'] = vlan
        bmcaddr = hmattr.get('hardwaremanagement.manager',
                             {}).get('value', None)
        bmcaddr = socket.getaddrinfo(bmcaddr, 0)[0]
        bmcaddr = bmcaddr[-1][0]
        if '.' in bmcaddr:  # ipv4 is allowed
            netconfig = netutil.get_nic_config(cfg, nodename, ip=bmcaddr)
            res['bmcipv4'] = bmcaddr
            res['prefixv4'] = netconfig['prefix']
            res['bmcgw'] = netconfig.get('ipv4_gateway', None)
        # credential security results in user/password having to be deferred
        start_response('200 OK', (('Content-Type', retype),))
        yield dumper(res)
    elif env['PATH_INFO'] == '/self/deploycfg':
        if 'HTTP_CONFLUENT_MGTIFACE' in env:
            ncfg = netutil.get_nic_config(
                cfg, nodename, ifidx=env['HTTP_CONFLUENT_MGTIFACE'])
        else:
            # Derive the server address the node used to reach us
            myip = env.get('HTTP_X_FORWARDED_HOST', None)
            if ']' in myip:
                myip = myip.split(']', 1)[0]
            else:
                myip = myip.split(':', 1)[0]
            myip = myip.replace('[', '').replace(']', '')
            ncfg = netutil.get_nic_config(cfg, nodename, serverip=myip)
        if ncfg['prefix']:
            ncfg['ipv4_netmask'] = netutil.cidr_to_mask(ncfg['prefix'])
        if ncfg['ipv4_method'] == 'firmwaredhcp':
            ncfg['ipv4_method'] = 'static'
        deployinfo = cfg.get_node_attributes(
            nodename, ('deployment.*', 'console.method', 'crypted.*',
                       'dns.*', 'ntp.*'))
        deployinfo = deployinfo.get(nodename, {})
        profile = deployinfo.get('deployment.pendingprofile',
                                 {}).get('value', '')
        ncfg['encryptboot'] = deployinfo.get('deployment.encryptboot',
                                             {}).get('value', None)
        if ncfg['encryptboot'] in ('', 'none'):
            ncfg['encryptboot'] = None
        ncfg['profile'] = profile
        protocol = deployinfo.get('deployment.useinsecureprotocols',
                                  {}).get('value', 'never')
        ncfg['textconsole'] = bool(
            deployinfo.get('console.method', {}).get('value', None))
        if protocol == 'always':
            ncfg['protocol'] = 'http'
        else:
            ncfg['protocol'] = 'https'
        ncfg['rootpassword'] = deployinfo.get('crypted.rootpassword',
                                              {}).get('hashvalue', None)
        ncfg['grubpassword'] = deployinfo.get('crypted.grubpassword',
                                              {}).get('grubhashvalue', None)
        # Timezone/locale/keymap are expensive to probe; cache for 30s
        if currtzvintage and currtzvintage > (time.time() - 30.0):
            ncfg['timezone'] = currtz
        else:
            langinfo = subprocess.check_output(
                ['localectl', 'status']).split(b'\n')
            for line in langinfo:
                line = line.strip()
                if line.startswith(b'System Locale:'):
                    ccurrlocale = line.split(b'=')[-1]
                    if not ccurrlocale:
                        continue
                    if not isinstance(ccurrlocale, str):
                        ccurrlocale = ccurrlocale.decode('utf8')
                    if ccurrlocale == 'n/a':
                        continue
                    currlocale = ccurrlocale
                elif line.startswith(b'VC Keymap:'):
                    ckeymap = line.split(b':')[-1]
                    ckeymap = ckeymap.strip()
                    if not ckeymap:
                        continue
                    if not isinstance(ckeymap, str):
                        ckeymap = ckeymap.decode('utf8')
                    if ckeymap == 'n/a':
                        continue
                    keymap = ckeymap
            tdc = subprocess.check_output(['timedatectl']).split(b'\n')
            for ent in tdc:
                ent = ent.strip()
                if ent.startswith(b'Time zone:'):
                    currtz = ent.split(b': ', 1)[1].split(b'(', 1)[0].strip()
                    if not isinstance(currtz, str):
                        currtz = currtz.decode('utf8')
                    currtzvintage = time.time()
                    ncfg['timezone'] = currtz
                    break
        ncfg['locale'] = currlocale
        ncfg['keymap'] = keymap
        ncfg['nameservers'] = []
        for dns in deployinfo.get('dns.servers', {}).get('value',
                                                         '').split(','):
            ncfg['nameservers'].append(dns)
        ntpsrvs = deployinfo.get('ntp.servers', {}).get('value', '')
        if ntpsrvs:
            ntpsrvs = ntpsrvs.split(',')
        if ntpsrvs:
            ncfg['ntpservers'] = []
            for ntpsrv in ntpsrvs:
                ncfg['ntpservers'].append(ntpsrv)
        dnsdomain = deployinfo.get('dns.domain', {}).get('value', None)
        ncfg['dnsdomain'] = dnsdomain
        start_response('200 OK', (('Content-Type', retype),))
        yield dumper(ncfg)
    elif env['PATH_INFO'] == '/self/sshcert' and reqbody:
        if not sshutil.ca_exists():
            start_response('500 Unconfigured', ())
            yield 'CA is not configured on this system (run ...)'
            return
        pals = get_extra_names(nodename, cfg)
        cert = sshutil.sign_host_key(reqbody, nodename, pals)
        start_response('200 OK', (('Content-Type', 'text/plain'),))
        yield cert
    elif env['PATH_INFO'] == '/self/nodelist':
        nodes, _ = get_cluster_list(nodename, cfg)
        if isgeneric:
            start_response('200 OK', (('Content-Type', 'text/plain'),))
            for node in util.natural_sort(nodes):
                yield node + '\n'
        else:
            start_response('200 OK', (('Content-Type', retype),))
            yield dumper(sorted(nodes))
    elif env['PATH_INFO'] == '/self/remoteconfigbmc' and reqbody:
        try:
            reqbody = yaml.safe_load(reqbody)
        except Exception:
            reqbody = None
        # Fix: reqbody may be None after a failed parse; treat as
        # unspecified instead of raising AttributeError
        if reqbody:
            cfgmod = reqbody.get('configmod', 'unspecified')
        else:
            cfgmod = 'unspecified'
        if cfgmod == 'xcc':
            xcc.remote_nodecfg(nodename, cfg)
        elif cfgmod == 'tsm':
            tsm.remote_nodecfg(nodename, cfg)
        else:
            start_response('500 unsupported configmod', ())
            yield 'Unsupported configmod "{}"'.format(cfgmod)
            # Fix: previously fell through and called start_response a
            # second time, which violates the WSGI contract
            return
        start_response('200 Ok', ())
        yield 'complete'
    elif env['PATH_INFO'] == '/self/updatestatus' and reqbody:
        update = yaml.safe_load(reqbody)
        if update['status'] == 'staged':
            targattr = 'deployment.stagedprofile'
        elif update['status'] == 'complete':
            targattr = 'deployment.profile'
        else:
            raise Exception('Unknown update status request')
        currattr = cfg.get_node_attributes(nodename,
                                           'deployment.*').get(nodename, {})
        pending = None
        if targattr == 'deployment.profile':
            pending = currattr.get('deployment.stagedprofile',
                                   {}).get('value', '')
        if not pending:
            pending = currattr.get('deployment.pendingprofile',
                                   {}).get('value', '')
        updates = {}
        if pending:
            # Promote pending -> staged/complete and clear superseded stages
            updates['deployment.pendingprofile'] = {'value': ''}
            if targattr == 'deployment.profile':
                updates['deployment.stagedprofile'] = {'value': ''}
            currprof = currattr.get(targattr, {}).get('value', '')
            if currprof != pending:
                updates[targattr] = {'value': pending}
            cfg.set_node_attributes({nodename: updates})
            start_response('200 OK', (('Content-Type', 'text/plain'),))
            yield 'OK'
        else:
            start_response('500 Error', (('Content-Type', 'text/plain'),))
            yield 'No pending profile detected, unable to accept status update'
    elif env['PATH_INFO'] == '/self/saveapikey' and reqbody:
        if not isinstance(reqbody, str):
            reqbody = reqbody.decode('utf8')
        cfg.set_node_attributes(
            {nodename: {'deployment.sealedapikey': {'value': reqbody}}})
        start_response('200 OK', ())
        yield ''
    elif env['PATH_INFO'].startswith(
            '/self/remoteconfig/') and 'POST' == operation:
        scriptcat = env['PATH_INFO'].replace('/self/remoteconfig/', '')
        slist, profile = get_scriptlist(
            scriptcat, cfg, nodename,
            '/var/lib/confluent/public/os/{0}/ansible/{1}')
        playlist = []
        dirname = '/var/lib/confluent/public/os/{0}/ansible/{1}/'.format(
            profile, scriptcat)
        if not os.path.isdir(dirname):
            dirname = '/var/lib/confluent/public/os/{0}/ansible/{1}.d/'.format(
                profile, scriptcat)
        for filename in slist:
            if filename.endswith('.yaml') or filename.endswith('.yml'):
                playlist.append(os.path.join(dirname, filename))
        if playlist:
            runansible.run_playbooks(playlist, [nodename])
            start_response('202 Queued', ())
            yield ''
        else:
            start_response('200 OK', ())
            yield ''
            return
    elif env['PATH_INFO'].startswith('/self/remotesyncfiles'):
        if 'POST' == operation:
            result = syncfiles.start_syncfiles(nodename, cfg,
                                               json.loads(reqbody))
            start_response(result, ())
            yield ''
            return
        if 'GET' == operation:
            status, output = syncfiles.get_syncresult(nodename)
            start_response(status, ())
            yield output
            return
    elif env['PATH_INFO'].startswith('/self/remoteconfig/status'):
        rst = runansible.running_status.get(nodename, None)
        if not rst:
            start_response('204 Not Running', (('Content-Length', '0'),))
            yield ''
            return
        start_response('200 OK', ())
        if rst.complete:
            del runansible.running_status[nodename]
        yield rst.dump_text()
        return
    elif env['PATH_INFO'].startswith('/self/scriptlist/'):
        scriptcat = env['PATH_INFO'].replace('/self/scriptlist/', '')
        slist, _ = get_scriptlist(
            scriptcat, cfg, nodename,
            '/var/lib/confluent/public/os/{0}/scripts/{1}')
        if slist:
            start_response('200 OK', (('Content-Type', 'application/yaml'),))
            yield yaml.safe_dump(util.natural_sort(slist),
                                 default_flow_style=False)
        else:
            start_response('200 OK', ())
            yield ''
    else:
        start_response('404 Not Found', ())
        yield 'Not found'
def check_user_passphrase(name, passphrase, operation=None, element=None,
                          tenant=False):
    """Check a a login name and passphrase for authenticity and authorization

    The function combines authentication and authorization into one function.
    It is highly recommended for a session layer to provide some secure means
    of protecting a session once this function works once and calling
    authorize() in order to provide best performance regardless of
    circumstance.  The function makes effort to provide good performance
    in repeated invocation, but that facility will slow down to deter
    detected passphrase guessing activity when such activity is detected.

    :param name: The login name provided by client
    :param passphrase: The passphrase provided by client
    :param element: Optional specification of a particular destination
    :param tenant: Optional explicit indication of tenant (defaults to
                   embedded in name)
    """
    # The reason why tenant is 'False' instead of 'None':
    # None means explicitly not a tenant.  False means check
    # the username for signs of being a tenant
    # If there is any sign of guessing on a user, all valid and
    # invalid attempts are equally slowed to no more than 20 per second
    # for that particular user.
    # similarly, guessing usernames is throttled to 20/sec
    user, tenant = _get_usertenant(name, tenant)
    while (user, tenant) in _passchecking:
        # Want to serialize passphrase checking activity
        # by a user, which might be malicious
        # would normally make an event and wait
        # but here there's no need for that
        eventlet.sleep(0.5)
    cfm = configmanager.ConfigManager(tenant, username=user)
    ucfg = cfm.get_user(user)
    if ucfg is None:
        # No direct user record; fall back to any matching usergroup
        try:
            for group in userutil.grouplist(user):
                ucfg = cfm.get_usergroup(group)
                if ucfg:
                    break
        except KeyError:
            pass
    if ucfg is None:
        # stall so unknown-user probes cannot be distinguished by timing
        eventlet.sleep(0.05)
        return None
    if (user, tenant) in _passcache:
        if hashlib.sha256(passphrase).digest() == _passcache[(user, tenant)]:
            return authorize(user, element, tenant, operation=operation)
        else:
            # In case of someone trying to guess,
            # while someone is legitimately logged in
            # invalidate cache and force the slower check
            del _passcache[(user, tenant)]
    if 'cryptpass' in ucfg:
        _passchecking[(user, tenant)] = True
        # TODO(jbjohnso): WORKERPOOL
        # PBKDF2 is, by design, cpu intensive
        # throw it at the worker pool when implemented
        # maybe a distinct worker pool, wondering about starving out
        # non-auth stuff
        salt, crypt = ucfg['cryptpass']
        # execute inside tpool to get greenthreads to give it a special
        # thread world
        # TODO(jbjohnso): util function to generically offload a call
        # such a beast could be passed into pyghmi as a way for pyghmi to
        # magically get offload of the crypto functions without having
        # to explicitly get into the eventlet tpool game
        global authworkers
        global authcleaner
        # Lazily create the single-process auth worker pool; refresh the
        # idle-cleanup timer on each use
        if authworkers is None:
            authworkers = multiprocessing.Pool(processes=1)
        else:
            authcleaner.cancel()
        authcleaner = eventlet.spawn_after(30, _clean_authworkers)
        crypted = eventlet.tpool.execute(_do_pbkdf, passphrase, salt)
        del _passchecking[(user, tenant)]
        eventlet.sleep(
            0.05)  # either way, we want to stall so that client can't
        # determine failure because there is a delay, valid response will
        # delay as well
        if crypt == crypted:
            _passcache[(user, tenant)] = hashlib.sha256(passphrase).digest()
            return authorize(user, element, tenant, operation)
    if pam:
        pwe = None
        try:
            pwe = pwd.getpwnam(user)
        except KeyError:
            # pam won't work if the user doesn't exist, don't go further
            eventlet.sleep(
                0.05)  # stall even on test for existence of a username
            return None
        if os.getuid() != 0:
            # confluent is running with reduced privilege, however, pam_unix
            # refuses to let a non-0 user check anothers password.
            # We will fork and the child will assume elevated privilege to
            # get unix_chkpwd helper to enable checking /etc/shadow
            pid = os.fork()
            if not pid:
                usergood = False
                try:
                    # we change to the uid we are trying to authenticate as,
                    # because pam_unix uses unix_chkpwd which reque
                    os.setuid(pwe.pw_uid)
                    usergood = pam.authenticate(user, passphrase,
                                                service=_pamservice)
                finally:
                    # child communicates the verdict via its exit status
                    os._exit(0 if usergood else 1)
            usergood = os.waitpid(pid, 0)[1] == 0
        else:
            # We are running as root, we don't need to fork in order to
            # authenticate the user
            usergood = pam.authenticate(user, passphrase,
                                        service=_pamservice)
        if usergood:
            _passcache[(user, tenant)] = hashlib.sha256(passphrase).digest()
            return authorize(user, element, tenant, operation,
                             skipuserobj=False)
    eventlet.sleep(0.05)  # stall even on test for existence of a username
    return None
def connect_to_leader(cert=None, name=None, leader=None):
    """Connect to the collective leader and synchronize state from it.

    Performs the 'connect' handshake, handles redirect/backoff/become-leader
    responses, and otherwise replaces local configuration with the leader's
    dump before spawning a follower greenthread.

    :param cert: Optional certificate to present when connecting
    :param name: Our collective member name (defaults to get_myname())
    :param leader: Leader address (defaults to the current known leader)
    :return: True on success (or scheduled retry/assumed leadership),
             False on connection or protocol failure
    """
    global currentleader
    global cfginitlock
    global follower
    if cfginitlock is None:
        cfginitlock = threading.RLock()
    if leader is None:
        leader = currentleader
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error:
        return False
    # Hold both the connecting guard and the config init lock for the
    # whole synchronization; partial state must not be observable
    with connecting:
        with cfginitlock:
            tlvdata.recv(remote)  # the banner
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(remote, {'collective': {'operation': 'connect',
                                                 'name': name,
                                                 'txcount': cfm._txcount}})
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                # Leader declined; decide between retry, redirect, and
                # taking over leadership ourselves
                if 'backoff' in keydata:
                    # Leader busy; retry shortly with a random delay
                    eventlet.spawn_after(random.random(), connect_to_leader,
                                         cert, name, leader)
                    return True
                if 'leader' in keydata:
                    # Redirected to the real leader; guard against being
                    # pointed back at ourselves
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    # Our transaction count is superior; we should lead
                    return become_leader(remote)
                print(keydata['error'])
                return False
            # Stop any existing follower before replacing state
            if follower is not None:
                follower.kill()
                cfm.stop_following()
                follower = None
            colldata = tlvdata.recv(remote)
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = ''
            # Read exactly dbsize bytes of the configuration dump
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            # Wipe local config, then restore from the leader; roll back
            # on any failure so we are not left half-synchronized
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c, colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                for globvar in globaldata:
                    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
    #spawn this as a thread...
    follower = eventlet.spawn(follow_leader, remote)
    return True
def sessionhdl(connection, authname, skipauth=False, cert=None):
    """Handle one client session on the management socket.

    Authenticates the peer (unless ``skipauth``), then loops receiving TLV
    requests and dispatching them to ``process_request`` until the peer
    disconnects.  Certain first messages divert the connection entirely:
    'collective', 'dispatch', and 'proxyconsole' each hand the socket off to
    a specialized handler and this function returns.

    :param connection: The connected socket to serve.
    :param authname: Pre-authenticated user name, or None/empty to prompt.
    :param skipauth: If True, trust the peer without credential checks
        (used for local/privileged connections).
    :param cert: Peer certificate, forwarded to collective/dispatch/proxy
        handlers for their own verification.
    """
    # For now, trying to test the console stuff, so let's just do n4.
    authenticated = False
    authdata = None
    cfm = None
    if skipauth:
        authenticated = True
        cfm = configmanager.ConfigManager(tenant=None, username=authname)
    elif authname:
        # Name was already authenticated upstream; just authorize it.
        authdata = auth.authorize(authname, element=None)
        if authdata is not None:
            cfm = authdata[1]
            authenticated = True
    send_data(connection, "Confluent -- v0 --")
    while not authenticated:  # prompt for name and passphrase
        send_data(connection, {'authpassed': 0})
        response = tlvdata.recv(connection)
        # Special first-message types divert the whole connection to a
        # dedicated handler instead of the normal request loop.
        if 'collective' in response:
            return collective.handle_connection(connection, cert,
                                                response['collective'])
        if 'dispatch' in response:
            dreq = tlvdata.recvall(connection,
                                   response['dispatch']['length'])
            return pluginapi.handle_dispatch(connection, cert, dreq,
                                             response['dispatch']['name'])
        if 'proxyconsole' in response:
            return start_proxy_term(connection, cert,
                                    response['proxyconsole'])
        authname = response['username']
        passphrase = response['password']
        # note(jbjohnso): here, we need to authenticate, but not
        # authorize a user. When authorization starts understanding
        # element path, that authorization will need to be called
        # per request the user makes
        authdata = auth.check_user_passphrase(authname, passphrase)
        if authdata is None:
            auditlog.log({
                'operation': 'connect',
                'user': authname,
                'allowed': False
            })
        else:
            authenticated = True
            cfm = authdata[1]
    send_data(connection, {'authpassed': 1})
    request = tlvdata.recv(connection)
    # A local privileged peer may request collective handling after auth.
    if 'collective' in request and skipauth:
        if not libssl:
            tlvdata.send(
                connection, {
                    'collective': {
                        'error': 'Server either does not have '
                                 'python-pyopenssl installed or has an '
                                 'incorrect version installed '
                                 '(e.g. pyOpenSSL would need to be '
                                 'replaced with python-pyopenssl)'
                    }
                })
            return
        return collective.handle_connection(connection, None,
                                            request['collective'],
                                            local=True)
    # Main request loop: translate exceptions into error replies rather
    # than tearing down the session.
    while request is not None:
        try:
            process_request(connection, request, cfm, authdata, authname,
                            skipauth)
        except exc.ConfluentException as e:
            # Only trace unexpected (500) errors; LockedCredentials is a
            # known condition and not worth a stack trace.
            if ((not isinstance(e, exc.LockedCredentials)) and
                    e.apierrorcode == 500):
                tracelog.log(traceback.format_exc(),
                             ltype=log.DataTypes.event,
                             event=log.Events.stacktrace)
            send_data(
                connection, {
                    'errorcode': e.apierrorcode,
                    'error': e.apierrorstr,
                    'detail': e.get_error_body()
                })
            send_data(connection, {'_requestdone': 1})
        except SystemExit:
            sys.exit(0)
        except:
            # Catch-all so one bad request cannot kill the session; log the
            # stack trace and report a generic 500 to the client.
            tracelog.log(traceback.format_exc(),
                         ltype=log.DataTypes.event,
                         event=log.Events.stacktrace)
            send_data(connection, {
                'errorcode': 500,
                'error': 'Unexpected error'
            })
            send_data(connection, {'_requestdone': 1})
        request = tlvdata.recv(connection)
def detected(info):
    """Process a newly detected (or re-detected) endpoint from discovery.

    Correlates the detection ``info`` against known services, serials,
    UUIDs and nodes, probes the endpoint via its protocol handler, and
    either associates it with a node (``eval_node``) or parks it in
    ``unknown_info`` for periodic re-checking.

    :param info: Discovery info dict; per the accesses below it carries at
        least 'services' and 'hwaddr', and may carry 'attributes',
        'addresses', and 'uuid' — exact schema is defined by the discovery
        producers (assumed, confirm against callers).
    """
    global rechecker
    global rechecktime
    # later, manual and CMM discovery may act on SN and/or UUID
    # Pick the first advertised service we have a handler for; without a
    # handler there is nothing we can do with this detection yet.
    for service in info['services']:
        if service in nodehandlers:
            if service not in known_services:
                known_services[service] = set()
            handler = nodehandlers[service]
            info['handler'] = handler
            break
    else:  # no nodehandler, ignore for now
        return
    # Record serial number and model when the attributes provide them;
    # missing/empty attributes are expected and silently skipped.
    try:
        snum = info['attributes']['enclosure-serial-number'][0].strip()
        if snum:
            info['serialnumber'] = snum
            known_serials[info['serialnumber']] = info
    except (KeyError, IndexError):
        pass
    try:
        info['modelnumber'] = info['attributes'][
            'enclosure-machinetype-model'][0]
        known_services[service].add(info['modelnumber'])
    except (KeyError, IndexError):
        pass
    if info['hwaddr'] in known_info and 'addresses' in info:
        # we should tee these up for parsing when an enclosure comes up
        # also when switch config parameters change, should discard
        # and there's also if wiring is fixed...
        # of course could periodically revisit known_nodes
        # replace potentially stale address info
        #TODO(jjohnson2): remove this
        # temporary workaround for XCC not doing SLP DA over dedicated port
        # bz 93219, fix submitted, but not in builds yet
        # strictly speaking, going ipv4 only legitimately is mistreated here,
        # but that should be an edge case
        oldaddr = known_info[info['hwaddr']].get('addresses', [])
        # Carry forward a remembered link-local address if the new report
        # lacks one (part of the workaround above).
        for addr in info['addresses']:
            if addr[0].startswith('fe80::'):
                break
        else:
            for addr in oldaddr:
                if addr[0].startswith('fe80::'):
                    info['addresses'].append(addr)
        if known_info[info['hwaddr']].get(
                'addresses', []) == info['addresses']:
            # if the ip addresses match, then assume no changes
            # now something resetting to defaults could, in theory
            # have the same address, but need to be reset
            # in that case, however, a user can clear pubkeys to force a check
            return
    known_info[info['hwaddr']] = info
    cfg = cfm.ConfigManager(None)
    if handler:
        # Instantiate the protocol-specific handler and probe the endpoint.
        handler = handler.NodeHandler(info, cfg)
        handler.scan()
    uuid = info.get('uuid', None)
    if uuid_is_valid(uuid):
        known_uuids[uuid][info['hwaddr']] = info
    if handler and handler.https_supported and not handler.https_cert:
        # Endpoint should speak https but we could not get its certificate.
        if handler.cert_fail_reason == 'unreachable':
            log.log({
                'info': '{0} with hwaddr {1} is not reachable by https '
                        'at address {2}'.format(handler.devname,
                                                info['hwaddr'],
                                                handler.ipaddr)
            })
            # Drop the unreachable address from the record.
            info['addresses'] = [
                x for x in info.get('addresses', []) if x != handler.ipaddr
            ]
            return
        log.log({
            'info': '{0} with hwaddr {1} at address {2} is not yet running '
                    'https, will examine later'.format(handler.devname,
                                                       info['hwaddr'],
                                                       handler.ipaddr)
        })
        # (Re)arm the periodic recheck so this endpoint is revisited.
        if rechecker is not None and rechecktime > util.monotonic_time() + 300:
            rechecker.cancel()
        if rechecker is None or rechecker.dead:
            rechecktime = util.monotonic_time() + 300
            rechecker = eventlet.spawn_after(300, _periodic_recheck, cfg)
        unknown_info[info['hwaddr']] = info
        # Fixed: was misspelled 'unidentfied', inconsistent with the
        # 'unidentified' value assigned at the bottom of this function.
        info['discostatus'] = 'unidentified'
        #TODO, eventlet spawn after to recheck sooner, or somehow else
        # influence periodic recheck to shorten delay?
        return
    nodename, info['maccount'] = get_nodename(cfg, handler, info)
    if nodename and handler and handler.https_supported:
        # If the stored certificate fingerprint matches what the endpoint
        # presented, it is already a known, discovered node.
        dp = cfg.get_node_attributes([nodename],
                                     ('pubkeys.tls_hardwaremanager',))
        lastfp = dp.get(nodename, {}).get('pubkeys.tls_hardwaremanager',
                                          {}).get('value', None)
        if util.cert_matches(lastfp, handler.https_cert):
            info['nodename'] = nodename
            known_nodes[nodename][info['hwaddr']] = info
            info['discostatus'] = 'discovered'
            return  # already known, no need for more
    #TODO(jjohnson2): We might have to get UUID for certain searches...
    #for now defer probe until inside eval_node.  We might not have
    #a nodename without probe in the future.
    if nodename and handler:
        eval_node(cfg, handler, info, nodename)
    elif handler:
        log.log({
            'info': 'Detected unknown {0} with hwaddr {1} at '
                    'address {2}'.format(handler.devname, info['hwaddr'],
                                         handler.ipaddr)
        })
        info['discostatus'] = 'unidentified'
        unknown_info[info['hwaddr']] = info
                                            '/'.join(pathcomponents)))


def dump_macinfo(macaddr):
    """Return location info for *macaddr* from the learned MAC table.

    :param macaddr: MAC address; '-' separators are normalized to ':'.
    :raises exc.NotFoundException: if the MAC was never seen on any
        monitored switch.
    """
    macaddr = macaddr.replace('-', ':')
    info = _macmap.get(macaddr, None)
    if info is None:
        raise exc.NotFoundException('{0} not found in mac table of '
                                    'any known switches'.format(macaddr))
    return _dump_locations(info, macaddr, _nodesbymac.get(macaddr, None))


def rescan(cfg):
    """Run a full MAC map update to completion, discarding progress yields.

    :param cfg: The ConfigManager instance to scan switches from.
    """
    for _ in update_macmap(cfg):
        pass


if __name__ == '__main__':
    # Standalone debug driver: build the MAC map and dump either one MAC's
    # records (argv[1]) or the full lookup tables.
    cg = cfm.ConfigManager(None)
    for res in update_macmap(cg):
        print("map has updated")
    if len(sys.argv) > 1:
        print(repr(_macmap[sys.argv[1]]))
        print(repr(_nodesbymac[sys.argv[1]]))
    else:
        print("Mac to Node lookup table: -------------------")
        print(repr(_nodesbymac))
        print("Mac to location lookup table: -------------------")
        print(repr(_macmap))
        print("switch to fdb lookup table: -------------------")
        print(repr(_macsbyswitch))
def sessionhdl(connection, authname, skipauth=False):
    """Handle one client session: authenticate, then serve TLV requests.

    Each known ConfluentException subtype is mapped to a specific error
    code reply; unexpected errors are logged with a stack trace and
    reported as a generic 500, so a single failing request never tears
    down the session.

    :param connection: The connected socket to serve.
    :param authname: Pre-authenticated user name, or None/empty to prompt
        the client for username/password.
    :param skipauth: If True, trust the peer without credential checks.
    """
    # For now, trying to test the console stuff, so let's just do n4.
    authenticated = False
    authdata = None
    cfm = None
    if skipauth:
        authenticated = True
        cfm = configmanager.ConfigManager(tenant=None, username=authname)
    elif authname:
        # Name was already authenticated upstream; just authorize it.
        authdata = auth.authorize(authname, element=None)
        if authdata is not None:
            cfm = authdata[1]
            authenticated = True
    send_data(connection, "Confluent -- v0 --")
    while not authenticated:  # prompt for name and passphrase
        send_data(connection, {'authpassed': 0})
        response = tlvdata.recv(connection)
        authname = response['username']
        passphrase = response['password']
        # note(jbjohnso): here, we need to authenticate, but not
        # authorize a user. When authorization starts understanding
        # element path, that authorization will need to be called
        # per request the user makes
        authdata = auth.check_user_passphrase(authname, passphrase)
        if authdata is None:
            auditlog.log({
                'operation': 'connect',
                'user': authname,
                'allowed': False
            })
        else:
            authenticated = True
            cfm = authdata[1]
    send_data(connection, {'authpassed': 1})
    request = tlvdata.recv(connection)
    while request is not None:
        try:
            process_request(connection, request, cfm, authdata, authname,
                            skipauth)
        except exc.ForbiddenRequest:
            send_data(connection, {'errorcode': 403, 'error': 'Forbidden'})
            send_data(connection, {'_requestdone': 1})
        except exc.TargetEndpointBadCredentials:
            send_data(connection, {
                'errorcode': 502,
                'error': 'Bad Credentials'
            })
            send_data(connection, {'_requestdone': 1})
        except exc.TargetEndpointUnreachable as tu:
            send_data(connection, {
                'errorcode': 504,
                'error': 'Unreachable Target - ' + str(tu)
            })
            send_data(connection, {'_requestdone': 1})
        except exc.NotImplementedException:
            send_data(connection, {
                'errorcode': 501,
                'error': 'Not Implemented'
            })
            send_data(connection, {'_requestdone': 1})
        except exc.NotFoundException as nfe:
            send_data(connection, {'errorcode': 404, 'error': str(nfe)})
            send_data(connection, {'_requestdone': 1})
        except exc.InvalidArgumentException as iae:
            send_data(connection, {
                'errorcode': 400,
                'error': 'Bad Request - ' + str(iae)
            })
            send_data(connection, {'_requestdone': 1})
        # NOTE(review): 'lockedcred' is bound but unused below.
        except exc.LockedCredentials as lockedcred:
            send_data(connection, {
                'errorcode': 500,
                'error': 'Locked Credential Store'
            })
            send_data(connection, {'_requestdone': 1})
        except exc.ConfluentException as e:
            # Generic Confluent errors: only 500s merit a stack trace.
            if e.apierrorcode == 500:
                tracelog.log(traceback.format_exc(),
                             ltype=log.DataTypes.event,
                             event=log.Events.stacktrace)
            send_data(
                connection, {
                    'errorcode': e.apierrorcode,
                    'error': e.apierrorstr,
                    'detail': e.get_error_body()
                })
            send_data(connection, {'_requestdone': 1})
        except SystemExit:
            sys.exit(0)
        except:
            # Catch-all: log the trace and report a generic 500 so the
            # session survives unexpected failures.
            tracelog.log(traceback.format_exc(), ltype=log.DataTypes.event,
                         event=log.Events.stacktrace)
            send_data(connection, {
                'errorcode': 500,
                'error': 'Unexpected error'
            })
            send_data(connection, {'_requestdone': 1})
        request = tlvdata.recv(connection)