Esempio n. 1
0
def become_leader(connection):
    """Take over as collective leader and try to recruit the other members.

    Any follower greenthread is killed and following is stopped, a pending
    retry timer is cancelled, and the local address of ``connection`` is
    recorded as the current leader.  An assimilation attempt is then spawned
    for every other collective member that is not already connected, and a
    rebalance is scheduled.

    :param connection: socket-like object; ``getsockname()[0]`` becomes the
        leader address and ``getpeername()[0]`` is excluded from the
        assimilation attempts.
    """
    global currentleader
    global follower
    global retrythread
    log.log({
        'info': 'Becoming leader of collective',
        'subsystem': 'collective'
    })
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    if retrythread:
        retrythread.cancel()
        retrythread = None
    currentleader = connection.getsockname()[0]
    skipaddr = connection.getpeername()[0]
    myname = get_myname()
    skipem = set(cfm.cfgstreams)
    skipem.add(currentleader)
    skipem.add(skipaddr)
    for member in cfm.list_collective():
        dronecandidate = cfm.get_collective_member(member)['address']
        # skipem mixes addresses (leader, peer) with the entries of
        # cfm.cfgstreams, which populate_collinfo and _assimilate_missing
        # match against member names; test both so that members that are
        # already streaming from us are not pestered again.
        if dronecandidate in skipem or member == myname or member in skipem:
            continue
        eventlet.spawn_n(try_assimilate, dronecandidate)
    schedule_rebalance()
Esempio n. 2
0
def get_cluster_list(nodename=None, cfg=None):
    """Build the set of host names relevant to a node plus the DNS domain.

    When ``nodename`` carries a non-empty ``ssh.trustnodes`` value, that value
    is expanded as a noderange and used as the starting set.  Otherwise every
    node known to ``cfg`` is used, and the collective members and this
    member's own name (each optionally qualified with the domain) are added
    as well.  In both cases the extra names of each starting node are added.

    :param nodename: node whose ``ssh.trustnodes`` attribute is consulted, or
        None to always use the automatic list.
    :param cfg: configuration manager; a new ``ConfigManager(None)`` is
        created when omitted.
    :returns: tuple ``(nodes, domain)`` where ``domain`` is the first
        non-None ``dns.domain`` value found in natural sort order, or None.
    """
    if cfg is None:
        cfg = configmanager.ConfigManager(None)

    def _attr_value(node, attrname):
        # Return the 'value' field of one attribute of one node, else None.
        attrs = cfg.get_node_attributes(node, attrname)
        return attrs.get(node, {}).get(attrname, {}).get('value', None)

    nodes = None
    if nodename is not None:
        trusted = _attr_value(nodename, 'ssh.trustnodes')
        if trusted:
            nodes = noderange.NodeRange(trusted, cfg).nodes
    autonodes = nodes is None
    if autonodes:
        nodes = set(cfg.list_nodes())

    domain = None
    # Iterate over a snapshot, since the set grows while extra names are added
    for node in list(util.natural_sort(nodes)):
        if domain is None:
            domain = _attr_value(node, 'dns.domain')
        for alias in get_extra_names(node, cfg):
            nodes.add(alias)
    if not autonodes:
        return nodes, domain

    def _add_with_fqdn(name):
        # Add the bare name and, when it lacks the domain, the qualified form.
        nodes.add(name)
        if domain and domain not in name:
            nodes.add('{0}.{1}'.format(name, domain))

    for mgr in configmanager.list_collective():
        _add_with_fqdn(mgr)
    _add_with_fqdn(collective.get_myname())
    return nodes, domain
Esempio n. 3
0
def start_collective():
    """Attempt to join the collective by contacting each other member in turn.

    Any existing follower is torn down first.  Nothing further happens when
    this member already has config streams and passes the quorum check, or
    while ``leader_init`` is active.  If no member accepts the connection, a
    retry of this function is scheduled roughly 30 seconds later.
    """
    global follower
    global retrythread
    if follower:
        follower.kill()
        cfm.stop_following()
        follower = None
    if cfm.cfgstreams:
        try:
            cfm.check_quorum()
        except exc.DegradedCollective:
            pass
        else:
            # Do not start if we have quorum and are leader
            return
    if leader_init.active:
        # do not start trying to connect if we are xmitting data to a
        # follower
        return
    me = get_myname()
    candidates = [peer for peer in sorted(cfm.list_collective())
                  if peer != me]
    joined = False
    for peer in candidates:
        if cfm.cfgleader is None:
            cfm.stop_following(True)
        address = cfm.get_collective_member(peer)['address']
        log.log({
            'info': 'Performing startup attempt to {0}'.format(address),
            'subsystem': 'collective'
        })
        if connect_to_leader(name=me, leader=address):
            joined = True
            break
    if not joined:
        # Nobody took us in; try the whole procedure again later
        retrythread = eventlet.spawn_after(30 + random.random(),
                                           start_collective)
Esempio n. 4
0
def startup():
    """Kick off collective startup when at least two members are configured.

    Lazily creates the module-level ``cfginitlock`` and spawns
    ``start_collective`` in the background.  Does nothing when fewer than two
    members exist.
    """
    global cfginitlock
    if len(list(cfm.list_collective())) < 2:
        # Not in collective mode, nothing to start
        return
    if cfginitlock is None:
        cfginitlock = threading.RLock()
    eventlet.spawn_n(start_collective)
Esempio n. 5
0
def populate_collinfo(collinfo):
    """Fill ``collinfo`` with this member's view of the collective.

    Sets ``leader`` to this member's name, ``active`` to the entries of
    ``cfm.cfgstreams`` and ``offline`` to every collective member that is
    neither this member nor listed in ``cfm.cfgstreams``.

    :param collinfo: dict that is updated in place.
    """
    me = get_myname()
    collinfo['leader'] = me
    collinfo['active'] = list(cfm.cfgstreams)
    online = set(cfm.cfgstreams)
    online.add(me)
    collinfo['offline'] = [
        member for member in cfm.list_collective() if member not in online
    ]
Esempio n. 6
0
def start_collective():
    """Try to join the collective by contacting all other members at once.

    Connections to every other member are opened concurrently; the first
    usable one is handed to ``connect_to_leader`` and the remainder are
    closed.  Nothing is attempted when already following, when this member
    has config streams and passes the quorum check, or while ``leader_init``
    is active.

    ``initting`` is true for the duration of the call.  On exit, if there is
    neither a follower nor a pending retry, this function is rescheduled
    roughly five seconds later.  Unexpected errors are logged rather than
    propagated so that the retry is always arranged.
    """
    global follower
    global retrythread
    global initting
    initting = True
    retrythread = None
    try:
        cfm.membership_callback = schedule_rebalance
        if follower is not None:
            # Already following a leader; the finally clause clears initting
            return
        try:
            if cfm.cfgstreams:
                cfm.check_quorum()
                # Do not start if we have quorum and are leader
                return
        except exc.DegradedCollective:
            pass
        if leader_init.active:  # do not start trying to connect if we are
            # xmitting data to a follower
            return
        myname = get_myname()
        connecto = []
        for member in sorted(cfm.list_collective()):
            if member == myname:
                continue
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            ldrcandidate = cfm.get_collective_member(member)['address']
            connecto.append(ldrcandidate)
        conpool = greenpool.GreenPool(64)
        connections = conpool.imap(create_connection, connecto)
        for ent in connections:
            member, remote = ent
            if isinstance(remote, Exception):
                # create_connection reports failures as the second element
                continue
            if follower is None:
                log.log({
                    'info':
                    'Performing startup attempt to {0}'.format(member),
                    'subsystem':
                    'collective'
                })
                if not connect_to_leader(
                        name=myname, leader=member, remote=remote):
                    remote.close()
            else:
                # A leader was already found; drop the surplus connection
                remote.close()
    except Exception as e:
        # Best effort: never let a startup failure escape, but leave a trace
        # of what went wrong instead of discarding it silently.
        log.log({
            'info': 'Collective startup attempt failed: {0}'.format(repr(e)),
            'subsystem': 'collective'
        })
    finally:
        if retrythread is None and follower is None:
            retrythread = eventlet.spawn_after(5 + random.random(),
                                               start_collective)
        initting = False
Esempio n. 7
0
def become_leader(connection):
    """Assume leadership and spawn assimilation attempts at other members.

    The local address of ``connection`` is recorded as the current leader.
    Members whose address equals that leader address or the peer address of
    ``connection``, as well as this member itself, are left alone.

    :param connection: socket-like object providing ``getsockname`` and
        ``getpeername``.
    """
    global currentleader
    global follower
    if follower:
        follower.kill()
        follower = None
    currentleader = connection.getsockname()[0]
    peeraddr = connection.getpeername()[0]
    me = get_myname()
    excluded = (currentleader, peeraddr)
    for member in cfm.list_collective():
        address = cfm.get_collective_member(member)['address']
        if member != me and address not in excluded:
            eventlet.spawn_n(try_assimilate, address)
Esempio n. 8
0
 def check_collective(self, attrvalue):
     """Decide whether this node's console is handled by the local member.

     Reads the node's ``collective.manager`` value from ``attrvalue``.  The
     console is detached and disconnected, and ``_is_local`` cleared, when
     either a collective exists but the node has no manager, or the manager
     is a different collective member.  Otherwise ``_is_local`` is set.

     :param attrvalue: mapping of node name to attribute dicts, each
         attribute being a dict that may carry a ``'value'`` key.
     """
     myc = attrvalue.get(self.node, {}).get('collective.manager', {}).get(
         'value', None)
     # list() so that an iterator/generator result is tested by content, as
     # the other list_collective() truth tests in this code base do
     if list(configmodule.list_collective()) and not myc:
         # In a collective but nobody is assigned to manage this node
         self._is_local = False
         self._detach()
         self._disconnect()
     elif myc and myc != collective.get_myname():
         # Do not do console connect for nodes managed by another
         # confluent collective member
         self._is_local = False
         self._detach()
         self._disconnect()
     else:
         self._is_local = True
Esempio n. 9
0
def start_collective():
    """Contact the other collective members one by one until one accepts.

    Tears down any existing follower first and does nothing further while
    ``leader_init`` is active.  When no member accepts the connection, a
    retry of this function is scheduled roughly 30 seconds later.
    """
    global follower
    global retrythread
    if follower:
        follower.kill()
        follower = None
    if leader_init.active:
        # do not start trying to connect if we are xmitting data to a
        # follower
        return
    me = get_myname()
    for peer in sorted(cfm.list_collective()):
        if peer != me:
            if cfm.cfgleader is None:
                cfm.stop_following(True)
            address = cfm.get_collective_member(peer)['address']
            if connect_to_leader(name=me, leader=address):
                # Connected; no retry needed
                return
    # Every candidate was tried without success
    retrythread = eventlet.spawn_after(30 + random.random(),
                                       start_collective)
Esempio n. 10
0
def _assimilate_missing(skipaddr=None):
    """Try to assimilate every collective member not yet connected.

    Members are skipped when they are this member, or when their name or
    address matches an entry of ``cfm.cfgstreams``, the current leader, or
    ``skipaddr``.  Connections to the rest are opened concurrently and each
    successful one is passed to ``try_assimilate``.

    :param skipaddr: optional additional address to leave alone.
    :returns: False as soon as one ``try_assimilate`` call reports failure,
        True otherwise (including when there is nobody to contact).
    """
    me = get_myname()
    excluded = set(cfm.cfgstreams)
    # Count taken before the leader/skip addresses are mixed in
    follower_count = len(excluded)
    excluded.add(currentleader)
    if skipaddr is not None:
        excluded.add(skipaddr)
    targets = []
    for member in cfm.list_collective():
        address = cfm.get_collective_member(member)['address']
        if (member != me and member not in excluded
                and address not in excluded):
            targets.append(address)
    if not targets:
        return True
    pool = greenpool.GreenPool(64)
    for peer, remote in pool.imap(create_connection, targets):
        if isinstance(remote, Exception):
            # Connection attempt failed; move on to the next one
            continue
        if not try_assimilate(peer, follower_count, remote):
            return False
    return True
Esempio n. 11
0
def startup():
    """Spawn ``start_collective`` when two or more members are configured."""
    member_count = sum(1 for _ in cfm.list_collective())
    if member_count >= 2:
        # Only meaningful in collective mode
        eventlet.spawn_n(start_collective)
Esempio n. 12
0
def handle_connection(connection, cert, request, local=False):
    """Serve one collective-protocol request received on ``connection``.

    ``request['operation']`` selects the action.  Requests that arrive
    without a certificate are only honoured when ``local`` is true and cover
    ``show``, ``delete``, ``invite`` and ``join``.  The operations
    ``enroll``, ``assimilate``, ``getinfo`` and ``connect`` validate the peer
    against ``cert``.

    :param connection: socket-like object the request arrived on; replies are
        written to it with ``tlvdata.send``.
    :param cert: peer certificate, or a false value when none was presented.
        When present it is converted to ASN.1 bytes before use.
    :param request: dict describing the request; keys other than
        ``'operation'`` depend on the operation.
    :param local: when true, certificate-less requests are accepted.
    """
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(
                    connection, {
                        'collective': {
                            'error':
                            'Collective mode not '
                            'enabled on this '
                            'system'
                        }
                    })
                return
            if follower:
                # We are a follower: fetch the authoritative view from the
                # leader rather than answering from local state.
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
                try:
                    cert = remote.getpeercert(binary_form=True)
                    if not (linfo and util.cert_matches(
                            linfo['fingerprint'], cert)):
                        tlvdata.send(connection, {
                            'error':
                            'Invalid certificate, '
                            'redo invitation process'
                        })
                        connection.close()
                        return
                    tlvdata.recv(remote)  # ignore banner
                    tlvdata.recv(remote)  # ignore authpassed: 0
                    tlvdata.send(remote, {
                        'collective': {
                            'operation': 'getinfo',
                            'name': get_myname()
                        }
                    })
                    collinfo = tlvdata.recv(remote)
                finally:
                    # The leader connection is only needed for this one
                    # exchange; release it on every path.
                    remote.close()
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                if (todelete == collinfo['leader']
                        or todelete in collinfo['active']):
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is still active, stop the confluent service to remove it'
                                .format(todelete)
                            }
                        })
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is not a recognized collective member'.
                                format(todelete)
                            }
                        })
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status':
                            'Successfully deleted {0}'.format(todelete)
                        }
                    })
                connection.close()
            return
        if 'invite' == operation:
            try:
                cfm.check_quorum()
            except exc.DegradedCollective:
                tlvdata.send(connection, {
                    'collective': {
                        'error': 'Collective does not have quorum'
                    }
                })
                return
            #TODO(jjohnson2): Cannot do the invitation if not the head node, the certificate hand-carrying
            #can't work in such a case.
            name = request['name']
            invitation = invites.create_server_invitation(name)
            tlvdata.send(connection,
                         {'collective': {
                             'invitation': invitation
                         }})
            connection.close()
        if 'join' == operation:
            invitation = request['invitation']
            try:
                invitation = base64.b64decode(invitation)
                name, invitation = invitation.split(b'@', 1)
                name = util.stringify(name)
            except Exception:
                tlvdata.send(
                    connection,
                    {'collective': {
                        'status': 'Invalid token format'
                    }})
                connection.close()
                return
            host = request['server']
            try:
                remote = socket.create_connection((host, 13001))
                # This isn't what it looks like.  We do CERT_NONE to disable
                # openssl verification, but then use the invitation as a
                # shared secret to validate the certs as part of the join
                # operation
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
            except Exception:
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status': 'Failed to connect to {0}'.format(host)
                        }
                    })
                connection.close()
                return
            try:
                mycert = util.get_certificate_from_file(
                    '/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                proof = base64.b64encode(
                    invites.create_client_proof(invitation, mycert, cert))
                tlvdata.recv(remote)  # ignore banner
                tlvdata.recv(remote)  # ignore authpassed: 0
                tlvdata.send(remote, {
                    'collective': {
                        'operation': 'enroll',
                        'name': name,
                        'hmac': proof
                    }
                })
                rsp = tlvdata.recv(remote)
            finally:
                # Nothing further is exchanged on this socket; the eventual
                # leader connection is opened separately.
                remote.close()
            if 'error' in rsp:
                tlvdata.send(connection,
                             {'collective': {
                                 'status': rsp['error']
                             }})
                connection.close()
                return
            proof = rsp['collective']['approval']
            proof = base64.b64decode(proof)
            j = invites.check_server_proof(invitation, mycert, cert, proof)
            if not j:
                tlvdata.send(connection,
                             {'collective': {
                                 'status': 'Bad server token'
                             }})
                connection.close()
                return
            tlvdata.send(connection, {'collective': {'status': 'Success'}})
            connection.close()
            currentleader = rsp['collective']['leader']
            with open('/etc/confluent/cfg/myname', 'w') as f:
                f.write(name)
            log.log({
                'info': 'Connecting to collective due to join',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader,
                             rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        #TODO(jjohnson2): error appropriately when asked to enroll, but the master is elsewhere
        mycert = util.get_certificate_from_file('/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert, cert,
                                           proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(
            connection, {
                'collective': {
                    'approval': myrsp,
                    'fingerprint': ldrfprint,
                    'leader': get_leader(connection)
                }
            })
    if 'assimilate' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(
                connection,
                {'error': 'Unrecognized leader, '
                 'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            tlvdata.send(
                connection, {
                    'error': 'Refusing to be assimilated by inferior '
                    'transaction count',
                    'txcount': cfm._txcount,
                })
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({
            'info': 'Connecting in response to assimilation',
            'subsystem': 'collective'
        })
        eventlet.spawn_n(connect_to_leader,
                         None,
                         None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {
                'error': 'Connecting right now',
                'backoff': True
            })
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(
                connection, {
                    'error': 'Cannot assimilate, our leader is '
                    'in another castle',
                    'leader': currentleader
                })
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            # The peer's data is newer than ours: step down and follow it
            retire_as_leader()
            tlvdata.send(
                connection, {
                    'error': 'Client has higher transaction count, '
                    'should assimilate me, connecting..',
                    'txcount': cfm._txcount
                })
            log.log({
                'info': 'Connecting to leader due to superior '
                'transaction count',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            # Send the new follower a full copy of keys, collective
            # membership and configuration data
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {
                'txcount': cfm._txcount,
                'dbsize': len(cfgdata)
            })
            connection.sendall(cfgdata)
        #tlvdata.send(connection, {'tenants': 0}) # skip the tenants for now,
        # so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)
Esempio n. 13
0
def handle_node_request(configmanager,
                        inputdata,
                        operation,
                        pathcomponents,
                        autostrip=True):
    """Route a node or noderange request to the responsible handler(s).

    ``pathcomponents`` is expected to start with the top-level collection
    name (``'noderange'`` selects noderange handling) followed by the node
    name or noderange and then the resource path.  Depending on the path the
    request is answered by listing nodes, enumerating or deleting a
    collection, or invoking plugin handlers, possibly forwarding to other
    collective managers.

    :param configmanager: configuration manager used for node lookups.
    :param inputdata: raw request payload.
    :param operation: operation name, e.g. ``'retrieve'``, ``'create'`` or
        ``'delete'``; also used as the attribute name looked up on plugins.
    :param pathcomponents: list of path elements; note that its first two
        elements are deleted in place before plugins are called.
    :param autostrip: when true and the request targets a single node,
        results are passed through with that node's name so it can be
        stripped.
    :returns: an iterable of response messages, or whatever the selected
        handler returns.
    :raises exc.TargetResourceUnavailable: when ``log.logfull`` is set.
    :raises exc.NotFoundException: for an unknown node, noderange or element.
    :raises exc.InvalidArgumentException: for unsupported operations on a
        bare collection, or a resource without a usable route.
    :raises exc.NotImplementedException: when no handler applies to a single
        node request.
    """
    if log.logfull:
        raise exc.TargetResourceUnavailable(
            'Filesystem full, free up space and restart confluent service')
    iscollection = False
    routespec = None
    if pathcomponents[0] == 'noderange':
        if len(pathcomponents) > 3 and pathcomponents[2] == 'nodes':
            # transform into a normal looking node request
            # this does mean we don't see if it is a valid
            # child, but that's not a goal for the noderange
            # facility anyway
            isnoderange = False
            pathcomponents = pathcomponents[2:]
        elif len(pathcomponents) == 3 and pathcomponents[2] == 'abbreviate':
            return abbreviate_noderange(configmanager, inputdata, operation)
        else:
            isnoderange = True
    else:
        isnoderange = False
    try:
        nodeorrange = pathcomponents[1]
        if not isnoderange and not configmanager.is_node(nodeorrange):
            raise exc.NotFoundException("Invalid Node")
        if isnoderange and not (len(pathcomponents) == 3
                                and pathcomponents[2] == 'abbreviate'):
            try:
                nodes = noderange.NodeRange(nodeorrange, configmanager).nodes
            except Exception as e:
                raise exc.NotFoundException("Invalid Noderange: " + str(e))
        else:
            nodes = (nodeorrange, )
    except IndexError:  # doesn't actually have a long enough path
        # this is enumerating a list of nodes or just empty noderange
        if isnoderange and operation == "retrieve":
            return iterate_collections([])
        elif isnoderange and operation == "create":
            inputdata = msg.InputAttributes(pathcomponents, inputdata)
            return create_noderange(inputdata.attribs, configmanager)
        elif isnoderange or operation == "delete":
            raise exc.InvalidArgumentException()
        if operation == "create":
            inputdata = msg.InputAttributes(pathcomponents, inputdata)
            return create_node(inputdata.attribs, configmanager)
        allnodes = list(configmanager.list_nodes())
        try:
            allnodes.sort(key=noderange.humanify_nodename)
        except TypeError:
            allnodes.sort()
        return iterate_collections(allnodes)
    if (isnoderange and len(pathcomponents) == 3
            and pathcomponents[2] == 'nodes'):
        # this means that it's a list of relevant nodes
        nodes = list(nodes)
        try:
            nodes.sort(key=noderange.humanify_nodename)
        except TypeError:
            nodes.sort()
        return iterate_collections(nodes)
    if len(pathcomponents) == 2:
        iscollection = True
    else:
        try:
            routespec = nested_lookup(noderesources, pathcomponents[2:])
        except KeyError:
            raise exc.NotFoundException("Invalid element requested")
        if isinstance(routespec, dict):
            iscollection = True
        elif isinstance(routespec, PluginCollection):
            iscollection = False  # it is a collection, but plugin defined
        elif routespec is None:
            raise exc.InvalidArgumentException(
                'Custom interface required for resource')
    if iscollection:
        if operation == "delete":
            return delete_node_collection(pathcomponents, configmanager,
                                          isnoderange)
        elif operation == "retrieve":
            return enumerate_node_collection(pathcomponents, configmanager)
        else:
            raise Exception("TODO here")
    # From here on plugins only see the path below the node/noderange
    del pathcomponents[0:2]
    passvalues = queue.Queue()
    plugroute = routespec.routeinfo
    msginputdata = msg.get_input_message(pathcomponents, operation, inputdata,
                                         nodes, isnoderange, configmanager)
    if 'handler' in plugroute:  # fixed handler definition, easy enough
        if isinstance(plugroute['handler'], str):
            hfunc = getattr(pluginmap[plugroute['handler']], operation)
        else:
            hfunc = getattr(plugroute['handler'], operation)
        passvalue = hfunc(nodes=nodes,
                          element=pathcomponents,
                          configmanager=configmanager,
                          inputdata=msginputdata)
        if isnoderange:
            return passvalue
        elif isinstance(passvalue, console.Console):
            return [passvalue]
        else:
            return stripnode(passvalue, nodes[0])
    elif 'pluginattrs' in plugroute:
        nodeattr = configmanager.get_node_attributes(
            nodes, plugroute['pluginattrs'] + ['collective.manager'])
        nodesbymanager = {}
        nodesbyhandler = {}
        badcollnodes = []
        for node in nodes:
            # Start from scratch for every node; otherwise a node lacking
            # both the plugin attribute and a default would silently be
            # routed to whatever plugin the previous node selected.
            plugpath = None
            for attrname in plugroute['pluginattrs']:
                if attrname in nodeattr[node]:
                    plugpath = nodeattr[node][attrname]['value']
                elif 'default' in plugroute:
                    plugpath = plugroute['default']
            if plugpath in dispatch_plugins:
                cfm.check_quorum()
                manager = nodeattr[node].get('collective.manager',
                                             {}).get('value', None)
                if manager:
                    if collective.get_myname() != manager:
                        # Another collective member owns this node
                        nodesbymanager.setdefault(manager, set()).add(node)
                        continue
                elif list(cfm.list_collective()):
                    # In a collective, but no manager assigned to the node
                    badcollnodes.append(node)
                    continue
            if plugpath:
                try:
                    hfunc = getattr(pluginmap[plugpath], operation)
                except KeyError:
                    nodesbyhandler[BadPlugin(node, plugpath).error] = [node]
                    continue
                nodesbyhandler.setdefault(hfunc, []).append(node)
        for bn in badcollnodes:
            nodesbyhandler[BadCollective(bn).error] = [bn]
        workers = greenpool.GreenPool()
        numworkers = 0
        for hfunc in nodesbyhandler:
            numworkers += 1
            workers.spawn(
                addtoqueue, passvalues, hfunc, {
                    'nodes': nodesbyhandler[hfunc],
                    'element': pathcomponents,
                    'configmanager': configmanager,
                    'inputdata': msginputdata
                })
        for manager in nodesbymanager:
            numworkers += 1
            workers.spawn(
                addtoqueue, passvalues, dispatch_request, {
                    'nodes': nodesbymanager[manager],
                    'manager': manager,
                    'element': pathcomponents,
                    'configmanager': configmanager,
                    'inputdata': inputdata,
                    'operation': operation,
                    'isnoderange': isnoderange
                })
        if isnoderange or not autostrip:
            return iterate_queue(numworkers, passvalues)
        else:
            if numworkers > 0:
                return iterate_queue(numworkers, passvalues, nodes[0])
            else:
                raise exc.NotImplementedException()
Esempio n. 14
0
def run(args):
    """Start the confluent service and then idle forever.

    Performs initialization in a fixed order: limits, signal handlers, pid
    file, configuration and credential store, plugins, optional
    daemonization and output redirection, optional debug socket, collective
    startup, console server, socket and HTTP APIs, and finally discovery,
    proxy DHCP and console sessions.

    :param args: command line arguments; ``'-f'`` skips daemonizing and
        ``'-o'`` skips output redirection.
    """
    setlimits()
    try:
        signal.signal(signal.SIGUSR1, dumptrace)
    except AttributeError:
        pass  # silly windows
    if havefcntl:
        _checkpidfile()
    conf.init_config()
    try:
        config = conf.get_config()
        _initsecurity(config)
    except:
        # Any failure here is reported as a credential store problem and
        # ends the process after running the exit handler
        sys.stderr.write("Error unlocking credential store\n")
        doexit()
        sys.exit(1)
    try:
        confluentcore.load_plugins()
    except:
        # Run the exit handler, then let the original error propagate
        doexit()
        raise
    try:
        log.log({'info': 'Confluent management service starting'}, flush=True)
    except (OSError, IOError) as e:
        print(repr(e))
        sys.exit(1)
    if '-f' not in args:
        _daemonize()
    if '-o' not in args:
        _redirectoutput()
    if havefcntl:
        _updatepidfile()
    signal.signal(signal.SIGINT, terminate)
    signal.signal(signal.SIGTERM, terminate)
    atexit.register(doexit)
    if dbgif:
        # Tighten the umask while the debug socket is created, restore after
        oumask = os.umask(0o077)
        try:
            os.remove('/var/run/confluent/dbg.sock')
        except OSError:
            pass  # We are not expecting the file to exist
        try:
            dbgsock = eventlet.listen("/var/run/confluent/dbg.sock",
                                      family=socket.AF_UNIX)
            eventlet.spawn_n(backdoor.backdoor_server, dbgsock)
        except AttributeError:
            pass  # Windows...
        os.umask(oumask)
    collective.startup()
    consoleserver.initialize()
    http_bind_host, http_bind_port = _get_connector_config('http')
    sock_bind_host, sock_bind_port = _get_connector_config('socket')
    try:
        sockservice = sockapi.SockApi(sock_bind_host, sock_bind_port)
        sockservice.start()
    except NameError:
        # NOTE(review): presumably sockapi is not defined on some platforms
        # -- confirm against the module's imports
        pass
    webservice = httpapi.HttpApi(http_bind_host, http_bind_port)
    webservice.start()
    while len(list(configmanager.list_collective())) >= 2:
        # If in a collective, stall automatic startup activity
        # until we establish quorum
        try:
            configmanager.check_quorum()
            break
        except Exception:
            eventlet.sleep(0.5)
    disco.start_detection()
    pxe.start_proxydhcp()
    eventlet.sleep(1)
    consoleserver.start_console_sessions()
    # Keep the main greenthread alive; the spawned services do the work
    while 1:
        eventlet.sleep(100)
Esempio n. 15
0
def handle_request(env, start_response):
    """WSGI generator serving the node self-service ('/self/...') requests.

    The caller identifies itself with the CONFLUENT_NODENAME and
    CONFLUENT_APIKEY request headers; the key is checked against the node's
    stored 'crypted.selfapikey' hash before any path is served.

    Paths handled:
      /self/deploycfg    - network, deployment and locale settings for the node
      /self/sshcert      - sign the host key supplied in the request body
      /self/nodelist     - names of nodes and collective members
      /self/updatestatus - record a 'staged' or 'complete' deployment status

    :param env: WSGI environment dictionary
    :param start_response: WSGI start_response callable
    :returns: yields the response body chunks
    :raises Exception: if an updatestatus request carries an unknown status
    """
    global currtz
    global keymap
    global currlocale
    global currtzvintage
    configmanager.check_quorum()
    nodename = env.get('HTTP_CONFLUENT_NODENAME', None)
    apikey = env.get('HTTP_CONFLUENT_APIKEY', None)
    if not (nodename and apikey):
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    cfg = configmanager.ConfigManager(None)
    eak = cfg.get_node_attributes(nodename, 'crypted.selfapikey').get(
        nodename, {}).get('crypted.selfapikey', {}).get('hashvalue', None)
    if not eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    # Rebuild the leading '$id$salt$' portion of the stored hash so that the
    # presented key is hashed with the same parameters.
    salt = '$'.join(eak.split('$', 3)[:-1]) + '$'
    if crypt.crypt(apikey, salt) != eak:
        start_response('401 Unauthorized', [])
        yield 'Unauthorized'
        return
    retype = env.get('HTTP_ACCEPT', 'application/yaml')
    isgeneric = False
    if retype == '*/*':
        isgeneric = True
        retype = 'application/yaml'
    if retype == 'application/yaml':
        dumper = yamldump
    elif retype == 'application/json':
        dumper = json.dumps
    else:
        start_response('406 Not supported', [])
        yield 'Unsupported content type in ACCEPT: ' + retype
        return
    # reqbody stays None unless the request actually carried a body, so the
    # handlers below can reject bodiless requests instead of hitting an
    # unbound local.
    reqbody = None
    if env['REQUEST_METHOD'] not in ('HEAD', 'GET'):
        try:
            # CONTENT_LENGTH may be absent or an empty string
            bodylen = int(env.get('CONTENT_LENGTH') or 0)
        except ValueError:
            start_response('400 Bad Request', [])
            yield 'Invalid Content-Length'
            return
        if bodylen > 0:
            reqbody = env['wsgi.input'].read(bodylen)
    if env['PATH_INFO'] == '/self/deploycfg':
        if 'HTTP_CONFLUENT_MGTIFACE' in env:
            ncfg = netutil.get_nic_config(cfg,
                                          nodename,
                                          ifidx=env['HTTP_CONFLUENT_MGTIFACE'])
        else:
            myip = env.get('HTTP_X_FORWARDED_HOST', None)
            if not myip:
                start_response('400 Bad Request', [])
                yield 'Missing X-Forwarded-Host or CONFLUENT_MGTIFACE header'
                return
            # Strip any port, and the brackets of a bracketed IPv6 literal
            if ']' in myip:
                myip = myip.split(']', 1)[0]
            else:
                myip = myip.split(':', 1)[0]
            myip = myip.replace('[', '').replace(']', '')
            ncfg = netutil.get_nic_config(cfg, nodename, serverip=myip)
        if ncfg['prefix']:
            ncfg['ipv4_netmask'] = netutil.cidr_to_mask(ncfg['prefix'])
        deployinfo = cfg.get_node_attributes(
            nodename, ('deployment.*', 'console.method', 'crypted.*', 'dns.*'))
        deployinfo = deployinfo.get(nodename, {})
        profile = deployinfo.get('deployment.pendingprofile',
                                 {}).get('value', '')
        ncfg['encryptboot'] = deployinfo.get('deployment.encryptboot',
                                             {}).get('value', None)
        if ncfg['encryptboot'] in ('', 'none'):
            ncfg['encryptboot'] = None
        ncfg['profile'] = profile
        protocol = deployinfo.get('deployment.useinsecureprotocols',
                                  {}).get('value', 'never')
        ncfg['textconsole'] = bool(
            deployinfo.get('console.method', {}).get('value', None))
        if protocol == 'always':
            ncfg['protocol'] = 'http'
        else:
            ncfg['protocol'] = 'https'
        ncfg['rootpassword'] = deployinfo.get('crypted.rootpassword',
                                              {}).get('hashvalue', None)
        ncfg['grubpassword'] = deployinfo.get('crypted.grubpassword',
                                              {}).get('grubhashvalue', None)
        # Locale, keymap and timezone are cached in module globals; the
        # external commands are only rerun when the cache is older than 30
        # seconds.
        if currtzvintage and currtzvintage > (time.time() - 30.0):
            ncfg['timezone'] = currtz
        else:
            langinfo = subprocess.check_output(['localectl',
                                                'status']).split(b'\n')
            for line in langinfo:
                line = line.strip()
                if line.startswith(b'System Locale:'):
                    ccurrlocale = line.split(b'=')[-1]
                    if not ccurrlocale:
                        continue
                    if not isinstance(ccurrlocale, str):
                        ccurrlocale = ccurrlocale.decode('utf8')
                    if ccurrlocale == 'n/a':
                        continue
                    currlocale = ccurrlocale
                elif line.startswith(b'VC Keymap:'):
                    ckeymap = line.split(b':')[-1]
                    ckeymap = ckeymap.strip()
                    if not ckeymap:
                        continue
                    if not isinstance(ckeymap, str):
                        ckeymap = ckeymap.decode('utf8')
                    if ckeymap == 'n/a':
                        continue
                    keymap = ckeymap
            tdc = subprocess.check_output(['timedatectl']).split(b'\n')
            for ent in tdc:
                ent = ent.strip()
                if ent.startswith(b'Time zone:'):
                    # Keep only the zone name, dropping the trailing
                    # parenthesized portion of the line
                    currtz = ent.split(b': ', 1)[1].split(b'(', 1)[0].strip()
                    if not isinstance(currtz, str):
                        currtz = currtz.decode('utf8')
                    currtzvintage = time.time()
                    ncfg['timezone'] = currtz
                    break
        ncfg['locale'] = currlocale
        ncfg['keymap'] = keymap
        ncfg['nameservers'] = []
        for dns in deployinfo.get('dns.servers', {}).get('value',
                                                         '').split(','):
            ncfg['nameservers'].append(dns)
        dnsdomain = deployinfo.get('dns.domain', {}).get('value', None)
        ncfg['dnsdomain'] = dnsdomain
        start_response('200 OK', (('Content-Type', retype), ))
        yield dumper(ncfg)
    elif env['PATH_INFO'] == '/self/sshcert':
        if not sshutil.ca_exists():
            start_response('500 Unconfigured', ())
            yield 'CA is not configured on this system (run ...)'
            return
        if reqbody is None:
            start_response('400 Bad Request', ())
            yield 'Missing request body'
            return
        dnsinfo = cfg.get_node_attributes(nodename, ('dns.*'))
        dnsinfo = dnsinfo.get(nodename, {}).get('dns.domain',
                                                {}).get('value', None)
        # An unset domain is handled like one already present in the node
        # name: an empty string is passed on.
        if not dnsinfo or dnsinfo in nodename:
            dnsinfo = ''
        cert = sshutil.sign_host_key(reqbody, nodename, [dnsinfo])
        start_response('200 OK', (('Content-Type', 'text/plain'), ))
        yield cert
    elif env['PATH_INFO'] == '/self/nodelist':
        nodes = set(cfg.list_nodes())
        domaininfo = cfg.get_node_attributes(nodes, 'dns.domain')
        # Bound up front so an empty node list cannot leave it undefined for
        # the collective member handling below.
        domain = None
        for node in list(util.natural_sort(nodes)):
            domain = domaininfo.get(node, {}).get('dns.domain',
                                                  {}).get('value', None)
            if domain and domain not in node:
                nodes.add('{0}.{1}'.format(node, domain))
        for mgr in configmanager.list_collective():
            nodes.add(mgr)
            if domain and domain not in mgr:
                nodes.add('{0}.{1}'.format(mgr, domain))
        myname = collective.get_myname()
        nodes.add(myname)
        if domain and domain not in myname:
            nodes.add('{0}.{1}'.format(myname, domain))
        if isgeneric:
            start_response('200 OK', (('Content-Type', 'text/plain'), ))
            for node in util.natural_sort(nodes):
                yield node + '\n'
        else:
            start_response('200 OK', (('Content-Type', retype), ))
            yield dumper(sorted(nodes))
    elif env['PATH_INFO'] == '/self/updatestatus':
        if reqbody is None:
            start_response('400 Bad Request', ())
            yield 'Missing request body'
            return
        update = yaml.safe_load(reqbody)
        # A non-mapping body, or one without 'status', falls through to the
        # unknown status error below.
        status = update.get('status') if isinstance(update, dict) else None
        if status == 'staged':
            targattr = 'deployment.stagedprofile'
        elif status == 'complete':
            targattr = 'deployment.profile'
        else:
            raise Exception('Unknown update status request')
        currattr = cfg.get_node_attributes(nodename,
                                           'deployment.*').get(nodename, {})
        pending = None
        # On completion a staged profile takes precedence over a pending one
        if targattr == 'deployment.profile':
            pending = currattr.get('deployment.stagedprofile',
                                   {}).get('value', '')
        if not pending:
            pending = currattr.get('deployment.pendingprofile',
                                   {}).get('value', '')
        updates = {}
        if pending:
            updates['deployment.pendingprofile'] = {'value': ''}
            if targattr == 'deployment.profile':
                updates['deployment.stagedprofile'] = {'value': ''}
            currprof = currattr.get(targattr, {}).get('value', '')
            if currprof != pending:
                updates[targattr] = {'value': pending}
            cfg.set_node_attributes({nodename: updates})
            start_response('200 OK', (('Content-Type', 'text/plain'), ))
            yield 'OK'
        else:
            start_response('500 Error', (('Content-Type', 'text/plain'), ))
            yield 'No pending profile detected, unable to accept status update'
    else:
        start_response('404 Not Found', ())
        yield 'Not found'