Esempio n. 1
0
def connect_to_collective(cert, member, remote=None):
    if remote is None:
        _, remote = create_connection(member)
        if isinstance(remote, Exception):
            raise remote
    if cert:
        fprint = cert
    else:
        collent = cfm.get_collective_member_by_address(member)
        fprint = collent['fingerprint']
    if not util.cert_matches(fprint, remote.getpeercert(binary_form=True)):
        # probably Janeway up to something
        raise Exception("Certificate mismatch in the collective")
    return remote
Esempio n. 2
0
def connect_to_collective(cert, member):
    remote = socket.create_connection((member, 13001))
    # TLS cert validation is custom and will not pass normal CA vetting
    # to override completely in the right place requires enormous effort, so just defer until after connect
    remote = ssl.wrap_socket(remote, cert_reqs=ssl.CERT_NONE, keyfile='/etc/confluent/privkey.pem',
                             certfile='/etc/confluent/srvcert.pem')
    if cert:
        fprint = cert
    else:
        collent = cfm.get_collective_member_by_address(member)
        fprint = collent['fingerprint']
    if not util.cert_matches(fprint, remote.getpeercert(binary_form=True)):
        # probably Janeway up to something
        raise Exception("Certificate mismatch in the collective")
    return remote
Esempio n. 3
0
def connect_to_leader(cert=None, name=None, leader=None):
    global currentleader
    global follower
    if leader is None:
        leader = currentleader
    log.log({
        'info': 'Attempting connection to leader {0}'.format(leader),
        'subsystem': 'collective'
    })
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error as e:
        log.log({
            'error':
            'Collective connection attempt to {0} failed: {1}'
            ''.format(leader, str(e)),
            'subsystem':
            'collective'
        })
        return False
    with connecting:
        with cfm._initlock:
            banner = tlvdata.recv(remote)  # the banner
            vers = banner.split()[2]
            if vers != b'v2':
                raise Exception(
                    'This instance only supports protocol 2, synchronize versions between collective members'
                )
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    log.log({
                        'info':
                        'Collective initialization in progress on '
                        '{0}'.format(leader),
                        'subsystem':
                        'collective'
                    })
                    return False
                if 'leader' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has redirected this '
                        'member to {1}'.format(leader, keydata['leader']),
                        'subsystem':
                        'collective'
                    })
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    log.log({
                        'info':
                        'Prospective leader {0} has inferior '
                        'transaction count, becoming leader'
                        ''.format(leader),
                        'subsystem':
                        'collective',
                        'subsystem':
                        'collective'
                    })
                    return become_leader(remote)
                return False
                follower.kill()
                cfm.stop_following()
                follower = None
            if follower:
                follower.kill()
                cfm.stop_following()
                follower = None
            log.log({
                'info': 'Following leader {0}'.format(leader),
                'subsystem': 'collective'
            })
            colldata = tlvdata.recv(remote)
            # the protocol transmits global data, but for now we ignore it
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = b''
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                #for globvar in globaldata:
                #    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        #spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote, leader)
    return True
Esempio n. 4
0
def handle_connection(connection, cert, request, local=False):
    global currentleader
    global retrythread
    operation = request['operation']
    if cert:
        cert = crypto.dump_certificate(crypto.FILETYPE_ASN1, cert)
    else:
        if not local:
            return
        if operation in ('show', 'delete'):
            if not list(cfm.list_collective()):
                tlvdata.send(
                    connection, {
                        'collective': {
                            'error':
                            'Collective mode not '
                            'enabled on this '
                            'system'
                        }
                    })
                return
            if follower:
                linfo = cfm.get_collective_member_by_address(currentleader)
                remote = socket.create_connection((currentleader, 13001))
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
                cert = remote.getpeercert(binary_form=True)
                if not (linfo
                        and util.cert_matches(linfo['fingerprint'], cert)):
                    remote.close()
                    tlvdata.send(connection, {
                        'error':
                        'Invalid certificate, '
                        'redo invitation process'
                    })
                    connection.close()
                    return
                tlvdata.recv(remote)  # ignore banner
                tlvdata.recv(remote)  # ignore authpassed: 0
                tlvdata.send(remote, {
                    'collective': {
                        'operation': 'getinfo',
                        'name': get_myname()
                    }
                })
                collinfo = tlvdata.recv(remote)
            else:
                collinfo = {}
                populate_collinfo(collinfo)
            try:
                cfm.check_quorum()
                collinfo['quorum'] = True
            except exc.DegradedCollective:
                collinfo['quorum'] = False
            if operation == 'show':
                tlvdata.send(connection, {'collective': collinfo})
            elif operation == 'delete':
                todelete = request['member']
                if (todelete == collinfo['leader']
                        or todelete in collinfo['active']):
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is still active, stop the confluent service to remove it'
                                .format(todelete)
                            }
                        })
                    return
                if todelete not in collinfo['offline']:
                    tlvdata.send(
                        connection, {
                            'collective': {
                                'error':
                                '{0} is not a recognized collective member'.
                                format(todelete)
                            }
                        })
                    return
                cfm.del_collective_member(todelete)
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status':
                            'Successfully deleted {0}'.format(todelete)
                        }
                    })
                connection.close()
            return
        if 'invite' == operation:
            try:
                cfm.check_quorum()
            except exc.DegradedCollective:
                tlvdata.send(connection, {
                    'collective': {
                        'error': 'Collective does not have quorum'
                    }
                })
                return
            #TODO(jjohnson2): Cannot do the invitation if not the head node, the certificate hand-carrying
            #can't work in such a case.
            name = request['name']
            invitation = invites.create_server_invitation(name)
            tlvdata.send(connection,
                         {'collective': {
                             'invitation': invitation
                         }})
            connection.close()
        if 'join' == operation:
            invitation = request['invitation']
            try:
                invitation = base64.b64decode(invitation)
                name, invitation = invitation.split(b'@', 1)
                name = util.stringify(name)
            except Exception:
                tlvdata.send(
                    connection,
                    {'collective': {
                        'status': 'Invalid token format'
                    }})
                connection.close()
                return
            host = request['server']
            try:
                remote = socket.create_connection((host, 13001))
                # This isn't what it looks like.  We do CERT_NONE to disable
                # openssl verification, but then use the invitation as a
                # shared secret to validate the certs as part of the join
                # operation
                remote = ssl.wrap_socket(remote,
                                         cert_reqs=ssl.CERT_NONE,
                                         keyfile='/etc/confluent/privkey.pem',
                                         certfile='/etc/confluent/srvcert.pem')
            except Exception:
                tlvdata.send(
                    connection, {
                        'collective': {
                            'status': 'Failed to connect to {0}'.format(host)
                        }
                    })
                connection.close()
                return
            mycert = util.get_certificate_from_file(
                '/etc/confluent/srvcert.pem')
            cert = remote.getpeercert(binary_form=True)
            proof = base64.b64encode(
                invites.create_client_proof(invitation, mycert, cert))
            tlvdata.recv(remote)  # ignore banner
            tlvdata.recv(remote)  # ignore authpassed: 0
            tlvdata.send(remote, {
                'collective': {
                    'operation': 'enroll',
                    'name': name,
                    'hmac': proof
                }
            })
            rsp = tlvdata.recv(remote)
            if 'error' in rsp:
                tlvdata.send(connection,
                             {'collective': {
                                 'status': rsp['error']
                             }})
                connection.close()
                return
            proof = rsp['collective']['approval']
            proof = base64.b64decode(proof)
            j = invites.check_server_proof(invitation, mycert, cert, proof)
            if not j:
                remote.close()
                tlvdata.send(connection,
                             {'collective': {
                                 'status': 'Bad server token'
                             }})
                connection.close()
                return
            tlvdata.send(connection, {'collective': {'status': 'Success'}})
            connection.close()
            currentleader = rsp['collective']['leader']
            f = open('/etc/confluent/cfg/myname', 'w')
            f.write(name)
            f.close()
            log.log({
                'info': 'Connecting to collective due to join',
                'subsystem': 'collective'
            })
            eventlet.spawn_n(connect_to_leader,
                             rsp['collective']['fingerprint'], name)
    if 'enroll' == operation:
        #TODO(jjohnson2): error appropriately when asked to enroll, but the master is elsewhere
        mycert = util.get_certificate_from_file('/etc/confluent/srvcert.pem')
        proof = base64.b64decode(request['hmac'])
        myrsp = invites.check_client_proof(request['name'], mycert, cert,
                                           proof)
        if not myrsp:
            tlvdata.send(connection, {'error': 'Invalid token'})
            connection.close()
            return
        myrsp = base64.b64encode(myrsp)
        fprint = util.get_fingerprint(cert)
        myfprint = util.get_fingerprint(mycert)
        cfm.add_collective_member(get_myname(),
                                  connection.getsockname()[0], myfprint)
        cfm.add_collective_member(request['name'],
                                  connection.getpeername()[0], fprint)
        myleader = get_leader(connection)
        ldrfprint = cfm.get_collective_member_by_address(
            myleader)['fingerprint']
        tlvdata.send(
            connection, {
                'collective': {
                    'approval': myrsp,
                    'fingerprint': ldrfprint,
                    'leader': get_leader(connection)
                }
            })
    if 'assimilate' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not droneinfo:
            tlvdata.send(
                connection,
                {'error': 'Unrecognized leader, '
                 'redo invitation process'})
            return
        if not util.cert_matches(droneinfo['fingerprint'], cert):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            return
        if request['txcount'] < cfm._txcount:
            tlvdata.send(
                connection, {
                    'error': 'Refusing to be assimilated by inferior'
                    'transaction count',
                    'txcount': cfm._txcount,
                })
            return
        if connecting.active:
            # don't try to connect while actively already trying to connect
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        if (currentleader == connection.getpeername()[0] and follower
                and not follower.dead):
            # if we are happily following this leader already, don't stir
            # the pot
            tlvdata.send(connection, {'status': 0})
            connection.close()
            return
        log.log({
            'info': 'Connecting in response to assimilation',
            'subsystem': 'collective'
        })
        eventlet.spawn_n(connect_to_leader,
                         None,
                         None,
                         leader=connection.getpeername()[0])
        tlvdata.send(connection, {'status': 0})
        connection.close()
    if 'getinfo' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        collinfo = {}
        populate_collinfo(collinfo)
        tlvdata.send(connection, collinfo)
    if 'connect' == operation:
        drone = request['name']
        droneinfo = cfm.get_collective_member(drone)
        if not (droneinfo
                and util.cert_matches(droneinfo['fingerprint'], cert)):
            tlvdata.send(
                connection,
                {'error': 'Invalid certificate, '
                 'redo invitation process'})
            connection.close()
            return
        myself = connection.getsockname()[0]
        if connecting.active:
            tlvdata.send(connection, {
                'error': 'Connecting right now',
                'backoff': True
            })
            connection.close()
            return
        if myself != get_leader(connection):
            tlvdata.send(
                connection, {
                    'error': 'Cannot assimilate, our leader is '
                    'in another castle',
                    'leader': currentleader
                })
            connection.close()
            return
        if request['txcount'] > cfm._txcount:
            retire_as_leader()
            tlvdata.send(
                connection, {
                    'error': 'Client has higher tranasaction count, '
                    'should assimilate me, connecting..',
                    'txcount': cfm._txcount
                })
            log.log({
                'info': 'Connecting to leader due to superior '
                'transaction count',
                'subsystem': collective
            })
            eventlet.spawn_n(connect_to_leader, None, None,
                             connection.getpeername()[0])
            connection.close()
            return
        if retrythread:
            retrythread.cancel()
            retrythread = None
        with leader_init:
            cfm.update_collective_address(request['name'],
                                          connection.getpeername()[0])
            tlvdata.send(connection, cfm._dump_keys(None, False))
            tlvdata.send(connection, cfm._cfgstore['collective'])
            tlvdata.send(connection, {})  # cfm.get_globals())
            cfgdata = cfm.ConfigManager(None)._dump_to_json()
            tlvdata.send(connection, {
                'txcount': cfm._txcount,
                'dbsize': len(cfgdata)
            })
            connection.sendall(cfgdata)
        #tlvdata.send(connection, {'tenants': 0}) # skip the tenants for now,
        # so far unused anyway
        if not cfm.relay_slaved_requests(drone, connection):
            if not retrythread:  # start a recovery if everyone else seems
                # to have disappeared
                retrythread = eventlet.spawn_after(30 + random.random(),
                                                   start_collective)
Esempio n. 5
0
def connect_to_leader(cert=None, name=None, leader=None):
    global currentleader
    global cfginitlock
    global follower
    if cfginitlock is None:
        cfginitlock = threading.RLock()
    if leader is None:
        leader = currentleader
    try:
        remote = connect_to_collective(cert, leader)
    except socket.error:
        return False
    with connecting:
        with cfginitlock:
            tlvdata.recv(remote)  # the banner
            tlvdata.recv(remote)  # authpassed... 0..
            if name is None:
                name = get_myname()
            tlvdata.send(
                remote, {
                    'collective': {
                        'operation': 'connect',
                        'name': name,
                        'txcount': cfm._txcount
                    }
                })
            keydata = tlvdata.recv(remote)
            if not keydata:
                return False
            if 'error' in keydata:
                if 'backoff' in keydata:
                    eventlet.spawn_after(random.random(), connect_to_leader,
                                         cert, name, leader)
                    return True
                if 'leader' in keydata:
                    ldrc = cfm.get_collective_member_by_address(
                        keydata['leader'])
                    if ldrc and ldrc['name'] == name:
                        raise Exception("Redirected to self")
                    return connect_to_leader(name=name,
                                             leader=keydata['leader'])
                if 'txcount' in keydata:
                    return become_leader(remote)
                print(keydata['error'])
                return False
            if follower is not None:
                follower.kill()
                cfm.stop_following()
                follower = None
            colldata = tlvdata.recv(remote)
            globaldata = tlvdata.recv(remote)
            dbi = tlvdata.recv(remote)
            dbsize = dbi['dbsize']
            dbjson = ''
            while (len(dbjson) < dbsize):
                ndata = remote.recv(dbsize - len(dbjson))
                if not ndata:
                    try:
                        remote.close()
                    except Exception:
                        pass
                    raise Exception("Error doing initial DB transfer")
                dbjson += ndata
            cfm.clear_configuration()
            try:
                cfm._restore_keys(keydata, None, sync=False)
                for c in colldata:
                    cfm._true_add_collective_member(c,
                                                    colldata[c]['address'],
                                                    colldata[c]['fingerprint'],
                                                    sync=False)
                for globvar in globaldata:
                    cfm.set_global(globvar, globaldata[globvar], False)
                cfm._txcount = dbi.get('txcount', 0)
                cfm.ConfigManager(tenant=None)._load_from_json(dbjson,
                                                               sync=False)
                cfm.commit_clear()
            except Exception:
                cfm.stop_following()
                cfm.rollback_clear()
                raise
            currentleader = leader
        #spawn this as a thread...
        follower = eventlet.spawn(follow_leader, remote)
    return True