Beispiel #1
0
def stop_decommissioned_node(address, terminate=False):
    """Stop or terminate the EC2 instance behind the node at *address*.

    The instance ID is obtained by running ``curl`` against the EC2
    instance metadata endpoint on the node itself (through ``run_command``).
    ``set_maintenance_mode`` is then called with that ID, and the instance
    is stopped or terminated through a boto EC2 connection to us-east-1.

    :param address: address of the node, handed to ``run_command``.
    :param terminate: when true the instance is terminated, otherwise it is
        only stopped.
    :returns: ``None``; the outcome is reported through the log only.
    """
    log.debug('Stopping or terminating the node at {address}'.format(
        address=address))

    log.debug('Retrieving the instance ID')
    command = 'curl http://169.254.169.254/latest/meta-data/instance-id'
    instance_id = run_command(address, command)

    log.debug('The instance ID is {id_}'.format(id_=instance_id))

    set_maintenance_mode(instance_id)

    log.debug('Establishing a connection to AWS EC2 us-east-1')
    conn = boto.ec2.connect_to_region('us-east-1')

    if terminate:
        log.debug('Terminating {instance}'.format(instance=instance_id))
        response = conn.terminate_instances(instance_ids=[instance_id])
    else:
        log.debug('Stopping {instance}'.format(instance=instance_id))
        response = conn.stop_instances(instance_ids=[instance_id])

    log.debug('Received the response {response}'.format(response=response))

    # The response is iterated as a collection of instance objects; the
    # request counts as successful only if our instance is among them.
    affected_ids = [instance.id for instance in response]

    if instance_id in affected_ids:
        if terminate:
            message = 'Successfully terminated {instance}'
        else:
            message = 'Successfully stopped {instance}'
        log.debug(message.format(instance=instance_id))
    else:
        if terminate:
            message = 'Failed to terminate {instance}'
        else:
            message = 'Failed to stop {instance}'
        # A node that is still running after decommissioning needs operator
        # attention, so report it above debug level.
        log.error(message.format(instance=instance_id))
Beispiel #2
0
def _public_address_for(instance, environment):
    """Return the address used to contact *instance* from its Name tag.

    When the instance carries a ``Name`` tag, the tag value is suffixed with
    the domain of *environment* (no suffix for an unrecognised environment).
    Without the tag, the instance's ``private_dns_name`` is returned.
    """
    if 'Name' not in instance.tags:
        log.debug('The tag Name could not be found on the instance')
        return instance.private_dns_name

    public_address = instance.tags['Name']

    log.debug('The tag Name exists on the instance')
    log.debug('Building public address from the instance\'s name')

    domain_suffixes = {
        'test': '.thorhudl.com',
        'stage': '.app.staghudl.com',
        'prod': '.app.hudl.com',
    }
    return public_address + domain_suffixes.get(environment, '')


def _route53_zone_name(environment):
    """Return the Route53 zone name for *environment*.

    Any environment other than ``'prod'`` or ``'test'`` maps to the stage
    zone.
    """
    zone_names = {
        'prod': 'app.hudl.com.',
        'test': 'thorhudl.com.',
    }
    return zone_names.get(environment, 'app.staghudl.com.')


def _readdress_private_primary(replica_set, member, environment):
    """Fail over a primary registered under a private ``ip-...`` name.

    The primary is asked to step down, removed from the replica set and
    added back under the address built by ``_public_address_for``.
    """
    log.warn('The replica set\'s primary address is private')
    log.debug('The replica set\'s primary is {primary}'.format(
        primary=replica_set.primary))
    log.info('To continue, the replica set must be failed over')

    log.debug('Connecting to AWS EC2 us-east-1')
    conn = boto.ec2.connect_to_region('us-east-1')
    log.debug('Connected to AWS EC2 us-east-1')

    old_primary = replica_set.primary

    # The name is split on '-' and fields 1..4 are taken as the four
    # octets of the private IP address.
    components = old_primary.split('-')
    private_ip = '.'.join(components[1:5])

    log.debug('Using the private IP address {ip} for the primary'.format(
        ip=private_ip))

    log.debug('Filtering AWS instances by private IP address')
    reservations = conn.get_all_instances(
        filters={'private-ip-address': private_ip})

    instance = reservations[0].instances[0]
    log.debug('Found instance {id_}'.format(id_=instance.id))

    public_address = _public_address_for(instance, environment)

    log.debug('Proceeding using {address} to contact the primary'.format(
        address=public_address))

    log.debug('Instructing the primary to step down')
    run_mongo_command(public_address, 'rs.stepDown()')

    log.debug('Sleeping for 120 seconds while an election takes place')
    time.sleep(120)

    log.debug('Determining the new primary')
    replica_set.determine_primary(member)

    log.debug('(Temporarily) removing the old primary from '
              'the replica set')
    replica_set.remove_member(old_primary)

    log.debug('Sleeping for 120 seconds')
    time.sleep(120)

    log.debug('Adding the old primary back into the replica '
              'set with the new address')
    replica_set.add_member(public_address)


def replace_server(environment=None, group=None, subnet_id=None,
                   instance_type=None, availability_zone=None,
                   replica_set_index=None, data_volume_size=None,
                   data_volume_iops=None, mongodb_package_version=None,
                   member=None, replace=False, node_type='data',
                   reroute=False, replica_set_template=None, terminate=False,
                   prompt_before_replace=True):
    """Launch a new node into *member*'s replica set, optionally replacing it.

    A new server is launched with ``launch_server`` and added to the replica
    set that *member* belongs to. With ``replace`` set, *member* is then
    removed from the set and stopped or terminated.

    :param environment: ``'test'``, ``'stage'`` or ``'prod'``; selects the
        domain suffix and Route53 zone, and is passed to ``launch_server``.
    :param group, subnet_id, instance_type, availability_zone,
        replica_set_index, data_volume_size, data_volume_iops,
        mongodb_package_version: passed through to ``launch_server``.
    :param member: existing replica set member; required. The process exits
        with status 1 when it is ``None``.
    :param replace: when true, remove and stop/terminate *member* once the
        new node is in place.
    :param node_type: ``'arbiter'`` and ``'datawarehousing'`` get special
        handling; any other value is treated as a regular node.
    :param reroute: for ``node_type == 'data'`` with ``replace``, update the
        existing DNS CNAME of *member* to the new node.
    :param replica_set_template: accepted for interface compatibility; this
        function does not read it (the replica set's own name is what gets
        passed to ``launch_server``).
    :param terminate: forwarded to ``stop_decommissioned_node``.
    :param prompt_before_replace: wait for the operator to press enter
        before the previous node is removed.
    :returns: ``None``.
    """
    if member is None:
        log.critical('No existing member defined.')
        sys.exit(1)

    replica_set = ReplicaSet(member)

    # A primary whose name starts with 'ip' is registered under a private
    # address and is re-added under another address before continuing.
    if replica_set.primary[:2] == 'ip':
        _readdress_private_primary(replica_set, member, environment)

    replica_set_name = replica_set.status['set']

    log.info('Using the replica set name {name}'.format(name=replica_set_name))

    if node_type == 'arbiter':

        log.info('The node being added is an arbiter')

        log.info('Launching the new node')
        node = launch_server(environment, group, subnet_id, instance_type,
                             availability_zone, replica_set_index,
                             data_volume_size, data_volume_iops,
                             mongodb_package_version, node_type,
                             replica_set_template=replica_set_name)

        log.info('Retreiving the replica set\'s current arbiter')
        arbiter = replica_set.arbiter

        if arbiter is not None:
            log.info('The current arbiter is {arbiter}'.format(
                arbiter=arbiter))
            log.info('Removing the old arbiter from the replica set')
            replica_set.remove_member(arbiter, clean=True)
        else:
            log.info('The replica set does not have an arbiter')

        log.info('Adding the new arbiter to the replica set')
        replica_set.add_member(node.hostname, arbiter=True,
                               accessible=node.instance.private_dns_name)

        if replace:
            log.info('Terminating the previous arbiter')
            stop_decommissioned_node(member, terminate=terminate)

        # Arbiters skip the sync, maintenance-mode and DNS steps below.
        return

    log.info('The node being added is a {type_} node'.format(type_=node_type))

    log.info('Launching the new node')
    node = launch_server(environment, group, subnet_id, instance_type,
                         availability_zone, replica_set_index,
                         data_volume_size, data_volume_iops,
                         mongodb_package_version, node_type,
                         replica_set_template=replica_set_name)

    log.info('Placing the new node in maintenance mode')
    set_maintenance_mode(node.instance.id)

    log.info('Adding the new node to the replica set')

    if node_type == 'datawarehousing':
        replica_set.add_member(node.hostname, hidden=True,
                               accessible=node.instance.private_dns_name)
    else:
        replica_set.add_member(node.hostname,
                               accessible=node.instance.private_dns_name)

    log.info('Retreiving the replica set\'s arbiter')
    arbiter = replica_set.arbiter

    # The arbiter is taken out of the set for the duration of the sync and
    # added back afterwards.
    if arbiter is not None:
        log.debug('The arbiter is {arbiter}'.format(arbiter=arbiter))
        log.info('(Temporarily) removing the arbiter from the replica set')
        replica_set.remove_member(arbiter, clean=True)
    else:
        log.debug('There is no arbiter')

    log.info('Waiting for the node to finish syncing')
    wait_for_sync(node)

    log.info('Removing the node from maintenance mode')
    unset_maintenance_mode(node.instance.id)

    if arbiter is not None:
        log.info('Adding the arbiter back into the replica set')
        replica_set.add_member(arbiter, arbiter=True)

    if not replace:
        return

    log.info('Preparing to remove the previous node')

    if prompt_before_replace:
        # Ring the terminal bell, then block until the operator confirms.
        print('\a')
        raw_input('Press enter to continue')

    if replica_set.primary == member:
        log.warn('The previous node is the primary')
        log.warn('The replica set will need to fail over to continue')
        log.info('Failing over the replica set')
        replica_set.failover()

    log.info('Removing the previous node from the replica set')
    replica_set.remove_member(member)

    log.info('Terminating the previous node')
    stop_decommissioned_node(member, terminate=terminate)

    if node_type == 'data' and reroute:
        log.info('Redirecting previous DNS entry')

        log.debug('Establishing a connect to AWS Route53 us-east-1')
        conn = boto.route53.connect_to_region('us-east-1')

        # Resolve the zone name first so only one zone lookup is issued.
        zone_name = _route53_zone_name(environment)
        log.debug('Retrieving the zone {zone}'.format(zone=zone_name))
        zone = conn.get_zone(zone_name)

        if zone.get_cname(member + '.') is None:
            log.debug('An existing DNS record does not exist')
        else:
            log.debug('Updating the DNS CNAME record')
            zone.update_cname(member + '.', node.instance.private_dns_name)
def _ensure_sync_targets(replica_set):
    """Call ``enforce_sync_to`` until ``validate_sync_to`` passes."""
    log.debug('Validating the syncingTo property on nodes')
    while not validate_sync_to(replica_set):
        log.debug('Enforcing the syncingTo property on nodes')
        enforce_sync_to(replica_set)

    log.debug('Validation of syncingTo property on nodes complete')


def _compact_members(replica_set, members, version):
    """Compact each of *members* in turn, one node at a time.

    *members* are entries of ``replica_set.status['members']``. Each node is
    put in maintenance mode around its compaction, and the next node is not
    started until ``recovering`` reports the current one as done.
    """
    for member in members:
        # Member names have the form 'host:port'; only the host is used.
        address = member['name'].split(':')[0]

        log.debug('Retrieving compact.js on {host}'.format(host=address))
        fetch_script(address, version)

        log.debug('Setting maintenance mode for {host}'.format(host=address))
        set_maintenance_mode(id_for_host(address))

        log.info('Compacting {host}'.format(host=address))
        compact(address)

        log.info('Waiting for {host} to recover'.format(host=address))
        while recovering(replica_set, member['name']):
            log.warning('{host} is still recovering.'.format(host=address))
            log.debug('Sleeping for 30 seconds.')
            time.sleep(30)

        log.debug('Unsetting maintenance mode for {host}'.format(
            host=address))
        unset_maintenance_mode(id_for_host(address))


def compact_mongodb_server(host, version, prompt_before_failover=True):
    """Compact every data-bearing member of the replica set *host* is in.

    Members reporting ``SECONDARY`` are compacted first. The replica set is
    then failed over and the members that reported ``PRIMARY`` before the
    failover are compacted the same way.

    :param host: a member of the replica set, handed to ``ReplicaSet``.
    :param version: passed to ``fetch_script`` when retrieving compact.js.
    :param prompt_before_failover: wait for the operator to press return
        before the replica set is failed over.
    :returns: ``None``.
    """
    log.debug('Retrieving replica set for host {host}'.format(host=host))
    replica_set = ReplicaSet(host)

    _ensure_sync_targets(replica_set)

    secondaries = [node for node in replica_set.status['members']
                   if node['stateStr'] == 'SECONDARY']

    log.info('Compacting {nodes}'.format(
        nodes=[s['name'] for s in secondaries]))

    _compact_members(replica_set, secondaries, version)

    log.debug('Retrieving current primary')
    # Captured before the failover so the former primary can be compacted
    # afterwards.
    primaries = [node for node in replica_set.status['members']
                 if node['stateStr'] == 'PRIMARY']

    log.debug('Preparing to compact primary {host}'.format(
        host=primaries[0]['name']))

    log.info('Preparing to fail over replica set')

    if prompt_before_failover:
        # Ring the terminal bell, then block until the operator confirms.
        print('\a')
        raw_input('Press return to continue')

    log.debug('Instructing the replica set to fail over')
    replica_set.failover()

    _ensure_sync_targets(replica_set)

    # NOTE(review): assumes the former primary has stepped down by the time
    # failover() returns - confirm against ReplicaSet.failover.
    _compact_members(replica_set, primaries, version)