def test_is_safe_to_kill(mock_get_hosts_past_maintenance_start,
                         mock_is_host_drained):
    """Check is_safe_to_kill for each drained / past-maintenance-start combination.

    Only the combination "not drained and not past maintenance start" is
    expected to be reported as unsafe; the other three must be safe.
    """
    hostname = "blah"
    # (is_host_drained result, hosts past maintenance start, expected verdict)
    scenarios = (
        (False, [], False),
        (False, [hostname], True),
        (True, [hostname], True),
        (True, [], True),
    )
    for drained, hosts_past_start, expected in scenarios:
        mock_is_host_drained.return_value = drained
        mock_get_hosts_past_maintenance_start.return_value = hosts_past_start
        verdict = paasta_maintenance.is_safe_to_kill(hostname)
        assert bool(verdict) is expected
    def wait_and_terminate(self, slave, drain_timeout, dry_run, region=None):
        """Wait for a slave to be drained, then terminate its EC2 instance.

        ``is_safe_to_kill`` is polled every 5 seconds. If polling has not
        finished within ``drain_timeout + 300`` seconds the instance is
        terminated anyway. A slave without an instance ID is skipped: a
        warning is logged and nothing is terminated.

        :param slave: slave to kill; its ``instance_id``, ``hostname``, ``ip``
            and ``pid`` attributes are read
        :param drain_timeout: how long to wait before terminating
            even if not drained
        :param dry_run: forwarded to EC2 as ``DryRun``; when True the wait is
            skipped and the resulting ``DryRunOperation`` error is ignored
        :param region: region to connect to ec2
        :raises ClientError: for any EC2 error other than ``DryRunOperation``
        """
        instance_id = slave.instance_id
        if not instance_id:
            # Nothing can be terminated without an ID. Returning here (rather
            # than looping) avoids busy-spinning until the timeout fires.
            self.log.warning(
                "Didn't find instance ID for slave: {}. Skipping terminating"
                .format(slave.pid), )
            return

        ec2_client = boto3.client('ec2', region_name=region)

        def terminate():
            # InstanceIds must be a list, even for a single instance.
            try:
                ec2_client.terminate_instances(
                    InstanceIds=[instance_id], DryRun=dry_run)
            except ClientError as e:
                # A dry run reports success by raising DryRunOperation.
                if e.response['Error'].get('Code') != 'DryRunOperation':
                    raise

        try:
            # This loop should always finish because the maintenance window should trigger is_ready_to_kill
            # being true. Just in case though we set a timeout and terminate anyway
            with Timeout(seconds=drain_timeout + 300):
                # Check if no tasks are running or we have reached the maintenance window
                while not (is_safe_to_kill(slave.hostname) or dry_run):
                    self.log.info("Instance {}: NOT ready to kill".format(
                        instance_id))
                    self.log.debug("Waiting 5 seconds and then checking again")
                    time.sleep(5)
                self.log.info(
                    "TERMINATING: {} (Hostname = {}, IP = {})".format(
                        instance_id,
                        slave.hostname,
                        slave.ip,
                    ))
                terminate()
        except TimeoutError:
            self.log.error(
                "Timed out after {} waiting to drain {}, now terminating anyway"
                .format(
                    drain_timeout,
                    slave.pid,
                ))
            terminate()
Esempio n. 3
0
def test_is_safe_to_kill(
    mock_get_hosts_past_maintenance_start,
    mock_is_host_drained,
):
    """Exercise is_safe_to_kill across the four drained / maintenance states.

    A host is expected to be unsafe to kill only when it is neither drained
    nor listed among the hosts past their maintenance start.
    """
    def verdict_for(drained, hosts_past_start):
        # Configure both mocks, then ask for the verdict on the same host.
        mock_is_host_drained.return_value = drained
        mock_get_hosts_past_maintenance_start.return_value = hosts_past_start
        return paasta_maintenance.is_safe_to_kill('blah')

    assert not verdict_for(drained=False, hosts_past_start=[])
    assert verdict_for(drained=False, hosts_past_start=['blah'])
    assert verdict_for(drained=True, hosts_past_start=['blah'])
    assert verdict_for(drained=True, hosts_past_start=[])
Esempio n. 4
0
def wait_and_terminate(slave, drain_timeout, dry_run, region=None):
    """Wait for a slave to be drained, then terminate its EC2 instance.

    ``is_safe_to_kill`` is polled every 5 seconds. If polling has not finished
    within ``drain_timeout + 300`` seconds the instance is terminated anyway.
    A slave without an instance ID is skipped: a warning is logged and nothing
    is terminated.

    :param slave: dict of slave to kill; the ``instance_id``, ``hostname``,
        ``ip`` and ``pid`` keys are read
    :param drain_timeout: how long to wait before terminating even if not drained
    :param dry_run: forwarded to EC2 as ``DryRun``; when True the wait is
        skipped and the resulting ``DryRunOperation`` error is ignored
    :param region: region to connect to ec2
    :raises ClientError: for any EC2 error other than ``DryRunOperation``
    """
    instance_id = slave['instance_id']
    if not instance_id:
        # Nothing can be terminated without an ID. Returning here (rather than
        # looping) avoids busy-spinning until the timeout fires.
        log.warning("Didn't find instance ID for slave: {0}. Skipping terminating".format(slave['pid']))
        return

    ec2_client = boto3.client('ec2', region_name=region)

    def terminate():
        # InstanceIds must be a list, even for a single instance.
        try:
            ec2_client.terminate_instances(InstanceIds=[instance_id], DryRun=dry_run)
        except ClientError as e:
            # A dry run reports success by raising DryRunOperation.
            if e.response['Error'].get('Code') != 'DryRunOperation':
                raise

    try:
        # This loop should always finish because the maintenance window should trigger is_ready_to_kill
        # being true. Just in case though we set a timeout and terminate anyway
        with Timeout(seconds=drain_timeout + 300):
            # Check if no tasks are running or we have reached the maintenance window
            while not (is_safe_to_kill(slave['hostname']) or dry_run):
                log.info("Instance {0}: NOT ready to kill".format(instance_id))
                log.debug("Waiting 5 seconds and then checking again")
                time.sleep(5)
            log.info("TERMINATING: {0} (Hostname = {1}, IP = {2})".format(
                instance_id,
                slave['hostname'],
                slave['ip'],
            ))
            terminate()
    except TimeoutError:
        log.error("Timed out after {0} waiting to drain {1}, now terminating anyway".format(drain_timeout,
                                                                                            slave['pid']))
        terminate()
Esempio n. 5
0
def wait_and_terminate(slave, drain_timeout, dry_run, region=None):
    """Waits for slave to be drained and then terminates its EC2 instance.

    Polls ``is_safe_to_kill`` at 5 second intervals, giving up and terminating
    regardless once ``drain_timeout + 300`` seconds have elapsed. When the
    slave carries no instance ID a warning is logged and the call returns
    without terminating anything.

    :param slave: dict of slave to kill (keys read: ``instance_id``,
        ``hostname``, ``ip``, ``pid``)
    :param drain_timeout: how long to wait before terminating even if not drained
    :param dry_run: passed through to EC2 as ``DryRun``; if True there is no
        waiting and the ``DryRunOperation`` error EC2 answers with is ignored
    :param region: region to connect to ec2
    :raises ClientError: for any EC2 error other than ``DryRunOperation``
    """
    instance_id = slave['instance_id']
    if not instance_id:
        # Bail out instead of retrying: without a sleep the retry would spin
        # until the timeout and then try to terminate an empty ID.
        log.warning("Didn't find instance ID for slave: {0}. Skipping terminating".format(slave['pid']))
        return

    ec2_client = boto3.client('ec2', region_name=region)

    def terminate_instance():
        # EC2 expects a list of instance IDs, so wrap the single ID.
        try:
            ec2_client.terminate_instances(InstanceIds=[instance_id], DryRun=dry_run)
        except ClientError as e:
            # DryRunOperation is how EC2 signals a successful dry run.
            if e.response['Error'].get('Code') != 'DryRunOperation':
                raise

    try:
        # This loop should always finish because the maintenance window should trigger is_ready_to_kill
        # being true. Just in case though we set a timeout and terminate anyway
        with Timeout(seconds=drain_timeout + 300):
            while True:
                # Check if no tasks are running or we have reached the maintenance window
                if is_safe_to_kill(slave['hostname']) or dry_run:
                    break
                log.info("Instance {0}: NOT ready to kill".format(instance_id))
                log.debug("Waiting 5 seconds and then checking again")
                time.sleep(5)
            log.info("TERMINATING: {0} (Hostname = {1}, IP = {2})".format(
                instance_id,
                slave['hostname'],
                slave['ip'],
            ))
            terminate_instance()
    except TimeoutError:
        log.error("Timed out after {0} waiting to drain {1}, now terminating anyway".format(drain_timeout,
                                                                                            slave['pid']))
        terminate_instance()
 def can_kill(self, hostname, should_drain, dry_run, timer):
     """Decide whether the slave on ``hostname`` may be killed now.

     :param hostname: host passed to ``is_safe_to_kill``
     :param should_drain: if False, ``is_safe_to_kill`` is never consulted
         and the answer is False until the timer is ready
     :param dry_run: if True, always answer True without touching the timer
     :param timer: object providing ``ready()``, ``left()`` and ``start()``
     :returns: True when killing may proceed, False to keep waiting
     :raises TimeoutError: when ``timer.ready()`` is true (``timer.start()``
         is called before raising)
     """
     if dry_run:
         return True

     if timer.ready():
         self.log.warning(
             "Timer expired before slave ready to kill, proceding to terminate anyways"
         )
         timer.start()
         raise TimeoutError

     if should_drain:
         if not is_safe_to_kill(hostname):
             return False
         ready_msg = "Slave %s is ready to kill, with %s left on timer" % (
             hostname, timer.left())
         self.log.info(ready_msg)
         timer.start()
         return True

     waiting_msg = "Not draining, waiting %s longer before killing" % timer.left()
     self.log.info(waiting_msg)
     return False