Example #1
0
def test_over_filter_limits(value_numbers):
    """Check that ec2_describe_instances pages its requests by MAX_PAGE_SIZE.

    The underlying client call is patched out; the test then verifies that
    it was invoked once per MAX_PAGE_SIZE-sized slice of the ID list, in order.
    """
    instance_ids = list(range(value_numbers))
    expected_pages = math.ceil(value_numbers / MAX_PAGE_SIZE)

    # One expected client call per consecutive slice of the requested IDs.
    expected_calls = []
    for page in range(expected_pages):
        start = page * MAX_PAGE_SIZE
        expected_calls.append(
            call(InstanceIds=instance_ids[start:start + MAX_PAGE_SIZE]))

    with mock.patch('clusterman.aws.client.ec2.describe_instances') as mock_describe_instances:
        ec2_describe_instances(instance_ids)

    # The mock keeps its call history after the patch context exits.
    assert mock_describe_instances.call_count == expected_pages
    assert mock_describe_instances.call_args_list == expected_calls
Example #2
0
    def get_instance_metadatas(
        self,
        state_filter: Optional[Collection[str]] = None
    ) -> Sequence[InstanceMetadata]:
        """ Build an InstanceMetadata record for each instance in this group

        :param state_filter: if given (and non-empty), only instances whose AWS
            state name is in this collection are included
        :returns: one InstanceMetadata per matching instance, in the order they
            are returned by ec2_describe_instances; ``hostname`` is None when the
            instance has no private IP or its reverse DNS lookup fails
        """
        import logging  # local import: keeps this block self-contained

        instance_metadatas = []
        for instance_dict in ec2_describe_instances(
                instance_ids=self.instance_ids):
            aws_state = instance_dict['State']['Name']
            if state_filter and aws_state not in state_filter:
                continue

            instance_market = get_instance_market(instance_dict)
            instance_ip = instance_dict.get('PrivateIpAddress')

            hostname = None
            if instance_ip:
                try:
                    hostname = gethostbyaddr(instance_ip)[0]
                except OSError as e:
                    # socket.herror / socket.gaierror are OSError subclasses.  A single
                    # unresolvable IP must not abort the listing for the whole group,
                    # so fall back to "no hostname" for this instance only.
                    logging.getLogger(__name__).warning(
                        f"Couldn't derive hostname from IP via DNS for {instance_ip}: {e}"
                    )

            metadata = InstanceMetadata(
                group_id=self.id,
                hostname=hostname,
                instance_id=instance_dict['InstanceId'],
                ip_address=instance_ip,
                is_stale=(instance_dict['InstanceId']
                          in self.stale_instance_ids),
                market=instance_market,
                state=aws_state,
                uptime=(arrow.now() - arrow.get(instance_dict['LaunchTime'])),
                weight=self.market_weight(instance_market),
            )
            instance_metadatas.append(metadata)
        return instance_metadatas
Example #3
0
 def _instances_by_market(self):
     """ Group the described instances of this resource group by their market

     Returns a mapping from market to the list of instance descriptions in it.
     NOTE(review): the previous docstring said responses from this API call are
     cached to avoid AWS request limits; no caching is visible in this method
     itself -- presumably it is applied by a decorator; confirm.
     """
     by_market: Mapping[InstanceMarket, List[Mapping]] = defaultdict(list)
     for description in ec2_describe_instances(self.instance_ids):
         market = get_instance_market(description)
         by_market[market].append(description)
     return by_market
Example #4
0
    def terminate_instances_by_id(self,
                                  instance_ids: List[str],
                                  batch_size: int = 500) -> Sequence[str]:
        """ Terminate instances in this resource group

        Instances whose market has no AZ information are skipped (they are most
        likely already terminated).  The caller's ``instance_ids`` list is never
        modified.

        :param instance_ids: a list of instance IDs to terminate
        :param batch_size: number of instances to terminate at one time
        :returns: a list of terminated instance IDs
        """
        if not instance_ids:
            logger.warning(f'No instances to terminate in {self.group_id}')
            return []

        instance_weights = {}
        skipped_instance_ids = set()
        for instance in ec2_describe_instances(instance_ids):
            instance_market = get_instance_market(instance)
            if not instance_market.az:
                logger.warning(
                    f"Instance {instance['InstanceId']} missing AZ info, likely already terminated so skipping",
                )
                # Record the skip instead of removing from the caller's list in place.
                skipped_instance_ids.add(instance['InstanceId'])
                continue
            instance_weights[instance['InstanceId']] = self.market_weight(instance_market)

        # Work on a filtered copy so the argument passed in by the caller stays intact.
        ids_to_terminate = [
            instance_id for instance_id in instance_ids
            if instance_id not in skipped_instance_ids
        ]

        # AWS API recommends not terminating more than 1000 instances at a time, and to
        # terminate larger numbers in batches
        terminated_instance_ids = []
        for batch in range(0, len(ids_to_terminate), batch_size):
            response = ec2.terminate_instances(
                InstanceIds=ids_to_terminate[batch:batch + batch_size])
            terminated_instance_ids.extend([
                instance['InstanceId']
                for instance in response['TerminatingInstances']
            ])

        # It's possible that not every instance is terminated.  The most likely cause for this
        # is that AWS terminated the instance in between getting its status and the terminate_instances
        # request.  This is probably fine but let's log a warning just in case.
        missing_instances = set(ids_to_terminate) - set(terminated_instance_ids)
        if missing_instances:
            logger.warning(
                'Some instances could not be terminated; they were probably killed previously'
            )
            logger.warning(f'Missing instances: {list(missing_instances)}')

        # Only count instances that were actually terminated.  An ID that was not
        # returned by the describe call has no recorded weight; treat it as 0
        # rather than failing with KeyError after the termination already happened.
        terminated_capacity = sum(
            instance_weights.get(instance_id, 0)
            for instance_id in terminated_instance_ids
        )

        logger.info(
            f'{self.id} terminated weight: {terminated_capacity}; instances: {terminated_instance_ids}'
        )
        return terminated_instance_ids
Example #5
0
 def get_tasks_and_frameworks():
     """ Build a fake (tasks, frameworks) pair tied to one real instance

     Uses the first instance of the first resource group in ``context`` as the
     agent for every task.  ``context`` and ``tasks`` come from the enclosing
     scope.  The task list holds ``int(tasks)`` references to one shared dict.
     """
     groups = context.pool_manager.resource_groups
     resource_group = groups[context.rg_ids[0]]
     described = ec2_describe_instances(instance_ids=resource_group.instance_ids[:1])

     running_task = {
         'slave_id': described[0]['InstanceId'],
         'state': 'TASK_RUNNING',
         'framework_id': 'framework_a',
     }
     task_list = [running_task] * int(tasks)
     frameworks = {'framework_a': {'name': 'framework_a_name'}}
     return task_list, frameworks
Example #6
0
def host_from_instance_id(
    sender: str,
    receipt_handle: str,
    instance_id: str,
) -> Optional[Host]:
    """Resolve an EC2 instance ID into a Host record, or None if it cannot be built.

    None is returned (after logging a warning) when the instance cannot be
    described, has no 'aws:ec2spot:fleet-request-id' tag, has no private IP,
    or its IP cannot be reverse-resolved.  The scheduler is 'kubernetes' when a
    'KubernetesCluster' tag is present and 'mesos' otherwise.

    :param sender: passed through unchanged to the Host
    :param receipt_handle: passed through unchanged to the Host
    :param instance_id: the EC2 instance ID to look up
    :returns: the Host for this instance, or None
    """
    described = ec2_describe_instances(instance_ids=[instance_id])
    if not described:
        logger.warning(f'No instance data found for {instance_id}')
        return None
    instance = described[0]

    # Single pass over the tags: collect fleet request IDs and detect the scheduler.
    fleet_ids = []
    scheduler = 'mesos'
    try:
        for tag in instance['Tags']:
            tag_key = tag['Key']
            if tag_key == 'aws:ec2spot:fleet-request-id':
                fleet_ids.append(tag['Value'])
            elif tag_key == 'KubernetesCluster':
                scheduler = 'kubernetes'
    except KeyError as e:
        logger.warning(f'SFR tag key not found: {e}')
        fleet_ids = []

    if not fleet_ids:
        logger.warning(f'No SFR ID found for {instance_id}')
        return None

    try:
        ip = instance['PrivateIpAddress']
    except KeyError:
        logger.warning(f'No primary IP found for {instance_id}')
        return None

    try:
        resolved = socket.gethostbyaddr(ip)
    except socket.error:
        logger.warning(f"Couldn't derive hostname from IP via DNS for {ip}")
        return None

    # gethostbyaddr returns (hostname, aliases, addresses); only the name is used.
    return Host(
        sender=sender,
        receipt_handle=receipt_handle,
        instance_id=instance_id,
        hostname=resolved[0],
        group_id=fleet_ids[0],
        ip=ip,
        scheduler=scheduler,
    )
Example #7
0
def test_empty_instance_ids():
    """ec2_describe_instances returns an empty list for None and for an empty ID list."""
    for no_ids in (None, []):
        assert ec2_describe_instances(instance_ids=no_ids) == []