# Ejemplo n.º 1
def mock_reload_resource_groups(context):
    """Patch the ASG and SFR resource-group loaders for a behave-style scenario.

    Depending on ``context.rg_type`` ('asg' or 'sfr'), the corresponding
    ``load`` classmethod is patched to return either freshly-constructed
    resource groups (when ``context.reload`` is truthy) or pre-built mocks.
    Yields while the patches are active so this can be used as a context
    manager / fixture.

    :param context: behave-style context; reads ``rg_type``, ``rg_num``,
        ``subnet_id``, and optionally ``reload``.
    """
    with mock.patch(
        'clusterman.aws.auto_scaling_resource_group.AutoScalingResourceGroup.load',
        return_value={},
    ) as mock_asg_load, mock.patch(
        'clusterman.aws.spot_fleet_resource_group.SpotFleetResourceGroup.load',
        return_value={},
    ) as mock_sfr_load:
        if context.rg_type == 'asg':
            if getattr(context, 'reload', False):
                mock_asg_load.return_value = {
                    f'fake-asg-{i}': AutoScalingResourceGroup(f'fake-asg-{i}')
                    for i in range(context.rg_num)
                }
            else:
                mock_asg_load.return_value = mock_asgs(context.rg_num, context.subnet_id)
        elif context.rg_type == 'sfr':
            if getattr(context, 'reload', False):
                sfr_ids = [
                    config['SpotFleetRequestId']
                    for config in ec2.describe_spot_fleet_requests()['SpotFleetRequestConfigs']
                ]
                # use 'sfr_id' rather than 'id' to avoid shadowing the builtin
                mock_sfr_load.return_value = {
                    sfr_id: SpotFleetResourceGroup(sfr_id) for sfr_id in sfr_ids
                }
            else:
                mock_sfr_load.return_value = mock_sfrs(context.rg_num, context.subnet_id)
        yield
# Ejemplo n.º 2
def test_modify_target_capacity_up(mock_spot_fleet_resource_group):
    """Scaling the SFR up to 20 updates both the target capacity and the fleet size."""
    mock_spot_fleet_resource_group.modify_target_capacity(20)
    group_id = mock_spot_fleet_resource_group.group_id
    sfr_config = ec2.describe_spot_fleet_requests(
        SpotFleetRequestIds=[group_id],
    )['SpotFleetRequestConfigs'][0]['SpotFleetRequestConfig']
    assert sfr_config['TargetCapacity'] == 20
    active_instances = ec2.describe_spot_fleet_instances(
        SpotFleetRequestId=group_id,
    )['ActiveInstances']
    assert len(active_instances) == 13
# Ejemplo n.º 3
def test_modify_target_capacity_down(mock_spot_fleet_resource_group):
    """Scaling the SFR down to 5 updates target, fulfilled capacity, and fleet size."""
    mock_spot_fleet_resource_group.modify_target_capacity(5)
    group_id = mock_spot_fleet_resource_group.group_id
    response = ec2.describe_spot_fleet_requests(
        SpotFleetRequestIds=[group_id],
    )
    updated = response['SpotFleetRequestConfigs'][0]['SpotFleetRequestConfig']
    assert updated['TargetCapacity'] == 5
    assert updated['FulfilledCapacity'] == 11
    active_instances = ec2.describe_spot_fleet_instances(
        SpotFleetRequestId=group_id,
    )['ActiveInstances']
    assert len(active_instances) == 7
# Ejemplo n.º 4
 def _get_resource_group_tags(cls) -> Mapping[str, Mapping[str, str]]:
     """ Return a mapping of SFR id -> {tag key: tag value}.

     Tags are read from the TagSpecifications of the first LaunchSpecification;
     an SFR with no TagSpecifications maps to an empty dict.
     """
     sfr_id_to_tags = {}
     response = ec2.describe_spot_fleet_requests()
     for sfr_config in response['SpotFleetRequestConfigs']:
         specs = sfr_config['SpotFleetRequestConfig']['LaunchSpecifications']
         try:
             # only the 0th launch spec is inspected; tags are expected to be
             # identical across every launch spec of a request
             raw_tags = specs[0]['TagSpecifications'][0]['Tags']
         except (IndexError, KeyError):
             # this SFR is missing the TagSpecifications
             raw_tags = []
         sfr_id_to_tags[sfr_config['SpotFleetRequestId']] = {
             tag['Key']: tag['Value'] for tag in raw_tags
         }
     return sfr_id_to_tags
def mock_rg_is_stale(context):
    """Mark one resource group as stale and patch EC2 describe calls to see it.

    Fetches the SFR configs for ``context.rg_ids``, flips the state of
    ``context.stale_rg_id`` to 'cancelled_running', then patches
    ``describe_spot_fleet_requests`` so callers observe the modified configs.
    Yields while the patch is active.
    """
    all_configs = ec2.describe_spot_fleet_requests(
        SpotFleetRequestIds=context.rg_ids)['SpotFleetRequestConfigs']
    for cfg in all_configs:
        if cfg['SpotFleetRequestId'] == context.stale_rg_id:
            cfg['SpotFleetRequestState'] = 'cancelled_running'

    def fake_describe_sfrs(SpotFleetRequestIds):
        # serve the (possibly modified) cached configs, filtered by request id
        matching = [
            cfg for cfg in all_configs
            if cfg['SpotFleetRequestId'] in SpotFleetRequestIds
        ]
        return {'SpotFleetRequestConfigs': matching}

    with mock.patch(
            'clusterman.aws.spot_fleet_resource_group.ec2.describe_spot_fleet_requests',
            side_effect=fake_describe_sfrs,
    ):
        yield
 def _get_sfr_configuration(self):
     """ Fetch this group's spot fleet request configuration from EC2.

     NOTE(review): the original docstring said responses are cached to avoid
     AWS request limits, but no caching is visible in this method itself —
     presumably it is applied by a decorator or the ec2 wrapper; confirm.
     """
     response = ec2.describe_spot_fleet_requests(
         SpotFleetRequestIds=[self.group_id])
     return response['SpotFleetRequestConfigs'][0]
# Ejemplo n.º 7
    def _make_autoscaler(self, autoscaler_config_file: str) -> None:
        """Construct the simulated Autoscaler and its pool manager.

        Sets up the signals environment (fetching signal artifacts and
        launching signal runner subprocesses), loads resource-group configs
        from the YAML file (resolving any SFR ids via EC2), seeds the
        simulated pool's target capacity from historical metrics, and finally
        builds ``self.autoscaler``.

        :param autoscaler_config_file: path to a YAML file with an optional
            'configs' list and an optional 'sfrs' list of SFR ids.
        """
        fetch_count, signal_count = setup_signals_environment(
            self.metadata.pool, self.metadata.scheduler)
        signal_dir = os.path.join(os.path.expanduser('~'), '.cache',
                                  'clusterman')

        # Forward a custom S3 endpoint (if configured) to the subprocesses.
        endpoint_url = staticconf.read_string('aws.endpoint_url',
                                              '').format(svc='s3')
        env = os.environ.copy()
        if endpoint_url:
            env['AWS_ENDPOINT_URL_ARGS'] = f'--endpoint-url {endpoint_url}'

        # Fetch signal artifacts synchronously; check=True aborts on failure.
        for i in range(fetch_count):
            subprocess.run(['fetch_clusterman_signal',
                            str(i), signal_dir],
                           check=True,
                           env=env)
        # Signal runners stay up in the background, so use Popen (no wait).
        for i in range(signal_count):
            subprocess.Popen(['run_clusterman_signal',
                              str(i), signal_dir],
                             env=env)

        with open(autoscaler_config_file) as f:
            autoscaler_config = yaml.safe_load(f)
        configs = autoscaler_config.get('configs', [])
        # Any SFR ids listed in the file are resolved into full request
        # configs via EC2 and appended to the explicit configs.
        if 'sfrs' in autoscaler_config:
            aws_configs = ec2.describe_spot_fleet_requests(
                SpotFleetRequestIds=autoscaler_config['sfrs'])
            configs.extend([
                config['SpotFleetRequestConfig']
                for config in aws_configs['SpotFleetRequestConfigs']
            ])
        pool_manager = SimulatedPoolManager(self.metadata.cluster,
                                            self.metadata.pool, configs, self)
        metric_values = self.metrics_client.get_metric_values(
            'target_capacity',
            METADATA,
            self.start_time.timestamp,
            # metrics collector runs 1x/min, but we'll try to get five data points in case some data is missing
            self.start_time.shift(minutes=5).timestamp,
            use_cache=False,
            extra_dimensions=get_cluster_dimensions(self.metadata.cluster,
                                                    self.metadata.pool,
                                                    self.metadata.scheduler),
        )
        # take the earliest data point available - this is a Decimal, which doesn't play nicely, so convert to an int
        with patch_join_delay():
            actual_target_capacity = int(
                metric_values['target_capacity'][0][1])
            pool_manager.modify_target_capacity(actual_target_capacity,
                                                force=True,
                                                prune=False)

        # Record every instance market that appears in any launch spec.
        for config in configs:
            for spec in config['LaunchSpecifications']:
                self.markets |= {get_instance_market(spec)}
        self.autoscaler = Autoscaler(
            self.metadata.cluster,
            self.metadata.pool,
            self.metadata.scheduler,
            [self.metadata.pool],
            pool_manager=pool_manager,
            metrics_client=self.metrics_client,
            monitoring_enabled=False,  # no sensu alerts during simulations
        )