def get_all_groups(self, kube_nodes):
    """Return AutoScalingGroup wrappers for every ASG in the configured regions.

    When ``self.cluster_name`` is set, only groups tagged with the matching
    cluster key and a recognized worker role are included; otherwise every
    group is returned. Groups are processed in name order per region.

    Args:
        kube_nodes: Kubernetes node objects passed through to each
            AutoScalingGroup wrapper.

    Returns:
        list of AutoScalingGroup instances.
    """
    def _is_cluster_worker(tags):
        # Scan the raw tag list: the cluster key names the cluster, and the
        # last tag whose key is one of the role keys wins as the role.
        cluster = None
        role = None
        for tag in tags:
            key = tag['Key']
            if key == self._CLUSTER_KEY:
                cluster = tag['Value']
            elif key in self._ROLE_KEYS:
                role = tag['Value']
        return (cluster == self.cluster_name
                and role in self._WORKER_ROLE_VALUES)

    groups = []
    for region in self.regions:
        client = self.session.client(self._BOTO_CLIENT_TYPE,
                                     region_name=region)
        raw_groups = aws_utils.fetch_all(
            client.describe_auto_scaling_groups, {}, 'AutoScalingGroups')
        launch_configs = self.get_all_launch_configs(client, raw_groups)
        ordered_groups = sorted(raw_groups,
                                key=lambda g: g['AutoScalingGroupName'])
        for raw_group in ordered_groups:
            # Only filter by tags when a cluster name was configured.
            if self.cluster_name and not _is_cluster_worker(raw_group['Tags']):
                continue
            groups.append(
                AutoScalingGroup(
                    client, region, kube_nodes, raw_group,
                    launch_configs[raw_group['LaunchConfigurationName']]))
    return groups
def get_all_raw_groups_and_launch_configs(client):
    """Fetch all raw ASG dicts and their launch configurations.

    Args:
        client: boto3 AutoScaling client for one region.

    Returns:
        Tuple ``(raw_groups, all_launch_configs)``: the list of raw
        AutoScalingGroups dicts, and a dict mapping each
        LaunchConfigurationName to its raw launch-configuration dict.
    """
    raw_groups = aws_utils.fetch_all(
        client.describe_auto_scaling_groups, {'MaxRecords': 100},
        'AutoScalingGroups')
    all_launch_configs = {}
    # DescribeLaunchConfigurations takes a bounded list of names per call,
    # so request the configs in batches.
    batch_size = 50
    for batch_start in range(0, len(raw_groups), batch_size):
        # BUG FIX: the range() step is already batch_size, so the loop
        # variable is the slice start itself. The old code multiplied it
        # by batch_size again, so every batch after the first sliced far
        # past the intended window and those groups' launch configs were
        # silently skipped.
        groups = raw_groups[batch_start:batch_start + batch_size]
        kwargs = {
            'LaunchConfigurationNames':
                [g['LaunchConfigurationName'] for g in groups]
        }
        launch_configs = aws_utils.fetch_all(
            client.describe_launch_configurations, kwargs,
            'LaunchConfigurations')
        all_launch_configs.update(
            (lc['LaunchConfigurationName'], lc) for lc in launch_configs)
    return raw_groups, all_launch_configs
def get_all_launch_configs(self, client, raw_groups):
    """Fetch the launch configuration for each raw ASG dict.

    Args:
        client: boto3 AutoScaling client for one region.
        raw_groups: list of raw AutoScalingGroups dicts, each carrying a
            ``LaunchConfigurationName`` key.

    Returns:
        dict mapping LaunchConfigurationName -> raw launch-configuration
        dict.
    """
    all_launch_configs = {}
    # DescribeLaunchConfigurations takes a bounded list of names per call,
    # so request the configs in batches.
    batch_size = 50
    for batch_start in range(0, len(raw_groups), batch_size):
        # BUG FIX: the range() step is already batch_size, so the loop
        # variable is the slice start itself. The old code multiplied it
        # by batch_size again, so every batch after the first sliced far
        # past the intended window and those groups' launch configs were
        # silently skipped.
        groups = raw_groups[batch_start:batch_start + batch_size]
        kwargs = {
            'LaunchConfigurationNames':
                [g['LaunchConfigurationName'] for g in groups]
        }
        launch_configs = aws_utils.fetch_all(
            client.describe_launch_configurations, kwargs,
            'LaunchConfigurations')
        all_launch_configs.update(
            (lc['LaunchConfigurationName'], lc) for lc in launch_configs)
    return all_launch_configs
def time_out_spot_asgs(self, asgs):
    """
    Using recent spot pricing data from AWS, time out spot instance
    ASGs that would be outbid for more than _MAX_OUTBIDS_IN_INTERVAL seconds.

    For each spot ASG this estimates, per availability zone, how long the
    recent market price exceeded the ASG's bid, averages that over the
    zones seen, and records a timeout in ``self._spot_timeouts`` keyed by
    the ASG id (``None`` when the ASG is not timed out). Fetched price
    history is cached per region in ``self._spot_price_history``.
    """
    # Bucket the spot ASGs as region -> instance type -> [asg, ...] so we
    # can issue one price-history query per region.
    region_instance_asg_map = {}
    for asg in asgs:
        if not asg.is_spot:
            continue
        instance_asg_map = region_instance_asg_map.setdefault(
            asg.region, {})
        instance_type = asg.launch_config['InstanceType']
        instance_asg_map.setdefault(instance_type, []).append(asg)

    now = datetime.datetime.now(pytz.utc)
    since = now - datetime.timedelta(seconds=self._SPOT_HISTORY_PERIOD)

    for region, instance_asg_map in region_instance_asg_map.items():
        # Expire old history: keep only cached entries newer than the
        # configured history window.
        history = [
            item for item in self._spot_price_history.get(region, [])
            if item['Timestamp'] > since
        ]
        # Only fetch history newer than what we already have cached.
        if history:
            newest_spot_price = max(item['Timestamp'] for item in history)
        else:
            newest_spot_price = since
        client = self.session.client('ec2', region_name=region)
        kwargs = {
            'StartTime': newest_spot_price,
            'InstanceTypes': list(instance_asg_map.keys()),
            'ProductDescriptions': ['Linux/UNIX']
        }
        history.extend(
            aws_utils.fetch_all(client.describe_spot_price_history,
                                kwargs, 'SpotPriceHistory'))
        self._spot_price_history[region] = history
        for instance_type, asgs in instance_asg_map.items():
            for asg in asgs:
                # last_az_bid: AZ -> timestamp of the previous price point
                # seen for that AZ; outbid_time: AZ -> accumulated duration
                # during which the market price exceeded our bid.
                last_az_bid = {}
                outbid_time = {}
                bid_price = float(asg.launch_config['SpotPrice'])
                for item in history:
                    if item['InstanceType'] != instance_type:
                        continue
                    if float(item['SpotPrice']) > bid_price:
                        # we would've been outbid!
                        # NOTE(review): subtracting in this order only
                        # yields a positive duration if history is ordered
                        # newest-first (AWS's default for
                        # DescribeSpotPriceHistory) — confirm, since
                        # cached + freshly-fetched entries are
                        # concatenated above.
                        if item['AvailabilityZone'] in last_az_bid:
                            time_diff = (
                                last_az_bid[item['AvailabilityZone']] -
                                item['Timestamp'])
                        else:
                            time_diff = datetime.timedelta(seconds=0)
                        outbid_time[item['AvailabilityZone']] = (
                            outbid_time.get(
                                item['AvailabilityZone'],
                                datetime.timedelta(seconds=0)) +
                            time_diff)
                    # Track the previous price point per AZ regardless of
                    # whether it outbid us, so time_diff spans consecutive
                    # price points.
                    last_az_bid[
                        item['AvailabilityZone']] = item['Timestamp']
                # Average the outbid duration across the AZs that had any
                # outbid intervals.
                if outbid_time:
                    avg_outbid_time = sum(
                        t.total_seconds()
                        for t in outbid_time.values()) / len(outbid_time)
                else:
                    avg_outbid_time = 0.0
                if avg_outbid_time > self._MAX_OUTBIDS_IN_INTERVAL:
                    # Park this ASG until `now + _TIMEOUT`.
                    self._spot_timeouts[
                        asg._id] = now + datetime.timedelta(
                            seconds=self._TIMEOUT)
                    logger.info(
                        '%s (%s) is spot timed out until %s (would have been outbid for %ss on average)',
                        asg.name, asg.region,
                        self._spot_timeouts[asg._id], avg_outbid_time)
                else:
                    self._spot_timeouts[asg._id] = None