def upgrade(self):
    """
    Entry point for master upgrade. Supports upgrade of:
      - Kubernetes versions;
      - AMI image;
      - Selected list of kube_env variables.
    """
    logger.info("Starting master upgrade!")
    ami_name = os.getenv("AX_AWS_IMAGE_NAME")
    assert ami_name, "Failed to detect AMI name from environment"
    ami_id = AMI(aws_region=self.region, aws_profile=self.profile).get_ami_id_from_name(ami_name=ami_name)
    logger.info("Using ami %s for new master", ami_id)

    # Load the previously saved master config from S3, if any.
    s3_data = self.cluster_info.get_master_config(USER_DATA_FILE_S3)
    attr = None if s3_data is None else json.loads(s3_data)

    instance_id = self.discover_master()
    terminating = False
    launching = False

    if instance_id is None:
        # This is possible if a previous upgrade failed after termination but before the new
        # master started. Simply restart the master in this case.
        # It can also happen when the master crashed in the first place and an upgrade is then
        # started. Use the old config to start the master and rerun the upgrade.
        logger.info("No running master. S3 attr %s.", USER_DATA_FILE_S3)
        assert attr is not None, "No master instance and no master config."
        self.attributes = attr
        self.attributes['user_data_file'] = USER_DATA_FILE_S3
        self.ensure_master_tags()
        self.save_master_config(USER_DATA_FILE_S3)
        launching = True
    else:
        self.master_instance = self.ec2.Instance(instance_id)
        logger.info("Running master %s.", instance_id)
        self.aws_image = ami_id
        self.instance_profile = AXClusterInstanceProfile(
            self.cluster_name_id, region_name=self.region, aws_profile=self.profile).get_master_arn()
        self.populate_attributes()
        master_tag_updated = self.ensure_master_tags()
        # TODO: Possible race here.
        # If the upgrade is interrupted after the config is saved but before the master is
        # terminated, the next upgrade attempt would assume the master is already upgraded.
        # The instance then has to be terminated manually.
        if attr != self.attributes or self.user_data_updated() or master_tag_updated:
            self.save_master_config(USER_DATA_FILE_NEW)
            terminating = True
            launching = True

    if terminating:
        self.terminate_master()
        logger.info("Done terminating %s", instance_id)
    if launching:
        logger.info("Done launching %s", self.launch_new_master())
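# A minimal sketch (not part of the original code) of the decision logic upgrade() applies.
# The hypothetical inputs master_exists and config_changed stand in for discover_master()
# and the attribute / user-data / tag comparison above.
def _upgrade_actions(master_exists, config_changed):
    """Return (terminate, launch) flags for one master-upgrade pass."""
    if not master_exists:
        # No running master: relaunch from the config previously saved in S3.
        return False, True
    if config_changed:
        # Master is running but out of date: save the new config, then replace the instance.
        return True, True
    # Master is already up to date: nothing to do.
    return False, False

# Example: an interrupted upgrade (master already terminated) only relaunches.
assert _upgrade_actions(master_exists=False, config_changed=True) == (False, True)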
def _update_launch_config(self, old_name, new_name, retain_spot_price=False):
    """
    Upgrade launch config old_name to new_name.
    Return immediately if the new launch config has already been created.
    Raise an error if neither the old nor the new one exists.
    """
    logger.info("Converting launch config %s to %s ...", old_name, new_name)
    ami_name = os.getenv("AX_AWS_IMAGE_NAME")
    assert ami_name, "Failed to detect AMI name from environment"
    ami_id = AMI(aws_region=self._region, aws_profile=self._profile).get_ami_id_from_name(ami_name=ami_name)
    logger.info("Using ami %s for new minion launch configuration", ami_id)
    cluster_config = AXClusterConfig(cluster_name_id=self._cluster_name_id, aws_profile=self._profile)

    if LaunchConfig(new_name, aws_profile=self._profile, aws_region=self._region).get() is not None:
        # Launch config already updated; nothing to do.
        logger.debug("New launch config %s already there. No creation.", new_name)
        return

    lc = LaunchConfig(old_name, aws_profile=self._profile, aws_region=self._region)
    config = lc.get()
    assert config is not None, "Neither old nor new launch config exists"
    user_data = config.pop("UserData")
    logger.debug("Existing launch config %s: %s", old_name, config)

    updates = {
        "new_kube_version": self._new_kube_version,
        "new_cluster_install_version": self._new_cluster_install_version,
        "new_kube_server_hash": self._new_kube_server_hash,
        "new_kube_salt_hash": self._new_kube_salt_hash,
    }

    # Replace ImageId, the instance profile, and everything listed in default_kube_up_env.
    config["ImageId"] = ami_id
    config["IamInstanceProfile"] = AXClusterInstanceProfile(
        self._cluster_name_id, aws_profile=self._profile).get_minion_instance_profile_name()

    # User data is gzip-compressed: decompress it, patch kube_env, and re-compress as gzip.
    user_data = zlib.decompressobj(32 + zlib.MAX_WBITS).decompress(user_data)
    user_data = kube_env_update(user_data, updates)
    comp = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
    config["UserData"] = comp.compress(user_data) + comp.flush()

    # Add the AX volume device mapping, replacing any existing mapping for AX_VOL_DISK.
    orig_block_devices = config.pop("BlockDeviceMappings")
    block_devices = [device for device in orig_block_devices if device["DeviceName"] != AX_VOL_DISK]
    vol_device = {
        "DeviceName": AX_VOL_DISK,
        "Ebs": {
            "DeleteOnTermination": True,
            "VolumeSize": cluster_config.get_ax_vol_size(),
            "VolumeType": self._ax_vol_disk_type,
        },
    }
    block_devices.append(vol_device)
    config["BlockDeviceMappings"] = block_devices
    logger.debug("New block device mappings: %s", config["BlockDeviceMappings"])

    lc.copy(new_name, config, retain_spot_price=retain_spot_price)
    logger.info("Converting launch config %s to %s ... DONE.", old_name, new_name)
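# Standalone illustration (not part of the original module) of the zlib calls used above:
# wbits=32+MAX_WBITS auto-detects gzip or zlib headers on decompression, while
# MAX_WBITS | 16 makes compressobj emit a gzip wrapper, matching the gzip-encoded EC2 user data.
# The payload below is a made-up example, not real cluster user data.
import zlib

original = b"KUBE_VERSION=example\nAX_CLUSTER_INSTALL_VERSION=example\n"

comp = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
gzipped = comp.compress(original) + comp.flush()

restored = zlib.decompressobj(32 + zlib.MAX_WBITS).decompress(gzipped)
assert restored == original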
def _generate_raw_cluster_config_dict_aws(self, config):
    """
    Generate AWS-specific cluster config.
    :param config: raw cluster config dict to fill in
    :return: the updated config dict
    """
    # TODO: once we support installing with a config file, only overwrite items that are
    # specifically set through the CLI.
    config["cloud"]["configure"]["region"] = self._cfg.cloud_region
    config["cloud"]["configure"]["placement"] = self._cfg.cloud_placement
    config["cloud"]["trusted_cidr"] = self._cfg.trusted_cidrs
    config["cloud"]["vpc_id"] = self._cfg.vpc_id

    # When installing into an existing VPC (vpc_id is set), vpc_cidr_base is left unset here;
    # it is fetched from cluster metadata after the cluster is created.
    config["cloud"]["vpc_cidr_base"] = self._cfg.vpc_cidr_base if not self._cfg.vpc_id else None
    config["cloud"]["subnet_size"] = self._cfg.subnet_mask_size
    config["cloud"]["configure"]["sandbox_enabled"] = self._cfg.enable_sandbox

    # TODO (#119): might want to remove this field as it was used for hacks before. Setting it to "dev" for now.
    config["cloud"]["configure"]["cluster_user"] = "******"

    # TODO (#117): switch all spot-related options to literals rather than true/false, and revise
    # the need to specify a spot price during installation.
    if self._cfg.spot_instances_option in [SpotInstanceOption.PARTIAL_SPOT, SpotInstanceOption.ALL_SPOT]:
        spot_instances_enabled = "true"
    else:
        spot_instances_enabled = "false"
    config["cloud"]["configure"]["spot_instances_enabled"] = spot_instances_enabled
    config["cloud"]["configure"]["spot_instances_option"] = self._cfg.spot_instances_option
    config["cloud"]["node_spot_price"] = DEFAULT_NODE_SPOT_PRICE

    # Configure master.
    axsys_node_type = config["cloud"]["configure"]["axsys_node_type"]
    axsys_node_max = config["cloud"]["configure"]["axsys_node_count"]
    axuser_node_type = config["cloud"]["configure"]["axuser_node_type"]
    axuser_node_max = config["cloud"]["configure"]["max_node_count"] - axsys_node_max
    cluster_type = config["cloud"]["configure"]["cluster_type"]
    if self._cfg.cluster_size != AXClusterSize.CLUSTER_USER_PROVIDED:
        master_config = KubeMasterResourceConfig(
            usr_node_type=axuser_node_type,
            usr_node_max=axuser_node_max,
            ax_node_type=axsys_node_type,
            ax_node_max=axsys_node_max,
            cluster_type=cluster_type
        )
        if self._cfg.cluster_size == AXClusterSize.CLUSTER_MVC:
            # MVC cluster does not follow the heuristics we use to configure the master.
            config["cloud"]["configure"]["master_type"] = "m3.xlarge"
        else:
            config["cloud"]["configure"]["master_type"] = master_config.master_instance_type
        config["cloud"]["configure"]["master_config_env"] = master_config.kube_up_env

    # TODO (#121): revise the relationship between user_on_demand_nodes, node minimum, and system node count.
    config["cloud"]["configure"]["axuser_on_demand_nodes"] = self._cfg.user_on_demand_nodes

    # Get AMI information.
    ami_name = self._cfg.software_info.ami_name
    ami_id = AMI(aws_profile=self._cfg.cloud_profile, aws_region=self._cfg.cloud_region).get_ami_id_from_name(
        ami_name=ami_name)
    config["cloud"]["configure"]["ami_id"] = ami_id

    # Other configurations.
    config["cloud"]["configure"]["autoscaler_scan_interval"] = str(self._cfg.autoscaling_interval) + "s"
    config["cloud"]["configure"]["support_object_store_name"] = str(self._cfg.support_object_store_name)

    return config
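# For illustration only: a minimal skeleton of the config dict this method expects, inferred from
# the keys it reads and writes above. The concrete values are placeholders, not project defaults.
example_raw_config = {
    "cloud": {
        "trusted_cidr": None,
        "vpc_id": None,
        "vpc_cidr_base": None,
        "subnet_size": None,
        "node_spot_price": None,
        "configure": {
            # Pre-populated before this method runs; read here to size the master.
            "axsys_node_type": "m3.large",
            "axsys_node_count": 2,
            "axuser_node_type": "m3.large",
            "max_node_count": 5,
            "cluster_type": "standard",
            # Filled in or overwritten by this method.
            "region": None,
            "placement": None,
            "sandbox_enabled": None,
            "cluster_user": None,
            "spot_instances_enabled": None,
            "spot_instances_option": None,
            "master_type": None,
            "master_config_env": None,
            "axuser_on_demand_nodes": None,
            "ami_id": None,
            "autoscaler_scan_interval": None,
            "support_object_store_name": None,
        },
    },
}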
def update_cluster_config(self):
    """
    Upgrade the cluster config in S3 so that it has all required fields.
    """
    logger.info("Updating cluster config!")
    cluster_config = AXClusterConfig(cluster_name_id=self._cluster_name_id, aws_profile=self._profile)
    cluster_info = AXClusterInfo(cluster_name_id=self._cluster_name_id, aws_profile=self._profile)

    # Separate axsys / axuser config if needed.
    update_node_config_key_needed = False
    try:
        # The new cluster config provides "max_node_count" for this method; a KeyError
        # means the config in S3 is still in the old format.
        cluster_config.get_max_node_count()
    except KeyError:
        update_node_config_key_needed = True

    if update_node_config_key_needed:
        logger.info("Updating node config keys ...")
        # Parse the old raw config directly.
        minion_type = cluster_config._conf["cloud"]["configure"]["minion_type"]
        max_count = cluster_config._conf["cloud"]["configure"]["max_count"]
        min_count = cluster_config._conf["cloud"]["configure"]["min_count"]
        axsys_count = cluster_config._conf["cloud"]["configure"]["axsys_nodes"]

        # Remove all old keys.
        for old_key in ["minion_type", "max_count", "min_count", "axsys_nodes"]:
            cluster_config._conf["cloud"]["configure"].pop(old_key, None)

        # Set the new keys.
        cluster_config._conf["cloud"]["configure"]["axsys_node_count"] = axsys_count
        cluster_config._conf["cloud"]["configure"]["max_node_count"] = max_count
        cluster_config._conf["cloud"]["configure"]["min_node_count"] = min_count
        # All clusters that need this upgrade use the same node type for axsys and axuser.
        cluster_config._conf["cloud"]["configure"]["axuser_node_type"] = minion_type
        cluster_config._conf["cloud"]["configure"]["axsys_node_type"] = minion_type
    else:
        logger.info("Node config keys are already up-to-date")

    # If the cluster type is not set, default it to the standard type.
    if cluster_config.get_ax_cluster_type() is None:
        cluster_config._conf["cloud"]["configure"]["cluster_type"] = AXClusterType.STANDARD

    # Check and update the cluster user. Defaults to "customer".
    if cluster_config.get_ax_cluster_user() is None:
        cluster_config.set_ax_cluster_user('customer')

    # Check and update the cluster size. Defaults to "small".
    if cluster_config.get_ax_cluster_size() is None:
        max_count = cluster_config.get_max_node_count()
        if max_count == 5:
            cluster_size = "small"
        elif max_count == 10:
            cluster_size = "medium"
        elif max_count == 21:
            cluster_size = "large"
        elif max_count == 30:
            cluster_size = "xlarge"
        else:
            cluster_size = "small"
        cluster_config.set_ax_cluster_size(cluster_size)

    # Check and update the AX volume size. Note that this has to come *AFTER* cluster_size is set.
    if cluster_config.get_ax_vol_size() is None:
        cluster_size = cluster_config.get_ax_cluster_size()
        if cluster_size in ("small", "medium"):
            vol_size = 100
        elif cluster_size == "large":
            vol_size = 200
        elif cluster_size == "xlarge":
            vol_size = 400
        else:
            vol_size = 100
        cluster_config.set_ax_vol_size(vol_size)

    # Ensure that we have 3 tiers now.
    cluster_config.set_node_tiers("master/applatix/user")

    # Set new AMI id.
    ami_name = os.getenv("AX_AWS_IMAGE_NAME")
    ami_id = AMI(aws_region=self._region, aws_profile=self._profile).get_ami_id_from_name(ami_name=ami_name)
    logger.info("Updating cluster config with ami %s", ami_id)
    cluster_config.set_ami_id(ami_id)

    cluster_config.save_config()
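# A minimal sketch (hypothetical helper, not in the original code) of the node-config key
# migration performed above, applied to a plain "cloud.configure" dict.
OLD_TO_NEW_NODE_KEYS = {
    "max_count": "max_node_count",
    "min_count": "min_node_count",
    "axsys_nodes": "axsys_node_count",
}

def migrate_node_config_keys(configure):
    """Rename legacy node-count keys and split minion_type into axsys/axuser node types."""
    minion_type = configure.pop("minion_type")
    for old_key, new_key in OLD_TO_NEW_NODE_KEYS.items():
        configure[new_key] = configure.pop(old_key)
    # Legacy clusters used one node type for both tiers.
    configure["axsys_node_type"] = minion_type
    configure["axuser_node_type"] = minion_type
    return configure

# Example with placeholder values:
legacy = {"minion_type": "m3.large", "max_count": 5, "min_count": 3, "axsys_nodes": 2}
assert migrate_node_config_keys(legacy) == {
    "max_node_count": 5,
    "min_node_count": 3,
    "axsys_node_count": 2,
    "axsys_node_type": "m3.large",
    "axuser_node_type": "m3.large",
}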