Example #1
    def upgrade(self):
        """
        Entry point for master upgrade.
        Supports upgrading:
            - Kubernetes versions;
            - the AMI image;
            - a selected list of kube_env variables.
        """
        logger.info("Starting master upgrade!")
        ami_name = os.getenv("AX_AWS_IMAGE_NAME")
        assert ami_name, "Failed to detect AMI name from environment"
        ami_id = AMI(aws_region=self.region, aws_profile=self.profile).get_ami_id_from_name(ami_name=ami_name)
        logger.info("Using ami %s for new master", ami_id)

        s3_data = self.cluster_info.get_master_config(USER_DATA_FILE_S3)
        if s3_data is None:
            attr = None
        else:
            attr = json.loads(s3_data)
        instance_id = self.discover_master()
        terminating = False
        launching = False
        if instance_id is None:
            # This is possible if a previous upgrade failed after termination but before the new master started.
            # Simply restart the master in this case.
            # This can also happen when the master crashed in the first place and an upgrade was started.
            # In that case we use the old config to start the master and rerun the upgrade.
            logger.info("No running master. S3 attr %s.", USER_DATA_FILE_S3)
            assert attr is not None, "No master instance and no master config."
            self.attributes = attr
            self.attributes['user_data_file'] = USER_DATA_FILE_S3

            self.ensure_master_tags()
            self.save_master_config(USER_DATA_FILE_S3)
            launching = True
        else:
            self.master_instance = self.ec2.Instance(instance_id)
            logger.info("Running master %s.", instance_id)
            self.aws_image = ami_id
            self.instance_profile = AXClusterInstanceProfile(self.cluster_name_id, region_name=self.region,
                                                             aws_profile=self.profile).get_master_arn()
            self.populate_attributes()
            master_tag_updated = self.ensure_master_tags()
            # TODO: Possible race here.
            # If the upgrade is interrupted after the config is saved but before the master is terminated,
            # the next upgrade attempt will assume the master is already upgraded.
            # A manual hack to terminate the instance is needed in that case.
            if attr != self.attributes or self.user_data_updated() or master_tag_updated:
                self.save_master_config(USER_DATA_FILE_NEW)
                terminating = True
                launching = True

        if terminating:
            self.terminate_master()
            logger.info("Done terminating %s", instance_id)
        if launching:
            logger.info("Done launching %s", self.launch_new_master())
Example #2
    def _update_launch_config(self,
                              old_name,
                              new_name,
                              retain_spot_price=False):
        """
        Upgrade the old_name launch config to new_name.
        Return immediately if the new launch config has already been created.
        Raise an error if neither the old nor the new config exists.
        """
        logger.info("Converting launch config %s to %s ...", old_name,
                    new_name)
        ami_name = os.getenv("AX_AWS_IMAGE_NAME")
        assert ami_name, "Failed to detect AMI name from environment"
        ami_id = AMI(
            aws_region=self._region,
            aws_profile=self._profile).get_ami_id_from_name(ami_name=ami_name)
        logger.info("Using ami %s for new minion launch configuration", ami_id)

        cluster_config = AXClusterConfig(cluster_name_id=self._cluster_name_id,
                                         aws_profile=self._profile)
        if LaunchConfig(new_name,
                        aws_profile=self._profile,
                        aws_region=self._region).get() is not None:
            # Launch config already updated, nop.
            logger.debug("New launch config %s already there. No creation.",
                         new_name)
            return

        lc = LaunchConfig(old_name,
                          aws_profile=self._profile,
                          aws_region=self._region)
        config = lc.get()
        assert config is not None, "Neither the old nor the new launch config exists"
        user_data = config.pop("UserData")
        logger.debug("Existing launch config %s: %s", old_name, config)

        updates = {
            "new_kube_version": self._new_kube_version,
            "new_cluster_install_version": self._new_cluster_install_version,
            "new_kube_server_hash": self._new_kube_server_hash,
            "new_kube_salt_hash": self._new_kube_salt_hash,
        }

        # Replace ImageId and everything listed in default_kube_up_env.
        config["ImageId"] = ami_id
        config["IamInstanceProfile"] = AXClusterInstanceProfile(
            self._cluster_name_id,
            aws_profile=self._profile).get_minion_instance_profile_name()
        user_data = zlib.decompressobj(32 +
                                       zlib.MAX_WBITS).decompress(user_data)
        user_data = kube_env_update(user_data, updates)
        comp = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
        config["UserData"] = comp.compress(user_data) + comp.flush()

        # Add AX Volume device mappings.
        orig_block_devices = config.pop("BlockDeviceMappings")

        block_devices = []
        for device in orig_block_devices:
            if device["DeviceName"] != AX_VOL_DISK:
                block_devices.append(device)
        vol_device = {
            "DeviceName": AX_VOL_DISK,
            "Ebs": {
                "DeleteOnTermination": True,
                "VolumeSize": cluster_config.get_ax_vol_size(),
                "VolumeType": self._ax_vol_disk_type,
            },
        }
        block_devices.append(vol_device)
        config["BlockDeviceMappings"] = block_devices
        logger.debug("New block device mappings: %s",
                     config["BlockDeviceMappings"])

        lc.copy(new_name, config, retain_spot_price=retain_spot_price)
        logger.info("Converting launch config %s to %s ... DONE.", old_name,
                    new_name)
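
The user-data handling above relies on zlib's gzip support: zlib.decompressobj(32 + zlib.MAX_WBITS) accepts either zlib or gzip framing, and zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16) writes a gzip-wrapped stream back out. A minimal round-trip sketch of the same pattern, with an illustrative payload standing in for the real launch-config user data:

import zlib

original = b"KUBE_VERSION=v1.6.7\nSALT_TAR_HASH=abc123\n"  # illustrative payload

# Compress with a gzip header/trailer (wbits = MAX_WBITS | 16).
comp = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
gzipped = comp.compress(original) + comp.flush()

# Decompress, auto-detecting zlib or gzip framing (wbits = 32 + MAX_WBITS).
restored = zlib.decompressobj(32 + zlib.MAX_WBITS).decompress(gzipped)
assert restored == original
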
    def _generate_raw_cluster_config_dict_aws(self, config):
        """
        Generate AWS specific cluster config.
        :param config:
        :return:
        """
        # TODO: once we support installing with a config file, only overwrite an item when it is explicitly set through the CLI
        config["cloud"]["configure"]["region"] = self._cfg.cloud_region
        config["cloud"]["configure"]["placement"] = self._cfg.cloud_placement
        config["cloud"]["trusted_cidr"] = self._cfg.trusted_cidrs
        config["cloud"]["vpc_id"] = self._cfg.vpc_id

        # If we install into an existing VPC (i.e. vpc_id is not None), leave vpc_cidr_base unset;
        # it will be fetched from cluster metadata after the cluster is created.
        config["cloud"]["vpc_cidr_base"] = self._cfg.vpc_cidr_base if not self._cfg.vpc_id else None
        config["cloud"]["subnet_size"] = self._cfg.subnet_mask_size
        config["cloud"]["configure"]["sandbox_enabled"] = self._cfg.enable_sandbox

        # TODO (#119): might want to remove this field as it was used for hacks before. Setting it to "dev" for now
        config["cloud"]["configure"]["cluster_user"] = "******"

        # TODO (#117): Switch all spot-related options to literals rather than "true"/"false" and clean up
        # some other hacks; also revisit the need to specify a spot price during installation.
        if self._cfg.spot_instances_option in [
                SpotInstanceOption.PARTIAL_SPOT, SpotInstanceOption.ALL_SPOT
        ]:
            spot_instances_enabled = "true"
        else:
            spot_instances_enabled = "false"
        config["cloud"]["configure"]["spot_instances_enabled"] = spot_instances_enabled
        config["cloud"]["configure"]["spot_instances_option"] = self._cfg.spot_instances_option
        config["cloud"]["node_spot_price"] = DEFAULT_NODE_SPOT_PRICE

        # Configure master
        axsys_node_type = config["cloud"]["configure"]["axsys_node_type"]
        axsys_node_max = config["cloud"]["configure"]["axsys_node_count"]
        axuser_node_type = config["cloud"]["configure"]["axuser_node_type"]
        axuser_node_max = config["cloud"]["configure"]["max_node_count"] - axsys_node_max
        cluster_type = config["cloud"]["configure"]["cluster_type"]
        if self._cfg.cluster_size != AXClusterSize.CLUSTER_USER_PROVIDED:
            master_config = KubeMasterResourceConfig(
                usr_node_type=axuser_node_type,
                usr_node_max=axuser_node_max,
                ax_node_type=axsys_node_type,
                ax_node_max=axsys_node_max,
                cluster_type=cluster_type)
            if self._cfg.cluster_size == AXClusterSize.CLUSTER_MVC:
                # An MVC cluster does not follow the heuristics we use to configure the master.
                config["cloud"]["configure"]["master_type"] = "m3.xlarge"
            else:
                config["cloud"]["configure"]["master_type"] = master_config.master_instance_type
            config["cloud"]["configure"]["master_config_env"] = master_config.kube_up_env

        # TODO (#121): Revise the relationship between user_on_demand_nodes, the node minimum, and the system node count.
        config["cloud"]["configure"]["axuser_on_demand_nodes"] = self._cfg.user_on_demand_nodes

        # Get AMI information
        ami_name = self._cfg.software_info.ami_name
        ami_id = AMI(aws_profile=self._cfg.cloud_profile,
                     aws_region=self._cfg.cloud_region).get_ami_id_from_name(
                         ami_name=ami_name)
        config["cloud"]["configure"]["ami_id"] = ami_id

        # Other configurations
        config["cloud"]["configure"]["autoscaler_scan_interval"] = str(
            self._cfg.autoscaling_interval) + "s"
        config["cloud"]["configure"]["support_object_store_name"] = str(
            self._cfg.support_object_store_name)

        return config
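
The AMI(...).get_ami_id_from_name(...) helper used throughout these examples is not shown. Assuming it resolves an image name to an image ID via EC2's DescribeImages API, a boto3-only sketch could look like the following (the function name, the Owners=["self"] filter, and the "newest wins" tie-break are assumptions, not taken from the original code):

import boto3

def get_ami_id_from_name(ami_name, region, profile=None):
    """Resolve an AMI name to an image ID, picking the newest match."""
    session = boto3.Session(profile_name=profile, region_name=region)
    ec2 = session.client("ec2")
    resp = ec2.describe_images(
        Owners=["self"],  # assumption: the AMI is owned by the calling account
        Filters=[{"Name": "name", "Values": [ami_name]}],
    )
    images = sorted(resp["Images"], key=lambda i: i["CreationDate"], reverse=True)
    if not images:
        raise ValueError("No AMI found with name %s" % ami_name)
    return images[0]["ImageId"]
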
    def update_cluster_config(self):
        """
        Upgrade the cluster config in S3 such that it has all required fields.
        """
        logger.info("Updating cluster config!")
        cluster_config = AXClusterConfig(cluster_name_id=self._cluster_name_id,
                                         aws_profile=self._profile)
        cluster_info = AXClusterInfo(cluster_name_id=self._cluster_name_id,
                                     aws_profile=self._profile)

        # Separate axsys / axuser config if needed
        update_node_config_key_needed = False
        try:
            # The new cluster config format provides "max_node_count"; this call raises
            # KeyError if the cluster config in S3 is still the old format.
            cluster_config.get_max_node_count()
        except KeyError:
            update_node_config_key_needed = True

        if update_node_config_key_needed:
            logger.info("Updating node config keys ...")
            # Parse old raw config directly
            minion_type = cluster_config._conf["cloud"]["configure"]["minion_type"]
            max_count = cluster_config._conf["cloud"]["configure"]["max_count"]
            min_count = cluster_config._conf["cloud"]["configure"]["min_count"]
            axsys_count = cluster_config._conf["cloud"]["configure"]["axsys_nodes"]

            # Remove all old keys
            for old_key in [
                    "minion_type", "max_count", "min_count", "axsys_nodes"
            ]:
                cluster_config._conf["cloud"]["configure"].pop(old_key, None)

            # Set the new keys
            cluster_config._conf["cloud"]["configure"]["axsys_node_count"] = axsys_count
            cluster_config._conf["cloud"]["configure"]["max_node_count"] = max_count
            cluster_config._conf["cloud"]["configure"]["min_node_count"] = min_count

            # All clusters that need this upgrade have the same node type for axsys and axuser
            cluster_config._conf["cloud"]["configure"]["axuser_node_type"] = minion_type
            cluster_config._conf["cloud"]["configure"]["axsys_node_type"] = minion_type
        else:
            logger.info("Node config keys are already up-to-date")

        # If the cluster type is not set, default it to the standard type
        if cluster_config.get_ax_cluster_type() is None:
            cluster_config._conf["cloud"]["configure"]["cluster_type"] = AXClusterType.STANDARD

        # Check and update Cluster user. Defaults to "customer"
        if cluster_config.get_ax_cluster_user() is None:
            cluster_config.set_ax_cluster_user('customer')

        # Check and update Cluster size. Defaults to "small"
        if cluster_config.get_ax_cluster_size() is None:
            max_count = cluster_config.get_max_node_count()
            if max_count == 5:
                cluster_size = "small"
            elif max_count == 10:
                cluster_size = "medium"
            elif max_count == 21:
                cluster_size = "large"
            elif max_count == 30:
                cluster_size = "xlarge"
            else:
                cluster_size = "small"
            cluster_config.set_ax_cluster_size(cluster_size)

        # Check and update AX Volume size. Note that this has to come *AFTER* the cluster_size is set.
        if cluster_config.get_ax_vol_size() is None:
            cluster_size = cluster_config.get_ax_cluster_size()
            if cluster_size in ("small", "medium"):
                vol_size = 100
            elif cluster_size == "large":
                vol_size = 200
            elif cluster_size == "xlarge":
                vol_size = 400
            else:
                vol_size = 100
            cluster_config.set_ax_vol_size(vol_size)

        # Ensure that we have 3 tiers now
        cluster_config.set_node_tiers("master/applatix/user")

        # Set new AMI ID
        ami_name = os.getenv("AX_AWS_IMAGE_NAME")
        assert ami_name, "Failed to detect AMI name from environment"
        ami_id = AMI(
            aws_region=self._region,
            aws_profile=self._profile).get_ami_id_from_name(ami_name=ami_name)
        logger.info("Updating cluster config with ami %s", ami_id)
        cluster_config.set_ami_id(ami_id)

        cluster_config.save_config()
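
The size and volume heuristics in update_cluster_config map the maximum node count to a cluster size, and that size to an AX volume size. Pulled out as small standalone helpers (the function names are illustrative; the thresholds mirror the code above, with "small"/100 GB as the fallback):

# Thresholds mirror update_cluster_config above.
_SIZE_BY_MAX_NODES = {5: "small", 10: "medium", 21: "large", 30: "xlarge"}
_VOL_GB_BY_SIZE = {"small": 100, "medium": 100, "large": 200, "xlarge": 400}

def cluster_size_for(max_node_count):
    """Map a maximum node count to a cluster size, defaulting to "small"."""
    return _SIZE_BY_MAX_NODES.get(max_node_count, "small")

def ax_vol_size_for(cluster_size):
    """Map a cluster size to an AX volume size in GB, defaulting to 100."""
    return _VOL_GB_BY_SIZE.get(cluster_size, 100)

assert cluster_size_for(21) == "large"
assert ax_vol_size_for("xlarge") == 400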