Code Example #1
class AXVersion(object):
    def __init__(self, customer_id, cluster_name_id, aws_profile):
        self._customer_id = customer_id
        self._cluster_name_id = cluster_name_id
        self._cluster_name = AXClusterId(cluster_name_id).get_cluster_name()
        self._aws_profile = aws_profile

        cluster_bucket_name = AXClusterConfigPath(cluster_name_id).bucket()
        self._cluster_bucket = Cloud().get_bucket(
            cluster_bucket_name, aws_profile=self._aws_profile)

        support_bucket_name = AXSupportConfigPath(cluster_name_id).bucket()
        self._support_bucket = Cloud().get_bucket(
            support_bucket_name, aws_profile=self._aws_profile)

    def update(self, new):
        self._report_version_to_s3(new)

    def _get_current_version(self):
        # TODO: combine cluster bucket operations to AXClusterInfo object
        data = self._cluster_bucket.get_object(
            key=AXClusterConfigPath(self._cluster_name_id).versions())
        return yaml.load(data) if data else {}

    def _report_version_to_s3(self, new):
        old = self._get_current_version()
        history = {"from": old, "to": new}
        # Update current version in cluster bucket.
        cluster_version_key = AXClusterConfigPath(
            self._cluster_name_id).versions()
        self._cluster_bucket.put_object(cluster_version_key,
                                        yaml.dump(new),
                                        ACL="bucket-owner-full-control")

        # Update current version in support bucket.
        support_version_key = AXSupportConfigPath(
            self._cluster_name_id).current_versions()
        self._support_bucket.put_object(support_version_key,
                                        yaml.dump(new),
                                        ACL="bucket-owner-full-control")

        # Update version history in support bucket.
        support_version_history_key = AXSupportConfigPath(
            self._cluster_name_id).version_history()
        self._support_bucket.put_object(support_version_history_key,
                                        yaml.dump(history),
                                        ACL="bucket-owner-full-control")
Code Example #2
class AXClusterId(with_metaclass(Singleton, object)):
    def __init__(self, name=None, aws_profile=None):
        self._input_name = name
        self._aws_profile = aws_profile

        # Cluster-id-related bucket and path info should be self-contained rather than
        # using the config_s3_path object, because config_s3_path needs both the cluster
        # name and id to initialize. In case we haven't gotten the cluster id yet,
        # singletons in config_s3_path cannot be properly initialized.
        self._bucket_template = "applatix-cluster-{account}-{seq}"
        self._cluster_id_bucket_path_template = "{name}/id"

        # Set bucket
        self._customer_id = AXCustomerId().get_customer_id()
        self._bucket_name = self._bucket_template.format(
            account=self._customer_id, seq=0)
        self._bucket = None

        # These values will be set when user calls get/create cluster name id
        self._cluster_name = None
        self._cluster_id = None
        self._cluster_name_id = None

    def create_cluster_name_id(self):
        """
        User input cluster name in format of "<name>" or "<name>-<id>", and this function creates
        a record in S3. If he name caller passed in does not include an ID, we generate one.

        If we already have a cluster name/id record in s3, this function should not be called to avoid
        existing clusters's records to get overridden
        :return: <cluster-name>-<cluster-id>
        """
        assert not self._cluster_name_id, "Cluster {} has its name id already created".format(
            self._cluster_name_id)
        assert self._input_name, "Must provide input name to create cluster name id"
        name, cid = self._format_name_id(self._input_name)
        if cid is None:
            logger.info("Cluster id not provided, generate one.")
            if Cloud().target_cloud_gcp():
                cid = str(uuid.uuid4())[:8]
            elif Cloud().target_cloud_aws():
                cid = str(uuid.uuid1())
            else:
                assert False, "Must provide valid target cloud to create cluster name id. Currently target cloud is set to {}".format(
                    Cloud().target_cloud())
        logger.info("Created new name-id %s", name + "-" + cid)

        # fill in cluster name id info
        self._cluster_name = name
        self._cluster_id = cid
        self._cluster_name_id = self._cluster_name + "-" + self._cluster_id
        return self._cluster_name_id

    def upload_cluster_name_id(self):
        """
        This function assumes cluster_name_id has been created already
        """
        logger.info("Uploading cluster name-id record to S3 ...")
        self._load_cluster_name_id_if_needed()
        self._instantiate_bucket_if_needed()
        id_key = self._cluster_id_bucket_path_template.format(
            name=self._cluster_name)
        self._bucket.put_object(id_key, self._cluster_id)
        logger.info("Uploaded cluster name (%s) and cluster id (%s) to S3",
                    self._cluster_name, self._cluster_id)

    def get_cluster_name_id(self):
        """
        This function assumes cluster name/id record is created. It first looks for
        AX_CLUSTER_NAME_ID env, if not set, it looks up cluster id from s3.
        :return" cluster_name_id
        """
        self._load_cluster_name_id_if_needed()
        return self._cluster_name_id

    def get_cluster_name(self):
        self._load_cluster_name_id_if_needed()
        return self._cluster_name

    def get_cluster_id(self):
        self._load_cluster_name_id_if_needed()
        return self._cluster_id

    def get_cluster_id_s3_key(self):
        self._load_cluster_name_id_if_needed()
        return self._cluster_id_bucket_path_template.format(
            name=self._cluster_name)

    def _load_cluster_name_id_if_needed(self):
        if not self._cluster_name_id:
            self._load_cluster_name_id()

    def _instantiate_bucket_if_needed(self):
        if not self._bucket:
            logger.info("Instantiating cluster bucket ...")
            self._bucket = Cloud().get_bucket(self._bucket_name,
                                              aws_profile=self._aws_profile)
            assert self._bucket.exists(), "Bucket {} not created yet".format(
                self._bucket.get_bucket_name())

    def _load_cluster_name_id(self):
        """
        This function assumes cluster name/id record is created. It first looks for
        AX_CLUSTER_NAME_ID env, if not set, it looks up cluster id from s3.

        This function sets cluster_name_id, cluster_name, and cluster_id
        """
        # Try to get from env first
        name_id = os.getenv(CLUSTER_NAME_ID_ENV_NAME, None)
        if name_id:
            logger.info("Found cluster name id in env: %s", name_id)
            self._cluster_name_id = name_id
            self._cluster_name, self._cluster_id = self._format_name_id(
                self._cluster_name_id)

            # NOTE: if we find a cluster name id in env that we cannot even parse, we still fail
            # directly, even though it is possible that we might find something valid in the s3
            # bucket, because the program that brings up the cluster (i.e. axinstaller) is already
            # having trouble in such a case and is already alerting
            assert self._cluster_name and self._cluster_id, "Failed to load cluster name and cluster id from env"
        else:
            self._lookup_id_from_bucket()
            assert self._cluster_name and self._cluster_id, "Failed to load cluster name and cluster id from bucket"
            self._cluster_name_id = "{}-{}".format(self._cluster_name,
                                                   self._cluster_id)

    def _lookup_id_from_bucket(self):
        name, requested_cid = self._format_name_id(self._input_name)

        # Lookup assumes the bucket already exists, so there is no need to pass a region.
        # If the bucket does not exist, AXS3Bucket will throw an exception
        self._instantiate_bucket_if_needed()
        id_s3_key = self._cluster_id_bucket_path_template.format(name=name)
        cid = str(self._bucket.get_object(id_s3_key)).strip()
        if cid != "None":
            logger.info("Found existing cluster name %s-%s", name, cid)
            if cid != requested_cid:
                logger.info(
                    "Ignore requested cluster ID (%s). Real cluster id: %s",
                    requested_cid, cid)
            self._cluster_name = name
            self._cluster_id = cid
        else:
            logger.info("Cannot find cluster name/id mapping from bucket")
            if requested_cid:
                logger.info(
                    "Using user defined cluster name: %s, cluster id: %s",
                    name, requested_cid)
                self._cluster_name = name
                self._cluster_id = requested_cid

    @staticmethod
    def _format_name_id(input_name):
        if Cloud().target_cloud_aws():
            return AXClusterNameIdParser.parse_cluster_name_id_aws(input_name)
        elif Cloud().target_cloud_gcp():
            return AXClusterNameIdParser.parse_cluster_name_id_gcp(input_name)
        else:
            assert False, "Invalid cloud provider: {}. Only aws and gcp are supported".format(
                Cloud().target_cloud())
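
A minimal sketch of the create/upload/lookup flow this singleton supports; how AXClusterId is imported and the example arguments are assumptions.

def bootstrap_cluster_record(raw_name, aws_profile=None):
    # Hypothetical helper for a brand-new cluster: create_cluster_name_id()
    # generates an id if raw_name has none, and upload_cluster_name_id()
    # writes the "<name>/id" record into the account bucket.
    cluster = AXClusterId(name=raw_name, aws_profile=aws_profile)
    name_id = cluster.create_cluster_name_id()
    cluster.upload_cluster_name_id()
    return name_id

def resolve_existing_cluster(raw_name, aws_profile=None):
    # Hypothetical helper for an existing cluster: get_cluster_name_id() reads
    # AX_CLUSTER_NAME_ID from the environment or falls back to the S3 record.
    return AXClusterId(name=raw_name, aws_profile=aws_profile).get_cluster_name_id()

Note that AXClusterId uses the Singleton metaclass, so within one process the first construction wins and later constructions reuse that instance regardless of the arguments they pass.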
Code Example #3
class AXClusterConfig(with_metaclass(Singleton, object)):
    def __init__(self, cluster_name_id=None, aws_profile=None, config=None):
        self._cluster_name_id = AXClusterId(
            name=cluster_name_id,
            aws_profile=aws_profile).get_cluster_name_id()
        self._bucket_name = AXClusterConfigPath(self._cluster_name_id).bucket()
        self._bucket = Cloud().get_bucket(self._bucket_name,
                                          aws_profile=aws_profile)
        self._cluster_config_key = AXClusterConfigPath(
            self._cluster_name_id).cluster_config()
        self._conf = config

    def _load_config(self):
        if not self._conf:
            data = self._bucket.get_object(self._cluster_config_key)
            assert data is not None, "No cluster config data found at {}/{}".format(
                self._bucket_name, self._cluster_config_key)
            self._conf = json.loads(data.decode("utf-8"))

    def set_config(self, config):
        """
        Set a config generated by upper layer
        :param config:
        :return:
        """
        assert config, "Cannot set cluster config to None"
        self._conf = config

    def save_config(self):
        """
        Upload config to s3
        :return:
        """
        logger.info("Uploading cluster config to s3 at %s ...",
                    self._cluster_config_key)
        if not self._conf:
            logger.warning(
                "AXClusterConfig is not initialized with a valid config, NOT uploading."
            )
            return
        if not self._bucket.put_object(key=self._cluster_config_key,
                                       data=json.dumps(self._conf)):
            raise AXPlatformException(
                "Failed to upload cluster config for {}".format(
                    self._cluster_name_id))
        logger.info("Uploading cluster config to s3 ... DONE")

    def get_raw_config(self):
        """
        This method is mainly used for debugging purpsoe
        :return:
        """
        self._load_config()
        return self._conf

    def get_region(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["region"])

    def get_zone(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["placement"])

    def get_master_type(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["master_type"])

    def get_axsys_node_type(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["axsys_node_type"])

    def get_axuser_node_type(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["axuser_node_type"])

    def get_max_node_count(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["max_node_count"])

    def get_min_node_count(self):
        self._load_config()
        return str(self._conf["cloud"]["configure"]["min_node_count"])

    def get_asxys_node_count(self):
        self._load_config()
        try:
            return str(self._conf["cloud"]["configure"]["axsys_node_count"])
        except KeyError as ke:
            return "0"

    def get_axuser_on_demand_count(self):
        self._load_config()
        try:
            return str(
                self._conf["cloud"]["configure"]["axuser_on_demand_nodes"])
        except KeyError as ke:
            return "0"

    def get_autoscaler_scan_interval(self):
        self._load_config()
        try:
            return self._conf["cloud"]["configure"]["autoscaler_scan_interval"]
        except KeyError as ke:
            return "10s"

    def get_support_object_store_name(self):
        self._load_config()
        return str(
            self._conf.get("cloud",
                           {}).get("configure",
                                   {}).get("support_object_store_name", ""))

    def get_provider(self):
        self._load_config()
        return str(self._conf["cloud"]["provider"])

    def get_trusted_cidr(self):
        self._load_config()
        return self._conf["cloud"]["trusted_cidr"]

    def get_vpc_cidr_base(self):
        self._load_config()
        return str(self._conf["cloud"]["vpc_cidr_base"])

    def get_node_tiers(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("node_tiers", None)

    def get_node_config(self):
        """ Get entire config of the node """
        self._load_config()
        return self._conf["cloud"]["configure"]

    def get_ax_vol_size(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("ax_vol_size", None)

    def get_ax_cluster_user(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("cluster_user", None)

    def get_ax_cluster_size(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("cluster_size", None)

    def get_ax_cluster_type(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("cluster_type",
                                                    AXClusterType.STANDARD)

    def get_user_node_resouce_rsvp(self, key):
        self._load_config()
        if not self._conf["cloud"]["configure"].get("user_node_resource_rsvp",
                                                    None):
            return -1
        return self._conf["cloud"]["configure"]["user_node_resource_rsvp"].get(
            key, -1)

    def get_sandbox_flag(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("sandbox_enabled", None)

    def get_master_pod_cidr(self):
        self._load_config()
        return self._conf["cloud"]["kube_installer_config"].get(
            "MASTER_IP_RANGE", None)

    def get_ami_id(self):
        self._load_config()
        return self._conf["cloud"]["configure"].get("ami_id", None)

    def get_spot_instances_enabled(self):
        return self._conf["cloud"]["configure"].get("spot_instances_enabled",
                                                    None)

    def get_spot_instances_option(self):
        return self._conf["cloud"]["configure"].get("spot_instances_option",
                                                    "partial")

    def get_node_spot_price(self):
        return self._conf["cloud"].get("node_spot_price", None)

    def get_subnet_size(self):
        return self._conf["cloud"].get("subnet_size", None)

    def get_vpc_id(self):
        return self._conf["cloud"].get("vpc_id", None)

    def get_master_config_env(self):
        return self._conf["cloud"]["configure"]["master_config_env"]

    def get_kube_installer_config(self):
        return self._conf["cloud"]["kube_installer_config"]

    # Setters. Currently only a very limited set of items is mutable
    def set_ax_cluster_user(self, user):
        self._conf["cloud"]["configure"]["cluster_user"] = user

    def set_ax_vol_size(self, vol_size):
        self._conf["cloud"]["configure"]["ax_vol_size"] = vol_size

    def set_ax_cluster_size(self, cluster_size):
        self._conf["cloud"]["configure"]["ax_cluster_size"] = cluster_size

    def set_user_node_resource_rsvp(self, cpu, mem):
        if not self._conf["cloud"]["configure"].get("user_node_resource_rsvp",
                                                    None):
            self._conf["cloud"]["configure"]["user_node_resource_rsvp"] = {}
        if cpu >= 0:
            self._conf["cloud"]["configure"]["user_node_resource_rsvp"][
                "cpu"] = cpu
        if mem >= 0:
            self._conf["cloud"]["configure"]["user_node_resource_rsvp"][
                "memory"] = mem

    def set_node_tiers(self, tiers):
        self._conf["cloud"]["configure"]["node_tiers"] = tiers

    def set_ami_id(self, ami_id):
        self._conf["cloud"]["configure"]["ami_id"] = ami_id

    def set_trusted_cidr(self, cidrs):
        assert isinstance(cidrs, list), "Trusted cidrs need to be a list"
        self._conf["cloud"]["trusted_cidr"] = cidrs

    def set_kube_installer_config(self, config):
        self._conf["cloud"]["kube_installer_config"] = config

    def reload_config(self):
        """
        Python is ambiguous about updating members of singleton objects.
        Some flags in the cloud configuration may be updated. If callers
        want to latest configuration option, it has to be reloaded from
        S3.
        """
        self._conf = None
        return self.get_node_config()

    def load_cluster_meta(self, meta):
        """
        Cluster meta is generated after cluster is created, which should be part of cluster config
        Cluster meta format:

        cluster_name: ${AX_CLUSTER_NAME_ID}
        vpc: ${VPC_ID}
        internet_gateway: ${IGW_ID}
        route_table: ${ROUTE_TABLE_ID}
        security_group: ${NODE_SG_ID}
        subnets:
            ${ZONE}:
                subnet_id: ${SUBNET_ID}
                subnet_cidr: ${SUBNET_CIDR}

        :param meta:
        :return:
        """
        # Currently only add vpc_id / vpc_cidr_base and subnet_size for consistency among ax/non-ax clusters
        # and new VPC / existing VPC clusters
        self._conf["cloud"]["vpc_id"] = meta["vpc"]
        self._conf["cloud"]["vpc_cidr_base"] = meta["vpc_cidr_base"]
        for zone in meta["subnets"].keys():
            self._conf["cloud"]["subnet_size"] = meta["subnets"][zone][
                "subnet_cidr"].split("/")[1]
            # Getting the first one is fine as we only have 1 subnet
            break
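
A small sketch of how the lazy getters are typically consumed; how AXClusterConfig is imported is an assumption.

def describe_cluster(cluster_name_id, aws_profile=None):
    # Hypothetical helper: every getter calls _load_config(), so the first
    # access downloads the JSON config from the cluster bucket and later
    # calls reuse the cached self._conf.
    conf = AXClusterConfig(cluster_name_id=cluster_name_id, aws_profile=aws_profile)
    return {
        "region": conf.get_region(),
        "zone": conf.get_zone(),
        "min_nodes": conf.get_min_node_count(),
        "max_nodes": conf.get_max_node_count(),
    }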
Code Example #4
File: ax_cluster_info.py  Project: nuaays/argo
class AXClusterInfo(with_metaclass(Singleton, object)):

    default_config_path = "/tmp/ax_kube/cluster_{}.conf"
    default_key_path = os.path.expanduser("~/.ssh/kube_id_{}")
    default_cluster_meta_path = "/tmp/cluster_meta/metadata.yaml"

    def __init__(self,
                 cluster_name_id,
                 kube_config=None,
                 key_file=None,
                 metadata=None,
                 aws_profile=None):
        """
        Config file initialization

        :param cluster_name_id: Cluster name_id in the format name-uuid, e.g. lcj-cluster-515d9828-7515-11e6-9b3e-a0999b1b4e15
        :param kube_config: kubernetes saved config file.
        :param key_file: cluster ssh key path
        :param metadata: path to cluster metadata
        :param aws_profile: AWS profile to access S3.
        """
        assert AXEnv().is_in_pod(
        ) or cluster_name_id, "Must specify cluster name from outside cluster"
        self._aws_profile = aws_profile
        self._cluster_name_id = cluster_name_id

        self._config = AXClusterConfig(cluster_name_id=cluster_name_id,
                                       aws_profile=aws_profile)
        tmp_kube_config = kube_config if kube_config else self.default_config_path.format(
            cluster_name_id)
        self._kube_config = os.getenv("ARGO_KUBE_CONFIG_PATH", tmp_kube_config)
        self._key_file = key_file if key_file else self.default_key_path.format(
            cluster_name_id)
        self._metadata_file = metadata if metadata else self.default_cluster_meta_path

        config_path = AXClusterConfigPath(name_id=cluster_name_id)
        self._bucket_name = config_path.bucket()
        self._bucket = Cloud().get_bucket(self._bucket_name,
                                          aws_profile=aws_profile)
        self._s3_kube_config_key = config_path.kube_config()
        self._s3_cluster_ssh_key = config_path.kube_ssh()
        self._s3_cluster_state_before_pause = config_path.state_before_pause()
        self._s3_cluster_meta = config_path.cluster_metadata()
        self._s3_cluster_software_info = config_path.versions()
        self._s3_platform_manifest_dir = config_path.platform_manifest_dir()
        self._s3_platform_config = config_path.platform_config()
        self._s3_cluster_current_state = config_path.current_state()
        self._s3_portal_support_flag = config_path.portal_support()

        self._s3_master_config_prefix = config_path.master_config_dir()
        self._s3_master_attributes_path = config_path.master_attributes_path()
        self._s3_master_user_data_path = config_path.master_user_data_path()

        # For cluster staging info, stage1 and stage2 can be uploaded, downloaded, and deleted with AXClusterInfo.
        # stage0 can only be downloaded with AXClusterInfo. It will be uploaded during cluster information
        # initialization (i.e. upload cluster id and cluster config), and deleted during cluster information
        # clean up (i.e. during axinstaller uninstall)
        self._staging_info = {
            "stage0": config_path.cluster_install_stage0_key(),
            "stage1": config_path.cluster_install_stage1_key(),
            "stage2": config_path.cluster_install_stage2_key()
        }

    def upload_kube_config(self):
        """
        Save content in kube config file to S3
        """
        logger.info("Saving kubeconfig to s3 ...")
        with open(self._kube_config, "r") as f:
            data = f.read()
        self._bucket.put_object(self._s3_kube_config_key, data)
        logger.info("Saved kubeconfig %s at %s/%s", self._kube_config,
                    self._bucket_name, self._s3_kube_config_key)

    def upload_kube_key(self):
        """
        Save content in ssh key file to S3
        """
        logger.info("Saving cluster ssh key to s3 ...")
        with open(self._key_file, "r") as f:
            data = f.read()
        self._bucket.put_object(self._s3_cluster_ssh_key, data)
        logger.info("Saved ssh key %s at %s/%s", self._key_file,
                    self._bucket_name, self._s3_cluster_ssh_key)

    def upload_staging_info(self, stage, msg):
        assert stage in ["stage1", "stage2"
                         ], "Only stage1, and stage2 information is available"
        logger.info("Uploading Argo install %s info to s3 ...", stage)
        if not self._bucket.put_object(key=self._staging_info[stage],
                                       data=msg):
            raise AXPlatformException(
                "Failed to upload Argo install {} info for {}".format(
                    stage, self._cluster_name_id))
        logger.info("Uploading Argo install %s info %s to s3 ... DONE", stage,
                    msg)

    def upload_cluster_status_before_pause(self, status):
        """
        We upload cluster asg configures once for idempotency. i.e. when pause cluster failed but we have already
        scaled asg to 0, the next time we execute pause-cluster should use the status it uploaded before it even
        tried to scale cluster down
        """
        logger.info("Uploading Argo cluster status before pause ...")
        if self._bucket.get_object(key=self._s3_cluster_state_before_pause):
            logger.info("Status before pause already uploaded")
            return

        if not self._bucket.put_object(key=self._s3_cluster_state_before_pause,
                                       data=status):
            raise AXPlatformException(
                "Failed to upload cluster status before pause")
        logger.info("Uploading Argo cluster status before pause ... DONE")

    def upload_cluster_metadata(self):
        logger.info("Uploading Argo cluster metadata ...")
        with open(self._metadata_file, "r") as f:
            data = f.read()
        # User pods should be able to curl it so we have to set ACL to public-read
        if not self._bucket.put_object(
                self._s3_cluster_meta, data, ACL="public-read"):
            raise AXPlatformException(
                "Failed to upload cluster metadata for {}".format(
                    self._cluster_name_id))
        logger.info("Uploading Argo cluster metadata ... DONE")

    def upload_platform_manifests_and_config(self, platform_manifest_root,
                                             platform_config):
        """
        Upload platform manifests from given directory and platform config from given file path
        to S3 cluster bucket
        :param platform_manifest_root:
        :param platform_config:
        :return:
        """
        assert os.path.isdir(platform_manifest_root
                             ), "platform_manifest_root must be a directory"
        assert os.path.isfile(
            platform_config), "platform_config must be a file"
        logger.info("Uploading platform manifests and config ...")

        # Upload all manifests
        for f in os.listdir(platform_manifest_root):
            full_path = os.path.join(platform_manifest_root, f)
            if os.path.isfile(full_path):
                s3_path = self._s3_platform_manifest_dir + f
                logger.info("Uploading platform manifest %s -> %s", full_path,
                            s3_path)
                self._bucket.put_file(local_file_name=full_path,
                                      s3_key=s3_path)

        # Upload platform config
        logger.info("Uploading platform config %s", platform_config)
        self._bucket.put_file(local_file_name=platform_config,
                              s3_key=self._s3_platform_config)

        logger.info("Uploading platform manifests and config ... Done")

    def download_platform_manifests_and_config(self,
                                               target_platform_manifest_root,
                                               target_platform_config_path):
        """
        Download previously persisted platform manifests from S3 to given directory, and download previously
        persisted platform config file to given path
        :param target_platform_manifest_root:
        :param target_platform_config_path:
        :return:
        """
        assert os.path.isdir(
            target_platform_manifest_root
        ), "target_platform_manifest_root must be a directory"
        logger.info("Downloading platform manifests and config ...")

        for obj in self._bucket.list_objects_by_prefix(
                prefix=self._s3_platform_manifest_dir):
            s3_key = obj.key
            full_path = os.path.join(target_platform_manifest_root,
                                     s3_key.split("/")[-1])
            logger.info("Downloading platform manifest %s -> %s", s3_key,
                        full_path)
            self._bucket.download_file(key=s3_key, file_name=full_path)

        logger.info("Downloading platform config %s",
                    target_platform_config_path)
        self._bucket.download_file(key=self._s3_platform_config,
                                   file_name=target_platform_config_path)
        logger.info("Uploading platform manifests and config ... Done")

    def download_kube_config(self):
        """
        Get kube config from S3 and save it in file
        """
        logger.info("Downloading kubeconfig from s3 ...")
        data = self._bucket.get_object(self._s3_kube_config_key)
        assert data is not None, "No kube config at {}/{}".format(
            self._bucket_name, self._s3_kube_config_key)
        dir = os.path.dirname(self._kube_config)
        if not os.path.exists(dir):
            os.makedirs(dir)
        with open(self._kube_config, "w") as f:
            f.write(data)
        logger.info("Downloaded kubeconfig from %s/%s to %s",
                    self._bucket_name, self._s3_kube_config_key,
                    self._kube_config)
        return self._kube_config

    def download_kube_key(self):
        """
        Get kube ssh key from S3 and save it in file
        """
        if Cloud().target_cloud_gcp():
            return
        logger.info("Downloading cluster ssh key from s3 ...")
        data = self._bucket.get_object(self._s3_cluster_ssh_key)
        assert data is not None, "No kube ssh key at {}/{}".format(
            self._bucket_name, self._s3_cluster_ssh_key)
        dir = os.path.dirname(self._key_file)
        if not os.path.exists(dir):
            os.makedirs(dir)
        with open(self._key_file, "w") as f:
            f.write(data)
        os.chmod(self._key_file, 0o0600)
        logger.info("Downloaded kube ssh key from %s/%s to %s",
                    self._bucket_name, self._s3_cluster_ssh_key,
                    self._key_file)
        return self._key_file

    def download_staging_info(self, stage):
        assert stage in [
            "stage0", "stage1", "stage2"
        ], "Only stage0, stage1, and stage2 information is available"
        logger.info("Downloading Argo install %s info from s3 ...", stage)
        data = self._bucket.get_object(key=self._staging_info[stage])
        assert data is not None, "No Argo install {} info found at {}/{}".format(
            stage, self._bucket_name, self._staging_info[stage])
        return data

    def download_cluster_status_before_pause(self):
        logger.info("Downloading cluster status before pause ...")
        return self._bucket.get_object(key=self._s3_cluster_state_before_pause)

    def download_cluster_metadata(self):
        logger.info("Downloading cluster metadata")
        return self._bucket.get_object(key=self._s3_cluster_meta)

    def download_cluster_software_info(self):
        logger.info("Downloading cluster software info")
        data = self._bucket.get_object(key=self._s3_cluster_software_info)
        assert data, "No software info at {}/{}".format(
            self._bucket_name, self._s3_cluster_software_info)
        return data

    def delete_cluster_status_before_pause(self):
        logger.info("Deleting Argo cluster status before last pause ...")
        if not self._bucket.delete_object(
                key=self._s3_cluster_state_before_pause):
            raise AXPlatformException("Failed to delete {} information".format(
                self._s3_cluster_state_before_pause))
        logger.info("Deleted Argo cluster status before last pause")

    def delete_staging_info(self, stage):
        assert stage in ["stage1", "stage2"
                         ], "Only stage1, and stage2 information is available"
        logger.info("Deleting Argo install %s info from s3 ...", stage)
        if not self._bucket.delete_object(key=self._staging_info[stage]):
            raise AXPlatformException(
                "Failed to delete {} information".format(stage))
        logger.info("Deleted Argo install %s info from s3 ...", stage)

    def download_cluster_current_state(self):
        logger.info("Downloading cluster current state ...")
        return self._bucket.get_object(key=self._s3_cluster_current_state)

    def upload_cluster_current_state(self, state):
        logger.info("Uploading cluster current state ...")
        if not self._bucket.put_object(key=self._s3_cluster_current_state,
                                       data=state):
            raise AXPlatformException(
                "Failed to upload cluster current state info for {}".format(
                    self._cluster_name_id))
        logger.info("Uploading cluster current state ... DONE")

    def enable_portal_support(self):
        logger.info("Setting portal support flag ...")
        if not self._bucket.put_object(key=self._s3_portal_support_flag,
                                       data="True"):
            raise AXPlatformException(
                "Failed to upload cluster status before pause")
        logger.info("Setting portal support flag ... DONE")

    def disable_portal_support(self):
        logger.info("Deleting portal support flag ...")
        if not self._bucket.delete_object(key=self._s3_portal_support_flag):
            raise AXPlatformException(
                "Failed to upload cluster status before pause")
        logger.info("Deleted portal support flag")

    def is_cluster_supported_by_portal(self):
        logger.info("Checking portal support flag ...")
        data = self._bucket.get_object(key=self._s3_portal_support_flag)
        return False if not data else True

    def get_kube_config_file_path(self):
        """
        Get local config file path after saving.
        """
        return self._kube_config

    def get_key_file_path(self):
        return self._key_file

    def get_bucket_name(self):
        return self._bucket_name

    @retry(wait_exponential_multiplier=5000, stop_max_attempt_number=2)
    def get_master_config(self, user_data_file):
        """
        Checks whether the config for the master instance is present in S3. This is done
        by checking if the directory specific to the given cluster name is present or not.

        :return Master config json if the config was in S3. None otherwise.
        """
        # Check if the master_config was previously stored in S3. If so, download it.
        object_list = list(
            self._bucket.list_objects_by_prefix(
                prefix=self._s3_master_config_prefix))
        if len(object_list) > 0:
            # Objects should already be in s3. No need to store.
            config_exists_in_s3 = True
            logger.info("Master config already exists in S3. Downloading ...")
            self._bucket.download_file(self._s3_master_user_data_path,
                                       user_data_file)
            return self._bucket.get_object(self._s3_master_attributes_path)

        logger.info("Master config not found in s3")
        return None

    @retry(wait_exponential_multiplier=5000, stop_max_attempt_number=3)
    def upload_master_config_to_s3(self, master_attributes, master_user_data):
        """
        Uploads the master attributes and user-data into a directory in the s3 bucket.
        """
        # Upload the attributes file.
        self._bucket.put_object(key=self._s3_master_attributes_path,
                                data=json.dumps(master_attributes))
        # Upload the user-data file.
        self._bucket.put_object(key=self._s3_master_user_data_path,
                                data=master_user_data)

    def generate_cluster_metadata_from_provider(self):
        ec2 = EC2(profile=self._aws_profile, region=self._config.get_region())
        minion_name = "{}-minion".format(self._cluster_name_id)

        # Assume minion has same network configurations
        minion = ec2.get_instances(name=minion_name,
                                   states=[EC2InstanceState.Running])[0]
        vpc_id = minion["NetworkInterfaces"][0]["VpcId"]
        subnet_id = minion["NetworkInterfaces"][0]["SubnetId"]
        zone = minion["Placement"]["AvailabilityZone"]
        sg_id = None
        for sg in minion["SecurityGroups"]:
            if sg["GroupName"] == "kubernetes-minion-{}".format(
                    self._cluster_name_id):
                sg_id = sg["GroupId"]
        assert sg_id, "Unable to find security group for cluster minions"

        rtbs = ec2.get_routetables(
            tags={COMMON_CLOUD_RESOURCE_TAG_KEY: [self._cluster_name_id]})
        assert len(
            rtbs) == 1, "Cluster has 0 or more than 1 routetables: {}".format(
                rtbs)
        rtb_id = rtbs[0]["RouteTableId"]

        subnets = ec2.get_subnets(
            zones=[zone],
            tags={COMMON_CLOUD_RESOURCE_TAG_KEY: [self._cluster_name_id]})
        # Assume cluster has 1 subnet in 1 zone now, and 1 master node runs inside the same subnet
        assert len(
            subnets
        ) == 1, "Cluster has 0 or more than 1 subnets in zone {}: {}".format(
            zone, subnets)
        subnet_cidr = subnets[0]["CidrBlock"]
        max_instance_count = int(self._config.get_max_node_count()) + 1

        igws = ec2.get_vpc_igws(vpc_id=vpc_id)
        assert len(
            igws) == 1, "VPC should have only 1 internet gateways. {}".format(
                igws)
        igw_id = igws[0]["InternetGatewayId"]

        return {
            "cluster_name": self._cluster_name_id,
            "vpc": vpc_id,
            "internet_gateway": igw_id,
            "route_table": rtb_id,
            "security_group": sg_id,
            "subnets": {
                zone: {
                    "subnet_id": subnet_id,
                    "subnet_cidr": subnet_cidr,
                    "max_instance_count": max_instance_count
                }
            }
        }

    def set_kube_config(self, kube_config):
        self._kube_config = kube_config
        logger.info("Setting kube_config to %s", kube_config)
Code Example #5
File: cluster_version.py  Project: teddybearz/argo
class AXVersion(object):
    def __init__(self, customer_id, cluster_name_id, portal_url, aws_profile):
        self._customer_id = customer_id
        self._cluster_name_id = cluster_name_id
        self._cluster_name = AXClusterId(cluster_name_id).get_cluster_name()
        self._portal_url = portal_url
        self._aws_profile = aws_profile

        cluster_bucket_name = AXClusterConfigPath(cluster_name_id).bucket()
        self._cluster_bucket = Cloud().get_bucket(
            cluster_bucket_name, aws_profile=self._aws_profile)

        support_bucket_name = AXSupportConfigPath(cluster_name_id).bucket()
        self._support_bucket = Cloud().get_bucket(
            support_bucket_name, aws_profile=self._aws_profile)

    def update(self, new):
        self._report_version_to_s3(new)
        self._report_version_to_portal(new)

    def _get_current_version(self):
        # TODO: combine cluster bucket operations to AXClusterInfo object
        data = self._cluster_bucket.get_object(
            key=AXClusterConfigPath(self._cluster_name_id).versions())
        return yaml.load(data) if data else {}

    def _report_version_to_s3(self, new):
        old = self._get_current_version()
        history = {"from": old, "to": new}
        # Update current version in cluster bucket.
        cluster_version_key = AXClusterConfigPath(
            self._cluster_name_id).versions()
        self._cluster_bucket.put_object(cluster_version_key,
                                        yaml.dump(new),
                                        ACL="bucket-owner-full-control")

        # Update current version in support bucket.
        support_version_key = AXSupportConfigPath(
            self._cluster_name_id).current_versions()
        self._support_bucket.put_object(support_version_key,
                                        yaml.dump(new),
                                        ACL="bucket-owner-full-control")

        # Update version history in support bucket.
        support_version_history_key = AXSupportConfigPath(
            self._cluster_name_id).version_history()
        self._support_bucket.put_object(support_version_history_key,
                                        yaml.dump(history),
                                        ACL="bucket-owner-full-control")

    @retry(wait_exponential_multiplier=1000, stop_max_attempt_number=3)
    def _report_version_to_portal(self, versions):
        if self._portal_url:
            url = "{}/v1/environments/{}/{}/version"
            url = url.format(self._portal_url, self._customer_id,
                             self._cluster_name)
            logger.info("Reporting new version %s to %s", versions, url)
            try:
                r = requests.put(url, json=versions)
                if r.status_code == requests.codes.not_found:
                    # Cluster is not created from portal. OK for now.
                    logger.error(
                        "Cluster not found at portal. %s Create cluster from portal please.",
                        r.text)
                elif r.status_code == requests.codes.server_error:
                    # Portal doesn't know this version. OK for now.
                    logger.error("Version unknown at portal. %s", r.text)
                else:
                    msg = "Failed to report version {} to {}, {} {}".format(
                        versions, url, r.status_code, r.headers)
                    assert r.status_code == requests.codes.ok, msg
            except Exception:
                logger.exception("Failed to report cluster version %s to %s",
                                 versions, url)
                raise
        else:
            logger.warning("No portal URL provided, not reporting to portal.")