Example 1
    def __init__(self, cfg):
        assert isinstance(cfg, ClusterRestartConfig)
        self._cfg = cfg
        super(ClusterResumer,
              self).__init__(cluster_name=self._cfg.cluster_name,
                             cluster_id=self._cfg.cluster_id,
                             cloud_profile=self._cfg.cloud_profile,
                             dry_run=self._cfg.dry_run)

        # This will raise an exception if the name/id mapping cannot be found
        self._name_id = self._idobj.get_cluster_name_id()
        self._cluster_info = AXClusterInfo(cluster_name_id=self._name_id,
                                           aws_profile=self._cfg.cloud_profile)
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._name_id, aws_profile=self._cfg.cloud_profile)
        self._master_manager = AXMasterManager(
            cluster_name_id=self._name_id,
            region=self._cluster_config.get_region(),
            profile=self._cfg.cloud_profile)
        self._bootstrap_obj = AXBootstrap(
            cluster_name_id=self._name_id,
            aws_profile=self._cfg.cloud_profile,
            region=self._cluster_config.get_region())

        # Initialize node count to 1 as the master is not in an auto scaling group
        self._total_nodes = 1
        self._cidr = str(get_public_ip()) + "/32"
        self._software_info = SoftwareInfo(info_dict=yaml.load(
            self._cluster_info.download_cluster_software_info()))
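
A minimal driver sketch for the constructor above (hypothetical; in the real code the ClusterOperationBase base class presumably orchestrates these calls). ClusterRestartConfig is assumed to wrap the parsed CLI arguments (cluster_name, cluster_id, cloud_profile, dry_run); the pre_run/run/post_run method names come from the fuller ClusterResumer listing in Example 17:

cfg = ClusterRestartConfig(args)   # args: a hypothetical argparse namespace
resumer = ClusterResumer(cfg)
resumer.pre_run()
resumer.run()
resumer.post_run()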
Example 2
    def __init__(self, cfg):

        super(ClusterInstallConfig, self).__init__(cfg)

        self.cluster_size = cfg.cluster_size
        self.cluster_type = cfg.cluster_type

        self.cloud_region = cfg.cloud_region
        self.cloud_placement = cfg.cloud_placement

        self.vpc_id = cfg.vpc_id
        self.vpc_cidr_base = cfg.vpc_cidr_base
        self.subnet_mask_size = cfg.subnet_mask_size
        self.trusted_cidrs = cfg.trusted_cidrs

        self.user_on_demand_nodes = cfg.user_on_demand_nodes
        self.spot_instances_option = cfg.spot_instances_option

        self.enable_sandbox = cfg.enable_sandbox
        self.manifest_root = cfg.service_manifest_root
        self.bootstrap_config = cfg.platform_bootstrap_config
        self.autoscaling_interval = cfg.cluster_autoscaling_scan_interval
        self.support_object_store_name = cfg.support_object_store_name

        if cfg.software_version_info:
            # Read software info from config file
            self.software_info = SoftwareInfo(
                info_file=cfg.software_version_info)
        else:
            # Read software info from envs
            self.software_info = SoftwareInfo()
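
A short sketch of the two SoftwareInfo initialization paths used above; the info file path is illustrative, and the registry/image_namespace/image_version attributes are the ones read in Example 14:

si = SoftwareInfo(info_file="/path/to/version_info.yml")  # hypothetical path: read from a config file
si = SoftwareInfo()                                       # no file given: read from environment variables
print(si.registry, si.image_namespace, si.image_version)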
Example 3
    def __init__(self, config_file_path):
        assert os.path.isfile(
            config_file_path), "Config file {} is not a file".format(
                config_file_path)
        self._config_file = config_file_path
        self._cluster_name_id = AXClusterId().get_cluster_name_id()
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._cluster_name_id)
        if not self._cluster_config.get_cluster_provider().is_user_cluster():
            self.cpu_mult, self.mem_mult, self.disk_mult, \
                self.daemon_cpu_mult, self.daemon_mem_mult = self._get_resource_multipliers()
        else:
            self.cpu_mult = 1
            self.mem_mult = 1
            self.disk_mult = 1
            self.daemon_cpu_mult = 1
            self.daemon_mem_mult = 1
        self._swagger_components = []
        self._yaml_components = []
        self._updated_raw = ""

        # TODO: when we support configuring software info via a config file, we need to figure out
        # how that file gets passed through, since SoftwareInfo is not a singleton
        self._software_info = SoftwareInfo()

        self._load_objects()
        self._load_raw()
Example 4
    def __init__(self, name, image, pull_policy=None):
        """
        Construct a container that will provide the spec for a kubernetes container
        http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_container
        Args:
            name: name of the container; must conform to Kubernetes container naming rules
            image: image for container
            pull_policy: pull policy based on kubernetes. If None then kubernetes default is used
        """
        self.name = name
        self.image = image
        self.image_pull_policy = pull_policy

        self.command = None
        self.args = None

        self.vmap = {}
        self.env_map = {}
        self.ports = []

        self.resources = None
        self.privileged = None

        self.software_info = SoftwareInfo()
        self.probes = {}
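
A usage sketch for this constructor; generate_spec appears in the fuller Container listing in Example 18, and add_resource_constraints is shown on the ArtifactsContainer subclass in Example 8:

c = Container("wait", "alpine:3.6")
c.command = ["sh", "-c", "sleep 60"]
c.add_resource_constraints("cpu_cores", 0.05)
c.add_resource_constraints("mem_mib", 32)
spec = c.generate_spec()   # a swagger_client.V1Container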
Example 5
    def __init__(self, name, client=None):
        self.name = name
        if client is None:
            self._client = KubernetesApiClient(use_proxy=True)
        else:
            self._client = client

        self._registry_spec = None
        self._software_info = SoftwareInfo()
        if self._software_info.registry_is_private():
            secret = KubeObjectConfigFile(DEFAULT_SECRET_YAML_PATH, {"REGISTRY_SECRETS": self._software_info.registry_secrets})
            for obj in secret.get_swagger_objects():
                if isinstance(obj, swagger_client.V1Secret):
                    self._registry_spec = obj
            assert self._registry_spec, "Argo registry specification is missing"

        self._am_service_spec = None
        self._am_deployment_spec = None

        # AA-2471: Hack to add AXOPS_EXT_DNS to Application Manager
        elb = InternalRoute("axops", "axsys", client=self._client)
        elb_status = elb.status(with_loadbalancer_info=True)["loadbalancer"][0]
        if not elb_status:
            raise AXPlatformException("Could not get axops elb address {}".format(elb_status))

        replacements = {"NAMESPACE": self._software_info.image_namespace,
                        "VERSION": self._software_info.image_version,
                        "REGISTRY": self._software_info.registry,
                        "APPLICATION_NAME": self.name,
                        "AXOPS_EXT_DNS": elb_status}
        cluster_name_id = os.getenv("AX_CLUSTER_NAME_ID", None)
        assert cluster_name_id, "Cluster name id is None!"
        cluster_config = AXClusterConfig(cluster_name_id=cluster_name_id)
        if not cluster_config.get_cluster_provider().is_user_cluster():
            axam_path = DEFAULT_AM_YAML_PATH
        else:
            axam_path = "/ax/config/service/argo-all/axam-svc.yml.in"
            replacements["ARGO_DATA_BUCKET_NAME"] = os.getenv("ARGO_DATA_BUCKET_NAME")

        logger.info("Using replacements: %s", replacements)

        k = KubeObjectConfigFile(axam_path, replacements)
        for obj in k.get_swagger_objects():
            if isinstance(obj, swagger_client.V1Service):
                self._am_service_spec = obj
            elif isinstance(obj, swagger_client.V1beta1Deployment):
                self._am_deployment_spec = obj
                self._add_pod_metadata("deployment", self._am_deployment_spec.metadata.name, is_label=True)
                self._add_pod_metadata("ax_costid", json.dumps({
                    "app": self.name,
                    "service": "axam-deployment",
                    "user": "******"
                }))
            else:
                logger.debug("Ignoring specification of type {}".format(type(obj)))
        assert self._am_service_spec and self._am_deployment_spec, "Application monitor specification is missing"
Example 6
    def __init__(self, cfg):
        super(ClusterUpgradeConfig, self).__init__(cfg)

        self.manifest_root = cfg.service_manifest_root
        self.bootstrap_config = cfg.platform_bootstrap_config
        self.force_upgrade = cfg.force_upgrade
        if cfg.software_version_info:
            # Read software info from config file
            self.target_software_info = SoftwareInfo(
                info_file=cfg.software_version_info)
        else:
            # Read software info from envs
            self.target_software_info = SoftwareInfo()
Example 7
    def __init__(self, cfg):
        assert isinstance(cfg, ClusterUpgradeConfig)
        self._cfg = cfg
        super(ClusterUpgrader, self).__init__(
            cluster_name=self._cfg.cluster_name,
            cluster_id=self._cfg.cluster_id,
            cloud_profile=self._cfg.cloud_profile
        )

        # This will raise an exception if the name/id mapping cannot be found
        self._name_id = self._idobj.get_cluster_name_id()
        self._cluster_info = AXClusterInfo(
            cluster_name_id=self._name_id,
            aws_profile=self._cfg.cloud_profile
        )
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._name_id,
            aws_profile=self._cfg.cloud_profile
        )
        self._bootstrap_obj = AXBootstrap(
            cluster_name_id=self._name_id,
            aws_profile=self._cfg.cloud_profile,
            region=self._cluster_config.get_region()
        )
        self._current_software_info = SoftwareInfo(
            info_dict=yaml.load(
                self._cluster_info.download_cluster_software_info()
            )
        )
        self._cidr = str(get_public_ip()) + "/32"
Example 8
    def __init__(self, containername, customer_image, namespace, version):
        s = SoftwareInfo()
        super(ArtifactsContainer, self).__init__(
            containername, "{}/{}/artifacts:{}".format(s.registry, namespace,
                                                       version))

        # artifacts scratch space
        self._artifacts_scratch = ContainerVolume(
            "artifacts-scratch",
            ArtifactsContainer.ARTIFACTS_CONTAINER_SCRATCH)
        self._artifacts_scratch.set_type("EMPTYDIR")
        self.add_volume(self._artifacts_scratch)

        # create a hostpath for docker-socket-dir. This is used for running docker inspect
        socket_hostpath = ContainerVolume("docker-socket-file",
                                          "/var/run/docker.sock")
        socket_hostpath.set_type("HOSTPATH", "/var/run/docker.sock")
        self.add_volume(socket_hostpath)

        # emptydir for copying static binaries from the init container
        # so that they are available in the main container
        self._static_bins = ContainerVolume("static-bins", "/copyto")
        self._static_bins.set_type("EMPTYDIR")
        self.add_volume(self._static_bins)

        # add environment vars needed for artifacts
        self.add_env("AX_TARGET_CLOUD", value=Cloud().target_cloud())
        self.add_env("AX_CLUSTER_NAME_ID",
                     value=AXClusterId().get_cluster_name_id())
        self.add_env("AX_CUSTOMER_ID", value=AXCustomerId().get_customer_id())
        self.add_env("AX_CUSTOMER_IMAGE_NAME", value=customer_image)
        self.add_env("AX_ARTIFACTS_SCRATCH",
                     value=ArtifactsContainer.ARTIFACTS_CONTAINER_SCRATCH)
        self.add_env("AX_POD_NAME", value_from="metadata.name")
        self.add_env("AX_POD_IP", value_from="status.podIP")
        self.add_env("AX_POD_NAMESPACE", value_from="metadata.namespace")
        self.add_env("AX_NODE_NAME", value_from="spec.nodeName")
        self.add_env("ARGO_LOG_BUCKET_NAME",
                     os.getenv("ARGO_LOG_BUCKET_NAME", ""))
        self.add_env("ARGO_DATA_BUCKET_NAME",
                     os.getenv("ARGO_DATA_BUCKET_NAME", ""))

        annotation_vol = ContainerVolume("annotations", "/etc/axspec")
        annotation_vol.set_type("DOWNWARDAPI", "metadata.annotations")
        self.add_volume(annotation_vol)

        # AA-3175: CPU and memory are set to lowest possible so that pod requests are kept at a minimum
        self.add_resource_constraints("cpu_cores", 0.001)
        self.add_resource_constraints("mem_mib", 4)
Example 9
    def __init__(self):
        super(AXMon, self).__init__()
        self.version = __version__

        self._cluster_cond = threading.Condition()
        self._shutdown = False

        self._kubectl = KubernetesApiClient(use_proxy=True)

        # Initialize SoftwareInfo singleton
        self._software_info = SoftwareInfo()

        if Cloud().target_cloud_aws():
            # init the volume manager singleton
            VolumeManager()
Example 10
    def __init__(self, name, namespace="axuser"):
        self.name = name
        self.namespace = namespace
        self.client = KubernetesApiClient(use_proxy=True)

        self.service = None  # this is the argo.services.service.Service object
        self._host_vols = []
        self._name_id = AXClusterId().get_cluster_name_id()
        self._s3_bucket_ax_is_external = AXLogPath(self._name_id).is_external()
        self._s3_bucket_ax = AXLogPath(self._name_id).bucket()
        self._s3_key_prefix_ax = AXLogPath(self._name_id).artifact()
        self._s3_bucket = AXClusterDataPath(self._name_id).bucket()
        self._s3_key_prefix = AXClusterDataPath(self._name_id).artifact()

        self.software_info = SoftwareInfo()
        self._resources = AXResources()
Example 11
    def __init__(self):
        self.client = KubernetesApiClient(use_proxy=True)
        self.batchapi = self.client.batchv
        self.kube_namespace = "axuser"
        self.jobname = None

        self.service = None  # this is the argo.services.service.Service object
        self._host_vols = []
        self._name_id = AXClusterId().get_cluster_name_id()
        self._s3_bucket_ax_is_external = AXLogPath(self._name_id).is_external()
        self._s3_bucket_ax = AXLogPath(self._name_id).bucket()
        self._s3_key_prefix_ax = AXLogPath(self._name_id).artifact()
        self._s3_bucket = AXClusterDataPath(self._name_id).bucket()
        self._s3_key_prefix = AXClusterDataPath(self._name_id).artifact()

        self._attribute_map = {"uuid": "metadata.uid"}
        self.software_info = SoftwareInfo()
        self._ax_resources = {}
Example 12
    def __init__(self, name, application):
        """
        Each deployment has a name and needs to be part of an application
        Application maps to a kubernetes namespace and the deployment will
        be created in this namespace.

        Args:
            name: deployment name
            application: the application that this deployment runs under
        """
        self.name = name
        self.application = application
        self.client = KubernetesApiClient(use_proxy=True)
        self._nameid = AXClusterId().get_cluster_name_id()
        self._software_info = SoftwareInfo()

        self._app_obj = Application(application)

        self.spec = None
Example 13
    def __init__(
            self,
            cluster_name_id=None,
            aws_profile=None,
            debug=True,
            manifest_root=AXPlatformConfigDefaults.DefaultManifestRoot,
            config_file=AXPlatformConfigDefaults.DefaultPlatformConfigFile,
            software_info=None):
        """
        AX Platform bootstrap

        :param cluster_name_id: cluster name id
        :param aws_profile: aws profile to authenticate all aws clients
        :param debug: debug mode
        :param manifest_root: root directory to all ax service objects
        """
        self._software_info = software_info if software_info else SoftwareInfo(
        )
        assert isinstance(
            self._software_info, SoftwareInfo
        ), "Wrong type ({}) of software info passed in.".format(
            self._software_info)
        self._aws_profile = aws_profile
        self._manifest_root = manifest_root
        self._config = AXPlatformConfig(config_file)

        logger.info("Using Kubernetes manifest from %s", self._manifest_root)
        logger.info("Using platform configuration \"%s\" from %s",
                    self._config.name, config_file)

        self._cluster_name_id = AXClusterId(
            cluster_name_id).get_cluster_name_id()
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._cluster_name_id,
            aws_profile=self._aws_profile)
        self._cluster_config_path = AXClusterConfigPath(cluster_name_id)
        self._cluster_info = AXClusterInfo(self._cluster_name_id,
                                           aws_profile=self._aws_profile)

        self._region = self._cluster_config.get_region()
        if Cloud().target_cloud_aws():
            self._account = AWSAccountInfo(
                aws_profile=self._aws_profile).get_account_id()
        else:
            self._account = ""
        self._bucket_name = self._cluster_config_path.bucket()
        self._bucket = Cloud().get_bucket(self._bucket_name,
                                          aws_profile=self._aws_profile,
                                          region=self._region)

        # In debug mode, when we fail to create an object, we don't delete it but
        # leave it behind for debugging.
        self._debug = debug

        # DNS
        self.cluster_dns_name = None

        # Get kube cluster config. Automatic if in pod already.
        self._kube_config = self._cluster_info.get_kube_config_file_path(
        ) if self._cluster_name_id else None
        if self._cluster_name_id:
            if not os.path.isfile(self._kube_config):
                logger.info(
                    "Can't find config file at %s; downloading from s3",
                    self._kube_config)
                self._kube_config = self._cluster_info.download_kube_config()
            assert os.path.isfile(
                self._kube_config), "No kube_config file available"

        # Kubernetes related objects and macros
        self.kube_namespaces = [AXNameSpaces.AXSYS, AXNameSpaces.AXUSER]
        self.kube_axsys_namespace = AXNameSpaces.AXSYS
        self.kube_user_namespace = AXNameSpaces.AXUSER
        self.kubectl = KubernetesApiClient(config_file=self._kube_config)
        self.kube_poll = KubeObjPoll(kubectl=self.kubectl)

        self._monitor = AXKubeMonitor(kubectl=self.kubectl)
        self._monitor.reload_monitors(namespace=self.kube_axsys_namespace)
        self._monitor.start()

        # Kube Objects
        self._kube_objects = {}
        self._replacing = {}
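
Judging by the matching keyword arguments, this appears to be the AXPlatform constructor; Example 17's _start_platform shows the call pattern used to bring the platform up (variable names here are placeholders):

platform = AXPlatform(cluster_name_id=name_id,        # e.g. from AXClusterId
                      aws_profile=profile,
                      manifest_root=manifest_root,
                      config_file=config_path,
                      software_info=software_info)
platform.start()
platform.stop_monitor()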
Example 14
def modify_templates(template, replacements):
    """
    Args:
        template: A stringified template
        replacements: a dict of string: string
    Returns:
        stringified modified template
    """
    return macro_replace(template, replacements)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Managed ELB creator')
    parser.add_argument('--version',
                        action='version',
                        version="%(prog)s {}".format(__version__))
    parser.add_argument('--templates', help='Path to templates', default=".")
    args = parser.parse_args()

    dir_name = args.templates

    software_info = SoftwareInfo()
    replacements = {
        "REGISTRY": software_info.registry,
        "NAMESPACE": software_info.image_namespace,
        "VERSION": software_info.image_version
    }
    print("Macro replacements are {}".format(replacements))

    for path, template in load_templates_from_dir(dir_name):
        print("Processing template {}".format(path))
        mod_template = modify_templates(template, replacements)
        post_template(path, mod_template)
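
For reference, a minimal sketch of the kind of substitution macro_replace presumably performs; the real token delimiter is not shown in this snippet, so the %%...%% syntax below is an assumption:

def macro_replace(template, replacements):
    # assumption: plain %%TOKEN%% textual substitution
    for key, value in replacements.items():
        template = template.replace("%%{}%%".format(key), value)
    return template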
Example 15
class AXSYSKubeYamlUpdater(object):
    """
    This class loads a kubernetes yaml file, updates resource,
    and generate objects that kube_object.py can consume
    """
    def __init__(self, config_file_path):
        assert os.path.isfile(
            config_file_path), "Config file {} is not a file".format(
                config_file_path)
        self._config_file = config_file_path
        self._cluster_name_id = AXClusterId().get_cluster_name_id()
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._cluster_name_id)
        self.cpu_mult, self.mem_mult, self.disk_mult, \
            self.daemon_cpu_mult, self.daemon_mem_mult = self._get_resource_multipliers()
        self._swagger_components = []
        self._yaml_components = []
        self._updated_raw = ""

        # TODO: when we support configuring software info via a config file, we need to figure out
        # how that file gets passed through, since SoftwareInfo is not a singleton
        self._software_info = SoftwareInfo()

        self._load_objects()
        self._load_raw()

    @property
    def updated_raw(self):
        return self._updated_raw

    @property
    def components_in_dict(self):
        return self._yaml_components

    @property
    def components_in_swagger(self):
        return self._swagger_components

    def _load_objects(self):
        with open(self._config_file, "r") as f:
            data = f.read()
        for c in yaml.load_all(data):
            swagger_obj = self._config_yaml(c)
            yaml_obj = ApiClient().sanitize_for_serialization(swagger_obj)
            self._swagger_components.append(swagger_obj)
            self._yaml_components.append(yaml_obj)

    def _load_raw(self):
        self._updated_raw = yaml.dump_all(self._yaml_components)

    def _get_resource_multipliers(self):
        """
        Resources in yaml templates need to be multiplied with these numbers
        :return: cpu_multiplier, mem_multiplier, disk_multiplier, daemon_cpu_multiplier, daemon_mem_multiplier
        """
        # Getting cluster size from cluster config, in order to configure resources
        # There are 3 situations we will be using AXClusterConfig
        #   - During install, since the class is a singleton, it has all the values we need
        #     no need to download from s3
        #   - During upgrade, since we are exporting AWS_DEFAULT_PROFILE, we can download
        #     cluster config files from s3 to get the values
        #   - During job creation: the node axmon runs has the proper roles to access s3

        try:
            ax_node_max = int(self._cluster_config.get_asxys_node_count())
            ax_node_type = self._cluster_config.get_axsys_node_type()
            usr_node_max = int(
                self._cluster_config.get_max_node_count()) - ax_node_max
            usr_node_type = self._cluster_config.get_axuser_node_type()
            assert all(
                [ax_node_max, ax_node_type, usr_node_max, usr_node_type])
        except Exception as e:
            logger.error(
                "Unable to read cluster config, skip resource config for %s. Error %s",
                self._config_file, e)
            return 1, 1, 1, 1, 1

        rc = AXSYSResourceConfig(
            ax_node_type=ax_node_type,
            ax_node_max=ax_node_max,
            usr_node_type=usr_node_type,
            usr_node_max=usr_node_max,
            cluster_type=self._cluster_config.get_ax_cluster_type())
        #logger.info("With %s %s axsys nodes, %s %s axuser nodes, component %s uses multipliers (%s, %s, %s, %s, %s)",
        #            ax_node_max, ax_node_type, usr_node_max, usr_node_type, self._config_file,
        #            rc.cpu_multiplier, rc.mem_multiplier, rc.disk_multiplier,
        #            rc.daemon_cpu_multiplier, rc.daemon_mem_multiplier)
        return rc.cpu_multiplier, rc.mem_multiplier, rc.disk_multiplier, rc.daemon_cpu_multiplier, rc.daemon_mem_multiplier

    def _config_yaml(self, kube_yaml_obj):
        """
        Load dict into swagger object, patch resource,
        sanitize, return a dict
        :param kube_yaml_obj:
        :return: swagger object with resource values finalized
        """
        kube_kind = kube_yaml_obj["kind"]
        (swagger_class_literal,
         swagger_instance) = KubeKindToV1KubeSwaggerObject[kube_kind]
        swagger_obj = ApiClient()._ApiClient__deserialize(
            kube_yaml_obj, swagger_class_literal)
        assert isinstance(swagger_obj, swagger_instance), \
            "{} has instance {}, expected {}".format(swagger_obj, type(swagger_obj), swagger_instance)

        if isinstance(swagger_obj, V1beta1Deployment):
            if not self._software_info.registry_is_private():
                swagger_obj.spec.template.spec.image_pull_secrets = None

            node_selector = swagger_obj.spec.template.spec.node_selector
            if node_selector.get('ax.tier', 'applatix') == 'master':
                # Skip updating containers on master.
                logger.info(
                    "Skip updating cpu, mem multipliers for pods on master: %s",
                    swagger_obj.metadata.name)
            else:
                for container in swagger_obj.spec.template.spec.containers:
                    self._update_container(container)
            return swagger_obj
        elif isinstance(swagger_obj, V1Pod):
            if not self._software_info.registry_is_private():
                swagger_obj.spec.image_pull_secrets = None
            return swagger_obj
        elif isinstance(swagger_obj, V1beta1DaemonSet):
            if not self._software_info.registry_is_private():
                swagger_obj.spec.template.spec.image_pull_secrets = None
            for container in swagger_obj.spec.template.spec.containers:
                # We are special-casing the applet DaemonSet to accommodate the fact that
                # we are using a different node type for compute-intensive nodes
                if swagger_obj.metadata.name == "applet":
                    self._update_container(container=container,
                                           is_daemon=True,
                                           update_resource=True)
                else:
                    self._update_container(container=container,
                                           is_daemon=True,
                                           update_resource=False)
            return swagger_obj
        elif isinstance(swagger_obj, V1beta1StatefulSet):
            if not self._software_info.registry_is_private():
                swagger_obj.spec.template.spec.image_pull_secrets = None
            return self._update_statefulset(swagger_obj)
        elif isinstance(swagger_obj, V1PersistentVolumeClaim):
            self._update_volume(swagger_obj)
            return swagger_obj
        else:
            # logger.info("Object %s does not need to configure resource", type(swagger_obj))
            # HACK, as the original hook will be messed up
            if isinstance(swagger_obj, V1Service):
                if swagger_obj.metadata.name == "axops":
                    swagger_obj.spec.load_balancer_source_ranges = []
                    for cidr in self._cluster_config.get_trusted_cidr():
                        # Seems swagger client does not support unicode ... SIGH
                        swagger_obj.spec.load_balancer_source_ranges.append(
                            str(cidr))

                # HACK #2: if we don't do this, kubectl will complain about something such as
                #
                # spec.ports[0].targetPort: Invalid value: "81": must contain at least one letter (a-z)
                #
                # p.target_port is defined as a string, but if it is really a string, kubectl
                # is looking for a port name, rather than a number
                # SIGH ...
                for p in swagger_obj.spec.ports or []:
                    try:
                        p.target_port = int(p.target_port)
                    except (ValueError, TypeError):
                        pass
            return swagger_obj

    def _update_deployment_or_daemonset(self, kube_obj):
        assert isinstance(kube_obj, V1beta1Deployment) or isinstance(
            kube_obj, V1beta1DaemonSet)
        for container in kube_obj.spec.template.spec.containers:
            self._update_container(container)
        return kube_obj

    def _update_statefulset(self, kube_obj):
        assert isinstance(kube_obj, V1beta1StatefulSet)
        for container in kube_obj.spec.template.spec.containers:
            self._update_container(container)
        if isinstance(kube_obj.spec.volume_claim_templates, list):
            for vol in kube_obj.spec.volume_claim_templates:
                self._update_volume(vol)
        return kube_obj

    def _update_container(self,
                          container,
                          is_daemon=False,
                          update_resource=True):
        assert isinstance(container, V1Container)

        if update_resource:
            cpulim = container.resources.limits.get("cpu")
            memlim = container.resources.limits.get("memory")
            cpureq = container.resources.requests.get("cpu")
            memreq = container.resources.requests.get("memory")

            def _massage_cpu(orig):
                return orig * self.daemon_cpu_mult if is_daemon else orig * self.cpu_mult

            def _massage_mem(orig):
                return orig * self.daemon_mem_mult if is_daemon else orig * self.mem_mult

            if cpulim:
                rvc = ResourceValueConverter(value=cpulim, target="cpu")
                rvc.massage(_massage_cpu)
                container.resources.limits["cpu"] = "{}m".format(
                    rvc.convert("m"))
            if cpureq:
                rvc = ResourceValueConverter(value=cpureq, target="cpu")
                rvc.massage(_massage_cpu)
                container.resources.requests["cpu"] = "{}m".format(
                    rvc.convert("m"))
            if memlim:
                rvc = ResourceValueConverter(value=memlim, target="memory")
                rvc.massage(_massage_mem)
                container.resources.limits["memory"] = "{}Mi".format(
                    int(rvc.convert("Mi")))
            if memreq:
                rvc = ResourceValueConverter(value=memreq, target="memory")
                rvc.massage(_massage_mem)
                container.resources.requests["memory"] = "{}Mi".format(
                    int(rvc.convert("Mi")))

        if container.liveness_probe and container.liveness_probe.http_get:
            try:
                container.liveness_probe.http_get.port = int(
                    container.liveness_probe.http_get.port)
            except (ValueError, TypeError):
                pass
        if container.readiness_probe and container.readiness_probe.http_get:
            try:
                container.readiness_probe.http_get.port = int(
                    container.readiness_probe.http_get.port)
            except (ValueError, TypeError):
                pass

        # Add resource multipliers to containers in case we need them
        if not container.env:
            container.env = []
        container.env += self._generate_default_envs(is_daemon,
                                                     update_resource)

    def _update_volume(self, vol):
        assert isinstance(vol, V1PersistentVolumeClaim)
        vol_size = vol.spec.resources.requests["storage"]

        def _massage_disk(orig):
            return orig * self.disk_mult

        if vol_size:
            rvc = ResourceValueConverter(value=vol_size, target="storage")
            rvc.massage(_massage_disk)
            # Since AWS does not support values such as 1.5G, let's round up to the ceiling
            vol.spec.resources.requests["storage"] = "{}Gi".format(
                int(ceil(rvc.convert("Gi"))))

        # Manually patch access mode as the swagger client mistakenly interprets this as a map
        vol.spec.access_modes = ["ReadWriteOnce"]

    def _generate_default_envs(self, is_daemon, resource_updated):
        """
        Add essential variables to all system containers
        :param is_daemon:
        :param resource_updated:
        :return:
        """
        default_envs = [
            # Kubernetes downward APIs
            {
                "name": "AX_NODE_NAME",
                "path": "spec.nodeName"
            },
            {
                "name": "AX_POD_NAME",
                "path": "metadata.name"
            },
            {
                "name": "AX_POD_NAMESPACE",
                "path": "metadata.namespace"
            },
            {
                "name": "AX_POD_IP",
                "path": "status.podIP"
            },

            # Values
            {
                "name": "DISK_MULT",
                "value": str(self.disk_mult)
            },
            {
                "name": "AX_TARGET_CLOUD",
                "value": Cloud().target_cloud()
            },
            {
                "name": "AX_CLUSTER_NAME_ID",
                "value": self._cluster_name_id
            },
            {
                "name": "AX_CUSTOMER_ID",
                "value": AXCustomerId().get_customer_id()
            },
        ]

        # Special cases for daemons
        if is_daemon:
            if resource_updated:
                default_envs += [
                    {
                        "name": "CPU_MULT",
                        "value": str(self.daemon_cpu_mult)
                    },
                    {
                        "name": "MEM_MULT",
                        "value": str(self.daemon_mem_mult)
                    },
                ]
            else:
                default_envs += [
                    {
                        "name": "CPU_MULT",
                        "value": "1.0"
                    },
                    {
                        "name": "MEM_MULT",
                        "value": "1.0"
                    },
                ]
        else:
            default_envs += [
                {
                    "name": "CPU_MULT",
                    "value": str(self.cpu_mult)
                },
                {
                    "name": "MEM_MULT",
                    "value": str(self.mem_mult)
                },
            ]

        rst = []
        for d in default_envs:
            var = V1EnvVar()
            var.name = d["name"]

            if d.get("path", None):
                field = V1ObjectFieldSelector()
                field.field_path = d["path"]
                src = V1EnvVarSource()
                src.field_ref = field
                var.value_from = src
            else:
                var.value = d["value"]
            rst.append(var)
        return rst
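
A usage sketch for AXSYSKubeYamlUpdater; the manifest path is illustrative, and the property names come from the class above:

updater = AXSYSKubeYamlUpdater("/ax/config/service/axops-svc.yml.in")  # hypothetical path
print(updater.updated_raw)                     # resource-adjusted manifests, dumped as YAML
swagger_objs = updater.components_in_swagger   # objects that kube_object.py can consume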
Example 16
class Application(object):
    """
    Create an Application which maps to a kubernetes namespace
    """
    def __init__(self, name, client=None):
        self.name = name
        if client is None:
            self._client = KubernetesApiClient(use_proxy=True)
        else:
            self._client = client

        self._registry_spec = None
        self._software_info = SoftwareInfo()
        if self._software_info.registry_is_private():
            secret = KubeObjectConfigFile(
                DEFAULT_SECRET_YAML_PATH,
                {"REGISTRY_SECRETS": self._software_info.registry_secrets})
            for obj in secret.get_swagger_objects():
                if isinstance(obj, swagger_client.V1Secret):
                    self._registry_spec = obj
            assert self._registry_spec, "Argo registry specification is missing"

        self._am_service_spec = None
        self._am_deployment_spec = None

        # AA-2471: Hack to add AXOPS_EXT_DNS to Application Manager
        elb = InternalRoute("axops", "axsys", client=self._client)
        elb_status = elb.status(with_loadbalancer_info=True)["loadbalancer"][0]
        if not elb_status:
            raise AXPlatformException(
                "Could not get axops elb address {}".format(elb_status))

        replacements = {
            "NAMESPACE": self._software_info.image_namespace,
            "VERSION": self._software_info.image_version,
            "REGISTRY": self._software_info.registry,
            "APPLICATION_NAME": self.name,
            "AXOPS_EXT_DNS": elb_status
        }
        cluster_name_id = os.getenv("AX_CLUSTER_NAME_ID", None)
        assert cluster_name_id, "Cluster name id is None!"
        cluster_config = AXClusterConfig(cluster_name_id=cluster_name_id)
        if cluster_config.get_cluster_provider() != ClusterProvider.USER:
            axam_path = DEFAULT_AM_YAML_PATH
        else:
            axam_path = "/ax/config/service/argo-all/axam-svc.yml.in"
            replacements["ARGO_DATA_BUCKET_NAME"] = os.getenv(
                "ARGO_DATA_BUCKET_NAME")

        logger.info("Using replacements: %s", replacements)

        k = KubeObjectConfigFile(axam_path, replacements)
        for obj in k.get_swagger_objects():
            if isinstance(obj, swagger_client.V1Service):
                self._am_service_spec = obj
            elif isinstance(obj, swagger_client.V1beta1Deployment):
                self._am_deployment_spec = obj
                self._add_pod_metadata("deployment",
                                       self._am_deployment_spec.metadata.name,
                                       is_label=True)
                self._add_pod_metadata(
                    "ax_costid",
                    json.dumps({
                        "app": self.name,
                        "service": "axam-deployment",
                        "user": "******"
                    }))
            else:
                logger.debug("Ignoring specification of type {}".format(
                    type(obj)))
        assert self._am_service_spec and self._am_deployment_spec, "Application monitor specification is missing"

    def _add_pod_metadata(self, key, value, is_label=False):
        """
        Helper function to add metadata to deployment pod spec for AXAM
        """
        pod_meta = self._am_deployment_spec.spec.template.metadata
        if is_label:
            if pod_meta.labels is None:
                pod_meta.labels = {}
            pod_meta.labels[key] = value
        else:
            if pod_meta.annotations is None:
                pod_meta.annotations = {}
            pod_meta.annotations[key] = value

    def create(self, force_recreate=False):
        """
        Create a kubernetes namespace and populate it with argo registry

        Idempotency: This function will be idempotent as long as the content
        of the secret is not changed. If create is called with a registry secret
        that has been updated and the namespace with the secret already exists
        then it will not update the secret for now.
        """
        @retry_not_exists
        def create_ns_in_provider():
            namespace = swagger_client.V1Namespace()
            namespace.metadata = swagger_client.V1ObjectMeta()
            namespace.metadata.name = self.name
            self._client.api.create_namespace(namespace)

        # NOTE: 403 is not retried as application is getting deleted in parallel
        # 422 is unprocessable object (aka error in spec)
        @retry_unless(status_code=[403, 422])
        def create_reg_in_provider():
            if self._registry_spec is None:
                return
            try:
                self._client.api.create_namespaced_secret(
                    self._registry_spec, self.name)
            except swagger_client.rest.ApiException as e:
                if e.status == 409:
                    self._client.api.patch_namespaced_secret(
                        self._registry_spec.to_dict(), self.name,
                        self._registry_spec.metadata.name)
                else:
                    raise e

        @retry_unless(status_code=[403, 422])
        def create_app_monitor_service_in_provider():
            try:
                self._client.api.create_namespaced_service(
                    self._am_service_spec, self.name)
            except swagger_client.rest.ApiException as e:
                if e.status == 409:
                    self._client.api.patch_namespaced_service(
                        self._am_service_spec.to_dict(), self.name,
                        self._am_service_spec.metadata.name)
                else:
                    raise e

        @retry_unless(status_code=[403, 422])
        def create_app_monitor_deployment_in_provider():
            try:
                self._client.apisappsv1beta1_api.create_namespaced_deployment(
                    self._am_deployment_spec, self.name)
            except swagger_client.rest.ApiException as e:
                if e.status == 409:
                    if force_recreate:
                        # add a new metadata in pod spec to force the recreation of pods
                        self._add_pod_metadata(
                            "applatix.io/force-recreate-salt",
                            str(uuid.uuid4()))

                    self._client.apisappsv1beta1_api.replace_namespaced_deployment(
                        self._am_deployment_spec, self.name,
                        self._am_deployment_spec.metadata.name)
                else:
                    raise e

        try:
            logger.debug("Creating application {}".format(self.name))
            create_ns_in_provider()
            logger.debug("Created namespace {}".format(self.name))
            create_reg_in_provider()
            create_app_monitor_service_in_provider()
            logger.debug("Created application monitor service {}".format(
                self._am_service_spec.metadata.name))
            create_app_monitor_deployment_in_provider()
            logger.debug("Created application monitor deployment {}".format(
                self._am_deployment_spec.metadata.name))
        except Exception as e:
            logger.exception(e)

    def delete(self, timeout=None):
        """
        Delete a kubernetes namespace and image secret for Argo

        Idempotency: Can be repeatedly called
        """
        delete_grace_period = 1
        options = swagger_client.V1DeleteOptions()
        options.grace_period_seconds = delete_grace_period
        options.orphan_dependents = False

        @retry_unless(swallow_code=[404, 409])
        def delete_ns_in_provider():
            """
            404 (not found) and 409 (conflict) are swallowed rather than retried.
            404 means the namespace is already gone. 409 happens when application
            delete was requested but not yet complete and another request came in.
            """
            logger.debug("Deleting application {}".format(self.name))
            self._client.api.delete_namespace(options, self.name)

        delete_ns_in_provider()

        start_time = time.time()
        while self.exists():
            logger.debug("Application {} still exists".format(self.name))
            time.sleep(delete_grace_period + 1)
            wait_time = int(time.time() - start_time)
            if timeout is not None and wait_time > timeout:
                raise AXTimeoutException(
                    "Could not delete namespace {} in {} seconds".format(
                        self.name, timeout))

    def exists(self):
        @retry_unless_not_found
        def get_ns_in_provider():
            try:
                self._client.api.read_namespace(self.name)
                return True
            except swagger_client.rest.ApiException as e:
                if e.status == 404:
                    return False
                else:
                    raise e

        return get_ns_in_provider()

    def status(self):
        """
        This function checks the following:
        1. Namespace exists?
        2. Argo Registry exists?
        3. Application Monitor exists?
        Returns:
            A json dict with the status of each
            {
                'namespace': True/False,
                'registry': True/False,
                'monitor': True/False
            }
        """
        ret = {'namespace': False, 'registry': False, 'monitor': False}

        if not self.exists():
            return ret

        ret['namespace'] = True
        ns = self._get_registry_from_provider()
        if ns is None:
            return ret

        ret['registry'] = True
        srv = self._get_am_service_from_provider()
        if srv is None:
            return ret

        am_dep = self._get_am_deployment_from_provider()
        if am_dep is not None and am_dep.status.available_replicas == am_dep.status.replicas:
            ret["monitor"] = True

        return ret

    def healthy(self):
        """
        If all components are present/healthy then return True
        else return False
        """
        d = self.status()
        for component in d:
            if not d[component]:
                return False
        return True

    def events(self, name=None):
        return self._get_events_from_provider(name).items

    @retry_unless(swallow_code=[404])
    def _get_registry_from_provider(self):
        if self._registry_spec is not None:
            return self._client.api.read_namespaced_secret(
                self.name, self._registry_spec.metadata.name)
        else:
            return "NotNeeded"

    @retry_unless(swallow_code=[404])
    def _get_am_service_from_provider(self):
        return self._client.api.read_namespaced_service(
            self.name, self._am_service_spec.metadata.name)

    @retry_unless(swallow_code=[404])
    def _get_am_deployment_from_provider(self):
        return self._client.apisappsv1beta1_api.read_namespaced_deployment(
            self.name, self._am_deployment_spec.metadata.name)

    @retry_unless(swallow_code=[404])
    def _get_events_from_provider(self, name):
        # XXX: For some reason list_namespaced_event does not take a namespace but the _21 version
        #      of the function does. Hopefully this gets fixed in swagger soon
        field_selector = None
        if name is not None:
            field_selector = "involvedObject.name={}".format(name)
        return self._client.api.list_namespaced_event(
            self.name, field_selector=field_selector)
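
A lifecycle sketch for the Application class above; the application name is illustrative:

app = Application("my-app")
app.create()                  # idempotent: namespace + registry secret + monitor
if not app.healthy():
    logger.warning("Application components missing: %s", app.status())
app.delete(timeout=300)       # blocks until the namespace is actually gone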
Example 17
class ClusterResumer(ClusterOperationBase):
    def __init__(self, cfg):
        assert isinstance(cfg, ClusterRestartConfig)
        self._cfg = cfg
        super(ClusterResumer,
              self).__init__(cluster_name=self._cfg.cluster_name,
                             cluster_id=self._cfg.cluster_id,
                             cloud_profile=self._cfg.cloud_profile,
                             dry_run=self._cfg.dry_run)

        # This will raise an exception if the name/id mapping cannot be found
        self._name_id = self._idobj.get_cluster_name_id()
        self._cluster_info = AXClusterInfo(cluster_name_id=self._name_id,
                                           aws_profile=self._cfg.cloud_profile)
        self._cluster_config = AXClusterConfig(
            cluster_name_id=self._name_id, aws_profile=self._cfg.cloud_profile)
        self._master_manager = AXMasterManager(
            cluster_name_id=self._name_id,
            region=self._cluster_config.get_region(),
            profile=self._cfg.cloud_profile)
        self._bootstrap_obj = AXBootstrap(
            cluster_name_id=self._name_id,
            aws_profile=self._cfg.cloud_profile,
            region=self._cluster_config.get_region())

        # Initialize node count to 1 as the master is not in an auto scaling group
        self._total_nodes = 1
        self._cidr = str(get_public_ip()) + "/32"
        self._software_info = SoftwareInfo(info_dict=yaml.load(
            self._cluster_info.download_cluster_software_info()))

    def pre_run(self):
        if self._cluster_info.is_cluster_supported_by_portal():
            raise RuntimeError(
                "Cluster is currently supported by portal. Please login to portal to perform cluster management operations."
            )

        if self._csm.is_running():
            logger.info("Cluster is already running.")
            sys.exit(0)
        if not check_cluster_staging(cluster_info_obj=self._cluster_info,
                                     stage="stage2"):
            raise RuntimeError(
                "Cluster is not successfully installed: Stage2 information missing! Operation aborted."
            )
        self._csm.do_resume()
        self._persist_cluster_state_if_needed()

    def post_run(self):
        self._csm.done_resume()
        self._persist_cluster_state_if_needed()

    def run(self):
        if self._cfg.dry_run:
            logger.info("DRY RUN: Resuming cluster %s with software info %s",
                        self._name_id, self._software_info.to_dict())
            return

        logger.info("%s\n\nResuming cluster %s%s\n", COLOR_GREEN,
                    self._name_id, COLOR_NORM)
        # Main resume cluster routine
        try:
            self._master_manager.restart_master()
            self._recover_auto_scaling_groups()
            self._wait_for_master()
            self._ensure_restarter_access()
            self._wait_for_minions()
            ensure_manifest_temp_dir()
            self._start_platform()
            logger.info("\n\n%sSuccessfully resumed cluster %s%s\n",
                        COLOR_GREEN, self._name_id, COLOR_NORM)
        except Exception as e:
            logger.exception(e)
            raise RuntimeError(e)
        finally:
            self._disallow_restarter_access_if_needed()

    def _start_platform(self):
        """
        This step brings up Argo platform services
        :return:
        """
        logger.info("Bringing up Argo platform ...")

        self._cluster_info.download_platform_manifests_and_config(
            target_platform_manifest_root=TEMP_PLATFORM_MANIFEST_ROOT,
            target_platform_config_path=TEMP_PLATFORM_CONFIG_PATH)

        platform = AXPlatform(cluster_name_id=self._name_id,
                              aws_profile=self._cfg.cloud_profile,
                              manifest_root=TEMP_PLATFORM_MANIFEST_ROOT,
                              config_file=TEMP_PLATFORM_CONFIG_PATH,
                              software_info=self._software_info)
        platform.start()
        platform.stop_monitor()

    def _wait_for_master(self):
        """
        This step waits for master to be up and running
        :return:
        """
        count = 0
        running_master = None
        while count < WAIT_FOR_RUNNING_MASTER_RETRY:
            logger.info(
                "Waiting for master to be up and running. Trail %s / %s",
                count, WAIT_FOR_RUNNING_MASTER_RETRY)
            running_master = self._master_manager.discover_master(
                state=[EC2InstanceState.Running])
            if not running_master:
                time.sleep(5)
            else:
                logger.info("%sMaster %s is running%s", COLOR_GREEN,
                            running_master, COLOR_NORM)
                break
            count += 1
        if count == WAIT_FOR_RUNNING_MASTER_RETRY:
            raise RuntimeError(
                "Timeout waiting for master {} to come up. Please manually check cluster status"
                .format(running_master))

    def _wait_for_minions(self):
        """
        This step waits for all minions to come up and registered in Kubernetes master
        :return:
        """
        # Get kubernetes access token
        self._cluster_info.download_kube_config()
        kube_config = self._cluster_info.get_kube_config_file_path()

        # Wait for nodes to be ready.
        # Because we made sure during pause that kubernetes master already knows that all minions are gone,
        # we don't need to worry about cached minions here
        logger.info("Wait 120 seconds before Kubernetes master comes up ...")
        time.sleep(120)
        kubectl = KubernetesApiClient(config_file=kube_config)
        logger.info("Waiting for all Kubelets to be ready ...")

        trial = 0
        while True:
            try:
                all_kubelets_ready = True
                nodes = kubectl.api.list_node()
                logger.info("%s / %s nodes registered", len(nodes.items),
                            self._total_nodes)
                if len(nodes.items) < self._total_nodes:
                    all_kubelets_ready = False
                else:
                    for n in nodes.items:
                        kubelet_check = {
                            "KubeletHasSufficientDisk",
                            "KubeletHasSufficientMemory",
                            "KubeletHasNoDiskPressure", "KubeletReady",
                            "RouteCreated"
                        }
                        for cond in n.status.conditions:
                            if cond.reason in kubelet_check:
                                kubelet_check.remove(cond.reason)
                        if kubelet_check:
                            logger.info(
                                "Node %s not ready yet. Remaining Kubelet checkmarks: %s",
                                n.metadata.name, kubelet_check)
                            all_kubelets_ready = False
                            break
                        else:
                            logger.info("Node %s is ready.", n.metadata.name)
                if all_kubelets_ready:
                    logger.info("All Kubelets are ready")
                    break
            except Exception as e:
                if "Max retries exceeded" in str(e):
                    # If the master API server is still not ready at this moment, we don't count this as a trial
                    trial -= 1
                    logger.info("Kubernetes API server not ready yet")
                else:
                    logger.exception("Caught exception when listing nodes: %s",
                                     e)
            trial += 1
            if trail > WAIT_FOR_MINION_REG_RETRY:
                raise RuntimeError(
                    "Timeout waiting for minions to come up. Please manually check cluster status"
                )
            time.sleep(10)

    def _recover_auto_scaling_groups(self):
        """
        This step does the following:
            - fetch the previously persisted auto scaling group config. If this config cannot be found,
              we can assume that all autoscaling groups have correct configurations. This could happen
              when previous restart failed in the middle but passed this stage already, or the cluster is
              not even paused
            - Wait for all instances to be in service
        :return:
        """
        # Get previously persisted asg status
        logger.info("Fetching last cluster status ...")
        cluster_status_raw = self._cluster_info.download_cluster_status_before_pause(
        )

        asg_mgr = AXUserASGManager(cluster_name_id=self._name_id,
                                   aws_profile=self._cfg.cloud_profile,
                                   region=self._cluster_config.get_region())

        if cluster_status_raw:
            logger.info("Found last cluster status, restoring cluster ...")
            cluster_status = yaml.load(cluster_status_raw)
            all_asg_statuses = cluster_status["asg_status"]

            # Restore minions
            for asg_name in all_asg_statuses.keys():
                asg_status = all_asg_statuses[asg_name]
                min_size = asg_status["min_size"]
                max_size = asg_status["max_size"]
                desired = asg_status["desired_capacity"]
                self._total_nodes += desired
                logger.info(
                    "Recovering autoscaling group %s. Min: %s, Max: %s, Desired: %s",
                    asg_name, min_size, max_size, desired)
                asg_mgr.set_asg_spec(name=asg_name,
                                     minsize=min_size,
                                     maxsize=max_size,
                                     desired=desired)

            logger.info("Waiting for all auto scaling groups to scale up ...")
            asg_mgr.wait_for_desired_asg_state()
            logger.info("%sAll cluster instances are in service%s",
                        COLOR_GREEN, COLOR_NORM)

            # Delete previously stored cluster status
            self._cluster_info.delete_cluster_status_before_pause()
        else:
            all_asgs = asg_mgr.get_all_asgs()
            for asg in all_asgs:
                self._total_nodes += asg["DesiredCapacity"]

            logger.info(
                "Cannot find last cluster status, cluster already resumed with %s nodes",
                self._total_nodes)

    def _ensure_restarter_access(self):
        if self._cidr not in self._cluster_config.get_trusted_cidr():
            logger.info(
                "Restarting cluster from a not trusted IP (%s). Temporarily allowing access.",
                self._cidr)
            self._bootstrap_obj.modify_node_security_groups(
                old_cidr=[],
                new_cidr=[self._cidr],
                action_name="allow-cluster-manager")

    def _disallow_restarter_access_if_needed(self):
        if self._cidr not in self._cluster_config.get_trusted_cidr():
            logger.info(
                "Cluster was restarted from an untrusted IP (%s). Revoking temporary access.",
                self._cidr)
            self._bootstrap_obj.modify_node_security_groups(
                old_cidr=[self._cidr],
                new_cidr=[],
                action_name="disallow-cluster-manager")
Example n. 18
0
class Container(KubeObject):
    """
    Class for creating container specifications
    """

    LIVENESS_PROBE = 1
    READINESS_PROBE = 2

    def __init__(self, name, image, pull_policy=None):
        """
        Construct a container that will provide the spec for a kubernetes container
        http://kubernetes.io/docs/api-reference/v1/definitions/#_v1_container
        Args:
            name: name of a container. must be conformant to kubernetes container name
            image: image for container
            pull_policy: pull policy based on kubernetes. If None then kubernetes default is used
        """
        self.name = name
        self.image = image
        self.image_pull_policy = pull_policy

        self.command = None
        self.args = None

        self.vmap = {}
        self.env_map = {}
        self.ports = []

        self.resources = None
        self.privileged = None

        self.software_info = SoftwareInfo()
        self.probes = {}

    def generate_spec(self):
        c = swagger_client.V1Container()
        c.name = self.name
        c.image = self.image

        if self.resources is not None:
            c.resources = swagger_client.V1ResourceRequirements()
            c.resources.requests = {}
            c.resources.limits = {}
            if "cpu_cores" in self.resources:
                c.resources.requests["cpu"] = str(
                    self.resources["cpu_cores"][0])
                if self.resources["cpu_cores"][1] is not None:
                    c.resources.limits["cpu"] = str(
                        self.resources["cpu_cores"][1])

            if "mem_mib" in self.resources:
                c.resources.requests["memory"] = "{}Mi".format(
                    self.resources["mem_mib"][0])
                if self.resources["mem_mib"][1] is not None:
                    c.resources.limits["memory"] = "{}Mi".format(
                        self.resources["mem_mib"][1])

        # Kubernetes 1.5 requires that init containers specify an image pull policy. Since we
        # set a pull policy for all containers, we replicate the Kubernetes default behavior
        # of always pulling when the tag is "latest".
        if self.image.endswith(':latest'):
            c.image_pull_policy = ContainerImagePullPolicy.PullAlways
        else:
            c.image_pull_policy = self.image_pull_policy or ContainerImagePullPolicy.PullIfNotPresent
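        # e.g. (illustrative tags) "nginx:latest" -> PullAlways, while "nginx:1.9"
        # uses the configured policy, falling back to PullIfNotPresent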

        if self.command:
            c.command = self.command
        if self.args:
            c.args = self.args

        c.volume_mounts = []
        for _, vol in self.vmap.items():
            c.volume_mounts.append(vol.get_container_spec())

        c.env = []
        for _, env in self.env_map.items():
            c.env.append(env)

        if self.privileged is not None:
            c.security_context = swagger_client.V1SecurityContext()
            c.security_context.privileged = self.privileged

        for probe_type, probe_spec in self.probes.items():
            probe_k8s_spec = Container._generate_probe_spec(probe_spec)
            if probe_type == Container.LIVENESS_PROBE:
                c.liveness_probe = probe_k8s_spec
            elif probe_type == Container.READINESS_PROBE:
                c.readiness_probe = probe_k8s_spec
            else:
                raise AXIllegalArgumentException(
                    "Unexpected probe type {} found with spec {}".format(
                        probe_type, probe_spec))

        return c

    def add_resource_constraints(self, resource, request, limit=None):
        if self.resources is None:
            self.resources = {}
        self.resources[resource] = (request, limit)
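        # Illustrative usage (values are made up):
        #   c = Container("web", "nginx:1.9")
        #   c.add_resource_constraints("cpu_cores", 0.1, limit=0.5)
        #   c.add_resource_constraints("mem_mib", 256)  # request only, no limit
        #   spec = c.generate_spec()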

    def add_volume(self, volume):
        self.vmap[volume.name] = volume

    def add_volumes(self, volumes):
        for vol in volumes or []:
            self.add_volume(vol)

    def get_volume(self, name):
        return self.vmap.get(name, None)

    def add_env(self, name, value=None, value_from=None):
        env = swagger_client.V1EnvVar()
        env.name = name
        if value is not None:
            env.value = value
        else:
            assert value_from is not None, "value and value_from cannot both be None for env {}".format(
                name)
            env.value_from = swagger_client.V1EnvVarSource()
            env.value_from.field_ref = swagger_client.V1ObjectFieldSelector()
            env.value_from.field_ref.field_path = value_from
            # Some Kubernetes 1.5 releases require api_version to be set explicitly.
            # https://github.com/kubernetes/kubernetes/issues/39189
            env.value_from.field_ref.api_version = "v1"

        self.env_map[name] = env
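        # Illustrative usage (values are made up):
        #   c.add_env("AX_CLUSTER", value="my-cluster")     # literal value
        #   c.add_env("POD_IP", value_from="status.podIP")  # downward API field path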

    def add_probe(self, probe_type, probe_spec):
        self.probes[probe_type] = probe_spec

    def parse_probe_spec(self, container_template):
        """
        @type container_template: argo.template.v1.container.ContainerTemplate
        """
        if container_template.liveness_probe:
            probe_type = Container.LIVENESS_PROBE
            self.add_probe(probe_type, container_template.liveness_probe)
        if container_template.readiness_probe:
            probe_type = Container.READINESS_PROBE
            self.add_probe(probe_type, container_template.readiness_probe)

    def get_registry(self, namespace="axuser"):
        """
        This function returns the name of the secrets file that needs to be
        used in the pod specification image_pull_secrets array
        """
        (reg, _, _) = DockerImage(fullname=self.image).docker_names()
        if reg == self.software_info.registry:
            if self.software_info.registry_is_private():
                return "applatix-registry"
            else:
                return None
        else:
            try:
                smanager = SecretsManager()
                secret = smanager.get_imgpull(reg, namespace)
                if secret:
                    return secret.metadata.name

                # Copy the pull secret into the target namespace if it does not
                # already exist there. We never copy into axuser, as secrets
                # are always created there in the first place.
                secret_axuser = smanager.get_imgpull(reg, "axuser")
                if secret_axuser and namespace != "axuser":
                    smanager.copy_imgpull(secret_axuser, namespace)
                    return secret_axuser.metadata.name
            except Exception as e:
                logger.debug(
                    "Did not find a secret for registry {} due to exception {}"
                    .format(reg, e))
            return None

    def volume_iterator(self):
        for _, vol in self.vmap.items():
            yield vol

    @staticmethod
    def _generate_probe_spec(spec):
        """
        @type spec argo.template.v1.container.ContainerProbe
        """
        try:
            probe = swagger_client.V1Probe()
            probe.initial_delay_seconds = spec.initial_delay_seconds
            probe.timeout_seconds = spec.timeout_seconds
            probe.period_seconds = spec.period_seconds
            probe.failure_threshold = spec.failure_threshold
            probe.success_threshold = spec.success_threshold

            if spec.exec_probe:
                action = swagger_client.V1ExecAction()
                action.command = shlex.split(spec.exec_probe.command)
                probe._exec = action
                return probe
            elif spec.http_get:
                action = swagger_client.V1HTTPGetAction()
                action.path = spec.http_get.path
                action.port = spec.http_get.port
                headers = spec.http_get.http_headers
                action.http_headers = []
                for header in headers or []:
                    h = swagger_client.V1HTTPHeader()
                    h.name = header["name"]
                    h.value = header["value"]
                    action.http_headers.append(h)
                probe.http_get = action
                return probe
            else:
                logger.debug("Cannot handle probe {}".format(spec))
        except Exception as e:
            raise AXIllegalArgumentException(
                "Probe {} cannot be processed due to error {}".format(spec, e))

        return None