Ejemplo n.º 1
0
    def start(self):
        """
        Bring up platform using "platform-start.cfg" configuration from manifest directory
        :return:
        """
        # Generate kube-objects
        steps = self._config.steps
        self._load_kube_objects_from_steps(steps)

        if not self._cluster_config.get_cluster_provider().is_user_cluster():
            self._replacing = self._generate_replacing()
        else:
            self._replacing = self._generate_replacing_for_user_provisioned_cluster(
            )

        os.environ["AX_CLUSTER_NAME_ID"] = self._cluster_name_id
        logger.debug("Replacing ENVs: %s", self._replacing)

        # TODO: remove component's dependencies to AXOPS_EXT_DNS env (#32)
        # At this moment, we MUST separate first step due to the above dependency
        assert len(steps) >= 2, "Should have at least 1 step to create axops"
        self.create_objects(steps[0])
        self.create_objects(steps[1])
        self.create_objects(steps[2])

        # Prepare axops_eip
        if self._cluster_config.get_provider() not in ["minikube"]:
            self._set_ext_dns()

        info_bound = "=======================================================\n"
        img_namespace = "Image Namespace: {}\n".format(
            self._software_info.image_namespace)
        img_version = "Image Version: {}\n".format(
            self._software_info.image_version)
        start_info = "\n\n{}{}{}{}{}".format(
            info_bound, "Platform Up: Bringing up Argo services...\n",
            img_namespace, img_version, info_bound)
        logger.info(start_info)

        # Start rest of the objects
        for i in range(3, len(steps)):
            self.create_objects(steps[i])

        # update application namespace
        logger.info("Updating application managers")
        for app in Applications(client=self.kubectl).list():
            logger.info("--- updating {}".format(app))
            a = Application(app, client=self.kubectl)
            a.create(force_recreate=True)
        logger.info("Done updating application managers")

        # Upload version information to target cluster
        self._update_version()
        logger.info("\n\n%sCluster %s is up. Cluster is available at %s%s\n",
                    COLOR_GREEN, self._cluster_name_id, self.cluster_dns_name,
                    COLOR_NORM)
Ejemplo n.º 2
0
 def test_2(self):
     """
     Test assertion when application name exists
     """
     a1 = Application("testapp")
     a2 = Application("testapp")
     a1.create()
     with self.assertRaises(AXKubeApiException):
         a2.create()
     a1.delete()
Ejemplo n.º 3
0
Archivo: rest.py Proyecto: nuaays/argo
def axmon_application_status(applicationname):
    """
    Returns the status of the application
    Args:
        applicationname: the application name
    Returns: A json dict with the following format
        { 'result': { 'component': True/False } }
        where component is namespace, registry and monitor
    """
    application = Application(applicationname)
    status = application.status()
    return jsonify(result=status)
Ejemplo n.º 4
0
Archivo: rest.py Proyecto: nuaays/argo
def axmon_application_delete(applicationname):
    """
    Delete the application. This has an optional query parameter:
    timeout: In seconds.
    Returns: A json dict with the status of the request in the following format
        { 'result': 'ok' }
    """
    timeout = request.args.get('timeout', None)
    if timeout is not None:
        timeout = int(timeout)
    application = Application(applicationname)
    application.delete(timeout=timeout)
    return jsonify(result="ok")
Ejemplo n.º 5
0
Archivo: rest.py Proyecto: nuaays/argo
def axmon_application_create():
    """
    Create an application. If the application name already exists then
    this call will do nothing but also not report an exception. To find
    out if the application exists and its status, use the GET method.
    The body of the post must pass a dict with the following format:
    { 'name': 'somename' }
    The name value must be a DNS compatible name
    Returns:
        A json dict on success in the following format
        { 'result': 'ok' }
    """
    (applicationname, ) = _get_required_arguments('name')
    application = Application(applicationname)
    application.create()
    return jsonify(result="ok")
Ejemplo n.º 6
0
    def __init__(self, name, application):
        """
        Each deployment has a name and needs to be part of an application
        Application maps to a kubernetes namespace and the deployment will
        be created in this namespace.

        Args:
            name: deployment name
            application: the application that this deployment runs under
        """
        self.name = name
        self.application = application
        self.client = KubernetesApiClient(use_proxy=True)
        self._nameid = AXClusterId().get_cluster_name_id()
        self._software_info = SoftwareInfo()

        self._app_obj = Application(application)

        self.spec = None
Ejemplo n.º 7
0
 def test_1(self):
     """
     Tests simple application create and delete
     """
     a1 = Application("testapp")
     a1.create()
     a1.delete()
Ejemplo n.º 8
0
class Deployment(object):
    """
    This class creates and manages a single deployment object
    A deployment consists of the following specifications in kubernetes
    1. A kubernetes deployment spec
    2. Zero or more kubernetes service specs
    3. Zero or more ingress rules

    All functions in the object need to be idempotent.
    """
    def __init__(self, name, application):
        """
        Each deployment has a name and needs to be part of an application
        Application maps to a kubernetes namespace and the deployment will
        be created in this namespace.

        Args:
            name: deployment name
            application: the application that this deployment runs under
        """
        self.name = name
        self.application = application
        self.client = KubernetesApiClient(use_proxy=True)
        self._nameid = AXClusterId().get_cluster_name_id()
        self._software_info = SoftwareInfo()

        self._app_obj = Application(application)

        self.spec = None

    def create(self, spec):
        """
        Create a deployment from the template specified

        Idempotency: This function is idempotent. A create of identical spec will
        have no impact if the deployment already exists. If the spec is different
        then the existing deployment will be updated.
        """
        @retry_unless(status_code=[404, 422])
        def create_in_provider(k8s_spec):
            try:
                logger.info(
                    "Creating deployment %s in Kubernetes namespace %s",
                    self.name, self.application)
                self.client.apisappsv1beta1_api.create_namespaced_deployment(
                    k8s_spec, self.application)
                logger.info(
                    "Done creating deployment %s in Kubernetes namespace %s",
                    self.name, self.application)
            except swagger_client.rest.ApiException as e:
                if e.status == 409:
                    self.client.apisappsv1beta1_api.replace_namespaced_deployment(
                        k8s_spec, self.application, self.name)
                else:
                    raise e

        with DeploymentOperation(self):

            self.spec = spec

            # Do some template checks
            self._template_checks()

            # First create supplemental resources such as routes, ingress rules etc
            resources = self._create_deployment_resources()

            # Now create the deployment spec
            d_spec = self._create_deployment_spec()

            # Store the resources in the deployment spec
            resources.finalize(d_spec)

            # Create the deployment object in kubernetes
            create_in_provider(d_spec)

    def delete(self, timeout=None):
        """
        Delete the deployment.

        Idempotency: This function is idempotent. If deployment does not exist then
        delete will silently fail without raising any exceptions.
        Args:
            timeout: In seconds or None for infinite
        """
        options = swagger_client.V1DeleteOptions()
        options.grace_period_seconds = 1
        options.orphan_dependents = False

        def check_result(result):
            # True for retry False for done
            return not result

        @retry(retry_on_result=check_result,
               wait_fixed=2000,
               stop_max_delay=timeout)
        def wait_for_scale_to_zero():
            logger.debug("Wait for scale of deployment to 0 for {} {}".format(
                self.application, self.name))

            @retry_unless(swallow_code=[404])
            def get_scale_from_provider():
                return self.client.apisappsv1beta1_api.read_namespaced_scale_scale(
                    self.application, self.name)

            scale = get_scale_from_provider()
            if scale is None:
                return True
            if scale.status.replicas == 0:
                return True

            return False

        @retry_unless(swallow_code=[404, 409])
        def delete_in_provider():
            logger.debug("Deleting deployment for {} {}".format(
                self.application, self.name))
            self.client.apisappsv1beta1_api.delete_namespaced_deployment(
                options, self.application, self.name)

        def delete_rs_in_provider():
            logger.debug("Deleting replica set for {} {}".format(
                self.application, self.name))
            self.client.extensionsv1beta1.deletecollection_namespaced_replica_set(
                self.application,
                label_selector="deployment={}".format(self.name))

        # now delete deployment object and replication set
        with DeploymentOperation(self):
            dep_obj = self._deployment_status()
            self._scale_to(0)
            wait_for_scale_to_zero()

            if dep_obj:
                resources = AXResources(existing=dep_obj)
                resources.delete_all()

            delete_in_provider()
            delete_rs_in_provider()

    def status(self):
        """
        Get the status of the deployment.
        Returns: Returns the entire V1Deployment as a dict.
        If deployment is not found then this will raise an AXNotFoundException (404)
        """
        # STEP 1: Get status of deployment
        stat = self._deployment_status()
        if stat is None:
            raise AXNotFoundException(
                "Deployment {} not found in application {}".format(
                    self.name, self.application))

        dep_field_map = {
            "name": "metadata.name",
            "generation": "metadata.annotations.ax_generation",
            "desired_replicas": "status.replicas",
            "available_replicas": "status.available_replicas",
            "unavailable_replicas": "status.unavailable_replicas"
        }
        ret = KubeObject.swagger_obj_extract(stat,
                                             dep_field_map,
                                             serializable=True)

        # STEP 2: Get the pods for the deployment and events associated
        podlist = self._deployment_pods().items
        dep_events = self._app_obj.events(name=self.name)
        event_field_map = {
            "message": "message",
            "reason": "reason",
            "source": "source.component",
            "host": "source.host",
            "firstTS": "first_timestamp",
            "lastTS": "last_timestamp",
            "count": "count",
            "container": "involved_object.field_path",
            "type": "type"
        }
        ret["events"] = []
        for event in dep_events:
            ret["events"].append(
                KubeObject.swagger_obj_extract(event,
                                               event_field_map,
                                               serializable=True))

        ret["pods"] = []
        for pod in podlist or []:
            # fill pod status and containers
            pstatus = Pod.massage_pod_status(pod)

            # fill events for pod
            pstatus["events"] = []

            events = self._app_obj.events(name=pod.metadata.name)
            for event in events:
                pstatus["events"].append(
                    KubeObject.swagger_obj_extract(event,
                                                   event_field_map,
                                                   serializable=True))

            # fill pod failure information for pod based on events
            pstatus["failure"] = Deployment._pod_failed_info(pstatus)

            ret["pods"].append(pstatus)

        # STEP 3: From the deployment spec get the resources created by deployment
        # TODO: Add this when services are created by deployment

        return ret

    def get_labels(self):
        """
        Get a dict of labels used for this deployment
        """
        state = self._deployment_status()
        if state is None:
            raise AXNotFoundException(
                "Did not find deployment {} in application {}".format(
                    self.name, self.application))

        return KubeObject.swagger_obj_extract(
            state, {"labels": "spec.selector.match_labels"})['labels']

    @staticmethod
    def _pod_failed_info(pod_status):
        if pod_status["phase"] != "Pending":
            return None

        for ev in pod_status["events"] or []:
            if ev["reason"] == "Failed" and ev["source"] == "kubelet" and ev["type"] == "Warning" and \
                            "Failed to pull image" in ev["message"] and ev["count"] > 5:
                return {"reason": "ImagePullFailure", "message": ev["message"]}
        return None

    def scale(self, replicas):
        with DeploymentOperation(self):
            # Deployments with volumes can't be scaled to > 1.
            if replicas > 1:
                dep_obj = self._deployment_status()
                if dep_obj:
                    resources = AXResources(existing=dep_obj)
                    for type in resources.get_all_types():
                        if type.startswith("ax.platform.volumes"):
                            raise AXApiForbiddenReq(
                                "Deployments with volumes can't be scaled to > 1 ({})"
                                .format(replicas))

            self._scale_to(replicas)

    @retry_unless(swallow_code=[404])
    def _deployment_status(self):
        return self.client.apisappsv1beta1_api.read_namespaced_deployment(
            self.application, self.name)

    @retry_unless(swallow_code=[404])
    def _deployment_pods(self):
        return self.client.api.list_namespaced_pod(
            self.application, label_selector="deployment={}".format(self.name))

    def _create_deployment_spec(self):

        pod_spec = PodSpec(self.name, namespace=self.application)
        main_container = self.spec.template.get_main_container()

        main_container_spec = self._create_main_container_spec(main_container)
        pod_spec.add_main_container(main_container_spec)

        container_vols = self._get_main_container_vols()
        main_container_spec.add_volumes(container_vols)

        hw_res = main_container.get_resources()
        main_container_spec.add_resource_constraints("cpu_cores",
                                                     hw_res.cpu_cores,
                                                     limit=None)
        main_container_spec.add_resource_constraints("mem_mib",
                                                     hw_res.mem_mib,
                                                     limit=None)

        artifacts_container = pod_spec.enable_artifacts(
            self._software_info.image_namespace,
            self._software_info.image_version, None, main_container.to_dict())

        # Set up special circumstances based on annotations
        # Check if we need to circumvent the executor script. This is needed for containers that run
        # special init processes such as systemd as these processes like to be pid 1
        if main_container.executor_spec:
            main_container_spec.command = None
            if main_container.docker_spec is not None:
                raise ValueError(
                    "We do not support ax_ea_docker_enable with ax_ea_executor"
                )

        # Does this container need to be privileged
        main_container_spec.privileged = main_container.privileged

        # Check if docker daemon sidecar needs to be added
        if main_container.docker_spec:
            # graph storage size is specified in GiB
            dind_container_spec = pod_spec.enable_docker(
                main_container.docker_spec.graph_storage_size_mib)
            dind_container_spec.add_volumes(pod_spec.get_artifact_vols())
            dind_container_spec.add_resource_constraints(
                "cpu_cores", main_container.docker_spec.cpu_cores, limit=None)
            dind_container_spec.add_resource_constraints(
                "mem_mib", main_container.docker_spec.mem_mib, limit=None)
            dind_container_spec.add_volumes(container_vols)

        # Do we only need docker graph storage volume for the main container
        if main_container.graph_storage:
            dgs_vol = ContainerVolume("graph-storage-vol-only",
                                      main_container.graph_storage.mount_path)
            dgs_vol.set_type(
                "DOCKERGRAPHSTORAGE",
                main_container.graph_storage.graph_storage_size_mib)
            main_container_spec.add_volume(dgs_vol)

        # set the pod hostname to value provided in main container spec
        pod_spec.hostname = main_container.hostname

        # TODO: This needs fixup. job name is used in init container to ask permission to start
        # TODO: Don't know if this is needed in deployment or not?
        artifacts_container.add_env("AX_JOB_NAME", value=self.application)
        artifacts_container.add_env("AX_DEPLOYMENT_NEW", value="True")

        if len(container_vols) > 0:
            tmp_container_vols = copy.deepcopy(container_vols)
            volume_paths = []
            for v in tmp_container_vols:
                v.set_mount_path("/ax/fix" + v.volmount.mount_path)
                volume_paths.append(v.volmount.mount_path)
            artifacts_container.add_volumes(tmp_container_vols)
            logger.info("Volumes to chmod: %s", volume_paths)
            artifacts_container.add_env("AX_VOL_MOUNT_PATHS",
                                        value=str(volume_paths))

        # add annotation for service env which will show up in artifacts container
        pod_spec.add_annotation("AX_SERVICE_ENV",
                                self._generate_service_env(self.spec.template))
        pod_spec.add_annotation("AX_IDENTIFIERS", self._get_identifiers())
        if self.spec.costid:
            pod_spec.add_annotation("ax_costid", json.dumps(self.spec.costid))

        pod_spec.add_label("deployment", self.name)
        pod_spec.add_label("application", self.application)
        pod_spec.add_label("tier", "user")
        pod_spec.add_label("deployment_id", self.spec.id)

        # now that pod is ready get its spec and wrap it in a deployment
        k8s_spec = self._generate_deployment_spec_for_pod(pod_spec.get_spec())

        logger.info("Generated Kubernetes spec for deployment %s", self.name)
        return k8s_spec

    @staticmethod
    def _create_main_container_spec(container_template):
        """
        :type container_template: argo.template.v1.container.ContainerTemplate
        :rtype Container
        """
        logger.debug("Container template is {}".format(container_template))

        name = string_to_dns_label(container_template.name)
        container_spec = Container(
            name,
            container_template.image,
            pull_policy=container_template.image_pull_policy)
        container_spec.parse_probe_spec(container_template)

        # Necessary envs for handshake
        container_spec.add_env("AX_HANDSHAKE_VERSION",
                               value=CUR_RECORD_VERSION)

        # Envs introduced to user
        container_spec.add_env("AX_POD_NAME", value_from="metadata.name")
        container_spec.add_env("AX_POD_IP", value_from="status.podIP")
        container_spec.add_env("AX_POD_NAMESPACE",
                               value_from="metadata.namespace")
        container_spec.add_env("AX_NODE_NAME", value_from="spec.nodeName")
        container_spec.add_env("AX_CLUSTER_META_URL_V1",
                               value=CLUSTER_META_URL_V1)

        # envs from user spec
        for env in container_template.env:
            container_spec.add_env(env.name, value=env.value)

        # Unix socket for applet
        applet_sock = ContainerVolume("applet", "/tmp/applatix.io/")
        applet_sock.set_type("HOSTPATH", "/var/run/")
        container_spec.add_volume(applet_sock)

        return container_spec

    @staticmethod
    def _get_valid_name_from_axrn(axrn):
        # AXRN's will have non-alphanumeric characters such as : / @, etc which K8S doesn't
        # like in its PVC name. Replace all non-alphanumeric characters with -.
        name_regex = re.compile(r"\W+")
        return name_regex.sub("-", axrn).replace("_", "-")

    def _get_main_container_vols(self):
        container_template = self.spec.template.get_main_container()
        ret = []

        for vol_name, vol in iteritems(container_template.inputs.volumes):
            # sanitize the volume name for kubernetes
            vol_name = string_to_dns_label(vol_name)
            cvol = ContainerVolume(vol_name, vol.mount_path)
            assert "resource_id" in vol.details, "Volume resource-id absent in volume details"
            assert "filesystem" in vol.details, "Volume filesystem absent in volume details"
            cvol.set_type("AWS_EBS", vol_name, vol.details["resource_id"],
                          vol.details["filesystem"])
            logger.debug("Volume {} {} mounted at {}".format(
                vol_name, vol.details, vol.mount_path))
            ret.append(cvol)

        return ret

    def _generate_service_env(self, template):
        return base64.b64encode(json.dumps(template.to_dict()))

    def _get_identifiers(self):
        return {
            "application_id": self.spec.app_generation,
            "deployment_id": self.spec.id,
            "static": {
                "application_id": self.spec.app_id,
                "deployment_id": self.spec.deployment_id
            }
        }

    def _generate_deployment_spec_for_pod(self, pod_spec):

        metadata = swagger_client.V1ObjectMeta()
        metadata.name = self.name

        dspec = swagger_client.V1beta1DeploymentSpec()
        dspec.strategy = self._get_strategy()
        if self.spec.template.min_ready_seconds:
            dspec.min_ready_seconds = self.spec.template.min_ready_seconds
        dspec.selector = swagger_client.V1LabelSelector()
        dspec.selector.match_labels = {"deployment": self.name}

        dspec.replicas = self.spec.template.scale.min
        dspec.template = pod_spec

        deployment_obj = swagger_client.V1beta1Deployment()
        deployment_obj.metadata = metadata
        deployment_obj.spec = dspec
        return deployment_obj

    def _create_deployment_resources(self):
        res = AXResources()

        for route in self.spec.template.internal_routes:
            # ignore empty port spec
            if len(route.ports) == 0:
                logger.debug(
                    "Skipping internal route {} as port spec is empty".format(
                        route.name))
                continue
            ir = InternalRoute(route.name, self.application)
            ir.create(route.to_dict()["ports"],
                      selector={"deployment": self.name},
                      owner=self.name)
            res.insert(ir)
            logger.debug("Created route {}".format(ir))

        for route in self.spec.template.external_routes:

            dns_name = route.dns_name()
            if dns_name.endswith("."):
                dns_name = dns_name[:-1]

            r = ExternalRoute(dns_name, self.application,
                              {"deployment": self.name}, route.target_port,
                              route.ip_white_list, route.visibility)
            try:
                elb_addr = visibility_to_elb_addr(route.visibility)
                elb_name = visibility_to_elb_name(route.visibility)
            except AXNotFoundException:
                if route.visibility == ExternalRouteVisibility.VISIBILITY_WORLD:
                    raise AXNotFoundException(
                        "Could not find the public ELB. Please report this error to Applatix Support at [email protected]"
                    )
                else:
                    assert route.visibility == ExternalRouteVisibility.VISIBILITY_ORGANIZATION, "Only world and organization are currently supported as visibility attributes"
                    raise AXNotFoundException(
                        "Please create a private ELB using the template named 'ax_private_elb_creator_workflow' before using 'visibility=organization'"
                    )

            name = r.create(elb_addr, elb_name=elb_name)
            res.insert(r)
            logger.debug("Created external route {} for {}/{}/{}".format(
                name, self.application, self.name, dns_name))

        main_container = self.spec.template.get_main_container()
        for key_name, vol in iteritems(main_container.inputs.volumes):
            assert "resource_id" in vol.details, "Volume resource_id absent in volume details"
            name = vol.details.get("axrn", None)
            resource_id = vol.details.get("resource_id", None)
            assert name is not None and resource_id is not None, "axrn and resource_id are required details for volume {}".format(
                key_name)
            nv_res = AXNamedVolumeResource(name, resource_id)
            nv_res.create()
            res.insert(nv_res)
            logger.debug(
                "Using named volume resource {} in application {}".format(
                    name, self.application))

        return res

    @retry_unless(status_code=[422], swallow_code=[400, 404])
    def _scale_to(self, replicas):
        logger.debug("Scaling deployment to {} for {} {}".format(
            replicas, self.application, self.name))
        scale = swagger_client.V1beta1Scale()
        scale.spec = swagger_client.V1beta1ScaleSpec()
        scale.spec.replicas = replicas
        scale.metadata = swagger_client.V1ObjectMeta()
        scale.metadata.name = self.name
        scale.metadata.namespace = self.application
        self.client.apisappsv1beta1_api.replace_namespaced_scale_scale(
            scale, self.application, self.name)

    def _template_checks(self):
        if self.spec.template.scale and self.spec.template.scale.min > 1 and len(
                self.spec.template.volumes) >= 1:
            raise ValueError(
                "Deployments with volumes can't have scale > 1 ({})".format(
                    self.spec.template.scale.min))

    def _get_strategy(self):
        s = swagger_client.V1beta1DeploymentStrategy()
        s.type = "RollingUpdate" if self.spec.template.strategy.type == "rolling_update" else "Recreate"
        if s.type == "RollingUpdate":
            rolling_update = swagger_client.V1beta1RollingUpdateDeployment()
            rolling_update.max_unavailable = self.spec.template.strategy.rolling_update.max_unavailable
            rolling_update.max_surge = self.spec.template.strategy.rolling_update.max_surge
            s.rolling_update = rolling_update
        return s
Ejemplo n.º 9
0
 def test_3(self):
     """
     Test deletion of app that does not exists
     """
     a1 = Application("testappdoesnotexist")
     a1.delete()