Ejemplo n.º 1
0
    def spawn_job(self, task, node):
        """
        Create the worker job described by ``self.worker_yaml``.

        A job reported by ``self.job_exists()`` is deleted first, and the
        method waits on ``self.wait_for_delete()`` before creating the new one.

        Args:
            task (str): Type of job to run (e.g. runxhpl); only used in the
                log message here.
            node (str): Name of the node on which to run the job; only used
                in the log message here.

        Returns:
            job (V1Job): The job fetched by name after creation. When
                ``self.worker_yaml`` is not a dict nothing is created and the
                job object returned by ``self.job_exists()`` is handed back
                as-is.

        Raises:
            FailToCreateError: An error occurred creating the job.
        """
        api = client.api_client.ApiClient()
        found, job = self.job_exists()
        if found:
            logger.info("Found existing job: {0}".format(job.metadata.name))
            delete_obj(job)
            self.wait_for_delete()

        logger.info("Creating worker: {0}-{1}".format(task, node))
        if not isinstance(self.worker_yaml, dict):
            return job

        # kubeutils.FailToCreateError (a list of ApiException) propagates
        # unchanged to the caller.
        kubeutils.create_from_dict(api, self.worker_yaml)
        return get_job(self.worker_yaml["metadata"]["name"])
Ejemplo n.º 2
0
    def handle_autoscaler(self, name: str):
        """Replace the HorizontalPodAutoscaler called ``name`` when auto-scaling is on.

        Nothing happens unless ``self.resources`` is truthy and its
        ``auto_scale`` entry is truthy. Otherwise an autoscaler found by
        ``self.find_autoscaler(name)`` is deleted, and an ``autoscaling/v1``
        autoscaler targeting the Deployment ``name`` in ``self.namespace`` is
        created. Creation failures are logged at debug level and not re-raised.
        """
        resources = self.resources
        if not (resources and resources.get('auto_scale', False)):
            return

        if self.find_autoscaler(name):
            self.LOG.debug('Removing old autoscaler: {}'.format(name))
            autoscaling_api = k8s_client.AutoscalingV1Api(self.api_client)
            autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
                name=name, namespace=self.namespace)

        self.LOG.info("Creating horizontal Pod autoscaler")

        manifest = {
            'apiVersion': 'autoscaling/v1',
            'kind': 'HorizontalPodAutoscaler',
            'metadata': {'name': name, 'namespace': self.namespace},
            'spec': {
                'minReplicas': resources.get('minReplicas', 1),
                'maxReplicas': resources.get('maxReplicas', 10),
                'targetCPUUtilizationPercentage': resources.get(
                    'targetCPUUtilizationPercentage', 50),
                'scaleTargetRef': {
                    'apiVersion': 'apps/v1',
                    'name': name,
                    'kind': 'Deployment',
                },
            },
        }

        try:
            k8s_utils.create_from_dict(self.api_client, manifest)
            self.LOG.debug(manifest)
        except Exception as err:
            self.LOG.debug("Failed to create autoscaler: {}".format(name))
            self.LOG.debug(repr(err))
    def start_worker_job(self) -> None:
        """Start one CodaLab worker as a Kubernetes Pod.

        A fresh worker id is generated, the worker command is obtained from
        ``self.build_command`` and a single-container Pod manifest is submitted
        through ``self.k8_client``. Kubernetes API failures are logged, not
        raised.
        """
        # Each worker needs its own directory because jobs may share a host.
        prefix: str = self.args.worker_work_dir_prefix or '/tmp/'
        worker_id: str = uuid.uuid4().hex
        worker_name: str = f'cl-worker-{worker_id}'
        work_dir: str = os.path.join(prefix, f'{worker_name}_work_dir')
        command: List[str] = self.build_command(worker_id, work_dir)
        worker_image: str = 'codalab/worker:' + os.environ.get('CODALAB_VERSION', 'latest')

        credentials = [
            {'name': 'CODALAB_USERNAME', 'value': self.codalab_username},
            {'name': 'CODALAB_PASSWORD', 'value': self.codalab_password},
        ]
        limits = {
            'cpu': self.args.cpus,
            'memory': f'{self.args.memory_mb}Mi',
            'nvidia.com/gpu': self.args.gpus,  # Configure NVIDIA GPUs
        }
        container: Dict[str, Any] = {
            'name': f'{worker_name}-container',
            'image': worker_image,
            'command': command,
            'securityContext': {'runAsUser': 0},  # Run as root
            'env': credentials,
            'resources': {'limits': limits},
            'volumeMounts': [
                {'name': 'dockersock', 'mountPath': '/var/run/docker.sock'},
                {'name': 'workdir', 'mountPath': work_dir},
            ],
        }
        # The Docker socket and the work directory are both host paths.
        volumes = [
            {'name': 'dockersock', 'hostPath': {'path': '/var/run/docker.sock'}},
            {'name': 'workdir', 'hostPath': {'path': work_dir}},
        ]
        config: Dict[str, Any] = {
            'apiVersion': 'v1',
            'kind': 'Pod',
            'metadata': {'name': worker_name},
            'spec': {
                'containers': [container],
                'volumes': volumes,
                'restartPolicy': 'Never',  # Only run a job once
            },
        }

        # Use Kubernetes to start a worker on GCP
        logger.debug('Starting worker {} with image {}'.format(worker_id, worker_image))
        try:
            utils.create_from_dict(self.k8_client, config)
        except (client.ApiException, FailToCreateError) as e:
            logger.error(f'Exception when calling Kubernetes utils->create_from_dict: {e}')
def main():
    """Submit the PySpark Pi driver pod and the resources that depend on it.

    Loads the ``kubeconfig-sa`` kube config, creates the driver pod from
    ``k8s/spark-submit/pyspark-pi-driver-pod.yaml`` (after placeholder
    substitution), then creates every other file found in that directory with
    an owner reference pointing at the driver pod.
    """
    # Configs can be set in Configuration class directly or using helper utility
    config.load_kube_config("kubeconfig-sa")

    # b2a_hex() returns bytes on Python 3; decode it so it can be concatenated
    # with str and substituted into the manifests.
    name_suffix = "-" + binascii.b2a_hex(os.urandom(8)).decode("ascii")
    priority_class_name = "routine"
    env_subst = {
        "${NAMESPACE}": "spark-jobs",
        "${SERVICE_ACCOUNT_NAME}": "driver-sa",
        "${DRIVER_NODE_AFFINITIES}": "driver",
        "${EXECUTOR_NODE_AFFINITIES}": "compute",
        "${NAME_SUFFIX}": name_suffix,
        "${PRIORITY_CLASS_NAME}": priority_class_name
    }

    k8s_client = ApiClient()
    verbose = True

    # Create driver pod
    k8s_dir = os.path.join(os.path.dirname(__file__), "k8s/spark-submit")
    k8s_object_dict = create_k8s_object(
        os.path.join(k8s_dir, "pyspark-pi-driver-pod.yaml"), env_subst)
    pprint(k8s_object_dict)
    k8s_objects = utils.create_from_dict(k8s_client,
                                         k8s_object_dict,
                                         verbose=verbose)
    driver_pod = k8s_objects[0]

    # Prepare ownership on dependent objects
    owner_refs = [{
        "apiVersion": "v1",
        "controller": True,
        "kind": "Pod",
        "name": driver_pod.metadata.name,
        "uid": driver_pod.metadata.uid
    }]

    # Every entry of k8s/spark-submit except the driver pod definition file
    other_resources = listdir(k8s_dir)
    other_resources.remove("pyspark-pi-driver-pod.yaml")
    for f in other_resources:
        k8s_object_dict = create_k8s_object(os.path.join(k8s_dir, f),
                                            env_subst)
        # Set ownership
        k8s_object_dict["metadata"]["ownerReferences"] = owner_refs
        pprint(k8s_object_dict)
        utils.create_from_dict(k8s_client, k8s_object_dict, verbose=verbose)

    print("Submitted %s" % (driver_pod.metadata.labels["app-name"]))
Ejemplo n.º 5
0
    def test_create_apps_deployment_from_yaml_obj(self):
        """Create a Deployment from an in-memory manifest, read it back, then delete it."""
        deployment_name = "nginx-app-3"
        api_client = client.api_client.ApiClient(configuration=self.config)

        manifest_path = self.path_prefix + "apps-deployment.yaml"
        with open(manifest_path) as manifest_file:
            manifest = yaml.safe_load(manifest_file)

        # Rename the object so it has its own name in the cluster.
        manifest["metadata"]["name"] = deployment_name

        utils.create_from_dict(api_client, manifest)

        apps_api = client.AppsV1Api(api_client)
        created = apps_api.read_namespaced_deployment(
            name=deployment_name, namespace="default")
        self.assertIsNotNone(created)
        apps_api.delete_namespaced_deployment(
            name=deployment_name, namespace="default", body={})
Ejemplo n.º 6
0
async def _create_external_deployment(api_client, app_client, docker_images,
                                      tmpdir):
    """Dump the K8s YAML of an external deployment under ``tmpdir`` and apply it.

    The deployment uses ``docker_images[0]`` and is created in the
    ``external-deployment-ns`` namespace. Creation is best-effort: ordinary
    errors from the Kubernetes client are ignored.

    ``app_client`` is accepted but not used by this function.
    """
    namespace = 'external-deployment-ns'
    args = set_deployment_parser().parse_args([
        '--uses', f'docker://{docker_images[0]}', '--name',
        'external-deployment'
    ])
    external_deployment_config = K8sDeploymentConfig(args=args,
                                                     k8s_namespace=namespace)
    configs = external_deployment_config.to_k8s_yaml()
    deployment_base = os.path.join(tmpdir, 'external-deployment')
    os.makedirs(deployment_base, exist_ok=True)
    filenames = []
    for name, k8s_objects in configs:
        filename = os.path.join(deployment_base, f'{name}.yml')
        with open(filename, 'w+') as fp:
            filenames.append(filename)
            last_index = len(k8s_objects) - 1
            for i, k8s_object in enumerate(k8s_objects):
                yaml.dump(k8s_object, fp)
                # Separate documents, but do not end the file with a separator.
                if i < last_index:
                    fp.write('---\n')
    from kubernetes import utils

    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {
            'name': f'{namespace}'
        },
    }
    try:
        utils.create_from_dict(api_client, namespace_object)
    except Exception:
        # Best effort. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass

    for filename in filenames:
        try:
            utils.create_from_yaml(
                api_client,
                yaml_file=filename,
                namespace=namespace,
            )
        except Exception:
            # Best effort, as above.
            pass

    await asyncio.sleep(1.0)
Ejemplo n.º 7
0
def create_from_yaml(k8s_client,
                     yaml_file,
                     verbose=False,
                     namespace="default",
                     **kwargs):
    """
    Create Kubernetes objects from already-parsed YAML documents.

    Despite its name, ``yaml_file`` is not a path: it is the parsed content.
    Each document is passed to ``utils.create_from_dict``.

    Input:
    yaml_file: an iterable of parsed YAML documents (dicts), or a single
        dict, which is treated as one document. ``None`` entries (empty
        YAML documents) are skipped.
    k8s_client: an ApiClient object, initialized with the client args.
    verbose: If True, print confirmation from the create action.
        Default is False.
    namespace: string. Contains the namespace to create all
        resources inside. The namespace must preexist otherwise
        the resource creation will fail. If the API object in
        the yaml file already contains a namespace definition
        this parameter has no effect.
    Available parameters for creating <kind>:
    :param async_req bool
    :param bool include_uninitialized: If true, partially initialized
        resources are included in the response.
    :param str pretty: If 'true', then the output is pretty printed.
    :param str dry_run: When present, indicates that modifications
        should not be persisted. An invalid or unrecognized dryRun
        directive will result in an error response and no further
        processing of the request.
        Valid values are: - All: all dry run stages will be processed
    Returns:
        A list with one entry per document that was created, each being
        the value returned by ``utils.create_from_dict`` for it.
    Raises:
        FailToCreateError which holds list of `client.rest.ApiException`
        instances for each object that failed to create.
    """
    # Iterating a dict would yield its keys, so wrap a single document.
    documents = [yaml_file] if isinstance(yaml_file, dict) else yaml_file

    failures = []
    k8s_objects = []
    for yml_document in documents:
        if yml_document is None:
            # Empty YAML document (e.g. a trailing '---').
            continue
        try:
            created = utils.create_from_dict(k8s_client,
                                             yml_document,
                                             verbose,
                                             namespace=namespace,
                                             **kwargs)
        except utils.FailToCreateError as failure:
            # Keep going so every failing document is reported at once.
            failures.extend(failure.api_exceptions)
        else:
            k8s_objects.append(created)
    if failures:
        raise utils.FailToCreateError(failures)

    return k8s_objects
Ejemplo n.º 8
0
 def create_objects_from_dict(self, filepath, namespace=None):
     """Create every manifest parsed from ``filepath`` through the Kubernetes client.

     Args:
         filepath: Path handed to ``Parser`` and, on fallback, to ``kubectl``.
         namespace: When truthy, written into each manifest's
             ``metadata.namespace`` before creation.

     If creation fails because the client has no ``NetworkingIstioIoV1alpha3Api``
     attribute, the whole file is applied with ``kubectl`` instead and the loop
     stops. Any other error goes to ``self.check_create_error_and_response``.
     """
     yaml_objects = Parser(filepath).return_manifests_dict
     for manifest in yaml_objects:
         try:
             # handle special cases of namespace injection
             if namespace:
                 manifest["metadata"]["namespace"] = namespace
             utils.create_from_dict(self.api_client, manifest)
             logger.info('Created {}/{}'.format(
                 manifest["kind"], manifest["metadata"]["name"]))
         except Exception as e:  # also covers client.rest.ApiException
             # AttributeError: module 'kubernetes.client' has no attribute 'NetworkingIstioIoV1alpha3Api'
             if "module 'kubernetes.client' has no attribute 'NetworkingIstioIoV1alpha3Api'" in str(
                     e):
                 logger.warning("Creating {} failed.".format(
                     manifest["kind"]))
                 logger.info("Trying again using kubectl...")
                 # NOTE(review): filepath and namespace are interpolated into
                 # a command string; if exec_cmd runs it through a shell and
                 # they can come from untrusted input, quote them - confirm.
                 command = "kubectl apply -f {}".format(filepath)
                 if namespace:
                     # Without this guard the command ended in "-n None".
                     command += " -n {}".format(namespace)
                 exec_cmd(command)
                 # kubectl applied the whole file, so nothing is left to do.
                 break
             self.check_create_error_and_response(
                 e, manifest["kind"], manifest["metadata"]["name"])
Ejemplo n.º 9
0
    def createResourceFromYaml(self, filepath, namespace='default'):
        """Create the objects of a YAML file, skipping the ones a getter already finds.

        For each non-empty document, a ``getNamespaced<Kind>`` method on
        ``self`` (or, failing that, a ``get<Kind>`` method) is called; when its
        response has ``ret_code == 0`` that response is recorded under
        ``created_objs`` and nothing is created. Otherwise the document is
        passed to ``create_from_dict``; failures are printed and the document
        is recorded under ``failed_objs``.

        Returns:
            ``{'ret_code': 0, 'result': {...}}`` with both lists, or
            ``{'ret_code': 1, 'result': <message>}`` when the file is missing
            or processing raised.
        """
        abs_path = os.path.abspath(filepath)
        if not os.path.isfile(abs_path):
            return {
                "ret_code": 1,
                'result': "file %s not exists" % (filepath, )
            }

        outcome = {
            "created_objs": [],
            "failed_objs": [],
        }
        with open(abs_path, mode='rb') as f:
            try:
                for doc in yaml.safe_load_all(f):
                    if not doc:
                        continue
                    kind = doc['kind']
                    obj_name = doc['metadata']['name']
                    print('current kind: ' + str(kind) + ' name: ' +
                          str(obj_name))

                    namespaced_getter = 'getNamespaced' + kind
                    plain_getter = 'get' + kind

                    # Ask the matching getter, if any, whether the object is
                    # already there.
                    if hasattr(self, namespaced_getter):
                        found = getattr(self, namespaced_getter)(
                            name=obj_name, namespace=namespace)
                        already_there = found['ret_code'] == 0
                    elif hasattr(self, plain_getter):
                        found = getattr(self, plain_getter)(name=obj_name)
                        already_there = found['ret_code'] == 0
                    else:
                        already_there = False

                    if already_there:
                        outcome['created_objs'].append(found)
                        continue

                    try:
                        created = create_from_dict(
                            k8s_client=client.ApiClient(),
                            data=doc,
                            namespace=namespace)
                        outcome['created_objs'].append(created)
                    except Exception as e:
                        print(str(e))
                        outcome['failed_objs'].append(doc)

                return {'ret_code': 0, 'result': outcome}
            except Exception as e:
                print(str(e))
                return {'ret_code': 1, 'result': str(e)}
    def apply_GSelectedCluster_yaml(self, topicData):
        """Apply the YAML referenced by a GSelectedCluster request and report the outcome.

        The request must carry ``requestID``, ``fileID`` and ``requestData``
        under ``topicData['msg']`` and ``type`` and ``object`` under
        ``topicData['target']``. If ``requestID`` or ``fileID`` is missing the
        method returns silently; for the other keys it reports through
        ``self.send_error`` and returns.

        The YAML text is fetched with ``self.get_yaml_file_from_redis(fileID)``,
        parsed and passed to ``utils.create_from_dict``. The outcome is sent
        with ``self.send_result`` as ``'success'``, ``'fail'`` (parse or create
        error, or empty YAML) or ``'cancel'`` (no YAML text available).
        """
        print('start-----------------apply_GSelectedCluster_yaml')
        if 'requestID' not in topicData['msg']:
            print('requestID not in topicData[msg]')
            return
        print('1')
        requestID = topicData['msg']['requestID']
        if 'fileID' not in topicData['msg']:
            print('fileID not in topicData[msg]')
            return
        print('1')
        fileID = topicData['msg']['fileID']
        print('2')
        if 'type' not in topicData['target']:
            self.send_error(requestID, 'type not in topicData[target]')
            return
        print('3')
        if 'object' not in topicData['target']:
            self.send_error(requestID, 'object not in topicData[target]')
            return
        print('4')
        if 'requestData' not in topicData['msg']:
            self.send_error(requestID, 'requestData not in topicData[msg]')
            return
        print('end---------------------apply_GSelectedCluster_yaml')

        yaml_file = self.get_yaml_file_from_redis(fileID)
        print('5')
        print('topicData[msg][requestData]', topicData['msg']['requestData'])
        # Example of topicData['msg']:
        # {'requestID': 'req-b9494ca5-6e9a-4ab3-8392-8795f0b5eb3e',
        #  'date': '2021-10-21 12:05:54', 'status': 'create',
        #  'fileID': 'b2ab5fbe-e7bf-44dc-84d7-b969ad62f104', 'failCnt': 0,
        #  'env': {'type': 'global', 'targetClusters': ['c1', 'c2', 'c3'],
        #          'priority': 'GSelectedCluster'}}
        result = 'cancel'

        if yaml_file is not None:
            # Apply any kind of object through the generic
            # utils.create_from_dict.
            try:
                print('6')
                # NOTE(review): yaml.load with FullLoader on content fetched
                # from redis - prefer yaml.safe_load if it can be untrusted.
                yaml_dic = yaml.load(yaml_file, Loader=yaml.FullLoader)
                print('7')
                if yaml_dic is not None:
                    print('8')
                    utils.create_from_dict(k8s_client, yaml_dic)
                    print('create_from_yaml is completed ', yaml_file)
                    result = 'success'
                    print('9')
                else:
                    print('10')
                    result = 'fail'
            except Exception as e:
                # This handler used to reference an undefined name, so the
                # resulting NameError escaped and no result was ever sent.
                print("create_from_yaml", fileID, " Failed.", repr(e))
                result = 'fail'
        else:
            print('error : yaml file read ')
            result = 'cancel'

        temp_msg = {'source': {'type': 'cluster', 'object': self.cluster_name},
                    'target': {'type': 'none'},
                    'hcode': 400,
                    'lcode': 2,
                    'msg': {'result': result}
                    }
        print('11')
        self.send_result(requestID, temp_msg)
        print('12')
Ejemplo n.º 11
0
 def apply_manifest(self, manifest):
     """Create the objects described by ``manifest`` in the namespace ``self.name``.

     Returns whatever ``utils.create_from_dict`` returns.
     """
     api_client = self.api.client
     created = utils.create_from_dict(api_client, manifest, namespace=self.name)
     return created
def main():
    """Submit the PySpark Pi SparkApplication and create its UI ingress.

    Loads the ``kubeconfig-sa`` kube config, creates the ``SparkApplication``
    custom object from ``k8s/spark-operator/pyspark-pi.yaml`` in the
    ``spark-jobs`` namespace, reads it back, then creates the ingress from
    ``pyspark-pi-ui-ingress.yaml`` with an owner reference to the application.
    """
    # Configs can be set in Configuration class directly or using helper utility
    config.load_kube_config("kubeconfig-sa")

    namespace = "spark-jobs"
    # b2a_hex() returns bytes on Python 3; decode it so it can be concatenated
    # with str and substituted into the manifests.
    name_suffix = "-" + binascii.b2a_hex(os.urandom(8)).decode("ascii")
    priority_class_name = "routine"
    env_subst = {
        "${NAMESPACE}": namespace,
        "${SERVICE_ACCOUNT_NAME}": "driver-sa",
        "${DRIVER_NODE_AFFINITIES}": "driver",
        "${EXECUTOR_NODE_AFFINITIES}": "compute",
        "${NAME_SUFFIX}": name_suffix,
        "${PRIORITY_CLASS_NAME}": priority_class_name
    }

    custom_object_api = client.CustomObjectsApi()

    # Create pod
    yaml_file = os.path.join(os.path.dirname(__file__),
                             "k8s/spark-operator/pyspark-pi.yaml")
    spark_app = create_k8s_object(yaml_file, env_subst)
    pprint(spark_app)

    # create the resource
    group = "sparkoperator.k8s.io"
    version = "v1beta2"
    plural = "sparkapplications"

    custom_object_api.create_namespaced_custom_object(
        group=group,
        version=version,
        namespace=namespace,
        plural=plural,
        body=spark_app,
    )
    print("Resource created")

    # get the resource and print out data
    resource = custom_object_api.get_namespaced_custom_object(
        group=group,
        version=version,
        name="pyspark-pi-%s%s" % (priority_class_name, name_suffix),
        namespace=namespace,
        plural=plural,
    )
    print("Resource details:")
    pprint(resource)

    # Hijack the auto-created UI service and change its type from ClusterIP to NodePort
    # app_name = resource["metadata"]["name"]
    # ui_service_name = app_name + "-ui-svc"
    # core_v1_api = client.CoreV1Api()
    #
    # w = watch.Watch()
    # field_selector = "metadata.name=%s" % ui_service_name
    # for event in w.stream(core_v1_api.list_namespaced_service, namespace=namespace,
    #                       field_selector=field_selector,
    #                       timeout_seconds=30):
    #     ui_svc = event['object']
    #     if ui_svc:
    #         w.stop()
    #     else:
    #         print("Event: UI service not yet available")
    #
    # ui_svc.spec.type = "NodePort"
    # core_v1_api.patch_namespaced_service(name=ui_service_name, namespace="spark-jobs", body=ui_svc)

    # Create ingress
    # Prepare ownership on dependent objects
    owner_refs = [{
        "apiVersion": "sparkoperator.k8s.io/v1beta2",
        "controller": True,
        "kind": "SparkApplication",
        "name": resource["metadata"]["name"],
        "uid": resource["metadata"]["uid"]
    }]

    yaml_file = os.path.join(os.path.dirname(__file__),
                             "k8s/spark-operator/pyspark-pi-ui-ingress.yaml")
    k8s_object_dict = create_k8s_object(yaml_file, env_subst)
    # Set ownership
    k8s_object_dict["metadata"]["ownerReferences"] = owner_refs
    pprint(k8s_object_dict)
    k8s_client = ApiClient()
    utils.create_from_dict(k8s_client, k8s_object_dict, verbose=True)
Ejemplo n.º 13
0
async def create_all_flow_deployments_and_wait_ready(
    flow_dump_path,
    namespace,
    api_client,
    app_client,
    core_client,
    deployment_replicas_expected,
    logger,
):
    """Apply every dumped Flow manifest and wait until all deployments are ready.

    Args:
        flow_dump_path: Directory holding one sub-directory per deployment,
            each containing the YAML files to apply.
        namespace: Namespace to create and deploy into (lower-cased here).
        api_client: Client passed to ``kubernetes.utils`` for object creation.
        app_client: Client used to list and read Deployments.
        core_client: Client used to list Namespaces and Pods.
        deployment_replicas_expected: Maps deployment name to the number of
            ready replicas to wait for.
        logger: Logger used for progress messages.

    The function polls once per second and has no timeout. It asserts that
    the Deployments found in the namespace are exactly the expected ones.
    """
    from kubernetes import utils

    namespace = namespace.lower()
    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {
            'name': f'{namespace}'
        },
    }
    try:
        logger.info(f'create Namespace {namespace}')
        utils.create_from_dict(api_client, namespace_object)
    except Exception as e:
        # Best effort: the polling loop below decides whether the namespace
        # is usable. Log the error instead of dropping it.
        logger.info(f'Namespace {namespace} was not created: {e!r}')

    while True:
        ns_items = core_client.list_namespace().items
        if any(item.metadata.name == namespace for item in ns_items):
            logger.info(f'created Namespace {namespace}')
            break
        logger.info(f'waiting for Namespace {namespace}')
        await asyncio.sleep(1.0)

    deployment_set = set(os.listdir(flow_dump_path))
    for deployment_name in deployment_set:
        file_set = set(
            os.listdir(os.path.join(flow_dump_path, deployment_name)))
        for file in file_set:
            try:
                utils.create_from_yaml(
                    api_client,
                    yaml_file=os.path.join(flow_dump_path, deployment_name,
                                           file),
                    namespace=namespace,
                )
            except Exception as e:
                # some objects are not successfully created since they exist from previous files
                logger.info(
                    f'Did not create ressource from {file} for pod {deployment_name} due to {e} '
                )

    # wait for the expected number of pods to exist
    expected_deployments = sum(deployment_replicas_expected.values())
    while True:
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        if (namespaced_pods.items is not None
                and len(namespaced_pods.items) == expected_deployments):
            break
        logger.info(
            f'Waiting for all {expected_deployments} Deployments to be created, only got {len(namespaced_pods.items) if namespaced_pods.items is not None else None}'
        )
        await asyncio.sleep(1.0)

    # wait for every deployment to report the expected ready replicas
    resp = app_client.list_namespaced_deployment(namespace=namespace)
    deployment_names = {item.metadata.name for item in resp.items}
    assert deployment_names == set(deployment_replicas_expected.keys())
    while len(deployment_names) > 0:
        deployments_ready = []
        for deployment_name in deployment_names:
            api_response = app_client.read_namespaced_deployment(
                name=deployment_name, namespace=namespace)
            expected_num_replicas = deployment_replicas_expected[
                deployment_name]
            if (api_response.status.ready_replicas is not None
                    and api_response.status.ready_replicas
                    == expected_num_replicas):
                logger.info(f'Deploymnt {deployment_name} is now ready')
                deployments_ready.append(deployment_name)
            else:
                logger.info(
                    f'Deploymnt {deployment_name} is not ready yet: ready_replicas is {api_response.status.ready_replicas} not equal to {expected_num_replicas}'
                )

        # Removed after the scan so the set is not mutated while iterated.
        for deployment_name in deployments_ready:
            deployment_names.remove(deployment_name)
        logger.info(f'Waiting for {deployment_names} to be ready')
        await asyncio.sleep(1.0)
Ejemplo n.º 14
0
async def test_process_up_down_events(docker_images):
    """Check that the connection pool follows a deployment scaling 1 -> 2 -> 0.

    A namespace and a one-replica ``dummy-deployment`` running
    ``docker_images[0]`` are created, a ``K8sGrpcConnectionPool`` is started
    on the namespace, and after each scaling step the number of connections
    the pool holds for ``some-deployment`` is compared with the expected
    replica count.
    """
    from kubernetes import client
    from kubernetes import utils

    k8s_client = client.ApiClient()
    app_client = client.AppsV1Api(api_client=k8s_client)
    core_client = client.CoreV1Api(api_client=k8s_client)
    namespace = f'pool-test-namespace-{docker_images[0][0:4]}'
    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {
            'name': f'{namespace}'
        },
    }
    try:
        utils.create_from_dict(k8s_client, namespace_object)
    except Exception:
        # Best effort. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass
    container_args = ['executor', '--native', '--port-in', '8081']
    if 'test-executor' in docker_images[0]:
        container_args.extend(['--uses', 'config.yml'])
    deployment_object = {
        'apiVersion': 'apps/v1',
        'kind': 'Deployment',
        'metadata': {
            'name': 'dummy-deployment',
            'namespace': f'{namespace}'
        },
        'spec': {
            'replicas': 1,
            'strategy': {
                'type': 'RollingUpdate',
                'rollingUpdate': {
                    'maxSurge': 1,
                    'maxUnavailable': 0
                },
            },
            'selector': {
                'matchLabels': {
                    'app': 'dummy-deployment'
                }
            },
            'template': {
                'metadata': {
                    'labels': {
                        'app': 'dummy-deployment',
                        'jina_deployment_name': 'some-deployment',
                        'shard_id': '4',
                        'pod_type': 'WORKER',
                        'ns': f'{namespace}',
                    }
                },
                'spec': {
                    'containers': [{
                        'name': 'executor',
                        'image': docker_images[0],
                        'command': ['jina'],
                        'args': container_args,
                        'ports': [{
                            'containerPort': 8081
                        }],
                        'readinessProbe': {
                            'tcpSocket': {
                                'port': 8081
                            },
                            'initialDelaySeconds': 5,
                            'periodSeconds': 10,
                        },
                    }]
                },
            },
        },
    }
    utils.create_from_dict(k8s_client, deployment_object, namespace=namespace)
    pool = K8sGrpcConnectionPool(namespace=namespace, client=core_client)
    pool.start()
    try:
        await asyncio.sleep(1.0)
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        while not namespaced_pods.items:
            await asyncio.sleep(1.0)
            namespaced_pods = core_client.list_namespaced_pod(namespace)

        first_pod = namespaced_pods.items[0]
        assigned_pod_ip = first_pod.status.pod_ip
        assigned_port = None
        for container in first_pod.spec.containers:
            if container.name == 'executor':
                assigned_port = container.ports[0].container_port
                break
        assert assigned_port is not None, 'executor container not found'

        expected_replicas = 1

        while True:
            api_response = app_client.read_namespaced_deployment(
                name='dummy-deployment', namespace=namespace)
            ready_replicas = api_response.status.ready_replicas
            # A deployment scaled to zero reports ready_replicas as None.
            if ((ready_replicas is not None
                 and ready_replicas == expected_replicas)
                    or (ready_replicas is None and expected_replicas == 0)):
                replica_lists = pool._connections.get_replicas_all_shards(
                    'some-deployment')
                assert expected_replicas == sum(
                    len(replica_list.get_all_connections())
                    for replica_list in replica_lists)

                if expected_replicas == 1:
                    # The result used to be discarded, so nothing was checked.
                    assert replica_lists[0].has_connection(
                        f'{assigned_pod_ip}:{assigned_port}')
                    # scale up to 2 replicas
                    app_client.patch_namespaced_deployment_scale(
                        'dummy-deployment',
                        namespace=namespace,
                        body={'spec': {
                            'replicas': 2
                        }},
                    )
                    expected_replicas += 1
                elif expected_replicas == 2:
                    # scale down by 2 replicas
                    app_client.patch_namespaced_deployment_scale(
                        'dummy-deployment',
                        namespace=namespace,
                        body={'spec': {
                            'replicas': 0
                        }},
                    )
                    expected_replicas = 0
                else:
                    break
            else:
                await asyncio.sleep(1.0)
    finally:
        # Close the pool even when an assertion above fails.
        await pool.close()
Ejemplo n.º 15
0
async def create_all_flow_deployments_and_wait_ready(flow_dump_path, namespace,
                                                     api_client, app_client,
                                                     core_client):
    """Apply the dumped gateway / slow-process-executor Flow and wait until it is ready.

    ``flow_dump_path`` must contain exactly the ``gateway`` and
    ``slow_process_executor`` directories, each holding one YAML file. After
    applying them in ``namespace`` the function polls once per second, with no
    timeout, until the expected number of pods exists and every deployment
    reports its expected ready replicas.
    """
    from kubernetes import utils

    # Single source of truth for the pod count and the readiness checks.
    expected_replicas = {
        'gateway': 1,
        'slow-process-executor': 3,
    }
    expected_pod_count = sum(expected_replicas.values())

    namespace_object = {
        'apiVersion': 'v1',
        'kind': 'Namespace',
        'metadata': {
            'name': f'{namespace}'
        },
    }
    try:
        utils.create_from_dict(api_client, namespace_object)
    except Exception:
        # Best effort. Only Exception is caught so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        pass
    deployment_set = set(os.listdir(flow_dump_path))
    assert deployment_set == {'gateway', 'slow_process_executor'}
    for deployment_name in deployment_set:
        file_set = set(
            os.listdir(os.path.join(flow_dump_path, deployment_name)))
        if deployment_name == 'gateway':
            assert file_set == {'gateway.yml'}
        else:
            assert file_set == {
                'slow-process-executor.yml',
            }
        for file in file_set:
            try:
                utils.create_from_yaml(
                    api_client,
                    yaml_file=os.path.join(flow_dump_path, deployment_name,
                                           file),
                    namespace=namespace,
                )
            except Exception:
                # some objects are not successfully created since they exist from previous files
                pass

    # wait for the expected number of pods to exist
    while True:
        namespaced_pods = core_client.list_namespaced_pod(namespace)
        if namespaced_pods.items is not None and len(
                namespaced_pods.items) == expected_pod_count:
            break
        await asyncio.sleep(1.0)

    # wait for every deployment to report the expected ready replicas
    resp = app_client.list_namespaced_deployment(namespace=namespace)
    deployment_names = {item.metadata.name for item in resp.items}
    assert deployment_names == set(expected_replicas)
    while len(deployment_names) > 0:
        deployments_ready = []
        for deployment_name in deployment_names:
            api_response = app_client.read_namespaced_deployment(
                name=deployment_name, namespace=namespace)
            expected_num_replicas = expected_replicas[deployment_name]
            if (api_response.status.ready_replicas is not None
                    and api_response.status.ready_replicas
                    == expected_num_replicas):
                deployments_ready.append(deployment_name)

        # Removed after the scan so the set is not mutated while iterated.
        for deployment_name in deployments_ready:
            deployment_names.remove(deployment_name)
        await asyncio.sleep(1.0)
Ejemplo n.º 16
0
def job_create(yaml_output, namespace):
    """Render the build-job template and submit it as a Kubernetes job.

    The job name is assembled from the ``BUILD_ID``, ``ARCH``,
    ``BUILD_ENVIRONMENT`` and ``DEFCONFIG`` environment variables, then
    sanitized and truncated to 63 characters.  CPU and memory settings are
    derived from ``K8S_CPU_LIMIT`` (default 8) and, when set,
    ``PARALLEL_BUILDS``.  As a side effect, ``PARALLEL_JOPT`` is exported
    into ``os.environ`` whenever ``PARALLEL_BUILDS`` is set.

    Args:
        yaml_output: Optional path.  When truthy, the rendered job YAML is
            also written to this file.
        namespace: Namespace forwarded to ``utils.create_from_dict``.

    Returns:
        str: The sanitized job name.

    Raises:
        SystemExit: With status 1 when ``utils.FailToCreateError`` is raised
            while creating the job.
    """
    # DEFCONFIG has the form "<defconfig>[+<fragment>]"; it must be set,
    # otherwise os.getenv() returns None and the split below fails.
    d = os.getenv('DEFCONFIG').split('+')
    defconfig = d[0]
    frag = None
    if len(d) > 1:
        frag = d[1]

    job_name = 'build-j{}-{}-{}-{}'.format(
        os.getenv('BUILD_ID'),
        os.getenv('ARCH'),
        os.getenv('BUILD_ENVIRONMENT'),
        defconfig,
    )

    if frag:
        # Only the fragment's base name, without extension, goes in the name.
        frag = os.path.splitext(os.path.basename(frag))[0]
        job_name += "-{}".format(frag)

    # job name can only have '-'
    job_name = re.sub(r'[\.:/_+=]', '-', job_name).lower()

    # k8s limits job-name to max 63 chars (and be sure it doesn't end with '-')
    job_name = job_name[0:63].rstrip('-')

    # FIXME: needs to be tweaked according to k8s cluster VMs
    cpu_limit = int(os.getenv('K8S_CPU_LIMIT', 8))
    parallel_builds = os.getenv('PARALLEL_BUILDS')
    if parallel_builds:
        parallel_builds = int(parallel_builds)
        cpu_limit = min(cpu_limit, parallel_builds)
        os.environ['PARALLEL_JOPT'] = "{}".format(parallel_builds)

    if cpu_limit < 8:
        cpu_request = cpu_limit * 0.875
    elif cpu_limit == 32:
        # HACK: Azure nodes with 32 vCPUs refuse jobs with
        #       CPU request > 30.  Support ticket open with
        #       Azure
        cpu_request = 30
    else:
        cpu_request = cpu_limit - 0.9

    # VMs are generous, let's be greedy and ask for 1Gb per core :)
    mem_request = cpu_limit

    params = {
        'job_name': job_name,
        'cpu_limit': cpu_limit,
        'cpu_request': cpu_request,
        'mem_request': "{}Gi".format(mem_request)
    }
    env = Environment(loader=FileSystemLoader(['config/k8s']),
                      extensions=["jinja2.ext.do"])
    env.filters['env_override'] = env_override
    template = env.get_template("job-build.jinja2")
    job_yaml_text = template.render(params)

    if yaml_output:
        print("Writing job to {}".format(yaml_output))
        # Context manager guarantees the file is closed even if write fails.
        with open(yaml_output, "w") as fp:
            fp.write(job_yaml_text)

    # Translate the parsed YAML into a k8s job
    job_dict = yaml.safe_load(job_yaml_text)
    try:
        k8s_client = client.ApiClient()
        utils.create_from_dict(k8s_client,
                               data=job_dict,
                               namespace=namespace)
    except utils.FailToCreateError as e:
        print("Failed to create job: ", e)
        sys.exit(1)

    print("Started job {}".format(job_name))

    return job_name
Ejemplo n.º 17
0
 def create_from_dict(self, dictionary):
     """Create objects from ``dictionary`` via ``utils.create_from_dict``.

     The call is made with this instance's ``_client``.  An
     ``ApiException`` raised by the call is handed to
     ``self._raise_runtime_error``.

     Args:
         dictionary: Object definition forwarded unchanged.

     Returns:
         Whatever ``utils.create_from_dict`` returns.
     """
     # NOTE(review): if `utils` is `kubernetes.utils`, creation failures
     # surface as `utils.FailToCreateError`, not `ApiException` -- confirm
     # that this handler is actually reachable.
     try:
         created = utils.create_from_dict(self._client, dictionary)
     except ApiException as api_error:
         self._raise_runtime_error(api_error)
     else:
         return created
Ejemplo n.º 18
0
    def updateTopology(cls, source: str, info: dict, nodes: dict):
        """Deploy the application, capture its traffic and extend ``nodes``.

        Steps, in order:

        1. Look up the ingress controllers already deployed and add them to
           the topology.
        2. Write each YAML document found under ``source`` to
           ``info['modDeploymentFiles']`` and create it in the cluster.
        3. Poll until every pod whose hostname is in ``nodes`` is running
           with all containers ready.
        4. Start a ``tshark`` capture in the monitoring container of each
           selected pod, optionally run the test module named by
           ``info['test']``, and wait for the capture to end.
        5. Copy the capture files to ``info['monitoringFiles']`` with
           ``kubectl cp`` and turn the packets into edges and communications.
        6. Drop the controller nodes added in step 1 (and their services)
           that ended up without outgoing edges.

        ``cls._cleanEnvironment()`` is always called before returning or
        propagating an error.

        Args:
            source: Location handed to ``cls._listFiles`` to find the
                deployment files.
            info: Run configuration, stored as ``cls.config``.  Keys read
                here: ``'modDeploymentFiles'``, ``'monitoringContainer'``,
                ``'monitoringFiles'``, ``'time'`` and ``'test'``.
            nodes: Topology nodes keyed by name; modified in place.

        Raises:
            DeploymentError: If creating an object from a deployment file
                fails with ``utils.FailToCreateError``.
            MonitoringError: If starting the capture in a pod raises
                ``ApiException``.
            TestError: If importing or running the test module fails.
            WrongFolderError: Re-raised when the monitoring files cannot be
                listed.
        """
        cls.config = info
        config.load_kube_config()
        configuration.assert_hostname = False
        k8sClient = client.ApiClient()
        loader = YAML(typ='safe')
        files = cls._listFiles(source)
        newNodes = []
        try:
            # Search ingress controllers already deployed
            cls.controllers = cls._searchIngressControllers()
            # Iterate over a snapshot: removing from the list being iterated
            # would make the loop skip the element after each removal.
            for controller in list(cls.controllers):
                newNode = cls._addControllerToTopology(controller, nodes)
                if newNode:
                    newNodes.append(newNode)
                else:
                    cls.controllers.remove(controller)
            # Deployment of the application
            print('   Deploying the application...')
            i = 0
            for k8sFile in files:
                # A file may hold several YAML documents separated by '---'.
                yamls = re.split('^---\n',
                                 cls._readFile(k8sFile),
                                 flags=re.MULTILINE)
                for contentStr in yamls:
                    contentDict = loader.load(contentStr)
                    if not contentDict:
                        continue
                    cls._prepareYaml(contentStr, contentDict)
                    # Keep a copy of every deployed document, numbered in
                    # deployment order.
                    with open(
                            join(cls.config['modDeploymentFiles'],
                                 str(i) + '.yml'), 'w') as f:
                        try:
                            f.write(yaml.dump(contentDict))
                            utils.create_from_dict(k8sClient, contentDict)
                        except utils.FailToCreateError as err:
                            cls._cleanEnvironment()
                            raise DeploymentError('Error deploying ' +
                                                  k8sFile) from err
                    i = i + 1

            # Wait until the deployment is completed
            v1 = client.CoreV1Api()
            deploymentCompleted = False

            while not deploymentCompleted:
                pods = v1.list_pod_for_all_namespaces(watch=False)
                deploymentCompleted = True
                for pod in pods.items:
                    if pod.spec.hostname in nodes:
                        if pod.status.phase != 'Running' and pod.status.phase != 'Succeeded':
                            deploymentCompleted = False
                            break
                        for containerStatus in pod.status.container_statuses:
                            if not containerStatus.ready:
                                deploymentCompleted = False
                                break
                    if not deploymentCompleted:
                        break
                if not deploymentCompleted:
                    # Not ready yet: poll again in a few seconds.
                    time.sleep(3)
            print('   Deployment completed')

            # Start monitoring
            print('   Monitoring in progress...')
            pods = v1.list_pod_for_all_namespaces(watch=False)
            # The container name is the configured value stripped of every
            # non-alphanumeric character.
            containerName = ''.join(c
                                    for c in cls.config['monitoringContainer']
                                    if c.isalnum())
            for pod in pods.items:
                # NOTE(review): `and` binds tighter than `or`, so the
                # 'Running' check only applies to the annotation branch --
                # confirm this is intended.
                if pod.spec.hostname in nodes or (
                        pod.metadata.annotations
                        and 'archMinerName' in pod.metadata.annotations
                        and pod.metadata.annotations['archMinerName']
                        in nodes) and pod.status.phase == 'Running':
                    fileName = pod.spec.hostname if pod.spec.hostname in nodes else pod.metadata.annotations[
                        'archMinerName']
                    filePath = join('/home/dump', fileName + '.json')
                    # Background capture that outlives the exec call and
                    # stops by itself after info['time'] + 3 seconds.
                    command = [
                        './bin/sh', '-c',
                        'tshark -i eth0 -a duration:' + str(info['time'] + 3) +
                        ' -N nNdt -T json > ' + filePath + ' 2>/dev/null &'
                    ]
                    try:
                        stream(v1.connect_get_namespaced_pod_exec,
                               pod.metadata.name,
                               pod.metadata.namespace,
                               command=command,
                               container=containerName,
                               stderr=False,
                               stdin=False,
                               stdout=True,
                               tty=False)
                    except ApiException as err:
                        cls._cleanEnvironment()
                        raise MonitoringError(pod.metadata.name) from err

            # Start tests
            time.sleep(3)
            if info['test']:
                try:
                    testModule = importlib.import_module(info['test'])
                    testModule.runTest()
                except (Exception, SystemExit) as err:
                    # SystemExit is still reported as a test failure (a test
                    # may call sys.exit()), but KeyboardInterrupt is no
                    # longer swallowed.
                    cls._cleanEnvironment()
                    raise TestError('') from err

            # Wait until monitoring is finished
            time.sleep(info['time'] + 5)
            print('   Monitoring completed')

            # Save on local host the packets
            pods = v1.list_pod_for_all_namespaces(watch=False)
            for pod in pods.items:
                # Same pod selection as when the capture was started.
                if pod.spec.hostname in nodes or (
                        pod.metadata.annotations
                        and 'archMinerName' in pod.metadata.annotations
                        and pod.metadata.annotations['archMinerName']
                        in nodes) and pod.status.phase == 'Running':
                    fileName = pod.spec.hostname if pod.spec.hostname in nodes else pod.metadata.annotations[
                        'archMinerName']
                    remoteFilePath = join('home/dump', fileName + '.json')
                    localFilePath = join(cls.config['monitoringFiles'],
                                         fileName + '.json')
                    os.system('kubectl cp -c ' + containerName + ' ' +
                              pod.metadata.namespace + '/' +
                              pod.metadata.name + ':' + remoteFilePath + ' ' +
                              localFilePath)

            # Create edges
            print('   Analyzing packets...')
            try:
                files = cls._listFiles(cls.config['monitoringFiles'])
            except WrongFolderError:
                cls._cleanEnvironment()
                raise
            for monitoringFilePath in files:
                # Empty capture files carry no packets.
                if os.path.getsize(monitoringFilePath) == 0:
                    continue
                # The capture file is named after its source node.
                srcNodeName = monitoringFilePath.split('/')[-1].replace(
                    '.json', '')
                with open(monitoringFilePath, 'rb') as monitoringFile:
                    for packet in ijson.items(monitoringFile, 'item'):
                        if cls._isOutgoingPacket(packet, nodes, srcNodeName):
                            cls._createEdge(packet, nodes, srcNodeName)

            # Create communications
            commFactory = ConcreteCommunicationFactory()
            for monitoringFilePath in files:
                if os.path.getsize(monitoringFilePath) == 0:
                    continue
                srcNodeName = monitoringFilePath.split('/')[-1].replace(
                    '.json', '')
                with open(monitoringFilePath, 'rb') as monitoringFile:
                    for packet in ijson.items(monitoringFile, 'item'):
                        if cls._isOutgoingPacket(packet, nodes, srcNodeName):
                            cls._createCommunication(packet, nodes,
                                                     commFactory, srcNodeName)

            # Controllers added above that show no outgoing edges are
            # removed from the topology together with their services.
            for newNode in newNodes:
                edges = nodes[newNode['controller']].getEdges(
                    Direction.OUTGOING)
                if not edges:
                    nodes.pop(newNode['controller'], None)
                    for service in newNode['services']:
                        nodes.pop(service, None)

        finally:
            # Always run the environment cleanup, on success or failure.
            cls._cleanEnvironment()