Code example #1
File: job.py  Project: rasmunk/corc
def _required_delete_job_arguments(cluster, job):

    required_cluster_fields = {"name": str}
    validate_dict_values(cluster, required_cluster_fields, verbose=True, throw=True)

    required_job_fields = {"meta": dict}
    validate_dict_values(job, required_job_fields, verbose=True, throw=True)

    either_meta_fields = {"name": str, "all": str}
    validate_either_values(job["meta"], either_meta_fields, verbose=True, throw=True)
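All of the snippets on this page revolve around a small set of validation helpers from the corc project. Their real implementation is not shown here; the following is only a minimal sketch of the call contract as these examples use it (a dict mapping field name to expected type, plus verbose and throw switches), not the project's actual code.

def validate_dict_values(dictionary, required_fields, verbose=False, throw=False):
    # Hypothetical sketch: every listed field must be present, of the expected
    # type, and set (truthiness is used here as a stand-in for "set").
    for field, expected_type in required_fields.items():
        value = dictionary.get(field)
        if not isinstance(value, expected_type) or not value:
            msg = "Invalid or missing value for field: {}".format(field)
            if verbose:
                print(msg)
            if throw:
                raise ValueError(msg)
            return False
    return True


def validate_either_values(dictionary, either_fields, verbose=False, throw=False):
    # Hypothetical sketch: at least one of the listed fields must be set.
    for field, expected_type in either_fields.items():
        value = dictionary.get(field)
        if isinstance(value, expected_type) and value:
            return True
    msg = "At least one of the fields: {} must be set".format(list(either_fields))
    if verbose:
        print(msg)
    if throw:
        raise ValueError(msg)
    return False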
Code example #2
File: job.py  Project: rasmunk/corc
def _required_get_result_arguments(job, storage, s3):

    required_job_fields = {"meta": dict}
    validate_dict_values(job, required_job_fields, verbose=True, throw=True)

    required_meta_fields = {"name": str}
    validate_dict_values(job["meta"], required_meta_fields, verbose=True, throw=True)

    required_storage_fields = {"endpoint": str}
    validate_dict_values(storage, required_storage_fields, verbose=True, throw=True)
    validate_dict_values(s3, required_s3_fields, verbose=True, throw=True)
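From the checks above, a call to `_required_get_result_arguments` needs at least a job with a named meta section, a storage dict with an endpoint, and an s3 dict satisfying the module-level `required_s3_fields` map, which this snippet does not show. The values below are placeholders, not configuration taken from the project.

# Placeholder arguments; the s3 keys are a guess, since required_s3_fields
# is defined elsewhere in the module and not shown in this snippet.
job = {"meta": {"name": "my-job"}}
storage = {"endpoint": "https://s3.example.com"}
s3 = {"bucket_name": "my-job"}

_required_get_result_arguments(job, storage, s3)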
Code example #3
File: job.py  Project: rasmunk/corc
def run(provider, provider_kwargs, cluster=None, job=None, storage=None):
    # TODO, temp fix
    s3 = storage["s3"]
    _validate_fields(
        provider=provider_kwargs, cluster=cluster, job=job, storage=storage, s3=s3
    )
    _required_run_arguments(provider_kwargs, cluster, job, storage, s3)

    response = {"job": {}}
    if "name" not in job["meta"] or not job["meta"]["name"]:
        since_epoch = int(time.time())
        job["meta"]["name"] = "{}-{}".format(JOB_DEFAULT_NAME, since_epoch)

    if "bucket_name" not in s3 or not s3["bucket_name"]:
        s3["bucket_name"] = job["meta"]["name"]

    container_engine_client = new_client(
        ContainerEngineClient,
        composite_class=ContainerEngineClientCompositeOperations,
        name=provider_kwargs["profile"]["name"],
    )

    compute_cluster = get_cluster_by_name(
        container_engine_client,
        provider_kwargs["profile"]["compartment_id"],
        name=cluster["name"],
    )

    if not compute_cluster:
        response["msg"] = "Failed to find a cluster with name: {}".format(
            cluster["name"]
        )
        return False, response

    refreshed = refresh_kube_config(
        compute_cluster.id, name=provider_kwargs["profile"]["name"]
    )
    if not refreshed:
        response["msg"] = "Failed to refresh the kubernetes config"
        return False, response

    node_manager = NodeManager()
    if not node_manager.discover():
        response["msg"] = "Failed to discover any nodes to schedule jobs on"
        return False, response

    node = node_manager.select()
    if not node:
        response["msg"] = "Failed to select a node to schedule on"
        return False, response

    # Ensure we have the newest config
    scheduler = KubenetesScheduler()

    jobio_args = [
        "jobio",
        "run",
    ]
    jobio_args.extend(job["commands"])
    jobio_args.extend(["--job-meta-name", job["meta"]["name"]])

    if "output_path" in job:
        jobio_args.extend(["--job-output-path", job["output_path"]])

    if "capture" in job and job["capture"]:
        jobio_args.append("--job-capture")

    if "debug" in job["meta"]:
        jobio_args.append("--job-meta-debug")

    if "env_override" in job["meta"]:
        jobio_args.append("--job-meta-env-override")

    # Maintained by the pod
    volumes = []
    # Maintained by the container
    volume_mounts = []
    # Environment to pass to the container
    envs = []

    # Prepare config for the scheduler
    scheduler_config = {}

    if storage and storage["enable"]:
        validate_dict_values(storage, required_storage_fields, throw=True)
        jobio_args.append("--storage-enable")

        # Means that results should be exported to the specified storage
        # Create kubernetes secrets
        core_api = client.CoreV1Api()
        # storage_api = client.StorageV1Api()

        # Storage endpoint credentials secret (Tied to a profile and job)
        secret_profile_name = "{}-{}-{}".format(
            STORAGE_CREDENTIALS_NAME, s3["name"], job["meta"]["name"]
        )
        try:
            storage_credentials_secret = core_api.read_namespaced_secret(
                secret_profile_name, KUBERNETES_NAMESPACE
            )
        except ApiException:
            storage_credentials_secret = None

        # volumes
        secret_volume_source = V1SecretVolumeSource(secret_name=secret_profile_name)
        secret_volume = V1Volume(name=secret_profile_name, secret=secret_volume_source)
        volumes.append(secret_volume)

        # Where the storage credentials should be mounted
        # in the compute unit
        secret_mount = V1VolumeMount(
            name=secret_profile_name,
            mount_path=storage["credentials_path"],
            read_only=True,
        )
        volume_mounts.append(secret_mount)

        if s3:
            validate_dict_values(s3, required_staging_values, verbose=True, throw=True)
            jobio_args.append("--storage-s3")
            # S3 storage
            # Look for s3 credentials and config files
            s3_config = load_aws_config(
                s3["config_file"], s3["credentials_file"], profile_name=s3["name"],
            )
            s3_config["endpoint_url"] = storage["endpoint"]

            if not storage_credentials_secret:
                secret_data = dict(
                    aws_access_key_id=s3_config["aws_access_key_id"],
                    aws_secret_access_key=s3_config["aws_secret_access_key"],
                )
                secret_metadata = V1ObjectMeta(name=secret_profile_name)
                secrets_config = dict(metadata=secret_metadata, string_data=secret_data)
                scheduler_config.update(dict(secret_kwargs=secrets_config))

            # If `access_key`
            # TODO, unify the argument endpoint with the s3 config endpoint
            s3_resource = boto3.resource("s3", **s3_config)

            bucket = bucket_exists(s3_resource.meta.client, s3["bucket_name"])
            if not bucket:
                bucket = s3_resource.create_bucket(
                    Bucket=s3["bucket_name"],
                    CreateBucketConfiguration={
                        "LocationConstraint": s3_config["region_name"]
                    },
                )

            if "upload_path" in storage and storage["upload_path"]:
                # Upload local path to the bucket as designated input for the job
                uploaded = None
                if os.path.exists(storage["upload_path"]):
                    if os.path.isdir(storage["upload_path"]):
                        uploaded = upload_directory_to_s3(
                            s3_resource.meta.client,
                            storage["upload_path"],
                            s3["bucket_name"],
                            s3_prefix=s3["bucket_input_prefix"],
                        )
                    elif os.path.isfile(storage["upload_path"]):
                        s3_path = os.path.basename(storage["upload_path"])
                        if s3["bucket_input_prefix"]:
                            s3_path = os.path.join(s3["bucket_input_prefix"], s3_path)
                        # Upload
                        uploaded = upload_to_s3(
                            s3_resource.meta.client,
                            storage["upload_path"],
                            s3_path,
                            s3["bucket_name"],
                        )

                if not uploaded:
                    response["msg"] = "Failed to upload local path: {} to s3".format(
                        storage["upload_path"]
                    )
                    return False, response

            jobio_args.extend(
                [
                    "--s3-region-name",
                    s3_config["region_name"],
                    "--storage-secrets-dir",
                    storage["credentials_path"],
                    "--storage-endpoint",
                    storage["endpoint"],
                    "--storage-input-path",
                    storage["input_path"],
                    "--storage-output-path",
                    storage["output_path"],
                    "--bucket-name",
                    s3["bucket_name"],
                    "--bucket-input-prefix",
                    s3["bucket_input_prefix"],
                    "--bucket-output-prefix",
                    s3["bucket_output_prefix"],
                ]
            )

            # Provide a way to allow pod specific output prefixes
            field_ref = client.V1ObjectFieldSelector(field_path="metadata.name")
            env_var_source = client.V1EnvVarSource(field_ref=field_ref)
            # HACK, Set the output prefix in the bucket to the name of the pod
            env_output_prefix = client.V1EnvVar(
                name="JOBIO_BUCKET_OUTPUT_PREFIX", value_from=env_var_source
            )
            envs.append(env_output_prefix)

    if scheduler_config:
        prepared = scheduler.prepare(**scheduler_config)
        if not prepared:
            response["msg"] = "Failed to prepare the scheduler"
            return False, response

    container_spec = dict(
        name=job["meta"]["name"],
        image=cluster["image"],
        env=envs,
        args=jobio_args,
        volume_mounts=volume_mounts,
    )

    # If the working directory does not exist inside the container
    # It will set permissions where it will be unable to expand the
    # s3 bucket if the user doesn't have root permissions
    if "working_dir" in job:
        container_spec.update({"working_dir": job["working_dir"]})

    # If the container requires a specific set of resources
    resources = {}
    if "min_cores" in job:
        resources["requests"] = {"cpu": job["min_cores"]}
    if "max_cores" in job:
        resources["limits"] = {"cpu": job["max_cores"]}
    if "min_memory" in job:
        resources["requests"].update({"memory": job["min_memory"]})
    if "max_memory" in job:
        resources["limits"].update({"memory": job["max_memory"]})

    if resources:
        resource_req = client.V1ResourceRequirements(**resources)
        container_spec.update({"resources": resource_req})

    # args=jobio_args,
    pod_spec = dict(node_name=node.metadata.name, volumes=volumes, dns_policy="Default")

    job_spec = dict(
        backoff_limit=2,
        parallelism=job["meta"]["num_parallel"],
        completions=job["meta"]["num_jobs"],
    )

    task = dict(
        container_kwargs=container_spec,
        pod_spec_kwargs=pod_spec,
        job_spec_kwargs=job_spec,
    )

    job = scheduler.submit(**task)
    if not job:
        response["msg"] = "Failed to submit the job"
        return False, response

    response["job"] = job
    response["msg"] = "Job submitted"
    return True, response
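The keys that `run` reads suggest the rough shape of its arguments. The call below is illustrative only: every value is a placeholder, and the full set of required fields is enforced by `_validate_fields`, `_required_run_arguments`, and the module-level field maps, none of which are reproduced on this page.

# Illustrative invocation; all values are placeholders.
provider_kwargs = {
    "profile": {
        "name": "DEFAULT",
        "compartment_id": "ocid1.compartment.oc1..example",
    }
}
cluster = {"name": "my-cluster", "image": "registry.example.com/jobio:latest"}
job = {
    "meta": {"name": "", "num_jobs": 1, "num_parallel": 1},
    "commands": ["python", "run.py"],
}
storage = {"enable": False, "s3": {}}

submitted, response = run(
    "oci", provider_kwargs, cluster=cluster, job=job, storage=storage
)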
Code example #4
File: job.py  Project: rasmunk/corc
def _required_run_arguments(provider_kwargs, cluster, job, storage, s3):
    validate_dict_values(
        provider_kwargs["profile"], valid_profile_config, verbose=True, throw=True
    )
    validate_dict_values(cluster, required_run_cluster_fields, verbose=True, throw=True)
    validate_dict_values(job, required_run_job_fields, verbose=True, throw=True)
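The module-level field maps referenced here are not part of this snippet. Purely as a guess inferred from the fields that `run` reads, they might look roughly like the following; the real definitions live elsewhere in corc and may well differ.

# Hypothetical stand-ins for the module-level field maps, inferred from the
# fields that run() reads; the actual corc definitions are not shown here.
valid_profile_config = {"name": str, "compartment_id": str}
required_run_cluster_fields = {"name": str, "image": str}
required_run_job_fields = {"meta": dict, "commands": list}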
Code example #5
File: cluster.py  Project: rasmunk/corc
    def validate_options(cls, options):
        if not isinstance(options, dict):
            raise TypeError("options is not a dictionary")

        validate_dict_fields(
            options["profile"], valid_profile_config, verbose=True, throw=True
        )
        validate_dict_values(
            options["profile"], valid_profile_config, verbose=True, throw=True
        )

        validate_dict_fields(
            options["cluster"], valid_cluster_config, verbose=True, throw=True
        )
        required_cluster_fields = {"name": str}
        validate_dict_values(
            options["cluster"], required_cluster_fields, verbose=True, throw=True
        )

        required_node_fields = {
            "availability_domain": str,
            "name": str,
            "size": int,
            "node_shape": str,
            "image": (str, dict),
        }

        validate_dict_fields(
            options["cluster"]["node"],
            valid_cluster_node_config,
            verbose=True,
            throw=True,
        )
        validate_dict_values(
            options["cluster"]["node"], required_node_fields, verbose=True, throw=True
        )

        required_vcn_fields = {"dns_label": str, "cidr_block": str}
        validate_dict_fields(options["vcn"], valid_vcn_config, verbose=True, throw=True)
        validate_dict_values(
            options["vcn"], required_vcn_fields, verbose=True, throw=True
        )

        required_subnet_fields = {"dns_label": str, "cidr_block": str}
        validate_dict_fields(
            options["subnet"], valid_subnet_config, verbose=True, throw=True
        )
        validate_dict_values(
            options["subnet"], required_subnet_fields, verbose=True, throw=True
        )

        required_internetgateway_fields = {"is_enabled": bool}
        validate_dict_fields(
            options["internetgateway"],
            valid_internet_gateway_config,
            verbose=True,
            throw=True,
        )
        validate_dict_values(
            options["internetgateway"],
            required_internetgateway_fields,
            verbose=True,
            throw=True,
        )

        required_route_table_fields = {"routerules": list}
        validate_dict_fields(
            options["routetable"], valid_route_table_config, verbose=True, throw=True
        )
        validate_dict_values(
            options["routetable"], required_route_table_fields, verbose=True, throw=True
        )

        required_routerules_fields = {
            "destination": str,
            "destination_type": str,
        }

        # Check each routerule
        if isinstance(options["routetable"]["routerules"], list):
            for route_rule in options["routetable"]["routerules"]:
                validate_dict_fields(
                    route_rule, valid_route_rule_config, verbose=True, throw=True,
                )
                validate_dict_values(
                    route_rule, required_routerules_fields, verbose=True, throw=True,
                )
        else:
            validate_dict_fields(
                options["routetable"]["routerules"],
                valid_route_rule_config,
                verbose=True,
                throw=True,
            )
            validate_dict_values(
                options["routetable"]["routerules"],
                required_routerules_fields,
                verbose=True,
                throw=True,
            )