def test_resource_tags():
    @solid(
        tags={
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {'cpu': '250m', 'memory': '64Mi'},
                'limits': {'cpu': '500m', 'memory': '2560Mi'},
            }
        }
    )
    def resource_tags_solid(_):
        pass

    resources = get_k8s_resource_requirements(resource_tags_solid.tags)

    assert resources == json.loads(json.dumps(resources))
    assert resources['requests']['cpu'] == '250m'
    assert resources['requests']['memory'] == '64Mi'
    assert resources['limits']['cpu'] == '500m'
    assert resources['limits']['memory'] == '2560Mi'

    @solid
    def no_resource_tags_solid(_):
        pass

    no_resources = get_k8s_resource_requirements(no_resource_tags_solid.tags)
    assert no_resources == None
Beispiel #2
0
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    resources = get_k8s_resource_requirements(step.tags)
    task = create_k8s_job_task(app)

    recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        run_config=pipeline_context.pipeline_run.run_config,
        mode=pipeline_context.pipeline_run.mode,
        repo_name=recon_repo.get_definition().name,
        repo_location_name=pipeline_context.executor.repo_location_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor.job_config.to_dict(),
        job_namespace=pipeline_context.executor.job_namespace,
        resources=resources,
        retries_dict=pipeline_context.executor.retries.for_inner_plan().to_config(),
        load_incluster_config=pipeline_context.executor.load_incluster_config,
        kubeconfig_file=pipeline_context.executor.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key='{queue}.execute_step_k8s_job'.format(queue=queue),
    )
Beispiel #3
0
def _submit_task_k8s_job(app, pipeline_context, step, queue, priority):
    from .tasks import create_k8s_job_task

    resources = get_k8s_resource_requirements(step.tags)
    task = create_k8s_job_task(app)

    task_signature = task.si(
        instance_ref_dict=pipeline_context.instance.get_ref().to_dict(),
        step_keys=[step.key],
        environment_dict=pipeline_context.pipeline_run.environment_dict,
        mode=pipeline_context.pipeline_run.mode,
        pipeline_name=pipeline_context.pipeline_run.pipeline_name,
        run_id=pipeline_context.pipeline_run.run_id,
        job_config_dict=pipeline_context.executor_config.job_config.to_dict(),
        job_namespace=pipeline_context.executor_config.job_namespace,
        resources=resources,
        load_incluster_config=pipeline_context.executor_config.load_incluster_config,
        kubeconfig_file=pipeline_context.executor_config.kubeconfig_file,
    )

    return task_signature.apply_async(
        priority=priority,
        queue=queue,
        routing_key='{queue}.execute_step_k8s_job'.format(queue=queue),
    )
Beispiel #4
0
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            'requests': {
                'cpu': '250m',
                'memory': '64Mi'
            },
            'limits': {
                'cpu': '500m',
                'memory': '2560Mi'
            },
        })
    })
    resources = get_k8s_resource_requirements(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip())
Beispiel #5
0
def test_resource_tags():
    @solid(
        tags={
            K8S_RESOURCE_REQUIREMENTS_KEY: {
                'requests': {
                    'cpu': '250m',
                    'memory': '64Mi'
                },
                'limits': {
                    'cpu': '500m',
                    'memory': '2560Mi'
                },
            }
        })
    def boop(_):
        pass

    resources = get_k8s_resource_requirements(boop.tags)
    assert resources.requests['cpu'] == '250m'
    assert resources.requests['memory'] == '64Mi'
    assert resources.limits['cpu'] == '500m'
    assert resources.limits['memory'] == '2560Mi'
Beispiel #6
0
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name

        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=exc_config.get('job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        resources = get_k8s_resource_requirements(
            frozentags(external_pipeline.tags))

        job = construct_dagster_graphql_k8s_job(
            job_config,
            args=[
                '-p',
                'executeRunInProcess',
                '-v',
                seven.json.dumps({
                    'runId':
                    run.run_id,
                    'repositoryName':
                    external_pipeline.handle.repository_name,
                    'repositoryLocationName':
                    external_pipeline.handle.location_name,
                }),
                '--remap-sigterm',
            ],
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        job_namespace = exc_config.get('job_namespace')

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]),
            cls=CeleryK8sRunLauncher,
        )
        return run
Beispiel #7
0
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None
        if isinstance(external_pipeline.get_origin(),
                      PipelineGrpcServerOrigin):
            if exc_config.get('job_image'):
                raise DagsterInvariantViolationError(
                    'Cannot specify job_image in executor config when loading pipeline '
                    'from GRPC server.')

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    'Expected RepositoryLocationHandle to be of type '
                    'GrpcServerRepositoryLocationHandle but found type {}'.
                    format(type(repository_location_handle)))

            job_image = repository_location_handle.get_current_image()
            env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.
                get_repository_python_origin(repository_name),
            )

        else:
            job_image = exc_config.get('job_image')
            if not job_image:
                raise DagsterInvariantViolationError(
                    'Cannot find job_image in celery-k8s executor config.')
            pipeline_origin = external_pipeline.get_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, 'job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        resources = get_k8s_resource_requirements(
            frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='run_coordinator',
            resources=resources,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get('job_namespace')

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            'Kubernetes run_coordinator job launched',
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]),
            cls=CeleryK8sRunLauncher,
        )
        return run
Beispiel #8
0
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name

        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=exc_config.get('job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=external_pipeline.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        job_namespace = exc_config.get('job_namespace')

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                    EventMetadataEntry.text(run.run_id, 'Run ID'),
                ]
            ),
            cls=CeleryK8sRunLauncher,
        )
        return run