Example #1
    def __init__(self,
                 dag,
                 env_vars=None,
                 input_dirs=None,
                 parallel_id=None,
                 execution_timeout=execution_timeout,
                 *args,
                 **kwargs
                 ):

        # Fall back to fresh containers (avoids shared mutable defaults)
        env_vars = env_vars or {}
        input_dirs = input_dirs or []

        envs = {
            "INPUT_DIRS": ";".join(str(input_dir) for input_dir in input_dirs),
            "nnUNet_raw_data_base": "/input",
            "nnUNet_preprocessed": "/input/nnUNet_preprocessed",
            "RESULTS_FOLDER": "/models",
        }
        env_vars.update(envs)

        data_dir = os.getenv('DATADIR', "")
        models_dir = os.path.join(os.path.dirname(data_dir), "models")

        volume_mounts = []
        volumes = []

        volume_mounts.append(VolumeMount(
            'models', mount_path='/models', sub_path=None, read_only=False))
        volume_config = {
            'hostPath':
            {
                'type': 'DirectoryOrCreate',
                'path': models_dir
            }
        }
        volumes.append(Volume(name='models', configs=volume_config))

        super().__init__(
            dag=dag,
            image="{}{}/nnunet-predict:0.1.0".format(default_registry, default_project),
            name="nnunet-predict",
            parallel_id=parallel_id,
            image_pull_secrets=["registry-secret"],
            volumes=volumes,
            volume_mounts=volume_mounts,
            execution_timeout=execution_timeout,
            ram_mem_mb=15000,
            ram_mem_mb_lmt=30000,
            gpu_mem_mb=5000,
            env_vars=env_vars,
            *args,
            **kwargs
        )
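
A minimal usage sketch for this operator, assuming a DAG file in which default_registry, default_project, and execution_timeout are already defined; the upstream converter class is hypothetical:

    # Hedged sketch: 'convert' stands in for any upstream operator that
    # produces the NIfTI input; its class name is made up for illustration.
    from airflow.models import DAG
    from airflow.utils.dates import days_ago

    dag = DAG(dag_id='nnunet-predict', start_date=days_ago(0), schedule_interval=None)

    convert = DcmConverterOperator(dag=dag)  # hypothetical upstream step
    predict = NnUnetOperator(dag=dag, input_dirs=[convert.operator_out_dir])
    convert >> predict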
Example #2
    def __init__(self,
                 dag,
                 env_vars=None,
                 registry_url="dktk-jip-registry.dkfz.de",
                 registry_project="kaapana-public",
                 model_version="0.1.0",
                 execution_timeout=execution_timeout,
                 *args,
                 **kwargs):

        self.registry_url = registry_url or default_registry
        self.registry_project = registry_project or default_project
        self.model_version = model_version
        self.af_models_dir = "/root/airflow/models"

        host_models_dir = os.path.join(
            os.path.dirname(os.getenv('DATADIR', "")), "models")
        env_vars = env_vars or {}
        envs = {
            "MODELDIR": "/models_mount",
        }
        env_vars.update(envs)

        volume_mounts = []
        volumes = []

        volume_mounts.append(
            VolumeMount('models',
                        mount_path='/models_mount',
                        sub_path=None,
                        read_only=False))
        volume_config = {
            'hostPath': {
                'type': 'DirectoryOrCreate',
                'path': host_models_dir
            }
        }
        volumes.append(Volume(name='models', configs=volume_config))

        super().__init__(dag=dag,
                         image=None,
                         name="get-task-model",
                         image_pull_secrets=["registry-secret"],
                         volumes=volumes,
                         volume_mounts=volume_mounts,
                         execution_timeout=execution_timeout,
                         env_vars=env_vars,
                         ram_mem_mb=50,
                         *args,
                         **kwargs)
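
Note that the pod image is deliberately left as image=None; presumably the concrete reference is resolved later from the stored registry_url, registry_project, and model_version. A sketch of what that resolution could look like, using a pre_execute hook as in Example #4 below (an assumption, not the actual implementation):

    def pre_execute(self, context):
        # Hypothetical: assemble the image reference from the attributes set in __init__
        self.image = "{}/{}/nnunet-get-models:{}".format(
            self.registry_url, self.registry_project, self.model_version)
        super().pre_execute(context)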
Example #3
    def __init__(self,
                 dag,
                 task_id=None,
                 zip_file=False,
                 env_vars=None,
                 execution_timeout=execution_timeout,
                 *args,
                 **kwargs):

        envs = {"MODELDIR": "/models", "ZIP_FILE": str(zip_file)}
        env_vars.update(envs)

        if task_id is not None:
            env_vars["TASK"] = task_id

        data_dir = os.getenv('DATADIR', "")
        models_dir = os.path.join(os.path.dirname(data_dir), "models")

        volume_mounts = []
        volumes = []

        volume_mounts.append(
            VolumeMount('models',
                        mount_path='/models',
                        sub_path=None,
                        read_only=False))
        volume_config = {
            'hostPath': {
                'type': 'DirectoryOrCreate',
                'path': models_dir
            }
        }
        volumes.append(Volume(name='models', configs=volume_config))

        super().__init__(dag=dag,
                         image="{}{}/nnunet-get-models:0.1.1-vdev".format(
                             default_registry, default_project),
                         name="get-task-model",
                         image_pull_secrets=["registry-secret"],
                         volumes=volumes,
                         volume_mounts=volume_mounts,
                         execution_timeout=execution_timeout,
                         env_vars=env_vars,
                         enable_proxy=True,
                         host_network=True,
                         ram_mem_mb=1000,
                         *args,
                         **kwargs)
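
A minimal usage sketch, assuming the enclosing class is importable as GetTaskModelOperator (the class name is not shown in the snippet) and a dag object already exists; the task name is purely illustrative:

    # Hypothetical usage: fetch one pretrained nnU-Net task model as a zip archive
    get_model = GetTaskModelOperator(dag=dag,
                                     task_id='Task017_AbdominalOrganSegmentation',
                                     zip_file=True)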
Example #4
    def pre_execute(self, context):
        print("Starting moule LaunchPodOperator...")
        print(context)
        conf = context['dag_run'].conf
        print(conf)
        self.port = int(conf['port'])
        self.ingress_path = conf['ingress_path']
        self.image = conf['image']

        if 'image_pull_secrets' in conf:
            self.image_pull_secrets.append(conf['image_pull_secrets'])

        envs = {
            "INGRESS_PATH": self.ingress_path,
        }

        self.env_vars.update(envs)

        if 'envs' in conf:
            self.env_vars.update(conf['envs'])

        if 'args' in conf:
            self.arguments = conf['args']

        if 'cmds' in conf:
            self.cmds = conf['cmds']

        if 'annotations' in conf:
            self.annotations = conf['annotations']

        self.volume_mounts = []
        if 'volume_mounts' in conf:
            for volume_mount in conf['volume_mounts']:
                print('Adding volume_mount:', volume_mount)
                self.volume_mounts.append(VolumeMount(**volume_mount))

        self.volumes = []
        if 'volumes' in conf:
            for volume in conf['volumes']:
                print('Adding volume:', volume)
                self.volumes.append(Volume(**volume))
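
pre_execute takes its entire runtime configuration from dag_run.conf: 'port', 'ingress_path', and 'image' are read unconditionally, everything else is optional. An illustrative payload (all values hypothetical):

    conf = {
        "port": 8080,
        "ingress_path": "/my-app",
        "image": "registry.example.org/project/my-app:0.1.0",
        "envs": {"SOME_FLAG": "true"},           # optional, merged into env_vars
        "cmds": ["/bin/sh", "-c"],               # optional container command
        "args": ["--serve"],                     # optional container arguments
        "annotations": {"some/annotation": "x"}, # optional pod annotations
        "volume_mounts": [{"name": "data", "mount_path": "/data",
                           "sub_path": None, "read_only": False}],
        "volumes": [{"name": "data",
                     "configs": {"hostPath": {"type": "DirectoryOrCreate",
                                              "path": "/home/kaapana/data"}}}],
    }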
Example #5
    def __init__(
            self,
            dag,
            name,
            image=None,
            # Directories
            operator_out_dir=None,
            input_operator=None,
            # Airflow
            task_id=None,
            parallel_id=None,
            trigger_rule=TriggerRule.ALL_SUCCESS,
            ram_mem_mb=500,
            ram_mem_mb_lmt=None,
            cpu_millicores=None,
            cpu_millicores_lmt=None,
            gpu_mem_mb=None,
            gpu_mem_mb_lmt=None,
            retries=1,
            retry_delay=timedelta(seconds=60),
            priority_weight=1,
            execution_timeout=timedelta(minutes=90),
            task_concurrency=None,
            manage_cache=None,
            # Other stuff
            cmds=None,
            arguments=None,
            env_vars=None,
            image_pull_secrets=None,
            startup_timeout_seconds=120,
            namespace='flow-jobs',
            image_pull_policy='Always',
            #  image_pull_policy='IfNotPresent',
            training_operator=False,
            volume_mounts=None,
            volumes=None,
            pod_resources=None,
            enable_proxy=False,
            host_network=False,
            in_cluster=False,
            cluster_context=None,
            labels=None,
            get_logs=True,
            annotations=None,
            affinity=None,
            config_file=None,
            xcom_push=False,
            node_selectors=None,
            secrets=None,
            kind="Pod",
            pool=None,
            pool_slots=None,
            api_version="v1",
            *args,
            **kwargs):

        KaapanaBaseOperator.set_defaults(self,
                                         name=name,
                                         task_id=task_id,
                                         operator_out_dir=operator_out_dir,
                                         input_operator=input_operator,
                                         parallel_id=parallel_id,
                                         trigger_rule=trigger_rule,
                                         pool=pool,
                                         pool_slots=pool_slots,
                                         ram_mem_mb=ram_mem_mb,
                                         ram_mem_mb_lmt=ram_mem_mb_lmt,
                                         cpu_millicores=cpu_millicores,
                                         cpu_millicores_lmt=cpu_millicores_lmt,
                                         gpu_mem_mb=gpu_mem_mb,
                                         gpu_mem_mb_lmt=gpu_mem_mb_lmt,
                                         manage_cache=manage_cache)

        # Airflow
        self.retries = retries
        self.priority_weight = priority_weight
        self.execution_timeout = execution_timeout
        self.task_concurrency = task_concurrency
        self.retry_delay = retry_delay

        self.training_operator = training_operator

        # Kubernetes
        self.image = image
        self.env_vars = env_vars or {}
        self.namespace = namespace
        self.cmds = cmds or []
        self.arguments = arguments or []
        self.labels = labels or {}
        self.startup_timeout_seconds = startup_timeout_seconds
        self.volume_mounts = volume_mounts or []
        self.volumes = volumes or []
        self.image_pull_secrets = image_pull_secrets or []
        self.in_cluster = in_cluster
        self.cluster_context = cluster_context
        self.get_logs = get_logs
        self.image_pull_policy = image_pull_policy
        self.node_selectors = node_selectors or {}
        self.annotations = annotations or {}
        self.affinity = affinity or {}
        self.xcom_push = xcom_push
        self.pod_resources = pod_resources or None
        self.config_file = config_file
        self.api_version = api_version
        self.secrets = secrets
        self.kind = kind
        self.data_dir = os.getenv('DATADIR', "")
        self.result_message = None
        self.host_network = host_network
        self.enable_proxy = enable_proxy

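        # Mount the platform data directory into every job pod at /data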
        self.volume_mounts.append(
            VolumeMount('dcmdata',
                        mount_path='/data',
                        sub_path=None,
                        read_only=False))
        volume_config = {
            'hostPath': {
                'type': 'DirectoryOrCreate',
                'path': self.data_dir
            }
        }
        self.volumes.append(Volume(name='dcmdata', configs=volume_config))

        if self.training_operator:
            self.volume_mounts.append(
                VolumeMount('tensorboard',
                            mount_path='/tensorboard',
                            sub_path=None,
                            read_only=False))
            tb_config = {
                'hostPath': {
                    'type': 'DirectoryOrCreate',
                    'path': os.path.join(self.data_dir, "tensorboard")
                }
            }
            self.volumes.append(Volume(name='tensorboard', configs=tb_config))

        if self.pod_resources is None:
            # Derive requests/limits from the operator arguments, with a small
            # headroom (+100m CPU / +100Mi RAM) on the limits
            self.pod_resources = PodResources(
                request_cpu="{}m".format(self.cpu_millicores)
                if self.cpu_millicores is not None else None,
                limit_cpu="{}m".format(self.cpu_millicores + 100)
                if self.cpu_millicores is not None else None,
                request_memory="{}Mi".format(self.ram_mem_mb),
                limit_memory="{}Mi".format(
                    self.ram_mem_mb_lmt if self.ram_mem_mb_lmt is not None
                    else self.ram_mem_mb + 100),
                limit_gpu=1 if self.gpu_mem_mb is not None else None)

        envs = {
            "WORKFLOW_DIR": str(WORKFLOW_DIR),
            "BATCH_NAME": str(BATCH_NAME),
            "OPERATOR_OUT_DIR": str(self.operator_out_dir),
            "OPERATOR_IN_DIR": str(self.operator_in_dir),
            "BATCHES_INPUT_DIR": "/{}/{}".format(WORKFLOW_DIR, BATCH_NAME)
        }

        if http_proxy is not None and http_proxy != "" and self.enable_proxy:
            envs.update({
                "http_proxy": http_proxy,
                "https_proxy": http_proxy,
                "HTTP_PROXY": http_proxy,
                "HTTPS_PROXY": http_proxy,
            })

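        # Caller-supplied env_vars take precedence over the defaults above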
        envs.update(self.env_vars)
        self.env_vars = envs
        super().__init__(dag=dag,
                         task_id=self.task_id,
                         retries=self.retries,
                         priority_weight=self.priority_weight,
                         execution_timeout=self.execution_timeout,
                         task_concurrency=self.task_concurrency,
                         pool=self.pool,
                         pool_slots=self.pool_slots,
                         retry_delay=self.retry_delay,
                         email=None,
                         email_on_retry=True,
                         email_on_failure=True,
                         start_date=days_ago(0),
                         depends_on_past=False,
                         wait_for_downstream=False,
                         trigger_rule=self.trigger_rule,
                         on_failure_callback=KaapanaBaseOperator.on_failure,
                         on_success_callback=KaapanaBaseOperator.on_success,
                         on_retry_callback=KaapanaBaseOperator.on_retry,
                         on_execute_callback=KaapanaBaseOperator.on_execute,
                         executor_config=self.executor_config,
                         *args,
                         **kwargs)
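
The derived pod resources follow directly from the block above; a worked example:

    # Given cpu_millicores=500, ram_mem_mb=500, ram_mem_mb_lmt=None, gpu_mem_mb=None:
    #   request_cpu    = "500m"
    #   limit_cpu      = "600m"   # cpu_millicores + 100
    #   request_memory = "500Mi"
    #   limit_memory   = "600Mi"  # ram_mem_mb + 100, since no explicit limit was given
    #   limit_gpu      = None     # no GPU memory requested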
Example #6
    def __init__(
            self,
            dag,
            mode,  # preprocess, training, inference, export-model, install-model
            input_nifti_operators=None,
            prep_label_operator=None,
            prep_processes_low=8,
            prep_processes_full=6,
            prep_modalities=None,
            prep_preprocess=True,
            prep_check_integrity=True,
            prep_use_nifti_labels=True,
            train_fold=0,
            train_network="3d_lowres",
            train_network_trainer="nnUNetTrainerV2",
            train_continue=False,
            train_npz=False,
            train_strict=True,
            train_max_epochs=1000,
            inf_preparation=True,
            inf_threads_prep=1,
            inf_threads_nifti=1,
            env_vars=None,
            parallel_id=None,
            execution_timeout=execution_timeout,
            *args,
            **kwargs):
        # Fall back to fresh containers (avoids shared mutable defaults)
        env_vars = env_vars or {}
        input_nifti_operators = input_nifti_operators or []
        prep_modalities = prep_modalities or []

        envs = {
            "MODE": str(mode),
            "INPUT_NIFTI_DIRS": ",".join(
                str(operator.operator_out_dir) for operator in input_nifti_operators),
            "PREP_TL": str(prep_processes_low),
            "PREP_TF": str(prep_processes_full),
            "PREP_LABEL_DIR": str(prep_label_operator.operator_out_dir)
            if prep_label_operator is not None else "",
            "PREP_MODALITIES": ",".join(str(modality) for modality in prep_modalities),
            "PREP_PREPROCESS": str(prep_preprocess),
            "PREP_CHECK_INTEGRITY": str(prep_check_integrity),
            "PREP_USE_NIFITI_LABELS": str(prep_use_nifti_labels),
            "TRAIN_FOLD": str(train_fold),
            "TRAIN_NETWORK": train_network,
            "TRAIN_NETWORK_TRAINER": train_network_trainer,
            "TRAIN_CONTINUE": str(train_continue),
            "TRAIN_MAX_EPOCHS": str(train_max_epochs),
            "TRAIN_NPZ": str(train_npz),
            "TRAIN_STRICT": str(train_strict),
            "INF_THREADS_PREP": str(inf_threads_prep),
            "INF_THREADS_NIFTI": str(inf_threads_nifti),
            "INF_PREPARATION": str(inf_preparation),
            "TENSORBOARD_DIR": '/tensorboard',
        }
        env_vars.update(envs)

        data_dir = os.getenv('DATADIR', "")
        models_dir = os.path.join(os.path.dirname(data_dir), "models")

        volume_mounts = []
        volumes = []

        volume_mounts.append(
            VolumeMount('models',
                        mount_path='/models',
                        sub_path=None,
                        read_only=False))
        volume_config = {
            'hostPath': {
                'type': 'DirectoryOrCreate',
                'path': models_dir
            }
        }
        volumes.append(Volume(name='models', configs=volume_config))

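        # Memory-backed emptyDir mounted over /dev/shm; the 64Mi container
        # default is too small for typical training data loaders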
        volume_mounts.append(
            VolumeMount('dshm',
                        mount_path='/dev/shm',
                        sub_path=None,
                        read_only=False))
        volume_config = {
            'emptyDir': {
                'medium': 'Memory',
            }
        }
        volumes.append(Volume(name='dshm', configs=volume_config))

        pod_resources = PodResources(request_memory=None,
                                     request_cpu=None,
                                     limit_memory=None,
                                     limit_cpu=None,
                                     limit_gpu=None)

        training_operator = False
        gpu_mem_mb = None

        if mode == "training" or mode == "inference":
            pod_resources = PodResources(request_memory=None,
                                         request_cpu=None,
                                         limit_memory=None,
                                         limit_cpu=None,
                                         limit_gpu=1)
            gpu_mem_mb = 6000
            if mode == "training":
                gpu_mem_mb = None
                training_operator = True

        parallel_id = parallel_id if parallel_id is not None else mode

        super().__init__(dag=dag,
                         image="{}{}/nnunet:1.6.5-vdev".format(
                             default_registry, default_project),
                         name="nnunet",
                         parallel_id=parallel_id,
                         image_pull_secrets=["registry-secret"],
                         volumes=volumes,
                         volume_mounts=volume_mounts,
                         execution_timeout=execution_timeout,
                         ram_mem_mb=None,
                         ram_mem_mb_lmt=None,
                         pod_resources=pod_resources,
                         training_operator=training_operator,
                         gpu_mem_mb=gpu_mem_mb,
                         env_vars=env_vars,
                         *args,
                         **kwargs)
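
A usage sketch chaining two modes, assuming the enclosing class is the nnU-Net operator shown here (e.g. NnUnetOperator) and that upstream NIfTI-producing operators exist; since parallel_id defaults to the mode, the two instances get distinct task ids:

    # Hedged sketch: dcm2nifti and seg2nifti are hypothetical upstream operators.
    prep = NnUnetOperator(dag=dag,
                          mode='preprocess',
                          input_nifti_operators=[dcm2nifti],
                          prep_label_operator=seg2nifti,
                          prep_modalities=['CT'])
    train = NnUnetOperator(dag=dag,
                           mode='training',
                           train_network='3d_lowres',
                           train_max_epochs=1000)
    prep >> train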