def __init__(self,
             dag,
             env_vars=None,
             input_dirs=None,
             parallel_id=None,
             execution_timeout=execution_timeout,
             *args, **kwargs):
    # Avoid mutable default arguments: use a fresh dict/list per instantiation.
    env_vars = env_vars or {}
    input_dirs = input_dirs or []

    # Environment expected by the nnunet-predict container.
    envs = {
        "INPUT_DIRS": ";".join(str(input_dir) for input_dir in input_dirs),
        "nnUNet_raw_data_base": "/input",
        "nnUNet_preprocessed": "/input/nnUNet_preprocessed",
        "RESULTS_FOLDER": "/models",
    }
    env_vars.update(envs)

    # Mount the platform's models directory (sibling of DATADIR) into the pod.
    data_dir = os.getenv('DATADIR', "")
    models_dir = os.path.join(os.path.dirname(data_dir), "models")

    volume_mounts = []
    volumes = []
    volume_mounts.append(VolumeMount(
        'models', mount_path='/models', sub_path=None, read_only=False))
    volume_config = {
        'hostPath': {
            'type': 'DirectoryOrCreate',
            'path': models_dir
        }
    }
    volumes.append(Volume(name='models', configs=volume_config))

    super(NnUnetOperator, self).__init__(
        dag=dag,
        image="{}{}/nnunet-predict:0.1.0".format(default_registry, default_project),
        name="nnunet-predict",
        parallel_id=parallel_id,
        image_pull_secrets=["registry-secret"],
        volumes=volumes,
        volume_mounts=volume_mounts,
        execution_timeout=execution_timeout,
        ram_mem_mb=15000,
        ram_mem_mb_lmt=30000,
        gpu_mem_mb=5000,
        env_vars=env_vars,
        *args, **kwargs
    )
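# --- Usage sketch (illustrative, not part of the original file) ---
# A minimal example of wiring the predict operator behind a DICOM-to-NIfTI
# conversion step. The import paths, the DAG arguments and the
# DcmConverterOperator parameters are assumptions; adapt them to the actual DAG.
from datetime import timedelta

from airflow.models import DAG
from airflow.utils.dates import days_ago

from nnunet.NnUnetOperator import NnUnetOperator                          # assumed path
from kaapana.operators.DcmConverterOperator import DcmConverterOperator   # assumed path

args = {"owner": "kaapana", "start_date": days_ago(0), "retries": 0}
dag = DAG(dag_id="nnunet-predict-example", default_args=args, schedule_interval=None)

dcm2nifti = DcmConverterOperator(dag=dag, output_format="nii.gz")
nnunet_predict = NnUnetOperator(
    dag=dag,
    input_dirs=[dcm2nifti.operator_out_dir],
    input_operator=dcm2nifti,
    execution_timeout=timedelta(hours=2),
)
dcm2nifti >> nnunet_predict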
def __init__(self,
             dag,
             env_vars=None,
             registry_url="dktk-jip-registry.dkfz.de",
             registry_project="kaapana-public",
             model_version="0.1.0",
             execution_timeout=execution_timeout,
             *args, **kwargs):
    # Avoid a mutable default argument: use a fresh dict per instantiation.
    env_vars = env_vars or {}

    # Fall back to the platform defaults if no registry/project is given.
    self.registry_url = registry_url or default_registry
    self.registry_project = registry_project or default_project
    self.model_version = model_version
    self.af_models_dir = "/root/airflow/models"

    # Host models directory: the "models" folder next to DATADIR.
    host_models_dir = os.path.join(os.path.dirname(os.getenv('DATADIR', "")), "models")

    envs = {
        "MODELDIR": "/models_mount",
    }
    env_vars.update(envs)

    volume_mounts = []
    volumes = []
    volume_mounts.append(VolumeMount(
        'models', mount_path='/models_mount', sub_path=None, read_only=False))
    volume_config = {
        'hostPath': {
            'type': 'DirectoryOrCreate',
            'path': host_models_dir
        }
    }
    volumes.append(Volume(name='models', configs=volume_config))

    super().__init__(
        dag=dag,
        image=None,
        name="get-task-model",
        image_pull_secrets=["registry-secret"],
        volumes=volumes,
        volume_mounts=volume_mounts,
        execution_timeout=execution_timeout,
        env_vars=env_vars,
        ram_mem_mb=50,
        *args, **kwargs
    )
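# Illustration only (the DATADIR value below is an assumed example; in a real
# deployment it is set by the platform): the host mount resolves to the
# "models" directory that sits next to the workflow data directory.
import os

example_data_dir = "/home/kaapana/workflows/data"                        # assumed DATADIR
example_models_dir = os.path.join(os.path.dirname(example_data_dir), "models")
assert example_models_dir == "/home/kaapana/workflows/models"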
def __init__(self,
             dag,
             task_id=None,
             zip_file=False,
             env_vars=None,
             execution_timeout=execution_timeout,
             *args, **kwargs):
    # Avoid a mutable default argument: use a fresh dict per instantiation.
    env_vars = env_vars or {}

    # Environment for the nnunet-get-models container.
    envs = {"MODELDIR": "/models", "ZIP_FILE": str(zip_file)}
    env_vars.update(envs)
    if task_id is not None:
        env_vars["TASK"] = task_id

    # Mount the platform's models directory (sibling of DATADIR) into the pod.
    data_dir = os.getenv('DATADIR', "")
    models_dir = os.path.join(os.path.dirname(data_dir), "models")

    volume_mounts = []
    volumes = []
    volume_mounts.append(VolumeMount(
        'models', mount_path='/models', sub_path=None, read_only=False))
    volume_config = {
        'hostPath': {
            'type': 'DirectoryOrCreate',
            'path': models_dir
        }
    }
    volumes.append(Volume(name='models', configs=volume_config))

    super().__init__(
        dag=dag,
        image="{}{}/nnunet-get-models:0.1.1-vdev".format(default_registry, default_project),
        name="get-task-model",
        image_pull_secrets=["registry-secret"],
        volumes=volumes,
        volume_mounts=volume_mounts,
        execution_timeout=execution_timeout,
        env_vars=env_vars,
        enable_proxy=True,
        host_network=True,
        ram_mem_mb=1000,
        *args, **kwargs
    )
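# Standalone sketch (the task name is a hypothetical example value) of the
# container environment the operator assembles before the pod is started:
zip_file = True
task_id = "Task017_AbdominalOrganSegmentation"   # assumed example task id
env_vars = {"MODELDIR": "/models", "ZIP_FILE": str(zip_file)}
if task_id is not None:
    env_vars["TASK"] = task_id
# env_vars == {"MODELDIR": "/models", "ZIP_FILE": "True",
#              "TASK": "Task017_AbdominalOrganSegmentation"}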
def pre_execute(self, context):
    print("Starting module LaunchPodOperator...")
    print(context)

    # All pod parameters are taken from the dag_run configuration.
    conf = context['dag_run'].conf
    print(conf)

    self.port = int(conf['port'])
    self.ingress_path = conf['ingress_path']
    self.image = conf['image']
    if 'image_pull_secrets' in conf:
        self.image_pull_secrets.append(conf['image_pull_secrets'])

    envs = {
        "INGRESS_PATH": self.ingress_path,
    }
    self.env_vars.update(envs)
    if 'envs' in conf:
        self.env_vars.update(conf['envs'])

    if 'args' in conf:
        self.arguments = conf['args']
    if 'cmds' in conf:
        self.cmds = conf['cmds']
    if 'annotations' in conf:
        self.annotations = conf['annotations']

    self.volume_mounts = []
    if 'volume_mounts' in conf:
        print('Writing volume_mounts')
        for volume_mount in conf['volume_mounts']:
            print('Writing volume_mount', volume_mount)
            self.volume_mounts.append(VolumeMount(**volume_mount))

    self.volumes = []
    if 'volumes' in conf:
        for volume in conf['volumes']:
            print('Writing volume', volume)
            self.volumes.append(Volume(**volume))
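# Example dag_run conf payload (all values hypothetical) covering the keys read
# by pre_execute above; the volume_mount/volume dicts must match the keyword
# arguments of the airflow.contrib.kubernetes VolumeMount and Volume classes.
example_conf = {
    "port": 8080,
    "ingress_path": "/my-app",
    "image": "my-registry/my-app:0.1.0",
    "envs": {"SOME_FLAG": "true"},
    "cmds": ["/bin/sh"],
    "args": ["-c", "python -u /app/main.py"],
    "volume_mounts": [
        {"name": "appdata", "mount_path": "/appdata", "sub_path": None, "read_only": False}
    ],
    "volumes": [
        {"name": "appdata",
         "configs": {"hostPath": {"type": "DirectoryOrCreate", "path": "/home/kaapana/appdata"}}}
    ],
}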
def __init__(self,
             dag,
             name,
             image=None,
             # Directories
             operator_out_dir=None,
             input_operator=None,
             # Airflow
             task_id=None,
             parallel_id=None,
             trigger_rule=TriggerRule.ALL_SUCCESS,
             ram_mem_mb=500,
             ram_mem_mb_lmt=None,
             cpu_millicores=None,
             cpu_millicores_lmt=None,
             gpu_mem_mb=None,
             gpu_mem_mb_lmt=None,
             retries=1,
             retry_delay=timedelta(seconds=60),
             priority_weight=1,
             execution_timeout=timedelta(minutes=90),
             task_concurrency=None,
             manage_cache=None,
             # Other stuff
             cmds=None,
             arguments=None,
             env_vars=None,
             image_pull_secrets=None,
             startup_timeout_seconds=120,
             namespace='flow-jobs',
             image_pull_policy='Always',  # image_pull_policy='IfNotPresent',
             training_operator=False,
             volume_mounts=None,
             volumes=None,
             pod_resources=None,
             enable_proxy=False,
             host_network=False,
             in_cluster=False,
             cluster_context=None,
             labels=None,
             get_logs=True,
             annotations=None,
             affinity=None,
             config_file=None,
             xcom_push=False,
             node_selectors=None,
             secrets=None,
             kind="Pod",
             pool=None,
             pool_slots=None,
             api_version="v1",
             *args, **kwargs):

    KaapanaBaseOperator.set_defaults(
        self,
        name=name,
        task_id=task_id,
        operator_out_dir=operator_out_dir,
        input_operator=input_operator,
        parallel_id=parallel_id,
        trigger_rule=trigger_rule,
        pool=pool,
        pool_slots=pool_slots,
        ram_mem_mb=ram_mem_mb,
        ram_mem_mb_lmt=ram_mem_mb_lmt,
        cpu_millicores=cpu_millicores,
        cpu_millicores_lmt=cpu_millicores_lmt,
        gpu_mem_mb=gpu_mem_mb,
        gpu_mem_mb_lmt=gpu_mem_mb_lmt,
        manage_cache=manage_cache
    )

    # Airflow
    self.retries = retries
    self.priority_weight = priority_weight
    self.execution_timeout = execution_timeout
    self.task_concurrency = task_concurrency
    self.retry_delay = retry_delay
    self.training_operator = training_operator

    # Kubernetes
    self.image = image
    self.env_vars = env_vars or {}
    self.namespace = namespace
    self.cmds = cmds or []
    self.arguments = arguments or []
    self.labels = labels or {}
    self.startup_timeout_seconds = startup_timeout_seconds
    self.volume_mounts = volume_mounts or []
    self.volumes = volumes or []
    self.image_pull_secrets = image_pull_secrets or []
    self.in_cluster = in_cluster
    self.cluster_context = cluster_context
    self.get_logs = get_logs
    self.image_pull_policy = image_pull_policy
    self.node_selectors = node_selectors or {}
    self.annotations = annotations or {}
    self.affinity = affinity or {}
    self.xcom_push = xcom_push
    self.pod_resources = pod_resources or None
    self.config_file = config_file
    self.api_version = api_version
    self.secrets = secrets
    self.kind = kind
    self.data_dir = os.getenv('DATADIR', "")
    self.result_message = None
    self.host_network = host_network
    self.enable_proxy = enable_proxy

    # Mount the platform data directory into every job pod.
    self.volume_mounts.append(VolumeMount(
        'dcmdata', mount_path='/data', sub_path=None, read_only=False))
    volume_config = {
        'hostPath': {
            'type': 'DirectoryOrCreate',
            'path': self.data_dir
        }
    }
    self.volumes.append(Volume(name='dcmdata', configs=volume_config))

    if self.training_operator:
        # Training jobs additionally get a shared tensorboard directory.
        self.volume_mounts.append(VolumeMount(
            'tensorboard', mount_path='/tensorboard', sub_path=None, read_only=False))
        tb_config = {
            'hostPath': {
                'type': 'DirectoryOrCreate',
                'path': os.path.join(self.data_dir, "tensorboard")
            }
        }
        self.volumes.append(Volume(name='tensorboard', configs=tb_config))

    if self.pod_resources is None:
        # Derive Kubernetes resource requests/limits from the operator arguments.
        pod_resources = PodResources(
            request_cpu="{}m".format(self.cpu_millicores) if self.cpu_millicores is not None else None,
            limit_cpu="{}m".format(self.cpu_millicores + 100) if self.cpu_millicores is not None else None,
            request_memory="{}Mi".format(self.ram_mem_mb),
            limit_memory="{}Mi".format(
                self.ram_mem_mb_lmt if self.ram_mem_mb_lmt is not None else self.ram_mem_mb + 100),
            limit_gpu=1 if self.gpu_mem_mb is not None else None
        )
        self.pod_resources = pod_resources

    # Default environment for every job container; user-supplied env_vars win.
    envs = {
        "WORKFLOW_DIR": str(WORKFLOW_DIR),
        "BATCH_NAME": str(BATCH_NAME),
        "OPERATOR_OUT_DIR": str(self.operator_out_dir),
        "OPERATOR_IN_DIR": str(self.operator_in_dir),
        "BATCHES_INPUT_DIR": "/{}/{}".format(WORKFLOW_DIR, BATCH_NAME)
    }
    if http_proxy is not None and http_proxy != "" and self.enable_proxy:
        envs.update({
            "http_proxy": http_proxy,
            "https_proxy": http_proxy,
            "HTTP_PROXY": http_proxy,
            "HTTPS_PROXY": http_proxy,
        })
    envs.update(self.env_vars)
    self.env_vars = envs

    super().__init__(
        dag=dag,
        task_id=self.task_id,
        retries=self.retries,
        priority_weight=self.priority_weight,
        execution_timeout=self.execution_timeout,
        task_concurrency=self.task_concurrency,
        pool=self.pool,
        pool_slots=self.pool_slots,
        retry_delay=self.retry_delay,
        email=None,
        email_on_retry=True,
        email_on_failure=True,
        start_date=days_ago(0),
        depends_on_past=False,
        wait_for_downstream=False,
        trigger_rule=self.trigger_rule,
        on_failure_callback=KaapanaBaseOperator.on_failure,
        on_success_callback=KaapanaBaseOperator.on_success,
        on_retry_callback=KaapanaBaseOperator.on_retry,
        on_execute_callback=KaapanaBaseOperator.on_execute,
        executor_config=self.executor_config,
        *args, **kwargs
    )
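# Worked example (values follow directly from the defaults above: ram_mem_mb=500,
# ram_mem_mb_lmt=None, cpu_millicores=None, gpu_mem_mb=None) of the pod resources
# derived when no explicit pod_resources object is passed:
#
#   request_cpu    -> None
#   limit_cpu      -> None
#   request_memory -> "500Mi"
#   limit_memory   -> "600Mi"   (ram_mem_mb + 100, since no explicit limit is set)
#   limit_gpu      -> None      (set to 1 as soon as gpu_mem_mb is not None)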
def __init__(self,
             dag,
             mode,  # preprocess, training, inference, export-model, install-model
             input_nifti_operators=None,
             prep_label_operator=None,
             prep_processes_low=8,
             prep_processes_full=6,
             prep_modalities=None,
             prep_preprocess=True,
             prep_check_integrity=True,
             prep_use_nifti_labels=True,
             train_fold=0,
             train_network="3d_lowres",
             train_network_trainer="nnUNetTrainerV2",
             train_continue=False,
             train_npz=False,
             train_strict=True,
             train_max_epochs=1000,
             inf_preparation=True,
             inf_threads_prep=1,
             inf_threads_nifti=1,
             env_vars=None,
             parallel_id=None,
             execution_timeout=execution_timeout,
             *args, **kwargs):
    # Avoid mutable default arguments: use a fresh list/dict per instantiation.
    input_nifti_operators = input_nifti_operators or []
    prep_modalities = prep_modalities or []
    env_vars = env_vars or {}

    # Environment consumed by the nnunet container; every value is passed as a string.
    envs = {
        "MODE": str(mode),
        "INPUT_NIFTI_DIRS": ",".join(
            str(operator.operator_out_dir) for operator in input_nifti_operators),
        "PREP_TL": str(prep_processes_low),
        "PREP_TF": str(prep_processes_full),
        "PREP_LABEL_DIR": str(prep_label_operator.operator_out_dir) if prep_label_operator is not None else "",
        "PREP_MODALITIES": ",".join(str(modality) for modality in prep_modalities),
        "PREP_PREPROCESS": str(prep_preprocess),
        "PREP_CHECK_INTEGRITY": str(prep_check_integrity),
        "PREP_USE_NIFITI_LABELS": str(prep_use_nifti_labels),
        "TRAIN_FOLD": str(train_fold),
        "TRAIN_NETWORK": train_network,
        "TRAIN_NETWORK_TRAINER": train_network_trainer,
        "TRAIN_CONTINUE": str(train_continue),
        "TRAIN_MAX_EPOCHS": str(train_max_epochs),
        "TRAIN_NPZ": str(train_npz),
        "TRAIN_STRICT": str(train_strict),
        "INF_THREADS_PREP": str(inf_threads_prep),
        "INF_THREADS_NIFTI": str(inf_threads_nifti),
        "INF_PREPARATION": str(inf_preparation),
        "TENSORBOARD_DIR": '/tensorboard',
    }
    env_vars.update(envs)

    # Mount the models directory and a memory-backed /dev/shm into the pod.
    data_dir = os.getenv('DATADIR', "")
    models_dir = os.path.join(os.path.dirname(data_dir), "models")

    volume_mounts = []
    volumes = []
    volume_mounts.append(VolumeMount(
        'models', mount_path='/models', sub_path=None, read_only=False))
    volume_config = {
        'hostPath': {
            'type': 'DirectoryOrCreate',
            'path': models_dir
        }
    }
    volumes.append(Volume(name='models', configs=volume_config))

    volume_mounts.append(VolumeMount(
        'dshm', mount_path='/dev/shm', sub_path=None, read_only=False))
    volume_config = {
        'emptyDir': {
            'medium': 'Memory',
        }
    }
    volumes.append(Volume(name='dshm', configs=volume_config))

    # Only training and inference claim a GPU; training additionally enables the
    # tensorboard mount via training_operator.
    pod_resources = PodResources(request_memory=None, request_cpu=None,
                                 limit_memory=None, limit_cpu=None, limit_gpu=None)
    training_operator = False
    gpu_mem_mb = None

    if mode == "training" or mode == "inference":
        pod_resources = PodResources(request_memory=None, request_cpu=None,
                                     limit_memory=None, limit_cpu=None, limit_gpu=1)
        gpu_mem_mb = 6000
        if mode == "training":
            gpu_mem_mb = None
            training_operator = True

    parallel_id = parallel_id if parallel_id is not None else mode

    super().__init__(
        dag=dag,
        image="{}{}/nnunet:1.6.5-vdev".format(default_registry, default_project),
        name="nnunet",
        parallel_id=parallel_id,
        image_pull_secrets=["registry-secret"],
        volumes=volumes,
        volume_mounts=volume_mounts,
        execution_timeout=execution_timeout,
        ram_mem_mb=None,
        ram_mem_mb_lmt=None,
        pod_resources=pod_resources,
        training_operator=training_operator,
        gpu_mem_mb=gpu_mem_mb,
        env_vars=env_vars,
        *args, **kwargs
    )
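# --- Usage sketch (illustrative, not part of the original file) ---
# Chaining the preprocess and training modes inside one DAG. The import paths,
# DAG arguments and the upstream conversion operators (dcm2nifti, seg2nifti)
# are assumptions; adapt them to the actual training DAG.
from airflow.models import DAG
from airflow.utils.dates import days_ago

from nnunet.NnUnetOperator import NnUnetOperator                            # assumed path
from kaapana.operators.DcmConverterOperator import DcmConverterOperator     # assumed path
from kaapana.operators.DcmSeg2ItkOperator import DcmSeg2ItkOperator         # assumed path

args = {"owner": "kaapana", "start_date": days_ago(0), "retries": 0}
dag = DAG(dag_id="nnunet-training-example", default_args=args, schedule_interval=None)

dcm2nifti = DcmConverterOperator(dag=dag, output_format="nii.gz")
seg2nifti = DcmSeg2ItkOperator(dag=dag, output_format="nii.gz")

nnunet_prep = NnUnetOperator(
    dag=dag,
    mode="preprocess",
    input_nifti_operators=[dcm2nifti],
    prep_label_operator=seg2nifti,
    prep_modalities=["CT"],
)
nnunet_train = NnUnetOperator(dag=dag, mode="training", train_max_epochs=1000)

[dcm2nifti, seg2nifti] >> nnunet_prep >> nnunet_train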