Example #1
    def trigger_dag(self, ds, **kwargs):
        from airflow.api.common.experimental.trigger_dag import trigger_dag as trigger
        import uuid
        import time
        import os
        import glob

        global es

        run_dir = os.path.join(WORKFLOW_DIR, kwargs['dag_run'].run_id)
        # Collect all batch-element directories of the current run.
        batch_folder = glob.glob(os.path.join(run_dir, BATCH_NAME, '*'))

        dag_run_id = generate_run_id(self.trigger_dag_id)

        for batch_element_dir in batch_folder:
            src = os.path.join(batch_element_dir, self.operator_in_dir)
            target = os.path.join(
                WORKFLOW_DIR, dag_run_id, BATCH_NAME,
                os.path.basename(os.path.normpath(batch_element_dir)),
                INITIAL_INPUT_DIR)
            print(src, target)
            self.copy(src, target)

        trigger(dag_id=self.trigger_dag_id,
                run_id=dag_run_id,
                replace_microseconds=False)
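Every example here calls a generate_run_id helper that is defined elsewhere in the surrounding modules and is not part of the snippets. A minimal sketch of such a helper, assuming it only has to produce a unique, DAG-specific run_id (the exact format used by the original code may differ):

from datetime import datetime

def generate_run_id(dag_id):
    # Hypothetical helper: combine the DAG id with a timestamp so that every
    # trigger call gets a unique run_id. The original projects define their own variant.
    return "{}-{}".format(dag_id, datetime.now().strftime("%y%m%d%H%M%S%f"))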
Example #2
    def trigger_it(dag_id, dcm_path, series_uid):
        dag_run_id = generate_run_id(dag_id)

        target = os.path.join("/data", dag_run_id, "batch", series_uid, 'extract-metadata-input')
        print("MOVE!")
        print("SRC: {}".format(dcm_path))
        print("TARGET: {}".format(target))
        shutil.move(dcm_path, target)

        print(("TRIGGERING! DAG-ID: %s RUN_ID: %s" % (dag_id, dag_run_id)))
        trigger(dag_id=dag_id, run_id=dag_run_id, replace_microseconds=False)
Example #3
    def check(self, **kwargs):
        quarantine_path = os.path.join("/ctpinput", ".quarantines")
        path_list = {p for p in Path(quarantine_path).rglob("*.dcm") if p.is_file()}
        if path_list:
            dag_run_id = generate_run_id(self.trigger_dag_id)
            series_uid = kwargs['dag_run'].conf.get('seriesInstanceUID')
            target = os.path.join("/data", dag_run_id, "batch", series_uid, "initial-input")

            if not os.path.exists(target):
                os.makedirs(target)
            print("MOVE!")
            for dcm_file in path_list:
                print("SRC: {}".format(dcm_file))
                print("TARGET: {}".format(target))
                shutil.move(str(dcm_file), target)

            print("TRIGGERING! DAG-ID: %s RUN_ID: %s" % (self.trigger_dag_id, dag_run_id))
            trigger(dag_id=self.trigger_dag_id, run_id=dag_run_id, replace_microseconds=False)
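Examples #2 and #3 are excerpts and do not show their module-level imports. To run them standalone, something along these lines would be needed (names taken from the calls in the snippets above):

import os
import shutil
from pathlib import Path
from airflow.api.common.experimental.trigger_dag import trigger_dag as trigger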
Example #4
def start_reindexing(ds, **kwargs):
    import os
    import glob
    from airflow.api.common.experimental.trigger_dag import trigger_dag as trigger
    from elasticsearch import Elasticsearch
    from shutil import copyfile
    import pydicom

    pacs_data_dir = '/pacsdata'
    workflowdata_dir = "/data/"
    dag_id = "service-extract-metadata"

    print("Start re-index")

    dcm_dirs = []
    file_list = glob.glob(pacs_data_dir + "/**/*", recursive=True)
    for fi in file_list:
        if os.path.isfile(fi):
            dcm_dirs.append(os.path.dirname(fi))
    dcm_dirs = list(set(dcm_dirs))

    print("Files found: {}".format(len(file_list)))
    print("Dcm dirs found: {}".format(len(dcm_dirs)))
    for dcm_dir in dcm_dirs:
        dag_run_id = generate_run_id(dag_id)
        print("Run-id: {}".format(dag_run_id))

        dcm_file = os.path.join(dcm_dir, os.listdir(dcm_dir)[0])
        print("DIR: {}".format(dcm_dir))
        print("dcm-file: {}".format(dcm_file))
        incoming_dcm = pydicom.dcmread(dcm_file)
        seriesUID = incoming_dcm.SeriesInstanceUID

        target_dir = os.path.join(workflowdata_dir, dag_run_id, BATCH_NAME,
                                  "{}".format(seriesUID),
                                  'extract-metadata-input')
        print(target_dir)

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        copyfile(dcm_file,
                 os.path.join(target_dir,
                              os.path.basename(dcm_file) + ".dcm"))

        trigger(dag_id=dag_id, run_id=dag_run_id, replace_microseconds=False)
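The start_reindexing function above uses the classic (ds, **kwargs) signature of an Airflow 1.10 python_callable. A hedged sketch of how it might be wired into a manually triggered DAG; the DAG id, start date and task id below are assumptions, not taken from the original project:

from datetime import datetime
from airflow.models import DAG
from airflow.operators.python_operator import PythonOperator

# Hypothetical DAG definition around start_reindexing; only triggered manually.
dag = DAG(dag_id="service-re-index-dicom-data",
          schedule_interval=None,
          start_date=datetime(2019, 1, 1))

reindex_task = PythonOperator(task_id="start-reindexing",
                              python_callable=start_reindexing,
                              provide_context=True,
                              dag=dag)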
Example #5
    def trigger_dag(self, ds, **kwargs):
        pending_dags = []
        done_dags = []

        self.conf = kwargs['dag_run'].conf
        self.dag_run_id = kwargs['dag_run'].run_id
        self.run_dir = os.path.join(WORKFLOW_DIR, self.dag_run_id)

        dicom_info_list = self.get_dicom_list()
        trigger_series_list = []
        for dicom_series in dicom_info_list:
            for cache_operator in self.cache_operators:
                cache_found = self.check_cache(dicom_series=dicom_series,
                                               cache_operator=cache_operator)

                if not cache_found and self.trigger_mode == "batch":
                    if len(trigger_series_list) == 0:
                        trigger_series_list.append([])
                    trigger_series_list[0].append(dicom_series)
                elif not cache_found and self.trigger_mode == "single":
                    trigger_series_list.append([dicom_series])

                elif not cache_found:
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    print("TRIGGER_MODE: {} is not supported!".format(
                        self.trigger_mode))
                    print("Please use: 'single' or 'batch' -> abort.")
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    exit(1)

        print()
        print("#############################################################")
        print()
        print("TRIGGER-LIST: ")
        print(json.dumps(trigger_series_list, indent=4, sort_keys=True))
        print()
        print("#############################################################")
        print()

        for element in trigger_series_list:
            conf = {"inputs": element, "conf": self.conf}
            dag_run_id = generate_run_id(self.trigger_dag_id)
            triggered_dag = trigger(dag_id=self.trigger_dag_id,
                                    run_id=dag_run_id,
                                    conf=conf,
                                    replace_microseconds=False)
            pending_dags.append(triggered_dag)

        while self.wait_till_done and len(pending_dags) > 0:
            print(
                "Some triggered DAGs are still pending -> waiting {} s".format(
                    self.delay))

            for pending_dag in list(pending_dags):
                pending_dag.update_state()
                state = pending_dag.get_state()
                if state == "running":
                    continue
                elif state == "success":
                    done_dags.append(pending_dag)
                    pending_dags.remove(pending_dag)
                    for series in pending_dag.conf["inputs"]:
                        for cache_operator in self.cache_operators:
                            if not self.check_cache(
                                    dicom_series=series,
                                    cache_operator=cache_operator):
                                print()
                                print(
                                    "#############################################################"
                                )
                                print()
                                print(
                                    "Could still not find the data after the sub-dag."
                                )
                                print("This is unexpected behaviour -> error")
                                print()
                                print(
                                    "#############################################################"
                                )
                                exit(1)

                elif state == "failed":
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    print("Triggered Dag Failed: {}".format(pending_dag.id))
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    exit(1)
                else:
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    print("Unknown DAG-state!")
                    print("DAG:   {}".format(pending_dag.id))
                    print("STATE: {}".format(state))
                    print()
                    print(
                        "#############################################################"
                    )
                    print()
                    exit(1)

            time.sleep(self.delay)

        print()
        print("#############################################################")
        print()
        print("#######################  DONE  ##############################")
        print()
        print("#############################################################")
        print()
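Example #5 works because the experimental trigger_dag API returns the DagRun it creates, so the operator can hold on to it and poll its state. A minimal sketch of that polling pattern, assuming Airflow 1.10 and a hypothetical DAG id:

dag_run = trigger(dag_id="my-sub-dag",            # hypothetical DAG id
                  run_id=generate_run_id("my-sub-dag"),
                  conf={},
                  replace_microseconds=False)

dag_run.update_state()              # refresh the run state from the metadata DB
print(dag_run.get_state())          # e.g. "running", "success" or "failed"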
Example #6
def trigger_dag(dag_id):
    data = request.get_json(force=True)
    if 'conf' in data:
        tmp_conf = data['conf']
    else:
        tmp_conf = data

    # For authentication
    if "x_auth_token" in data:
        tmp_conf["x_auth_token"] = data["x_auth_token"]
    else:
        tmp_conf["x_auth_token"] = request.headers.get('X-Auth-Token')

    if dag_id == "meta-trigger":
        query = tmp_conf["query"]
        index = tmp_conf["index"]
        dag_id = tmp_conf["dag"]
        form_data = tmp_conf["form_data"]
        cohort_limit = int(tmp_conf["cohort_limit"]) if "cohort_limit" in tmp_conf else None
        single_execution = bool(form_data.get("single_execution", False))

        print(f"query: {query}")
        print(f"index: {index}")
        print(f"dag_id: {dag_id}")
        print(f"single_execution: {single_execution}")

        if single_execution:
            hits = HelperElasticsearch.get_query_cohort(elastic_query=query, elastic_index=index)
            if hits is None:
                message = ["Error in HelperElasticsearch: {}!".format(dag_id)]
                response = jsonify(message=message)
                response.status_code = 500
                return response

            hits = hits[:cohort_limit] if cohort_limit is not None else hits
            
            print("SERIES TO LOAD: {}".format(len(hits)))
            for hit in hits:
                hit = hit["_source"]
                studyUID = hit[HelperElasticsearch.study_uid_tag]
                seriesUID = hit[HelperElasticsearch.series_uid_tag]
                SOPInstanceUID = hit[HelperElasticsearch.SOPInstanceUID_tag]
                modality = hit[HelperElasticsearch.modality_tag]

                conf = {
                    "inputs": [
                        {
                            "dcm-uid": {
                                "study-uid": studyUID,
                                "series-uid": seriesUID,
                                "modality": modality
                            }
                        }
                    ],
                    "conf": tmp_conf
                }

                dag_run_id = generate_run_id(dag_id)
                trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf, replace_microseconds=False)
        else:
            conf = {
                "inputs": [
                    {
                        "elastic-query": {
                            "query": query,
                            "index": index
                        }
                    }
                ],
                "conf": tmp_conf
            }
            dag_run_id = generate_run_id(dag_id)
            trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf, replace_microseconds=False)

        message = ["{} created!".format(dag_id)]
        response = jsonify(message=message)
        return response

    else:
        run_id = generate_run_id(dag_id)

        execution_date = None
        try:
            dr = trigger(dag_id, run_id, tmp_conf, execution_date, replace_microseconds=False)
        except AirflowException as err:
            _log.error(err)
            response = jsonify(error="{}".format(err))
            response.status_code = err.status_code
            return response

        message = ["{} created!".format(dr.dag_id)]
        response = jsonify(message=message)
        return response
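Example #6 is a Flask view function; the snippet does not include the app or blueprint it is registered on. A hedged sketch of how such a view could be exposed (the route path is an assumption):

from flask import Flask

app = Flask(__name__)

# Hypothetical registration; the original project wires this up in its own blueprint.
app.add_url_rule("/api/trigger/<string:dag_id>",
                 view_func=trigger_dag,
                 methods=["POST"])

The endpoint would then be called with a POST request whose JSON body carries the conf (and optionally x_auth_token) that the function reads via request.get_json(force=True).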