def trigger_dag(self, ds, **kwargs):
    from airflow.api.common.experimental.trigger_dag import trigger_dag as trigger
    import uuid
    import time
    import os
    import glob

    global es

    # Collect all batch-element directories of the current run.
    run_dir = os.path.join(WORKFLOW_DIR, kwargs['dag_run'].run_id)
    batch_folder = [f for f in glob.glob(os.path.join(run_dir, BATCH_NAME, '*'))]

    dag_run_id = generate_run_id(self.trigger_dag_id)

    # Copy each element's operator input dir into the workflow dir of the DAG to be triggered.
    for batch_element_dir in batch_folder:
        src = os.path.join(batch_element_dir, self.operator_in_dir)
        target = os.path.join(WORKFLOW_DIR, dag_run_id, BATCH_NAME,
                              os.path.basename(os.path.normpath(batch_element_dir)),
                              INITIAL_INPUT_DIR)
        print(src, target)
        self.copy(src, target)

    trigger(dag_id=self.trigger_dag_id, run_id=dag_run_id, replace_microseconds=False)
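# The trigger helpers in this excerpt all call a module-level generate_run_id() that is not
# shown here. A minimal sketch of what such a helper could look like, assuming the run id is
# simply the DAG id plus a timestamp (the exact format is an assumption, not the project's
# actual implementation):
def generate_run_id(dag_id):
    from datetime import datetime
    # Combine the DAG id with a microsecond timestamp to get a unique, sortable run id.
    return "{}-{}".format(dag_id, datetime.now().strftime("%y%m%d%H%M%S%f"))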
def trigger_it(dag_id, dcm_path, series_uid):
    dag_run_id = generate_run_id(dag_id)
    target = os.path.join("/data", dag_run_id, "batch", series_uid, 'extract-metadata-input')

    print("MOVE!")
    print("SRC: {}".format(dcm_path))
    print("TARGET: {}".format(target))
    shutil.move(dcm_path, target)

    print("TRIGGERING! DAG-ID: {} RUN_ID: {}".format(dag_id, dag_run_id))
    trigger(dag_id=dag_id, run_id=dag_run_id, replace_microseconds=False)
def check(self, **kwargs):
    quarantine_path = os.path.join("/ctpinput", ".quarantines")
    path_list = {p for p in Path(quarantine_path).rglob("*.dcm") if p.is_file()}

    if path_list:
        dag_run_id = generate_run_id(self.trigger_dag_id)
        series_uid = kwargs['dag_run'].conf.get('seriesInstanceUID')
        target = os.path.join("/data", dag_run_id, "batch", series_uid, "initial-input")
        if not os.path.exists(target):
            os.makedirs(target)

        print("MOVE!")
        for dcm_file in path_list:
            print("SRC: {}".format(dcm_file))
            print("TARGET: {}".format(target))
            shutil.move(str(dcm_file), target)

        print("TRIGGERING! DAG-ID: {} RUN_ID: {}".format(self.trigger_dag_id, dag_run_id))
        trigger(dag_id=self.trigger_dag_id, run_id=dag_run_id, replace_microseconds=False)
def start_reindexing(ds, **kwargs):
    import os
    import glob
    import pydicom
    from shutil import copyfile
    from airflow.api.common.experimental.trigger_dag import trigger_dag as trigger
    from elasticsearch import Elasticsearch

    pacs_data_dir = '/pacsdata'
    workflowdata_dir = "/data/"
    dag_id = "service-extract-metadata"

    print("Start re-index")

    # Collect every directory below the PACS data dir that contains at least one file.
    dcm_dirs = []
    file_list = glob.glob(pacs_data_dir + "/**/*", recursive=True)
    for fi in file_list:
        if os.path.isfile(fi):
            dcm_dirs.append(os.path.dirname(fi))
    dcm_dirs = list(set(dcm_dirs))

    print("Files found: {}".format(len(file_list)))
    print("Dcm dirs found: {}".format(len(dcm_dirs)))

    # Trigger one extract-metadata run per series directory.
    for dcm_dir in dcm_dirs:
        dag_run_id = generate_run_id(dag_id)
        print("Run-id: {}".format(dag_run_id))

        dcm_file = os.path.join(dcm_dir, os.listdir(dcm_dir)[0])
        print("DIR: {}".format(dcm_dir))
        print("dcm-file: {}".format(dcm_file))

        incoming_dcm = pydicom.dcmread(dcm_file)
        seriesUID = incoming_dcm.SeriesInstanceUID

        target_dir = os.path.join(workflowdata_dir, dag_run_id, BATCH_NAME,
                                  "{}".format(seriesUID), 'extract-metadata-input')
        print(target_dir)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        copyfile(dcm_file, os.path.join(target_dir, os.path.basename(dcm_file) + ".dcm"))
        trigger(dag_id=dag_id, run_id=dag_run_id, replace_microseconds=False)
def trigger_dag(self, ds, **kwargs):
    pending_dags = []
    done_dags = []

    self.conf = kwargs['dag_run'].conf
    self.dag_run_id = kwargs['dag_run'].run_id
    self.run_dir = os.path.join(WORKFLOW_DIR, self.dag_run_id)

    dicom_info_list = self.get_dicom_list()
    trigger_series_list = []

    # Group all series that are not found in the cache, either into one batch or into single-series runs.
    for dicom_series in dicom_info_list:
        for cache_operator in self.cache_operators:
            cache_found = self.check_cache(dicom_series=dicom_series, cache_operator=cache_operator)

            if not cache_found and self.trigger_mode == "batch":
                if len(trigger_series_list) == 0:
                    trigger_series_list.append([])
                trigger_series_list[0].append(dicom_series)
            elif not cache_found and self.trigger_mode == "single":
                trigger_series_list.append([dicom_series])
            elif not cache_found:
                print()
                print("#############################################################")
                print()
                print("TRIGGER_MODE: {} is not supported!".format(self.trigger_mode))
                print("Please use: 'single' or 'batch' -> abort.")
                print()
                print("#############################################################")
                print()
                exit(1)

    print()
    print("#############################################################")
    print()
    print("TRIGGER-LIST: ")
    print(json.dumps(trigger_series_list, indent=4, sort_keys=True))
    print()
    print("#############################################################")
    print()

    # Trigger one DAG run per entry of the trigger list.
    for element in trigger_series_list:
        conf = {"inputs": element, "conf": self.conf}
        dag_run_id = generate_run_id(self.trigger_dag_id)
        triggered_dag = trigger(dag_id=self.trigger_dag_id, run_id=dag_run_id,
                                conf=conf, replace_microseconds=False)
        pending_dags.append(triggered_dag)

    # Optionally wait until every triggered DAG run has finished and its output is cached.
    while self.wait_till_done and len(pending_dags) > 0:
        print("Some triggered DAGs are still pending -> waiting {} s".format(self.delay))

        for pending_dag in list(pending_dags):
            pending_dag.update_state()
            state = pending_dag.get_state()
            if state == "running":
                continue
            elif state == "success":
                done_dags.append(pending_dag)
                pending_dags.remove(pending_dag)
                for series in pending_dag.conf["inputs"]:
                    for cache_operator in self.cache_operators:
                        if not self.check_cache(dicom_series=series, cache_operator=cache_operator):
                            print()
                            print("#############################################################")
                            print()
                            print("Could still not find the data after the sub-dag.")
                            print("This is unexpected behaviour -> error")
                            print()
                            print("#############################################################")
                            exit(1)
            elif state == "failed":
                print()
                print("#############################################################")
                print()
                print("Triggered Dag Failed: {}".format(pending_dag.id))
                print()
                print("#############################################################")
                print()
                exit(1)
            else:
                print()
                print("#############################################################")
                print()
                print("Unknown DAG-state!")
                print("DAG: {}".format(pending_dag.id))
                print("STATE: {}".format(state))
                print()
                print("#############################################################")
                print()
                exit(1)

        time.sleep(self.delay)

    print()
    print("#############################################################")
    print()
    print("####################### DONE ##############################")
    print()
    print("#############################################################")
    print()
def trigger_dag(dag_id):
    data = request.get_json(force=True)

    if 'conf' in data:
        tmp_conf = data['conf']
    else:
        tmp_conf = data

    # For authentication
    if "x_auth_token" in data:
        tmp_conf["x_auth_token"] = data["x_auth_token"]
    else:
        tmp_conf["x_auth_token"] = request.headers.get('X-Auth-Token')

    if dag_id == "meta-trigger":
        query = tmp_conf["query"]
        index = tmp_conf["index"]
        dag_id = tmp_conf["dag"]
        form_data = tmp_conf["form_data"]
        # Only cast when a limit was actually supplied; int(None) would raise a TypeError.
        cohort_limit = int(tmp_conf["cohort_limit"]) if tmp_conf.get("cohort_limit") is not None else None
        single_execution = True if "single_execution" in form_data and form_data["single_execution"] else False

        print(f"query: {query}")
        print(f"index: {index}")
        print(f"dag_id: {dag_id}")
        print(f"single_execution: {single_execution}")

        if single_execution:
            hits = HelperElasticsearch.get_query_cohort(elastic_query=query, elastic_index=index)
            if hits is None:
                message = ["Error in HelperElasticsearch: {}!".format(dag_id)]
                response = jsonify(message=message)
                response.status_code = 500
                return response

            hits = hits[:cohort_limit] if cohort_limit is not None else hits
            print("SERIES TO LOAD: {}".format(len(hits)))

            # Trigger one DAG run per series returned by the Elasticsearch query.
            for hit in hits:
                hit = hit["_source"]
                studyUID = hit[HelperElasticsearch.study_uid_tag]
                seriesUID = hit[HelperElasticsearch.series_uid_tag]
                SOPInstanceUID = hit[HelperElasticsearch.SOPInstanceUID_tag]
                modality = hit[HelperElasticsearch.modality_tag]

                conf = {
                    "inputs": [
                        {
                            "dcm-uid": {
                                "study-uid": studyUID,
                                "series-uid": seriesUID,
                                "modality": modality
                            }
                        }
                    ],
                    "conf": tmp_conf
                }

                dag_run_id = generate_run_id(dag_id)
                trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf, replace_microseconds=False)
        else:
            # Trigger a single DAG run that receives the whole query as its input.
            conf = {
                "inputs": [
                    {
                        "elastic-query": {
                            "query": query,
                            "index": index
                        }
                    }
                ],
                "conf": tmp_conf
            }

            dag_run_id = generate_run_id(dag_id)
            trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf, replace_microseconds=False)

        message = ["{} created!".format(dag_id)]
        response = jsonify(message=message)
        return response

    else:
        run_id = generate_run_id(dag_id)
        execution_date = None
        try:
            dr = trigger(dag_id, run_id, tmp_conf, execution_date, replace_microseconds=False)
        except AirflowException as err:
            _log.error(err)
            response = jsonify(error="{}".format(err))
            response.status_code = err.status_code
            return response

        message = ["{} created!".format(dr.dag_id)]
        response = jsonify(message=message)
        return response
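# For illustration only: a hypothetical request body for the "meta-trigger" branch above.
# The keys mirror what trigger_dag() reads from tmp_conf and form_data; the concrete values
# (index name, target DAG name, query) are made up and not taken from the project.
example_meta_trigger_payload = {
    "conf": {
        "query": {"query_string": {"query": "*"}},  # Elasticsearch query passed to HelperElasticsearch
        "index": "meta-index",                      # Elasticsearch index to search (assumed name)
        "dag": "collect-metadata",                  # DAG that should actually be triggered (assumed name)
        "cohort_limit": 10,                         # optional cap on the number of series
        "form_data": {"single_execution": True}     # True -> one DAG run per series hit
    }
}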