def get_data(self, studyUID, seriesUID, dag_run_id):
    """Fetch the data of one series into its batch-element directory.

    The target directory is
    ``WORKFLOW_DIR/<dag_run_id>/BATCH_NAME/<seriesUID>/INITIAL_INPUT_DIR``
    and is created when missing. ``self.data_type`` selects what is stored:

    * ``"dicom"``: ``HelperDcmWeb.downloadSeries`` is called with the
      study/series UIDs and the target directory.
    * ``"json"``: the result of ``HelperElasticsearch.get_series_metadata``
      is written to ``metadata.json`` (indent 4, keys sorted).
    * ``"minio"`` or any other value: a message is printed and
      ``exit(1)`` is called.

    Args:
        studyUID: Study UID handed to the DICOM download helper.
        seriesUID: Series UID; also used as a directory name.
        dag_run_id: Run id used as a directory name below ``WORKFLOW_DIR``.

    Raises:
        SystemExit: For ``"minio"`` and for unknown data types.
    """
    target_dir = os.path.join(
        WORKFLOW_DIR, dag_run_id, BATCH_NAME, f'{seriesUID}',
        INITIAL_INPUT_DIR)

    # exist_ok=True replaces the former "exists? -> makedirs" pair, which
    # could fail if the directory appeared between the check and the create.
    os.makedirs(target_dir, exist_ok=True)

    if self.data_type == "dicom":
        HelperDcmWeb.downloadSeries(
            studyUID=studyUID, seriesUID=seriesUID, target_dir=target_dir)

    elif self.data_type == "json":
        meta_data = HelperElasticsearch.get_series_metadata(
            series_uid=seriesUID)
        json_path = os.path.join(target_dir, "metadata.json")
        with open(json_path, 'w') as fp:
            json.dump(meta_data, fp, indent=4, sort_keys=True)

    elif self.data_type == "minio":
        print("Not supported yet!")
        print("abort...")
        exit(1)

    else:
        print("unknown data-mode!")
        print("abort...")
        exit(1)
def get_data(self, series_dict):
    """Fetch the data of one series and report whether it worked.

    The target directory is
    ``WORKFLOW_DIR/<dag_run_id>/BATCH_NAME/<seriesUID>/<self.operator_out_dir>``
    and is created when missing. ``self.data_type`` selects what is stored:

    * ``"dicom"``: ``HelperDcmWeb.downloadSeries`` is called; a falsy
      return value is reported as a failed download.
    * ``"json"``: the result of ``HelperElasticsearch.get_series_metadata``
      is written to ``metadata.json`` (indent 4, keys sorted).
    * ``"minio"`` or any other value: a message is printed and the
      download is reported as failed.

    Args:
        series_dict: Mapping with the keys ``"studyUID"``, ``"seriesUID"``
            and ``"dag_run_id"``.

    Returns:
        A ``(download_successful, message)`` tuple, where ``message`` is
        ``"Series: <seriesUID>"``.
    """
    studyUID = series_dict["studyUID"]
    seriesUID = series_dict["seriesUID"]
    dag_run_id = series_dict["dag_run_id"]

    print(f"Start download series: {seriesUID}")

    target_dir = os.path.join(
        WORKFLOW_DIR, dag_run_id, BATCH_NAME, f'{seriesUID}',
        self.operator_out_dir)

    # exist_ok=True instead of "exists? -> makedirs": when several calls run
    # at the same time, another call may create the directory between the
    # check and the create, which would raise FileExistsError.
    os.makedirs(target_dir, exist_ok=True)

    download_successful = True
    if self.data_type == "dicom":
        download_successful = HelperDcmWeb.downloadSeries(
            studyUID=studyUID, seriesUID=seriesUID, target_dir=target_dir)
        if not download_successful:
            print("Could not download DICOM data!")
            # Normalise any falsy helper result (e.g. None) to False.
            download_successful = False

    elif self.data_type == "json":
        meta_data = HelperElasticsearch.get_series_metadata(
            series_uid=seriesUID)
        json_path = os.path.join(target_dir, "metadata.json")
        with open(json_path, 'w') as fp:
            json.dump(meta_data, fp, indent=4, sort_keys=True)

    elif self.data_type == "minio":
        print("Not supported yet!")
        print("abort...")
        download_successful = False

    else:
        print("unknown data-mode!")
        print("abort...")
        download_successful = False

    message = f"Series: {seriesUID}"
    return download_successful, message
def start(self, ds, **kwargs):
    """Resolve the DAG-run inputs and fetch every referenced series.

    The run configuration is read from ``kwargs['dag_run'].conf`` and stored
    on ``self.conf``. Its ``"inputs"`` entry may be a single item or a list;
    each item must contain one of:

    * ``"elastic-query"`` with ``"query"`` and ``"index"``: the cohort
      returned by ``HelperElasticsearch.get_query_cohort`` is iterated and
      ``self.get_data`` is called per series.
    * ``"dcm-uid"`` with ``"study-uid"`` and ``"series-uid"`` (``"modality"``
      optional): ``self.get_data`` is called for that series.

    When ``self.check_modality`` is truthy, ``self.check_dag_modality`` is
    called with the modality before the data is fetched.

    Args:
        ds: Not used by this method.
        **kwargs: Must contain ``dag_run`` with ``conf`` and ``run_id``.

    Returns:
        None. Returns early when there is no config or no ``"inputs"`` key.

    Raises:
        SystemExit: On malformed or unknown input items.
    """
    print("Starting moule LocalGetInputDataOperator...")
    self.conf = kwargs['dag_run'].conf
    dag_run_id = kwargs['dag_run'].run_id

    if self.conf is None or "inputs" not in self.conf:
        print("No config or inputs in config found!")
        print("Skipping...")
        return

    inputs = self.conf["inputs"]
    if not isinstance(inputs, list):
        inputs = [inputs]

    # "input_spec" rather than "input" so the builtin is not shadowed.
    for input_spec in inputs:
        if "elastic-query" in input_spec:
            elastic_query = input_spec["elastic-query"]
            if "query" not in elastic_query:
                print("'query' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "index" not in elastic_query:
                print("'index' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            query = elastic_query["query"]
            index = elastic_query["index"]
            cohort = HelperElasticsearch.get_query_cohort(
                elastic_index=index, elastic_query=query)

            for series in cohort:
                series = series["_source"]
                study_uid = series[HelperElasticsearch.study_uid_tag]
                series_uid = series[HelperElasticsearch.series_uid_tag]
                modality = series[HelperElasticsearch.modality_tag]

                print(("studyUID %s" % study_uid))
                print(("seriesUID %s" % series_uid))
                print(("modality %s" % modality))

                if self.check_modality:
                    self.check_dag_modality(input_modality=modality)

                self.get_data(studyUID=study_uid, seriesUID=series_uid,
                              dag_run_id=dag_run_id)

        elif "dcm-uid" in input_spec:
            dcm_uid = input_spec["dcm-uid"]
            if "study-uid" not in dcm_uid:
                print("'study-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "series-uid" not in dcm_uid:
                print("'series-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            # The modality is optional here; it is only checked when given.
            if "modality" in dcm_uid and self.check_modality:
                modality = dcm_uid["modality"]
                self.check_dag_modality(input_modality=modality)

            study_uid = dcm_uid["study-uid"]
            series_uid = dcm_uid["series-uid"]
            self.get_data(studyUID=study_uid, seriesUID=series_uid,
                          dag_run_id=dag_run_id)

        else:
            print("Error with dag-config!")
            print("Unknown input: {}".format(input_spec))
            print("Supported 'dcm-uid' and 'elastic-query' ")
            print("Dag-conf: {}".format(self.conf))
            exit(1)
def get_dicom_list(self):
    """Build the list of series this run refers to.

    Two sources are supported:

    * ``self.use_dcm_files`` truthy: every entry below
      ``<self.run_dir>/BATCH_NAME`` is inspected; the first ``*.dcm`` file
      found in its ``self.operator_in_dir`` is read with ``pydicom`` and
      the tags (0020,000D), (0020,000E) and (0008,0060) are used as study
      UID, series UID and modality.
    * otherwise: ``self.conf["inputs"]`` (a single item or a list) is
      resolved. ``"elastic-query"`` items are expanded through
      ``HelperElasticsearch.get_query_cohort``; ``"dcm-uid"`` items are
      taken as given.

    Returns:
        A list of ``{"dcm-uid": {"study-uid", "series-uid", "modality"}}``
        dictionaries.

    Raises:
        SystemExit: When the config is required but missing, when a batch
            element contains no DICOM file, or on malformed/unknown inputs.
        KeyError: When a ``"dcm-uid"`` item has no ``"modality"`` key.
    """
    dicom_info_list = []

    # The config is only needed when the series are NOT read from DICOM
    # files. The parentheses matter: without them "and" binds tighter than
    # "or", so the "inputs" test also ran (and could abort or raise on a
    # None config) when use_dcm_files was set.
    if not self.use_dcm_files and (
            self.conf is None or "inputs" not in self.conf):
        print("No config or inputs in config found!")
        print("Abort.")
        exit(1)

    if self.use_dcm_files:
        batch_folder = glob.glob(os.path.join(self.run_dir, BATCH_NAME, '*'))
        for batch_element_dir in batch_folder:
            input_dir = os.path.join(batch_element_dir, self.operator_in_dir)
            # NOTE(review): recursive=True has no effect without a "**"
            # component in the pattern, so only files directly inside
            # input_dir match. Kept as-is; confirm whether a recursive
            # search was intended.
            dcm_file_list = glob.glob(input_dir + "/*.dcm", recursive=True)

            if not dcm_file_list:
                print()
                print(
                    "#############################################################"
                )
                print()
                print("Couldn't find any DICOM file in dir: {}".format(
                    input_dir))
                print()
                print(
                    "#############################################################"
                )
                print()
                exit(1)

            # Only the first file is read to obtain the UIDs and modality.
            dicom_file = pydicom.dcmread(dcm_file_list[0])
            study_uid = dicom_file[0x0020, 0x000D].value
            series_uid = dicom_file[0x0020, 0x000E].value
            modality = dicom_file[0x0008, 0x0060].value
            dicom_info_list.append({
                "dcm-uid": {
                    "study-uid": study_uid,
                    "series-uid": series_uid,
                    "modality": modality
                }
            })
        return dicom_info_list

    inputs = self.conf["inputs"]
    if not isinstance(inputs, list):
        inputs = [inputs]

    # "input_spec" rather than "input" so the builtin is not shadowed.
    for input_spec in inputs:
        if "elastic-query" in input_spec:
            elastic_query = input_spec["elastic-query"]
            if "query" not in elastic_query:
                print("'query' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "index" not in elastic_query:
                print("'index' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            query = elastic_query["query"]
            index = elastic_query["index"]
            cohort = HelperElasticsearch.get_query_cohort(
                elastic_index=index, elastic_query=query)

            for series in cohort:
                series = series["_source"]
                study_uid = series[HelperElasticsearch.study_uid_tag]
                series_uid = series[HelperElasticsearch.series_uid_tag]
                modality = series[HelperElasticsearch.modality_tag]
                dicom_info_list.append({
                    "dcm-uid": {
                        "study-uid": study_uid,
                        "series-uid": series_uid,
                        "modality": modality
                    }
                })

        elif "dcm-uid" in input_spec:
            dcm_uid = input_spec["dcm-uid"]
            if "study-uid" not in dcm_uid:
                print("'study-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "series-uid" not in dcm_uid:
                print("'series-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            study_uid = dcm_uid["study-uid"]
            series_uid = dcm_uid["series-uid"]
            # NOTE(review): unlike the two UIDs, a missing "modality" is not
            # reported with a message; it surfaces as a KeyError.
            modality = dcm_uid["modality"]
            dicom_info_list.append({
                "dcm-uid": {
                    "study-uid": study_uid,
                    "series-uid": series_uid,
                    "modality": modality
                }
            })

        else:
            print("Error with dag-config!")
            print("Unknown input: {}".format(input_spec))
            print("Supported 'dcm-uid' and 'elastic-query' ")
            print("Dag-conf: {}".format(self.conf))
            exit(1)

    return dicom_info_list
def start(self, ds, **kwargs):
    """Resolve the DAG-run inputs and fetch the series in parallel.

    The run configuration is read from ``kwargs['dag_run'].conf`` and stored
    on ``self.conf``. Its ``"inputs"`` entry may be a single item or a list;
    each item must contain one of:

    * ``"elastic-query"`` with ``"query"`` and ``"index"``: every series of
      the cohort returned by ``HelperElasticsearch.get_query_cohort`` is
      queued for download.
    * ``"dcm-uid"`` with ``"study-uid"`` and ``"series-uid"`` (``"modality"``
      optional): that series is queued for download.

    When ``self.check_modality`` is truthy, ``self.check_dag_modality`` is
    called with the modality before the series is queued. An optional
    ``conf["conf"]["cohort_limit"]`` truncates the queue to its first N
    entries. The queue is then processed by ``self.get_data`` on a
    ``ThreadPool`` of ``self.parallel_downloads`` workers.

    Args:
        ds: Not used by this method.
        **kwargs: Must contain ``dag_run`` with ``conf`` and ``run_id``.

    Returns:
        None. Returns early when there is no config or no ``"inputs"`` key.

    Raises:
        SystemExit: On malformed or unknown input items, or as soon as one
            download reports failure.
    """
    print("Starting moule LocalGetInputDataOperator...")
    self.conf = kwargs['dag_run'].conf

    cohort_limit = None
    if self.conf is not None and "conf" in self.conf:
        trigger_conf = self.conf["conf"]
        raw_limit = (trigger_conf["cohort_limit"]
                     if "cohort_limit" in trigger_conf else None)
        # Convert only when a limit was supplied; int(None) would raise
        # TypeError for every run that does not set "cohort_limit".
        if raw_limit is not None:
            cohort_limit = int(raw_limit)

    dag_run_id = kwargs['dag_run'].run_id

    if self.conf is None or "inputs" not in self.conf:
        print("No config or inputs in config found!")
        print("Skipping...")
        return

    inputs = self.conf["inputs"]
    if not isinstance(inputs, list):
        inputs = [inputs]

    download_list = []
    # "input_spec" rather than "input" so the builtin is not shadowed.
    for input_spec in inputs:
        if "elastic-query" in input_spec:
            elastic_query = input_spec["elastic-query"]
            if "query" not in elastic_query:
                print("'query' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "index" not in elastic_query:
                print("'index' not found in 'elastic-query': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            query = elastic_query["query"]
            index = elastic_query["index"]
            cohort = HelperElasticsearch.get_query_cohort(
                elastic_index=index, elastic_query=query)

            for series in cohort:
                series = series["_source"]
                study_uid = series[HelperElasticsearch.study_uid_tag]
                series_uid = series[HelperElasticsearch.series_uid_tag]
                modality = series[HelperElasticsearch.modality_tag]

                print(("studyUID %s" % study_uid))
                print(("seriesUID %s" % series_uid))
                print(("modality %s" % modality))

                if self.check_modality:
                    self.check_dag_modality(input_modality=modality)

                download_list.append({
                    "studyUID": study_uid,
                    "seriesUID": series_uid,
                    "dag_run_id": dag_run_id
                })

        elif "dcm-uid" in input_spec:
            dcm_uid = input_spec["dcm-uid"]
            if "study-uid" not in dcm_uid:
                print("'study-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)
            if "series-uid" not in dcm_uid:
                print("'series-uid' not found in 'dcm-uid': {}".format(
                    input_spec))
                print("abort...")
                exit(1)

            # The modality is optional here; it is only checked when given.
            if "modality" in dcm_uid and self.check_modality:
                modality = dcm_uid["modality"]
                self.check_dag_modality(input_modality=modality)

            study_uid = dcm_uid["study-uid"]
            series_uid = dcm_uid["series-uid"]
            download_list.append({
                "studyUID": study_uid,
                "seriesUID": series_uid,
                "dag_run_id": dag_run_id
            })

        else:
            print("Error with dag-config!")
            print("Unknown input: {}".format(input_spec))
            print("Supported 'dcm-uid' and 'elastic-query' ")
            print("Dag-conf: {}".format(self.conf))
            exit(1)

    if cohort_limit is not None:
        download_list = download_list[:cohort_limit]

    print("")
    print("## SERIES TO LOAD: {}".format(len(download_list)))
    print("")

    # The context manager shuts the pool down on every exit path, including
    # the exit(1) below; results arrive in completion order.
    with ThreadPool(self.parallel_downloads) as pool:
        results = pool.imap_unordered(self.get_data, download_list)
        for download_successful, message in results:
            print(f"Finished: {message}")
            if not download_successful:
                print("Something went wrong.")
                exit(1)
def trigger_dag(dag_id):
    """Trigger one or more DAG runs from the JSON body of the request.

    The body is read with ``request.get_json(force=True)``. Its ``"conf"``
    entry (or the whole body when there is none) becomes the run
    configuration; ``"x_auth_token"`` is taken from the body or, failing
    that, from the ``X-Auth-Token`` request header.

    For ``dag_id == "meta-trigger"`` the configuration must provide
    ``"query"``, ``"index"``, ``"dag"`` and ``"form_data"``; the DAG named
    by ``"dag"`` is triggered instead:

    * ``form_data["single_execution"]`` truthy: the cohort is resolved via
      ``HelperElasticsearch.get_query_cohort``, optionally truncated to
      ``"cohort_limit"`` hits, and one run per hit is triggered with a
      ``"dcm-uid"`` input.
    * otherwise: a single run is triggered with an ``"elastic-query"``
      input.

    Any other ``dag_id`` is triggered directly with the configuration.

    Args:
        dag_id: Id of the DAG to trigger, or ``"meta-trigger"``.

    Returns:
        A ``jsonify`` response. Status 500 is set when the cohort lookup
        returns ``None``; ``err.status_code`` is set when the direct trigger
        raises ``AirflowException``.
    """
    data = request.get_json(force=True)
    tmp_conf = data['conf'] if 'conf' in data else data

    # For authentication
    if "x_auth_token" in data:
        tmp_conf["x_auth_token"] = data["x_auth_token"]
    else:
        tmp_conf["x_auth_token"] = request.headers.get('X-Auth-Token')

    if dag_id == "meta-trigger":
        query = tmp_conf["query"]
        index = tmp_conf["index"]
        dag_id = tmp_conf["dag"]
        form_data = tmp_conf["form_data"]

        # Convert only when a limit was supplied; int(None) would raise
        # TypeError for every request that does not set "cohort_limit".
        raw_limit = (tmp_conf["cohort_limit"]
                     if "cohort_limit" in tmp_conf else None)
        cohort_limit = int(raw_limit) if raw_limit is not None else None

        single_execution = bool(
            "single_execution" in form_data
            and form_data["single_execution"])

        print(f"query: {query}")
        print(f"index: {index}")
        print(f"dag_id: {dag_id}")
        print(f"single_execution: {single_execution}")

        if single_execution:
            hits = HelperElasticsearch.get_query_cohort(
                elastic_query=query, elastic_index=index)
            if hits is None:
                message = ["Error in HelperElasticsearch: {}!".format(dag_id)]
                response = jsonify(message=message)
                response.status_code = 500
                return response

            if cohort_limit is not None:
                hits = hits[:cohort_limit]

            print("SERIES TO LOAD: {}".format(len(hits)))
            for hit in hits:
                hit = hit["_source"]
                studyUID = hit[HelperElasticsearch.study_uid_tag]
                seriesUID = hit[HelperElasticsearch.series_uid_tag]
                modality = hit[HelperElasticsearch.modality_tag]

                # One DAG run per series, each with a single dcm-uid input.
                conf = {
                    "inputs": [
                        {
                            "dcm-uid": {
                                "study-uid": studyUID,
                                "series-uid": seriesUID,
                                "modality": modality
                            }
                        }
                    ],
                    "conf": tmp_conf
                }

                dag_run_id = generate_run_id(dag_id)
                trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf,
                        replace_microseconds=False)
        else:
            # One DAG run for the whole cohort; the run resolves the query.
            conf = {
                "inputs": [
                    {
                        "elastic-query": {
                            "query": query,
                            "index": index
                        }
                    }
                ],
                "conf": tmp_conf
            }
            dag_run_id = generate_run_id(dag_id)
            trigger(dag_id=dag_id, run_id=dag_run_id, conf=conf,
                    replace_microseconds=False)

        message = ["{} created!".format(dag_id)]
        response = jsonify(message=message)
        return response

    else:
        run_id = generate_run_id(dag_id)

        execution_date = None
        try:
            dr = trigger(dag_id, run_id, tmp_conf, execution_date,
                         replace_microseconds=False)
        except AirflowException as err:
            _log.error(err)
            response = jsonify(error="{}".format(err))
            response.status_code = err.status_code
            return response

        message = ["{} created!".format(dr.dag_id)]
        response = jsonify(message=message)
        return response