def check_dicomdir(self, event):
    '''check_dicomdir is the main function to call on folder creation or
    modification, either of which could signal a new dicom directory.
    '''
    if self.is_finished(event.pathname):
        bot.log("2|FINISHED: %s" % (event.pathname))
        if event.pathname.lower().startswith("test"):
            bot.log("Here would be call to import_dicomdir for %s" % (event.pathname))
        else:
            # Here is the celery task to use
            import_dicomdir.apply_async(kwargs={"dicom_dir": event.pathname})
    else:
        bot.log("2|NOTFINISHED: %s" % (event.pathname))
def process_IN_MOVED_TO(self, event):
    bot.log("1|MOVEDTO EVENT: %s" % (event.pathname))
    self.check_dicomdir(event)

def process_IN_CLOSE_WRITE(self, event):
    bot.log("1|CLOSEWRITE EVENT: %s" % (event.pathname))
    self.check_dicomdir(event)

def process_IN_MODIFY(self, event):
    bot.log("1|MODIFY EVENT: %s" % (event.pathname))
    self.check_dicomdir(event)
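# ---------------------------------------------------------------------------
# Sketch (not code from this repository): the process_IN_* methods above are
# pyinotify event hooks, so the class defining them is expected to subclass
# pyinotify.ProcessEvent. The minimal wiring below shows one way such a
# handler could be attached to a folder; the function name and the watched
# path are assumptions for illustration only.

import pyinotify


def watch_folder(folder, handler):
    '''Attach `handler` (an instance of the ProcessEvent subclass defining the
    process_IN_* methods above) to `folder` and block, dispatching events.'''
    mask = (pyinotify.IN_MOVED_TO |
            pyinotify.IN_CLOSE_WRITE |
            pyinotify.IN_MODIFY)
    watch_manager = pyinotify.WatchManager()
    notifier = pyinotify.Notifier(watch_manager, handler)
    watch_manager.add_watch(folder, mask, rec=True)
    notifier.loop()
# ---------------------------------------------------------------------------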
def upload_storage(batch_ids=None):
    '''upload_storage will, as a batch, send all batches with DONEPROCESSING
    status to Google Cloud Storage.
    '''
    from sendit.settings import (GOOGLE_CLOUD_STORAGE,
                                 SEND_TO_GOOGLE,
                                 GOOGLE_PROJECT_NAME,
                                 GOOGLE_STORAGE_COLLECTION)

    if batch_ids is None:
        batches = Batch.objects.filter(status="DONEPROCESSING")
    else:
        batches = Batch.objects.filter(status="DONEPROCESSING", id__in=batch_ids)

    # All variables must be defined for sending!
    if GOOGLE_CLOUD_STORAGE in [None, ""]:
        SEND_TO_GOOGLE = False
    if GOOGLE_PROJECT_NAME in [None, ""]:
        SEND_TO_GOOGLE = False
    if GOOGLE_STORAGE_COLLECTION in [None, ""]:
        SEND_TO_GOOGLE = False

    if SEND_TO_GOOGLE is True:
        from deid.identifiers import get_timestamp

        try:
            client = get_client(bucket_name=GOOGLE_CLOUD_STORAGE,
                                project_name=GOOGLE_PROJECT_NAME)
        # Client is unreachable, usually because the network is being stressed;
        # this is why we instantiate in batches to upload
        except:  # OSError and ServiceUnavailable
            bot.error("Cannot connect to client.")
            return

        # Create/get BigQuery dataset, collection should be IRB
        dataset = client.get_or_create_dataset(GOOGLE_STORAGE_COLLECTION)

        # Create a table based on ...
        table = client.get_or_create_table(dataset=dataset,    # All tables named dicom
                                           table_name='dicom',
                                           schema=dicom_schema)

        for batch in batches:
            valid = True
            batch.qa['UploadStartTime'] = time.time()
            batch_ids = BatchIdentifiers.objects.get(batch=batch)

            # Retrieve only images that aren't in the PHI folder
            images = batch.get_finished()

            # Stop if no images pass filters
            if len(images) == 0:
                change_status(batch, "EMPTY")
                message = "batch %s has no images for processing, stopping upload" % (batch.id)
                batch = add_batch_warning(message, batch)
                batch.save()
                continue

            # IR0001fa6_20160525_IR661B54.tar.gz
            # (coded MRN?)_jittereddate_studycode
            required_fields = ['AccessionNumber', 'PatientID']
            for required_field in required_fields:
                if required_field not in batch_ids.shared:
                    change_status(batch, "ERROR")
                    message = "batch ids %s do not have shared PatientID or AccessionNumber, stopping upload" % (batch.id)
                    batch = add_batch_warning(message, batch)
                    batch.save()
                    valid = False
            if valid is False:
                continue

            # Add additional shared metadata
            studycode = batch_ids.shared['AccessionNumber']
            coded_mrn = batch_ids.shared['PatientID']
            batch_ids.shared['CodedPatientID'] = coded_mrn
            batch_ids.shared['ContentType'] = 'application/gzip'
            batch_ids.shared['CodedAccessionNumberID'] = studycode
            batch_ids.shared['NumberOfSeries'] = batch.qa['NumberOfSeries']
            batch_ids.shared['Series'] = batch.qa['Series']
            batch_ids.shared['RemovedSeries'] = batch.qa['FlaggedSeries']

            timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                      format="%Y%m%d")

            compressed_filename = "%s/%s_%s_%s.tar.gz" % (batch.get_path(),
                                                          coded_mrn,
                                                          timestamp,
                                                          studycode)
            compressed_file = generate_compressed_file(files=images,  # mode="w:gz"
                                                       filename=compressed_filename)

            # File will be None if no files were added
            if compressed_file is None:
                change_status(batch, "ERROR")
                message = "batch %s problem compressing file, stopping upload" % (batch.id)
                batch = add_batch_error(message, batch)
                batch.save()
                valid = False
                continue

            # We prepare shared metadata for one item
            batch_ids.shared['IMAGE_COUNT'] = len(images)
            batch.logs['IMAGE_COUNT'] = len(images)
            batch_ids.save()
            batch.save()

            if valid is True:
                metadata = deepcopy(batch_ids.shared)
                metadata['DicomHeader'] = json.dumps(metadata)
                metadata = {compressed_file: metadata}
                bot.log("Uploading %s with %s images to Google Storage %s" % (
                    os.path.basename(compressed_file),
                    len(images),
                    GOOGLE_CLOUD_STORAGE))

                # We only expect to have one entity per batch
                kwargs = {"items": [compressed_file],
                          "table": table,
                          "study": SOM_STUDY,
                          "metadata": metadata,
                          "batch": False}  # upload in batches at END

                # Batch metadata
                upload_dataset(client=client, k=kwargs)

                # Clean up compressed file
                if os.path.exists(compressed_file):
                    os.remove(compressed_file)

            # Finish and record time elapsed
            change_status(batch, "DONE")
            batch.qa['UploadFinishTime'] = time.time()
            total_time = batch.qa['UploadFinishTime'] - batch.qa['UploadStartTime']
            bot.info("Total time for %s: %s images is %f min" % (batch.uid,
                                                                 batch.image_set.count(),
                                                                 total_time / 60))
            batch.qa['ElapsedTime'] = total_time
            batch.save()

        # After image upload, metadata can be uploaded on one batch
        # If this isn't optimal, change "batch" in kwargs to False
        return client.batch.runInsert(table)
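# ---------------------------------------------------------------------------
# Sketch only: generate_compressed_file is defined elsewhere in this codebase,
# so its exact behavior is an assumption here. Based on the mode="w:gz" hint
# and the .tar.gz filename built above, a minimal equivalent using the
# standard library's tarfile module might look like the following; returning
# None when nothing was added mirrors the check performed by the caller.

import os
import tarfile


def _sketch_generate_compressed_file(files, filename, mode="w:gz"):
    '''Bundle `files` into a gzipped tarball at `filename`, returning the
    path on success or None if no files were added.'''
    added = 0
    with tarfile.open(filename, mode) as tar:
        for path in files:
            if os.path.exists(path):
                tar.add(path, arcname=os.path.basename(path))
                added += 1
    if added == 0:
        os.remove(filename)
        return None
    return filename
# ---------------------------------------------------------------------------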
def upload_storage(batch_ids=None):
    '''upload_storage will, as a batch, send all batches with DONEPROCESSING
    status to Google Cloud Storage.
    '''
    from sendit.settings import (GOOGLE_CLOUD_STORAGE,
                                 SEND_TO_GOOGLE,
                                 GOOGLE_PROJECT_NAME,
                                 GOOGLE_PROJECT_ID_HEADER,
                                 GOOGLE_STORAGE_COLLECTION)

    if batch_ids is None:
        batches = Batch.objects.filter(status="DONEPROCESSING")
    else:
        batches = Batch.objects.filter(status="DONEPROCESSING", id__in=batch_ids)

    # All variables must be defined for sending!
    if GOOGLE_CLOUD_STORAGE in [None, ""]:
        SEND_TO_GOOGLE = False
    if GOOGLE_PROJECT_NAME in [None, ""]:
        SEND_TO_GOOGLE = False
    if GOOGLE_STORAGE_COLLECTION in [None, ""]:
        SEND_TO_GOOGLE = False

    if SEND_TO_GOOGLE is True:
        from deid.identifiers import get_timestamp

        # I'm not sure we need this
        # if GOOGLE_PROJECT_ID_HEADER is not None:
        #     client.headers["x-goog-project-id"] = GOOGLE_PROJECT_ID_HEADER

        try:
            client = get_client(bucket_name=GOOGLE_CLOUD_STORAGE,
                                project_name=GOOGLE_PROJECT_NAME)
        # Client is unreachable, usually because the network is being stressed
        except:  # OSError and ServiceUnavailable
            bot.error("Cannot connect to client.")
            return

        collection = client.create_collection(uid=GOOGLE_STORAGE_COLLECTION)

        for batch in batches:
            valid = True
            batch_ids = BatchIdentifiers.objects.get(batch=batch)

            # Retrieve only images that aren't in the PHI folder
            images = batch.get_finished()

            # Stop if no images pass filters
            if len(images) == 0:
                change_status(batch, "EMPTY")
                message = "batch %s has no images for processing, stopping upload" % (batch.id)
                batch = add_batch_warning(message, batch)
                batch.save()
                continue

            # IR0001fa6_20160525_IR661B54.tar.gz
            # (coded MRN?)_jittereddate_studycode
            required_fields = ['AccessionNumber', 'PatientID']
            for required_field in required_fields:
                if required_field not in batch_ids.shared:
                    change_status(batch, "ERROR")
                    message = "batch ids %s do not have shared PatientID or AccessionNumber, stopping upload" % (batch.id)
                    batch = add_batch_warning(message, batch)
                    batch.save()
                    valid = False
            if valid is False:
                continue

            studycode = batch_ids.shared['AccessionNumber']
            coded_mrn = batch_ids.shared['PatientID']
            timestamp = get_timestamp(batch_ids.shared['StudyDate'],
                                      format="%Y%m%d")

            compressed_filename = "%s/%s_%s_%s.tar.gz" % (batch.get_path(),
                                                          coded_mrn,
                                                          timestamp,
                                                          studycode)
            compressed_file = generate_compressed_file(files=images,  # mode="w:gz"
                                                       filename=compressed_filename)

            # File will be None if no files were added
            if compressed_file is None:
                change_status(batch, "ERROR")
                message = "batch %s problem compressing file, stopping upload" % (batch.id)
                batch = add_batch_error(message, batch)
                batch.save()
                valid = False
                continue

            # We prepare shared metadata for one item
            batch_ids.shared['IMAGE_COUNT'] = len(images)
            batch.logs['IMAGE_COUNT'] = len(images)
            batch_ids.save()
            batch.save()

            if valid is True:
                items_metadata = batch_ids.shared
                items = {compressed_file: items_metadata}
                cleaned = deepcopy(batch_ids.cleaned)
                metadata = prepare_entity_metadata(cleaned_ids=cleaned)
                bot.log("Uploading %s with %s images to Google Storage %s" % (
                    os.path.basename(compressed_file),
                    len(images),
                    GOOGLE_CLOUD_STORAGE))

                # We only expect to have one entity per batch
                uid = list(metadata.keys())[0]
                kwargs = {"images": [compressed_file],
                          "collection": collection,
                          "uid": uid,
                          "entity_metadata": metadata[uid],
                          "images_metadata": items}

                # Batch metadata
                upload_dataset(client=client, k=kwargs)

                # Clean up compressed file
                if os.path.exists(compressed_file):
                    os.remove(compressed_file)

            # Finish and record time elapsed
            change_status(batch, "DONE")
            batch.qa['FinishTime'] = time.time()
            total_time = batch.qa['FinishTime'] - batch.qa['StartTime']
            bot.info("Total time for %s: %s images is %f min" % (batch.uid,
                                                                 batch.image_set.count(),
                                                                 total_time / 60))
            batch.qa['ElapsedTime'] = total_time
            batch.save()
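# ---------------------------------------------------------------------------
# Usage sketch (assumption): if upload_storage is registered as a Celery task,
# as the import_dicomdir.apply_async call in the watcher above suggests for
# its sibling tasks, it can be queued for all DONEPROCESSING batches or for an
# explicit subset of batch ids. The calls below are illustrative only and are
# not taken from this repository.
#
#   upload_storage.apply_async()                                # all DONEPROCESSING batches
#   upload_storage.apply_async(kwargs={"batch_ids": [12, 13]})  # a specific subset
#
# Called synchronously (outside a worker), the same selection applies:
#
#   upload_storage(batch_ids=[12, 13])
# ---------------------------------------------------------------------------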