Example #1
def generate_compressed_file(files,
                             filename=None,
                             mode="w:gz",
                             archive_basename=None):
    ''' generate a tar.gz file (default) including a set of files '''
    if filename is None:
        filename = "%s.tar.gz" % str(uuid.uuid4())
    bot.debug("Compressing %s files into %s" % (len(files), filename))
    tar = tarfile.open(filename, mode)
    if archive_basename is None:
        archive_basename = os.path.basename(filename).split('.')[0]
    images_added = 0
    for name in files:
        try:
            # Make the archive flat with the images
            basename = "%s/%s" % (archive_basename, os.path.basename(name))
            tar.add(name, arcname=basename)
            images_added += 1
        except FileNotFoundError:
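            # Skip files that no longer exist on disk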
            pass

    tar.close()
    if images_added == 0:
        filename = None
    return filename
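
A minimal usage sketch, assuming the module-level imports (uuid, tarfile, os) and the bot logger this snippet relies on; the file paths and archive name below are hypothetical:

# Hypothetical usage: bundle two local files into one archive.
archive = generate_compressed_file(["/data/img1.dcm", "/data/img2.dcm"],
                                   filename="batch.tar.gz")
if archive is None:
    print("No files were added to the archive")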
Example #2
def get_notifier():
    '''get notifier will return a basic pyinotify watch manager
    based on the user's inotify watch paths in settings.
    If pyinotify is not installed, returns None.
    '''

    try:
        import pyinotify
    except ImportError:
        bot.error("pyinotify is not installed.")
        return None

    level = get_level()
    wm = pyinotify.WatchManager()
    for path, mask, processor_cls in settings.INOTIFIER_WATCH_PATHS:
        cls_path = '.'.join(processor_cls.split('.')[0:-1])
        cls = processor_cls.split('.')[-1]
        mod = __import__(cls_path, globals(), locals(), [cls], level)
        Processor = getattr(mod, cls)
        wm.add_watch(path, mask, proc_fun=Processor())
        bot.debug("Adding watch on %s, processed by %s" %
                  (path, processor_cls))

    notifier = pyinotify.Notifier(wm)
    return notifier
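
A usage sketch, assuming a Django-style settings module where INOTIFIER_WATCH_PATHS lists (path, event mask, dotted processor class path) tuples; the entry shown is hypothetical:

# Hypothetical settings entry (path, mask, and processor are illustrative):
# INOTIFIER_WATCH_PATHS = [
#     ("/data/incoming", pyinotify.IN_CLOSE_WRITE,
#      "myapp.watchers.DicomProcessor"),
# ]
notifier = get_notifier()
if notifier is not None:
    notifier.loop()  # blocks, dispatching events to the processors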
Example #3
def watcher_message(message, request=None):
    '''if request is defined, the message is added to the request via
    the Django messages framework; otherwise it is logged with bot.debug.
    '''
    if request is not None:
        messages.info(request, message)
    else:
        bot.debug(message)
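
A short sketch of both call paths, assuming request is a Django HttpRequest when present:

# In a view: the message surfaces through the Django messages framework
watcher_message("Import started", request=request)
# In a background task (no request): falls back to bot.debug
watcher_message("Import started")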
Example #4
def get_pid_file(quiet=False):
    '''get_pid_file will return a path to write the pid file,
    based on the configuration (user settings)
    '''
    try:
        pid_file = os.path.join(settings.BASE_DIR, 'watcher.pid')
    except AttributeError:
        pid_file = os.path.join("/tmp", "watcher.pid")
    if not quiet:
        if os.path.exists(pid_file):
            bot.debug("pid file is at %s" % (pid_file))
        else:
            bot.debug("pid file set to %s" % (pid_file))

    return pid_file
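
A sketch of the daemon pattern this supports, writing the current process id to the returned path (the write itself is an assumption, not part of this snippet):

import os

pid_file = get_pid_file(quiet=True)
with open(pid_file, "w") as handle:
    handle.write(str(os.getpid()))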
Example #5
    def handle(self, *args, **options):

        nbids = len(options['bid'])
        if nbids > 0:
            bot.debug("Inspecting for errors for %s batch ids" % nbids)
            batches = Batch.objects.filter(id__in=options['bid'],
                                           has_error=True)
        else:
            batches = Batch.objects.filter(has_error=True)

        if len(batches) == 0:
            bot.info("There are no batches with error.")
            sys.exit(1)

        for batch in batches:
            bot.info("\n# %s" % batch.uid)
            errors = batch.logs['errors']
            for error in errors:
                bot.info(error)
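
Assuming this handle belongs to a Django management command (the command name below is hypothetical), it would be invoked through manage.py:

# python manage.py list_batch_errors            # all batches with errors
# python manage.py list_batch_errors --bid 3 7  # only the listed batch ids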
Example #6
def import_dicomdir(dicom_dir, run_get_identifiers=True):
    '''import dicom directory manages importing a valid dicom set into
    the application, and is a celery job triggered by the watcher.
    Here we also flag (and exclude) images that have a header value
    that indicates pixel identifiers.
    '''
    start_time = time.time()

    if os.path.exists(dicom_dir):
        try:
            dicom_files = ls_fullpath(dicom_dir)
        except NotADirectoryError:
            bot.error('%s is not a directory, skipping.' % dicom_dir)
            return

        bot.debug("Importing %s, found %s .dcm files" %
                  (dicom_dir, len(dicom_files)))

        # The batch --> the folder with a set of dicoms tied to one request
        dcm_folder = os.path.basename(dicom_dir)
        batch, created = Batch.objects.get_or_create(uid=dcm_folder)
        batch.logs['STARTING_IMAGE_COUNT'] = len(dicom_files)

        # Data quality check: keep a record of study dates
        study_dates = dict()
        size_bytes = sum(os.path.getsize(f) for f in dicom_files)
        messages = []  # print all unique messages / warnings at end

        # Add in each dicom file to the series
        for dcm_file in dicom_files:
            try:

                # The dicom folder will be named based on the accession#
                dcm = read_file(dcm_file, force=True)
                dicom_uid = os.path.basename(dcm_file)

                # Keep track of studyDate
                study_date = dcm.get('StudyDate')
                if study_date not in study_dates:
                    study_dates[study_date] = 0
                study_dates[study_date] += 1
                flag, flag_group, reason = has_burned_pixels(
                    dicom_file=dcm_file, quiet=True, deid=STUDY_DEID)

                # If the image is flagged, we don't include and move on
                continue_processing = True
                if flag is True:
                    if flag_group not in ["whitelist"]:
                        continue_processing = False
                        message = "%s is flagged in %s: %s, skipping" % (
                            dicom_uid, flag_group, reason)

                        batch = add_batch_warning(message, batch, quiet=True)
                        message = "BurnedInAnnotation found for batch %s" % batch.uid
                        if message not in messages:
                            messages.append(message)

                if continue_processing is True:
                    # Create the Image object in the database
                    # A dicom instance number must be unique for its batch
                    dicom = Image.objects.create(batch=batch, uid=dicom_uid)

                    # Save the dicom file to storage
                    # basename = "%s/%s" %(batch.id,os.path.basename(dcm_file))
                    dicom = save_image_dicom(dicom=dicom,
                                             dicom_file=dcm_file)  # Also saves

                    # Generate image name based on [SUID] added later
                    # accessionnumberSUID.seriesnumber.imagenumber,
                    name = "%s_%s.dcm" % (dcm.get('SeriesNumber'),
                                          dcm.get('InstanceNumber'))
                    dicom.name = name
                    dicom.save()
                    # Only remove files successfully imported
                    #os.remove(dcm_file)

            # Note that on error we don't remove files
            except InvalidDicomError:
                message = "InvalidDicomError: %s skipping." % (dcm_file)
                batch = add_batch_error(message, batch)
            except KeyError:
                message = "KeyError: %s is possibly invalid, skipping." % (
                    dcm_file)
                batch = add_batch_error(message, batch)
            except Exception as e:
                message = "Exception: %s, for %s, skipping." % (e, dcm_file)
                batch = add_batch_error(message, batch)

        # Print summary messages all at once
        for message in messages:
            bot.warning(message)

        if len(study_dates) > 1:
            message = "%s study dates found for %s" % (len(study_dates),
                                                       dicom_dir)
            batch = add_batch_error(message, batch)

        # Save batch thus far
        batch.qa['StudyDate'] = study_dates
        batch.qa['StartTime'] = start_time
        batch.qa['SizeBytes'] = size_bytes
        batch.save()

        # If there were no errors on import, we should remove the directory
        #if not batch.has_error:

        # Should only be called given no error, and should trigger error if not empty
        #os.rmdir(dicom_dir)

        # At the end, submit the dicoms to be anonymized as a batch
        count = batch.image_set.count()
        if count > 0:
            if ANONYMIZE_PIXELS is True:
                bot.warning(
                    "Anonymization of pixels is not yet implemented. Images were skipped."
                )
                # When this is implemented, the function will be modified to add these images
                # to the batch, which will then be first sent through a function to
                # scrub pixels before header data is looked at.
                # scrub_pixels(bid=batch.id)
            if run_get_identifiers is True:
                bot.debug("get_identifiers submit batch %s with %s dicoms." %
                          (batch.uid, count))
                return get_identifiers(bid=batch.id)
            else:
                bot.debug("Finished batch %s with %s dicoms" %
                          (batch.uid, count))
                return batch
        else:
            # No images for further processing
            batch.status = "EMPTY"
            batch.qa['FinishTime'] = time.time()
            message = "%s is flagged EMPTY, no images pass filter" % (batch.id)
            batch = add_batch_warning(message, batch)
            batch.save()
            return

    else:
        bot.warning('Cannot find %s' % dicom_dir)
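
A usage sketch, calling the task directly and skipping the chained identifier request; the directory path is hypothetical:

# Returns the Batch when images pass the burned-pixel filter, or None
# when the batch is empty or the directory is missing.
batch = import_dicomdir("/data/incoming/accession-12345",
                        run_get_identifiers=False)
if batch is not None:
    print(batch.uid, batch.image_set.count())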
Example #7
def get_identifiers(bid, study=None, run_replace_identifiers=True):
    '''get identifiers is the celery task to get identifiers for 
    all images in a batch. A batch is a set of dicom files that may include
    more than one series/study. This is done by way of sending one restful call
    to the DASHER endpoint. If ANONYMIZE_RESTFUL is False
    under settings, this function doesn't run
    '''
    batch = Batch.objects.get(id=bid)

    if study is None:
        study = SOM_STUDY

    if ANONYMIZE_RESTFUL is True:

        images = batch.image_set.all()

        # Process all dicoms at once, one call to the API
        dicom_files = batch.get_image_paths()
        batch.change_images_status('PROCESSING')
        batch.save()  # redundant

        try:
            # We are uploading a zip, so there is no need to preserve
            # image level metadata
            ids = get_ids(dicom_files=dicom_files, expand_sequences=False)
        except FileNotFoundError:
            batch.status = "ERROR"
            message = "batch %s is missing dicom files and should be reprocessed" % (
                batch.id)
            batch = add_batch_warning(message, batch)
            batch.save()
            return

        # Prepare identifiers with only minimal required
        # This function expects many items for one entity, returns
        # request['identifiers'] --> [ entity-with-study-item ]
        request = prepare_identifiers_request(ids)  # force: True

        bot.debug("som.client making request to anonymize batch %s" % (bid))

        # Run with retrying, in case issue with token refresh
        result = None
        try:
            result = run_client(study, request)
        except Exception:
            # On any error, don't continue and don't launch a new job
            message = "error with client, stopping job."
            batch = add_batch_error(message, batch)
            batch.status = "ERROR"
            batch.qa['FinishTime'] = time.time()
            batch.save()

        # Create a batch for all results
        if result is not None:
            if "results" in result:
                batch_ids, created = BatchIdentifiers.objects.get_or_create(
                    batch=batch)
                batch_ids.response = result['results']
                batch_ids.ids = ids
                batch_ids.save()
                batch.qa['DasherFinishTime'] = time.time()
                if run_replace_identifiers is True:
                    return replace_identifiers(bid=bid)
                else:
                    return batch_ids
            else:
                message = "'results' field not found in response: %s" % result
                batch = add_batch_error(message, batch)

    else:
        bot.debug(
            "Restful de-identification skipped [ANONYMIZE_RESTFUL is False]")
        change_status(batch, "DONEPROCESSING")
        change_status(batch.image_set.all(), "DONEPROCESSING")
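
A usage sketch, requesting identifiers for one batch without chaining the replacement step; the batch id is hypothetical:

# Returns the saved BatchIdentifiers when DASHER responds with results;
# on error (or when ANONYMIZE_RESTFUL is False) it returns None.
batch_ids = get_identifiers(bid=42, run_replace_identifiers=False)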