Example 1
    def inspect(self, texts, include_quote=True, min_level=None):
        '''inspect will inspect a dump of text for identifiers
        :param texts: a single string or list of strings to inspect
        :param include_quote: include quotes in the query?
        :param min_level: the minimum likelihood level to return
        '''
        if not isinstance(texts, list):
            texts = [texts]

        if min_level is None:
            min_level = 'LIKELIHOOD_UNSPECIFIED'

        config = {
            'includeQuote': include_quote,
            'infoTypes': [],
            'maxFindings': 0,
            'minLikelihood': min_level
        }

        items = []
        for text in texts:
            new_item = {'type': 'text/plain', 'value': text}
            items.append(new_item)

        groups = paginate_items(items, size=100)

        results = []
        for idx in range(len(groups)):
            bot.debug("inspecting for %s of %s" % (idx + 1, len(groups)))
            items = groups[idx]
            body = {'inspectConfig': config, 'items': items}
            result = self.service.content().inspect(body=body).execute()
            results = results + result['results']
        return results
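
A minimal usage sketch, assuming `client` is an instance of a wrapper class (the name `DLPClient` is illustrative, not from the source) that exposes this `inspect` method over the Google DLP service:

# Hypothetical wrapper around the Google DLP content.inspect endpoint;
# the class name is illustrative only.
client = DLPClient()

findings = client.inspect(["Call John Doe at (555) 123-4567"],
                          include_quote=True,
                          min_level='LIKELY')
for finding in findings:
    print(finding)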
Example 2
def validate_metadata(full_path, metadata_type=None):
    '''validate_metadata checks to see if a name (either a collection
    name, folder for an image or text) has associated metadata, indicated by
    a file of the same name (ending with json) in the parent directory of the
    named file. If no matching files are found, None is returned, and the user
    is alerted. If a matching file is found, it is checked to be valid json.
    :param full_path: full path to a file or folder
    :param metadata_type: either one of collection, image, or text. Default collection
    '''
    if metadata_type is None:
        metadata_type = "collection"

    parent_dir = os.path.dirname(full_path)
    base_name = os.path.basename(full_path).split('.')[0]
    metadata = "%s/%s.json" % (parent_dir, base_name)

    if os.path.exists(metadata):
        bot.debug('found %s metadata: %s' % (metadata_type, base_name))
        try:
            md = read_json(metadata)
            bot.info('%s %s metadata is valid' % (metadata_type, base_name))
        except Exception:
            bot.error('%s %s has invalid json metadata %s' %
                      (metadata_type, base_name, metadata))
            return False

    else:
        bot.info('%s %s does not have metadata file %s.json' %
                 (metadata_type, base_name, base_name))
        return None

    return True
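
A short usage sketch, assuming a collection folder `/data/collection1` with a sibling `/data/collection1.json` metadata file (paths are illustrative):

result = validate_metadata('/data/collection1', metadata_type="collection")

if result is None:
    print("no metadata file found")
elif result is False:
    print("metadata file exists but is not valid json")
else:
    print("metadata is valid")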
Example 3
 def delete(self):
     '''delete an entity'''
     key = None
     if self._Entity is not None:
         key = self._Entity.key
         self.client.delete(key)
         bot.debug("Deleting %s" % (key))
     return key
Example 4
 def get(self, url, headers=None, token=None, data=None, return_json=True):
     '''get will use requests to get a particular url
     '''
     bot.debug("GET %s" % url)
     return self.call(url,
                      func=requests.get,
                      data=data,
                      return_json=return_json)
Example 5
 def post(self, url, data=None, return_json=True):
     '''post will use requests to post data to a particular url
     '''
     bot.debug("POST %s" % url)
     return self.call(url,
                      func=requests.post,
                      data=data,
                      return_json=return_json)
Example 6
 def put(self, url, data=None, return_json=True):
     '''put request
     '''
     bot.debug("PUT %s" % url)
     return self.call(url,
                      func=requests.put,
                      data=data,
                      return_json=return_json)
Example 7
def get_structures(inputs, build_dir=None, clean_up=True, fail_exit=True):
    '''get structures will parse one or more compressed files and/ or folder paths
    and return a data structure that has full file paths for images/text documents,
    and the loaded json for metadata.
    :param inputs: a single, or list of inputs, meaning folders and compressed files 
    for validation
    :param build_dir: a directory to use to extract and run things. If not specified,
    one is created.
    :param clean_up: boolean to determine if test_dir and subcontents should be
    removed after tests. Default is True.
    :param fail_exit: Given failure of validation, fail the process. Otherwise, return
    False to the calling function. Default fail_exit is True
    '''
    if not isinstance(inputs, list):
        inputs = [inputs]

    bot.debug("Found %s inputs to structure using som-validator." %
              len(inputs))

    # Where are we testing?
    if build_dir is None:
        build_dir = tempfile.mkdtemp()

    # We will return a dictionary of structures, one per input collection
    structures = dict()

    # Tell the user about testing folder
    message = "Building folder will be %s"
    if clean_up == True:
        message = "%s, and will be removed upon completion." % (message)
    bot.debug(message % build_dir)

    for testing in inputs:
        valid = validate_dataset(dataset=testing,
                                 testing_base=build_dir,
                                 clean_up=clean_up)

        # We only structure input that is valid
        if not valid:
            if fail_exit:
                bot.error("Input %s is not valid, please fix. Exiting." %
                          testing)
                sys.exit(1)
            bot.error("Input %s is not valid, skipping." % testing)
        else:
            structures[testing] = structure_dataset(dataset=testing,
                                                    testing_base=build_dir,
                                                    clean_up=clean_up)

    return structures
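
A usage sketch, assuming the inputs are WordFish-style folders or compressed archives on disk (paths are illustrative):

structures = get_structures(["/data/collection1", "/data/collection2.tar.gz"],
                            clean_up=True,
                            fail_exit=False)

for name, structure in structures.items():
    print(name, structure)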
Example 8
 def update_fields(self, new_fields, add_new=True):
     '''update fields will update the model's fields with an input dictionary
     new keys are always added; existing keys are overwritten only when
     add_new=True (default). This does not by default save the entity.
     '''
     for key, value in new_fields.items():
         if key not in self._Entity.keys():
             bot.debug("adding %s to Entity" % (key))
             self._Entity[key] = value
         else:
             if add_new == True:
                 bot.debug("%s found existing in Entity, overwriting" %
                           (key))
                 self._Entity[key] = value
Example 9
def run_validation(inputs, test_dir=None, clean_up=True, fail_exit=True):
    '''run validation will run one or more inputs through the validation procedure,
    meaning checking that the folder (and other data structures) fit the WordFish
    standard: http://www.github.com/radinformatics/wordfish-standard
    :param inputs: a single, or list of inputs, meaning folders and compressed files 
    for validation
    :param test_dir: a directory to use to extract and run things. If not specified,
    one is created.
    :param clean_up: boolean to determine if test_dir and subcontents should be
    removed after tests. Default is True.
    :param fail_exit: Given failure of validation, fail the process. Otherwise, return
    False to the calling function. Default fail_exit is True
    '''
    if not isinstance(inputs, list):
        inputs = [inputs]

    bot.debug("Found %s inputs to test using som-validator." % len(inputs))

    # Where are we testing?
    if test_dir is None:
        test_dir = tempfile.mkdtemp()

    # Tell the user about testing folder
    message = "Testing folder will be %s"
    if clean_up == True:
        message = "%s, and will be removed upon completion." % (message)
    bot.debug(message % test_dir)

    for testing in inputs:
        if os.path.isdir(testing):
            valid = validate_folder(folder=testing)
        elif re.search("[.]zip$|[.]tar[.]gz$", testing):
            valid = validate_compressed(compressed_file=testing,
                                        testing_base=test_dir,
                                        clean_up=clean_up)
        else:
            bot.error("Input %s is not a folder or compressed file." % testing)
            valid = False

        # Always exit or return False if input is not valid
        if valid == False:
            if fail_exit == True:
                bot.error(
                    "Input %s is not valid, please fix and retest. Exiting." %
                    testing)
                sys.exit(1)
            bot.error(
                "Input %s is not valid, please fix and retest. Returning False."
                % testing)
            return valid

    return valid
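
A usage sketch for the same procedure, with fail_exit=False so an invalid input returns False instead of exiting (the path is illustrative):

if run_validation("/data/collection1.zip", clean_up=True, fail_exit=False):
    print("input passed validation")
else:
    print("input did not pass validation")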
Example 10
    def deidentify(self, ids, study=None):
        '''deidentify: uid endpoint: 
         https://api.rit.stanford.edu/identifiers/api/v1/uid/{study} 
         will take a list of identifiers, and return the deidentified.        
        :param ids: a list of identifiers
        :param study: if None, defaults to test.
        '''
        # Default to the client's study if none is specified
        if study is None:
            study = self.study
        study = study.lower()
        bot.debug("study: %s" % study)

        url = "%s/%s/uid/%s" % (self.base, self.version, study)

        return self.post(url=url, data=ids)
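
A usage sketch, assuming `client` is an instance of the identifiers client that exposes this `deidentify` method, and `ids` follows the identifiers structure shown in the validation examples below:

ids = {"identifiers": [{"id": "1234567-8",
                        "id_source": "stanford",
                        "id_timestamp": "2016-01-30T17:15:23.123Z"}]}

response = client.deidentify(ids=ids, study="test")
print(response)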
Example 11
def validate_item(item, sources=None, verbose=True):
    '''validate_item will validate a single item object, intended to go in as a field to
    a POST. For more than one item, use validate_items wrapper
    :param item: the item object. Must include the following:
    :param sources: a list of valid item sources (eg ["pacs"])
    :param verbose: if True, prints out valid True messages. False (errors) always printed

    :: notes

    {
        # generic attribute/values just to store, not interpreted
        "id":"123123123123123", // mandatory

        # the issuer for the above id
        # mandatory, or maybe optional with default of "stanford" or "unspecified"
        "id_source":"pacs",

        # when the id was looked up, to help with changed/merged ids
        # optional with default of current timestamp?
        "id_timestamp":"2016-01-30T17:15:23.123Z",

        # optional key/value attributes, will be stored as-is if provided, but not used or interpreted
        # values will be updated/replaced if sent multiple times (last received wins)
        # any existing values will be preserved if not explicitly set here; set empty string to remove
        "custom_fields":{
          "image_type":"x-ray",
          "resolution":"high"
        }
    }
    '''
    if sources is None:
        sources = item_sources

    # These are the rules for an item
    rules = {
        "id": [Required, Pattern("^[A-Za-z0-9_-]*$")], # pattern
        "id_source": [Required, In(sources)],      # must be in item sources
        "id_timestamp": [Required,Pattern(timestamp)], 
    }

    valid,message = validate(rules, item)
    if verbose == True:
        bot.debug("identifier %s data structure valid: %s" %(item['id'],valid))
    if valid == False:
        bot.error(message)
        if verbose == True:
            print(item)
    return valid
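
A usage sketch based on the item structure described in the docstring notes above:

item = {"id": "123123123123123",
        "id_source": "pacs",
        "id_timestamp": "2016-01-30T17:15:23.123Z",
        "custom_fields": {"image_type": "x-ray",
                          "resolution": "high"}}

if validate_item(item, sources=["pacs"]):
    print("item is valid")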
Example 12
 def upload_text(self,
                 text,
                 entity,
                 batch=True,
                 fields=None,
                 permission=None,
                 mimetype=None):
     '''upload_text will add a text object to the batch manager'''
     new_object = self.upload_object(file_path=text,
                                     entity=entity,
                                     fields=fields,
                                     mimetype=mimetype,
                                     permission=permission,
                                     object_type="Text",
                                     batch=batch)
     bot.debug('TEXT: %s' % new_object)
     return new_object
Example 13
def find_dicoms(folder, extension=None):
    '''find_dicoms will walk a directory structure and find dicoms in subfolders
    :param folder: the parent folder to look in
    :param extension: the extension to use. Default is .dcm
    '''
    folders = dict()

    if extension is None:
        extension = ".dcm"

    for dirpath, dirnames, filenames in os.walk(folder):
        dicoms = []
        for filename in [f for f in filenames if f.endswith(extension)]:
            dicoms.append(os.path.join(dirpath, filename))
        if len(dicoms) > 0:
            folders[dirpath] = dicoms
    bot.debug('Found %s directories with dicom.' % len(folders))
    return folders
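
A usage sketch that walks a study folder and reports how many dicom files were found in each subfolder (the path is illustrative):

folders = find_dicoms("/data/study1", extension=".dcm")

for folder, dicom_files in folders.items():
    print("%s: %s dicom files" % (folder, len(dicom_files)))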
Example 14
 def upload_image(self,
                  image,
                  entity,
                  batch=True,
                  fields=None,
                  permission=None,
                  mimetype=None):
     '''upload_image will add an image object to the batch manager
     '''
     new_object = self.upload_object(file_path=image,
                                     entity=entity,
                                     fields=fields,
                                     mimetype=mimetype,
                                     permission=permission,
                                     object_type="Image",
                                     batch=batch)
     bot.debug('IMAGE: %s' % new_object)
     return new_object
Example 15
    def update_headers(self, fields=None):
        '''update_headers will update self.headers with a simple default header
        for a json post, the authorization token, and any additional fields.
        This function can be adapted as needed.
        '''
        if self.headers is None:
            headers = self._init_headers()
        else:
            headers = self.headers

        if self.token is not None:
            headers["Authorization"] = "Bearer %s" % (self.token)

        if fields is not None:
            for key, value in fields.items():
                headers[key] = value

        header_names = ",".join(list(headers.keys()))
        bot.debug("Headers found: %s" % header_names)
        self.headers = headers
Example 16
def detect_compressed(folder, compressed_types=None):
    '''detect_compressed will return a list of files in
    some folder that are compressed, by default this means
    .zip or .tar.gz, but the caller can specify a custom list
    :param folder: the folder base to use.
    :param compressed_types: a list of types to include, should
    be extensions in format like *.tar.gz, *.zip, etc.
    '''
    compressed = []
    if compressed_types is None:
        compressed_types = ["*.tar.gz", "*.zip"]
    bot.debug("Searching for %s" % ", ".join(compressed_types))

    for filey in os.listdir(folder):
        for compressed_type in compressed_types:
            if fnmatch.fnmatch(filey, compressed_type):
                compressed.append("%s/%s" % (folder, filey))
    bot.debug("Found %s compressed files in %s" % (len(compressed), folder))
    return compressed
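
A usage sketch listing compressed archives in an upload folder, passing a custom pattern list (the path is illustrative):

archives = detect_compressed("/data/uploads", compressed_types=["*.tar.gz", "*.zip"])

for archive in archives:
    print(archive)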
Example 17
def add_tag(dicom, name, value):
    '''add tag will add a tag only if it's in the (active) DicomDictionary
    :param dicom: the pydicom.dataset Dataset (pydicom.read_file)
    :param name: the name of the field to add
    :param value: the value to set, if name is a valid tag
    '''
    dicom_file = os.path.basename(dicom.filename)
    tag = get_tag(name)

    if name in tag:
        dicom.add_new(tag['tag'], tag['VR'], value) 
 
        # dicom.data_element("PatientIdentityRemoved")
        # (0012, 0062) Patient Identity Removed            CS: 'Yes'

        bot.debug("ADDITION %s to %s." % (dicom.data_element(name), dicom_file))
    else:
        bot.error("%s is not a valid field to add. Skipping." % (name))

    return dicom
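
A usage sketch with the older pydicom interface used elsewhere in these examples (the file paths are illustrative):

import dicom  # older pydicom releases expose the module as `dicom`

ds = dicom.read_file("/data/study1/image1.dcm")
ds = add_tag(ds, "PatientIdentityRemoved", "Yes")
ds.save_as("/data/study1/image1-deid.dcm")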
Example 18
def read_series(dicoms, return_nifti=True):
    '''read_series will read in a series of dicoms belonging to a group
    :param dicoms: a list of dicom files to parse, assumed in same series and
    equal size
    :param return_nifti: If True (default) will return the image as a Nifti file
    '''
    # Sort the dicoms
    dicoms.sort()

    # Get the size of the image
    params = sniff_header(dicoms[0])
    xdim = params['xdim']
    ydim = params['ydim']
    window_center = params['window_center']

    bot.debug("First dicom found with dimension %s by %s, using as standard." %
              (xdim, ydim))

    # Let's get ordering of images based on InstanceNumber
    ordered = dict()
    for d in range(len(dicoms)):
        ds = dicom.read_file(dicoms[d])
        if ds.Rows == xdim and ds.Columns == ydim:
            ordered[int(ds.InstanceNumber)] = ds.pixel_array

    # Sort by order
    zdim = len(ordered)
    data = numpy.ndarray((xdim, ydim, zdim))

    # Fill the volume in order of InstanceNumber
    index = 0
    for key in sorted(ordered.keys()):
        data[:, :, index] = ordered[key]
        index += 1

    if return_nifti == True:
        affine = numpy.diag((1, 1, 1, 0))
        data = nibabel.Nifti1Image(data, affine=affine)
    return data
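
A usage sketch combining this with find_dicoms above to build one Nifti volume per series folder (the output filename is illustrative):

import nibabel

folders = find_dicoms("/data/study1")
for folder, dicom_files in folders.items():
    nii = read_series(dicom_files, return_nifti=True)
    nibabel.save(nii, "%s/series.nii.gz" % folder)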
Example 19
def validate_identifiers(identifiers, id_sources=None, item_sources=None, verbose=True):
    '''validate_identifiers will validate one or more identifier objects,
    intended to go in as a field to a POST
    :param identifiers: the identifiers object.
    :param verbose: verbose output for items
    :param id_sources: a list of one or more identifier sources.
    :param item_sources: a list of one or more item sources.
    If either is not defined, the standard defaults are used.

    :: notes
       {
        # mandatory key for uniquely identifying the person
        "id":"1234567-8",
       
        # the issuer for the above id
        # mandatory, or maybe optional with default of "stanford" or "unspecified"
        
        "id_source":"stanford",
        # when the id was looked up, to help with changed/merged ids
        # optional with default of current timestamp?
        
        "id_timestamp":"2016-01-30T17:15:23.123Z",
        # optional key/value attributes, will be stored as-is if provided, but not used or interpreted
        # values will be updated/replaced if sent multiple times (last received wins)
        # any existing values will be preserved if not explicitly set here; set empty string to remove
        
        "custom_fields":{
          "first_name":"Joe",
          "last_name":"Smith",
          "dob":"1970-02-28"
        }
       }
    '''
    if id_sources is None:
        id_sources = identifier_sources

    # These are the rules for a person
    rules = {
        "id": [Required, Pattern("^[A-Za-z0-9_-]*$")], # pattern
        "id_source": [Required, In(id_sources)],   # must be in person sources
        "id_timestamp": [Required,Pattern(timestamp)], 
    }

    if not isinstance(identifiers,dict):
        bot.error("Identifiers data structure must be dictionary.")
        return False

    if "identifiers" not in identifiers:
        bot.error("identifiers key not found in data structure.")
        return False

    items = identifiers['identifiers']

    if not isinstance(items,list):
        bot.error("Items in identifiers data structure must be list.")
        return False  

    for item in items:

        valid,message = validate(rules, item)
        if valid == False:
            bot.error(message)
            return valid

        if "items" in item:
            validate_items(item['items'],sources=item_sources)

    bot.debug("Identifiers data structure valid: %s" %valid)
    return valid
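
A usage sketch with the identifiers structure from the docstring notes above:

identifiers = {"identifiers": [
    {"id": "1234567-8",
     "id_source": "stanford",
     "id_timestamp": "2016-01-30T17:15:23.123Z",
     "custom_fields": {"first_name": "Joe",
                       "last_name": "Smith",
                       "dob": "1970-02-28"}}
]}

if validate_identifiers(identifiers):
    print("identifiers data structure is valid")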
Example 20
 def set_table(self, table, clear_rows=True):
     self.table = table
     if clear_rows is True:
         bot.debug("Clearing previously added rows. Set clear_rows to False to prevent this.")
         self.rows = []
     return self.rows 
Example 21
def receive_identifiers(response):
    '''receive identifiers will validate reception of an identifiers response.
    This should be a list
    :param response: the response list of identifiers

    :: notes
     successful response:

     HTTP 200

    [
       {'jittered_timestamp': '2016-01-30T17:15:23.123Z', 
        'id': '12345678', 
        'suid': '103e', 
        'id_source': 'Stanford MRN', 
        'custom_fields': [
             {'key': 'studySiteID', 'value': '78329'}], 
        'items': [
                   {
                    'id_source': 'GE PACS', 
                    'jittered_timestamp': '2016-01-15T17:15:23.123Z', 
                    'id': 'A654321', 
                    'suid': '103e'}
                   ]}

    ]

    '''
    # These fields are expected, but not required. We will error
    # if any fields are present outside this scope
    expected_fields = [
        'items', 'id_source', 'jittered_timestamp', 'suid', 'id',
        'custom_fields'
    ]

    if not isinstance(response, list):
        bot.error("Response must be a list")
        return False

    # These are the rules for each uidEntity
    rules = {
        "id": [Required, Pattern("^[A-Za-z0-9_-]*$")],  # pattern
        "suid": [Required, Pattern("^[A-Za-z0-9_-]*$")],  # the suid
        "id_source": [Required,
                      In(identifier_sources)],  # must be in identifier sources
        "jittered_timestamp": [Required, Pattern(timestamp)]
    }

    for item in response:

        # Validate required fields
        valid, message = validate(rules, item)
        if valid == False:
            bot.error(message)
            return valid

        # Validate fields returned in response
        if not validate_fields(expected_fields, item.keys()):
            return False

        # Validate items
        if "items" in item:
            if not receive_items(item['items']):
                return False

    bot.debug("Identifiers data structure valid: %s" % valid)
    return valid
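
A usage sketch that validates the example response shown in the docstring:

response = [{"jittered_timestamp": "2016-01-30T17:15:23.123Z",
             "id": "12345678",
             "suid": "103e",
             "id_source": "Stanford MRN",
             "custom_fields": [{"key": "studySiteID", "value": "78329"}],
             "items": [{"id_source": "GE PACS",
                        "jittered_timestamp": "2016-01-15T17:15:23.123Z",
                        "id": "A654321",
                        "suid": "103e"}]}]

if receive_identifiers(response):
    print("response is valid")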
Example 22
def progress_download(collection_name,
                      output_folder,
                      suid,
                      project,
                      bucket_name,
                      query_entity=True,
                      filters=None):

    '''
    show progress while downloading images for a Collection/[c]/Entity/study 
    
    Parameters
    ==========

    collection_name: the name of the collection, typically an IRB number
    output_folder: the base directory to create a study folder in
    project: Google Cloud project name
    suid: the suid of interest to query (e.g., if querying an Entity, use the
          suid of the patient; if querying an Image, use the suid of the
          study, the coded accession number)
    query_entity: by default, we query the entity first, and then get images.
                  To query the images (studies) directly, set this to False.
    bucket_name: the name of the Google Storage Bucket (usually provided)
    filters: a list of tuples to apply to filter the query. Default is:

         [ ("entity_id","=", study) ]

    to retrieve all Image items that are equal to the study name

    Returns
    =======
    files: a list of paths to the downloaded image and metadata files

    '''

    if filters is None:
        if query_entity is True:
            filters = [ ("uid","=", suid) ]
        else:
            filters = [ ("AccessionNumber","=", suid) ]

    bot.info("Collecting available images...")

    try:
        storage_client = storage.Client()

    except DefaultCredentialsError:
        bot.error("We didn't detect your GOOGLE_APPLICATION_CREDENTIALS in the environment! Did you export the path?")
        sys.exit(1)
    except Forbidden:
        bot.error("The service account specified by GOOGLE_APPLICATION_CREDENTIALS does not have permission to use this resource.")
        sys.exit(1)

    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    bucket = storage_client.get_bucket(bucket_name)

    # Retrieve bucket, datastore client, images
    requester = RetryRequester(bucket_name=bucket_name,
                               project=project)

    collection = requester.create_collection(collection_name)

    if query_entity is True:
        entity_set = requester.get_entity(filters)
        images = []
        for entity in entity_set:
            entity_images = requester.client.get_images(entity=entity)
            images += [x for x in entity_images if x not in images]
    else:
        images = requester.get_images(filters)
    
    bot.info("Found %s images for suid %s in collection %s" %(len(images),
                                                             suid,
                                                             collection_name))
    
    progress = 0
    total = len(images)

    files = []
    if len(images) > 0:
        bot.debug("Saving images and metadata...")
        for image in images:

            # Download image
            file_name = prepare_folders(output_folder=output_folder,
                                        image_name=image.key.name)
            
            blob = bucket.blob(image['storage_name'])
            bot.show_progress(progress, total, length=35)
            requester.download(blob, file_name)
            files.append(file_name)
            files.append(save_metadata(image, file_name))
            progress += 1
            bot.show_progress(progress, total, length=35)

        # Newline to finish
        sys.stdout.write('\n')

    return files
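
A usage sketch, assuming GOOGLE_APPLICATION_CREDENTIALS is exported in the environment; the collection, project, and bucket names are illustrative:

files = progress_download(collection_name="IRB-0001",
                          output_folder="/data/downloads",
                          suid="103e",
                          project="my-google-project",
                          bucket_name="my-storage-bucket",
                          query_entity=True)

print("Downloaded %s files" % len(files))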