Example 1
def main(argv):

    import argparse
    import os
    # Core, QualifiedName, provNamespace, Constants, getUUID, read_stats and
    # add_seg_data come from module-level imports in the full script (snippet truncated here)
    parser = argparse.ArgumentParser(prog='fs_to_nidm.py',
                                     description='''This program loads an aseg.stats file from Freesurfer,
                                        augments the Freesurfer anatomical region designations with common data element
                                        anatomical designations, and saves the statistics + region designations out as
                                        NIDM serializations (i.e. Turtle, JSON-LD RDF).''')
    parser.add_argument('-s', '--subject_dir', dest='subject_dir', type=str, required=True,
                        help='Path to Freesurfer subject directory')
    parser.add_argument('-j', '--json_map', dest='json_file', type=str, required=True,
                        help='JSON mapping file which maps Freesurfer aseg anatomy terms to common data elements')
    parser.add_argument('-o', '--output_dir', dest='output_file', type=str,
                        help='Output directory')
    parser.add_argument('-n', '--nidm', dest='nidm_file', type=str, required=False,
                        help='Optional NIDM file to add segmentation data to.')

    args = parser.parse_args()


    [header, tableinfo, measures] = read_stats(os.path.join(args.subject_dir,"stats","aseg.stats"))

    #for measures we need to create NIDM structures using anatomy mappings
    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file is None:

        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data

        #create an empty NIDM graph
        nidmdoc = Core()
        root_act = nidmdoc.graph.activity(
            QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
            other_attributes={Constants.NIDM_PROJECT_DESCRIPTION: "Freesurfer segmentation statistics"})

        #TODO: rework this function so a single code path handles both adding to an existing NIDM file and creating a new one
        add_seg_data(nidmdoc=nidmdoc,measure=measures,header=header, tableinfo=tableinfo, json_map=args.json_file)



        #serialize NIDM file
        with open(args.output_file,'w') as f:
            print("Writing NIDM file...")
            f.write(nidmdoc.serializeJSONLD())
            nidmdoc.save_DotGraph(str(args.output_file + ".pdf"), format="pdf")
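A minimal sketch of how this entry point might be wired up, assuming the snippet above lives in fs_to_nidm.py (the paths in the comment are placeholders, not real data):

# Hypothetical invocation (all paths are placeholders):
#   python fs_to_nidm.py -s /data/freesurfer/sub-01 -j aseg_to_cde.json -o sub-01_aseg.json
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])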
Example 2
    10/3/17 Modified Namespace to be QualifiedName for provtoolbox support; left most of the NIDM-Results namespaces the same
@author: Sanu Ann Abraham <*****@*****.**>
	05/04/2018 Added Python ProvONE support
'''
import six
from rdflib import Namespace, Graph
from prov.model import ProvDocument, QualifiedName
from prov.model import Namespace as provNamespace
from prov.constants import PROV_ATTRIBUTE_QNAMES, PROV_ATTRIBUTE_LITERALS, \
 PROV_N_MAP

from collections import namedtuple
DD = namedtuple("DD", ["source", "variable"])

PROV = Namespace('http://www.w3.org/ns/prov#')
PROVONE = provNamespace(
    'provone', 'http://purl.dataone.org/provone/2015/01/15/ontology#')

NIDM_URL = 'http://purl.org/nidash/nidm#'
NIDM = Namespace(NIDM_URL)

NIIRI = Namespace('http://iri.nidash.org/')
AFNI = Namespace('http://purl.org/nidash/afni#')
SPM = Namespace('http://purl.org/nidash/spm#')
FSL = Namespace('http://purl.org/nidash/fsl#')
FREESURFER = Namespace('https://surfer.nmr.mgh.harvard.edu/')
ANTS = Namespace('http://stnava.github.io/ANTs/')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
CRYPTO = Namespace(
    'http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions#')
DC = Namespace('http://purl.org/dc/elements/1.1/')
DCT = Namespace('http://purl.org/dc/terms/')
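For readers unfamiliar with rdflib, a quick sketch of what these Namespace objects do: item or attribute access mints a full IRI by appending the suffix to the base URL, which is exactly how the NIDM term constants in the next example are built.

# Sketch: minting IRIs from an rdflib Namespace
from rdflib import Namespace

NIDM = Namespace('http://purl.org/nidash/nidm#')
print(NIDM['NIDM_0000015'])  # http://purl.org/nidash/nidm#NIDM_0000015
print(NIDM.Project)          # http://purl.org/nidash/nidm#Project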
Example 3
NIDM_COORDINATE = NIDM['NIDM_0000015']
NIDM_LEGENDRE_POLYNOMIAL_ORDER = NIDM['NIDM_0000014']
NIDM_CONTRAST_STANDARD_ERROR_MAP = NIDM['NIDM_0000013']
NIDM_CONNECTIVITY_CRITERION = NIDM['NIDM_0000012']
NIDM_CONJUNCTION_INFERENCE = NIDM['NIDM_0000011']
NIDM_HAS_FMRI_DESIGN = NIDM['NIDM_0000010']
NIDM_COLIN27_COORDINATE_SYSTEM = NIDM['NIDM_0000009']
NIDM_CLUSTER_LABELS_MAP = NIDM['NIDM_0000008']
NIDM_CLUSTER_DEFINITION_CRITERIA = NIDM['NIDM_0000007']
NIDM_CLUSTER = NIDM['NIDM_0000006']
NIDM_BINOMIAL_DISTRIBUTION = NIDM['NIDM_0000005']
NIDM_BINARY_MAP = NIDM['NIDM_0000004']
NIDM_CONTRAST_ESTIMATION = NIDM['NIDM_0000001']
NIDM_CONTRAST_MAP = NIDM['NIDM_0000002']
#NIDM-Experiment##############################################################
NIDM_PROJECT = QualifiedName(provNamespace("nidm", NIDM), 'Project')
#NIDM_PROJECT_TYPE = QualifiedName(provNamespace("dctypes", DCTYPES),"Dataset")
NIDM_PROJECT_IDENTIFIER = QualifiedName(provNamespace("sio", SIO),"Identifier")
NIDM_PROJECT_NAME = QualifiedName(provNamespace("dctypes", DCTYPES),"title")
NIDM_PROJECT_DESCRIPTION = QualifiedName(provNamespace("dct", DCT),"description")
NIDM_PROJECT_LICENSE = QualifiedName(provNamespace("dct", DCT),"license")
NIDM_PROJECT_URL = QualifiedName(provNamespace("sio", SIO),"URL")
NIDM_PROJECT_REFERENCES = QualifiedName(provNamespace("dcat", DCAT),"creator")
NIDM_AUTHOR = QualifiedName(provNamespace("dcat", DCAT),"author")
NIDM_SESSION = QualifiedName(provNamespace("nidm", NIDM), 'Session')
NIDM_ACQUISITION_ACTIVITY = QualifiedName(provNamespace("nidm", NIDM), "Acquisition")
NIDM_ACQUISITION_MODALITY = QualifiedName(provNamespace("nidm",NIDM),"AcquisitionModality")
NIDM_ASSESSMENT_ACQUISITION = QualifiedName(provNamespace("nidm", NIDM), "assessment-instrument")
NIDM_ACQUISITION_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "AcquisitionObject")
NIDM_DEMOGRAPHICS_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "DemographicsAssessment")
NIDM_ASSESSMENT_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "assessment-instrument")
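A minimal sketch of how such QualifiedName constants can be attached to a prov document, using only the prov package API (the entity identifier 'proj-1234' is a placeholder):

from prov.model import ProvDocument

doc = ProvDocument()
#add_namespace returns a prov Namespace; item access on it yields a QualifiedName
nidm_ns = doc.add_namespace('nidm', 'http://purl.org/nidash/nidm#')
project = doc.entity(nidm_ns['proj-1234'],
                     other_attributes={'prov:type': nidm_ns['Project']})
print(doc.get_provn())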
Example 4
def bidsmri2project(directory, args):
    #Parse dataset_description.json file in BIDS directory
    if os.path.isdir(directory):
        try:
            with open(os.path.join(directory,
                                   'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            logging.critical(
                "Cannot find dataset_description.json file which is required in the BIDS spec"
            )
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!" %
                         os.path.join(directory))
        exit("-1")

    #create project / nidm-exp doc
    project = Project()

    #add various attributes if they exist in BIDS dataset
    for key in dataset:
        #if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                project.add_attributes({
                    BIDS_Constants.dataset_description[key]:
                    "".join(dataset[key])
                })
            else:
                project.add_attributes(
                    {BIDS_Constants.dataset_description[key]: dataset[key]})
    #add absolute location of BIDS directory on disk for later finding of files which are stored relatively in NIDM document
    project.add_attributes({Constants.PROV['Location']: directory})

    #get BIDS layout
    bids_layout = BIDSLayout(directory)

    #create empty dictionary for sessions, keyed by subject id, used later to link scans to the same session as demographics
    session = {}
    participant = {}
    #Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(os.path.join(directory, 'participants.tsv')):
        with open(os.path.join(directory, 'participants.tsv')) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            #logic to map variables to terms

            #first iterate over variables in the dataframe and check which ones are already mapped as
            #BIDS constants and which are not.  For those that are not, we want to use the
            #variable-term mapping functions to help the user do the mapping
            #iterate over columns
            mapping_list = []
            column_to_terms = {}
            for field in participants_data.fieldnames:

                #column is not in BIDS_Constants
                if not (field in BIDS_Constants.participants):
                    #add column to list for column_to_terms mapping
                    mapping_list.append(field)

            #do variable-term mappings
            if args.json_map or args.key is not None:

                #if user didn't supply a json mapping file but we're doing some variable-term mapping create an empty one for column_to_terms to use
                if not args.json_map:
                    #defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                    # if participants.json file doesn't exist then run without json mapping file
                    if not os.path.isfile(
                            os.path.join(directory, 'participants.json')):
                        #maps variables in CSV file to terms
                        temp = DataFrame(columns=mapping_list)
                        column_to_terms.update(
                            map_variables_to_terms(directory=directory,
                                                   df=temp,
                                                   apikey=args.key,
                                                   output_file=os.path.join(
                                                       directory,
                                                       'participants.json')))
                    else:
                        #maps variables in CSV file to terms
                        temp = DataFrame(columns=mapping_list)
                        column_to_terms.update(
                            map_variables_to_terms(directory=directory,
                                                   df=temp,
                                                   apikey=args.key,
                                                   output_file=os.path.join(
                                                       directory,
                                                       'participants.json'),
                                                   json_file=os.path.join(
                                                       directory,
                                                       'participants.json')))

                else:
                    #maps variables in CSV file to terms
                    temp = DataFrame(columns=mapping_list)
                    column_to_terms.update(
                        map_variables_to_terms(directory=directory,
                                               df=temp,
                                               apikey=args.key,
                                               output_file=os.path.join(
                                                   directory,
                                                   'participants.json'),
                                               json_file=args.json_map))

            for row in participants_data:
                #create session object for subject to be used for participant metadata and image data
                #parse subject id from "sub-XXXX" string
                temp = row['participant_id'].split("-")
                #handle ambiguity in BIDS datasets: sometimes participant_id is sub-XXXX and other times it's just XXXX
                if len(temp) > 1:
                    subjid = temp[1]
                else:
                    subjid = temp[0]
                logging.info(subjid)
                session[subjid] = Session(project)

                #add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(
                    attributes=({
                        Constants.NIDM_SUBJECTID: row['participant_id']
                    }))

                #add qualified association of participant with acquisition activity
                acq.add_qualified_association(
                    person=participant[subjid]['person'],
                    role=Constants.NIDM_PARTICIPANT)

                for key, value in row.items():
                    if not value:
                        continue
                    #for variables in the participants.tsv file that have term mappings in BIDS_Constants.py use those,
                    #and add them to json_map so we don't have to map them when the user supplied arguments to map variables
                    if key in BIDS_Constants.participants:

                        #if this was the participant_id, we already handled it above creating agent / qualified association
                        if not (BIDS_Constants.participants[key]
                                == Constants.NIDM_SUBJECTID):
                            acq_entity.add_attributes(
                                {BIDS_Constants.participants[key]: value})

                    #else if user added -mapvars flag to command line then we'll use the variable-> term mapping procedures to help user map variables to terms (also used
                    # in CSV2NIDM.py)
                    else:

                        if key in column_to_terms:
                            acq_entity.add_attributes(
                                {
                                    QualifiedName(
                                        provNamespace(
                                            Core.safe_string(None,
                                                             string=str(key)), column_to_terms[key]["url"]), ""):
                                    value
                                })
                        else:

                            acq_entity.add_attributes(
                                {Constants.BIDS[key.replace(" ", "_")]: value})

    #create acquisition objects for each scan for each subject

    #loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" % subject_id)
        #skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        #check if there's a session number.  If so, store it in the session activity
        session_dirs = bids_layout.get(target='session',
                                       subject=subject_id,
                                       return_type='dir')
        #if session_dirs has entries then get any metadata about session and store in session activity

        #bids_layout.get(subject=subject_id,type='session',extensions='.tsv')
        #bids_layout.get(subject=subject_id,type='scans',extensions='.tsv')
        #bids_layout.get(extensions='.tsv',return_type='obj')

        #check whether sessions have been created (i.e. was there a participants.tsv file?). If not, create one here
        if not (subject_id in session):
            session[subject_id] = Session(project)

        for file_tpl in bids_layout.get(subject=subject_id,
                                        extensions=['.nii', '.nii.gz']):
            #create an acquisition activity
            acq = MRAcquisition(session[subject_id])

            #check whether participant (i.e. agent) for this subject already exists (i.e. if participants.tsv file exists) else create one
            if not (subject_id in participant):
                participant[subject_id] = {}
                participant[subject_id]['person'] = acq.add_person(
                    attributes=({
                        Constants.NIDM_SUBJECTID: subject_id
                    }))

            #add qualified association with person
            acq.add_qualified_association(
                person=participant[subject_id]['person'],
                role=Constants.NIDM_PARTICIPANT)

            if file_tpl.entities['datatype'] == 'anat':
                #do something with anatomicals
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['datatype']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #add file link as a path relative to the BIDS directory
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj.add_attributes({Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))
                #get associated JSON file if exists
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata
                if len(json_data.info) > 0:
                    for key in json_data.info.keys():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })
            elif file_tpl.entities['datatype'] == 'func':
                #do something with functionals
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['datatype']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #add file link as a path relative to the BIDS directory
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj.add_attributes({Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                if 'run' in file_tpl.entities:
                    acq_obj.add_attributes({
                        BIDS_Constants.json_keys["run"]:
                        file_tpl.entities['run']
                    })

                #get associated JSON file if exists
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata

                if len(json_data.info) > 0:
                    for key in json_data.info.keys():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })
                #get associated events TSV file
                if 'run' in file_tpl.entities:
                    events_file = bids_layout.get(
                        subject=subject_id,
                        extensions=['.tsv'],
                        modality=file_tpl.entities['datatype'],
                        task=file_tpl.entities['task'],
                        run=file_tpl.entities['run'])
                else:
                    events_file = bids_layout.get(
                        subject=subject_id,
                        extensions=['.tsv'],
                        modality=file_tpl.entities['datatype'],
                        task=file_tpl.entities['task'])
                #if there is an events file then this is task-based so create an acquisition object for the task file and link
                if events_file:
                    #for now create acquisition object and link it to the associated scan
                    events_obj = AcquisitionObject(acq)
                    #add prov type, task name as prov:label, and link to filename of events file

                    events_obj.add_attributes({
                        PROV_TYPE:
                        Constants.NIDM_MRI_BOLD_EVENTS,
                        BIDS_Constants.json_keys["TaskName"]:
                        json_data["TaskName"],
                        Constants.NIDM_FILENAME:
                        getRelPathToBIDS(events_file[0].filename, directory)
                    })
                    #link it to appropriate MR acquisition entity
                    events_obj.wasAttributedTo(acq_obj)

            elif file_tpl.entities['datatype'] == 'dwi':
                #do stuff with with dwi scans...
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans["dti"]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #add file link as a path relative to the BIDS directory
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                if 'run' in file_tpl.entities:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.entities['run']})

                #get associated JSON file if exists
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata

                if len(json_data.info) > 0:
                    for key in json_data.info.keys():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })
                #for bval and bvec files, create generic acquisition objects for now,
                #link the files, and associate them with the DWI scan's acquisition activity
                acq_obj_bval = AcquisitionObject(acq)
                acq_obj_bval.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bval"]})
                #add file link to bval files
                acq_obj_bval.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(
                        join(file_tpl.dirname,
                             bids_layout.get_bval(file_tpl.filename)),
                        directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj_bval.add_attributes(
                    {Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj_bval.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))
                acq_obj_bvec = AcquisitionObject(acq)
                acq_obj_bvec.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bvec"]})
                #add file link to bvec files
                acq_obj_bvec.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(
                        join(file_tpl.dirname,
                             bids_layout.get_bvec(file_tpl.filename)),
                        directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj_bvec.add_attributes(
                    {Constants.PROV['Location']: directory})

                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    #add sha512 sum
                    acq_obj_bvec.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                #TODO: explicitly link the bval and bvec acquisition object entities to the DWI scan entity

        #Added temporarily to support phenotype files
        #for each *.tsv / *.json file pair in the phenotypes directory
        #WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory, "phenotype",
                                               "*.tsv")):
            #for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            #the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    if subjid[1] != subject_id:
                        continue
                    else:
                        #add acquisition object
                        acq = AssessmentAcquisition(session=session[subjid[1]])
                        #add qualified association with person
                        acq.add_qualified_association(
                            person=participant[subject_id]['person'],
                            role=Constants.NIDM_PARTICIPANT)

                        acq_entity = AssessmentObject(acquisition=acq)

                        for key, value in row.items():
                            if not value:
                                continue
                            #participant_id is already stored on the NIDM agent, so don't add it to the assessment as a triple.
                            #BIDS phenotype files seem to have an index column with no column header variable name, so skip those
                            if key != "participant_id" and key != "":
                                #for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes(
                                    {Constants.BIDS[key]: value})

                        #link TSV file
                        acq_entity.add_attributes({
                            Constants.NIDM_FILENAME:
                            getRelPathToBIDS(tsv_file, directory)
                        })
                        #WIP: add absolute location of BIDS directory on disk for later finding of files
                        acq_entity.add_attributes(
                            {Constants.PROV['Location']: directory})

                        #link associated JSON file if it exists
                        data_dict = os.path.join(
                            directory, "phenotype",
                            os.path.splitext(os.path.basename(tsv_file))[0] +
                            ".json")
                        if os.path.isfile(data_dict):
                            acq_entity.add_attributes({
                                Constants.BIDS["data_dictionary"]:
                                getRelPathToBIDS(data_dict, directory)
                            })

    return project
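A hedged sketch of driving this converter end to end: the args object only needs the two attributes the function reads (json_map and key), the BIDS path is a placeholder, and serializeTurtle() is assumed from the surrounding Project class rather than shown in this snippet.

import argparse

args = argparse.Namespace(json_map=False, key=None)    # skip variable-term mapping
project = bidsmri2project('/data/bids_dataset', args)  # placeholder BIDS directory
with open('nidm.ttl', 'w') as f:
    f.write(project.serializeTurtle())                 # assumed Project method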
Example 5
def add_brainvolume_data(nidmdoc,
                         df,
                         id_field,
                         source_row,
                         column_to_terms,
                         png_file=None,
                         output_file=None,
                         root_act=None,
                         nidm_graph=None):
    '''
    Add brain volume data from a CSV file to a NIDM document.

    :param nidmdoc: NIDM document to add brain volume data to
    :param df: pandas DataFrame of brain volume data
    :param id_field: name of the subject-id column in df
    :param source_row: row identifying the source software for each column
    :param column_to_terms: dictionary mapping CSV variables to term URLs
    :param png_file: if set, save a DOT graph (as PDF) of the first row's model
    :param output_file: base path for the DOT graph output
    :param root_act: optional root-level activity to link computations to
    :param nidm_graph: if not None, add to this existing NIDM graph instead of creating from scratch
    :return:
    '''
    #dictionary to store activities for each software agent
    software_agent = {}
    software_activity = {}
    participant_agent = {}
    entity = {}

    #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    #existing NIDM file.  The following logic basically determines which route to take...

    #if no existing NIDM graph was passed as a parameter, create a new document from scratch
    if nidm_graph is None:
        first_row = True
        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():

            #store other data from row with columns_to_term mappings
            for row_variable, row_data in csv_row.items():

                #check if row_variable is subject id, if so check whether we have an agent for this participant
                if row_variable == id_field:
                    #store participant id for later use in processing the data for this row
                    participant_id = row_data
                    #if there is no agent for the participant then add one
                    if row_data not in participant_agent.keys():
                        #add an agent for this person
                        participant_agent[row_data] = nidmdoc.graph.agent(
                            QualifiedName(
                                provNamespace("nidm", Constants.NIDM),
                                getUUID()),
                            other_attributes=({
                                Constants.NIDM_SUBJECTID:
                                row_data
                            }))
                    continue
                else:

                    #get source software matching this column; handle duplicate variables in source_row and pandas renaming duplicate column names
                    software_key = source_row.columns[[
                        column_index(df, row_variable)
                    ]]._values[0].split(".")[0]

                    #see if we already have a software_activity for this agent
                    if software_key not in software_activity.keys():

                        #create an activity for the computation...simply a placeholder for more extensive provenance
                        software_activity[
                            software_key] = nidmdoc.graph.activity(
                                QualifiedName(
                                    provNamespace("nidm", Constants.NIDM),
                                    getUUID()),
                                other_attributes={
                                    Constants.NIDM_PROJECT_DESCRIPTION:
                                    "brain volume computation"
                                })

                        if root_act is not None:
                            #associate activity with activity of brain volumes creation (root-level activity)
                            software_activity[
                                software_key].add_attributes(
                                    {
                                        QualifiedName(
                                            provNamespace(
                                                "dct", Constants.DCT), 'isPartOf'):
                                        root_act
                                    })

                        #associate this activity with the participant
                        nidmdoc.graph.association(
                            activity=software_activity[software_key],
                            agent=participant_agent[participant_id],
                            other_attributes={
                                PROV_ROLE: Constants.NIDM_PARTICIPANT
                            })
                        nidmdoc.graph.wasAssociatedWith(
                            activity=software_activity[software_key],
                            agent=participant_agent[participant_id])

                        #check if there's an associated software agent and if not, create one
                        if software_key not in software_agent.keys():
                            #create an agent
                            software_agent[software_key] = nidmdoc.graph.agent(
                                QualifiedName(
                                    provNamespace("nidm", Constants.NIDM),
                                    getUUID()),
                                other_attributes={
                                    'prov:type':
                                    QualifiedName(
                                        provNamespace(
                                            Core.safe_string(
                                                None,
                                                string=str(
                                                    "Neuroimaging Analysis Software"
                                                )), Constants.
                                            NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
                                        ), ""),
                                    QualifiedName(
                                        provNamespace(
                                            Core.safe_string(None,
                                                             string=str("Neuroimaging Analysis Software")), Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), ""):
                                    software_key
                                })
                            #create qualified association with brain volume computation activity
                            nidmdoc.graph.association(
                                activity=software_activity[software_key],
                                agent=software_agent[software_key],
                                other_attributes={
                                    PROV_ROLE:
                                    QualifiedName(
                                        provNamespace(
                                            Core.safe_string(
                                                None,
                                                string=str(
                                                    "Neuroimaging Analysis Software"
                                                )), Constants.
                                            NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
                                        ), "")
                                })
                            nidmdoc.graph.wasAssociatedWith(
                                activity=software_activity[software_key],
                                agent=software_agent[software_key])

                    #check if we have an entity for storing this particular variable for this subject and software else create one
                    if software_activity[
                            software_key].identifier.localpart + participant_agent[
                                participant_id].identifier.localpart not in entity.keys(
                                ):
                        #create an entity to store brain volume data for this participant
                        entity[software_activity[software_key].identifier.
                               localpart + participant_agent[participant_id].
                               identifier.localpart] = nidmdoc.graph.entity(
                                   QualifiedName(
                                       provNamespace("nidm", Constants.NIDM),
                                       getUUID()))
                        #add wasGeneratedBy association to activity
                        nidmdoc.graph.wasGeneratedBy(
                            entity=entity[software_activity[software_key].
                                          identifier.localpart +
                                          participant_agent[participant_id].
                                          identifier.localpart],
                            activity=software_activity[software_key])

                    #get column_to_term mapping uri and add as namespace in NIDM document
                    entity[
                        software_activity[software_key].identifier.localpart +
                        participant_agent[participant_id].identifier.
                        localpart].add_attributes({
                            QualifiedName(
                                provNamespace(
                                    Core.safe_string(None,
                                                     string=str(row_variable)), column_to_terms[row_variable.split(".")[0]]["url"]), ""):
                            row_data
                        })
                    #print(project.serializeTurtle())

            #for debugging only: the resulting graph is currently too big for DOT graph creation, so
            #create a DOT graph for the processing of one row of the brain volumes CSV file so the
            #model can at least be inspected visually
            if png_file is not None and first_row:
                nidmdoc.save_DotGraph(str(output_file + ".pdf"),
                                      format="pdf")
                first_row = False
    else:
        first_row = True
        #logic to add to existing graph
        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(
            nidmdoc.serializeTurtle()),
                                          format='turtle')

        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?entity prov:wasGeneratedBy ?activity ;
                            nidm:hadImageUsageType nidm:Anatomical .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .

                    }"""
        #print(query)
        qres = rdf_graph_parse.query(query)

        for row in qres:
            print('%s \t %s' % (row[2], row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be careful about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            csv_row = df.loc[df[id_field].astype('str').str.contains(
                str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this brain volumes data to NIDM file, else skip it....
            if len(csv_row.index) != 0:
                print("found other data for participant %s" % row[1])

                #Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the
                #brain volumes data.  We don't know which AcquisitionObject (entity) describes the T1-weighted scans
                #used for the project.  Since we don't have the SHA512 sums in the brain volumes data (YET) we can't
                #really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're
                #simply, for the moment, going to assume it's the activity/session returned by the above query
                #where we've specifically asked for the entity which has nidm:hadImageUsageType nidm:Anatomical

                #NIDM document entity uuid which has nidm:hadImageUsageType nidm:Anatomical
                #this is the entity that is associated with the brain volume report for this participant
                anat_entity_uuid = row[3]

                #Now we need to set up the entities/activities, etc. to add the brain volume data for this row of the
                #CSV file and link it to the above entity and the agent for this participant which is row[0]
                #store other data from row with columns_to_term mappings
                for row_variable, row_data in csv_row.items():

                    #check if row_variable is subject id, if so check whether we have an agent for this participant
                    if row_variable == id_field:
                        #store participant id for later use in processing the data for this row
                        participant_id = row_data.values[0]
                        print("participant id: %s" % participant_id)
                        continue
                    else:

                        #get source software matching this column; handle duplicate variables in source_row and pandas renaming duplicate column names
                        software_key = source_row.columns[[
                            column_index(df, row_variable)
                        ]]._values[0].split(".")[0]

                        #see if we already have a software_activity for this agent
                        if software_key + row[2] not in software_activity.keys(
                        ):

                            #create an activity for the computation...simply a placeholder for more extensive provenance
                            software_activity[
                                software_key +
                                row[2]] = nidmdoc.graph.activity(
                                    QualifiedName(
                                        provNamespace("niiri",
                                                      Constants.NIIRI),
                                        getUUID()),
                                    other_attributes={
                                        Constants.NIDM_PROJECT_DESCRIPTION:
                                        "brain volume computation",
                                        PROV_ATTR_USED_ENTITY: anat_entity_uuid
                                    })

                            #associate the activity with the entity containing the original T1-weighted scan which is stored in anat_entity_uuid
                            if root_act is not None:
                                #associate activity with activity of brain volumes creation (root-level activity)
                                software_activity[
                                    software_key + row[2]].add_attributes({
                                        QualifiedName(
                                            provNamespace(
                                                "dct", Constants.DCT), 'isPartOf'):
                                        root_act
                                    })

                            #associate this activity with the participant..the participant's agent is row[2] in the query response
                            nidmdoc.graph.association(
                                activity=software_activity[software_key +
                                                           row[2]],
                                agent=row[2],
                                other_attributes={
                                    PROV_ROLE: Constants.NIDM_PARTICIPANT
                                })
                            nidmdoc.graph.wasAssociatedWith(
                                activity=software_activity[software_key +
                                                           row[2]],
                                agent=row[2])

                            #check if there's an associated software agent and if not, create one
                            if software_key not in software_agent.keys():
                                #if we have a URL defined for this software in Constants.py then use it else simply use the string name of the software product
                                if software_key.lower() in Constants.namespaces:
                                    #create an agent
                                    software_agent[software_key] = nidmdoc.graph.agent(
                                        QualifiedName(
                                            provNamespace(
                                                "niiri", Constants.NIIRI),
                                            getUUID()),
                                        other_attributes={
                                            'prov:type':
                                            QualifiedName(
                                                provNamespace(
                                                    Core.safe_string(
                                                        None,
                                                        string=str(
                                                            "Neuroimaging Analysis Software"
                                                        )), Constants.
                                                    NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
                                                ), ""),
                                            QualifiedName(
                                                provNamespace(
                                                    Core.safe_string(None,
                                                                     string=str("Neuroimaging Analysis Software")), Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), ""):
                                            QualifiedName(
                                                provNamespace(
                                                    software_key,
                                                    Constants.namespaces[
                                                        software_key.lower()]),
                                                "")
                                        })
                                else:
                                    #create an agent
                                    software_agent[software_key] = nidmdoc.graph.agent(
                                        QualifiedName(
                                            provNamespace(
                                                "niiri", Constants.NIIRI),
                                            getUUID()),
                                        other_attributes={
                                            'prov:type':
                                            QualifiedName(
                                                provNamespace(
                                                    Core.safe_string(
                                                        None,
                                                        string=str(
                                                            "Neuroimaging Analysis Software"
                                                        )), Constants.
                                                    NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
                                                ), ""),
                                            QualifiedName(
                                                provNamespace(
                                                    Core.safe_string(None,
                                                                     string=str("Neuroimaging Analysis Software")), Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), ""):
                                            software_key
                                        })
                            #create qualified association with brain volume computation activity
                            nidmdoc.graph.association(
                                activity=software_activity[software_key +
                                                           row[2]],
                                agent=software_agent[software_key],
                                other_attributes={
                                    PROV_ROLE:
                                    QualifiedName(
                                        provNamespace(
                                            Core.safe_string(
                                                None,
                                                string=str(
                                                    "Neuroimaging Analysis Software"
                                                )), Constants.
                                            NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
                                        ), "")
                                })
                            nidmdoc.graph.wasAssociatedWith(
                                activity=software_activity[software_key +
                                                           row[2]],
                                agent=software_agent[software_key])

                        #check if we have an entity for storing this particular variable for this subject and software, else create one
                        entity_key = software_activity[software_key + row[2]].identifier.localpart + row[2]
                        if entity_key not in entity.keys():
                            #create an entity to store brain volume data for this participant
                            entity[entity_key] = nidmdoc.graph.entity(
                                QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()))
                            #add wasGeneratedBy association to activity
                            nidmdoc.graph.wasGeneratedBy(
                                entity=entity[entity_key],
                                activity=software_activity[software_key + row[2]])

                        #get column_to_term mapping uri and add as namespace in NIDM document
                        entity[entity_key].add_attributes({
                            QualifiedName(
                                provNamespace(
                                    Core.safe_string(None, string=str(row_variable)),
                                    column_to_terms[row_variable.split(".")[0]]["url"]), ""):
                            row_data.values[0]
                        })
Example No. 6
def main(argv):
    parser = ArgumentParser(
        description="""This program will load in a CSV file made during simple-2
                brain volumes experiment which has the following organization:
                source	FSL	FSL	FSL
                participant_id	left nucleus accumbens volume	left amygdala volume
                sub-0050002	    796.4723293	    1255.574283	    4449.579039
                sub-0050003	    268.9688215	    878.7860634	    3838.602449
                sub-0050004	    539.0969914	    1195.288168	    3561.518188
                If will use the first row to determine the software used for the segmentations and the
                second row for the variable names.  Then it does a simple NIDM conversion using
                example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit"""
    )

    parser.add_argument('-csv',
                        dest='csv_file',
                        required=True,
                        help="Path to CSV file to convert")
    parser.add_argument('-ilxkey',
                        dest='key',
                        required=True,
                        help="Interlex/SciCrunch API key to use for query")
    parser.add_argument(
        '-json_map',
        dest='json_map',
        required=False,
        help="User-suppled JSON file containing variable-term mappings.")
    parser.add_argument(
        '-nidm',
        dest='nidm_file',
        required=False,
        help="Optional NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument(
        '-owl',
        action='store_true',
        required=False,
        help='Optionally searches NIDM OWL files...internet connection required'
    )
    parser.add_argument(
        '-png',
        action='store_true',
        required=False,
        help='Optional flag; when set, a PNG image file of the RDF graph will be produced'
    )
    parser.add_argument('-out',
                        dest='output_file',
                        required=True,
                        help="Filename to save NIDM file")
    args = parser.parse_args()

    #open CSV file and read first line which is the source of the segmentations
    source_row = pd.read_csv(args.csv_file, nrows=0)
    #open CSV file and load into a dataframe, using the second row (the variable names) as the header
    df = pd.read_csv(args.csv_file, skiprows=0, header=1)
    #account for duplicate column names
    # df.columns = df.iloc[0]
    df = df.reindex(df.index.drop(0)).reset_index(drop=True)
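    #at this point source_row holds only the first header row (the software names, e.g. FSL)
    #and df holds the data rows, with the second header row (the variable names) as column labels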

    #get unique variable names from CSV data file
    #note: pandas disambiguates duplicate variable names by appending ".X", where X numbers the duplicates
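    #for example, pandas reads a header row of ["FSL", "FSL", "FSL"] back as
    #["FSL", "FSL.1", "FSL.2"], so splitting on "." recovers the original name
    #(assuming the real variable names themselves contain no ".")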
    unique_vars = []
    for variable in list(df):
        temp = variable.split(".")[0]
        if temp not in unique_vars:
            unique_vars.append(temp)

    #do same as above for unique software agents
    unique_software = []
    for variable in list(source_row):
        temp = variable.split(".")[0]
        if temp not in unique_software:
            unique_software.append(temp)

    #maps variables in CSV file to terms
    if args.owl:
        column_to_terms = map_variables_to_terms(
            df=pd.DataFrame(columns=unique_vars),
            apikey=args.key,
            directory=dirname(args.output_file),
            output_file=join(dirname(args.output_file), "json_map.json"),
            json_file=args.json_map,
            owl_file=args.owl)
    else:
        column_to_terms = map_variables_to_terms(
            df=pd.DataFrame(columns=unique_vars),
            apikey=args.key,
            directory=dirname(args.output_file),
            output_file=join(dirname(args.output_file), "json_map.json"),
            json_file=args.json_map)

    #get subjectID field from CSV
    id_field = getSubjIDColumn(column_to_terms, df)

    # WIP!!!#########################################################################################
    #go line by line through CSV file creating NIDM structures
    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file is not None:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)

        root_act = project.graph.activity(
            QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
            other_attributes={
                Constants.NIDM_PROJECT_DESCRIPTION:
                "Brain volumes provenance document"
            })

    #TODO: rework this function to cleanly support both adding to an existing NIDM file and creating a new one
        add_brainvolume_data(nidmdoc=project,
                             df=df,
                             id_field=id_field,
                             root_act=root_act,
                             column_to_terms=column_to_terms,
                             png_file=args.png,
                             output_file=args.output_file,
                             source_row=source_row,
                             nidm_graph=True)

        #serialize NIDM file
        with open(args.output_file, 'w') as f:
            print("Writing NIDM file...")
            f.write(project.serializeTurtle())
            #if args.png:
            #    nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png")


#        #find subject ids and sessions in NIDM document
#        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity
#                    WHERE {
#                        ?activity prov:wasAssociatedWith ?agent ;
#                            dct:isPartOf ?session  .
#                        ?entity prov:wasGeneratedBy ?activity ;
#                            nidm:hasImageUsageType nidm:Anatomical .
#                        ?agent rdf:type prov:Agent ;
#                            ndar:src_subject_id ?nidm_subj_id .
#
#                    }"""
#        #print(query)
#        qres = rdf_graph_parse.query(query)
#
#        for row in qres:
#            print('%s \t %s' %(row[0],row[1]))
#            #find row in CSV file with subject id matching agent from NIDM file
#            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
#            #be careful about data types...simply type-change dataframe subject id column and query to strings.
#            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
#            #time which column is the subject id....
#            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]
#
#            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
#            #then add this brain volumes data to NIDM file, else skip it....
#            if (not (len(csv_row.index)==0)):
#
#                #Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the
#                #brain volumes data.  We don't know which AcquisitionObject (entity) describes the T1-weighted scans
#                #used for the project.  Since we don't have the SHA512 sums in the brain volumes data (YET) we can't
#                #really verify that it's a particular T1-weighted scan that was used for the brain volumes, but we're
#                #simply, for the moment, going to assume it's the activity/session returned by the above query
#                #where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical
#
#                #NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical;
#                #this is the entity that is associated with the brain volume report for this participant
#                entity_uuid = row[3]
#
#                #Now we need to set up the entities/activities, etc. to add the brain volume data for this row of the
#                #CSV file and link it to the above entity and the agent for this participant, which is row[0]
#
#                #add acquisition entity for assessment
#                acq_entity = AssessmentObject(acquisition=acq)
#                #add qualified association with existing agent
#                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)
#
#                #store other data from row with columns_to_term mappings
#                for row_variable in csv_row:
#                    #check if row_variable is subject id, if so skip it
#                    if row_variable==id_field:
#                        continue
#                    else:
#                        #get column_to_term mapping uri and add as namespace in NIDM document
#                        #provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"])
#                        acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]})
#                continue
#
#        #serialize NIDM file
#        with open(args.nidm_file,'w') as f:
#            print("Writing NIDM file...")
#            f.write(project.serializeTurtle())
#            project.save_DotGraph(str(args.nidm_file + ".png"), format="png")
##############################################################################################################################

    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data

        #create an empty NIDM graph
        nidmdoc = Core()
        root_act = nidmdoc.graph.activity(
            QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
            other_attributes={
                Constants.NIDM_PROJECT_DESCRIPTION:
                "Brain volumes provenance document"
            })

        #TODO: rework this function to cleanly support both adding to an existing NIDM file and creating a new one
        add_brainvolume_data(nidmdoc=nidmdoc,
                             df=df,
                             id_field=id_field,
                             root_act=root_act,
                             column_to_terms=column_to_terms,
                             png_file=args.png,
                             output_file=args.output_file,
                             source_row=source_row)

        #serialize NIDM file
        with open(args.output_file, 'w') as f:
            print("Writing NIDM file...")
            f.write(nidmdoc.serializeTurtle())
            if args.png:
                nidmdoc.save_DotGraph(str(args.output_file + ".pdf"),
                                      format="pdf")
Example No. 7
def main(argv):
    parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.')

    parser.add_argument('-csv', dest='csv_file', required=True, help="Path to CSV file to convert")
    parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map',required=False,help="User-supplied JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional NIDM file to add CSV->NIDM converted graph to")
    #parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required')
    parser.add_argument('-png', action='store_true', required=False, help='Optional flag; when set, a PNG image file of the RDF graph will be produced')
    parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag; when set, NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-out', dest='output_file', required=True, help="Filename to save NIDM file")
    args = parser.parse_args()

    #open CSV file and load into a dataframe
    df = pd.read_csv(args.csv_file)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map)



    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #get list of session objects
        session_objs=project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            #df = pd.read_csv(args.csv_file,dtype={id_field : str})



        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        #print(query)
        qres = rdf_graph_parse.query(query)


        for row in qres:
            print('%s \t %s' %(row[0],row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be careful about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]
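            #e.g. a NIDM subject id of "0050002" becomes "50002" after lstrip("0"), which
            #str.contains() then finds inside a CSV id such as "sub-0050002"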

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index)==0)):

                #NIDM document session uuid
                session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                acq=AssessmentAcquisition(session=nidm_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable==id_field:
                        continue
                    else:
                        if not csv_row[row_variable].values[0]:
                            continue
                        #get column_to_term mapping uri and add as namespace in NIDM document
                        #provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"])
                        acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]})
                continue

        #serialize NIDM file
        with open(args.nidm_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())

            project.save_DotGraph(str(args.nidm_file + ".png"), format="png")



    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project=Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME:args.csv_file})


        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]


        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session=Session(project)

            #create and acquisition activity and entity
            acq=AssessmentAcquisition(session)
            acq_entity=AssessmentObject(acq)



            #store other data from row with columns_to_term mappings
            for row_variable, row_data in csv_row.items():  #items(): pandas removed Series.iteritems() in 2.0
                if not row_data:
                    continue
                #check if row_variable is subject id, if so skip it
                if row_variable==id_field:
                    #add qualified association with person
                    acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row_data})),role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    #get column_to_term mapping uri and add as namespace in NIDM document
                    acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]),""):row_data})
                    #print(project.serializeTurtle())

        #serialize NIDM file
        with open(args.output_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())
            if args.png:
                project.save_DotGraph(str(args.output_file + ".png"), format="png")
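The code above relies on two fields of the column_to_terms mapping returned by map_variables_to_terms: 'label', which is compared against Constants.NIDM_SUBJECTID to find the subject-id column, and 'url', which is used to mint a per-variable namespace. A hypothetical entry, inferred only from those accesses (real entries may carry additional fields, and the URL is a placeholder):

column_to_terms = {
    "age": {
        "label": "age",                                      #matched against Constants.NIDM_SUBJECTID._str to find the id column
        "url": "http://uri.interlex.org/base/ilx_XXXXXXX",   #term URI used to mint the attribute namespace
    }
}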
Example No. 8
    10/3/17 Modified Namespace to be QualifiedName for provtoolbox support...left most of the NIDM-Results Namespaces the same
@author: Sanu Ann Abraham <*****@*****.**>
	05/04/2018 Added python ProvONE support
'''
import six
from rdflib import Namespace, Graph
from prov.model import ProvDocument, QualifiedName
from prov.model import Namespace as provNamespace
from prov.constants import PROV_ATTRIBUTE_QNAMES, PROV_ATTRIBUTE_LITERALS, \
	PROV_N_MAP

from collections import namedtuple
DD = namedtuple("DD", ["source", "variable"])

PROV = Namespace('http://www.w3.org/ns/prov#')
PROVONE = provNamespace('provone', 'http://purl.dataone.org/provone/2015/01/15/ontology#')

NIDM_URL = 'http://purl.org/nidash/nidm#'
NIDM = Namespace(NIDM_URL)

NIIRI = Namespace('http://iri.nidash.org/')
AFNI = Namespace('http://purl.org/nidash/afni#')
SPM = Namespace('http://purl.org/nidash/spm#')
FSL = Namespace('http://purl.org/nidash/fsl#')
FREESURFER = Namespace('https://surfer.nmr.mgh.harvard.edu/')
ANTS = Namespace('http://stnava.github.io/ANTs/')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
CRYPTO = Namespace('http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions#')
DC = Namespace('http://purl.org/dc/elements/1.1/')
DCT = Namespace('http://purl.org/dc/terms/')
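These rdflib Namespace objects mint full term URIs via item or attribute access; that is how constants such as NIDM_COORDINATE = NIDM['NIDM_0000015'] in a later snippet are built. A quick illustration:

#item and attribute access on an rdflib Namespace both return URIRef objects
coord = NIDM['NIDM_0000015']   #http://purl.org/nidash/nidm#NIDM_0000015
label = RDFS['label']          #http://www.w3.org/2000/01/rdf-schema#label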
Example No. 9
@copyright: University of Warwick 2014
@author: David Keator <*****@*****.**>
    Added Python provtoolbox  support
    10/3/17 Modified Namespace to be QualifiedName for provtoolbox support...left most of the NIDM-Results Namespaces the same
@author: Sanu Ann Abraham <*****@*****.**>
	05/04/2018 Added python ProvONE support
'''
import six
from rdflib import Namespace, Graph
from prov.model import ProvDocument, QualifiedName
from prov.model import Namespace as provNamespace
from prov.constants import PROV_ATTRIBUTE_QNAMES, PROV_ATTRIBUTE_LITERALS, \
 PROV_N_MAP

PROV = Namespace('http://www.w3.org/ns/prov#')
PROVONE = provNamespace(
    'provone', 'http://purl.dataone.org/provone/2015/01/15/ontology#')

NIDM_URL = 'http://purl.org/nidash/nidm#'
NIDM = Namespace(NIDM_URL)

NIIRI = Namespace('http://iri.nidash.org/')
AFNI = Namespace('http://purl.org/nidash/afni#')
SPM = Namespace('http://purl.org/nidash/spm#')
FSL = Namespace('http://purl.org/nidash/fsl#')
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
CRYPTO = Namespace('http://id.loc.gov/vocabulary/preservation/cryptographicHashFunctions#')
DC = Namespace('http://purl.org/dc/elements/1.1/')
DCT = Namespace('http://purl.org/dc/terms/')
OWL = Namespace('http://www.w3.org/2002/07/owl#')
XSD = Namespace('http://www.w3.org/2001/XMLSchema#')
Example No. 10
def add_seg_data(nidmdoc, measure, header, tableinfo, json_map, png_file=None, output_file=None, root_act=None, nidm_graph=None):
    '''
    WIP: this function creates a NIDM file of brain volume data and, if the user supplied a NIDM-E file, it will add
    the brain volumes to that NIDM-E file for the matching subject ID
    :param nidmdoc:
    :param measure:
    :param header:
    :param tableinfo:
    :param json_map:
    :param png_file:
    :param output_file:
    :param root_act:
    :param nidm_graph:
    :return:
    '''

    #read in json_map




    #dictionary to store activities for each software agent
    software_agent={}
    software_activity={}
    participant_agent={}
    entity={}

    #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    #existing NIDM file.  The following logic basically determines which route to take...

    #if no existing NIDM graph was passed as a parameter then build the structures for a new file
    if nidm_graph is None:
        first_row=True
        #iterate over measure dictionary
        for measures in measure:

            #debugging: print the measure entry being processed
            print(measures)

            #store other data from row with columns_to_term mappings
            #NOTE (WIP): this branch still references csv_row, id_field, source_row, df, and
            #column_to_terms from the CSV converter; those names are not defined in this function
            for row_variable, row_data in csv_row.items():

                #check if row_variable is subject id, if so check whether we have an agent for this participant
                if row_variable==id_field:
                    #store participant id for later use in processing the data for this row
                    participant_id = row_data
                    #if there is no agent for the participant then add one
                    if row_data not in participant_agent.keys():
                        #add an agent for this person
                        participant_agent[row_data] = nidmdoc.graph.agent(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes=({Constants.NIDM_SUBJECTID:row_data}))
                    continue
                else:

                    #get source software matching this column deal with duplicate variables in source_row and pandas changing duplicate names
                    software_key = source_row.columns[[column_index(df,row_variable)]]._values[0].split(".")[0]

                    #see if we already have a software_activity for this agent
                    if software_key not in software_activity.keys():

                        #create an activity for the computation...simply a placeholder for more extensive provenance
                        software_activity[software_key] = nidmdoc.graph.activity(QualifiedName(provNamespace("nidm",Constants.NIDM),getUUID()),other_attributes={Constants.NIDM_PROJECT_DESCRIPTION:"brain volume computation"})

                        if root_act is not None:
                            #associate activity with activity of brain volumes creation (root-level activity)
                            software_activity[software_key].add_attributes({QualifiedName(provNamespace("dct",Constants.DCT),'isPartOf'):root_act})

                        #associate this activity with the participant
                        nidmdoc.graph.association(activity=software_activity[software_key],agent=participant_agent[participant_id],other_attributes={PROV_ROLE:Constants.NIDM_PARTICIPANT})
                        nidmdoc.graph.wasAssociatedWith(activity=software_activity[software_key],agent=participant_agent[participant_id])

                        #check if there's an associated software agent and if not, create one
                        if software_key not in software_agent.keys():
                            #create an agent typed and named with the NIDM analysis-software term
                            software_type = QualifiedName(
                                provNamespace(Core.safe_string(None, string="Neuroimaging Analysis Software"),
                                              Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), "")
                            software_agent[software_key] = nidmdoc.graph.agent(
                                QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()),
                                other_attributes={'prov:type': software_type, software_type: software_key})
                            #create qualified association with brain volume computation activity
                            nidmdoc.graph.association(
                                activity=software_activity[software_key],
                                agent=software_agent[software_key],
                                other_attributes={PROV_ROLE: software_type})
                            nidmdoc.graph.wasAssociatedWith(
                                activity=software_activity[software_key],
                                agent=software_agent[software_key])

                    #check if we have an entity for storing this particular variable for this subject and software, else create one
                    entity_key = software_activity[software_key].identifier.localpart + participant_agent[participant_id].identifier.localpart
                    if entity_key not in entity.keys():
                        #create an entity to store brain volume data for this participant
                        entity[entity_key] = nidmdoc.graph.entity(
                            QualifiedName(provNamespace("nidm", Constants.NIDM), getUUID()))
                        #add wasGeneratedBy association to activity
                        nidmdoc.graph.wasGeneratedBy(
                            entity=entity[entity_key],
                            activity=software_activity[software_key])

                    #get column_to_term mapping uri and add as namespace in NIDM document
                    entity[entity_key].add_attributes({
                        QualifiedName(provNamespace(Core.safe_string(None, string=str(row_variable)),
                                                    column_to_terms[row_variable.split(".")[0]]["url"]), ""):
                        row_data})
                    #print(project.serializeTurtle())


            #for debugging only: the resulting graph is currently too big for DOT graph creation,
            #so we render just the processing of one row of the brain volumes CSV file in order to
            #at least visually inspect the model
            if png_file:
                if first_row:
                    nidmdoc.save_DotGraph(str(output_file + ".pdf"), format="pdf")
                    first_row = False
Example No. 11
NIDM_COORDINATE = NIDM['NIDM_0000015']
NIDM_LEGENDRE_POLYNOMIAL_ORDER = NIDM['NIDM_0000014']
NIDM_CONTRAST_STANDARD_ERROR_MAP = NIDM['NIDM_0000013']
NIDM_CONNECTIVITY_CRITERION = NIDM['NIDM_0000012']
NIDM_CONJUNCTION_INFERENCE = NIDM['NIDM_0000011']
NIDM_HAS_FMRI_DESIGN = NIDM['NIDM_0000010']
NIDM_COLIN27_COORDINATE_SYSTEM = NIDM['NIDM_0000009']
NIDM_CLUSTER_LABELS_MAP = NIDM['NIDM_0000008']
NIDM_CLUSTER_DEFINITION_CRITERIA = NIDM['NIDM_0000007']
NIDM_CLUSTER = NIDM['NIDM_0000006']
NIDM_BINOMIAL_DISTRIBUTION = NIDM['NIDM_0000005']
NIDM_BINARY_MAP = NIDM['NIDM_0000004']
NIDM_CONTRAST_ESTIMATION = NIDM['NIDM_0000001']
NIDM_CONTRAST_MAP = NIDM['NIDM_0000002']
#NIDM-Experiment##############################################################
NIDM_PROJECT = QualifiedName(provNamespace("nidm", NIDM), 'Project')
NIDM_PROJECT_TYPE = QualifiedName(provNamespace("dctypes", DCTYPES),"Dataset")
NIDM_PROJECT_IDENTIFIER = QualifiedName(provNamespace("sio", SIO),"Identifier")
NIDM_PROJECT_NAME = QualifiedName(provNamespace("dctypes", DCTYPES),"title")
NIDM_PROJECT_DESCRIPTION = QualifiedName(provNamespace("dct", DCT),"description")
NIDM_PROJECT_LICENSE = QualifiedName(provNamespace("dct", DCT),"license")
NIDM_PROJECT_URL = QualifiedName(provNamespace("sio", SIO),"URL")
NIDM_PROJECT_REFERENCES = QualifiedName(provNamespace("dcat", DCAT),"creator")
NIDM_SESSION = QualifiedName(provNamespace("nidm", NIDM), 'Session')
NIDM_ACQUISITION_ACTIVITY = QualifiedName(provNamespace("nidm", NIDM), "AcquisitionActivity")
NIDM_ACQUISITION_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "AcquisitionEntity")
NIDM_MRACQUISITION_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "MRAcquistionEntity")
NIDM_DEMOGRAPHICS_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "DemographicsAcquistionEntity")
NIDM_ASSESSMENT_ENTITY = QualifiedName(provNamespace("nidm", NIDM), "AssessmentAcquistionEntity")
#files
NIDM_FILENAME = QualifiedName(provNamespace("nfo", NFO), "filename")
Example No. 12
def add_seg_data(nidmdoc,
                 measure,
                 header,
                 json_map,
                 png_file=None,
                 output_file=None,
                 root_act=None,
                 nidm_graph=None):
    '''
    WIP: this function creates a NIDM file of brain volume data and, if the user supplied a NIDM-E file, it will add
    the brain volumes to that NIDM-E file for the matching subject ID
    :param nidmdoc:
    :param measure:
    :param header:
    :param json_map:
    :param png_file:
    :param root_act:
    :param nidm_graph:
    :return:
    '''

    niiri = prov.Namespace("niiri", "http://iri.nidash.org/")
    #this function can be used for both creating a brainvolumes NIDM file from scratch or adding brain volumes to
    #existing NIDM file.  The following logic basically determines which route to take...

    #if no existing NIDM graph was passed as a parameter then build the structures for a new file
    if nidm_graph is None:
        first_row = True

        #create the software activity and attach each stats-file header item as a
        #freesurfer-namespaced attribute
        software_activity = nidmdoc.graph.activity(
            niiri[getUUID()],
            other_attributes={
                Constants.NIDM_PROJECT_DESCRIPTION:
                "Freesurfer segmentation statistics"
            })
        for key, value in header.items():
            software_activity.add_attributes({
                QualifiedName(provNamespace("fs", Constants.FREESURFER), key):
                value
            })

        #create software agent and associate with software activity
        software_agent = nidmdoc.graph.agent(
            niiri[getUUID()],
            other_attributes={
                QualifiedName(
                    provNamespace(
                        "Neuroimaging_Analysis_Software", Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE), ""):
                Constants.FREESURFER,
                prov.PROV_TYPE:
                prov.PROV["SoftwareAgent"]
            })
        #create qualified association with brain volume computation activity
        nidmdoc.graph.association(
            activity=software_activity,
            agent=software_agent,
            other_attributes={
                PROV_ROLE: Constants.NIDM_NEUROIMAGING_ANALYSIS_SOFTWARE
            })
        nidmdoc.graph.wasAssociatedWith(activity=software_activity,
                                        agent=software_agent)

        #print(nidmdoc.serializeTurtle())

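        #debugging: dump the parsed measures and the CDE mapping to local JSON files for inspection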
        with open('measure.json', 'w') as fp:
            json.dump(measure, fp)

        with open('json_map.json', 'w') as fp:
            json.dump(json_map, fp)
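        #the json_map CDE mapping is expected to look roughly like this (hypothetical excerpt,
        #inferred from the accesses below; keys shown are illustrative):
        #  {"Anatomy":  {"Left-Amygdala": {"label": "...", "isAbout": "<URI>", "hasLaterality": "Left",
        #                                  "definition": "...", "units": "mm^3"}},
        #   "Measures": {"Volume_mm3": {"measureOf": "<URI>", "datumType": "<URI>"}}}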

        datum_entity = nidmdoc.graph.entity(
            niiri[getUUID()],
            other_attributes={
                prov.PROV_TYPE:
                QualifiedName(
                    provNamespace("nidm", "http://purl.org/nidash/nidm#"),
                    "FSStatsCollection")
            })
        #wasGeneratedBy takes (entity, activity): the stats-collection entity is generated by the software activity
        nidmdoc.graph.wasGeneratedBy(datum_entity, software_activity)

        #iterate over the measure dictionary, whose entries come from the lines in the FS stats file that
        #start with '# Measure' and from the table at the bottom of the file that starts with '# ColHeaders'
        for measures in measure:

            #check if we have a CDE mapping for the anatomical structure referenced in the FS stats file
            if measures["structure"] in json_map['Anatomy']:

                #for the various fields in the FS stats file row starting with '# Measure'...
                for items in measures["items"]:
                    #if this measure name has a CDE mapping in the JSON map
                    if items['name'] in json_map['Measures'].keys():

                        if not json_map['Anatomy'][
                                measures["structure"]]['label']:
                            continue
                        region_entity = nidmdoc.graph.entity(
                            niiri[getUUID()],
                            other_attributes={
                                prov.PROV_TYPE:
                                QualifiedName(
                                    provNamespace(
                                        "measurement_datum",
                                        "http://uri.interlex.org/base/ilx_0738269#"
                                    ), "")
                            })

                        #construct the custom CDEs to describe measurements of the various brain regions
                        region_entity.add_attributes({
                            QualifiedName(
                                provNamespace(
                                    "isAbout", "http://uri.interlex.org/ilx_0381385#"), ""):
                            json_map['Anatomy'][
                                measures["structure"]]['isAbout'],
                            QualifiedName(
                                provNamespace(
                                    "hasLaterality", "http://uri.interlex.org/ilx_0381387#"), ""):
                            json_map['Anatomy'][
                                measures["structure"]]['hasLaterality'],
                            Constants.NIDM_PROJECT_DESCRIPTION:
                            json_map['Anatomy'][measures["structure"]]
                            ['definition'],
                            QualifiedName(
                                provNamespace(
                                    "isMeasureOf", "http://uri.interlex.org/ilx_0381389#"), ""):
                            QualifiedName(
                                provNamespace(
                                    "GrayMatter",
                                    "http://uri.interlex.org/ilx_0104768#"),
                                ""),
                            QualifiedName(
                                provNamespace(
                                    "rdfs", "http://www.w3.org/2000/01/rdf-schema#"), "label"):
                            json_map['Anatomy'][measures["structure"]]['label']
                        })

                        #QualifiedName(provNamespace("hasUnit","http://uri.interlex.org/ilx_0381384#"),""):json_map['Anatomy'][measures["structure"]]['units'],
                        #print("%s:%s" %(key,value))

                        region_entity.add_attributes({
                            QualifiedName(
                                provNamespace(
                                    "hasMeasurementType", "http://uri.interlex.org/ilx_0381388#"), ""):
                            json_map['Measures'][items['name']]["measureOf"],
                            QualifiedName(
                                provNamespace(
                                    "hasDatumType", "http://uri.interlex.org/ilx_0738262#"), ""):
                            json_map['Measures'][items['name']]["datumType"]
                        })

                        datum_entity.add_attributes(
                            {region_entity.identifier: items['value']})
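                        #the statement above uses each region entity's own identifier as a
                        #predicate on the per-subject datum entity, pairing a custom CDE with
                        #its value; schematically, with illustrative niiri uuids:
                        #  niiri:<datum-uuid> a nidm:FSStatsCollection ;
                        #      niiri:<region-uuid> "4449.579039" .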