def test_GetParticipantIDs():
    """Verify that Query.GetParticipantIDs reports every subject ID in a file.

    One project/session with two acquisitions is built, each acquisition gets
    its own participant ("9999" and "8888"), and the graph is written to
    ``test.ttl``.  The file is deleted right after the query, before the
    returned ``ID`` column is checked.
    """
    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(uuid="_123456", attributes=project_attributes)
    session = Session(uuid="_13579", project=project)

    # Both acquisitions are created first; participants are attached afterwards.
    acquisitions = [
        (Acquisition(uuid="_15793", session=session), "9999"),
        (Acquisition(uuid="_15795", session=session), "8888"),
    ]
    for acquisition, subject_id in acquisitions:
        participant = acquisition.add_person(
            attributes={Constants.NIDM_SUBJECTID: subject_id})
        acquisition.add_qualified_association(
            person=participant, role=Constants.NIDM_PARTICIPANT)

    # save a turtle file, query it, then clean it up
    turtle_path = "test.ttl"
    with open(turtle_path, 'w') as turtle_file:
        turtle_file.write(project.serializeTurtle())

    participants = Query.GetParticipantIDs([turtle_path])
    remove(turtle_path)

    for subject_id in ("9999", "8888"):
        assert participants['ID'].str.contains(subject_id).any()
def saveTestFile(file_name, data):
    """Write a Project built from *data* to *file_name* as Turtle.

    The project's UUID is "_123_" followed by *file_name*.  Returns that UUID
    prefixed with "nidm:".
    """
    project_uuid = "_123_" + file_name
    test_project = Project(uuid=project_uuid, attributes=data)

    # save a turtle file
    with open(file_name, 'w') as turtle_file:
        turtle_file.write(test_project.serializeTurtle())

    return "nidm:_123_{}".format(file_name)
def test_1(tmpdir):
    """Smoke test: a Project created without arguments serializes to Turtle.

    Runs inside the pytest ``tmpdir`` so ``test.ttl`` lands in a scratch
    directory.
    """
    tmpdir.chdir()
    empty_project = Project()

    # save a turtle file
    turtle_path = "test.ttl"
    with open(turtle_path, 'w') as turtle_file:
        turtle_file.write(empty_project.serializeTurtle())
def test_2(tmpdir):
    """Smoke test: a Project with name/identifier/description serializes to Turtle.

    Runs inside the pytest ``tmpdir`` so ``test.ttl`` lands in a scratch
    directory.
    """
    tmpdir.chdir()

    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    described_project = Project(attributes=project_attributes)

    turtle_path = "test.ttl"
    with open(turtle_path, 'w') as turtle_file:
        turtle_file.write(described_project.serializeTurtle())
def test_GetProjects():
    """Check that Query.GetProjectsUUID finds the project stored in ``test.ttl``.

    A project with UUID "_123456" is serialized to ``test.ttl`` in the
    current directory; the file is left in place afterwards.
    """
    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    nidm_project = Project(uuid="_123456", attributes=project_attributes)

    # save a turtle file
    turtle_path = "test.ttl"
    with open(turtle_path, 'w') as turtle_file:
        turtle_file.write(nidm_project.serializeTurtle())

    found_projects = Query.GetProjectsUUID([turtle_path])

    expected_uri = URIRef(Constants.NIDM + "_123456")
    assert expected_uri in found_projects
def test_sessions_3(tmpdir):
    """Check add_sessions() when one session was created for another project.

    ``session2`` is constructed against a second project but then added to
    the first; the first project is expected to list both sessions in the
    order session1, session2.
    """
    tmpdir.chdir()

    first_project, second_project = Project(), Project()
    own_session = Session(first_project)
    foreign_session = Session(second_project)

    for candidate in (own_session, foreign_session):
        first_project.add_sessions(candidate)

    registered = first_project.sessions
    assert len(registered) == 2
    assert foreign_session.label == first_project.sessions[1].label
    assert own_session.label == first_project.sessions[0].label
def test_project_trig_serialization():
    """Smoke test: serializeTrig() output can be written to an in-memory buffer.

    No assertions are made on the serialized content; the test only checks
    that serialization with the graph identifier ``Constants.NIIRI["_996"]``
    and the subsequent write/seek do not raise.
    """
    buffer = StringIO()

    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(uuid="_123456", attributes=project_attributes)

    # serialize as TriG under an explicit graph identifier
    graph_identifier = Constants.NIIRI["_996"]
    trig_text = project.serializeTrig(identifier=graph_identifier)

    buffer.write(trig_text)
    buffer.seek(0)
def test_project_emptygraph():
    """Check a Project created with ``empty_graph=True``.

    Expects the bundle to be exactly a ``prov.model.ProvDocument``, the only
    registered namespace prefix to be "nidm", the type to be prov:Activity
    and the graph to hold a single record.
    """
    # creating project on an empty graph
    proj = Project(empty_graph=True)

    # the bundle must be a plain ProvDocument (exact type, not a subclass)
    assert type(proj.bundle) is prov.model.ProvDocument

    # checking graph namespace
    registered_prefixes = [ns.prefix for ns in proj.graph.namespaces]
    assert registered_prefixes == ["nidm"]

    # checking type; the PROV-N representation is a quoted string literal
    type_repr = proj.get_type().provn_representation()
    assert eval(type_repr) == 'prov:Activity'

    records = proj.graph.get_records()
    assert len(records) == 1
def test_sessions_2(tmpdir):
    """Check that constructing a Session registers it with its Project.

    No explicit add_sessions() call is made; the session is expected to be
    the first entry of ``project.sessions`` right after construction.
    """
    tmpdir.chdir()

    parent_project = Project()
    assert parent_project.sessions == []

    new_session = Session(parent_project)
    first_registered = parent_project.sessions[0]
    assert first_registered.label == new_session.label
def test_jsonld_exports():
    """Check that the project identifier survives a JSON-LD export.

    The project is serialized with serializeJSONLD() into ``test.json``, the
    file is parsed back with the json module, and the ``@value`` of the
    "Identifier" entry is expected to be the string "9610".
    """
    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(uuid="_123456", attributes=project_attributes)

    export_path = "test.json"

    # save the JSON-LD export
    with open(export_path, 'w') as export_file:
        export_file.write(project.serializeJSONLD())

    # load the JSON file back in
    with open(export_path) as exported_file:
        exported = json.load(exported_file)

    identifier_entry = exported["Identifier"]
    assert identifier_entry['@value'] == "9610"
def test_project_noparameters():
    """Check the defaults of a Project created without parameters.

    Expects a ``Constants.NIDMDocument`` bundle (a ProvDocument subclass),
    namespace prefixes equal to ``Constants.namespaces``, a prov:Activity
    type and exactly one graph record.
    """
    proj = Project()

    # the bundle is the NIDM document type, which derives from ProvDocument
    bundle_type = type(proj.bundle)
    assert bundle_type is Constants.NIDMDocument
    assert issubclass(bundle_type, prov.model.ProvDocument)

    # checking graph namespace (order-insensitive comparison)
    expected_prefixes = list(Constants.namespaces)
    actual_prefixes = [ns.prefix for ns in proj.graph.namespaces]
    assert sorted(expected_prefixes) == sorted(actual_prefixes)

    # checking type; the PROV-N representation is a quoted string literal
    type_repr = proj.get_type().provn_representation()
    assert eval(type_repr) == 'prov:Activity'

    # checking length of graph records; per the original note this does not
    # hold when all tests are run together
    records = proj.graph.get_records()
    assert len(records) == 1
def test_GetProjectInstruments():
    """Check that Query.GetProjectInstruments lists both assessment types.

    Two assessment acquisitions are added to one session, each with an
    AssessmentObject typed as a different NIDM instrument.  The graph is
    written to ``test_gpi.ttl``, queried, and the file is removed before the
    ``assessment_type`` column is checked.
    """
    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    proj_uuid = "_123456gpi"
    project = Project(uuid=proj_uuid, attributes=project_attributes)
    session = Session(project)

    instrument_names = (
        "NorthAmericanAdultReadingTest",
        "PositiveAndNegativeSyndromeScale",
    )
    for instrument in instrument_names:
        acquisition = AssessmentAcquisition(session)
        type_attribute = {
            pm.PROV_TYPE:
            pm.QualifiedName(pm.Namespace("nidm", Constants.NIDM), instrument)
        }
        # constructing the object attaches it to the acquisition
        AssessmentObject(acquisition, attributes=type_attribute)

    # save a turtle file
    turtle_path = "test_gpi.ttl"
    with open(turtle_path, 'w') as turtle_file:
        turtle_file.write(project.serializeTurtle())

    assessment_list = Query.GetProjectInstruments([turtle_path], proj_uuid)
    remove(turtle_path)

    reported_types = [
        str(x) for x in assessment_list['assessment_type'].to_list()
    ]
    assert Constants.NIDM + "NorthAmericanAdultReadingTest" in reported_types
    assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in reported_types
def test_uri_project_id():
    """Check that the REST parser resolves ``/projects/nidm:_123456``.

    Two projects are written to ``uri2test.ttl`` and ``uri2test2.ttl``; the
    query for the first project's URI is expected to return a dict whose
    "dct:description" matches that project's description.
    """
    fixtures = (
        ("uri2test.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "1234356 Test investigation",
        }),
        ("uri2test2.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )

    # save one turtle file per project
    for turtle_path, project_uuid, attributes in fixtures:
        project = Project(uuid=project_uuid, attributes=attributes)
        with open(turtle_path, 'w') as turtle_file:
            turtle_file.write(project.serializeTurtle())

    result = restParser(['uri2test.ttl', 'uri2test2.ttl'],
                        '/projects/nidm:_123456')

    printer = pprint.PrettyPrinter()
    printer.pprint(result)

    assert type(result) == dict
    assert result["dct:description"] == "1234356 Test investigation"
def test_uri_project_list():
    """Check that the ``/projects`` REST endpoint lists both generated projects.

    Two projects with freshly generated UUIDs are written to ``uritest.ttl``
    and ``uritest2.ttl``.  ``RestParser().run`` is expected to return a list
    containing both UUIDs.  The fixture files are removed even when the call
    or an assertion fails.
    """
    import uuid

    proj1_uuid = str(uuid.uuid1())
    proj2_uuid = str(uuid.uuid1())

    fixtures = (
        ("uritest.ttl", proj1_uuid, {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }),
        ("uritest2.ttl", proj2_uuid, {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )

    try:
        # save one turtle file per project
        for turtle_path, project_uuid, attributes in fixtures:
            project = Project(uuid=project_uuid, attributes=attributes)
            with open(turtle_path, 'w') as turtle_file:
                turtle_file.write(project.serializeTurtle())

        rest_parser = RestParser()
        result = rest_parser.run(['uritest.ttl', 'uritest2.ttl'], '/projects')

        # Copy without a loop variable named `uuid`, which would rebind the
        # module imported above.
        project_uuids = list(result)

        assert type(result) == list
        assert len(project_uuids) >= 2
        assert proj1_uuid in project_uuids
        assert proj2_uuid in project_uuids
    finally:
        # Always clean up so a failing run does not leave fixtures behind.
        for turtle_path, _, _ in fixtures:
            if os.path.exists(turtle_path):
                os.remove(turtle_path)
def test_GetProjectMetadata():
    """Smoke test: Query.GetProjectMetadata runs over two project files.

    Each project gets a session, an assessment acquisition and an assessment
    object carrying handedness and age, and is written to ``test.ttl`` /
    ``test2.ttl``.  The query result is not asserted on.
    """
    fixtures = (
        ("test.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }, {
            Constants.NIDM_HANDEDNESS: "Left",
            Constants.NIDM_AGE: "90",
        }),
        ("test2.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }, {
            Constants.NIDM_HANDEDNESS: "Right",
            Constants.NIDM_AGE: "75",
        }),
    )

    for turtle_path, project_uuid, project_attrs, assessment_attrs in fixtures:
        project = Project(uuid=project_uuid, attributes=project_attrs)
        session = Session(project)
        acquisition = AssessmentAcquisition(session)
        # constructing the object attaches it to the acquisition
        AssessmentObject(acquisition, assessment_attrs)

        # save a turtle file
        with open(turtle_path, 'w') as turtle_file:
            turtle_file.write(project.serializeTurtle())

    test = Query.GetProjectMetadata(["test.ttl", "test2.ttl"])
def test_project_att():
    """Check a Project created with a single custom ``title`` attribute.

    Besides the default expectations (NIDMDocument bundle, prov:Activity
    type, one record), the namespace list is expected to contain one extra
    entry for the attribute's namespace.
    """
    # creating project with one attribute in the NIDM namespace
    title_attribute = {
        prov.model.QualifiedName(Constants.NIDM, "title"): "MyPRoject"
    }
    proj = Project(attributes=title_attribute)

    # the bundle is the NIDM document type, which derives from ProvDocument
    bundle_type = type(proj.bundle)
    assert bundle_type is Constants.NIDMDocument
    assert issubclass(bundle_type, prov.model.ProvDocument)

    # checking graph namespace: defaults plus the entry added for the attribute
    extra_prefix = rdflib.term.URIRef('http://purl.org/nidash/nidm#prefix')
    expected_prefixes = list(Constants.namespaces) + [extra_prefix]
    actual_prefixes = [ns.prefix for ns in proj.graph.namespaces]
    assert sorted(expected_prefixes) == sorted(actual_prefixes)

    # checking type; the PROV-N representation is a quoted string literal
    type_repr = proj.get_type().provn_representation()
    assert eval(type_repr) == 'prov:Activity'

    # checking length of graph records; per the original note this does not
    # hold when all tests are run together
    records = proj.graph.get_records()
    assert len(records) == 1
def test_sessions_1(tmpdir):
    """Check that add_sessions() keeps each session once, in insertion order.

    Every session is created against the project and then passed to
    add_sessions() as well; the project is still expected to hold exactly
    two sessions afterwards.
    """
    tmpdir.chdir()

    parent_project = Project()
    assert parent_project.sessions == []

    first_session = Session(parent_project)
    parent_project.add_sessions(first_session)
    assert first_session.label == parent_project.sessions[0].label

    second_session = Session(parent_project)
    parent_project.add_sessions(second_session)
    assert len(parent_project.sessions) == 2
    assert second_session.label == parent_project.sessions[1].label
def makeProjectTestFile2(filename):
    """Build the "TEST B" fixture project and save it via saveProject().

    The project (UUID "_123_" + *filename*) carries a broad set of project
    attributes and one session with an assessment acquisition whose
    participant is "George", age 22.  Returns whatever
    ``saveProject(filename, project)`` returns.
    """
    # Not referenced below; kept so the function body does the same work.
    DCTYPES = Namespace("http://purl.org/dc/dcmitype/")

    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "TEST B",  # this is the "title"
        Constants.NIDM_PROJECT_IDENTIFIER: 1234,
        Constants.NIDM_PROJECT_DESCRIPTION: "More Scans",
        Constants.NIDM_FILENAME: "testfile2.ttl",
        Constants.NIDM_PROJECT_LICENSE: "Creative Commons",
        Constants.NIDM_PROJECT_SOURCE: "Other",
        Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???",
        Constants.NIDM_BATH_SOLUTION: "bath",
        Constants.NIDM_CELL_TYPE: "ctype",
        Constants.NIDM_CHANNEL_NUMBER: "5",
        Constants.NIDM_ELECTRODE_IMPEDANCE: ".01",
        Constants.NIDM_GROUP_LABEL: "group 123",
        Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water",
        Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off",
        Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd",
        Constants.NIDM_NUBMER_OF_CHANNELS: "11",
        Constants.NIDM_APPLIED_FILTER: "on",
        Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8",
        Constants.NIDM_RECORDING_LOCATION: "lab",
    }
    project = Project(uuid="_123_" + filename, attributes=project_attributes)

    first_session = Session(project)
    assessment = AssessmentAcquisition(session=first_session)

    participant_attributes = {
        Constants.NIDM_GIVEN_NAME: "George",
        Constants.NIDM_AGE: 22,
    }
    participant = assessment.add_person("p1",
                                        attributes=participant_attributes)
    assessment.add_qualified_association(person=participant,
                                         role=Constants.NIDM_PARTICIPANT)

    return saveProject(filename, project)
def test_uri_project_id():
    """Check that the REST parser resolves a single project by prefixed ID.

    Two projects are written to ``uri2test.ttl`` and ``uri2test2.ttl``.  The
    URI is built from ``Query.matchPrefix(Constants.NIIRI)`` plus "_123456";
    the result is expected to be a dict whose "dct:description" matches the
    first project.  The fixture files are removed even when the call or an
    assertion fails.
    """
    fixtures = (
        ("uri2test.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "1234356 Test investigation",
        }),
        ("uri2test2.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )

    try:
        # save one turtle file per project
        for turtle_path, project_uuid, attributes in fixtures:
            project = Project(uuid=project_uuid, attributes=attributes)
            with open(turtle_path, 'w') as turtle_file:
                turtle_file.write(project.serializeTurtle())

        project_uri = '/projects/{}_123456'.format(
            Query.matchPrefix(Constants.NIIRI))
        result = restParser(['uri2test.ttl', 'uri2test2.ttl'], project_uri)

        assert type(result) == dict
        assert result["dct:description"] == "1234356 Test investigation"
    finally:
        # Always clean up so a failing run does not leave fixtures behind.
        for turtle_path, _, _ in fixtures:
            if os.path.exists(turtle_path):
                os.remove(turtle_path)
def test_uri_project():
    """Check that the ``/projects`` REST query lists exactly the two projects.

    Two projects are written to ``uritest.ttl`` and ``uritest2.ttl``; the
    result is expected to be a two-element list holding ``Constants.NIDM_URL``
    plus each project UUID.  The fixture files are removed even when the call
    or an assertion fails.
    """
    fixtures = (
        ("uritest.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }),
        ("uritest2.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )

    try:
        # save one turtle file per project
        for turtle_path, project_uuid, attributes in fixtures:
            project = Project(uuid=project_uuid, attributes=attributes)
            with open(turtle_path, 'w') as turtle_file:
                turtle_file.write(project.serializeTurtle())

        result = restParser(['uritest.ttl', 'uritest2.ttl'], '/projects')
        print(result)

        # Plain copy; avoids a loop variable named after the uuid module.
        project_uuids = list(result)

        assert type(result) == list
        assert len(project_uuids) == 2
        assert str(Constants.NIDM_URL) + "_123456" in project_uuids
        assert str(Constants.NIDM_URL) + "_654321" in project_uuids
    finally:
        # Always clean up so a failing run does not leave fixtures behind.
        for turtle_path, _, _ in fixtures:
            if os.path.exists(turtle_path):
                os.remove(turtle_path)
def test_GetProjectMetadata():
    """Write two project fixtures, ``test.ttl`` and ``test2.ttl``, to disk.

    This variant only serializes the projects; it performs no query and makes
    no assertions.
    """
    fixtures = (
        ("test.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }),
        ("test2.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )

    # save one turtle file per project
    for turtle_path, project_uuid, attributes in fixtures:
        project = Project(uuid=project_uuid, attributes=attributes)
        with open(turtle_path, 'w') as turtle_file:
            turtle_file.write(project.serializeTurtle())
def test_GetProjectMetadata():
    """Smoke test: Query.GetProjectMetadata runs over two project files.

    Two projects are written to ``test_gpm.ttl`` and ``test2_gpm.ttl`` and
    those same files are passed to the query (the earlier version queried
    ``test.ttl``/``test2.ttl``, which this test never creates).  The result
    is not asserted on yet; see the TODO below.  The fixture files are removed
    even when serialization or the query fails.
    """
    fixtures = (
        ("test_gpm.ttl", "_123456", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
            Constants.NIDM_PROJECT_IDENTIFIER: 9610,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
        }),
        ("test2_gpm.ttl", "_654321", {
            Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
            Constants.NIDM_PROJECT_IDENTIFIER: 1200,
            Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2",
        }),
    )
    turtle_paths = [turtle_path for turtle_path, _, _ in fixtures]

    try:
        # save one turtle file per project
        for turtle_path, project_uuid, attributes in fixtures:
            project = Project(uuid=project_uuid, attributes=attributes)
            with open(turtle_path, 'w') as turtle_file:
                turtle_file.write(project.serializeTurtle())

        # WIP
        test = Query.GetProjectMetadata(turtle_paths)

        # TODO: enable once the shape of the query result is settled.
        #assert URIRef(Constants.NIDM + "_654321") in test
        #assert URIRef(Constants.NIDM + "_123456") in test
        #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "1200") in test
        #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "9610") in test
        #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseII")) in test
        #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseIII")) in test
        #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation")) in test
        #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation2")) in test
    finally:
        for turtle_path in turtle_paths:
            try:
                remove(turtle_path)
            except FileNotFoundError:
                # The file was never created because an earlier step failed.
                pass
def _prompt_for_subject_id_column(columns):
    """Ask the user which of *columns* holds the subject ID; return its name.

    Prints a 1-based menu of the column names and keeps prompting until the
    reply is a whole number between 1 and ``len(columns)``.
    """
    for option, column in enumerate(columns, start=1):
        print("%d: %s" % (option, column))

    selection = input("Please select the subject ID field from the list above: ")
    # Re-prompt until the reply is a valid menu index.  Both bounds are
    # checked: 0 would silently pick the last column and len(columns) + 1
    # would raise IndexError.
    while not (selection.isdecimal() and 1 <= int(selection) <= len(columns)):
        selection = input("Please select the subject ID field from the list above: \t")
    return columns[int(selection) - 1]


def main(argv):
    """Convert a CSV file of assessment data into a NIDM Turtle document.

    Command-line options are read with ``ArgumentParser.parse_args()`` called
    without arguments; *argv* is accepted but not consulted.

    Column headers are first mapped to terms with ``map_variables_to_terms``.
    Then one of two things happens:

    * With ``-nidm``: the existing NIDM file is read, and for every
      participant already in it whose subject ID matches a CSV row, a new
      assessment acquisition holding that row is added to the participant's
      session.  The original file is copied to ``<file>.bak`` and then
      overwritten.
    * Without ``-nidm``: a new project is created with one session and one
      assessment acquisition per CSV row, and written to ``-out``.

    If the term mapping does not identify the subject-ID column, the user is
    asked to pick it interactively.
    """
    parser = ArgumentParser(description=(
        'This program will load in a CSV file and iterate over the header '
        'variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim '
        'tagged terms that fuzzy match the variable names.  The user will then interactively pick '
        'a term to associate with the variable name.  The resulting annotated CSV data will '
        'then be written to a NIDM data file.  Note, you must obtain an API key to Interlex by signing up '
        'for an account at scicrunch.org then going to My Account and API Keys.  Then set the environment '
        'variable INTERLEX_API_KEY with your key.'))

    parser.add_argument('-csv', dest='csv_file', required=True, help="Full path to CSV file to convert")
    parser.add_argument('-json_map', dest='json_map', required=False,
                        help="Full path to user-suppled JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False,
                        help="Optional full path of NIDM file to add CSV->NIDM converted graph to")
    # The fragments below previously ran together ("will beasked", "want tosimply").
    parser.add_argument('-no_concepts', action='store_true', required=False,
                        help='If this flag is set then no concept associations will be '
                             'asked of the user.  This is useful if you already have a -json_map specified without concepts and want to '
                             'simply run this program to get a NIDM file with user interaction to associate concepts.')
    parser.add_argument('-log', '--log', dest='logfile', required=False, default=None,
                        help="full path to directory to save log file. Log file name is csv2nidm_[arg.csv_file].log")
    parser.add_argument('-out', dest='output_file', required=True, help="Full path with filename to save NIDM file")
    args = parser.parse_args()

    # open CSV file and load into a data frame
    df = pd.read_csv(args.csv_file)

    # maps variables in CSV file to terms; concepts are associated
    # interactively unless -no_concepts was given
    if not args.no_concepts:
        column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file),
                                                      directory=dirname(args.output_file),
                                                      output_file=args.output_file, json_file=args.json_map)
    else:
        column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file),
                                                      directory=dirname(args.output_file),
                                                      output_file=args.output_file, json_file=args.json_map,
                                                      associate_concepts=False)

    if args.logfile is not None:
        logging.basicConfig(filename=join(args.logfile, 'csv2nidm_' + os.path.splitext(os.path.basename(args.csv_file))[0] + '.log'),
                            level=logging.DEBUG)
        # add some logging info
        logging.info("csv2nidm %s" % args)

    # If user has added an existing NIDM file as a command line parameter then
    # add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        # read in NIDM file
        project = read_nidm(args.nidm_file)
        # get list of session objects
        session_objs = project.get_sessions()

        # look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID)
        id_field = None
        for key, term in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == term['label']:
                # NOTE(security): eval() executes the mapping key as Python.
                # The keys can originate from the user-supplied -json_map
                # file, so only run this tool on mapping files you trust.
                key_tuple = eval(key)
                id_field = key_tuple.variable
                # make sure id_field is a string for zero-padded subject ids:
                # re-read data file with constraint that key field is read as string
                df = pd.read_csv(args.csv_file, dtype={id_field: str})
                break

        # if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            id_field = _prompt_for_subject_id_column(df.columns)
            # re-read so zero-padded subject ids are kept as strings
            df = pd.read_csv(args.csv_file, dtype={id_field: str})

        # use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()), format='turtle')

        print("Querying for existing participants in NIDM graph....")
        # find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        qres = rdf_graph.query(query)

        # string-based URIs of the session objects, in the same order as session_objs
        session_uris = [o.identifier._uri for o in session_objs]

        for row in qres:
            logging.info("found existing participant %s \t %s" % (row[0], row[1]))
            # find row in CSV file with matching subject id to the agent in the
            # NIDM file.  Both sides are compared as strings with leading
            # zeros stripped from the NIDM id.
            # NOTE(review): str.contains is a substring (regex) match, so id
            # "1" also matches "10" or "21" -- confirm this is intended.
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            # only subjects that already exist in the NIDM file get new data
            if len(csv_row.index) == 0:
                continue

            # NIDM document session uuid
            session_uuid = row[0]
            # session object from the existing NIDM file for this subject
            nidm_session = session_objs[session_uris.index(str(session_uuid))]

            # add an assessment acquisition for the phenotype data to session and associate with agent
            acq = AssessmentAcquisition(session=nidm_session)
            # add acquisition entity for assessment
            acq_entity = AssessmentObject(acquisition=acq)
            # add qualified association with existing agent
            acq.add_qualified_association(person=row[2], role=Constants.NIDM_PARTICIPANT)

            # add git-annex info if exists
            num_sources = addGitAnnexSources(obj=acq_entity, filepath=args.csv_file, bids_root=dirname(args.csv_file))
            # if there aren't any git annex sources then just store the local directory information
            if num_sources == 0:
                # WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + args.csv_file})

            # store file to acq_entity
            acq_entity.add_attributes({Constants.NIDM_FILENAME: basename(args.csv_file)})

            # store other data from row with columns_to_term mappings
            for row_variable in csv_row:
                # the subject id column is not stored as an attribute
                if row_variable == id_field:
                    continue
                cell_value = csv_row[row_variable].values[0]
                # NOTE(review): falsy cells are skipped, which also drops a
                # legitimate value of 0 -- confirm this is intended.
                if not cell_value:
                    continue
                add_attributes_with_cde(acq_entity, cde, row_variable, cell_value)

        print("Adding CDEs to graph....")
        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()), format='turtle')
        rdf_graph = rdf_graph + cde

        print("Backing up original NIDM file...")
        copy2(src=args.nidm_file, dst=args.nidm_file + ".bak")
        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.nidm_file, format='turtle')

    else:
        print("Creating NIDM file...")
        # If user did not choose to add this data to an existing NIDM file
        # then create a new one for the CSV data
        project = Project()

        # simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME: args.csv_file})

        # look at column_to_terms dictionary for NIDM URL for subject id (Constants.NIDM_SUBJECTID)
        id_field = None
        for key, term in column_to_terms.items():
            # using skos:sameAs relationship to associate subject identifier
            # variable from csv with a known term for subject IDs
            if 'sameAs' in term and Constants.NIDM_SUBJECTID.uri == term['sameAs']:
                # NOTE(security): eval() executes the mapping key as Python.
                # The keys can originate from the user-supplied -json_map
                # file, so only run this tool on mapping files you trust.
                key_tuple = eval(key)
                id_field = key_tuple.variable
                # make sure id_field is a string for zero-padded subject ids:
                # re-read data file with constraint that key field is read as string
                df = pd.read_csv(args.csv_file, dtype={id_field: str})
                break

        # if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            id_field = _prompt_for_subject_id_column(df.columns)
            # re-read so zero-padded subject ids are kept as strings
            df = pd.read_csv(args.csv_file, dtype={id_field: str})

        # iterate over rows and store in NIDM file
        for _, csv_row in df.iterrows():
            # create a session object
            session = Session(project)

            # create an acquisition activity and entity
            acq = AssessmentAcquisition(session)
            acq_entity = AssessmentObject(acq)

            # add git-annex info if exists
            num_sources = addGitAnnexSources(obj=acq_entity, filepath=args.csv_file,
                                             bids_root=os.path.dirname(args.csv_file))
            # if there aren't any git annex sources then just store the local directory information
            if num_sources == 0:
                # WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + args.csv_file})

            # store file to acq_entity
            acq_entity.add_attributes({Constants.NIDM_FILENAME: basename(args.csv_file)})

            # store other data from row with columns_to_term mappings.
            # Series.items() replaces iteritems(), which pandas 2.0 removed.
            for row_variable, row_data in csv_row.items():
                # NOTE(review): falsy cells are skipped, which also drops a
                # legitimate value of 0 -- confirm this is intended.
                if not row_data:
                    continue
                if row_variable == id_field:
                    # WIP: Check if agent already exists with the same ID.
                    # If so, use it else create a new agent.
                    person = acq.add_person(attributes={Constants.NIDM_SUBJECTID: str(row_data)})
                    # add qualified association with person
                    acq.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT)
                else:
                    add_attributes_with_cde(acq_entity, cde, row_variable, row_data)

        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()), format='turtle')
        rdf_graph = rdf_graph + cde

        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.output_file, format='turtle')
def main(argv):
    """Convert a BIDS MRI dataset directory into a NIDM-Experiment Turtle file.

    Command-line options are read with ``ArgumentParser.parse_args()`` called
    without arguments; *argv* is accepted but not consulted.

    Steps, in order:

    1. ``dataset_description.json`` is loaded and every key that has a mapping
       in ``BIDS_Constants.dataset_description`` becomes a project attribute.
    2. ``participants.tsv`` is read; each row gets its own Session (stored in
       a dict keyed by the part of ``participant_id`` after the first "-"),
       an Acquisition, a demographics object and a person.
    3. For every subject in the BIDS layout, every ``.nii``/``.nii.gz`` file
       gets an Acquisition; anat, func and dwi files additionally get typed
       acquisition objects carrying file names and sidecar JSON metadata.
    4. For every ``phenotype/*.tsv`` file, the row of the current subject is
       stored as an assessment acquisition object.
    5. The project is written as Turtle to the ``-o`` path and a DOT graph is
       saved as ``<output>.png``.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed copy of this function; confirm it against the
    version-controlled file before relying on it.
    """
    parser = ArgumentParser(
        description=
        'This program will convert a BIDS MRI dataset to a NIDM-Experiment \
        RDF document. It will parse phenotype information and simply store variables/values \
        and link to the associated json data dictionary file.')

    parser.add_argument('-d',
                        dest='directory',
                        required=True,
                        help="Path to BIDS dataset directory")
    parser.add_argument('-o',
                        dest='outputfile',
                        default="nidm.ttl",
                        help="NIDM output turtle file")
    args = parser.parse_args()

    directory = args.directory
    outputfile = args.outputfile

    #importlib.reload(sys)
    #sys.setdefaultencoding('utf8')

    #Parse dataset_description.json file in BIDS directory
    with open(os.path.join(directory,
                           'dataset_description.json')) as data_file:
        dataset = json.load(data_file)
    #print(dataset_data)

    #create project / nidm-exp doc
    project = Project()

    #add various attributes if they exist in BIDS dataset
    for key in dataset:
        #print(key)
        #if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                # NOTE(review): list values are joined with no separator and
                # without str() conversion, unlike the sidecar-metadata code
                # further down, which converts each element -- confirm.
                project.add_attributes({
                    BIDS_Constants.dataset_description[key]:
                    "".join(dataset[key])
                })
            else:
                project.add_attributes(
                    {BIDS_Constants.dataset_description[key]: dataset[key]})

    #create empty dictionary for sessions where key is subject id and used later to link scans to same session as demographics
    session = {}
    #Parse participants.tsv file in BIDS directory and create study and acquisition objects
    with open(os.path.join(directory, 'participants.tsv')) as csvfile:
        participants_data = csv.DictReader(csvfile, delimiter='\t')
        #print(participants_data.fieldnames)
        for row in participants_data:
            #create session object for subject to be used for participant metadata and image data
            #parse subject id from "sub-XXXX" string
            # subjid[1] below requires at least one "-" in participant_id
            subjid = row['participant_id'].split("-")
            session[subjid[1]] = Session(project)

            #add acquisition object
            acq = Acquisition(session=session[subjid[1]])
            acq_entity = DemographicsAcquisitionObject(acquisition=acq)
            # the returned person is not used again in this function
            participant = acq.add_person(role=Constants.NIDM_PARTICIPANT,
                                         attributes=({
                                             Constants.NIDM_SUBJECTID:
                                             row['participant_id']
                                         }))

            for key, value in row.items():
                #for now only convert variables in participants.tsv file who have term mappings in BIDS_Constants.py
                if key in BIDS_Constants.participants:
                    acq_entity.add_attributes(
                        {BIDS_Constants.participants[key]: value})

    #get BIDS layout
    bids_layout = BIDSLayout(directory)

    #create acquisition objects for each scan for each subject

    #loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        #skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue
        for file_tpl in bids_layout.get(subject=subject_id,
                                        extensions=['.nii', '.nii.gz']):
            #create an acquisition activity
            # NOTE(review): raises KeyError when a subject in the layout has
            # no row in participants.tsv -- confirm this cannot happen.
            acq = Acquisition(session[subject_id])

            #print(file_tpl.type)
            if file_tpl.modality == 'anat':
                #do something with anatomicals
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                #make relative link to
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)
                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            # list values are flattened into one string
                            if type(json_data[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })
            elif file_tpl.modality == 'func':
                #do something with functionals
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                if 'run' in file_tpl._fields:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.run})

                #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)

                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            # list values are flattened into one string
                            if type(json_data[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })

                #get associated events TSV file
                # the run is only part of the query when the file has one
                if 'run' in file_tpl._fields:
                    events_file = bids_layout.get(subject=subject_id,
                                                  extensions=['.tsv'],
                                                  modality=file_tpl.modality,
                                                  task=file_tpl.task,
                                                  run=file_tpl.run)
                else:
                    events_file = bids_layout.get(subject=subject_id,
                                                  extensions=['.tsv'],
                                                  modality=file_tpl.modality,
                                                  task=file_tpl.task)
                #if there is an events file then this is task-based so create an acquisition object for the task file and link
                if events_file:
                    #for now create acquisition object and link it to the associated scan
                    events_obj = AcquisitionObject(acq)
                    #add prov type, task name as prov:label, and link to filename of events file
                    # NOTE(review): json_data["TaskName"] is read without a
                    # guard; this raises if the metadata is empty or has no
                    # TaskName entry -- confirm.
                    events_obj.add_attributes({
                        PROV_TYPE:
                        Constants.NIDM_MRI_BOLD_EVENTS,
                        BIDS_Constants.json_keys["TaskName"]:
                        json_data["TaskName"],
                        Constants.NFO["filename"]:
                        events_file[0].filename
                    })
                    #link it to appropriate MR acquisition entity
                    events_obj.wasAttributedTo(acq_obj)

            elif file_tpl.modality == 'dwi':
                #do stuff with with dwi scans...
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                if 'run' in file_tpl._fields:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.run})

                #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)

                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            # list values are flattened into one string
                            if type(json_data[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })

                #for bval and bvec files, what to do with those?

                #for now, create new generic acquisition objects, link the files, and associate with the one for the DWI scan?
                acq_obj_bval = AcquisitionObject(acq)
                acq_obj_bval.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bval"]})
                #add file link to bval files
                acq_obj_bval.add_attributes({
                    Constants.NIDM_FILENAME:
                    bids_layout.get_bval(file_tpl.filename)
                })
                acq_obj_bvec = AcquisitionObject(acq)
                acq_obj_bvec.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bvec"]})
                #add file link to bvec files
                acq_obj_bvec.add_attributes({
                    Constants.NIDM_FILENAME:
                    bids_layout.get_bvec(file_tpl.filename)
                })

                #link bval and bvec acquisition object entities together or is their association with enclosing activity enough?

        #Added temporarily to support phenotype files
        #for each *.tsv / *.json file pair in the phenotypes directory
        for tsv_file in glob.glob(os.path.join(directory, "phenotype",
                                               "*.tsv")):
            #for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            #the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    # only the rows of the subject being processed are stored
                    if not subjid[1] == subject_id:
                        continue
                    else:
                        #add acquisition object
                        acq = Acquisition(session=session[subjid[1]])
                        acq_entity = AssessmentAcquisitionObject(
                            acquisition=acq)
                        # the returned person is not used again in this function
                        participant = acq.add_person(
                            role=Constants.NIDM_PARTICIPANT,
                            attributes=({
                                Constants.NIDM_SUBJECTID:
                                row['participant_id']
                            }))

                        for key, value in row.items():
                            if not key == "participant_id":
                                #for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes(
                                    {Constants.BIDS[key]: value})

                        #link TSV file
                        acq_entity.add_attributes(
                            {Constants.NIDM_FILENAME: tsv_file})
                        #link associated JSON file if it exists
                        # the data dictionary shares the TSV's base name
                        data_dict = os.path.join(
                            directory, "phenotype",
                            os.path.splitext(os.path.basename(tsv_file))[0] +
                            ".json")
                        if os.path.isfile(data_dict):
                            acq_entity.add_attributes(
                                {Constants.BIDS["data_dictionary"]: data_dict})

    #serialize graph
    #print(project.graph.get_provn())
    with open(outputfile, 'w') as f:
        f.write(project.serializeTurtle())
        #f.write(project.graph.get_provn())
    #save a DOT graph as PNG
    project.save_DotGraph(str(outputfile + ".png"), format="png")
def _find_subject_id_field(df, column_to_terms):
    """Return the name of the CSV column that holds the subject IDs.

    The column is the one whose mapped term label equals
    ``Constants.NIDM_SUBJECTID._str``.  If several columns match, the last
    one wins.  If none matches, the columns are listed on stdout and the
    user is prompted to pick one by its 1-based number.
    """
    id_field = None
    for column, term in column_to_terms.items():
        if Constants.NIDM_SUBJECTID._str == term['label']:
            id_field = column

    # if we couldn't find a subject ID field in column_to_terms, ask user
    if id_field is None:
        for option, column in enumerate(df.columns, start=1):
            print("%d: %s" % (option, column))
        selection = input("Please select the subject ID field from the list above: ")
        id_field = df.columns[int(selection) - 1]
    return id_field


def _term_attribute(row_variable, column_to_terms):
    """Build the attribute key for a CSV column from its mapped term URL."""
    return QualifiedName(
        provNamespace(Core.safe_string(None, string=str(row_variable)),
                      column_to_terms[row_variable]["url"]),
        "")


def _write_nidm_file(project, path, as_jsonld):
    """Serialize ``project`` to ``path`` as JSON-LD or Turtle."""
    with open(path, 'w') as f:
        print("Writing NIDM file...")
        if as_jsonld:
            f.write(project.serializeJSONLD())
        else:
            f.write(project.serializeTurtle())


def main(argv):
    """Convert a CSV file into NIDM, mapping each column to a term.

    Either appends the CSV rows to the subjects already present in an
    existing NIDM file (``-nidm``) or creates a new NIDM document written
    to ``-out``.

    :param argv: kept for interface compatibility.
        NOTE(review): it is not used; ``parse_args()`` reads ``sys.argv``.
    """
    parser = ArgumentParser(
        description=(
            'This program will load in a CSV file and iterate over the header '
            'variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim '
            'tagged terms that fuzzy match the variable names.  The user will then interactively pick '
            'a term to associate with the variable name.  The resulting annotated CSV data will '
            'then be written to a NIDM data file.'))

    parser.add_argument('-csv', dest='csv_file', required=True, help="Path to CSV file to convert")
    parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map', required=False, help="User-suppled JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced')
    parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag, when set NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-out', dest='output_file', required=True, help="Filename to save NIDM file")
    args = parser.parse_args()

    # open CSV file and load into a data frame
    df = pd.read_csv(args.csv_file)

    # maps variables in CSV file to terms
    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file),
                                             output_file=args.output_file, json_file=args.json_map)

    if args.nidm_file:
        # Add the CSV data to an existing NIDM file, only for subjects
        # that already exist in that file.
        print("Adding to NIDM file...")
        project = read_nidm(args.nidm_file)
        session_objs = project.get_sessions()

        id_field = _find_subject_id_field(df, column_to_terms)

        # use RDFLib here for a temporary graph, making the query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(project.serializeTurtle()), format='turtle')

        # find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        qres = rdf_graph_parse.query(query)

        # loop-invariant lookups, computed once instead of once per result row
        session_uris = [o.identifier._uri for o in session_objs]
        csv_subject_ids = df[id_field].astype('str')

        for row in qres:
            print('%s \t %s' % (row[0], row[1]))
            # Leading zeros are stripped from the NIDM id because
            # pandas.read_csv drops them from numeric-looking columns.
            # NOTE(review): str.contains is a substring/regex match, so id
            # "12" also selects "123"; only the first hit is stored below.
            csv_row = df.loc[csv_subject_ids.str.contains(str(row[1]).lstrip("0"))]

            # only subjects already present in the NIDM file get data added
            if len(csv_row.index) == 0:
                continue

            # session object from the existing NIDM file for this subject
            nidm_session = session_objs[session_uris.index(str(row[0]))]

            # assessment acquisition activity + entity, tied to the existing agent
            acq = AssessmentAcquisition(session=nidm_session)
            acq_entity = AssessmentObject(acquisition=acq)
            acq.add_qualified_association(person=row[2], role=Constants.NIDM_PARTICIPANT)

            # store other data from row with column_to_terms mappings
            for row_variable in csv_row:
                if row_variable == id_field:
                    continue
                value = csv_row[row_variable].values[0]
                # NOTE(review): this also skips legitimate falsy values such as 0
                if not value:
                    continue
                acq_entity.add_attributes({_term_attribute(row_variable, column_to_terms): value})

        _write_nidm_file(project, args.nidm_file, args.jsonld)
        # NOTE(review): unlike the branch below, the PNG is written here
        # regardless of -png; kept as-is to preserve existing behaviour.
        project.save_DotGraph(str(args.nidm_file + ".png"), format="png")

    else:
        # No existing NIDM file given: create a new document for the CSV data.
        print("Creating NIDM file...")
        project = Project()

        # simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME: args.csv_file})

        id_field = _find_subject_id_field(df, column_to_terms)

        # one session / acquisition activity / assessment entity per CSV row
        for _, csv_row in df.iterrows():
            session = Session(project)
            acq = AssessmentAcquisition(session)
            acq_entity = AssessmentObject(acq)

            # Series.items() replaces Series.iteritems(), removed in pandas 2.0
            for row_variable, row_data in csv_row.items():
                if not row_data:
                    continue
                if row_variable == id_field:
                    # the subject id becomes the participant agent
                    person = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: row_data}))
                    acq.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT)
                else:
                    acq_entity.add_attributes({_term_attribute(row_variable, column_to_terms): row_data})

        _write_nidm_file(project, args.output_file, args.jsonld)
        if args.png:
            project.save_DotGraph(str(args.output_file + ".png"), format="png")
def saveTestFile(file_name, data):
    """Create a test project from ``data`` and save it under ``file_name``.

    The project uuid is ``"_123_"`` followed by ``file_name``; the value
    returned by ``saveProject`` is passed straight back to the caller.
    """
    project_uuid = "_123_" + file_name
    test_project = Project(uuid=project_uuid, attributes=data)
    saved = saveProject(file_name, test_project)
    return saved
def _load_bids_root_json(directory, filename):
    """Load ``filename`` from the top level of the BIDS ``directory``.

    Logs a critical message and terminates via ``exit("-1")`` when the
    directory does not exist or the file cannot be opened.
    """
    if not os.path.isdir(os.path.join(directory)):
        logging.critical("Error: BIDS directory %s does not exist!" %
                         os.path.join(directory))
        exit("-1")
    try:
        with open(os.path.join(directory, filename)) as data_file:
            return json.load(data_file)
    except OSError:
        logging.critical(
            "Cannot find %s file which is required in the BIDS spec" % filename)
        exit("-1")


def _add_mapped_attributes(prov_obj, values, term_map, separator=""):
    """Copy entries of ``values`` whose key is in ``term_map`` onto ``prov_obj``.

    List values are flattened into one string joined by ``separator``;
    elements are stringified first so numeric lists do not break the join.
    """
    for key in values:
        if key not in term_map:
            continue
        value = values[key]
        if isinstance(value, list):
            value = separator.join(str(item) for item in value)
        prov_obj.add_attributes({term_map[key]: value})


def _add_sha512(prov_obj, path):
    """Attach the SHA512 of ``path`` to ``prov_obj``, or log if it is missing."""
    if isfile(path):
        prov_obj.add_attributes({Constants.CRYPTO_SHA512: getsha512(path)})
    else:
        logging.info(
            "WARNINGL file %s doesn't exist!  No SHA512 sum stored in NIDM files..."
            % path)


def _describe_scan(acq_obj, file_tpl, directory, usage_key=None,
                   add_location=True):
    """Add contrast type, usage type, file link, location and checksum.

    ``usage_key`` overrides the ``BIDS_Constants.scans`` key used for the
    image usage type (the dwi branch stores ``scans["dti"]``).
    """
    suffix = file_tpl.entities['suffix']
    datatype = file_tpl.entities['datatype']

    # image contrast type
    if suffix in BIDS_Constants.scans:
        acq_obj.add_attributes(
            {Constants.NIDM_IMAGE_CONTRAST_TYPE: BIDS_Constants.scans[suffix]})
    else:
        logging.info(
            "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
            % suffix)

    # image usage type
    if datatype in BIDS_Constants.scans:
        usage = BIDS_Constants.scans[datatype if usage_key is None else usage_key]
        acq_obj.add_attributes({Constants.NIDM_IMAGE_USAGE_TYPE: usage})
    else:
        logging.info(
            "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
            % datatype)

    # file link, relative to the BIDS directory
    acq_obj.add_attributes({
        Constants.NIDM_FILENAME:
        getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename), directory)
    })
    # WIP: absolute location of BIDS directory on disk for later finding of files
    if add_location:
        acq_obj.add_attributes({Constants.PROV['Location']: directory})

    _add_sha512(acq_obj, join(directory, file_tpl.dirname, file_tpl.filename))


def _add_sidecar_metadata(acq_obj, bids_layout, file_tpl, subject_id):
    """Add the scan's associated JSON metadata and return the metadata object."""
    json_data = (bids_layout.get(suffix=file_tpl.entities['suffix'],
                                 subject=subject_id))[0].metadata
    # Iterate the keys themselves: the previous ``.items()`` loop produced
    # (key, value) tuples that could never match BIDS_Constants.json_keys.
    # NOTE(review): assumes ``json_data.info`` is dict-like - confirm
    # against the pybids version in use.
    _add_mapped_attributes(acq_obj, json_data.info, BIDS_Constants.json_keys)
    return json_data


def _add_dwi_companion(acq, scan_key, companion_path, file_tpl, directory):
    """Create the generic acquisition object for a DWI bval/bvec file."""
    companion = AcquisitionObject(acq)
    companion.add_attributes({PROV_TYPE: BIDS_Constants.scans[scan_key]})
    companion_file = join(file_tpl.dirname, companion_path)
    companion.add_attributes({
        Constants.NIDM_FILENAME: getRelPathToBIDS(companion_file, directory)
    })
    # WIP: absolute location of BIDS directory on disk for later finding of files
    companion.add_attributes({Constants.PROV['Location']: directory})
    # checksum of the bval/bvec file itself (previously the DWI image was hashed)
    _add_sha512(companion, join(directory, companion_file))


def _subject_label(participant_id):
    """Strip the ``sub-`` prefix: ids appear as ``sub-XXXX`` or just ``XXXX``."""
    parts = participant_id.split("-")
    return parts[1] if len(parts) > 1 else parts[0]


def bidsmri2project(directory, args):
    """Convert a BIDS MRI dataset into a NIDM-Experiment project.

    :param directory: path to the root of the BIDS dataset.
    :param args: parsed command-line arguments; ``args.json_map`` and
        ``args.key`` control variable-to-term mapping of participants.tsv.
    :return: tuple ``(project, cde)``; ``cde`` is the graph returned by
        ``map_variables_to_terms`` or an empty graph when no mapping ran.

    Terminates via ``exit("-1")`` if a required top-level JSON file
    (dataset_description.json, T1w.json, task-rest_bold.json) is missing.
    """
    # local import so this block does not depend on the module's import list
    from rdflib import Graph

    # Empty cde graph; replaced below if variable-term mapping is performed.
    # Without this, ``cde`` was unbound whenever mapping was skipped.
    cde = Graph()

    # Parse dataset_description.json file in BIDS directory
    dataset = _load_bids_root_json(directory, 'dataset_description.json')

    # create project / nidm-exp doc and add the mapped dataset attributes
    project = Project()
    _add_mapped_attributes(project, dataset, BIDS_Constants.dataset_description)
    # absolute location of BIDS directory, for later finding of files which
    # are stored relatively in the NIDM document
    project.add_attributes({Constants.PROV['Location']: directory})

    bids_layout = BIDSLayout(directory)

    # keyed by subject id; used to link scans to the same session as demographics
    session = {}
    participant = {}

    # Parse participants.tsv file and create study and acquisition objects
    participants_tsv = os.path.join(directory, 'participants.tsv')
    if os.path.isfile(participants_tsv):
        with open(participants_tsv) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # columns without a BIDS_Constants mapping need user-assisted mapping
            mapping_list = [field for field in participants_data.fieldnames
                            if field not in BIDS_Constants.participants]

            # do variable-term mappings
            if (args.json_map != False) or (args.key is not None):  # noqa: E712
                participants_json = os.path.join(directory, 'participants.json')
                mapping_kwargs = dict(directory=directory,
                                      assessment_name='participants.tsv',
                                      df=DataFrame(columns=mapping_list),
                                      apikey=args.key,
                                      output_file=participants_json)
                if args.json_map == False:  # noqa: E712
                    # default to participants.json when it exists, else run
                    # without a json mapping file
                    if os.path.isfile(participants_json):
                        mapping_kwargs['json_file'] = participants_json
                else:
                    mapping_kwargs['json_file'] = args.json_map
                _column_to_terms, cde = map_variables_to_terms(**mapping_kwargs)

            for row in participants_data:
                subjid = _subject_label(row['participant_id'])
                logging.info(subjid)

                # session, assessment activity and entity for this subject
                session[subjid] = Session(project)
                acq = AssessmentAcquisition(session=session[subjid])
                acq_entity = AssessmentObject(acquisition=acq)

                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(
                    attributes=({Constants.NIDM_SUBJECTID: row['participant_id']}))
                # qualified association of participant with acquisition activity
                acq.add_qualified_association(
                    person=participant[subjid]['person'],
                    role=Constants.NIDM_PARTICIPANT)
                print(acq)

                for key, value in row.items():
                    if not value:
                        continue
                    if key in BIDS_Constants.participants:
                        # participant_id was already handled above via the agent
                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):
                            acq_entity.add_attributes(
                                {BIDS_Constants.participants[key]: value})
                    else:
                        # WIP: CDE support for variables without a BIDS term
                        add_attributes_with_cde(prov_object=acq_entity, cde=cde,
                                                row_variable=key, value=value)

    # create acquisition objects for each scan for each subject
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" % subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # session metadata is queried but not yet stored anywhere
        session_dirs = bids_layout.get(target='session', subject=subject_id,
                                       return_type='dir')

        # no participants.tsv entry for this subject: create the session here
        if not (subject_id in session):
            session[subject_id] = Session(project)

        for file_tpl in bids_layout.get(subject=subject_id,
                                        extensions=['.nii', '.nii.gz']):
            # create an acquisition activity
            acq = MRAcquisition(session[subject_id])

            # create the agent if participants.tsv did not already provide one
            if not (subject_id in participant):
                participant[subject_id] = {}
                participant[subject_id]['person'] = acq.add_person(
                    attributes=({Constants.NIDM_SUBJECTID: subject_id}))

            acq.add_qualified_association(
                person=participant[subject_id]['person'],
                role=Constants.NIDM_PARTICIPANT)

            datatype = file_tpl.entities['datatype']

            if datatype == 'anat':
                acq_obj = MRObject(acq)
                _describe_scan(acq_obj, file_tpl, directory)
                _add_sidecar_metadata(acq_obj, bids_layout, file_tpl, subject_id)

                # attributes from the dataset-level T1w.json
                dataset = _load_bids_root_json(directory, 'T1w.json')
                _add_mapped_attributes(acq_obj, dataset, BIDS_Constants.json_keys)

            elif datatype == 'func':
                acq_obj = MRObject(acq)
                _describe_scan(acq_obj, file_tpl, directory)

                if 'run' in file_tpl.entities:
                    acq_obj.add_attributes({
                        BIDS_Constants.json_keys["run"]: file_tpl.entities['run']
                    })

                json_data = _add_sidecar_metadata(acq_obj, bids_layout,
                                                  file_tpl, subject_id)

                # get associated events TSV file
                event_filters = dict(subject=subject_id,
                                     extensions=['.tsv'],
                                     modality=datatype,
                                     task=file_tpl.entities['task'])
                if 'run' in file_tpl.entities:
                    event_filters['run'] = file_tpl.entities['run']
                events_file = bids_layout.get(**event_filters)

                # an events file means this is task-based: create an
                # acquisition object for it and link it to the scan
                if events_file:
                    events_obj = AcquisitionObject(acq)
                    # NOTE(review): json_data is subscripted directly here
                    # but read through ``.info`` elsewhere - confirm.
                    events_obj.add_attributes({
                        PROV_TYPE: Constants.NIDM_MRI_BOLD_EVENTS,
                        BIDS_Constants.json_keys["TaskName"]: json_data["TaskName"],
                        Constants.NIDM_FILENAME:
                        getRelPathToBIDS(events_file[0].filename, directory)
                    })
                    events_obj.wasAttributedTo(acq_obj)

                # attributes from the dataset-level task-rest_bold.json
                dataset = _load_bids_root_json(directory, 'task-rest_bold.json')
                _add_mapped_attributes(acq_obj, dataset,
                                       BIDS_Constants.json_keys, separator=",")

            elif datatype == 'dwi':
                acq_obj = MRObject(acq)
                _describe_scan(acq_obj, file_tpl, directory,
                               usage_key="dti", add_location=False)

                # use the entities mapping like the other branches do
                if 'run' in file_tpl.entities:
                    acq_obj.add_attributes({
                        BIDS_Constants.json_keys["run"]: file_tpl.entities['run']
                    })

                _add_sidecar_metadata(acq_obj, bids_layout, file_tpl, subject_id)

                # for now, bval and bvec files get generic acquisition objects
                # attached to the same activity as the DWI scan
                _add_dwi_companion(acq, "bval",
                                   bids_layout.get_bval(file_tpl.filename),
                                   file_tpl, directory)
                _add_dwi_companion(acq, "bvec",
                                   bids_layout.get_bvec(file_tpl.filename),
                                   file_tpl, directory)

        # Added temporarily to support phenotype files: for each *.tsv /
        # *.json file pair in the phenotype directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory, "phenotype", "*.tsv")):
            # extract the rows for this subject and link the data dictionary
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    # same "sub-XXXX" / "XXXX" tolerance as participants.tsv
                    subjid = _subject_label(row['participant_id'])
                    if not subjid == subject_id:
                        continue

                    acq = AssessmentAcquisition(session=session[subjid])
                    acq.add_qualified_association(
                        person=participant[subject_id]['person'],
                        role=Constants.NIDM_PARTICIPANT)
                    acq_entity = AssessmentObject(acquisition=acq)

                    for key, value in row.items():
                        if not value:
                            continue
                        # participant_id lives on the agent; phenotype files
                        # may also carry an unnamed index column - skip both
                        if (not key == "participant_id") and (key != ""):
                            # placeholder BIDS namespace, variable name as concept ID
                            acq_entity.add_attributes({Constants.BIDS[key]: value})

                    # link TSV file
                    acq_entity.add_attributes({
                        Constants.NIDM_FILENAME: getRelPathToBIDS(tsv_file, directory)
                    })
                    # WIP: absolute location of BIDS directory on disk
                    acq_entity.add_attributes(
                        {Constants.PROV['Location']: directory})

                    # link associated JSON data dictionary if it exists
                    data_dict = os.path.join(
                        directory, "phenotype",
                        os.path.splitext(os.path.basename(tsv_file))[0] + ".json")
                    if os.path.isfile(data_dict):
                        acq_entity.add_attributes({
                            Constants.BIDS["data_dictionary"]:
                            getRelPathToBIDS(data_dict, directory)
                        })

    return project, cde
def main(argv):
    """Build a small demonstration NIDM-Experiment document.

    Creates a project with a PI, one session, an MR acquisition, a
    questionnaire assessment and a demographics assessment, then writes
    the result to ``test.ttl`` and renders it to ``test.png``.
    """
    # --- project -----------------------------------------------------
    project_attributes = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(attributes=project_attributes)

    # string attribute in an existing namespace
    project.add_attributes({Constants.NIDM["isFun"]: "ForMe"})
    # string attribute in a new namespace/term
    project.addLiteralAttribute("fred", "notFound", "in namespaces", "www.fred.org/")
    # float attribute
    project.addLiteralAttribute("nidm", "float", 2.34)

    # principal investigator, associated with the project activity
    investigator = project.add_person(attributes={
        Constants.NIDM_FAMILY_NAME: "Keator",
        Constants.NIDM_GIVEN_NAME: "David",
    })
    project.add_qualified_association(person=investigator, role=Constants.NIDM_PI)

    # --- session -----------------------------------------------------
    session = Session(project)
    session.add_attributes({Constants.NIDM: "test"})

    # --- MR acquisition activity / entity ----------------------------
    mr_activity = MRAcquisition(session=session)
    MRObject(acquisition=mr_activity)
    george = mr_activity.add_person(attributes={Constants.NIDM_GIVEN_NAME: "George"})
    mr_activity.add_qualified_association(person=george, role=Constants.NIDM_PARTICIPANT)

    # --- questionnaire assessment, same participant ------------------
    questionnaire_activity = AssessmentAcquisition(session=session)
    questionnaire = AssessmentObject(acquisition=questionnaire_activity)
    questionnaire.add_attributes({
        Constants.NIDM["Q1"]: "Q1 Answer",
        Constants.NIDM["Q2"]: "Q2 Answer",
    })
    questionnaire_activity.add_qualified_association(person=george, role=Constants.NIDM_PARTICIPANT)

    # --- demographics assessment, new participant --------------------
    demographics_activity = AssessmentAcquisition(session=session)
    demographics = DemographicsObject(acquisition=demographics_activity)
    john_doe = demographics_activity.add_person(attributes={
        Constants.NIDM_FAMILY_NAME: "Doe",
        Constants.NIDM_GIVEN_NAME: "John",
    })
    demographics_activity.add_qualified_association(person=john_doe, role=Constants.NIDM_PARTICIPANT)
    demographics.add_attributes({Constants.NIDM_AGE: 60, Constants.NIDM_GENDER: "Male"})

    # --- output ------------------------------------------------------
    # save a turtle file
    with open("test.ttl", 'w') as turtle_file:
        turtle_file.write(project.serializeTurtle())

    # save a DOT graph rendered as PNG
    project.save_DotGraph("test.png", format="png")
def _bids_subject_label(participant_id):
    """Return the subject label from a BIDS ``participant_id`` value.

    BIDS datasets are ambiguous here: sometimes the value is ``sub-XXXX``
    and sometimes it is just ``XXXX``. Both forms yield ``XXXX``.
    """
    parts = participant_id.split("-")
    return parts[1] if len(parts) > 1 else parts[0]


def bidsmri2project(directory, args):
    """Convert a BIDS dataset directory into a NIDM project plus a CDE graph.

    Steps performed, in order:

    1. read ``dataset_description.json`` and copy the keys that are mapped in
       ``BIDS_Constants.dataset_description`` onto a new ``Project``;
    2. if ``participants.tsv`` exists, create one session / assessment
       acquisition / person per row and store the row's variables on the
       assessment entity (optionally linked to ``participants.json``);
    3. for every subject reported by the BIDS layout, add the imaging
       acquisitions via ``addimagingsessions`` and any matching rows from
       ``phenotype/*.tsv``.

    :param directory: path of the BIDS dataset root.
    :param args: object providing ``json_map`` (``False`` or a path to a JSON
        variable-mapping file) and ``no_concepts`` (truthy to call
        ``map_variables_to_terms`` with ``associate_concepts=False``).
    :return: tuple ``(project, cde)``: the populated project and the graph of
        data elements.

    The process exits (``exit("-1")``) when the directory does not exist or
    ``dataset_description.json`` cannot be opened.
    """
    # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not
    cde = Graph()

    # Parse dataset_description.json file in BIDS directory
    if os.path.isdir(os.path.join(directory)):
        try:
            with open(os.path.join(directory, 'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec")
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!", os.path.join(directory))
        exit("-1")

    # create project / nidm-exp doc
    project = Project()

    # if there are git annex sources then add them, else just add the local path to the dataset
    num_sources = addGitAnnexSources(obj=project.get_uuid(), bids_root=directory)
    if num_sources == 0:
        project.add_attributes({Constants.PROV['Location']: "file:/" + directory})

    # add various attributes if they exist in BIDS dataset
    for key in dataset:
        # only keys mapped to a term in BIDS_Constants.py are copied to the NIDM object
        if key in BIDS_Constants.dataset_description:
            if isinstance(dataset[key], list):
                project.add_attributes({BIDS_Constants.dataset_description[key]: "".join(dataset[key])})
            else:
                project.add_attributes({BIDS_Constants.dataset_description[key]: dataset[key]})

    # get BIDS layout
    bids_layout = BIDSLayout(directory)

    # dictionaries keyed by subject label; used later to link phenotype data
    # and scans to the same session / person as the participants.tsv data
    session = {}
    participant = {}

    participants_tsv = os.path.join(directory, 'participants.tsv')
    participants_json = os.path.join(directory, 'participants.json')

    # Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(participants_tsv):
        with open(participants_tsv) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # columns that are not fixed BIDS constants go through the
            # variable -> term mapping functions
            mapping_list = [field for field in participants_data.fieldnames
                            if field not in BIDS_Constants.participants]

            mapping_kwargs = {
                'directory': directory,
                'assessment_name': 'participants.tsv',
                'df': DataFrame(columns=mapping_list),
                'output_file': participants_json,
                'bids': True,
            }
            # "== False" is deliberate: only an explicit False means "no mapping
            # file supplied"; any other value is forwarded as json_file as before
            if args.json_map == False:
                # default to participants.json when it exists, otherwise run
                # without a json mapping file
                if os.path.isfile(participants_json):
                    mapping_kwargs['json_file'] = participants_json
            else:
                mapping_kwargs['json_file'] = args.json_map
            if args.no_concepts:
                mapping_kwargs['associate_concepts'] = False
            column_to_terms, cde = map_variables_to_terms(**mapping_kwargs)

            for row in participants_data:
                # create session object for subject to be used for participant metadata and image data
                subjid = _bids_subject_label(row['participant_id'])
                logging.info(subjid)
                session[subjid] = Session(project)

                # add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])
                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(
                    attributes=({Constants.NIDM_SUBJECTID: row['participant_id']}))

                # add nfo:filename entry to assessment entity to reflect provenance of where this data came from
                acq_entity.add_attributes(
                    {Constants.NIDM_FILENAME: getRelPathToBIDS(participants_tsv, directory)})

                # add qualified association of participant with acquisition activity
                acq.add_qualified_association(person=participant[subjid]['person'],
                                              role=Constants.NIDM_PARTICIPANT)

                # if there are git annex sources for participants.tsv file then add them,
                # else just add the local path to the dataset
                num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                if num_sources == 0:
                    acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + participants_tsv})

                # if there's a JSON sidecar file then create an entity for it and
                # associate it with this assessment entity
                if os.path.isfile(participants_json):
                    json_sidecar = AssessmentObject(acquisition=acq)
                    json_sidecar.add_attributes({
                        PROV_TYPE: QualifiedName(Namespace("bids", Constants.BIDS), "sidecar_file"),
                        Constants.NIDM_FILENAME: getRelPathToBIDS(participants_json, directory)})

                    # add Git Annex sources, else just the local path
                    num_sources = addGitAnnexSources(obj=json_sidecar.get_uuid(),
                                                     filepath=participants_json,
                                                     bids_root=directory)
                    if num_sources == 0:
                        json_sidecar.add_attributes(
                            {Constants.PROV['Location']: "file:/" + participants_json})

                    # connect json_entity with acq_entity
                    acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_sidecar})

                for key, value in row.items():
                    if not value:
                        continue
                    # variables in participants.tsv that have term mappings in
                    # BIDS_Constants.py use those fixed mappings
                    if key in BIDS_Constants.participants:
                        # WIP
                        # Here we are adding to CDE graph data elements for BIDS Constants that
                        # remain fixed for each BIDS-compliant dataset. The participant id itself
                        # was already handled above via the agent / qualified association.
                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):
                            # ID for BIDS variables is always the same bids:[bids variable]
                            cde_id = Constants.BIDS[key]
                            # add the data element to the CDE graph
                            cde.add((cde_id, RDF.type, Constants.NIDM['DataElement']))
                            cde.add((cde_id, RDF.type, Constants.PROV['Entity']))
                            # add some basic information about this data element
                            cde.add((cde_id, Constants.RDFS['label'], Literal(BIDS_Constants.participants[key].localpart)))
                            cde.add((cde_id, Constants.NIDM['isAbout'], URIRef(BIDS_Constants.participants[key].uri)))
                            cde.add((cde_id, Constants.NIDM['source_variable'], Literal(key)))
                            cde.add((cde_id, Constants.NIDM['description'], Literal("participant/subject identifier")))
                            cde.add((cde_id, Constants.RDFS['comment'], Literal("BIDS participants_id variable fixed in specification")))
                            cde.add((cde_id, Constants.RDFS['valueType'], URIRef(Constants.XSD["string"])))

                            acq_entity.add_attributes({cde_id: Literal(value)})
                    else:
                        # every other column uses the variable -> term mapping
                        # procedures (WIP: support for CDEs)
                        add_attributes_with_cde(prov_object=acq_entity, cde=cde, row_variable=key, value=value)

    # create acquisition objects for each scan for each subject
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s", subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # Because we don't know which imaging session the root participants.tsv data may be
        # associated with, imaging acquisitions are linked to their own sessions, separate
        # from the AssessmentAcquisition session created for participants.tsv above.
        imaging_sessions = bids_layout.get_sessions(subject=subject_id)

        # one new session per ses-* entry, with the session number stored as metadata
        for img_session in imaging_sessions:
            ses = Session(project)
            ses.add_attributes({Constants.BIDS['session_number']: img_session})
            addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=ses,
                               participant=participant, directory=directory,
                               img_session=img_session)

        # NOTE(review): the original comment here reads "else we have no ses-* directories",
        # but the call is not guarded by an else and therefore also runs for subjects that
        # do have sessions. Kept unchanged; confirm against addimagingsessions whether that
        # is intended.
        addimagingsessions(bids_layout=bids_layout, subject_id=subject_id, session=Session(project),
                           participant=participant, directory=directory)

        # Added temporarily to support phenotype files
        # for each *.tsv / *.json file pair in the phenotypes directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory, "phenotype", "*.tsv")):
            # for now, open the TSV file, extract the row for this subject, store it in an
            # acquisition object and link to the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    # accept both "sub-XXXX" and bare "XXXX" ids, exactly as for
                    # participants.tsv (indexing [1] unconditionally raised IndexError
                    # on bare ids)
                    if _bids_subject_label(row['participant_id']) != subject_id:
                        continue

                    # NOTE(review): session/participant are only filled from participants.tsv,
                    # so a phenotype subject missing there raises KeyError (as before).
                    acq = AssessmentAcquisition(session=session[subject_id])
                    # add qualified association with person
                    acq.add_qualified_association(person=participant[subject_id]['person'],
                                                  role=Constants.NIDM_PARTICIPANT)

                    acq_entity = AssessmentObject(acquisition=acq)

                    for key, value in row.items():
                        if not value:
                            continue
                        # participant_id lives on the agent, so it is not repeated as a triple.
                        # BIDS phenotype files seem to have an index column with no header
                        # name, so empty keys are skipped too.
                        if key != "participant_id" and key != "":
                            # placeholder BIDS namespace with the variable name as concept ID
                            acq_entity.add_attributes({Constants.BIDS[key]: value})

                    # link TSV file
                    acq_entity.add_attributes({Constants.NIDM_FILENAME: getRelPathToBIDS(tsv_file, directory)})

                    # if there are git annex sources then add them, else just the local path
                    num_sources = addGitAnnexSources(obj=acq_entity.get_uuid(), bids_root=directory)
                    if num_sources == 0:
                        acq_entity.add_attributes({Constants.PROV['Location']: "file:/" + tsv_file})

                    # link associated JSON file if it exists
                    data_dict = os.path.join(directory, "phenotype",
                                             os.path.splitext(os.path.basename(tsv_file))[0] + ".json")
                    if os.path.isfile(data_dict):
                        # create a new entity for the data dictionary on the same activity
                        json_entity = AssessmentObject(acquisition=acq)
                        json_entity.add_attributes({
                            PROV_TYPE: Constants.BIDS["sidecar_file"],
                            Constants.NIDM_FILENAME: getRelPathToBIDS(data_dict, directory)})

                        # add Git Annex sources, else just the local path
                        num_sources = addGitAnnexSources(obj=json_entity.get_uuid(),
                                                         filepath=data_dict,
                                                         bids_root=directory)
                        if num_sources == 0:
                            json_entity.add_attributes({Constants.PROV['Location']: "file:/" + data_dict})

                        # connect json_entity with acq_entity
                        acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]: json_entity.get_uuid()})

    return project, cde
def makeTestFile(filename, params):
    """Write a two-project NIDM test fixture to ``filename``.

    Two projects are built (one session each, with acquisitions, participants
    and assessment data), serialized separately to ``a.ttl`` / ``b.ttl``,
    merged into a single graph that is written to ``filename`` as Turtle, and
    the temporary files are removed. The content of ``filename`` is finally
    copied to ``./agent.ttl``.

    Side effects on module globals: ``test_person_uuid`` is set to the first
    participant's identifier and the identifiers of the two participants of
    the second project are appended to ``test_p2_subject_uuids`` (both with
    the ``niiri:`` prefix removed).

    :param filename: path of the merged Turtle file to create.
    :param params: mapping of optional overrides (``NIDM_PROJECT_NAME``,
        ``NIDM_PROJECT_IDENTIFIER``, ``NIDM_PROJECT_DESCRIPTION``,
        ``PROJECT_UUID``, ``PROJECT2_UUID``, ``SESSION_UUID``,
        ``SESSION2_UUID``); missing or falsy values use built-in defaults.
    """
    global test_person_uuid, test_p2_subject_uuids

    def setting(key, fallback):
        # a missing key and a falsy value both select the fallback
        return params.get(key) or fallback

    def bare_id(agent):
        # identifier string without the "niiri:" prefix
        return str(agent.identifier).replace("niiri:", "")

    def enroll(adding_acquisition, associated_acquisition, subject_id):
        # create the person on one acquisition, then record the participant
        # role on the (possibly different) acquisition given second
        agent = adding_acquisition.add_person(
            attributes=({Constants.NIDM_SUBJECTID: subject_id}))
        associated_acquisition.add_qualified_association(
            person=agent, role=Constants.NIDM_PARTICIPANT)
        return agent

    name = setting('NIDM_PROJECT_NAME', "Project_name_sample")
    identifier_p1 = setting('NIDM_PROJECT_IDENTIFIER', 9610)
    # NOTE(review): project 2 reads the same key as project 1 and differs only
    # in its default; kept as in the original.
    identifier_p2 = setting('NIDM_PROJECT_IDENTIFIER', 550)
    description = setting('NIDM_PROJECT_DESCRIPTION', "1234356 Test investigation")
    uuid_p1 = setting('PROJECT_UUID', "_proj1")
    uuid_p2 = setting('PROJECT2_UUID', "_proj2")
    uuid_s1 = setting('SESSION_UUID', "_ses1")
    uuid_s2 = setting('SESSION2_UUID', "_ses2")

    p1kwargs = {
        Constants.NIDM_PROJECT_NAME: name,
        Constants.NIDM_PROJECT_IDENTIFIER: identifier_p1,
        Constants.NIDM_PROJECT_DESCRIPTION: description,
    }
    p2kwargs = {
        Constants.NIDM_PROJECT_NAME: name,
        Constants.NIDM_PROJECT_IDENTIFIER: identifier_p2,
        Constants.NIDM_PROJECT_DESCRIPTION: description,
    }

    # ---- first project: one session, three acquisitions, three people ----
    project = Project(uuid=uuid_p1, attributes=p1kwargs)
    session = Session(uuid=uuid_s1, project=project)
    acq = Acquisition(uuid="_acq1", session=session)
    acq2 = Acquisition(uuid="_acq2", session=session)
    # NOTE(review): acq3 reuses the uuid "_acq2"; kept as in the original.
    acq3 = Acquisition(uuid="_acq2", session=session)

    person = enroll(acq, acq, "a1_9999")
    test_person_uuid = bare_id(person)
    enroll(acq2, acq2, "a1_8888")
    # NOTE(review): the third person is added through acq3 but associated
    # with acq2; kept as in the original.
    enroll(acq3, acq2, "a2_7777")

    # ---- second project: one session, two acquisitions, two people ----
    project2 = Project(uuid=uuid_p2, attributes=p2kwargs)
    session2 = Session(uuid=uuid_s2, project=project2)
    acq4 = Acquisition(uuid="_acq3", session=session2)
    acq5 = Acquisition(uuid="_acq4", session=session2)
    person4 = enroll(acq4, acq4, "a3_6666")
    person5 = enroll(acq5, acq5, "a4_5555")

    # now add some assessment instrument data
    assessments = (
        (acq, 9, "R", "Anxiety"),
        (acq2, 8, "L", "ADHD"),
        (acq4, 7, "A", "Depression"),
        (acq5, 6, "R", "Depression"),
    )
    for activity, age, handedness, diagnosis in assessments:
        addData(activity, {
            Constants.NIDM_AGE: age,
            Constants.NIDM_HANDEDNESS: handedness,
            Constants.NIDM_DIAGNOSIS: diagnosis,
        })

    for agent in (person4, person5):
        test_p2_subject_uuids.append(bare_id(agent))

    # serialize each project to its own temporary Turtle file
    for path, source in (("a.ttl", project), ("b.ttl", project2)):
        with open(path, 'w') as out:
            out.write(source.graph.serialize(None, format='rdf', rdf_format='ttl'))

    # merge both files into a single graph, starting from an empty one
    graph = Graph()
    for nidm_file in ("a.ttl", "b.ttl"):
        parsed = Graph().parse(nidm_file, format=util.guess_format(nidm_file))
        graph = graph + parsed

    graph.serialize(filename, format='turtle')

    os.unlink("a.ttl")
    os.unlink("b.ttl")

    # keep a copy of the merged file as ./agent.ttl
    with open(filename, "r") as merged:
        content = merged.read()
    with open("./agent.ttl", "w") as copy:
        copy.write(content)