def query(nidm_file_list, query_file, output_file, get_participants,
          get_instruments, get_instrument_vars):
    # query result list
    results = []

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','), output_file=output_file)
    elif get_instruments:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
            # with open(output_file,'w') as myfile:
            #     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            #     wr.writerow(df)
            # pd.DataFrame.from_records(df, columns=["Instruments"]).to_csv(output_file)
        else:
            print(df)
    elif get_instrument_vars:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
        else:
            print(df)
    else:
        # read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()
        df = sparql_query_nidm(nidm_file_list.split(','), query, output_file)

    return df
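# Usage sketch (not part of the original module): assuming the Query helpers used above
# (GetParticipantIDs, sparql_query_nidm, etc.) are imported and that "site1_nidm.ttl" /
# "site2_nidm.ttl" are hypothetical placeholder NIDM files, printing participant IDs
# without writing a CSV could look like this.
participants_df = query("site1_nidm.ttl,site2_nidm.ttl",  # comma-separated file list, as the function expects
                        query_file=None,                  # no SPARQL file needed for this branch
                        output_file=None,                 # None -> results are only returned, not written
                        get_participants=True,
                        get_instruments=False,
                        get_instrument_vars=False)
print(participants_df)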
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols,
          get_fields, uri, blaze, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    # query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if blaze:
        os.environ["BLAZEGRAPH_URL"] = blaze
        print("setting BLAZEGRAPH_URL to {}".format(blaze))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','), output_file=output_file)
        if output_file is None:
            print(df.to_string())
        return df
    elif get_instruments:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
            # with open(output_file,'w') as myfile:
            #     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            #     wr.writerow(df)
            # pd.DataFrame.from_records(df, columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif get_fields:
        # fields-only query.  We'll do it with the REST API
        restParser = RestParser(verbosity_level=int(verbosity))
        if output_file is not None:
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
            df_list = []
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)

        # set up uri to do fields query for each nidm file
        for nidm_file in nidm_file_list.split(","):
            # get project UUID
            project = GetProjectsUUID([nidm_file])
            uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + get_fields
            # get fields output from each file and concatenate
            if output_file is None:
                # just print results
                print(restParser.run([nidm_file], uri))
            else:
                df_list.append(pd.DataFrame(restParser.run([nidm_file], uri)))

        if output_file is not None:
            # concatenate data frames
            df = pd.concat(df_list)
            # output to csv file
            df.to_csv(output_file)
    elif uri:
        restParser = RestParser(verbosity_level=int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        elif output_file is not None:
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)
        if output_file is not None:
            if j:
                with open(output_file, "w+") as f:
                    f.write(dumps(df))
            else:
                # convert object df to dataframe and output
                pd.DataFrame(df).to_csv(output_file)
        else:
            print(df)
    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:
        df = sparql_query_nidm(nidm_file_list.split(','), query_file, output_file)
        if output_file is None:
            print(df.to_string())
        return df
    else:
        print("ERROR: No query parameter provided. See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
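# Usage sketch (an assumption, not part of the original file): with the RestParser-based
# query() above and a placeholder file "study_nidm.ttl", a REST-style URI query printed
# as JSON to the console might be invoked like this.
query(nidm_file_list="study_nidm.ttl", cde_file_list=None, query_file=None,
      output_file=None, get_participants=False, get_instruments=False,
      get_instrument_vars=False, get_dataelements=False, get_brainvols=False,
      get_dataelements_brainvols=False, get_fields=None,
      uri="/projects",   # any REST path understood by RestParser
      blaze=None, j=True, verbosity=0)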
def data_aggregation():  # all data from all the files is collected
    """
    Collects the variables requested in the k-means model from all of the supplied NIDM files.
    """
    # query result list
    results = []
    if v:  # ex: age,sex,DX_GROUP
        print("***********************************************************************************************************")
        command = "pynidm k-means -nl " + n + " -variables \"" + v + "\" " + "-k " + str(k_num) + " -m " + cm
        print("Your command was: " + command)
        if o is not None:
            f = open(o, "w")
            f.write("Your command was " + command)
            f.close()
        verbosity = 0
        restParser = RestParser(verbosity_level=int(verbosity))
        restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
        global df_list  # used in dataparsing()
        df_list = []
        # set up uri to do fields query for each nidm file
        global file_list
        file_list = n.split(",")
        df_list_holder = {}
        for i in range(len(file_list)):
            df_list_holder[i] = []
        df_holder = {}
        for i in range(len(file_list)):
            df_holder[i] = []
        global condensed_data_holder
        condensed_data_holder = {}
        for i in range(len(file_list)):
            condensed_data_holder[i] = []

        count = 0
        not_found_count = 0
        for nidm_file in file_list:
            # get project UUID
            project = GetProjectsUUID([nidm_file])
            # split the model into its constituent variables
            global var_list
            # below, we edit the model so it splits by +, ~, or =. However, to help it out in catching everything
            # we replaced ~ and = with a + so that we can still use split. Regex wasn't working.
            var_list = v.split(",")
            for i in range(len(var_list)):  # here, we remove any leading or trailing spaces
                var_list[i] = var_list[i].strip()
            # set the dependent variable to the one dependent variable in the model
            global vars  # used in dataparsing()
            vars = ""
            for i in range(len(var_list) - 1, -1, -1):
                if not "*" in var_list[i]:  # removing the star term from the columns we're about to pull from data
                    vars = vars + var_list[i] + ","
                else:
                    print("Interacting variables are not present in clustering models. They will be removed.")
            vars = vars[0:len(vars) - 1]
            uri = "/projects/" + project[0].toPython().split("/")[-1] + "?fields=" + vars
            # get fields output from each file and concatenate
            df_list_holder[count].append(pd.DataFrame(restParser.run([nidm_file], uri)))
            # global dep_var
            df = pd.concat(df_list_holder[count])
            with tempfile.NamedTemporaryFile(delete=False) as temp:  # turns the dataframe into a temporary csv
                df.to_csv(temp.name + '.csv')
                temp.close()
            data = list(csv.reader(open(temp.name + '.csv')))  # makes the csv a 2D list to make it easier to call the contents of certain cells

            var_list = vars.split(",")  # makes a list of the independent variables
            numcols = (len(data) - 1) // (len(var_list))  # finds the number of columns in the original dataframe
            global condensed_data  # also used in linreg()
            condensed_data_holder[count] = [[0] * (len(var_list))]  # makes an array 1 row by the number of necessary columns
            for i in range(numcols):  # makes the 2D array big enough to store all of the necessary values in the edited dataset
                condensed_data_holder[count].append([0] * (len(var_list)))
            for m in range(0, len(var_list)):
                end_url = var_list[m].split("/")
                if "/" in var_list[m]:
                    var_list[m] = end_url[len(end_url) - 1]
            for i in range(len(var_list)):  # stores the independent variable names in the first row
                condensed_data_holder[count][0][i] = var_list[i]
            numrows = 1       # begins at the first row to add data
            fieldcolumn = 0   # the column the variable name is in in the original dataset
            valuecolumn = 0   # the column the value is in in the original dataset
            datacolumn = 0    # if it is identified by the dataElement name instead of the field's name
            not_found_list = []
            for i in range(len(data[0])):
                if data[0][i] == 'sourceVariable':  # finds the column where the variable names are
                    fieldcolumn = i
                elif data[0][i] == 'source_variable':  # finds the column where the variable names are
                    fieldcolumn = i
                elif data[0][i] == 'isAbout':
                    aboutcolumn = i
                elif data[0][i] == 'label':
                    namecolumn = i  # finds the column where the variable names are
                elif data[0][i] == 'value':
                    valuecolumn = i  # finds the column where the values are
                elif data[0][i] == 'dataElement':  # finds the column where the data element is if necessary
                    datacolumn = i
            for i in range(len(condensed_data_holder[count][0])):  # starts iterating through the dataset, looking for the name in that
                for j in range(1, len(data)):  # column, so it can append the values under the proper variables
                    try:
                        if data[j][fieldcolumn] == condensed_data_holder[count][0][i]:  # in the dataframe, the name is in column 3
                            condensed_data_holder[count][numrows][i] = data[j][valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif data[j][aboutcolumn] == condensed_data_holder[count][0][i]:
                            condensed_data_holder[count][numrows][i] = data[j][valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif condensed_data_holder[count][0][i] in data[j][aboutcolumn]:  # this is in case the uri only works by querying the part after the last backslash
                            condensed_data_holder[count][numrows][i] = data[j][valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif data[j][namecolumn] == condensed_data_holder[count][0][i]:  # in the dataframe, the name is in column 12
                            condensed_data_holder[count][numrows][i] = data[j][valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif condensed_data_holder[count][0][i] == data[j][datacolumn]:  # in the dataframe, the name is in column 9
                            condensed_data_holder[count][numrows][i] = data[j][valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                    except IndexError:
                        numrows = numrows + 1
                numrows = 1  # resets to the first row for the next variable
            temp_list = condensed_data_holder[count]
            for j in range(len(temp_list[0]) - 1, 0, -1):  # if the software appends a column with 0 as the heading, it removes this null column
                if temp_list[0][j] == "0" or temp_list[0][j] == "NaN":
                    for row in condensed_data_holder[count]:
                        row.pop(j)
            rowsize = len(condensed_data_holder[count][0])
            count1 = 0
            for i in range(0, rowsize):
                for row in condensed_data_holder[count]:
                    if row[i] == 0 or row[i] == "NaN" or row[i] == "0":
                        count1 = count1 + 1
                if count1 > len(condensed_data_holder[count]) - 2:
                    not_found_list.append(condensed_data_holder[count][0][i])
                count1 = 0
            for i in range(len(condensed_data_holder[count][0])):
                if " " in condensed_data_holder[count][0][i]:
                    condensed_data_holder[count][0][i] = condensed_data_holder[count][0][i].replace(" ", "_")
            for i in range(len(var_list)):
                if "/" in var_list[i]:
                    splitted = var_list[i].split("/")
                    var_list[i] = splitted[len(splitted) - 1]
                if " " in var_list[i]:
                    var_list[i] = var_list[i].replace(" ", "_")
            count = count + 1
            if len(not_found_list) > 0:
                print("***********************************************************************************************************")
                print()
                print("Your variables were " + v)
                print()
                print("The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.")
                if o is not None:
                    f = open(o, "a")
                    f.write("Your variables were " + v)
                    f.write("The following variables were not found in " + nidm_file + ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables.")
                    f.close()
                for i in range(0, len(not_found_list)):
                    print(str(i + 1) + ". " + not_found_list[i])
                    if o is not None:
                        f = open(o, "a")
                        f.write(str(i + 1) + ". " + not_found_list[i])
                        f.close()
                for j in range(len(not_found_list) - 1, 0, -1):
                    not_found_list.pop(j)
                not_found_count = not_found_count + 1
                print()
        if not_found_count > 0:
            exit(1)
    else:
        print("ERROR: No query parameter provided. See help:")
        print()
        os.system("pynidm k-means --help")
        exit(1)
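# Usage sketch (an assumption about the surrounding CLI, not code from the project):
# data_aggregation() reads module-level globals that the pynidm k-means command
# normally sets from its options, so a manual call has to seed them first. All values
# below are hypothetical placeholders.
n = "site1_nidm.ttl,site2_nidm.ttl"   # -nl: comma-separated NIDM files
v = "age,sex,DX_GROUP"                # -variables: model variables to pull
k_num = 3                             # -k: requested number of clusters
cm = "k-means"                        # -m: method label echoed in the command string
o = None                              # -o: optional output file; None prints to stdout
data_aggregation()                    # fills the file_list, var_list and condensed_data_holder globals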
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols,
          uri, j, verbosity):
    # query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','), output_file=output_file)
        if output_file is None:
            print(df.to_string())
        return df
    elif get_instruments:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
            # with open(output_file,'w') as myfile:
            #     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            #     wr.writerow(df)
            # pd.DataFrame.from_records(df, columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        df = restParser(nidm_file_list.split(','), uri, int(verbosity))
        if j:
            print(dumps(df, indent=2))
        else:
            if type(df) == list:
                for x in df:
                    print(x)
            elif type(df) == dict:
                for k in df.keys():
                    print(str(k) + ' ' + str(df[k]))
            else:
                print(df.to_string())
    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    else:
        # read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()
        df = sparql_query_nidm(nidm_file_list.split(','), query, output_file)
        if output_file is None:
            print(df.to_string())
        return df
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols,
          uri, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    # query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','), output_file=output_file)
        if output_file is None:
            print(df.to_string())
        return df
    elif get_instruments:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
            # with open(output_file,'w') as myfile:
            #     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            #     wr.writerow(df)
            # pd.DataFrame.from_records(df, columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        # first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','), project_id=project)
                count += 1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','), project_id=project))

        # write dataframe
        # if output file parameter specified
        if output_file is not None:
            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        restParser = RestParser(verbosity_level=int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)
        print(df)
    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        # if output file parameter specified
        if output_file is not None:
            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:
        df = sparql_query_nidm(nidm_file_list.split(','), query_file, output_file)
        if output_file is None:
            print(df.to_string())
        return df
    else:
        print("ERROR: No query parameter provided. See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
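# Isolated sketch of the RestParser pattern used in the uri branch above (assuming
# RestParser from nidm.experiment.tools.rest is importable and "study_nidm.ttl" is a
# placeholder NIDM file).
rest = RestParser(verbosity_level=0)
rest.setOutputFormat(RestParser.CLI_FORMAT)          # human-readable output, as in the CLI branch
result = rest.run(["study_nidm.ttl"], "/projects")   # list of files plus a REST-style path
print(result)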
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset. The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS')

    parser.add_argument('-nidm_file', dest='rdf_file', required=True, help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False,
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument('-anat', dest='anat', action='store_true', required=False,
                        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument('-func', dest='func', action='store_true', required=False,
                        help="Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument('-dwi', dest='dwi', action='store_true', required=False,
                        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir', dest='bids_dir', required=True, help="Directory to store BIDS dataset")
    args = parser.parse_args()

    rdf_file = args.rdf_file
    output_directory = args.bids_dir

    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    # try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("reading RDF file as %s..." % format)
            # load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            print("RDF file successfully read")
            format_found = True
            break
        except Exception:
            print("file: %s appears to be an invalid %s RDF file" % (rdf_file, format))

    if not format_found:
        print("File doesn't appear to be a valid RDF format supported by Python RDFLib! Please check input file")
        print("exiting...")
        exit(-1)

    # set up output directory for BIDS data
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
    if not os.path.isdir(join(output_directory, os.path.splitext(args.rdf_file)[0])):
        os.mkdir(join(output_directory, os.path.splitext(args.rdf_file)[0]))

    # convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(nidm_project,
                                      join(output_directory, os.path.splitext(args.rdf_file)[0]))

    # create participants.tsv file. In BIDS datasets there is no specification for how many or which type of assessment
    # variables might be in this file. The specification does mention a minimum participant_id which indexes each of the
    # subjects in the BIDS dataset.
    #
    # if parameter -part_fields is defined then the variables listed will be fuzzy-matched to the URIs in the NIDM file
    # and added to the participants.tsv file

    # use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(nidm_project.serializeTurtle()), format='turtle')

    # create participants file
    CreateBIDSParticipantFile(rdf_graph_parse,
                              join(output_directory, os.path.splitext(args.rdf_file)[0], "participants"),
                              args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    # creating BIDS hierarchy with requested scans
    if args.anat == True:
        # query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object nidm:Anatomical
        for anat_acq in rdf_graph_parse.subjects(predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),
                                                 object=URIRef(Constants.NIDM_MRI_ANATOMIC_SCAN.uri)):
            # first see if file exists locally. Get nidm:Project prov:Location and append the nfo:Filename of the image
            # from the anat_acq acquisition entity. If that file doesn't exist try the prov:Location in the anat acq
            # entity and see if we can download it from the cloud

            # get acquisition uuid from entity uuid
            anat_act = list(rdf_graph_parse.objects(subject=anat_acq, predicate=Constants.PROV['wasGeneratedBy']))
            # get participant ID with sio:Subject role in anat_acq qualified association
            part_id = GetParticipantIDFromAcquisition(nidm_file_list=[rdf_file], acquisition=anat_act[0])

            # make BIDS sub directory
            sub_dir = join(output_directory, "sub-" + part_id[0])
            sub_filename_base = "sub-" + part_id[0]
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)
            # make BIDS anat directory
            if not os.path.exists(join(sub_dir, "anat")):
                os.makedirs(join(sub_dir, "anat"))

            for anat_filename in rdf_graph_parse.objects(subject=anat_acq,
                                                         predicate=URIRef(Constants.NIDM_FILENAME.uri)):
                # check if file exists
                for location in project_location:
                    # if anatomical MRI exists in this location then copy and rename
                    if isfile(location[0] + anat_filename):
                        # copy and rename file to be BIDS compliant
                        copyfile(src=location[0] + anat_filename,
                                 dst=join(sub_dir, "anat", sub_filename_base + splitext(anat_filename)[1]))
                        continue
                # if the file wasn't accessible locally, try with the prov:Location in the anat_acq
                for location in rdf_graph_parse.objects(subject=anat_acq,
                                                        predicate=URIRef(Constants.PROV['Location'])):
                    # try to download the file and rename
                    ret = GetImageFromURL(location)
                    if ret == -1:
                        print("Can't download file: %s from url: %s, skipping...." % (anat_filename, location))
                    else:
                        # copy temporary file to BIDS directory
                        copyfile(src=join(ret), dst=join(output_directory, 'anat'))
                        # rename file in dest
                        move(src=join(output_directory, 'anat', basename(ret)),
                             dst=join(output_directory, 'anat', anat_filename))
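# Sketch of the rdflib lookup pattern used in the -anat branch above (assuming
# Constants comes from nidm.core and "nidm.ttl" is a placeholder NIDM document):
# list every anatomical acquisition entity together with its stored filename.
from rdflib import Graph, URIRef

g = Graph()
g.parse("nidm.ttl", format="turtle")
for acq in g.subjects(predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),
                      object=URIRef(Constants.NIDM_MRI_ANATOMIC_SCAN.uri)):
    for fname in g.objects(subject=acq, predicate=URIRef(Constants.NIDM_FILENAME.uri)):
        print(acq, fname)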
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset. The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS')

    parser.add_argument('-nidm_file', dest='rdf_file', required=True, help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False,
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument('-anat', dest='anat', action='store_true', required=False,
                        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument('-func', dest='func', action='store_true', required=False,
                        help="Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument('-dwi', dest='dwi', action='store_true', required=False,
                        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir', dest='bids_dir', required=True, help="Directory to store BIDS dataset")

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-no_downloads', dest='no_downloads', action='store_true', required=False,
                       help="If this flag is set then script won't attempt to download images using datalad "
                            "and AWS S3. Default behavior is files are downloaded if they don't exist locally.")
    group.add_argument('-aws_url', dest='aws_url', required=False,
                       help="This tool facilitates export of user-selected information from a NIDM file to a BIDS "
                            "dataset and may have to fetch images. The NIDM files contain links from the local "
                            "filesystem used to convert BIDS to NIDM and possibly DataLad dataset links to the files "
                            "if the original BIDS data was a DataLad dataset. Here we support 3 modes of trying to "
                            "find images: (1) copy from the local directory space using the prov:Location information "
                            "in the NIDM file; (2) fetch the images from a DataLad remote if the original BIDS dataset "
                            "was a DataLad dataset when bids2nidm was run; (3) attempt to download the images via an "
                            "AWS S3 link. This parameter lets the user set the base AWS S3 URL to try and find the "
                            "images. Currently it supports using the URL provided here and adding the dataset id, "
                            "subject id, and filename. For example, in OpenNeuro (OpenNeuro is supported by default "
                            "but will serve as an example) the base AWS S3 URL is \'s3://openneuro.org\'. The URL then "
                            "becomes (for example) "
                            "s3://openneuro.org/ds000002/sub-06/func/sub-06_task-probabilisticclassification_run-02_bold.nii.gz "
                            "where this tool has added \'ds000002/sub-06/[FILENAME]\' to the base AWS S3 URL.")
    parser.add_argument('-dataset_string', dest='dataset_string', required=False,
                        help="If the -aws_url parameter is supplied then this parameter (-dataset_string) is required "
                             "as it will be added to the AWS base URL to retrieve images for each subject and file. "
                             "For example, if -aws_url is \'s3://davedata.org\' and -dataset_string is \'dataset1\' "
                             "then the AWS S3 url for sub-1 and file sub1-task-rest_run-1_bold.nii.gz would be: "
                             "\'s3://davedata.org/dataset1/sub-1/[anat|func|dwi]/sub1-task-rest_run-1_bold.nii.gz\'")
    args = parser.parse_args()

    # check some argument dependencies
    if args.aws_url and not args.dataset_string:
        print("ERROR! You must include a -dataset_string if you supplied the -aws_url. If there is no dataset "
              "string in your AWS S3 urls then just supply -aws_url with nothing after it.")
        parser.print_help()
        exit(-1)

    # set up some local variables
    rdf_file = args.rdf_file
    output_directory = args.bids_dir

    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    # try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("Reading RDF file as %s..." % format)
            # load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            # temporary save of nidm_project for debugging; left commented out because the hard-coded path is machine specific
            # with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
            #     print(nidm_project.serializeTurtle(), file=f)
            print("RDF file successfully read")
            format_found = True
            break
        except Exception:
            print("File: %s appears to be an invalid %s RDF file" % (rdf_file, format))

    if not format_found:
        print("File doesn't appear to be a valid RDF format supported by Python RDFLib! Please check input file")
        print("exiting...")
        exit(-1)

    # if not os.path.isdir(join(output_directory, os.path.splitext(args.rdf_file)[0])):
    #     os.mkdir(join(output_directory, os.path.splitext(args.rdf_file)[0]))

    # convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(nidm_project, output_directory)

    # create participants.tsv file. In BIDS datasets there is no specification for how many or which type of assessment
    # variables might be in this file. The specification does mention a minimum participant_id which indexes each of the
    # subjects in the BIDS dataset.
    #
    # if parameter -part_fields is defined then the variables listed will be fuzzy-matched to the URIs in the NIDM file
    # and added to the participants.tsv file

    # use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(nidm_project.serializeTurtle()), format='turtle')

    # temporary write out turtle file for testing
    # rdf_graph_parse.serialize(destination="/Users/dbkeator/Downloads/ds000117.ttl", format='turtle')

    # create participants file
    CreateBIDSParticipantFile(rdf_graph_parse, join(output_directory, "participants"), args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    # creating BIDS hierarchy with requested scans
    if args.anat == True:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_ANATOMIC_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
    if args.func == True:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_FUNCTION_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
    if args.dwi == True:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_DIFFUSION_TENSOR.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
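# Sketch of how -aws_url and -dataset_string are meant to combine, mirroring the help
# text above (the build_s3_url helper is illustrative, not a function in this module).
def build_s3_url(aws_baseurl, dataset_string, subject_id, modality, filename):
    """Return e.g. s3://openneuro.org/ds000002/sub-06/func/<filename>."""
    return "/".join([aws_baseurl.rstrip("/"), dataset_string, subject_id, modality, filename])

url = build_s3_url("s3://openneuro.org", "ds000002", "sub-06", "func",
                   "sub-06_task-probabilisticclassification_run-02_bold.nii.gz")
print(url)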