Example #1
def query(nidm_file_list, query_file, output_file, get_participants,get_instruments,get_instrument_vars):

    #query result list
    results = []

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),output_file=output_file)
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df)
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df)
    else:
        #read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()
        df = sparql_query_nidm(nidm_file_list.split(','),query,output_file)

    return df
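
A minimal invocation sketch for the function above, assuming the PyNIDM helpers it calls (GetParticipantIDs, GetProjectsUUID, GetProjectInstruments, GetInstrumentVariables, sparql_query_nidm) are importable in the same module; the .ttl paths are placeholders:

# Hypothetical call: list the instruments found in two NIDM files and write them to CSV.
df = query(
    nidm_file_list="site1_nidm.ttl,site2_nidm.ttl",  # placeholder file paths
    query_file=None,
    output_file="instruments.csv",
    get_participants=False,
    get_instruments=True,
    get_instrument_vars=False,
)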
Example #2
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols,
          get_fields, uri, blaze, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if blaze:
        os.environ["BLAZEGRAPH_URL"] = blaze
        print("setting BLAZEGRAPH_URL to {}".format(blaze))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),
                               output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())

        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),
                                           project_id=project)
                count += 1
            else:
                df = df.append(
                    GetProjectInstruments(nidm_file_list.split(','),
                                          project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),
                                            project_id=project)
                count += 1
            else:
                df = df.append(
                    GetInstrumentVariables(nidm_file_list.split(','),
                                           project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif get_fields:
        # fields-only query; handled with the REST API
        restParser = RestParser(verbosity_level=int(verbosity))
        if (output_file is not None):
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
            df_list = []
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        # set up uri to do fields query for each nidm file
        for nidm_file in nidm_file_list.split(","):
            # get project UUID
            project = GetProjectsUUID([nidm_file])
            uri = "/projects/" + project[0].toPython().split(
                "/")[-1] + "?fields=" + get_fields
            # get fields output from each file and concatenate
            if (output_file is None):
                # just print results
                print(restParser.run([nidm_file], uri))
            else:
                df_list.append(pd.DataFrame(restParser.run([nidm_file], uri)))

        if (output_file is not None):
            # concatenate data frames
            df = pd.concat(df_list)
            # output to csv file
            df.to_csv(output_file)

    elif uri:
        restParser = RestParser(verbosity_level=int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        elif (output_file is not None):
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)
        if (output_file is not None):
            if j:
                with open(output_file, "w+") as f:
                    f.write(dumps(df))
            else:
                # convert object df to dataframe and output
                pd.DataFrame(df).to_csv(output_file)
        else:
            print(df)

    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:

        df = sparql_query_nidm(nidm_file_list.split(','), query_file,
                               output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
    else:
        print("ERROR: No query parameter provided.  See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
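
A sketch of the -fields style call this variant adds, assuming "age" and "sex" exist as variable names in the NIDM files; the paths and field names are placeholders:

# Hypothetical call: pull the "age" and "sex" fields from each project via the REST parser
# and print them to the console (output_file=None selects the CLI_FORMAT branch).
query(
    nidm_file_list="site1_nidm.ttl,site2_nidm.ttl",  # placeholder file paths
    cde_file_list=None,
    query_file=None,
    output_file=None,
    get_participants=False,
    get_instruments=False,
    get_instrument_vars=False,
    get_dataelements=False,
    get_brainvols=False,
    get_dataelements_brainvols=False,
    get_fields="age,sex",
    uri=None,
    blaze=None,
    j=False,
    verbosity=0,
)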
Example #3
def data_aggregation():  # all data from all the files is collected
    """    This function provides query support for NIDM graphs.   """
    # query result list
    results = []
    # if a variable list was provided, query each NIDM file for those fields
    if v:  #ex: age,sex,DX_GROUP
        print(
            "***********************************************************************************************************"
        )
        command = "pynidm k-means -nl " + n + " -variables \"" + v + "\" " + "-k " + str(
            k_num) + " -m " + cm

        print("Your command was: " + command)
        if (o is not None):
            f = open(o, "w")
            f.write("Your command was " + command)
            f.close()
        verbosity = 0
        restParser = RestParser(verbosity_level=int(verbosity))
        restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
        global df_list  # used in dataparsing()
        df_list = []
        # set up uri to do fields query for each nidm file
        global file_list
        file_list = n.split(",")
        df_list_holder = {}
        for i in range(len(file_list)):
            df_list_holder[i] = []
        df_holder = {}
        for i in range(len(file_list)):
            df_holder[i] = []
        global condensed_data_holder
        condensed_data_holder = {}
        for i in range(len(file_list)):
            condensed_data_holder[i] = []

        count = 0
        not_found_count = 0
        for nidm_file in file_list:
            # get project UUID
            project = GetProjectsUUID([nidm_file])
            # split the model into its constituent variables
            global var_list
            # split the comma-separated variable string into individual variable names
            var_list = v.split(",")
            for i in range(len(var_list)
                           ):  # here, we remove any leading or trailing spaces
                var_list[i] = var_list[i].strip()
            # build a comma-separated list of the variables, dropping any interaction (*) terms
            global vars  # used in dataparsing()
            vars = ""
            for i in range(len(var_list) - 1, -1, -1):
                if not "*" in var_list[
                        i]:  # removing the star term from the columns we're about to pull from data
                    vars = vars + var_list[i] + ","
                else:
                    print(
                        "Interacting variables are not present in clustering models. They will be removed."
                    )
            vars = vars[0:len(vars) - 1]
            uri = "/projects/" + project[0].toPython().split(
                "/")[-1] + "?fields=" + vars
            # get fields output from each file and concatenate
            df_list_holder[count].append(
                pd.DataFrame(restParser.run([nidm_file], uri)))
            # global dep_var
            df = pd.concat(df_list_holder[count])
            with tempfile.NamedTemporaryFile(
                    delete=False
            ) as temp:  # turns the dataframe into a temporary csv
                df.to_csv(temp.name + '.csv')
                temp.close()
            data = list(
                csv.reader(open(temp.name + '.csv'))
            )  # makes the csv a 2D list to make it easier to call the contents of certain cells

            var_list = vars.split(
                ",")  # makes a list of the independent variables
            numcols = (len(data) - 1) // (
                len(var_list)
            )  # number of data rows per variable, i.e. rows needed in the condensed table
            global condensed_data  # also used in linreg()
            condensed_data_holder[count] = [
                [0] * (len(var_list))
            ]  # makes an array 1 row by the number of necessary columns
            for i in range(
                    numcols
            ):  # makes the 2D array big enough to store all of the necessary values in the edited dataset
                condensed_data_holder[count].append([0] * (len(var_list)))
            for m in range(0, len(var_list)):
                end_url = var_list[m].split("/")
                if "/" in var_list[m]:
                    var_list[m] = end_url[len(end_url) - 1]
            for i in range(
                    len(var_list)
            ):  # stores the independent variable names in the first row
                condensed_data_holder[count][0][i] = var_list[i]
            numrows = 1  # begins at the first row to add data
            fieldcolumn = 0  # the column the variable name is in in the original dataset
            valuecolumn = 0  # the column the value is in in the original dataset
            datacolumn = 0  # if it is identified by the dataElement name instead of the field's name
            not_found_list = []
            for i in range(len(data[0])):
                if data[0][
                        i] == 'sourceVariable':  # finds the column where the variable names are
                    fieldcolumn = i
                elif data[0][
                        i] == 'source_variable':  # finds the column where the variable names are
                    fieldcolumn = i
                elif data[0][i] == 'isAbout':
                    aboutcolumn = i
                elif data[0][i] == 'label':
                    namecolumn = i  # finds the column where the variable names are
                elif data[0][i] == 'value':
                    valuecolumn = i  # finds the column where the values are
                elif data[0][
                        i] == 'dataElement':  # finds the column where the data element is if necessary
                    datacolumn = i
            for i in range(
                    len(condensed_data_holder[count][0])
            ):  # starts iterating through the dataset, looking for the name in that
                for j in range(
                        1, len(data)
                ):  # column, so it can append the values under the proper variables
                    try:
                        if data[j][fieldcolumn] == condensed_data_holder[count][
                                0][i]:  # in the dataframe, the name is in column 3
                            condensed_data_holder[count][numrows][i] = data[j][
                                valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif data[j][aboutcolumn] == condensed_data_holder[
                                count][0][i]:
                            condensed_data_holder[count][numrows][i] = data[j][
                                valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif condensed_data_holder[count][0][i] in data[j][
                                aboutcolumn]:  # this is in case the uri only works by querying the part after the last backslash
                            condensed_data_holder[count][numrows][i] = data[j][
                                valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif data[j][namecolumn] == condensed_data_holder[
                                count][0][
                                    i]:  # in the dataframe, the name is in column 12
                            condensed_data_holder[count][numrows][i] = data[j][
                                valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                        elif condensed_data_holder[count][0][i] == data[j][
                                datacolumn]:  # in the dataframe, the name is in column 9
                            condensed_data_holder[count][numrows][i] = data[j][
                                valuecolumn]  # in the dataframe, the value is in column 2
                            numrows = numrows + 1  # moves on to the next row to add the proper values
                    except IndexError:
                        numrows = numrows + 1
                numrows = 1  # resets to the first row for the next variable
            temp_list = condensed_data_holder[count]
            for j in range(
                    len(temp_list[0]) - 1, 0, -1
            ):  # if the software appends a column with 0 as the heading, it removes this null column
                if temp_list[0][j] == "0" or temp_list[0][j] == "NaN":
                    for row in condensed_data_holder[count]:
                        row.pop(j)
            rowsize = len(condensed_data_holder[count][0])
            count1 = 0
            for i in range(0, rowsize):
                for row in condensed_data_holder[count]:
                    if row[i] == 0 or row[i] == "NaN" or row[i] == "0":
                        count1 = count1 + 1
                if count1 > len(condensed_data_holder[count]) - 2:
                    not_found_list.append(condensed_data_holder[count][0][i])
                count1 = 0
            for i in range(len(condensed_data_holder[count][0])):
                if " " in condensed_data_holder[count][0][i]:
                    condensed_data_holder[count][0][i] = condensed_data_holder[
                        count][0][i].replace(" ", "_")
            for i in range(len(var_list)):
                if "/" in var_list[i]:
                    splitted = var_list[i].split("/")
                    var_list[i] = splitted[len(splitted) - 1]
                if " " in var_list[i]:
                    var_list[i] = var_list[i].replace(" ", "_")
            count = count + 1
            if len(not_found_list) > 0:
                print(
                    "***********************************************************************************************************"
                )
                print()
                print("Your variables were " + v)
                print()
                print(
                    "The following variables were not found in " + nidm_file +
                    ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
                )
                if (o is not None):
                    f = open(o, "a")
                    f.write("Your variables were " + v)
                    f.write(
                        "The following variables were not found in " +
                        nidm_file +
                        ". The model cannot run because this will skew the data. Try checking your spelling or use nidm_query.py to see other possible variables."
                    )
                    f.close()
                for i in range(0, len(not_found_list)):
                    print(str(i + 1) + ". " + not_found_list[i])
                    if (o is not None):
                        f = open(o, "a")
                        f.write(str(i + 1) + ". " + not_found_list[i])
                        f.close()
                for j in range(len(not_found_list) - 1, 0, -1):
                    not_found_list.pop(j)
                not_found_count = not_found_count + 1
                print()
        if not_found_count > 0:
            exit(1)

    else:
        print("ERROR: No query parameter provided.  See help:")
        print()
        os.system("pynidm k-means --help")
        exit(1)
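
The variable handling above boils down to a few string operations; a standalone sketch of the cleanup and interaction-term filtering steps, using made-up variable names:

# Hypothetical -variables argument, mirroring the v parameter above.
v = "age, sex ,DX_GROUP*age"
var_list = [s.strip() for s in v.split(",")]   # strip leading/trailing spaces
kept = [s for s in var_list if "*" not in s]   # drop interaction (*) terms, as the loop above does
vars = ",".join(kept)                          # -> "age,sex", the fields passed to the REST query
print(vars)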
Example #4
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols, uri, j,
          verbosity):

    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),
                               output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())

        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),
                                           project_id=project)
                count += 1
            else:
                df = df.append(
                    GetProjectInstruments(nidm_file_list.split(','),
                                          project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),
                                            project_id=project)
                count += 1
            else:
                df = df.append(
                    GetInstrumentVariables(nidm_file_list.split(','),
                                           project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        df = restParser(nidm_file_list.split(','), uri, int(verbosity))
        if j:
            print(dumps(df, indent=2))
        else:
            if type(df) == list:
                for x in df:
                    print(x)
            elif type(df) == dict:
                for k in df.keys():
                    print(str(k) + ' ' + str(df[k]))
            else:
                print(df.to_string())
    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    else:

        #read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()

        df = sparql_query_nidm(nidm_file_list.split(','), query, output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
Example #5
def query(nidm_file_list, cde_file_list, query_file, output_file, get_participants, get_instruments, get_instrument_vars, get_dataelements, get_brainvols,get_dataelements_brainvols, uri, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())


        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        restParser = RestParser(verbosity_level=int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)

        print(df)

    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:

        df = sparql_query_nidm(nidm_file_list.split(','),query_file,output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
    else:
        print("ERROR: No query parameter provided.  See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
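
A sketch of the REST-style branch in this variant; the file path is a placeholder and "/projects" stands in for whichever route the RestParser is asked to serve:

# Hypothetical call: run a REST-style query over one NIDM file and print the result as JSON.
query(
    nidm_file_list="site1_nidm.ttl",  # placeholder file path
    cde_file_list=None,
    query_file=None,
    output_file=None,
    get_participants=False,
    get_instruments=False,
    get_instrument_vars=False,
    get_dataelements=False,
    get_brainvols=False,
    get_dataelements_brainvols=False,
    uri="/projects",  # placeholder REST route
    j=True,
    verbosity=0,
)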
Example #6
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset.  The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS'
    )

    parser.add_argument('-nidm_file',
                        dest='rdf_file',
                        required=True,
                        help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument(
        '-anat',
        dest='anat',
        action='store_true',
        required=False,
        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument(
        '-func',
        dest='func',
        action='store_true',
        required=False,
        help=
        "Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument(
        '-dwi',
        dest='dwi',
        action='store_true',
        required=False,
        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir',
                        dest='bids_dir',
                        required=True,
                        help="Directory to store BIDS dataset")
    args = parser.parse_args()

    rdf_file = args.rdf_file
    output_directory = args.bids_dir
    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    #try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("reading RDF file as %s..." % format)
            #load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            print("RDF file sucessfully read")
            format_found = True
            break
        except Exception:
            print("file: %s appears to be an invalid %s RDF file" %
                  (rdf_file, format))

    if not format_found:
        print(
            "File doesn't appear to be a valid RDF format supported by Python RDFLib!  Please check input file"
        )
        print("exiting...")
        exit(-1)
    #set up output directory for BIDS data
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
    if not os.path.isdir(
            join(output_directory,
                 os.path.splitext(args.rdf_file)[0])):
        os.mkdir(join(output_directory, os.path.splitext(args.rdf_file)[0]))

    #convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(
        nidm_project, join(output_directory,
                           os.path.splitext(args.rdf_file)[0]))

    #create participants.tsv file.  In BIDS datasets there is no specification for how many or which type of assessment
    #variables might be in this file.  The specification does mention a minimum participant_id which indexes each of the
    #subjects in the BIDS dataset.
    #
    #if parameter -parts_field is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file
    #and added to the participants.tsv file

    #use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(
        nidm_project.serializeTurtle()),
                                      format='turtle')

    #create participants file
    CreateBIDSParticipantFile(
        rdf_graph_parse,
        join(output_directory,
             os.path.splitext(args.rdf_file)[0], "participants"),
        args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(
            GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    #creating BIDS hierarchy with requested scans
    if args.anat:

        #query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object nidm:Anatomical
        for anat_acq in rdf_graph_parse.subjects(
                predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),
                object=URIRef(Constants.NIDM_MRI_ANATOMIC_SCAN.uri)):
            # first see if file exists locally.  Get nidm:Project prov:Location and append the nfo:Filename of the image
            # from the anat_acq acquisition entity.  If that file doesn't exist try the prov:Location in the anat acq
            # entity and see if we can download it from the cloud

            # get acquisition uuid from entity uuid
            # objects() returns a generator; materialize it so anat_act[0] below works
            anat_act = list(rdf_graph_parse.objects(
                subject=anat_acq, predicate=Constants.PROV['wasGeneratedBy']))
            # get participant ID with sio:Subject role in anat_acq qualified association
            part_id = GetParticipantIDFromAcquisition(
                nidm_file_list=[rdf_file], acquisition=anat_act[0])

            # make BIDS sub directory
            sub_dir = join(output_directory, "sub-" + part_id[0])
            sub_filename_base = "sub-" + part_id[0]
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)

            # make BIDS anat directory
            if not os.path.exists(join(sub_dir, "anat")):
                os.makedirs(join(sub_dir, "anat"))

            for anat_filename in rdf_graph_parse.objects(
                    subject=anat_acq,
                    predicate=URIRef(Constants.NIDM_FILENAME.uri)):
                # check if file exists
                for location in project_location:
                    # if anatomical MRI exists in this location then copy and rename
                    if isfile(location[0] + anat_filename):
                        # copy and rename file to be BIDS compliant
                        copyfile(src=location[0] + anat_filename,
                                 dst=join(
                                     sub_dir, "anat", sub_filename_base +
                                     splitext(anat_filename)[1]))
                        continue
                # if the file wasn't accessible locally, try with the prov:Location in the anat_acq
                for location in rdf_graph_parse.objects(
                        subject=anat_acq,
                        predicate=URIRef(Constants.PROV['Location'])):
                    # try to download the file and rename
                    ret = GetImageFromURL(location)
                    if ret == -1:
                        print(
                            "Can't download file: %s from url: %s, skipping...."
                            % (anat_filename, location))
                    else:
                        # copy temporary file to BIDS directory
                        copyfile(src=ret,
                                 dst=join(output_directory, 'anat',
                                          basename(ret)))
                        # rename file in dest
                        move(src=join(output_directory, 'anat', basename(ret)),
                             dst=join(output_directory, 'anat',
                                      anat_filename))
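
The renaming step above keeps only the original file extension and swaps in the BIDS subject prefix; a tiny standalone illustration with placeholder names:

from os.path import join, splitext

# Hypothetical values standing in for part_id[0] and anat_filename in the loop above.
sub_filename_base = "sub-01"
anat_filename = "T1_anat.nii"
bids_path = join("BIDS", "sub-01", "anat",
                 sub_filename_base + splitext(anat_filename)[1])
print(bids_path)  # -> BIDS/sub-01/anat/sub-01.nii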
Example #7
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset.  The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS'
    )

    parser.add_argument('-nidm_file',
                        dest='rdf_file',
                        required=True,
                        help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument(
        '-anat',
        dest='anat',
        action='store_true',
        required=False,
        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument(
        '-func',
        dest='func',
        action='store_true',
        required=False,
        help=
        "Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument(
        '-dwi',
        dest='dwi',
        action='store_true',
        required=False,
        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir',
                        dest='bids_dir',
                        required=True,
                        help="Directory to store BIDS dataset")

    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-no_downloads',
        dest='no_downloads',
        action='store_true',
        required=False,
        help=
        "If this flag is set then script won't attempt to download images using datalad"
        "and AWS S3.  Default behavior is files are downloaded if they don't exist locally."
    )
    group.add_argument(
        '-aws_url',
        dest='aws_url',
        required=False,
        help="This tool facilites export of "
        "user-selected information from a NIDM file to a BIDS dataset and may have to fetch images. The NIDM files contain links from"
        "the local filesystem used to convert BIDS to NIDM and possibly DataLad dataset links to the files if the"
        " original BIDS data was a DataLad dataset. Here we support 3 modes of trying to find images: (1) copy from"
        " the local directory space using the prov:Location information in the NIDM file; (2) fetch the images from"
        " a DataLad remote if the original BIDS dataset was a DataLad dataset when bids2nidm was run; (3) attempt "
        " to download the images via a AWS S3 link.  This parameter lets the user set the base AWS S3 URL to try and"
        " find the images.  Currently it supports using the URL provided here and adding the dataset id, subject id,"
        " and filename.  For example, in OpenNeuro (OpenNeuro is supported by default but will serve as an example) the base AWS S3"
        " URL is \'s3://openneuro.org\'. The URL then becomes (for example) "
        " s3://openneuro.org/ds000002/sub-06/func/sub-06_task-probabilisticclassification_run-02_bold.nii.gz where this tool"
        " has added \'ds000002/sub-06/[FILENAME] to the base AWS S3 URL.")
    parser.add_argument(
        '-dataset_string',
        dest='dataset_string',
        required=False,
        help="If -aws_url parameter is supplied"
        " this parameter (-dataset_string) is required as it will be added to the aws_baseurl to retrieve images for each"
        " subject and file.  For example, if -aws_baseurl is \'s3://davedata.org \' and -dataset_string is \'dataset1\' then"
        " the AWS S3 url for sub-1 and file sub1-task-rest_run-1_bold.nii.gz would be: "
        " \'s3://davedata.org/dataset1/sub-1/[anat | func | dwi/sub1-task-rest_run-1_bold.nii.gz\'"
    )

    args = parser.parse_args()

    # check some argument dependencies
    if args.aws_url and not args.dataset_string:
        print(
            "ERROR! You must include a -dataset_string if you supplied the -aws_baseurl.  If there is no dataset"
            " string in your AWS S3 urls then just supply -aws_baseurl with nothing after it."
        )
        parser.print_help()
        exit(-1)

    # set up some local variables
    rdf_file = args.rdf_file
    output_directory = args.bids_dir

    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    #try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("Reading RDF file as %s..." % format)
            #load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            # temporary save nidm_project
            with open("/Users/dbkeator/Downloads/nidm.ttl", 'w') as f:
                print(nidm_project.serializeTurtle(), file=f)
            print("RDF file sucessfully read")
            format_found = True
            break
        except Exception:
            print("File: %s appears to be an invalid %s RDF file" %
                  (rdf_file, format))

    if not format_found:
        print(
            "File doesn't appear to be a valid RDF format supported by Python RDFLib!  Please check input file"
        )
        print("exiting...")
        exit(-1)

    #  if not os.path.isdir(join(output_directory,os.path.splitext(args.rdf_file)[0])):
    #      os.mkdir(join(output_directory,os.path.splitext(args.rdf_file)[0]))

    #convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(nidm_project, output_directory)

    #create participants.tsv file.  In BIDS datasets there is no specification for how many or which type of assessment
    #variables might be in this file.  The specification does mention a minimum participant_id which indexes each of the
    #subjects in the BIDS dataset.
    #
    #if parameter -parts_field is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file
    #and added to the participants.tsv file

    #use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(
        nidm_project.serializeTurtle()),
                                      format='turtle')

    # temporary write out turtle file for testing
    # rdf_graph_parse.serialize(destination="/Users/dbkeator/Downloads/ds000117.ttl", format='turtle')

    #create participants file
    CreateBIDSParticipantFile(rdf_graph_parse,
                              join(output_directory, "participants"),
                              args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(
            GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    #creating BIDS hierarchy with requested scans
    if args.anat:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_ANATOMIC_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)

    if args.func:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_FUNCTION_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
    if args.dwi:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_DIFFUSION_TENSOR.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)