Example #1
def query(nidm_file_list, query_file, output_file, get_participants,get_instruments,get_instrument_vars):

    #query result list
    results = []

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),output_file=output_file)
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df)
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df)
    else:
        #read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()
        df = sparql_query_nidm(nidm_file_list.split(','),query,output_file)

    return df
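
A note on the loop above: pandas deprecated DataFrame.append in 1.4 and removed it in 2.0, so on current pandas the df.append(...) calls in these examples raise AttributeError. A minimal sketch of the same per-project accumulation using pd.concat instead, assuming the GetProjectsUUID and GetProjectInstruments helpers imported by the example:

import pandas as pd

def collect_instruments(nidm_file_list):
    # split the comma-separated file list once, as the example does
    files = nidm_file_list.split(',')
    # collect one dataframe per project UUID, then concatenate once at the end
    frames = [GetProjectInstruments(files, project_id=project)
              for project in GetProjectsUUID(files)]
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
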
Example #2
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols,
          get_fields, uri, blaze, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if blaze:
        os.environ["BLAZEGRAPH_URL"] = blaze
        print("setting BLAZEGRAPH_URL to {}".format(blaze))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),
                               output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())

        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),
                                           project_id=project)
                count += 1
            else:
                df = df.append(
                    GetProjectInstruments(nidm_file_list.split(','),
                                          project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),
                                            project_id=project)
                count += 1
            else:
                df = df.append(
                    GetInstrumentVariables(nidm_file_list.split(','),
                                           project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif get_fields:
        # fields only query.  We'll do it with the rest api
        restParser = RestParser(verbosity_level=int(verbosity))
        if (output_file is not None):
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
            df_list = []
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        # set up uri to do fields query for each nidm file
        for nidm_file in nidm_file_list.split(","):
            # get project UUID
            project = GetProjectsUUID([nidm_file])
            uri = "/projects/" + project[0].toPython().split(
                "/")[-1] + "?fields=" + get_fields
            # get fields output from each file and concatenate
            if (output_file is None):
                # just print results
                print(restParser.run([nidm_file], uri))
            else:
                df_list.append(pd.DataFrame(restParser.run([nidm_file], uri)))

        if (output_file is not None):
            # concatenate data frames
            df = pd.concat(df_list)
            # output to csv file
            df.to_csv(output_file)

    elif uri:
        restParser = RestParser(verbosity_level=int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        elif (output_file is not None):
            restParser.setOutputFormat(RestParser.OBJECT_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)
        if (output_file is not None):
            if j:
                with open(output_file, "w+") as f:
                    f.write(dumps(df))
            else:
                # convert object df to dataframe and output
                pd.DataFrame(df).to_csv(output_file)
        else:
            print(df)

    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:

        df = sparql_query_nidm(nidm_file_list.split(','), query_file,
                               output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
    else:
        print("ERROR: No query parameter provided.  See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
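
For orientation, a hypothetical direct call to the query() function from Example #2 might look like the following; the .ttl file names and output path are placeholders, and in the package itself this function is normally driven by the pynidm query command line (as the final else branch hints).

# Hypothetical invocation of query() from Example #2; file names are placeholders.
df = query(
    nidm_file_list="study1.ttl,study2.ttl",   # comma-separated NIDM files
    cde_file_list=None,                       # no CDE files to seed the cache
    query_file=None,
    output_file="participants.csv",           # results also written to CSV
    get_participants=True,                    # take the participant-IDs branch
    get_instruments=False,
    get_instrument_vars=False,
    get_dataelements=False,
    get_brainvols=False,
    get_dataelements_brainvols=False,
    get_fields=None,
    uri=None,
    blaze=None,
    j=False,
    verbosity=0,
)
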
Example #3
def query(nidm_file_list, cde_file_list, query_file, output_file,
          get_participants, get_instruments, get_instrument_vars,
          get_dataelements, get_brainvols, get_dataelements_brainvols, uri, j,
          verbosity):

    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),
                               output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())

        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),
                                           project_id=project)
                count += 1
            else:
                df = df.append(
                    GetProjectInstruments(nidm_file_list.split(','),
                                          project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count = 1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),
                                            project_id=project)
                count += 1
            else:
                df = df.append(
                    GetInstrumentVariables(nidm_file_list.split(','),
                                           project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        df = restParser(nidm_file_list.split(','), uri, int(verbosity))
        if j:
            print(dumps(df, indent=2))
        else:
            if type(df) == list:
                for x in df:
                    print(x)
            elif type(df) == dict:
                for k in df.keys():
                    print(str(k) + ' ' + str(df[k]))
            else:
                print(df.to_string())
    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    else:

        #read query from text file
        with open(query_file, 'r') as fp:
            query = fp.read()

        df = sparql_query_nidm(nidm_file_list.split(','), query, output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
Example #4
def main(argv):
    parser = ArgumentParser(
        description=
        'This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.  Note, you must obtain an API key to Interlex by signing up \
     for an account at scicrunch.org then going to My Account and API Keys.  Then set the environment \
     variable INTERLEX_API_KEY with your key.')

    parser.add_argument('-csv',
                        dest='csv_file',
                        required=True,
                        help="Full path to CSV file to convert")
    # parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    dd_group = parser.add_mutually_exclusive_group()
    dd_group.add_argument(
        '-json_map',
        dest='json_map',
        required=False,
        help=
        "Full path to user-suppled JSON file containing variable-term mappings."
    )
    dd_group.add_argument(
        '-redcap',
        dest='redcap',
        required=False,
        help=
        "Full path to a user-supplied RedCap formatted data dictionary for csv file."
    )
    parser.add_argument(
        '-nidm',
        dest='nidm_file',
        required=False,
        help=
        "Optional full path of NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument(
        '-no_concepts',
        action='store_true',
        required=False,
        help='If this flag is set then no concept associations will be '
        'asked of the user.  This is useful if you already have a -json_map specified without concepts and want to '
        'simply run this program to get a NIDM file with user interaction to associate concepts.'
    )
    parser.add_argument(
        '-log',
        '--log',
        dest='logfile',
        required=False,
        default=None,
        help=
        "full path to directory to save log file. Log file name is csv2nidm_[arg.csv_file].log"
    )
    parser.add_argument('-out',
                        dest='output_file',
                        required=True,
                        help="Full path with filename to save NIDM file")
    args = parser.parse_args()

    # if we have a redcap datadictionary then convert it straight away to a json representation
    if args.redcap:
        json_map = redcap_datadictionary_to_json(args.redcap,
                                                 basename(args.csv_file))
    else:
        json_map = args.json_map
    #open CSV file and load into
    df = pd.read_csv(args.csv_file)
    #temp = csv.reader(args.csv_file)
    #df = pd.DataFrame(temp)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    # if user did not specify -no_concepts then associate concepts interactively with user
    if not args.no_concepts:
        column_to_terms, cde = map_variables_to_terms(
            df=df,
            assessment_name=basename(args.csv_file),
            directory=dirname(args.output_file),
            output_file=args.output_file,
            json_source=json_map)
    # run without concept mappings
    else:
        column_to_terms, cde = map_variables_to_terms(
            df=df,
            assessment_name=basename(args.csv_file),
            directory=dirname(args.output_file),
            output_file=args.output_file,
            json_source=json_map,
            associate_concepts=False)

    if args.logfile is not None:
        logging.basicConfig(filename=join(
            args.logfile, 'csv2nidm_' +
            os.path.splitext(os.path.basename(args.csv_file))[0] + '.log'),
                            level=logging.DEBUG)
        # add some logging info
        logging.info("csv2nidm %s" % args)

    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        # get subjectID list for later
        qres = GetParticipantIDs([args.nidm_file])

        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #with open("/Users/dbkeator/Downloads/test.ttl","w") as f:
        #    f.write(project.serializeTurtle())

        #get list of session objects
        session_objs = project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field = None
        for key, value in column_to_terms.items():
            if 'isAbout' in column_to_terms[key]:
                for isabout_key, isabout_value in column_to_terms[key][
                        'isAbout'].items():
                    if (isabout_key == 'url') or (isabout_key == '@id'):
                        if (isabout_value == Constants.NIDM_SUBJECTID._uri):
                            key_tuple = eval(key)
                            #id_field=key
                            id_field = key_tuple.variable
                            #make sure id_field is a string for zero-padded subject ids
                            #re-read data file with constraint that key field is read as string
                            df = pd.read_csv(args.csv_file,
                                             dtype={id_field: str})
                            break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option = 1
            for column in df.columns:
                print("%d: %s" % (option, column))
                option = option + 1
            selection = input(
                "Please select the subject ID field from the list above: ")
            # Make sure user selected one of the options.  If not, prompt the user for input again
            while (not selection.isdigit()) or (int(selection) > int(option)):
                # Wait for user input
                selection = input(
                    "Please select the subject ID field from the list above: \t")
            id_field = df.columns[int(selection) - 1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file, dtype={id_field: str})

        ###use RDFLib here for temporary graph making query easier
        #rdf_graph = Graph()
        #rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        #print("Querying for existing participants in NIDM graph....")

        ###find subject ids and sessions in NIDM document
        #query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
        #            WHERE {
        #                ?activity prov:wasAssociatedWith ?agent ;
        #                    dct:isPartOf ?session  .
        #                ?agent rdf:type prov:Agent ;
        #                    ndar:src_subject_id ?nidm_subj_id .
        #            }"""
        ###print(query)
        #qres = rdf_graph.query(query)

        for index, row in qres.iterrows():
            logging.info("participant in NIDM file %s \t %s" %
                         (row[0], row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be careful about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            csv_row = df.loc[df[id_field].astype('str').str.contains(
                str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index) == 0)):

                logging.info("found participant in CSV file")

                # create a new session for this assessment
                new_session = Session(project=project)

                #NIDM document session uuid
                #session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                #temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                #nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                #acq=AssessmentAcquisition(session=nidm_session)
                acq = AssessmentAcquisition(session=new_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                acq.add_qualified_association(person=row[0],
                                              role=Constants.NIDM_PARTICIPANT)

                # add git-annex info if exists
                num_sources = addGitAnnexSources(obj=acq_entity,
                                                 filepath=args.csv_file,
                                                 bids_root=dirname(
                                                     args.csv_file))
                # if there aren't any git annex sources then just store the local directory information
                if num_sources == 0:
                    # WIP: add absolute location of BIDS directory on disk for later finding of files
                    acq_entity.add_attributes(
                        {Constants.PROV['Location']: "file:/" + args.csv_file})

                # store file to acq_entity
                acq_entity.add_attributes(
                    {Constants.NIDM_FILENAME: basename(args.csv_file)})

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable == id_field:
                        continue
                    else:
                        if not csv_row[row_variable].values[0]:
                            continue

                        add_attributes_with_cde(
                            acq_entity, cde, row_variable,
                            csv_row[row_variable].values[0])

                continue

        print("Adding CDEs to graph....")
        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),
                        format='turtle')
        rdf_graph = rdf_graph + cde

        print("Backing up original NIDM file...")
        copy2(src=args.nidm_file, dst=args.nidm_file + ".bak")
        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.nidm_file, format='turtle')

    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project = Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME: args.csv_file})

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field = None
        for key, value in column_to_terms.items():
            # using skos:sameAs relationship to associate subject identifier variable from csv with a known term
            # for subject IDs
            if 'sameAs' in column_to_terms[key]:
                if Constants.NIDM_SUBJECTID.uri == column_to_terms[key][
                        'sameAs']:
                    key_tuple = eval(key)
                    id_field = key_tuple.variable
                    #make sure id_field is a string for zero-padded subject ids
                    #re-read data file with constraint that key field is read as string
                    df = pd.read_csv(args.csv_file, dtype={id_field: str})
                    break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option = 1
            for column in df.columns:
                print("%d: %s" % (option, column))
                option = option + 1
            selection = input(
                "Please select the subject ID field from the list above: ")
            # Make sure user selected one of the options.  If not, prompt the user for input again
            while (not selection.isdigit()) or (int(selection) > int(option)):
                # Wait for user input
                selection = input(
                    "Please select the subject ID field from the list above: \t")
            id_field = df.columns[int(selection) - 1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file, dtype={id_field: str})

        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session = Session(project)

            #create and acquisition activity and entity
            acq = AssessmentAcquisition(session)
            acq_entity = AssessmentObject(acq)

            #create prov:Agent for subject
            #acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']}))

            # add git-annex info if exists
            num_sources = addGitAnnexSources(obj=acq_entity,
                                             filepath=args.csv_file,
                                             bids_root=os.path.dirname(
                                                 args.csv_file))
            # if there aren't any git annex sources then just store the local directory information
            if num_sources == 0:
                # WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_entity.add_attributes(
                    {Constants.PROV['Location']: "file:/" + args.csv_file})

            # store file to acq_entity
            acq_entity.add_attributes(
                {Constants.NIDM_FILENAME: basename(args.csv_file)})

            #store other data from row with columns_to_term mappings
            for row_variable, row_data in csv_row.items():
                if not row_data:
                    continue

                #check if row_variable is subject id, if so skip it
                if row_variable == id_field:
                    ### WIP: Check if agent already exists with the same ID.  If so, use it else create a new agent

                    #add qualified association with person
                    acq.add_qualified_association(
                        person=acq.add_person(
                            attributes=({
                                Constants.NIDM_SUBJECTID: str(row_data)
                            })),
                        role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    add_attributes_with_cde(acq_entity, cde, row_variable,
                                            row_data)

                    #print(project.serializeTurtle())

        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),
                        format='turtle')
        rdf_graph = rdf_graph + cde

        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.output_file, format='turtle')
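
One detail worth calling out in Example #4: the repeated pd.read_csv(args.csv_file, dtype={id_field: str}) calls exist because pandas parses an all-digit column as integers by default, which strips the leading zeros from zero-padded subject IDs. A small self-contained illustration (the CSV content below is made up):

import io
import pandas as pd

csv_text = "participant_id,age\n001,34\n010,29\n"

# default parsing: the ID column becomes int64 and leading zeros are lost
print(pd.read_csv(io.StringIO(csv_text))["participant_id"].tolist())        # [1, 10]

# forcing the column to str preserves the zero-padded identifiers
print(pd.read_csv(io.StringIO(csv_text),
                  dtype={"participant_id": str})["participant_id"].tolist())  # ['001', '010']
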
Example #5
def query(nidm_file_list, cde_file_list, query_file, output_file, get_participants, get_instruments, get_instrument_vars, get_dataelements, get_brainvols,get_dataelements_brainvols, uri, j, verbosity):
    """
    This function provides query support for NIDM graphs.
    """
    #query result list
    results = []

    # if there is a CDE file list, seed the CDE cache
    if cde_file_list:
        getCDEs(cde_file_list.split(","))

    if get_participants:
        df = GetParticipantIDs(nidm_file_list.split(','),output_file=output_file)
        if ((output_file) is None):

            print(df.to_string())


        return df
    elif get_instruments:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetProjectInstruments(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetProjectInstruments(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
            #with open(output_file,'w') as myfile:
            #    wr=csv.writer(myfile,quoting=csv.QUOTE_ALL)
            #    wr.writerow(df)

            #pd.DataFrame.from_records(df,columns=["Instruments"]).to_csv(output_file)
        else:
            print(df.to_string())
    elif get_instrument_vars:
        #first get all project UUIDs then iterate and get instruments adding to output dataframe
        project_list = GetProjectsUUID(nidm_file_list.split(','))
        count=1
        for project in project_list:
            if count == 1:
                df = GetInstrumentVariables(nidm_file_list.split(','),project_id=project)
                count+=1
            else:
                df = df.append(GetInstrumentVariables(nidm_file_list.split(','),project_id=project))

        #write dataframe
        #if output file parameter specified
        if (output_file is not None):

            df.to_csv(output_file)
        else:
            print(df.to_string())
    elif get_dataelements:
        datael = GetDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            datael.to_csv(output_file)
        else:
            print(datael.to_string())
    elif uri:
        restParser = RestParser(verbosity_level = int(verbosity))
        if j:
            restParser.setOutputFormat(RestParser.JSON_FORMAT)
        else:
            restParser.setOutputFormat(RestParser.CLI_FORMAT)
        df = restParser.run(nidm_file_list.split(','), uri)

        print(df)

    elif get_dataelements_brainvols:
        brainvol = GetBrainVolumeDataElements(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif get_brainvols:
        brainvol = GetBrainVolumes(nidm_file_list=nidm_file_list)
        #if output file parameter specified
        if (output_file is not None):

            brainvol.to_csv(output_file)
        else:
            print(brainvol.to_string())
    elif query_file:

        df = sparql_query_nidm(nidm_file_list.split(','),query_file,output_file)

        if ((output_file) is None):

            print(df.to_string())

        return df
    else:
        print("ERROR: No query parameter provided.  See help:")
        print()
        os.system("pynidm query --help")
        exit(1)
Example #6
def merge(nidm_file_list, s, out_file):
    """
    This function will merge NIDM files.  See command line parameters for supported merge operations.
    """

    #graph = Graph()
    #for nidm_file in nidm_file_list.split(','):
    #    graph.parse(nidm_file,format=util.guess_format(nidm_file))

    # create empty graph
    graph = Graph()
    # start with the first NIDM file and merge the rest into the first
    first = True
    for nidm_file in nidm_file_list.split(','):
        # if merging by subject:
        if s:
            if first:
                # get list of all subject IDs
                first_file_subjids = GetParticipantIDs([nidm_file])
                first = False
                first_graph = Graph()
                first_graph.parse(nidm_file,
                                  format=util.guess_format(nidm_file))
            else:
                # load second graph
                graph.parse(nidm_file, format=util.guess_format(nidm_file))

                # get list of second file subject IDs
                subj = GetParticipantIDs([nidm_file])

                # for each UUID / subject ID look in graph and see if you can find the same ID.  If so get the UUID of
                # that prov:agent and change all the UUIDs in nidm_file to match then concatenate the two graphs.
                query = '''

                    PREFIX prov:<http://www.w3.org/ns/prov#>
                    PREFIX sio: <http://semanticscience.org/ontology/sio.owl#>
                    PREFIX ndar: <https://ndar.nih.gov/api/datadictionary/v2/dataelement/>
                    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                    PREFIX prov:<http://www.w3.org/ns/prov#>

                    SELECT DISTINCT ?uuid ?ID
                    WHERE {

                        ?uuid a prov:Agent ;
                            %s ?ID .
                    FILTER(?ID =
                    ''' % Constants.NIDM_SUBJECTID

                # add filters to above query to only look for subject IDs which are in the first file to merge into
                temp = True
                for ID in first_file_subjids['ID']:
                    if temp:
                        query = query + "\"" + ID + "\""
                        temp = False
                    else:
                        query = query + "|| ?ID= \"" + ID + "\""

                query = query + ") }"

                qres = graph.query(query)

                # if len(qres) > 0 then we have matches so load the nidm_file into a temporary graph so we can
                # make changes to it then concatenate it.
                if len(qres) > 0:
                    #tmp = Graph()
                    #tmp.parse(nidm_file,format=util.guess_format(nidm_file))

                    # for each ID in the merged graph that matches an ID in the nidm_file graph
                    for row in qres:
                        # find ID from first file that matches ID in this file
                        t = first_file_subjids['ID'].str.match(row['ID'])
                        # then get uuid for that match from first file
                        uuid_replacement = first_file_subjids.iloc[
                            [*filter(t.get, t.index)][0], 0]

                        for s, p, o in graph.triples((None, None, None)):
                            if (s == row['uuid']):
                                #print("replacing subject in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                                graph.add((uuid_replacement, p, o))
                                graph.remove((row['uuid'], p, o))
                            elif (o == row['uuid']):
                                #print("replacing object in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                                graph.add((s, p, uuid_replacement))
                                graph.remove((s, p, row['uuid']))
                            elif (p == row['uuid']):
                                #print("replacing predicate in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                                graph.add((s, uuid_replacement, o))
                                graph.remove((s, row['uuid'], o))

                # merge updated graph

                graph = first_graph + graph

        graph.serialize(out_file, format='turtle')
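
The FILTER clause in both merge examples is assembled by string-chaining ?ID = "..." || comparisons. If one were rewriting it, SPARQL's IN operator expresses the same restriction with less string bookkeeping; a sketch, assuming the same first_file_subjids dataframe and Constants.NIDM_SUBJECTID predicate used above:

# Sketch: same subject-ID restriction, built with SPARQL's IN operator instead
# of chained "||" comparisons. Assumes first_file_subjids comes from
# GetParticipantIDs() as in Example #6.
id_values = ", ".join('"%s"' % ID for ID in first_file_subjids['ID'])
query = '''
    PREFIX prov: <http://www.w3.org/ns/prov#>
    PREFIX ndar: <https://ndar.nih.gov/api/datadictionary/v2/dataelement/>

    SELECT DISTINCT ?uuid ?ID
    WHERE {
        ?uuid a prov:Agent ;
            %s ?ID .
        FILTER(?ID IN (%s))
    }''' % (Constants.NIDM_SUBJECTID, id_values)
qres = graph.query(query)
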
Example #7
def merge(nidm_file_list, s, out_file):
    """
    This function will merge NIDM files.  See command line parameters for supported merge operations.
    """

    graph = Graph()
    for nidm_file in nidm_file_list.split(','):
        graph.parse(nidm_file, format=util.guess_format(nidm_file))

    # create empty graph
    graph = Graph()
    # start with the first NIDM file and merge the rest into the first
    first = True
    for nidm_file in nidm_file_list.split(','):
        if first:
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            first = False
        # if argument -s is set then merge by subject IDs
        elif s:
            # first get all subject UUIDs in current nidm_file
            subj = GetParticipantIDs([nidm_file])

            # for each UUID / subject ID look in graph and see if you can find the same ID.  If so get the UUID of
            # that prov:agent and change all the UUIDs in nidm_file to match then concatenate the two graphs.
            query = '''

                PREFIX prov:<http://www.w3.org/ns/prov#>
                PREFIX sio: <http://semanticscience.org/ontology/sio.owl#>
                PREFIX ndar: <https://ndar.nih.gov/api/datadictionary/v2/dataelement/>
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                PREFIX prov:<http://www.w3.org/ns/prov#>

                SELECT DISTINCT ?uuid ?ID
                WHERE {

                        ?uuid a prov:Person ;
                            %s ?ID .
                FILTER(?ID =
                ''' % Constants.NIDM_SUBJECTID

            first = True
            for ID in subj['ID']:
                if first:
                    query = query + "\"" + ID + "\""
                    first = False
                else:
                    query = query + "|| ?ID= \"" + ID + "\""

            query = query + ") }"

            qres = graph.query(query)

            # if len(qres) > 0 then we have matches so load the nidm_file into a temporary graph so we can
            # make changes to it then concatenate it.
            if len(qres) > 0:
                tmp = Graph()
                tmp.parse(nidm_file, format=util.guess_format(nidm_file))

                # for each ID in the merged graph that matches an ID in the nidm_file graph
                for row in qres:
                    # find the UUID in the subj data frame for the matching ID and change all triples that reference
                    # this uuid to the one in row['uuid']
                    uuid_to_replace = (subj[subj['ID'].str.match(
                        row['ID'])])['uuid'].values[0]

                    for s, p, o in tmp.triples((None, None, None)):
                        if (s == uuid_to_replace):
                            #print("replacing subject in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                            tmp.set((uuid_to_replace, p, o))
                        elif (o == uuid_to_replace):
                            #print("replacing object in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                            tmp.set((s, p, uuid_to_replace))
                        elif (p == uuid_to_replace):
                            #print("replacing predicate in triple %s %s %s with %s" %(s,p,o,uuid_to_replace))
                            tmp.set((s, uuid_to_replace, o))

            # merge updated graph
            graph = graph + tmp

    graph.serialize(out_file, format='turtle')
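
Note that in Example #7 the tmp.set(...) calls pass uuid_to_replace back in rather than row['uuid'], so the identifier substitution the surrounding comments describe never actually takes place; Example #6 performs it explicitly with paired add/remove calls. For reference, a minimal standalone version of that node-renaming pattern in rdflib (the URIs below are made up for illustration):

from rdflib import Graph, Literal, URIRef

def rename_node(graph, old_node, new_node):
    # replace old_node with new_node wherever it appears in a triple
    for s, p, o in list(graph.triples((None, None, None))):
        if old_node in (s, p, o):
            graph.remove((s, p, o))
            graph.add((new_node if s == old_node else s,
                       new_node if p == old_node else p,
                       new_node if o == old_node else o))

# tiny demonstration
g = Graph()
old = URIRef("http://example.org/agent/old-uuid")
new = URIRef("http://example.org/agent/new-uuid")
g.add((old, URIRef("http://example.org/prop/src_subject_id"), Literal("001")))
rename_node(g, old, new)
print(list(g))   # the triple's subject is now the new-uuid node
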