Example 1
def convert(nidm_file_list, type):
    """
    This function will convert NIDM files to various RDF-supported formats and name then / put them in the same
    place as the input file.
    """

    for nidm_file in nidm_file_list.split(','):
        # WIP: for now we use pynidm for jsonld exports to make more human readable and rdflib for everything
        # else.
        if type == 'jsonld':
            # read in nidm file
            project = read_nidm(nidm_file)
            #write jsonld file with same name
            with open(splitext(nidm_file)[0] + ".json", 'w') as f:
                f.write(project.serializeJSONLD())
        elif type == 'turtle':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".ttl", format='turtle')
        elif type == 'xml-rdf':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".xml",
                            format='pretty-xml')
        elif type == 'n3':
            graph = Graph()
            graph.parse(nidm_file, format=util.guess_format(nidm_file))
            graph.serialize(splitext(nidm_file)[0] + ".n3", format='n3')
        elif type == 'trig':
            # read in nidm file
            project = read_nidm(nidm_file)
            with open(splitext(nidm_file)[0] + ".trig", 'w') as f:
                f.write(project.serializeTrig())
        else:
            print("Error, type is not supported at this time")
Example 2
def main(argv):

    parser = ArgumentParser(description='This program contains various NIDM-Experiment utilities')
    sub = parser.add_subparsers(dest='command')
    concat = sub.add_parser('concat', description="This command will simply concatenate the supplied NIDM files into a single output")
    visualize = sub.add_parser('visualize', description="This command will produce a visualization (PDF) of the supplied NIDM files")
    jsonld = sub.add_parser('jsonld', description="This command will save NIDM files as jsonld")

    for arg in [concat,visualize,jsonld]:
        arg.add_argument('-nl', '--nl', dest="nidm_files", nargs="+", required=True, help="One or more NIDM files (full paths)")

    concat.add_argument('-o', '--o', dest='output_file', required=True, help="Merged NIDM output file name + path")
    visualize.add_argument('-o', '--o', dest='output_file', required=True, help="Output file name+path of dot graph")


    args=parser.parse_args()

    #concatenate nidm files
    if args.command == 'concat':

        #create empty graph
        graph=Graph()
        for nidm_file in args.nidm_files:
             tmp = Graph()
             graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file))

        graph.serialize(args.output_file, format='turtle')



    elif args.command == 'visualize':
        #create empty graph
        graph=Graph()
        for nidm_file in args.nidm_files:
             tmp = Graph()
             graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file))


        project=read_nidm(StringIO(graph.serialize(format='turtle')))
        project.save_DotGraph(filename=args.output_file+'.pdf',format='pdf')

    elif args.command == 'jsonld':
        #create empty graph
        for nidm_file in args.nidm_files:
            project=read_nidm(nidm_file)
            #serialize to jsonld
            with open(splitext(nidm_file)[0]+".json",'w') as f:
                f.write(project.serializeJSONLD())
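
Hypothetical command-line invocations of the utility above; the script name nidm_utils.py is an assumption, and -nl accepts one or more file paths.

# python nidm_utils.py concat -nl site1.nidm.ttl site2.nidm.ttl -o merged.nidm.ttl
# python nidm_utils.py visualize -nl study.nidm.ttl -o study_graph   # writes study_graph.pdf
# python nidm_utils.py jsonld -nl study.nidm.ttl                     # writes study.nidm.json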
Example 3
def main(argv):
    parser = ArgumentParser()
    #parse command line arguments
    parser.add_argument('-nidm', dest='nidm_file', required=True, help="NIDM-Exp RDF File to import")
    args = parser.parse_args()

    project = read_nidm(args.nidm_file)
    # splitext() keeps any directory component, so the PNG is written next to the input file
    project.save_DotGraph(splitext(args.nidm_file)[0] + ".png", format="png")

    sessions = project.get_sessions()
    print("Sessions:\n %s" % sessions)
    #example add attributes to existing session
    #sessions[0].add_attributes({Constants.NIDM: "test"})

    acquisitions=[]
    for session in sessions:
        acquisitions = session.get_acquisitions()
        print("Acquisitions:\n %s" % acquisitions)

        for acq in acquisitions:
            acquisition_objects = acq.get_acquisition_objects()
            print("Acquisition Objects:\n %s" % acquisition_objects)

    #save a turtle file next to the input file (splitext keeps the directory component)
    with open(splitext(args.nidm_file)[0] + "_read.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    #save a json file
    with open(splitext(args.nidm_file)[0] + "_read.json", 'w') as f:
        f.write(project.serializeJSONLD())
Example 4
def visualize(nidm_file_list):
    '''
    This command will produce a visualization (PDF) of the supplied NIDM files, named the same as the input files and
    stored in the same directories.
    '''

    for nidm_file in nidm_file_list.split(','):
        # read in nidm file
        project = read_nidm(nidm_file)

        # split path and filename for output file writing
        file_parts = os.path.split(nidm_file)

        # write graph as nidm filename + .pdf
        project.save_DotGraph(filename=os.path.join(
            file_parts[0],
            os.path.splitext(file_parts[1])[0] + '.pdf'),
                              format='pdf')
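
A minimal usage sketch for visualize() above; the file names are placeholders and read_nidm from PyNIDM is assumed to be importable.

# each input file gets a sibling PDF with the same base name
visualize("sub-01.nidm.ttl,sub-02.nidm.ttl")   # writes sub-01.nidm.pdf and sub-02.nidm.pdf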
Example 5
def main(argv):
    parser = ArgumentParser()
    #parse command line arguments
    parser.add_argument('-nidm',
                        dest='nidm_file',
                        required=True,
                        help="NIDM-Exp RDF File to import")
    parser.add_argument('-out',
                        dest='outfile',
                        required=True,
                        help="output file name")
    args = parser.parse_args()

    project = read_nidm(args.nidm_file)

    print("Project: \n %s" % project.get_uuid())
    sessions = project.get_sessions()
    print("Sessions:\n %s" % sessions)

    acquisitions = []
    for session in sessions:
        acquisitions = session.get_acquisitions()
        print("Acquisitions:\n %s" % acquisitions)

        for acq in acquisitions:
            acquisition_objects = acq.get_acquisition_objects()
            print("Acquisition Objects:\n %s" % acquisition_objects)

    # check for data elements
    print("Data Elements: \n %s" % project.get_dataelements())

    # check for derivatives
    print("Derivatives: \n %s" % project.get_derivatives())
    for deriv in project.get_derivatives():
        derivobj = deriv.get_derivative_objects()
        print("Derivative Objects: \n %s" % derivobj)

    with open(args.outfile, 'w') as f:
        #serialize project for comparison with the original
        f.write(project.serializeTurtle())
Example 6
def main(argv):
    parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.  Note, you must obtain an API key to Interlex by signing up \
     for an account at scicrunch.org then going to My Account and API Keys.  Then set the environment \
     variable INTERLEX_API_KEY with your key.')

    parser.add_argument('-csv', dest='csv_file', required=True, help="Full path to CSV file to convert")
    # parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map',required=False,help="Full path to user-supplied JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional full path of NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument('-no_concepts', action='store_true', required=False, help='If this flag is set then no concept associations will be '
                                'asked of the user.  This is useful if you already have a -json_map specified without concepts and want to '
                                'simply run this program to get a NIDM file without user interaction to associate concepts.')
    # parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required')
    # parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced')
    # parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag, when set NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-log','--log', dest='logfile',required=False, default=None, help="full path to directory to save log file. Log file name is csv2nidm_[arg.csv_file].log")
    parser.add_argument('-out', dest='output_file', required=True, help="Full path with filename to save NIDM file")
    args = parser.parse_args()



    #open CSV file and load into a pandas DataFrame
    df = pd.read_csv(args.csv_file)
    #temp = csv.reader(args.csv_file)
    #df = pd.DataFrame(temp)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    # if user did not specify -no_concepts then associate concepts interactively with user
    if not args.no_concepts:
        column_to_terms, cde = map_variables_to_terms(df=df,  assessment_name=basename(args.csv_file),directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map)
    # run without concept mappings
    else:
        column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file),
                                                      directory=dirname(args.output_file), output_file=args.output_file,
                                                      json_file=args.json_map, associate_concepts=False)

    if args.logfile is not None:
        logging.basicConfig(filename=join(args.logfile,'csv2nidm_' + os.path.splitext(os.path.basename(args.csv_file))[0] + '.log'), level=logging.DEBUG)
        # add some logging info
        logging.info("csv2nidm %s" %args)


    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #get list of session objects
        session_objs=project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                key_tuple = eval(key)
                #id_field=key
                id_field = key_tuple.variable
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                df = pd.read_csv(args.csv_file,dtype={id_field : str})
                break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            # Make sure the user selected one of the options.  If not, present the selection prompt again
            while (not selection.isdigit()) or (int(selection) < 1) or (int(selection) > len(df.columns)):
                # Wait for user input
                selection = input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file,dtype={id_field : str})



        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        print("Querying for existing participants in NIDM graph....")
        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        #print(query)
        qres = rdf_graph.query(query)


        for row in qres:
            logging.info("found existing participant %s \t %s" %(row[0],row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be careful about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index)==0)):

                #NIDM document session uuid
                session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                acq=AssessmentAcquisition(session=nidm_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

                # add git-annex info if exists
                num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=dirname(args.csv_file))
                # if there aren't any git annex sources then just store the local directory information
                if num_sources == 0:
                    # WIP: add absolute location of BIDS directory on disk for later finding of files
                    acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file})

                # store file to acq_entity
                acq_entity.add_attributes({Constants.NIDM_FILENAME:basename(args.csv_file)})

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable==id_field:
                        continue
                    else:
                        if not csv_row[row_variable].values[0]:
                            continue


                        add_attributes_with_cde(acq_entity, cde, row_variable, csv_row[row_variable].values[0])



                continue

        print ("Adding CDEs to graph....")
        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')
        rdf_graph = rdf_graph + cde

        print("Backing up original NIDM file...")
        copy2(src=args.nidm_file,dst=args.nidm_file+".bak")
        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.nidm_file,format='turtle')

    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project=Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME:args.csv_file})


        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            # using skos:sameAs relationship to associate subject identifier variable from csv with a known term
            # for subject IDs
            if 'sameAs' in column_to_terms[key]:
                if Constants.NIDM_SUBJECTID.uri == column_to_terms[key]['sameAs']:
                    key_tuple = eval(key)
                    id_field=key_tuple.variable
                    #make sure id_field is a string for zero-padded subject ids
                    #re-read data file with constraint that key field is read as string
                    df = pd.read_csv(args.csv_file,dtype={id_field : str})
                    break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            # Make sure the user selected one of the options.  If not, present the selection prompt again
            while (not selection.isdigit()) or (int(selection) < 1) or (int(selection) > len(df.columns)):
                # Wait for user input
                selection = input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file,dtype={id_field : str})


        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session=Session(project)

            #create an acquisition activity and entity
            acq=AssessmentAcquisition(session)
            acq_entity=AssessmentObject(acq)

            #create prov:Agent for subject
            #acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']}))

            # add git-annex info if exists
            num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=os.path.dirname(args.csv_file))
            # if there aren't any git annex sources then just store the local directory information
            if num_sources == 0:
                # WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file})

            # store file to acq_entity
            acq_entity.add_attributes({Constants.NIDM_FILENAME : basename(args.csv_file)})


            #store other data from row with columns_to_term mappings
            for row_variable,row_data in csv_row.items():
                if not row_data:
                    continue

                #check if row_variable is subject id, if so skip it
                if row_variable==id_field:
                    ### WIP: Check if agent already exists with the same ID.  If so, use it else create a new agent

                    #add qualified association with person
                    acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:str(row_data)})),role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    add_attributes_with_cde(acq_entity, cde, row_variable, row_data)

                    #print(project.serializeTurtle())

        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')
        rdf_graph = rdf_graph + cde

        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.output_file,format='turtle')
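
Hypothetical invocations of the CSV-to-NIDM converter above; the script name csv2nidm.py and all file names are assumptions, and INTERLEX_API_KEY must be set in the environment as noted in the description.

# create a new NIDM file from a CSV, reusing a saved variable-to-term mapping
# python csv2nidm.py -csv assessments.csv -json_map assessments_map.json -out study.nidm.ttl
#
# append the same CSV data to an existing NIDM file (a .bak copy of the original is written first)
# python csv2nidm.py -csv assessments.csv -nidm study.nidm.ttl -no_concepts -out study.nidm.ttl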
Example 7
def main(argv):
    parser = ArgumentParser(description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset.  The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.', epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS')

    parser.add_argument('-nidm_file', dest='rdf_file', required=True, help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument('-anat', dest='anat', action='store_true', required=False, help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument('-func', dest='func', action='store_true', required=False, help="Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument('-dwi', dest='dwi', action='store_true', required=False, help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir', dest='bids_dir', required=True, help="Directory to store BIDS dataset")
    args = parser.parse_args()

    rdf_file = args.rdf_file
    output_directory = args.bids_dir


    #try to read RDF file
    print("Guessing RDF file format...")
    format_found=False
    for format in 'turtle','xml','n3','trix','rdfa':
        try:
            print("reading RDF file as %s..." % format)
            #load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            print("RDF file sucessfully read")
            format_found=True
            break
        except Exception:
            print("file: %s appears to be an invalid %s RDF file" % (rdf_file,format))

    if not format_found:
        print("File doesn't appear to be a valid RDF format supported by Python RDFLib!  Please check input file")
        print("exiting...")
        exit(-1)
    #set up output directory for BIDS data
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
    if not os.path.isdir(join(output_directory,os.path.splitext(args.rdf_file)[0])):
        os.mkdir(join(output_directory,os.path.splitext(args.rdf_file)[0]))

    #convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(nidm_project,join(output_directory,os.path.splitext(args.rdf_file)[0]))

    #create participants.tsv file.  In BIDS datasets there is no specification for how many or which type of assessment
    #variables might be in this file.  The specification does mention a minimum participant_id which indexes each of the
    #subjects in the BIDS dataset.
    #
    #if parameter -part_fields is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file
    #and added to the participants.tsv file

    #use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(nidm_project.serializeTurtle()),format='turtle')

    #create participants file
    CreateBIDSParticipantFile(rdf_graph_parse,join(output_directory,os.path.splitext(args.rdf_file)[0],"participants"),args.part_fields)

    #creating BIDS hierarchy with requested scans
    if args.anat==True:
        #make BIDS anat directory
        if not os.path.exists(join(output_directory,"anat")):
            os.makedirs(join(output_directory,"anat"))

        #query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object nidm:Anatomical
        for anat_acq in rdf_graph_parse.subjects(predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),object=URIRef(Constants.NIDM_MRI_ANATOMIC_SCAN.uri)):
            #get filename
            for anat_filename in rdf_graph_parse.objects(subject=anat_acq,predicate=URIRef(Constants.NIDM_FILENAME.uri)):
Example 8
def main(argv):
    parser = ArgumentParser(
        description="""This program will load in a CSV file made during simple-2
                brain volumes experiment which has the following organization:
                source	FSL	FSL	FSL
                participant_id	left nucleus accumbens volume	left amygdala volume
                sub-0050002	    796.4723293	    1255.574283	    4449.579039
                sub-0050003	    268.9688215	    878.7860634	    3838.602449
                sub-0050004	    539.0969914	    1195.288168	    3561.518188
                It will use the first row to determine the software used for the segmentations and the
                second row for the variable names.  Then it does a simple NIDM conversion using
                example model in: https://docs.google.com/document/d/1PyBoM7J0TuzTC1TIIFPDqd05nomcCM5Pvst8yCoqLng/edit"""
    )

    parser.add_argument('-csv',
                        dest='csv_file',
                        required=True,
                        help="Path to CSV file to convert")
    parser.add_argument('-ilxkey',
                        dest='key',
                        required=True,
                        help="Interlex/SciCrunch API key to use for query")
    parser.add_argument(
        '-json_map',
        dest='json_map',
        required=False,
        help="User-suppled JSON file containing variable-term mappings.")
    parser.add_argument(
        '-nidm',
        dest='nidm_file',
        required=False,
        help="Optional NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument(
        '-owl',
        action='store_true',
        required=False,
        help='Optionally searches NIDM OWL files...internet connection required'
    )
    parser.add_argument(
        '-png',
        action='store_true',
        required=False,
        help=
        'Optional flag, when set a PNG image file of RDF graph will be produced'
    )
    parser.add_argument('-out',
                        dest='output_file',
                        required=True,
                        help="Filename to save NIDM file")
    args = parser.parse_args()

    #open CSV file and read first line which is the source of the segmentations
    source_row = pd.read_csv(args.csv_file, nrows=0)
    #open CSV file and load into a pandas DataFrame
    df = pd.read_csv(args.csv_file, skiprows=0, header=1)
    #account for duplicate column names
    # df.columns = df.iloc[0]
    df = df.reindex(df.index.drop(0)).reset_index(drop=True)

    #get unique variable names from CSV data file
    #note, duplicate variable names will be appended with a ".X" where X is the number of duplicates
    unique_vars = []
    for variable in list(df):
        temp = variable.split(".")[0]
        if temp not in unique_vars:
            unique_vars.append(temp)

    #do same as above for unique software agents
    unique_software = []
    for variable in list(source_row):
        temp = variable.split(".")[0]
        if temp not in unique_software:
            unique_software.append(temp)

    #maps variables in CSV file to terms
    if args.owl:
        column_to_terms = map_variables_to_terms(
            df=pd.DataFrame(columns=unique_vars),
            apikey=args.key,
            directory=dirname(args.output_file),
            output_file=join(dirname(args.output_file), "json_map.json"),
            json_file=args.json_map,
            owl_file=args.owl)
    else:
        column_to_terms = map_variables_to_terms(
            df=pd.DataFrame(columns=unique_vars),
            apikey=args.key,
            directory=dirname(args.output_file),
            output_file=join(dirname(args.output_file), "json_map.json"),
            json_file=args.json_map)

    #get subjectID field from CSV
    id_field = getSubjIDColumn(column_to_terms, df)

    # WIP!!!#########################################################################################
    #go line by line through CSV file creating NIDM structures
    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file is not None:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)

        root_act = project.graph.activity(
            QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
            other_attributes={
                Constants.NIDM_PROJECT_DESCRIPTION:
                "Brain volumes provenance document"
            })

        #this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file....
        add_brainvolume_data(nidmdoc=project,
                             df=df,
                             id_field=id_field,
                             root_act=root_act,
                             column_to_terms=column_to_terms,
                             png_file=args.png,
                             output_file=args.output_file,
                             source_row=source_row,
                             nidm_graph=True)

        #serialize NIDM file
        with open(args.output_file, 'w') as f:
            print("Writing NIDM file...")
            f.write(project.serializeTurtle())
            #if args.png:
            #    nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png")


#        #find subject ids and sessions in NIDM document
#        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent ?entity
#                    WHERE {
#                        ?activity prov:wasAssociatedWith ?agent ;
#                            dct:isPartOf ?session  .
#                        ?entity prov:wasGeneratedBy ?activity ;
#                            nidm:hasImageUsageType nidm:Anatomical .
#                        ?agent rdf:type prov:Agent ;
#                            ndar:src_subject_id ?nidm_subj_id .
#
#                    }"""
#        #print(query)
#        qres = rdf_graph_parse.query(query)

#        for row in qres:
#            print('%s \t %s' %(row[0],row[1]))
#            #find row in CSV file with subject id matching agent from NIDM file

#            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
#            #find row in CSV file with matching subject id to the agent in the NIDM file
#            #be careful about data types...simply type-change dataframe subject id column and query to strings.
#            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
#            #time which column is the subject id....
#            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

#            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
#            #then add this brain volumes data to NIDM file, else skip it....
#            if (not (len(csv_row.index)==0)):

#Here we're sure we have an agent in the NIDM graph that corresponds to the participant in the
#brain volumes data.  We don't know which AcquisitionObject (entity) describes the T1-weighted scans
#used for the project.  Since we don't have the SHA512 sums in the brain volumes data (YET) we can't
#really verify that it's a particular T1-weighted scan that was used for the brain volumes but we're
#simply, for the moment, going to assume it's the activity/session returned by the above query
#where we've specifically asked for the entity which has a nidm:hasImageUsageType nidm:Anatomical

#NIDM document entity uuid which has a nidm:hasImageUsageType nidm:Anatomical
#this is the entity that is associated with the brain volume report for this participant
#                entity_uuid = row[3]

#Now we need to set up the entities/activities, etc. to add the brain volume data for this row of the
#CSV file and link it to the above entity and the agent for this participant which is row[0]

#add acquisition entity for assessment
#                acq_entity = AssessmentObject(acquisition=acq)
#add qualified association with existing agent
#                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

#                #store other data from row with columns_to_term mappings
#                for row_variable in csv_row:
#check if row_variable is subject id, if so skip it
#                    if row_variable==id_field:
#                        continue
#                    else:
#get column_to_term mapping uri and add as namespace in NIDM document
#provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"])
#                        acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]})
#                continue

#        #serialize NIDM file
#        with open(args.nidm_file,'w') as f:
#            print("Writing NIDM file...")
#            f.write(project.serializeTurtle())
#            project.save_DotGraph(str(args.nidm_file + ".png"), format="png")
##############################################################################################################################

    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data

        #create an empty NIDM graph
        nidmdoc = Core()
        root_act = nidmdoc.graph.activity(
            QualifiedName(provNamespace("niiri", Constants.NIIRI), getUUID()),
            other_attributes={
                Constants.NIDM_PROJECT_DESCRIPTION:
                "Brain volumes provenance document"
            })

        #this function sucks...more thought needed for version that works with adding to existing NIDM file versus creating a new NIDM file....
        add_brainvolume_data(nidmdoc=nidmdoc,
                             df=df,
                             id_field=id_field,
                             root_act=root_act,
                             column_to_terms=column_to_terms,
                             png_file=args.png,
                             output_file=args.output_file,
                             source_row=source_row)

        #serialize NIDM file
        with open(args.output_file, 'w') as f:
            print("Writing NIDM file...")
            f.write(nidmdoc.serializeTurtle())
            if args.png:
                #    nidmdoc.save_DotGraph(str(args.output_file + ".png"), format="png")

                nidmdoc.save_DotGraph(str(args.output_file + ".pdf"),
                                      format="pdf")
Example 9
def main(argv):
    parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.')

    parser.add_argument('-csv', dest='csv_file', required=True, help="Path to CSV file to convert")
    parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map',required=False,help="User-supplied JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional NIDM file to add CSV->NIDM converted graph to")
    #parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required')
    parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced')
    parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag, when set NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-out', dest='output_file', required=True, help="Filename to save NIDM file")
    args = parser.parse_args()

    #open CSV file and load into a pandas DataFrame
    df = pd.read_csv(args.csv_file)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map)



    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #get list of session objects
        session_objs=project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            #df = pd.read_csv(args.csv_file,dtype={id_field : str})



        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        #print(query)
        qres = rdf_graph_parse.query(query)


        for row in qres:
            print('%s \t %s' %(row[0],row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be careful about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index)==0)):

                #NIDM document session uuid
                session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                acq=AssessmentAcquisition(session=nidm_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable==id_field:
                        continue
                    else:
                        if not csv_row[row_variable].values[0]:
                            continue
                        #get column_to_term mapping uri and add as namespace in NIDM document
                        #provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"])
                        acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]})
                continue

        #serialize NIDM file
        with open(args.nidm_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())

            project.save_DotGraph(str(args.nidm_file + ".png"), format="png")



    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project=Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME:args.csv_file})


        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]


        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session=Session(project)

            #create an acquisition activity and entity
            acq=AssessmentAcquisition(session)
            acq_entity=AssessmentObject(acq)



            #store other data from row with columns_to_term mappings
            for row_variable,row_data in csv_row.items():
                if not row_data:
                    continue
                #check if row_variable is subject id, if so skip it
                if row_variable==id_field:
                    #add qualified association with person
                    acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row_data})),role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    #get column_to_term mapping uri and add as namespace in NIDM document
                    acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]),""):row_data})
                    #print(project.serializeTurtle())

        #serialize NIDM file
        with open(args.output_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())
            if args.png:
                project.save_DotGraph(str(args.output_file + ".png"), format="png")
Example 10
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset.  The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments saving the MRI data to disk in an organization \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS'
    )

    parser.add_argument('-nidm_file',
                        dest='rdf_file',
                        required=True,
                        help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument(
        '-anat',
        dest='anat',
        action='store_true',
        required=False,
        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument(
        '-func',
        dest='func',
        action='store_true',
        required=False,
        help=
        "Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument(
        '-dwi',
        dest='dwi',
        action='store_true',
        required=False,
        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir',
                        dest='bids_dir',
                        required=True,
                        help="Directory to store BIDS dataset")
    args = parser.parse_args()

    rdf_file = args.rdf_file
    output_directory = args.bids_dir
    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    #try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("reading RDF file as %s..." % format)
            #load NIDM graph into NIDM-Exp API objects
            nidm_project = read_nidm(rdf_file)
            print("RDF file sucessfully read")
            format_found = True
            break
        except Exception:
            print("file: %s appears to be an invalid %s RDF file" %
                  (rdf_file, format))

    if not format_found:
        print(
            "File doesn't appear to be a valid RDF format supported by Python RDFLib!  Please check input file"
        )
        print("exiting...")
        exit(-1)
    #set up output directory for BIDS data
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
    if not os.path.isdir(
            join(output_directory,
                 os.path.splitext(args.rdf_file)[0])):
        os.mkdir(join(output_directory, os.path.splitext(args.rdf_file)[0]))

    #convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(
        nidm_project, join(output_directory,
                           os.path.splitext(args.rdf_file)[0]))

    #create participants.tsv file.  In BIDS datasets there is no specification for how many or which type of assessment
    #variables might be in this file.  The specification does mention a minimum participant_id which indexes each of the
    #subjects in the BIDS dataset.
    #
    #if parameter -part_fields is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file
    #and added to the participants.tsv file

    #use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(
        nidm_project.serializeTurtle()),
                                      format='turtle')

    #create participants file
    CreateBIDSParticipantFile(
        rdf_graph_parse,
        join(output_directory,
             os.path.splitext(args.rdf_file)[0], "participants"),
        args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(
            GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    #creating BIDS hierarchy with requested scans
    if args.anat == True:

        #query NIDM document for acquisition entity "subjects" with predicate nidm:hasImageUsageType and object nidm:Anatomical
        for anat_acq in rdf_graph_parse.subjects(
                predicate=URIRef(Constants.NIDM_IMAGE_USAGE_TYPE.uri),
                object=URIRef(Constants.NIDM_MRI_ANATOMIC_SCAN.uri)):
            # first see if file exists locally.  Get nidm:Project prov:Location and append the nfo:Filename of the image
            # from the anat_acq acquisition entity.  If that file doesn't exist try the prov:Location in the anat acq
            # entity and see if we can download it from the cloud

            # get acquisition activity uuid from the entity uuid (rdflib .objects() returns a generator, so materialize it)
            anat_act = list(rdf_graph_parse.objects(
                subject=anat_acq, predicate=Constants.PROV['wasGeneratedBy']))
            # get participant ID with sio:Subject role in anat_acq qualified association
            part_id = GetParticipantIDFromAcquisition(
                nidm_file_list=[rdf_file], acquisition=anat_act[0])

            # make BIDS sub directory
            sub_dir = join(output_directory, "sub-" + part_id[0])
            sub_filename_base = "sub-" + part_id[0]
            if not os.path.exists(sub_dir):
                os.makedirs(sub_dir)

            # make BIDS anat directory
            if not os.path.exists(join(sub_dir, "anat")):
                os.makedirs(join(sub_dir, "anat"))

            for anat_filename in rdf_graph_parse.objects(
                    subject=anat_acq,
                    predicate=URIRef(Constants.NIDM_FILENAME.uri)):
                # check if file exists
                for location in project_location:
                    # if anatomical MRI exists in this location then copy and rename
                    if isfile(location[0] + anat_filename):
                        # copy and rename file to be BIDS compliant
                        copyfile(src=location[0] + anat_filename,
                                 dst=join(
                                     sub_dir, "anat", sub_filename_base +
                                     splitext(anat_filename)[1]))
                        continue
                # if the file wasn't accessible locally, try with the prov:Location in the anat_acq
                for location in rdf_graph_parse.objects(
                        subject=anat_acq,
                        predicate=URIRef(Constants.PROV['Location'])):
                    # try to download the file and rename
                    ret = GetImageFromURL(location)
                    if ret == -1:
                        print(
                            "Can't download file: %s from url: %s, skipping...."
                            % (anat_filename, location))
                    else:
                        # copy temporary file into the BIDS anat directory
                        copyfile(src=ret,
                                 dst=join(output_directory, 'anat', basename(ret)))
                        # rename file in destination directory
                        move(src=join(output_directory, 'anat', basename(ret)),
                             dst=join(output_directory, 'anat',
                                      anat_filename))
Example 11
def main(argv):

    parser = ArgumentParser(
        description='This program contains various NIDM-Experiment utilities')
    sub = parser.add_subparsers(dest='command')
    concat = sub.add_parser(
        'concat',
        description=
        "This command will simply concatenate the supplied NIDM files into a single output"
    )
    visualize = sub.add_parser(
        'visualize',
        description=
        "This command will produce a visualization(pdf) of the supplied NIDM files"
    )
    jsonld = sub.add_parser(
        'jsonld', description="This command will save NIDM files as jsonld")

    for arg in [concat, visualize, jsonld]:
        arg.add_argument(
            '-nl',
            '--nl',
            dest="nidm_files",
            nargs="+",
            required=True,
            help="A comma separated list of NIDM files with full path")

    concat.add_argument('-o',
                        '--o',
                        dest='output_file',
                        required=True,
                        help="Merged NIDM output file name + path")
    # visualize.add_argument('-o', '--o', dest='output_file', required=True, help="Output file name+path of dot graph")

    args = parser.parse_args()

    #concatenate nidm files
    if args.command == 'concat':

        #create empty graph
        graph = Graph()
        for nidm_file in args.nidm_files:
            tmp = Graph()
            graph = graph + tmp.parse(nidm_file,
                                      format=util.guess_format(nidm_file))

        graph.serialize(args.output_file, format='turtle')

    elif args.command == 'visualize':

        for nidm_file in args.nidm_files:
            # read in nidm file
            project = read_nidm(nidm_file)

            # split path and filename for output file writing
            file_parts = os.path.split(nidm_file)

            # write graph as nidm filename + .pdf
            project.save_DotGraph(filename=os.path.join(
                file_parts[0],
                os.path.splitext(file_parts[1])[0] + '.pdf'),
                                  format='pdf')

        #create empty graph
        #graph=Graph()
        #for nidm_file in args.nidm_files:
        #     tmp = Graph()
        #     graph = graph + tmp.parse(nidm_file,format=util.guess_format(nidm_file))

        # project=read_nidm(StringIO.write(graph.serialize(format='turtle')))
        # project.save_DotGraph(filename=args.output_file+'.pdf',format='pdf')
        # WIP: Workaround because not all NIDM files only contain NIDM-E objects and so read_nidm function needs to be
        # updated for project.save_DotGraph to work...so this is a clunky workaround using the command line tool
        # rdf2dot

        # result is the standard output dot graph stream
        # write temporary file to disk and use for stats
        #temp = tempfile.NamedTemporaryFile(delete=False)
        #temp.write(graph.serialize(format='turtle'))
        #temp.close()
        #uber_nidm_file = temp.name
        #result = subprocess.run(['rdf2dot',uber_nidm_file], stdout=subprocess.PIPE)

        # now use graphviz Source to create dot graph object
        #src=Source(result)
        #src.render(args.output_file+'.pdf',view=False,format='pdf')

    elif args.command == 'jsonld':
        # serialize each NIDM file to JSON-LD alongside the input file
        for nidm_file in args.nidm_files:
            project = read_nidm(nidm_file)
            #serialize to jsonld
            with open(splitext(nidm_file)[0] + ".json", 'w') as f:
                f.write(project.serializeJSONLD())
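
For reference, the concat branch above is just a graph union followed by Turtle serialization. A self-contained sketch of that step with rdflib (the file names below are placeholders) is:

from rdflib import Graph
from rdflib.util import guess_format

# placeholder input files; substitute real NIDM documents
nidm_files = ["site1_nidm.ttl", "site2_nidm.ttl"]

merged = Graph()
for nidm_file in nidm_files:
    tmp = Graph()
    tmp.parse(nidm_file, format=guess_format(nidm_file))
    # adding two graphs returns a new graph containing the union of their triples
    merged = merged + tmp

merged.serialize("merged_nidm.ttl", format='turtle')

Assuming the utility above is saved as, say, nidm_utils.py (the script name is hypothetical), the same result would come from an invocation like: python nidm_utils.py concat -nl site1_nidm.ttl site2_nidm.ttl -o merged_nidm.ttl
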
Example n. 12
0
def main(argv):
    parser = ArgumentParser(
        description='This program will convert a NIDM-Experiment RDF document \
        to a BIDS dataset.  The program will query the NIDM-Experiment document for subjects, \
        MRI scans, and associated assessments, saving the MRI data to disk organized \
        according to the BIDS specification, metadata to a participants.tsv \
        file, the project-level metadata to a dataset_description.json file, and the \
        assessments to *.tsv/*.json file pairs in a phenotypes directory.',
        epilog='Example of use: \
        NIDM2BIDSMRI.py -nidm_file NIDM.ttl -part_fields age,gender -bids_dir BIDS'
    )

    parser.add_argument('-nidm_file',
                        dest='rdf_file',
                        required=True,
                        help="NIDM RDF file")
    parser.add_argument('-part_fields', nargs='+', dest='part_fields', required=False, \
                        help='Variables to add to BIDS participant file. Variables will be fuzzy-matched to NIDM URIs')
    parser.add_argument(
        '-anat',
        dest='anat',
        action='store_true',
        required=False,
        help="Include flag to add anatomical scans to BIDS dataset")
    parser.add_argument(
        '-func',
        dest='func',
        action='store_true',
        required=False,
        help=
        "Include flag to add functional scans + events files to BIDS dataset")
    parser.add_argument(
        '-dwi',
        dest='dwi',
        action='store_true',
        required=False,
        help="Include flag to add DWI scans + Bval/Bvec files to BIDS dataset")
    parser.add_argument('-bids_dir',
                        dest='bids_dir',
                        required=True,
                        help="Directory to store BIDS dataset")

    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-no_downloads',
        dest='no_downloads',
        action='store_true',
        required=False,
        help=
        "If this flag is set then script won't attempt to download images using datalad"
        "and AWS S3.  Default behavior is files are downloaded if they don't exist locally."
    )
    group.add_argument(
        '-aws_url',
        dest='aws_url',
        required=False,
        help="This tool facilites export of "
        "user-selected information from a NIDM file to a BIDS dataset and may have to fetch images. The NIDM files contain links from"
        "the local filesystem used to convert BIDS to NIDM and possibly DataLad dataset links to the files if the"
        " original BIDS data was a DataLad dataset. Here we support 3 modes of trying to find images: (1) copy from"
        " the local directory space using the prov:Location information in the NIDM file; (2) fetch the images from"
        " a DataLad remote if the original BIDS dataset was a DataLad dataset when bids2nidm was run; (3) attempt "
        " to download the images via a AWS S3 link.  This parameter lets the user set the base AWS S3 URL to try and"
        " find the images.  Currently it supports using the URL provided here and adding the dataset id, subject id,"
        " and filename.  For example, in OpenNeuro (OpenNeuro is supported by default but will serve as an example) the base AWS S3"
        " URL is \'s3://openneuro.org\'. The URL then becomes (for example) "
        " s3://openneuro.org/ds000002/sub-06/func/sub-06_task-probabilisticclassification_run-02_bold.nii.gz where this tool"
        " has added \'ds000002/sub-06/[FILENAME] to the base AWS S3 URL.")
    parser.add_argument(
        '-dataset_string',
        dest='dataset_string',
        required=False,
        help="If -aws_url parameter is supplied"
        " this parameter (-dataset_string) is required as it will be added to the aws_baseurl to retrieve images for each"
        " subject and file.  For example, if -aws_baseurl is \'s3://davedata.org \' and -dataset_string is \'dataset1\' then"
        " the AWS S3 url for sub-1 and file sub1-task-rest_run-1_bold.nii.gz would be: "
        " \'s3://davedata.org/dataset1/sub-1/[anat | func | dwi/sub1-task-rest_run-1_bold.nii.gz\'"
    )

    args = parser.parse_args()

    # check some argument dependencies
    if args.aws_url and not args.dataset_string:
        print(
            "ERROR! You must include a -dataset_string if you supplied the -aws_baseurl.  If there is no dataset"
            " string in your AWS S3 urls then just supply -aws_baseurl with nothing after it."
        )
        print(args.print_help())
        exit(-1)

    # set up some local variables
    rdf_file = args.rdf_file
    output_directory = args.bids_dir

    # check if output directory exists, if not create it
    if not isdir(output_directory):
        mkdir(path=output_directory)

    #try to read RDF file
    print("Guessing RDF file format...")
    format_found = False
    for format in 'turtle', 'xml', 'n3', 'trix', 'rdfa':
        try:
            print("Reading RDF file as %s..." % format)
            # load NIDM graph into NIDM-Exp API objects; read_nidm detects the
            # serialization itself, so this loop simply retries on parse errors
            nidm_project = read_nidm(rdf_file)
            print("RDF file successfully read")
            format_found = True
            break
        except Exception:
            print("File: %s appears to be an invalid %s RDF file" %
                  (rdf_file, format))

    if not format_found:
        print(
            "File doesn't appear to be a valid RDF format supported by Python RDFLib!  Please check input file"
        )
        print("exiting...")
        exit(-1)

    #  if not os.path.isdir(join(output_directory,os.path.splitext(args.rdf_file)[0])):
    #      os.mkdir(join(output_directory,os.path.splitext(args.rdf_file)[0]))

    # convert Project NIDM object -> dataset_description.json file
    NIDMProject2BIDSDatasetDescriptor(nidm_project, output_directory)

    #create participants.tsv file.  In BIDS datasets there is no specification for how many or which type of assessment
    #variables might be in this file.  The specification does mention a minimum participant_id which indexes each of the
    #subjects in the BIDS dataset.
    #
    #if parameter -part_fields is defined then the variables listed will be fuzzy matched to the URIs in the NIDM file
    #and added to the participants.tsv file

    #use RDFLib here for temporary graph making query easier
    rdf_graph = Graph()
    rdf_graph_parse = rdf_graph.parse(source=StringIO(
        nidm_project.serializeTurtle()),
                                      format='turtle')

    # temporary write out turtle file for testing
    # rdf_graph_parse.serialize(destination="/Users/dbkeator/Downloads/ds000117.ttl", format='turtle')

    #create participants file
    CreateBIDSParticipantFile(rdf_graph_parse,
                              join(output_directory, "participants"),
                              args.part_fields)

    # get nidm:Project prov:Location
    # first get nidm:Project UUIDs
    project_uuid = GetProjectsUUID([rdf_file], output_file=None)
    project_location = []
    for uuid in project_uuid:
        project_location.append(
            GetProjectLocation(nidm_file_list=[rdf_file], project_uuid=uuid))

    #creating BIDS hierarchy with requested scans
    if args.anat:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_ANATOMIC_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)

    if args.func:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_FUNCTION_SCAN.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
    if args.dwi:
        ProcessFiles(graph=rdf_graph_parse,
                     scan_type=Constants.NIDM_MRI_DIFFUSION_TENSOR.uri,
                     output_directory=output_directory,
                     project_location=project_location,
                     args=args)
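
The -aws_url and -dataset_string help text above describes a simple URL scheme: the base AWS S3 URL, then the dataset string, then the subject id, a modality folder, and the filename. The tool's internal logic isn't reproduced here; a hypothetical helper that assembles a URL following that description (the function name and exact path layout are assumptions based on the help text) would be:

def build_s3_url(aws_url, dataset_string, subject_id, modality, filename):
    """Join the pieces into <aws_url>/<dataset_string>/<subject_id>/<modality>/<filename>."""
    return "/".join([aws_url.rstrip('/'), dataset_string, subject_id, modality, filename])


# example, matching the OpenNeuro illustration in the help text:
# build_s3_url('s3://openneuro.org', 'ds000002', 'sub-06', 'func',
#              'sub-06_task-probabilisticclassification_run-02_bold.nii.gz')
# -> 's3://openneuro.org/ds000002/sub-06/func/sub-06_task-probabilisticclassification_run-02_bold.nii.gz'
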