Ejemplo n.º 1
0
def test_GetParticipantIDs():
    """Query.GetParticipantIDs should find both subject IDs in a serialized project."""
    attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=attrs)
    session = Session(uuid="_13579", project=project)

    # two acquisitions, each with its own participant
    first_acq = Acquisition(uuid="_15793", session=session)
    second_acq = Acquisition(uuid="_15795", session=session)

    for acquisition, subject_id in ((first_acq, "9999"), (second_acq, "8888")):
        subject = acquisition.add_person(
            attributes={Constants.NIDM_SUBJECTID: subject_id})
        acquisition.add_qualified_association(
            person=subject, role=Constants.NIDM_PARTICIPANT)

    # serialize to turtle, query it, then clean up before asserting
    with open("test.ttl", 'w') as ttl_file:
        ttl_file.write(project.serializeTurtle())

    participant_list = Query.GetParticipantIDs(["test.ttl"])
    remove("test.ttl")

    assert participant_list['ID'].str.contains('9999').any()
    assert participant_list['ID'].str.contains('8888').any()
Ejemplo n.º 2
0
def saveTestFile(file_name, data):
    """Build a Project from *data*, serialize it as turtle into *file_name*,
    and return the prefixed uuid string of the created project."""
    test_project = Project(uuid="_123_" + file_name, attributes=data)

    # write the turtle serialization to disk
    with open(file_name, 'w') as out:
        out.write(test_project.serializeTurtle())

    return "nidm:_123_" + file_name
Ejemplo n.º 3
0
def test_1(tmpdir):
    """Smoke test: a default Project serializes to turtle without raising."""
    tmpdir.chdir()

    project = Project()

    # success criterion is simply that serialization and the write complete
    with open("test.ttl", 'w') as ttl_file:
        ttl_file.write(project.serializeTurtle())
Ejemplo n.º 4
0
def test_2(tmpdir):
    """Smoke test: a Project built with attributes serializes to turtle."""
    tmpdir.chdir()

    attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(attributes=attrs)

    # success criterion is simply that serialization and the write complete
    with open("test.ttl", 'w') as ttl_file:
        ttl_file.write(project.serializeTurtle())
Ejemplo n.º 5
0
def test_GetProjects():
    """Query.GetProjectsUUID should return the UUID of a serialized project."""
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)

    # save a turtle file
    with open("test.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    project_list = Query.GetProjectsUUID(["test.ttl"])

    # FIX: remove the scratch file before asserting — the original left
    # test.ttl behind, a name other tests in this file reuse
    remove("test.ttl")

    assert URIRef(Constants.NIDM + "_123456") in project_list
Ejemplo n.º 6
0
def test_sessions_3(tmpdir):
    """add_sessions attaches a session regardless of which project built it."""
    tmpdir.chdir()

    project1 = Project()
    project2 = Project()

    # session2 is constructed against project2 but explicitly added to project1
    session1 = Session(project1)
    session2 = Session(project2)
    project1.add_sessions(session1)
    project1.add_sessions(session2)

    # sessions are kept in insertion order
    assert len(project1.sessions) == 2
    assert project1.sessions[0].label == session1.label
    assert project1.sessions[1].label == session2.label
Ejemplo n.º 7
0
def test_project_trig_serialization():
    """serializeTrig should emit a non-empty TriG document for the given
    graph identifier."""
    outfile = StringIO()

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)

    #save as trig file with graph identifier Constants.NIDM_Project
    test = project.serializeTrig(identifier=Constants.NIIRI["_996"])
    outfile.write(test)
    outfile.seek(0)

    # FIX: the original produced the serialization but never asserted on it;
    # at minimum the TriG output must be a non-empty string
    assert outfile.read().strip() != ""
Ejemplo n.º 8
0
def test_project_emptygraph():
    """A Project created with empty_graph=True carries only the nidm
    namespace and a single record typed as prov:Activity."""
    proj = Project(empty_graph=True)

    # the bundle must be a plain ProvDocument
    assert type(proj.bundle) is prov.model.ProvDocument

    # only the "nidm" prefix is registered on an empty graph
    prefixes = [ns.prefix for ns in proj.graph.namespaces]
    assert prefixes == ["nidm"]

    # provn_representation() yields a quoted literal; eval unwraps it
    proj_type = proj.get_type()
    assert eval(proj_type.provn_representation()) == 'prov:Activity'

    assert len(proj.graph.get_records()) == 1
Ejemplo n.º 9
0
def test_sessions_2(tmpdir):
    """Constructing a Session against a project registers it on that project."""
    tmpdir.chdir()

    project = Project()
    # a new project starts with no sessions
    assert project.sessions == []

    first_session = Session(project)
    assert project.sessions[0].label == first_session.label
Ejemplo n.º 10
0
def test_jsonld_exports():
    """serializeJSONLD output should round-trip through json.load and keep
    the project identifier value."""
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)

    # save a JSON-LD file (original comment wrongly said "turtle")
    with open("test.json", 'w') as f:
        f.write(project.serializeJSONLD())

    #load in JSON file
    with open("test.json") as json_file:
        data = json.load(json_file)

    # FIX: remove the scratch file — the original left test.json behind
    remove("test.json")

    assert (data["Identifier"]['@value'] == "9610")
Ejemplo n.º 11
0
def test_project_noparameters():
    """A parameterless Project is a NIDMDocument bundle with the standard
    namespace set and a single prov:Activity record."""
    proj = Project()

    # the bundle is a NIDMDocument, which subclasses ProvDocument
    assert type(proj.bundle) is Constants.NIDMDocument
    assert issubclass(type(proj.bundle), prov.model.ProvDocument)

    # the graph registers exactly the standard namespace prefixes
    expected_prefixes = list(Constants.namespaces)
    actual_prefixes = [ns.prefix for ns in proj.graph.namespaces]
    assert sorted(expected_prefixes) == sorted(actual_prefixes)

    # provn_representation() yields a quoted literal; eval unwraps it
    proj_type = proj.get_type()
    assert eval(proj_type.provn_representation()) == 'prov:Activity'

    # NOTE: this record count can differ when the whole suite runs together
    assert len(proj.graph.get_records()) == 1
Ejemplo n.º 12
0
def test_GetProjectInstruments():
    """GetProjectInstruments should list every assessment type attached to
    acquisitions of the given project."""
    proj_attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    proj_uuid = "_123456gpi"
    project = Project(uuid=proj_uuid, attributes=proj_attrs)
    session = Session(project)

    nidm_ns = pm.Namespace("nidm", Constants.NIDM)

    # one assessment acquisition per instrument; constructing the
    # AssessmentObject attaches it to the acquisition as a side effect
    for instrument in ("NorthAmericanAdultReadingTest",
                       "PositiveAndNegativeSyndromeScale"):
        acquisition = AssessmentAcquisition(session)
        AssessmentObject(acquisition,
                         attributes={
                             pm.PROV_TYPE:
                             pm.QualifiedName(nidm_ns, instrument)
                         })

    # serialize, query, then clean up before asserting
    with open("test_gpi.ttl", 'w') as ttl_file:
        ttl_file.write(project.serializeTurtle())

    assessment_list = Query.GetProjectInstruments(["test_gpi.ttl"], proj_uuid)
    remove("test_gpi.ttl")

    found_types = [str(x) for x in assessment_list['assessment_type'].to_list()]
    assert Constants.NIDM + "NorthAmericanAdultReadingTest" in found_types
    assert Constants.NIDM + "PositiveAndNegativeSyndromeScale" in found_types
Ejemplo n.º 13
0
def test_uri_project_id():
    """restParser '/projects/<id>' should return that project's metadata dict.

    NOTE(review): a later definition with the same name appears further down
    this file and shadows this one, so pytest only collects the later version
    — confirm which is intended.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "1234356 Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)
    #save a turtle file
    with open("uri2test.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    project = Project(uuid="_654321", attributes=kwargs)
    #save a turtle file
    with open("uri2test2.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    result = restParser(['uri2test.ttl', 'uri2test2.ttl'],
                        '/projects/nidm:_123456')

    # FIX: remove the scratch files before asserting — the original left
    # uri2test*.ttl behind whenever an assertion failed
    os.remove("uri2test.ttl")
    os.remove("uri2test2.ttl")

    pp = pprint.PrettyPrinter()
    pp.pprint(result)

    assert type(result) == dict
    assert result["dct:description"] == "1234356 Test investigation"
Ejemplo n.º 14
0
def test_uri_project_list():
    """restParser '/projects' should list the UUIDs of every project across
    the supplied files."""
    import uuid

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    proj1_uuid = str(uuid.uuid1())
    proj2_uuid = str(uuid.uuid1())
    project = Project(uuid=proj1_uuid, attributes=kwargs)
    #save a turtle file
    with open("uritest.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    project = Project(uuid=proj2_uuid, attributes=kwargs)
    #save a turtle file
    with open("uritest2.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    restParser = RestParser()
    result = restParser.run(['uritest.ttl', 'uritest2.ttl'], '/projects')

    # FIX: the original loop variable was named ``uuid``, shadowing the
    # module imported above; copying the result without a loop avoids it
    project_uuids = list(result)

    assert type(result) == list
    assert len(project_uuids) >= 2
    assert proj1_uuid in project_uuids
    assert proj2_uuid in project_uuids

    os.remove("uritest.ttl")
    os.remove("uritest2.ttl")
Ejemplo n.º 15
0
def test_GetProjectMetadata():
    """Exercise Query.GetProjectMetadata over two serialized projects.

    NOTE(review): the result is never asserted on, so this currently only
    checks the query runs without raising. Also note this name is redefined
    later in the file, so pytest collects only the last definition.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)

    session = Session(project)
    acq = AssessmentAcquisition(session)

    kwargs = {Constants.NIDM_HANDEDNESS: "Left", Constants.NIDM_AGE: "90"}
    acq_obj = AssessmentObject(acq, kwargs)

    #save a turtle file
    with open("test.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_654321", attributes=kwargs)

    session = Session(project)
    acq = AssessmentAcquisition(session)

    kwargs = {Constants.NIDM_HANDEDNESS: "Right", Constants.NIDM_AGE: "75"}
    acq_obj = AssessmentObject(acq, kwargs)

    #save a turtle file
    with open("test2.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    test = Query.GetProjectMetadata(["test.ttl", "test2.ttl"])

    # FIX: remove scratch files — the original left them behind
    remove("test.ttl")
    remove("test2.ttl")
Ejemplo n.º 16
0
def test_project_att():
    """A Project built with a custom nidm attribute registers that prefix in
    addition to the standard namespace set."""
    title_attr = {prov.model.QualifiedName(Constants.NIDM, "title"): "MyPRoject"}
    proj = Project(attributes=title_attr)

    # the bundle is a NIDMDocument, which subclasses ProvDocument
    assert type(proj.bundle) is Constants.NIDMDocument
    assert issubclass(type(proj.bundle), prov.model.ProvDocument)

    # expected namespaces = standard set plus the attribute's nidm prefix
    expected = list(Constants.namespaces) + [
        rdflib.term.URIRef('http://purl.org/nidash/nidm#prefix')
    ]
    actual = [ns.prefix for ns in proj.graph.namespaces]
    assert sorted(expected) == sorted(actual)

    # provn_representation() yields a quoted literal; eval unwraps it
    proj_type = proj.get_type()
    assert eval(proj_type.provn_representation()) == 'prov:Activity'

    # NOTE: this record count can differ when the whole suite runs together
    assert len(proj.graph.get_records()) == 1
Ejemplo n.º 17
0
def test_sessions_1(tmpdir):
    """Sessions added via add_sessions appear on the project in order."""
    tmpdir.chdir()

    project = Project()
    # a fresh project has no sessions
    assert project.sessions == []

    first = Session(project)
    project.add_sessions(first)
    assert first.label == project.sessions[0].label

    second = Session(project)
    project.add_sessions(second)
    assert len(project.sessions) == 2
    assert second.label == project.sessions[1].label
Ejemplo n.º 18
0
def makeProjectTestFile2(filename):
    """Build test project "TEST B" (one session, one assessment acquisition,
    one participant) and save it via saveProject.

    Returns whatever saveProject returns for *filename*.
    """
    # FIX: removed the unused local ``DCTYPES = Namespace(...)`` — it was
    # never referenced anywhere in this function

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "TEST B",  # this is the "title"
        Constants.NIDM_PROJECT_IDENTIFIER: 1234,
        Constants.NIDM_PROJECT_DESCRIPTION: "More Scans",
        Constants.NIDM_FILENAME: "testfile2.ttl",
        Constants.NIDM_PROJECT_LICENSE: "Creative Commons",
        Constants.NIDM_PROJECT_SOURCE: "Other",
        Constants.NIDM_HAD_NUMERICAL_VALUE: "numval???",
        Constants.NIDM_BATH_SOLUTION: "bath",
        Constants.NIDM_CELL_TYPE: "ctype",
        Constants.NIDM_CHANNEL_NUMBER: "5",
        Constants.NIDM_ELECTRODE_IMPEDANCE: ".01",
        Constants.NIDM_GROUP_LABEL: "group 123",
        Constants.NIDM_HOLLOW_ELECTRODE_SOLUTION: "water",
        Constants.NIDM_HAD_IMAGE_CONTRACT_TYPE: "off",
        Constants.NIDM_HAD_IMAGE_USAGE_TYPE: "abcd",
        Constants.NIDM_NUBMER_OF_CHANNELS: "11",
        Constants.NIDM_APPLIED_FILTER: "on",
        Constants.NIDM_SOLUTION_FLOW_SPEED: "2.8",
        Constants.NIDM_RECORDING_LOCATION: "lab"
    }
    project = Project(uuid="_123_" + filename, attributes=kwargs)
    s1 = Session(project)

    a1 = AssessmentAcquisition(session=s1)
    # = s1.add_acquisition("a1", attributes={"http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#Age" : 22})

    # one participant with a given name and age, associated in the
    # PARTICIPANT role
    p1 = a1.add_person("p1",
                       attributes={
                           Constants.NIDM_GIVEN_NAME: "George",
                           Constants.NIDM_AGE: 22
                       })
    a1.add_qualified_association(person=p1, role=Constants.NIDM_PARTICIPANT)

    return saveProject(filename, project)
Ejemplo n.º 19
0
def test_uri_project_id():
    """restParser '/projects/<prefix>_123456' returns that project's metadata.

    NOTE(review): this redefines test_uri_project_id from earlier in the
    file, so pytest collects only this version.
    """
    attrs_a = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "1234356 Test investigation"
    }
    # serialize the first project to a turtle scratch file
    with open("uri2test.ttl", 'w') as ttl:
        ttl.write(Project(uuid="_123456", attributes=attrs_a).serializeTurtle())

    attrs_b = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    # serialize the second project to a turtle scratch file
    with open("uri2test2.ttl", 'w') as ttl:
        ttl.write(Project(uuid="_654321", attributes=attrs_b).serializeTurtle())

    path = '/projects/{}_123456'.format(Query.matchPrefix(Constants.NIIRI))
    result = restParser(['uri2test.ttl', 'uri2test2.ttl'], path)

    assert type(result) == dict
    assert result["dct:description"] == "1234356 Test investigation"

    os.remove("uri2test.ttl")
    os.remove("uri2test2.ttl")
Ejemplo n.º 20
0
def test_uri_project():
    """restParser '/projects' should list both project URIs from the files."""
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)
    #save a turtle file
    with open("uritest.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    project = Project(uuid="_654321", attributes=kwargs)
    #save a turtle file
    with open("uritest2.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    result = restParser(['uritest.ttl', 'uritest2.ttl'], '/projects')

    print(result)

    # FIX: the original loop variable was named ``uuid``, shadowing a module
    # name used elsewhere in this file; a plain copy avoids the shadowing
    project_uuids = list(result)

    assert type(result) == list
    assert len(project_uuids) == 2
    assert str(Constants.NIDM_URL) + "_123456" in project_uuids
    assert str(Constants.NIDM_URL) + "_654321" in project_uuids

    os.remove("uritest.ttl")
    os.remove("uritest2.ttl")
Ejemplo n.º 21
0
def test_GetProjectMetadata():
    """Serialize two projects to turtle scratch files.

    NOTE(review): this redefines test_GetProjectMetadata (also defined both
    earlier and later in this file); pytest collects only the last
    definition. The original asserted nothing and left test.ttl/test2.ttl
    behind; the files are now removed.
    """
    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    project = Project(uuid="_123456", attributes=kwargs)

    #save a turtle file
    with open("test.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    kwargs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    project = Project(uuid="_654321", attributes=kwargs)

    #save a turtle file
    with open("test2.ttl", 'w') as f:
        f.write(project.serializeTurtle())

    # FIX: clean up the scratch files the original left behind
    remove("test.ttl")
    remove("test2.ttl")
Ejemplo n.º 22
0
def test_GetProjectMetadata():
    """Serialize two projects; the GetProjectMetadata assertions are still WIP,
    so for now this only verifies serialization succeeds and cleans up."""
    first_attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation"
    }
    with open("test_gpm.ttl", 'w') as out:
        out.write(Project(uuid="_123456",
                          attributes=first_attrs).serializeTurtle())

    second_attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseIII",
        Constants.NIDM_PROJECT_IDENTIFIER: 1200,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation2"
    }
    with open("test2_gpm.ttl", 'w') as out:
        out.write(Project(uuid="_654321",
                          attributes=second_attrs).serializeTurtle())

    #WIP test = Query.GetProjectMetadata(["test.ttl", "test2.ttl"])

    #assert URIRef(Constants.NIDM + "_654321") in test
    #assert URIRef(Constants.NIDM + "_123456") in test
    #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "1200") in test
    #assert URIRef(Constants.NIDM_PROJECT_IDENTIFIER + "9610") in test
    #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseII")) in test
    #assert URIRef((Constants.NIDM_PROJECT_NAME + "FBIRN_PhaseIII")) in test
    #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation")) in test
    #assert URIRef((Constants.NIDM_PROJECT_DESCRIPTION + "Test investigation2")) in test

    remove("test_gpm.ttl")
    remove("test2_gpm.ttl")
Ejemplo n.º 23
0
def main(argv):
    """CSV -> NIDM converter entry point.

    Parses arguments, loads the CSV with pandas, maps each column to a term
    via map_variables_to_terms (optionally skipping interactive concept
    association), then either appends assessment data to an existing NIDM
    file (-nidm) for subjects already present in it, or builds a brand-new
    NIDM file with one session/acquisition per CSV row, writing turtle to
    the -out path.
    """
    parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.  Note, you must obtain an API key to Interlex by signing up \
     for an account at scicrunch.org then going to My Account and API Keys.  Then set the environment \
     variable INTERLEX_API_KEY with your key.')

    parser.add_argument('-csv', dest='csv_file', required=True, help="Full path to CSV file to convert")
    # parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map',required=False,help="Full path to user-suppled JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional full path of NIDM file to add CSV->NIDM converted graph to")
    parser.add_argument('-no_concepts', action='store_true', required=False, help='If this flag is set then no concept associations will be'
                                'asked of the user.  This is useful if you already have a -json_map specified without concepts and want to'
                                'simply run this program to get a NIDM file with user interaction to associate concepts.')
    # parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required')
    # parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced')
    # parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag, when set NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-log','--log', dest='logfile',required=False, default=None, help="full path to directory to save log file. Log file name is csv2nidm_[arg.csv_file].log")
    parser.add_argument('-out', dest='output_file', required=True, help="Full path with filename to save NIDM file")
    args = parser.parse_args()



    #open CSV file and load into
    df = pd.read_csv(args.csv_file)
    #temp = csv.reader(args.csv_file)
    #df = pd.DataFrame(temp)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    # if user did not specify -no_concepts then associate concepts interactively with user
    if not args.no_concepts:
        column_to_terms, cde = map_variables_to_terms(df=df,  assessment_name=basename(args.csv_file),directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map)
    # run without concept mappings
    else:
        column_to_terms, cde = map_variables_to_terms(df=df, assessment_name=basename(args.csv_file),
                                                      directory=dirname(args.output_file), output_file=args.output_file,
                                                      json_file=args.json_map, associate_concepts=False)

    # NOTE(review): logging is configured only *after* the term-mapping step
    # above, so messages emitted during mapping are not captured — confirm
    # this ordering is intended
    if args.logfile is not None:
        logging.basicConfig(filename=join(args.logfile,'csv2nidm_' + os.path.splitext(os.path.basename(args.csv_file))[0] + '.log'), level=logging.DEBUG)
        # add some logging info
        logging.info("csv2nidm %s" %args)


    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #get list of session objects
        session_objs=project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                # keys are repr()-style named tuples produced by the mapping
                # step; eval() assumes they are trusted, well-formed input
                key_tuple = eval(key)
                #id_field=key
                id_field = key_tuple.variable
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                df = pd.read_csv(args.csv_file,dtype={id_field : str})
                break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            # Make sure user selected one of the options.  If not present user with selection input again
            # NOTE(review): after the loop ``option`` is column_count+1, so a
            # selection equal to ``option`` passes this check but indexes past
            # df.columns; also the re-prompt string contains no % placeholder,
            # so "..." % option would raise TypeError — confirm and fix
            while (not selection.isdigit()) or (int(selection) > int(option)):
                # Wait for user input
                selection = input("Please select the subject ID field from the list above: \t" % option)
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file,dtype={id_field : str})



        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        print("Querying for existing participants in NIDM graph....")
        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        #print(query)
        qres = rdf_graph.query(query)


        for row in qres:
            logging.info("found existing participant %s \t %s" %(row[0],row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be carefull about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            # NOTE(review): str.contains does a substring match, so id "12"
            # would also match "123" — confirm exact-match is not required
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index)==0)):

                #NIDM document sesssion uuid
                session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                acq=AssessmentAcquisition(session=nidm_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

                # add git-annex info if exists
                num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=dirname(args.csv_file))
                # if there aren't any git annex sources then just store the local directory information
                if num_sources == 0:
                    # WIP: add absolute location of BIDS directory on disk for later finding of files
                    acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file})

                # store file to acq_entity
                acq_entity.add_attributes({Constants.NIDM_FILENAME:basename(args.csv_file)})

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable==id_field:
                        continue
                    else:
                        # skip empty/falsy cells rather than storing them
                        if not csv_row[row_variable].values[0]:
                            continue


                        add_attributes_with_cde(acq_entity, cde, row_variable, csv_row[row_variable].values[0])



                continue

        print ("Adding CDEs to graph....")
        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')
        rdf_graph = rdf_graph + cde

        print("Backing up original NIDM file...")
        copy2(src=args.nidm_file,dst=args.nidm_file+".bak")
        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.nidm_file,format='turtle')

    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project=Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME:args.csv_file})


        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            # using skos:sameAs relationship to associate subject identifier variable from csv with a known term
            # for subject IDs
            if 'sameAs' in column_to_terms[key]:
                if Constants.NIDM_SUBJECTID.uri == column_to_terms[key]['sameAs']:
                    # keys are repr()-style named tuples; eval() assumes
                    # trusted, well-formed input from the mapping step
                    key_tuple = eval(key)
                    id_field=key_tuple.variable
                    #make sure id_field is a string for zero-padded subject ids
                    #re-read data file with constraint that key field is read as string
                    df = pd.read_csv(args.csv_file,dtype={id_field : str})
                    break

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            # Make sure user selected one of the options.  If not present user with selection input again
            # NOTE(review): same issues as the branch above — off-by-one upper
            # bound and a %-format with no placeholder in the re-prompt
            while (not selection.isdigit()) or (int(selection) > int(option)):
                # Wait for user input
                selection = input("Please select the subject ID field from the list above: \t" % option)
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            df = pd.read_csv(args.csv_file,dtype={id_field : str})


        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session=Session(project)

            #create and acquisition activity and entity
            acq=AssessmentAcquisition(session)
            acq_entity=AssessmentObject(acq)

            #create prov:Agent for subject
            #acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']}))

            # add git-annex info if exists
            num_sources = addGitAnnexSources(obj=acq_entity,filepath=args.csv_file,bids_root=os.path.dirname(args.csv_file))
            # if there aren't any git annex sources then just store the local directory information
            if num_sources == 0:
                # WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + args.csv_file})

            # store file to acq_entity
            acq_entity.add_attributes({Constants.NIDM_FILENAME : basename(args.csv_file)})


            #store other data from row with columns_to_term mappings
            for row_variable,row_data in csv_row.iteritems():
                if not row_data:
                    continue

                #check if row_variable is subject id, if so skip it
                if row_variable==id_field:
                    ### WIP: Check if agent already exists with the same ID.  If so, use it else create a new agent

                    #add qualified association with person
                    acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:str(row_data)})),role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    add_attributes_with_cde(acq_entity, cde, row_variable, row_data)

                    #print(project.serializeTurtle())

        # convert to rdflib Graph and add CDEs
        rdf_graph = Graph()
        rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')
        rdf_graph = rdf_graph + cde

        print("Writing NIDM file....")
        rdf_graph.serialize(destination=args.output_file,format='turtle')
Ejemplo n.º 24
0
def main(argv):
    """Convert a BIDS MRI dataset to a NIDM-Experiment RDF (Turtle) document.

    Reads dataset_description.json, participants.tsv, per-subject NIfTI scans
    (anat/func/dwi) with their JSON sidecars, task events files, and any
    phenotype TSV/JSON pairs.  Builds a NIDM Project graph and writes it to
    the -o output file, plus a DOT-rendered PNG of the graph.

    NOTE(review): the *argv* parameter is never used -- ArgumentParser reads
    sys.argv directly.  Confirm callers do not expect argv to be honored.
    """
    parser = ArgumentParser(
        description=
        'This program will convert a BIDS MRI dataset to a NIDM-Experiment \
        RDF document.  It will parse phenotype information and simply store variables/values \
        and link to the associated json data dictionary file.')

    parser.add_argument('-d',
                        dest='directory',
                        required=True,
                        help="Path to BIDS dataset directory")
    parser.add_argument('-o',
                        dest='outputfile',
                        default="nidm.ttl",
                        help="NIDM output turtle file")
    args = parser.parse_args()

    directory = args.directory
    outputfile = args.outputfile

    #importlib.reload(sys)
    #sys.setdefaultencoding('utf8')

    #Parse dataset_description.json file in BIDS directory
    # (required by the BIDS spec; raises if missing -- no fallback here)
    with open(os.path.join(directory,
                           'dataset_description.json')) as data_file:
        dataset = json.load(data_file)
    #print(dataset_data)

    #create project / nidm-exp doc
    project = Project()

    #add various attributes if they exist in BIDS dataset
    for key in dataset:
        #print(key)
        #if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                # list-valued entries (e.g. multiple authors) are flattened
                # into a single string by plain concatenation
                project.add_attributes({
                    BIDS_Constants.dataset_description[key]:
                    "".join(dataset[key])
                })
            else:
                project.add_attributes(
                    {BIDS_Constants.dataset_description[key]: dataset[key]})

    #create empty dictinary for sessions where key is subject id and used later to link scans to same session as demographics
    session = {}
    #Parse participants.tsv file in BIDS directory and create study and acquisition objects
    with open(os.path.join(directory, 'participants.tsv')) as csvfile:
        participants_data = csv.DictReader(csvfile, delimiter='\t')
        #print(participants_data.fieldnames)
        for row in participants_data:
            #create session object for subject to be used for participant metadata and image data
            #parse subject id from "sub-XXXX" string
            # NOTE(review): assumes participant_id always has the "sub-"
            # prefix; a bare ID would make subjid[1] raise IndexError -- confirm
            subjid = row['participant_id'].split("-")
            session[subjid[1]] = Session(project)

            #add acquisition object
            acq = Acquisition(session=session[subjid[1]])
            acq_entity = DemographicsAcquisitionObject(acquisition=acq)
            participant = acq.add_person(role=Constants.NIDM_PARTICIPANT,
                                         attributes=({
                                             Constants.NIDM_SUBJECTID:
                                             row['participant_id']
                                         }))

            for key, value in row.items():
                #for now only convert variables in participants.tsv file who have term mappings in BIDS_Constants.py
                if key in BIDS_Constants.participants:
                    acq_entity.add_attributes(
                        {BIDS_Constants.participants[key]: value})

    #get BIDS layout
    bids_layout = BIDSLayout(directory)

    #create acquisition objects for each scan for each subject

    #loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        #skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue
        # NOTE(review): file_tpl.modality / file_tpl._fields / extensions=
        # below are the legacy pybids (<0.7) API -- confirm the pinned
        # pybids version supports them
        for file_tpl in bids_layout.get(subject=subject_id,
                                        extensions=['.nii', '.nii.gz']):
            #create an acquisition activity
            acq = Acquisition(session[subject_id])

            #print(file_tpl.type)
            if file_tpl.modality == 'anat':
                #do something with anatomicals
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                #make relative link to
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)
                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            if type(json_data[key]) is list:
                                # flatten list values into one string
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })
            elif file_tpl.modality == 'func':
                #do something with functionals
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                if 'run' in file_tpl._fields:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.run})

                #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)

                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            if type(json_data[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })

                #get associated events TSV file
                if 'run' in file_tpl._fields:
                    events_file = bids_layout.get(subject=subject_id,
                                                  extensions=['.tsv'],
                                                  modality=file_tpl.modality,
                                                  task=file_tpl.task,
                                                  run=file_tpl.run)
                else:
                    events_file = bids_layout.get(subject=subject_id,
                                                  extensions=['.tsv'],
                                                  modality=file_tpl.modality,
                                                  task=file_tpl.task)
                #if there is an events file then this is task-based so create an acquisition object for the task file and link
                if events_file:
                    #for now create acquisition object and link it to the associated scan
                    events_obj = AcquisitionObject(acq)
                    #add prov type, task name as prov:label, and link to filename of events file
                    # NOTE(review): if the scan has no JSON sidecar, json_data
                    # is empty and json_data["TaskName"] raises KeyError -- confirm
                    events_obj.add_attributes({
                        PROV_TYPE:
                        Constants.NIDM_MRI_BOLD_EVENTS,
                        BIDS_Constants.json_keys["TaskName"]:
                        json_data["TaskName"],
                        Constants.NFO["filename"]:
                        events_file[0].filename
                    })
                    #link it to appropriate MR acquisition entity
                    events_obj.wasAttributedTo(acq_obj)

            elif file_tpl.modality == 'dwi':
                #do stuff with with dwi scans...
                acq_obj = MRAcquisitionObject(acq)
                acq_obj.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans[file_tpl.modality]})
                #add file link
                acq_obj.add_attributes(
                    {Constants.NIDM_FILENAME: file_tpl.filename})
                if 'run' in file_tpl._fields:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.run})
                    #get associated JSON file if exists
                json_data = bids_layout.get_metadata(file_tpl.filename)

                if json_data:
                    for key in json_data:
                        if key in BIDS_Constants.json_keys:
                            if type(json_data[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    ''.join(str(e) for e in json_data[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key]:
                                    json_data[key]
                                })

                #for bval and bvec files, what to do with those?
                #for now, create new generic acquisition objects, link the files, and associate with the one for the DWI scan?
                acq_obj_bval = AcquisitionObject(acq)
                acq_obj_bval.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bval"]})
                #add file link to bval files
                acq_obj_bval.add_attributes({
                    Constants.NIDM_FILENAME:
                    bids_layout.get_bval(file_tpl.filename)
                })
                acq_obj_bvec = AcquisitionObject(acq)
                acq_obj_bvec.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bvec"]})
                #add file link to bvec files
                acq_obj_bvec.add_attributes({
                    Constants.NIDM_FILENAME:
                    bids_layout.get_bvec(file_tpl.filename)
                })
                #link bval and bvec acquisition object entities together or is their association with enclosing activity enough?

        #Added temporarily to support phenotype files
        #for each *.tsv / *.json file pair in the phenotypes directory
        # NOTE(review): this glob runs inside the subject loop, so every
        # phenotype TSV is re-opened and fully re-read once per subject
        for tsv_file in glob.glob(os.path.join(directory, "phenotype",
                                               "*.tsv")):
            #for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            #the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    if not subjid[1] == subject_id:
                        continue
                    else:
                        #add acquisition object
                        acq = Acquisition(session=session[subjid[1]])
                        acq_entity = AssessmentAcquisitionObject(
                            acquisition=acq)
                        participant = acq.add_person(
                            role=Constants.NIDM_PARTICIPANT,
                            attributes=({
                                Constants.NIDM_SUBJECTID:
                                row['participant_id']
                            }))

                        for key, value in row.items():
                            if not key == "participant_id":
                                #for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes(
                                    {Constants.BIDS[key]: value})

                        #link TSV file
                        acq_entity.add_attributes(
                            {Constants.NIDM_FILENAME: tsv_file})
                        #link associated JSON file if it exists
                        data_dict = os.path.join(
                            directory, "phenotype",
                            os.path.splitext(os.path.basename(tsv_file))[0] +
                            ".json")
                        if os.path.isfile(data_dict):
                            acq_entity.add_attributes(
                                {Constants.BIDS["data_dictionary"]: data_dict})

    #serialize graph
    #print(project.graph.get_provn())
    with open(outputfile, 'w') as f:
        f.write(project.serializeTurtle())
        #f.write(project.graph.get_provn())
    #save a DOT graph as PNG
    project.save_DotGraph(str(outputfile + ".png"), format="png")
Ejemplo n.º 25
0
def main(argv):
    """Annotate a CSV file's variables with terms and write them to a NIDM file.

    Maps CSV column names to terms (via map_variables_to_terms), then either
    merges the assessment data into an existing NIDM file (-nidm, matching
    subjects by ID) or creates a new NIDM document from scratch.  Output is
    Turtle by default, JSON-LD with -jsonld, with an optional PNG graph.

    NOTE(review): the *argv* parameter is never used -- ArgumentParser reads
    sys.argv directly.  Confirm callers do not expect argv to be honored.
    """
    parser = ArgumentParser(description='This program will load in a CSV file and iterate over the header \
     variable names performing an elastic search of https://scicrunch.org/ for NIDM-ReproNim \
     tagged terms that fuzzy match the variable names.  The user will then interactively pick \
     a term to associate with the variable name.  The resulting annotated CSV data will \
     then be written to a NIDM data file.')

    parser.add_argument('-csv', dest='csv_file', required=True, help="Path to CSV file to convert")
    parser.add_argument('-ilxkey', dest='key', required=True, help="Interlex/SciCrunch API key to use for query")
    parser.add_argument('-json_map', dest='json_map',required=False,help="User-suppled JSON file containing variable-term mappings.")
    parser.add_argument('-nidm', dest='nidm_file', required=False, help="Optional NIDM file to add CSV->NIDM converted graph to")
    #parser.add_argument('-owl', action='store_true', required=False, help='Optionally searches NIDM OWL files...internet connection required')
    parser.add_argument('-png', action='store_true', required=False, help='Optional flag, when set a PNG image file of RDF graph will be produced')
    parser.add_argument('-jsonld', action='store_true', required=False, help='Optional flag, when set NIDM files are saved as JSON-LD instead of TURTLE')
    parser.add_argument('-out', dest='output_file', required=True, help="Filename to save NIDM file")
    args = parser.parse_args()

    #open CSV file and load into
    df = pd.read_csv(args.csv_file)

    #maps variables in CSV file to terms
    #if args.owl is not False:
    #    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map, owl_file=args.owl)
    #else:
    column_to_terms = map_variables_to_terms(df=df, apikey=args.key, directory=dirname(args.output_file), output_file=args.output_file, json_file=args.json_map)



    #If user has added an existing NIDM file as a command line parameter then add to existing file for subjects who exist in the NIDM file
    if args.nidm_file:
        print("Adding to NIDM file...")
        #read in NIDM file
        project = read_nidm(args.nidm_file)
        #get list of session objects
        session_objs=project.get_sessions()

        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        # (interactive prompt; 1-based menu converted back to a 0-based index)
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]
            #make sure id_field is a string for zero-padded subject ids
            #re-read data file with constraint that key field is read as string
            #df = pd.read_csv(args.csv_file,dtype={id_field : str})



        #use RDFLib here for temporary graph making query easier
        rdf_graph = Graph()
        rdf_graph_parse = rdf_graph.parse(source=StringIO(project.serializeTurtle()),format='turtle')

        #find subject ids and sessions in NIDM document
        query = """SELECT DISTINCT ?session ?nidm_subj_id ?agent
                    WHERE {
                        ?activity prov:wasAssociatedWith ?agent ;
                            dct:isPartOf ?session  .
                        ?agent rdf:type prov:Agent ;
                            ndar:src_subject_id ?nidm_subj_id .
                    }"""
        #print(query)
        qres = rdf_graph_parse.query(query)


        for row in qres:
            # row = (session uuid, subject id, agent uri)
            print('%s \t %s' %(row[0],row[1]))
            #find row in CSV file with subject id matching agent from NIDM file

            #csv_row = df.loc[df[id_field]==type(df[id_field][0])(row[1])]
            #find row in CSV file with matching subject id to the agent in the NIDM file
            #be carefull about data types...simply type-change dataframe subject id column and query to strings.
            #here we're removing the leading 0's from IDs because pandas.read_csv strips those unless you know ahead of
            #time which column is the subject id....
            # NOTE(review): str.contains is a substring match, so after
            # stripping leading zeros ID "12" would also match "112" -- confirm
            # IDs are distinct enough for this to be safe
            csv_row = df.loc[df[id_field].astype('str').str.contains(str(row[1]).lstrip("0"))]

            #if there was data about this subject in the NIDM file already (i.e. an agent already exists with this subject id)
            #then add this CSV assessment data to NIDM file, else skip it....
            if (not (len(csv_row.index)==0)):

                #NIDM document sesssion uuid
                session_uuid = row[0]

                #temporary list of string-based URIs of session objects from API
                temp = [o.identifier._uri for o in session_objs]
                #get session object from existing NIDM file that is associated with a specific subject id
                #nidm_session = (i for i,x in enumerate([o.identifier._uri for o in session_objs]) if x == str(session_uuid))
                nidm_session = session_objs[temp.index(str(session_uuid))]
                #for nidm_session in session_objs:
                #    if nidm_session.identifier._uri == str(session_uuid):
                #add an assessment acquisition for the phenotype data to session and associate with agent
                acq=AssessmentAcquisition(session=nidm_session)
                #add acquisition entity for assessment
                acq_entity = AssessmentObject(acquisition=acq)
                #add qualified association with existing agent
                # (row[2] is the existing agent's URI from the SPARQL query)
                acq.add_qualified_association(person=row[2],role=Constants.NIDM_PARTICIPANT)

                #store other data from row with columns_to_term mappings
                for row_variable in csv_row:
                    #check if row_variable is subject id, if so skip it
                    if row_variable==id_field:
                        continue
                    else:
                        # NOTE(review): skips all falsy values, which also
                        # drops legitimate 0 / 0.0 entries -- confirm intended
                        if not csv_row[row_variable].values[0]:
                            continue
                        #get column_to_term mapping uri and add as namespace in NIDM document
                        #provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"])
                        acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]), ""):csv_row[row_variable].values[0]})
                # this `continue` is the last statement of the loop body and
                # therefore has no effect
                continue

        #serialize NIDM file
        # NOTE(review): unlike the else-branch below, the PNG is written here
        # unconditionally rather than only when -png was given -- confirm
        with open(args.nidm_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())

            project.save_DotGraph(str(args.nidm_file + ".png"), format="png")



    else:
        print("Creating NIDM file...")
        #If user did not choose to add this data to an existing NIDM file then create a new one for the CSV data
        #create empty project
        project=Project()

        #simply add name of file to project since we don't know anything about it
        project.add_attributes({Constants.NIDM_FILENAME:args.csv_file})


        #look at column_to_terms dictionary for NIDM URL for subject id  (Constants.NIDM_SUBJECTID)
        id_field=None
        for key, value in column_to_terms.items():
            if Constants.NIDM_SUBJECTID._str == column_to_terms[key]['label']:
                id_field=key
                #make sure id_field is a string for zero-padded subject ids
                #re-read data file with constraint that key field is read as string
                #df = pd.read_csv(args.csv_file,dtype={id_field : str})

        #if we couldn't find a subject ID field in column_to_terms, ask user
        if id_field is None:
            option=1
            for column in df.columns:
                print("%d: %s" %(option,column))
                option=option+1
            selection=input("Please select the subject ID field from the list above: ")
            id_field=df.columns[int(selection)-1]


        #iterate over rows and store in NIDM file
        for csv_index, csv_row in df.iterrows():
            #create a session object
            session=Session(project)

            #create and acquisition activity and entity
            acq=AssessmentAcquisition(session)
            acq_entity=AssessmentObject(acq)



            #store other data from row with columns_to_term mappings
            # NOTE(review): Series.iteritems() was removed in pandas 2.0
            # (use .items()) -- confirm the pinned pandas version
            for row_variable,row_data in csv_row.iteritems():
                # skips falsy values, which also drops legitimate 0 / 0.0
                if not row_data:
                    continue
                #check if row_variable is subject id, if so skip it
                if row_variable==id_field:
                    #add qualified association with person
                    acq.add_qualified_association(person= acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row_data})),role=Constants.NIDM_PARTICIPANT)

                    continue
                else:
                    #get column_to_term mapping uri and add as namespace in NIDM document
                    acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(row_variable)), column_to_terms[row_variable]["url"]),""):row_data})
                    #print(project.serializeTurtle())

        #serialize NIDM file
        with open(args.output_file,'w') as f:
            print("Writing NIDM file...")
            if args.jsonld:
                f.write(project.serializeJSONLD())
            else:
                f.write(project.serializeTurtle())
            if args.png:
                project.save_DotGraph(str(args.output_file + ".png"), format="png")
Ejemplo n.º 26
0
def saveTestFile(file_name, data):
    """Build a test Project with a deterministic uuid and persist it.

    The uuid is the file name prefixed with "_123_" so test artifacts are
    easy to identify; delegates the actual write to saveProject.
    """
    test_project = Project(uuid="_123_{}".format(file_name), attributes=data)
    return saveProject(file_name, test_project)
Ejemplo n.º 27
0
def bidsmri2project(directory, args):
    #Parse dataset_description.json file in BIDS directory
    if (os.path.isdir(os.path.join(directory))):
        try:
            with open(os.path.join(directory,
                                   'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            logging.critical(
                "Cannot find dataset_description.json file which is required in the BIDS spec"
            )
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!" %
                         os.path.join(directory))
        exit("-1")

    #create project / nidm-exp doc
    project = Project()

    #add various attributes if they exist in BIDS dataset
    for key in dataset:
        #if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                project.add_attributes({
                    BIDS_Constants.dataset_description[key]:
                    "".join(dataset[key])
                })
            else:
                project.add_attributes(
                    {BIDS_Constants.dataset_description[key]: dataset[key]})
        #add absolute location of BIDS directory on disk for later finding of files which are stored relatively in NIDM document
        project.add_attributes({Constants.PROV['Location']: directory})

    #get BIDS layout
    bids_layout = BIDSLayout(directory)

    #create empty dictinary for sessions where key is subject id and used later to link scans to same session as demographics
    session = {}
    participant = {}
    #Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(os.path.join(directory, 'participants.tsv')):
        with open(os.path.join(directory, 'participants.tsv')) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            #logic to map variables to terms.#########################################################################################################

            #first iterate over variables in dataframe and check which ones are already mapped as BIDS constants and which are not.  For those that are not
            #we want to use the variable-term mapping functions to help the user do the mapping
            #iterate over columns
            mapping_list = []
            column_to_terms = {}
            for field in participants_data.fieldnames:

                #column is not in BIDS_Constants
                if not (field in BIDS_Constants.participants):
                    #add column to list for column_to_terms mapping
                    mapping_list.append(field)

            #do variable-term mappings
            if ((args.json_map != False) or (args.key != None)):

                #if user didn't supply a json mapping file but we're doing some variable-term mapping create an empty one for column_to_terms to use
                if args.json_map == False:
                    #defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                    # if participants.json file doesn't exist then run without json mapping file
                    if not os.path.isfile(
                            os.path.join(directory, 'participants.json')):
                        #maps variables in CSV file to terms
                        temp = DataFrame(columns=mapping_list)

                        column_to_terms, cde = map_variables_to_terms(
                            directory=directory,
                            assessment_name='participants.tsv',
                            df=temp,
                            apikey=args.key,
                            output_file=os.path.join(directory,
                                                     'participants.json'))
                    else:
                        #maps variables in CSV file to terms
                        temp = DataFrame(columns=mapping_list)
                        column_to_terms, cde = map_variables_to_terms(
                            directory=directory,
                            assessment_name='participants.tsv',
                            df=temp,
                            apikey=args.key,
                            output_file=os.path.join(directory,
                                                     'participants.json'),
                            json_file=os.path.join(directory,
                                                   'participants.json'))

                else:
                    #maps variables in CSV file to terms
                    temp = DataFrame(columns=mapping_list)
                    column_to_terms, cde = map_variables_to_terms(
                        directory=directory,
                        assessment_name='participants.tsv',
                        df=temp,
                        apikey=args.key,
                        output_file=os.path.join(directory,
                                                 'participants.json'),
                        json_file=args.json_map)

            for row in participants_data:
                #create session object for subject to be used for participant metadata and image data
                #parse subject id from "sub-XXXX" string
                temp = row['participant_id'].split("-")
                #for ambiguity in BIDS datasets.  Sometimes participant_id is sub-XXXX and othertimes it's just XXXX
                if len(temp) > 1:
                    subjid = temp[1]
                else:
                    subjid = temp[0]
                logging.info(subjid)
                session[subjid] = Session(project)

                #add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(
                    attributes=({
                        Constants.NIDM_SUBJECTID: row['participant_id']
                    }))

                #add qualified association of participant with acquisition activity
                acq.add_qualified_association(
                    person=participant[subjid]['person'],
                    role=Constants.NIDM_PARTICIPANT)
                print(acq)

                for key, value in row.items():
                    if not value:
                        continue
                    #for variables in participants.tsv file who have term mappings in BIDS_Constants.py use those, add to json_map so we don't have to map these if user
                    #supplied arguments to map variables
                    if key in BIDS_Constants.participants:

                        #if this was the participant_id, we already handled it above creating agent / qualified association
                        if not (BIDS_Constants.participants[key]
                                == Constants.NIDM_SUBJECTID):
                            acq_entity.add_attributes(
                                {BIDS_Constants.participants[key]: value})

                    #else if user added -mapvars flag to command line then we'll use the variable-> term mapping procedures to help user map variables to terms (also used
                    # in CSV2NIDM.py)
                    else:

                        # WIP: trying to add new support for CDEs...
                        add_attributes_with_cde(prov_object=acq_entity,
                                                cde=cde,
                                                row_variable=key,
                                                value=value)
                        # if key in column_to_terms:
                        #    acq_entity.add_attributes({QualifiedName(provNamespace(Core.safe_string(None,string=str(key)), column_to_terms[key]["url"]), ""):value})
                        #else:

                        #    acq_entity.add_attributes({Constants.BIDS[key.replace(" ", "_")]:value})

    #create acquisition objects for each scan for each subject

    #loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" % subject_id)
        #skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        #check if there's a session number.  If so, store it in the session activity
        session_dirs = bids_layout.get(target='session',
                                       subject=subject_id,
                                       return_type='dir')
        #if session_dirs has entries then get any metadata about session and store in session activity

        #bids_layout.get(subject=subject_id,type='session',extensions='.tsv')
        #bids_layout.get(subject=subject_id,type='scans',extensions='.tsv')
        #bids_layout.get(extensions='.tsv',return_type='obj')

        #check whether sessions have been created (i.e. was there a participants.tsv file?  If not, create here
        if not (subject_id in session):
            session[subject_id] = Session(project)

        for file_tpl in bids_layout.get(subject=subject_id,
                                        extensions=['.nii', '.nii.gz']):
            #create an acquisition activity
            acq = MRAcquisition(session[subject_id])

            #check whether participant (i.e. agent) for this subject already exists (i.e. if participants.tsv file exists) else create one
            if not (subject_id in participant):
                participant[subject_id] = {}
                participant[subject_id]['person'] = acq.add_person(
                    attributes=({
                        Constants.NIDM_SUBJECTID: subject_id
                    }))

            #add qualified association with person
            acq.add_qualified_association(
                person=participant[subject_id]['person'],
                role=Constants.NIDM_PARTICIPANT)

            if file_tpl.entities['datatype'] == 'anat':
                #do something with anatomicals
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['datatype']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #add file link
                #make relative link to
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj.add_attributes({Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))
                #get associated JSON file if exists
                #There is T1w.json file with information
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata
                if len(json_data.info) > 0:
                    for key in json_data.info.items():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })

                #Parse T1w.json file in BIDS directory to add the attributes contained inside
                if (os.path.isdir(os.path.join(directory))):
                    try:
                        with open(os.path.join(directory,
                                               'T1w.json')) as data_file:
                            dataset = json.load(data_file)
                    except OSError:
                        logging.critical(
                            "Cannot find T1w.json file which is required in the BIDS spec"
                        )
                        exit("-1")
                else:
                    logging.critical(
                        "Error: BIDS directory %s does not exist!" %
                        os.path.join(directory))
                    exit("-1")

                #add various attributes if they exist in BIDS dataset
                for key in dataset:
                    #if key from T1w.json file is mapped to term in BIDS_Constants.py then add to NIDM object
                    if key in BIDS_Constants.json_keys:
                        if type(dataset[key]) is list:
                            acq_obj.add_attributes({
                                BIDS_Constants.json_keys[key]:
                                "".join(dataset[key])
                            })
                        else:
                            acq_obj.add_attributes(
                                {BIDS_Constants.json_keys[key]: dataset[key]})

            elif file_tpl.entities['datatype'] == 'func':
                #do something with functionals
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['datatype']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #make relative link to
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj.add_attributes({Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                if 'run' in file_tpl.entities:
                    acq_obj.add_attributes({
                        BIDS_Constants.json_keys["run"]:
                        file_tpl.entities['run']
                    })

                #get associated JSON file if exists
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata

                if len(json_data.info) > 0:
                    for key in json_data.info.items():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })
                #get associated events TSV file
                if 'run' in file_tpl.entities:
                    events_file = bids_layout.get(
                        subject=subject_id,
                        extensions=['.tsv'],
                        modality=file_tpl.entities['datatype'],
                        task=file_tpl.entities['task'],
                        run=file_tpl.entities['run'])
                else:
                    events_file = bids_layout.get(
                        subject=subject_id,
                        extensions=['.tsv'],
                        modality=file_tpl.entities['datatype'],
                        task=file_tpl.entities['task'])
                #if there is an events file then this is task-based so create an acquisition object for the task file and link
                if events_file:
                    #for now create acquisition object and link it to the associated scan
                    events_obj = AcquisitionObject(acq)
                    #add prov type, task name as prov:label, and link to filename of events file

                    events_obj.add_attributes({
                        PROV_TYPE:
                        Constants.NIDM_MRI_BOLD_EVENTS,
                        BIDS_Constants.json_keys["TaskName"]:
                        json_data["TaskName"],
                        Constants.NIDM_FILENAME:
                        getRelPathToBIDS(events_file[0].filename, directory)
                    })
                    #link it to appropriate MR acquisition entity
                    events_obj.wasAttributedTo(acq_obj)

                #Parse task-rest_bold.json file in BIDS directory to add the attributes contained inside
                if (os.path.isdir(os.path.join(directory))):
                    try:
                        with open(
                                os.path.join(
                                    directory,
                                    'task-rest_bold.json')) as data_file:
                            dataset = json.load(data_file)
                    except OSError:
                        logging.critical(
                            "Cannot find task-rest_bold.json file which is required in the BIDS spec"
                        )
                        exit("-1")
                else:
                    logging.critical(
                        "Error: BIDS directory %s does not exist!" %
                        os.path.join(directory))
                    exit("-1")

                #add various attributes if they exist in BIDS dataset
                for key in dataset:
                    #if key from task-rest_bold.json file is mapped to term in BIDS_Constants.py then add to NIDM object
                    if key in BIDS_Constants.json_keys:
                        if type(dataset[key]) is list:
                            acq_obj.add_attributes({
                                BIDS_Constants.json_keys[key]:
                                ",".join(map(str, dataset[key]))
                            })
                        else:
                            acq_obj.add_attributes(
                                {BIDS_Constants.json_keys[key]: dataset[key]})

            elif file_tpl.entities['datatype'] == 'dwi':
                #do stuff with with dwi scans...
                acq_obj = MRObject(acq)
                #add image contrast type
                if file_tpl.entities['suffix'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_CONTRAST_TYPE:
                        BIDS_Constants.scans[file_tpl.entities['suffix']]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image contrast type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['suffix'])

                #add image usage type
                if file_tpl.entities['datatype'] in BIDS_Constants.scans:
                    acq_obj.add_attributes({
                        Constants.NIDM_IMAGE_USAGE_TYPE:
                        BIDS_Constants.scans["dti"]
                    })
                else:
                    logging.info(
                        "WARNING: No matching image usage type found in BIDS_Constants.py for %s"
                        % file_tpl.entities['datatype'])
                #make relative link to
                acq_obj.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(join(file_tpl.dirname, file_tpl.filename),
                                     directory)
                })
                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                if 'run' in file_tpl._fields:
                    acq_obj.add_attributes(
                        {BIDS_Constants.json_keys["run"]: file_tpl.run})

                #get associated JSON file if exists
                json_data = (bids_layout.get(
                    suffix=file_tpl.entities['suffix'],
                    subject=subject_id))[0].metadata

                if len(json_data.info) > 0:
                    for key in json_data.info.items():
                        if key in BIDS_Constants.json_keys:
                            if type(json_data.info[key]) is list:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    ''.join(
                                        str(e) for e in json_data.info[key])
                                })
                            else:
                                acq_obj.add_attributes({
                                    BIDS_Constants.json_keys[key.replace(
                                        " ", "_")]:
                                    json_data.info[key]
                                })
                #for bval and bvec files, what to do with those?

                #for now, create new generic acquisition objects, link the files, and associate with the one for the DWI scan?
                acq_obj_bval = AcquisitionObject(acq)
                acq_obj_bval.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bval"]})
                #add file link to bval files
                acq_obj_bval.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(
                        join(file_tpl.dirname,
                             bids_layout.get_bval(file_tpl.filename)),
                        directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj_bval.add_attributes(
                    {Constants.PROV['Location']: directory})

                #add sha512 sum
                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    acq_obj_bval.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))
                acq_obj_bvec = AcquisitionObject(acq)
                acq_obj_bvec.add_attributes(
                    {PROV_TYPE: BIDS_Constants.scans["bvec"]})
                #add file link to bvec files
                acq_obj_bvec.add_attributes({
                    Constants.NIDM_FILENAME:
                    getRelPathToBIDS(
                        join(file_tpl.dirname,
                             bids_layout.get_bvec(file_tpl.filename)),
                        directory)
                })
                #WIP: add absolute location of BIDS directory on disk for later finding of files
                acq_obj_bvec.add_attributes(
                    {Constants.PROV['Location']: directory})

                if isfile(join(directory, file_tpl.dirname,
                               file_tpl.filename)):
                    #add sha512 sum
                    acq_obj_bvec.add_attributes({
                        Constants.CRYPTO_SHA512:
                        getsha512(
                            join(directory, file_tpl.dirname,
                                 file_tpl.filename))
                    })
                else:
                    logging.info(
                        "WARNINGL file %s doesn't exist! No SHA512 sum stored in NIDM files..."
                        % join(directory, file_tpl.dirname, file_tpl.filename))

                #link bval and bvec acquisition object entities together or is their association with DWI scan...

        #Added temporarily to support phenotype files
        #for each *.tsv / *.json file pair in the phenotypes directory
        #WIP: ADD VARIABLE -> TERM MAPPING HERE
        for tsv_file in glob.glob(os.path.join(directory, "phenotype",
                                               "*.tsv")):
            #for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            #the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    if not subjid[1] == subject_id:
                        continue
                    else:
                        #add acquisition object
                        acq = AssessmentAcquisition(session=session[subjid[1]])
                        #add qualified association with person
                        acq.add_qualified_association(
                            person=participant[subject_id]['person'],
                            role=Constants.NIDM_PARTICIPANT)

                        acq_entity = AssessmentObject(acquisition=acq)

                        for key, value in row.items():
                            if not value:
                                continue
                            #we're using participant_id in NIDM in agent so don't add to assessment as a triple.
                            #BIDS phenotype files seem to have an index column with no column header variable name so skip those
                            if ((not key == "participant_id") and (key != "")):
                                #for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes(
                                    {Constants.BIDS[key]: value})

                        #link TSV file
                        acq_entity.add_attributes({
                            Constants.NIDM_FILENAME:
                            getRelPathToBIDS(tsv_file, directory)
                        })
                        #WIP: add absolute location of BIDS directory on disk for later finding of files
                        acq_entity.add_attributes(
                            {Constants.PROV['Location']: directory})

                        #link associated JSON file if it exists
                        data_dict = os.path.join(
                            directory, "phenotype",
                            os.path.splitext(os.path.basename(tsv_file))[0] +
                            ".json")
                        if os.path.isfile(data_dict):
                            acq_entity.add_attributes({
                                Constants.BIDS["data_dictionary"]:
                                getRelPathToBIDS(data_dict, directory)
                            })

    return project, cde
Ejemplo n.º 28
0
def main(argv):
    """Build a small demo NIDM-Experiment document and serialize it.

    Creates a project (with a PI qualified association), one session, an MR
    acquisition with a participant, and two assessment acquisitions (one
    generic Q/A, one demographics for a second participant), then writes the
    graph as Turtle to ``test.ttl`` and as a DOT-rendered PNG to ``test.png``.

    :param argv: command-line arguments (currently unused).
    """
    # Project carrying some descriptive metadata.
    project_attrs = {
        Constants.NIDM_PROJECT_NAME: "FBIRN_PhaseII",
        Constants.NIDM_PROJECT_IDENTIFIER: 9610,
        Constants.NIDM_PROJECT_DESCRIPTION: "Test investigation",
    }
    project = Project(attributes=project_attrs)

    # String attribute via an existing namespace.
    project.add_attributes({Constants.NIDM["isFun"]: "ForMe"})

    # String attribute introducing a brand-new namespace/term pair.
    project.addLiteralAttribute("fred", "notFound", "in namespaces", "www.fred.org/")

    # Float-valued literal attribute.
    project.addLiteralAttribute("nidm", "float", float(2.34))

    # PI agent, qualified-associated with the project activity.
    pi_person = project.add_person(
        attributes={
            Constants.NIDM_FAMILY_NAME: "Keator",
            Constants.NIDM_GIVEN_NAME: "David",
        })
    project.add_qualified_association(person=pi_person, role=Constants.NIDM_PI)

    # Session attached to the project.
    session = Session(project)
    session.add_attributes({Constants.NIDM: "test"})

    # MR acquisition activity and its acquisition-object entity.
    acq_act = MRAcquisition(session=session)
    acq_entity = MRObject(acquisition=acq_act)

    # Participant agent for the MR acquisition, with qualified association.
    person = acq_act.add_person(attributes={Constants.NIDM_GIVEN_NAME: "George"})
    acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT)

    # Assessment acquisition holding two question/answer attributes,
    # reusing the same participant.
    acq_act = AssessmentAcquisition(session=session)
    acq_entity = AssessmentObject(acquisition=acq_act)
    acq_entity.add_attributes({
        Constants.NIDM["Q1"]: "Q1 Answer",
        Constants.NIDM["Q2"]: "Q2 Answer",
    })
    acq_act.add_qualified_association(person=person, role=Constants.NIDM_PARTICIPANT)

    # Demographics assessment for a second, newly created participant.
    acq_act = AssessmentAcquisition(session=session)
    acq_entity = DemographicsObject(acquisition=acq_act)
    person2 = acq_act.add_person(
        attributes={
            Constants.NIDM_FAMILY_NAME: "Doe",
            Constants.NIDM_GIVEN_NAME: "John",
        })
    acq_act.add_qualified_association(person=person2, role=Constants.NIDM_PARTICIPANT)
    acq_entity.add_attributes({Constants.NIDM_AGE: 60, Constants.NIDM_GENDER: "Male"})

    # Serialize: Turtle text file, then a DOT graph rendered as PNG.
    with open("test.ttl", 'w') as f:
        f.write(project.serializeTurtle())
    project.save_DotGraph("test.png", format="png")
Ejemplo n.º 29
0
def bidsmri2project(directory, args):
    """
    Convert a BIDS MRI dataset rooted at ``directory`` into a NIDM-Experiment graph.

    Parses dataset_description.json, participants.tsv (optionally mapping its
    variables to terms / common data elements), phenotype/*.tsv sidecar data,
    and the imaging acquisitions discovered through pybids' BIDSLayout.

    :param directory: root directory of the BIDS dataset
    :param args: parsed command-line arguments; this function reads
        ``args.json_map`` (False, or a path to an existing variable->term JSON
        mapping file) and ``args.no_concepts`` (True disables concept
        association during variable mapping) -- assumed from usage here,
        confirm against the CLI definition.
    :return: tuple ``(project, cde)`` where ``project`` is the populated NIDM
        Project object and ``cde`` is an rdflib Graph of common data elements.
    """

    # initialize empty cde graph...it may get replaced if we're doing variable to term mapping or not
    cde=Graph()

    # Parse dataset_description.json file in BIDS directory
    if (os.path.isdir(os.path.join(directory))):
        try:
            with open(os.path.join(directory,'dataset_description.json')) as data_file:
                dataset = json.load(data_file)
        except OSError:
            # OSError covers a missing/unreadable file; malformed JSON would still
            # raise json.JSONDecodeError and propagate with a traceback
            logging.critical("Cannot find dataset_description.json file which is required in the BIDS spec")
            # NOTE(review): exit("-1") prints "-1" to stderr and exits with status 1;
            # sys.exit(1) would express the intent more directly
            exit("-1")
    else:
        logging.critical("Error: BIDS directory %s does not exist!" %os.path.join(directory))
        exit("-1")

    # create project / nidm-exp doc
    project = Project()

    # if there are git annex sources then add them
    num_sources=addGitAnnexSources(obj=project.get_uuid(),bids_root=directory)
    # else just add the local path to the dataset
    if num_sources == 0:
        project.add_attributes({Constants.PROV['Location']:"file:/" + directory})


    # add various attributes if they exist in BIDS dataset
    for key in dataset:
        # if key from dataset_description file is mapped to term in BIDS_Constants.py then add to NIDM object
        if key in BIDS_Constants.dataset_description:
            if type(dataset[key]) is list:
                project.add_attributes({BIDS_Constants.dataset_description[key]:"".join(dataset[key])})
            else:
                project.add_attributes({BIDS_Constants.dataset_description[key]:dataset[key]})




    # get BIDS layout
    bids_layout = BIDSLayout(directory)


    # create empty dictinary for sessions where key is subject id and used later to link scans to same session as demographics
    session={}
    participant={}
    # Parse participants.tsv file in BIDS directory and create study and acquisition objects
    if os.path.isfile(os.path.join(directory,'participants.tsv')):
        with open(os.path.join(directory,'participants.tsv')) as csvfile:
            participants_data = csv.DictReader(csvfile, delimiter='\t')

            # logic to map variables to terms.
            # first iterate over variables in dataframe and check which ones are already mapped as BIDS constants and which are not.  For those that are not
            # we want to use the variable-term mapping functions to help the user do the mapping
            # iterate over columns
            mapping_list=[]
            column_to_terms={}
            for field in participants_data.fieldnames:

                # column is not in BIDS_Constants
                if not (field in BIDS_Constants.participants):
                    # add column to list for column_to_terms mapping
                    mapping_list.append(field)



            #if user didn't supply a json mapping file but we're doing some variable-term mapping create an empty one for column_to_terms to use
            if args.json_map == False:
                #defaults to participants.json because here we're mapping the participants.tsv file variables to terms
                # if participants.json file doesn't exist then run without json mapping file
                if not os.path.isfile(os.path.join(directory,'participants.json')):
                    #maps variables in CSV file to terms
                    temp=DataFrame(columns=mapping_list)
                    if args.no_concepts:
                        column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv',
                            df=temp,output_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False)
                    else:
                        column_to_terms,cde = map_variables_to_terms(directory=directory,assessment_name='participants.tsv',
                            df=temp,output_file=os.path.join(directory,'participants.json'),bids=True)
                else:
                    #maps variables in CSV file to terms
                    temp=DataFrame(columns=mapping_list)
                    if args.no_concepts:
                        column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                            output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True,associate_concepts=False)
                    else:
                        column_to_terms,cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                            output_file=os.path.join(directory,'participants.json'),json_file=os.path.join(directory,'participants.json'),bids=True)
            else:
                #maps variables in CSV file to terms
                temp=DataFrame(columns=mapping_list)
                if args.no_concepts:
                    column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                        output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True,associate_concepts=False)
                else:
                    column_to_terms, cde = map_variables_to_terms(directory=directory, assessment_name='participants.tsv', df=temp,
                        output_file=os.path.join(directory,'participants.json'),json_file=args.json_map,bids=True)


            for row in participants_data:
                #create session object for subject to be used for participant metadata and image data
                #parse subject id from "sub-XXXX" string
                temp = row['participant_id'].split("-")
                #for ambiguity in BIDS datasets.  Sometimes participant_id is sub-XXXX and othertimes it's just XXXX
                if len(temp) > 1:
                    subjid = temp[1]
                else:
                    subjid = temp[0]
                logging.info(subjid)
                session[subjid] = Session(project)

                #add acquisition object
                acq = AssessmentAcquisition(session=session[subjid])

                acq_entity = AssessmentObject(acquisition=acq)
                participant[subjid] = {}
                participant[subjid]['person'] = acq.add_person(attributes=({Constants.NIDM_SUBJECTID:row['participant_id']}))

                # add nfo:filename entry to assessment entity to reflect provenance of where this data came from
                acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(os.path.join(directory,'participants.tsv'),directory)})

                #add qualified association of participant with acquisition activity
                acq.add_qualified_association(person=participant[subjid]['person'],role=Constants.NIDM_PARTICIPANT)

                # if there are git annex sources for participants.tsv file then add them
                num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory)
                # else just add the local path to the dataset
                if num_sources == 0:
                    acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.tsv')})

                # if there's a JSON sidecar file then create an entity and associate it with all the assessment entities
                if os.path.isfile(os.path.join(directory,'participants.json')):
                    json_sidecar = AssessmentObject(acquisition=acq)
                    json_sidecar.add_attributes({PROV_TYPE:QualifiedName(Namespace("bids",Constants.BIDS),"sidecar_file"), Constants.NIDM_FILENAME:
                        getRelPathToBIDS(os.path.join(directory,'participants.json'),directory)})

                    # add Git Annex Sources
                    # if there are git annex sources for participants.json file then add them
                    num_sources=addGitAnnexSources(obj=json_sidecar.get_uuid(),filepath=os.path.join(directory,'participants.json'),bids_root=directory)
                    # else just add the local path to the dataset
                    if num_sources == 0:
                        json_sidecar.add_attributes({Constants.PROV['Location']:"file:/" + os.path.join(directory,'participants.json')})

                    # connect json sidecar entity with acq_entity.
                    # BUGFIX: previously this association used "if 'json_sidecar' in locals()"
                    # *after* this block, which also matched a sidecar entity created on a
                    # previous row.  Behavior is unchanged (the sidecar file's existence is
                    # invariant inside this loop) but each row now unambiguously links to
                    # the sidecar entity created for it.
                    acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_sidecar})

                for key,value in row.items():
                    if not value:
                        continue
                    #for variables in participants.tsv file who have term mappings in BIDS_Constants.py use those, add to json_map so we don't have to map these if user
                    #supplied arguments to map variables
                    if key in BIDS_Constants.participants:
                        # WIP
                        # Here we are adding to CDE graph data elements for BIDS Constants that remain fixed for each BIDS-compliant dataset

                        # participant_id itself is already modeled above as the agent /
                        # qualified association, so skip it here
                        if not (BIDS_Constants.participants[key] == Constants.NIDM_SUBJECTID):

                            # ID for BIDS variables is always the same bids:[bids variable]
                            cde_id = Constants.BIDS[key]
                            # add the data element to the CDE graph
                            cde.add((cde_id,RDF.type, Constants.NIDM['DataElement']))
                            cde.add((cde_id,RDF.type, Constants.PROV['Entity']))
                            # add some basic information about this data element
                            cde.add((cde_id,Constants.RDFS['label'],Literal(BIDS_Constants.participants[key].localpart)))
                            cde.add((cde_id,Constants.NIDM['isAbout'],URIRef(BIDS_Constants.participants[key].uri)))
                            cde.add((cde_id,Constants.NIDM['source_variable'],Literal(key)))
                            cde.add((cde_id,Constants.NIDM['description'],Literal("participant/subject identifier")))
                            cde.add((cde_id,Constants.RDFS['comment'],Literal("BIDS participants_id variable fixed in specification")))
                            cde.add((cde_id,Constants.RDFS['valueType'],URIRef(Constants.XSD["string"])))

                            acq_entity.add_attributes({cde_id:Literal(value)})

                    # else if user added -mapvars flag to command line then we'll use the variable-> term mapping procedures to help user map variables to terms (also used
                    # in CSV2NIDM.py)
                    else:

                        # WIP: trying to add new support for CDEs...
                        add_attributes_with_cde(prov_object=acq_entity,cde=cde,row_variable=key,value=value)


    # create acquisition objects for each scan for each subject

    # loop through all subjects in dataset
    for subject_id in bids_layout.get_subjects():
        logging.info("Converting subject: %s" %subject_id)
        # skip .git directories...added to support datalad datasets
        if subject_id.startswith("."):
            continue

        # check if there are a session numbers.  If so, store it in the session activity and create a new
        # sessions for these imaging acquisitions.  Because we don't know which imaging session the root
        # participants.tsv file data may be associated with we simply link the imaging acquisitions to different
        # sessions (i.e. the participants.tsv file goes into an AssessmentAcquisition and linked to a unique
        # sessions and the imaging acquisitions go into MRAcquisitions and has a unique session)
        imaging_sessions = bids_layout.get_sessions(subject=subject_id)

        # loop through each session if there is a sessions directory
        if len(imaging_sessions) > 0:
            for img_session in imaging_sessions:
                # create a new session
                ses = Session(project)
                # add session number as metadata
                ses.add_attributes({Constants.BIDS['session_number']:img_session})
                addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=ses,participant=participant, directory=directory,img_session=img_session)
        # else we have no ses-* directories in the BIDS layout
        else:
            # BUGFIX: this fallback previously ran unconditionally, creating an extra
            # metadata-less Session (and duplicated imaging acquisitions) even for
            # datasets that DO have ses-* directories; per the comment above it is
            # only meant for layouts without sessions.
            addimagingsessions(bids_layout=bids_layout,subject_id=subject_id,session=Session(project),participant=participant, directory=directory)



        # Added temporarily to support phenotype files
        # for each *.tsv / *.json file pair in the phenotypes directory
        # WIP: ADD VARIABLE -> TERM MAPPING HERE
        # NOTE(review): this loop re-reads every phenotype TSV once per subject
        # (O(subjects x rows)); rows for other subjects are skipped below
        for tsv_file in glob.glob(os.path.join(directory,"phenotype","*.tsv")):
            # for now, open the TSV file, extract the row for this subject, store it in an acquisition object and link to
            # the associated JSON data dictionary file
            with open(tsv_file) as phenofile:
                pheno_data = csv.DictReader(phenofile, delimiter='\t')
                for row in pheno_data:
                    subjid = row['participant_id'].split("-")
                    if not subjid[1] == subject_id:
                        continue
                    else:
                        # add acquisition object
                        acq = AssessmentAcquisition(session=session[subjid[1]])
                        # add qualified association with person
                        acq.add_qualified_association(person=participant[subject_id]['person'],role=Constants.NIDM_PARTICIPANT)

                        acq_entity = AssessmentObject(acquisition=acq)



                        for key,value in row.items():
                            if not value:
                                continue
                            # we're using participant_id in NIDM in agent so don't add to assessment as a triple.
                            # BIDS phenotype files seem to have an index column with no column header variable name so skip those
                            if ((not key == "participant_id") and (key != "")):
                                # for now we're using a placeholder namespace for BIDS and simply the variable names as the concept IDs..
                                acq_entity.add_attributes({Constants.BIDS[key]:value})

                        # link TSV file
                        acq_entity.add_attributes({Constants.NIDM_FILENAME:getRelPathToBIDS(tsv_file,directory)})

                        # if there are git annex sources for the phenotype TSV file then add them
                        num_sources=addGitAnnexSources(obj=acq_entity.get_uuid(),bids_root=directory)
                        # else just add the local path to the dataset
                        if num_sources == 0:
                            acq_entity.add_attributes({Constants.PROV['Location']:"file:/" + tsv_file})


                        # link associated JSON file if it exists
                        data_dict = os.path.join(directory,"phenotype",os.path.splitext(os.path.basename(tsv_file))[0]+ ".json")
                        if os.path.isfile(data_dict):
                            # if file exists, create a new entity and associate it with the appropriate activity  and a used relationship
                            # with the TSV-related entity
                            json_entity = AssessmentObject(acquisition=acq)
                            json_entity.add_attributes({PROV_TYPE:Constants.BIDS["sidecar_file"], Constants.NIDM_FILENAME:
                                getRelPathToBIDS(data_dict,directory)})

                            # add Git Annex Sources
                            # if there are git annex sources for the JSON data dictionary then add them
                            num_sources=addGitAnnexSources(obj=json_entity.get_uuid(),filepath=data_dict,bids_root=directory)
                            # else just add the local path to the dataset
                            if num_sources == 0:
                                json_entity.add_attributes({Constants.PROV['Location']:"file:/" + data_dict})

                            #connect json_entity with acq_entity
                            acq_entity.add_attributes({Constants.PROV["wasInfluencedBy"]:json_entity.get_uuid()})


    return project, cde
Ejemplo n.º 30
0
def makeTestFile(filename, params):
    """
    Build two small NIDM test projects, serialize them, merge the two graphs,
    and write the merged turtle to *filename* (plus a copy at ./agent.ttl).

    Also records person UUIDs in the module-level globals ``test_person_uuid``
    and ``test_p2_subject_uuids`` so later tests can assert against them.

    :param filename: path of the merged turtle file to write
    :param params: dict of optional overrides (NIDM_PROJECT_NAME,
        NIDM_PROJECT_IDENTIFIER, NIDM_PROJECT_DESCRIPTION, PROJECT_UUID,
        PROJECT2_UUID, SESSION_UUID, SESSION2_UUID); defaults used when absent
    """
    global test_person_uuid, test_p2_subject_uuids

    # each "params.get(..., False) or default" falls back to the default
    # whenever the key is missing OR its value is falsy
    nidm_project_name = params.get('NIDM_PROJECT_NAME',
                                   False) or "Project_name_sample"
    nidm_project_identifier = params.get('NIDM_PROJECT_IDENTIFIER',
                                         False) or 9610
    # NOTE(review): both project identifiers read the same
    # 'NIDM_PROJECT_IDENTIFIER' key, so an override sets both — confirm intended
    nidm_project2_identifier = params.get('NIDM_PROJECT_IDENTIFIER',
                                          False) or 550
    nidm_project_description = params.get(
        'NIDM_PROJECT_DESCRIPTION', False) or "1234356 Test investigation"
    project_uuid = params.get('PROJECT_UUID', False) or "_proj1"
    project_uuid2 = params.get('PROJECT2_UUID', False) or "_proj2"
    session_uuid = params.get('SESSION_UUID', False) or "_ses1"
    session_uuid2 = params.get('SESSION2_UUID', False) or "_ses2"
    # attribute dictionaries for the two test projects
    p1kwargs = {
        Constants.NIDM_PROJECT_NAME: nidm_project_name,
        Constants.NIDM_PROJECT_IDENTIFIER: nidm_project_identifier,
        Constants.NIDM_PROJECT_DESCRIPTION: nidm_project_description
    }
    p2kwargs = {
        Constants.NIDM_PROJECT_NAME: nidm_project_name,
        Constants.NIDM_PROJECT_IDENTIFIER: nidm_project2_identifier,
        Constants.NIDM_PROJECT_DESCRIPTION: nidm_project_description
    }

    # project 1: one session, three acquisitions
    project = Project(uuid=project_uuid, attributes=p1kwargs)
    session = Session(uuid=session_uuid, project=project)
    acq = Acquisition(uuid="_acq1", session=session)
    acq2 = Acquisition(uuid="_acq2", session=session)
    # NOTE(review): acq3 reuses uuid "_acq2" (same as acq2) — looks like a
    # copy-paste slip for "_acq3"; confirm it is intentional test data
    acq3 = Acquisition(uuid="_acq2", session=session)

    person = acq.add_person(attributes=({Constants.NIDM_SUBJECTID: "a1_9999"}))
    # stash person 1's bare UUID (niiri: prefix stripped) for later assertions
    test_person_uuid = (str(person.identifier)).replace("niiri:", "")

    acq.add_qualified_association(person=person,
                                  role=Constants.NIDM_PARTICIPANT)

    person2 = acq2.add_person(attributes=({
        Constants.NIDM_SUBJECTID: "a1_8888"
    }))
    acq2.add_qualified_association(person=person2,
                                   role=Constants.NIDM_PARTICIPANT)
    person3 = acq3.add_person(attributes=({
        Constants.NIDM_SUBJECTID: "a2_7777"
    }))
    # NOTE(review): person3 is associated with acq2 rather than acq3, leaving
    # acq3 with no qualified association — verify this is the intended fixture
    acq2.add_qualified_association(person=person3,
                                   role=Constants.NIDM_PARTICIPANT)

    # project 2: one session, two acquisitions
    project2 = Project(uuid=project_uuid2, attributes=p2kwargs)
    session2 = Session(uuid=session_uuid2, project=project2)
    acq4 = Acquisition(uuid="_acq3", session=session2)
    acq5 = Acquisition(uuid="_acq4", session=session2)

    person4 = acq4.add_person(attributes=({
        Constants.NIDM_SUBJECTID: "a3_6666"
    }))
    acq4.add_qualified_association(person=person4,
                                   role=Constants.NIDM_PARTICIPANT)
    person5 = acq5.add_person(attributes=({
        Constants.NIDM_SUBJECTID: "a4_5555"
    }))
    acq5.add_qualified_association(person=person5,
                                   role=Constants.NIDM_PARTICIPANT)

    # now add some assessment instrument data
    addData(
        acq, {
            Constants.NIDM_AGE: 9,
            Constants.NIDM_HANDEDNESS: "R",
            Constants.NIDM_DIAGNOSIS: "Anxiety"
        })
    addData(
        acq2, {
            Constants.NIDM_AGE: 8,
            Constants.NIDM_HANDEDNESS: "L",
            Constants.NIDM_DIAGNOSIS: "ADHD"
        })
    addData(
        acq4, {
            Constants.NIDM_AGE: 7,
            Constants.NIDM_HANDEDNESS: "A",
            Constants.NIDM_DIAGNOSIS: "Depression"
        })
    addData(
        acq5, {
            Constants.NIDM_AGE: 6,
            Constants.NIDM_HANDEDNESS: "R",
            Constants.NIDM_DIAGNOSIS: "Depression"
        })

    # record project-2 subject UUIDs (niiri: prefix stripped); assumes the
    # global list was initialized by the caller/module
    test_p2_subject_uuids.append(
        (str(person4.identifier)).replace("niiri:", ""))
    test_p2_subject_uuids.append(
        (str(person5.identifier)).replace("niiri:", ""))

    # serialize each project to its own temporary turtle file
    with open("a.ttl", 'w') as f:
        f.write(project.graph.serialize(None, format='rdf', rdf_format='ttl'))
    with open("b.ttl", 'w') as f:
        f.write(project2.graph.serialize(None, format='rdf', rdf_format='ttl'))

    #create empty graph
    graph = Graph()
    # merge both temporary files into a single rdflib graph
    for nidm_file in ("a.ttl", "b.ttl"):
        tmp = Graph()
        graph = graph + tmp.parse(nidm_file,
                                  format=util.guess_format(nidm_file))

    # write the merged graph to the requested output file
    graph.serialize(filename, format='turtle')

    # clean up the per-project temporary files
    os.unlink("a.ttl")
    os.unlink("b.ttl")

    with open(filename, "r") as f:
        x = f.read()

    # keep an extra copy of the merged turtle at a fixed path for other tests
    with open("./agent.ttl", "w") as f:
        f.write(x)