Beispiel #1
0
def create_descriptor():
    """
    Returns a simple but complete ISA-JSON 1.0 descriptor for illustration.

    The descriptor holds one investigation containing one study. The study
    has a single source material, three samples derived from it, one sample
    collection process, and one assay in which every sample goes through an
    extraction process followed by a sequencing process.

    :returns: the investigation serialized by ``json.dumps`` using
        ``ISAJSONEncoder``
    :rtype: str
    """

    # Create an empty Investigation object and set some values to the
    # instance variables.

    investigation = Investigation()
    investigation.identifier = "1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we " \
        "want to demonstrate how to use the object's instance variables " \
        "to set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation
    # by adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor " \
        "to set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # This is to show that ISA Comments can be used to annotate ISA objects,
    # here an ISA Study.
    study.comments.append(Comment(name="Study Start Date", value="Sun"))

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors.
    # A design descriptor is an Ontology Annotation describing the kind of
    # study at hand. Ontology Annotations should typically reference an
    # Ontology Source. We demonstrate a mix of using the class constructors
    # and setting values with instance variables. Note that the
    # OntologyAnnotation object 'intervention_design' links its 'term_source'
    # directly to the 'obi' object instance. To ensure the OntologySource
    # is encapsulated in the descriptor, it is added to a list of
    # 'ontology_source_references' in the Investigation object. The
    # 'intervention_design' object is then added to the list of
    # 'design_descriptors' held by the Study object.

    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)

    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.
    # The source is attached exactly once: attaching it a second time would
    # duplicate it in 'study.sources' and, further down, in the inputs of the
    # sample collection process.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we prepare a prototype Sample derived from the source. Further
    # down, the utility function batch_create_materials() clones this
    # prototype three times; the resulting samples are expected to be named
    # 'sample_material-0', 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))

    # Annotate the ISA Source material with its organism.
    source.characteristics.append(characteristic_organism)

    # Declare a new ontology and add it to the list of resources used.
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    # Prepare an ISA Characteristic object (~Material Property) to annotate
    # the sample materials.
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
            "123245"))

    prototype_sample.characteristics.append(characteristic_organ)

    # Create a batch of 3 samples from the prototype.
    study.samples = batch_create_materials(prototype_sample, n=3)

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must be
    # declared before we describe Processes, as a processing event of some sort
    # must execute some defined protocol. In the case of the class model,
    # Protocols should therefore be declared before Processes in order for the
    # Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Add dummy Comment[] entries to the ISA Protocol object.
    study.protocols[0].comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    study.protocols[0].comments.append(
        Comment(name="Study End Date", value="2017-08-11"))

    # Create an ISA Study Factor object and annotate it with Comments, to
    # exercise the serialization of Comments attached to ISA objects.
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    print(f.comments[0].name, "|", f.comments[0].value)

    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.

    study.process_sequence.append(sample_collection_process)

    # IMPORTANT: remember to populate the list of ontology categories used to
    # annotate ISA Materials in a Study. Both categories used above are
    # registered: 'Organism' (on the source) and 'OrganismPart' (on the
    # samples).
    study.characteristic_categories.append(characteristic_organism.category)
    study.characteristic_categories.append(characteristic_organ.category)

    # Next, we build an Assay object and attach two protocols,
    # extraction and sequencing. The measurement and technology types
    # describe the assay as a whole, so they are set once here rather than
    # on every iteration of the loop below.

    assay = Assay(filename="a_assay.txt")
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = OntologyAnnotation(
        term="nucleotide sequencing")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three
    # graphs are NOT interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # Ensure Processes are linked forward and backward. The extraction
        # comes first and the sequencing second, matching the data flow
        # (sample -> extract -> data file).
        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    # attach the assay to the study
    study.assays.append(assay)

    import json
    from isatools.isajson import ISAJSONEncoder

    # To write JSON out, use the ISAJSONEncoder class with the json package
    # and use dump() or dumps(). Note that the extra parameters sort_keys,
    # indent and separators are to make the output more human-readable.

    return json.dumps(investigation,
                      cls=ISAJSONEncoder,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
Beispiel #2
0
    def _exportISATAB(self, destinationPath, detailsDict):
        """
        Export the dataset's metadata to the directory *destinationPath* as ISATAB.

        For every row of ``self.sampleMetadata`` one source, one sample and a
        linked pair of sample collection / aliquoting processes are added to
        the study. An NMR assay is then built with one extraction process and
        one NMR process per sample. If ``i_Investigation.txt`` already exists
        in *destinationPath* the study is appended to it through
        ``isaExplorer``; otherwise a new investigation is dumped there.

        detailsDict should have the format::

            detailsDict = {
                'investigation_identifier' : "i1",
                'investigation_title' : "Give it a title",
                'investigation_description' : "Add a description",
                'investigation_submission_date' : "2016-11-03",
                'investigation_public_release_date' : "2016-11-03",
                'first_name' : "Noureddin",
                'last_name' : "Sadawi",
                'affiliation' : "University",
                'study_filename' : "my_ms_study",
                'study_material_type' : "Serum",
                'study_identifier' : "s1",
                'study_title' : "Give the study a title",
                'study_description' : "Add study description",
                'study_submission_date' : "2016-11-03",
                'study_public_release_date' : "2016-11-03",
                'assay_filename' : "my_ms_assay"
            }

        The columns 'Sample File Name', 'Study', 'Acquired Time', 'Run Order'
        and 'Batch' of ``self.sampleMetadata`` are read unconditionally; the
        columns 'Status', 'Age', 'Gender', 'Sampling Date',
        'Assay data name', 'Instrument' and 'Sample batch' are optional.

        :param str destinationPath: Path to a directory in which the output will be saved
        :param dict detailsDict: Contains several key, value pairs required for ISATAB
        :raises IOError: If writing one of the files fails
        """

        from isatools.model import Investigation, Study, Assay, OntologyAnnotation, OntologySource, Person, Publication, Protocol, Source
        from isatools.model import Comment, Sample, Characteristic, Process, Material, DataFile, ParameterValue, plink
        from isatools import isatab
        import isaExplorer as ie

        # Which optional columns are present is the same for every row, so
        # it is worked out once instead of per row.
        metadata_columns = self.sampleMetadata.columns
        has_status = 'Status' in metadata_columns
        has_age = 'Age' in metadata_columns
        has_gender = 'Gender' in metadata_columns
        has_sampling_date = 'Sampling Date' in metadata_columns
        has_assay_data_name = 'Assay data name' in metadata_columns
        has_instrument = 'Instrument' in metadata_columns
        # NOTE(review): the column is spelled 'Sample batch' while the
        # protocol parameter is 'Sample Batch' -- confirm the column name.
        has_sample_batch = 'Sample batch' in metadata_columns

        investigation = Investigation()

        investigation.identifier = detailsDict['investigation_identifier']
        investigation.title = detailsDict['investigation_title']
        investigation.description = detailsDict['investigation_description']
        investigation.submission_date = detailsDict[
            'investigation_submission_date']  #use today if not specified
        investigation.public_release_date = detailsDict[
            'investigation_public_release_date']
        study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
        study.identifier = detailsDict['study_identifier']
        study.title = detailsDict['study_title']
        study.description = detailsDict['study_description']
        study.submission_date = detailsDict['study_submission_date']
        study.public_release_date = detailsDict['study_public_release_date']
        investigation.studies.append(study)
        obi = OntologySource(
            name='OBI', description="Ontology for Biomedical Investigations")
        investigation.ontology_source_references.append(obi)
        intervention_design = OntologyAnnotation(term_source=obi)
        intervention_design.term = "intervention design"
        intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
        study.design_descriptors.append(intervention_design)

        # The organism annotations below all reference the same ontology, so
        # a single OntologySource is created and declared on the
        # investigation, like 'obi' above.
        ncbitaxon = OntologySource(name='NCBITaxon',
                                   description="NCBI Taxonomy")
        investigation.ontology_source_references.append(ncbitaxon)

        # Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
        # each with lists of corresponding Person and Publication objects.

        contact = Person(first_name=detailsDict['first_name'],
                         last_name=detailsDict['last_name'],
                         affiliation=detailsDict['affiliation'],
                         roles=[OntologyAnnotation(term='submitter')])
        study.contacts.append(contact)
        publication = Publication(title="Experiments with Data",
                                  author_list="Auther 1, Author 2")
        publication.pubmed_id = "12345678"
        publication.status = OntologyAnnotation(term="published")
        study.publications.append(publication)

        # To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
        # to create a process sequence. To do this we use the Process class and attach it to the Study object's
        # 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
        # a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
        # a Source material and as output a Sample material.

        sample_collection_protocol = Protocol(
            id_="sample collection",
            name="sample collection",
            protocol_type=OntologyAnnotation(term="sample collection"))
        aliquoting_protocol = Protocol(
            id_="aliquoting",
            name="aliquoting",
            protocol_type=OntologyAnnotation(term="aliquoting"))

        for _, row in self.sampleMetadata.iterrows():
            src_name = row['Sample File Name']
            source = Source(name=src_name)

            source.comments.append(
                Comment(name='Study Name', value=row['Study']))
            study.sources.append(source)

            # The sample carries the same name as the source it derives from.
            sample = Sample(name=src_name, derives_from=[source])

            # Optional columns fall back to 'N/A' when absent.
            status = row['Status'] if has_status else 'N/A'
            characteristic_material_type = Characteristic(
                category=OntologyAnnotation(term="material type"),
                value=status)
            sample.characteristics.append(characteristic_material_type)

            age = row['Age'] if has_age else 'N/A'
            characteristic_age = Characteristic(
                category=OntologyAnnotation(term="Age"),
                value=age,
                unit='Year')
            sample.characteristics.append(characteristic_age)

            gender = row['Gender'] if has_gender else 'N/A'
            characteristic_gender = Characteristic(
                category=OntologyAnnotation(term="Gender"), value=gender)
            sample.characteristics.append(characteristic_gender)

            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term="Homo Sapiens",
                    term_source=ncbitaxon,
                    term_accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606"))
            sample.characteristics.append(characteristic_organism)

            study.samples.append(sample)

            # The sampling date is optional: a missing column or a null
            # value both leave the process date unset.
            sampling_date = None
            if has_sampling_date and not pandas.isnull(row['Sampling Date']):
                sampling_date = row['Sampling Date']

            sample_collection_process = Process(
                id_='sam_coll_proc',
                executes_protocol=sample_collection_protocol,
                date_=sampling_date)
            # NOTE(review): this reuses the sample collection process id;
            # left unchanged here -- confirm whether it should be distinct.
            aliquoting_process = Process(id_='sam_coll_proc',
                                         executes_protocol=aliquoting_protocol,
                                         date_=sampling_date)

            sample_collection_process.inputs = [source]
            aliquoting_process.outputs = [sample]

            # links processes
            plink(sample_collection_process, aliquoting_process)

            study.process_sequence.append(sample_collection_process)
            study.process_sequence.append(aliquoting_process)

        study.protocols.append(sample_collection_protocol)
        study.protocols.append(aliquoting_protocol)

        ### Add NMR Assay ###
        # The measurement and technology types are set once here, at
        # construction.
        nmr_assay = Assay(
            filename='a_' + detailsDict['assay_filename'] + '.txt',
            measurement_type=OntologyAnnotation(term="metabolite profiling"),
            technology_type=OntologyAnnotation(term="NMR spectroscopy"))
        extraction_protocol = Protocol(
            name='extraction',
            protocol_type=OntologyAnnotation(term="material extraction"))

        study.protocols.append(extraction_protocol)
        nmr_protocol = Protocol(
            name='NMR spectroscopy',
            protocol_type=OntologyAnnotation(term="NMR Assay"))
        nmr_protocol.add_param('Run Order')
        nmr_protocol.add_param('Instrument')
        nmr_protocol.add_param('Sample Batch')
        nmr_protocol.add_param('Acquisition Batch')

        study.protocols.append(nmr_protocol)

        for index, study_sample in enumerate(study.samples):
            # Metadata row(s) whose file name matches this sample.
            row = self.sampleMetadata.loc[
                self.sampleMetadata['Sample File Name'].astype(
                    str) == study_sample.name]
            # create an extraction process that executes the extraction protocol
            extraction_process = Process(executes_protocol=extraction_protocol)

            # The extraction process takes as input a sample, and produces an
            # extract material as output. A fresh Sample with the same name
            # is used in the assay; it derives from the study sample's own
            # source(s), not from whichever source the previous loop
            # happened to create last.
            assay_sample = Sample(
                name=study_sample.name,
                derives_from=list(study_sample.derives_from))

            extraction_process.inputs.append(assay_sample)
            material = Material(name="extract-{}".format(index))
            material.type = "Extract Name"
            extraction_process.outputs.append(material)

            # create an NMR process that executes the nmr protocol, dated
            # with the acquisition time in ISO 8601 format
            nmr_process = Process(executes_protocol=nmr_protocol,
                                  date_=datetime.isoformat(
                                      datetime.strptime(
                                          str(row['Acquired Time'].values[0]),
                                          '%Y-%m-%d %H:%M:%S')))

            nmr_process.name = "assay-name-{}".format(index)
            nmr_process.inputs.append(extraction_process.outputs[0])

            # nmr process usually has an output data file
            assay_data_name = (row['Assay data name'].values[0]
                               if has_assay_data_name else 'N/A')
            datafile = DataFile(filename=assay_data_name,
                                label="NMR Assay Name",
                                generated_from=[assay_sample])
            nmr_process.outputs.append(datafile)

            nmr_process.parameter_values = [
                ParameterValue(category=nmr_protocol.get_param('Run Order'),
                               value=row['Run Order'].values[0])
            ]

            instrument = (row['Instrument'].values[0]
                          if has_instrument else 'N/A')
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Instrument'),
                               value=instrument))

            sbatch = (row['Sample batch'].values[0]
                      if has_sample_batch else 'N/A')
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Sample Batch'),
                               value=sbatch))
            nmr_process.parameter_values.append(
                ParameterValue(
                    category=nmr_protocol.get_param('Acquisition Batch'),
                    value=row['Batch'].values[0]))

            # ensure Processes are linked forward and backward
            plink(extraction_process, nmr_process)
            # make sure the extract, data file, and the processes are attached to the assay
            nmr_assay.samples.append(assay_sample)
            nmr_assay.data_files.append(datafile)
            nmr_assay.other_material.append(material)
            nmr_assay.process_sequence.append(extraction_process)
            nmr_assay.process_sequence.append(nmr_process)

        # attach the assay to the study
        study.assays.append(nmr_assay)

        # Append to an existing investigation file when there is one,
        # otherwise write a new investigation.
        if os.path.exists(os.path.join(destinationPath,
                                       'i_Investigation.txt')):
            ie.appendStudytoISA(study, destinationPath)
        else:
            isatab.dump(isa_obj=investigation, output_path=destinationPath)
Beispiel #3
0
def create_descriptor():
    """
    Returns a simple but complete ISA-Tab 1.0 descriptor for illustration.

    The descriptor holds one investigation containing one study. The study
    has a single source material, three samples derived from it, one sample
    collection process, and one assay in which every sample goes through an
    extraction process followed by a sequencing process.

    :returns: the value returned by ``isatab.dumps(investigation)``, i.e. the
        ISA-Tab representation of the investigation as a string
    """

    # Create an empty Investigation object and set some values to the instance
    # variables.

    investigation = Investigation()
    investigation.identifier = "i1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we want " \
        "to demonstrate how to use the object's instance variables to " \
        "set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation by
    # adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor to " \
        "set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors. A
    # design descriptor is an Ontology Annotation describing the kind of study
    # at hand. Ontology Annotations should typically reference an Ontology
    # Source. We demonstrate a mix of using the class constructors and setting
    # values with instance variables. Note that the OntologyAnnotation object
    # 'intervention_design' links its 'term_source' directly to the 'obi'
    # object instance. To ensure the OntologySource is encapsulated in the
    # descriptor, it is added to a list of 'ontology_source_references' in
    # the Investigation object. The 'intervention_design' object is then
    # added to the list of 'design_descriptors' held by the Study object.

    obi = OntologySource(
        name='OBI',
        description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)
    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(
        first_name="Alice",
        last_name="Robertson",
        affiliation="University of Life",
        roles=[
            OntologyAnnotation(
                term='submitter')])
    study.contacts.append(contact)
    publication = Publication(
        title="Experiments with Elephants",
        author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo Sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # resulting samples are expected to be named 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    # Like 'obi' above, the NCBITaxon source is declared on the investigation
    # so that the ontology referenced by the organism annotation is
    # encapsulated in the descriptor.
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
                           "9606"))
    prototype_sample.characteristics.append(characteristic_organism)

    study.samples = batch_create_materials(
        prototype_sample, n=3)  # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our
    # sample collection protocol, and attach it to the study object. Protocols
    # must be declared before we describe Processes, as a processing event of
    # some sort must execute some defined protocol. In the case of the class
    # model, Protocols should therefore be declared before Processes in order
    # for the Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Next, we link our materials to the Process. In this particular case,
    # we are describing a sample collection process that takes one
    # source material, and produces three different samples.
    #
    # (source_material)->(sample collection)->[(sample_material-0),
    # (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to
    # describe multiple sample collection events.

    study.process_sequence.append(sample_collection_process)

    # Next, we build an Assay object and attach two protocols, extraction and
    # sequencing. The measurement and technology types describe the assay as
    # a whole, so they are set once here rather than on every iteration of
    # the loop below.

    assay = Assay(filename="a_assay.txt")
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = OntologyAnnotation(
        term="nucleotide sequencing")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(
            term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(
            term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and a
    # sequencing process. The extraction process takes as input a
    # sample material, and produces an extract material. The sequencing
    # process takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three graphs are NOT
    # interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(
            filename="sequenced-data-{}".format(i),
            label="Raw Data File",
            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked forward and backward

        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    # attach the assay to the study

    study.assays.append(assay)

    from isatools import isatab
    # dumps() writes out the ISA as a string representation of the ISA-Tab
    return isatab.dumps(investigation)
Beispiel #4
0
def _samples_named(samples, name):
    """Return the items of ``samples`` whose ``name`` attribute is ``name``."""
    return [x for x in samples if x.name == name]


def convert(json_path, output_path):
    """
    Converts the JSON document at ``json_path`` into ISA-Tab files.

    The function reads the 'project', 'study', 'factor', 'protocol',
    'subject', 'sample' and 'measurement' collections of the JSON document
    (each a mapping keyed by identifier), builds an ISA Investigation with a
    single Study from them, and then writes to ``output_path``:

    * '<study id>-maf-data-nih-dcc-json.txt', a tab-separated table holding
      one row per measurement, and
    * the ISA-Tab serialisation of the Investigation (via ``isatab.dump``).

    Only the first project and the first study of the document are used.
    The output files are written whether or not ``output_path`` already
    exists; the directory is created when it is missing.

    :param json_path: path of the input JSON file.
    :param output_path: directory that receives the generated files.
    :raises IOError: if the JSON document holds no project.
    :return: None. IOErrors raised while writing are reported on stdout and
        not propagated.
    """
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # Building the Investigation Object and its elements:

    # A missing or null 'project' entry is handled like an empty one, so the
    # caller gets the IOError below rather than a TypeError from len(None).
    project_set_json = dcc_json.get('project') or {}

    if not project_set_json:
        raise IOError('No project found in input JSON')

    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study') or {}

    # Stays None when the document holds no study; the name of the
    # measurement file written at the end depends on it.
    studyid = None

    if study_set_json:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        # Every protocol is declared on the study; protocols whose type
        # mentions 'MS' or 'NMR' additionally create an assay.
        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            protocol_type = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=protocol_type,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_type:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_type:
                # NOTE(review): unlike the MS assays this filename carries no
                # counter, so several NMR protocols share one filename.
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        # Last sample collection process created from a 'tissue_slice'
        # subject; the sample loop below links later processes to it.
        sample_collection_process = None

        for subject_json in dcc_json['subject'].values():
            subject_type = subject_json['type']

            # Whatever its type, each subject is recorded as a Source that
            # carries an Organism characteristic.
            source = Source(name=subject_json['id'])
            ncbitaxon = OntologySource(name='NCBITaxon',
                                       description="NCBI Taxonomy")
            # NOTE(review): the accession is hard-coded to NCBITAXON 9606
            # regardless of the 'species' value - confirm this is intended.
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term=subject_json['species'],
                    term_source=ncbitaxon,
                    term_accession=
                    'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
            source.characteristics.append(characteristic_organism)
            study.sources.append(source)

            if "organism" in subject_type:
                continue

            if 'tissue_slice' in subject_type:
                # A tissue slice is also a Sample, produced from the Source
                # by a sample collection process.
                # NOTE(review): derives_from receives the raw parentID
                # string here, not Source objects - confirm against the
                # Sample API.
                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])

        # Most recent process of each kind seen so far. When a step needs
        # its predecessor and none exists yet, an empty placeholder Process
        # is stored instead of creating a link.
        material_separation_process = None
        protein_extraction_process = None

        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']

                # dealing with input material, check that the parent
                # material is already among known samples
                parents = _samples_named(study.samples,
                                         sample_json['parentID'])
                if not parents:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print(parents)
                    material_separation_process.inputs.append(parents[0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)

                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                elif protein_extraction_process is not None:
                    # Only link when a protein extraction process exists;
                    # referencing it unconditionally raised NameError.
                    # NOTE(review): the other branches link the predecessor
                    # to the process created in the branch - confirm the
                    # target should not be material_separation_process.
                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                parents = _samples_named(study.samples,
                                         sample_json['parentID'])
                if not parents:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # Reuse the known sample instead of whatever
                    # material_in was left over from an earlier iteration.
                    material_in = parents[0]
                    protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                # NOTE(review): material_out is built but never attached to
                # the process or the assay; the two appends below register
                # material_in instead - confirm which one is intended.
                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)

                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)

                study.assays[0].data_files.append(datafile)

                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                parents = _samples_named(study.samples,
                                         sample_json['parentID'])
                if not parents:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # Reuse the known sample instead of a stale material_in.
                    material_in = parents[0]
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    # Build the tab-separated measurement table: one header row plus one
    # row per entry of the 'measurement' collection.
    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    measurement_fields = ('compound', 'assignment', 'raw_intensity',
                          'retention_time', 'corrected_mz', 'formula',
                          'adduct', 'isotopologue', 'sample.id')
    records = []
    for measurement in dcc_json['measurement'].values():
        records.append(
            '\t'.join([measurement[field] for field in measurement_fields]))

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # The write step runs whether or not the directory existed beforehand.
    try:
        if studyid is None:
            # The measurement file is named after the study identifier.
            print("no study found in input JSON: "
                  "skipping 'maf file document'")
        else:
            with open(
                    '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                    format(output_path=output_path,
                           study_id=studyid), 'w') as fh:
                print(
                    "'writing 'maf file document' to file from 'generate_maf_file' method:..."
                )
                fh.write(data_rec_header)
                fh.write('\n')
                for item in records:
                    fh.write(item)
                    fh.write('\n')

        print("writing 'investigation information' to file...")
        print(isatab.dumps(investigation))

        isatab.dump(investigation, output_path=output_path)
    except IOError:
        print("Error: in main() method can't open file or write data")
Beispiel #5
0
def modify_investigation(fp):
    """
    Load, edit, and dump an ISA-Tab 1.0 descriptor.

    The investigation is loaded from ``fp`` without its study/assay tables,
    its first study is replaced by a freshly built one (contacts,
    publication, one source, three samples, a sample collection process and
    a sequencing assay), and the result is serialised again.

    :param fp: passed unchanged to ``load``; the investigation file to read.
    :return: the value of ``dumps(investigation, skip_dump_tables=True)``,
        i.e. the ISA-Tab string representation without the table files.
    """

    # Load an existing ISA-Tab investigation file. In this example, we load an
    # unpopulated i_investigation.txt file
    investigation = load(fp, skip_load_tables=True)
    investigation.identifier = "i1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we want " \
        "to demonstrate how to use the object's instance variables to set " \
        "values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor to " \
        "set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    # NOTE(review): assumes the loaded investigation already holds at least
    # one study to overwrite - confirm for the files this is used with.
    investigation.studies[0] = study

    # Declare the ontology source once, then reference it from the design
    # descriptor annotation.
    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)
    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications' each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    source = Source(name='source_material')
    study.sources.append(source)

    # The prototype sample is the template that batch_create_materials
    # copies from.
    prototype_sample = Sample(name='sample_material', derives_from=[source])
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            # The term was corrupted to "H**o Sapiens"; the accession below
            # (NCBITAXON 9606) identifies the intended organism.
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))
    prototype_sample.characteristics.append(characteristic_organism)

    study.samples = batch_create_materials(prototype_sample, n=3)
    # creates a batch of 3 samples

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # One collection process takes every source as input and yields every
    # sample as output.
    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    study.process_sequence.append(sample_collection_process)

    assay = Assay(filename="a_assay.txt")
    # The assay types do not depend on the samples, so they are set once
    # here rather than on every pass of the loop below.
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = \
        OntologyAnnotation(term="nucleotide sequencing")

    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # Per sample: sample -> extraction -> extract -> sequencing -> data file.
    for i, sample in enumerate(study.samples):
        extraction_process = Process(executes_protocol=extraction_protocol)

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File")
        sequencing_process.outputs.append(datafile)

        # link the two processes forward and backward
        extraction_process.next_process = sequencing_process
        sequencing_process.prev_process = extraction_process

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    study.assays.append(assay)

    # dumps() writes out the ISA as a string representation of the ISA-Tab,
    # but we are skipping writing tables
    return dumps(investigation, skip_dump_tables=True)