Esempio n. 1
0
def create_descriptor():
    """Build, print and return an ISA-Tab descriptor from a simple sample plan.

    The sample plan registers the sample types 'liver' (plan record of 5)
    and 'blood' (plan record of 3) with a group size of 2. Treatments are
    the full factorial design over three study factors (AGENT, INTENSITY,
    DURATION), wrapped in a single treatment sequence.

    Returns:
        The result of ``isatab.dumps`` for the generated investigation. The
        same value is also printed to standard output.
    """
    investigation = Investigation(identifier='I1')

    # Sample plan: which sample types are collected and how many of each.
    plan = SampleAssayPlan()
    plan.add_sample_type('liver')
    plan.add_sample_plan_record('liver', 5)
    plan.add_sample_type('blood')
    plan.add_sample_plan_record('blood', 3)
    plan.group_size = 2

    # Study factors spanning the treatment space.
    f1 = StudyFactor(name='AGENT',
                     factor_type=OntologyAnnotation(term='pertubation agent'))
    f2 = StudyFactor(name='INTENSITY',
                     factor_type=OntologyAnnotation(term='intensity'))
    f3 = StudyFactor(name='DURATION',
                     factor_type=OntologyAnnotation(term='time'))

    treatment_factory = TreatmentFactory(factors=[f1, f2, f3])
    treatment_factory.add_factor_value(f1, {'cocaine', 'crack', 'aether'})
    treatment_factory.add_factor_value(f2, {'low', 'medium', 'high'})
    treatment_factory.add_factor_value(f3, {'short', 'long'})

    ffactorial_design_treatments = \
        treatment_factory.compute_full_factorial_design()
    treatment_sequence = TreatmentSequence(
        ranked_treatments=ffactorial_design_treatments)

    study = IsaModelObjectFactory(plan, treatment_sequence)\
        .create_study_from_plan()
    study.filename = 's_study.txt'
    investigation.studies = [study]

    # The function used to only print the descriptor and implicitly return
    # None, contradicting its own docstring. Printing is kept for existing
    # users; the value is now returned as well.
    descriptor = isatab.dumps(investigation)
    print(descriptor)
    return descriptor
def create_isa_investigations(endpoint):
    """Create ISA investigations from a BrAPI endpoint, starting from the trials information.

    One Investigation is built per trial returned by ``get_brapi_trials``;
    its identifier and title come from the trial's 'trialDbId' and
    'trialName', and one ISA study is created (via ``create_isa_study``) for
    every entry of the trial's 'studies' list.

    Args:
        endpoint: value forwarded unchanged to ``get_brapi_trials``.

    Returns:
        list: one Investigation per trial, in the order the trials were
        yielded.
    """
    investigations = []
    for trial in get_brapi_trials(endpoint):
        this_investigation = Investigation()
        this_investigation.identifier = trial['trialDbId']
        this_investigation.title = trial['trialName']
        # NOTE: start date, end date and the 'active' flag of the trial are
        # currently not carried over as investigation comments.

        for brapi_study in trial['studies']:
            isa_study = create_isa_study(brapi_study['studyDbId'])
            this_investigation.studies.append(isa_study)

        # Register the investigation exactly once per trial. Doing this
        # inside the study loop added the same investigation once per study
        # and dropped trials that have no studies.
        investigations.append(this_investigation)
    return investigations
Esempio n. 3
0
 def setUp(self):
     """Create the fixtures stored on the test instance.

     Sets ``self.investigation`` (identifier 'I1') and the three study
     factors ``self.f1`` (AGENT), ``self.f2`` (INTENSITY) and ``self.f3``
     (DURATION), each typed with its own ontology annotation.
     """
     self.investigation = Investigation(identifier='I1')

     agent_type = OntologyAnnotation(term='pertubation agent')
     self.f1 = StudyFactor(name='AGENT', factor_type=agent_type)

     intensity_type = OntologyAnnotation(term='intensity')
     self.f2 = StudyFactor(name='INTENSITY', factor_type=intensity_type)

     duration_type = OntologyAnnotation(term='time')
     self.f3 = StudyFactor(name='DURATION', factor_type=duration_type)
    def test_convert_study(self, client_mock):
        """Check the conversion of a BrAPI study to an ISA study with mock data.

        ``client_mock`` is presumably injected by a patch decorator that is
        not visible here; its return value is replaced by a mock that serves
        the canned study and observation units from ``mock_data``.
        """
        # Stand-in for the BrAPI client instance, answering with canned data.
        brapi_client = mock.Mock()
        client_mock.return_value = brapi_client
        brapi_client.get_study.return_value = mock_data.mock_study
        brapi_client.get_study_observation_units.return_value = \
            mock_data.mock_observation_units

        expected_id = mock_data.mock_study['studyDbId']
        target_investigation = Investigation()

        # Convert the BrAPI study; only the first element of the returned
        # pair is inspected.
        isa_study, _ = self.converter.create_isa_study(expected_id,
                                                       target_investigation)

        # The client was queried and a study with the expected file names
        # and a single default assay came back.
        assert brapi_client.get_study.called
        assert isa_study is not None
        assert isa_study.filename == 's_{}.txt'.format(expected_id)
        assert len(isa_study.assays) == 1
        only_assay = isa_study.assays[0]
        assert only_assay.filename == 'a_{}_default.txt'.format(expected_id)
Esempio n. 5
0
def create_descriptor():
    """
    Returns a simple but complete ISA-JSON 1.0 descriptor for illustration.

    The descriptor holds one Investigation with one Study. The study has a
    single source material, three samples produced by one sample collection
    process, and one assay in which every sample goes through its own
    extraction process followed by its own sequencing process.

    Besides building the descriptor, the function prints the first comment
    attached to the study factor to standard output.

    Returns:
        The JSON text produced by ``json.dumps`` with ``ISAJSONEncoder``.
    """

    # Create an empty Investigation object and set some values to the
    # instance variables.

    investigation = Investigation()
    investigation.identifier = "1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we " \
        "want to demonstrate how to use the object's instance variables " \
        "to set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation
    # by adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor " \
        "to set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # This is to show that ISA Comments can be used to annotate ISA objects,
    # here an ISA Study.
    study.comments.append(Comment(name="Study Start Date", value="Sun"))

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors.
    # A design descriptor is an Ontology Annotation describing the kind of
    # study at hand. Ontology Annotations should typically reference an
    # Ontology Source. We demonstrate a mix of using the class constructors
    # and setting values with instance variables. Note that the
    # OntologyAnnotation object 'intervention_design' links its 'term_source'
    # directly to the 'obi' object instance. To ensure the OntologySource
    # is encapsulated in the descriptor, it is added to a list of
    # 'ontology_source_references' in the Investigation object. The
    # 'intervention_design' object is then added to the list of
    # 'design_descriptors' held by the Study object.

    obi = OntologySource(name='OBI',
                         description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)

    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(first_name="Alice",
                     last_name="Robertson",
                     affiliation="University of Life",
                     roles=[OntologyAnnotation(term='submitter')])
    study.contacts.append(contact)
    publication = Publication(title="Experiments with Elephants",
                              author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.
    # It is attached exactly once: the sample collection process below takes
    # every entry of study.sources as an input, so a second append of the
    # same object would duplicate that input.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create the prototype for three Sample objects. We use the
    # utility function batch_create_materials() to clone the prototype
    # material object. The function automatically appends an index to the
    # material name. In this case, three samples will be created, with the
    # names 'sample_material-0', 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)

    # The term literal is "Homo Sapiens" (NCBITaxon 9606); it had been
    # corrupted to a masked form that does not name any organism.
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
            "9606"))

    # Adding the description to the ISA Source Material (the source is
    # already part of study.sources, so it is not appended again).
    source.characteristics.append(characteristic_organism)

    # Declaring a new ontology and adding it to the list of resources used.
    uberon = OntologySource(name='UBERON', description='Uber Anatomy Ontology')
    investigation.ontology_source_references.append(uberon)

    # Preparing an ISA Characteristic object (~Material Property) to annotate
    # sample materials.
    characteristic_organ = Characteristic(
        category=OntologyAnnotation(term="OrganismPart"),
        value=OntologyAnnotation(
            term="liver",
            term_source=uberon,
            term_accession="http://purl.bioontology.org/ontology/UBERON/"
            "123245"))

    prototype_sample.characteristics.append(characteristic_organ)

    # Creates a batch of 3 samples.
    study.samples = batch_create_materials(prototype_sample, n=3)

    # Now we create a single Protocol object that represents our sample
    # collection protocol, and attach it to the study object. Protocols must be
    # declared before we describe Processes, as a processing event of some sort
    # must execute some defined protocol. In the case of the class model,
    # Protocols should therefore be declared before Processes in order for the
    # Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Adding dummy Comment[] entries to the ISA Protocol object.
    study.protocols[0].comments.append(
        Comment(name="Study Start Date", value="Uranus"))
    study.protocols[0].comments.append(
        Comment(name="Study End Date", value="2017-08-11"))

    # Creation of an ISA Study Factor object.
    f = StudyFactor(
        name="treatment['modality']",
        factor_type=OntologyAnnotation(term="treatment['modality']"))
    # Testing serialization of Comments attached to ISA objects.
    f.comments.append(Comment(name="Study Start Date", value="Saturn"))
    f.comments.append(Comment(name="Study End Date", value="2039-12-12"))
    print(f.comments[0].name, "|", f.comments[0].value)

    study.factors.append(f)

    # Next, we link our materials to the Process. In this particular case, we
    # are describing a sample collection process that takes one source
    # material, and produces three different samples.
    #
    # (source_material)->(sample collection)->
    # [(sample_material-0), (sample_material-1), (sample_material-2)]

    sample_collection_process.inputs.extend(study.sources)
    sample_collection_process.outputs.extend(study.samples)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to describe multiple
    # sample collection events.

    study.process_sequence.append(sample_collection_process)

    # IMPORTANT: remember to populate the list of ontology categories used to
    # annotate ISA Materials in a Study. Both categories used above are
    # registered: 'Organism' (on the source) and 'OrganismPart' (on the
    # samples); the latter used to be left out.
    study.characteristic_categories.append(characteristic_organism.category)
    study.characteristic_categories.append(characteristic_organ.category)

    # Next, we build an Assay object and attach two protocols,
    # extraction and sequencing.

    assay = Assay(filename="a_assay.txt")
    # The assay types do not depend on the samples, so they are set once here
    # rather than being re-assigned on every loop iteration.
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = OntologyAnnotation(term="nucleotide sequencing")

    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and
    # a sequencing process. The extraction process takes as input a sample
    # material, and produces an extract material. The sequencing process
    # takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three
    # graphs are NOT interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(filename="sequenced-data-{}".format(i),
                            label="Raw Data File",
                            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # Ensure Processes are linked in data-flow order: the extraction
        # comes first because its output is the sequencing input. The
        # arguments used to be passed the other way round.
        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    # attach the assay to the study
    study.assays.append(assay)

    import json
    from isatools.isajson import ISAJSONEncoder

    # To write JSON out, use the ISAJSONEncoder class with the json package
    # and use dump() or dumps(). Note that the extra parameters sort_keys,
    # indent and separators are to make the output more human-readable.

    return json.dumps(investigation,
                      cls=ISAJSONEncoder,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '))
Esempio n. 6
0
    def _exportISATAB(self, destinationPath, detailsDict):
        """
        Export the dataset's metadata to the directory *destinationPath* as ISATAB.

        One source and one sample are created per row of
        ``self.sampleMetadata`` (named after the row's 'Sample File Name'),
        linked through a sample collection and an aliquoting process. An NMR
        assay is then built with one extraction and one NMR process per
        sample. If *destinationPath* already contains 'i_Investigation.txt'
        the study is appended to it, otherwise a new investigation is dumped.

        detailsDict should have the format:
        detailsDict = {
            'investigation_identifier' : "i1",
            'investigation_title' : "Give it a title",
            'investigation_description' : "Add a description",
            'investigation_submission_date' : "2016-11-03",
            'investigation_public_release_date' : "2016-11-03",
            'first_name' : "Noureddin",
            'last_name' : "Sadawi",
            'affiliation' : "University",
            'study_filename' : "my_ms_study",
            'study_material_type' : "Serum",
            'study_identifier' : "s1",
            'study_title' : "Give the study a title",
            'study_description' : "Add study description",
            'study_submission_date' : "2016-11-03",
            'study_public_release_date' : "2016-11-03",
            'assay_filename' : "my_ms_assay"
        }

        :param str destinationPath: Path to a directory in which the output will be saved
        :param dict detailsDict: Contains several key, value pairs required to for ISATAB
        :raises IOError: If writing one of the files fails
        """

        from isatools.model import Investigation, Study, Assay, OntologyAnnotation, OntologySource, Person, Publication, Protocol, Source
        from isatools.model import Comment, Sample, Characteristic, Process, Material, DataFile, ParameterValue, plink
        from isatools import isatab
        import isaExplorer as ie

        # Optional metadata columns are looked up repeatedly below.
        columns = self.sampleMetadata.columns

        investigation = Investigation()

        investigation.identifier = detailsDict['investigation_identifier']
        investigation.title = detailsDict['investigation_title']
        investigation.description = detailsDict['investigation_description']
        investigation.submission_date = detailsDict[
            'investigation_submission_date']  # use today if not specified
        investigation.public_release_date = detailsDict[
            'investigation_public_release_date']
        study = Study(filename='s_' + detailsDict['study_filename'] + '.txt')
        study.identifier = detailsDict['study_identifier']
        study.title = detailsDict['study_title']
        study.description = detailsDict['study_description']
        study.submission_date = detailsDict['study_submission_date']
        study.public_release_date = detailsDict['study_public_release_date']
        investigation.studies.append(study)
        obi = OntologySource(
            name='OBI', description="Ontology for Biomedical Investigations")
        investigation.ontology_source_references.append(obi)
        intervention_design = OntologyAnnotation(term_source=obi)
        intervention_design.term = "intervention design"
        intervention_design.term_accession = "http://purl.obolibrary.org/obo/OBI_0000115"
        study.design_descriptors.append(intervention_design)

        # Other instance variables common to both Investigation and Study objects include 'contacts' and 'publications',
        # each with lists of corresponding Person and Publication objects.

        contact = Person(first_name=detailsDict['first_name'],
                         last_name=detailsDict['last_name'],
                         affiliation=detailsDict['affiliation'],
                         roles=[OntologyAnnotation(term='submitter')])
        study.contacts.append(contact)
        publication = Publication(title="Experiments with Data",
                                  author_list="Auther 1, Author 2")
        publication.pubmed_id = "12345678"
        publication.status = OntologyAnnotation(term="published")
        study.publications.append(publication)

        # To create the study graph that corresponds to the contents of the study table file (the s_*.txt file), we need
        # to create a process sequence. To do this we use the Process class and attach it to the Study object's
        # 'process_sequence' list instance variable. Each process must be linked with a Protocol object that is attached to
        # a Study object's 'protocols' list instance variable. The sample collection Process object usually has as input
        # a Source material and as output a Sample material.

        sample_collection_protocol = Protocol(
            id_="sample collection",
            name="sample collection",
            protocol_type=OntologyAnnotation(term="sample collection"))
        aliquoting_protocol = Protocol(
            id_="aliquoting",
            name="aliquoting",
            protocol_type=OntologyAnnotation(term="aliquoting"))

        # A single NCBITaxon source is shared by every organism annotation
        # and declared on the investigation, like OBI above, instead of
        # being rebuilt for each row and never declared.
        ncbitaxon = OntologySource(name='NCBITaxon',
                                   description="NCBI Taxonomy")
        investigation.ontology_source_references.append(ncbitaxon)

        for index, row in self.sampleMetadata.iterrows():
            src_name = row['Sample File Name']
            source = Source(name=src_name)

            source.comments.append(
                Comment(name='Study Name', value=row['Study']))
            study.sources.append(source)

            sample_name = src_name
            sample = Sample(name=sample_name, derives_from=[source])
            # optional column: fall back to 'N/A' when it is absent
            status = row['Status'] if 'Status' in columns else 'N/A'
            characteristic_material_type = Characteristic(
                category=OntologyAnnotation(term="material type"),
                value=status)
            sample.characteristics.append(characteristic_material_type)

            # optional column: fall back to 'N/A' when it is absent
            age = row['Age'] if 'Age' in columns else 'N/A'
            characteristic_age = Characteristic(
                category=OntologyAnnotation(term="Age"),
                value=age,
                unit='Year')
            sample.characteristics.append(characteristic_age)
            # optional column: fall back to 'N/A' when it is absent
            gender = row['Gender'] if 'Gender' in columns else 'N/A'
            characteristic_gender = Characteristic(
                category=OntologyAnnotation(term="Gender"), value=gender)
            sample.characteristics.append(characteristic_gender)

            # The term literal is "Homo Sapiens" (NCBITaxon 9606); it had
            # been corrupted to a masked form that names no organism.
            characteristic_organism = Characteristic(
                category=OntologyAnnotation(term="Organism"),
                value=OntologyAnnotation(
                    term="Homo Sapiens",
                    term_source=ncbitaxon,
                    term_accession=
                    "http://purl.bioontology.org/ontology/NCBITAXON/9606"))
            sample.characteristics.append(characteristic_organism)

            study.samples.append(sample)

            # Check that the field exists first (as for the other optional
            # columns); a missing column or a null value yields no date.
            sampling_date = None
            if 'Sampling Date' in columns and not pandas.isnull(
                    row['Sampling Date']):
                sampling_date = row['Sampling Date']
            # NOTE(review): both processes are given the id 'sam_coll_proc';
            # left unchanged here - confirm whether that is intended.
            sample_collection_process = Process(
                id_='sam_coll_proc',
                executes_protocol=sample_collection_protocol,
                date_=sampling_date)
            aliquoting_process = Process(id_='sam_coll_proc',
                                         executes_protocol=aliquoting_protocol,
                                         date_=sampling_date)

            sample_collection_process.inputs = [source]
            aliquoting_process.outputs = [sample]

            # links processes
            plink(sample_collection_process, aliquoting_process)

            study.process_sequence.append(sample_collection_process)
            study.process_sequence.append(aliquoting_process)

        study.protocols.append(sample_collection_protocol)
        study.protocols.append(aliquoting_protocol)

        ### Add NMR Assay ###
        nmr_assay = Assay(
            filename='a_' + detailsDict['assay_filename'] + '.txt',
            measurement_type=OntologyAnnotation(term="metabolite profiling"),
            technology_type=OntologyAnnotation(term="NMR spectroscopy"))
        extraction_protocol = Protocol(
            name='extraction',
            protocol_type=OntologyAnnotation(term="material extraction"))

        study.protocols.append(extraction_protocol)
        nmr_protocol = Protocol(
            name='NMR spectroscopy',
            protocol_type=OntologyAnnotation(term="NMR Assay"))
        nmr_protocol.add_param('Run Order')
        nmr_protocol.add_param('Instrument')
        nmr_protocol.add_param('Sample Batch')
        nmr_protocol.add_param('Acquisition Batch')

        study.protocols.append(nmr_protocol)

        for index, study_sample in enumerate(study.samples):
            # Metadata rows whose 'Sample File Name' matches this sample;
            # the first match is used for every per-sample value below.
            row = self.sampleMetadata.loc[
                self.sampleMetadata['Sample File Name'].astype(
                    str) == study_sample.name]
            # create an extraction process that executes the extraction protocol
            extraction_process = Process(executes_protocol=extraction_protocol)

            # The extraction process takes a sample as input and produces an
            # extract material as output. The assay keeps its own Sample
            # object, but it must derive from this sample's own source: the
            # previous code reused the 'source' variable left over from the
            # loop above, tying every assay sample to the last row's source.
            sample = Sample(name=study_sample.name,
                            derives_from=list(study_sample.derives_from))

            extraction_process.inputs.append(sample)
            material = Material(name="extract-{}".format(index))
            material.type = "Extract Name"
            extraction_process.outputs.append(material)

            # create an NMR process that executes the nmr protocol, dated
            # with the ISO form of the row's 'Acquired Time'
            nmr_process = Process(executes_protocol=nmr_protocol,
                                  date_=datetime.isoformat(
                                      datetime.strptime(
                                          str(row['Acquired Time'].values[0]),
                                          '%Y-%m-%d %H:%M:%S')))

            nmr_process.name = "assay-name-{}".format(index)
            nmr_process.inputs.append(extraction_process.outputs[0])
            # nmr process usually has an output data file
            # optional column: fall back to 'N/A' when it is absent
            assay_data_name = row['Assay data name'].values[
                0] if 'Assay data name' in columns else 'N/A'
            datafile = DataFile(filename=assay_data_name,
                                label="NMR Assay Name",
                                generated_from=[sample])
            nmr_process.outputs.append(datafile)

            nmr_process.parameter_values = [
                ParameterValue(category=nmr_protocol.get_param('Run Order'),
                               value=row['Run Order'].values[0])
            ]
            # optional column: fall back to 'N/A' when it is absent
            instrument = row['Instrument'].values[
                0] if 'Instrument' in columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Instrument'),
                               value=instrument))
            # optional column: fall back to 'N/A' when it is absent
            sbatch = row['Sample batch'].values[
                0] if 'Sample batch' in columns else 'N/A'
            nmr_process.parameter_values.append(
                ParameterValue(category=nmr_protocol.get_param('Sample Batch'),
                               value=sbatch))
            nmr_process.parameter_values.append(
                ParameterValue(
                    category=nmr_protocol.get_param('Acquisition Batch'),
                    value=row['Batch'].values[0]))

            # ensure Processes are linked forward and backward
            plink(extraction_process, nmr_process)
            # make sure the extract, data file, and the processes are attached to the assay
            # (measurement and technology types were already set when the
            # assay was constructed, so they are not re-assigned here)
            nmr_assay.samples.append(sample)
            nmr_assay.data_files.append(datafile)
            nmr_assay.other_material.append(material)
            nmr_assay.process_sequence.append(extraction_process)
            nmr_assay.process_sequence.append(nmr_process)

        # attach the assay to the study
        study.assays.append(nmr_assay)

        # Append to an existing investigation if there is one in the target
        # directory, otherwise write a fresh ISA-Tab set.
        if os.path.exists(os.path.join(destinationPath,
                                       'i_Investigation.txt')):
            ie.appendStudytoISA(study, destinationPath)
        else:
            isatab.dump(isa_obj=investigation, output_path=destinationPath)
Esempio n. 7
0
def create_from_plan_parameters(galaxy_parameters_file,
                                sample_assay_plans_file, study_info_file,
                                treatment_plans_file, target_dir):
    """Write an ISA-Tab investigation to *target_dir* from plan parameters.

    The inputs are either a single Galaxy parameters file or the trio of
    sample/assay plan, study info and treatment plan files (all read as
    JSON). Only single, chemical intervention studies are supported. After
    the investigation is dumped, every data file referenced by the study's
    assays is touched in *target_dir*.

    Args:
        galaxy_parameters_file: open JSON file with Galaxy parameters; takes
            precedence over the three files below when truthy.
        sample_assay_plans_file: open file decoded by SampleAssayPlanDecoder.
        study_info_file: open JSON file with the 'study_*' entries.
        treatment_plans_file: open JSON file with the treatment plan.
        target_dir: directory receiving the ISA-Tab files and data files.

    Raises:
        IOError: if neither input variant is completely provided.
        NotImplementedError: for non-intervention studies, multiple
            treatments, or non-chemical interventions.
    """
    plan_decoder = SampleAssayPlanDecoder()
    if galaxy_parameters_file:
        # Everything is derived from the single Galaxy parameters document.
        galaxy_parameters = json.load(galaxy_parameters_file)
        mapped = map_galaxy_to_isa_create_json(galaxy_parameters)
        sample_and_assay_plans, study_info, treatment_plan_params = mapped
        plans_stream = io.StringIO(json.dumps(sample_and_assay_plans))
        plan = plan_decoder.load(plans_stream)
    elif not (sample_assay_plans_file and study_info_file
              and treatment_plans_file):
        raise IOError('Wrong parameters provided')
    else:
        plan = plan_decoder.load(sample_assay_plans_file)
        study_info = json.load(study_info_file)
        treatment_plan_params = json.load(treatment_plans_file)

    # Walk down the nested treatment plan, rejecting unsupported choices at
    # each level before looking any deeper.
    study_type_cond = treatment_plan_params['study_type_cond']
    if study_type_cond['study_type'] != 'intervention':
        raise NotImplementedError('Only supports Intervention studies')

    one_or_more = study_type_cond['one_or_more']
    if one_or_more['single_or_multiple'] == 'multiple':
        raise NotImplementedError(
            'Multiple treatments not yet implemented. Please select Single')

    intervention = one_or_more['intervention_type']
    if intervention['select_intervention_type'] != 'chemical intervention':
        raise NotImplementedError(
            'Only Chemical Interventions supported at this time')

    factory = TreatmentFactory(
        intervention_type=INTERVENTIONS['CHEMICAL'], factors=BASE_FACTORS)
    # Each field is a comma-separated list of levels for the base factor at
    # the same position.
    for factor_index, field in enumerate(('agent', 'intensity', 'duration')):
        for raw_level in intervention[field].split(','):
            factory.add_factor_value(BASE_FACTORS[factor_index],
                                     raw_level.strip())

    full_factorial = factory.compute_full_factorial_design()
    sequence = TreatmentSequence(ranked_treatments=full_factorial)
    study = IsaModelObjectFactory(plan, sequence).create_assays_from_plan()

    # The principal investigator becomes the contact of both the study and
    # the investigation.
    principal_investigator = Person()
    principal_investigator.affiliation = study_info['study_pi_affiliation']
    principal_investigator.last_name = study_info['study_pi_last_name']
    principal_investigator.email = study_info['study_pi_email']
    principal_investigator.first_name = study_info['study_pi_first_name']

    study.contacts = [principal_investigator]
    study.description = study_info['study_description']
    study.filename = 's_study.txt'
    study.title = 'ISA created ' + datetime.datetime.now().isoformat()
    study.identifier = 'ISA-' + uuid.uuid4().hex[:8]

    # The investigation mirrors the study's descriptive fields.
    investigation = Investigation()
    investigation.contacts = [principal_investigator]
    investigation.description = study.description
    investigation.title = study.title
    investigation.identifier = study.identifier
    investigation.studies = [study]

    isatab.dump(isa_obj=investigation,
                output_path=target_dir,
                i_file_name='i_investigation.txt')

    # Touch every referenced data file: opening in append mode creates it if
    # missing, and utime refreshes its timestamps.
    referenced_files = (data_file
                        for assay in study.assays
                        for data_file in assay.data_files)
    for data_file in referenced_files:
        data_file_path = os.path.join(target_dir, data_file.filename)
        with open(data_file_path, 'a'):
            os.utime(data_file_path, None)
Esempio n. 8
0
def create_from_galaxy_parameters(galaxy_parameters_file, target_dir):
    """Create an ISA-Tab study from Galaxy tool parameters and write it out.

    The JSON parameters are turned into a treatment sequence and a
    sample/assay plan (including QC records). A study is generated from
    them through ``IsaModelObjectFactory``, wrapped in an ``Investigation``
    and dumped with ``isatab.dump``.

    :param galaxy_parameters_file: open file-like object holding the Galaxy
        parameters as JSON (it is handed to ``json.load``).
    :param target_dir: output directory passed to ``isatab.dump``; when
        given, it must already exist.
    :raises IOError: if no parameters file is given, if ``target_dir`` does
        not exist, or if no sample plan is specified.
    :raises NotImplementedError: for multiple interventions, for assay types
        other than MS and NMR, and for unrecognised QC types.
    """

    def _to_ontology_annotation(label):
        """Convert ``'term (SOURCE:ACCESSION)'`` into an OntologyAnnotation.

        Labels that do not start with that pattern are returned unchanged.
        """
        # Raw string: '\(' in a plain literal is an invalid escape sequence.
        match = re.match(r'(.*?) \((.*?)\)', label)
        if not match:
            return label
        term, ontoid = match.group(1), match.group(2)
        id_parts = ontoid.split(':')
        # Indexing (rather than unpacking) keeps the IndexError raised when
        # the bracketed part contains no ':' separator.
        source_name, accession_id = id_parts[0], id_parts[1]
        source = OntologySource(name=source_name)
        return OntologyAnnotation(
            term=term, term_source=source, term_accession=accession_id)

    def _create_treatment_sequence(galaxy_parameters):
        """Build the TreatmentSequence described by the 'treatment_plan'."""
        # Maps the tool's intervention labels onto INTERVENTIONS keys;
        # anything unknown defaults to a chemical intervention.
        intervention_keys = {
            'chemical intervention': 'CHEMICAL',
            'dietary intervention': 'DIET',
            'behavioural intervention': 'BEHAVIOURAL',
            'biological intervention': 'BIOLOGICAL',
            'surgical intervention': 'SURGICAL',
            'radiological intervention': 'RADIOLOGICAL',  # not in tool yet
        }
        treatment_plan = galaxy_parameters['treatment_plan']
        study_type_params = treatment_plan['study_type']
        study_type = study_type_params['study_type_selector']
        log.debug(json.dumps(galaxy_parameters, indent=4))
        # The selector sits at a different place depending on the layout of
        # the parameters: try the 'balance' layout first, then the flat one.
        try:
            single_or_multiple = study_type_params['balance'][
                'multiple_interventions']
        except KeyError:
            single_or_multiple = study_type_params[
                'multiple_interventions']['multiple_interventions_selector']
        if single_or_multiple == 'multiple':
            raise NotImplementedError(
                'Multiple treatments not yet implemented. Please select Single')

        if study_type == 'full_factorial':
            interventions_params = study_type_params['multiple_interventions']
            intervention_params = interventions_params['intervention_type']
            intervention_type = intervention_params[
                'intervention_type_selector']
            interventions = INTERVENTIONS[
                intervention_keys.get(intervention_type, 'CHEMICAL')]
            treatment_factory = TreatmentFactory(
                intervention_type=interventions, factors=BASE_FACTORS)

            # Each field is a comma-separated list of levels for the base
            # factor at the same position in BASE_FACTORS.
            for index, field in enumerate(('agent', 'intensity', 'duration')):
                for level in intervention_params[field].split(','):
                    treatment_factory.add_factor_value(
                        BASE_FACTORS[index], level.strip())
            treatment_sequence = TreatmentSequence(
                ranked_treatments=treatment_factory
                .compute_full_factorial_design())
            group_size = int(interventions_params['group_size'])
            for ranked_treatment in treatment_sequence.ranked_treatments:
                ranked_treatment[0].group_size = group_size
            return treatment_sequence

        if study_type == 'fractional_factorial':
            balance = study_type_params['balance']
            interventions_params = balance['multiple_interventions']
            intervention_type = interventions_params[
                'intervention_type_selector']
            treatments = set()
            study_factors = [
                StudyFactor(name=name.strip())
                for name in interventions_params['study_factors'].split(',')]
            for group in interventions_params['study_groups']:
                levels = [level.strip()
                          for level in group['factor_values'].split(',')]
                factor_values = tuple(
                    FactorValue(factor_name=factor, value=level)
                    for factor, level in zip(study_factors, levels))
                # Balanced designs share one group size; otherwise every
                # study group carries its own.
                if balance['balanced_groups']:
                    group_size = int(interventions_params['group_size'])
                else:
                    group_size = int(group['group_size'])
                treatments.add(Treatment(
                    treatment_type=intervention_type,
                    factor_values=factor_values, group_size=group_size))
            return TreatmentSequence(ranked_treatments=treatments)

        # NOTE(review): any other study type yields None, exactly as the
        # implicit fall-through did before; confirm whether this should
        # raise instead.
        return None

    def _create_sample_plan(sample_assay_plan, sample_plan_record):
        """Register one sample type and its assay plans on the plan."""

        def _create_nmr_assay_type(assay_plan_record):
            """Build the NMR AssayType for one assay plan record."""
            nmr_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='nmr spectroscopy')
            nmr_top_mods = NMRTopologyModifiers()
            acquisition = assay_plan_record['assay_type']['acquisition_mode']
            nmr_top_mods.technical_replicates = acquisition[
                'technical_replicates']
            nmr_top_mods.acquisition_modes.add(
                acquisition['acquisition_mode_selector'])
            nmr_top_mods.instruments.add('{} {}'.format(
                acquisition['nmr_instrument'], acquisition['magnet']))
            nmr_top_mods.pulse_sequences.add(acquisition['pulse_sequence'])
            nmr_top_mods.magnet_power = acquisition['magnet']
            nmr_assay_type.topology_modifiers = nmr_top_mods
            return nmr_assay_type

        def _create_ms_assay_type(assay_plan_record):
            """Build the MS AssayType for one assay plan record."""
            ms_assay_type = AssayType(
                measurement_type='metabolite profiling',
                technology_type='mass spectrometry')
            assay_params = assay_plan_record['assay_type']
            ms_assay_type.topology_modifiers = MSTopologyModifiers(
                sample_fractions={
                    fraction['sample_fraction']
                    for fraction in assay_params['sample_fractions']})
            injection_modes = ms_assay_type.topology_modifiers.injection_modes
            for inj_mod in assay_params['injections']:
                mode_params = inj_mod['injection_mode']
                selector = mode_params['injection_mode_selector']
                injection_mode = MSInjectionMode(
                    injection_mode=selector,
                    ms_instrument=mode_params['instrument'])
                if selector in ('LC', 'GC'):
                    injection_mode.chromatography_instrument = mode_params[
                        'chromatography_instrument']
                if selector == 'LC':
                    injection_mode.chromatography_column = mode_params[
                        'chromatography_column']
                injection_modes.add(injection_mode)
                for acq_mod in mode_params['acquisitions']:
                    injection_mode.acquisition_modes.add(
                        MSAcquisitionMode(
                            acquisition_method=acq_mod['acquisition_mode'],
                            technical_repeats=acq_mod['technical_replicates']))
                    # NOTE(review): derivatizations are only registered while
                    # iterating acquisitions, so a GC injection without any
                    # acquisition gets none; confirm this nesting is intended.
                    if selector == 'GC':
                        for deriva in mode_params['derivatizations']:
                            injection_mode.derivatizations.add(
                                _to_ontology_annotation(
                                    deriva['derivatization']))
            return ms_assay_type

        if sample_plan_record['material_type'] == 'user defined':
            # NOTE(review): 'material_type' has just compared equal to a
            # string, so subscripting it with a key looks like it cannot
            # work; verify the expected parameter layout before relying on
            # this branch.
            sample_type = sample_plan_record['material_type']['sample_type_ud']
        else:
            sample_type = _to_ontology_annotation(
                sample_plan_record['material_type'])
        sample_assay_plan.add_sample_type(sample_type)
        sample_size = sample_plan_record['sample_collections']
        sample_assay_plan.add_sample_plan_record(sample_type, sample_size)
        for assay_plan_record in sample_plan_record['assay_plans']:
            tt = assay_plan_record['assay_type']['assay_type_selector']
            if tt == 'nmr':
                assay_type = _create_nmr_assay_type(assay_plan_record)
            elif tt == 'ms':
                assay_type = _create_ms_assay_type(assay_plan_record)
            else:
                raise NotImplementedError('Only MS and NMR assays supported')
            sample_assay_plan.add_assay_type(assay_type)
            sample_assay_plan.add_assay_plan_record(sample_type, assay_type)
        return sample_assay_plan

    def _inject_qcqa_plan(sample_assay_plan, qcqa_record):
        """Add one QC record (interval or dilution series) to the plan."""
        qc_params = qcqa_record['qc_type']
        qc_type = qc_params['qc_type_selector']
        if qc_type == 'interval_series':
            material_type = _to_ontology_annotation(
                qcqa_record['material_type'])
            sample_assay_plan.add_sample_qc_plan_record(
                material_type=material_type,
                injection_interval=qc_params['injection_frequency'])
        elif 'dilution_series' in qc_type:
            values = [int(x) for x in qc_params['values'].split(',')]
            material_type = _to_ontology_annotation(
                qcqa_record['material_type'])
            batch = SampleQCBatch(material=material_type)
            for value in values:
                batch.characteristic_values.append(
                    Characteristic(
                        category=OntologyAnnotation(term='quantity'),
                        value=value))
            # The selector text decides whether the batch runs before or
            # after the samples.
            if 'pre' in qc_type:
                sample_assay_plan.pre_run_batch = batch
            elif 'post' in qc_type:
                sample_assay_plan.post_run_batch = batch
        else:
            raise NotImplementedError('QC type not recognized!')

        return sample_assay_plan

    def sanitize_filename(filename):
        """Replace spaces and any character outside [-\\w.] with '_'."""
        filename = str(filename).strip().replace(' ', '_')
        filename = re.sub(r'(?u)[^-\w.]', '_', filename)
        return filename

    # pre-generation checks
    if not galaxy_parameters_file:
        raise IOError('Could not load Galaxy parameters file!')
    galaxy_parameters = json.load(galaxy_parameters_file)
    log.debug(json.dumps(galaxy_parameters, indent=4))
    if target_dir and not os.path.exists(target_dir):
        raise IOError('Target path does not exist!')
    sample_plans = galaxy_parameters['sample_and_assay_planning'][
        'sample_plans']
    if not sample_plans:
        raise IOError('No Sampling plan specified')

    treatment_sequence = _create_treatment_sequence(galaxy_parameters)
    sample_assay_plan = SampleAssayPlan()
    for sample_plan_record in sample_plans:
        _create_sample_plan(sample_assay_plan, sample_plan_record)
    for qcqa_record in galaxy_parameters['qc_planning']['qc_plans']:
        _inject_qcqa_plan(sample_assay_plan, qcqa_record)

    # The group size lives under either the flat or the 'balance' layout.
    study_type_params = galaxy_parameters['treatment_plan']['study_type']
    try:
        sample_assay_plan.group_size = int(
            study_type_params['multiple_interventions']['group_size'])
    except KeyError:
        try:
            sample_assay_plan.group_size = int(
                study_type_params['balance']['multiple_interventions'][
                    'group_size'])
        except KeyError:
            log.debug(
                'Group size not set for root plan as multiple intervention')
            sample_assay_plan.group_size = 0  # raises AttributeError

    study_info = galaxy_parameters['study_metadata']

    if len(sample_assay_plan.sample_plan) == 0:
        log.info('No sample plan defined')
    if len(sample_assay_plan.assay_plan) == 0:
        log.info('No assay plan defined')

    study_design = StudyDesign()
    study_design.add_single_sequence_plan(treatment_sequence, sample_assay_plan)
    isa_object_factory = IsaModelObjectFactory(study_design)
    if len(sample_assay_plan.sample_plan) == 0:
        study = Study()
    else:
        study = isa_object_factory.create_assays_from_plan()

    contact = Person()
    contact.affiliation = study_info.get('affiliation')
    contact.last_name = study_info.get('last_name')
    contact.email = study_info['email']
    contact.first_name = study_info['first_name']
    study.contacts = [contact]
    study.description = study_info['description']
    study.filename = 's_study.txt'
    study.title = study_info['title']
    study.identifier = 'ISA-{}'.format(uuid.uuid4().hex[:8])
    study.comments = [
        Comment(name='Consent Information (ICO:0000011)',
                value=study_info['study_consent']),
        Comment(name='Data Use Requirement (DUO:0000017)',
                value=study_info['study_use_condition'])
    ]

    investigation = Investigation()
    investigation.contacts = [contact]
    investigation.description = ""
    investigation.title = "Investigation"
    investigation.identifier = study.identifier
    investigation.studies = [study]
    # A plain Study() may not expose ontology source references; in that
    # case the investigation keeps its own.
    try:
        investigation.ontology_source_references = \
            study.ontology_source_references
    except AttributeError:
        pass
    investigation.ontology_source_references.append(OntologySource(name='ICO'))
    investigation.ontology_source_references.append(OntologySource(name='DUO'))

    # Make every file name safe before anything is written.
    investigation.filename = sanitize_filename(investigation.filename)
    for inv_study in investigation.studies:
        inv_study.filename = sanitize_filename(inv_study.filename)
        for assay in inv_study.assays:
            assay.filename = sanitize_filename(assay.filename)

    isatab.dump(isa_obj=investigation, output_path=target_dir)
    if e.errno != errno.EEXIST:
        raise

# tdf_file = 'out/' + study_id
# Write every variable record on its own line to '<directory>t_<study_id>.txt',
# echoing each record to stdout as it is written. The `with` block closes the
# file on exit, so no explicit close() is needed afterwards.
with open(directory + 't_' + study_id + '.txt', 'w') as tdf:
    for element in variable_records:
        print(element)
        tdf.write(element + '\n')

# this is really slow and broken, so cheat for now!
# all_germplasm = ('Zea_VIB___1','Zea_VIB___2','Zea_VIB___3','Zea_VIB___4')

# Creating ISA objects: one investigation holding the study built for study_id
investigation = Investigation()
study = create_isa_study(study_id)
investigation.studies.append(study)

# for germplasm in germplasms:
#     # print("found germplasm:",germplasm["germplasmDbId"])
#     source = create_isa_source(germplasm)
#     # source1 = Source(name=germplasm["germplasmDbId"])
#     study.sources.append(source)
#     sample = Sample(germplasm["germplasmDbId"])
#     #isa_sources.append(source)
#     # print(source)
#     # study.materials['sources'].append(source)
#     study.materials['samples'].append(sample)
#     # print(sample)
#     sample_collection_protocol = Protocol(name="sample collection",
Esempio n. 10
0
def convert(json_path, output_path):
    print(json_path)
    print(output_path)

    with open(json_path, 'r') as f:
        dcc_json = json.load(f)

    # print(array['protocol'])
    # for element in array['protocol']:
    #     array['protocol'][element]['id']
    #     array['protocol'][element]['description']
    #     array['protocol'][element]['type']
    #     array['protocol'][element]['filename']

    # for element in array['measurement']:
    #     print(array['measurement'][element]['corrected_mz'])

    # for element in array['subject']:
    #     print(array['subject'][element]['species'])

    # Building the Investigation Object and its elements:

    project_set_json = dcc_json.get('project')

    if len(project_set_json) == 0:
        raise IOError('No project found in input JSON')

    # print(next(iter(project_set_json)))
    project_json = next(iter(project_set_json.values()))
    investigation = Investigation(identifier=project_json['id'])

    obi = OntologySource(name='OBI',
                         description='Ontology for Biomedical Investigations')
    investigation.ontology_source_references.append(obi)

    inv_person = Person(
        first_name=project_json['PI_first_name'],
        last_name=project_json['PI_last_name'],
        email=project_json['PI_email'],
        address=project_json['address'],
        affiliation=(', '.join(
            [project_json['department'], project_json['institution']])),
        roles=[
            OntologyAnnotation(term="",
                               term_source=obi,
                               term_accession="http://purl.org/obo/OBI_1")
        ])
    investigation.contacts.append(inv_person)

    study_set_json = dcc_json.get('study')

    if len(study_set_json) > 0:
        study_json = next(iter(study_set_json.values()))

        study = Study(
            identifier=study_json['id'],
            title=study_json['title'],
            description=study_json['description'],
            design_descriptors=[
                OntologyAnnotation(term=study_json['type'],
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ],
            filename='s_{study_id}.txt'.format(study_id=study_json['id']))

        investigation.studies = [study]

        studyid = study_json['id']
        print(studyid)
        study_person = Person(
            first_name=study_json['PI_first_name'],
            last_name=study_json['PI_last_name'],
            email=study_json['PI_email'],
            address=study_json['address'],
            affiliation=(', '.join(
                [study_json['department'], study_json['institution']])),
            roles=[
                OntologyAnnotation(term='principal investigator',
                                   term_source=obi,
                                   term_accession="http://purl.org/obo/OBI_1")
            ])

        study.contacts.append(study_person)

        for factor_json in dcc_json['factor'].values():
            factor = StudyFactor(name=factor_json['id'])
            study.factors.append(factor)

        for i, protocol_json in enumerate(dcc_json['protocol'].values()):
            oat_p = protocol_json['type']
            oa_protocol_type = OntologyAnnotation(
                term=oat_p,
                term_source=obi,
                term_accession="http://purl.org/obo/OBI_1")
            study.protocols.append(
                Protocol(name=protocol_json['id'],
                         protocol_type=oa_protocol_type,
                         description=protocol_json['description'],
                         uri=protocol_json['filename']))

            if 'MS' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='mass isotopologue distribution analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_112"),
                          technology_type=OntologyAnnotation(
                              term='mass spectrometry',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_ms_{count}.txt'.format(count=i)))

            if 'NMR' in protocol_json['type']:
                study.assays.append(
                    Assay(measurement_type=OntologyAnnotation(
                        term='isotopomer analysis',
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_111"),
                          technology_type=OntologyAnnotation(
                              term='nmr spectroscopy',
                              term_source=obi,
                              term_accession="http://purl.org/obo/OBI_1"),
                          filename='a_assay_nmr.txt'))

        for subject_json in dcc_json['subject'].values():

            # print(array['subject'][element])
            if "organism" in subject_json['type']:

                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)

            elif 'tissue_slice' in subject_json['type']:
                # print(array['subject'][element]['type'])
                source = Source(name=subject_json['id'])
                study.sources.append(source)
                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)

                sample = Sample(name=subject_json['id'],
                                derives_from=subject_json['parentID'])
                characteristic_organismpart = Characteristic(
                    category=OntologyAnnotation(term='organism_part'),
                    value=OntologyAnnotation(
                        term=subject_json['tissue_type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))

                sample.characteristics.append(characteristic_organismpart)
                study.samples.append(sample)
                # print(study.samples[0].name)

                sample_collection_process = Process(
                    executes_protocol=study.get_prot(
                        subject_json['protocol.id']))
                sample_collection_process.inputs.append(source)
                sample_collection_process.outputs.append(sample)
                study.process_sequence.append(sample_collection_process)

            else:
                source = Source(name=subject_json['id'])

                ncbitaxon = OntologySource(name='NCBITaxon',
                                           description="NCBI Taxonomy")
                characteristic_organism = Characteristic(
                    category=OntologyAnnotation(term="Organism"),
                    value=OntologyAnnotation(
                        term=subject_json['species'],
                        term_source=ncbitaxon,
                        term_accession=
                        'http://purl.bioontology.org/ontology/NCBITAXON/9606'))
                source.characteristics.append(characteristic_organism)
                study.sources.append(source)
                print(subject_json['id'])
                print(subject_json['species'])
                print(subject_json['type'])
        # for src in investigation.studies[0].materials:
        #
        # for sam in investigation.studies[0].materials:

        for sample_json in dcc_json['sample'].values():

            if 'cells' in sample_json['type']:
                material_separation_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                material_separation_process.name = sample_json['id']
                # dealing with input material, check that the parent material is already among known samples or sources

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    material_separation_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    print([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ])
                    material_separation_process.inputs.append([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                    ][0])

                material_out = Sample(name=sample_json['id'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_xxxxxxx"))
                material_out.characteristics.append(material_type)
                material_separation_process.outputs.append(material_out)
                study.assays[0].samples.append(material_out)
                try:
                    sample_collection_process
                except NameError:
                    sample_collection_process = None
                if sample_collection_process is None:
                    sample_collection_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    # plink(material_separation_process, protein_extraction_process)

                    plink(sample_collection_process,
                          protein_extraction_process)

            if 'protein_extract' in sample_json['type']:
                protein_extraction_process = Process(
                    executes_protocol=study.get_prot(
                        sample_json['protocol.id']))
                protein_extraction_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    protein_extraction_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    protein_extraction_process.inputs.append(material_in)

                # for material_in in study.samples:
                #     # print("OHO:", material_in.name)
                #     if material_in.name == sample_json['parentID']:
                #         # print("C:",sample_json['parentID'])
                #         #no need to create, just link to process
                #         protein_extraction_process.inputs.append(x)
                #     else:
                #         # print("D:", sample_json['parentID'])
                #         #create new material and link
                #         material_in = Sample(name=sample_json['parentID'])
                #         protein_extraction_process.inputs.append(material_in)

                material_out = Material(name=sample_json['id'])
                material_out.type = "Extract Name"
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type'),
                    value=OntologyAnnotation(
                        term=sample_json['type'],
                        term_source=obi,
                        term_accession="http://purl.org/obo/OBI_1"))
                material_out.characteristics.append(material_type)

                study.assays[0].samples.append(material_in)
                study.assays[0].materials['other_material'].append(material_in)
                try:
                    material_separation_process
                except NameError:
                    material_separation_process = None
                if material_separation_process is None:
                    material_separation_process = Process(executes_protocol="")
                else:
                    # plink(protein_extraction_process, data_acq_process)
                    plink(material_separation_process,
                          protein_extraction_process)

            if 'polar' in sample_json['type']:

                material_in = Material(name=sample_json['parentID'])
                material_type = Characteristic(
                    category=OntologyAnnotation(term='material_type',
                                                term_source=obi),
                    value=OntologyAnnotation(term=sample_json['type'],
                                             term_source=obi))
                material_in.characteristics.append(material_type)
                study.assays[0].materials['other_material'].append(material_in)

                data_acq_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                data_acq_process.name = sample_json['id']
                datafile = DataFile(
                    filename='{filename}.txt'.format(filename='_'.join(
                        ['mass_isotopomer-data', studyid, sample_json['id']])),
                    label='Raw Data File')
                data_acq_process.outputs.append(datafile)
                # print(study.assays[0].technology_type.term)

                study.assays[0].data_files.append(datafile)
                try:
                    protein_extraction_process
                except NameError:
                    protein_extraction_process = None
                if protein_extraction_process is None:
                    protein_extraction_process = Process(executes_protocol="")
                else:
                    plink(protein_extraction_process, data_acq_process)

            # else:
            #     material_in = Material(name=sample_json['parentID'])
            #     material_out = Material(name=sample_json['id'])
            #     material_type = Characteristic(
            #         category=OntologyAnnotation(term="material_type"),
            #         value=OntologyAnnotation(term=sample_json['type'],
            #                                  term_source=obi,
            #                                  term_accession="http://purl.org/obo/OBI_1"))
            #     material_out.characteristics.append(material_type)
            #     process = Process(executes_protocol=sample_json['protocol.id'])
            #     process.name = sample_json['id']
            #     process.inputs.append(material_in)
            #     process.outputs.append(material_out)
            #
            #     study.assays[0].materials['other_material'].append(material_in)
            #     study.assays[0].materials['other_material'].append(material_out)

            if 'bulk_tissue' in sample_json['type']:
                bulk_process = Process(executes_protocol=study.get_prot(
                    sample_json['protocol.id']))
                bulk_process.name = sample_json['id']

                if len([
                        x for x in study.samples
                        if x.name == sample_json['parentID']
                ]) == 0:
                    material_in = Sample(name=sample_json['parentID'])
                    bulk_process.inputs.append(material_in)
                    study.assays[0].samples.append(material_in)
                else:
                    # print([x for x in study.samples if x.name == sample_json['parentID']])
                    bulk_process.inputs.append(material_in)

                    plink(sample_collection_process, bulk_process)

    data_rec_header = '\t'.join(
        ('metabolite name', 'assignment', 'signal intensity', 'retention time',
         'm/z', 'formula', 'adduct', 'isotopologue', 'sample identifier'))
    records = []
    for element in dcc_json['measurement']:
        # metabolite_name: -> compound
        # array['measurement'][element]['signal_intensity']
        record = '\t'.join((dcc_json['measurement'][element]['compound'],
                            dcc_json['measurement'][element]['assignment'],
                            dcc_json['measurement'][element]['raw_intensity'],
                            dcc_json['measurement'][element]['retention_time'],
                            dcc_json['measurement'][element]['corrected_mz'],
                            dcc_json['measurement'][element]['formula'],
                            dcc_json['measurement'][element]['adduct'],
                            dcc_json['measurement'][element]['isotopologue'],
                            dcc_json['measurement'][element]['sample.id']))
        # print(record)
        records.append(record)

    if not os.path.exists(output_path):
        os.makedirs(output_path)
        try:
            with open(
                    '{output_path}/{study_id}-maf-data-nih-dcc-json.txt'.
                    format(output_path=output_path,
                           study_id=studyid), 'w') as fh:
                print(
                    "'writing 'maf file document' to file from 'generate_maf_file' method:..."
                )
                fh.writelines(data_rec_header)
                fh.writelines('\n')
                for item in records:
                    fh.writelines(item)
                    fh.writelines('\n')

            print("writing 'investigation information' to file...")
            print(isatab.dumps(investigation))

            isatab.dump(investigation, output_path=output_path)
        except IOError:
            print("Error: in main() method can't open file or write data")
Esempio n. 11
0
def main(arg):
    """Convert the trials held by the BrAPI SERVER into ISA-Tab documents.

    For every trial returned by ``get_trials(client)`` a fresh
    ``Investigation`` is created. Each BrAPI study of the trial is converted
    into an ISA study (sources, samples, assays) and the investigation is
    dumped to the trial's output directory, followed by a trait definition
    file (``t_``) and one measurement data file (``d_``) per observation
    level.

    Conversion is best-effort: a failure while writing one artefact is logged
    and processing continues with the next artefact/study.

    Args:
        arg: Not used by the function body; kept for interface compatibility
            with existing callers.
    """

    client = BrapiClient(SERVER, logger)
    converter = BrapiToIsaConverter(logger, SERVER)

    # iterating through the trials held in a BRAPI server:
    # for trial in client.get_trials(TRIAL_IDS):
    for trial in get_trials(client):
        logger.info('we start from a set of Trials')
        investigation = Investigation()

        output_directory = get_output_path(trial['trialName'])
        logger.info("Generating output in : %s", output_directory)

        if 'contacts' in trial:
            for brapicontact in trial['contacts']:
                # NOTE: brapi has just a name attribute -> no separate
                # first/last name. The first token is used as first name and
                # whatever follows as last name, so single-word names no
                # longer raise IndexError and multi-word surnames are kept.
                name_parts = brapicontact['name'].split()
                first_name = name_parts[0] if name_parts else ''
                last_name = ' '.join(name_parts[1:])
                contact = Person(
                    first_name=first_name,
                    last_name=last_name,
                    affiliation=brapicontact['institutionName'],
                    email=brapicontact['email'])
                investigation.contacts.append(contact)

        # iterating through the BRAPI studies associated to a given BRAPI trial:
        for brapi_study in trial['studies']:
            # NOTE keeping track of germplasm info for data file generation
            germplasminfo = {}
            brapi_study_id = brapi_study['studyDbId']
            obs_levels_in_study_and_var, obs_levels = \
                converter.obtain_brapi_obs_levels_and_var(brapi_study_id)
            # NB: this method always create an ISA Assay Type
            isa_study, investigation = converter.create_isa_study(
                brapi_study_id, investigation,
                obs_levels_in_study_and_var.keys())
            investigation.studies.append(isa_study)

            # creating the main ISA protocols:
            sample_collection_protocol = Protocol(
                name="sample collection",
                protocol_type=OntologyAnnotation(term="sample collection"))
            isa_study.protocols.append(sample_collection_protocol)

            # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
            # TODO: see https://github.com/ISA-tools/isa-api/blob/master/isatools/isatab.py#L886
            phenotyping_protocol = Protocol(
                name="phenotyping",
                protocol_type=OntologyAnnotation(
                    term="nucleic acid sequencing"))
            isa_study.protocols.append(phenotyping_protocol)

            # Getting the list of all germplasms used in the BRAPI isa_study:
            germplasms = client.get_study_germplasms(brapi_study_id)

            # Iterating through the germplasm considered as biosource.
            # For each of them, we create an ISA source whose characteristics
            # are derived from the germplasm attributes.
            for germ in germplasms:
                # WARNING: BRAPIv1 endpoints are not consistently using these
                # depending on endpoints, attributes may have to swapped
                source = Source(
                    name=germ['germplasmName'],
                    characteristics=converter.create_germplasm_chars(germ))

                # Only the first accession number seen for a germplasm is kept.
                if germ['germplasmDbId'] not in germplasminfo:
                    germplasminfo[germ['germplasmDbId']] = [
                        germ['accessionNumber']]

                # Associating ISA sources to ISA isa_study object
                isa_study.sources.append(source)

            # Now dealing with BRAPI observation units and attempting to create ISA samples
            create_study_sample_and_assay(
                client, brapi_study_id, isa_study,
                sample_collection_protocol, phenotyping_protocol)

            # Writing isa_study to ISA-Tab format:
            # --------------------------------
            try:
                # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
                # !!!: if Assay Table is missing the 'Assay Name' field, remember to check protocol_type used !!!
                isatab.dump(isa_obj=investigation,
                            output_path=output_directory)
                logger.info('DONE!...')
            except IOError as ioe:
                logger.error('CONVERSION FAILED!...')
                logger.error(str(ioe))

            # Writing Trait Definition File:
            # ------------------------------
            try:
                variable_records = converter.create_isa_tdf_from_obsvars(
                    client.get_study_observed_variables(brapi_study_id))
                write_records_to_file(this_study_id=str(brapi_study_id),
                                      this_directory=output_directory,
                                      records=variable_records,
                                      filetype="t_")
            except Exception:
                # Best-effort: keep going, but record the full traceback
                # through the logger rather than printing to stdout.
                logger.exception(
                    "Could not write trait definition file for study %s",
                    brapi_study_id)

            # Getting Variable Data and writing Measurement Data File
            # -------------------------------------------------------
            for level, variables in obs_levels_in_study_and_var.items():
                try:
                    obsvarlist = list(
                        client.get_study_observation_units(brapi_study_id))
                    data_readings = converter.create_isa_obs_data_from_obsvars(
                        obsvarlist, list(variables), level, germplasminfo,
                        obs_levels)
                    logger.debug("Generating data files")
                    write_records_to_file(this_study_id=str(brapi_study_id),
                                          this_directory=output_directory,
                                          records=data_readings,
                                          filetype="d_",
                                          ObservationLevel=level)
                except Exception:
                    # Best-effort per observation level; see note above.
                    logger.exception(
                        "Could not write data file for study %s at "
                        "observation level %s", brapi_study_id, level)
Esempio n. 12
0
def create_descriptor():
    """
    Returns a simple but complete ISA-Tab 1.0 descriptor for illustration.

    The descriptor contains one investigation with one study. The study has
    one source, three samples derived from it through a sample collection
    process, and one assay in which every sample goes through an extraction
    and a sequencing process that yields one data file.

    Returns:
        The value returned by ``isatab.dumps(investigation)``, i.e. the
        ISA-Tab serialization of the investigation.
    """

    # Create an empty Investigation object and set some values to the instance
    # variables.

    investigation = Investigation()
    investigation.identifier = "i1"
    investigation.title = "My Simple ISA Investigation"
    investigation.description = \
        "We could alternatively use the class constructor's parameters to " \
        "set some default values at the time of creation, however we want " \
        "to demonstrate how to use the object's instance variables to " \
        "set values."
    investigation.submission_date = "2016-11-03"
    investigation.public_release_date = "2016-11-03"

    # Create an empty Study object and set some values. The Study must have a
    # filename, otherwise when we serialize it to ISA-Tab we would not know
    # where to write it. We must also attach the study to the investigation by
    # adding it to the 'investigation' object's list of studies.

    study = Study(filename="s_study.txt")
    study.identifier = "s1"
    study.title = "My ISA Study"
    study.description = \
        "Like with the Investigation, we could use the class constructor to " \
        "set some default values, but have chosen to demonstrate in this " \
        "example the use of instance variables to set initial values."
    study.submission_date = "2016-11-03"
    study.public_release_date = "2016-11-03"
    investigation.studies.append(study)

    # Some instance variables are typed with different objects and lists of
    # objects. For example, a Study can have a list of design descriptors. A
    # design descriptor is an Ontology Annotation describing the kind of study
    # at hand. Ontology Annotations should typically reference an Ontology
    # Source. We demonstrate a mix of using the class constructors and setting
    # values with instance variables. Note that the OntologyAnnotation object
    # 'intervention_design' links its 'term_source' directly to the 'obi'
    # object instance. To ensure the OntologySource is encapsulated in the
    # descriptor, it is added to a list of 'ontology_source_references' in
    # the Investigation object. The 'intervention_design' object is then
    # added to the list of 'design_descriptors' held by the Study object.

    obi = OntologySource(
        name='OBI',
        description="Ontology for Biomedical Investigations")
    investigation.ontology_source_references.append(obi)
    intervention_design = OntologyAnnotation(term_source=obi)
    intervention_design.term = "intervention design"
    intervention_design.term_accession = \
        "http://purl.obolibrary.org/obo/OBI_0000115"
    study.design_descriptors.append(intervention_design)

    # Other instance variables common to both Investigation and Study objects
    # include 'contacts' and 'publications', each with lists of corresponding
    # Person and Publication objects.

    contact = Person(
        first_name="Alice",
        last_name="Robertson",
        affiliation="University of Life",
        roles=[
            OntologyAnnotation(
                term='submitter')])
    study.contacts.append(contact)
    publication = Publication(
        title="Experiments with Elephants",
        author_list="A. Robertson, B. Robertson")
    publication.pubmed_id = "12345678"
    publication.status = OntologyAnnotation(term="published")
    study.publications.append(publication)

    # To create the study graph that corresponds to the contents of the study
    # table file (the s_*.txt file), we need to create a process sequence.
    # To do this we use the Process class and attach it to the Study object's
    # 'process_sequence' list instance variable. Each process must be linked
    # with a Protocol object that is attached to a Study object's 'protocols'
    # list instance variable. The sample collection Process object usually has
    # as input a Source material and as output a Sample material.

    # Here we create one Source material object and attach it to our study.

    source = Source(name='source_material')
    study.sources.append(source)

    # Then we create three Sample objects, with organism as Homo Sapiens, and
    # attach them to the study. We use the utility function
    # batch_create_materials() to clone a prototype material object. The
    # function automatically appends an index to the material name. In this
    # case, three samples will be created, with the names 'sample_material-0',
    # 'sample_material-1' and 'sample_material-2'.

    prototype_sample = Sample(name='sample_material', derives_from=[source])

    # The organism annotation references the NCBI Taxonomy, so that ontology
    # source is declared on the investigation as well, exactly as done for
    # 'obi' above.
    ncbitaxon = OntologySource(name='NCBITaxon', description="NCBI Taxonomy")
    investigation.ontology_source_references.append(ncbitaxon)
    characteristic_organism = Characteristic(
        category=OntologyAnnotation(term="Organism"),
        value=OntologyAnnotation(
            term="Homo Sapiens",
            term_source=ncbitaxon,
            term_accession="http://purl.bioontology.org/ontology/NCBITAXON/"
                           "9606"))
    prototype_sample.characteristics.append(characteristic_organism)

    study.samples = batch_create_materials(
        prototype_sample, n=3)  # creates a batch of 3 samples

    # Now we create a single Protocol object that represents our
    # sample collection protocol, and attach it to the study object. Protocols
    # must be declared before we describe Processes, as a processing event of
    # some sort must execute some defined protocol. In the case of the class
    # model, Protocols should therefore be declared before Processes in order
    # for the Process to be linked to one.

    sample_collection_protocol = Protocol(
        name="sample collection",
        protocol_type=OntologyAnnotation(term="sample collection"))
    study.protocols.append(sample_collection_protocol)
    sample_collection_process = Process(
        executes_protocol=sample_collection_protocol)

    # Next, we link our materials to the Process. In this particular case,
    # we are describing a sample collection process that takes one
    # source material, and produces three different samples.
    #
    # (source_material)->(sample collection)->[(sample_material-0),
    # (sample_material-1), (sample_material-2)]

    for src in study.sources:
        sample_collection_process.inputs.append(src)
    for sam in study.samples:
        sample_collection_process.outputs.append(sam)

    # Finally, attach the finished Process object to the study
    # process_sequence. This can be done many times to
    # describe multiple sample collection events.

    study.process_sequence.append(sample_collection_process)

    # Next, we build an Assay object and attach two protocols, extraction and
    # sequencing. The measurement and technology types describe the assay as
    # a whole, so they are set once here rather than once per sample.

    assay = Assay(filename="a_assay.txt")
    assay.measurement_type = OntologyAnnotation(term="gene sequencing")
    assay.technology_type = OntologyAnnotation(
        term="nucleotide sequencing")
    extraction_protocol = Protocol(
        name='extraction',
        protocol_type=OntologyAnnotation(
            term="material extraction"))
    study.protocols.append(extraction_protocol)
    sequencing_protocol = Protocol(
        name='sequencing',
        protocol_type=OntologyAnnotation(
            term="material sequencing"))
    study.protocols.append(sequencing_protocol)

    # To build out assay graphs, we enumerate the samples from the
    # study-level, and for each sample we create an extraction process and a
    # sequencing process. The extraction process takes as input a
    # sample material, and produces an extract material. The sequencing
    # process takes the extract material and produces a data file. This will
    # produce three graphs, from sample material through to data, as follows:
    #
    # (sample_material-0)->(extraction)->(extract-0)->(sequencing)->
    # (sequenced-data-0)
    # (sample_material-1)->(extraction)->(extract-1)->(sequencing)->
    # (sequenced-data-1)
    # (sample_material-2)->(extraction)->(extract-2)->(sequencing)->
    # (sequenced-data-2)
    #
    # Note that the extraction processes and sequencing processes are
    # distinctly separate instances, where the three graphs are NOT
    # interconnected.

    for i, sample in enumerate(study.samples):

        # create an extraction process that executes the extraction protocol

        extraction_process = Process(executes_protocol=extraction_protocol)

        # extraction process takes as input a sample, and produces an extract
        # material as output

        extraction_process.inputs.append(sample)
        material = Material(name="extract-{}".format(i))
        material.type = "Extract Name"
        extraction_process.outputs.append(material)

        # create a sequencing process that executes the sequencing protocol

        sequencing_process = Process(executes_protocol=sequencing_protocol)
        sequencing_process.name = "assay-name-{}".format(i)
        sequencing_process.inputs.append(extraction_process.outputs[0])

        # Sequencing process usually has an output data file

        datafile = DataFile(
            filename="sequenced-data-{}".format(i),
            label="Raw Data File",
            generated_from=[sample])
        sequencing_process.outputs.append(datafile)

        # ensure Processes are linked forward and backward

        plink(extraction_process, sequencing_process)

        # make sure the extract, data file, and the processes are attached to
        # the assay

        assay.samples.append(sample)
        assay.data_files.append(datafile)
        assay.other_material.append(material)
        assay.process_sequence.append(extraction_process)
        assay.process_sequence.append(sequencing_process)

    # attach the assay to the study

    study.assays.append(assay)

    from isatools import isatab
    # dumps() writes out the ISA as a string representation of the ISA-Tab
    return isatab.dumps(investigation)
Esempio n. 13
0
 def __init__(self):
     self.ISA = Investigation(studies=[Study()])
     self._idfdict = {}
     self._ts_dict = {}