def _copyFiles(self):
        """
        Downloads metadata and centroided imzML files to a temporary directory,
        then copies files to the S3 bucket, putting them into structure expected by sm-engine.
        """
        # Remember which keys already exist under the study prefix in S3.
        existing = self._bucket.objects.filter(Prefix=self._study_rel_dir)
        self._uploaded_to_s3 = {obj.key for obj in existing}

        # Grab the FTP directory listing.
        self._ftp = self._ftpConnection()
        ftp_entries = []
        self._ftp.retrlines('NLST', ftp_entries.append)

        # Pull down every plain-text (metadata) file first.
        for entry in (name for name in ftp_entries if name.endswith(".txt")):
            self._fetchFromFTP(entry)

        if not self._parse_isatab:
            self._study = self._info = None
        else:
            print("Parsing ISATab metadata")
            self._study = ip.parse(self._study_dir).studies[0]
            self._info = self._extractStudyMetadata(self._study)

        print("Copying datasets to S3")
        self._jobs = self._createTasks(self._centroidedDatasets(ftp_entries))

        print("Ready to submit jobs")
Exemple #2
0
    def convert(self, work_dir):
        """
        Convert an ISA-Tab dataset (version 1) to JSON provided the ISA
        model v1.0 JSON Schemas.

        :param work_dir: directory containing the ISA-tab dataset
        :return: the ISA-JSON dict, validated against the investigation
            schema, or None when no ISA-Tab dataset is found in work_dir
        """
        log.info("Converting ISA-Tab to ISA-JSON for %s", work_dir)

        isa_tab = parse(work_dir)

        if isa_tab is None:
            # The implicit None return signals failure to the caller.
            log.fatal("No ISA-Tab dataset found")
        else:
            # Start from an empty document so validation still runs when
            # the investigation metadata block is empty.
            isa_json = {}
            if isa_tab.metadata != {}:
                isa_json = {
                    "identifier":
                        isa_tab.metadata['Investigation Identifier'],
                    "title": isa_tab.metadata['Investigation Title'],
                    "description":
                        isa_tab.metadata['Investigation Description'],
                    "submissionDate":
                        isa_tab.metadata['Investigation Submission Date'],
                    "publicReleaseDate":
                        isa_tab.metadata['Investigation Public Release Date'],
                    "ontologySourceReferences":
                        self.createOntologySourceReferences(
                            isa_tab.ontology_refs),
                    "publications":
                        self.createPublications(isa_tab.publications,
                                                "Investigation"),
                    "people":
                        self.createContacts(isa_tab.contacts,
                                            "Investigation"),
                    "studies": self.createStudies(isa_tab.studies),
                    "comments": self.createComments(isa_tab.metadata),
                }

            # Validate against the investigation schema; the RefResolver
            # resolves $ref entries relative to the schema's own location.
            with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as json_fp:
                schema = json.load(json_fp)
            resolver = RefResolver(
                'file://' + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA),
                schema)
            validator = Draft4Validator(schema, resolver=resolver)
            validator.validate(isa_json, schema)

            log.info("Conversion finished")
            return isa_json
Exemple #3
0
    def convert(self, work_dir, json_dir):
        """
        Convert an ISA-Tab dataset (version 1) to JSON provided the ISA
        model v1.0 JSON Schemas, then save the result to disk.

        :param work_dir: directory containing the ISA-tab dataset
        :param json_dir: output directory where the resulting json file
            will be saved
        :return: the validated ISA-JSON dict, or None when no dataset is
            found in work_dir
        """
        print("Converting ISAtab to ISAjson for ", work_dir)

        isa_tab = parse(work_dir)

        if isa_tab is None:
            print("No ISAtab dataset found")
        else:
            # Start from an empty document so validation and saving still
            # work when the investigation metadata block is empty (the
            # original left isa_json unbound in that case, causing a
            # NameError at validation time).
            isa_json = {}
            if isa_tab.metadata != {}:
                isa_json = {
                    "identifier": isa_tab.metadata['Investigation Identifier'],
                    "title": isa_tab.metadata['Investigation Title'],
                    "description": isa_tab.metadata['Investigation Description'],
                    "submissionDate": isa_tab.metadata['Investigation Submission Date'],
                    "publicReleaseDate": isa_tab.metadata['Investigation Public Release Date'],
                    "commentCreatedWithConfiguration": self.createComment(
                        'Created With Configuration',
                        isa_tab.metadata['Comment[Created With Configuration]']),
                    "commentLastOpenedWithConfiguration": self.createComment(
                        'Last Opened With Configuration',
                        isa_tab.metadata['Comment[Last Opened With Configuration]']),
                    "ontologySourceReferences": self.createOntologySourceReferences(isa_tab.ontology_refs),
                    "publications": self.createPublications(isa_tab.publications, "Investigation"),
                    "people": self.createContacts(isa_tab.contacts, "Investigation"),
                    "studies": self.createStudies(isa_tab.studies),
                }

            # Name the output after the investigation identifier when one is
            # present, otherwise after the first study's identifier.
            # .get() avoids a KeyError when the metadata block is empty.
            if isa_tab.metadata.get('Investigation Identifier'):
                file_name = os.path.join(json_dir, isa_tab.metadata['Investigation Identifier'] + ".json")
            else:
                file_name = os.path.join(json_dir, isa_tab.studies[0].metadata['Study Identifier'] + ".json")

            # Validate against the investigation schema; use a context
            # manager so the schema file handle is not leaked.
            with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as schema_fp:
                schema = json.load(schema_fp)
            resolver = RefResolver('file://' + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA), schema)
            validator = Draft4Validator(schema, resolver=resolver)
            validator.validate(isa_json, schema)

            # TODO refactor saving the file into a separate method
            with open(file_name, "w") as outfile:
                json.dump(isa_json, outfile, indent=4, sort_keys=True)
            print("... conversion finished.")
            return isa_json
Exemple #4
0
    def convert(self, work_dir):
        """
        Convert an ISA-Tab dataset (version 1) to JSON provided the ISA
        model v1.0 JSON Schemas.

        :param work_dir: directory containing the ISA-tab dataset
        :return: the validated ISA-JSON dict, or None when no dataset is
            found in work_dir
        """
        logger.info("Converting ISAtab to ISAjson for {}".format(work_dir))

        isa_tab = parse(work_dir)

        if isa_tab is None:
            logger.fatal("No ISAtab dataset found")
        else:
            # Start from an empty document so validation still runs when
            # the investigation metadata block is empty (the original left
            # isa_json unbound in that case, causing a NameError).
            isa_json = {}
            if isa_tab.metadata != {}:
                isa_json = {
                    "identifier": isa_tab.metadata["Investigation Identifier"],
                    "title": isa_tab.metadata["Investigation Title"],
                    "description": isa_tab.metadata["Investigation Description"],
                    "submissionDate": isa_tab.metadata["Investigation Submission Date"],
                    "publicReleaseDate": isa_tab.metadata["Investigation Public Release Date"],
                    "ontologySourceReferences": self.createOntologySourceReferences(isa_tab.ontology_refs),
                    "publications": self.createPublications(isa_tab.publications, "Investigation"),
                    "people": self.createContacts(isa_tab.contacts, "Investigation"),
                    "studies": self.createStudies(isa_tab.studies),
                    "comments": self.createComments(isa_tab.metadata),
                }

            # Validate against the investigation schema; use a context
            # manager so the schema file handle is not leaked (the
            # original opened the file without ever closing it).
            with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as schema_fp:
                schema = json.load(schema_fp)
            resolver = RefResolver("file://" + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA), schema)
            validator = Draft4Validator(schema, resolver=resolver)
            validator.validate(isa_json, schema)

            logger.info("... conversion finished.")
            return isa_json
    def createCEDARjson(self, work_dir, json_dir, inv_identifier):
        """
        Convert an ISA-Tab dataset to the CEDAR investigation model and
        save the result as a JSON file.

        :param work_dir: directory containing the ISA-tab dataset
        :param json_dir: output directory for the generated JSON file
        :param inv_identifier: truthy to name the output file after the
            investigation identifier, falsy to use the first study's
            identifier instead
        """
        print("Converting ISA to CEDAR model for ", work_dir)
        schema_file = "InvestigationSchema.json"
        # Load the CEDAR schema with a context manager so the file handle
        # is not leaked; the RefResolver resolves $ref entries relative to
        # the schema's own location.
        with open(join(CEDAR_SCHEMA_PATH, schema_file)) as schema_fp:
            schema = json.load(schema_fp)
        resolver = RefResolver('file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
        validator = Draft4Validator(schema, resolver=resolver)

        isa_tab = parse(work_dir)
        print("ISATab", isa_tab)

        if isa_tab is None:
            print("No ISAtab dataset found")
        else:
            if isa_tab.metadata != {}:
                investigationObject = {
                    "schemaID": "https://repo.metadatacenter.org/UUID",
                    "@id": "https://repo.metadatacenter.org/UUID/" + str(uuid4()),
                    "@type": "https://repo.metadatacenter.org/model/Investigation",
                    "@context": {
                        "model": "https://repo.metadatacenter.org/model/",
                        "xsd": "http://www.w3.org/2001/XMLSchema",
                        "schema": "https://schema.org/",
                        "title": "https://repo.metadatacenter.org/model/title",
                        "description": "https://repo.metadatacenter.org/model/description",
                    },
                    "title": {"value": isa_tab.metadata['Investigation Title']},
                    "description": {"value": isa_tab.metadata['Investigation Description']},
                    "identifier": {"value": isa_tab.metadata['Investigation Identifier']},
                    "submissionDate": {"value": isa_tab.metadata['Investigation Submission Date']},
                    "publicReleaseDate": {"value": isa_tab.metadata['Investigation Public Release Date']},
                    "hasStudy": self.createStudiesList(isa_tab.studies),
                    "hasContact": self.createInvestigationContactsList(isa_tab.contacts),
                    "hasPublication": self.createInvestigationPublicationsList(isa_tab.publications),
                    "provenance": {
                        "wasGeneratedBy": "http://www.isa-tools.org",
                        "hadPrimarySource": self.primary_source,
                    },
                }
            else:
                # Empty-metadata fallback: same skeleton with blank values.
                investigationObject = {
                    "schemaID": "https://repo.metadatacenter.org/UUID",
                    # NOTE(review): the original omitted the "/" before the
                    # UUID in this branch only; normalised to match above.
                    "@id": "https://repo.metadatacenter.org/UUID/" + str(uuid4()),
                    "@type": "https://repo.metadatacenter.org/model/Investigation",
                    "@context": {
                        "model": "https://repo.metadatacenter.org/model/",
                        "xsd": "http://www.w3.org/2001/XMLSchema",
                        "schema": "https://schema.org/",
                        "title": "https://repo.metadatacenter.org/model/title",
                        "description": "https://repo.metadatacenter.org/model/description",
                    },
                    "title": {"value": ""},
                    "description": {"value": ""},
                    "identifier": {"value": ""},
                    "submissionDate": {"value": ""},
                    "publicReleaseDate": {"value": ""},
                    "hasStudy": self.createStudiesList(isa_tab.studies),
                    "hasContact": self.createInvestigationContactsList(isa_tab.contacts),
                    "hasPublication": self.createInvestigationPublicationsList(isa_tab.publications),
                }

            cedar_json = {"investigation": investigationObject}

            validator.validate(cedar_json, schema)

            # Save output json; fall back to the first study's identifier
            # when the investigation identifier is missing or not
            # requested (.get() avoids a KeyError on empty metadata).
            if inv_identifier and isa_tab.metadata.get('Investigation Identifier'):
                file_name = os.path.join(json_dir, isa_tab.metadata['Investigation Identifier'] + ".json")
            else:
                file_name = os.path.join(json_dir, isa_tab.studies[0].metadata['Study Identifier'] + ".json")
            with open(file_name, "w") as outfile:
                json.dump(cedar_json, outfile, indent=4, sort_keys=True)
            print("... conversion finished.")
Exemple #6
0
    def createCEDARjson(self, work_dir, json_dir, inv_identifier):
        """
        Convert an ISA-Tab dataset to the CEDAR investigation template
        model and save the result as a JSON file.

        :param work_dir: directory containing the ISA-tab dataset
        :param json_dir: output directory for the generated JSON file; a
            validation error log ("error.log") is also written there when
            schema validation fails
        :param inv_identifier: truthy to name the output file after the
            investigation identifier, falsy to use the first study's
            identifier instead
        :raises IOError: when the CEDAR schema cannot be loaded
        """
        # The original passed work_dir to .format() on a string with no
        # placeholder, so the directory never appeared in the log line.
        log.info("Converting ISA to CEDAR model for %s", work_dir)
        schema_file = "investigation_template.json"
        with open(join(CEDAR_SCHEMA_PATH, schema_file)) as json_fp:
            schema = json.load(json_fp)
        if schema is None:
            raise IOError("Could not load schema from {}".format(
                join(CEDAR_SCHEMA_PATH, schema_file)))
        resolver = RefResolver(
            'file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
        validator = Draft4Validator(schema, resolver=resolver)

        isa_tab = isatab_parser.parse(work_dir)

        if isa_tab is None:
            log.info("No ISAtab dataset found")
        else:
            # @context mapping is identical in both branches below.
            context = {
                "description":
                    "https://metadatacenter.org/schemas/description",
                "title": "https://metadatacenter.org/schemas/title",
                "study": "https://metadatacenter.org/schemas/study",
                "submissionDate":
                    "https://metadatacenter.org/schemas/submissionDate",
                "_value": "https://schema.org/value",
                "publicReleaseDate":
                    "https://metadatacenter.org/schemas/publicReleaseDate",
                "identifier":
                    "https://metadatacenter.org/schemas/identifier",
            }
            if isa_tab.metadata != {}:
                investigationObject = {
                    "@id":
                        "https://repo.metadatacenter.org/UUID/" + str(uuid4()),
                    "_templateId": "http://example.org",
                    "@type":
                        "https://repo.metadatacenter.org/model/Investigation",
                    "@context": context,
                    "title":
                        {"_value": isa_tab.metadata['Investigation Title']},
                    "description":
                        {"_value":
                         isa_tab.metadata['Investigation Description']},
                    "identifier":
                        {"_value":
                         isa_tab.metadata['Investigation Identifier']},
                    "submissionDate":
                        {"_value":
                         isa_tab.metadata['Investigation Submission Date']},
                    "publicReleaseDate":
                        {"_value":
                         isa_tab.metadata[
                             'Investigation Public Release Date']},
                    "study": self.createStudiesList(isa_tab.studies),
                }
            else:
                # Empty-metadata fallback: same skeleton, blank values.
                investigationObject = {
                    # NOTE(review): the original omitted the "/" before the
                    # UUID in this branch only; normalised to match above.
                    "@id":
                        "https://repo.metadatacenter.org/UUID/" + str(uuid4()),
                    "_templateId": "http://example.org",
                    "@type":
                        "https://repo.metadatacenter.org/model/Investigation",
                    "@context": context,
                    "title": {"_value": ""},
                    "description": {"_value": ""},
                    "identifier": {"_value": ""},
                    "submissionDate": {"_value": ""},
                    "publicReleaseDate": {"_value": ""},
                    "study": self.createStudiesList(isa_tab.studies),
                }

            cedar_json = investigationObject

            # Identifiers may be absent from sparse ISA-Tab metadata.
            try:
                investigation_identifier = \
                    isa_tab.metadata['Investigation Identifier']
            except KeyError:
                investigation_identifier = ""

            try:
                study_identifier = \
                    isa_tab.studies[0].metadata['Study Identifier']
            except (KeyError, IndexError):
                # IndexError covers a dataset with no studies at all.
                study_identifier = ""

            try:
                validator.validate(cedar_json, schema)
            except ValidationError as e:
                # Record the failure but still write the (invalid) output.
                error_file_name = os.path.join(json_dir, "error.log")
                with open(error_file_name, "w") as errorfile:
                    errorfile.write(e.message)
                    # e.cause may be None or an exception instance; the
                    # original passed it to write() unconverted, which
                    # raises TypeError for anything but str.
                    errorfile.write(str(e.cause))

            if inv_identifier:
                file_name = os.path.join(json_dir,
                                         investigation_identifier + ".json")
            else:
                file_name = os.path.join(json_dir, study_identifier + ".json")
            with open(file_name, "w") as outfile:
                json.dump(cedar_json, outfile, indent=4, sort_keys=True)

            log.info("... conversion finished.")
Exemple #7
0
def load(isatab_dir):
    """
    Parse the ISA-Tab dataset in ``isatab_dir`` and build an
    ``Investigation`` object graph from it.

    :param isatab_dir: directory containing the ISA-Tab dataset
    :return: an ``Investigation`` instance, or None when the parsed
        investigation metadata block is empty
    :raises IOError: when the directory cannot be parsed as ISA-Tab
    """

    def _createOntologySourceReferences(ontology_refs):
        # One OntologySourceReference per "Term Source ..." record.
        return [
            OntologySourceReference(
                description=ontology_ref['Term Source Description'],
                file=ontology_ref['Term Source File'],
                name=ontology_ref['Term Source Name'],
                version=ontology_ref['Term Source Version'],
            )
            for ontology_ref in ontology_refs
        ]

    def _createOntologyAnnotationForInvOrStudy(object_, inv_or_study, type_):
        # Build one annotation from the three related "<prefix><type_>"
        # columns of a single record.
        return OntologyAnnotation(
            name=object_[inv_or_study + type_],
            term_source=object_[inv_or_study + type_ + " Term Source REF"],
            term_accession=object_[
                inv_or_study + type_ + " Term Accession Number"],
        )

    def _createPublications(isapubs, inv_or_study):
        # inv_or_study is the column prefix: "Investigation" or "Study".
        return [
            Publication(
                pubmed_id=pub[inv_or_study + ' PubMed ID'],
                doi=pub[inv_or_study + ' Publication DOI'],
                author_list=pub[inv_or_study + ' Publication Author List'],
                title=pub[inv_or_study + ' Publication Title'],
                status=_createOntologyAnnotationForInvOrStudy(
                    pub, inv_or_study, ' Publication Status'),
            )
            for pub in isapubs
        ]

    def _createContacts(contacts, inv_or_study):
        return [
            Person(
                last_name=contact[inv_or_study + " Person Last Name"],
                first_name=contact[inv_or_study + " Person First Name"],
                mid_initials=contact[inv_or_study + " Person Mid Initials"],
                email=contact[inv_or_study + " Person Email"],
                phone=contact[inv_or_study + " Person Phone"],
                fax=contact[inv_or_study + " Person Fax"],
                address=contact[inv_or_study + " Person Address"],
                affiliation=contact[inv_or_study + " Person Affiliation"],
                # FIXME Parsing roles?
                roles=[],
            )
            for contact in contacts
        ]

    def _createCharacteristicList(node_name, node):
        # One {"characteristic": ...} entry per "Characteristics[...]"
        # column of the node (node_name is unused but kept so all node
        # helpers share the same call shape).
        obj_list = []
        for header in node.metadata:
            if header.startswith("Characteristics"):
                characteristic = header.replace("]", "").split("[")[-1]
                obj_list.append({
                    "characteristic": Characteristic(
                        value=OntologyAnnotation(name=characteristic)),
                })
        return obj_list

    def _createOntologyAnnotationListForInvOrStudy(array, inv_or_study, type_):
        # The per-record logic is identical to the scalar helper, so the
        # duplication in the original is collapsed into a reuse.
        return [
            _createOntologyAnnotationForInvOrStudy(
                object_, inv_or_study, type_)
            for object_ in array
        ]

    def _createProtocols(protocols):
        return [
            Protocol(
                name=prot['Study Protocol Name'],
                protocol_type=_createOntologyAnnotationForInvOrStudy(
                    prot, "Study", " Protocol Type"),
                description=prot['Study Protocol Description'],
                uri=prot['Study Protocol URI'],
                version=prot['Study Protocol Version'],
                parameters=_createProtocolParameterList(prot),
            )
            for prot in protocols
        ]

    def _createProtocolParameterList(protocol):
        # TODO Units?
        return [
            ProtocolParameter(parameterName=annotation)
            for annotation in _createOntologyAnnotationsFromStringList(
                protocol, "Study", " Protocol Parameters Name")
        ]

    def _createOntologyAnnotationsFromStringList(object_, inv_or_study,
                                                 type_):
        # The three related columns hold semicolon-separated parallel
        # lists; zip them into one annotation per position.
        # FIXME an empty column still splits into [""], yielding one
        # empty annotation.
        names = object_[inv_or_study + type_].split(";")
        term_sources = object_[
            inv_or_study + type_ + " Term Source REF"].split(";")
        term_accessions = object_[
            inv_or_study + type_ + " Term Accession Number"].split(";")
        return [
            OntologyAnnotation(name=name, term_source=source,
                               term_accession=accession)
            for name, source, accession in zip(names, term_sources,
                                               term_accessions)
        ]

    # TODO Finish how to process nodes etc.
    def _createDataFiles(nodes):
        # Map node index -> Data for every "... Data File" node.
        json_dict = {}
        for node_index in nodes:
            if nodes[node_index].ntype.endswith("Data File"):
                json_dict[node_index] = Data(
                    name=nodes[node_index].name,
                    type_=nodes[node_index].ntype,
                )
        return json_dict

    def _createProcessSequence(process_nodes, source_dict, sample_dict,
                               data_dict):
        # data_dict is currently unused but kept in the signature for the
        # callers.  The original also computed (behind bare excepts) the
        # assay measurement type / platform / technology and then
        # discarded them; those dead lookups are removed.
        json_list = []
        for process_node_name in process_nodes:
            process_node = process_nodes[process_node_name]
            json_list.append({
                "executesProtocol": _createExecuteStudyProtocol(
                    process_node_name, process_node),
                "parameters": [],
                "inputs": _createInputList(
                    process_node.inputs, source_dict, sample_dict),
                "outputs": _createOutputList(
                    process_node.outputs, sample_dict),
            })
        return json_list

    def _createExecuteStudyProtocol(process_node_name, process_node):
        # TODO flesh out the protocol description; the original draft
        # fields are kept below for reference.
        json_item = dict([
                   # ("name", dict([("value", process_node_name)])),
                   # ("description", dict([("value", process_node_name)])),
                   # ("version", dict([("value", process_node_name)])),
                   # ("uri", dict([("value", process_node_name)])),
                   # ("parameters", self.createProcessParameterList(process_node_name, process_node))
                ])
        return json_item

    def _createInputList(inputs, source_dict, sample_dict):
        # An input name may resolve to a source, a sample, or both;
        # unknown names are skipped.
        json_list = []
        for argument in inputs:
            try:
                json_list.append(source_dict[argument])
            except KeyError:
                pass
            try:
                json_list.append(sample_dict[argument])
            except KeyError:
                pass
        return json_list

    def _createOutputList(arguments, sample_dict):
        # Outputs resolve only against samples; unknown names are skipped.
        json_list = []
        for argument in arguments:
            try:
                json_list.append(sample_dict[argument])
            except KeyError:
                pass
        return json_list

    def _createStudyAssaysList(assays):
        # NOTE(review): reconstructed from the commented-out draft in the
        # original source; the draft called helpers that do not exist
        # (_createSourcesDictionary / _createSampleDictionary), so the
        # existing _createSourcesSamples is used instead.  Without this
        # function, _createStudies raised NameError for every study.
        json_list = []
        for assay in assays:
            sources, samples = _createSourcesSamples(assay.nodes)
            data_dict = _createDataFiles(assay.nodes)
            json_list.append(Assay(
                file_name=assay.metadata['Study Assay File Name'],
                measurement_type=OntologyAnnotation(
                    name=assay.metadata['Study Assay Measurement Type'],
                    term_source=assay.metadata[
                        'Study Assay Measurement Type Term Source REF'],
                    term_accession=assay.metadata[
                        'Study Assay Measurement Type '
                        'Term Accession Number']),
                technology_type=OntologyAnnotation(
                    name=assay.metadata['Study Assay Technology Type'],
                    term_source=assay.metadata[
                        'Study Assay Technology Type Term Source REF'],
                    term_accession=assay.metadata[
                        'Study Assay Technology Type '
                        'Term Accession Number']),
                technology_platform=assay.metadata[
                    'Study Assay Technology Platform'],
                process_sequence=_createProcessSequence(
                    assay.process_nodes, sources, samples, data_dict),
            ))
        return json_list

    def _createFactorValueList(node_name, node):
        # One {"value": ...} entry per "Factor Value[...]" column.
        # NOTE(review): the original declared a stray `self` parameter
        # (and called a nonexistent self.createOntologyAnnotation) but
        # was invoked with two arguments, raising a TypeError for every
        # sample; the annotation is now built directly.
        json_list = []
        for header in node.metadata:
            if header.startswith("Factor Value"):
                factor_value = header.replace("]", "").split("[")[-1]
                json_list.append({
                    "value": OntologyAnnotation(name=factor_value),
                })
        return json_list

    def _createSourcesSamples(nodes):
        # Split the node table into Source and Sample objects, both keyed
        # by node index.
        samples_json_dict = {}
        sources_obj_dict = {}
        for node_index in nodes:
            if nodes[node_index].ntype == "Sample Name":
                samples_json_dict[node_index] = Sample(
                    name=node_index,
                    factors=_createFactorValueList(
                        node_index, nodes[node_index]),
                    characteristics=_createCharacteristicList(
                        node_index, nodes[node_index]),
                )
            elif nodes[node_index].ntype == "Source Name":
                sources_obj_dict[node_index] = Source(
                    name=node_index,
                    characteristics=_createCharacteristicList(
                        node_index, nodes[node_index]),
                )
        return sources_obj_dict, samples_json_dict

    def _createStudies(studies):
        study_array = []
        for study in studies:
            sources, samples = _createSourcesSamples(study.nodes)
            data_dict = _createDataFiles(study.nodes)
            study_array.append(Study(
                identifier=study.metadata['Study Identifier'],
                title=study.metadata['Study Title'],
                description=study.metadata['Study Description'],
                submission_date=study.metadata['Study Submission Date'],
                public_release_date=study.metadata[
                    'Study Public Release Date'],
                factors=None,
                file_name=study.metadata['Study File Name'],
                design_descriptors=_createOntologyAnnotationListForInvOrStudy(
                    study.design_descriptors, "Study", " Design Type"),
                publications=_createPublications(study.publications, "Study"),
                contacts=_createContacts(study.contacts, "Study"),
                protocols=_createProtocols(study.protocols),
                sources=list(sources.values()),
                samples=list(samples.values()),
                process_sequence=_createProcessSequence(
                    study.process_nodes, sources, samples, data_dict),
                assays=_createStudyAssaysList(study.assays),
            ))
        return study_array

    investigation = None
    isa_tab = isatab_parser.parse(isatab_dir)
    if isa_tab is None:
        raise IOError("There was problem parsing the ISA Tab")
    if isa_tab.metadata != {}:
        investigation = Investigation(
            identifier=isa_tab.metadata['Investigation Identifier'],
            title=isa_tab.metadata['Investigation Title'],
            description=isa_tab.metadata['Investigation Description'],
            submission_date=isa_tab.metadata[
                'Investigation Submission Date'],
            public_release_date=isa_tab.metadata[
                'Investigation Public Release Date'],
            ontology_source_references=_createOntologySourceReferences(
                isa_tab.ontology_refs),
            publications=_createPublications(
                isa_tab.publications, "Investigation"),
            contacts=_createContacts(isa_tab.contacts, "Investigation"),
            studies=_createStudies(isa_tab.studies),
        )
    return investigation
Exemple #8
0
    def createCEDARjson(self, work_dir, json_dir, inv_identifier):
        """
        Convert an ISA-Tab dataset to a CEDAR-model investigation JSON file.

        Parses the ISA-Tab in *work_dir*, builds a CEDAR investigation
        object, validates it against the CEDAR investigation template
        schema (validation errors are logged to ``error.log`` in
        *json_dir*, not raised), and writes the JSON into *json_dir*.

        :param work_dir: directory containing the ISA-Tab dataset
        :param json_dir: output directory for the JSON file and error log
        :param inv_identifier: if truthy, name the output file after the
            Investigation Identifier; otherwise after the first Study
            Identifier
        """
        print("Converting ISA to CEDAR model for ", work_dir)
        schema_file = "investigation_template.json"
        # Load the CEDAR investigation template schema; the resolver lets
        # $ref entries resolve relative to the schema directory.
        with open(join(CEDAR_SCHEMA_PATH, schema_file)) as schema_fp:
            schema = json.load(schema_fp)
        resolver = RefResolver('file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
        validator = Draft4Validator(schema, resolver=resolver)

        isa_tab = parse(work_dir)
        if isa_tab is None:
            print("No ISAtab dataset found")
            return

        metadata = isa_tab.metadata
        # Missing investigation-level fields degrade to empty strings
        # instead of raising KeyError; this also covers the case of a
        # completely empty metadata dict, which previously had its own
        # duplicated branch (with an inconsistent "@id" missing a slash).
        cedar_json = dict([
            ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
            ("_templateId", "http://example.org"),
            ("@type", "https://repo.metadatacenter.org/model/Investigation"),
            ("@context", dict(
                [
                    ("description", "https://metadatacenter.org/schemas/description"),
                    ("title", "https://metadatacenter.org/schemas/title"),
                    ("study", "https://metadatacenter.org/schemas/study"),
                    ("submissionDate", "https://metadatacenter.org/schemas/submissionDate"),
                    ("_value", "https://schema.org/value"),
                    ("publicReleaseDate", "https://metadatacenter.org/schemas/publicReleaseDate"),
                    ("identifier", "https://metadatacenter.org/schemas/identifier")
                ]
            )),
            ("title", dict([("_value", metadata.get('Investigation Title', ""))])),
            ("description", dict([("_value", metadata.get('Investigation Description', ""))])),
            ("identifier", dict([("_value", metadata.get('Investigation Identifier', ""))])),
            ("submissionDate", dict([("_value", metadata.get('Investigation Submission Date', ""))])),
            ("publicReleaseDate", dict([("_value", metadata.get('Investigation Public Release Date', ""))])),
            ("study", self.createStudiesList(isa_tab.studies))
        ])

        investigation_identifier = metadata.get('Investigation Identifier', "")
        try:
            # IndexError guard added: the dataset may contain no studies.
            study_identifier = isa_tab.studies[0].metadata['Study Identifier']
        except (KeyError, IndexError):
            study_identifier = ""

        try:
            validator.validate(cedar_json, schema)
        except ValidationError as e:
            # Log the failure and continue; e.cause may be None or a
            # non-string exception object, so coerce before writing
            # (the original passed it to write() directly and crashed).
            error_file_name = os.path.join(json_dir, "error.log")
            with open(error_file_name, "w") as errorfile:
                errorfile.write(e.message)
                errorfile.write(str(e.cause or ""))

        # Pick the output file name from the investigation or first study.
        if inv_identifier:
            file_name = os.path.join(json_dir, investigation_identifier + ".json")
        else:
            file_name = os.path.join(json_dir, study_identifier + ".json")
        with open(file_name, "w") as outfile:
            json.dump(cedar_json, outfile, indent=4, sort_keys=True)

        print("... conversion finished.")