def _copyFiles(self):
    """
    Downloads metadata and centroided imzML files to a temporary directory,
    then copies the files to the S3 bucket, putting them into the structure
    expected by sm-engine.
    """
    # Keys already present under the study prefix; used to avoid re-uploading.
    self._uploaded_to_s3 = {obj.key for obj in
                            self._bucket.objects.filter(Prefix=self._study_rel_dir)}
    self._ftp = self._ftpConnection()
    filenames = []
    self._ftp.retrlines('NLST', filenames.append)
    for fn in filenames:
        if fn.endswith(".txt"):
            self._fetchFromFTP(fn)

    if self._parse_isatab:
        print("Parsing ISATab metadata")
        self._study = ip.parse(self._study_dir).studies[0]
        self._info = self._extractStudyMetadata(self._study)
    else:
        self._study = self._info = None

    print("Copying datasets to S3")
    targets = self._centroidedDatasets(filenames)
    self._jobs = self._createTasks(targets)
    print("Ready to submit jobs")
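# A minimal, self-contained sketch of the boto3 key-listing idiom used by
# _copyFiles() above, assuming self._bucket is a boto3 S3 Bucket resource;
# the bucket name, prefix, and helper name below are hypothetical placeholders.
import boto3

def list_uploaded_keys(bucket_name, prefix):
    """Return the set of S3 keys already present under the given prefix."""
    bucket = boto3.resource('s3').Bucket(bucket_name)
    return {obj.key for obj in bucket.objects.filter(Prefix=prefix)}

# Usage (hypothetical names):
#     uploaded = list_uploaded_keys('sm-engine-datasets', 'studies/MTBLS313/')
#     if 'studies/MTBLS313/meta.json' not in uploaded:
#         ...  # fetch from FTP and upload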
def convert(self, work_dir):
    """Convert an ISA-Tab dataset (version 1) to JSON provided the ISA model
    v1.0 JSON Schemas

    :param work_dir: directory containing the ISA-Tab dataset
    """
    log.info("Converting ISA-Tab to ISA-JSON for %s", work_dir)
    isa_tab = parse(work_dir)
    if isa_tab is None:
        log.fatal("No ISA-Tab dataset found")
    else:
        isa_json = dict([])
        if isa_tab.metadata != {}:
            isa_json = dict([
                ("identifier", isa_tab.metadata['Investigation Identifier']),
                ("title", isa_tab.metadata['Investigation Title']),
                ("description", isa_tab.metadata['Investigation Description']),
                ("submissionDate", isa_tab.metadata['Investigation Submission Date']),
                ("publicReleaseDate", isa_tab.metadata['Investigation Public Release Date']),
                ("ontologySourceReferences", self.createOntologySourceReferences(isa_tab.ontology_refs)),
                ("publications", self.createPublications(isa_tab.publications, "Investigation")),
                ("people", self.createContacts(isa_tab.contacts, "Investigation")),
                ("studies", self.createStudies(isa_tab.studies)),
                ("comments", self.createComments(isa_tab.metadata))
            ])
        # Validate the generated JSON against the ISA investigation schema.
        with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as json_fp:
            schema = json.load(json_fp)
        resolver = RefResolver('file://' + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA), schema)
        validator = Draft4Validator(schema, resolver=resolver)
        validator.validate(isa_json, schema)
        log.info("Conversion finished")
        return isa_json
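# A minimal, self-contained sketch of the Draft 4 validation pattern that the
# converters in this file rely on: load a schema, attach a RefResolver so
# relative "$ref" entries resolve against the schema's own location, then
# validate an instance. The helper name, schema path, and instance are
# introduced here purely for illustration.
import json
from os.path import join
from jsonschema import Draft4Validator, RefResolver

def validate_instance(instance, schemas_path, schema_name):
    with open(join(schemas_path, schema_name)) as fp:
        schema = json.load(fp)
    resolver = RefResolver('file://' + join(schemas_path, schema_name), schema)
    validator = Draft4Validator(schema, resolver=resolver)
    validator.validate(instance)  # raises ValidationError on failure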
def convert(self, work_dir, json_dir):
    """Convert an ISA-Tab dataset (version 1) to JSON provided the ISA model
    v1.0 JSON Schemas

    :param work_dir: directory containing the ISA-Tab dataset
    :param json_dir: output directory where the resulting JSON file will be saved
    """
    print("Converting ISAtab to ISAjson for ", work_dir)
    isa_tab = parse(work_dir)
    if isa_tab is None:
        print("No ISAtab dataset found")
    else:
        isa_json = dict([])
        if isa_tab.metadata != {}:
            isa_json = dict([
                ("identifier", isa_tab.metadata['Investigation Identifier']),
                ("title", isa_tab.metadata['Investigation Title']),
                ("description", isa_tab.metadata['Investigation Description']),
                ("submissionDate", isa_tab.metadata['Investigation Submission Date']),
                ("publicReleaseDate", isa_tab.metadata['Investigation Public Release Date']),
                ("commentCreatedWithConfiguration",
                 self.createComment('Created With Configuration',
                                    isa_tab.metadata['Comment[Created With Configuration]'])),
                ("commentLastOpenedWithConfiguration",
                 self.createComment('Last Opened With Configuration',
                                    isa_tab.metadata['Comment[Last Opened With Configuration]'])),
                ("ontologySourceReferences", self.createOntologySourceReferences(isa_tab.ontology_refs)),
                ("publications", self.createPublications(isa_tab.publications, "Investigation")),
                ("people", self.createContacts(isa_tab.contacts, "Investigation")),
                ("studies", self.createStudies(isa_tab.studies))
            ])
        # Name the output file after the investigation, or the first study
        # when no investigation identifier is present.
        if isa_tab.metadata['Investigation Identifier']:
            file_name = os.path.join(json_dir, isa_tab.metadata['Investigation Identifier'] + ".json")
        else:
            file_name = os.path.join(json_dir, isa_tab.studies[0].metadata['Study Identifier'] + ".json")
        # Validate the generated JSON against the ISA investigation schema.
        with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as json_fp:
            schema = json.load(json_fp)
        resolver = RefResolver('file://' + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA), schema)
        validator = Draft4Validator(schema, resolver=resolver)
        validator.validate(isa_json, schema)
        # TODO refactor saving the file into a separate method
        with open(file_name, "w") as outfile:
            json.dump(isa_json, outfile, indent=4, sort_keys=True)
        print("... conversion finished.")
        return isa_json
def convert(self, work_dir):
    """Convert an ISA-Tab dataset (version 1) to JSON provided the ISA model
    v1.0 JSON Schemas

    :param work_dir: directory containing the ISA-Tab dataset
    """
    logger.info("Converting ISAtab to ISAjson for {}".format(work_dir))
    isa_tab = parse(work_dir)
    if isa_tab is None:
        logger.fatal("No ISAtab dataset found")
    else:
        isa_json = dict([])
        if isa_tab.metadata != {}:
            isa_json = dict([
                ("identifier", isa_tab.metadata["Investigation Identifier"]),
                ("title", isa_tab.metadata["Investigation Title"]),
                ("description", isa_tab.metadata["Investigation Description"]),
                ("submissionDate", isa_tab.metadata["Investigation Submission Date"]),
                ("publicReleaseDate", isa_tab.metadata["Investigation Public Release Date"]),
                ("ontologySourceReferences", self.createOntologySourceReferences(isa_tab.ontology_refs)),
                ("publications", self.createPublications(isa_tab.publications, "Investigation")),
                ("people", self.createContacts(isa_tab.contacts, "Investigation")),
                ("studies", self.createStudies(isa_tab.studies)),
                ("comments", self.createComments(isa_tab.metadata)),
            ])
        # Validate the generated JSON against the ISA investigation schema.
        with open(join(SCHEMAS_PATH, INVESTIGATION_SCHEMA)) as json_fp:
            schema = json.load(json_fp)
        resolver = RefResolver("file://" + join(SCHEMAS_PATH, INVESTIGATION_SCHEMA), schema)
        validator = Draft4Validator(schema, resolver=resolver)
        validator.validate(isa_json, schema)
        logger.info("... conversion finished.")
        return isa_json
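# A hedged usage sketch for the convert() variants above; the class name
# IsaTab2JsonConverter is a hypothetical stand-in for whichever class defines
# convert(), and the dataset path is a placeholder:
#
#     converter = IsaTab2JsonConverter()
#     isa_json = converter.convert("/path/to/isatab-dataset")
#     if isa_json is not None:
#         print(isa_json["identifier"], "with", len(isa_json["studies"]), "studies")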
def createCEDARjson(self, work_dir, json_dir, inv_identifier):
    print("Converting ISA to CEDAR model for ", work_dir)
    schema_file = "InvestigationSchema.json"
    with open(join(CEDAR_SCHEMA_PATH, schema_file)) as json_fp:
        schema = json.load(json_fp)
    resolver = RefResolver('file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
    validator = Draft4Validator(schema, resolver=resolver)

    isa_tab = parse(work_dir)
    print("ISATab", isa_tab)
    if isa_tab is None:
        print("No ISAtab dataset found")
    else:
        if isa_tab.metadata != {}:
            investigationObject = dict([
                ("schemaID", "https://repo.metadatacenter.org/UUID"),
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("model", "https://repo.metadatacenter.org/model/"),
                    ("xsd", "http://www.w3.org/2001/XMLSchema"),
                    ("schema", "https://schema.org/"),
                    ("title", "https://repo.metadatacenter.org/model/title"),
                    ("description", "https://repo.metadatacenter.org/model/description")
                ])),
                ("title", dict([("value", isa_tab.metadata['Investigation Title'])])),
                ("description", dict([("value", isa_tab.metadata['Investigation Description'])])),
                ("identifier", dict([("value", isa_tab.metadata['Investigation Identifier'])])),
                ("submissionDate", dict([("value", isa_tab.metadata['Investigation Submission Date'])])),
                ("publicReleaseDate", dict([("value", isa_tab.metadata['Investigation Public Release Date'])])),
                ("hasStudy", self.createStudiesList(isa_tab.studies)),
                ("hasContact", self.createInvestigationContactsList(isa_tab.contacts)),
                ("hasPublication", self.createInvestigationPublicationsList(isa_tab.publications)),
                ("provenance", dict([
                    ("wasGeneratedBy", "http://www.isa-tools.org"),
                    ("hadPrimarySource", self.primary_source)
                ]))
            ])
        else:
            investigationObject = dict([
                ("schemaID", "https://repo.metadatacenter.org/UUID"),
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("model", "https://repo.metadatacenter.org/model/"),
                    ("xsd", "http://www.w3.org/2001/XMLSchema"),
                    ("schema", "https://schema.org/"),
                    ("title", "https://repo.metadatacenter.org/model/title"),
                    ("description", "https://repo.metadatacenter.org/model/description")
                ])),
                ("title", dict([("value", "")])),
                ("description", dict([("value", "")])),
                ("identifier", dict([("value", "")])),
                ("submissionDate", dict([("value", "")])),
                ("publicReleaseDate", dict([("value", "")])),
                ("hasStudy", self.createStudiesList(isa_tab.studies)),
                ("hasContact", self.createInvestigationContactsList(isa_tab.contacts)),
                ("hasPublication", self.createInvestigationPublicationsList(isa_tab.publications))
            ])

        cedar_json = dict([
            ("investigation", investigationObject)
        ])
        validator.validate(cedar_json, schema)

        # Save the output JSON, named after the investigation or first study.
        if inv_identifier:
            file_name = os.path.join(json_dir, isa_tab.metadata['Investigation Identifier'] + ".json")
        else:
            file_name = os.path.join(json_dir, isa_tab.studies[0].metadata['Study Identifier'] + ".json")
        with open(file_name, "w") as outfile:
            json.dump(cedar_json, outfile, indent=4, sort_keys=True)
        print("... conversion finished.")
def createCEDARjson(self, work_dir, json_dir, inv_identifier):
    log.info("Converting ISA to CEDAR model for {}".format(work_dir))
    schema_file = "investigation_template.json"
    with open(join(CEDAR_SCHEMA_PATH, schema_file)) as json_fp:
        schema = json.load(json_fp)
    if schema is None:
        raise IOError("Could not load schema from {}".format(
            join(CEDAR_SCHEMA_PATH, schema_file)))
    resolver = RefResolver(
        'file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
    validator = Draft4Validator(schema, resolver=resolver)

    isa_tab = isatab_parser.parse(work_dir)
    if isa_tab is None:
        log.info("No ISAtab dataset found")
    else:
        if isa_tab.metadata != {}:
            investigationObject = dict([
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("_templateId", "http://example.org"),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("description", "https://metadatacenter.org/schemas/description"),
                    ("title", "https://metadatacenter.org/schemas/title"),
                    ("study", "https://metadatacenter.org/schemas/study"),
                    ("submissionDate", "https://metadatacenter.org/schemas/submissionDate"),
                    ("_value", "https://schema.org/value"),
                    ("publicReleaseDate", "https://metadatacenter.org/schemas/publicReleaseDate"),
                    ("identifier", "https://metadatacenter.org/schemas/identifier")
                ])),
                ("title", dict([("_value", isa_tab.metadata['Investigation Title'])])),
                ("description", dict([("_value", isa_tab.metadata['Investigation Description'])])),
                ("identifier", dict([("_value", isa_tab.metadata['Investigation Identifier'])])),
                ("submissionDate", dict([("_value", isa_tab.metadata['Investigation Submission Date'])])),
                ("publicReleaseDate", dict([("_value", isa_tab.metadata['Investigation Public Release Date'])])),
                ("study", self.createStudiesList(isa_tab.studies))
            ])
        else:
            investigationObject = dict([
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("_templateId", "http://example.org"),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("description", "https://metadatacenter.org/schemas/description"),
                    ("title", "https://metadatacenter.org/schemas/title"),
                    ("study", "https://metadatacenter.org/schemas/study"),
                    ("submissionDate", "https://metadatacenter.org/schemas/submissionDate"),
                    ("_value", "https://schema.org/value"),
                    ("publicReleaseDate", "https://metadatacenter.org/schemas/publicReleaseDate"),
                    ("identifier", "https://metadatacenter.org/schemas/identifier")
                ])),
                ("title", dict([("_value", "")])),
                ("description", dict([("_value", "")])),
                ("identifier", dict([("_value", "")])),
                ("submissionDate", dict([("_value", "")])),
                ("publicReleaseDate", dict([("_value", "")])),
                ("study", self.createStudiesList(isa_tab.studies)),
            ])

        cedar_json = investigationObject

        try:
            investigation_identifier = isa_tab.metadata['Investigation Identifier']
        except KeyError:
            investigation_identifier = ""
        try:
            study_identifier = isa_tab.studies[0].metadata['Study Identifier']
        except KeyError:
            study_identifier = ""

        try:
            validator.validate(cedar_json, schema)
        except ValidationError as e:
            # Log validation failures instead of aborting the conversion.
            error_file_name = os.path.join(json_dir, "error.log")
            with open(error_file_name, "w") as errorfile:
                errorfile.write(e.message)
                errorfile.write(str(e.cause))

        if inv_identifier:
            file_name = os.path.join(json_dir, investigation_identifier + ".json")
        else:
            file_name = os.path.join(json_dir, study_identifier + ".json")
        with open(file_name, "w") as outfile:
            json.dump(cedar_json, outfile, indent=4, sort_keys=True)
        log.info("... conversion finished.")
def load(isatab_dir):

    def _createOntologySourceReferences(ontology_refs):
        ontologies = []
        for ontology_ref in ontology_refs:
            ontology = OntologySourceReference(
                description=ontology_ref['Term Source Description'],
                file=ontology_ref['Term Source File'],
                name=ontology_ref['Term Source Name'],
                version=ontology_ref['Term Source Version'],
            )
            ontologies.append(ontology)
        return ontologies

    def _createPublications(isapubs, inv_or_study):
        publications = []
        for pub in isapubs:
            publication = Publication(
                pubmed_id=pub[inv_or_study + ' PubMed ID'],
                doi=pub[inv_or_study + ' Publication DOI'],
                author_list=pub[inv_or_study + ' Publication Author List'],
                title=pub[inv_or_study + ' Publication Title'],
                status=_createOntologyAnnotationForInvOrStudy(
                    pub, inv_or_study, ' Publication Status')
            )
            publications.append(publication)
        return publications

    def _createOntologyAnnotationForInvOrStudy(object_, inv_or_study, type_):
        onto_ann = OntologyAnnotation(
            name=object_[inv_or_study + type_],
            term_source=object_[inv_or_study + type_ + " Term Source REF"],
            term_accession=object_[inv_or_study + type_ + " Term Accession Number"],
        )
        return onto_ann

    def _createContacts(contacts, inv_or_study):
        people_json = []
        for contact in contacts:
            person_json = Person(
                last_name=contact[inv_or_study + " Person Last Name"],
                first_name=contact[inv_or_study + " Person First Name"],
                mid_initials=contact[inv_or_study + " Person Mid Initials"],
                email=contact[inv_or_study + " Person Email"],
                phone=contact[inv_or_study + " Person Phone"],
                fax=contact[inv_or_study + " Person Fax"],
                address=contact[inv_or_study + " Person Address"],
                affiliation=contact[inv_or_study + " Person Affiliation"],
                roles=[]  # FIXME roles are not parsed yet
            )
            people_json.append(person_json)
        return people_json

    def _createCharacteristicList(node_name, node):
        obj_list = []
        for header in node.metadata:
            if header.startswith("Characteristics"):
                characteristic = header.replace("]", "").split("[")[-1]
                characteristic_obj = Characteristic(
                    value=OntologyAnnotation(name=characteristic)
                )
                obj_item = dict([
                    ("characteristic", characteristic_obj)
                ])
                obj_list.append(obj_item)
        return obj_list

    def _createOntologyAnnotationListForInvOrStudy(array, inv_or_study, type_):
        onto_annotations = []
        for object_ in array:
            onto_ann = OntologyAnnotation(
                name=object_[inv_or_study + type_],
                term_source=object_[inv_or_study + type_ + " Term Source REF"],
                term_accession=object_[inv_or_study + type_ + " Term Accession Number"],
            )
            onto_annotations.append(onto_ann)
        return onto_annotations

    def _createProtocols(protocols):
        protocols_list = []
        for prot in protocols:
            protocol = Protocol(
                name=prot['Study Protocol Name'],
                protocol_type=_createOntologyAnnotationForInvOrStudy(
                    prot, "Study", " Protocol Type"),
                description=prot['Study Protocol Description'],
                uri=prot['Study Protocol URI'],
                version=prot['Study Protocol Version'],
                parameters=_createProtocolParameterList(prot),
            )
            protocols_list.append(protocol)
        return protocols_list

    def _createProtocolParameterList(protocol):
        parameters_list = []
        parameters_annotations = _createOntologyAnnotationsFromStringList(
            protocol, "Study", " Protocol Parameters Name")
        for parameter_annotation in parameters_annotations:
            parameter = ProtocolParameter(
                parameterName=parameter_annotation
            )
            # TODO handle units
            parameters_list.append(parameter)
        return parameters_list

    def _createOntologyAnnotationsFromStringList(object_, inv_or_study, type_):
        # FIXME ''.split(';') returns [''], so an empty field still yields
        # one empty annotation
        name_array = object_[inv_or_study + type_].split(";")
        term_source_array = object_[inv_or_study + type_ + " Term Source REF"].split(";")
        term_accession_array = object_[inv_or_study + type_ + " Term Accession Number"].split(";")
        onto_annotations = []
        for i in range(0, len(name_array)):
            onto_ann = OntologyAnnotation(
                name=name_array[i],
                term_source=term_source_array[i],
                term_accession=term_accession_array[i],
            )
            onto_annotations.append(onto_ann)
        return onto_annotations

    # TODO finish how to process nodes etc.
    def _createDataFiles(nodes):
        json_dict = dict([])
        for node_index in nodes:
            if nodes[node_index].ntype.endswith("Data File"):
                json_item = Data(
                    name=nodes[node_index].name,
                    type_=nodes[node_index].ntype
                )
                json_dict.update({node_index: json_item})
        return json_dict

    def _createProcessSequence(process_nodes, source_dict, sample_dict, data_dict):
        json_list = []
        for process_node_name in process_nodes:
            # NOTE: measurement_type/platform/technology are collected but
            # not yet used when building the process item below.
            try:
                measurement_type = process_nodes[process_node_name].study_assay.metadata["Study Assay Measurement Type"]
            except Exception:
                measurement_type = ""
            try:
                platform = process_nodes[process_node_name].study_assay.metadata["Study Assay Technology Platform"]
            except Exception:
                platform = ""
            try:
                technology = process_nodes[process_node_name].study_assay.metadata["Study Assay Technology Type"]
            except Exception:
                technology = ""
            json_item = dict([
                ("executesProtocol", _createExecuteStudyProtocol(
                    process_node_name, process_nodes[process_node_name])),
                ("parameters", []),
                ("inputs", _createInputList(
                    process_nodes[process_node_name].inputs, source_dict, sample_dict)),
                ("outputs", _createOutputList(
                    process_nodes[process_node_name].outputs, sample_dict))
            ])
            json_list.append(json_item)
        return json_list

    def _createExecuteStudyProtocol(process_node_name, process_node):
        json_item = dict([
            # ("name", dict([("value", process_node_name)])),
            # ("description", dict([("value", process_node_name)])),
            # ("version", dict([("value", process_node_name)])),
            # ("uri", dict([("value", process_node_name)])),
            # ("parameters", self.createProcessParameterList(process_node_name, process_node))
        ])
        return json_item

    def _createInputList(inputs, source_dict, sample_dict):
        json_list = []
        for argument in inputs:
            try:
                json_item = source_dict[argument]
                json_list.append(json_item)
            except KeyError:
                pass
            try:
                json_item = sample_dict[argument]
                json_list.append(json_item)
            except KeyError:
                pass
        return json_list

    def _createOutputList(arguments, sample_dict):
        json_list = []
        for argument in arguments:
            try:
                json_item = sample_dict[argument]
                json_list.append(json_item)
            except KeyError:
                pass
        return json_list

    def _createStudyAssaysList(assays):
        # Builds Assay objects; the per-assay source/sample lookups come from
        # _createSourcesSamples, defined below.
        json_list = []
        for assay in assays:
            sources, samples = _createSourcesSamples(assay.nodes)
            data_dict = _createDataFiles(assay.nodes)
            json_item = Assay(
                file_name=assay.metadata['Study Assay File Name'],
                measurement_type=OntologyAnnotation(
                    name=assay.metadata['Study Assay Measurement Type'],
                    term_source=assay.metadata['Study Assay Measurement Type Term Source REF'],
                    term_accession=assay.metadata['Study Assay Measurement Type Term Accession Number']),
                technology_type=OntologyAnnotation(
                    name=assay.metadata['Study Assay Technology Type'],
                    term_source=assay.metadata['Study Assay Technology Type Term Source REF'],
                    term_accession=assay.metadata['Study Assay Technology Type Term Accession Number']),
                technology_platform=assay.metadata['Study Assay Technology Platform'],
                process_sequence=_createProcessSequence(
                    assay.process_nodes, sources, samples, data_dict),
            )
            json_list.append(json_item)
        return json_list

    def _createFactorValueList(node_name, node):
        json_list = []
        for header in node.metadata:
            if header.startswith("Factor Value"):
                factor_value = header.replace("]", "").split("[")[-1]
                # Factor values are wrapped in bare OntologyAnnotations, with
                # no term source or accession.
                factor_value_ontology_annotation = OntologyAnnotation(name=factor_value)
                factor_value_json = dict([
                    ("value", factor_value_ontology_annotation)
                ])
                json_list.append(factor_value_json)
        return json_list

    def _createSourcesSamples(nodes):
        samples_json_dict = dict([])
        sources_obj_dict = dict([])
        for node_index in nodes:
            if nodes[node_index].ntype == "Sample Name":
                sample = Sample(
                    name=node_index,
                    factors=_createFactorValueList(node_index, nodes[node_index]),
                    characteristics=_createCharacteristicList(node_index, nodes[node_index])
                )
                samples_json_dict.update({node_index: sample})
            elif nodes[node_index].ntype == "Source Name":
                source = Source(
                    name=node_index,
                    characteristics=_createCharacteristicList(node_index, nodes[node_index])
                )
                sources_obj_dict.update({node_index: source})
        return sources_obj_dict, samples_json_dict

    def _createStudies(studies):
        study_array = []
        for study in studies:
            sources, samples = _createSourcesSamples(study.nodes)
            data_dict = _createDataFiles(study.nodes)
            study_obj = Study(
                identifier=study.metadata['Study Identifier'],
                title=study.metadata['Study Title'],
                description=study.metadata['Study Description'],
                submission_date=study.metadata['Study Submission Date'],
                public_release_date=study.metadata['Study Public Release Date'],
                factors=None,
                file_name=study.metadata['Study File Name'],
                design_descriptors=_createOntologyAnnotationListForInvOrStudy(
                    study.design_descriptors, "Study", " Design Type"),
                publications=_createPublications(study.publications, "Study"),
                contacts=_createContacts(study.contacts, "Study"),
                protocols=_createProtocols(study.protocols),
                sources=list(sources.values()),
                samples=list(samples.values()),
                process_sequence=_createProcessSequence(
                    study.process_nodes, sources, samples, data_dict),
                assays=_createStudyAssaysList(study.assays),
            )
            study_array.append(study_obj)
        return study_array

    investigation = None
    isa_tab = isatab_parser.parse(isatab_dir)
    if isa_tab is None:
        raise IOError("There was a problem parsing the ISA-Tab")
    else:
        if isa_tab.metadata != {}:
            investigation = Investigation(
                identifier=isa_tab.metadata['Investigation Identifier'],
                title=isa_tab.metadata['Investigation Title'],
                description=isa_tab.metadata['Investigation Description'],
                submission_date=isa_tab.metadata['Investigation Submission Date'],
                public_release_date=isa_tab.metadata['Investigation Public Release Date'],
                ontology_source_references=_createOntologySourceReferences(isa_tab.ontology_refs),
                publications=_createPublications(isa_tab.publications, "Investigation"),
                contacts=_createContacts(isa_tab.contacts, "Investigation"),
                studies=_createStudies(isa_tab.studies),
            )
    return investigation
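# A hedged usage sketch for load(); the dataset path is a placeholder and the
# attribute names follow the model classes used above (Investigation, Study):
#
#     investigation = load("/path/to/isatab-dataset")
#     if investigation is not None:
#         print(investigation.identifier)
#         for study in investigation.studies:
#             print(study.identifier, len(study.assays), "assays")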
def createCEDARjson(self, work_dir, json_dir, inv_identifier):
    print("Converting ISA to CEDAR model for ", work_dir)
    schema_file = "investigation_template.json"
    with open(join(CEDAR_SCHEMA_PATH, schema_file)) as json_fp:
        schema = json.load(json_fp)
    resolver = RefResolver('file://' + join(CEDAR_SCHEMA_PATH, schema_file), schema)
    validator = Draft4Validator(schema, resolver=resolver)

    isa_tab = parse(work_dir)
    if isa_tab is None:
        print("No ISAtab dataset found")
    else:
        if isa_tab.metadata != {}:
            investigationObject = dict([
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("_templateId", "http://example.org"),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("description", "https://metadatacenter.org/schemas/description"),
                    ("title", "https://metadatacenter.org/schemas/title"),
                    ("study", "https://metadatacenter.org/schemas/study"),
                    ("submissionDate", "https://metadatacenter.org/schemas/submissionDate"),
                    ("_value", "https://schema.org/value"),
                    ("publicReleaseDate", "https://metadatacenter.org/schemas/publicReleaseDate"),
                    ("identifier", "https://metadatacenter.org/schemas/identifier")
                ])),
                ("title", dict([("_value", isa_tab.metadata['Investigation Title'])])),
                ("description", dict([("_value", isa_tab.metadata['Investigation Description'])])),
                ("identifier", dict([("_value", isa_tab.metadata['Investigation Identifier'])])),
                ("submissionDate", dict([("_value", isa_tab.metadata['Investigation Submission Date'])])),
                ("publicReleaseDate", dict([("_value", isa_tab.metadata['Investigation Public Release Date'])])),
                ("study", self.createStudiesList(isa_tab.studies))
            ])
        else:
            investigationObject = dict([
                ("@id", "https://repo.metadatacenter.org/UUID/" + str(uuid4())),
                ("_templateId", "http://example.org"),
                ("@type", "https://repo.metadatacenter.org/model/Investigation"),
                ("@context", dict([
                    ("description", "https://metadatacenter.org/schemas/description"),
                    ("title", "https://metadatacenter.org/schemas/title"),
                    ("study", "https://metadatacenter.org/schemas/study"),
                    ("submissionDate", "https://metadatacenter.org/schemas/submissionDate"),
                    ("_value", "https://schema.org/value"),
                    ("publicReleaseDate", "https://metadatacenter.org/schemas/publicReleaseDate"),
                    ("identifier", "https://metadatacenter.org/schemas/identifier")
                ])),
                ("title", dict([("_value", "")])),
                ("description", dict([("_value", "")])),
                ("identifier", dict([("_value", "")])),
                ("submissionDate", dict([("_value", "")])),
                ("publicReleaseDate", dict([("_value", "")])),
                ("study", self.createStudiesList(isa_tab.studies)),
            ])

        cedar_json = investigationObject

        try:
            investigation_identifier = isa_tab.metadata['Investigation Identifier']
        except KeyError:
            investigation_identifier = ""
        try:
            study_identifier = isa_tab.studies[0].metadata['Study Identifier']
        except KeyError:
            study_identifier = ""

        try:
            validator.validate(cedar_json, schema)
        except ValidationError as e:
            # Log validation failures instead of aborting the conversion.
            error_file_name = os.path.join(json_dir, "error.log")
            with open(error_file_name, "w") as errorfile:
                errorfile.write(e.message)
                errorfile.write(str(e.cause))

        # Save the output JSON, named after the investigation or first study.
        if inv_identifier:
            file_name = os.path.join(json_dir, investigation_identifier + ".json")
        else:
            file_name = os.path.join(json_dir, study_identifier + ".json")
        with open(file_name, "w") as outfile:
            json.dump(cedar_json, outfile, indent=4, sort_keys=True)
        print("... conversion finished.")
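# A hedged usage sketch for createCEDARjson(); the converter class name is a
# hypothetical stand-in and the paths are placeholders:
#
#     converter = IsaTab2CedarConverter()
#     converter.createCEDARjson(work_dir="/path/to/isatab-dataset",
#                               json_dir="/tmp/cedar-out",
#                               inv_identifier=True)
#     # Writes <Investigation Identifier>.json into json_dir, falling back to
#     # the first study's identifier when inv_identifier is falsy; schema
#     # validation errors are recorded in json_dir/error.log.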