def setUp(self):
    """Load the BII-S-7 ISA-JSON investigation and the expected SRA XML
    fixtures used by the assertions in this test case."""
    self._json_data_dir = utils.JSON_DATA_DIR
    self._sra_data_dir = utils.SRA_DATA_DIR
    self._tmp_dir = tempfile.mkdtemp()
    study_id = 'BII-S-7'
    # BUG FIX: the original opened every file without closing it, leaking
    # the handles until garbage collection; use context managers instead.
    with open(os.path.join(self._json_data_dir, study_id,
                           study_id + '.json')) as json_fp:
        self._inv_obj = isajson.load(json_fp)
    self._study_sra_data_dir = os.path.join(self._sra_data_dir, study_id)

    def _load_xml(filename):
        # Parse one expected-output XML fixture from the study's SRA dir.
        with open(os.path.join(self._study_sra_data_dir, filename),
                  'rb') as xml_fp:
            return etree.fromstring(xml_fp.read())

    self._expected_submission_xml_obj = _load_xml('submission.xml')
    self._expected_project_set_xml_obj = _load_xml('project_set.xml')
    self._expected_sample_set_xml_obj = _load_xml('sample_set.xml')
    self._expected_exp_set_xml_obj = _load_xml('experiment_set.xml')
    self._expected_run_set_xml_obj = _load_xml('run_set.xml')
    self._sra_default_config = {
        "sra_broker": "",
        "sra_center": "OXFORD",
        "sra_project": "OXFORD",
        "sra_lab": "Oxford e-Research Centre",
        "sra_broker_inform_on_status": "*****@*****.**",
        "sra_broker_inform_on_error": "*****@*****.**",
        "sra_broker_contact_name": "PRS"
    }
def convert(json_fp, path, config_dir=None, sra_settings=None,
            datafilehashes=None, validate_first=True):
    """ Converter for ISA-JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output SRA XMLs to be written
    :param config_dir: path to JSON configuration. If none, uses default
    embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: Validate JSON before conversion, default is True
    """
    if validate_first:
        log.info("Validating input JSON before conversion")
        report = isajson.validate(fp=json_fp, config_dir=config_dir,
                                  log_level=logging.ERROR)
        if len(report.get('errors')) > 0:
            log.fatal("Could not proceed with conversion as there are some "
                      "validation errors. Check log.")
            return
        # BUG FIX: validation reads the file pointer to the end; rewind it
        # so the load below sees the whole document (the json2isatab
        # converter in this codebase does the same reset).
        json_fp.seek(0)
    log.info("Loading isajson {}".format(json_fp.name))
    isa = isajson.load(fp=json_fp)
    log.info("Exporting SRA to {}".format(path))
    # BUG FIX: the original format string had no {} placeholder, so the
    # settings were silently dropped from the debug message.
    log.debug("Using SRA settings {}".format(sra_settings))
    sra.export(isa, path, sra_settings=sra_settings,
               datafilehashes=datafilehashes)
def _make_investigation_instance(self, filename):
    """Parse an ISA-JSON file into an investigation object.

    :param filename: path to the ISA-JSON file
    :return: the object returned by isajson.load
    """
    # BUG FIX: close the handle deterministically instead of leaking it.
    # Assumes utf8_text_file_open returns a regular file object usable as
    # a context manager — TODO confirm against its definition.
    with utf8_text_file_open(filename) as fp:
        isa = isajson.load(fp)
    return isa
def _make_investigation_instance(self, filename):
    """Load *filename* as UTF-8 ISA-JSON and return the investigation."""
    with open(filename, newline='', encoding='utf8') as json_file:
        investigation = isajson.load(json_file)
    return investigation
def convert(source_json_fp, out_path):
    """ Converter for ISA-JSON to MAGE-Tab.
    :param source_json_fp: File descriptor of input ISA JSON file
    :param out_path: Output path to write output MAGE-Tab to
    """
    investigation = isajson.load(source_json_fp)
    magetab.dump(investigation, out_path)
def test_json_load_and_dump_bii_s_3(self):
    """Round-trip BII-S-3 through isajson load/dump and verify counts."""
    with open(os.path.join(utils.JSON_DATA_DIR, 'BII-S-3',
                           'BII-S-3.json')) as isajson_fp:
        investigation = isajson.load(isajson_fp)
    # Serialise back to plain JSON and re-parse into dicts for inspection.
    dumped = json.loads(json.dumps(investigation, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in dumped['studies']],
                         ['s_BII-S-3.txt'])
    study = [s for s in dumped['studies']
             if s['filename'] == 's_BII-S-3.txt'][0]
    self.assertEqual(len(study['materials']['sources']), 4)
    self.assertEqual(len(study['materials']['samples']), 4)
    self.assertEqual(len(study['processSequence']), 4)
    self.assertListEqual([a['filename'] for a in study['assays']],
                         ['a_gilbert-assay-Gx.txt', 'a_gilbert-assay-Tx.txt'])

    # Expected (samples, other materials, data files, processes) per assay.
    for assay_file, counts in [
            ('a_gilbert-assay-Gx.txt', (4, 4, 6, 18)),
            ('a_gilbert-assay-Tx.txt', (4, 4, 24, 36))]:
        assay = [a for a in study['assays']
                 if a['filename'] == assay_file][0]
        n_samples, n_other, n_files, n_procs = counts
        self.assertEqual(len(assay['materials']['samples']), n_samples)
        self.assertEqual(len(assay['materials']['otherMaterials']), n_other)
        self.assertEqual(len(assay['dataFiles']), n_files)
        self.assertEqual(len(assay['processSequence']), n_procs)
def test_json_load_and_dump_bii_s_3(self):
    """Check that counts survive an isajson round-trip of BII-S-3."""
    with open(os.path.join(self._json_data_dir, 'BII-S-3',
                           'BII-S-3.json')) as isajson_fp:
        loaded = isajson.load(isajson_fp)
    as_dict = json.loads(json.dumps(loaded, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in as_dict['studies']],
                         ['s_BII-S-3.txt'])
    study = [s for s in as_dict['studies']
             if s['filename'] == 's_BII-S-3.txt'][0]
    self.assertEqual(len(study['materials']['sources']), 4)
    self.assertEqual(len(study['materials']['samples']), 4)
    self.assertEqual(len(study['processSequence']), 4)
    self.assertListEqual(
        [a['filename'] for a in study['assays']],
        ['a_gilbert-assay-Gx.txt', 'a_gilbert-assay-Tx.txt'])

    gx_assay = [a for a in study['assays']
                if a['filename'] == 'a_gilbert-assay-Gx.txt'][0]
    self.assertEqual(len(gx_assay['materials']['samples']), 4)
    self.assertEqual(len(gx_assay['materials']['otherMaterials']), 4)
    self.assertEqual(len(gx_assay['dataFiles']), 6)
    self.assertEqual(len(gx_assay['processSequence']), 18)

    tx_assay = [a for a in study['assays']
                if a['filename'] == 'a_gilbert-assay-Tx.txt'][0]
    self.assertEqual(len(tx_assay['materials']['samples']), 4)
    self.assertEqual(len(tx_assay['materials']['otherMaterials']), 4)
    self.assertEqual(len(tx_assay['dataFiles']), 24)
    self.assertEqual(len(tx_assay['processSequence']), 36)
def convert(source_json_fp, target_fp):
    """ Converter for ISA-JSON to SampleTab.
    :param source_json_fp: File descriptor of input ISA JSON file
    :param target_fp: File descriptor to write output SampleTab to
    (must be writeable)
    """
    investigation = isajson.load(source_json_fp)
    sampletab.dump(investigation, target_fp)
def convert(source_json_fp, out_path):
    """ Converter for ISA-JSON to MAGE-Tab.
    :param source_json_fp: File descriptor of input ISA JSON file
    :param out_path: Output path to write output MAGE-Tab to
    """
    # Lazy %-style args defer formatting until the record is emitted.
    log.info("loading isajson %s", source_json_fp.name)
    investigation = isajson.load(source_json_fp)
    log.info("dumping magetab %s", out_path)
    magetab.dump(investigation, out_path)
def convert(source_json_fp, target_fp):
    """ Converter for ISA-JSON to SampleTab.
    :param source_json_fp: File descriptor of input ISA JSON file
    :param target_fp: File descriptor to write output SampleTab to
    (must be writeable)
    """
    log.info("loading isajson %s", source_json_fp.name)
    investigation = isajson.load(source_json_fp)
    log.info("dumping sampletab %s", target_fp.name)
    sampletab.dump(investigation, target_fp)
def test_json_load_and_dump_bii_i_1(self):
    """Round-trip BII-I-1 (two studies) through isajson and verify counts."""
    with open(os.path.join(utils.JSON_DATA_DIR, 'BII-I-1',
                           'BII-I-1.json')) as isajson_fp:
        investigation = isajson.load(isajson_fp)
    dumped = json.loads(json.dumps(investigation, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in dumped['studies']],
                         ['s_BII-S-1.txt', 's_BII-S-2.txt'])

    def study_named(filename):
        # First (and only) study with the given filename.
        return [s for s in dumped['studies']
                if s['filename'] == filename][0]

    def check_assay(study, filename, n_samples, n_other, n_files, n_procs):
        # Assert the material/data/process counts of one assay.
        assay = [a for a in study['assays']
                 if a['filename'] == filename][0]
        self.assertEqual(len(assay['materials']['samples']), n_samples)
        self.assertEqual(len(assay['materials']['otherMaterials']), n_other)
        self.assertEqual(len(assay['dataFiles']), n_files)
        self.assertEqual(len(assay['processSequence']), n_procs)

    study_s_1 = study_named('s_BII-S-1.txt')
    self.assertEqual(len(study_s_1['materials']['sources']), 18)
    self.assertEqual(len(study_s_1['materials']['samples']), 164)
    self.assertEqual(len(study_s_1['processSequence']), 18)
    self.assertListEqual(
        [a['filename'] for a in study_s_1['assays']],
        ['a_proteome.txt', 'a_metabolome.txt', 'a_transcriptome.txt'])
    check_assay(study_s_1, 'a_proteome.txt', 8, 19, 7, 25)
    check_assay(study_s_1, 'a_metabolome.txt', 92, 92, 111, 203)
    check_assay(study_s_1, 'a_transcriptome.txt', 48, 96, 49, 193)

    study_s_2 = study_named('s_BII-S-2.txt')
    self.assertEqual(len(study_s_2['materials']['sources']), 1)
    self.assertEqual(len(study_s_2['materials']['samples']), 2)
    self.assertEqual(len(study_s_2['processSequence']), 1)
    self.assertEqual(len(study_s_2['assays']), 1)
    self.assertListEqual([a['filename'] for a in study_s_2['assays']],
                         ['a_microarray.txt'])
    check_assay(study_s_2, 'a_microarray.txt', 2, 28, 15, 45)
def test_detect_graph_process_pooling(self):
    """Pooling detection on MTBLS1 should flag the two known processes."""
    mtbls1_path = os.path.join(test_utils.JSON_DATA_DIR, 'MTBLS1',
                               'MTBLS1.json')
    with open(mtbls1_path) as isajson_fp:
        investigation = isajson.load(isajson_fp)
    for study in investigation.studies:
        utils.detect_graph_process_pooling(study.graph)
        for assay in study.assays:
            pooling_list = utils.detect_graph_process_pooling(assay.graph)
            self.assertListEqual(
                sorted(pooling_list),
                sorted(['#process/Extraction1', '#process/NMR_assay1']))
def convert(json_fp, path, i_file_name='i_investigation.txt',
            config_dir=isajson.default_config_dir, validate_first=True):
    """ Converter for ISA JSON to ISA Tab. Currently only converts
    investigation file contents

    :param json_fp: File pointer to ISA JSON input
    :param path: Directory to ISA tab output
    :param i_file_name: Investigation file name, default is i_investigation.txt
    :param config_dir: Directory to config directory
    :param validate_first: Validate JSON before conversion, default is True

    Example usage:
        Read from a JSON and write to an investigation file, make sure to
        create/open relevant Python file objects.

        from isatools.convert import json2isatab
        json_file = open('BII-I-1.json', 'r')
        tab_file = open('i_investigation.txt', 'w')
        json2isatab.convert(json_file, path)

    """
    if validate_first:
        log.info("Validating input JSON before conversion")
        report = isajson.validate(fp=json_fp, config_dir=config_dir,
                                  log_level=logging.ERROR)
        # Abort on any validation error; the caller gets no output.
        if len(report['errors']) > 0:
            log.fatal("Could not proceed with conversion as there are some "
                      "fatal validation errors. Check log.")
            return
        json_fp.seek(0)  # reset file pointer after validation
    log.info("Loading ISA-JSON from %s", json_fp.name)
    isa_obj = isajson.load(fp=json_fp)
    log.info("Dumping ISA-Tab to %s", path)
    log.debug("Using configuration from %s", config_dir)
    isatab.dump(isa_obj=isa_obj, output_path=path, i_file_name=i_file_name)
    # copy data files across from source directory where JSON is located
    log.info("Copying data files from source to target")
    # Skip ISA-Tab metadata files (i_/s_/a_*.txt) and any .json files;
    # everything else next to the input JSON is treated as a data file.
    for file in [
            f for f in os.listdir(os.path.dirname(json_fp.name))
            if not (f.endswith('.txt') and (f.startswith('i_') or f.startswith(
                's_') or f.startswith('a_'))) and not (f.endswith('.json'))]:
        filepath = os.path.join(os.path.dirname(json_fp.name), file)
        # Directories are not copied, only plain files.
        if os.path.isfile(filepath):
            log.debug("Copying %s to %s", filepath, path)
            shutil.copy(filepath, path)
def convert2(json_fp, path, config_dir=None, sra_settings=None,
             datafilehashes=None, validate_first=True):
    """ (New) Converter for ISA JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output to be written
    :param config_dir: path to JSON configuration. If none, uses default
    embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: Validate JSON before conversion, default is True
    """
    if validate_first:
        log_msg_stream = isajson.validate(fp=json_fp, config_dir=config_dir,
                                          log_level=logging.WARNING)
        # '(E)' in the captured log stream marks a validation error.
        if '(E)' in log_msg_stream.getvalue():
            logger.fatal("Could not proceed with conversion as there are "
                         "some validation errors. Check log.")
            return
        # BUG FIX: validation consumed the file pointer; rewind so load()
        # reads the whole document (the json2isatab converter does the same).
        json_fp.seek(0)
    i = isajson.load(fp=json_fp)
    sra.export(i, path, sra_settings=sra_settings,
               datafilehashes=datafilehashes)
def convert(json_fp, path, config_dir=None, sra_settings=None,
            datafilehashes=None, validate_first=True):
    """ (New) Converter for ISA JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output to be written
    :param config_dir: path to JSON configuration. If none, uses default
    embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: Validate JSON before conversion, default is True
    """
    if validate_first:
        log_msg_stream = isajson.validate(fp=json_fp, config_dir=config_dir,
                                          log_level=logging.WARNING)
        # '(E)' in the captured log stream marks a validation error.
        if '(E)' in log_msg_stream.getvalue():
            logger.fatal("Could not proceed with conversion as there are "
                         "some validation errors. Check log.")
            return
        # BUG FIX: validation consumed the file pointer; rewind so load()
        # reads the whole document (the json2isatab converter does the same).
        json_fp.seek(0)
    i = isajson.load(fp=json_fp)
    sra.export(i, path, sra_settings=sra_settings,
               datafilehashes=datafilehashes)
def test_detect_graph_process_pooling_batch_on_mtbls(self):
    """Batch-run pooling detection over MTBLS studies fetched via MTBLS.getj.

    NOTE(review): range(1, 1) is empty, so this loop body never executes and
    the test currently does no work — presumably disabled deliberately since
    enabling it would fetch studies over the network. Confirm the intended
    upper bound before relying on this test.
    """
    for i in range(1, 1):
        try:
            J = MTBLS.getj('MTBLS{}'.format(i))
            ISA = isajson.load(StringIO(json.dumps(J)))
            for study in ISA.studies:
                utils.detect_graph_process_pooling(study.graph)
                for assay in study.assays:
                    utils.detect_graph_process_pooling(assay.graph)
        # Best-effort batch: skip (and log) any study that fails to fetch,
        # parse, or validate rather than aborting the whole run.
        except IOError:
            log.error('IO Error, skipping...')
        except KeyError:
            log.error('KeyError, skipping...')
        except AttributeError:
            log.error('AttributeError, skipping...')
        except ValidationError:
            log.error('jsonschema ValidationError, skipping...')
def test_json_load_and_dump_bii_s_7(self):
    """Round-trip BII-S-7 through isajson load/dump and verify counts."""
    with open(os.path.join(utils.JSON_DATA_DIR, 'BII-S-7',
                           'BII-S-7.json')) as isajson_fp:
        investigation = isajson.load(isajson_fp)
    dumped = json.loads(json.dumps(investigation, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in dumped['studies']],
                         ['s_BII-S-7.txt'])
    study = [s for s in dumped['studies']
             if s['filename'] == 's_BII-S-7.txt'][0]
    self.assertEqual(len(study['materials']['sources']), 29)
    self.assertEqual(len(study['materials']['samples']), 29)
    self.assertEqual(len(study['processSequence']), 29)
    self.assertListEqual([a['filename'] for a in study['assays']],
                         ['a_matteo-assay-Gx.txt'])
    gx_assay = [a for a in study['assays']
                if a['filename'] == 'a_matteo-assay-Gx.txt'][0]
    self.assertEqual(len(gx_assay['materials']['samples']), 29)
    self.assertEqual(len(gx_assay['materials']['otherMaterials']), 29)
    self.assertEqual(len(gx_assay['dataFiles']), 29)
    self.assertEqual(len(gx_assay['processSequence']), 116)
def setUp(self):
    """Load the BII-S-7 ISA-JSON input and the expected SRA XML outputs."""
    self._json_data_dir = utils.JSON_DATA_DIR
    self._sra_data_dir = utils.SRA_DATA_DIR
    self._tmp_dir = tempfile.mkdtemp()
    study_id = 'BII-S-7'
    json_path = os.path.join(self._json_data_dir, study_id,
                             study_id + '.json')
    with open(json_path) as json_fp:
        self._inv_obj = isajson.load(json_fp)
    self._study_sra_data_dir = os.path.join(self._sra_data_dir, study_id)
    # Map attribute name -> expected XML fixture in the study's SRA dir.
    fixtures = {
        '_expected_submission_xml_obj': 'submission.xml',
        '_expected_project_set_xml_obj': 'project_set.xml',
        '_expected_sample_set_xml_obj': 'sample_set.xml',
        '_expected_exp_set_xml_obj': 'experiment_set.xml',
        '_expected_run_set_xml_obj': 'run_set.xml',
    }
    for attr_name, xml_name in fixtures.items():
        with open(os.path.join(self._study_sra_data_dir, xml_name),
                  'rb') as xml_fp:
            setattr(self, attr_name, etree.fromstring(xml_fp.read()))
    self._sra_default_config = {
        "sra_broker": "",
        "sra_center": "OXFORD",
        "sra_project": "OXFORD",
        "sra_lab": "Oxford e-Research Centre",
        "sra_broker_inform_on_status": "*****@*****.**",
        "sra_broker_inform_on_error": "*****@*****.**",
        "sra_broker_contact_name": "PRS"
    }
def test_json_load_and_dump_bii_s_7(self):
    """Check that counts survive an isajson round-trip of BII-S-7."""
    with open(os.path.join(self._json_data_dir, 'BII-S-7',
                           'BII-S-7.json')) as isajson_fp:
        loaded = isajson.load(isajson_fp)
    as_dict = json.loads(json.dumps(loaded, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in as_dict['studies']],
                         ['s_BII-S-7.txt'])
    study = [s for s in as_dict['studies']
             if s['filename'] == 's_BII-S-7.txt'][0]
    self.assertEqual(len(study['materials']['sources']), 29)
    self.assertEqual(len(study['materials']['samples']), 29)
    self.assertEqual(len(study['processSequence']), 29)
    self.assertListEqual([a['filename'] for a in study['assays']],
                         ['a_matteo-assay-Gx.txt'])
    assay = [a for a in study['assays']
             if a['filename'] == 'a_matteo-assay-Gx.txt'][0]
    self.assertEqual(len(assay['materials']['samples']), 29)
    self.assertEqual(len(assay['materials']['otherMaterials']), 29)
    self.assertEqual(len(assay['dataFiles']), 29)
    self.assertEqual(len(assay['processSequence']), 116)
def test_json_load_and_dump_bii_i_1(self):
    """Round-trip BII-I-1 (two studies) through isajson and verify counts."""
    with open(os.path.join(self._json_data_dir, 'BII-I-1',
                           'BII-I-1.json')) as isajson_fp:
        loaded = isajson.load(isajson_fp)
    as_dict = json.loads(json.dumps(loaded, cls=isajson.ISAJSONEncoder))

    self.assertListEqual([s['filename'] for s in as_dict['studies']],
                         ['s_BII-S-1.txt', 's_BII-S-2.txt'])

    study_s_1 = [s for s in as_dict['studies']
                 if s['filename'] == 's_BII-S-1.txt'][0]
    self.assertEqual(len(study_s_1['materials']['sources']), 18)
    self.assertEqual(len(study_s_1['materials']['samples']), 164)
    self.assertEqual(len(study_s_1['processSequence']), 18)
    self.assertListEqual(
        [a['filename'] for a in study_s_1['assays']],
        ['a_proteome.txt', 'a_metabolome.txt', 'a_transcriptome.txt'])
    # Expected (samples, other materials, data files, processes) per assay.
    for assay_file, (n_samples, n_other, n_files, n_procs) in [
            ('a_proteome.txt', (8, 19, 7, 25)),
            ('a_metabolome.txt', (92, 92, 111, 203)),
            ('a_transcriptome.txt', (48, 96, 49, 193))]:
        assay = [a for a in study_s_1['assays']
                 if a['filename'] == assay_file][0]
        self.assertEqual(len(assay['materials']['samples']), n_samples)
        self.assertEqual(len(assay['materials']['otherMaterials']), n_other)
        self.assertEqual(len(assay['dataFiles']), n_files)
        self.assertEqual(len(assay['processSequence']), n_procs)

    study_s_2 = [s for s in as_dict['studies']
                 if s['filename'] == 's_BII-S-2.txt'][0]
    self.assertEqual(len(study_s_2['materials']['sources']), 1)
    self.assertEqual(len(study_s_2['materials']['samples']), 2)
    self.assertEqual(len(study_s_2['processSequence']), 1)
    self.assertEqual(len(study_s_2['assays']), 1)
    self.assertListEqual([a['filename'] for a in study_s_2['assays']],
                         ['a_microarray.txt'])
    microarray = [a for a in study_s_2['assays']
                  if a['filename'] == 'a_microarray.txt'][0]
    self.assertEqual(len(microarray['materials']['samples']), 2)
    self.assertEqual(len(microarray['materials']['otherMaterials']), 28)
    self.assertEqual(len(microarray['dataFiles']), 15)
    self.assertEqual(len(microarray['processSequence']), 45)