def test_validate_isajson_isajson_schemas(self):
    """Tests against 0002"""
    # Open the fixtures via context managers so the handles are closed even
    # if validate() raises (the original leaked both file objects).
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "The JSON does not validate against the provided ISA-JSON schemas!" in log_msg_stream.getvalue():
        self.fail("Error raised when trying to parse valid ISA-JSON, when it should have been fine!")
    with open(os.path.join(self._unit_json_data_dir, 'invalid_isajson.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "The JSON does not validate against the provided ISA-JSON schemas!" not in log_msg_stream.getvalue():
        self.fail("NO error raised when validating against some non-ISA-JSON conforming JSON!")
def test_validate_isajson_study_config_validation(self):
    """Tests against 4004"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'study_config.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "protocol sequence ['sample collection'] does not match study graph" in log_msg_stream.getvalue():
        self.fail("Validation failed against default study configuration, when it should have passed")
    with open(os.path.join(self._unit_json_data_dir, 'study_config_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "protocol sequence ['sample collection'] does not match study graph" not in log_msg_stream.getvalue():
        self.fail("Validation passed against default study configuration, when it should have failed")
def test_validate_isajson_json_load(self):
    """Tests against 0001"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "There was an error when trying to parse the JSON" in log_msg_stream.getvalue():
        self.fail("Error raised when trying to parse JSON, when it should have been fine!")
    with open(os.path.join(self._unit_json_data_dir, 'invalid.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "There was an error when trying to parse the JSON" not in log_msg_stream.getvalue():
        self.fail("NO error raised when trying to parse invalid formed JSON!")
def test_validate_isajson_assay_config_validation(self):
    """Tests against 4004"""
    with open(os.path.join(self._unit_json_data_dir, 'assay_config.json')) as fp:
        report = isajson.validate(fp)
    if 4004 in [e['code'] for e in report['warnings']]:
        self.fail("Validation failed against transcription_seq.json configuration, when it should have passed")
    # Use a context manager here too: the original leaked this handle via a
    # bare open(), inconsistently with the first half of the test.
    with open(os.path.join(self._unit_json_data_dir, 'assay_config_fail.json')) as fp:
        report = isajson.validate(fp)
    if 4004 not in [e['code'] for e in report['warnings']]:
        self.fail("Validation passed against transcription_seq.json configuration, when it should have failed")
def test_validate_isajson_utf8_encoding_check(self):
    """Tests against 0010"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "File should be UTF-8 encoding" in log_msg_stream.getvalue():
        self.fail("Validation warning present when testing against UTF-8 encoded file")
    with open(os.path.join(self._unit_json_data_dir, 'non_utf8.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "File should be UTF-8 encoding" not in log_msg_stream.getvalue():
        self.fail("Validation warning missing when testing against UTF-16 encoded file (UTF-8 required)")
def test_validate_isajson_utf8_encoding_check(self):
    """Tests against 0010"""
    # A UTF-8 encoded file must not trigger warning code 10.
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 10 in warning_codes:
        self.fail("Validation warning present when testing against UTF-8 encoded file")
    # A non-UTF-8 file must trigger warning code 10.
    with open(os.path.join(self._unit_json_data_dir, 'non_utf8.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 10 not in warning_codes:
        self.fail("Validation warning missing when testing against UTF-16 encoded file (UTF-8 required)")
def test_validate_isajson_isajson_schemas(self):
    """Tests against 0003"""
    # Valid ISA-JSON must not raise schema error code 3.
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 3 in error_codes:
        self.fail("Error raised when trying to parse valid ISA-JSON, when it should have been fine!")
    # Non-conforming JSON must raise schema error code 3.
    with open(os.path.join(self._unit_json_data_dir, 'invalid_isajson.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 3 not in error_codes:
        self.fail("NO error raised when validating against some non-ISA-JSON conforming JSON!")
def test_validate_isajson_json_load(self):
    """Tests against 0002"""
    # Well-formed JSON must not raise parse error code 2.
    with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 2 in error_codes:
        self.fail("Error raised when trying to parse JSON, when it should have been fine!")
    # Malformed JSON must raise parse error code 2.
    with open(os.path.join(self._unit_json_data_dir, 'invalid.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 2 not in error_codes:
        self.fail("NO error raised when trying to parse invalid formed JSON!")
def test_validate_isajson_assay_config_validation(self):
    """Tests against 4004"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'assay_config.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "protocol sequence ['nucleic acid extraction', 'library construction', 'nucleic acid sequencing', " \
       "'sequence analysis data transformation'] does not match study graph" in log_msg_stream.getvalue():
        self.fail("Validation failed against transcription_seq.json configuration, when it should have passed")
    with open(os.path.join(self._unit_json_data_dir, 'assay_config_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "protocol sequence ['nucleic acid extraction', 'library construction', 'nucleic acid sequencing', " \
       "'sequence analysis data transformation'] does not match study graph" not in log_msg_stream.getvalue():
        self.fail("Validation passed against transcription_seq.json configuration, when it should have failed")
def test_validate_isajson_source_link(self):
    """Tests against 1002"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'source_link.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#source/1'] not found" in log_msg_stream.getvalue():
        self.fail("Validation error present when should pass without error - source link reports broken when "
                  "present in data")
    with open(os.path.join(self._unit_json_data_dir, 'source_link_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#source/1'] not found" not in log_msg_stream.getvalue():
        self.fail("Validation error missing when should report error - data has broken source link but not "
                  "reported in validation report")
def test_validate_isajson_source_link(self):
    """Tests against 1002, but reports 1005 error (more general case)"""
    # An intact source link must not produce error code 1005.
    with open(os.path.join(self._unit_json_data_dir, 'source_link.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1005 in error_codes:
        self.fail("Validation error present when should pass without error - source link reports broken when "
                  "present in data")
    # A broken source link must produce error code 1005.
    with open(os.path.join(self._unit_json_data_dir, 'source_link_fail.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1005 not in error_codes:
        self.fail("Validation error missing when should report error - data has broken source link but not "
                  "reported in validation report")
def test_validate_isajson_protocol_ref_link(self):
    """Tests against 1007"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#protocol/1'] used in a study or assay process sequence not declared" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - executesProtocol link reports broken when "
            "present in data")
    with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#protocol/1'] used in a study or assay process sequence not declared" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has broken executesProtocol link but not "
            "reported in validation report")
def test_validate_isajson_factor_used(self):
    """Tests against 3006"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'factor_used.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "factors declared ['#factor/1'] that have not been used" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly reports #factor/1 not used when "
            "it has been used in #sample/1")
    with open(os.path.join(self._unit_json_data_dir, 'factor_used_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "factors declared ['#factor/1'] that have not been used" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is OK "
            "but not reported #factor/1 as being unused")
def test_validate_isajson_iso8601(self):
    """Tests against 3001"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'iso8601.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "does not conform to ISO8601 format" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted ISO8601 date in "
            "publicReleaseDate reports invalid when valid data")
    with open(os.path.join(self._unit_json_data_dir, 'iso8601_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "does not conform to ISO8601 format" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted ISO8601 date in "
            "publicReleaseDate but not reported in validation report")
def test_validate_isajson_term_source_used(self):
    """Tests against 3007"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'term_source_used.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "ontology sources declared ['PATO'] that have not been used" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly reports PATO not used when it "
            "has been used in #factor/1")
    with open(os.path.join(self._unit_json_data_dir, 'term_source_used_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "ontology sources declared ['PATO'] that have not been used" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is OK "
            "but not reported PATO as being unused")
def test_validate_isajson_process_link(self):
    """Tests against 1006"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'process_link.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "link #process/1 in process #process/2 does not refer to another process" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - process link reports broken when present "
            "in data")
    with open(os.path.join(self._unit_json_data_dir, 'process_link_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "link #process/1 in process #process/2 does not refer to another process" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has broken process link but not reported in "
            "validation report")
def test_validate_isajson_factor_link(self):
    """Tests against 1008"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'factor_link.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#factor/1'] used in a study or assay process sequence not declared" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - factor link in factorValue reports broken "
            "when present in data")
    with open(os.path.join(self._unit_json_data_dir, 'factor_link_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    # Bug fix: the original tested `in` here, so the broken-link fixture could
    # never trip this failure; the message must be MISSING to be a test failure.
    if "['#factor/1'] used in a study or assay process sequence not declared" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has broken factor link in factorValue but "
            "not reported in validation report")
def test_validate_isajson_doi(self):
    """Tests against 3002"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'doi.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "does not conform to DOI format" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted DOI in publication "
            "reports invalid when valid data")
    with open(os.path.join(self._unit_json_data_dir, 'doi_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "does not conform to DOI format" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted DOI in publication "
            "but not reported in validation report")
def test_validate_isajson_pubmed(self):
    """Tests against 3003"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'pubmed.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "is not valid format" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted Pubmed ID in "
            "publication reports invalid when valid data")
    with open(os.path.join(self._unit_json_data_dir, 'pubmed_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "is not valid format" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted Pubmed ID in "
            "publication but not reported in validation report")
def test_validate_isajson_protocol_parameter_link(self):
    """Tests against 1009"""
    # An intact parameter link must not produce error code 1009.
    with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1009 in error_codes:
        self.fail(
            "Validation error present when should pass without error - parameter link in parameterValue reports "
            "broken when present in data")
    # A broken parameter link must produce error code 1009.
    with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link_fail.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1009 not in error_codes:
        self.fail("Validation error missing when should report error - data has broken parameter link in "
                  "parameterValue but not reported in validation report")
def test_validate_isajson_protocol_used(self):
    """Tests against 3005"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'protocol_used.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#protocol/1'] not used" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - incorrectly reports #protocol/1 not used "
            "when it has been used in #process/1")
    with open(os.path.join(self._unit_json_data_dir, 'protocol_used_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#protocol/1'] not used" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is OK "
            "but not reported #protocol/1 as being unused")
def test_validate_isajson_protocol_parameter_link(self):
    """Tests against 1009"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    if "['#parameter/1'] used in a study or assay process sequence not declared" in log_msg_stream.getvalue():
        self.fail(
            "Validation error present when should pass without error - parameter link in parameterValue reports "
            "broken when present in data")
    with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link_fail.json')) as fp:
        log_msg_stream = isajson.validate(fp)
    # Bug fix: the original tested `in` here, so the broken-link fixture could
    # never trip this failure; the message must be MISSING to be a test failure
    # (compare the corrected report-based variant of this test).
    if "['#parameter/1'] used in a study or assay process sequence not declared" not in log_msg_stream.getvalue():
        self.fail(
            "Validation error missing when should report error - data has broken parameter link in parameterValue "
            "but not reported in validation report")
def test_validate_isajson_protocol_ref_link(self):
    """Tests against 1007"""
    # An intact executesProtocol link must not produce error code 1007.
    with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1007 in error_codes:
        self.fail(
            "Validation error present when should pass without error - executesProtocol link reports broken when "
            "present in data")
    # A broken executesProtocol link must produce error code 1007.
    with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link_fail.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1007 not in error_codes:
        self.fail(
            "Validation error missing when should report error - data has broken executesProtocol link but not "
            "reported in validation report")
def test_validate_isajson_factor_link(self):
    """Tests against 1008"""
    # An intact factor link must not produce error code 1008.
    with open(os.path.join(self._unit_json_data_dir, 'factor_link.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1008 in error_codes:
        self.fail(
            "Validation error present when should pass without error - factor link in factorValue reports broken "
            "when present in data")
    # A broken factor link must produce error code 1008.
    with open(os.path.join(self._unit_json_data_dir, 'factor_link_fail.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1008 not in error_codes:
        self.fail(
            "Validation error missing when should report error - data has broken factor link in factorValue but "
            "not reported in validation report")
def test_validate_isajson_iso8601(self):
    """Tests against 3001"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'iso8601.json')) as fp:
        report = isajson.validate(fp)
    if 3001 in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted ISO8601 date in "
            "publicReleaseDate reports invalid when valid data")
    with open(os.path.join(self._unit_json_data_dir, 'iso8601_fail.json')) as fp:
        report = isajson.validate(fp)
    if 3001 not in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted ISO8601 date in "
            "publicReleaseDate but not reported in validation report")
def test_validate_isajson_term_source_used(self):
    """Tests against 3007"""
    # A used ontology source must not produce warning code 3007.
    with open(os.path.join(self._unit_json_data_dir, 'term_source_used.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 3007 in warning_codes:
        self.fail(
            "Validation error present when should pass without error - incorrectly reports PATO not used when "
            "it has been used in #factor/1")
    # An unused ontology source must produce warning code 3007.
    with open(os.path.join(self._unit_json_data_dir, 'term_source_used_fail.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 3007 not in warning_codes:
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is "
            "OK but not reported PATO as being unused")
def test_validate_isajson_doi(self):
    """Tests against 3002"""
    # A well-formed DOI must not produce warning code 3002.
    with open(os.path.join(self._unit_json_data_dir, 'doi.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 3002 in warning_codes:
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted DOI in publication "
            "reports invalid when valid data")
    # A malformed DOI must produce warning code 3002.
    with open(os.path.join(self._unit_json_data_dir, 'doi_fail.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 3002 not in warning_codes:
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted DOI in publication "
            "but not reported in validation report")
def test_validate_isajson_factor_used(self):
    """Tests against 1021"""
    # A used factor must not produce warning code 1021.
    with open(os.path.join(self._unit_json_data_dir, 'factor_used.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 1021 in warning_codes:
        self.fail(
            "Validation error present when should pass without error - incorrectly reports #factor/1 not used when "
            "it has been used in #sample/1")
    # An unused factor must produce warning code 1021.
    with open(os.path.join(self._unit_json_data_dir, 'factor_used_fail.json')) as json_file:
        result = isajson.validate(json_file)
    warning_codes = [warning['code'] for warning in result['warnings']]
    if 1021 not in warning_codes:
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is OK "
            "but not reported #factor/1 as being unused")
def test_validate_isajson_pubmed(self):
    """Tests against 3003"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'pubmed.json')) as fp:
        report = isajson.validate(fp)
    if 3003 in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error present when should pass without error - incorrectly formatted Pubmed ID in "
            "publication reports invalid when valid data")
    with open(os.path.join(self._unit_json_data_dir, 'pubmed_fail.json')) as fp:
        report = isajson.validate(fp)
    if 3003 not in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error missing when should report error - data has incorrectly formatted Pubmed ID in "
            "publication but not reported in validation report")
def test_validate_isajson_process_link(self):
    """Tests against 1006"""
    # An intact process link must not produce error code 1006.
    with open(os.path.join(self._unit_json_data_dir, 'process_link.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1006 in error_codes:
        self.fail(
            "Validation error present when should pass without error - process link reports broken when present "
            "in data")
    # A broken process link must produce error code 1006.
    with open(os.path.join(self._unit_json_data_dir, 'process_link_fail.json')) as json_file:
        result = isajson.validate(json_file)
    error_codes = [err['code'] for err in result['errors']]
    if 1006 not in error_codes:
        self.fail(
            "Validation error missing when should report error - data has broken process link but not reported in "
            "validation report")
def test_validate_isajson_material_link(self):
    """Tests against 1005"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'material_link.json')) as fp:
        report = isajson.validate(fp)
    if 1005 in [e['code'] for e in report['errors']]:
        self.fail(
            "Validation error present when should pass without error -material link link reports broken when "
            "present in data")
    with open(os.path.join(self._unit_json_data_dir, 'material_link_fail.json')) as fp:
        report = isajson.validate(fp)
    if 1005 not in [e['code'] for e in report['errors']]:
        self.fail(
            "Validation error missing when should report error - data has broken material link but not "
            "reported in validation report")
def test_validate_isajson_data_file_link(self):
    """Tests against 1004 but reports 1005 error (more general case)"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'datafile_link.json')) as fp:
        report = isajson.validate(fp)
    if 1005 in [e['code'] for e in report['errors']]:
        self.fail(
            "Validation error present when should pass without error - data file link reports broken when present "
            "in data")
    with open(os.path.join(self._unit_json_data_dir, 'datafile_link_fail.json')) as fp:
        report = isajson.validate(fp)
    if 1005 not in [e['code'] for e in report['errors']]:
        self.fail(
            "Validation error missing when should report error - data has broken data file link but not reported "
            "in validation report")
def convert(json_fp, path, config_dir=None, sra_settings=None, datafilehashes=None, validate_first=True):
    """
    Converter for ISA-JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output SRA XMLs to be written
    :param config_dir: path to JSON configuration. If none, uses default embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: If True, validate the ISA-JSON before converting
    """
    if validate_first:
        log.info("Validating input JSON before conversion")
        report = isajson.validate(fp=json_fp, config_dir=config_dir, log_level=logging.ERROR)
        if len(report.get('errors')) > 0:
            log.fatal("Could not proceed with conversion as there are some "
                      "validation errors. Check log.")
            return
    # NOTE(review): json_fp is read by validate() above and again by load()
    # below — assumes validate() rewinds the stream; confirm against isajson.
    log.info("Loading isajson {}".format(json_fp.name))
    isa = isajson.load(fp=json_fp)
    log.info("Exporting SRA to {}".format(path))
    # Bug fix: the original format string had no {} placeholder, so the SRA
    # settings were silently dropped from the debug message.
    log.debug("Using SRA settings {}".format(sra_settings))
    sra.export(isa, path, sra_settings=sra_settings, datafilehashes=datafilehashes)
def test_validate_isajson_study_config_validation(self):
    """Tests against 4004"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'study_config.json')) as fp:
        report = isajson.validate(fp)
    if 4004 in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation failed against default study configuration, when it should have passed"
        )
    with open(os.path.join(self._unit_json_data_dir, 'study_config_fail.json')) as fp:
        report = isajson.validate(fp)
    if 4004 not in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation passed against default study configuration, when it should have failed"
        )
def validate_isajson(self):
    """Run the ISA-JSON validator over the document at self.json_path and log its result."""
    lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    isa_json_file = os.path.join(self.json_path, 'isa_json.json')
    with open(isa_json_file) as json_file:
        validation_result = isajson.validate(json_file)
        lg.log(validation_result, level=Loglvl.INFO, type=Logtype.FILE)
def test_validate_isajson_protocol_used(self):
    """Tests against 1019"""
    # Context managers close the fixture files even if validate() raises
    # (the original leaked both handles from bare open() calls).
    with open(os.path.join(self._unit_json_data_dir, 'protocol_used.json')) as fp:
        report = isajson.validate(fp)
    if 1019 in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error present when should pass without error - incorrectly reports #protocol/1 not used "
            "when it has been used in #process/1")
    with open(os.path.join(self._unit_json_data_dir, 'protocol_used_fail.json')) as fp:
        report = isajson.validate(fp)
    if 1019 not in [e['code'] for e in report['warnings']]:
        self.fail(
            "Validation error missing when should report error - data has incorrectly reported everything is OK "
            "but not reported #protocol/1 as being unused")
def test_validate_testdata_sample_pool_no_protocol_ref_json(self):
    """Validate the TEST-ISA-sample-pool-no-protocolref fixture; expect no errors."""
    test_case = 'TEST-ISA-sample-pool-no-protocolref'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=self._reporting_level)
    if len(report['errors']) > 0:
        self.fail("Error found when validating ISA JSON: {}".format(
            report['errors']))
def test_validate_sampledata_bii_i_1_json(self):
    """Validate the BII-I-1 sample dataset; expect no errors."""
    test_case = 'BII-I-1'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.SAMPLE_DATA_DIR, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=logging.ERROR)
    if len(report['errors']) > 0:
        self.fail("Errors found when validating ISA JSON: {}".format(
            report['errors']))
def test_validate_testdata_charac_param_factor_json(self):
    """Validate the TEST-ISA-charac-param-factor fixture; expect no errors."""
    test_case = 'TEST-ISA-charac-param-factor'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=self._reporting_level)
    if len(report['errors']) > 0:
        self.fail("Error found when validating ISA JSON: {}".format(
            report['errors']))
def test_validate_testdata_repeated_measure_json(self):
    """Validate the TEST-ISA-repeated-measure fixture; expect no errors."""
    test_case = 'TEST-ISA-repeated-measure'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=self._reporting_level)
    if len(report['errors']) > 0:
        self.fail("Error found when validating ISA JSON: {}".format(
            report['errors']))
def test_magetab2json_convert_e_mexp_31(self):
    """Round-trip E-MEXP-31: convert MAGE-TAB to ISA-JSON, dump, reload and validate."""
    converted = magetab2json.convert(
        os.path.join(self._magetab_data_dir, 'E-MEXP-31.idf.txt'),
    )
    json_out_path = os.path.join(self._tmp_dir, 'isa.json')
    with open(json_out_path, 'w') as out_fp:
        json.dump(converted, out_fp)
    # Reload the dumped JSON and confirm it passes ISA-JSON validation.
    with open(json_out_path) as json_fp:
        validation_report = isajson.validate(json_fp)
    self.assertEqual(len(validation_report['errors']), 0)
def test_validate_testdata_bii_s_7_json(self):
    """Validate the BII-S-7 test dataset; expect no errors."""
    test_case = 'BII-S-7'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=self._reporting_level)
    if len(report['errors']) > 0:
        self.fail("Error found when validating ISA JSON: {}".format(
            report['errors']))
def test_validate_testdata_source_split_with_error_json(self):
    """Validate the TEST-ISA-source-split-with-error fixture; expect no errors."""
    test_case = 'TEST-ISA-source-split-with-error'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        report = isajson.validate(fp=fp, log_level=self._reporting_level)
    if len(report['errors']) > 0:
        self.fail("Error found when validating ISA JSON: {}".format(
            report['errors']))
def test_isatab2json_convert_sample_pool_with_error(self):
    """Round-trip TEST-ISA-sample-pool-with-error: tab -> JSON -> validate."""
    test_case = 'TEST-ISA-sample-pool-with-error'
    actual_json = isatab2json.convert(os.path.join(self._tab_data_dir, test_case),
                                      validate_first=False, use_new_parser=True)
    # Write via a context manager: the original passed a bare open(..., 'w')
    # to json.dump, leaking the handle and relying on GC to flush the file.
    with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
        json.dump(actual_json, out_fp)
    with open(os.path.join(self._tmp_dir, 'isa.json')) as actual_json:
        report = isajson.validate(actual_json)
    self.assertEqual(len(report['errors']), 0)
def test_isatab2json_convert_mtbls3(self):
    """Round-trip MTBLS3: tab -> JSON -> validate."""
    test_case = 'MTBLS3'
    actual_json = isatab2json.convert(os.path.join(self._tab_data_dir, test_case),
                                      validate_first=False, use_new_parser=True)
    # Write via a context manager: the original passed a bare open(..., 'w')
    # to json.dump, leaking the handle and relying on GC to flush the file.
    with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
        json.dump(actual_json, out_fp)
    with open(os.path.join(self._tmp_dir, 'isa.json')) as actual_json:
        report = isajson.validate(actual_json)
    self.assertEqual(len(report['errors']), 0)
def test_magetab2json_convert_e_mexp_31(self):
    """Round-trip E-MEXP-31 (protein microarray): MAGE-TAB -> JSON -> validate."""
    with open(os.path.join(self._magetab_data_dir, 'E-MEXP-31.idf.txt')) as idf_fp:
        actual_json = magetab2json.convert(idf_fp, 'protein microarray', 'protein expression profiling')
    # Write via a context manager: the original passed a bare open(..., 'w')
    # to json.dump, leaking the handle and relying on GC to flush the file.
    with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
        json.dump(actual_json, out_fp)
    with open(os.path.join(self._tmp_dir, 'isa.json')) as actual_json:
        report = isajson.validate(actual_json)
    self.assertEqual(len(report['errors']), 0)
def convert2(json_fp, path, config_dir=None, sra_settings=None, datafilehashes=None, validate_first=True):
    """
    (New) Converter for ISA JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output to be written
    :param config_dir: path to JSON configuration. If none, uses default embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    """
    if validate_first:
        # Abort the conversion when the validation log contains any error marker.
        validation_log = isajson.validate(fp=json_fp, config_dir=config_dir, log_level=logging.WARNING)
        if '(E)' in validation_log.getvalue():
            logger.fatal("Could not proceed with conversion as there are some validation errors. Check log.")
            return
    # NOTE(review): json_fp is read by validate() above and again by load()
    # here — assumes validate() rewinds the stream; confirm against isajson.
    isa_obj = isajson.load(fp=json_fp)
    sra.export(isa_obj, path, sra_settings=sra_settings, datafilehashes=datafilehashes)
def post(self):
    """Handle a validation POST: write the JSON body to a temp file, run the
    ISA-JSON validator, and return its log output. Responds 415 for non-JSON
    payloads and 500 on any internal failure; temp files are always removed."""
    response = Response(status=415)
    if request.mimetype == "application/json":
        tmp_file = str(uuid.uuid4()) + ".json"
        tmp_dir = _create_temp_dir()
        try:
            # Write request data to file
            file_path = _write_request_data(request, tmp_dir, tmp_file)
            if file_path is None:
                # NOTE(review): Response(500) passes 500 as the body, not the
                # status — confirm whether Response(status=500) was intended.
                return Response(500)
            # Context manager closes the uploaded file even if validate()
            # raises (the original leaked the handle from a bare open()).
            with open(file_path) as fp:
                log_msg_stream = isajson.validate(fp)
            result = {
                "result:": log_msg_stream.getvalue()
            }
            response = jsonify(result)
        except Exception:
            response = Response(status=500)
        finally:
            # cleanup generated directories
            shutil.rmtree(tmp_dir, ignore_errors=True)
    return response
def test_validate_sampledata_bii_i_1_json(self):
    """Validate the BII-I-1 sample dataset; expect no '(E)' markers in the log."""
    test_case = 'BII-I-1'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.SAMPLE_DATA_DIR, test_case + '.json')) as fp:
        log_msg_stream = isajson.validate(fp=fp, log_level=logging.ERROR)
    if '(E)' in log_msg_stream.getvalue():
        self.fail("Error found when validating ISA JSON: {}".format(log_msg_stream.getvalue()))
def test_validate_testdata_source_split_with_error_json(self):
    """Validate TEST-ISA-source-split-with-error; expect no '(E)'/'(F)' log markers."""
    test_case = 'TEST-ISA-source-split-with-error'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        log_msg_stream = isajson.validate(fp=fp, log_level=self._reporting_level)
    if '(E)' in log_msg_stream.getvalue() or '(F)' in log_msg_stream.getvalue():
        self.fail("Error found when validating ISA JSON: {}".format(log_msg_stream.getvalue()))
def test_validate_testdata_sample_pool_no_protocol_ref_json(self):
    """Validate TEST-ISA-sample-pool-no-protocolref; expect no '(E)'/'(F)' log markers."""
    test_case = 'TEST-ISA-sample-pool-no-protocolref'
    # Context manager closes the fixture even if validate() raises
    # (the original leaked the handle from a bare open()).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        log_msg_stream = isajson.validate(fp=fp, log_level=self._reporting_level)
    if '(E)' in log_msg_stream.getvalue() or '(F)' in log_msg_stream.getvalue():
        self.fail("Error found when validating ISA JSON: {}".format(log_msg_stream.getvalue()))
def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None, remote_path=None,
                        file_path=None, path2library=None, sub_id=None):
    """Transfer data files to ENA via aspera, convert the COPO record to ISA-JSON/SRA and submit it.

    :param transfer_token: id of the RemoteDataFile record used to report transfer progress
    :param user_name: aspera login (user@host form, as consumed by ascp)
    :param password: password sent to the spawned ascp process
    :param remote_path: target directory on the remote aspera endpoint
    :param file_path: list of local file paths to transfer
    :param path2library: directory containing the ascp binary (we chdir into it)
    :param sub_id: id of the Submission record being processed
    """
    # check submission status; only transfer if not already completed
    submission_status = Submission().isComplete(sub_id)
    if not submission_status or submission_status == 'false':
        lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)
        kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
        Submission().save_record(dict(), **kwargs)

        # k is a loop counter which keeps track of the number of files transfered
        k = -1
        f_str = str()
        for f in file_path:
            f_str = f_str + ' ' + f
        cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(**locals())
        lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)
        os.chdir(path2library)
        try:
            thread = pexpect.spawn(cmd, timeout=None)
            thread.expect(["assword:", pexpect.EOF])
            thread.sendline(password)
            cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])
            # BUGFIX: prev_file was re-initialised to '' on every read-loop
            # iteration, so the per-file counter below never worked; hoist it.
            prev_file = ''
            while True:
                i = thread.expect_list(cpl, timeout=None)
                if i == 0:  # EOF! Possible error point if encountered before transfer completion
                    print("Process termination - check exit status!")
                    break
                elif i == 1:
                    pexp_match = thread.match.group(1)
                    tokens_to_match = ["Mb/s"]
                    units_to_match = ["KB", "MB"]
                    time_units = ['d', 'h', 'm', 's']
                    end_of_transfer = False
                    if all(tm in pexp_match.decode("utf-8") for tm in tokens_to_match):
                        fields = {
                            "transfer_status": "transferring",
                            "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                        }
                        tokens = pexp_match.decode("utf-8").split(" ")
                        for token in tokens:
                            if not token == '':
                                if "file" in token:
                                    fields['file_path'] = token.split('=')[-1]
                                    if prev_file != fields['file_path']:
                                        k = k + 1
                                        # BUGFIX: was 'prev_file == fields[...]' -- a no-op
                                        # comparison, not an assignment.
                                        prev_file = fields['file_path']
                                elif '%' in token:
                                    pct = float((token.rstrip("%")))
                                    # pct = (1/len(file_path) * pct) + (k * 1/len(file_path) * 100)
                                    fields['pct_completed'] = pct
                                    # flag end of transfer
                                    print(str(transfer_token) + ": " + str(pct) + '% transfered')
                                    # BUGFIX: was comparing the *string* token to the int 100
                                    # (always False); compare the parsed percentage instead.
                                    if pct == 100:
                                        end_of_transfer = True
                                elif any(um in token for um in units_to_match):
                                    fields['amt_transferred'] = token
                                elif "Mb/s" in token or "Mbps" in token:
                                    t = token[:-4]
                                    if '=' in t:
                                        fields['transfer_rate'] = t[t.find('=') + 1:]
                                    else:
                                        fields['transfer_rate'] = t
                                elif "status" in token:
                                    fields['transfer_status'] = token.split('=')[-1]
                                elif "rate" in token:
                                    fields['transfer_rate'] = token.split('=')[-1]
                                elif "elapsed" in token:
                                    fields['elapsed_time'] = token.split('=')[-1]
                                elif "loss" in token:
                                    fields['bytes_lost'] = token.split('=')[-1]
                                elif "size" in token:
                                    fields['file_size_bytes'] = token.split('=')[-1]
                                elif "ETA" in token:
                                    # ETA is reported as e.g. "0:01:23"; map each component to
                                    # a time unit, skipping zero components.
                                    eta = tokens[-2]
                                    estimated_completion = ""
                                    eta_split = eta.split(":")
                                    t_u = time_units[-len(eta_split):]
                                    for indx, eta_token in enumerate(eta.split(":")):
                                        if eta_token == "00":
                                            continue
                                        estimated_completion += eta_token + t_u[indx] + " "
                                    fields['estimated_completion'] = estimated_completion
                        RemoteDataFile().update_transfer(transfer_token, fields)
            kwargs = dict(target_id=sub_id, completed_on=datetime.now())
            Submission().save_record(dict(), **kwargs)
            # close thread
            thread.close()
            lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)
        except OSError:
            return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                            message='There appears to be an issue with EBI.')

    # setup paths for conversion directories
    conv_dir = os.path.join(self._dir, sub_id)
    if not os.path.exists(os.path.join(conv_dir, 'json')):
        os.makedirs(os.path.join(conv_dir, 'json'))
    json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
    xml_dir = conv_dir

    # Convert COPO JSON to ISA JSON
    lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    conv = cnv.Investigation(submission_token=sub_id)
    meta = conv.get_schema()
    # dump metadata to output file (context manager so the handle is not leaked)
    with open(json_file_path, '+w') as json_file:
        json_file.write(dumps(meta))

    # Validate ISA_JSON
    lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
    with open(json_file_path) as json_file:
        v = isajson.validate(json_file)
        lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

    # convert to SRA with isatools converter
    lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
    sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
    datafilehashes = conv.get_datafilehashes()
    # BUGFIX: the input handle was opened inline and never closed.
    with open(json_file_path) as json_fp:
        json2sra.convert2(json_fp=json_fp, path=conv_dir, sra_settings=sra_settings,
                          datafilehashes=datafilehashes, validate_first=False)

    # finally submit to SRA
    lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
    submission_file = os.path.join(xml_dir, 'submission.xml')
    project_file = os.path.join(xml_dir, 'project_set.xml')
    sample_file = os.path.join(xml_dir, 'sample_set.xml')
    experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
    run_file = os.path.join(xml_dir, 'run_set.xml')

    # NOTE(review): ENA credentials are hard-coded in this URL -- they should be
    # moved into configuration/secrets; kept as-is to avoid a behavioral change here.
    curl_cmd = ('curl -k -F "SUBMISSION=@' + submission_file + '"'
                + ' -F "PROJECT=@' + os.path.join(remote_path, project_file) + '"'
                + ' -F "SAMPLE=@' + os.path.join(remote_path, sample_file) + '"'
                + ' -F "EXPERIMENT=@' + os.path.join(remote_path, experiment_file) + '"'
                + ' -F "RUN=@' + os.path.join(remote_path, run_file) + '"'
                + ' "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123"')

    output = subprocess.check_output(curl_cmd, shell=True)
    lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
    lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)
    xml = ET.fromstring(output)

    accessions = dict()

    # get project accessions
    project = xml.find('./PROJECT')
    project_accession = project.get('accession', default='undefined')
    project_alias = project.get('alias', default='undefined')
    accessions['project'] = {'accession': project_accession, 'alias': project_alias}

    # get experiment accessions
    experiment = xml.find('./EXPERIMENT')
    experiment_accession = experiment.get('accession', default='undefined')
    experiment_alias = experiment.get('alias', default='undefined')
    accessions['experiment'] = {'accession': experiment_accession, 'alias': experiment_alias}

    # get submission accessions
    submission = xml.find('./SUBMISSION')
    submission_accession = submission.get('accession', default='undefined')
    submission_alias = submission.get('alias', default='undefined')
    accessions['submission'] = {'accession': submission_accession, 'alias': submission_alias}

    # get run accessions
    run = xml.find('./RUN')
    run_accession = run.get('accession', default='undefined')
    run_alias = run.get('alias', default='undefined')
    accessions['run'] = {'accession': run_accession, 'alias': run_alias}

    # get sample accessions (each SAMPLE may carry a nested biosample element)
    samples = xml.findall('./SAMPLE')
    sample_accessions = list()
    for sample in samples:
        sample_accession = sample.get('accession', default='undefined')
        sample_alias = sample.get('alias', default='undefined')
        s = {'sample_accession': sample_accession, 'sample_alias': sample_alias}
        for bio_s in sample:
            s['biosample_accession'] = bio_s.get('accession', default='undefined')
        sample_accessions.append(s)
    accessions['sample'] = sample_accessions

    # save accessions to mongo profile record
    s = Submission().get_record(sub_id)
    s['accessions'] = accessions
    s['complete'] = True
    s['target_id'] = str(s.pop('_id'))
    Submission().save_record(dict(), **s)
def test_validate_testdata_repeated_measure_json(self):
    """Validate the repeated-measure test data; fail on validator errors or fatals."""
    test_case = 'TEST-ISA-repeated-measure'
    # BUGFIX: the file handle was opened inline and never closed; use a
    # context manager (consistent with the other validation tests).
    with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
        log_msg_stream = isajson.validate(fp=fp, log_level=self._reporting_level)
    if '(E)' in log_msg_stream.getvalue() or '(F)' in log_msg_stream.getvalue():
        self.fail("Error found when validating ISA JSON: {}".format(log_msg_stream.getvalue()))