Beispiel #1
0
 def test_validate_isajson_isajson_schemas(self):
     """Tests against 0002.

     Validates ``minimal_syntax.json`` and expects the schema-failure message
     to be absent from the returned log stream, then validates
     ``invalid_isajson.json`` and expects the message to be present.
     """
     schema_error = "The JSON does not validate against the provided ISA-JSON schemas!"
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if schema_error in log_msg_stream.getvalue():
         self.fail("Error raised when trying to parse valid ISA-JSON, when it should have been fine!")
     with open(os.path.join(self._unit_json_data_dir, 'invalid_isajson.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if schema_error not in log_msg_stream.getvalue():
         self.fail("NO error raised when validating against some non-ISA-JSON conforming JSON!")
Beispiel #2
0
 def test_validate_isajson_study_config_validation(self):
     """Tests against 4004.

     ``study_config.json`` must not produce the protocol-sequence mismatch
     message in the validation log; ``study_config_fail.json`` must.
     """
     mismatch_msg = "protocol sequence ['sample collection'] does not match study graph"
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'study_config.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if mismatch_msg in log_msg_stream.getvalue():
         self.fail("Validation failed against default study configuration, when it should have passed")
     with open(os.path.join(self._unit_json_data_dir, 'study_config_fail.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if mismatch_msg not in log_msg_stream.getvalue():
         self.fail("Validation passed against default study configuration, when it should have failed")
Beispiel #3
0
 def test_validate_isajson_json_load(self):
     """Tests against 0001.

     ``minimal_syntax.json`` must not produce the JSON parse-error message in
     the validation log; ``invalid.json`` must.
     """
     parse_error = "There was an error when trying to parse the JSON"
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if parse_error in log_msg_stream.getvalue():
         self.fail("Error raised when trying to parse JSON, when it should have been fine!")
     with open(os.path.join(self._unit_json_data_dir, 'invalid.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if parse_error not in log_msg_stream.getvalue():
         self.fail("NO error raised when trying to parse invalid formed JSON!")
Beispiel #4
0
 def test_validate_isajson_assay_config_validation(self):
     """Tests against 4004.

     Warning code 4004 must be absent from the report for
     ``assay_config.json`` and present in the report for
     ``assay_config_fail.json``.
     """
     with open(os.path.join(self._unit_json_data_dir, 'assay_config.json')) as fp:
         report = isajson.validate(fp)
     if 4004 in [e['code'] for e in report['warnings']]:
         self.fail("Validation failed against transcription_seq.json configuration, when it should have passed")
     # The failing fixture used to be opened without ever being closed.
     with open(os.path.join(self._unit_json_data_dir, 'assay_config_fail.json')) as fp:
         report = isajson.validate(fp)
     if 4004 not in [e['code'] for e in report['warnings']]:
         self.fail("Validation passed against transcription_seq.json configuration, when it should have failed")
Beispiel #5
0
    def test_validate_isajson_utf8_encoding_check(self):
        """Tests against 0010.

        ``minimal_syntax.json`` must not produce the UTF-8 encoding message in
        the validation log; ``non_utf8.json`` must.
        """
        encoding_msg = "File should be UTF-8 encoding"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if encoding_msg in log_msg_stream.getvalue():
            self.fail("Validation warning present when testing against UTF-8 encoded file")

        with open(os.path.join(self._unit_json_data_dir, 'non_utf8.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if encoding_msg not in log_msg_stream.getvalue():
            self.fail("Validation warning missing when testing against UTF-16 encoded file (UTF-8 required)")
Beispiel #6
0
 def test_validate_isajson_utf8_encoding_check(self):
     """Tests against 0010.

     Warning code 10 must be absent from the report for
     ``minimal_syntax.json`` and present in the report for ``non_utf8.json``.
     """
     utf8_path = os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')
     with open(utf8_path) as utf8_fp:
         utf8_codes = [warning['code'] for warning in isajson.validate(utf8_fp)['warnings']]
         if 10 in utf8_codes:
             self.fail("Validation warning present when testing against UTF-8 encoded file")
         # The second fixture is checked while the first is still open, as before.
         other_path = os.path.join(self._unit_json_data_dir, 'non_utf8.json')
         with open(other_path) as other_fp:
             other_codes = [warning['code'] for warning in isajson.validate(other_fp)['warnings']]
             if 10 not in other_codes:
                 self.fail("Validation warning missing when testing against UTF-16 encoded file (UTF-8 required)")
Beispiel #7
0
 def test_validate_isajson_isajson_schemas(self):
     """Tests against 0003.

     Error code 3 must be absent from the report for ``minimal_syntax.json``
     and present in the report for ``invalid_isajson.json``.
     """
     valid_path = os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')
     with open(valid_path) as valid_fp:
         error_codes = [err['code'] for err in isajson.validate(valid_fp)['errors']]
         if 3 in error_codes:
             self.fail("Error raised when trying to parse valid ISA-JSON, when it should have been fine!")
     invalid_path = os.path.join(self._unit_json_data_dir, 'invalid_isajson.json')
     with open(invalid_path) as invalid_fp:
         error_codes = [err['code'] for err in isajson.validate(invalid_fp)['errors']]
         if 3 not in error_codes:
             self.fail("NO error raised when validating against some non-ISA-JSON conforming JSON!")
Beispiel #8
0
 def test_validate_isajson_json_load(self):
     """Tests against 0002.

     Error code 2 must be absent from the report for ``minimal_syntax.json``
     and present in the report for ``invalid.json``.
     """
     parseable = os.path.join(self._unit_json_data_dir, 'minimal_syntax.json')
     with open(parseable) as good_fp:
         found = [item['code'] for item in isajson.validate(good_fp)['errors']]
         if 2 in found:
             self.fail("Error raised when trying to parse JSON, when it should have been fine!")
     malformed = os.path.join(self._unit_json_data_dir, 'invalid.json')
     with open(malformed) as bad_fp:
         found = [item['code'] for item in isajson.validate(bad_fp)['errors']]
         if 2 not in found:
             self.fail("NO error raised when trying to parse invalid formed JSON!")
Beispiel #9
0
 def test_validate_isajson_assay_config_validation(self):
     """Tests against 4004.

     ``assay_config.json`` must not produce the protocol-sequence mismatch
     message in the validation log; ``assay_config_fail.json`` must.
     """
     # Defined once so the passing and failing checks cannot drift apart.
     mismatch_msg = ("protocol sequence ['nucleic acid extraction', 'library construction', 'nucleic acid sequencing', "
                     "'sequence analysis data transformation'] does not match study graph")
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'assay_config.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if mismatch_msg in log_msg_stream.getvalue():
         self.fail("Validation failed against transcription_seq.json configuration, when it should have passed")
     with open(os.path.join(self._unit_json_data_dir, 'assay_config_fail.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if mismatch_msg not in log_msg_stream.getvalue():
         self.fail("Validation passed against transcription_seq.json configuration, when it should have failed")
Beispiel #10
0
    def test_validate_isajson_source_link(self):
        """Tests against 1002.

        ``source_link.json`` must not produce the "['#source/1'] not found"
        message in the validation log; ``source_link_fail.json`` must.
        """
        missing_msg = "['#source/1'] not found"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'source_link.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if missing_msg in log_msg_stream.getvalue():
            self.fail("Validation error present when should pass without error - source link reports broken when "
                      "present in data")

        with open(os.path.join(self._unit_json_data_dir, 'source_link_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if missing_msg not in log_msg_stream.getvalue():
            self.fail("Validation error missing when should report error - data has broken source link but not "
                      "reported in validation report")
Beispiel #11
0
 def test_validate_isajson_source_link(self):
     """Tests against 1002, but reports 1005 error (more general case).

     Error code 1005 must be absent from the report for ``source_link.json``
     and present in the report for ``source_link_fail.json``.
     """
     intact = os.path.join(self._unit_json_data_dir, 'source_link.json')
     with open(intact) as intact_fp:
         codes = [err['code'] for err in isajson.validate(intact_fp)['errors']]
         if 1005 in codes:
             self.fail("Validation error present when should pass without error - source link reports broken when "
                       "present in data")
     broken = os.path.join(self._unit_json_data_dir, 'source_link_fail.json')
     with open(broken) as broken_fp:
         codes = [err['code'] for err in isajson.validate(broken_fp)['errors']]
         if 1005 not in codes:
             self.fail("Validation error missing when should report error - data has broken source link but not "
                       "reported in validation report")
Beispiel #12
0
 def test_validate_isajson_protocol_ref_link(self):
     """Tests against 1007.

     ``protocol_ref_link.json`` must not produce the undeclared
     ``#protocol/1`` message in the validation log;
     ``protocol_ref_link_fail.json`` must.
     """
     undeclared_msg = "['#protocol/1'] used in a study or assay process sequence not declared"
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if undeclared_msg in log_msg_stream.getvalue():
         self.fail(
             "Validation error present when should pass without error - executesProtocol link reports broken when "
             "present in data")
     with open(os.path.join(self._unit_json_data_dir, 'protocol_ref_link_fail.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if undeclared_msg not in log_msg_stream.getvalue():
         self.fail(
             "Validation error missing when should report error - data has broken executesProtocol link but not "
             "reported in validation report")
Beispiel #13
0
 def test_validate_isajson_factor_used(self):
     """Tests against 3006.

     ``factor_used.json`` must not produce the unused ``#factor/1`` message
     in the validation log; ``factor_used_fail.json`` must.
     """
     unused_msg = "factors declared ['#factor/1'] that have not been used"
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'factor_used.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if unused_msg in log_msg_stream.getvalue():
         self.fail(
             "Validation error present when should pass without error - incorrectly reports #factor/1 not used when "
             "it has been used in #sample/1")
     with open(os.path.join(self._unit_json_data_dir, 'factor_used_fail.json')) as fp:
         log_msg_stream = isajson.validate(fp)
     if unused_msg not in log_msg_stream.getvalue():
         self.fail(
             "Validation error missing when should report error - data has incorrectly reported everything is OK "
             "but not reported #factor/1 as being unused")
Beispiel #14
0
    def test_validate_isajson_iso8601(self):
        """Tests against 3001.

        ``iso8601.json`` must not produce the ISO8601 format message in the
        validation log; ``iso8601_fail.json`` must.
        """
        format_msg = "does not conform to ISO8601 format"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'iso8601.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - incorrectly formatted ISO8601 date in "
                "publicReleaseDate reports invalid when valid data")

        with open(os.path.join(self._unit_json_data_dir, 'iso8601_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has incorrectly formatted ISO8601 date in "
                "publicReleaseDate but not reported in validation report")
Beispiel #15
0
    def test_validate_isajson_term_source_used(self):
        """Tests against 3007.

        ``term_source_used.json`` must not produce the unused ``PATO``
        ontology-source message in the validation log;
        ``term_source_used_fail.json`` must.
        """
        unused_msg = "ontology sources declared ['PATO'] that have not been used"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'term_source_used.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if unused_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - incorrectly reports PATO not used when it "
                "has been used in #factor/1")

        with open(os.path.join(self._unit_json_data_dir, 'term_source_used_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if unused_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has incorrectly reported everything is OK "
                "but not reported PATO as being unused")
Beispiel #16
0
    def test_validate_isajson_process_link(self):
        """Tests against 1006.

        ``process_link.json`` must not produce the broken process-link message
        in the validation log; ``process_link_fail.json`` must.
        """
        broken_msg = "link #process/1 in process #process/2 does not refer to another process"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'process_link.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if broken_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - process link reports broken when present "
                "in data")

        with open(os.path.join(self._unit_json_data_dir, 'process_link_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if broken_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has broken process link but not reported in "
                "validation report")
Beispiel #17
0
    def test_validate_isajson_factor_link(self):
        """Tests against 1008.

        ``factor_link.json`` must not produce the undeclared ``#factor/1``
        message in the validation log; ``factor_link_fail.json`` must.
        """
        undeclared_msg = "['#factor/1'] used in a study or assay process sequence not declared"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'factor_link.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if undeclared_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - factor link in factorValue reports broken "
                "when present in data")

        with open(os.path.join(self._unit_json_data_dir, 'factor_link_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        # The negative case fails when the message is MISSING; the previous
        # ``in`` check contradicted the failure text below.
        if undeclared_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has broken factor link in factorValue but "
                "not reported in validation report")
Beispiel #18
0
    def test_validate_isajson_doi(self):
        """Tests against 3002.

        ``doi.json`` must not produce the DOI format message in the validation
        log; ``doi_fail.json`` must.
        """
        format_msg = "does not conform to DOI format"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'doi.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - incorrectly formatted DOI in publication "
                "reports invalid when valid data")

        with open(os.path.join(self._unit_json_data_dir, 'doi_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has incorrectly formatted DOI in publication "
                "but not reported in validation report")
Beispiel #19
0
    def test_validate_isajson_pubmed(self):
        """Tests against 3003.

        ``pubmed.json`` must not produce the "is not valid format" message in
        the validation log; ``pubmed_fail.json`` must.
        """
        format_msg = "is not valid format"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'pubmed.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - incorrectly formatted Pubmed ID in "
                "publication reports invalid when valid data")

        with open(os.path.join(self._unit_json_data_dir, 'pubmed_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if format_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has incorrectly formatted Pubmed ID in "
                "publication but not reported in validation report")
Beispiel #20
0
 def test_validate_isajson_protocol_parameter_link(self):
     """Tests against 1009.

     Error code 1009 must be absent from the report for
     ``protocol_parameter_link.json`` and present in the report for
     ``protocol_parameter_link_fail.json``.
     """
     linked = os.path.join(self._unit_json_data_dir, 'protocol_parameter_link.json')
     with open(linked) as linked_fp:
         reported = [err['code'] for err in isajson.validate(linked_fp)['errors']]
         if 1009 in reported:
             self.fail(
                 "Validation error present when should pass without error - parameter link in parameterValue reports "
                 "broken when present in data")
     unlinked = os.path.join(self._unit_json_data_dir, 'protocol_parameter_link_fail.json')
     with open(unlinked) as unlinked_fp:
         reported = [err['code'] for err in isajson.validate(unlinked_fp)['errors']]
         if 1009 not in reported:
             self.fail("Validation error missing when should report error - data has broken parameter link in "
                       "parameterValue but not reported in validation report")
Beispiel #21
0
    def test_validate_isajson_protocol_used(self):
        """Tests against 3005.

        ``protocol_used.json`` must not produce the "['#protocol/1'] not used"
        message in the validation log; ``protocol_used_fail.json`` must.
        """
        unused_msg = "['#protocol/1'] not used"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'protocol_used.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if unused_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - incorrectly reports #protocol/1 not used "
                "when it has been used in #process/1")

        with open(os.path.join(self._unit_json_data_dir, 'protocol_used_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if unused_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has incorrectly reported everything is OK "
                "but not reported #protocol/1 as being unused")
Beispiel #22
0
    def test_validate_isajson_protocol_parameter_link(self):
        """Tests against 1009.

        ``protocol_parameter_link.json`` must not produce the undeclared
        ``#parameter/1`` message in the validation log;
        ``protocol_parameter_link_fail.json`` must.
        """
        undeclared_msg = "['#parameter/1'] used in a study or assay process sequence not declared"
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        if undeclared_msg in log_msg_stream.getvalue():
            self.fail(
                "Validation error present when should pass without error - parameter link in parameterValue reports "
                "broken when present in data")

        with open(os.path.join(self._unit_json_data_dir, 'protocol_parameter_link_fail.json')) as fp:
            log_msg_stream = isajson.validate(fp)
        # The negative case fails when the message is MISSING; the previous
        # ``in`` check contradicted the failure text below.
        if undeclared_msg not in log_msg_stream.getvalue():
            self.fail(
                "Validation error missing when should report error - data has broken parameter link in parameterValue "
                "but not reported in validation report")
Beispiel #23
0
 def test_validate_isajson_protocol_ref_link(self):
     """Tests against 1007.

     Error code 1007 must be absent from the report for
     ``protocol_ref_link.json`` and present in the report for
     ``protocol_ref_link_fail.json``.
     """
     declared = os.path.join(self._unit_json_data_dir, 'protocol_ref_link.json')
     with open(declared) as declared_fp:
         seen_codes = [err['code'] for err in isajson.validate(declared_fp)['errors']]
         if 1007 in seen_codes:
             self.fail(
                 "Validation error present when should pass without error - executesProtocol link reports broken when "
                 "present in data")
     undeclared = os.path.join(self._unit_json_data_dir, 'protocol_ref_link_fail.json')
     with open(undeclared) as undeclared_fp:
         seen_codes = [err['code'] for err in isajson.validate(undeclared_fp)['errors']]
         if 1007 not in seen_codes:
             self.fail(
                 "Validation error missing when should report error - data has broken executesProtocol link but not "
                 "reported in validation report")
Beispiel #24
0
 def test_validate_isajson_factor_link(self):
     """Tests against 1008.

     Error code 1008 must be absent from the report for ``factor_link.json``
     and present in the report for ``factor_link_fail.json``.
     """
     passing = os.path.join(self._unit_json_data_dir, 'factor_link.json')
     with open(passing) as passing_fp:
         codes = [err['code'] for err in isajson.validate(passing_fp)['errors']]
         if 1008 in codes:
             self.fail(
                 "Validation error present when should pass without error - factor link in factorValue reports broken "
                 "when present in data")
     failing = os.path.join(self._unit_json_data_dir, 'factor_link_fail.json')
     with open(failing) as failing_fp:
         codes = [err['code'] for err in isajson.validate(failing_fp)['errors']]
         if 1008 not in codes:
             self.fail(
                 "Validation error missing when should report error - data has broken factor link in factorValue but "
                 "not reported in validation report")
Beispiel #25
0
 def test_validate_isajson_iso8601(self):
     """Tests against 3001.

     Warning code 3001 must be absent from the report for ``iso8601.json``
     and present in the report for ``iso8601_fail.json``.
     """
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'iso8601.json')) as fp:
         report = isajson.validate(fp)
     if 3001 in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation error present when should pass without error - incorrectly formatted ISO8601 date in "
             "publicReleaseDate reports invalid when valid data")
     with open(os.path.join(self._unit_json_data_dir, 'iso8601_fail.json')) as fp:
         report = isajson.validate(fp)
     if 3001 not in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation error missing when should report error - data has incorrectly formatted ISO8601 date in "
             "publicReleaseDate but not reported in validation report")
Beispiel #26
0
 def test_validate_isajson_term_source_used(self):
     """Tests against 3007.

     Warning code 3007 must be absent from the report for
     ``term_source_used.json`` and present in the report for
     ``term_source_used_fail.json``.
     """
     used = os.path.join(self._unit_json_data_dir, 'term_source_used.json')
     with open(used) as used_fp:
         warning_codes = [w['code'] for w in isajson.validate(used_fp)['warnings']]
         if 3007 in warning_codes:
             self.fail(
                 "Validation error present when should pass without error - incorrectly reports PATO not used when "
                 "it has been used in #factor/1")
     unused = os.path.join(self._unit_json_data_dir, 'term_source_used_fail.json')
     with open(unused) as unused_fp:
         warning_codes = [w['code'] for w in isajson.validate(unused_fp)['warnings']]
         if 3007 not in warning_codes:
             self.fail(
                 "Validation error missing when should report error - data has incorrectly reported everything is "
                 "OK but not reported PATO as being unused")
Beispiel #27
0
 def test_validate_isajson_doi(self):
     """Tests against 3002.

     Warning code 3002 must be absent from the report for ``doi.json`` and
     present in the report for ``doi_fail.json``.
     """
     well_formed = os.path.join(self._unit_json_data_dir, 'doi.json')
     with open(well_formed) as well_formed_fp:
         warning_codes = [w['code'] for w in isajson.validate(well_formed_fp)['warnings']]
         if 3002 in warning_codes:
             self.fail(
                 "Validation error present when should pass without error - incorrectly formatted DOI in publication "
                 "reports invalid when valid data")
     ill_formed = os.path.join(self._unit_json_data_dir, 'doi_fail.json')
     with open(ill_formed) as ill_formed_fp:
         warning_codes = [w['code'] for w in isajson.validate(ill_formed_fp)['warnings']]
         if 3002 not in warning_codes:
             self.fail(
                 "Validation error missing when should report error - data has incorrectly formatted DOI in publication "
                 "but not reported in validation report")
Beispiel #28
0
 def test_validate_isajson_factor_used(self):
     """Tests against 1021.

     Warning code 1021 must be absent from the report for
     ``factor_used.json`` and present in the report for
     ``factor_used_fail.json``.
     """
     used = os.path.join(self._unit_json_data_dir, 'factor_used.json')
     with open(used) as used_fp:
         raised = [w['code'] for w in isajson.validate(used_fp)['warnings']]
         if 1021 in raised:
             self.fail(
                 "Validation error present when should pass without error - incorrectly reports #factor/1 not used when "
                 "it has been used in #sample/1")
     unused = os.path.join(self._unit_json_data_dir, 'factor_used_fail.json')
     with open(unused) as unused_fp:
         raised = [w['code'] for w in isajson.validate(unused_fp)['warnings']]
         if 1021 not in raised:
             self.fail(
                 "Validation error missing when should report error - data has incorrectly reported everything is OK "
                 "but not reported #factor/1 as being unused")
Beispiel #29
0
 def test_validate_isajson_pubmed(self):
     """Tests against 3003.

     Warning code 3003 must be absent from the report for ``pubmed.json`` and
     present in the report for ``pubmed_fail.json``.
     """
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'pubmed.json')) as fp:
         report = isajson.validate(fp)
     if 3003 in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation error present when should pass without error - incorrectly formatted Pubmed ID in "
             "publication reports invalid when valid data")
     with open(os.path.join(self._unit_json_data_dir, 'pubmed_fail.json')) as fp:
         report = isajson.validate(fp)
     if 3003 not in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation error missing when should report error - data has incorrectly formatted Pubmed ID in "
             "publication but not reported in validation report")
Beispiel #30
0
 def test_validate_isajson_process_link(self):
     """Tests against 1006.

     Error code 1006 must be absent from the report for
     ``process_link.json`` and present in the report for
     ``process_link_fail.json``.
     """
     connected = os.path.join(self._unit_json_data_dir, 'process_link.json')
     with open(connected) as connected_fp:
         error_codes = [err['code'] for err in isajson.validate(connected_fp)['errors']]
         if 1006 in error_codes:
             self.fail(
                 "Validation error present when should pass without error - process link reports broken when present "
                 "in data")
     dangling = os.path.join(self._unit_json_data_dir, 'process_link_fail.json')
     with open(dangling) as dangling_fp:
         error_codes = [err['code'] for err in isajson.validate(dangling_fp)['errors']]
         if 1006 not in error_codes:
             self.fail(
                 "Validation error missing when should report error - data has broken process link but not reported in "
                 "validation report")
Beispiel #31
0
 def test_validate_isajson_material_link(self):
     """Tests against 1005.

     Error code 1005 must be absent from the report for
     ``material_link.json`` and present in the report for
     ``material_link_fail.json``.
     """
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'material_link.json')) as fp:
         report = isajson.validate(fp)
     if 1005 in [e['code'] for e in report['errors']]:
         self.fail(
             "Validation error present when should pass without error -material link link reports broken when "
             "present in data")
     with open(os.path.join(self._unit_json_data_dir, 'material_link_fail.json')) as fp:
         report = isajson.validate(fp)
     if 1005 not in [e['code'] for e in report['errors']]:
         self.fail(
             "Validation error missing when should report error - data has broken material link but not "
             "reported in validation report")
Beispiel #32
0
 def test_validate_isajson_data_file_link(self):
     """Tests against 1004 but reports 1005 error (more general case).

     Error code 1005 must be absent from the report for
     ``datafile_link.json`` and present in the report for
     ``datafile_link_fail.json``.
     """
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'datafile_link.json')) as fp:
         report = isajson.validate(fp)
     if 1005 in [e['code'] for e in report['errors']]:
         self.fail(
             "Validation error present when should pass without error - data file link reports broken when present "
             "in data")
     with open(os.path.join(self._unit_json_data_dir, 'datafile_link_fail.json')) as fp:
         report = isajson.validate(fp)
     if 1005 not in [e['code'] for e in report['errors']]:
         self.fail(
             "Validation error missing when should report error - data has broken data file link but not reported "
             "in validation report")
Beispiel #33
0
def convert(json_fp,
            path,
            config_dir=None,
            sra_settings=None,
            datafilehashes=None,
            validate_first=True):
    """ Converter for ISA-JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output SRA XMLs to be written
    :param config_dir: path to JSON configuration. If none, uses default
        embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: If True, validate the input before converting and
        return without exporting when the report contains errors
    :return: None
    """
    if validate_first:
        log.info("Validating input JSON before conversion")
        report = isajson.validate(fp=json_fp,
                                  config_dir=config_dir,
                                  log_level=logging.ERROR)
        if len(report.get('errors')) > 0:
            log.fatal("Could not proceed with conversion as there are some "
                      "validation errors. Check log.")
            return
    # NOTE(review): json_fp is read by validate() and again by load() below;
    # confirm that one of them rewinds the file pointer.
    log.info("Loading isajson {}".format(json_fp.name))
    isa = isajson.load(fp=json_fp)
    log.info("Exporting SRA to {}".format(path))
    # The placeholder was missing, so the settings were never logged.
    log.debug("Using SRA settings {}".format(sra_settings))
    sra.export(isa,
               path,
               sra_settings=sra_settings,
               datafilehashes=datafilehashes)
Beispiel #34
0
 def test_validate_isajson_study_config_validation(self):
     """Tests against 4004.

     Warning code 4004 must be absent from the report for
     ``study_config.json`` and present in the report for
     ``study_config_fail.json``.
     """
     # Context managers close the fixture files, which were previously leaked.
     with open(os.path.join(self._unit_json_data_dir, 'study_config.json')) as fp:
         report = isajson.validate(fp)
     if 4004 in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation failed against default study configuration, when it should have passed"
         )
     with open(os.path.join(self._unit_json_data_dir, 'study_config_fail.json')) as fp:
         report = isajson.validate(fp)
     if 4004 not in [e['code'] for e in report['warnings']]:
         self.fail(
             "Validation passed against default study configuration, when it should have failed"
         )
Beispiel #35
0
    def validate_isajson(self):
        """Validate ``isa_json.json`` under ``self.json_path`` and log the result.

        Both the start notice and the validation result are logged at INFO
        level with the FILE log type. Returns None.
        """
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)

        isa_json_path = os.path.join(self.json_path, 'isa_json.json')
        with open(isa_json_path) as handle:
            lg.log(isajson.validate(handle), level=Loglvl.INFO, type=Logtype.FILE)
Beispiel #36
0
    def test_validate_isajson_protocol_used(self):
        """Tests against 1019.

        Warning code 1019 must be absent from the report for
        ``protocol_used.json`` and present in the report for
        ``protocol_used_fail.json``.
        """
        # Context managers close the fixture files, which were previously leaked.
        with open(os.path.join(self._unit_json_data_dir, 'protocol_used.json')) as fp:
            report = isajson.validate(fp)
        if 1019 in [e['code'] for e in report['warnings']]:
            self.fail(
                "Validation error present when should pass without error - incorrectly reports #protocol/1 not used "
                "when it has been used in #process/1")

        with open(os.path.join(self._unit_json_data_dir, 'protocol_used_fail.json')) as fp:
            report = isajson.validate(fp)
        if 1019 not in [e['code'] for e in report['warnings']]:
            self.fail(
                "Validation error missing when should report error - data has incorrectly reported everything is OK "
                "but not reported #protocol/1 as being unused")
 def test_validate_testdata_sample_pool_no_protocol_ref_json(self):
     """Validate ``TEST-ISA-sample-pool-no-protocolref.json`` and fail if the report lists any errors."""
     test_case = 'TEST-ISA-sample-pool-no-protocolref'
     # The context manager closes the fixture file, which was previously leaked.
     with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=self._reporting_level)
     if len(report['errors']) > 0:
         self.fail("Error found when validating ISA JSON: {}".format(
             report['errors']))
 def test_validate_sampledata_bii_i_1_json(self):
     """Validate the ``BII-I-1.json`` sample data at ERROR log level and fail if the report lists any errors."""
     test_case = 'BII-I-1'
     # The context manager closes the sample file, which was previously leaked.
     with open(os.path.join(utils.SAMPLE_DATA_DIR, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=logging.ERROR)
     if len(report['errors']) > 0:
         self.fail("Errors found when validating ISA JSON: {}".format(
             report['errors']))
 def test_validate_testdata_charac_param_factor_json(self):
     """Validate ``TEST-ISA-charac-param-factor.json`` and fail if the report lists any errors."""
     test_case = 'TEST-ISA-charac-param-factor'
     # The context manager closes the fixture file, which was previously leaked.
     with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=self._reporting_level)
     if len(report['errors']) > 0:
         self.fail("Error found when validating ISA JSON: {}".format(
             report['errors']))
 def test_validate_testdata_repeated_measure_json(self):
     """Validate ``TEST-ISA-repeated-measure.json`` and fail if the report lists any errors."""
     test_case = 'TEST-ISA-repeated-measure'
     # The context manager closes the fixture file, which was previously leaked.
     with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=self._reporting_level)
     if len(report['errors']) > 0:
         self.fail("Error found when validating ISA JSON: {}".format(
             report['errors']))
Beispiel #41
0
 def test_magetab2json_convert_e_mexp_31(self):
     """Convert ``E-MEXP-31.idf.txt`` to ISA-JSON, write it to ``isa.json`` in the temp dir, and expect zero validation errors."""
     idf_path = os.path.join(self._magetab_data_dir, 'E-MEXP-31.idf.txt')
     converted = magetab2json.convert(idf_path)
     with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as sink:
         json.dump(converted, sink)
     # Re-read the dumped file so the validator sees what was written to disk.
     with open(os.path.join(self._tmp_dir, 'isa.json')) as dumped_fp:
         validation = isajson.validate(dumped_fp)
         self.assertEqual(len(validation['errors']), 0)
 def test_validate_testdata_bii_s_7_json(self):
     """Validate ``BII-S-7/BII-S-7.json`` and fail if the report lists any errors."""
     test_case = 'BII-S-7'
     # The context manager closes the fixture file, which was previously leaked.
     with open(os.path.join(utils.JSON_DATA_DIR, test_case, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=self._reporting_level)
     if len(report['errors']) > 0:
         self.fail("Error found when validating ISA JSON: {}".format(
             report['errors']))
 def test_validate_testdata_source_split_with_error_json(self):
     """Validate ``TEST-ISA-source-split-with-error.json`` and fail if the report lists any errors."""
     test_case = 'TEST-ISA-source-split-with-error'
     # The context manager closes the fixture file, which was previously leaked.
     with open(os.path.join(utils.JSON_DATA_DIR, test_case + '.json')) as fp:
         report = isajson.validate(fp=fp, log_level=self._reporting_level)
     if len(report['errors']) > 0:
         self.fail("Error found when validating ISA JSON: {}".format(
             report['errors']))
Beispiel #44
0
 def test_isatab2json_convert_sample_pool_with_error(self):
     """Convert the ``TEST-ISA-sample-pool-with-error`` ISA-Tab to JSON, write it to ``isa.json`` in the temp dir, and expect zero validation errors."""
     test_case = 'TEST-ISA-sample-pool-with-error'
     actual_json = isatab2json.convert(os.path.join(self._tab_data_dir, test_case),
                                       validate_first=False,
                                       use_new_parser=True)
     # Close (and therefore flush) the output before re-reading it; the
     # write handle used to be left open.
     with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
         json.dump(actual_json, out_fp)
     with open(os.path.join(self._tmp_dir, 'isa.json')) as in_fp:
         report = isajson.validate(in_fp)
         self.assertEqual(len(report['errors']), 0)
Beispiel #45
0
 def test_isatab2json_convert_mtbls3(self):
     """Convert the ``MTBLS3`` ISA-Tab to JSON, write it to ``isa.json`` in the temp dir, and expect zero validation errors."""
     test_case = 'MTBLS3'
     actual_json = isatab2json.convert(os.path.join(self._tab_data_dir, test_case),
                                       validate_first=False,
                                       use_new_parser=True)
     # Close (and therefore flush) the output before re-reading it; the
     # write handle used to be left open.
     with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
         json.dump(actual_json, out_fp)
     with open(os.path.join(self._tmp_dir, 'isa.json')) as in_fp:
         report = isajson.validate(in_fp)
         self.assertEqual(len(report['errors']), 0)
Beispiel #46
0
 def test_magetab2json_convert_e_mexp_31(self):
     """Convert ``E-MEXP-31.idf.txt`` to ISA-JSON, write it to ``isa.json`` in the temp dir, and expect zero validation errors."""
     with open(os.path.join(self._magetab_data_dir,
                            'E-MEXP-31.idf.txt')) as idf_fp:
         actual_json = magetab2json.convert(idf_fp, 'protein microarray',
                                            'protein expression profiling')
         # Close (and therefore flush) the output before re-reading it; the
         # write handle used to be left open.
         with open(os.path.join(self._tmp_dir, 'isa.json'), 'w') as out_fp:
             json.dump(actual_json, out_fp)
         with open(os.path.join(self._tmp_dir, 'isa.json')) as in_fp:
             report = isajson.validate(in_fp)
             self.assertEqual(len(report['errors']), 0)
Beispiel #47
0
def convert2(json_fp, path, config_dir=None, sra_settings=None, datafilehashes=None, validate_first=True):
    """ (New) Converter for ISA JSON to SRA.
    :param json_fp: File pointer to ISA JSON input
    :param path: Directory for output to be written
    :param config_dir: path to JSON configuration. If none, uses default embedded in API
    :param sra_settings: SRA settings dict
    :param datafilehashes: Data files with hashes, in a dict
    :param validate_first: If True, validate the input first and abort the
        conversion when the validation log contains an '(E)' entry
    :return: None. When validation reports errors, a fatal message is logged
        and nothing is exported.
    """
    if validate_first:
        log_msg_stream = isajson.validate(fp=json_fp, config_dir=config_dir, log_level=logging.WARNING)
        if '(E)' in log_msg_stream.getvalue():
            logger.fatal("Could not proceed with conversion as there are some validation errors. Check log.")
            return None
        # The same file pointer is handed to the loader below. Validation
        # presumably reads the stream to its end, so rewind it when the
        # stream supports seeking; otherwise the loader would see no data.
        seekable = getattr(json_fp, 'seekable', None)
        if callable(seekable) and seekable():
            json_fp.seek(0)
    i = isajson.load(fp=json_fp)
    sra.export(i, path, sra_settings=sra_settings, datafilehashes=datafilehashes)
Beispiel #48
0
 def post(self):
     """Validate a posted ISA-JSON document and return the validation log.

     Responds with status 415 unless the request mimetype is
     ``application/json``. Otherwise the request data is written to a
     temporary directory, validated, and the validator's log text is
     returned as JSON under the ``"result:"`` key. Any failure yields a
     status 500 response. The temporary directory is always removed.
     """
     response = Response(status=415)
     if request.mimetype == "application/json":
         tmp_file = str(uuid.uuid4()) + ".json"
         tmp_dir = _create_temp_dir()
         try:
             # Write request data to file
             file_path = _write_request_data(request, tmp_dir, tmp_file)
             if file_path is None:
                 # The status must be passed by keyword: the first
                 # positional argument of Response is the body.
                 return Response(status=500)
             # Close the handle before the temp directory is removed.
             with open(file_path) as json_fp:
                 log_msg_stream = isajson.validate(json_fp)
             result = {
                 "result:": log_msg_stream.getvalue()
             }
             response = jsonify(result)
         except Exception:
             response = Response(status=500)
         finally:
             # cleanup generated directories
             shutil.rmtree(tmp_dir, ignore_errors=True)
     return response
 def test_validate_sampledata_bii_i_1_json(self):
     """Validate the 'BII-I-1' sample JSON; fail if the log contains '(E)'."""
     test_case = 'BII-I-1'
     json_path = os.path.join(utils.SAMPLE_DATA_DIR, test_case + '.json')
     # Use a context manager so the input file is closed after validation.
     with open(json_path) as json_fp:
         log_msg_stream = isajson.validate(fp=json_fp,
                                           log_level=logging.ERROR)
     log_text = log_msg_stream.getvalue()
     if '(E)' in log_text:
         self.fail("Error found when validating ISA JSON: {}".format(log_text))
 def test_validate_testdata_source_split_with_error_json(self):
     """Validate 'TEST-ISA-source-split-with-error'; fail on '(E)' or '(F)' log entries."""
     test_case = 'TEST-ISA-source-split-with-error'
     json_path = os.path.join(utils.JSON_DATA_DIR, test_case + '.json')
     # Use a context manager so the input file is closed after validation.
     with open(json_path) as json_fp:
         log_msg_stream = isajson.validate(fp=json_fp,
                                           log_level=self._reporting_level)
     log_text = log_msg_stream.getvalue()
     if '(E)' in log_text or '(F)' in log_text:
         self.fail("Error found when validating ISA JSON: {}".format(log_text))
 def test_validate_testdata_sample_pool_no_protocol_ref_json(self):
     """Validate 'TEST-ISA-sample-pool-no-protocolref'; fail on '(E)' or '(F)' log entries."""
     test_case = 'TEST-ISA-sample-pool-no-protocolref'
     json_path = os.path.join(utils.JSON_DATA_DIR, test_case + '.json')
     # Use a context manager so the input file is closed after validation.
     with open(json_path) as json_fp:
         log_msg_stream = isajson.validate(fp=json_fp,
                                           log_level=self._reporting_level)
     log_text = log_msg_stream.getvalue()
     if '(E)' in log_text or '(F)' in log_text:
         self.fail("Error found when validating ISA JSON: {}".format(log_text))
Beispiel #52
0
    def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None, remote_path=None, file_path=None,
                            path2library=None, sub_id=None):
        """Upload files with ``ascp``, then convert the metadata and submit it via curl.

        Steps, in order:

        1. Unless the submission is already complete, run ``./ascp`` from
           ``path2library`` and push per-line progress fields to
           ``RemoteDataFile().update_transfer``.
        2. Write the ISA-JSON obtained from ``cnv.Investigation`` to
           ``<self._dir>/<sub_id>/json/isa_json.json`` and validate it.
        3. Convert the ISA-JSON to SRA XML with ``json2sra.convert2``.
        4. POST the XML files with curl and store the accessions found in
           the receipt on the submission record.

        :param transfer_token: token passed to ``update_transfer`` and
            printed with the progress percentage
        :param user_name: user part of the ``ascp`` destination
        :param password: sent to the ``ascp`` process after its prompt
        :param remote_path: path part of the ``ascp`` destination; also
            joined with the XML file paths in the curl call
        :param file_path: iterable of local file paths to upload
        :param path2library: directory the process changes into before
            launching ``./ascp``
        :param sub_id: submission id
        :return: None, or the result of ``redirect(...)`` when an
            ``OSError`` is raised during the transfer
        """

        # check submission status
        submission_status = Submission().isComplete(sub_id)

        if not submission_status or submission_status == 'false':

            lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

            kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
            Submission().save_record(dict(), **kwargs)

            # k is a counter which keeps track of the number of distinct
            # files seen in the progress output (first file -> 0)
            k = -1
            # each path is preceded by a single space, as before
            f_str = ''.join(' ' + f for f in file_path)
            cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(
                f_str=f_str, user_name=user_name, remote_path=remote_path)
            lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)
            os.chdir(path2library)

            # loop-invariant lookup tables for parsing progress lines
            tokens_to_match = ["Mb/s"]
            units_to_match = ["KB", "MB"]
            time_units = ['d', 'h', 'm', 's']
            # Last file name seen. It must persist across progress lines,
            # otherwise every line would be counted as a new file.
            prev_file = ''

            try:
                thread = pexpect.spawn(cmd, timeout=None)
                thread.expect(["assword:", pexpect.EOF])
                thread.sendline(password)

                cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

                while True:
                    i = thread.expect_list(cpl, timeout=None)
                    if i == 0:  # EOF! Possible error point if encountered before transfer completion
                        print("Process termination - check exit status!")
                        break
                    if i != 1:
                        continue

                    progress_line = thread.match.group(1).decode("utf-8")
                    # only lines carrying a transfer rate are progress updates
                    if not all(tm in progress_line for tm in tokens_to_match):
                        continue

                    fields = {
                        "transfer_status": "transferring",
                        "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                    }

                    tokens = progress_line.split(" ")

                    for token in tokens:
                        if token == '':
                            continue
                        if "file" in token:
                            fields['file_path'] = token.split('=')[-1]
                            if prev_file != fields['file_path']:
                                k = k + 1
                            # remember the file (was a no-op '==' comparison)
                            prev_file = fields['file_path']
                        elif '%' in token:
                            pct = float((token.rstrip("%")))
                            # pct = (1/len(file_path) * pct) + (k * 1/len(file_path) * 100)
                            fields['pct_completed'] = pct
                            print(str(transfer_token) + ":  " + str(pct) + '% transfered')
                        elif any(um in token for um in units_to_match):
                            fields['amt_transferred'] = token
                        elif "Mb/s" in token or "Mbps" in token:
                            t = token[:-4]
                            if '=' in t:
                                fields['transfer_rate'] = t[t.find('=') + 1:]
                            else:
                                fields['transfer_rate'] = t
                        elif "status" in token:
                            fields['transfer_status'] = token.split('=')[-1]
                        elif "rate" in token:
                            fields['transfer_rate'] = token.split('=')[-1]
                        elif "elapsed" in token:
                            fields['elapsed_time'] = token.split('=')[-1]
                        elif "loss" in token:
                            fields['bytes_lost'] = token.split('=')[-1]
                        elif "size" in token:
                            fields['file_size_bytes'] = token.split('=')[-1]
                        elif "ETA" in token:
                            # the ETA value is the second-to-last token,
                            # colon separated; zero-valued parts are skipped
                            eta_split = tokens[-2].split(":")
                            t_u = time_units[-len(eta_split):]
                            estimated_completion = ""
                            for indx, eta_token in enumerate(eta_split):
                                if eta_token == "00":
                                    continue
                                estimated_completion += eta_token + t_u[indx] + " "
                            fields['estimated_completion'] = estimated_completion
                    RemoteDataFile().update_transfer(transfer_token, fields)

                kwargs = dict(target_id=sub_id, completed_on=datetime.now())
                Submission().save_record(dict(), **kwargs)
                # close thread
                thread.close()
                lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)

            except OSError:
                return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                                message='There appears to be an issue with EBI.')

        # setup paths for conversion directories
        conv_dir = os.path.join(self._dir, sub_id)
        if not os.path.exists(os.path.join(conv_dir, 'json')):
            os.makedirs(os.path.join(conv_dir, 'json'))
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
        xml_dir = conv_dir

        #  Convert COPO JSON to ISA JSON
        lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()
        # dump metadata to output file
        with open(json_file_path, '+w') as json_file:
            json_file.write(dumps(meta))

        # Validate ISA_JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
            lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
        datafilehashes = conv.get_datafilehashes()
        # open via a context manager so the handle is closed after conversion
        with open(json_file_path) as json_fp:
            json2sra.convert2(json_fp=json_fp, path=conv_dir, sra_settings=sra_settings,
                              datafilehashes=datafilehashes, validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLS to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
        submission_file = os.path.join(xml_dir, 'submission.xml')
        project_file = os.path.join(xml_dir, 'project_set.xml')
        sample_file = os.path.join(xml_dir, 'sample_set.xml')
        experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
        run_file = os.path.join(xml_dir, 'run_set.xml')

        # Pass curl an argument list (no shell) so that paths containing
        # spaces or shell metacharacters cannot alter the command.
        # NOTE(review): the URL embeds credentials and '-k' disables TLS
        # certificate verification; both should come from configuration.
        curl_cmd = [
            'curl', '-k',
            '-F', 'SUBMISSION=@' + submission_file,
            '-F', 'PROJECT=@' + os.path.join(remote_path, project_file),
            '-F', 'SAMPLE=@' + os.path.join(remote_path, sample_file),
            '-F', 'EXPERIMENT=@' + os.path.join(remote_path, experiment_file),
            '-F', 'RUN=@' + os.path.join(remote_path, run_file),
            'https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123',
        ]

        output = subprocess.check_output(curl_cmd)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)
        lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)

        receipt = ET.fromstring(output)

        accessions = dict()

        # project, experiment, submission and run each contribute one
        # accession/alias pair taken from the first matching element
        for tag in ('PROJECT', 'EXPERIMENT', 'SUBMISSION', 'RUN'):
            element = receipt.find('./' + tag)
            accessions[tag.lower()] = {
                'accession': element.get('accession', default='undefined'),
                'alias': element.get('alias', default='undefined'),
            }

        # get sample accessions
        sample_accessions = list()
        for sample in receipt.findall('./SAMPLE'):
            sample_entry = {
                'sample_accession': sample.get('accession', default='undefined'),
                'sample_alias': sample.get('alias', default='undefined'),
            }
            # the last child element's accession wins, as before
            for bio_s in sample:
                sample_entry['biosample_accession'] = bio_s.get('accession', default='undefined')
            sample_accessions.append(sample_entry)
        accessions['sample'] = sample_accessions

        # save accessions to mongo profile record
        record = Submission().get_record(sub_id)
        record['accessions'] = accessions
        record['complete'] = True
        record['target_id'] = str(record.pop('_id'))
        Submission().save_record(dict(), **record)
 def test_validate_testdata_repeated_measure_json(self):
     """Validate 'TEST-ISA-repeated-measure'; fail on '(E)' or '(F)' log entries."""
     test_case = 'TEST-ISA-repeated-measure'
     json_path = os.path.join(utils.JSON_DATA_DIR, test_case + '.json')
     # Use a context manager so the input file is closed after validation.
     with open(json_path) as json_fp:
         log_msg_stream = isajson.validate(fp=json_fp,
                                           log_level=self._reporting_level)
     log_text = log_msg_stream.getvalue()
     if '(E)' in log_text or '(F)' in log_text:
         self.fail("Error found when validating ISA JSON: {}".format(log_text))