Esempio n. 1
0
 def __link_entity_accessions(submission: Submission, entity: Entity):
     """Map one accessioned run and three accessioned samples into ``submission``.

     Each sample is linked to ``entity`` through its identifier index. The
     run is mapped and given an 'ENA' accession but is not linked here.
     """
     experiment_run = submission.map('experiment_run', 'ABC1234', {})
     experiment_run.add_accession('ENA', 'ABC1234')
     for sample_number in (123, 456, 789):
         sample = submission.map('sample', str(sample_number), {})
         sample.add_accession('BioSamples', f'SAME{sample_number}')
         entity.add_link('sample', sample.identifier.index)
Esempio n. 2
0
 def get_release_date(data: Submission) -> date:
     """Return the first study release date that lies strictly after today.

     Studies are examined in the order ``data.get_entities('study')`` yields
     them. ``None`` is returned when the submission has no 'study' entity
     type, or when no study carries a 'release_date' later than today.
     """
     if 'study' not in data.get_entity_types():
         return None
     for study in data.get_entities('study'):
         if 'release_date' not in study.attributes:
             continue
         candidate = date.fromisoformat(study.attributes['release_date'])
         if candidate > date.today():
             return candidate
     return None
Esempio n. 3
0
 def test_mapping_identical_index_update_should_return_same_entity(self):
     """Mapping the same type and index twice in UPDATE mode gives equal entities."""
     submission = Submission(HandleCollision.UPDATE)
     first_mapping = submission.map("test_case", "index1", {})
     second_mapping = submission.map("test_case", "index1", {})
     self.assertEqual(first_mapping, second_mapping)
Esempio n. 4
0
class TestIssuesGeneration(unittest.TestCase):
    """Checks that validator replies end up as errors on the submission."""

    def setUp(self):
        """Create the validator and map every fixture entry into a submission."""
        self.schema_validation = JsonValidator("")
        self.maxDiff = None
        fixture_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
        with open(fixture_path) as fixture_file:
            fixture = json.load(fixture_file)
        self.submission = Submission()
        for entity_type, attributes in fixture.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    @patch('validation.json.requests.post')
    def test_when_validate_invalid_entity_with_valid_schema_should_return_errors(self, mock_post):
        """Two failing replies followed by two empty ones give the expected issues."""
        # Given
        allowed_values_message = "should be equal to one of the allowed values: ['covid-19 outbreak']"
        coverage_message = "should have required property 'coverage'"
        email_message = "should have required property 'email_address'"
        first_reply = [
            {'dataPath': '.assembly_type', 'errors': [allowed_values_message]},
            {'dataPath': '.coverage', 'errors': [coverage_message]},
        ]
        second_reply = [
            {"dataPath": ".email_address", "errors": [email_message]},
        ]
        # An iterable side_effect hands out one item per json() call, in order.
        mock_post.return_value.json.side_effect = (first_reply, second_reply, [], [])
        mock_post.return_value.status = requests.codes['ok']
        expected_issues = {
            "isolate_genome_assembly_information": {
                "P17157_1007": {
                    "assembly_type": [allowed_values_message],
                    "coverage": [coverage_message],
                },
            },
            "study": {
                "PRJEB39632": {"email_address": [email_message]},
            },
        }

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        self.assertDictEqual(expected_issues, self.submission.get_all_errors())
Esempio n. 5
0
    def test_when_study_contains_new_links_then_those_added_to_submission(
            self):
        """Links for the study's linked accessions appear in the updated submission."""
        # Given
        login_reply = AuthResponse(status=HTTPStatus(200))
        login_reply.session_id = "test.session.id"
        self.mock_auth.login = MagicMock(return_value=login_reply)
        biostudies = BioStudies("url", "username", "password")

        stored_submission = ResponseObject()
        stored_submission.json = self.__create_submission()
        biostudies.get_submission_by_accession = MagicMock(
            return_value=stored_submission)

        submission = Submission()
        study = submission.map('study', 'test alias', attributes={})
        study.add_accession('test', 'PRJ1234')
        self.__link_entity_accessions(submission, study)
        expected_links = [
            {'url': url, 'attributes': [{'name': 'Type', 'value': link_type}]}
            for url, link_type in (
                ('ABC1234', 'ena'),
                ('SAME123', 'biosample'),
                ('SAME456', 'biosample'),
                ('SAME789', 'biosample'),
            )
        ]

        # When
        updated = biostudies.update_links_in_submission(submission, study)

        # Then
        links_section = updated.get('section', {}).get('links', [])
        self.assertTrue(links_section)
        for link in expected_links:
            self.assertIn(link, links_section)
        self.assertCountEqual(expected_links, links_section)
Esempio n. 6
0
 def setUp(self):
     """Create the validator and map every fixture entry into a fresh submission."""
     self.schema_validation = JsonValidator("")
     self.maxDiff = None
     fixture_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
     with open(fixture_path) as fixture_file:
         fixture = json.load(fixture_file)
     self.submission = Submission()
     for entity_type, attributes in fixture.items():
         self.submission.map(entity_type, attributes["index"], attributes)
 def setUp(self):
     """Map every entry of the JSON fixture into a fresh submission."""
     self.maxDiff = None
     fixture_path = join(dirname(__file__),
                         "../../resources/data_for_test_issues.json")
     with open(fixture_path) as fixture_file:
         entities_by_type = json.load(fixture_file)
     self.submission = Submission()
     for entity_type, attributes in entities_by_type.items():
         self.submission.map(entity_type, attributes["index"], attributes)
Esempio n. 8
0
 def test_mapping_identical_index_should_overwrite_entity_attributes(self):
     """In OVERWRITE mode both mappings expose only the second attribute set."""
     submission = Submission(HandleCollision.OVERWRITE)
     expected = {'second_entity': 'new'}
     first_mapping = submission.map("test_case", "index1", {'first_entity': 'old'})
     second_mapping = submission.map("test_case", "index1", {'second_entity': 'new'})
     for mapped in (first_mapping, second_mapping):
         self.assertDictEqual(expected, mapped.attributes)
Esempio n. 9
0
 def make_manifests(self, submission: Submission) -> Dict[str, str]:
     """Build manifest contents keyed by manifest file name.

     A 'run_experiment' entity contributes a manifest only when it has
     exactly one linked sample and exactly one linked study, carries an
     'uploaded_file_1' attribute, and both accession lookups return a truthy
     value. Entities failing any of these checks are skipped silently.
     """
     manifests = {}
     for run_experiment in submission.get_entities('run_experiment'):
         samples = submission.get_linked_entities(run_experiment, 'sample')
         studies = submission.get_linked_entities(run_experiment, 'study')
         if len(samples) != 1 or len(studies) != 1:
             continue
         if 'uploaded_file_1' not in run_experiment.attributes:
             continue
         sample, study = samples.pop(), studies.pop()
         sample_accession = sample.get_first_accession(SAMPLE_ACCESSION_PRIORITY)
         study_accession = study.get_accession('ENA_Study')
         if not (sample_accession and study_accession):
             continue
         file_name, content = self.make_manifest(run_experiment, sample_accession, study_accession)
         manifests[file_name] = content
     return manifests
    def test_when_entities_has_accessions_returns_them_by_type(self):
        """Accessions added to mapped entities are reported grouped by service."""
        submission = Submission()

        submission.map("study", "study", self.study).add_accession(
            'BioStudies', "BST1")
        submission.map("sample", "sample1", self.sample1).add_accession(
            'BioSamples', "SAME123")
        submission.map("sample", "sample2", self.sample2).add_accession(
            'BioSamples', "SAME456")
        submission.map("sample", "sample3", self.sample3).add_accession(
            'BioSamples', "SAME789")
        submission.map("run_experiment", "sample3",
                       self.run_experiment).add_accession('ENA', "EXP123")

        expected_accession_by_type = {
            'BioSamples': {'SAME123', 'SAME456', 'SAME789'},
            'BioStudies': {"BST1"},
            'ENA': {'EXP123'},
        }
        self.assertEqual(expected_accession_by_type,
                         submission.get_all_accessions())
Esempio n. 11
0
 def __update_links_section(self, links_section: List, study: Entity, submission: Submission):
     """Append a link element to ``links_section`` for each new linked accession.

     For every entity type in ``BIOSTUDIES_LINK_TYPES`` the entities linked to
     ``study`` are inspected. An entity is skipped when it has no accession
     for the mapped service or when the accession is already reported as
     present in ``links_section``. The list is modified in place.
     """
     for entity_type, biostudies_type in BIOSTUDIES_LINK_TYPES.items():
         for linked_entity in submission.get_linked_entities(study, entity_type):
             accession = linked_entity.get_accession(ENTITY_TYPE_SERVICE[entity_type])
             if not accession or self.__accession_in_list(links_section, accession):
                 continue
             links_section.append(self.__create_link_element(biostudies_type, accession))
Esempio n. 12
0
 def validate_data(self, data: Submission):
     """Validate every entity of every type in ``data``, logging a count per type."""
     entities_by_type = data.get_all_entities()
     for entity_type, entities in entities_by_type.items():
         message = f'Validating {len(entities)} {entity_type}(s) with {self.__class__}'
         logging.info(message)
         for candidate in entities:
             self.validate_entity(candidate)
Esempio n. 13
0
    def convert_experiment(converter: EnaExperimentConverter, data: Submission, experiment: Entity) -> Element:
        """Convert ``experiment`` using one linked sample and one linked study.

        When no sample or no study is linked, an error is recorded on the
        experiment for each missing kind and ``None`` is returned. When more
        than one of either is linked, an error is recorded naming the one that
        was used and the conversion still goes ahead.
        """
        error_key = 'run_experiment_ena_experiment_accession'
        samples = data.get_linked_entities(experiment, 'sample')
        studies = data.get_linked_entities(experiment, 'study')
        sample_count = len(samples)
        study_count = len(studies)

        if sample_count < 1:
            experiment.add_error(error_key, 'No Linked Sample')
        if study_count < 1:
            experiment.add_error(error_key, 'No Linked Study')
        if sample_count < 1 or study_count < 1:
            return None

        sample = samples.pop()
        study = studies.pop()

        # ENA Only supports linking one study & sample to an experiment
        if sample_count > 1:
            experiment.add_error(error_key, f'More than one Sample Linked, using first: {sample.identifier.index}')
        if study_count > 1:
            experiment.add_error(error_key, f'More than one Study Linked, using first: {study.identifier.index}')
        return converter.convert_experiment(experiment, sample, study)
Esempio n. 14
0
    def get_ena_files(self, data: Submission) -> Dict[str, Tuple[str, str]]:
        """Build one ENA file entry per ENA type that has converted entities.

        Returns a dict keyed by the upper-cased ENA type. Each value is a
        ``(file name, content)`` pair where the file name is
        ``'<ENA_TYPE>.xml'`` and the content comes from ``make_ena_file``.
        Types whose set element ends up with no children are left out.
        """
        files_by_type = {}
        for entity_type, converter in self.conversion_map:
            ena_type = converter.ena_type.upper()
            ena_set = etree.XML(f'<{ena_type}_SET />')
            for entity in data.get_entities(entity_type):
                ena_set.append(self.convert_entity(converter, data, entity))

            if len(ena_set) == 0:
                continue
            file_name = f'{ena_type}.xml'
            files_by_type[ena_type] = (file_name, self.make_ena_file(ena_set))
        return files_by_type
Esempio n. 15
0
    def validate_data(self, data: Submission):
        """Validate each convertible entity against the schema for its ENA type.

        Every entity is converted, wrapped alone in a ``<TYPE>_SET`` element and
        passed to the schema callable. A falsy result triggers ``add_errors``
        for that entity.
        """
        for entity_type, converter in self.converter.conversion_map:
            ena_type = converter.ena_type.upper()
            entities = data.get_entities(entity_type)
            logging.info(f'Validating {len(entities)} {entity_type}(s) against ENA {ena_type} schema')
            for entity in entities:
                # Looked up per entity so an empty entity list never touches ena_schema.
                schema = self.ena_schema[ena_type]
                wrapper = etree.XML(f'<{ena_type}_SET />')
                converted = self.converter.convert_entity(converter, data, entity)
                wrapper.append(converted)

                if schema(wrapper):
                    continue
                self.add_errors(schema, ena_type, entity_type, entity)
Esempio n. 16
0
 def test_mapping_identical_index_should_update_entity_attributes(self):
     """In UPDATE mode the second mapping's attributes are merged over the first's."""
     submission = Submission(HandleCollision.UPDATE)
     merged = {
         'first_entity': 'old',
         'second_entity': 'new',
         'both_entities': 'new',
     }
     first_mapping = submission.map(
         "test_case", "index1",
         {'first_entity': 'old', 'both_entities': 'old'})
     second_mapping = submission.map(
         "test_case", "index1",
         {'second_entity': 'new', 'both_entities': 'new'})
     for mapped in (first_mapping, second_mapping):
         self.assertDictEqual(merged, mapped.attributes)
Esempio n. 17
0
 def test_mapping_identical_index_should_error(self):
     """In ERROR mode a second mapping of the same type and index raises IndexError."""
     submission = Submission(HandleCollision.ERROR)
     submission.map("test_case", "index1", {})
     self.assertRaises(IndexError, submission.map, "test_case", "index1", {})
Esempio n. 18
0
 def validate_data(self, data: Submission):
     """Validate every 'sample' entity in ``data`` after logging how many there are."""
     samples = data.get_entities('sample')
     message = f'Validating taxonomy against scientific name in {len(samples)} sample(s)'
     logging.info(message)
     for sample in samples:
         self.validate_entity(sample)
 def __init__(self, imgur_client, reddit_submission):
     """Initialise the base ``Submission`` state and keep the imgur client.

     :param imgur_client: stored on ``self._imgur``.
     :param reddit_submission: forwarded unchanged to ``Submission.__init__``.
     """
     Submission.__init__(self, reddit_submission)
     self._imgur = imgur_client
Esempio n. 20
0
 def test_has_data_should_become_true(self):
     """A submission reports data once an entity has been mapped into it."""
     populated = Submission()
     populated.map('type', 'index', {})
     has_data = populated.has_data()
     self.assertTrue(has_data)
Esempio n. 21
0
 def test_has_data_should_start_false(self):
     """A freshly created submission reports no data."""
     self.assertFalse(Submission().has_data())
 def __init__(self, tumblr_client, reddit_submission):
     """Initialise the base ``Submission`` state and keep the tumblr client.

     :param tumblr_client: stored on ``self._tumblr``.
     :param reddit_submission: forwarded unchanged to ``Submission.__init__``.
     """
     Submission.__init__(self, reddit_submission)
     self._tumblr = tumblr_client
 def download_submission(self):
     """Return a list with the download result for each tumblr submission URL, in order."""
     downloads = []
     for next_url in self._get_tumblr_submission_urls():
         downloads.append(Submission._download_data_from_url(next_url))
     return downloads
Esempio n. 24
0
 def validate_data(self, data: Submission):
     """Validate every 'run_experiment' entity in ``data`` after logging the count."""
     runs = data.get_entities('run_experiment')
     message = f'Validating file checksums for {len(runs)} run(s)'
     logging.info(message)
     for run in runs:
         self.validate_entity(run)
Esempio n. 25
0
class TestSchemaValidation(unittest.TestCase):
    """Runs JsonValidator over the fixture submission with the HTTP post patched."""

    def setUp(self):
        """Load the JSON fixture and map every entry into a fresh submission."""
        self.maxDiff = None
        fixture_path = join(dirname(__file__),
                            "../../resources/data_for_test_issues.json")
        with open(fixture_path) as fixture_file:
            fixture = json.load(fixture_file)
        self.schema_validation = JsonValidator("")
        self.submission = Submission()
        for entity_type, attributes in fixture.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    @patch('validation.json.requests.post')
    def test_when_entity_valid_should_return_no_errors(self, mock_post):
        """An empty reply for every request leaves the submission without errors."""
        # Given
        reply = mock_post.return_value
        reply.json.return_value = []
        reply.status = requests.codes['ok']

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        self.assertFalse(self.submission.has_errors())
        self.assertDictEqual({}, self.submission.get_all_errors())

    @patch('validation.json.requests.post')
    def test_when_entity_invalid_entity_with_valid_schema_should_return_errors(
            self, mock_post):
        """The same error reply for every request is recorded on every entity."""
        # Given
        message = "should have required property 'release_date'"
        reply = mock_post.return_value
        reply.json.return_value = [{
            "dataPath": ".release_date",
            "errors": [message]
        }]
        reply.status = requests.codes['ok']
        index_by_type = {
            "isolate_genome_assembly_information": "P17157_1007",
            "study": "PRJEB39632",
            "sample": "ERS4858671",
            "run_experiment": "ERX4331406",
        }
        expected_errors = {
            entity_type: {index: {"release_date": [message]}}
            for entity_type, index in index_by_type.items()
        }
        study = self.submission.get_entity('study', 'PRJEB39632')

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        expected_for_studies = expected_errors['study']
        self.assertTrue(study.has_errors())
        self.assertTrue(self.submission.has_errors())
        self.assertDictEqual(expected_for_studies['PRJEB39632'],
                             study.get_errors())
        self.assertDictEqual(expected_for_studies,
                             self.submission.get_errors('study'))
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())
class TestSubmissionErrors(unittest.TestCase):
    """Exercises error reporting on entities and on the submission holding them."""

    def setUp(self):
        """Load the JSON fixture and map every entry into a fresh submission."""
        self.maxDiff = None
        fixture_path = join(dirname(__file__),
                            "../../resources/data_for_test_issues.json")
        with open(fixture_path) as fixture_file:
            fixture = json.load(fixture_file)
        self.submission = Submission()
        for entity_type, attributes in fixture.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    def test_submission_with_no_errors(self):
        """Without any add_error call nothing is reported at any level."""
        # Given
        untouched_study: Entity = self.submission.get_entity(
            'study', 'PRJEB39632')

        # Then
        self.assertFalse(untouched_study.has_errors())
        self.assertDictEqual({}, self.submission.get_errors('study'))
        self.assertFalse(self.submission.has_errors())
        self.assertDictEqual({}, self.submission.get_all_errors())

    def test_submission_entity_with_error(self):
        """One error on one entity is visible per entity, per type and overall."""
        # Given
        message = "should have required property 'release_date'"
        study_errors = {'release_date': [message]}
        errors_by_index = {'PRJEB39632': study_errors}
        expected_errors = {'study': errors_by_index}
        study: Entity = self.submission.get_entity('study', 'PRJEB39632')

        # When
        study.add_error('release_date', message)

        # Then
        self.assertTrue(study.has_errors())
        self.assertTrue(self.submission.has_errors())
        self.assertDictEqual(study_errors, study.get_errors())
        self.assertDictEqual(errors_by_index,
                             self.submission.get_errors('study'))
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())

    def test_submission_entities_with_errors(self):
        """Errors added to two entities are all reported by the submission."""
        # Given
        email_message = "should have required property 'email_address'"
        allowed_values_message = "should be equal to one of the allowed values: ['covid-19 outbreak']"
        coverage_message = "should have required property 'coverage'"
        expected_errors = {
            'study': {
                'PRJEB39632': {'email_address': [email_message]},
            },
            'isolate_genome_assembly_information': {
                'P17157_1007': {
                    'assembly_type': [allowed_values_message],
                    'coverage': [coverage_message],
                },
            },
        }
        study: Entity = self.submission.get_entity('study', 'PRJEB39632')
        assembly: Entity = self.submission.get_entity(
            'isolate_genome_assembly_information', 'P17157_1007')

        # When
        study.add_error('email_address', email_message)
        assembly.add_error('assembly_type', allowed_values_message)
        assembly.add_error('coverage', coverage_message)

        # Then
        self.assertTrue(self.submission.has_errors())
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())
 def download_submission(self):
     """Return a one-element list holding the download result for the gfycat URL."""
     gfycat_url = self._get_gfycat_submission_url()
     downloaded = Submission._download_data_from_url(gfycat_url)
     return [downloaded]
 def __init__(self, reddit_submission):
     """Delegate to ``Submission.__init__`` with the given reddit submission."""
     Submission.__init__(self, reddit_submission)