Exemple #1
0
 def __link_entity_accessions(submission: Submission, entity: Entity):
     """Map a fixed run and three fixed samples, linking each sample to ``entity``.

     An 'experiment_run' with index 'ABC1234' is mapped into ``submission``
     and given an ENA accession.  For each of the indexes 123, 456 and 789 a
     'sample' is mapped, given a BioSamples accession, and linked from
     ``entity``.
     """
     experiment_run = submission.map('experiment_run', 'ABC1234', {})
     experiment_run.add_accession('ENA', 'ABC1234')
     # Only the samples are linked to ``entity``; the run is just mapped.
     for index in (123, 456, 789):
         sample = submission.map('sample', str(index), {})
         sample.add_accession('BioSamples', f'SAME{index}')
         entity.add_link('sample', sample.identifier.index)
    def test_missmatched_checksum_should_log_error(self, mock: MagicMock):
        """A stated checksum that differs from the uploaded one is reported.

        ``mock`` supplies the "file_name,checksum" listing text; it is
        presumably injected by a patch decorator that is not visible here.
        """
        # Given
        expected_checksum = 'checksum'
        wrong_checksum = 'not-checksum'
        file_name = 'file_name.extension1.ex2'
        entity_type = 'run_experiment'
        index = f'{entity_type}1'
        # The return value must be set before the validator is constructed.
        mock.return_value = f"{file_name},{expected_checksum}"
        mismatch_message = f'The checksum found on drag-and-drop {expected_checksum} does not match: {wrong_checksum}'

        # When
        validator = UploadValidator('uuid')
        entity = Entity(entity_type, index, {
            'uploaded_file_1': file_name,
            'uploaded_file_1_checksum': wrong_checksum,
        })
        validator.validate_entity(entity)

        # Then
        self.assertDictEqual(
            {'uploaded_file_1_checksum': [mismatch_message]},
            entity.get_errors())
Exemple #3
0
 def add_errors(self, schema, ena_type: str, entity_type: str, entity: Entity):
     """Attach every entry of ``schema.error_log`` to ``entity`` as an error.

     The message is the 'error' group of ``self.regex`` when the pattern
     matches the log message, otherwise the raw log message.  The target
     attribute is the lower-cased last '/'-separated segment of the error
     path; when ``entity`` has no such attribute the error is filed under
     '<entity_type>_ena_<ena_type>_accession' (lower-cased) and the message
     is prefixed with the full path.
     """
     for error in schema.error_log:
         parsed = self.regex.match(error.message)
         if parsed:
             error_message = parsed.group('error')
         else:
             error_message = error.message
         target = error.path.rsplit('/', 1)[-1].lower()
         if target not in entity.attributes:
             # Unknown attribute: fall back to the accession attribute and
             # keep the path in the message so the location is not lost.
             target = f'{entity_type}_ena_{ena_type}_accession'.lower()
             error_message = f'{error.path} {error_message}'
         entity.add_error(target, error_message)
Exemple #4
0
 def validate_entity(self, entity: Entity):
     """Example validator: records sample errors on ``entity`` and then raises.

     Raises:
         NotImplementedError: always, after the example errors were added.
     """
     # Identify which attribute causes the error.
     attribute = 'fake_attribute'
     entity.add_error(attribute, 'Example error message')
     # Or, if multiple errors occur for the same attribute:
     entity.add_errors(attribute, ['error 1', 'error 2'])
     raise NotImplementedError('Example validate entity used')
Exemple #5
0
 def __add_errors_to_entity(entity: Entity, schema_errors: dict):
     """Copy JSON-schema validation errors onto ``entity``.

     Args:
         entity: entity that receives the errors.
         schema_errors: iterated as a sequence of records, each providing a
             'dataPath' and a list of 'errors' messages (the ``dict``
             annotation is kept unchanged for interface compatibility).

     For each record the attribute name is the 'dataPath' with leading and
     trailing dots stripped.  Double quotes in messages are replaced with
     single quotes, 'should NOT be valid' is swapped for an improved message,
     and 'should match some schema in anyOf' messages are dropped.
     """
     for schema_error in schema_errors:
         attribute_name = str(schema_error['dataPath']).strip('.')
         stripped_errors = []
         for error in schema_error['errors']:
             # str.replace returns a new string, so the result must be
             # rebound; previously it was discarded and quotes were kept.
             error = error.replace('"', '\'')
             if error == 'should NOT be valid':
                 error = JsonValidator.__improve_not_be_valid_error_message(
                     entity.identifier.entity_type, attribute_name)
             if error != 'should match some schema in anyOf':
                 stripped_errors.append(error)
         entity.add_errors(attribute_name, stripped_errors)
Exemple #6
0
    def test_valid_sample_tax_id_should_not_return_error(self):
        """A sample whose tax_id lookup returns a valid record has no errors."""
        # Given
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_tax_id = MagicMock(return_value=self.valid_sarscov2)
        sample = Entity('sample', 'sample1', {'tax_id': '2697049'})

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertFalse(sample.has_errors())
        self.assertDictEqual({}, sample.get_errors())
Exemple #7
0
    def test_valid_sample_name_should_not_return_error(self):
        """A sample whose scientific_name lookup returns a valid record has no errors."""
        # Given
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_scientific_name = MagicMock(return_value=self.valid_sarscov2)
        sample = Entity('sample', 'sample1', {
            'scientific_name': 'Severe acute respiratory syndrome coronavirus 2',
        })

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertFalse(sample.has_errors())
        self.assertDictEqual({}, sample.get_errors())
Exemple #8
0
 def add_entity_accessions(entity: Entity, ignore: List[str]):
     """Register accessions found in '<entity_type>_<service>_accession' attributes.

     Attributes listed in ``ignore`` are skipped.  The text between the
     '<entity_type>_' prefix and the '_accession' suffix is taken as the
     service name and translated through ``SERVICE_NAMES`` when it has an
     entry there.  An accession is added only when both the resulting
     service name and the attribute value are truthy.
     """
     prefix = f'{entity.identifier.entity_type}_'
     suffix = '_accession'
     for attribute, value in entity.attributes.items():
         if attribute in ignore:
             continue
         if not (attribute.startswith(prefix) and attribute.endswith(suffix)):
             continue
         service_name = attribute[len(prefix):-len(suffix)]
         if service_name in SERVICE_NAMES:
             service_name = SERVICE_NAMES[service_name]
         # An empty service name occurs when prefix and suffix overlap.
         if service_name and value:
             entity.add_accession(service_name, value)
Exemple #9
0
    def test_invalid_tax_id_should_return_error(self):
        """An error returned by the tax_id lookup is recorded under 'tax_id'."""
        # Given
        error = 'Not valid tax_id: 999999999999.'
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_tax_id = MagicMock(return_value={'error': error})
        sample = Entity('sample', 'sample1', {'tax_id': '999999999999'})

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertTrue(sample.has_errors())
        self.assertDictEqual({'tax_id': [error]}, sample.get_errors())
 def get_linked_accessions(self, entity: Entity) -> Dict[str, Set[str]]:
     """Collect the accessions of every entity linked to ``entity``.

     Args:
         entity: the entity whose links are followed.

     Returns:
         A mapping from service name to the set of accessions found on the
         linked entities, across all entity types of this object.
     """
     accessions: Dict[str, Set[str]] = {}
     for entity_type in self.get_entity_types():
         # Use a distinct loop name: reusing ``entity`` here rebinds the
         # parameter, so later entity types would be looked up on the last
         # linked entity instead of the entity that was passed in.
         for linked_entity in self.get_linked_entities(entity, entity_type):
             for service, accession in linked_entity.get_accessions():
                 accessions.setdefault(service, set()).add(accession)
     return accessions
Exemple #11
0
    def test_invalid_name_should_return_error(self):
        """An error returned by the name lookup is recorded under 'scientific_name'."""
        # Given
        error = 'Not valid scientific_name: Lorem Ipsum.'
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_scientific_name = MagicMock(return_value={'error': error})
        sample = Entity('sample', 'sample1', {'scientific_name': 'Lorem Ipsum'})

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertTrue(sample.has_errors())
        self.assertDictEqual({'scientific_name': [error]}, sample.get_errors())
 def map(self, entity_type: str, index: str, attributes: dict) -> Entity:
     """Return the entity for ``entity_type``/``index``, creating it if needed.

     When an entity is already stored under that type and index, the result
     of the collision handler is returned.  Otherwise a new ``Entity`` is
     built from the arguments, stored, and returned.
     """
     already_mapped = (entity_type in self.__map
                       and index in self.__map[entity_type])
     if already_mapped:
         return self.__handle_collision(entity_type, index, attributes)
     created = Entity(entity_type, index, attributes)
     self.__map.setdefault(entity_type, {})[index] = created
     return created
    def test_missing_file_should_log_error(self, mock: MagicMock):
        """A referenced file that is absent from the upload listing is reported.

        ``mock`` supplies the "file_name,checksum" listing text; it is
        presumably injected by a patch decorator that is not visible here.
        """
        # Given
        entity_type = 'run_experiment'
        index = f'{entity_type}1'
        # The return value must be set before the validator is constructed.
        mock.return_value = f"file_name.extension1.ex2,checksum"
        validator = UploadValidator('uuid')
        attributes = {'uploaded_file_1': 'missing.file'}
        entity = Entity(entity_type, index, attributes)

        # When
        validator.validate_entity(entity)

        # Then
        self.assertDictEqual(
            {'uploaded_file_1': ['File has not been uploaded to drag-and-drop: missing.file']},
            entity.get_errors())
Exemple #14
0
 def validate_entity(self, entity: Entity):
     """Validate the taxonomy attributes of ``entity`` and record any errors.

     When both 'tax_id' and 'scientific_name' are present they are checked
     together; when only one is present it is checked on its own; when
     neither is present nothing is recorded.
     """
     attributes = entity.attributes
     has_tax_id = 'tax_id' in attributes
     has_name = 'scientific_name' in attributes
     errors_by_attribute = {}
     if has_tax_id and has_name:
         response = self.ena_taxonomy.validate_taxonomy(
             tax_id=attributes['tax_id'],
             scientific_name=attributes['scientific_name'])
         errors_by_attribute = self.get_taxonomy_errors(response)
     elif has_tax_id:
         response = self.ena_taxonomy.validate_tax_id(attributes['tax_id'])
         errors_by_attribute = self.get_errors(response, 'tax_id')
     elif has_name:
         response = self.ena_taxonomy.validate_scientific_name(
             attributes['scientific_name'])
         errors_by_attribute = self.get_errors(response, 'scientific_name')
     for attribute, errors in errors_by_attribute.items():
         entity.add_errors(attribute, errors)
Exemple #15
0
 def validate_file(self, entity: Entity, file_attribute: str,
                   check_attribute: str):
     """Check one file attribute of ``entity`` against ``self.file_checksum_map``.

     Args:
         entity: entity holding the file name and, optionally, its checksum.
         file_attribute: attribute name holding the file name.
         check_attribute: attribute name holding the stated checksum.

     A file missing from the map is reported on ``file_attribute``.  A
     stated checksum that differs from the mapped one is reported on
     ``check_attribute``.  When no checksum is stated, the mapped checksum
     is written into ``check_attribute``.
     """
     file_name = entity.attributes[file_attribute]
     if file_name not in self.file_checksum_map:
         entity.add_error(
             file_attribute,
             f'File has not been uploaded to drag-and-drop: {file_name}')
         return
     upload_checksum = self.file_checksum_map[file_name]
     if check_attribute not in entity.attributes:
         # Nothing stated by the user: fill in the uploaded checksum.
         entity.attributes[check_attribute] = upload_checksum
         return
     stated_checksum = entity.attributes[check_attribute]
     if stated_checksum != upload_checksum:
         entity.add_error(
             check_attribute,
             f'The checksum found on drag-and-drop {upload_checksum} does not match: {stated_checksum}'
         )
    def test_validation_with_second_file_present(self, mock: MagicMock):
        """Two files that are both listed with matching checksums give no errors.

        ``mock`` supplies the newline-separated "file_name,checksum" listing;
        it is presumably injected by a patch decorator not visible here.
        """
        # Given
        entity_type = 'run_experiment'
        index = f'{entity_type}1'
        # The return value must be set before the validator is constructed.
        mock.return_value = (f"first-file,first-checksum\n"
                             f"second-file,second-checksum")

        # When
        validator = UploadValidator('uuid')
        entity = Entity(entity_type, index, {
            'uploaded_file_1': 'first-file',
            'uploaded_file_1_checksum': 'first-checksum',
            'uploaded_file_2': 'second-file',
            'uploaded_file_2_checksum': 'second-checksum',
        })
        validator.validate_entity(entity)

        # Then
        self.assertDictEqual({}, entity.get_errors())
Exemple #17
0
    def test_inconsistent_sample_should_return_error(self):
        """A tax_id and scientific_name that disagree are reported on both attributes."""
        # Given
        consistent_error = 'Information is not consistent between taxId: 9606 and scientificName: Severe acute respiratory syndrome coronavirus 2'
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_scientific_name = MagicMock(return_value=self.valid_sarscov2)
        taxonomy.validate_tax_id = MagicMock(return_value=self.valid_human)
        sample = Entity('sample', 'sample1', {
            'scientific_name': 'Severe acute respiratory syndrome coronavirus 2',
            'tax_id': '9606',
        })

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertTrue(sample.has_errors())
        self.assertDictEqual(
            {
                'scientific_name': [consistent_error],
                'tax_id': [consistent_error],
            },
            sample.get_errors())
    def test_validation_with_second_file_missing(self, mock: MagicMock):
        """Only the file absent from the upload listing is reported.

        ``mock`` supplies the "file_name,checksum" listing text; it is
        presumably injected by a patch decorator that is not visible here.
        """
        # Given
        entity_type = 'run_experiment'
        index = f'{entity_type}1'
        # The return value must be set before the validator is constructed.
        mock.return_value = f"first-file,first-checksum"

        # When
        validator = UploadValidator('uuid')
        entity = Entity(entity_type, index, {
            'uploaded_file_1': 'first-file',
            'uploaded_file_2': 'second-file',
        })
        validator.validate_entity(entity)

        # Then
        self.assertDictEqual(
            {'uploaded_file_2': ['File has not been uploaded to drag-and-drop: second-file']},
            entity.get_errors())
Exemple #19
0
    def test_invalid_sample_name_should_return_error(self):
        """An invalid name with a valid tax_id yields the name error plus the consistency error."""
        # Given
        error = 'Not valid scientific_name: Lorem Ipsum.'
        consistent_error = 'Information is not consistent between taxId: 2697049 and scientificName: Lorem Ipsum'
        taxonomy = self.taxonomy_validator.ena_taxonomy
        taxonomy.validate_scientific_name = MagicMock(return_value={'error': error})
        taxonomy.validate_tax_id = MagicMock(return_value=self.valid_sarscov2)
        sample = Entity('sample', 'sample1', {
            'scientific_name': 'Lorem Ipsum',
            'tax_id': '2697049',
        })

        # When
        self.taxonomy_validator.validate_entity(sample)

        # Then
        self.assertTrue(sample.has_errors())
        self.assertDictEqual(
            {
                'scientific_name': [error, consistent_error],
                'tax_id': [consistent_error],
            },
            sample.get_errors())
 def convert_sample(self, sample_entity: Entity) -> Sample:
     """Build a ``Sample`` from ``sample_entity``.

     The accession comes from the entity's 'BioSamples' accession; name,
     domain, taxon id and species are read through ``self.named_attribute``
     (the domain call also passes ``self.domain``, presumably as a
     default; confirm against ``named_attribute``).  Every entity attribute
     whose name is not in ``REMOVE_KEYS`` is appended as an ``Attribute``
     with underscores in the name replaced by spaces and the unit looked up
     in ``self.unit_map``.
     """
     accession = sample_entity.get_accession('BioSamples')
     title = self.named_attribute(sample_entity, 'sample_title')
     domain = self.named_attribute(sample_entity, 'domain', self.domain)
     taxon_id = self.named_attribute(sample_entity, 'tax_id')
     species = self.named_attribute(sample_entity, 'scientific_name')
     sample = Sample(accession=accession,
                     name=title,
                     domain=domain,
                     ncbi_taxon_id=taxon_id,
                     species=species)
     sample._append_organism_attribute()
     for name, value in sample_entity.attributes.items():
         if name in REMOVE_KEYS:
             continue
         display_name = name.replace('_', ' ')
         unit = self.unit_map.get(name)
         sample.attributes.append(
             Attribute(name=display_name, value=value, unit=unit))
     return sample
    def convert_experiment(converter: EnaExperimentConverter, data: Submission, experiment: Entity) -> Element:
        """Convert ``experiment`` using one linked sample and one linked study.

        Errors are recorded on ``experiment`` when a sample or study link is
        missing, and also when more than one of either is linked.

        Returns:
            The result of ``converter.convert_experiment``, or ``None`` when
            no sample or no study is linked.
        """
        error_attribute = 'run_experiment_ena_experiment_accession'
        samples = data.get_linked_entities(experiment, 'sample')
        studies = data.get_linked_entities(experiment, 'study')
        len_samples = len(samples)
        len_studies = len(studies)

        if len_samples < 1:
            experiment.add_error(error_attribute, 'No Linked Sample')
        if len_studies < 1:
            experiment.add_error(error_attribute, 'No Linked Study')
        if len_samples < 1 or len_studies < 1:
            return None

        # Counts were taken above because pop() shrinks the collections.
        sample = samples.pop()
        study = studies.pop()

        # ENA only supports linking one study & sample to an experiment.
        if len_samples > 1:
            experiment.add_error(error_attribute, f'More than one Sample Linked, using first: {sample.identifier.index}')
        if len_studies > 1:
            experiment.add_error(error_attribute, f'More than one Study Linked, using first: {study.identifier.index}')
        return converter.convert_experiment(experiment, sample, study)
    def test_passed_bio_study_entity_returns_correct_json_representative(self):
        """Converting a study entity yields the expected payload dictionary."""
        # Given
        study_attributes = {
            "study_accession": "PRJEB12345",
            "study_alias": "SARS-CoV-2 genomes 123ABC alias",
            "email_address": "*****@*****.**",
            "center_name": "EBI",
            'study_name': 'SARS-CoV-2 genomes 123ABC name',
            "short_description": "test short description",
            "abstract": "test abstract",
            "release_date": "2020-08-21"
        }
        study_entity = Entity("study",
                              study_attributes["study_alias"],
                              study_attributes)
        expected_payload = self.__get_expected_payload(study_entity)

        # When
        actual_payload = BioStudyConverter.convert_study(study_entity)

        # Then
        self.assertDictEqual(expected_payload, actual_payload)
    def test_validation_should_edit_file_attributes(self, mock: MagicMock):
        """Validation writes the uploaded checksum when the entity states none.

        ``mock`` supplies the "file_name,checksum" listing text; it is
        presumably injected by a patch decorator that is not visible here.
        """
        # Given
        file_name = 'file_name.extension1.ex2'
        checksum = 'checksum'
        entity_type = 'run_experiment'
        index = f'{entity_type}1'
        # The return value must be set before the validator is constructed.
        mock.return_value = f"{file_name},{checksum}"

        # When
        validator = UploadValidator('uuid')
        entity = Entity(entity_type, index, {'uploaded_file_1': file_name})
        validator.validate_entity(entity)

        # Then
        self.assertDictEqual(
            {
                'uploaded_file_1': file_name,
                'uploaded_file_1_checksum': checksum,
            },
            entity.attributes)
    def __add_accession(study: Entity, submission_payload: dict):
        """Set 'accno' on the payload when ``study`` has a BioStudies accession.

        Returns:
            The same ``submission_payload`` object, modified in place when
            an accession is present.
        """
        accession = study.get_accession('BioStudies')
        if not accession:
            return submission_payload
        submission_payload['accno'] = accession
        return submission_payload
 def get_linked_entities(self, entity: Entity,
                         entity_type: str) -> Set[Entity]:
     """Return the entities of ``entity_type`` that ``entity`` links to.

     Each index reported by ``entity.get_linked_indexes(entity_type)`` is
     resolved through ``self.get_entity``.
     """
     return {
         self.get_entity(entity_type, linked_index)
         for linked_index in entity.get_linked_indexes(entity_type)
     }
 def link_entities(entity_a: Entity, entity_b: Entity):
     """Link two entities to each other by adding each one's identifier to the other."""
     for source, target in ((entity_a, entity_b), (entity_b, entity_a)):
         source.add_link_id(target.identifier)
Exemple #27
0
 def add_link(link: dict, entity: Entity, accession_services: Iterable[str]):
     """Reference ``entity`` from ``link`` by accession or, failing that, by index.

     When ``entity.get_first_accession(accession_services)`` is truthy it is
     stored under '@accession'; otherwise the entity's identifier index is
     stored under '@refname'.  The stored value is a three-item list of an
     empty string, ``fixed_attribute`` and the reference.
     """
     accession = entity.get_first_accession(accession_services)
     if accession:
         key, reference = '@accession', accession
     else:
         key, reference = '@refname', entity.identifier.index
     link[key] = ['', fixed_attribute, reference]
Exemple #28
0
 def add_alias(self, spec: dict, entity: Entity):
     """Identify ``entity`` in ``spec`` by ENA accession or, failing that, by alias.

     The accession is looked up under the service 'ENA_<self.ena_type>'.
     When it is truthy it is stored under '@accession'; otherwise the
     entity's identifier index is stored under '@alias'.  The stored value
     is a three-item list of an empty string, ``fixed_attribute`` and the
     reference.
     """
     accession = entity.get_accession(f'ENA_{self.ena_type}')
     if accession:
         key, reference = '@accession', accession
     else:
         key, reference = '@alias', entity.identifier.index
     spec[key] = ['', fixed_attribute, reference]
 def update_links_in_submission(self, submission: Submission, study: Entity) -> dict:
     """Fetch the study's BioStudies submission and update its links section.

     The submission is fetched by the study's 'BioStudies' accession and
     read from the ``json`` attribute of the result.  Its links section is
     then updated from ``study`` and ``submission``.

     Returns:
         The fetched submission object after its links section was updated.
     """
     accession = study.get_accession('BioStudies')
     response = self.get_submission_by_accession(accession)
     # ``json`` is read as an attribute here, not called.
     biostudies_submission = response.json
     self.__update_links_section(
         self.__get_links_section_from_submission(biostudies_submission),
         study, submission)
     return biostudies_submission