def __link_entity_accessions(submission: Submission, entity: Entity):
    """Add accessioned fixture entities to ``submission`` and link samples.

    Maps one ``experiment_run`` carrying the ENA accession ``ABC1234`` and
    three ``sample`` entities carrying BioSamples accessions. Each sample is
    linked to ``entity``; the run is mapped but not linked here.
    """
    experiment_run = submission.map('experiment_run', 'ABC1234', {})
    experiment_run.add_accession('ENA', 'ABC1234')
    for sample_number in (123, 456, 789):
        sample = submission.map('sample', str(sample_number), {})
        sample.add_accession('BioSamples', f'SAME{sample_number}')
        entity.add_link('sample', sample.identifier.index)
def get_release_date(data: Submission) -> date:
    """Return the first study release date that lies after today.

    Studies are visited in the order ``data.get_entities('study')`` yields
    them. A study without a ``release_date`` attribute is skipped, as is one
    whose date is today or earlier.

    Returns:
        The first future release date, or ``None`` when the submission has
        no ``study`` entity type or no study has a future release date.
    """
    if 'study' not in data.get_entity_types():
        return None
    for study in data.get_entities('study'):
        if 'release_date' not in study.attributes:
            continue
        candidate = date.fromisoformat(study.attributes['release_date'])
        if candidate > date.today():
            return candidate
    return None
def test_mapping_identical_index_update_should_return_same_entity(self):
    """Mapping one type/index twice in UPDATE mode yields equal entities."""
    submission = Submission(HandleCollision.UPDATE)
    entity_key = ("test_case", "index1")

    first = submission.map(*entity_key, {})
    second = submission.map(*entity_key, {})

    self.assertEqual(first, second)
class TestIssuesGeneration(unittest.TestCase):
    """Checks that schema validation responses are recorded as submission errors."""

    def setUp(self):
        """Create the validator and load the submission from the JSON resource."""
        self.schema_validation = JsonValidator("")
        self.maxDiff = None
        resource_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
        with open(resource_path) as test_data_file:
            test_data = json.load(test_data_file)
        self.submission = Submission()
        for entity_type, attributes in test_data.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    @patch('validation.json.requests.post')
    def test_when_validate_invalid_entity_with_valid_schema_should_return_errors(self, mock_post):
        """Errors returned by the mocked POST calls end up in ``get_all_errors``."""
        # Given
        allowed_values_error = "should be equal to one of the allowed values: ['covid-19 outbreak']"
        coverage_error = "should have required property 'coverage'"
        email_error = "should have required property 'email_address'"
        first_response = [
            {'dataPath': '.assembly_type', 'errors': [allowed_values_error]},
            {'dataPath': '.coverage', 'errors': [coverage_error]},
        ]
        second_response = [
            {"dataPath": ".email_address", "errors": [email_error]},
        ]
        # One JSON payload per successive call; the last two report no errors.
        mock_post.return_value.json.side_effect = (first_response, second_response, [], [])
        mock_post.return_value.status = requests.codes['ok']
        expected_issues = {
            "isolate_genome_assembly_information": {
                "P17157_1007": {
                    "assembly_type": [allowed_values_error],
                    "coverage": [coverage_error],
                },
            },
            "study": {
                "PRJEB39632": {
                    "email_address": [email_error],
                },
            },
        }

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        self.assertDictEqual(expected_issues, self.submission.get_all_errors())
def test_when_study_contains_new_links_then_those_added_to_submission(self):
    """Links for the study's accessioned entities appear in the links section."""
    # Given
    login_response = AuthResponse(status=HTTPStatus(200))
    login_response.session_id = "test.session.id"
    self.mock_auth.login = MagicMock(return_value=login_response)

    biostudies = BioStudies("url", "username", "password")
    stored_submission = ResponseObject()
    stored_submission.json = self.__create_submission()
    biostudies.get_submission_by_accession = MagicMock(return_value=stored_submission)

    submission = Submission()
    study = submission.map('study', 'test alias', attributes={})
    study.add_accession('test', 'PRJ1234')
    self.__link_entity_accessions(submission, study)

    url_and_type = (
        ('ABC1234', 'ena'),
        ('SAME123', 'biosample'),
        ('SAME456', 'biosample'),
        ('SAME789', 'biosample'),
    )
    expected_links = [
        {'url': url, 'attributes': [{'name': 'Type', 'value': link_type}]}
        for url, link_type in url_and_type
    ]

    # When
    biostudies_submission = biostudies.update_links_in_submission(submission, study)

    # Then
    links_section = biostudies_submission.get('section', {}).get('links', [])
    self.assertTrue(links_section)
    for expected_link in expected_links:
        self.assertIn(expected_link, links_section)
    self.assertCountEqual(expected_links, links_section)
def setUp(self):
    """Create the validator and load the submission from the JSON resource."""
    self.schema_validation = JsonValidator("")
    self.maxDiff = None
    resource_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
    with open(resource_path) as test_data_file:
        test_data = json.load(test_data_file)
    self.submission = Submission()
    # Each top-level key is an entity type; its value holds the attributes,
    # including the "index" used to identify the entity.
    for entity_type, attributes in test_data.items():
        self.submission.map(entity_type, attributes["index"], attributes)
def setUp(self):
    """Load the JSON test resource into a fresh ``Submission``."""
    self.maxDiff = None
    resource_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
    with open(resource_path) as test_data_file:
        test_data = json.load(test_data_file)
    self.submission = Submission()
    # Each top-level key is an entity type; its value holds the attributes,
    # including the "index" used to identify the entity.
    for entity_type, attributes in test_data.items():
        self.submission.map(entity_type, attributes["index"], attributes)
def test_mapping_identical_index_should_overwrite_entity_attributes(self):
    """In OVERWRITE mode the second mapping's attributes replace the first's."""
    submission = Submission(HandleCollision.OVERWRITE)
    expected_attributes = {'second_entity': 'new'}

    original_entity = submission.map("test_case", "index1", {'first_entity': 'old'})
    remapped_entity = submission.map("test_case", "index1", {'second_entity': 'new'})

    # Both returned entities must show only the overwriting attributes.
    for mapped_entity in (original_entity, remapped_entity):
        self.assertDictEqual(expected_attributes, mapped_entity.attributes)
def make_manifests(self, submission: Submission) -> Dict[str, str]:
    """Build manifest contents keyed by file name for eligible run experiments.

    A ``run_experiment`` is eligible when it is linked to exactly one sample
    and exactly one study, has an ``uploaded_file_1`` attribute, and both the
    sample accession and the ``ENA_Study`` accession are available.

    Returns:
        A dict mapping each manifest file name to its content, as produced
        by ``self.make_manifest``.
    """
    manifests = {}
    for run_experiment in submission.get_entities('run_experiment'):
        samples = submission.get_linked_entities(run_experiment, 'sample')
        studies = submission.get_linked_entities(run_experiment, 'study')
        if (len(samples) != 1
                or len(studies) != 1
                or 'uploaded_file_1' not in run_experiment.attributes):
            continue
        sample = samples.pop()
        study = studies.pop()
        sample_accession = sample.get_first_accession(SAMPLE_ACCESSION_PRIORITY)
        study_accession = study.get_accession('ENA_Study')
        if not (sample_accession and study_accession):
            continue
        file_name, content = self.make_manifest(run_experiment, sample_accession, study_accession)
        manifests[file_name] = content
    return manifests
def test_when_entities_has_accessions_returns_them_by_type(self):
    """``get_all_accessions`` groups every entity accession by its type."""
    expected_accession_by_type = {
        'BioSamples': {'SAME123', 'SAME456', 'SAME789'},
        'BioStudies': {"BST1"},
        'ENA': {'EXP123'},
    }
    submission = Submission()
    submission.map("study", "study", self.study).add_accession('BioStudies', "BST1")
    sample_fixtures = (
        ("sample1", self.sample1, "SAME123"),
        ("sample2", self.sample2, "SAME456"),
        ("sample3", self.sample3, "SAME789"),
    )
    for sample_index, sample_attributes, sample_accession in sample_fixtures:
        sample = submission.map("sample", sample_index, sample_attributes)
        sample.add_accession('BioSamples', sample_accession)
    run_experiment = submission.map("run_experiment", "sample3", self.run_experiment)
    run_experiment.add_accession('ENA', "EXP123")

    self.assertEqual(expected_accession_by_type, submission.get_all_accessions())
def __update_links_section(self, links_section: List, study: Entity, submission: Submission):
    """Append link elements for the study's linked entities to ``links_section``.

    For every entity type in ``BIOSTUDIES_LINK_TYPES``, each entity linked to
    ``study`` is looked up for the accession of the service named by
    ``ENTITY_TYPE_SERVICE``. A link is appended, in place, only when the
    accession exists and is not already present in ``links_section``.
    """
    for entity_type, biostudies_type in BIOSTUDIES_LINK_TYPES.items():
        for linked_entity in submission.get_linked_entities(study, entity_type):
            accession = linked_entity.get_accession(ENTITY_TYPE_SERVICE[entity_type])
            if not accession or self.__accession_in_list(links_section, accession):
                continue
            links_section.append(self.__create_link_element(biostudies_type, accession))
def validate_data(self, data: Submission):
    """Validate every entity in ``data``, one entity type at a time.

    Logs the number of entities per type, then passes each entity to
    ``self.validate_entity``.
    """
    entities_by_type = data.get_all_entities()
    for entity_type, entities in entities_by_type.items():
        message = f'Validating {len(entities)} {entity_type}(s) with {self.__class__}'
        logging.info(message)
        for entity in entities:
            self.validate_entity(entity)
def convert_experiment(converter: EnaExperimentConverter, data: Submission, experiment: Entity) -> Element:
    """Convert ``experiment`` using one linked sample and one linked study.

    When no sample or no study is linked, an error is recorded on the
    experiment for each missing link and ``None`` is returned. When more than
    one is linked, an error naming the entity actually used is recorded and
    conversion still proceeds.

    Returns:
        The result of ``converter.convert_experiment``, or ``None`` when a
        required link is missing.
    """
    error_key = 'run_experiment_ena_experiment_accession'
    samples = data.get_linked_entities(experiment, 'sample')
    studies = data.get_linked_entities(experiment, 'study')
    # Counts are taken before pop() shrinks the collections.
    sample_count = len(samples)
    study_count = len(studies)

    if sample_count < 1:
        experiment.add_error(error_key, 'No Linked Sample')
    if study_count < 1:
        experiment.add_error(error_key, 'No Linked Study')
    if sample_count < 1 or study_count < 1:
        return None

    sample = samples.pop()
    study = studies.pop()
    # ENA Only supports linking one study & sample to an experiment
    if sample_count > 1:
        experiment.add_error(error_key,
                             f'More than one Sample Linked, using first: {sample.identifier.index}')
    if study_count > 1:
        experiment.add_error(error_key,
                             f'More than one Study Linked, using first: {study.identifier.index}')
    return converter.convert_experiment(experiment, sample, study)
def get_ena_files(self, data: Submission) -> Dict[str, Tuple[str, str]]:
    """Build one ENA XML file per entity type that has converted entities.

    For each ``(entity_type, converter)`` pair in ``self.conversion_map``, a
    ``<TYPE>_SET`` element is filled with the conversion of every matching
    entity. Sets that end up with no children are left out.

    Returns:
        A dict keyed by upper-cased ENA type, each value a
        ``(file name, file content)`` tuple.
    """
    files_by_type = {}
    for entity_type, converter in self.conversion_map:
        set_name = converter.ena_type.upper()
        set_element = etree.XML(f'<{set_name}_SET />')
        for entity in data.get_entities(entity_type):
            set_element.append(self.convert_entity(converter, data, entity))
        if len(set_element) == 0:
            continue
        files_by_type[set_name] = (f'{set_name}.xml', self.make_ena_file(set_element))
    return files_by_type
def validate_data(self, data: Submission):
    """Validate each convertible entity against its ENA schema.

    Each entity is converted, wrapped on its own in a ``<TYPE>_SET`` element
    and checked with the schema stored under that type in ``self.ena_schema``.
    Failures are reported through ``self.add_errors``.
    """
    for entity_type, converter in self.converter.conversion_map:
        ena_type = converter.ena_type.upper()
        entities = data.get_entities(entity_type)
        logging.info(f'Validating {len(entities)} {entity_type}(s) against ENA {ena_type} schema')
        for entity in entities:
            schema = self.ena_schema[ena_type]
            # A fresh set per entity, so a failure is attributable to that entity.
            single_entity_set = etree.XML(f'<{ena_type}_SET />')
            converted = self.converter.convert_entity(converter, data, entity)
            single_entity_set.append(converted)
            if schema(single_entity_set):
                continue
            self.add_errors(schema, ena_type, entity_type, entity)
def test_mapping_identical_index_should_update_entity_attributes(self):
    """In UPDATE mode attributes are merged and the later value wins."""
    submission = Submission(HandleCollision.UPDATE)
    first_attributes = {'first_entity': 'old', 'both_entities': 'old'}
    second_attributes = {'second_entity': 'new', 'both_entities': 'new'}
    expected_attributes = {
        'first_entity': 'old',
        'second_entity': 'new',
        'both_entities': 'new',
    }

    first_entity = submission.map("test_case", "index1", first_attributes)
    second_entity = submission.map("test_case", "index1", second_attributes)

    # Both returned entities must show the merged attributes.
    for mapped_entity in (first_entity, second_entity):
        self.assertDictEqual(expected_attributes, mapped_entity.attributes)
def test_mapping_identical_index_should_error(self):
    """In ERROR mode a second mapping of the same type/index raises IndexError."""
    submission = Submission(HandleCollision.ERROR)
    entity_key = ("test_case", "index1")
    submission.map(*entity_key, {})

    with self.assertRaises(IndexError):
        submission.map(*entity_key, {})
def validate_data(self, data: Submission):
    """Run ``self.validate_entity`` on every ``sample`` entity in ``data``."""
    samples = data.get_entities('sample')
    sample_count = len(samples)
    logging.info(f'Validating taxonomy against scientific name in {sample_count} sample(s)')
    for sample in samples:
        self.validate_entity(sample)
def __init__(self, imgur_client, reddit_submission):
    """Initialise the base ``Submission`` and keep the imgur client.

    Args:
        imgur_client: Client object stored on the instance as ``_imgur``.
        reddit_submission: Passed unchanged to ``Submission.__init__``.
    """
    Submission.__init__(self, reddit_submission)
    self._imgur = imgur_client
def test_has_data_should_become_true(self):
    """A submission reports data once an entity has been mapped."""
    populated = Submission()
    populated.map('type', 'index', {})

    has_data = populated.has_data()

    self.assertTrue(has_data)
def test_has_data_should_start_false(self):
    """A freshly created submission reports no data."""
    has_data = Submission().has_data()

    self.assertFalse(has_data)
def __init__(self, tumblr_client, reddit_submission):
    """Initialise the base ``Submission`` and keep the tumblr client.

    Args:
        tumblr_client: Client object stored on the instance as ``_tumblr``.
        reddit_submission: Passed unchanged to ``Submission.__init__``.
    """
    Submission.__init__(self, reddit_submission)
    self._tumblr = tumblr_client
def download_submission(self):
    """Download the data behind every URL of this tumblr submission.

    Returns:
        A list with one ``Submission._download_data_from_url`` result per URL
        yielded by ``self._get_tumblr_submission_urls()``, in the same order.
    """
    submission_urls = self._get_tumblr_submission_urls()
    return list(map(Submission._download_data_from_url, submission_urls))
def validate_data(self, data: Submission):
    """Run ``self.validate_entity`` on every ``run_experiment`` entity in ``data``."""
    runs = data.get_entities('run_experiment')
    run_count = len(runs)
    logging.info(f'Validating file checksums for {run_count} run(s)')
    for run in runs:
        self.validate_entity(run)
class TestSchemaValidation(unittest.TestCase):
    """Checks how ``JsonValidator`` results are recorded on a submission."""

    def setUp(self):
        """Load the JSON test resource and create the validator and submission."""
        self.maxDiff = None
        resource_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
        with open(resource_path) as test_data_file:
            test_data = json.load(test_data_file)
        self.schema_validation = JsonValidator("")
        self.submission = Submission()
        for entity_type, attributes in test_data.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    @patch('validation.json.requests.post')
    def test_when_entity_valid_should_return_no_errors(self, mock_post):
        """An empty validation response leaves the submission free of errors."""
        # Given
        post_response = mock_post.return_value
        post_response.json.return_value = []
        post_response.status = requests.codes['ok']

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        self.assertFalse(self.submission.has_errors())
        self.assertDictEqual({}, self.submission.get_all_errors())

    @patch('validation.json.requests.post')
    def test_when_entity_invalid_entity_with_valid_schema_should_return_errors(self, mock_post):
        """The same mocked error is recorded against every entity."""
        # Given
        error_message = "should have required property 'release_date'"
        post_response = mock_post.return_value
        post_response.json.return_value = [{
            "dataPath": ".release_date",
            "errors": [error_message],
        }]
        post_response.status = requests.codes['ok']
        entity_indexes = (
            ("isolate_genome_assembly_information", "P17157_1007"),
            ("study", "PRJEB39632"),
            ("sample", "ERS4858671"),
            ("run_experiment", "ERX4331406"),
        )
        expected_errors = {
            entity_type: {index: {"release_date": [error_message]}}
            for entity_type, index in entity_indexes
        }
        study = self.submission.get_entity('study', 'PRJEB39632')

        # When
        self.schema_validation.validate_data(self.submission)

        # Then
        self.assertTrue(study.has_errors())
        self.assertTrue(self.submission.has_errors())
        expected_study_errors = expected_errors['study']
        self.assertDictEqual(expected_study_errors['PRJEB39632'], study.get_errors())
        self.assertDictEqual(expected_study_errors, self.submission.get_errors('study'))
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())
class TestSubmissionErrors(unittest.TestCase):
    """Checks error bookkeeping at entity, entity-type and submission level."""

    def setUp(self):
        """Load the JSON test resource into a fresh ``Submission``."""
        self.maxDiff = None
        resource_path = join(dirname(__file__), "../../resources/data_for_test_issues.json")
        with open(resource_path) as test_data_file:
            test_data = json.load(test_data_file)
        self.submission = Submission()
        for entity_type, attributes in test_data.items():
            self.submission.map(entity_type, attributes["index"], attributes)

    def test_submission_with_no_errors(self):
        """Without any added error, every error accessor reports nothing."""
        # Given
        study: Entity = self.submission.get_entity('study', 'PRJEB39632')

        # Then
        self.assertFalse(study.has_errors())
        self.assertDictEqual({}, self.submission.get_errors('study'))
        self.assertFalse(self.submission.has_errors())
        self.assertDictEqual({}, self.submission.get_all_errors())

    def test_submission_entity_with_error(self):
        """An error added to one entity is visible at all three levels."""
        # Given
        error_message = "should have required property 'release_date'"
        expected_errors = {
            'study': {
                'PRJEB39632': {
                    'release_date': [error_message],
                },
            },
        }
        study: Entity = self.submission.get_entity('study', 'PRJEB39632')

        # When
        study.add_error('release_date', error_message)

        # Then
        self.assertTrue(study.has_errors())
        self.assertTrue(self.submission.has_errors())
        expected_study_errors = expected_errors['study']
        self.assertDictEqual(expected_study_errors['PRJEB39632'], study.get_errors())
        self.assertDictEqual(expected_study_errors, self.submission.get_errors('study'))
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())

    def test_submission_entities_with_errors(self):
        """Errors added to several entities are all reported by the submission."""
        # Given
        email_error = "should have required property 'email_address'"
        allowed_values_error = "should be equal to one of the allowed values: ['covid-19 outbreak']"
        coverage_error = "should have required property 'coverage'"
        expected_errors = {
            'study': {
                'PRJEB39632': {
                    'email_address': [email_error],
                },
            },
            'isolate_genome_assembly_information': {
                'P17157_1007': {
                    'assembly_type': [allowed_values_error],
                    'coverage': [coverage_error],
                },
            },
        }
        study: Entity = self.submission.get_entity('study', 'PRJEB39632')
        assembly: Entity = self.submission.get_entity(
            'isolate_genome_assembly_information', 'P17157_1007')

        # When
        study.add_error('email_address', email_error)
        assembly.add_error('assembly_type', allowed_values_error)
        assembly.add_error('coverage', coverage_error)

        # Then
        self.assertTrue(self.submission.has_errors())
        self.assertDictEqual(expected_errors, self.submission.get_all_errors())
def download_submission(self):
    """Download the data behind this gfycat submission's URL.

    Returns:
        A one-element list holding the result of
        ``Submission._download_data_from_url`` for the URL returned by
        ``self._get_gfycat_submission_url()``.
    """
    submission_url = self._get_gfycat_submission_url()
    downloaded = Submission._download_data_from_url(submission_url)
    return [downloaded]
def __init__(self, reddit_submission):
    """Initialise the base ``Submission`` with ``reddit_submission``.

    Args:
        reddit_submission: Passed unchanged to ``Submission.__init__``.
    """
    Submission.__init__(self, reddit_submission)