def test_update_entity__given_empty_ingest_json__then_fetch_resource(self):
    """Check the patch sent when the entity is built with ``None`` as its last argument.

    The API mock's ``get_entity_by_uuid`` supplies the stored resource, and
    the test expects ``patch`` to be called with the stored content overlaid
    by the entity's own content.
    """
    # given: the resource returned by the mocked lookup
    stored_resource = {
        'content': {
            'k': 'v',
            'k2': 'v2'
        },
        '_links': {
            'self': {
                'href': 'url'
            }
        }
    }
    self.ingest_api.get_entity_by_uuid = Mock(return_value=stored_resource)

    # and: an entity holding updated content; the fourth argument is None
    # (presumably the entity's ingest JSON -- confirm against Entity)
    biomaterial = Entity('biomaterial', 'biomaterial_uuid', {'k': 'v2'}, None)

    # when:
    IngestSubmitter(self.ingest_api).update_entity(biomaterial)

    # then: the self link is patched with the merged content
    expected_payload = {'content': {'k': 'v2', 'k2': 'v2'}}
    self.ingest_api.patch.assert_called_with('url', expected_payload)
def test_update_entity__given_content_has_no_update__then_do_not_patch(self):
    """Check that no patch is sent when the stored content already has the entity's values."""
    # given: stored content in which 'k' already equals the entity's value
    stored_resource = {
        'content': {
            'k': 'v2',
            'k2': 'v2'
        },
        '_links': {
            'self': {
                'href': 'url'
            }
        }
    }
    self.ingest_api.get_entity_by_uuid = Mock(return_value=stored_resource)

    # and: an entity whose content introduces nothing new
    biomaterial = Entity('biomaterial', 'biomaterial_uuid', {'k': 'v2'}, None)

    # when:
    IngestSubmitter(self.ingest_api).update_entity(biomaterial)

    # then:
    self.ingest_api.patch.assert_not_called()
def test_update_entities(self):
    """Check that ``update_entities`` calls ``update_entity`` for 'user_2' and 'user_3'."""
    # given: the shared test entity map and the calls expected for two users
    entity_map = self._create_test_entity_map()
    expected_calls = [
        call(entity_map.get_entity('user', user_id))
        for user_id in ('user_2', 'user_3')
    ]

    # when: update_entity is stubbed so only the delegation is observed
    submitter = IngestSubmitter(self.ingest_api)
    submitter.update_entity = MagicMock()
    submitter.update_entities(entity_map)

    # then:
    submitter.update_entity.assert_has_calls(expected_calls, any_order=True)
def test_submit(self, submission_constructor):
    """Check that ``add_entities`` builds a submission and adds the product and user.

    ``submission_constructor`` is presumably injected by a patch decorator
    that lies outside this view.
    """
    # given:
    mocked_submission = self._mock_submission(submission_constructor)

    # and: one entity of each type, collected in a map
    product_entity = Entity('product', 'product_1', {})
    project_entity = Entity('project', 'id', {})
    user_entity = Entity('user', 'user_1', {})
    entities = EntityMap(product_entity, user_entity, project_entity)

    # when:
    submitter = IngestSubmitter(self.ingest_api)
    submitter.add_entity = MagicMock()
    submitter.add_entities(entities, submission_url='url')

    # then:
    submission_constructor.assert_called_with(self.ingest_api, 'url')
    mocked_submission.define_manifest.assert_called_with(entities)
    expected_additions = [call(product_entity), call(user_entity)]
    mocked_submission.add_entity.assert_has_calls(expected_additions, any_order=True)
def test_add_entity(self):
    """Check that ``add_entity`` stores the API's creation response on the entity."""
    # given: the response the mocked API gives back on creation
    creation_response = {
        'content': {},
        'submissionDate': '2018-05-08T10:17:49.476Z',
        'updateDate': '2018-05-08T10:17:57.254Z',
        'uuid': {
            'uuid': '5a36689b-302b-40e4-bef1-837b47f0cb51'
        },
        'validationState': 'Draft'
    }

    # and: HTTP access and the API client are replaced with mocks
    ingest.api.ingestapi.requests.get = MagicMock()
    mock_ingest_api = MagicMock(name='mock_ingest_api')
    mock_ingest_api.load_root = MagicMock()
    mock_ingest_api.create_entity = MagicMock(return_value=creation_response)

    # when:
    submitter = IngestSubmitter(mock_ingest_api)
    new_entity = Entity(entity_id='id', entity_type='biomaterial', content={})
    added_entity = submitter.add_entity(new_entity, 'url')

    # then:
    self.assertEqual(creation_response, added_entity.ingest_json)
def test_update_entity(self):
    """Check that ``update_entity`` patches the self link with the merged content."""
    # given: an entity built with both new content and an existing resource
    existing_resource = {
        'content': {
            'k': 'v',
            'k2': 'v2'
        },
        '_links': {
            'self': {
                'href': 'url'
            }
        }
    }
    user = Entity('user', 'user_1', {'k': 'v2'}, existing_resource)

    # when:
    IngestSubmitter(self.ingest_api).update_entity(user)

    # then:
    expected_payload = {'content': {'k': 'v2', 'k2': 'v2'}}
    self.ingest_api.patch.assert_called_with('url', expected_payload)
def test_submit_linked_entity(self, submission_constructor):
    """Check that ``add_entities`` adds both a user and a product that links to it.

    ``submission_constructor`` is presumably injected by a patch decorator
    that lies outside this view.
    """
    # given:
    mocked_submission = self._mock_submission(submission_constructor)

    # and: a map that starts with a single user
    user_entity = Entity('user', 'user_1', {})
    entities = EntityMap(user_entity)

    # and: a product carrying a direct link to that user, plus a project
    user_link = {
        'entity': 'user',
        'id': 'user_1',
        'relationship': 'wish_list'
    }
    product_entity = Entity(
        'product', 'product_1', {},
        direct_links=[user_link],
        is_reference=False,
        is_linking_reference=False,
    )
    project_entity = Entity(
        'project', 'id', {},
        is_reference=False,
        is_linking_reference=False,
    )
    entities.add_entity(product_entity)
    entities.add_entity(project_entity)

    # when: collaborators on the submitter are stubbed out
    submitter = IngestSubmitter(self.ingest_api)
    submitter.add_entity = MagicMock()
    submitter.link_submission_to_project = MagicMock()
    submitter.PROGRESS_CTR = 1
    submitter.add_entities(entities, submission_url='url')

    # then:
    submission_constructor.assert_called_with(self.ingest_api, 'url')
    mocked_submission.define_manifest.assert_called_with(entities)
    expected_additions = [call(user_entity), call(product_entity)]
    mocked_submission.add_entity.assert_has_calls(expected_additions, any_order=True)
def __init__(self, ingest_api: IngestApi):
    """Keep the API client, a module-named logger and a submitter built on that client."""
    self.logger = logging.getLogger(__name__)
    self.ingest_api = ingest_api
    # The submitter shares the very same API client instance.
    self.submitter = IngestSubmitter(ingest_api)
class XlsImporter:
    """
    XlsImporter is used to convert a contributor's spreadsheet into metadata json entities and to submit those
    to Ingest.

    Please see https://github.com/HumanCellAtlas/ingest-central/wiki/Data-Contributors-Spreadsheet-Quick-Guide
    for more information on the spreadsheet format.
    """

    def __init__(self, ingest_api: IngestApi):
        """Keep the API client, a module-named logger and a submitter built on that client."""
        self.ingest_api = ingest_api
        self.logger = logging.getLogger(__name__)
        self.submitter = IngestSubmitter(self.ingest_api)

    def generate_json(self, file_path, is_update, project_uuid=None, update_project=False):
        """Load the workbook at ``file_path`` and run it through a ``WorkbookImporter``.

        :param file_path: location of the spreadsheet to load
        :param is_update: forwarded to ``WorkbookImporter.do_import``
        :param project_uuid: forwarded to ``WorkbookImporter.do_import``
        :param update_project: forwarded to ``WorkbookImporter.do_import``
        :return: tuple ``(spreadsheet_json, template_mgr, errors)``
        :raises SchemaRetrievalError: when the template manager cannot be built
        """
        ingest_workbook = IngestWorkbook.from_file(file_path)

        try:
            template_mgr = template_manager.build(ingest_workbook.get_schemas(), self.ingest_api)
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise SchemaRetrievalError(
                f'There was an error retrieving the schema information to process the spreadsheet. {str(e)}') from e

        workbook_importer = WorkbookImporter(template_mgr)
        spreadsheet_json, errors = workbook_importer.do_import(ingest_workbook, is_update, project_uuid,
                                                               update_project)
        return spreadsheet_json, template_mgr, errors

    def dry_run_import_file(self, file_path, project_uuid=None):
        """Parse and link the spreadsheet without submitting anything.

        :param file_path: location of the spreadsheet to load
        :param project_uuid: optional project UUID forwarded to ``generate_json``
        :return: ``(entity_map, [])`` on success, ``(None, errors)`` when parsing reported errors
        """
        # project_uuid must be passed by keyword: the second positional
        # parameter of generate_json is is_update, not project_uuid.
        spreadsheet_json, template_mgr, errors = self.generate_json(file_path, False, project_uuid=project_uuid)

        if errors:
            return None, errors

        entity_map = EntityMap.load(spreadsheet_json)
        entity_linker = EntityLinker(template_mgr, entity_map)
        entity_linker.handle_links_from_spreadsheet()
        return entity_map, []

    def import_file(self, file_path, submission_url, is_update=False, project_uuid=None,
                    update_project=False) -> Tuple[Submission, TemplateManager]:
        """Import the spreadsheet into the submission at ``submission_url``.

        Parsing errors are reported against the submission. Otherwise the
        entities are either updated (``is_update``) or linked and submitted
        as new entities. Any exception is recorded as a submission error
        instead of being propagated.

        :param file_path: location of the spreadsheet to load
        :param submission_url: URL of the submission being populated
        :param is_update: update existing entities instead of adding new ones
        :param project_uuid: when given, the submission is linked to this project first
        :param update_project: together with ``project_uuid``, also update the project entity
        :return: ``(submission, template_mgr)``; ``submission`` is ``None`` on error, for
            updates, and when parsing errors were reported
        """
        # Bound before the try block so the handlers can always return
        # template_mgr, even if the very first call below raises.
        submission = None
        template_mgr = None

        try:
            if project_uuid:
                self.submitter.link_submission_to_project(project_uuid, submission_url)

            spreadsheet_json, template_mgr, errors = self.generate_json(file_path, is_update,
                                                                        project_uuid=project_uuid,
                                                                        update_project=update_project)
            entity_map = EntityMap.load(spreadsheet_json)

            # Clear errors of earlier attempts before reporting fresh ones.
            self.ingest_api.delete_submission_errors(submission_url)

            if errors:
                self.report_errors(submission_url, errors)
            elif is_update:
                self.submitter.update_entities(entity_map)
            else:
                entity_linker = EntityLinker(template_mgr, entity_map)
                entity_linker.handle_links_from_spreadsheet()
                submission = self._submit_new_entities(entity_map, submission_url)
                project = entity_map.get_project()
                if project and project_uuid and update_project:
                    self.submitter.update_entity(project)
        except HTTPError as httpError:
            self.logger.exception(httpError)
            status = httpError.response.status_code
            text = httpError.response.text
            importer_error = ImporterError(f'Received an HTTP {status} from {httpError.request.url}: {text}')
            self.ingest_api.create_submission_error(submission_url, importer_error.getJSON())
            return None, template_mgr
        except Exception as e:
            self.ingest_api.create_submission_error(submission_url, ImporterError(str(e)).getJSON())
            self.logger.error(str(e), exc_info=True)
            return None, template_mgr
        finally:
            self.logger.info(f'Submission in {submission_url} is done!')

        return submission, template_mgr

    def _submit_new_entities(self, entity_map, submission_url):
        """Add the entities to the submission, link a new project if present, then link entities."""
        submission = self.submitter.add_entities(entity_map, submission_url)

        project = entity_map.get_project()
        if project and project.is_new:
            self.submitter.link_submission_to_project(project.uuid, submission_url)

        self.submitter.link_entities(entity_map, submission)
        return submission

    def report_errors(self, submission_url, errors):
        """Create one submission error per entry of ``errors``.

        Each entry is read through its ``"location"``, ``"type"`` and ``"detail"`` keys.
        """
        self.logger.info(f'Logged {len(errors)} ParsingErrors.', exc_info=False)
        for error in errors:
            self.ingest_api.create_submission_error(
                submission_url,
                ParserError(error["location"], error["type"], error["detail"]).getJSON()
            )

    @staticmethod
    def update_spreadsheet_with_uuids(submission: Submission, template_mgr: TemplateManager, file_path):
        """Write entity UUIDs and a schemas worksheet back into the spreadsheet.

        Does nothing and returns ``None`` when ``submission`` is falsy;
        otherwise returns whatever the workbook's ``save`` returns.
        """
        if not submission:
            return
        wb = IngestWorkbook.from_file(file_path, read_only=False)
        wb.add_entity_uuids(submission)
        wb.add_schemas_worksheet(template_mgr.get_schemas())
        return wb.save(file_path)

    def import_project_from_workbook(self, workbook: Workbook, token: str) -> Tuple[str, List[dict]]:
        """Create a project in Ingest from the workbook's project metadata.

        :param workbook: workbook to read the project from
        :param token: forwarded to ``ingest_api.create_project``
        :return: ``(project_uuid, [])`` on success, ``(None, errors)`` when parsing reported errors
        """
        project_metadata_json, errors = self._generate_project_json_from_workbook(workbook)

        if errors:
            return None, errors

        ingest_project = self.ingest_api.create_project(None, content=project_metadata_json, token=token)
        project_uuid = ingest_project['uuid']['uuid']
        return project_uuid, []

    def _generate_project_json_from_workbook(self, workbook):
        """Import only the 'Project' worksheet and return the first project's content.

        :return: ``(project_metadata, [])`` on success, ``(None, errors)`` when parsing reported errors
        """
        ingest_workbook = IngestWorkbook(workbook)
        template_mgr = self._setup_template_manager_for_project_import()
        workbook_importer = WorkbookImporter(template_mgr)
        spreadsheet_json, errors = workbook_importer.do_import(ingest_workbook, False, worksheet_titles=['Project'])

        if errors:
            return None, errors

        projects = list(spreadsheet_json.get('project').values())
        project = projects[0] if projects else None
        # NOTE(review): with an empty project list `project` is None and the
        # next line raises AttributeError -- confirm the intended fallback.
        project_metadata = project.get('content')
        return project_metadata, []

    def _setup_template_manager_for_project_import(self):
        """Build a template manager for the latest project schema.

        :raises SchemaRetrievalError: when the schema URL lookup or the build fails
        """
        try:
            project_schema_url = self.ingest_api.get_latest_schema_url('type', 'project', 'project')
            template_mgr = template_manager.build([project_schema_url], self.ingest_api)
        except Exception as e:
            # Chain the cause so the original traceback is not lost.
            raise SchemaRetrievalError(
                f'There was an error retrieving the project schema information to import the project. {str(e)}'
            ) from e
        return template_mgr