Example #1
0
    def test_update_entity__given_empty_ingest_json__then_fetch_resource(self):
        """Expect a patch built from the API-fetched resource when the entity has no ingest JSON."""
        # given: the resource handed back by the (mocked) lookup by uuid
        stored_content = {'k': 'v', 'k2': 'v2'}
        self_link = {'self': {'href': 'url'}}
        fetched_resource = {'content': stored_content, '_links': self_link}
        self.ingest_api.get_entity_by_uuid = Mock(return_value=fetched_resource)

        # and: an entity with new content and None as its fourth constructor argument
        biomaterial = Entity('biomaterial', 'biomaterial_uuid', {'k': 'v2'}, None)

        # when:
        submitter = IngestSubmitter(self.ingest_api)
        submitter.update_entity(biomaterial)

        # then: the payload is the stored content with 'k' overridden by the entity's value
        expected_content = {'k': 'v2', 'k2': 'v2'}
        self.ingest_api.patch.assert_called_with('url', {'content': expected_content})
Example #2
0
    def test_update_entity__given_content_has_no_update__then_do_not_patch(
            self):
        """Expect no patch call when the entity's content already matches the fetched resource."""
        # given: a fetched resource whose 'k' already holds the entity's value
        fetched_resource = dict(
            content={'k': 'v2', 'k2': 'v2'},
            _links={'self': {'href': 'url'}},
        )
        self.ingest_api.get_entity_by_uuid = Mock(return_value=fetched_resource)

        # and: an entity with None as its fourth constructor argument
        biomaterial = Entity('biomaterial', 'biomaterial_uuid', {'k': 'v2'}, None)

        # when:
        IngestSubmitter(self.ingest_api).update_entity(biomaterial)

        # then:
        self.ingest_api.patch.assert_not_called()
Example #3
0
    def test_update_entities(self):
        """Expect update_entities to invoke update_entity for user_2 and user_3, in any order."""
        # given: the two users looked up from the shared test entity map
        entities = self._create_test_entity_map()
        expected_calls = [
            call(entities.get_entity('user', user_id))
            for user_id in ('user_2', 'user_3')
        ]

        # when: update_entity is stubbed so only the delegation is observed
        submitter = IngestSubmitter(self.ingest_api)
        submitter.update_entity = MagicMock()
        submitter.update_entities(entities)

        # then:
        submitter.update_entity.assert_has_calls(expected_calls, any_order=True)
Example #4
0
    def test_submit(self, submission_constructor):
        """Expect add_entities to build a Submission for the URL and add the product and user to it."""
        # given: the submission instance the patched constructor will hand out
        mocked_submission = self._mock_submission(submission_constructor)

        # and: a map holding one product, one user and one project
        product = Entity('product', 'product_1', {})
        project = Entity('project', 'id', {})
        user = Entity('user', 'user_1', {})
        entities = EntityMap(product, user, project)

        # when:
        submitter = IngestSubmitter(self.ingest_api)
        submitter.add_entity = MagicMock()
        submitter.add_entities(entities, submission_url='url')

        # then:
        submission_constructor.assert_called_with(self.ingest_api, 'url')
        mocked_submission.define_manifest.assert_called_with(entities)
        expected_additions = [call(product), call(user)]
        mocked_submission.add_entity.assert_has_calls(
            expected_additions, any_order=True)
Example #5
0
    def test_add_entity(self):
        """Expect add_entity to expose the create_entity response as the entity's ingest_json."""
        new_entity_mock_response = {
            'content': {},
            'submissionDate': '2018-05-08T10:17:49.476Z',
            'updateDate': '2018-05-08T10:17:57.254Z',
            'uuid': {
                'uuid': '5a36689b-302b-40e4-bef1-837b47f0cb51'
            },
            'validationState': 'Draft'
        }

        # Stub requests.get on the ingest API module for this test only. The
        # cleanup puts the real function back, so the stub cannot leak into
        # tests that run afterwards (the attribute is module-global state).
        requests_module = ingest.api.ingestapi.requests
        self.addCleanup(setattr, requests_module, 'get', requests_module.get)
        requests_module.get = MagicMock()

        mock_ingest_api = MagicMock(name='mock_ingest_api')
        mock_ingest_api.load_root = MagicMock()
        mock_ingest_api.create_entity = MagicMock(
            return_value=new_entity_mock_response)

        submitter = IngestSubmitter(mock_ingest_api)
        entity = Entity(entity_id='id', entity_type='biomaterial', content={})
        entity = submitter.add_entity(entity, 'url')

        self.assertEqual(new_entity_mock_response, entity.ingest_json)
Example #6
0
    def test_update_entity(self):
        """Expect update_entity to patch the entity's own link with its merged content."""
        # given: an entity that already carries ingest JSON with a self link
        existing_ingest_json = {
            'content': {'k': 'v', 'k2': 'v2'},
            '_links': {'self': {'href': 'url'}},
        }
        user = Entity('user', 'user_1', {'k': 'v2'}, existing_ingest_json)

        # when:
        IngestSubmitter(self.ingest_api).update_entity(user)

        # then: 'k' takes the entity's new value while 'k2' is kept as it was
        expected_payload = {'content': {'k': 'v2', 'k2': 'v2'}}
        self.ingest_api.patch.assert_called_with('url', expected_payload)
Example #7
0
    def test_submit_linked_entity(self, submission_constructor):
        """Expect add_entities to add both the user and the product linked to it to the submission."""
        # given: the submission instance the patched constructor will hand out
        mocked_submission = self._mock_submission(submission_constructor)

        # and: a map that starts out holding only the user
        user = Entity('user', 'user_1', {})
        entities = EntityMap(user)

        # and: a product with a direct link to that user, plus a project
        wish_list_link = dict(entity='user', id='user_1', relationship='wish_list')
        not_a_reference = dict(is_reference=False, is_linking_reference=False)
        linked_product = Entity('product', 'product_1', {},
                                direct_links=[wish_list_link],
                                **not_a_reference)
        project = Entity('project', 'id', {}, **not_a_reference)
        for extra_entity in (linked_product, project):
            entities.add_entity(extra_entity)

        # when: entity creation and project linking are stubbed out
        submitter = IngestSubmitter(self.ingest_api)
        submitter.add_entity = MagicMock()
        submitter.link_submission_to_project = MagicMock()
        submitter.PROGRESS_CTR = 1
        submitter.add_entities(entities, submission_url='url')

        # then:
        submission_constructor.assert_called_with(self.ingest_api, 'url')
        mocked_submission.define_manifest.assert_called_with(entities)
        expected_additions = [call(user), call(linked_product)]
        mocked_submission.add_entity.assert_has_calls(
            expected_additions, any_order=True)
Example #8
0
 def __init__(self, ingest_api: IngestApi):
     """Keep the Ingest API client and create a module logger and a submitter bound to that client."""
     self.logger = logging.getLogger(__name__)
     self.ingest_api = ingest_api
     self.submitter = IngestSubmitter(self.ingest_api)
Example #9
0
class XlsImporter:
    """
    XlsImporter is used to convert a contributor's spreadsheet into metadata json entities and to submit those to
    Ingest. Please see https://github.com/HumanCellAtlas/ingest-central/wiki/Data-Contributors-Spreadsheet-Quick-Guide
    for more information on the spreadsheet format.
    """

    def __init__(self, ingest_api: IngestApi):
        """Keep the Ingest API client and create the logger and the submitter that uses the same client."""
        self.ingest_api = ingest_api
        self.logger = logging.getLogger(__name__)
        self.submitter = IngestSubmitter(self.ingest_api)

    def generate_json(self, file_path, is_update, project_uuid=None, update_project=False):
        """
        Load the workbook at ``file_path`` and run it through ``WorkbookImporter``.

        :param file_path: location of the spreadsheet to load.
        :param is_update: forwarded to ``WorkbookImporter.do_import``.
        :param project_uuid: forwarded to ``WorkbookImporter.do_import``.
        :param update_project: forwarded to ``WorkbookImporter.do_import``.
        :return: tuple of ``(spreadsheet_json, template_mgr, errors)``.
        :raises SchemaRetrievalError: if the template manager cannot be built from the workbook's schemas.
        """
        ingest_workbook = IngestWorkbook.from_file(file_path)

        try:
            template_mgr = template_manager.build(ingest_workbook.get_schemas(), self.ingest_api)
        except Exception as e:
            # Chain the original exception so its traceback is not lost.
            raise SchemaRetrievalError(
                f'There was an error retrieving the schema information to process the spreadsheet. {str(e)}') from e

        workbook_importer = WorkbookImporter(template_mgr)
        spreadsheet_json, errors = workbook_importer.do_import(ingest_workbook, is_update, project_uuid, update_project)

        return spreadsheet_json, template_mgr, errors

    def dry_run_import_file(self, file_path, project_uuid=None):
        """
        Parse the spreadsheet and resolve its links without submitting anything.

        :param file_path: location of the spreadsheet to load.
        :param project_uuid: optional project uuid forwarded to ``generate_json``.
        :return: ``(entity_map, [])`` on success, or ``(None, errors)`` when parsing reported errors.
        """
        # A dry run is never an update. Pass the flag explicitly so that
        # project_uuid lands in its own parameter and not in is_update.
        spreadsheet_json, template_mgr, errors = self.generate_json(file_path, False, project_uuid=project_uuid)

        if errors:
            return None, errors

        entity_map = EntityMap.load(spreadsheet_json)
        entity_linker = EntityLinker(template_mgr, entity_map)
        entity_linker.handle_links_from_spreadsheet()

        return entity_map, []

    def import_file(self, file_path, submission_url, is_update=False, project_uuid=None,
                    update_project=False) -> Tuple[Submission, TemplateManager]:
        """
        Import the spreadsheet at ``file_path`` into the submission at ``submission_url``.

        Parsing errors are reported against the submission. Otherwise the entities are either
        updated (``is_update``) or linked and submitted as new entities. When ``project_uuid``
        and ``update_project`` are both set, the parsed project is updated as well.

        :return: ``(submission, template_mgr)``. ``submission`` is ``None`` unless new entities were
            submitted, and also ``None`` whenever an ``Exception`` was handled here; ``template_mgr``
            is ``None`` if the failure happened before it was built.
        """
        # Bind the results before the try block so every handler and the final
        # return can read them, even when the very first call raises.
        submission = None
        template_mgr = None
        try:
            if project_uuid:
                self.submitter.link_submission_to_project(project_uuid, submission_url)

            spreadsheet_json, template_mgr, errors = self.generate_json(file_path, is_update, project_uuid=project_uuid,
                                                                        update_project=update_project)
            entity_map = EntityMap.load(spreadsheet_json)
            self.ingest_api.delete_submission_errors(submission_url)

            if errors:
                self.report_errors(submission_url, errors)
            elif is_update:
                self.submitter.update_entities(entity_map)
            else:
                entity_linker = EntityLinker(template_mgr, entity_map)
                entity_linker.handle_links_from_spreadsheet()
                submission = self._submit_new_entities(entity_map, submission_url)

            project = entity_map.get_project()
            if project and project_uuid and update_project:
                self.submitter.update_entity(project)

        except HTTPError as httpError:
            self.logger.exception(httpError)
            status = httpError.response.status_code
            text = httpError.response.text
            importer_error = ImporterError(f'Received an HTTP {status} from  {httpError.request.url}: {text}')
            self.ingest_api.create_submission_error(submission_url, importer_error.getJSON())
            return None, template_mgr
        except Exception as e:
            self.ingest_api.create_submission_error(submission_url, ImporterError(str(e)).getJSON())
            self.logger.error(str(e), exc_info=True)
            return None, template_mgr
        finally:
            # Only log here. A `return` in `finally` would override the handlers'
            # return values and swallow any in-flight exception, including
            # KeyboardInterrupt and SystemExit.
            self.logger.info(f'Submission in {submission_url} is done!')

        return submission, template_mgr

    def _submit_new_entities(self, entity_map, submission_url):
        """Add the map's entities to the submission, link a new project to it, then link the entities."""
        submission = self.submitter.add_entities(entity_map, submission_url)
        project = entity_map.get_project()
        if project and project.is_new:
            self.submitter.link_submission_to_project(project.uuid, submission_url)

        self.submitter.link_entities(entity_map, submission)
        return submission

    def report_errors(self, submission_url, errors):
        """
        Record each parsing error against the submission.

        :param submission_url: submission the errors are attached to.
        :param errors: iterable of mappings with ``location``, ``type`` and ``detail`` keys.
        """
        self.logger.info(f'Logged {len(errors)} ParsingErrors.', exc_info=False)
        for error in errors:
            self.ingest_api.create_submission_error(
                submission_url,
                ParserError(error["location"], error["type"], error["detail"]).getJSON()
            )

    @staticmethod
    def update_spreadsheet_with_uuids(submission: Submission, template_mgr: TemplateManager, file_path):
        """
        Write the submission's entity uuids and a schemas worksheet into the spreadsheet.

        Does nothing and returns ``None`` when ``submission`` is falsy; otherwise returns
        whatever saving the workbook returns.
        """
        if not submission:
            return
        wb = IngestWorkbook.from_file(file_path, read_only=False)
        wb.add_entity_uuids(submission)
        wb.add_schemas_worksheet(template_mgr.get_schemas())
        return wb.save(file_path)

    def import_project_from_workbook(self, workbook: Workbook, token: str) -> Tuple[str, List[dict]]:
        """
        Create a project in Ingest from the project metadata found in ``workbook``.

        :return: ``(project_uuid, [])`` on success, or ``(None, errors)`` when parsing reported errors.
        """
        project_metadata_json, errors = self._generate_project_json_from_workbook(workbook)

        if errors:
            return None, errors
        else:
            ingest_project = self.ingest_api.create_project(None, content=project_metadata_json, token=token)
            project_uuid = ingest_project['uuid']['uuid']
            return project_uuid, []

    def _generate_project_json_from_workbook(self, workbook):
        """
        Import only the 'Project' worksheet and extract the first project's content.

        :return: ``(project_metadata, [])`` on success, or ``(None, errors)`` when parsing reported errors.
        """
        ingest_workbook = IngestWorkbook(workbook)
        template_mgr = self._setup_template_manager_for_project_import()
        workbook_importer = WorkbookImporter(template_mgr)
        spreadsheet_json, errors = workbook_importer.do_import(ingest_workbook, False, worksheet_titles=['Project'])

        if errors:
            return None, errors
        else:
            projects = list(spreadsheet_json.get('project').values())
            project = projects[0] if projects else None
            # NOTE(review): when no project was parsed, `project` is None and the
            # next line raises AttributeError. Decide whether that case should be
            # reported as a parsing error instead.
            project_metadata = project.get('content')
            return project_metadata, []

    def _setup_template_manager_for_project_import(self):
        """
        Build a template manager for the latest project schema only.

        :raises SchemaRetrievalError: if the schema url lookup or the template manager build fails.
        """
        try:
            project_schema_url = self.ingest_api.get_latest_schema_url('type', 'project', 'project')
            template_mgr = template_manager.build([project_schema_url], self.ingest_api)
        except Exception as e:
            # Chain the original exception so its traceback is not lost.
            raise SchemaRetrievalError(
                f'There was an error retrieving the project schema information to import the project. {str(e)}') from e
        return template_mgr