def test_serialize():
    """Serialize a record to JSON-LD, first without and then with expanded=False."""

    def render(serializer):
        # A fresh PID and record are built for every serializer call.
        pid = PersistentIdentifier(pid_type='recid', pid_value='2')
        record = Record({'title': 'mytitle', 'recid': '2'})
        return json.loads(serializer.serialize(pid, record))

    expanded_doc = render(JSONLDSerializer(CONTEXT, schema_class=_TestSchema))
    assert expanded_doc == {
        '@id': 'http://localhost/record/2',
        'http://purl.org/dc/terms/title': [{'@value': 'mytitle'}],
    }

    compacted_doc = render(
        JSONLDSerializer(CONTEXT, schema_class=_TestSchema, expanded=False)
    )
    expected_context = {
        '@base': 'http://localhost/record/',
        'dct': 'http://purl.org/dc/terms/',
        'recid': '@id',
        'title': 'dct:title',
    }
    assert compacted_doc == {
        '@context': expected_context,
        'recid': '2',
        'title': 'mytitle',
    }
def test_sequence_number_update_after_migration(app, location, script_info,
                                                current_year):
    """Run the sequence generator CLI after importing old and current PIDs.

    A report number from 2012 must not create a counter; a report number
    for ``current_year`` ending in ``-5`` must leave a single counter at 6.
    """
    # Simulate the import of a record older than the current year.
    old_recid = PersistentIdentifier(
        pid_type='recid',
        pid_value='2093596',
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid='e5428b04324b4c9fbfed02fbf78bb959',
    )
    old_report_number = PersistentIdentifier(
        pid_type='rn',
        pid_value='CERN-MOVIE-2012-193',
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid='e5428b04324b4c9fbfed02fbf78bb959',
    )
    for pid in (old_recid, old_report_number):
        db.session.add(pid)
    db.session.commit()

    # Run the sequence number update.
    result = CliRunner().invoke(
        cli_sequence_generator, [current_year], obj=script_info)

    # No counter should be created for the old report number.
    assert result.exit_code == 0
    assert Counter.query.all() == []
    assert len(TemplateDefinition.query.all()) == 2

    # Simulate the import of a record from the current year.
    new_recid = PersistentIdentifier(
        pid_type='recid',
        pid_value='2093597',
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid='e5428b04324b4c9fbfed02fbf78bb950',
    )
    new_report_number = PersistentIdentifier(
        pid_type='rn',
        pid_value='CERN-MOVIE-{0}-5'.format(current_year),
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid='e5428b04324b4c9fbfed02fbf78bb959',
    )
    for pid in (new_recid, new_report_number):
        db.session.add(pid)
    db.session.commit()

    # Run the sequence number update again.
    result = CliRunner().invoke(
        cli_sequence_generator, [current_year], obj=script_info)

    # Exactly one counter should now exist, positioned after the imported 5.
    assert result.exit_code == 0
    (counter,) = Counter.query.all()
    expected_template = 'CERN-MOVIE-{0}-{{counter}}'.format(current_year)
    assert counter.counter == 6
    assert counter.definition_name == 'project-v1_0_0'
    assert counter.template_instance == expected_template
    assert len(TemplateDefinition.query.all()) == 2
def audit_records(minimal_record, db):
    """Create four committed records, each with a recid and an OAI PID.

    :returns: Dict mapping 1..4 to the string id of the created record.
    """
    created = {}
    for number in range(1, 5):
        model = RecordMetadata()
        model.json = deepcopy(minimal_record)
        model.json['recid'] = number
        model.json['_oai'] = {
            'id': 'oai:{}'.format(number),
            'sets': [],
            'updated': datetime.utcnow().date().isoformat(),
        }
        db.session.add(model)
        db.session.commit()

        zenodo_record = ZenodoRecord(data=model.json, model=model)
        created[number] = str(zenodo_record.id)

        # Register both identifiers against the freshly committed record.
        recid_pid = PersistentIdentifier(
            pid_type='recid',
            pid_value=str(number),
            status='R',
            object_type='rec',
            object_uuid=model.id,
        )
        oai_pid = PersistentIdentifier(
            pid_type='oai',
            pid_value=model.json['_oai']['id'],
            status='R',
            object_type='rec',
            object_uuid=model.id,
        )
        for pid in (recid_pid, oai_pid):
            db.session.add(pid)
        db.session.commit()
    return created
def test_serialize_search(serializer):
    """Serialize search hits and single OAI-PMH hits that carry DOIs."""

    def pid_fetcher(obj_uuid, data):
        assert obj_uuid in ['a', 'b']
        return PersistentIdentifier(pid_type='doi', pid_value=data['doi'])

    s = serializer(SimpleSchema)

    search_result = {
        'hits': {
            'hits': [
                {'_source': {'doi': '10.1234/a'}, '_id': 'a', '_version': 1},
                {'_source': {'doi': '10.1234/b'}, '_id': 'b', '_version': 1},
            ],
            'total': 2,
        },
        'aggregations': {},
    }
    output = s.serialize_search(pid_fetcher, search_result)
    assert """<identifier identifierType="DOI">10.1234/a</identifier>""" in output
    assert """<identifier identifierType="DOI">10.1234/b</identifier>""" in output

    # Plain serializer: a single hit yields one identifier element.
    plain_tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type='doi', pid_value='10.1234/b'),
        {'_source': {'doi': '10.1234/b'}, '_id': 'b', '_version': 1},
    )
    assert len(plain_tree.xpath('/resource/identifier')) == 1

    # OAI DataCite wrapper around the same serializer.
    oai_serializer = OAIDataCiteSerializer(serializer=s, datacentre='CERN')
    wrapped_tree = oai_serializer.serialize_oaipmh(
        PersistentIdentifier(pid_type='doi', pid_value='10.1234/b'),
        {'_source': {'doi': '10.1234/b'}, '_id': 'b', '_version': 1},
    )
    assert len(wrapped_tree.xpath('/oai_datacite/datacentreSymbol')) == 1
def test_bai_create_retries_on_bais_in_db_change():
    """Check that BAI creation is attempted again after an IntegrityError.

    The patched parent ``create`` raises ``IntegrityError`` on the first call
    and returns a provider on the second; the test expects two calls and the
    PID value of the second attempt.
    """
    with mock.patch(
        "inspirehep.pidstore.providers.bai.current_app"
    ) as app_patch, mock.patch(
        "inspirehep.pidstore.providers.bai.InspireBAIProvider.next_bai_number"
    ) as next_number_patch, mock.patch(
        "inspirehep.pidstore.providers.bai.InspireBAIProvider.query_pid_value"
    ) as query_patch, mock.patch(
        "inspirehep.pidstore.providers.bai.super"
    ) as super_patch:
        app_patch.config = {"PIDSTORE_BAI_RETRY_DELAY": 0}
        next_number_patch.side_effect = [123, 124]
        expected_pid_value = "Test.124"

        registered_pid = PersistentIdentifier(
            pid_type="bai",
            pid_value=expected_pid_value,
            pid_provider="bai",
            status=PIDStatus.REGISTERED,
        )
        parent_create = super_patch.return_value.create
        parent_create.side_effect = [
            IntegrityError(None, None, None),
            InspireBAIProvider(registered_pid),
        ]
        query_patch.return_value = []

        author_data = {"name": {"value": "test"}}
        created_bai = InspireBAIProvider.create(data=author_data)

        assert created_bai.pid.pid_value == expected_pid_value
        assert parent_create.call_count == 2
def test_record_endpoint_by_db_pid(inspire_app):
    """Find the endpoint of an "aut" record from its recid PID alone."""
    author = create_record("aut")
    control_number = str(author["control_number"])
    pid = PersistentIdentifier(pid_type="recid", pid_value=control_number)

    assert find_record_endpoint(pid) == "authors"
def test_serialize_search():
    """Serialize search hits and one OAI-PMH hit with the Dublin Core serializer."""

    def pid_fetcher(obj_uuid, data):
        assert obj_uuid in ['a', 'b']
        return PersistentIdentifier(pid_type='doi', pid_value='a')

    search_result = {
        'hits': {
            'hits': [
                {'_source': {'titles': ['A']}, '_id': 'a', '_version': 1},
                {'_source': {'titles': ['B']}, '_id': 'b', '_version': 1},
            ],
            'total': 2,
        },
        'aggregations': {},
    }
    search_serializer = DublinCoreSerializer(SimpleSchema)
    output = search_serializer.serialize_search(pid_fetcher, search_result)
    assert """<dc:title>A</dc:title>""" in output
    assert """<dc:title>B</dc:title>""" in output

    oai_serializer = DublinCoreSerializer(SimpleSchema)
    oai_pid = PersistentIdentifier(pid_type='doi', pid_value='10.1234/b')
    oai_hit = {'_source': {'titles': ['B']}, '_id': 'b', '_version': 1}
    tree = oai_serializer.serialize_oaipmh(oai_pid, oai_hit)
    assert len(tree) == 1
def oaiid_pid():
    """Build an 'oai' PID with status 'R' bound to a random record UUID."""
    pid = PersistentIdentifier(
        pid_type='oai',
        pid_value='oai:zenodo.org:123',
        status='R',
        object_type='rec',
        object_uuid=uuid4(),
    )
    return pid
def depid_pid():
    """Build a 'depid' PID with status 'R' bound to a random record UUID."""
    pid = PersistentIdentifier(
        pid_type='depid',
        pid_value='321',
        status='R',
        object_type='rec',
        object_uuid=uuid4(),
    )
    return pid
def test_serialize_with_extra_col_other_separator():
    """Serialize to CSV with "." as header separator, excluding or including fields."""
    excluded = [
        "related.1.pid",
        "related.1.label",
    ]
    included = ["langs", "title.title", "title.subtitle"]

    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    rec = Record(RECORD_2)

    # First pass: drop the excluded columns.
    excluding_serializer = CSVSerializer(
        SimpleSchema, csv_excluded_fields=excluded, header_separator=".")
    headers, first_row = list(excluding_serializer.serialize(pid, rec))

    expected_headers = (
        "description,extra.key,extra.value,langs.0,langs.1,langs.2,number,"
        "related.0.label,related.0.pid,related.2.label,related.2.pid,"
        "related.3.label,related.3.pid,title.subtitle,"
        "title.title"
    )
    expected_row = (
        "\"A very, very 'long' description, with some \"\"quotes\"\".\",An"
        " extra value,\"And special chars:¥,§, Æ, ®,m²☯⊋\",en,fr,de,2,"
        "Relation A,55,"
        "Relation C,52,"
        "Relation D,78,The subtitle 2,A title 2"
    )
    assert headers.rstrip() == expected_headers
    assert first_row.rstrip() == expected_row

    # Second pass: keep only the included columns.
    including_serializer = CSVSerializer(
        SimpleSchema, csv_included_fields=included, header_separator=".")
    headers, first_row = list(including_serializer.serialize(pid, rec))

    expected_headers = (
        "langs.0,langs.1,langs.2,"
        "title.subtitle,title.title"
    )
    assert headers.rstrip() == expected_headers
    assert first_row.rstrip() == "en,fr,de,The subtitle 2,A title 2"
def test_preprocessor_mixin_searchhit():
    """Preprocess search hits with and without _created/_updated in the source."""
    pid = PersistentIdentifier(
        pid_type='doi', pid_value='10.1234/foo', status='R')

    timestamped_hit = {
        '_source': {
            'title': 'test',
            '_created': '2015-10-01T11:11:11.000001+00:00',
            '_updated': '2015-12-01T11:11:11.000001+00:00',
        },
        '_version': 1,
    }
    data = PreprocessorMixin.preprocess_search_hit(pid, timestamped_hit)

    missing = [key for key in keys if key not in data]
    assert not missing
    metadata = data['metadata']
    assert metadata['title'] == 'test'
    assert data['created'] == '2015-10-01T11:11:11.000001+00:00'
    assert data['revision'] == 1
    # The timestamp keys must not remain in the metadata.
    assert '_created' not in metadata
    assert '_updated' not in metadata

    bare_hit = {
        '_source': {'title': 'test'},
        '_version': 1,
    }
    data = PreprocessorMixin.preprocess_search_hit(pid, bare_hit)
    assert data['created'] is None
    assert data['updated'] is None
def test_uses_first_schema_that_returns_true_for_condition_that_uses_data():
    """Pick the first schema whose condition on the record data holds.

    The record has types [2, 3], so the condition for 1 fails and the schema
    paired with 2 is expected to be used.
    """

    class BaseMetadataSchema(Schema):
        metadata = fields.Method("get_metadata")

    class Schema1(BaseMetadataSchema):
        def get_metadata(self, data):
            return {"field1": data["metadata"]["field1"]}

    class Schema2(BaseMetadataSchema):
        def get_metadata(self, data):
            return {"field2": data["metadata"]["field2"]}

    class Schema3(BaseMetadataSchema):
        def get_metadata(self, data):
            return {"field3": data["metadata"]["field3"]}

    def has_type(type_id):
        # Build a condition checking for type_id among the record's types.
        return lambda data: type_id in data["metadata"]["types"]

    record_data = {
        "field1": "value1",
        "field2": "value2",
        "field3": "value3",
        "types": [2, 3],
    }
    serializer = ConditionalMultiSchemaJSONSerializer([
        (has_type(1), Schema1),
        (has_type(2), Schema2),
        (has_type(3), Schema3),
    ])

    pid = PersistentIdentifier(pid_type="recid", pid_value="1")
    raw = serializer.serialize(pid, InspireRecord(record_data))
    serialized = json.loads(raw)

    assert serialized["metadata"] == {"field2": "value2"}
def test_detail_links_factory_generates_proper_links(inspire_app,
                                                     override_config):
    """Expect one link per JOBS serializer alias for a "job" PID."""
    jobs_aliases = {
        "format1": "format/1",
        "format2": "format/2",
        "format3": "format/3",
    }
    literature_aliases = {
        "format4": "format/4",
        "format5": "format/5",
    }
    config = {
        "JOBS": {"record_serializers_aliases": jobs_aliases},
        "LITERATURE": {"record_serializers_aliases": literature_aliases},
    }
    expected_links = {
        "format1": "http://localhost:5000/jobs/1?format=format1",
        "format2": "http://localhost:5000/jobs/1?format=format2",
        "format3": "http://localhost:5000/jobs/1?format=format3",
    }

    with override_config(**config):
        job_pid = PersistentIdentifier(pid_type="job", pid_value=1)
        links = inspire_detail_links_factory(job_pid)

    assert links == expected_links
def test_detail_links_factory_generates_proper_additional_links(inspire_app):
    """Expect alias links plus the configured extra link for a "lit" PID."""
    jobs_aliases = {
        "format1": "format/1",
        "format2": "format/2",
        "format3": "format/3",
    }
    literature_aliases = {"format4": "format/4", "format5": "format/5"}
    extra_links = {
        "LITERATURE": {"citations": "inspirehep_records.literature_citations"}
    }
    config = {
        "JOBS": {"record_serializers_aliases": jobs_aliases},
        "LITERATURE": {"record_serializers_aliases": literature_aliases},
        "ADDITIONAL_LINKS": extra_links,
    }
    expected_links = {
        "format4": "http://localhost:5000/literature/1?format=format4",
        "format5": "http://localhost:5000/literature/1?format=format5",
        "citations": "http://localhost:5000/literature/1/citations",
    }

    with override_config(**config):
        literature_pid = PersistentIdentifier(pid_type="lit", pid_value=1)
        links = inspire_detail_links_factory(literature_pid)

    assert links == expected_links
def resolve(self, pid_value):
    """Resolve a record directly from its UUID, without querying PIDStore.

    :param pid_value: Persistent identifier, either a ``uuid.UUID`` or a
        value accepted by ``uuid.UUID()``.
    :returns: A tuple containing (pid, object).
    """
    already_uuid = isinstance(pid_value, uuid.UUID)
    object_uuid = pid_value if already_uuid else uuid.UUID(pid_value)
    pid_text = str(pid_value)

    # todo: raise better error messages?
    # todo: create a pid wrapper
    # todo: handle exceptions (e.g. no results for getting record is
    #       detected here)
    pid = PersistentIdentifier(
        pid_type="recid",
        pid_value=pid_text,
        object_type="rec",
        object_uuid=object_uuid,
        status=PIDStatus.REGISTERED,
    )
    resolved_object = self.object_getter(object_uuid)
    return pid, resolved_object
def test_transform_search_hit():
    """Check that transform_record merges marshmallow_context into the output."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})
    serializer = SimpleMarshmallowSerializer(_TestSchema)

    data = serializer.transform_record(
        pid, record, marshmallow_context={'author': 'test2'})

    assert data == {'title': 'test', 'author': 'test2'}
def test_serialize_oaipmh():
    """Test MARCXML serialization of a single OAI-PMH hit.

    The first child of the returned element is expected to carry the PID
    value as its text.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)
    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type='recid', pid_value='2'),
        {'_source': Record({'title': 'test'})})
    # Index the element directly: Element.getchildren() is deprecated.
    assert tree[0].text == '2'
def test_minimal_record(app, db, minimal_record):
    """Dump a minimal record through the MARCXML v1 schema and serialize it."""
    # Create record and pid.
    record = Record.create(minimal_record)
    record.model.updated = datetime.utcnow()
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    assert record.validate() is None

    latest_transaction = record.model.updated.strftime("%Y%m%d%H%M%S.0")
    expected_leader = {
        'base_address_of_data': '00000',
        'bibliographic_level': 'monograph_item',
        'character_coding_scheme': 'marc-8',
        'descriptive_cataloging_form': 'unknown',
        'encoding_level': 'unknown',
        'indicator_count': 2,
        'length_of_the_implementation_defined_portion': 0,
        'length_of_the_length_of_field_portion': 4,
        'length_of_the_starting_character_position_portion': 5,
        'multipart_resource_record_level':
            'not_specified_or_not_applicable',
        'record_length': '00000',
        'record_status': 'new',
        'subfield_code_count': 2,
        'type_of_control': 'no_specified_type',
        'type_of_record': 'computer_file',
        'undefined': 0,
    }
    expected = {
        u'date_and_time_of_latest_transaction': latest_transaction,
        u'publication_distribution_imprint': [
            {'date_of_publication_distribution': record['publication_date']},
        ],
        u'control_number': '123',
        u'information_relating_to_copyright_status': {
            'copyright_status': 'open',
        },
        u'summary': {'summary': 'My description'},
        u'main_entry_personal_name': {'personal_name': 'Test'},
        u'resource_type': {'type': 'software'},
        u'title_statement': {'title': 'Test'},
        u'leader': expected_leader,
    }

    schema = marcxml_v1.schema_class()
    preprocessed = marcxml_v1.preprocess_record(pid=pid, record=record)
    data = schema.dump(preprocessed).data
    assert_dict(expected, data)

    # Full serialization must also run on the same record.
    marcxml_v1.serialize(pid=pid, record=record)
def test_redirect(logger, app):
    """Test redirection of persistent identifiers.

    Covers statuses that refuse redirection, redirecting to a PID that does
    not exist, a successful redirect, re-redirecting an already redirected
    PID, and the log call made when the database operation fails.
    """
    with app.app_context():
        # Two registered targets to redirect to.
        pid1 = PersistentIdentifier.create(
            'rec', '1', status=PIDStatus.REGISTERED, object_type='rec',
            object_uuid=uuid.uuid4())
        pid2 = PersistentIdentifier.create(
            'doi', '2', status=PIDStatus.REGISTERED, object_type='rec',
            object_uuid=uuid.uuid4())

        # Can't redirect these statuses
        i = 10
        for s in [
            PIDStatus.NEW,
            PIDStatus.RESERVED,
            PIDStatus.DELETED,
        ]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            pytest.raises(PIDInvalidAction, pid.redirect, pid1)

        # A registered PID is used for the remaining checks.
        pid = PersistentIdentifier.create(
            'rec', str(i), status=PIDStatus.REGISTERED)

        # Can't redirect to a non-existing pid.
        pytest.raises(PIDDoesNotExistError, pid.redirect,
                      PersistentIdentifier())

        pid.redirect(pid1)
        # The most recent info log call must describe the redirect.
        assert logger.info.call_args[0][0].startswith("Redirected")
        assert 'pid' in logger.info.call_args[1]['extra']
        assert pid.status == PIDStatus.REDIRECTED
        assert pid.object_type is None
        assert pid.object_uuid is not None
        new_pid = pid.get_redirect()
        assert new_pid.pid_type == 'rec'
        assert new_pid.pid_value == '1'

        # You can redirect an already redirected pid
        pid.redirect(pid2)
        new_pid = pid.get_redirect()
        assert new_pid.pid_type == 'doi'
        assert new_pid.pid_value == '2'

        # Assign with SQLError
        with patch('invenio_pidstore.models.db.session.begin_nested') as mock:
            mock.side_effect = SQLAlchemyError()
            pytest.raises(SQLAlchemyError, pid.redirect, '1')
        # The failure must have been reported through logger.exception.
        assert logger.exception.call_args[0][0].startswith(
            "Failed to redirect")
        assert 'pid' in logger.exception.call_args[1]['extra']
def test_transform_search_hit():
    """Check that transform_record applies a schema reading metadata.title."""

    class TestSchema(Schema):
        title = fields.Str(attribute='metadata.title')

    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})
    serializer = MarshmallowSerializer(TestSchema)

    data = serializer.transform_record(pid, record)

    assert data == {'title': 'test'}
def test_serialize(app):
    """Serialize a record to MARCXML and compare the decoded document."""
    pid = PersistentIdentifier(pid_type="recid", pid_value="2")
    record = Record({"title": "test"})
    serializer = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    xml_bytes = serializer.serialize(pid, record)

    expected_lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>\n",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n',
        u' <controlfield tag="001">2</controlfield>\n',
        u"</record>\n",
    ]
    assert xml_bytes.decode("utf8") == u"".join(expected_lines)
def test_serialize_oaipmh():
    """Test MARCXML serialization of a single OAI-PMH hit.

    The first child of the returned element is expected to carry the PID
    value as its text.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)
    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type="recid", pid_value="2"),
        {"_source": Record({"title": "test"})},
    )
    # Index the element directly: Element.getchildren() is deprecated.
    assert tree[0].text == "2"
def test_record_endpoint_by_schema_content(inspire_app):
    """Find the endpoint from the $schema URL carried by the search hit."""
    schema_url = "http://localhost:5000/schemas/records/institutions.json"
    hit = {"_source": {"$schema": schema_url}}
    pid = PersistentIdentifier(pid_type="recid", pid_value=1)

    assert find_record_endpoint(pid, record_hit=hit) == "institutions"
def test_serialize(app):
    """Serialize a record to MARCXML and compare the decoded document."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'title': 'test'})
    serializer = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    xml_bytes = serializer.serialize(pid, record)

    expected = (
        u"<?xml version='1.0' encoding='UTF-8'?>\n"
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n'
        u' <controlfield tag="001">2</controlfield>\n'
        u'</record>\n'
    )
    assert xml_bytes.decode('utf8') == expected
def get_test_data():
    """Return a (pid, record) pair describing a two-author book."""
    creators = [
        {'family_name': 'Doe', 'given_name': 'John'},
        {'family_name': 'Smith', 'given_name': 'Jane'},
    ]
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({
        'title': 'Citeproc test',
        'type': 'book',
        'creators': creators,
        'publication_date': [2016, 1, 1],
    })
    return pid, record
def test_serialize(app):
    """Serialize one record to Dublin Core, directly and as an OAI-PMH hit."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'titles': ['DC test']})

    xml = DublinCoreSerializer(SimpleSchema).serialize(pid, record)
    assert """<dc:title>DC test</dc:title>""" in xml

    oai_serializer = DublinCoreSerializer(SimpleSchema)
    oai_hit = {'_source': record}
    tree = oai_serializer.serialize_oaipmh(pid, oai_hit)
    assert len(tree) == 1
def test_serialize_no_schema_class():
    """Serialize to MARCXML with a serializer built without a schema class."""
    serializer = MARCXMLSerializer(to_marc21)
    record = Record({
        '__order__': ['control_number_identifier'],
        'control_number_identifier': 'SzGeCERN',
    })
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')

    xml_bytes = serializer.serialize(pid, record)

    expected = (
        u'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n'
        u' <controlfield tag="003">SzGeCERN</controlfield>\n'
        u'</record>\n'
    )
    assert xml_bytes.decode('utf8') == expected
def test_serialize():
    """Serialize to JSON with a schema reading the record title and PID value."""

    class TestSchema(Schema):
        title = fields.Str(attribute='metadata.mytitle')
        id = fields.Str(attribute='pid.pid_value')

    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'mytitle': 'test'})

    raw = JSONSerializer(TestSchema).serialize(pid, record)
    data = json.loads(raw)

    assert data['title'] == 'test'
    assert data['id'] == '2'
def test_serialize_no_schema_class():
    """Serialize to MARCXML with a serializer built without a schema class."""
    serializer = MARCXMLSerializer(to_marc21)
    record = Record({
        "__order__": ["control_number_identifier"],
        "control_number_identifier": "SzGeCERN",
    })
    pid = PersistentIdentifier(pid_type="recid", pid_value="1")

    xml_bytes = serializer.serialize(pid, record)

    expected_lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>\n",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n',
        u' <controlfield tag="003">SzGeCERN</controlfield>\n',
        u"</record>\n",
    ]
    assert xml_bytes.decode("utf8") == u"".join(expected_lines)
def test_transform_record_default_schema():
    """Transform a record with a serializer constructed without a schema."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})
    serializer = SimpleMarshmallowSerializer()

    data = serializer.transform_record(pid, record)

    expected = {
        'id': 1,
        'created': None,
        'links': {},
        'metadata': {'title': 'test'},
        'updated': None,
    }
    assert data == expected