def test_serialize():
    """Test JSON-LD serialization with and without ``expanded=False``."""

    def render(**options):
        # Build a fresh PID and record for every call so both runs are
        # fully independent of each other.
        serializer = JSONLDSerializer(
            CONTEXT, schema_class=_TestSchema, **options)
        payload = serializer.serialize(
            PersistentIdentifier(pid_type='recid', pid_value='2'),
            Record({'title': 'mytitle', 'recid': '2'}),
        )
        return json.loads(payload)

    # No ``expanded`` option given: full IRIs are expected.
    default_doc = render()
    assert default_doc == {
        '@id': 'http://localhost/record/2',
        'http://purl.org/dc/terms/title': [{'@value': 'mytitle'}],
    }

    # ``expanded=False``: the document carries its own ``@context``.
    compact_doc = render(expanded=False)
    assert compact_doc == {
        '@context': {
            '@base': 'http://localhost/record/',
            'dct': 'http://purl.org/dc/terms/',
            'recid': '@id',
            'title': 'dct:title',
        },
        'recid': '2',
        'title': 'mytitle',
    }
def test_default_deduplication_validator(app, simple_record, mocker):
    """Check matching with ``validator=None`` on two consecutive runs.

    ``invenio_matcher.api.execute`` is patched with ``duplicated_result``;
    each run is expected to yield exactly one ``MatchResult``.
    """
    from invenio_records import Record

    mocker.patch('invenio_matcher.api.execute', duplicated_result)

    with app.app_context():
        record = Record(simple_record)
        queries = [{'type': 'exact', 'match': 'title'}]

        # The second round tests the same again to see if validator resets.
        for _ in range(2):
            expected = [MatchResult(1, record, 1)]
            result = list(match(
                record,
                index="records",
                doc_type="record",
                queries=queries,
                validator=None,
            ))
            assert expected == result
def test_preprocessor_mixin_record(app, db):
    """Test record preprocessing, with and without reference replacement."""
    source = {'title': 'test', 'aref': {'$ref': '#/title'}}
    pid, record = create_record(source)
    record.model.created = datetime(2015, 10, 1, 11, 11, 11, 1)
    db.session.commit()

    # Stored record: all expected top-level keys are present and the JSON
    # reference is left untouched.
    data = PreprocessorMixin().preprocess_record(pid, record)
    missing = [key for key in keys if key not in data]
    assert not missing

    metadata = data['metadata']
    assert metadata['title'] == 'test'
    assert metadata['aref'] == {'$ref': '#/title'}
    assert data['created'] == '2015-10-01T11:11:11.000001+00:00'
    assert data['revision'] == 1

    # Record built directly in memory, preprocessed with ``replace_refs``:
    # no timestamps are expected and the reference is resolved.
    in_memory = Record({
        'title': 'test2',
        'aref': {'$ref': '#/title'},
    })
    mixin = PreprocessorMixin(replace_refs=True)
    data = mixin.preprocess_record(pid, in_memory)
    assert data['created'] is None
    assert data['updated'] is None
    assert data['metadata']['aref'] == 'test2'
def test_serialize_with_extra_col_other_separator():
    """Test CSV serialization with a ``.`` header separator.

    Covers both the excluded-fields and the included-fields options.
    """
    excluded = ["related.1.pid", "related.1.label"]
    included = ["langs", "title.title", "title.subtitle"]

    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    rec = Record(RECORD_2)

    # --- excluded fields -------------------------------------------------
    serializer = CSVSerializer(
        SimpleSchema,
        csv_excluded_fields=excluded,
        header_separator=".",
    )
    header_line, first_row = serializer.serialize(pid, rec)

    expected_headers = (
        "description,extra.key,extra.value,langs.0,langs.1,langs.2,number,"
        "related.0.label,related.0.pid,related.2.label,related.2.pid,"
        "related.3.label,related.3.pid,title.subtitle,"
        "title.title"
    )
    assert expected_headers == header_line.rstrip()

    expected_row = (
        "\"A very, very 'long' description, with some \"\"quotes\"\".\",An"
        " extra value,\"And special chars:¥,§, Æ, ®,m²☯⊋\",en,fr,de,2,"
        "Relation A,55,"
        "Relation C,52,"
        "Relation D,78,The subtitle 2,A title 2"
    )
    assert expected_row == first_row.rstrip()

    # --- included fields -------------------------------------------------
    serializer = CSVSerializer(
        SimpleSchema,
        csv_included_fields=included,
        header_separator=".",
    )
    header_line, first_row = serializer.serialize(pid, rec)

    expected_headers = (
        "langs.0,langs.1,langs.2,"
        "title.subtitle,title.title"
    )
    assert expected_headers == header_line.rstrip()
    assert "en,fr,de,The subtitle 2,A title 2" == first_row.rstrip()
def index_after_commit(sender, changes):
    """Index records automatically after each modification.

    Only ``RecordMetadata`` instances are considered. An inserted or
    updated instance that still carries JSON is (re)indexed; every other
    change triggers a delete from the index.

    :param sender: Not used by this function.
    :param changes: Iterable of ``(model_instance, change)`` pairs.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue

        if change in ('insert', 'update') and model_instance.json:
            indexer.index(Record(model_instance.json, model_instance))
            continue

        try:
            indexer.delete(Record(model_instance.json, model_instance))
        except NotFoundError:
            # Record not found in ES.
            # This branch is also reached when ``json`` is falsy, so guard
            # the lookup: calling ``.get`` on ``None`` would raise an
            # AttributeError from inside the handler.
            control_number = (model_instance.json or {}).get(
                'control_number')
            current_app.logger.warning(
                'Record with id "%s" not found in ElasticSearch',
                control_number)
def test_transform_search_hit():
    """Test record transformation with an extra marshmallow context."""
    serializer = SimpleMarshmallowSerializer(_TestSchema)
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = serializer.transform_record(
        pid, record, marshmallow_context={'author': 'test2'})

    # The ``author`` value supplied through the context shows up in the
    # output next to the record's own title.
    assert transformed == {'title': 'test', 'author': 'test2'}
def test_execute_missing_data(app):
    """Executing a query against an empty record returns no results."""
    with app.app_context():
        exact_title = {'type': 'exact', 'match': 'title'}
        empty_record = Record({})

        outcome = execute("records", "record", exact_title, empty_record)

        assert outcome == []
def test_parse_invalid_query(app, simple_record):
    """A query holding only a ``type`` key is rejected with InvalidQuery."""
    with app.app_context():
        malformed = {'type': 'exact'}
        record = Record(simple_record)

        with pytest.raises(InvalidQuery) as excinfo:
            _parse(malformed, record)

        message = str(excinfo.value)
        assert 'not defined in query' in message
def test_serialize_oaipmh():
    """Test MARCXML serialization for OAI-PMH.

    The first child element of the returned tree is expected to hold the
    PID value.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type='recid', pid_value='2'),
        {'_source': Record({'title': 'test'})})

    # Index the element directly: ``getchildren()`` is deprecated in
    # favour of indexing or iterating over the element itself.
    assert tree[0].text == '2'
def test_execute_missing_data_in_key(app):
    """A record whose matched key holds ``None`` returns no results."""
    with app.app_context():
        exact_title = {'type': 'exact', 'match': 'title'}
        record_without_title = Record({'title': None})

        outcome = execute(
            "records", "record", exact_title, record_without_title)

        assert outcome == []
def test_match_no_queries(app, simple_record):
    """Matching without passing any query raises ``NoQueryDefined``."""
    from invenio_records import Record

    target = {'index': "records", 'doc_type': "record"}

    with app.app_context():
        record = Record(simple_record)

        with pytest.raises(NoQueryDefined) as excinfo:
            # ``match`` is consumed with ``list`` so that it actually runs.
            list(match(record, **target))

        message = str(excinfo.value)
        assert 'No query defined' in message
def test_transform_search_hit():
    """Test record transformation through a locally defined schema."""

    class TestSchema(Schema):
        # Reads the title from the ``metadata`` part of the input.
        title = fields.Str(attribute='metadata.title')

    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = MarshmallowSerializer(TestSchema).transform_record(
        pid, record)

    assert transformed == {'title': 'test'}
def test_serialize(app):
    """Test MARCXML serialization of a single record."""
    serializer = MARCXMLSerializer(to_marc21, schema_class=MySchema)
    pid = PersistentIdentifier(pid_type="recid", pid_value="2")
    record = Record({"title": "test"})

    raw = serializer.serialize(pid, record)

    # Expected document, one entry per output line; each line is
    # terminated by a newline.
    xml_lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">',
        u' <controlfield tag="001">2</controlfield>',
        u"</record>",
    ]
    expected = u"".join(line + u"\n" for line in xml_lines)

    assert raw.decode("utf8") == expected
def test_serialize(app):
    """Test MARCXML serialization of a single record."""
    marcxml = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    output = marcxml.serialize(
        PersistentIdentifier(pid_type='recid', pid_value='2'),
        Record({'title': 'test'}),
    )

    # The serializer returns bytes; compare against the decoded text.
    expected = (
        u"<?xml version='1.0' encoding='UTF-8'?>\n"
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n'
        u' <controlfield tag="001">2</controlfield>\n'
        u'</record>\n'
    )
    assert output.decode('utf8') == expected
def test_execute_missing_type(app, simple_record):
    """An unknown query type makes ``execute`` raise NotImplementedQuery."""
    with app.app_context():
        unknown_type_query = {'type': 'banana', 'match': 'title'}
        record = Record(simple_record)

        with pytest.raises(NotImplementedQuery):
            execute("records", "record", unknown_type_query, record)
def test_serialize_oaipmh():
    """Test MARCXML serialization for OAI-PMH.

    The first child element of the returned tree is expected to hold the
    PID value.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type="recid", pid_value="2"),
        {"_source": Record({"title": "test"})},
    )

    # Index the element directly: ``getchildren()`` is deprecated in
    # favour of indexing or iterating over the element itself.
    assert tree[0].text == "2"
def test_serialize(app):
    """Test Dublin Core serialization, plain and for OAI-PMH."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'titles': ['DC test']})

    # Plain serialization: the title element must appear in the output.
    serialized = DublinCoreSerializer(SimpleSchema).serialize(pid, record)
    assert '<dc:title>DC test</dc:title>' in serialized

    # OAI-PMH serialization: the resulting tree has exactly one child.
    oai_serializer = DublinCoreSerializer(SimpleSchema)
    oai_tree = oai_serializer.serialize_oaipmh(pid, {'_source': record})
    assert len(oai_tree) == 1
def test_match_no_queries_config(app, simple_record):
    """An empty query list in the config raises ``NoQueryDefined``."""
    from invenio_records import Record

    # Configure the index/doc_type pair with no queries at all.
    app.config.update({'MATCHER_QUERIES': {"records": {"record": []}}})

    with app.app_context():
        record = Record(simple_record)

        with pytest.raises(NoQueryDefined) as excinfo:
            list(match(record, index="records", doc_type="record"))

        message = str(excinfo.value)
        assert 'No query passed or defined' in message
def get_test_data():
    """Return a sample ``(pid, record)`` pair describing a two-author book."""
    creators = [
        {'family_name': family, 'given_name': given}
        for family, given in (('Doe', 'John'), ('Smith', 'Jane'))
    ]
    metadata = {
        'title': 'Citeproc test',
        'type': 'book',
        'creators': creators,
        'publication_date': [2016, 1, 1],
    }
    return (
        PersistentIdentifier(pid_type='recid', pid_value='1'),
        Record(metadata),
    )
def test_parse_query_accepts_dotted_values(app, record):
    """A dotted ``match`` path is resolved against the record."""
    with app.app_context():
        dotted_query = {'type': 'exact', 'match': 'titles.title'}
        wrapped = Record(record)

        parsed = _parse(dotted_query, wrapped)
        _type, match, values, extras = parsed

        assert _type == 'exact'
        assert match == 'titles.title'
        assert values == ['foo bar']
        assert extras == {}
def test_parse_query_with_extras(app, simple_record):
    """Unknown query keys are returned untouched as extras."""
    with app.app_context():
        query_with_extra = {
            'type': 'exact',
            'match': 'title',
            'foo': 'bar',
        }
        record = Record(simple_record)

        parsed = _parse(query_with_extra, record)
        _type, match, values, extras = parsed

        assert _type == 'exact'
        assert match == 'title'
        assert values == ['foo bar']
        assert extras == {'foo': 'bar'}
def test_parse_query_can_override_values(app, simple_record):
    """Explicit ``values`` in the query are returned as the parsed values."""
    with app.app_context():
        overriding_query = {
            'type': 'exact',
            'match': 'title',
            'values': ['qux quux'],
        }
        record = Record(simple_record)

        parsed = _parse(overriding_query, record)
        _type, match, values, extras = parsed

        assert _type == 'exact'
        assert match == 'title'
        assert values == ['qux quux']
        assert extras == {}
def test_parse_query_with_with(app, simple_record):
    """Test parsing of a query that carries the ``with`` keyword."""
    with app.app_context():
        query_using_with = {
            'type': 'exact',
            'match': 'title',
            'with': 'titles.title',
        }
        record = Record(simple_record)

        parsed = _parse(query_using_with, record)
        _type, match, values, extras = parsed

        assert _type == 'exact'
        assert match == 'titles.title'
        assert values == ['foo bar']
        assert extras == {}
def test_serialize():
    """Test JSON serialization through a locally defined schema."""

    class TestSchema(Schema):
        # ``title`` comes from the record metadata, ``id`` from the PID.
        title = fields.Str(attribute='metadata.mytitle')
        id = fields.Str(attribute='pid.pid_value')

    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'mytitle': 'test'})

    payload = JSONSerializer(TestSchema).serialize(pid, record)
    decoded = json.loads(payload)

    assert decoded['title'] == 'test'
    assert decoded['id'] == '2'
def test_serialize_no_schema_class():
    """Test MARCXML serialization when no record schema is supplied."""
    serializer = MARCXMLSerializer(to_marc21)
    record = Record({
        '__order__': ['control_number_identifier'],
        'control_number_identifier': 'SzGeCERN',
    })
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')

    output = serializer.serialize(pid, record)

    # The serializer returns bytes; compare against the decoded text.
    expected = (
        u"<?xml version='1.0' encoding='UTF-8'?>\n"
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n'
        u' <controlfield tag="003">SzGeCERN</controlfield>\n'
        u'</record>\n'
    )
    assert output.decode('utf8') == expected
def test_match_with_passed_queries(app, simple_record, mocker):
    """Match using queries supplied directly as an argument."""
    from invenio_records import Record

    # Replace the engine search with the ``one_search_result`` stub.
    mocker.patch('invenio_matcher.engine.search', one_search_result)

    with app.app_context():
        record = Record(simple_record)
        passed_queries = [{'type': 'exact', 'match': 'title'}]
        expected = [MatchResult(1, record, 1)]

        matches = match(
            record,
            index="records",
            doc_type="record",
            queries=passed_queries,
        )
        result = list(matches)

        assert expected == result
def test_transform_record_default_schema():
    """Test record transformation when the serializer gets no schema."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = SimpleMarshmallowSerializer().transform_record(pid, record)

    expected = {
        'id': 1,
        'created': None,
        'links': {},
        'metadata': {'title': 'test'},
        'updated': None,
    }
    assert transformed == expected
def test_execute_free(app, simple_record, mocker):
    """A query of type ``free`` is dispatched and returns the search hits."""
    # Replace the engine search with the ``empty_search_result`` stub.
    mocker.patch('invenio_matcher.engine.search', empty_search_result)

    with app.app_context():
        free_query = {'type': 'free', 'match': 'title'}
        record = Record(simple_record)

        outcome = execute("records", "record", free_query, record)

        assert outcome == []
def test_match_with_configured_queries(app, simple_record, matcher_config,
                                       mocker):
    """Match using the queries taken from the application config."""
    from invenio_records import Record

    # Replace the engine search with the ``empty_search_result`` stub.
    mocker.patch('invenio_matcher.engine.search', empty_search_result)
    app.config.update({'MATCHER_QUERIES': matcher_config})

    with app.app_context():
        record = Record(simple_record)
        expected = []

        matches = match(record, index="records", doc_type="record")
        result = list(matches)

        assert expected == result
def test_serialize_no_schema_class():
    """Test MARCXML serialization when no record schema is supplied."""
    fields_in_order = ["control_number_identifier"]
    record = Record({
        "__order__": fields_in_order,
        "control_number_identifier": "SzGeCERN",
    })
    serializer = MARCXMLSerializer(to_marc21)

    raw = serializer.serialize(
        PersistentIdentifier(pid_type="recid", pid_value="1"), record)

    # Expected document, one entry per output line; each line is
    # terminated by a newline.
    xml_lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">',
        u' <controlfield tag="003">SzGeCERN</controlfield>',
        u"</record>",
    ]
    expected = u"".join(line + u"\n" for line in xml_lines)

    assert raw.decode("utf8") == expected