Ejemplo n.º 1
0
def test_serialize():
    """Serialize a record to JSON-LD with default options and expanded=False."""

    def _dump(serializer):
        # Build a fresh PID and record for every call, then decode the output.
        pid = PersistentIdentifier(pid_type='recid', pid_value='2')
        record = Record({'title': 'mytitle', 'recid': '2'})
        return json.loads(serializer.serialize(pid, record))

    # Default options: output uses full IRIs and carries no '@context'.
    default_doc = _dump(JSONLDSerializer(CONTEXT, schema_class=_TestSchema))
    expected_default = {
        '@id': 'http://localhost/record/2',
        'http://purl.org/dc/terms/title': [{'@value': 'mytitle'}],
    }
    assert default_doc == expected_default

    # expanded=False: output keeps the short keys plus the '@context'.
    compact_doc = _dump(
        JSONLDSerializer(CONTEXT, schema_class=_TestSchema, expanded=False))
    expected_context = {
        '@base': 'http://localhost/record/',
        'dct': 'http://purl.org/dc/terms/',
        'recid': '@id',
        'title': 'dct:title',
    }
    assert compact_doc == {
        '@context': expected_context,
        'recid': '2',
        'title': 'mytitle',
    }
Ejemplo n.º 2
0
def test_default_deduplication_validator(app, simple_record, mocker):
    """Run the same match twice and expect a single result each time.

    ``invenio_matcher.api.execute`` is patched with ``duplicated_result``
    and ``validator=None`` is passed to ``match`` on both passes.
    """
    from invenio_records import Record
    mocker.patch('invenio_matcher.api.execute', duplicated_result)

    with app.app_context():
        record = Record(simple_record)
        match_options = {
            'index': "records",
            'doc_type': "record",
            'queries': [{'type': 'exact', 'match': 'title'}],
            'validator': None,
        }

        # The second pass repeats the first to see if the validator resets.
        for _ in range(2):
            expected = [MatchResult(1, record, 1)]
            result = list(match(record, **match_options))

            assert expected == result
Ejemplo n.º 3
0
def test_preprocessor_mixin_record(app, db):
    """Check ``PreprocessorMixin.preprocess_record`` on two kinds of record.

    First a record obtained from ``create_record`` (with a model), then a
    bare ``Record`` processed with ``replace_refs=True``.
    """
    source = {'title': 'test', 'aref': {'$ref': '#/title'}}
    pid, record = create_record(source)
    record.model.created = datetime(2015, 10, 1, 11, 11, 11, 1)
    db.session.commit()

    processed = PreprocessorMixin().preprocess_record(pid, record)
    for expected_key in keys:
        assert expected_key in processed

    metadata = processed['metadata']
    assert metadata['title'] == 'test'
    assert metadata['aref'] == {'$ref': '#/title'}
    assert processed['created'] == '2015-10-01T11:11:11.000001+00:00'
    assert processed['revision'] == 1

    # Bare Record: no timestamps expected, and the '$ref' gets replaced.
    resolving_mixin = PreprocessorMixin(replace_refs=True)
    bare_record = Record({'title': 'test2', 'aref': {'$ref': '#/title'}})
    processed = resolving_mixin.preprocess_record(pid, bare_record)
    assert processed['created'] is None
    assert processed['updated'] is None
    assert processed['metadata']['aref'] == 'test2'
Ejemplo n.º 4
0
def test_serialize_with_extra_col_other_separator():
    """Test CSV serialization using "." as the header separator.

    The serializer is run once with excluded fields and once with included
    fields; headers and the single data row are checked each time.
    """
    excluded = ["related.1.pid", "related.1.label"]
    included = ["langs", "title.title", "title.subtitle"]

    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    rec = Record(RECORD_2)

    # Pass 1: drop the excluded columns.
    excluding_serializer = CSVSerializer(
        SimpleSchema, csv_excluded_fields=excluded, header_separator=".")
    headers, row_1 = list(excluding_serializer.serialize(pid, rec))

    expected_headers = (
        "description,extra.key,extra.value,langs.0,langs.1,langs.2,number,"
        "related.0.label,related.0.pid,related.2.label,related.2.pid,"
        "related.3.label,related.3.pid,title.subtitle,"
        "title.title"
    )
    expected_row = (
        "\"A very, very 'long' description, with some \"\"quotes\"\".\",An"
        " extra value,\"And special chars:¥,§, Æ, ®,m²☯⊋\",en,fr,de,2,"
        "Relation A,55,"
        "Relation C,52,"
        "Relation D,78,The subtitle 2,A title 2"
    )
    assert expected_headers == headers.rstrip()
    assert expected_row == row_1.rstrip()

    # Pass 2: keep only the included columns.
    including_serializer = CSVSerializer(
        SimpleSchema, csv_included_fields=included, header_separator=".")
    headers, row_1 = list(including_serializer.serialize(pid, rec))

    expected_headers = "langs.0,langs.1,langs.2,title.subtitle,title.title"
    assert expected_headers == headers.rstrip()
    assert "en,fr,de,The subtitle 2,A title 2" == row_1.rstrip()
Ejemplo n.º 5
0
def index_after_commit(sender, changes):
    """Index records automatically after each modification.

    :param sender: Not used by this handler.
    :param changes: Iterable of ``(model_instance, change)`` pairs. Only
        ``RecordMetadata`` instances are processed; anything else is skipped.

    A record is indexed when the change is ``'insert'`` or ``'update'`` and
    its JSON is non-empty. In every other case it is removed from the index,
    and a missing document is logged as a warning instead of raising.
    """
    indexer = RecordIndexer()
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue

        if change in ('insert', 'update') and model_instance.json:
            indexer.index(Record(model_instance.json, model_instance))
            continue

        try:
            indexer.delete(Record(model_instance.json, model_instance))
        except NotFoundError:
            # Record not found in ES. This branch is also reached when the
            # JSON is falsy (possibly None), so guard the lookup to keep the
            # warning itself from raising AttributeError.
            control_number = (model_instance.json or {}).get('control_number')
            current_app.logger.warning(
                'Record with id "%s" not found in ElasticSearch',
                control_number)
def test_transform_search_hit():
    """Transform a record, passing extra data via ``marshmallow_context``."""
    serializer = SimpleMarshmallowSerializer(_TestSchema)
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = serializer.transform_record(
        pid, record, marshmallow_context={'author': 'test2'})

    assert transformed == {'title': 'test', 'author': 'test2'}
Ejemplo n.º 7
0
def test_execute_missing_data(app):
    """Executing a query against an empty record returns an empty list."""
    with app.app_context():
        exact_title = {'type': 'exact', 'match': 'title'}
        empty_record = Record({})

        found = execute("records", "record", exact_title, empty_record)

        assert found == []
Ejemplo n.º 8
0
def test_parse_invalid_query(app, simple_record):
    """Raise for malformed queries.

    The query only carries a ``type`` key, and ``_parse`` is expected to
    raise ``InvalidQuery`` with a message containing
    ``'not defined in query'``.
    """
    with app.app_context():
        query = {'type': 'exact'}
        record = Record(simple_record)

        with pytest.raises(InvalidQuery) as excinfo:
            # The return value is irrelevant: the call itself must raise.
            _parse(query, record)
        assert 'not defined in query' in str(excinfo.value)
def test_serialize_oaipmh():
    """Test MARCXML serialization for OAI-PMH.

    The record is handed over wrapped in a ``{'_source': ...}`` mapping and
    the first child of the returned element must hold the PID value.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type='recid', pid_value='2'),
        {'_source': Record({'title': 'test'})})

    # Index the element directly: ``getchildren()`` is deprecated.
    assert tree[0].text == '2'
Ejemplo n.º 10
0
def test_execute_missing_data_in_key(app):
    """Executing a query whose matched key holds None returns an empty list."""
    with app.app_context():
        exact_title = {'type': 'exact', 'match': 'title'}
        record_without_title = Record({'title': None})

        found = execute("records", "record", exact_title,
                        record_without_title)

        assert found == []
Ejemplo n.º 11
0
def test_match_no_queries(app, simple_record):
    """Expect ``NoQueryDefined`` when ``match`` is called without queries."""
    from invenio_records import Record

    with app.app_context():
        rec = Record(simple_record)

        with pytest.raises(NoQueryDefined) as excinfo:
            # ``match`` is wrapped in ``list`` so its results are consumed.
            list(match(rec, index="records", doc_type="record"))

        message = str(excinfo.value)
        assert 'No query defined' in message
Ejemplo n.º 12
0
def test_transform_search_hit():
    """Transform a record through a schema that reads ``metadata.title``."""
    class TestSchema(Schema):
        title = fields.Str(attribute='metadata.title')

    serializer = MarshmallowSerializer(TestSchema)
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = serializer.transform_record(pid, record)

    assert transformed == {'title': 'test'}
Ejemplo n.º 13
0
def test_serialize(app):
    """Test MARCXML serialize."""
    serializer = MARCXMLSerializer(to_marc21, schema_class=MySchema)
    pid = PersistentIdentifier(pid_type="recid", pid_value="2")
    record = Record({"title": "test"})

    serialized = serializer.serialize(pid, record)

    expected_lines = [
        u"<?xml version='1.0' encoding='UTF-8'?>\n",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n',
        u'  <controlfield tag="001">2</controlfield>\n',
        u"</record>\n",
    ]
    assert serialized.decode("utf8") == u"".join(expected_lines)
def test_serialize(app):
    """Test MARCXML serialize."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'title': 'test'})
    marcxml = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    raw = marcxml.serialize(pid, record)

    # The PID value is expected in control field 001.
    expected = (
        u"<?xml version='1.0' encoding='UTF-8'?>\n"
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n'
        u'  <controlfield tag="001">2</controlfield>\n'
        u'</record>\n'
    )
    assert raw.decode('utf8') == expected
Ejemplo n.º 15
0
def test_execute_missing_type(app, simple_record):
    """Handle things when bad query type is passed.

    ``execute`` is expected to raise ``NotImplementedQuery`` for the
    ``'banana'`` query type used here.
    """
    with app.app_context():
        query = {'type': 'banana', 'match': 'title'}
        index = "records"
        doc_type = "record"
        record = Record(simple_record)

        with pytest.raises(NotImplementedQuery):
            # The return value is irrelevant: the call itself must raise.
            execute(index, doc_type, query, record)
Ejemplo n.º 16
0
def test_serialize_oaipmh():
    """Test MARCXML serialization for OAI-PMH.

    The record is handed over wrapped in a ``{"_source": ...}`` mapping and
    the first child of the returned element must hold the PID value.
    """
    s = MARCXMLSerializer(to_marc21, schema_class=MySchema)

    tree = s.serialize_oaipmh(
        PersistentIdentifier(pid_type="recid", pid_value="2"),
        {"_source": Record({"title": "test"})},
    )

    # Index the element directly: ``getchildren()`` is deprecated.
    assert tree[0].text == "2"
def test_serialize(app):
    """Test Dublin Core serialize, both plain and through OAI-PMH."""
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'titles': ['DC test']})

    # Plain serialization: the title must show up as a dc:title element.
    xml = DublinCoreSerializer(SimpleSchema).serialize(pid, record)
    assert """<dc:title>DC test</dc:title>""" in xml

    # OAI-PMH serialization: the returned tree has exactly one child.
    oai_serializer = DublinCoreSerializer(SimpleSchema)
    oai_tree = oai_serializer.serialize_oaipmh(pid, {'_source': record})
    assert len(oai_tree) == 1
Ejemplo n.º 18
0
def test_match_no_queries_config(app, simple_record):
    """Expect ``NoQueryDefined`` when the configured query list is empty."""
    from invenio_records import Record

    # Configure an empty query list for the "records"/"record" pair.
    app.config.update({'MATCHER_QUERIES': {"records": {"record": []}}})

    with app.app_context():
        rec = Record(simple_record)

        with pytest.raises(NoQueryDefined) as excinfo:
            list(match(rec, index="records", doc_type="record"))

        message = str(excinfo.value)
        assert 'No query passed or defined' in message
Ejemplo n.º 19
0
def get_test_data():
    """Return a ``(pid, record)`` pair describing a two-creator book."""
    creators = [
        {'family_name': 'Doe', 'given_name': 'John'},
        {'family_name': 'Smith', 'given_name': 'Jane'},
    ]
    metadata = {
        'title': 'Citeproc test',
        'type': 'book',
        'creators': creators,
        'publication_date': [2016, 1, 1],
    }
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    return pid, Record(metadata)
Ejemplo n.º 20
0
def test_parse_query_accepts_dotted_values(app, record):
    """Parse a query whose ``match`` is a dotted path."""
    with app.app_context():
        dotted_query = {'type': 'exact', 'match': 'titles.title'}
        wrapped = Record(record)

        parsed = _parse(dotted_query, wrapped)
        query_type, field, found_values, leftovers = parsed

        assert query_type == 'exact'
        assert field == 'titles.title'
        assert found_values == ['foo bar']
        assert leftovers == {}
Ejemplo n.º 21
0
def test_parse_query_with_extras(app, simple_record):
    """Parse a query and check the extra ``foo`` key is kept in the extras."""
    with app.app_context():
        rec = Record(simple_record)
        query_with_extra = {'type': 'exact', 'match': 'title', 'foo': 'bar'}

        query_type, field, found_values, leftovers = _parse(
            query_with_extra, rec)

        assert query_type == 'exact'
        assert field == 'title'
        assert found_values == ['foo bar']
        # The extra key must survive parsing untouched.
        assert leftovers == {'foo': 'bar'}
Ejemplo n.º 22
0
def test_parse_query_can_override_values(app, simple_record):
    """Parse a query whose explicit ``values`` key supplies the values."""
    with app.app_context():
        overriding_query = {
            'type': 'exact',
            'match': 'title',
            'values': ['qux quux'],
        }
        rec = Record(simple_record)

        parsed = _parse(overriding_query, rec)
        query_type, field, found_values, leftovers = parsed

        assert query_type == 'exact'
        assert field == 'title'
        # The values come from the query itself.
        assert found_values == ['qux quux']
        assert leftovers == {}
Ejemplo n.º 23
0
def test_parse_query_with_with(app, simple_record):
    """Parse a query that carries the ``with`` keyword."""
    with app.app_context():
        rec = Record(simple_record)
        query_using_with = {
            'type': 'exact',
            'match': 'title',
            'with': 'titles.title',
        }

        query_type, field, found_values, leftovers = _parse(
            query_using_with, rec)

        assert query_type == 'exact'
        # The parsed match is the ``with`` value.
        assert field == 'titles.title'
        assert found_values == ['foo bar']
        assert leftovers == {}
Ejemplo n.º 24
0
def test_serialize():
    """Serialize to JSON via a schema reading from metadata and the PID."""
    class TestSchema(Schema):
        title = fields.Str(attribute='metadata.mytitle')
        id = fields.Str(attribute='pid.pid_value')

    serializer = JSONSerializer(TestSchema)
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    record = Record({'mytitle': 'test'})

    decoded = json.loads(serializer.serialize(pid, record))

    assert decoded['title'] == 'test'
    assert decoded['id'] == '2'
Ejemplo n.º 25
0
def test_serialize_no_schema_class():
    """Serialize to MARCXML with a serializer built without a schema class."""
    serializer = MARCXMLSerializer(to_marc21)
    record = Record({
        '__order__': ['control_number_identifier'],
        'control_number_identifier': 'SzGeCERN',
    })
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')

    raw = serializer.serialize(pid, record)

    expected_lines = [
        u'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n',
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n',
        u'  <controlfield tag="003">SzGeCERN</controlfield>\n',
        u'</record>\n',
    ]
    assert raw.decode('utf8') == u''.join(expected_lines)
Ejemplo n.º 26
0
def test_match_with_passed_queries(app, simple_record, mocker):
    """Match using queries handed in directly as an argument.

    ``invenio_matcher.engine.search`` is patched with ``one_search_result``.
    """
    from invenio_records import Record
    mocker.patch('invenio_matcher.engine.search', one_search_result)

    with app.app_context():
        rec = Record(simple_record)
        exact_title = {'type': 'exact', 'match': 'title'}

        expected = [MatchResult(1, rec, 1)]
        matches = match(
            rec, index="records", doc_type="record", queries=[exact_title])
        found = list(matches)

        assert expected == found
Ejemplo n.º 27
0
def test_transform_record_default_schema():
    """Transform a record with a serializer created without a schema."""
    serializer = SimpleMarshmallowSerializer()
    pid = PersistentIdentifier(pid_type='recid', pid_value='1')
    record = Record({'title': 'test'})

    transformed = serializer.transform_record(pid, record)

    expected = dict(
        id=1,
        created=None,
        links={},
        metadata={'title': 'test'},
        updated=None,
    )
    assert transformed == expected
Ejemplo n.º 28
0
def test_execute_free(app, simple_record, mocker):
    """Execute a query of type ``free`` and expect no results.

    ``invenio_matcher.engine.search`` is patched with
    ``empty_search_result``.
    """
    mocker.patch('invenio_matcher.engine.search', empty_search_result)

    with app.app_context():
        free_query = {'type': 'free', 'match': 'title'}
        rec = Record(simple_record)

        found = execute("records", "record", free_query, rec)

        assert found == []
Ejemplo n.º 29
0
def test_match_with_configured_queries(app, simple_record, matcher_config,
                                       mocker):
    """Match without passing queries, after setting ``MATCHER_QUERIES``.

    ``invenio_matcher.engine.search`` is patched with
    ``empty_search_result``, so no match is expected.
    """
    from invenio_records import Record
    mocker.patch('invenio_matcher.engine.search', empty_search_result)

    app.config.update({'MATCHER_QUERIES': matcher_config})

    with app.app_context():
        rec = Record(simple_record)

        found = list(match(rec, index="records", doc_type="record"))

        assert [] == found
Ejemplo n.º 30
0
def test_serialize_no_schema_class():
    """Serialize to MARCXML with a serializer built without a schema class."""
    pid = PersistentIdentifier(pid_type="recid", pid_value="1")
    metadata = {
        "__order__": ["control_number_identifier"],
        "control_number_identifier": "SzGeCERN",
    }

    marcxml = MARCXMLSerializer(to_marc21)
    output = marcxml.serialize(pid, Record(metadata))

    # The identifier is expected in control field 003.
    expected = u"".join([
        u"<?xml version='1.0' encoding='UTF-8'?>\n",
        u'<record xmlns="http://www.loc.gov/MARC21/slim">\n',
        u'  <controlfield tag="003">SzGeCERN</controlfield>\n',
        u"</record>\n",
    ])
    assert output.decode("utf8") == expected