def load(collections, parent=None):
    """Persist a tree of collections, depth first.

    :param collections: Iterable of dicts, each with a ``name`` key and
        optional ``dbquery`` and ``children`` keys. A falsy value means
        there is nothing to create.
    :param parent: Collection the new entries are attached to.
    """
    if not collections:
        return
    for entry in collections:
        node = Collection(name=entry['name'],
                          dbquery=entry.get('dbquery'),
                          parent=parent)
        db.session.add(node)
        # Flush before descending into the children of this node.
        db.session.flush()
        load(entry.get('children'), parent=node)
def load_records(app, filename, schema, tries=5):
    """Create, mint and index every record found in a data file.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    :param tries: Upper bound on the number of search polls.
    :returns: List of the ids of the created records.
    """
    record_indexer = RecordIndexer()
    created_ids = []
    with app.app_context():
        # ``Record.validate`` is replaced by a mock returning ``None`` while
        # the records are created and committed.
        with mock.patch("invenio_records.api.Record.validate",
                        return_value=None):
            source_path = pkg_resources.resource_filename(
                "invenio_records", filename)
            parsed_items = load(source_path)
            with db.session.begin_nested():
                for parsed in parsed_items:
                    new_id = uuid.uuid4()
                    payload = dict(marc21.do(parsed))
                    payload["$schema"] = schema
                    recid_minter(new_id, payload)
                    oaiid_minter(new_id, payload)
                    created = current_oaiserver.record_cls.create(
                        payload, id_=new_id)
                    record_indexer.index(created)
                    created_ids.append(created.id)
            db.session.commit()

        # Poll the search client; refresh the indices after every poll that
        # reports fewer hits than records created.
        for _ in range(tries):
            hit_count = current_search_client.search()["hits"]["total"]
            if hit_count >= len(created_ids):
                break
            current_search.flush_and_refresh("_all")

    return created_ids
def records():
    """Create one record per entry of the bundled demo XML files.

    The XML files are looked up in the ``data`` resource directory of
    ``cernopendata_fixtures`` and in its direct sub-directories. The id of
    every created record is echoed, and the session is committed and
    expunged after each record.
    """
    from dojson.contrib.marc21.utils import load
    from dojson.contrib.marc21.model import marc21
    from invenio_db import db
    from invenio_records import Record

    class NoCheckRecord(Record):
        """Record whose ``validate`` always succeeds."""

        def validate(self):
            """Return ``True`` without checking anything."""
            return True

    schema_url = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'marc21/bibliographic/bd-v1.0.0.json')
    data_dir = pkg_resources.resource_filename(
        'cernopendata_fixtures', 'data')
    xml_paths = glob.glob(os.path.join(data_dir, '*.xml'))
    xml_paths.extend(glob.glob(os.path.join(data_dir, '*', '*.xml')))

    for xml_path in xml_paths:
        with open(xml_path, 'rb') as stream:
            for parsed in load(stream):
                json_record = marc21.do(parsed)
                json_record['$schema'] = schema_url
                created = NoCheckRecord.create(json_record)
                click.echo(created.id)
                db.session.commit()
                db.session.expunge_all()
Exemple #4
0
def load_records(app, filename, schema, tries=5):
    """Create, mint and index every record found in a data file.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    :param tries: Upper bound on the number of search polls; the function
        sleeps five seconds after each unsuccessful poll.
    :returns: List of the ids of the created records.
    """
    record_ids = []
    record_indexer = RecordIndexer()
    with app.app_context():
        # ``Record.validate`` is replaced by a mock returning ``None`` while
        # the records are created and committed.
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            path = pkg_resources.resource_filename(
                'invenio_records', filename)
            parsed_items = load(path)
            with db.session.begin_nested():
                for parsed in parsed_items:
                    uuid_ = uuid.uuid4()
                    json_data = dict(marc21.do(parsed))
                    json_data['$schema'] = schema
                    recid_minter(uuid_, json_data)
                    oaiid_minter(uuid_, json_data)
                    created = Record.create(json_data, id_=uuid_)
                    record_indexer.index(created)
                    record_ids.append(created.id)
            db.session.commit()

        # Poll the search client until it reports enough hits or the
        # attempts run out.
        for _ in range(tries):
            hit_count = current_search_client.search()['hits']['total']
            if hit_count >= len(record_ids):
                break
            sleep(5)

    return record_ids
Exemple #5
0
def test_marc21_loader():
    """Loading a collection that wraps two records yields two items."""
    xml = '<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(xml.encode('utf-8'))

    loaded = list(load(stream))
    assert len(loaded) == 2
def load_records(app, filename, schema, tries=5):
    """Create, mint and index every record found in a data file.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    :param tries: Upper bound on the number of search polls.
    :returns: List of the ids of the created records.
    """
    ids = []
    record_indexer = RecordIndexer()
    with app.app_context():
        # ``Record.validate`` is replaced by a mock returning ``None`` while
        # the records are created and committed.
        with mock.patch('invenio_records.api.Record.validate',
                        return_value=None):
            resource_path = pkg_resources.resource_filename(
                'invenio_records', filename)
            parsed_items = load(resource_path)
            with db.session.begin_nested():
                for parsed in parsed_items:
                    new_uuid = uuid.uuid4()
                    body = dict(marc21.do(parsed))
                    body['$schema'] = schema
                    recid_minter(new_uuid, body)
                    oaiid_minter(new_uuid, body)
                    stored = Record.create(body, id_=new_uuid)
                    record_indexer.index(stored)
                    ids.append(stored.id)
            db.session.commit()

        # Poll the search client; refresh the indices after every poll that
        # reports fewer hits than records created.
        for _ in range(tries):
            total_hits = current_search_client.search()['hits']['total']
            if total_hits >= len(ids):
                break
            current_search.flush_and_refresh('_all')

    return ids
Exemple #7
0
def import_records(dojson_model, schema, xmlfile):
    """Create one record per item loaded from ``xmlfile``.

    :param dojson_model: Object whose ``do`` method converts each loaded
        item.
    :param schema: Currently unused; see the TODO in the body.
    :param xmlfile: Path of the file to read; it is opened in binary mode.
    :returns: List of the UUIDs generated for the created records.
    """
    from invenio_pidstore import current_pidstore

    created_ids = []
    with db.session.begin_nested(), open(xmlfile, 'rb') as stream:
        for marcxml in load(stream):
            # Transform MARCXML to JSON
            json_data = dojson_model.do(marcxml)
            # TODO: Set json_data['$schema'] = schema once schema
            # validation has been fixed.
            record_uuid = uuid.uuid4()
            # FIXME: Strip off control number otherwise minter will fail.
            if 'control_number' in json_data:
                del json_data['control_number']
            # Mint a recid first, then an OAI id, for the same UUID.
            recid = current_pidstore.minters['recid'](record_uuid, json_data)
            current_pidstore.minters['oaiid'](record_uuid, json_data)
            # Store record.
            Record.create(json_data, id_=record_uuid)
            click.echo('Created record {}'.format(recid.pid_value))
            created_ids.append(record_uuid)
    return created_ids
def test_xslt_not_found():
    """Dumping with a non-existent XSLT file raises ``IOError``."""
    with CliRunner().isolated_filesystem():
        with open('record.xml', 'wb') as handle:
            handle.write(RECORD_SIMPLE.encode('utf-8'))
        loaded = list(load('record.xml'))
        with pytest.raises(IOError):
            dumps(loaded, xslt_filename='file_not_exist')
def test_xslt_not_found():
    """Dumping with a non-existent XSLT file raises ``IOError``."""
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        # Write a single record to disk so it can be loaded by file name.
        with open('record.xml', 'wb') as xml_file:
            xml_file.write(RECORD_SIMPLE.encode('utf-8'))
        parsed = list(load('record.xml'))
        with pytest.raises(IOError):
            dumps(parsed, xslt_filename='file_not_exist')
Exemple #10
0
def test_xslt_dump():
    """Dumping through the demo XSLT matches the stored converted file."""
    here = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(here)
    source_path = "{0}/demo_marc21_to_dc.xml".format(here)
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)

    with open(expected_path) as expected_file:
        expected = expected_file.read()
    loaded = list(load(source_path))
    rendered = dumps(loaded, xslt_filename=xslt_path)
    assert rendered.decode("utf-8") == expected
Exemple #11
0
def test_marc21_loader():
    """Loading a collection that wraps two records yields two items."""
    template = '<collection>{0}{1}</collection>'
    payload = template.format(RECORD, RECORD_SIMPLE).encode('utf-8')

    parsed = list(load(BytesIO(payload)))
    assert len(parsed) == 2
Exemple #12
0
def test_xslt_not_found():
    """Dumping with a non-existent XSLT file raises ``IOError``."""
    with CliRunner().isolated_filesystem():
        with open("record.xml", "wb") as out:
            out.write(RECORD_SIMPLE.encode("utf-8"))
        items = list(load("record.xml"))
        with pytest.raises(IOError):
            dumps(items, xslt_filename="file_not_exist")
Exemple #13
0
def test_entry_points():
    """The ``marcxml`` dump entry point matches the stored converted file."""
    entry_points = list(
        pkg_resources.iter_entry_points("dojson.cli.dump", "marcxml"))
    dump = entry_points[0].load()

    here = os.path.dirname(__file__)
    with open("{0}/demo_marc21_to_dc.converted.xml".format(here)) as handle:
        expected = handle.read()
    loaded = list(load("{0}/demo_marc21_to_dc.xml".format(here)))
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)
    rendered = dump(loaded, xslt_filename=xslt_path)
    assert rendered.decode("utf-8") == expected
Exemple #14
0
def collections():
    """Create the collections described by ``COLLECTIONS`` and commit."""
    from invenio_db import db
    from invenio_collections.models import Collection

    from .fixtures import COLLECTIONS

    def _create_tree(entries, parent=None):
        """Add ``entries`` under ``parent``, then recurse into children."""
        if not entries:
            return
        for entry in entries:
            node = Collection(name=entry['name'],
                              dbquery=entry.get('dbquery'),
                              parent=parent)
            db.session.add(node)
            # Flush before descending into the children of this node.
            db.session.flush()
            _create_tree(entry.get('children'), parent=node)

    _create_tree(COLLECTIONS)
    db.session.commit()
def test_xslt_dump():
    """Dumping through the demo XSLT matches the stored converted file."""
    base_dir = os.path.dirname(__file__)
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(base_dir)

    with open('{0}/demo_marc21_to_dc.converted.xml'.format(base_dir)) as fp:
        expected = fp.read()
    parsed = list(load('{0}/demo_marc21_to_dc.xml'.format(base_dir)))
    rendered = dumps(parsed, xslt_filename=xslt_path)
    assert rendered.decode('utf-8') == expected
Exemple #16
0
def test_output_type_from_dumps_etree():
    """``dumps_etree`` returns an ``_Element`` with and without an XSLT."""
    here = os.path.dirname(__file__)
    loaded = list(load("{0}/demo_marc21_to_dc.xml".format(here)))
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)

    # Neither call should raise a TypeError.
    plain = dumps_etree(loaded)
    transformed = dumps_etree(loaded, xslt_filename=xslt_path)

    assert isinstance(plain, _Element)
    assert isinstance(transformed, _Element)
def test_entry_points():
    """The ``marcxml`` dump entry point matches the stored converted file.

    Newlines are removed from the stored file before the comparison.
    """
    matches = list(
        pkg_resources.iter_entry_points('dojson.cli.dump', 'marcxml'))
    dump = matches[0].load()

    here = os.path.dirname(__file__)
    with open("{0}/demo_marc21_to_dc.converted.xml".format(here)) as handle:
        raw_expected = handle.read()
    expected = raw_expected.replace('\n', '')

    loaded = list(load('{0}/demo_marc21_to_dc.xml'.format(here)))
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(here)
    rendered = dump(loaded, xslt_filename=xslt_path)
    assert rendered.decode('utf-8') == expected
Exemple #18
0
def test_marc21_loader():
    """Loading a collection that wraps two records yields two items."""
    from six import BytesIO
    from dojson.contrib.marc21.utils import load

    xml = '<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(xml.encode('utf-8'))

    parsed = list(load(stream))
    assert len(parsed) == 2
def test_entry_points():
    """The ``marcxml`` dump entry point matches the stored converted file."""
    candidates = list(
        pkg_resources.iter_entry_points('dojson.cli.dump', 'marcxml'))
    dump = candidates[0].load()

    base_dir = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(base_dir)
    with open(expected_path) as expected_file:
        expected = expected_file.read()

    parsed = list(load('{0}/demo_marc21_to_dc.xml'.format(base_dir)))
    rendered = dump(
        parsed,
        xslt_filename='{0}/demo_marc21_to_dc.xslt'.format(base_dir),
    )
    assert rendered.decode('utf-8') == expected
def test_output_type_from_dumps_etree():
    """``dumps_etree`` returns an ``_Element`` with and without an XSLT."""
    base_dir = os.path.dirname(__file__)
    parsed = list(load('{0}/demo_marc21_to_dc.xml'.format(base_dir)))

    # First without arguments, then with the xslt_filename argument;
    # neither call should raise a TypeError.
    without_xslt = dumps_etree(parsed)
    with_xslt = dumps_etree(
        parsed,
        xslt_filename='{0}/demo_marc21_to_dc.xslt'.format(base_dir),
    )

    assert isinstance(without_xslt, _Element)
    assert isinstance(with_xslt, _Element)
def load_records(es_app, filename, schema):
    """Create records from a data file, index them and fetch them back.

    :param es_app: Application providing the test request context.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        data_filename = pkg_resources.resource_filename("invenio_records", filename)
        records_data = load(data_filename)
        records = []
        for item in records_data:
            item_dict = dict(marc21.do(item))
            item_dict["$schema"] = schema
            record = Record.create(item_dict)
            records.append(record)
        db.session.commit()

        es_records = []
        for record in records:
            es_records.append(indexer.index(record))

        # Use the ``current_search`` proxy for the search client: no
        # ``search`` name is bound inside this function.
        from invenio_search import current_search

        for record in es_records:
            current_search.client.get(
                index=record["_index"], doc_type=record["_type"], id=record["_id"]
            )
def test_bibliographic_data(es_app):
    """Test indexation using bibliographic data.

    Every item loaded from ``data/marc21/bibliographic.xml`` is turned into
    a record, indexed, and then fetched back through the search client.
    ``search.delete()`` runs in a ``finally`` clause so it is not skipped
    when one of those steps raises.
    """
    search = InvenioSearch(es_app)
    search.create()
    try:
        indexer = RecordIndexer()
        with es_app.test_request_context():
            data_filename = pkg_resources.resource_filename(
                'invenio_records', 'data/marc21/bibliographic.xml')
            records_data = load(data_filename)
            records = []
            for item in records_data:
                record = Record.create(item)
                # The schema is assigned after creation, before indexing.
                record['$schema'] = "mappings/marc21_holdings.json"
                es_record = indexer.index(record)
                records.append(es_record)

        for record in records:
            search.client.get(index=record['_index'],
                              doc_type=record['_type'],
                              id=record['_id'])
    finally:
        # Pair every successful ``search.create()`` with a delete.
        search.delete()
def test_bibliographic_data(es_app):
    """Test indexation using bibliographic data.

    Every item loaded from ``data/marc21/bibliographic.xml`` is turned into
    a record, indexed, and then fetched back through the search client.
    ``search.delete()`` runs in a ``finally`` clause so it is not skipped
    when one of those steps raises.
    """
    search = InvenioSearch(es_app)
    search.create()
    try:
        indexer = RecordIndexer()
        with es_app.test_request_context():
            data_filename = pkg_resources.resource_filename(
                'invenio_records', 'data/marc21/bibliographic.xml')
            records_data = load(data_filename)
            records = []
            for item in records_data:
                record = Record.create(item)
                # The schema is assigned after creation, before indexing.
                record['$schema'] = "mappings/marc21_holdings.json"
                es_record = indexer.index(record)
                records.append(es_record)

        for record in records:
            search.client.get(index=record['_index'],
                              doc_type=record['_type'],
                              id=record['_id'])
    finally:
        # Pair every successful ``search.create()`` with a delete.
        search.delete()
def load_records(es_app, filename, schema):
    """Create records from a data file, index them and fetch them back.

    :param es_app: Application providing the test request context.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        data_filename = pkg_resources.resource_filename(
            'invenio_records', filename)
        records_data = load(data_filename)
        records = []
        for item in records_data:
            item_dict = dict(marc21.do(item))
            item_dict['$schema'] = schema
            record = Record.create(item_dict)
            records.append(record)
        db.session.commit()

        es_records = []
        for record in records:
            es_records.append(indexer.index(record))

        # Use the ``current_search`` proxy for the search client: no
        # ``search`` name is bound inside this function.
        from invenio_search import current_search

        for record in es_records:
            current_search.client.get(index=record['_index'],
                                      doc_type=record['_type'],
                                      id=record['_id'])
def load_records(es_app, filename, schema):
    """Create records from a data file, index them and fetch them back.

    :param es_app: Application providing the test request context.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    """
    record_indexer = RecordIndexer()
    with es_app.test_request_context():
        source_path = pkg_resources.resource_filename(
            'invenio_records', filename)
        parsed_items = load(source_path)
        created = []
        for parsed in parsed_items:
            payload = dict(marc21.do(parsed))
            payload['$schema'] = schema
            created.append(Record.create(payload))
        db.session.commit()

        # Index everything first, then look each indexed document up.
        index_results = [record_indexer.index(rec) for rec in created]

        from invenio_search import current_search
        for result in index_results:
            current_search.client.get(
                index=result['_index'],
                doc_type=result['_type'],
                id=result['_id'],
            )
def load_records(es_app, filename, schema):
    """Create records from a data file, index them and fetch them back.

    :param es_app: Application providing the test request context.
    :param filename: Resource path of the data file inside the
        ``invenio_records`` package.
    :param schema: Value written to the ``$schema`` key of each record.
    """
    record_indexer = RecordIndexer()
    with es_app.test_request_context():
        resource_path = pkg_resources.resource_filename(
            "invenio_records", filename)
        parsed_items = load(resource_path)
        stored = []
        for parsed in parsed_items:
            body = dict(marc21.do(parsed))
            body["$schema"] = schema
            stored.append(Record.create(body))
        db.session.commit()

        # Index everything first, then look each indexed document up.
        indexed = [record_indexer.index(rec) for rec in stored]

        from invenio_search import current_search

        for doc in indexed:
            current_search.client.get(
                index=doc["_index"],
                doc_type=doc["_type"],
                id=doc["_id"],
            )