Exemple #1
0
def load(source):
    """Lazily yield one parsed record per MARC XML chunk found in ``source``.

    ``source`` is handed to ``split_stream``; every chunk it produces is
    converted with ``create_record`` and yielded in stream order.
    """
    # NOTE: splitting of 'ALBUM' records (field '999__', subfield 'a') through
    # split_album() is currently disabled, so every record is yielded as-is.
    for chunk in split_stream(source):
        yield create_record(chunk)
Exemple #2
0
def test_marc21_split_stream():
    """Check split_stream() yields the collection's records in order."""
    payload = u'<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(payload.encode('utf-8'))
    generator = split_stream(stream)
    # Pull records explicitly with next() so a short stream fails loudly.
    for expected in (RECORD, RECORD_SIMPLE):
        element = next(generator)
        serialized = etree.tostring(element, method='html')
        assert serialized.decode('utf-8') == expected
Exemple #3
0
def test_marc21_records_over_single_line():
    """Check splitting when every record sits on the same line."""
    records = (
        u'<record>foo</record>',
        u'<record>会意字</record>',
        u'<record>&gt;&amp;&lt;</record>',
    )
    body = u''.join(records)
    collection = u'<collection>{0}</collection>'.format(body)

    generator = split_stream(BytesIO(collection.encode('utf-8')))
    # Each serialized element must round-trip to its UTF-8 encoded source.
    for expected in records:
        element = next(generator)
        serialized = etree.tostring(element, encoding='utf-8', method='xml')
        assert expected.encode('utf-8') == serialized
from dojson.contrib.marc21.utils import create_record, split_stream
from scoap3.dojson.hep.model import hep
from invenio_records import Record
from invenio_db import db
from invenio_indexer.api import RecordIndexer
from scoap3.modules.pidstore.minters import scoap3_recid_minter

# Parse the MARC XML export and convert every record with the HEP model.
# The file is read in binary mode (the parser is fed bytes, as in the
# split_stream tests) and closed once the stream has been fully consumed.
with open('../data/scoap3export.xml', 'rb') as export_file:
    recs = [
        hep.do(create_record(chunk))
        for chunk in split_stream(export_file)
    ]

# A single indexer instance is reused for all records.
indexer = RecordIndexer()

for i, obj in enumerate(recs, start=1):
    print("Creating record {}/{}".format(i, len(recs)))
    # Store the converted record (``obj``), not a raw parser chunk.
    record = Record.create(obj, id_=None)
    print(record)

    # Create persistent identifier.
    pid = scoap3_recid_minter(str(record.id), record)
    print(pid.object_uuid)

    # Commit any changes to record
    record.commit()

    # Commit to DB before indexing
    db.session.commit()

    # Index record
    indexer.index_by_id(pid.object_uuid)