def convert_marcxml(source):
    """Translate a MARC XML stream into JSON records, one at a time.

    ``source`` must expose ``read()``.  Its whole content is split into
    individual MARCXML records; each one is parsed, routed to the dojson
    model matching its collection and yielded after empty values have been
    stripped.  Records matching none of the known collections are converted
    with the ``hep`` model.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from scoap3.dojson.conferences import conferences
    from scoap3.dojson.experiments import experiments
    from scoap3.dojson.hep import hep
    from scoap3.dojson.hepnames import hepnames
    from scoap3.dojson.institutions import institutions
    from scoap3.dojson.jobs import jobs
    from scoap3.dojson.journals import journals
    from scoap3.dojson.utils import strip_empty_values

    # Ordered routing table: the first entry with a matching collection wins.
    routes = (
        (('institution',), institutions),
        (('experiment',), experiments),
        (('journals',), journals),
        (('hepnames',), hepnames),
        (('job', 'jobhidden'), jobs),
        (('conferences',), conferences),
    )

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        model = hep  # fallback when no collection matches
        for collection_names, candidate in routes:
            if any(_collection_in_record(marc, name)
                   for name in collection_names):
                model = candidate
                break
        yield strip_empty_values(model.do(marc))
def convert_marcxml(source):
    """Yield cleaned JSON records converted from a MARC XML stream.

    The content returned by ``source.read()`` is split into single MARCXML
    records.  Every record is parsed and converted with the dojson model
    selected from its collection (``hep`` when nothing else matches), and
    the result is passed through ``strip_empty_values`` before being
    yielded.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from inspirehep.dojson.utils import strip_empty_values

    from inspirehep.dojson.hep import hep
    from inspirehep.dojson.institutions import institutions
    from inspirehep.dojson.journals import journals
    from inspirehep.dojson.experiments import experiments
    from inspirehep.dojson.hepnames import hepnames
    from inspirehep.dojson.jobs import jobs
    from inspirehep.dojson.conferences import conferences

    def select_model(marc):
        """Return the model for ``marc``; checks run in a fixed order."""
        if _collection_in_record(marc, 'institution'):
            return institutions
        if _collection_in_record(marc, 'experiment'):
            return experiments
        if _collection_in_record(marc, 'journals'):
            return journals
        if _collection_in_record(marc, 'hepnames'):
            return hepnames
        is_job = (_collection_in_record(marc, 'job') or
                  _collection_in_record(marc, 'jobhidden'))
        if is_job:
            return jobs
        if _collection_in_record(marc, 'conferences'):
            return conferences
        return hep

    for blob in split_blob(source.read()):
        marc = create_record(blob)
        converted = select_model(marc).do(marc)
        yield strip_empty_values(converted)
def convert_marcxml(source):
    """Yield one MARC21 JSON record per MARCXML record read from ``source``.

    ``source`` must expose ``read()``; its content is split into records,
    each of which is parsed and converted with the ``marc21`` dojson model.
    """
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob

    raw_xml = source.read()
    for blob in split_blob(raw_xml):
        parsed = create_record(blob)
        yield marc21.do(parsed)
def records():
    """Load the bundled demo records as indexed deposits.

    The MARCXML file ``data/marc21/bibliographic.xml`` shipped with
    ``invenio_records`` is converted record by record with the ``marc21``
    model.  For each record one of the existing users is logged in
    (cycling through ``User.query.all()``), a ``Deposit`` is created and
    indexed, and the user is logged out again.  Everything runs inside a
    test request context and a nested database transaction that is
    committed at the end.
    """
    import pkg_resources
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from flask_login import login_user, logout_user
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # Locate the demo data inside the installed package.
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    with open(data_path) as source, current_app.test_request_context():
        indexer = RecordIndexer()
        with db.session.begin_nested():
            position = 0
            for blob in split_blob(source.read()):
                position += 1
                # Counting starts at 1, so the first record goes to the
                # second user whenever more than one user exists.
                owner = users[position % len(users)]
                login_user(owner)
                metadata = marc21.do(create_record(blob))
                deposit = Deposit.create(metadata)
                indexer.index(deposit)
                logout_user()
        db.session.commit()
def test_records_created(self):
    """Record - the demo MARCXML file splits into 142 records."""
    relative_path = os.path.join('data', 'demo_record_marc_data.xml')
    xmltext = pkg_resources.resource_string(
        'invenio.testsuite', relative_path)
    # Materialise the split result so it can be counted.
    recs = list(split_blob(xmltext))
    assert len(recs) == 142
def records():
    """Load the bundled demo records as indexed deposits.

    Reads ``data/marc21/bibliographic.xml`` from the ``invenio_records``
    package, converts every MARCXML record with the ``marc21`` dojson
    model, creates a ``Deposit`` for it and sends it to the indexer.

    Each deposit is created while one of the existing users is logged in;
    users are taken from ``User.query.all()`` and cycled through by record
    position.  The work happens inside a test request context and a nested
    database transaction, which is committed once all records are created.
    """
    import pkg_resources
    from flask_login import login_user, logout_user
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_accounts.models import User
    from invenio_deposit.api import Deposit

    users = User.query.all()

    # Resolve the demo data file shipped inside the package.
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml'
    )
    with open(data_path) as source:
        with current_app.test_request_context():
            indexer = RecordIndexer()
            with db.session.begin_nested():
                for index, data in enumerate(split_blob(source.read()),
                                             start=1):
                    # Log a user in for the duration of this record only.
                    login_user(users[index % len(users)])
                    # Translate MARCXML into the JSON representation.
                    record = marc21.do(create_record(data))
                    # Create the deposit and index it.
                    indexer.index(Deposit.create(record))
                    logout_user()
            db.session.commit()
def cds(temp, source):
    """Load CDS demo records from ``source`` into the database and index.

    When ``source`` is empty, the ``data/records.xml`` file bundled with
    ``cds.modules.fixtures`` is used.  ``_handle_source(source, temp)``
    provides the files to load.  Every MARCXML record found in them gets a
    fresh UUID, is converted with the ``marc21`` model, receives a
    ``recid`` persistent identifier and is stored and indexed.
    """
    click.echo('Loading data it may take several minutes.')

    # Fall back to the demo data shipped with the package.
    if not source:
        source = pkg_resources.resource_filename(
            'cds.modules.fixtures', 'data/records.xml')
    files = _handle_source(source, temp)

    indexer = RecordIndexer()
    for path in files:
        with open(path) as xml_file:
            # FIXME: Add some progress
            # with click.progressbar(data) as records:
            with db.session.begin_nested():
                for blob in split_blob(xml_file.read()):
                    rec_uuid = uuid.uuid4()
                    metadata = marc21.do(create_record(blob))
                    # Mint the PID before the record itself is created.
                    current_pidstore.minters['recid'](rec_uuid, metadata)
                    stored = Record.create(metadata, id_=rec_uuid)
                    indexer.index(stored)
            db.session.commit()

    click.echo('DONE :)')
def records():
    """Load the bundled demo records, mint their PIDs and index them.

    The MARCXML file ``data/marc21/bibliographic.xml`` from the
    ``invenio_records`` package is converted record by record with the
    ``marc21`` model.  Each record gets a new UUID, a persistent identifier
    from the ``recid_minter`` minter, and is created and indexed inside a
    nested transaction that is committed afterwards.
    """
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # Locate the demo data inside the installed package.
    data_path = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    with open(data_path) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for blob in split_blob(source.read()):
                rec_uuid = uuid.uuid4()
                metadata = marc21.do(create_record(blob))
                # The PID is minted before the record is created.
                current_pidstore.minters['recid_minter'](rec_uuid, metadata)
                stored = Record.create(metadata, id_=rec_uuid)
                indexer.index(stored)
        db.session.commit()
def records():
    """Create and index the demo records shipped with ``invenio_records``.

    Every MARCXML record in ``data/marc21/bibliographic.xml`` is converted
    with the ``marc21`` model, given a new UUID and a ``recid`` persistent
    identifier, then created and handed to the indexer.  Creation happens
    in a nested transaction; the session is committed once it has finished.
    """
    import pkg_resources
    import uuid
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob
    from invenio_pidstore import current_pidstore
    from invenio_records.api import Record

    # Path of the demo data inside the installed package.
    demo_file = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    with open(demo_file) as source:
        indexer = RecordIndexer()
        with db.session.begin_nested():
            for marcxml in split_blob(source.read()):
                new_id = uuid.uuid4()
                json_record = marc21.do(create_record(marcxml))
                # Mint the identifier first, then persist and index.
                current_pidstore.minters['recid'](new_id, json_record)
                indexer.index(Record.create(json_record, id_=new_id))
        db.session.commit()
def test_records_created(self):
    """Record - the demo MARCXML file splits into 142 records."""
    tests_dir = os.path.dirname(__file__)
    xmlpath = os.path.join(tests_dir, 'data', 'demo_record_marc_data.xml')
    with open(xmlpath, 'r') as xml_file:
        # Materialise the split result while the file is still open.
        recs = list(split_blob(xml_file.read()))
    assert len(recs) == 142
def load(source):
    """Yield one parsed record per MARCXML record read from ``source``.

    ``source`` must expose ``read()``; its content is split into individual
    records and each one is parsed with ``create_record``.
    """
    # NOTE: special handling of 'ALBUM' records (field 999__a, via
    # split_album) is currently disabled; records are yielded unsplit.
    for blob in split_blob(source.read()):
        yield create_record(blob)
def convert_marcxml(source):
    """Yield JSON records converted from a MARC XML stream.

    ``source`` must expose ``read()``.  Each MARCXML record found in its
    content is parsed and converted with the ``book`` model.
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from .book import book

    # NOTE: routing by collection (book vs. a default model) is disabled,
    # so every record currently goes through the book model.
    for blob in split_blob(source.read()):
        parsed = create_record(blob)
        yield book.do(parsed)
def records():
    """Create the demo records shipped with ``invenio_records``.

    Every MARCXML record in ``data/marc21/bibliographic.xml`` is converted
    with the ``marc21`` model and passed to ``Record.create`` inside a
    nested database transaction.
    """
    import pkg_resources
    from invenio_records.api import Record
    from dojson.contrib.marc21 import marc21
    from dojson.contrib.marc21.utils import create_record, split_blob

    # Locate the demo data inside the installed package.
    demo_file = pkg_resources.resource_filename(
        'invenio_records', 'data/marc21/bibliographic.xml')

    # NOTE(review): no db.session.commit() is issued here - presumably the
    # caller commits; confirm.
    with open(demo_file) as source, db.session.begin_nested():
        for blob in split_blob(source.read()):
            metadata = marc21.do(create_record(blob))
            Record.create(metadata)
def test_error_catching(self):
    """Record - a blob with an unclosed datafield still yields one record."""
    # NOTE: the <datafield> element is never closed in this fixture -
    # presumably on purpose, given what the test is named for.
    blob = (
        '<?xml version="1.0" encoding="UTF-8"?> '
        '<collection> '
        '<record> '
        '<datafield tag="FFT" ind1=" " ind2=" "> '
        '<subfield code="a">/path/to</subfield> '
        '<subfield code="t">Test</subfield> '
        '</record> '
        '</collection> '
    )

    records = []
    for chunk in split_blob(blob):
        records.append(create_record(chunk))

    assert len(records) == 1
    assert 'FFT__' in records[0]
def convert_marcxml(source):
    """Convert MARC XML to JSON, yielding exactly one result per record.

    ``source`` must expose ``read()``.  Its content is split into MARCXML
    records; each record is parsed and converted with the model matching
    its collection:

    * ``book`` collection  -> ``book`` model
    * ``audio`` collection -> ``audio`` model
    * anything else        -> ``book`` model (no default model exists yet)
    """
    from dojson.contrib.marc21.utils import create_record, split_blob

    from .book import book
    from .audio import audio

    for data in split_blob(source.read()):
        record = create_record(data)
        # The branches are mutually exclusive so that a record is never
        # emitted more than once.
        if _collection_in_record(record, 'book'):
            yield book.do(record)
        elif _collection_in_record(record, 'audio'):
            yield audio.do(record)
        else:
            # Fallback until a dedicated default model is available.
            yield book.do(record)
def demo_records(app):
    """Create and index the CDS demo records; return the data file path.

    Every MARCXML record in ``data/records.xml`` (bundled with
    ``cds.modules.fixtures``) is converted with the ``marc21`` model, given
    a new UUID and a ``recid`` persistent identifier, then created and
    indexed inside a nested transaction that is committed afterwards.
    ``app`` is not used by the body itself.
    """
    data_path = pkg_resources.resource_filename(
        'cds.modules.fixtures', 'data/records.xml')

    with open(data_path) as xml_file:
        indexer = RecordIndexer()
        with _db.session.begin_nested():
            for blob in split_blob(xml_file.read()):
                rec_uuid = uuid.uuid4()
                metadata = marc21.do(create_record(blob))
                # Mint the PID before the record itself is created.
                current_pidstore.minters['recid'](rec_uuid, metadata)
                stored = Record.create(metadata, id_=rec_uuid)
                indexer.index(stored)
        _db.session.commit()

    return data_path
def split_marcxml(source):
    """Return the list of record blobs found in a MARCXML stream.

    ``source`` must expose ``read()``; its content is split with the
    dojson MARC21 ``split_blob`` utility and the pieces are collected into
    a list.
    """
    from dojson.contrib.marc21.utils import split_blob

    content = source.read()
    return list(split_blob(content))