def load(collections, parent=None):
    """Persist a tree of collections, depth first.

    :param collections: Iterable of dicts. Each dict is read for ``name``
        and, optionally, ``dbquery`` and ``children``. ``None`` or an empty
        iterable is a no-op.
    :param parent: Collection passed as ``parent`` to every entry created
        at this level.
    """
    if not collections:
        return

    for entry in collections:
        node = Collection(
            name=entry['name'],
            dbquery=entry.get('dbquery'),
            parent=parent,
        )
        db.session.add(node)
        # Flush before recursing; presumably so the new node is persisted
        # before its children reference it -- confirm against the model.
        db.session.flush()
        load(entry.get('children'), parent=node)
def load_records(app, filename, schema, tries=5):
    """Create, mint and index the records of a packaged MARCXML file.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls while waiting for the index.
    :returns: List with the id of every created record.
    """
    indexer = RecordIndexer()
    records = []

    with app.app_context():
        # Record validation is patched out while the records are created.
        skip_validation = mock.patch(
            "invenio_records.api.Record.validate", return_value=None
        )
        with skip_validation:
            source_path = pkg_resources.resource_filename(
                "invenio_records", filename
            )
            parsed_items = load(source_path)
            with db.session.begin_nested():
                for marcxml in parsed_items:
                    record_id = uuid.uuid4()
                    payload = dict(marc21.do(marcxml))
                    payload["$schema"] = schema
                    # Both minters receive the same id and payload.
                    recid_minter(record_id, payload)
                    oaiid_minter(record_id, payload)
                    created = current_oaiserver.record_cls.create(
                        payload, id_=record_id
                    )
                    indexer.index(created)
                    records.append(created.id)
            db.session.commit()

        # Wait for indexer to finish: poll until the total hit count reaches
        # the number of created records, refreshing between attempts.
        for _ in range(tries):
            hits = current_search_client.search()["hits"]
            if hits["total"] >= len(records):
                break
            current_search.flush_and_refresh("_all")

    return records
def records():
    """Load the demo records shipped as XML fixture files.

    Every ``*.xml`` file directly inside, or one directory below, the
    ``data`` resource of ``cernopendata_fixtures`` is parsed; each entry is
    converted with the MARC21 model, tagged with the bibliographic schema
    URL and stored without validation. The id of each record is echoed.
    """
    from dojson.contrib.marc21.utils import load
    from dojson.contrib.marc21.model import marc21
    from invenio_db import db
    from invenio_records import Record

    class NoCheckRecord(Record):
        """Record whose validation always reports success."""

        def validate(self):
            """Skip the schema check."""
            return True

    jsonschemas = current_app.extensions['invenio-jsonschemas']
    schema = jsonschemas.path_to_url('marc21/bibliographic/bd-v1.0.0.json')

    data_dir = pkg_resources.resource_filename('cernopendata_fixtures', 'data')
    patterns = (
        os.path.join(data_dir, '*.xml'),
        os.path.join(data_dir, '*', '*.xml'),
    )
    files = [path for pattern in patterns for path in glob.glob(pattern)]

    for filename in files:
        with open(filename, 'rb') as source:
            for marcxml in load(source):
                record = marc21.do(marcxml)
                record['$schema'] = schema
                click.echo(NoCheckRecord.create(record).id)
                # NOTE(review): the original nesting of these two calls was
                # lost; they are kept per record here -- confirm.
                db.session.commit()
                db.session.expunge_all()
def load_records(app, filename, schema, tries=5):
    """Create records from a packaged MARCXML file and wait for the index.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls; a 5 second sleep follows
        every poll that still reports too few hits.
    :returns: List with the id of every created record.
    """
    indexer = RecordIndexer()
    records = []

    with app.app_context():
        # Record validation is patched out while the records are created.
        skip_validation = mock.patch(
            'invenio_records.api.Record.validate', return_value=None
        )
        with skip_validation:
            source_path = pkg_resources.resource_filename(
                'invenio_records', filename
            )
            parsed_items = load(source_path)
            with db.session.begin_nested():
                for marcxml in parsed_items:
                    record_id = uuid.uuid4()
                    payload = dict(marc21.do(marcxml))
                    payload['$schema'] = schema
                    # Both minters receive the same id and payload.
                    recid_minter(record_id, payload)
                    oaiid_minter(record_id, payload)
                    created = Record.create(payload, id_=record_id)
                    indexer.index(created)
                    records.append(created.id)
            db.session.commit()

        # Wait for indexer to finish
        for _ in range(tries):
            hits = current_search_client.search()['hits']
            if hits['total'] >= len(records):
                break
            sleep(5)

    return records
def test_marc21_loader():
    """Check that a collection built from two records loads as two items."""
    payload = '<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(payload.encode('utf-8'))
    parsed = list(load(stream))
    assert len(parsed) == 2
def load_records(app, filename, schema, tries=5):
    """Create, mint and index the records of a packaged MARCXML file.

    :param app: Application whose context is pushed for the whole run.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    :param tries: Maximum number of search polls while waiting for the index.
    :returns: List with the id of every created record.
    """
    indexer = RecordIndexer()
    records = []

    with app.app_context():
        # Record validation is patched out while the records are created.
        skip_validation = mock.patch(
            'invenio_records.api.Record.validate', return_value=None
        )
        with skip_validation:
            source_path = pkg_resources.resource_filename(
                'invenio_records', filename
            )
            parsed_items = load(source_path)
            with db.session.begin_nested():
                for marcxml in parsed_items:
                    record_id = uuid.uuid4()
                    payload = dict(marc21.do(marcxml))
                    payload['$schema'] = schema
                    # Both minters receive the same id and payload.
                    recid_minter(record_id, payload)
                    oaiid_minter(record_id, payload)
                    created = Record.create(payload, id_=record_id)
                    indexer.index(created)
                    records.append(created.id)
            db.session.commit()

        # Wait for indexer to finish: poll until the total hit count reaches
        # the number of created records, refreshing between attempts.
        for _ in range(tries):
            hits = current_search_client.search()['hits']
            if hits['total'] >= len(records):
                break
            current_search.flush_and_refresh('_all')

    return records
def import_records(dojson_model, schema, xmlfile):
    """Import every record of a MARCXML file and return the new UUIDs.

    :param dojson_model: Object whose ``do`` method converts a parsed
        MARCXML item to JSON.
    :param schema: Currently unused; see the TODO in the loop body.
    :param xmlfile: Path of the MARCXML file, opened in binary mode.
    :returns: List of the UUIDs generated for the created records.
    """
    from invenio_pidstore import current_pidstore

    ids = []
    with db.session.begin_nested(), open(xmlfile, 'rb') as fp:
        for marcxml in load(fp):
            # Transform MARCXML to JSON.
            data = dojson_model.do(marcxml)

            # TODO: set data['$schema'] = schema once schema validation has
            # been fixed.

            # Every record gets a fresh UUID.
            id_ = uuid.uuid4()

            # FIXME: Strip off control number otherwise minter will fail.
            if 'control_number' in data:
                del data['control_number']

            # Mint a recid and an OAI id for the same record.
            pid = current_pidstore.minters['recid'](id_, data)
            current_pidstore.minters['oaiid'](id_, data)

            # Store the record; the returned object is not needed here.
            Record.create(data, id_=id_)
            click.echo('Created record {}'.format(pid.pid_value))
            ids.append(id_)

    return ids
def test_xslt_not_found():
    """Check that dumping with a missing XSLT file raises ``IOError``."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        # Write the sample record into the temporary working directory.
        with open('record.xml', 'wb') as record_file:
            record_file.write(RECORD_SIMPLE.encode('utf-8'))

        data = list(load('record.xml'))
        with pytest.raises(IOError):
            dumps(data, xslt_filename='file_not_exist')
def test_xslt_dump():
    """Compare the XSLT dump of the demo file with the stored result."""
    here = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(here)
    source_path = "{0}/demo_marc21_to_dc.xml".format(here)
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)

    with open(expected_path) as expected_file:
        expect = expected_file.read()

    data = list(load(source_path))
    output = dumps(data, xslt_filename=xslt_path)
    assert output.decode("utf-8") == expect
def test_marc21_loader():
    """Check that a collection built from two records loads as two items."""
    payload = '<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(payload.encode('utf-8'))
    parsed = list(load(stream))
    assert len(parsed) == 2
def test_xslt_not_found():
    """Check that dumping with a missing XSLT file raises ``IOError``."""
    cli = CliRunner()
    with cli.isolated_filesystem():
        # Write the sample record into the temporary working directory.
        with open("record.xml", "wb") as record_file:
            record_file.write(RECORD_SIMPLE.encode("utf-8"))

        data = list(load("record.xml"))
        with pytest.raises(IOError):
            dumps(data, xslt_filename="file_not_exist")
def test_entry_points():
    """Check the ``marcxml`` dump entry point against the stored result."""
    entry_points = list(
        pkg_resources.iter_entry_points("dojson.cli.dump", "marcxml")
    )
    dump = entry_points[0].load()

    here = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(here)
    source_path = "{0}/demo_marc21_to_dc.xml".format(here)
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)

    with open(expected_path) as expected_file:
        expect = expected_file.read()

    data = list(load(source_path))
    output = dump(data, xslt_filename=xslt_path)
    assert output.decode("utf-8") == expect
def collections():
    """Create the collections described by ``COLLECTIONS`` and commit."""
    from invenio_db import db
    from invenio_collections.models import Collection
    from .fixtures import COLLECTIONS

    def create_tree(entries, parent=None):
        """Add each entry, then recurse into its ``children``."""
        if not entries:
            return
        for entry in entries:
            node = Collection(
                name=entry['name'],
                dbquery=entry.get('dbquery'),
                parent=parent,
            )
            db.session.add(node)
            # Flush before recursing into the children of this node.
            db.session.flush()
            create_tree(entry.get('children'), parent=node)

    create_tree(COLLECTIONS)
    db.session.commit()
def test_xslt_dump():
    """Compare the XSLT dump of the demo file with the stored result."""
    here = os.path.dirname(__file__)
    expected_path = '{0}/demo_marc21_to_dc.converted.xml'.format(here)
    source_path = '{0}/demo_marc21_to_dc.xml'.format(here)
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(here)

    with open(expected_path) as expected_file:
        expect = expected_file.read()

    data = list(load(source_path))
    output = dumps(data, xslt_filename=xslt_path)
    assert output.decode('utf-8') == expect
def test_output_type_from_dumps_etree():
    """Check that ``dumps_etree`` returns an ``_Element`` in both modes."""
    here = os.path.dirname(__file__)
    data = list(load("{0}/demo_marc21_to_dc.xml".format(here)))
    xslt_path = "{0}/demo_marc21_to_dc.xslt".format(here)

    # Call once without arguments and once with an XSLT file; neither call
    # should raise a TypeError.
    plain = dumps_etree(data)
    transformed = dumps_etree(data, xslt_filename=xslt_path)

    for result in (plain, transformed):
        assert isinstance(result, _Element)
def test_entry_points():
    """Check the ``marcxml`` dump entry point against the stored result.

    Newlines are removed from the stored result before the comparison.
    """
    entry_points = list(
        pkg_resources.iter_entry_points('dojson.cli.dump', 'marcxml')
    )
    dump = entry_points[0].load()

    here = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(here)
    source_path = '{0}/demo_marc21_to_dc.xml'.format(here)
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(here)

    with open(expected_path) as expected_file:
        raw_expected = expected_file.read()
    expect = raw_expected.replace('\n', '')

    data = list(load(source_path))
    output = dump(data, xslt_filename=xslt_path)
    assert output.decode('utf-8') == expect
def test_marc21_loader():
    """Check that a collection built from two records loads as two items."""
    from six import BytesIO
    from dojson.contrib.marc21.utils import load

    payload = '<collection>{0}{1}</collection>'.format(RECORD, RECORD_SIMPLE)
    stream = BytesIO(payload.encode('utf-8'))
    parsed = list(load(stream))
    assert len(parsed) == 2
def test_entry_points():
    """Check the ``marcxml`` dump entry point against the stored result."""
    entry_points = list(
        pkg_resources.iter_entry_points('dojson.cli.dump', 'marcxml')
    )
    dump = entry_points[0].load()

    here = os.path.dirname(__file__)
    expected_path = "{0}/demo_marc21_to_dc.converted.xml".format(here)
    source_path = '{0}/demo_marc21_to_dc.xml'.format(here)
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(here)

    with open(expected_path) as expected_file:
        expect = expected_file.read()

    data = list(load(source_path))
    output = dump(data, xslt_filename=xslt_path)
    assert output.decode('utf-8') == expect
def test_output_type_from_dumps_etree():
    """Check that ``dumps_etree`` returns an ``_Element`` in both modes."""
    here = os.path.dirname(__file__)
    data = list(load('{0}/demo_marc21_to_dc.xml'.format(here)))
    xslt_path = '{0}/demo_marc21_to_dc.xslt'.format(here)

    # Call once without arguments and once with an XSLT file; neither call
    # should raise a TypeError.
    plain = dumps_etree(data)
    transformed = dumps_etree(data, xslt_filename=xslt_path)

    for result in (plain, transformed):
        assert isinstance(result, _Element)
def load_records(es_app, filename, schema):
    """Create, index and fetch back the records of a packaged MARCXML file.

    :param es_app: Application providing the test request context.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        source_path = pkg_resources.resource_filename("invenio_records", filename)
        parsed_items = load(source_path)

        # Create every record first, then commit once.
        created = []
        for marcxml in parsed_items:
            payload = dict(marc21.do(marcxml))
            payload["$schema"] = schema
            created.append(Record.create(payload))
        db.session.commit()

        # Index all records before fetching any of them back.
        index_results = [indexer.index(record) for record in created]

        for result in index_results:
            search.client.get(
                index=result["_index"],
                doc_type=result["_type"],
                id=result["_id"],
            )
def test_bibliographic_data(es_app):
    """Test indexation using bibliographic data.

    Every item of the packaged ``data/marc21/bibliographic.xml`` file is
    stored as a record, tagged with a ``$schema`` and indexed; each indexed
    document is then fetched back from the search client.

    :param es_app: Application used to set up search and to provide the
        test request context.
    """
    search = InvenioSearch(es_app)
    search.create()
    # Whatever ``create`` set up is removed even when indexing or a lookup
    # fails, so a failing run does not leak state into later tests.
    try:
        indexer = RecordIndexer()

        with es_app.test_request_context():
            data_filename = pkg_resources.resource_filename(
                'invenio_records', 'data/marc21/bibliographic.xml')
            records_data = load(data_filename)

            records = []
            for item in records_data:
                record = Record.create(item)
                # The schema is set after creation, just before indexing.
                record['$schema'] = "mappings/marc21_holdings.json"
                es_record = indexer.index(record)
                records.append(es_record)

            # Fetch each indexed document back by index, type and id.
            for record in records:
                search.client.get(index=record['_index'],
                                  doc_type=record['_type'],
                                  id=record['_id'])
    finally:
        search.delete()
def load_records(es_app, filename, schema):
    """Create, index and fetch back the records of a packaged MARCXML file.

    :param es_app: Application providing the test request context.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        source_path = pkg_resources.resource_filename('invenio_records', filename)
        parsed_items = load(source_path)

        # Create every record first, then commit once.
        created = []
        for marcxml in parsed_items:
            payload = dict(marc21.do(marcxml))
            payload['$schema'] = schema
            created.append(Record.create(payload))
        db.session.commit()

        # Index all records before fetching any of them back.
        index_results = [indexer.index(record) for record in created]

        for result in index_results:
            search.client.get(
                index=result['_index'],
                doc_type=result['_type'],
                id=result['_id'],
            )
def load_records(es_app, filename, schema):
    """Create, index and fetch back the records of a packaged MARCXML file.

    :param es_app: Application providing the test request context.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        source_path = pkg_resources.resource_filename('invenio_records', filename)
        parsed_items = load(source_path)

        # Create every record first, then commit once.
        created = []
        for marcxml in parsed_items:
            payload = dict(marc21.do(marcxml))
            payload['$schema'] = schema
            created.append(Record.create(payload))
        db.session.commit()

        # Index all records before fetching any of them back.
        index_results = [indexer.index(record) for record in created]

        from invenio_search import current_search

        for result in index_results:
            current_search.client.get(
                index=result['_index'],
                doc_type=result['_type'],
                id=result['_id'],
            )
def load_records(es_app, filename, schema):
    """Create, index and fetch back the records of a packaged MARCXML file.

    :param es_app: Application providing the test request context.
    :param filename: Resource path inside the ``invenio_records`` package.
    :param schema: Value stored under ``$schema`` in every record.
    """
    indexer = RecordIndexer()
    with es_app.test_request_context():
        source_path = pkg_resources.resource_filename("invenio_records", filename)
        parsed_items = load(source_path)

        # Create every record first, then commit once.
        created = []
        for marcxml in parsed_items:
            payload = dict(marc21.do(marcxml))
            payload["$schema"] = schema
            created.append(Record.create(payload))
        db.session.commit()

        # Index all records before fetching any of them back.
        index_results = [indexer.index(record) for record in created]

        from invenio_search import current_search

        for result in index_results:
            current_search.client.get(
                index=result["_index"],
                doc_type=result["_type"],
                id=result["_id"],
            )