def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    start = timeit.default_timer()
    bulk_uuids = []
    for _ in range(bulk_size):
        # Fake metadata for a single demo record.
        metadata = {
            "contributors": [{"name": fake.name()}],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }
        # Persist the record under a freshly minted 'recid' PID.
        record_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](record_uuid, metadata)
        Record.create(metadata, id_=record_uuid)
        # Remember the UUID for bulk indexing below.
        bulk_uuids.append(record_uuid)
    # Flush everything to the database in one transaction.
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")
    # Queue the new records for bulk indexing, then refresh the index.
    indexer = RecordIndexer()
    indexer.bulk_index(bulk_uuids)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")
    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.", fg="green")
def glossary_terms():
    """Load demo terms records.

    Reads every JSON fixture under ``modules/fixtures/data/terms``, tags
    each entry with the "Terms" collection, mints a term id, creates the
    record and indexes it.
    """
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        click.echo('Loading glossary-terms from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                # Ensure a "collections" list exists before tagging the
                # record as a glossary term.
                # BUGFIX: `basestring` exists only on Python 2 and raised
                # NameError on Python 3; `str` is the correct check here.
                if "collections" not in data and \
                        not isinstance(data.get("collections", None), str):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                # Use a non-shadowing name instead of the builtin `id`.
                rec_uuid = uuid.uuid4()
                cernopendata_termid_minter(rec_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=rec_uuid)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def store_record(obj, eng):
    """Stores record in database"""
    # Normalize the source label to 'Springer/SIF' when the original
    # source mentions 'Italiana di Fisica' (case-insensitive match).
    if 'Italiana di Fisica'.lower(
    ) in obj.data['abstracts'][0]['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'
    if 'Italiana di Fisica'.lower(
    ) in obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    # Derive the creation year from the full creation date string.
    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        # Propagate the minted control number back onto the workflow
        # object so later tasks can reference it.
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)
    except ValidationError as err:
        # Halt the workflow on schema validation failures.
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ),
                          obj, eng)
    except PIDAlreadyExists:
        # Halt if a record with this identifier already exists.
        __halt_and_notify("Record with this id already in DB", obj, eng)
def test_record_get_bucket_with_no_bucket(app, db, location):
    """Test bucket retrieval when no bucket is associated."""
    created = Record.create({'title': 'test'}, with_bucket=False)
    db.session.commit()
    # Reload from the database and verify no bucket/files are attached.
    reloaded = Record.get_record(created.id)
    assert reloaded.bucket is None
    assert reloaded.files is None
def test_record_create_no_bucket(app, db, location):
    """Test record creation without bucket creation."""
    rec = Record.create({}, with_bucket=False)
    db.session.commit()
    # No files API and no bucket-related metadata keys should exist.
    assert rec.files is None
    assert '_bucket' not in rec
    assert '_files' not in rec
def test_record_create_files(app, db, location):
    """Test record creation with bucket and files."""
    rec = Record.create({'title': 'test'})
    rec.files['hello.txt'] = BytesIO(b'Hello world!')
    db.session.commit()
    # The bucket id is denormalized into the record metadata, and the
    # stored file shows up under '_files'.
    assert rec['_bucket'] == rec.bucket_id
    assert rec['_files']
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # BUGFIX: the expected content is `bytes`, so the copied file must be
    # read in binary mode; text mode returns `str` and the comparison
    # with `content` would always fail on Python 3.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
def record(app, db):
    """Create a record."""
    # Minimal committed record fixture.
    rec = Record.create({'title': 'fuu'})
    rec.commit()
    db.session.commit()
    return rec
def glossary_terms():
    """Load demo terms records.

    Reads every JSON fixture under ``modules/fixtures/data/terms``, tags
    each entry with the "Terms" collection, mints a term id, creates the
    record and indexes it.
    """
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        click.echo('Loading glossary-terms from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                # Ensure a "collections" list exists before tagging the
                # record as a glossary term.
                # BUGFIX: `basestring` exists only on Python 2 and raised
                # NameError on Python 3; `str` is the correct check here.
                if "collections" not in data and \
                        not isinstance(data.get("collections", None), str):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                # Use a non-shadowing name instead of the builtin `id`.
                rec_uuid = uuid.uuid4()
                cernopendata_termid_minter(rec_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=rec_uuid)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""
    new_uuid = uuid4()
    pid_provider = RecordIdProvider.create(object_type='rec',
                                           object_uuid=new_uuid)

    files_meta, iiif_file_count = generate_files_metadata(
        bucket, record_dict['_files'])

    # Expose a IIIF manifest URL only when the bucket holds at least one
    # IIIF-valid image file.
    manifest_url = (
        '/record/{0}/iiif/manifest.json'.format(pid_provider.pid.pid_value)
        if iiif_file_count > 0 else ''
    )

    deposit_meta = record_dict['_deposit']
    deposit_meta['iiif_manifest'] = manifest_url

    record = RecordFile.create(
        {
            'pid_value': pid_provider.pid.pid_value,
            '_deposit': deposit_meta,
            '_files': files_meta,
        },
        id_=new_uuid,
    )

    # Link the freshly created record to its bucket and persist.
    db.session.add(
        RecordsBuckets(record_id=record.id, bucket_id=bucket.id))
    db.session.commit()
def datasets(skip_files):
    """Load demo datasets records.

    Reads every JSON fixture under ``modules/fixtures/data/datasets``,
    mints a dataset id (or plain recid when no DOI is present), creates
    the record with an attached bucket and optionally its files, then
    indexes it.

    :param skip_files: when truthy, file loading is skipped entirely.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE: `data` is rebound here — it was the fixtures dir path.
            for data in json.load(source):
                # Files are handled separately from the record metadata.
                files = data.pop('files', [])
                id = uuid.uuid4()  # NOTE(review): shadows the builtin `id`
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the file by URI (no copy) as an object
                    # version inside the record's bucket.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record

    # Mint a doc id, stamp the schema and create the record in one go.
    doc_uuid = uuid.uuid4()
    cernopendata_docid_minter(doc_uuid, data)
    data['$schema'] = schema
    return Record.create(data, id_=doc_uuid)
def minted_record(app, db):
    """Create a test record."""
    metadata = {'title': 'fuu'}
    with db.session.begin_nested():
        record_uuid = uuid.uuid4()
        # Mint a 'recid' persistent identifier for the new record.
        pid = current_pidstore.minters['recid'](record_uuid, metadata)
        rec = Record.create(metadata, id_=record_uuid)
    return pid, rec
def data_policies(skip_files):
    """Load demo Data Policy records.

    Reads every JSON fixture under ``modules/fixtures/data``, mints a
    recid for each entry, creates the record with an attached bucket and
    optionally its files, then indexes it.

    :param skip_files: when truthy, file loading is skipped entirely.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE: `data` is rebound here — it was the fixtures dir path.
            for data in json.load(source):
                # Files are handled separately from the record metadata.
                files = data.pop('files', [])
                id = uuid.uuid4()  # NOTE(review): shadows the builtin `id`
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the file by URI (no copy) as an object
                    # version inside the record's bucket.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket, filename, _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_file_download_ui(app, objects):
    """Test get buckets.

    Configures the records UI with a file-download endpoint, creates a
    registered record pointing at an existing object version, and checks
    the landing page and file URLs respond as expected.
    """
    app.config.update(dict(
        # Permission factory that allows everything (test only).
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        # '_files' references obj1 with a deliberately invalid checksum.
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1',
            filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        # Landing page and the existing file must be served.
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        # Empty or unknown filenames must 404.
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
def record_with_bucket(full_record, bucket, db):
    """Create a record linked to a bucket, plus its PID."""
    rec = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=rec.model)
    # Attach a 'recid' persistent identifier to the record.
    pid = PersistentIdentifier.create(
        pid_type='recid',
        pid_value=12345,
        object_type='rec',
        object_uuid=rec.id,
        status='R',
    )
    db.session.commit()
    return pid, rec
def record(app, db):
    """Create a record."""
    # Build and persist a minimal record fixture.
    rec = Record.create({'title': 'fuu'})
    rec.commit()
    db.session.commit()
    return rec
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    rec_uuid = uuid.uuid4()
    cernopendata_recid_minter(rec_uuid, data)
    data['$schema'] = schema
    # Bucket creation is skipped entirely when files are not wanted.
    record = Record.create(data, id_=rec_uuid, with_bucket=not skip_files)
    if not skip_files:
        handle_record_files(data, record.bucket, files, skip_files)
    return record
def test_RecordSIP_create(db, mocker): """Test create method from the API class RecordSIP.""" # we setup a file storage tmppath = tempfile.mkdtemp() db.session.add(Location(name='default', uri=tmppath, default=True)) # setup metadata mtype = SIPMetadataType(title='JSON Test', name='json-test', format='json', schema='url://to/schema') db.session.add(mtype) db.session.commit() # first we create a record recid = uuid.uuid4() pid = PersistentIdentifier.create( 'recid', '1337', object_type='rec', object_uuid=recid, status=PIDStatus.REGISTERED) mocker.patch('invenio_records.api.RecordBase.validate', return_value=True, autospec=True) record = Record.create( {'title': 'record test', '$schema': 'url://to/schema'}, recid) # we add a file to the record bucket = Bucket.create() content = b'Test file\n' RecordsBuckets.create(record=record.model, bucket=bucket) record.files['test.txt'] = BytesIO(content) db.session.commit() # Let's create a SIP user = create_test_user('*****@*****.**') agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'} rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent) db.session.commit() # test! assert RecordSIP_.query.count() == 1 assert SIP_.query.count() == 1 assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 1 assert len(rsip.sip.files) == 1 assert len(rsip.sip.metadata) == 1 metadata = rsip.sip.metadata[0] assert metadata.type.format == 'json' assert '"title": "record test"' in metadata.content assert rsip.sip.archivable is True # we try with no files rsip = RecordSIP.create(pid, record, True, create_sip_files=False, user_id=user.id, agent=agent) assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 2 assert len(rsip.sip.files) == 0 assert len(rsip.sip.metadata) == 1 # finalization rmtree(tmppath)
def record(db):
    """Record fixture."""
    new_uuid = uuid.uuid4()
    # Mint a record id via the provider before creating the record.
    provider = RecordIdProvider.create(object_type='rec',
                                       object_uuid=new_uuid)
    data = {
        'control_number': provider.pid.pid_value,
        'title': 'TestDefault',
    }
    rec = Record.create(data, id_=new_uuid)
    db.session.commit()
    return rec
def publish(self, pid=None, id_=None):
    """Publish a deposit.

    On first publication a record PID is minted, the deposit's bucket
    (if any) is snapshotted and locked, and a standalone record is
    created from the deposit's data.  On later calls (edit flow) the
    already-published record is rebuilt from the deposit's current data.

    :param pid: deposit PID; defaults to ``self.pid``.
    :param id_: UUID for the published record; a fresh UUID is generated
        on first publication when not given.
    :returns: ``self`` (the deposit), committed.
    :raises PIDInvalidAction: if the deposit PID is not registered.
    """
    pid = pid or self.pid

    if not pid.is_registered():
        raise PIDInvalidAction()

    self['_deposit']['status'] = 'published'

    if self['_deposit'].get('pid') is None:  # First publishing
        minter = current_pidstore.minters[
            current_app.config['DEPOSIT_PID_MINTER']
        ]
        id_ = id_ or uuid.uuid4()
        record_pid = minter(id_, self)

        # Remember the published record's PID on the deposit itself.
        self['_deposit']['pid'] = {
            'type': record_pid.pid_type,
            'value': record_pid.pid_value,
            'revision_id': 0,
        }

        data = dict(self.dumps())
        data['$schema'] = self.record_schema

        # During first publishing create snapshot the bucket.
        @contextmanager
        def process_files(data):
            """Process deposit files."""
            if self.files and self.files.bucket:
                # Lock the deposit bucket and snapshot it; the record
                # gets the locked snapshot, not the live bucket.
                assert not self.files.bucket.locked
                self.files.bucket.locked = True
                snapshot = self.files.bucket.snapshot(lock=True)
                data['_files'] = self.files.dumps(bucket=snapshot.id)
                yield data
                # Link the published record to the snapshot bucket
                # after the record has been created.
                db.session.add(RecordsBuckets(
                    record_id=id_, bucket_id=snapshot.id
                ))
            else:
                yield data

        with process_files(data) as data:
            record = Record.create(data, id_=id_)
    else:  # Update after edit
        record_pid, record = self.fetch_published()
        # TODO add support for patching
        assert record.revision_id == self['_deposit']['pid']['revision_id']

        data = dict(self.dumps())
        data['$schema'] = self.record_schema
        # Rebuild the record object on the existing model and commit.
        record = record.__class__(data, model=record.model)
        record.commit()

    self.commit()
    return self
def record_with_bucket(full_record, bucket, db):
    """Create a record linked to a bucket, plus its PID."""
    rec = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=rec.model)
    # Attach a 'recid' persistent identifier to the record.
    pid = PersistentIdentifier.create(
        pid_type='recid',
        pid_value=1,
        object_type='rec',
        object_uuid=rec.id,
        status='R',
    )
    db.session.commit()
    return pid, rec
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    rec_uuid = uuid.uuid4()
    cernopendata_recid_minter(rec_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=rec_uuid)
    if not skip_files:
        # Attach a dedicated bucket holding the record's files.
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    new_id = uuid.uuid4()
    cernopendata_recid_minter(new_id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=new_id)
    if skip_files:
        return record
    # Attach a dedicated bucket holding the record's files.
    bucket = Bucket.create()
    handle_record_files(data, bucket, files, skip_files)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def _create_deposit(cls, record):
    """Create a deposit from the record.

    Deep-copies the record data, resolves its schema and creates the
    deposit with a relaxed (partial draft-4) validator, then links
    deposit metadata, bucket and files back to the record.

    :param record: source record to derive the deposit from.
    :returns: ``(record, deposit)`` tuple.
    """
    logging.debug('Create deposit')
    # Work on a copy so the original record data is not mutated.
    data = deepcopy(record)
    cls._resolve_schema(deposit=data, record=record)
    # Relaxed validation: drafts need not satisfy the full schema.
    deposit = Record.create(data, validator=PartialDraft4Validator)
    cls._resolve_deposit(deposit=deposit, record=record)
    cls._resolve_bucket(deposit=deposit, record=record)
    cls._resolve_files(deposit=deposit, record=record)
    # generate files list
    cls._resolve_dumps(record=record)
    # db.session.commit()
    return record, deposit
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    # A record whose mixin provides a bucket exposes a files API ...
    assert CustomRecord.create({}).files is not None
    # ... while a plain Record without one does not.
    assert Record.create({}).files is None
def draft_record(app, db, prepare_es, s3_location):
    """Testing draft-enabled record."""
    draft_uuid = uuid.uuid4()
    # Register a draft PID pointing at the record UUID.
    PersistentIdentifier.create(
        pid_type='drecid',
        pid_value='1',
        status=PIDStatus.REGISTERED,
        object_type='rec',
        object_uuid=draft_uuid,
    )
    return Record.create(
        {
            'title': 'blah',
            # '$schema': TestRecord.PREFERRED_SCHEMA,
            'id': '1',
        },
        id_=draft_uuid,
    )
def software(skip_files):
    """Load demo software records.

    Reads every JSON fixture under ``modules/fixtures/data/software``,
    mints a software id, creates the record with an attached bucket and
    optionally its files, then indexes it.

    :param skip_files: when truthy, file loading is skipped entirely.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in software_json:
        with open(filename, 'rb') as source:
            # NOTE: `data` is rebound here — it was the fixtures dir path.
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()  # NOTE(review): shadows the builtin `id`
                cernopendata_softid_minter(id, data)
                record = Record.create(data, id_=id)
                # NOTE(review): '$schema' is set after Record.create and no
                # record.commit() follows — other fixture loaders set it on
                # `data` before creation; confirm this is intentional.
                record['$schema'] = schema

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the file by URI (no copy) as an object
                    # version inside the record's bucket.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def docs():
    """Load demo article records.

    Reads every JSON doc fixture (skipping 'opera*' files), inlines each
    entry's body content from its referenced file, mints a doc id,
    creates the record and indexes it.
    """
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.docid import \
        cernopendata_docid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')
    articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        name = filename.split('/')[-1]
        if name.startswith('opera'):
            click.echo('Skipping opera records ...')
            continue
        with open(filename, 'rb') as source:
            for data in json.load(source):
                # Replace body with responding content
                assert data["body"]["content"]
                content_filename = os.path.join(*([
                    "/",
                ] + filename.split('/')[:-1] + [
                    data["body"]["content"],
                ]))
                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                # BUGFIX: `basestring` exists only on Python 2 and raised
                # NameError on Python 3; `str` is the correct check here.
                if "collections" not in data and \
                        not isinstance(data.get("collections", None), str):
                    data["collections"] = []
                # Use a non-shadowing name instead of the builtin `id`.
                doc_uuid = uuid.uuid4()
                cernopendata_docid_minter(doc_uuid, data)
                record = Record.create(data, id_=doc_uuid)
                # NOTE(review): '$schema' is set after Record.create and
                # never record.commit()-ed — presumably it should be set
                # on `data` before creation as other loaders do; left
                # unchanged to preserve behavior.
                record['$schema'] = schema
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_files_protection(app, db, location):
    """Test record files property protection."""
    rec = Record.create({})
    bucket = rec.files.bucket
    assert bucket

    # Add a first file and sanity-check it.
    rec.files['hello.txt'] = BytesIO(b'Hello world!')
    first = rec.files['hello.txt']
    assert 'hello.txt' == first['key']
    assert 1 == len(rec.files)

    # Once the bucket is locked, deletion must be rejected.
    bucket.locked = True
    assert rec.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del rec.files['hello.txt']
def fixtures():
    """Command for working with test data.

    Creates a default file-storage location, a record with a bucket, and
    uploads the bundled demo files into it.
    """
    temp_path = os.path.join(os.path.dirname(__file__), 'instance/temp')
    demo_files_path = os.path.join(os.path.dirname(__file__), 'demo_files')

    # Create location
    loc = Location(name='local', uri=temp_path, default=True)
    db.session.add(loc)
    db.session.commit()

    # Example files from the data folder
    # BUGFIX: 'pdffile.pdf' was listed twice, uploading the same file a
    # second time and creating a redundant object version.
    demo_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'pdffile.pdf',
        'jpgfile.jpg',
        'pngfile.png',
    )

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
    }

    record = Record.create(data, id_=rec_uuid)
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)

    # Add files to the record
    for f in demo_files:
        with open(os.path.join(demo_files_path, f), 'rb') as fp:
            record.files[f] = fp

    record.files.flush()
    record.commit()
    db.session.commit()
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    record_uuid = uuid.uuid4()
    pid = PersistentIdentifier.create('recid', '1337', object_type='rec',
                                      object_uuid=record_uuid,
                                      status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, record_uuid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip_model = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # Wrap the model in the API class and verify the linkage.
    api_recordsip = RecordSIP(recordsip_model, sip)
    assert api_recordsip.model is recordsip_model
    assert api_recordsip.sip.id == sip.id
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    record_uuid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=record_uuid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, record_uuid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip_model = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # Wrap the model in the API class and verify the linkage.
    api_recordsip = RecordSIP(recordsip_model, sip)
    assert api_recordsip.model is recordsip_model
    assert api_recordsip.sip.id == sip.id
def store_record(obj, eng):
    """Stores record in database"""
    # Normalize the Springer/SIF source label before persisting.
    set_springer_source_if_needed(obj)

    try:
        new_record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(new_record.id), new_record)
        obj.save()
        new_record.commit()

        # Commit to DB before indexing
        db.session.commit()
        # Propagate the minted control number back to the workflow data.
        obj.data['control_number'] = new_record['control_number']
        obj.save()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
def create_record(cls, data, file_content):
    """Create a record.

    :param dict data: The record data.
    :param file_content: The file to store.
    """
    with db.session.begin_nested():
        # create uuid
        rec_uuid = uuid.uuid4()
        # create PID
        current_pidstore.minters['recid'](rec_uuid, data)
        # create record and the associated bucket
        created_record = Record.create(data, id_=rec_uuid)
        # index the record
        # NOTE(review): indexing happens before the file is attached, so
        # the indexed document may not include the file — confirm intended.
        RecordIndexer().index(created_record)
        # store the file and link it to the metadata
        created_record.files[file_content.filename] = file_content
    db.session.commit()
    current_app.logger.info("Created file= " + created_record['title'] +
                            ", by user= " + current_user.email)
def record(database, location, image_path):
    """Test record."""
    new_uuid = uuid.uuid4()
    # Register a 'recid' PID pointing at the record UUID.
    pid1 = PersistentIdentifier.create(
        'recid',
        '1',
        object_type='rec',
        object_uuid=new_uuid,
        status=PIDStatus.REGISTERED,
    )
    rec = Record.create(
        {
            'id': 1,
            'title': 'Lorem ipsum',
            'description': 'Lorem ipsum dolor sit amet',
        },
        id_=new_uuid,
    )
    # Attach the sample image to the record's files.
    with open(image_path, 'rb') as fp:
        rec.files['image-public-domain.jpg'] = fp
    database.session.commit()
    return rec
def create_record(schema, data, files, skip_files):
    """Creates a new record.

    Builds a bucket, loads each entry of ``files`` into it (unless
    ``skip_files``), then mints a recid and creates the record linked to
    the bucket.

    :param schema: JSON-schema URL stored on the record.
    :param data: record metadata; its file entries are enriched in place.
    :param files: iterable of dicts with 'uri', 'size' and 'checksum'.
    :param skip_files: when truthy, no files are loaded.
    :returns: the created record.
    """
    bucket = Bucket.create()

    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        try:
            # Register the file by URI (no copy) inside the bucket.
            f = FileInstance.create()
            filename = file.get("uri").split('/')[-1:][0]
            f.set_uri(file.get("uri"), file.get("size"),
                      file.get("checksum"))
            obj = ObjectVersion.create(bucket, filename, _file_id=f.id)
            # Mirror the stored object back into the metadata entry.
            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })
        except Exception as e:
            # Best-effort loading: report and continue with next file.
            click.echo('Recid {0} file {1} could not be loaded due '
                       'to {2}.'.format(data.get('recid'), filename,
                                        str(e)))
            continue

    id = uuid.uuid4()  # NOTE(review): shadows the builtin `id`
    cernopendata_recid_minter(id, data)
    record = Record.create(data, id_=id)
    # NOTE(review): '$schema' is set after Record.create with no
    # record.commit() here — confirm persistence is handled by the caller.
    record['$schema'] = schema
    RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def store_record(obj, eng):
    """Stores record in database"""
    # Normalize the Springer/SIF source label before persisting.
    set_springer_source_if_needed(obj)

    # Derive the creation year from the full creation date string.
    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        # Propagate the minted control number back to the workflow data.
        obj.data['control_number'] = record['control_number']
        obj.save()
    except ValidationError as err:
        # Halt the workflow on schema validation failures.
        __halt_and_notify("Validation error: %s." % err, eng)
    except PIDAlreadyExists:
        # Halt if a record with this identifier already exists.
        __halt_and_notify("Record with this id already in DB", eng)
def test_files_property(app, db, location, bucket):
    """Test record files property.

    Exercises add/update/order/rename/delete semantics of the record
    files API against a record linked to an existing bucket.
    """
    # A record without a model cannot expose files.
    with pytest.raises(MissingModelError):
        Record({}).files

    record = Record.create({})
    RecordsBuckets.create(bucket=bucket, record=record.model)

    assert 0 == len(record.files)
    assert 'invalid' not in record.files
    # make sure that _files key is not added after accessing record.files
    assert '_files' not in record

    with pytest.raises(KeyError):
        record.files['invalid']

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)
    assert 1 == len(record['_files'])

    # Update first file with new content:
    record.files['hello.txt'] = BytesIO(b'Hola mundo!')
    file_1 = record.files['hello.txt']
    assert 'hello.txt' == file_1['key']
    assert 1 == len(record.files)
    assert 1 == len(record['_files'])

    # Overwriting produces a new object version.
    assert file_0['version_id'] != file_1['version_id']

    # Create second file and check number of items in files.
    record.files['second.txt'] = BytesIO(b'Second file.')
    record.files['second.txt']
    assert 2 == len(record.files)
    assert 'hello.txt' in record.files
    assert 'second.txt' in record.files

    # Check order of files.
    order_0 = [f['key'] for f in record.files]
    assert ['hello.txt', 'second.txt'] == order_0

    record.files.sort_by(*reversed(order_0))
    order_1 = [f['key'] for f in record.files]
    assert ['second.txt', 'hello.txt'] == order_1

    # Try to rename second file to 'hello.txt'.
    with pytest.raises(Exception):
        record.files.rename('second.txt', 'hello.txt')

    # Remove the 'hello.txt' file.
    del record.files['hello.txt']
    assert 'hello.txt' not in record.files
    # Make sure that 'second.txt' is still there.
    assert 'second.txt' in record.files

    # Deleting a missing key raises.
    with pytest.raises(KeyError):
        del record.files['hello.txt']

    # Now you can rename 'second.txt' to 'hello.txt'.
    record.files.rename('second.txt', 'hello.txt')
    assert 'second.txt' not in record.files
    assert 'hello.txt' in record.files
def datasets(skip_files):
    """Load demo datasets records.

    Reads every JSON fixture under ``modules/fixtures/data/datasets``,
    mints a dataset id (or plain recid when no DOI is present), creates
    the record with an attached bucket and optionally its files, then
    indexes it.

    :param skip_files: when truthy, file loading is skipped entirely.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE: `data` is rebound here — it was the fixtures dir path.
            for data in json.load(source):
                # Files are handled separately from the record metadata.
                files = data.pop('files', [])
                id = uuid.uuid4()  # NOTE(review): shadows the builtin `id`
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    # Register the file by URI (no copy) as an object
                    # version inside the record's bucket.
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket, filename, _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_fake_record():
    """Create records for demo purposes.

    Builds one record filled with Faker-generated RDM-style metadata,
    mints a 'recid_v2' PID for it, indexes it and refreshes the search
    index before committing the database session.

    :returns: the created record.
    """
    fake = Faker()
    data_to_use = {
        "_access": {
            "metadata_restricted": False,
            "files_restricted": False
        },
        "_created_by": 2,
        "_default_preview": "previewer one",
        "_internal_notes": [{
            "user": "******",
            "note": "RDM record",
            "timestamp": fake.iso8601(tzinfo=None, end_datetime=None),
        }],
        "_owners": [1],
        "access_right": "open",
        "embargo_date": fake.iso8601(tzinfo=None, end_datetime=None),
        "contact": "*****@*****.**",
        "resource_type": fake_resource_type(),
        "identifiers": {
            "DOI": "10.9999/rdm.9999999",
            "arXiv": "9999.99999",
        },
        "creators": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "0000-0002-1825-0097",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }]
        }],
        "titles": [{
            "title": fake.company() + "'s gallery",
            "type": "Other",
            "lang": "eng"
        }],
        "publication_date": fake_edtf_level_0(),
        "subjects": [{
            "subject": "Romans",
            "identifier": "subj-1",
            "scheme": "no-scheme"
        }],
        "contributors": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "9999-9999-9999-9998",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }],
            "role": "RightsHolder"
        }],
        "dates": [{
            # No end date to avoid computations based on start
            "start": fake.iso8601(tzinfo=None, end_datetime=None),
            "description": "Random test date",
            "type": "Other"
        }],
        "language": "eng",
        "related_identifiers": [{
            "identifier": "10.9999/rdm.9999988",
            "scheme": "DOI",
            "relation_type": "Requires",
            "resource_type": fake_resource_type()
        }],
        "version": "v0.0.1",
        "licenses": [{
            "license": "Berkeley Software Distribution 3",
            "uri": "https://opensource.org/licenses/BSD-3-Clause",
            "identifier": "BSD-3",
            "scheme": "BSD-3",
        }],
        "descriptions": [{
            "description": fake.text(max_nb_chars=3000),
            "type": "Abstract",
            "lang": "eng"
        }],
        "locations": [{
            "point": {
                "lat": str(fake.latitude()),
                "lon": str(fake.longitude())
            },
            "place": fake.location_on_land()[2],
            "description":
                "Random place on land for random coordinates..."
        }],
        "references": [{
            "reference_string": "Reference to something et al.",
            "identifier": "9999.99988",
            "scheme": "GRID"
        }]
    }

    # Create and index record
    rec_uuid = uuid.uuid4()
    current_pidstore.minters['recid_v2'](rec_uuid, data_to_use)
    record = Record.create(data_to_use, id_=rec_uuid)
    RecordIndexer().index(record)

    # Flush to index and database
    current_search.flush_and_refresh(index='records')
    db.session.commit()

    return record
def test_missing_location(app, db):
    """Test missing location."""
    # Without a default Location the record exposes no files API.
    rec = Record.create({})
    assert rec.files is None
def add_record(metadata, collection, schema, force, files=[]):
    """Add record.

    Creates a record in the named collection with the given metadata,
    uploads the listed fixture files into per-file buckets, indexes the
    record, and grants edit/read/index permissions on it.

    NOTE(review): ``files=[]`` is a mutable default argument — it is
    never mutated here, but a ``None`` default would be safer.

    :param metadata: record metadata used to construct the record.
    :param collection: name of an existing Collection; no-op if missing.
    :param schema: validation schema (ignored when ``force`` is truthy).
    :param force: when truthy, skip schema validation.
    :param files: fixture file names under ``data/files`` to attach.
    """
    collection = Collection.query.filter(
        Collection.name == collection).first()
    if collection is None:
        return

    data, pid, recid = construct_record(
        collection, metadata, 1, {} if force else schema)
    # NOTE(review): `d` is never used below — confirm before removing.
    d = current_app.config['DATADIR']
    buckets = []
    data['_files'] = []

    for file in files:
        # One bucket per file, each recorded for later record linkage.
        bucket = Bucket.create(default_location=Location.get_default())
        buckets.append(bucket)
        with open(pkg_resources.resource_filename(
                'cap.modules.fixtures',
                os.path.join('data', 'files', file)
        ), 'rb') as fp:
            obj = ObjectVersion.create(bucket, file, stream=fp)
            data['_files'].append({
                'bucket': str(obj.bucket_id),
                'key': obj.key,
                'size': obj.file.size,
                'checksum': str(obj.file.checksum),
                'version_id': str(obj.version_id),
            })
    try:
        record = Record.create(data, id_=recid)
        for bucket in buckets:
            rb = RecordsBuckets(record_id=record.id, bucket_id=bucket.id)
            db.session.add(rb)
        # Invenio-Indexer is delegating the document inferring to
        # Invenio-Search which is analysing the string splitting by `/` and
        # using `.json` to be sure that it cans understand the mapping.
        record['$schema'] = 'mappings/{0}.json'.format(collection.name.lower())
        indexer = RecordIndexer()
        indexer.index(record)
        # Creating permission needs for the record
        action_edit_record = RecordUpdateActionNeed(str(recid))
        action_read_record = RecordReadActionNeed(str(recid))
        action_index_record = RecordIndexActionNeed(str(recid))
        # Giving index, read, write permissions to user/creator
        db.session.add(ActionUsers.allow(action_edit_record))
        db.session.add(ActionUsers.allow(action_read_record))
        db.session.add(ActionUsers.allow(action_index_record))
        db.session.commit()
        print("DONE!!!")
    except ValidationError as error:
        # Roll back everything on validation failure and dump details.
        print("============================")
        pprint(error.message)
        pprint(error.path)
        print("============================")
        db.session.rollback()