def test_record_get_bucket_with_no_bucket(app, db, location):
    """Test retrieval of the bucket when no bucket is associated."""
    record = Record.create({'title': 'test'}, with_bucket=False)
    db.session.commit()
    record = Record.get_record(record.id)
    assert record.bucket is None
    assert record.files is None
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()

    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{"name": fake.name()}],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Flush to index and database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")

    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
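# Hedged invocation sketch for create_fake_record above, not part of the
# original fixture. The Faker package supplies the name()/bs()/company()
# methods the function uses; running inside an active application context is
# an assumption, and the bulk size of 100 is illustrative.
from faker import Faker

def demo_create_fake_records():
    """Illustrative only: generate 100 fake demo records."""
    create_fake_record(100, Faker())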
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""
    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec',
                                       object_uuid=rec_uuid)
    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])

    # If there are any IIIF-valid image files, the IIIF manifest API is added
    # to the record metadata.
    iiif_manifest_url = ''
    if num_of_iiif_valid_files > 0:
        iiif_manifest_url = '/record/{0}/iiif/manifest.json'.format(
            provider.pid.pid_value)

    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url
    data = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }
    # from invenio_records_files.api import Record as RecordFile
    record = RecordFile.create(data, id_=rec_uuid)

    # Connect the record to the bucket.
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:
        click.echo('Loading glossary-terms from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                        not isinstance(data.get("collections", None),
                                       basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])
        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
def add_oai_information(obj, eng):
    """Add OAI information, such as the OAI identifier, to the record."""
    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            oai_prefix = current_app.config.get('OAISERVER_ID_PREFIX')
            existing_record['_oai'] = {
                'id': '%s:%s' % (oai_prefix, recid),
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or \
            not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
def attach_files(obj, eng):
    """Attach the files listed in the workflow's extra data to the record."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)
                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\n"
                        "Url: %s\nHeaders:%s" % (data.status_code,
                                                 file_['url'], headers), eng)
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = \
                file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'], db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDVersioning(record_pid).parent.pid_value == \
            parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
def add_oai_information(obj, eng):
    """Add OAI information, such as the OAI identifier, to the record."""
    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or \
            not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
def list_db_published_records():
    """A generator for all the published records."""
    # Note: ``RecordMetadata.json is not None`` would be evaluated by Python
    # itself (always True); the SQLAlchemy ``isnot`` operator is needed to
    # actually filter NULL JSON in the database.
    query = RecordMetadata.query.filter(RecordMetadata.json.isnot(None))
    for obj in query.all():
        record = Record(obj.json, model=obj)
        if is_publication(record.model):
            yield record
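# Hedged usage sketch for the generator above. Running inside an application
# context with a configured database is assumed; `count_published` is an
# illustrative helper, not part of the original code.
def count_published():
    """Illustrative only: count the published records in the database."""
    return sum(1 for _ in list_db_published_records())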
def store_record(obj, eng):
    """Store the record in the database."""
    if 'Italiana di Fisica'.lower() in \
            obj.data['abstracts'][0]['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'
    if 'Italiana di Fisica'.lower() in \
            obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)
    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err,),
                          obj, eng)
    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
def transfer_cp(uuid, destination):
    """Transfer the files contained in the record to the destination.

    This method is automatically called by the module to transfer the files.
    Depending on your installation, you may want to have a different behavior
    (copy among servers...). Then, you can create your own factory and link
    it into the config variable
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FACTORY`.

    :param uuid: the id of the record containing files to transfer
    :param destination: the destination folder - this will be what is inside
        the config variable
        :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FOLDER`.
        It needs to be an absolute path to a folder.
    """
    record = Record.get_record(uuid)
    pid = PersistentIdentifier.get_by_object("recid", "rec", uuid)
    dir_name = join(destination,
                    create_accessioned_id(pid.pid_value, 'recid'))
    try:
        mkdir(dir_name)
    except OSError:
        # The directory already exists.
        pass
    for fileobj in record.files:
        copyfile(fileobj.file.storage().fileurl,
                 join(dir_name, fileobj.key))
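# The docstring above describes transfer_cp as a pluggable factory. A sketch
# of a custom factory that could be wired through the config variable it
# names; `my_transfer` and the 'mysite.archive' import path are assumptions,
# not part of invenio-archivematica.
#
#   ARCHIVEMATICA_TRANSFER_FACTORY = 'mysite.archive.my_transfer'
def my_transfer(uuid, destination):
    """Illustrative custom factory: delegate to the copy-based transfer."""
    transfer_cp(uuid, destination)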
def update_record(pid, schema, data):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    record['$schema'] = schema
    record.update(data)
    record.commit()
    return record
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '',
        '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()
            ),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()
        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
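# The docstring above says this runs "every midnight". With Celery beat that
# would be scheduled roughly as below; crontab() is the standard Celery
# scheduling helper, while the 'mysite.tasks...' task path and the uppercase
# Flask-style config key are assumptions about the surrounding project.
from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    'release-expired-embargoes': {
        'task': 'mysite.tasks.update_expired_embargoes',
        'schedule': crontab(minute=0, hour=0),
    },
}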
def datacite_register(self, pid_value, record_uuid, max_retries=5,
                      countdown=5):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a CDS DOI.
        if not is_local_doi(record['doi']) or \
                not current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
            return

        dcp = DataCiteProvider.get(record['doi'])
        url = current_app.config['CDS_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        db.session.rollback()
        raise self.retry(max_retries=max_retries, countdown=countdown,
                         exc=exc)
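# The `self` parameter and `self.retry(...)` call above imply this function
# is registered as a bound Celery task, presumably decorated with something
# like `@shared_task(bind=True)`. A hedged enqueue sketch: .delay() is
# standard Celery API, while the helper name is an illustrative assumption.
def enqueue_datacite_register(pid_value, record_uuid):
    """Illustrative only: queue the DataCite registration task."""
    return datacite_register.delay(pid_value, record_uuid)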
def test_record_create_no_bucket(app, db, location):
    """Test record creation without bucket creation."""
    record = Record.create({}, with_bucket=False)
    db.session.commit()
    assert record.files is None
    assert '_bucket' not in record
    assert '_files' not in record
def test_record_create_files(app, db, location):
    """Test record creation with bucket and files."""
    record = Record.create({'title': 'test'})
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    db.session.commit()
    assert record['_bucket'] == record.bucket_id
    assert record['_files']
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # First we create a record.
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # We set up a file storage.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # We add a file to the record.
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # Test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # The content is bytes, so the copy must be read back in binary mode.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # Finalization.
    rmtree(tmppath)
def update_expired_embargos():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '',
        '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()
        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
def attach_files(obj, eng):
    """Attach the files listed in the workflow's extra data to the record."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = \
                file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'], db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDNodeVersioning(record_pid).parent.pid_value == \
            parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
def record(app, db):
    """Create a record."""
    record = {'title': 'fuu'}
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
def attach_files(obj, eng):
    """Attach the files listed in the workflow's extra data to the record."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = \
                file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDNodeVersioning and previous record of a record PID.

    :param version_of: record PID.
    """
    from b2share.modules.records.providers import RecordUUIDProvider
    from b2share.modules.records.utils import is_publication
    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    parent_pid = PIDNodeVersioning(pid=child_pid).parents.first()
    version_master = PIDNodeVersioning(pid=parent_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # Check that version_of references the last version of a record.
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
def indexer_receiver(sender, json, doc_type, index, record):
    """Add the fulltext of raw-text record files to the indexed JSON."""
    from invenio_records_files.api import Record as FileRecord
    record = FileRecord.get_record(record.id)
    for file in record.files:
        if file.get('filetype') == 'raw_text':
            with file.file.storage().open() as f:
                json['fulltext'] = f.read().decode('utf-8').replace('\n', ' ')
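# indexer_receiver above is shaped like an invenio_indexer signal handler.
# A hedged wiring sketch: before_record_index is the real invenio_indexer
# signal, but connecting at module load time is an assumption, and the exact
# keyword set the signal sends varies across invenio_indexer versions.
from invenio_indexer.signals import before_record_index

before_record_index.connect(indexer_receiver)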
def minted_record(app, db):
    """Create a test record."""
    data = {'title': 'fuu'}
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid = current_pidstore.minters['recid'](rec_uuid, data)
        record = Record.create(data, id_=rec_uuid)
    return pid, record
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1',
            filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

        with app.test_client() as client:
            res = client.get(main_url)
            assert res.status_code == 200
            res = client.get(file_url)
            assert res.status_code == 200
            res = client.get(no_file_url)
            assert res.status_code == 404
            res = client.get(invalid_file_url)
            assert res.status_code == 404
def record_with_bucket(full_record, bucket, db):
    """Create a record with an attached bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(
        pid_type='recid', pid_value=12345, object_type='rec',
        object_uuid=record.id, status='R')
    db.session.commit()
    return pid, record
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id, with_bucket=not skip_files)
    if not skip_files:
        handle_record_files(data, record.bucket, files, skip_files)
    return record
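# Hedged usage sketch for create_record above; the schema value and metadata
# are illustrative, and committing the session afterwards follows the other
# fixtures in this collection.
def demo_create_record():
    """Illustrative only: create a record without attaching files."""
    record = create_record('records/record-v1.0.0.json',
                           {'title': 'demo'}, files=[], skip_files=True)
    db.session.commit()
    return record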
def publish(self, pid=None, id_=None):
    """Publish a deposit."""
    pid = pid or self.pid

    if not pid.is_registered():
        raise PIDInvalidAction()

    self['_deposit']['status'] = 'published'

    if self['_deposit'].get('pid') is None:  # First publishing
        minter = current_pidstore.minters[
            current_app.config['DEPOSIT_PID_MINTER']
        ]
        id_ = id_ or uuid.uuid4()
        record_pid = minter(id_, self)

        self['_deposit']['pid'] = {
            'type': record_pid.pid_type,
            'value': record_pid.pid_value,
            'revision_id': 0,
        }

        data = dict(self.dumps())
        data['$schema'] = self.record_schema

        # During the first publishing, create a snapshot of the bucket.
        @contextmanager
        def process_files(data):
            """Process deposit files."""
            if self.files and self.files.bucket:
                assert not self.files.bucket.locked
                self.files.bucket.locked = True
                snapshot = self.files.bucket.snapshot(lock=True)
                data['_files'] = self.files.dumps(bucket=snapshot.id)
                yield data
                db.session.add(RecordsBuckets(
                    record_id=id_, bucket_id=snapshot.id
                ))
            else:
                yield data

        with process_files(data) as data:
            record = Record.create(data, id_=id_)
    else:  # Update after edit
        record_pid, record = self.fetch_published()
        # TODO add support for patching
        assert record.revision_id == self['_deposit']['pid']['revision_id']

        data = dict(self.dumps())
        data['$schema'] = self.record_schema
        record = record.__class__(data, model=record.model)
        record.commit()

    self.commit()
    return self
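# Hedged usage sketch for publish() above, assuming an invenio-deposit style
# Deposit instance that has already been edited; committing the session
# afterwards persists the published record. The helper name is illustrative.
def demo_publish(deposit):
    """Illustrative only: publish a deposit and persist the session."""
    deposit.publish()
    db.session.commit()
    return deposit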
def record_with_bucket(full_record, bucket, db):
    """Create a record with an attached bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(
        pid_type='recid', pid_value=1, object_type='rec',
        object_uuid=record.id, status='R')
    db.session.commit()
    return pid, record
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # We set up a file storage.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # Set up the metadata type.
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # First we create a record.
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'}, recid)
    # We add a file to the record.
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP.
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # Test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # We try with no files.
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # Finalization.
    rmtree(tmppath)
def record(db):
    """Record fixture."""
    rec_uuid = uuid.uuid4()
    provider = RecordIdProvider.create(
        object_type='rec', object_uuid=rec_uuid)
    record = Record.create({
        'control_number': provider.pid.pid_value,
        'title': 'TestDefault',
    }, id_=rec_uuid)
    db.session.commit()
    return record
def check_dois(record, allrecords, update):
    """Checks that DOIs of records in the current instance are registered."""
    if record:
        record = Record.get_record(record)
        check_record_doi(record, update)
    elif allrecords:
        click.secho('checking DOI for all records')
        for record in list_db_published_records():
            check_record_doi(record, update)
    else:
        raise click.ClickException('Either -r or -a option must be selected')
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def check_embargo(record_id, is_embargoed):
    """Check a record's embargo via its metadata and its file access."""
    with app.app_context():
        with app.test_client() as client:
            login_user(non_creator, client)
            # Test the open_access field in the record's metadata.
            record = Record.get_record(record_id)
            assert record['open_access'] != is_embargoed
            # Test the record's file access.
            subtest_file_bucket_permissions(
                client, record.files.bucket,
                access_level=None if is_embargoed else 'read',
                is_authenticated=True
            )
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    record = CustomRecord.create({})
    assert record.files is not None

    record = Record.create({})
    assert record.files is None
def check_pids_migration():
    """Check that the persistent identifiers have been migrated."""
    expected_pids = _load_json('expected_pids.json')
    # Check unchanging properties
    for exp_pid in expected_pids:
        db_pid = PersistentIdentifier.get(exp_pid['pid_type'],
                                          exp_pid['pid_value'])
        for key, value in exp_pid.items():
            if key != 'updated':
                assert str(getattr(db_pid, key)) == str(value)
        # Check that deleted PIDs' records are (soft or hard) deleted.
        if exp_pid['status'] == PIDStatus.DELETED.value:
            metadata = None
            try:
                record = Record.get_record(exp_pid['pid_value'],
                                           with_deleted=True)
                # Soft deleted record
                metadata = record.model.json
            except NoResultFound:
                # Hard deleted record
                pass
            assert metadata is None
        # Check versioning relations and PIDs
        if exp_pid['pid_type'] == 'b2dep':
            try:
                rec_pid = PersistentIdentifier.get('b2rec',
                                                   exp_pid['pid_value'])
                # If the deposit is deleted, either the record PID was
                # reserved and has been deleted, or it still exists.
                if db_pid.status == PIDStatus.DELETED:
                    assert rec_pid.status != PIDStatus.RESERVED
            except PIDDoesNotExistError:
                # The record PID was only reserved and has been deleted
                # with the deposit PID.
                assert db_pid.status == PIDStatus.DELETED
                continue
            # Check that a parent pid has been created.
            versioning = PIDVersioning(child=rec_pid)
            parent = versioning.parent
            assert rec_pid.status in [PIDStatus.RESERVED,
                                      PIDStatus.REGISTERED]
            if rec_pid.status == PIDStatus.RESERVED:
                assert parent.status == PIDStatus.RESERVED
            else:
                assert parent.status == PIDStatus.REDIRECTED
                assert parent.get_redirect() == rec_pid
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    record = Record.get_record(record_uuid)
    dcp = DataCiteProvider.get(record['doi'])
    url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
        recid=pid_value)
    doc = datacite_v31.serialize(dcp.pid, record)
    if dcp.pid.status == PIDStatus.REGISTERED:
        dcp.update(url, doc)
    else:
        dcp.register(url, doc)
    db.session.commit()
def datacite_register(pid_value, record_uuid):
    """Mint DOI and Concept DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])
        doc = datacite_v41.serialize(dcp.pid, record)
        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)

        # If this is the latest record version, update/register the Concept
        # DOI using the metadata of the record.
        recid = PersistentIdentifier.get('recid', str(record['recid']))
        pv = PIDVersioning(child=recid)
        conceptdoi = record.get('conceptdoi')
        if conceptdoi and pv.exists and pv.is_last_child:
            conceptrecid = record.get('conceptrecid')
            concept_dcp = DataCiteProvider.get(conceptdoi)
            url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
                recid=conceptrecid)
            doc = datacite_v41.serialize(concept_dcp.pid, record)
            if concept_dcp.pid.status == PIDStatus.REGISTERED:
                concept_dcp.update(url, doc)
            else:
                concept_dcp.register(url, doc)

        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
def test_files_protection(app, db, location):
    """Test record files property protection."""
    record = Record.create({})
    bucket = record.files.bucket
    assert bucket

    # Create the first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)

    # Lock the bucket.
    bucket.locked = True
    assert record.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del record.files['hello.txt']
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDVersioning and previous record of a record PID.

    :param version_of: record PID.
    """
    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    version_master = PIDVersioning(child=child_pid)
    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # Check that version_of references the last version of a record.
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # We create a record.
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    title = {'title': 'record test'}
    record = Record.create(title, recid)
    # We create the models.
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # We create an API SIP on top of it.
    api_recordsip = RecordSIP(recordsip, sip)
    assert api_recordsip.model is recordsip
    assert api_recordsip.sip.id == sip.id
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            bucket_id = record.files.bucket
            bucket = Bucket.get(bucket_id.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket
            ).delete()
            bucket_id.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def check_handles(update, record_pid):
    """Allocate handles for a record and its files, if necessary."""
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    record_updated = False

    pid_list = [p.get('value') for p in record['_pid']
                if p.get('type') == 'ePIC_PID']
    if pid_list:
        click.secho('record {} already has a handle'.format(record_pid),
                    fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if update:
            b2share_pid_minter(rec_pid, record)
            record_updated = True
            click.secho(' handle added to record', fg='green')
        else:
            click.secho('use -u argument to add a handle to the record')

    files_ok = True
    for f in record.get('_files', []):
        if f.get('ePIC_PID'):
            click.secho('file {} already has a handle'.format(f.get('key')),
                        fg='green')
        else:
            click.secho('file {} has no handle'.format(f.get('key')),
                        fg='red')
            files_ok = False

    if update and not files_ok:
        create_file_pids(record)
        record_updated = True
        click.secho(' files updated with handles', fg='green')
    elif not update and not files_ok:
        click.secho('use -u argument to add handles to the files')

    if record_updated:
        record.commit()
        db.session.commit()
def test_cascade_action_record_delete(app, db, location, record_with_bucket,
                                      generic_file, force,
                                      num_of_recordbuckets):
    """Test cascade action on record delete, with force false."""
    record = record_with_bucket
    record_id = record.id
    bucket_id = record.files.bucket.id

    # Check before.
    assert len(RecordsBuckets.query.all()) == 1
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)

    record.delete(force=force)

    # Check after.
    db.session.expunge(record.model)
    with pytest.raises(NoResultFound):
        record = Record.get_record(record_id)
    assert len(RecordsBuckets.query.all()) == num_of_recordbuckets
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)
def store_record(obj, eng):
    """Store the record in the database."""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
def update_record(obj, eng):
    """Update an existing record."""
    doi = get_first_doi(obj)

    query = {'query': {'bool': {'must': [{'match': {'dois.value': doi}}]}}}
    search_result = es.search(index='records-record',
                              doc_type='record-v1.0.0', body=query)
    recid = search_result['hits']['hits'][0]['_source']['control_number']

    obj.extra_data['recid'] = recid
    obj.data['control_number'] = recid

    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_files' in existing_record:
        obj.data['_files'] = existing_record['_files']
    if '_oai' in existing_record:
        obj.data['_oai'] = existing_record['_oai']

    # Preserve the original creation date.
    creation_date = existing_record['record_creation_date']
    obj.data['record_creation_date'] = creation_date
    obj.data['record_creation_year'] = parse_date(creation_date).year

    existing_record.clear()
    existing_record.update(obj.data)

    try:
        existing_record.commit()
        obj.save()
        db.session.commit()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except SchemaError as err:
        __halt_and_notify('SchemaError during record validation! %s' % err,
                          eng)
def test_deposit_versions_create(app, test_records, test_users):
    """Creating new versions of existing records."""
    with app.app_context():
        # Retrieve a record which will be the first version.
        v1 = test_records[0].data
        v1_rec = B2ShareRecord.get_record(test_records[0].record_id)
        v1_pid, v1_id = pid_of(v1)
        assert list_published_pids(v1_pid) == [v1_pid]

        # create draft in version chain:
        # version chain becomes: [v1] -- [v2 draft]
        # v2 = create_deposit({}, version_of=v1_id)
        data = copy_data_from_previous(v1_rec.model.json)
        v2 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v1_id)
        assert filenames(v2) == []
        ObjectVersion.create(v2.files.bucket, 'myfile1',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v2) == ['myfile1']
        assert list_published_pids(v1_pid) == [v1_pid]

        # cannot create another draft if one exists
        # not possible: [v1] -- [v2 draft]
        #                   `- [new draft]
        with pytest.raises(DraftExistsVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # cannot create a version from a draft pid
        # not possible: [v1] -- [v2 draft] -- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            # record pid not created yet
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v2['_deposit']['id'])

        # publish previous draft
        # version chain becomes: [v1] -- [v2]
        v2.submit()
        v2.publish()
        v2_pid, v2_id = pid_of(v2)
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]

        # cannot create draft based on the first version in a chain
        # not possible: [v1] -- [v2]
        #                   `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # create and publish other versions:
        # version chain becomes: [v1] -- [v2] -- [v3]
        data = copy_data_from_previous(v1_rec.model.json)
        v3 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v2_id)
        # assert files are imported from v2
        assert filenames(v3) == ['myfile1']
        ObjectVersion.create(v3.files.bucket, 'myfile2',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v3) == ['myfile1', 'myfile2']
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]
        v3.submit()
        v3.publish()
        v3_pid, v3_id = pid_of(v3)
        v3_rec = Record.get_record(v3_id)
        assert filenames(v3_rec) == ['myfile1', 'myfile2']
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid, v3_pid]

        # cannot create draft based on an intermediate version in a chain
        # not possible: [v1] -- [v2] -- [v3]
        #                           `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            create_deposit({}, test_users['deposits_creator'],
                           version_of=v2_id)

        # Create yet another version
        # Version chain becomes: [v1] -- [v2] -- [v3] -- [v4]
        data = copy_data_from_previous(v1_rec.model.json)
        v4 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v3_id)
        v4.submit()
        v4.publish()
        assert filenames(v4) == ['myfile1', 'myfile2']
        v4_pid, v4_id = pid_of(v4)
        assert list_published_pids(v1_pid) == [
            v1_pid, v2_pid, v3_pid, v4_pid]

        # assert that creating a new version from a deleted pid is not
        # allowed
        resolver = Resolver(pid_type=v4_pid.pid_type, object_type='rec',
                            getter=partial(B2ShareRecord.get_record,
                                           with_deleted=True))
        v4_pid, v4_rec = LazyPIDValue(resolver, v4_pid.pid_value).data
        # delete [v4]
        v4_rec.delete()
        with pytest.raises(RecordNotFoundVersioningError):
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v4_id)
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0."""
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    if verbose:
        click.secho('migrating deposits and records...')
    with db.session.begin_nested():
        # Migrate published records
        records_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for rec_pid in records_pids:
            if verbose:
                click.secho(' record {}'.format(rec_pid.pid_value))
            try:
                record = Record.get_record(rec_pid.object_uuid)
            except NoResultFound:
                # The record is deleted but not the PID. Fix it.
                rec_pid.status = PIDStatus.DELETED
                continue
            # Create parent version PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)
            version_master.update_redirect()
            migrate_record_metadata(
                Record.get_record(rec_pid.object_uuid),
                parent_pid
            )

        # Migrate deposits
        deposit_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for dep_pid in deposit_pids:
            if verbose:
                click.secho(' deposit {}'.format(dep_pid.pid_value))
            try:
                deposit = Deposit.get_record(dep_pid.object_uuid)
                if deposit['publication_state'] != \
                        PublicationStates.published.name:
                    # The record is not published yet. Reserve the PID.
                    rec_pid = RecordUUIDProvider.create(
                        object_type='rec',
                        pid_value=dep_pid.pid_value,
                    ).pid
                    # Create parent version PID
                    parent_pid = RecordUUIDProvider.create().pid
                    assert parent_pid
                    version_master = PIDVersioning(parent=parent_pid)
                    version_master.insert_draft_child(child=rec_pid)
                else:
                    # Retrieve previously created version PID
                    rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
                    version_master = PIDVersioning(child=rec_pid)
                    parent_pid = version_master.parent
                    if not parent_pid:
                        click.secho(
                            ' record {} was deleted, but the deposit has '
                            'not been removed'.format(rec_pid.pid_value),
                            fg='red')
                if parent_pid:
                    migrate_record_metadata(
                        Deposit.get_record(dep_pid.object_uuid),
                        parent_pid
                    )
            except NoResultFound:
                # The deposit is deleted but not the PID. Fix it.
                dep_pid.status = PIDStatus.DELETED
    if verbose:
        click.secho('done migrating deposits.')
    RecordIndexer.index = old_index_fn