def test_dbcommit(app, document_json):
    """Check that a record is retrievable by PID after ``dbcommit``."""
    created = DocumentRecord.create(document_json)
    created.dbcommit()

    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']
def test_import_records(mock_record_by_identifier, app, document_json,
                        bucket_location):
    """Check record import when the lookup finds nothing, a record, or fails."""
    document_json['files'] = [{
        'key': 'test.pdf',
        'url': 'http://some.url/file.pdf'
    }]

    # Import while the identifier lookup reports no existing record.
    mock_record_by_identifier.return_value = None
    imported_ids = import_records([document_json])
    imported = DocumentRecord.get_record(imported_ids[0])
    assert imported
    assert imported['harvested']

    # Import again while the lookup returns the record imported above.
    mock_record_by_identifier.return_value = imported
    imported_ids = import_records([document_json])
    assert DocumentRecord.get_record(imported_ids[0])

    # Import while the lookup itself raises: no ID must be returned.
    def raise_lookup_error(data):
        raise Exception("No record found for identifier")

    mock_record_by_identifier.side_effect = raise_lookup_error
    imported_ids = import_records([document_json])
    assert not imported_ids
def test_create(app, document_json):
    """Check record creation, without and with ``dbcommit=True``."""
    created = DocumentRecord.create(document_json)
    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']

    # Create a second time from the same data, committing immediately.
    DocumentRecord.create(document_json, dbcommit=True)
    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']
def test_create(app):
    """Check that records can be created without and with ``dbcommit``."""
    uncommitted_data = {"pid": "1", "title": "The title of the record"}
    committed_data = {"pid": "2", "title": "The title of the record"}

    DocumentRecord.create(uncommitted_data)
    DocumentRecord.create(committed_data, dbcommit=True)
def test_missing_pids(app, es_clear, document_json):
    """Test missing PIDs reported by ``DataIntegrityMonitoring``.

    Walks a single document (PID ``1000``) through the states "only in DB",
    "in DB and index", "only in index" and "duplicated in index", then checks
    the error raised when the resource has no ``search_index`` configured.
    """
    document_json['pid'] = '1000'
    document = DocumentRecord.create(data=document_json, dbcommit=True)
    monitoring = DataIntegrityMonitoring()

    # Only in DB
    assert monitoring.missing_pids('doc') == {
        'db': ['1000'],
        'es': [],
        'es_double': []
    }

    # OK
    document.reindex()
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': [],
        'es_double': []
    }

    # Only in ES
    document.delete()
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': ['1000'],
        'es_double': []
    }

    # With deleted
    assert monitoring.missing_pids('doc', True) == {
        'db': [],
        'es': [],
        'es_double': []
    }

    # Duplicate
    document2 = DocumentRecord.create(data=document_json, dbcommit=True)
    document2.reindex()
    current_search.flush_and_refresh('documents')
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': ['1000'],
        'es_double': ['1000']
    }

    # Index not configured
    endpoint_config = app.config.get('RECORDS_REST_ENDPOINTS')['doc']
    endpoint_config.pop('search_index', None)
    try:
        with pytest.raises(Exception) as exception:
            monitoring.missing_pids('doc')
        assert str(exception.value) == \
            'No "search_index" configured for resource "doc"'
    finally:
        # Restore the shared app configuration even if an assertion above
        # fails, so the missing key cannot leak into other tests.
        endpoint_config['search_index'] = 'documents'
def test_get_record_by_pid(app, document_json):
    """Check PID lookup before creation, after creation and after deletion."""
    # Unknown PID yields nothing.
    assert DocumentRecord.get_record_by_pid('not-existing') is None

    # A created record is found under its own PID.
    created = DocumentRecord.create(document_json)
    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']

    # Once deleted, the lookup yields nothing again.
    created.delete()
    assert DocumentRecord.get_record_by_pid(created['pid']) is None
def test_get_all_pids(app, document):
    """Check the PID listing with and without deleted records."""
    assert list(DocumentRecord.get_all_pids()) == ['1']

    # After deletion the default listing is empty...
    document.delete()
    assert list(DocumentRecord.get_all_pids()) == []

    # ...but the PID is still listed when deleted records are requested.
    assert list(DocumentRecord.get_all_pids(with_deleted=True)) == ['1']
def test_get_record_by_pid(app):
    """Check PID lookup for a fixed PID across create and delete."""
    pid = 'ABCD'

    # Nothing exists under this PID yet.
    assert DocumentRecord.get_record_by_pid(pid) is None

    created = DocumentRecord.create({
        "pid": pid,
        "title": "The title of the record"
    })
    fetched = DocumentRecord.get_record_by_pid(pid)
    assert fetched['pid'] == pid

    # Deleting the record makes the lookup return nothing again.
    created.delete()
    assert DocumentRecord.get_record_by_pid(pid) is None
def test_get_record_by_identifier(app, document):
    """Check record lookup by identifier for a hit and a miss."""
    known_identifier = {'value': '111111', 'type': 'bf:Local'}
    unknown_identifier = {'value': 'oai:unknown', 'type': 'bf:Identifier'}

    # Hit: the fixture document is returned.
    found = DocumentRecord.get_record_by_identifier([known_identifier])
    assert found['pid'] == document['pid']

    # Miss: nothing is returned.
    found = DocumentRecord.get_record_by_identifier([unknown_identifier])
    assert not found
def _make_document(organisation='org', with_file=False, pid=None):
    """Create, commit and reindex a document record from ``document_json``.

    :param organisation: Organisation passed to ``make_organisation``; when
        falsy, no organisation is created or attached.
    :param with_file: When true, attach ``pdf_file`` as ``test1.pdf``.
    :param pid: PID to force on the document; when falsy, ``pid`` and
        ``_oai`` are removed from the data before creation.
    :returns: The created record.
    """
    if organisation:
        make_organisation(organisation)
        # NOTE(review): the reference always targets "org", whatever
        # ``organisation`` is - confirm this is intended.
        document_json['organisation'] = [{
            '$ref': 'https://sonar.ch/api/organisations/org'
        }]

    if pid:
        document_json['pid'] = pid
    else:
        for key in ('pid', '_oai'):
            document_json.pop(key, None)

    created = DocumentRecord.create(document_json,
                                    dbcommit=True,
                                    with_bucket=True)
    created.commit()
    db.session.commit()

    if with_file:
        with open(pdf_file, 'rb') as stream:
            created.add_file(stream.read(),
                             'test1.pdf',
                             order=1,
                             access='coar:c_f1cf',
                             restricted_outside_organisation=False,
                             embargo_date=embargo_date.isoformat())
        created.commit()
        db.session.commit()

    created.reindex()
    return created
def populate_fulltext_field(sender=None,
                            record=None,
                            json=None,
                            index=None,
                            **kwargs):
    """Receive a signal before record is indexed, to add fulltext.

    This function is called just before a record is sent to index. It does
    nothing unless ``index`` starts with ``documents`` and the record has
    files; otherwise ``json['fulltext']`` is set to the decoded content of
    every file whose ``type`` is ``fulltext``.

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed.
    :param str index: Name of the index in which record will be sent.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so it must be checked before calling ``startswith`` on it.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    json['fulltext'] = []
    for file in record.files:
        if file.get('type') == 'fulltext':
            with file.file.storage().open() as pdf_file:
                json['fulltext'].append(pdf_file.read().decode('utf-8'))
def test_get_affiliations():
    """Check extraction of controlled affiliations from free text."""
    raw_affiliation = (
        ' Institute for Research in Biomedicine (IRB), Faculty of Biomedical '
        'Sciences, Università della Svizzera italiana, Switzerland - Graduate '
        'School for Cellular and Biomedical Sciences, University of Bern, c/o '
        'Theodor Kocher Institute, Freiestrasse 1, P.O. Box 938, CH-3000 Bern '
        '9, Switzerland '
    )
    expected = ['Uni of Bern and Hospital', 'Uni of Italian Switzerland']

    assert DocumentRecord.get_affiliations(raw_affiliation) == expected

    # No input text gives no affiliations.
    assert not DocumentRecord.get_affiliations(None)
def marc21_to_contribution_field_100(self, key, value):
    """Append a creator contribution built from field 100.

    Nothing is added when subfield ``a`` is empty. The entry is appended to
    ``self['contribution']``; the function itself always returns ``None``.
    """
    name = value.get('a')
    if not name:
        return None

    agent = {'type': 'bf:Person', 'preferred_name': name}
    entry = {'agent': agent, 'role': ['cre']}

    # Raw affiliation, plus controlled affiliations when any are found.
    affiliation = value.get('u')
    if affiliation:
        entry['affiliation'] = affiliation
        controlled = DocumentRecord.get_affiliations(affiliation)
        if controlled:
            entry['controlledAffiliation'] = controlled

    # Birth and death dates are both read from subfield ``d``.
    birth, death = marc21tojson.extract_date(value.get('d'))
    if birth:
        agent['date_of_birth'] = birth
    if death:
        agent['date_of_death'] = death

    contributions = self.get('contribution', [])
    contributions.append(entry)
    self['contribution'] = contributions

    return None
def _make_document(organisation='org', with_file=False, pid=None):
    """Create, commit and reindex a document record from ``document_json``.

    :param organisation: Organisation passed to ``make_organisation``; when
        falsy, no organisation is created or attached.
    :param with_file: When true, attach ``pdf_file`` as ``test1.pdf``.
    :param pid: PID to force on the document; when falsy, any ``pid`` is
        removed from the data before creation.
    :returns: The created record.
    """
    if organisation:
        make_organisation(organisation)
        document_json['organisation'] = {
            '$ref': 'https://sonar.ch/api/organisations/org'
        }

    if pid:
        document_json['pid'] = pid
    else:
        document_json.pop('pid', None)

    created = DocumentRecord.create(document_json,
                                    dbcommit=True,
                                    with_bucket=True)
    created.commit()
    db.session.commit()

    if with_file:
        with open(pdf_file, 'rb') as stream:
            # NOTE(review): 'insitution' looks like a misspelling of
            # 'institution'; kept as is - confirm against the file schema.
            created.add_file(stream.read(),
                             'test1.pdf',
                             order=1,
                             restricted='insitution',
                             embargo_date='2021-01-01')
        created.commit()
        db.session.commit()

    created.reindex()
    return created
def read(cls, user, record):
    """Read permission check.

    :param user: Current user record.
    :param record: Record to check.
    :returns: True if action can be done.
    """
    # Only for moderator users.
    if not user or not user.is_moderator:
        return False

    # Superuser is allowed.
    if user.is_superuser:
        return True

    document = DocumentRecord.get_record_by_pid(record['pid'])

    # The lookup returns None for an unknown PID; deny access instead of
    # failing on the attribute access below.
    if document is None:
        return False

    document = document.replace_refs()

    # For admin or moderators users, they can access only to their
    # organisation's documents.
    return any(current_organisation['pid'] == organisation['pid']
               for organisation in document['organisation'])
def detail(pid_value, view='global'):
    """Render the detail page of a document.

    Aborts with 404 when no record matches ``pid_value`` or when the record
    has a truthy ``hiddenFromPublic`` value.
    """
    record = DocumentRecord.get_record_by_pid(pid_value)

    is_visible = record and not record.get('hiddenFromPublic')
    if not is_visible:
        abort(404)

    # Enrich files with restriction, link and thumbnail.
    if record.get('_files'):
        # The organisation may force files to point to an external URL.
        record['external_url'] = has_external_urls_for_files(record)
        populate_files_properties(record)

    # Imported here to avoid a circular reference error.
    from sonar.modules.documents.serializers import (google_scholar_v1,
                                                     schemaorg_v1)

    pid = record['pid']

    # Schema.org data, serialized as JSON.
    schema_org = schemaorg_v1.transform_record(pid, record)
    schema_org_data = json.dumps(schema_org)

    # Google Scholar data.
    google_scholar_data = google_scholar_v1.transform_record(
        record['pid'], record)

    # Resolve $ref properties only once the serializers have run.
    record = record.replace_refs()

    return render_template('documents/record.html',
                           pid=pid_value,
                           record=record,
                           schema_org_data=schema_org_data,
                           google_scholar_data=google_scholar_data)
def test_get_record_by_ref_link(app, document):
    """Check that a record is resolved from its external REST item URL."""
    ref_link = url_for('invenio_records_rest.doc_item',
                       _external=True,
                       pid_value=document['pid'])

    resolved = DocumentRecord.get_record_by_ref_link(ref_link)
    assert resolved['pid'] == document['pid']
def test_detail(app, client):
    """Test document detail page.

    Creates and commits a record, then checks that the detail page for
    PID ``1`` answers with HTTP 200.
    """
    # The record is created only for its side effect; the request below
    # addresses the document by a hard-coded PID.
    # NOTE(review): presumably the new record receives PID "1" - confirm.
    DocumentRecord.create({"title": "The title of the record"}, dbcommit=True)

    assert client.get('/organization/sonar/documents/1').status_code == 200
def detail(pid_value):
    """Render the detail page of a document, or abort with 404."""
    document = DocumentRecord.get_record_by_pid(pid_value)

    if not document:
        abort(404)

    template_context = {'pid': pid_value, 'record': document}
    return render_template('documents/record.html', **template_context)
def dump(self, obj, *args, **kwargs):
    """Dump object after attaching the documents linked to it.

    The parent method is overridden instead of using the `pre_dump`
    decorator, because `add_permission` needs the `documents` property and
    there is no guarantee that such a hook would be executed first.
    """
    linked_documents = DocumentRecord.get_documents_by_project(obj['pid'])
    obj['documents'] = linked_documents

    parent_schema = super(ProjectMetadataSchemaV1, self)
    return parent_schema.dump(obj, *args, **kwargs)
def delete(cls, user, record):
    """Delete permission check.

    Deletion is refused as soon as ``get_documents_by_project`` returns
    documents for the record's PID; otherwise the update permission decides.

    :param user: Current user record.
    :param record: Record to check.
    :returns: True if action can be done.
    """
    linked_documents = DocumentRecord.get_documents_by_project(record['pid'])
    return False if linked_documents else cls.update(user, record)
def test_get_record_by_identifier(app, document):
    """Check record lookup by identifier, including the source."""
    rero_identifier = {
        'value': 'R003415713',
        'type': 'bf:Local',
        'source': 'RERO'
    }

    # Both identifiers match: the fixture document is returned.
    found = DocumentRecord.get_record_by_identifier([
        {
            'value': '111111',
            'type': 'bf:Local',
            'source': 'RERO DOC'
        },
        rero_identifier,
    ])
    assert found['pid'] == document['pid']

    # Same values, but the first identifier has a different source.
    found = DocumentRecord.get_record_by_identifier([
        {
            'value': '111111',
            'type': 'bf:Local',
            'source': 'Unmatching'
        },
        rero_identifier,
    ])
    assert not found

    # Value of one identifier combined with the source of the other.
    found = DocumentRecord.get_record_by_identifier([{
        'value': 'R003415713',
        'type': 'bf:Local',
        'source': 'RERO DOC'
    }])
    assert not found

    # Not found, because only `bf:Doi` and `bf:Local` are analyzed.
    found = DocumentRecord.get_record_by_identifier([{
        'value': 'oai:unknown',
        'type': 'bf:Identifier'
    }])
    assert not found
def test_get_documents_by_project(db, project, document):
    """Check that documents referencing a project are returned for it."""
    project_ref = f'https://sonar.ch/api/projects/{project.id}'
    document['projects'] = [{'$ref': project_ref}]
    document.commit()
    document.reindex()
    db.session.commit()

    first_hit = DocumentRecord.get_documents_by_project(project.id)[0]
    assert first_hit['pid'] == document['pid']

    expected_permalink = \
        f'http://localhost/global/documents/{document["pid"]}'
    assert first_hit['permalink'] == expected_permalink
def test_institution_resolver(client):
    """Check that an institution ``$ref`` resolves to the institution data."""
    institution_name = 'Università della Svizzera italiana'
    InstitutionRecord.create({"pid": "usi", "name": institution_name})

    document = DocumentRecord.create({
        "title": "The title of the record",
        "institution": {
            "$ref": "https://sonar.ch/api/institutions/usi"
        }
    })

    resolved_institution = document.replace_refs().get('institution')
    assert resolved_institution['name'] == institution_name
def test_index_record(client, db, document_json, superuser):
    """Check that indexing a new record raises the list total by one."""

    def _listed_total():
        """Query the document list and return the reported hit total."""
        response = client.get(url_for('invenio_records_rest.doc_list'))
        assert response.status_code == 200
        return response.json['hits']['total']['value']

    login_user_via_session(client, email=superuser['email'])
    total_before = _listed_total()

    created = DocumentRecord.create(document_json, dbcommit=True)
    db.session.commit()
    DocumentIndexer().index(created)

    assert _listed_total() == (total_before + 1)
def detail(pid_value, view='global'):
    """Render the detail page of a document, or abort with 404."""
    document = DocumentRecord.get_record_by_pid(pid_value)

    if not document:
        abort(404)

    # Enrich files with restriction, link and thumbnail.
    has_files = document.get('_files')
    if has_files:
        # The organisation may force files to point to an external URL.
        document['external_url'] = has_external_urls_for_files(document)
        populate_files_properties(document)

    template_context = {'pid': pid_value, 'record': document}
    return render_template('documents/record.html', **template_context)
def test_file_listener(db, document_with_file):
    """Check the file listeners after the record's file list is emptied."""
    # Empty the file list and persist the change.
    document_with_file['_files'] = []
    document_with_file.commit()
    db.session.commit()

    # A freshly loaded copy must show no files.
    reloaded = DocumentRecord.get_record_by_pid(document_with_file['pid'])
    assert not reloaded['_files']

    # Run the "uploaded" listener on the bucket's object version.
    uploaded_version = ObjectVersion.get_by_bucket(
        document_with_file['_bucket'])
    file_uploaded_listener(uploaded_version)
    assert len(document_with_file.files) == 3

    # Run the "deleted" listener the same way.
    deleted_version = ObjectVersion.get_by_bucket(
        document_with_file['_bucket'])
    file_deleted_listener(deleted_version)
def read(cls, user, record):
    """Read permission check.

    :param user: Current user record.
    :param record: Record to check.
    :returns: True if action can be done.
    """
    # Superuser is allowed.
    if user and user.is_superuser:
        return True

    document = DocumentRecord.get_record_by_pid(record['pid'])

    # The lookup returns None for an unknown PID; deny access instead of
    # failing on the attribute access below.
    if document is None:
        return False

    document = document.replace_refs()

    # Moderator can read their own documents.
    if user and user.is_moderator:
        if document.has_organisation(current_organisation['pid']):
            return True

    # Everybody else can read the document unless it is masked.
    return not document.is_masked
def enrich_document_data(sender=None,
                         record=None,
                         json=None,
                         index=None,
                         **kwargs):
    """Receive a signal before record is indexed, to enrich indexed data.

    This function is called just before a record is sent to index. For
    indexes whose name starts with ``documents`` it adds to ``json``:

    * ``isOpenAccess``, taken from ``record.is_open_access()``;
    * ``ips`` on the first organisation, computed from its ``allowedIps``
      (an empty list when there is none);
    * ``fulltext``, the decoded content of every record file whose ``type``
      is ``fulltext`` (only when the record has files).

    :param sender: Sender of the signal.
    :param Record record: Record to index.
    :param dict json: JSON that will be indexed.
    :param str index: Name of the index in which record will be sent.
    """
    # Takes care only about documents indexing. ``index`` defaults to None,
    # so it must be checked before calling ``startswith`` on it.
    if not index or not index.startswith('documents'):
        return

    # Transform record in DocumentRecord
    if not isinstance(record, DocumentRecord):
        record = DocumentRecord.get_record(record.id)

    # Check if record is open access.
    json['isOpenAccess'] = record.is_open_access()

    # Compile allowed IPs in document (first organisation only).
    if json.get('organisation'):
        organisation = json['organisation'][0]
        allowed_ips = organisation.get('allowedIps')
        organisation['ips'] = (get_ips_list(allowed_ips.split('\n'))
                               if allowed_ips else [])

    # No files are present in record
    if not record.files:
        return

    # Store fulltext in array for indexing
    json['fulltext'] = []
    for file in record.files:
        if file.get('type') == 'fulltext':
            with file.file.storage().open() as pdf_file:
                json['fulltext'].append(pdf_file.read().decode('utf-8'))
def test_reindex(app, db, client, document_json, superuser):
    """Check that an indexed record is served by the REST item endpoint."""
    created = DocumentRecord.create(document_json)
    db.session.commit()
    DocumentIndexer().index(created)

    item_url = url_for('invenio_records_rest.doc_item',
                       pid_value=created['pid'])
    json_headers = [('Content-Type', 'application/json')]

    login_user_via_session(client, email=superuser['email'])
    response = client.get(item_url, headers=json_headers)
    payload = response.json

    assert response.status_code == 200
    assert payload['metadata']['pid'] == created['pid']