def test_get_record_by_bucket(app, db, document_with_file):
    """Test retrieving a record with a given bucket."""
    # Happy path: the fixture's bucket resolves to its record.
    assert SonarRecord.get_record_by_bucket(document_with_file['_bucket'])

    # An unknown bucket UUID resolves to nothing.
    assert not SonarRecord.get_record_by_bucket(
        '9bca9173-2c7b-4e22-bd6d-46e4f972dbf89')

    # Without a configured record class the lookup fails; restore it after.
    app.config.get('RECORDS_REST_ENDPOINTS',
                   {}).get('doc', {}).pop('record_class', None)
    assert not SonarRecord.get_record_by_bucket(document_with_file['_bucket'])
    app.config['RECORDS_REST_ENDPOINTS']['doc'][
        'record_class'] = DocumentRecord

    # Once both persistent identifiers are deleted, resolution fails too.
    for pid_type, pid_value in [
        ('doc', document_with_file['pid']),
        ('oai', 'oai:sonar.ch:' + document_with_file['pid']),
    ]:
        identifier = PersistentIdentifier.get(pid_type, pid_value)
        db.session.delete(identifier)
        db.session.commit()

    assert not SonarRecord.get_record_by_bucket(document_with_file['_bucket'])
def test_get_pid_by_ref_link(app):
    """Test resolving PID by the given reference link."""
    # An unparsable link raises with an explicit message.
    with pytest.raises(Exception) as exc_info:
        SonarRecord.get_pid_by_ref_link('falsy-link')
    assert str(exc_info.value) == 'falsy-link is not a valid ref link'

    # A well-formed REST item URL resolves to its PID value.
    ref_link = url_for('invenio_records_rest.doc_item',
                       _external=True,
                       pid_value='10000')
    assert SonarRecord.get_pid_by_ref_link(ref_link) == '10000'
def update_oai_property(sender, record):
    """Called when a document is created or updated.

    Update `_oai` property of the record.

    :param sender: Sender
    :param record: Document record
    """
    # Only document records carry an `_oai` property.
    if not isinstance(record, DocumentRecord):
        return

    # One OAI set per linked organisation, identified by its PID.
    organisation_sets = [
        SonarRecord.get_pid_by_ref_link(organisation['$ref'])
        for organisation in record.get('organisation', [])
    ]

    record['_oai'].update({
        'updated': pytz.utc.localize(datetime.utcnow()).isoformat(),
        'sets': organisation_sets
    })

    # Store the value in `json` property, as it's not more called during object
    # creation. https://github.com/inveniosoftware/invenio-records/commit/ab7fdc10ddf54249dde8bc968f98b1fdd633610f#diff-51263e1ef21bcc060a5163632df055ef67ac3e3b2e222930649c13865cffa5aeR171
    record.model.json = record.model_cls.encode(dict(record))
def sync_record_files(file, deleted=False):
    """Sync files in record corresponding to bucket.

    :param file: File object.
    :param deleted: Whether the file is deleted or not.
    """
    # Resolve the owning record from the file's bucket; nothing to sync when
    # no record is attached to this bucket.
    record = SonarRecord.get_record_by_bucket(file.bucket_id)

    if not record:
        return

    record.sync_files(file, deleted)
def has_external_urls_for_files(record):
    """Check if files point to external website.

    :param record: Current record.
    :returns: True if record's organisation is configured to point files to
        an external URL.
    """
    # NOTE(review): only the FIRST organisation is ever considered, because
    # the function returns inside the loop on the first iteration — confirm
    # this is intended for records with several organisations.
    for organisation in record.get('organisation', []):
        organisation_pid = SonarRecord.get_pid_by_ref_link(
            organisation['$ref']) if organisation.get(
                '$ref') else organisation['pid']
        # Default to an empty list so a missing configuration key does not
        # raise `TypeError: argument of type 'NoneType' is not iterable`.
        return organisation_pid in current_app.config.get(
            'SONAR_DOCUMENTS_ORGANISATIONS_EXTERNAL_FILES', [])

    return False
def update_oai_property(sender, record):
    """Called when a document is created or updated.

    Update `_oai` property of the record.

    :param sender: Sender
    :param record: Document record
    """
    # Only document records expose an `_oai` property.
    if not isinstance(record, DocumentRecord):
        return

    record['_oai']['updated'] = pytz.utc.localize(
        datetime.utcnow()).isoformat()

    # The record belongs to at most one OAI set, derived from its
    # organisation reference.
    if record.get('organisation'):
        record['_oai']['sets'] = [
            SonarRecord.get_pid_by_ref_link(record['organisation']['$ref'])
        ]
    else:
        record['_oai']['sets'] = []
def add_validation_data(self, item, **kwargs):
    """Add validation data to record.

    :param item: Record item.
    :returns: The modified item.
    """
    # Initialise the validation block when it is missing or empty.
    if not item.get('validation'):
        item['validation'] = {
            'status': Status.IN_PROGRESS,
            'action': Action.SAVE
        }

    # Attach the current user once, keeping any previously stored one.
    if not item['validation'].get('user'):
        user_ref = SonarRecord.get_ref_link('users',
                                            current_user_record['pid'])
        item['validation']['user'] = {'$ref': user_ref}

    return item
def test_get_record_class_by_pid_type(app):
    """Test get record class by PID type."""
    # The `doc` PID type maps to the document record class.
    record_class = SonarRecord.get_record_class_by_pid_type('doc')
    assert record_class.__name__ == 'DocumentRecord'
def export(pid_type, serializer_key, output_dir):
    """Export records for the given record type.

    :param pid_type: Record type.
    :param serializer_key: Key of the serializer to use for the export.
    :param output_dir: Output directory.
    """
    click.secho('Export "{pid_type}" records in {dir}'.format(
        pid_type=pid_type, dir=output_dir.name))

    try:
        # Get the correct record class
        record_class = SonarRecord.get_record_class_by_pid_type(pid_type)

        if not record_class:
            raise Exception('No record class found for type "{type}"'.format(
                type=pid_type))

        # Load the serializer
        serializer_class = current_app.config.get(
            'SONAR_APP_EXPORT_SERIALIZERS', {}).get(pid_type)

        serializer = obj_or_import_string(
            serializer_class)() if serializer_class else None

        pids = record_class.get_all_pids()

        records = []

        # Create output directory if not exists
        if pids:
            pathlib.Path(output_dir.name).mkdir(mode=0o755,
                                                parents=True,
                                                exist_ok=True)

        for pid in pids:
            record = record_class.get_record_by_pid(pid)

            record = serializer.dump(record) if serializer else record.dumps()

            # Copy external files next to the dump and rewrite their URIs as
            # relative paths.
            for file in record.get('files', []):
                if file.get('uri'):
                    target_path = join(output_dir.name, pid, file['key'])
                    pathlib.Path(dirname(target_path)).mkdir(mode=0o755,
                                                             parents=True,
                                                             exist_ok=True)
                    shutil.copyfile(file['uri'], target_path)
                    file.pop('uri')
                    file['path'] = './{pid}/{key}'.format(pid=pid,
                                                          key=file['key'])

            records.append(record)

        if records:
            # Write data; the context manager guarantees the handle is
            # closed even if the write fails (original leaked it).
            output_file = join(output_dir.name, 'data.json')
            with open(output_file, 'w') as f:
                f.write(json.dumps(records))

        click.secho('Finished', fg='green')
    except Exception as exception:
        click.secho('An error occured during export: {error}'.format(
            error=str(exception)),
                    fg='red')
def create_document(self):
    """Create document from deposit.

    Maps the deposit's `metadata`, `contributors`, `projects` and
    `diffusion` sections onto a new document record, attaches the
    deposited files to it, and stores a `$ref` to the created document
    back on the deposit.

    :returns: The created document record.
    """
    # TODO : Do this whole process with a marshmallow schema serializer.
    metadata = {}

    # Organisation
    if current_user_record and current_user_record.get('organisation'):
        metadata['organisation'] = [current_user_record['organisation']]

    # Document type
    metadata['documentType'] = self['metadata']['documentType']

    # Language — falls back to English when the deposit has none.
    language = self['metadata'].get('language', 'eng')

    # Title
    metadata['title'] = [{
        'type': 'bf:Title',
        'mainTitle': [{
            'language': language,
            'value': self['metadata']['title']
        }]
    }]

    # Subtitle
    if self['metadata'].get('subtitle'):
        metadata['title'][0]['subtitle'] = [{
            'language': language,
            'value': self['metadata']['subtitle']
        }]

    # Other title, appended as an additional main title entry.
    if self['metadata'].get('otherLanguageTitle', {}).get('title'):
        metadata['title'][0]['mainTitle'].append({
            'language': self['metadata']['otherLanguageTitle'].get(
                'language', language),
            'value': self['metadata']['otherLanguageTitle']['title']
        })

    # Languages
    metadata['language'] = [{'value': language, 'type': 'bf:Language'}]

    # Document date
    metadata['provisionActivity'] = [{
        'type': 'bf:Publication',
        'startDate': self['metadata']['documentDate']
    }]
    metadata['provisionActivity'][0]['statement'] = []

    # Publication place
    if self['metadata'].get('publicationPlace'):
        metadata['provisionActivity'][0]['statement'].append({
            'label': [{
                'value': self['metadata']['publicationPlace']
            }],
            'type': 'bf:Place'
        })

    # Publisher
    if self['metadata'].get('publisher'):
        metadata['provisionActivity'][0]['statement'].append({
            'label': [{
                'value': self['metadata']['publisher']
            }],
            'type': 'bf:Agent'
        })

    # Add a statement for date; the document date is used when no explicit
    # statement date was deposited.
    metadata['provisionActivity'][0]['statement'].append({
        'label': [{
            'value':
                self['metadata']['statementDate']
                if self['metadata'].get('statementDate') else
                self['metadata']['documentDate']
        }],
        'type': 'Date'
    })

    # Published in
    if self['metadata'].get('publication'):
        # Fall back to the document date when the publication has no year.
        year = self['metadata']['publication']['year'] if self['metadata'][
            'publication'].get(
                'year') else self['metadata']['documentDate']
        part_of = {
            'numberingYear': year,
            'document': {
                'title': self['metadata']['publication']['publishedIn']
            }
        }

        if self['metadata']['publication'].get('pages'):
            part_of['numberingPages'] = self['metadata']['publication'][
                'pages']

        if self['metadata']['publication'].get('volume'):
            part_of['numberingVolume'] = self['metadata']['publication'][
                'volume']

        if self['metadata']['publication'].get('number'):
            part_of['numberingIssue'] = self['metadata']['publication'][
                'number']

        if self['metadata']['publication'].get('editors'):
            part_of['document']['contribution'] = self['metadata'][
                'publication']['editors']

        if self['metadata']['publication'].get('publisher'):
            part_of['document']['publication'] = {
                'statement': self['metadata']['publication']['publisher']
            }

        if self['metadata']['publication'].get('identifiedBy'):
            part_of['document']['identifiedBy'] = self['metadata'][
                'publication']['identifiedBy']

        metadata['partOf'] = [part_of]

    # Other electronic versions
    if self['metadata'].get('otherElectronicVersions'):
        metadata['otherEdition'] = [{
            'document': {
                'electronicLocator': link['url']
            },
            'publicNote': link['publicNote']
        } for link in self['metadata']['otherElectronicVersions']]

    # Collections: entries without a `$ref` are created as new collection
    # records and replaced by a reference to the created record.
    if self['metadata'].get('collections'):
        collections = []

        for collection in self['metadata'].get('collections'):
            # Create a new project
            if not collection.get('$ref'):
                data = collection.copy()

                # Store organisation
                data['organisation'] = current_user_record['organisation']

                collection_record = CollectionRecord.create(data)
                collection_record.reindex()
                collection = {
                    '$ref':
                        SonarRecord.get_ref_link('collections',
                                                 collection_record['pid'])
                }

            collections.append(collection)

        if collections:
            metadata['collections'] = collections

    # Classification
    if self['metadata'].get('classification'):
        metadata['classification'] = [{
            'type': 'bf:ClassificationUdc',
            'classificationPortion': self['metadata']['classification']
        }]

    # Abstracts
    if self['metadata'].get('abstracts'):
        metadata['abstracts'] = [{
            'language': abstract.get('language', language),
            'value': abstract['abstract']
        } for abstract in self['metadata']['abstracts']]

    # Dissertation
    if self['metadata'].get('dissertation'):
        metadata['dissertation'] = self['metadata']['dissertation']

    # Subjects
    if self['metadata'].get('subjects'):
        metadata['subjects'] = [{
            'label': {
                'language': subject.get('language', language),
                'value': subject['subjects']
            }
        } for subject in self['metadata']['subjects']]

    # Identifiers
    identifiers = []

    if self['metadata'].get('identifiedBy'):
        for identifier in self['metadata']['identifiedBy']:
            data = {
                'type': identifier['type'],
                'value': identifier['value'],
            }
            if identifier.get('source'):
                data['source'] = identifier['source']

            # Special for PMID: stored as a local identifier with a fixed
            # `PMID` source.
            if identifier['type'] == 'pmid':
                data['source'] = 'PMID'
                data['type'] = 'bf:Local'

            identifiers.append(data)

    if identifiers:
        metadata['identifiedBy'] = identifiers

    # Content note
    if self['metadata'].get('contentNote'):
        metadata['contentNote'] = self['metadata']['contentNote']

    # Extent
    if self['metadata'].get('extent'):
        metadata['extent'] = self['metadata']['extent']

    # Additional materials
    if self['metadata'].get('additionalMaterials'):
        metadata['additionalMaterials'] = self['metadata'][
            'additionalMaterials']

    # Formats
    if self['metadata'].get('formats'):
        metadata['formats'] = self['metadata']['formats']

    # Other material characteristics
    if self['metadata'].get('otherMaterialCharacteristics'):
        metadata['otherMaterialCharacteristics'] = self['metadata'][
            'otherMaterialCharacteristics']

    # Edition statement
    if self['metadata'].get('editionStatement'):
        metadata['editionStatement'] = self['metadata']['editionStatement']

    # Notes
    if self['metadata'].get('notes'):
        metadata['notes'] = self['metadata']['notes']

    # Series
    if self['metadata'].get('series'):
        metadata['series'] = self['metadata']['series']

    # Custom fields: copy `customField1` through `customField3` when set.
    for field_number in range(1, 4):
        field = f'customField{field_number}'
        document_field = self['metadata'].get(field)
        if document_field:
            metadata[field] = document_field

    # Contributors
    contributors = []

    for contributor in self.get('contributors', []):
        data = {
            'agent': {
                'type': 'bf:Person',
                'preferred_name': contributor['name']
            },
            'role': [contributor['role']]
        }

        if contributor.get('date_of_birth'):
            data['agent']['date_of_birth'] = contributor['date_of_birth']

        if contributor.get('date_of_death'):
            data['agent']['date_of_death'] = contributor['date_of_death']

        if contributor.get('affiliation'):
            data['affiliation'] = contributor['affiliation']

        # ORCID for contributor
        if contributor.get('orcid'):
            data['agent']['identifiedBy'] = {
                'type': 'bf:Local',
                'source': 'ORCID',
                'value': contributor['orcid']
            }

        contributors.append(data)

    if contributors:
        metadata['contribution'] = contributors

    # Projects: entries without a `$ref` are created through the projects
    # service and replaced by a reference to the created record.
    if self.get('projects'):
        projects = []

        for project in self['projects']:
            # Create a new project
            if not project.get('$ref'):
                data = project.copy()

                # Store user
                data['user'] = self['user']

                # Store organisation
                data['organisation'] = current_user_record['organisation']

                project_record = sonar.service('projects').create(
                    g.identity, {'metadata': data})
                project = {
                    '$ref':
                        SonarRecord.get_ref_link('projects',
                                                 project_record['id'])
                }

            projects.append(project)

        if projects:
            metadata['projects'] = projects

    # License
    metadata['usageAndAccessPolicy'] = {
        'license': self['diffusion']['license']
    }

    # Open access status
    if self['diffusion'].get('oa_status'):
        metadata['oa_status'] = self['diffusion']['oa_status']

    # Subdivisions
    if self['diffusion'].get('subdivisions'):
        metadata['subdivisions'] = self['diffusion']['subdivisions']

    # Masked — explicit `is not None` check so an explicit False is kept.
    if self['diffusion'].get('masked') is not None:
        metadata['masked'] = self['diffusion']['masked']

    document = DocumentRecord.create(metadata,
                                     dbcommit=True,
                                     with_bucket=True)

    # Attach deposited files: the `main` file always gets order 1; other
    # files are numbered from 2 in iteration order.
    current_order = 2

    for file in self.files:
        with file.file.storage().open() as pdf_file:
            content = pdf_file.read()

            if file.get('category', 'main') == 'main':
                order = 1
            else:
                order = current_order
                current_order += 1

            kwargs = {
                'label': file.get('label', file['key']),
                'order': order
            }

            if file.get('embargo', False) and file.get('embargoDate'):
                kwargs['access'] = 'coar:c_f1cf'  # Embargoed access
                kwargs['embargo_date'] = file['embargoDate']

            kwargs['restricted_outside_organisation'] = file.get(
                'exceptInOrganisation', False)

            document.add_file(content, file['key'], **kwargs)

    document.commit()
    document.reindex()

    # Store a reference to the created document on the deposit itself.
    self['document'] = {
        '$ref': DocumentRecord.get_ref_link('documents', document['pid'])
    }

    return document