Example #1
0
def test_get_record_by_bucket(app, db, document_with_file):
    """Check record lookup by bucket, including every failure path."""
    bucket_id = document_with_file['_bucket']

    # An existing bucket resolves to a record.
    assert SonarRecord.get_record_by_bucket(bucket_id)

    # An unknown bucket identifier resolves to nothing.
    unknown_bucket_id = '9bca9173-2c7b-4e22-bd6d-46e4f972dbf89'
    assert not SonarRecord.get_record_by_bucket(unknown_bucket_id)

    # Without a configured record class, nothing is returned. The class is
    # put back afterwards so the configuration is usable again.
    endpoints = app.config.get('RECORDS_REST_ENDPOINTS', {})
    endpoints.get('doc', {}).pop('record_class', None)
    assert not SonarRecord.get_record_by_bucket(bucket_id)
    doc_endpoint = app.config['RECORDS_REST_ENDPOINTS']['doc']
    doc_endpoint['record_class'] = DocumentRecord

    # Once both persistent identifiers are deleted, nothing is returned.
    identifiers = (
        ('doc', document_with_file['pid']),
        ('oai', 'oai:sonar.ch:' + document_with_file['pid']),
    )
    for pid_type, pid_value in identifiers:
        identifier = PersistentIdentifier.get(pid_type, pid_value)
        db.session.delete(identifier)
        db.session.commit()
    assert not SonarRecord.get_record_by_bucket(bucket_id)
Example #2
0
def test_get_pid_by_ref_link(app):
    """Check that a PID is extracted from a reference link."""
    # A malformed link is rejected with an explicit message.
    with pytest.raises(Exception) as error:
        SonarRecord.get_pid_by_ref_link('falsy-link')
    assert str(error.value) == 'falsy-link is not a valid ref link'

    # A well-formed item URL yields its PID value.
    ref_link = url_for('invenio_records_rest.doc_item',
                       pid_value='10000',
                       _external=True)
    assert SonarRecord.get_pid_by_ref_link(ref_link) == '10000'
Example #3
0
def update_oai_property(sender, record):
    """Refresh the `_oai` property when a document is created or updated.

    Records that are not `DocumentRecord` instances are left untouched.

    :param sender: Sender
    :param record: Document record
    """
    if isinstance(record, DocumentRecord):
        # One OAI set per organisation, identified by the organisation PID.
        organisation_pids = [
            SonarRecord.get_pid_by_ref_link(item['$ref'])
            for item in record.get('organisation', [])
        ]
        timestamp = pytz.utc.localize(datetime.utcnow()).isoformat()
        record['_oai'].update({
            'updated': timestamp,
            'sets': organisation_pids
        })

        # Write the encoded record to the model's `json` property explicitly,
        # as this is no longer done during object creation.
        # https://github.com/inveniosoftware/invenio-records/commit/ab7fdc10ddf54249dde8bc968f98b1fdd633610f#diff-51263e1ef21bcc060a5163632df055ef67ac3e3b2e222930649c13865cffa5aeR171
        record.model.json = record.model_cls.encode(dict(record))
Example #4
0
def sync_record_files(file, deleted=False):
    """Propagate a file change to the record owning the file's bucket.

    Nothing happens when no record is found for the bucket.

    :param file: File object
    :param deleted: Whether the file is deleted or not.
    """
    owner = SonarRecord.get_record_by_bucket(file.bucket_id)
    if owner:
        owner.sync_files(file, deleted)
Example #5
0
def has_external_urls_for_files(record):
    """Check if files point to external website.

    Only the first organisation of the record is inspected: it is resolved
    to a PID (from its `$ref` link when present, otherwise from its `pid`
    key) and looked up in the `SONAR_DOCUMENTS_ORGANISATIONS_EXTERNAL_FILES`
    configuration.

    :param record: Current record.
    :returns: True if the record's first organisation is configured to point
        files to an external URL. False otherwise, including when the record
        has no organisation or the configuration entry is missing.
    """
    # The previous implementation looped over the organisations but returned
    # during the first iteration; taking the first item makes that explicit.
    # `or []` also covers an `organisation` key that is present but empty.
    organisation = next(iter(record.get('organisation') or []), None)
    if organisation is None:
        return False

    if organisation.get('$ref'):
        organisation_pid = SonarRecord.get_pid_by_ref_link(
            organisation['$ref'])
    else:
        organisation_pid = organisation['pid']

    # A missing setting means no organisation uses external files; without
    # the fallback, `in None` would raise a TypeError.
    external_organisations = current_app.config.get(
        'SONAR_DOCUMENTS_ORGANISATIONS_EXTERNAL_FILES') or ()

    return organisation_pid in external_organisations
Example #6
0
def update_oai_property(sender, record):
    """Refresh the `_oai` property when a document is created or updated.

    Sets the update timestamp and the OAI sets, which hold the PID of the
    record's organisation or are empty when there is none. Records that are
    not `DocumentRecord` instances are left untouched.

    :param sender: Sender
    :param record: Document record
    """
    if isinstance(record, DocumentRecord):
        oai = record['_oai']
        oai['updated'] = pytz.utc.localize(datetime.utcnow()).isoformat()

        if record.get('organisation'):
            ref_link = record['organisation']['$ref']
            oai['sets'] = [SonarRecord.get_pid_by_ref_link(ref_link)]
        else:
            oai['sets'] = []
Example #7
0
    def add_validation_data(self, item, **kwargs):
        """Ensure the record item carries validation data.

        A missing or empty `validation` entry is initialised with the
        "in progress" status and the "save" action. When no user is stored
        in it yet, a reference to the current user is added.

        :param item: Record item.
        :returns: The modified item.
        """
        validation = item.get('validation')
        if not validation:
            validation = {
                'status': Status.IN_PROGRESS,
                'action': Action.SAVE
            }
            item['validation'] = validation

        # Keep an already stored user; otherwise reference the current one.
        if not validation.get('user'):
            user_link = SonarRecord.get_ref_link('users',
                                                 current_user_record['pid'])
            validation['user'] = {'$ref': user_link}

        return item
Example #8
0
def test_get_record_class_by_pid_type(app):
    """Check that the `doc` PID type maps to the document record class."""
    record_class = SonarRecord.get_record_class_by_pid_type('doc')
    class_name = record_class.__name__
    assert class_name == 'DocumentRecord'
Example #9
0
def export(pid_type, serializer_key, output_dir):
    """Export records for the given record type.

    Every record of the type is dumped (through the configured serializer
    when there is one), its files having a `uri` are copied next to the
    export in a `<pid>/` sub-directory, and all records are written to
    `data.json` in the output directory. Any error is reported on the
    console instead of being raised.

    :param pid_type: record type
    :param serializer_key: Not read by this function; the serializer is
        looked up in `SONAR_APP_EXPORT_SERIALIZERS` with `pid_type`.
    :param output_dir: Output directory (object exposing the path as `name`)
    """
    click.secho('Export "{pid_type}" records in {dir}'.format(
        pid_type=pid_type, dir=output_dir.name))

    try:
        # Get the correct record class
        record_class = SonarRecord.get_record_class_by_pid_type(pid_type)

        if not record_class:
            raise Exception('No record class found for type "{type}"'.format(
                type=pid_type))

        # Load the serializer
        serializer_class = current_app.config.get(
            'SONAR_APP_EXPORT_SERIALIZERS', {}).get(pid_type)

        if serializer_class:
            serializer = obj_or_import_string(serializer_class)()
        else:
            serializer = None

        pids = record_class.get_all_pids()
        records = []

        # Create output directory if it does not exist
        if pids:
            pathlib.Path(output_dir.name).mkdir(mode=0o755,
                                                parents=True,
                                                exist_ok=True)

        for pid in pids:
            record = record_class.get_record_by_pid(pid)

            if serializer:
                record = serializer.dump(record)
            else:
                record = record.dumps()

            for file in record.get('files', []):
                if file.get('uri'):
                    # Copy the file next to the export and replace its
                    # storage URI by a path relative to the output directory.
                    target_path = join(output_dir.name, pid, file['key'])
                    pathlib.Path(dirname(target_path)).mkdir(mode=0o755,
                                                             parents=True,
                                                             exist_ok=True)
                    shutil.copyfile(file['uri'], target_path)
                    file.pop('uri')
                    file['path'] = './{pid}/{key}'.format(pid=pid,
                                                          key=file['key'])

            records.append(record)

        if records:
            # Serialise before opening the file, so that a serialisation
            # error cannot leave a truncated `data.json` behind.
            payload = json.dumps(records)
            output_file = join(output_dir.name, 'data.json')
            # The context manager closes the file even if writing fails.
            with open(output_file, 'w') as data_file:
                data_file.write(payload)

        click.secho('Finished', fg='green')

    except Exception as exception:
        # Command boundary: report the failure to the user rather than
        # letting the exception propagate.
        click.secho('An error occured during export: {error}'.format(
            error=str(exception)),
                    fg='red')
Example #10
0
    def create_document(self):
        """Create document from deposit.

        Builds the document metadata from the deposit's `metadata`,
        `contributors`, `projects` and `diffusion` entries, creates the
        `DocumentRecord`, copies the deposit files into it and stores a
        `$ref` link to the new document in `self['document']`.

        Collections and projects given without a `$ref` are created on the
        fly and replaced by a reference to the newly created record.

        :returns: The created document record.
        """
        # TODO : Do this whole process with a marshmallow schema serializer.
        metadata = {}

        # Organisation
        if current_user_record and current_user_record.get('organisation'):
            metadata['organisation'] = [current_user_record['organisation']]

        # Document type
        metadata['documentType'] = self['metadata']['documentType']

        # Language, defaulting to 'eng'. It is also the fallback language for
        # titles, abstracts and subjects below.
        language = self['metadata'].get('language', 'eng')

        # Title
        metadata['title'] = [{
            'type':
            'bf:Title',
            'mainTitle': [{
                'language': language,
                'value': self['metadata']['title']
            }]
        }]

        # Subtitle
        if self['metadata'].get('subtitle'):
            metadata['title'][0]['subtitle'] = [{
                'language':
                language,
                'value':
                self['metadata']['subtitle']
            }]

        # Other title: appended as an additional main title, in its own
        # language when given.
        if self['metadata'].get('otherLanguageTitle', {}).get('title'):
            metadata['title'][0]['mainTitle'].append({
                'language':
                self['metadata']['otherLanguageTitle'].get(
                    'language', language),
                'value':
                self['metadata']['otherLanguageTitle']['title']
            })

        # Languages
        metadata['language'] = [{'value': language, 'type': 'bf:Language'}]

        # Document date
        metadata['provisionActivity'] = [{
            'type':
            'bf:Publication',
            'startDate':
            self['metadata']['documentDate']
        }]
        metadata['provisionActivity'][0]['statement'] = []

        # Publication place
        if self['metadata'].get('publicationPlace'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publicationPlace']
                }],
                'type':
                'bf:Place'
            })

        # Publisher
        if self['metadata'].get('publisher'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publisher']
                }],
                'type':
                'bf:Agent'
            })

        # Add a statement for date: the statement date when provided,
        # otherwise the document date.
        metadata['provisionActivity'][0]['statement'].append({
            'label': [{
                'value':
                self['metadata']['statementDate']
                if self['metadata'].get('statementDate') else
                self['metadata']['documentDate']
            }],
            'type':
            'Date'
        })

        # Published in
        if self['metadata'].get('publication'):
            # The publication year falls back to the document date.
            year = self['metadata']['publication']['year'] if self['metadata'][
                'publication'].get(
                    'year') else self['metadata']['documentDate']
            part_of = {
                'numberingYear': year,
                'document': {
                    'title': self['metadata']['publication']['publishedIn']
                }
            }

            # Optional publication details are copied only when filled in.
            if self['metadata']['publication'].get('pages'):
                part_of['numberingPages'] = self['metadata']['publication'][
                    'pages']

            if self['metadata']['publication'].get('volume'):
                part_of['numberingVolume'] = self['metadata']['publication'][
                    'volume']

            if self['metadata']['publication'].get('number'):
                part_of['numberingIssue'] = self['metadata']['publication'][
                    'number']

            if self['metadata']['publication'].get('editors'):
                part_of['document']['contribution'] = self['metadata'][
                    'publication']['editors']

            if self['metadata']['publication'].get('publisher'):
                part_of['document']['publication'] = {
                    'statement': self['metadata']['publication']['publisher']
                }

            if self['metadata']['publication'].get('identifiedBy'):
                part_of['document']['identifiedBy'] = self['metadata'][
                    'publication']['identifiedBy']

            metadata['partOf'] = [part_of]

        # Other electronic versions. Each link must provide both `url` and
        # `publicNote`.
        if self['metadata'].get('otherElectronicVersions'):
            metadata['otherEdition'] = [{
                'document': {
                    'electronicLocator': link['url']
                },
                'publicNote': link['publicNote']
            } for link in self['metadata']['otherElectronicVersions']]

        # Collections
        if self['metadata'].get('collections'):
            collections = []
            for collection in self['metadata'].get('collections'):
                # Create a new collection when the entry is not yet a
                # reference, then replace the entry by a `$ref` to it.
                if not collection.get('$ref'):
                    data = collection.copy()
                    # Store organisation
                    # NOTE(review): unlike the guarded access at the top of
                    # this method, this assumes the current user record has
                    # an organisation — confirm.
                    data['organisation'] = current_user_record['organisation']
                    collection_record = CollectionRecord.create(data)
                    collection_record.reindex()
                    collection = {
                        '$ref':
                        SonarRecord.get_ref_link('collections',
                                                 collection_record['pid'])
                    }

                collections.append(collection)

            if collections:
                metadata['collections'] = collections

        # Classification
        if self['metadata'].get('classification'):
            metadata['classification'] = [{
                'type':
                'bf:ClassificationUdc',
                'classificationPortion':
                self['metadata']['classification']
            }]

        # Abstracts
        if self['metadata'].get('abstracts'):
            metadata['abstracts'] = [{
                'language':
                abstract.get('language', language),
                'value':
                abstract['abstract']
            } for abstract in self['metadata']['abstracts']]

        # Dissertation
        if self['metadata'].get('dissertation'):
            metadata['dissertation'] = self['metadata']['dissertation']

        # Subjects
        if self['metadata'].get('subjects'):
            metadata['subjects'] = [{
                'label': {
                    'language': subject.get('language', language),
                    'value': subject['subjects']
                }
            } for subject in self['metadata']['subjects']]

        # Identifiers
        identifiers = []
        if self['metadata'].get('identifiedBy'):
            for identifier in self['metadata']['identifiedBy']:
                data = {
                    'type': identifier['type'],
                    'value': identifier['value'],
                }

                if identifier.get('source'):
                    data['source'] = identifier['source']

                # Special for PMID: stored as a local identifier whose source
                # is 'PMID', overriding any source given above.
                if identifier['type'] == 'pmid':
                    data['source'] = 'PMID'
                    data['type'] = 'bf:Local'

                identifiers.append(data)

        if identifiers:
            metadata['identifiedBy'] = identifiers

        # Content note
        if self['metadata'].get('contentNote'):
            metadata['contentNote'] = self['metadata']['contentNote']

        # Extent
        if self['metadata'].get('extent'):
            metadata['extent'] = self['metadata']['extent']

        # Additional materials
        if self['metadata'].get('additionalMaterials'):
            metadata['additionalMaterials'] = self['metadata'][
                'additionalMaterials']

        # Formats
        if self['metadata'].get('formats'):
            metadata['formats'] = self['metadata']['formats']

        # Other material characteristics
        if self['metadata'].get('otherMaterialCharacteristics'):
            metadata['otherMaterialCharacteristics'] = self['metadata'][
                'otherMaterialCharacteristics']

        # Edition statement
        if self['metadata'].get('editionStatement'):
            metadata['editionStatement'] = self['metadata']['editionStatement']

        # Notes
        if self['metadata'].get('notes'):
            metadata['notes'] = self['metadata']['notes']

        # Series
        if self['metadata'].get('series'):
            metadata['series'] = self['metadata']['series']

        # Custom fields: `customField1` to `customField3`, copied when set.
        for field_number in range(1, 4):
            field = f'customField{field_number}'
            document_field = self['metadata'].get(field)
            if document_field:
                metadata[field] = document_field

        # Contributors (read from the deposit itself, not from its
        # `metadata` entry).
        contributors = []
        for contributor in self.get('contributors', []):
            data = {
                'agent': {
                    'type': 'bf:Person',
                    'preferred_name': contributor['name']
                },
                'role': [contributor['role']]
            }

            if contributor.get('date_of_birth'):
                 data['agent']['date_of_birth'] = contributor['date_of_birth']

            if contributor.get('date_of_death'):
                 data['agent']['date_of_death'] = contributor['date_of_death']

            if contributor.get('affiliation'):
                data['affiliation'] = contributor['affiliation']

            # ORCID for contributor
            if contributor.get('orcid'):
                data['agent']['identifiedBy'] = {
                    'type': 'bf:Local',
                    'source': 'ORCID',
                    'value': contributor['orcid']
                }

            contributors.append(data)

        if contributors:
            metadata['contribution'] = contributors

        # Projects
        if self.get('projects'):
            projects = []

            for project in self['projects']:
                # Create a new project when the entry is not yet a
                # reference, then replace the entry by a `$ref` to it.
                if not project.get('$ref'):
                    data = project.copy()

                    # Store user
                    data['user'] = self['user']

                    # Store organisation
                    # NOTE(review): assumes the current user record has an
                    # organisation, as for collections above — confirm.
                    data['organisation'] = current_user_record['organisation']

                    project_record = sonar.service('projects').create(
                        g.identity, {'metadata': data})
                    project = {
                        '$ref':
                        SonarRecord.get_ref_link('projects',
                                                 project_record['id'])
                    }

                projects.append(project)

            if projects:
                metadata['projects'] = projects

        # License. Both `diffusion` and its `license` key are required.
        metadata['usageAndAccessPolicy'] = {
            'license': self['diffusion']['license']
        }

        # Open access status
        if self['diffusion'].get('oa_status'):
            metadata['oa_status'] = self['diffusion']['oa_status']

        # Subdivisions
        if self['diffusion'].get('subdivisions'):
            metadata['subdivisions'] = self['diffusion']['subdivisions']


        # Masked: tested against None so that a falsy value is still copied.
        if self['diffusion'].get('masked') is not None:
            metadata['masked'] = self['diffusion']['masked']
        document = DocumentRecord.create(metadata,
                                         dbcommit=True,
                                         with_bucket=True)

        # Files whose category is 'main' (the default) get order 1; every
        # other file is numbered from 2 upwards, in iteration order.
        current_order = 2
        for file in self.files:
            with file.file.storage().open() as pdf_file:
                content = pdf_file.read()

                if file.get('category', 'main') == 'main':
                    order = 1
                else:
                    order = current_order
                    current_order += 1

                kwargs = {
                    'label': file.get('label', file['key']),
                    'order': order
                }

                # Embargo information is only passed on when the embargo flag
                # is set and an embargo date is given.
                if file.get('embargo', False) and file.get('embargoDate'):
                    kwargs['access'] = 'coar:c_f1cf'  # Embargoed access
                    kwargs['embargo_date'] = file['embargoDate']
                    kwargs['restricted_outside_organisation'] = file.get(
                        'exceptInOrganisation', False)

                document.add_file(content, file['key'], **kwargs)

        document.commit()
        document.reindex()

        # Link the deposit to the document that was just created.
        self['document'] = {
            '$ref': DocumentRecord.get_ref_link('documents', document['pid'])
        }

        return document