Example #1
0
def test_create(app, document_json):
    """Check that a created record can be fetched back by its PID.

    The lookup is verified after a plain creation and once more after a
    second creation requested with ``dbcommit=True``.
    """
    created = DocumentRecord.create(document_json)
    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']

    # Same payload again, this time asking for an explicit DB commit.
    DocumentRecord.create(document_json, dbcommit=True)
    fetched = DocumentRecord.get_record_by_pid(created['pid'])
    assert fetched['pid'] == created['pid']
Example #2
0
def test_create(app):
    """Test creating a record, with and without an explicit DB commit.

    Each creation is followed by a PID lookup so the test actually fails
    when the record cannot be retrieved afterwards.
    """
    title = "The title of the record"

    DocumentRecord.create({"pid": "1", "title": title})
    assert DocumentRecord.get_record_by_pid("1")["pid"] == "1"

    DocumentRecord.create({"pid": "2", "title": title}, dbcommit=True)
    assert DocumentRecord.get_record_by_pid("2")["pid"] == "2"
def test_missing_pids(app, es_clear, document_json):
    """Test missing PIDs.

    A single document with PID ``1000`` is moved through several states
    (created only, indexed, deleted, re-created and indexed again) and the
    report returned by ``DataIntegrityMonitoring.missing_pids`` is checked
    after each step. The last step removes the ``search_index`` setting of
    the ``doc`` endpoint and checks the resulting error message.
    """
    document_json['pid'] = '1000'

    document = DocumentRecord.create(data=document_json, dbcommit=True)
    monitoring = DataIntegrityMonitoring()

    # Only in DB
    assert monitoring.missing_pids('doc') == {
        'db': ['1000'],
        'es': [],
        'es_double': []
    }

    # OK
    document.reindex()
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': [],
        'es_double': []
    }

    # Only in ES
    document.delete()
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': ['1000'],
        'es_double': []
    }

    # With deleted
    assert monitoring.missing_pids('doc', True) == {
        'db': [],
        'es': [],
        'es_double': []
    }

    # Duplicate
    document2 = DocumentRecord.create(data=document_json, dbcommit=True)
    document2.reindex()
    current_search.flush_and_refresh('documents')
    assert monitoring.missing_pids('doc') == {
        'db': [],
        'es': ['1000'],
        'es_double': ['1000']
    }

    # Index not configured
    endpoint = app.config.get('RECORDS_REST_ENDPOINTS')['doc']
    previous_index = endpoint.pop('search_index', None)
    try:
        with pytest.raises(Exception) as exception:
            monitoring.missing_pids('doc')
        assert str(exception.value) == (
            'No "search_index" configured for resource "doc"')
    finally:
        # Restore the setting even when the assertions above fail, so the
        # modified application config cannot leak into other tests.
        endpoint['search_index'] = previous_index or 'documents'
Example #4
0
    def _make_document(organisation='org', with_file=False, pid=None):
        """Create, commit and index a document built from ``document_json``.

        :param organisation: Code of the organisation to create and attach to
            the document; a falsy value leaves the organisation untouched.
        :param with_file: When true, attach the content of ``pdf_file`` to
            the document as ``test1.pdf``.
        :param pid: PID to force on the document; when omitted, any ``pid``
            left in ``document_json`` is removed before creation.
        :returns: The created document record.
        """
        if organisation:
            make_organisation(organisation)
            # Point at the organisation that was requested rather than at a
            # hard-coded "org"; the default argument keeps the former URL.
            # NOTE(review): assumes make_organisation() uses its argument as
            # the organisation PID -- confirm.
            document_json['organisation'] = {
                '$ref':
                f'https://sonar.ch/api/organisations/{organisation}'
            }

        if pid:
            document_json['pid'] = pid
        else:
            document_json.pop('pid', None)

        record = DocumentRecord.create(document_json,
                                       dbcommit=True,
                                       with_bucket=True)
        record.commit()
        db.session.commit()

        if with_file:
            with open(pdf_file, 'rb') as file:
                # NOTE(review): 'insitution' looks like a typo for
                # 'institution'; kept as is because the accepted values are
                # not visible from here -- confirm before changing.
                record.add_file(file.read(),
                                'test1.pdf',
                                order=1,
                                restricted='insitution',
                                embargo_date='2021-01-01')
                record.commit()

        db.session.commit()
        record.reindex()
        return record
Example #5
0
    def _make_document(organisation='org', with_file=False, pid=None):
        """Create, commit and index a document built from ``document_json``.

        :param organisation: Code of the organisation to create and attach to
            the document; a falsy value leaves the organisation untouched.
        :param with_file: When true, attach the content of ``pdf_file`` to
            the document as ``test1.pdf``, with an embargo date taken from
            ``embargo_date``.
        :param pid: PID to force on the document; when omitted, any ``pid``
            and ``_oai`` entries left in ``document_json`` are removed.
        :returns: The created document record.
        """
        if organisation:
            make_organisation(organisation)
            # Point at the organisation that was requested rather than at a
            # hard-coded "org"; the default argument keeps the former URL.
            # NOTE(review): assumes make_organisation() uses its argument as
            # the organisation PID -- confirm.
            document_json['organisation'] = [{
                '$ref':
                f'https://sonar.ch/api/organisations/{organisation}'
            }]

        if pid:
            document_json['pid'] = pid
        else:
            document_json.pop('pid', None)
            document_json.pop('_oai', None)

        record = DocumentRecord.create(document_json,
                                       dbcommit=True,
                                       with_bucket=True)
        record.commit()
        db.session.commit()

        if with_file:
            with open(pdf_file, 'rb') as file:
                record.add_file(file.read(),
                                'test1.pdf',
                                order=1,
                                access='coar:c_f1cf',
                                restricted_outside_organisation=False,
                                embargo_date=embargo_date.isoformat())
                record.commit()

        db.session.commit()
        record.reindex()
        return record
Example #6
0
def test_dbcommit(app, document_json):
    """Check that a record is retrievable by PID once committed to the DB."""
    document = DocumentRecord.create(document_json)
    document.dbcommit()

    stored = DocumentRecord.get_record_by_pid(document['pid'])
    assert stored['pid'] == document['pid']
Example #7
0
def test_detail(app, client):
    """Test document detail page."""
    record = DocumentRecord.create({
        "title": "The title of the record"
    }, dbcommit=True)

    # Build the URL from the PID assigned at creation instead of assuming
    # that the record is the first one ever created.
    url = '/organization/sonar/documents/{pid}'.format(pid=record['pid'])
    assert client.get(url).status_code == 200
Example #8
0
def test_get_record_by_pid(app, document_json):
    """Check PID lookups for an unknown, an existing and a deleted record."""
    # Nothing is registered under an unknown PID.
    unknown = DocumentRecord.get_record_by_pid('not-existing')
    assert unknown is None

    document = DocumentRecord.create(document_json)
    found = DocumentRecord.get_record_by_pid(document['pid'])
    assert found['pid'] == document['pid']

    # Once deleted, the lookup must come back empty again.
    document.delete()
    after_delete = DocumentRecord.get_record_by_pid(document['pid'])
    assert after_delete is None
Example #9
0
def test_get_record_by_pid(app):
    """Check PID lookups before creation, after creation and after deletion."""
    # The PID is unknown until a record is created with it.
    before = DocumentRecord.get_record_by_pid('ABCD')
    assert before is None

    payload = {"pid": "ABCD", "title": "The title of the record"}
    document = DocumentRecord.create(payload)

    found = DocumentRecord.get_record_by_pid('ABCD')
    assert found['pid'] == 'ABCD'

    # Deleting the record makes the PID unresolvable again.
    document.delete()
    after = DocumentRecord.get_record_by_pid('ABCD')
    assert after is None
Example #10
0
def test_institution_resolver(client):
    """Check that a document's institution ``$ref`` resolves to its record."""
    institution_data = {
        "pid": "usi",
        "name": "Università della Svizzera italiana"
    }
    InstitutionRecord.create(institution_data)

    document_data = {
        "title": "The title of the record",
        "institution": {
            "$ref": "https://sonar.ch/api/institutions/usi"
        }
    }
    document = DocumentRecord.create(document_data)

    # Resolving references must expose the institution's own fields.
    resolved_institution = document.replace_refs().get('institution')
    assert resolved_institution['name'] == (
        'Università della Svizzera italiana')
Example #11
0
def test_index_record(client, db, document_json, superuser):
    """Check that indexing a new record adds one hit to the document list."""

    def _total_hits():
        """Query the document list endpoint and return its hit count."""
        response = client.get(url_for('invenio_records_rest.doc_list'))
        assert response.status_code == 200
        return response.json['hits']['total']['value']

    login_user_via_session(client, email=superuser['email'])
    initial_total = _total_hits()

    document = DocumentRecord.create(document_json, dbcommit=True)
    db.session.commit()
    DocumentIndexer().index(document)

    assert _total_hits() == (initial_total + 1)
Example #12
0
def test_reindex(app, db, client, document_json, superuser):
    """Check that an indexed record is served by the item endpoint."""
    document = DocumentRecord.create(document_json)
    db.session.commit()
    DocumentIndexer().index(document)

    item_url = url_for('invenio_records_rest.doc_item',
                       pid_value=document['pid'])

    login_user_via_session(client, email=superuser['email'])
    result = client.get(item_url,
                        headers=[('Content-Type', 'application/json')])
    payload = result.json

    assert result.status_code == 200
    assert payload['metadata']['pid'] == document['pid']
Example #13
0
def minimal_document(db, bucket_location, organisation):
    """Create and commit a document holding only a PID, title and organisation.

    :returns: The created document record.
    """
    main_title = {'language': 'eng', 'value': 'Title of the document'}
    data = {
        'pid': '1000',
        'title': [{'type': 'bf:Title', 'mainTitle': [main_title]}],
        'organisation': [{
            '$ref': 'https://sonar.ch/api/organisations/org'
        }],
    }

    document = DocumentRecord.create(data, dbcommit=True, with_bucket=True)
    document.commit()
    db.session.commit()
    return document
Example #14
0
    def create_document(self):
        """Create document from deposit.

        Builds a document metadata dictionary from the ``metadata``,
        ``contributors``, ``projects`` and ``diffusion`` sections of this
        deposit, creates a ``DocumentRecord`` with a bucket from it, adds
        every file of the deposit to that document, then commits and
        reindexes the document.

        Projects that carry no ``$ref`` are first created as
        ``ProjectRecord`` and replaced by a reference to the new record.

        As a side effect, ``self['document']`` is set to a ``$ref`` pointing
        to the created document.

        The document type, title, document date and license are read with
        plain subscripting, so they are presumably mandatory in the deposit.

        :returns: The created document record.
        """
        metadata = {}

        # Organisation
        if current_user_record and current_user_record.get('organisation'):
            metadata['organisation'] = [current_user_record['organisation']]

        # Document type
        metadata['documentType'] = self['metadata']['documentType']

        # Language, also used as fallback language for the other fields
        language = self['metadata'].get('language', 'eng')

        # Title
        metadata['title'] = [{
            'type':
            'bf:Title',
            'mainTitle': [{
                'language': language,
                'value': self['metadata']['title']
            }]
        }]

        # Subtitle
        if self['metadata'].get('subtitle'):
            metadata['title'][0]['subtitle'] = [{
                'language':
                language,
                'value':
                self['metadata']['subtitle']
            }]

        # Other title, stored as an additional main title
        if self['metadata'].get('otherLanguageTitle', {}).get('title'):
            metadata['title'][0]['mainTitle'].append({
                'language':
                self['metadata']['otherLanguageTitle'].get(
                    'language', language),
                'value':
                self['metadata']['otherLanguageTitle']['title']
            })

        # Languages
        metadata['language'] = [{'value': language, 'type': 'bf:Language'}]

        # Document date
        metadata['provisionActivity'] = [{
            'type':
            'bf:Publication',
            'startDate':
            self['metadata']['documentDate']
        }]
        # Statements are appended below in the order place, agent, date
        metadata['provisionActivity'][0]['statement'] = []

        # Publication place
        if self['metadata'].get('publicationPlace'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publicationPlace']
                }],
                'type':
                'bf:Place'
            })

        # Publisher
        if self['metadata'].get('publisher'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publisher']
                }],
                'type':
                'bf:Agent'
            })

        # Add a statement for date
        metadata['provisionActivity'][0]['statement'].append({
            'label': [{
                'value': self['metadata']['documentDate']
            }],
            'type':
            'Date'
        })

        # Published in
        if self['metadata'].get('publication'):
            part_of = {
                'numberingYear': self['metadata']['publication']['year'],
                'document': {
                    'title': self['metadata']['publication']['publishedIn']
                }
            }

            if self['metadata']['publication'].get('pages'):
                part_of['numberingPages'] = self['metadata']['publication'][
                    'pages']

            if self['metadata']['publication'].get('volume'):
                part_of['numberingVolume'] = self['metadata']['publication'][
                    'volume']

            if self['metadata']['publication'].get('number'):
                part_of['numberingIssue'] = self['metadata']['publication'][
                    'number']

            if self['metadata']['publication'].get('editors'):
                part_of['document']['contribution'] = self['metadata'][
                    'publication']['editors']

            if self['metadata']['publication'].get('publisher'):
                part_of['document']['publication'] = {
                    'statement': self['metadata']['publication']['publisher']
                }

            metadata['partOf'] = [part_of]

        # Other electronic versions
        if self['metadata'].get('otherElectronicVersions'):
            metadata['otherEdition'] = [{
                'document': {
                    'electronicLocator': link['url']
                },
                'publicNote': link['publicNote']
            } for link in self['metadata']['otherElectronicVersions']]

        # Specific collections
        if self['metadata'].get('specificCollections'):
            metadata['specificCollections'] = self['metadata'][
                'specificCollections']

        # Classification
        if self['metadata'].get('classification'):
            metadata['classification'] = [{
                'type':
                'bf:ClassificationUdc',
                'classificationPortion':
                self['metadata']['classification']
            }]

        # Abstracts
        if self['metadata'].get('abstracts'):
            metadata['abstracts'] = [{
                'language':
                abstract.get('language', language),
                'value':
                abstract['abstract']
            } for abstract in self['metadata']['abstracts']]

        # Dissertation
        if self['metadata'].get('dissertation'):
            metadata['dissertation'] = self['metadata']['dissertation']

        # Subjects
        if self['metadata'].get('subjects'):
            metadata['subjects'] = [{
                'label': {
                    'language': subject.get('language', language),
                    'value': subject['subjects']
                }
            } for subject in self['metadata']['subjects']]

        # Identifiers
        identifiers = []
        if self['metadata'].get('identifiedBy'):
            for identifier in self['metadata']['identifiedBy']:
                data = {
                    'type': identifier['type'],
                    'value': identifier['value'],
                }

                if identifier.get('source'):
                    data['source'] = identifier['source']

                # Special for PMID: stored as a local identifier whose
                # source is "PMID", overriding any source given above
                if identifier['type'] == 'pmid':
                    data['source'] = 'PMID'
                    data['type'] = 'bf:Local'

                identifiers.append(data)

        if identifiers:
            metadata['identifiedBy'] = identifiers

        # Contributors, read from the deposit itself and not from its
        # ``metadata`` section
        contributors = []
        for contributor in self.get('contributors', []):
            data = {
                'agent': {
                    'type': 'bf:Person',
                    'preferred_name': contributor['name']
                },
                'role': [contributor['role']]
            }

            if contributor.get('affiliation'):
                data['affiliation'] = contributor['affiliation']

            # ORCID for contributor
            if contributor.get('orcid'):
                data['agent']['identifiedBy'] = {
                    'type': 'bf:Local',
                    'source': 'ORCID',
                    'value': contributor['orcid']
                }

            contributors.append(data)

        if contributors:
            metadata['contribution'] = contributors

        # Projects
        if self.get('projects'):
            projects = []

            for project in self['projects']:
                # Create a new project; entries that already hold a ``$ref``
                # are kept unchanged
                if not project.get('$ref'):
                    data = project.copy()

                    # Store user
                    data['user'] = self['user']

                    # Store organisation
                    # NOTE(review): unlike the organisation section at the
                    # top, this is not guarded against an empty
                    # ``current_user_record`` -- confirm it cannot be empty.
                    data['organisation'] = current_user_record['organisation']

                    # Project identifier
                    if project.get('identifier'):
                        data['identifiedBy'] = {
                            'type': 'bf:Identifier',
                            'value': project['identifier']
                        }
                        data.pop('identifier')

                    # Investigators
                    if project.get('investigators'):
                        data['investigators'] = []
                        for investigator in project['investigators']:
                            investigator_data = {
                                'agent': {
                                    'preferred_name': investigator['name']
                                },
                                'role': [investigator['role']],
                            }

                            if investigator.get('affiliation'):
                                investigator_data[
                                    'affiliation'] = investigator.get(
                                        'affiliation')

                            if investigator.get('orcid'):
                                investigator_data['identifiedBy'] = {
                                    'type': 'bf:Local',
                                    'source': 'ORCID',
                                    'value': investigator.get('orcid')
                                }

                            data['investigators'].append(investigator_data)

                    # Funding organisations
                    if project.get('funding_organisations'):
                        data['funding_organisations'] = []
                        for funding_organisation in project[
                                'funding_organisations']:
                            funding_organisation_data = {
                                'agent': {
                                    'preferred_name':
                                    funding_organisation['name']
                                }
                            }

                            if funding_organisation.get('identifier'):
                                funding_organisation_data['identifiedBy'] = {
                                    'type': 'bf:Identifier',
                                    'value': funding_organisation['identifier']
                                }

                            data['funding_organisations'].append(
                                funding_organisation_data)

                    project_record = ProjectRecord.create(data, dbcommit=True)
                    project_record.reindex()
                    # Replace the inline project by a reference to the record
                    project = {
                        '$ref':
                        project_record.get_ref_link('projects',
                                                    project_record['pid'])
                    }

                projects.append(project)

            if projects:
                metadata['projects'] = projects

        # License
        metadata['usageAndAccessPolicy'] = {
            'license': self['diffusion']['license']
        }

        document = DocumentRecord.create(metadata,
                                         dbcommit=True,
                                         with_bucket=True)

        # Order 1 is given to every file of the "main" category, the other
        # files are numbered from 2 in iteration order
        current_order = 2
        for file in self.files:
            with file.file.storage().open() as pdf_file:
                content = pdf_file.read()

                if file.get('category', 'main') == 'main':
                    order = 1
                else:
                    order = current_order
                    current_order += 1

                kwargs = {
                    'label': file.get('label', file['key']),
                    'order': order
                }

                # Embargo settings are only forwarded when an embargo is
                # flagged and a date is given
                if file.get('embargo', False) and file.get('embargoDate'):
                    kwargs['access'] = 'coar:c_f1cf'  # Embargoed access
                    kwargs['embargo_date'] = file['embargoDate']
                    kwargs['restricted_outside_organisation'] = file.get(
                        'exceptInOrganisation', False)

                document.add_file(content, file['key'], **kwargs)

        document.commit()
        document.reindex()

        # Keep a reference to the created document on the deposit
        self['document'] = {
            '$ref': DocumentRecord.get_ref_link('documents', document['pid'])
        }

        return document
Example #15
0
def test_dbcommit(app):
    """Test record commit to db.

    After the commit the record must be retrievable by the PID it was
    given at creation.
    """
    record = DocumentRecord.create({"title": "The title of the record"})

    record.dbcommit()

    assert DocumentRecord.get_record_by_pid(
        record['pid'])['pid'] == record['pid']
Example #16
0
    def create_document(self):
        """Create document from deposit.

        Builds a document metadata dictionary from the ``metadata``,
        ``contributors``, ``projects`` and ``diffusion`` sections of this
        deposit, creates a ``DocumentRecord`` with a bucket from it, adds
        every file of the deposit to that document, then commits and
        reindexes the document.

        Collections and projects that carry no ``$ref`` are first created
        (through ``CollectionRecord.create`` and the ``projects`` service
        respectively) and replaced by a reference to the new record.

        As a side effect, ``self['document']`` is set to a ``$ref`` pointing
        to the created document.

        The document type, title, document date and license are read with
        plain subscripting, so they are presumably mandatory in the deposit.

        :returns: The created document record.
        """
        # TODO : Do this whole process with a marshmallow schema serializer.
        metadata = {}

        # Organisation
        if current_user_record and current_user_record.get('organisation'):
            metadata['organisation'] = [current_user_record['organisation']]

        # Document type
        metadata['documentType'] = self['metadata']['documentType']

        # Language, also used as fallback language for the other fields
        language = self['metadata'].get('language', 'eng')

        # Title
        metadata['title'] = [{
            'type':
            'bf:Title',
            'mainTitle': [{
                'language': language,
                'value': self['metadata']['title']
            }]
        }]

        # Subtitle
        if self['metadata'].get('subtitle'):
            metadata['title'][0]['subtitle'] = [{
                'language':
                language,
                'value':
                self['metadata']['subtitle']
            }]

        # Other title, stored as an additional main title
        if self['metadata'].get('otherLanguageTitle', {}).get('title'):
            metadata['title'][0]['mainTitle'].append({
                'language':
                self['metadata']['otherLanguageTitle'].get(
                    'language', language),
                'value':
                self['metadata']['otherLanguageTitle']['title']
            })

        # Languages
        metadata['language'] = [{'value': language, 'type': 'bf:Language'}]

        # Document date
        metadata['provisionActivity'] = [{
            'type':
            'bf:Publication',
            'startDate':
            self['metadata']['documentDate']
        }]
        # Statements are appended below in the order place, agent, date
        metadata['provisionActivity'][0]['statement'] = []

        # Publication place
        if self['metadata'].get('publicationPlace'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publicationPlace']
                }],
                'type':
                'bf:Place'
            })

        # Publisher
        if self['metadata'].get('publisher'):
            metadata['provisionActivity'][0]['statement'].append({
                'label': [{
                    'value': self['metadata']['publisher']
                }],
                'type':
                'bf:Agent'
            })

        # Add a statement for date; the statement date is preferred and the
        # document date is the fallback
        metadata['provisionActivity'][0]['statement'].append({
            'label': [{
                'value':
                self['metadata']['statementDate']
                if self['metadata'].get('statementDate') else
                self['metadata']['documentDate']
            }],
            'type':
            'Date'
        })

        # Published in
        if self['metadata'].get('publication'):
            # Fall back to the document date when no publication year is set
            year = self['metadata']['publication']['year'] if self['metadata'][
                'publication'].get(
                    'year') else self['metadata']['documentDate']
            part_of = {
                'numberingYear': year,
                'document': {
                    'title': self['metadata']['publication']['publishedIn']
                }
            }

            if self['metadata']['publication'].get('pages'):
                part_of['numberingPages'] = self['metadata']['publication'][
                    'pages']

            if self['metadata']['publication'].get('volume'):
                part_of['numberingVolume'] = self['metadata']['publication'][
                    'volume']

            if self['metadata']['publication'].get('number'):
                part_of['numberingIssue'] = self['metadata']['publication'][
                    'number']

            if self['metadata']['publication'].get('editors'):
                part_of['document']['contribution'] = self['metadata'][
                    'publication']['editors']

            if self['metadata']['publication'].get('publisher'):
                part_of['document']['publication'] = {
                    'statement': self['metadata']['publication']['publisher']
                }

            if self['metadata']['publication'].get('identifiedBy'):
                part_of['document']['identifiedBy'] = self['metadata'][
                    'publication']['identifiedBy']

            metadata['partOf'] = [part_of]

        # Other electronic versions
        if self['metadata'].get('otherElectronicVersions'):
            metadata['otherEdition'] = [{
                'document': {
                    'electronicLocator': link['url']
                },
                'publicNote': link['publicNote']
            } for link in self['metadata']['otherElectronicVersions']]

        # Collections
        if self['metadata'].get('collections'):
            collections = []
            for collection in self['metadata'].get('collections'):
                # Create a new collection; entries that already hold a
                # ``$ref`` are kept unchanged
                if not collection.get('$ref'):
                    data = collection.copy()
                    # Store organisation
                    # NOTE(review): unlike the organisation section at the
                    # top, this is not guarded against an empty
                    # ``current_user_record`` -- confirm it cannot be empty.
                    data['organisation'] = current_user_record['organisation']
                    collection_record = CollectionRecord.create(data)
                    collection_record.reindex()
                    # Replace the inline collection by a reference to the
                    # record
                    collection = {
                        '$ref':
                        SonarRecord.get_ref_link('collections',
                                                 collection_record['pid'])
                    }

                collections.append(collection)

            if collections:
                metadata['collections'] = collections

        # Classification
        if self['metadata'].get('classification'):
            metadata['classification'] = [{
                'type':
                'bf:ClassificationUdc',
                'classificationPortion':
                self['metadata']['classification']
            }]

        # Abstracts
        if self['metadata'].get('abstracts'):
            metadata['abstracts'] = [{
                'language':
                abstract.get('language', language),
                'value':
                abstract['abstract']
            } for abstract in self['metadata']['abstracts']]

        # Dissertation
        if self['metadata'].get('dissertation'):
            metadata['dissertation'] = self['metadata']['dissertation']

        # Subjects
        if self['metadata'].get('subjects'):
            metadata['subjects'] = [{
                'label': {
                    'language': subject.get('language', language),
                    'value': subject['subjects']
                }
            } for subject in self['metadata']['subjects']]

        # Identifiers
        identifiers = []
        if self['metadata'].get('identifiedBy'):
            for identifier in self['metadata']['identifiedBy']:
                data = {
                    'type': identifier['type'],
                    'value': identifier['value'],
                }

                if identifier.get('source'):
                    data['source'] = identifier['source']

                # Special for PMID: stored as a local identifier whose
                # source is "PMID", overriding any source given above
                if identifier['type'] == 'pmid':
                    data['source'] = 'PMID'
                    data['type'] = 'bf:Local'

                identifiers.append(data)

        if identifiers:
            metadata['identifiedBy'] = identifiers

        # Content note
        if self['metadata'].get('contentNote'):
            metadata['contentNote'] = self['metadata']['contentNote']

        # Extent
        if self['metadata'].get('extent'):
            metadata['extent'] = self['metadata']['extent']

        # Additional materials
        if self['metadata'].get('additionalMaterials'):
            metadata['additionalMaterials'] = self['metadata'][
                'additionalMaterials']

        # Formats
        if self['metadata'].get('formats'):
            metadata['formats'] = self['metadata']['formats']

        # Other material characteristics
        if self['metadata'].get('otherMaterialCharacteristics'):
            metadata['otherMaterialCharacteristics'] = self['metadata'][
                'otherMaterialCharacteristics']

        # Edition statement
        if self['metadata'].get('editionStatement'):
            metadata['editionStatement'] = self['metadata']['editionStatement']

        # Notes
        if self['metadata'].get('notes'):
            metadata['notes'] = self['metadata']['notes']

        # Series
        if self['metadata'].get('series'):
            metadata['series'] = self['metadata']['series']

        # Custom fields: customField1 to customField3
        for field_number in range(1, 4):
            field = f'customField{field_number}'
            document_field = self['metadata'].get(field)
            if document_field:
                metadata[field] = document_field

        # Contributors, read from the deposit itself and not from its
        # ``metadata`` section
        contributors = []
        for contributor in self.get('contributors', []):
            data = {
                'agent': {
                    'type': 'bf:Person',
                    'preferred_name': contributor['name']
                },
                'role': [contributor['role']]
            }

            if contributor.get('date_of_birth'):
                 data['agent']['date_of_birth'] = contributor['date_of_birth']

            if contributor.get('date_of_death'):
                 data['agent']['date_of_death'] = contributor['date_of_death']

            if contributor.get('affiliation'):
                data['affiliation'] = contributor['affiliation']

            # ORCID for contributor
            if contributor.get('orcid'):
                data['agent']['identifiedBy'] = {
                    'type': 'bf:Local',
                    'source': 'ORCID',
                    'value': contributor['orcid']
                }

            contributors.append(data)

        if contributors:
            metadata['contribution'] = contributors

        # Projects
        if self.get('projects'):
            projects = []

            for project in self['projects']:
                # Create a new project; entries that already hold a ``$ref``
                # are kept unchanged
                if not project.get('$ref'):
                    data = project.copy()

                    # Store user
                    data['user'] = self['user']

                    # Store organisation
                    data['organisation'] = current_user_record['organisation']

                    project_record = sonar.service('projects').create(
                        g.identity, {'metadata': data})
                    # Replace the inline project by a reference to the record
                    project = {
                        '$ref':
                        SonarRecord.get_ref_link('projects',
                                                 project_record['id'])
                    }

                projects.append(project)

            if projects:
                metadata['projects'] = projects

        # License
        metadata['usageAndAccessPolicy'] = {
            'license': self['diffusion']['license']
        }

        # Open access status
        if self['diffusion'].get('oa_status'):
            metadata['oa_status'] = self['diffusion']['oa_status']

        # Subdivisions
        if self['diffusion'].get('subdivisions'):
            metadata['subdivisions'] = self['diffusion']['subdivisions']


        # Masked; compared with None so that an explicit false value is
        # copied too
        if self['diffusion'].get('masked') is not None:
            metadata['masked'] = self['diffusion']['masked']
        document = DocumentRecord.create(metadata,
                                         dbcommit=True,
                                         with_bucket=True)

        # Order 1 is given to every file of the "main" category, the other
        # files are numbered from 2 in iteration order
        current_order = 2
        for file in self.files:
            with file.file.storage().open() as pdf_file:
                content = pdf_file.read()

                if file.get('category', 'main') == 'main':
                    order = 1
                else:
                    order = current_order
                    current_order += 1

                kwargs = {
                    'label': file.get('label', file['key']),
                    'order': order
                }

                # Embargo settings are only forwarded when an embargo is
                # flagged and a date is given
                if file.get('embargo', False) and file.get('embargoDate'):
                    kwargs['access'] = 'coar:c_f1cf'  # Embargoed access
                    kwargs['embargo_date'] = file['embargoDate']
                    kwargs['restricted_outside_organisation'] = file.get(
                        'exceptInOrganisation', False)

                document.add_file(content, file['key'], **kwargs)

        document.commit()
        document.reindex()

        # Keep a reference to the created document on the deposit
        self['document'] = {
            '$ref': DocumentRecord.get_ref_link('documents', document['pid'])
        }

        return document
Example #17
0
def import_records(records_to_import):
    """Import records in database and index them.

    Used as celery task. "ignore_result" flag means that we don't want to
    get the status and/or the result of the task, execution is faster.

    For each item, an existing record is searched by its identifiers: when
    one is found it is updated with the data, otherwise a new record is
    created with a bucket. The ``files`` key is removed from each item and
    the files it lists are attached to the record from their URL. An error on
    a single file or on a single record is logged and the import goes on with
    the next one.

    :param list records_to_import: List of records to import.
    :returns: List of IDs.
    """
    indexer = RecordIndexer()

    ids = []

    for data in records_to_import:
        try:
            files_data = data.pop('files', [])

            record = DocumentRecord.get_record_by_identifier(
                data.get('identifiedBy', []))

            if not record:
                record = DocumentRecord.create(data,
                                               dbcommit=False,
                                               with_bucket=True)
            else:
                record.update(data)

            for file_data in files_data:
                # Store url and key and remove it from dict to pass dict to
                # kwargs in add_file_from_url method
                url = file_data.pop('url')
                key = file_data.pop('key')

                try:
                    record.add_file_from_url(url, key, **file_data)
                except Exception as exception:
                    # "identifiedBy" is optional (see the lookup above), so it
                    # is read with .get(): a missing key must not raise from
                    # inside this handler and abort the whole record because
                    # of a single file.
                    current_app.logger.warning(
                        'Error during import of file {file} of record '
                        '{record}: {error}'.format(
                            file=key,
                            error=exception,
                            record=record.get('identifiedBy')))

            # Merge record in database, at this time it's not saved into DB.
            record.commit()

            # Pushing record to database, not yet persisted into DB
            db.session.flush()

            # Add ID for bulk index in elasticsearch
            ids.append(str(record.id))

            # Same as above: logging a success must not fail (and be reported
            # as an import error) when the record has no "identifiedBy".
            current_app.logger.info(
                'Record with reference "{reference}" imported successfully'.
                format(reference=record.get('identifiedBy')))

        except Exception as exception:
            # Best effort: a failing record is reported and skipped.
            # NOTE(review): the session is not rolled back here; confirm that
            # a failed flush does not prevent the final commit below.
            current_app.logger.error(
                'Error during importation of record {record}: {exception}'.
                format(record=data, exception=exception))

    # Commit and index records
    db.session.commit()
    indexer.bulk_index(ids)
    indexer.process_bulk_queue()

    return ids
Example #18
0
File: api.py Project: weblate/sonar
    def create_document(self):
        """Build a document record out of this deposit and return it.

        The deposit metadata and contributors are mapped to the document
        structure, the document is created with a bucket, every file of the
        deposit is added to it and the deposit receives a ``$ref`` link
        pointing to the new document.

        :returns: The document created from the deposit.
        """
        metadata = {}

        # Organisation of the current user, when there is one
        if current_user_record and current_user_record.get('organisation'):
            metadata['organisation'] = current_user_record['organisation']

        deposit = self['metadata']

        # Document type
        metadata['documentType'] = deposit['documentType']

        # Language of the deposit, fallback for values without own language
        language = deposit.get('language', 'eng')

        # Title
        title = {
            'type': 'bf:Title',
            'mainTitle': [{
                'language': language,
                'value': deposit['title']
            }]
        }
        metadata['title'] = [title]

        # Subtitle
        subtitle = deposit.get('subtitle')
        if subtitle:
            title['subtitle'] = [{'language': language, 'value': subtitle}]

        # Title in another language, stored as an additional main title
        other_title = deposit.get('otherLanguageTitle', {})
        if other_title.get('title'):
            title['mainTitle'].append({
                'language': other_title.get('language', language),
                'value': other_title['title']
            })

        # Languages
        metadata['language'] = [{'value': language, 'type': 'bf:Language'}]

        # Document date
        document_date = deposit.get('documentDate')
        if document_date:
            metadata['provisionActivity'] = [{
                'type': 'bf:Publication',
                'startDate': document_date
            }]

        # Published in
        publication = deposit.get('publication')
        if publication:
            part_of = {
                'numberingYear': publication['year'],
                'document': {
                    'title': publication['publishedIn']
                }
            }
            host_document = part_of['document']

            # Optional numbering values, copied only when they are filled
            numbering_keys = (
                ('pages', 'numberingPages'),
                ('volume', 'numberingVolume'),
                ('number', 'numberingIssue'),
            )
            for deposit_key, document_key in numbering_keys:
                numbering = publication.get(deposit_key)
                if numbering:
                    part_of[document_key] = numbering

            editors = publication.get('editors')
            if editors:
                host_document['contribution'] = editors

            publisher = publication.get('publisher')
            if publisher:
                host_document['publication'] = {'statement': publisher}

            metadata['partOf'] = [part_of]

        # Other electronic versions
        electronic_versions = deposit.get('otherElectronicVersions')
        if electronic_versions:
            other_editions = []
            for version in electronic_versions:
                other_editions.append({
                    'document': {
                        'electronicLocator': version['url']
                    },
                    'publicNote': version['publicNote']
                })
            metadata['otherEdition'] = other_editions

        # Specific collections
        specific_collections = deposit.get('specificCollections')
        if specific_collections:
            metadata['specificCollections'] = specific_collections

        # Classification
        classification = deposit.get('classification')
        if classification:
            metadata['classification'] = [{
                'type': 'bf:ClassificationUdc',
                'classificationPortion': classification
            }]

        # Abstracts
        abstracts = deposit.get('abstracts')
        if abstracts:
            metadata['abstracts'] = [{
                'language': item.get('language', language),
                'value': item['abstract']
            } for item in abstracts]

        # Subjects
        subjects = deposit.get('subjects')
        if subjects:
            metadata['subjects'] = [{
                'label': {
                    'language': item.get('language', language),
                    'value': item['subjects']
                }
            } for item in subjects]

        # Contributors
        contributions = []
        for contributor in self['contributors']:
            agent = {
                'type': 'bf:Person',
                'preferred_name': contributor['name']
            }
            contribution = {'agent': agent, 'role': [contributor['role']]}

            # ORCID for contributor
            orcid = contributor.get('orcid')
            if orcid:
                agent['identifiedBy'] = {
                    'type': 'bf:Doi',
                    'source': 'ORCID',
                    'value': orcid
                }

            # The affiliation is kept only when it is filled; controlled
            # affiliations are then resolved from it.
            affiliation = contributor.get('affiliation')
            if affiliation:
                contribution['affiliation'] = affiliation
                controlled = DocumentRecord.get_affiliations(affiliation)
                if controlled:
                    contribution['controlledAffiliation'] = controlled

            contributions.append(contribution)

        if contributions:
            metadata['contribution'] = contributions

        document = DocumentRecord.create(metadata,
                                         dbcommit=True,
                                         with_bucket=True)

        # Main files get order 1, the other ones are numbered from 2 on.
        next_order = 2
        for deposit_file in self.files:
            with deposit_file.file.storage().open() as stream:
                content = stream.read()

                is_main = deposit_file.get('category', 'main') == 'main'
                if is_main:
                    order = 1
                else:
                    order = next_order
                    next_order += 1

                file_options = {
                    'label': deposit_file.get('label', deposit_file['key']),
                    'order': order
                }

                has_embargo = deposit_file.get('embargo', False)
                if has_embargo and deposit_file.get('embargoDate'):
                    file_options['embargo_date'] = deposit_file['embargoDate']

                if deposit_file.get('exceptInOrganisation'):
                    file_options['restricted'] = 'organisation'

                document.add_file(content, deposit_file['key'],
                                  **file_options)

        document.commit()
        document.reindex()

        # Link the deposit to the document created from it
        document_ref = DocumentRecord.get_ref_link('documents',
                                                   document['pid'])
        self['document'] = {'$ref': document_ref}

        return document