Esempio n. 1
0
def b2share_deposit_uuid_minter(record_uuid, data):
    """Mint the deposit PID and reserve the record PID sharing its value.

    :param record_uuid: UUID passed as ``object_uuid`` of the deposit PID;
        its ``hex`` form is used as the PID value.
    :param data: deposit metadata. Its ``_deposit`` entry is updated in
        place when it already carries ``external_pids`` and replaced
        otherwise.
    :returns: the ``pid`` attribute of the created deposit provider.
    """
    # The deposit UUID is reused as PID value. This makes the demo easier.
    deposit_provider = DepositUUIDProvider.create(
        object_type='rec',
        object_uuid=record_uuid,
        pid_value=record_uuid.hex,
    )
    minted_value = deposit_provider.pid.pid_value

    deposit_info = {
        'id': minted_value,
        # FIXME: do not set the status once it is done by invenio-deposit API
        'status': 'draft',
    }

    # An existing ``_deposit`` holding ``external_pids`` is merged rather than
    # overwritten so that this information survives for the new versions.
    keeps_external_pids = (
        '_deposit' in data and 'external_pids' in data['_deposit']
    )
    if keeps_external_pids:
        data['_deposit'].update(deposit_info)
    else:
        data['_deposit'] = deposit_info

    # Reserve the record PID under the same value as the deposit PID.
    RecordUUIDProvider.create(object_type='rec', pid_value=minted_value)

    return deposit_provider.pid
Esempio n. 2
0
def b2share_deposit_uuid_minter(record_uuid, data):
    """Create the deposit PID, stamp ``data['_deposit']`` and reserve the
    record PID.

    :param record_uuid: UUID of the object the deposit PID points to; its
        hexadecimal form becomes the PID value.
    :param data: deposit metadata, modified in place.
    :returns: the ``pid`` attribute of the created deposit provider.
    """
    # The deposit UUID doubles as the PID value. This makes the demo easier.
    provider = DepositUUIDProvider.create(
        object_type='rec',
        object_uuid=record_uuid,
        pid_value=record_uuid.hex,
    )
    pid_value = provider.pid.pid_value

    # FIXME: do not set the status once it is done by invenio-deposit API
    fresh_state = {'id': pid_value, 'status': 'draft'}

    # Only overwrite ``_deposit`` when there is no ``external_pids`` info to
    # keep for the new versions; otherwise merge into the existing entry.
    if '_deposit' not in data or 'external_pids' not in data['_deposit']:
        data['_deposit'] = fresh_state
    else:
        data['_deposit'].update(fresh_state)

    # NOTE(review): imported at call time, presumably to avoid a circular
    # import -- confirm before moving it to module level.
    from b2share.modules.records.providers import RecordUUIDProvider

    # Reserve the record PID with the same value as the deposit PID.
    RecordUUIDProvider.create(object_type='rec', pid_value=pid_value)

    return provider.pid
Esempio n. 3
0
def find_version_master_and_previous_record(version_of):
    """Return the versioning node and latest record for a record PID.

    :param version_of: record PID value. It must reference the last child
        of its version chain.
    :returns: tuple ``(version_master, prev_version)`` where
        ``version_master`` is the ``PIDNodeVersioning`` built on the parent
        PID and ``prev_version`` is the record of the chain's last child.
    :raises RecordNotFoundVersioningError: if the PID does not exist or its
        status is ``DELETED``.
    :raises IncorrectRecordVersioningError: if ``version_of`` is not the
        value of the last child PID.
    """
    # NOTE(review): function-scope imports, presumably to avoid circular
    # imports -- confirm before hoisting.
    from b2share.modules.records.providers import RecordUUIDProvider
    from b2share.modules.records.utils import is_publication

    try:
        requested_pid = RecordUUIDProvider.get(version_of).pid
        if requested_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as err:
        raise RecordNotFoundVersioningError() from err

    # The version master is the node of the first parent of the requested PID.
    first_parent = PIDNodeVersioning(pid=requested_pid).parents.first()
    version_master = PIDNodeVersioning(pid=first_parent)

    latest_pid = version_master.last_child
    assert latest_pid.pid_type == RecordUUIDProvider.pid_type
    latest_record = Record.get_record(latest_pid.object_uuid)
    assert is_publication(latest_record.model)

    # version_of has to reference the last version of the record.
    if latest_pid.pid_value == version_of:
        return version_master, latest_record
    raise IncorrectRecordVersioningError(latest_pid.pid_value)
Esempio n. 4
0
    def delete(self):
        """Delete this deposit along with its PIDs, index entry and buckets.

        A record PID still in ``RESERVED`` status is removed from the
        versioning chain and deleted from the session; the same is done for
        a ``RESERVED`` parent PID. The buckets linked to the deposit are
        unlocked and removed once the deposit itself has been deleted.
        """
        own_pid = self.pid
        rec_pid = RecordUUIDProvider.get(own_pid.pid_value).pid
        versioning = PIDNodeVersioning(child=rec_pid)
        # Every deposit has a parent version after the 2.1.0 upgrade except
        # deleted ones. The parent version is checked in case of a delete
        # revert.
        assert versioning is not None, 'Unexpected deposit without versioning.'

        # Unpublished record: hard delete its PID.
        if rec_pid.status == PIDStatus.RESERVED:
            versioning.remove_draft_child()
            db.session.delete(rec_pid)
        # Parent without any published record: hard delete it as well.
        if versioning.parent.status == PIDStatus.RESERVED:
            db.session.delete(versioning.parent)
        own_pid.delete()

        # Collect the buckets linked to the deposit before deleting it.
        linked_buckets = (
            Bucket.query
            .join(RecordsBuckets)
            .filter(
                RecordsBuckets.bucket_id == Bucket.id,
                RecordsBuckets.record_id == self.id,
            )
            .all()
        )

        # Remove the deposit from ES.
        self.indexer.delete(self)

        # Call the super of Invenio deposit instead of B2Share deposit, as
        # Invenio deposit doesn't support the deletion of published deposits.
        super(InvenioDeposit, self).delete(force=True)

        for linked in linked_buckets:
            linked.locked = False
            linked.remove()
Esempio n. 5
0
    def delete(self):
        """Remove the deposit, its PIDs, its ES document and its buckets.

        Reserved (unpublished) record and parent PIDs are hard deleted, the
        deposit PID is deleted, and every bucket linked to the deposit is
        unlocked and removed after the deposit row itself is gone.
        """
        dep_pid = self.pid
        value = dep_pid.pid_value
        rec_pid = RecordUUIDProvider.get(value).pid
        master = PIDVersioning(child=rec_pid)
        # Every deposit has a parent version after the 2.1.0 upgrade except
        # deleted ones. The parent version is checked in case of a delete
        # revert.
        assert master is not None, 'Unexpected deposit without versioning.'

        record_is_unpublished = rec_pid.status == PIDStatus.RESERVED
        if record_is_unpublished:
            # Hard delete the PID of a record that was never published.
            master.remove_draft_child()
            db.session.delete(rec_pid)

        if master.parent.status == PIDStatus.RESERVED:
            # The parent has no published records: hard delete it too.
            db.session.delete(master.parent)
        dep_pid.delete()

        # Buckets linked to the deposit; they are removed at the very end.
        bucket_query = Bucket.query.join(RecordsBuckets).filter(
            RecordsBuckets.bucket_id == Bucket.id,
            RecordsBuckets.record_id == self.id,
        )
        deposit_buckets = bucket_query.all()

        # Drop the deposit from ES.
        self.indexer.delete(self)

        # The super of Invenio deposit is called instead of B2Share deposit,
        # as Invenio deposit doesn't support the deletion of published
        # deposits.
        super(InvenioDeposit, self).delete(force=True)

        for deposit_bucket in deposit_buckets:
            deposit_bucket.locked = False
            deposit_bucket.remove()
Esempio n. 6
0
    def commit(self):
        """Store changes on current instance in database.

        This method extends the default implementation by publishing the
        deposition when 'publication_state' is set to 'published'.

        Before storing, ``open_access`` is forced to ``False`` for records
        under embargo, and the publication workflow of the deposit's
        community (if a ``community`` field is present) is applied.

        :returns: this deposit (``self``).
        :raises MissingModelError: if the deposit has no model or the model
            has no JSON.
        :raises InvalidDepositError: if the referenced community does not
            exist.
        """
        if self.model is None or self.model.json is None:
            raise MissingModelError()

        # automatically make embargoed records private
        if self.get('embargo_date') and self.get('open_access'):
            if is_under_embargo(self):
                self['open_access'] = False

        # run the publication workflow configured on the deposit's community
        if 'community' in self:
            try:
                community = Community.get(self['community'])
            except CommunityDoesNotExistError as e:
                raise InvalidDepositError('Community {} does not exist.'.format(
                    self['community'])) from e
            workflow = publication_workflows[community.publication_workflow]
            workflow(self.model, self)

        # publish the deposition if needed
        if (self['publication_state'] == PublicationStates.published.name
                # check invenio-deposit status so that we do not loop
                and self['_deposit']['status'] != PublicationStates.published.name):

            # Retrieve previous version in order to reindex it later.
            previous_version_pid = None
            # Save the previous "last" version for later use
            # (previous_version_uuid is only bound together with
            # previous_version_pid, which guards its use below)
            if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                    self.versioning.has_children:
                previous_version_pid = self.versioning.last_child
                previous_version_uuid = str(RecordUUIDProvider.get(
                    previous_version_pid.pid_value
                ).pid.object_uuid)

            super(Deposit, self).publish()  # publish() already calls commit()
            # Register parent PID if necessary and update redirect
            self.versioning.update_redirect()
            # Reindex previous version. This is needed in order to update
            # the is_last_version flag
            if previous_version_pid is not None:
                self.indexer.index_by_id(previous_version_uuid)

            # save the action for later indexing
            if g:
                g.deposit_action = 'publish'
        else:
            # plain metadata update: defer to the parent implementation
            super(Deposit, self).commit()
            if g:
                g.deposit_action = 'update-metadata'
        return self
Esempio n. 7
0
def b2share_deposit_uuid_minter(record_uuid, data):
    """Mint the deposit PID and reserve a record PID with the same value.

    :param record_uuid: UUID passed as ``object_uuid`` of the deposit PID;
        its ``hex`` form is the PID value.
    :param data: deposit metadata; its ``_deposit`` entry is overwritten
        with the minted id and a ``draft`` status.
    :returns: the ``pid`` attribute of the created deposit provider.
    """
    # The deposit UUID is reused as PID value. This makes the demo easier.
    provider = DepositUUIDProvider.create(
        object_type='rec',
        object_uuid=record_uuid,
        pid_value=record_uuid.hex,
    )
    minted_pid = provider.pid

    # FIXME: do not set the status once it is done by invenio-deposit API
    data['_deposit'] = dict(id=minted_pid.pid_value, status='draft')

    # Reserve the record PID.
    RecordUUIDProvider.create(
        object_type='rec', pid_value=minted_pid.pid_value)

    return provider.pid
Esempio n. 8
0
    def get(self, pid=None, **kwargs):
        """GET the list of versions of a record.

        The PID value is read from ``request.view_args['pid_value']``; the
        ``pid`` argument is not used as input. Versions are the children of
        the parent versioning PID, ordered by record creation date and
        numbered from 1.

        :returns: ``{'versions': [...]}`` with one dict per version holding
            ``version``, ``id``, ``url``, ``created`` and ``updated``.
        """
        endpoint_name = 'b2share_records_rest.{0}_item'.format(
            RecordUUIDProvider.pid_type)

        requested_value = request.view_args['pid_value']
        versioning = PIDVersioning(
            child=RecordUUIDProvider.get(requested_value).pid)
        # A record PID resolves to its parent versioning PID; otherwise the
        # requested value already is the parent versioning PID.
        parent_value = (
            versioning.parent.pid_value
            if versioning.is_child
            else requested_value
        )

        child_alias = aliased(PersistentIdentifier)
        parent_alias = aliased(PersistentIdentifier)
        rows = (
            db.session.query(child_alias, RecordMetadata)
            .join(PIDRelation, PIDRelation.child_id == child_alias.id)
            .join(parent_alias, PIDRelation.parent_id == parent_alias.id)
            .filter(
                parent_alias.pid_value == parent_value,
                RecordMetadata.id == child_alias.object_uuid,
            )
            .order_by(RecordMetadata.created)
            .all()
        )

        versions = []
        for number, (child, meta) in enumerate(rows, start=1):
            child_value = str(child.pid_value)
            versions.append({
                'version': number,
                'id': child_value,
                'url': url_for(endpoint_name,
                               pid_value=child_value,
                               _external=True),
                'created': meta.created,
                'updated': meta.updated,
            })
        return {'versions': versions}
Esempio n. 9
0
    def get(self, pid=None, **kwargs):
        """Return all versions of the record named in the request URL.

        The PID value comes from ``request.view_args['pid_value']``. The
        children of the parent versioning PID are listed in order of record
        creation, with version numbers starting at 1.

        :returns: dict with a single ``versions`` key listing, per version,
            its number, PID value, external URL and created/updated dates.
        """
        item_endpoint = 'b2share_records_rest.{0}_item'.format(
            RecordUUIDProvider.pid_type)

        url_pid_value = request.view_args['pid_value']
        url_pid = RecordUUIDProvider.get(url_pid_value).pid
        node = PIDVersioning(child=url_pid)
        if not node.is_child:
            # This is a parent versioning PID.
            parent_pid_value = url_pid_value
        else:
            # This is a record PID. Retrieve the parent versioning PID.
            parent_pid_value = node.parent.pid_value

        children = aliased(PersistentIdentifier)
        parents = aliased(PersistentIdentifier)
        query = db.session.query(children, RecordMetadata)
        query = query.join(PIDRelation, PIDRelation.child_id == children.id)
        query = query.join(parents, PIDRelation.parent_id == parents.id)
        query = query.filter(
            parents.pid_value == parent_pid_value,
            RecordMetadata.id == children.object_uuid,
        )
        matches = query.order_by(RecordMetadata.created).all()

        return {
            'versions': [
                {
                    'version': position,
                    'id': str(child.pid_value),
                    'url': url_for(item_endpoint,
                                   pid_value=str(child.pid_value),
                                   _external=True),
                    'created': meta.created,
                    'updated': meta.updated,
                }
                for position, (child, meta) in enumerate(matches, start=1)
            ]
        }
Esempio n. 10
0
def check_handles(update, record_pid):
    """Report, and optionally allocate, handles for a record and its files.

    :param update: when truthy, missing handles are created and the record
        is committed; otherwise only a report is printed.
    :param record_pid: PID value of the record to check.
    """
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    needs_commit = False

    # The record has a handle as soon as one ``_pid`` entry is an ePIC_PID.
    has_record_handle = any(
        entry.get('type') == 'ePIC_PID' for entry in record['_pid']
    )
    if has_record_handle:
        click.secho('record {} already has a handle'.format(record_pid),
                    fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if not update:
            click.secho('use -u argument to add a handle to the record')
        else:
            b2share_pid_minter(rec_pid, record)
            needs_commit = True
            click.secho('    handle added to record', fg='green')

    # Report on every file, remembering whether any of them lacks a handle.
    missing_file_handle = False
    for file_entry in record.get('_files', []):
        if f_has_handle := file_entry.get('ePIC_PID'):
            click.secho(
                'file {} already has a handle'.format(file_entry.get('key')),
                fg='green')
        else:
            click.secho(
                'file {} has no handle'.format(file_entry.get('key')),
                fg='red')
            missing_file_handle = True

    if missing_file_handle:
        if update:
            create_file_pids(record)
            needs_commit = True
            click.secho('    files updated with handles', fg='green')
        else:
            click.secho('use -u argument to add handles to the files')

    if needs_commit:
        record.commit()
        db.session.commit()
Esempio n. 11
0
def find_version_master_and_previous_record(version_of):
    """Return the PIDVersioning and latest record for a record PID.

    :param version_of: record PID value; it must be the value of the last
        child of its version chain.
    :returns: tuple ``(version_master, prev_version)``.
    :raises RecordNotFoundVersioningError: if the PID does not exist or its
        status is ``DELETED``.
    :raises IncorrectRecordVersioningError: if ``version_of`` does not
        match the last child PID.
    """
    try:
        requested_pid = RecordUUIDProvider.get(version_of).pid
        if requested_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as err:
        raise RecordNotFoundVersioningError() from err

    master = PIDVersioning(child=requested_pid)
    last_pid = master.last_child
    assert last_pid.pid_type == RecordUUIDProvider.pid_type

    last_record = Record.get_record(last_pid.object_uuid)
    assert is_publication(last_record.model)

    # version_of has to reference the last version of the record.
    if last_pid.pid_value == version_of:
        return master, last_record
    raise IncorrectRecordVersioningError(last_pid.pid_value)
Esempio n. 12
0
def check_handles(update, record_pid):
    """Check the handles of a record and its files, adding them on request.

    :param update: truthy to allocate the missing handles and commit the
        record; falsy to only print what is missing.
    :param record_pid: PID value identifying the record.
    """
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    changed = False

    epic_values = [
        pid_entry.get('value')
        for pid_entry in record['_pid']
        if pid_entry.get('type') == 'ePIC_PID'
    ]
    if not epic_values:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if update:
            b2share_pid_minter(rec_pid, record)
            changed = True
            click.secho('    handle added to record', fg='green')
        else:
            click.secho('use -u argument to add a handle to the record')
    else:
        click.secho('record {} already has a handle'.format(record_pid),
                    fg='green')

    # Every file is reported; one missing handle is enough to flag the lot.
    all_files_have_handles = True
    for file_meta in record.get('_files', []):
        if not file_meta.get('ePIC_PID'):
            click.secho('file {} has no handle'.format(file_meta.get('key')),
                        fg='red')
            all_files_have_handles = False
        else:
            click.secho(
                'file {} already has a handle'.format(file_meta.get('key')),
                fg='green')

    if not all_files_have_handles:
        if update:
            create_file_pids(record)
            changed = True
            click.secho('    files updated with handles', fg='green')
        else:
            click.secho('use -u argument to add handles to the files')

    if changed:
        record.commit()
        db.session.commit()
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0.

    Adds versioning PIDs: every registered record PID gets a new parent
    version PID, and every registered deposit PID whose deposit is not
    published gets a reserved record PID plus a parent version PID. The
    metadata of each migrated record/deposit is then updated through
    ``migrate_record_metadata``. PIDs whose record or deposit cannot be
    loaded are marked as ``DELETED``.

    Record indexing is disabled for the duration of the migration and is
    always restored afterwards, even if the migration raises.

    :param alembic: not used by this function.
    :param verbose: when truthy, progress messages are printed.
    """
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    try:
        if verbose:
            click.secho('migrating deposits and records...')
        with db.session.begin_nested():
            # Migrate published records
            records_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for rec_pid in records_pids:
                if verbose:
                    click.secho('    record {}'.format(rec_pid.pid_value))
                try:
                    # Only used to detect PIDs whose record is gone.
                    Record.get_record(rec_pid.object_uuid)
                except NoResultFound:
                    # The record is deleted but not the PID. Fix it.
                    rec_pid.status = PIDStatus.DELETED
                    continue
                # Create parent version PID
                parent_pid = RecordUUIDProvider.create().pid
                version_master = PIDVersioning(parent=parent_pid)
                version_master.insert_draft_child(child=rec_pid)
                version_master.update_redirect()
                migrate_record_metadata(
                    Record.get_record(rec_pid.object_uuid),
                    parent_pid
                )

            # Migrate deposits
            deposit_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for dep_pid in deposit_pids:
                if verbose:
                    click.secho('    deposit {}'.format(dep_pid.pid_value))
                try:
                    deposit = Deposit.get_record(dep_pid.object_uuid)

                    if deposit['publication_state'] != \
                            PublicationStates.published.name:
                        # The record is not published yet. Reserve the PID.
                        rec_pid = RecordUUIDProvider.create(
                            object_type='rec',
                            pid_value=dep_pid.pid_value,
                        ).pid
                        # Create parent version PID
                        parent_pid = RecordUUIDProvider.create().pid
                        assert parent_pid
                        version_master = PIDVersioning(parent=parent_pid)
                        version_master.insert_draft_child(child=rec_pid)
                    else:
                        # Retrieve previously created version PID
                        rec_pid = RecordUUIDProvider.get(
                            dep_pid.pid_value).pid
                        version_master = PIDVersioning(child=rec_pid)
                        parent_pid = version_master.parent
                        if not parent_pid:
                            click.secho(
                                '    record {} was deleted, but the deposit has not been removed'.format(rec_pid.pid_value),
                                fg='red')

                    if parent_pid:
                        migrate_record_metadata(
                            Deposit.get_record(dep_pid.object_uuid),
                            parent_pid
                        )
                except NoResultFound:
                    # The deposit is deleted but not the PID. Fix it.
                    dep_pid.status = PIDStatus.DELETED

        if verbose:
            click.secho('done migrating deposits.')
    finally:
        # Re-enable indexing whatever happened above; otherwise a failed
        # migration would leave RecordIndexer.index patched to a no-op.
        RecordIndexer.index = old_index_fn
Esempio n. 14
0
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0.

    For every registered record PID a parent version PID is created and the
    record is attached to it. For every registered deposit PID, an
    unpublished deposit gets a reserved record PID and a new parent version
    PID, while a published one reuses the parent PID of its record. Metadata
    is migrated with ``migrate_record_metadata``. A PID whose record or
    deposit cannot be loaded is marked as ``DELETED``.

    ``RecordIndexer.index`` is replaced by a no-op while the migration runs
    and is restored in a ``finally`` clause, so a failing migration does not
    leave indexing disabled.

    :param alembic: not used by this function.
    :param verbose: when truthy, progress messages are printed.
    """
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    try:
        if verbose:
            click.secho('migrating deposits and records...')
        with db.session.begin_nested():
            # Migrate published records
            records_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for rec_pid in records_pids:
                if verbose:
                    click.secho('    record {}'.format(rec_pid.pid_value))
                try:
                    # Existence check only; the result itself is not needed.
                    Record.get_record(rec_pid.object_uuid)
                except NoResultFound:
                    # The record is deleted but not the PID. Fix it.
                    rec_pid.status = PIDStatus.DELETED
                    continue
                # Create parent version PID
                parent_pid = RecordUUIDProvider.create().pid
                version_master = PIDVersioning(parent=parent_pid)
                version_master.insert_draft_child(child=rec_pid)
                version_master.update_redirect()
                migrate_record_metadata(
                    Record.get_record(rec_pid.object_uuid), parent_pid)

            # Migrate deposits
            deposit_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for dep_pid in deposit_pids:
                if verbose:
                    click.secho('    deposit {}'.format(dep_pid.pid_value))
                try:
                    deposit = Deposit.get_record(dep_pid.object_uuid)

                    if deposit['publication_state'] != \
                            PublicationStates.published.name:
                        # The record is not published yet. Reserve the PID.
                        rec_pid = RecordUUIDProvider.create(
                            object_type='rec',
                            pid_value=dep_pid.pid_value,
                        ).pid
                        # Create parent version PID
                        parent_pid = RecordUUIDProvider.create().pid
                        assert parent_pid
                        version_master = PIDVersioning(parent=parent_pid)
                        version_master.insert_draft_child(child=rec_pid)
                    else:
                        # Retrieve previously created version PID
                        rec_pid = RecordUUIDProvider.get(
                            dep_pid.pid_value).pid
                        version_master = PIDVersioning(child=rec_pid)
                        parent_pid = version_master.parent
                        if not parent_pid:
                            click.secho(
                                '    record {} was deleted, but the deposit has not been removed'
                                .format(rec_pid.pid_value),
                                fg='red')

                    if parent_pid:
                        migrate_record_metadata(
                            Deposit.get_record(dep_pid.object_uuid),
                            parent_pid)
                except NoResultFound:
                    # The deposit is deleted but not the PID. Fix it.
                    dep_pid.status = PIDStatus.DELETED

        if verbose:
            click.secho('done migrating deposits.')
    finally:
        # Undo the monkeypatch on both the success and the failure path.
        RecordIndexer.index = old_index_fn
Esempio n. 15
0
    def commit(self):
        """Store changes on current instance in database.

        This method extends the default implementation by publishing the
        deposition when 'publication_state' is set to 'published'.

        When the deposit carries an ``external_pids`` field, the files of
        the deposit's bucket with storage class ``'B'`` are first
        synchronised with it (removed or re-pointed), the external files
        are (re)created through ``create_b2safe_file`` and the field is
        removed from the deposit before it is stored.

        :returns: this deposit (``self``).
        :raises MissingModelError: if the deposit has no model or the model
            has no JSON.
        :raises InvalidDepositError: if the referenced community does not
            exist.
        """
        if 'external_pids' in self:
            deposit_id = self['_deposit']['id']
            # NOTE(review): the lookup filters on pid_value only and takes
            # the first match; if several PIDs share this value the result
            # depends on row order -- confirm this is intended.
            recid = PersistentIdentifier.query.filter_by(
                pid_value=deposit_id).first()
            # presumably 'R' is the "reserved" PID status code -- TODO confirm
            assert recid.status == 'R'
            record_bucket = RecordsBuckets.query.filter_by(
                record_id=recid.pid_value).first()
            bucket = Bucket.query.filter_by(id=record_bucket.bucket_id).first()
            object_versions = ObjectVersion.query.filter_by(
                bucket_id=bucket.id).all()
            # map each external file key to its ePIC_PID
            key_to_pid = {
                ext_pid.get('key'): ext_pid.get('ePIC_PID')
                for ext_pid in self['external_pids']
            }
            # for the existing files
            for object_version in object_versions:
                # only object versions backed by a file of storage class 'B'
                # are synchronised; everything else is left untouched
                if object_version.file is None or \
                        object_version.file.storage_class != 'B':
                    continue
                # check that they are still in the file pids list or remove
                if object_version.key not in key_to_pid:
                    ObjectVersion.delete(bucket,
                                         object_version.key)
                # check that the uri is still the same or update it
                elif object_version.file.uri != \
                        key_to_pid[object_version.key]:
                    db.session.query(FileInstance).\
                        filter(FileInstance.id == object_version.file_id).\
                        update({"uri": key_to_pid[object_version.key]})
            create_b2safe_file(self['external_pids'], bucket)
            # the field is consumed here and is not stored with the deposit
            del self['external_pids']

        # NOTE(review): this check runs after the external_pids handling
        # above, which has already touched the database -- confirm the
        # ordering is intended.
        if self.model is None or self.model.json is None:
            raise MissingModelError()

        # automatically make embargoed records private
        if self.get('embargo_date') and self.get('open_access'):
            if is_under_embargo(self):
                self['open_access'] = False

        # run the publication workflow configured on the deposit's community
        if 'community' in self:
            try:
                community = Community.get(self['community'])
            except CommunityDoesNotExistError as e:
                raise InvalidDepositError('Community {} does not exist.'.format(
                    self['community'])) from e
            workflow = publication_workflows[community.publication_workflow]
            workflow(self.model, self)

        # publish the deposition if needed
        if (self['publication_state'] == PublicationStates.published.name
                # check invenio-deposit status so that we do not loop
                and self['_deposit']['status'] != PublicationStates.published.name):

            # Retrieve previous version in order to reindex it later.
            previous_version_pid = None
            # Save the previous "last" version for later use
            # (previous_version_uuid is only bound together with
            # previous_version_pid, which guards its use below)
            if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                    self.versioning.has_children:
                previous_version_pid = self.versioning.last_child
                previous_version_uuid = str(RecordUUIDProvider.get(
                    previous_version_pid.pid_value
                ).pid.object_uuid)
            # keep the generated external PIDs, if any, under ``_deposit``
            external_pids = generate_external_pids(self)
            if external_pids:
                self['_deposit']['external_pids'] = external_pids

            super(Deposit, self).publish()  # publish() already calls commit()
            # Register parent PID if necessary and update redirect
            self.versioning.update_redirect()
            # Reindex previous version. This is needed in order to update
            # the is_last_version flag
            if previous_version_pid is not None:
                self.indexer.index_by_id(previous_version_uuid)

            # save the action for later indexing
            if g:
                g.deposit_action = 'publish'
        else:
            # plain metadata update: defer to the parent implementation
            super(Deposit, self).commit()
            if g:
                g.deposit_action = 'update-metadata'
        return self
Esempio n. 16
0
    def create(cls, data, id_=None, version_of=None):
        """Create a deposit with the optional id.

        :param data: deposit metadata. It must contain ``_deposit.id`` (set
            by the deposit minter) and a ``community`` UUID, and must not
            contain ``publication_state`` or ``$schema``; both are set here.
            An ``external_pids`` entry, if present, is consumed to create
            the external files and removed from ``data``.
        :param id_: optional id forwarded to the parent ``create``.
        :param version_of: PID of an existing record. If set, the new record
            will be marked as a new version of this referenced record. If no
            data is provided the new record will be a copy of this record.
            Note: this PID must reference the current last version of a
            record.
        :returns: the created deposit.
        :raises InvalidDepositError: if ``publication_state`` or ``$schema``
            is set, the community id is not a valid UUID, or the community
            has no schema.
        :raises ValidationError: if the community is missing or differs from
            the one of the previous version.
        :raises DraftExistsVersioningError: if the version chain already has
            a draft.
        """

        # check that the status field is not set
        if 'publication_state' in data:
            raise InvalidDepositError(
                'Field "publication_state" cannot be set.')
        data['publication_state'] = PublicationStates.draft.name
        # Set record's schema
        if '$schema' in data:
            raise InvalidDepositError('"$schema" field should not be set.')

        # Retrieve reserved record PID which should have already been created
        # by the deposit minter (The record PID value is the same
        # as the one of the deposit)
        rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid
        version_master, prev_version = None, None
        # if this is a new version of an existing record, add the future
        # record pid in the chain of versions.
        if version_of:
            version_master, prev_version = \
                find_version_master_and_previous_record(version_of)
            # The new version must be in the same community
            if data['community'] != prev_version['community']:
                raise ValidationError(
                    'The community field cannot change between versions.')
            try:
                version_master.insert_draft_child(rec_pid)
            except Exception as exc:
                # Only one draft is allowed per version chain. The message is
                # read defensively so that an exception without arguments is
                # re-raised as is instead of being masked by an IndexError.
                message = str(exc.args[0]) if exc.args else ''
                if 'Draft child already exists for this relation' in message:
                    raise DraftExistsVersioningError(
                        version_master.draft_child
                    ) from exc
                raise
        else:
            # create parent PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDNodeVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)

        # Mint the deposit with the parent PID
        data['_pid'] = [{
            'value': version_master.parent.pid_value,
            'type': RecordUUIDProvider.parent_pid_type,
        }]
        if 'community' not in data or not data['community']:
            raise ValidationError(
                "Record's metadata has no community field.")
        try:
            community_id = uuid.UUID(data['community'])
        except ValueError as e:
            raise InvalidDepositError(
                'Community ID is not a valid UUID.') from e
        try:
            schema = CommunitySchema.get_community_schema(community_id)
        except CommunitySchemaDoesNotExistError as e:
            raise InvalidDepositError(
                'No schema for community {}.'.format(community_id)) from e

        if version_of:
            # a new version keeps the schema of the previous version
            data['$schema'] = Deposit._build_deposit_schema(prev_version)
        else:
            from b2share.modules.schemas.serializers import \
                community_schema_draft_json_schema_link
            data['$schema'] = community_schema_draft_json_schema_link(
                schema,
                _external=True
            )

        # create file bucket
        if prev_version and prev_version.files:
            # Clone the bucket from the previous version. This doesn't
            # duplicate files.
            bucket = prev_version.files.bucket.snapshot(lock=False)
            bucket.locked = False
        else:
            bucket = Bucket.create(storage_class=current_app.config[
                'DEPOSIT_DEFAULT_STORAGE_CLASS'
            ])

        # external files are created in the bucket and the field is not
        # stored with the deposit
        if 'external_pids' in data:
            create_b2safe_file(data['external_pids'], bucket)
            del data['external_pids']

        deposit = super(Deposit, cls).create(data, id_=id_)
        db.session.add(bucket)
        db.session.add(RecordsBuckets(
            record_id=deposit.id, bucket_id=bucket.id
        ))

        return deposit
Esempio n. 17
0
    def create(cls, data, id_=None, version_of=None):
        """Create a deposit with the optional id.

        The record PID matching ``data['_deposit']['id']`` is retrieved, the
        future record is inserted as a draft in a chain of versions, the
        deposit ``$schema`` is resolved and a file bucket is linked to the
        created deposit.

        :param data: metadata of the deposit. It is modified in place
            (``publication_state``, ``_pid`` and ``$schema`` are set and
            ``external_pids`` is removed). It must contain a non-empty
            ``community`` field and must not contain ``publication_state``
            nor ``$schema``.
        :param id_: optional identifier forwarded to the parent ``create``.
        :param version_of: PID of an existing record. If set, the new record
            will be marked as a new version of this referenced record. If no
            data is provided the new record will be a copy of this record.
            Note: this PID must reference the current last version of a
            record.
        :returns: the created deposit.
        :raises InvalidDepositError: if ``publication_state`` or ``$schema``
            is set, if the community ID is not a valid UUID or if the
            community has no schema.
        :raises ValidationError: if the community field is missing or empty,
            or if it differs from the one of the previous version.
        :raises DraftExistsVersioningError: if the version chain already has
            a draft.
        """

        # check that the status field is not set
        if 'publication_state' in data:
            raise InvalidDepositError(
                'Field "publication_state" cannot be set.')
        data['publication_state'] = PublicationStates.draft.name
        # Set record's schema
        if '$schema' in data:
            raise InvalidDepositError('"$schema" field should not be set.')

        # Retrieve reserved record PID which should have already been created
        # by the deposit minter (The record PID value is the same
        # as the one of the deposit)
        rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid

        # The community is read when checking the version chain below, so it
        # is validated first. This reports a missing community as a
        # ValidationError instead of a KeyError and avoids touching the
        # version chain for a deposit which is going to be rejected.
        if not data.get('community'):
            raise ValidationError(
                "Record's metadata has no community field.")

        version_master, prev_version = None, None
        # if this is a new version of an existing record, add the future
        # record pid in the chain of versions.
        if version_of:
            version_master, prev_version = \
                find_version_master_and_previous_record(version_of)
            # The new version must be in the same community
            if data['community'] != prev_version['community']:
                raise ValidationError(
                    'The community field cannot change between versions.')
            try:
                version_master.insert_draft_child(rec_pid)
            except Exception as exc:
                # Only one draft is allowed per version chain.
                # str() is used instead of exc.args[0] so that exceptions
                # without arguments or with a non-string first argument do
                # not hide the original error behind an IndexError/TypeError.
                if 'Draft child already exists for this relation' in \
                        str(exc):
                    raise DraftExistsVersioningError(
                        version_master.draft_child
                    ) from exc
                # Any other failure is propagated untouched.
                raise
        else:
            # create parent PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)

        # Mint the deposit with the parent PID
        data['_pid'] = [{
            'value': version_master.parent.pid_value,
            'type': RecordUUIDProvider.parent_pid_type,
        }]
        try:
            community_id = uuid.UUID(data['community'])
        except ValueError as e:
            raise InvalidDepositError(
                'Community ID is not a valid UUID.') from e
        # The schema is looked up even for new versions: it checks that the
        # community has a schema.
        try:
            schema = CommunitySchema.get_community_schema(community_id)
        except CommunitySchemaDoesNotExistError as e:
            raise InvalidDepositError(
                'No schema for community {}.'.format(community_id)) from e

        if version_of:
            data['$schema'] = Deposit._build_deposit_schema(prev_version)
        else:
            from b2share.modules.schemas.serializers import \
                community_schema_draft_json_schema_link
            data['$schema'] = community_schema_draft_json_schema_link(
                schema,
                _external=True
            )

        # create file bucket
        if prev_version and prev_version.files:
            # Clone the bucket from the previous version. This doesn't
            # duplicate files.
            bucket = prev_version.files.bucket.snapshot(lock=False)
            bucket.locked = False
        else:
            bucket = Bucket.create(storage_class=current_app.config[
                'DEPOSIT_DEFAULT_STORAGE_CLASS'
            ])

        # 'external_pids' is turned into files of the bucket and is not
        # stored in the deposit metadata.
        if 'external_pids' in data:
            create_b2safe_file(data['external_pids'], bucket)
            del data['external_pids']

        deposit = super(Deposit, cls).create(data, id_=id_)
        db.session.add(bucket)
        db.session.add(RecordsBuckets(
            record_id=deposit.id, bucket_id=bucket.id
        ))

        return deposit
Esempio n. 18
0
    def commit(self):
        """Store changes on current instance in database.

        On top of the default implementation this method:

        * synchronises the files of the bucket with the ``external_pids``
          field, then removes that field from the deposit;
        * closes the access of records which are under embargo;
        * runs the publication workflow of the deposit's community;
        * publishes the deposition when 'publication_state' is set to
          'published' and the invenio-deposit status is not.

        :returns: the deposit itself.
        :raises MissingModelError: if the deposit has no model or no JSON.
        :raises InvalidDepositError: if the community does not exist.
        """
        if 'external_pids' in self:
            pid = PersistentIdentifier.query.filter_by(
                pid_value=self['_deposit']['id']
            ).first()
            assert pid.status == 'R'
            bucket_link = RecordsBuckets.query.filter_by(
                record_id=pid.pid_value
            ).first()
            bucket = Bucket.query.filter_by(id=bucket_link.bucket_id).first()
            stored_versions = ObjectVersion.query.filter_by(
                bucket_id=bucket.id
            ).all()
            # Map each declared file key to its ePIC PID.
            uri_by_key = {}
            for entry in self['external_pids']:
                uri_by_key[entry.get('key')] = entry.get('ePIC_PID')

            # Only the existing files with storage class 'B' are reconciled
            # with the declared external PIDs.
            for stored in stored_versions:
                if stored.file is not None and \
                        stored.file.storage_class == 'B':
                    if stored.key not in uri_by_key:
                        # the file is not listed anymore: remove it
                        ObjectVersion.delete(bucket, stored.key)
                    elif stored.file.uri != uri_by_key[stored.key]:
                        # the file is still listed but its uri changed
                        instances = db.session.query(FileInstance).filter(
                            FileInstance.id == stored.file_id
                        )
                        instances.update({"uri": uri_by_key[stored.key]})
            create_b2safe_file(self['external_pids'], bucket)
            del self['external_pids']

        if self.model is None or self.model.json is None:
            raise MissingModelError()

        # records under embargo are automatically made private
        if self.get('embargo_date') and self.get('open_access') \
                and is_under_embargo(self):
            self['open_access'] = False

        if 'community' in self:
            try:
                community = Community.get(self['community'])
            except CommunityDoesNotExistError as e:
                raise InvalidDepositError('Community {} does not exist.'.format(
                    self['community'])) from e
            publication_workflows[community.publication_workflow](
                self.model, self
            )

        # The invenio-deposit status is checked too so that we do not loop:
        # publish() calls commit() again.
        must_publish = (
            self['publication_state'] == PublicationStates.published.name
            and self['_deposit']['status'] != PublicationStates.published.name
        )
        if not must_publish:
            super(Deposit, self).commit()
            if g:
                g.deposit_action = 'update-metadata'
            return self

        # Remember the previous "last" version in order to reindex it later.
        previous_version_pid = None
        if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                self.versioning.has_children:
            previous_version_pid = self.versioning.last_child
            previous_record_pid = RecordUUIDProvider.get(
                previous_version_pid.pid_value
            ).pid
            previous_version_uuid = str(previous_record_pid.object_uuid)

        generated_pids = generate_external_pids(self)
        if generated_pids:
            self['_deposit']['external_pids'] = generated_pids

        super(Deposit, self).publish()  # publish() already calls commit()
        # Register parent PID if necessary and update redirect
        self.versioning.update_redirect()
        # The previous version is reindexed in order to update its
        # is_last_version flag.
        if previous_version_pid is not None:
            self.indexer.index_by_id(previous_version_uuid)

        # save the action for later indexing
        if g:
            g.deposit_action = 'publish'
        return self