Beispiel #1
0
    def _publish_new(self, id_=None):
        """Publish a new deposit, handling versioning and communities.

        The ``communities`` key is removed from the deposit before the parent
        class publishes it. When the concept record already has more than one
        version, the file checksums of this deposit are compared with those
        of every earlier version, and the communities of the immediately
        preceding version are used as the record-side input of the community
        synchronisation.

        :param id_: Forwarded unchanged to the parent ``_publish_new``.
        :returns: The published record after community synchronisation.
        :raises VersioningFilesError: If an earlier version holds exactly the
            same set of file checksums as this deposit.
        """
        dep_comms = set(self.pop('communities', []))
        record = super(ZenodoDeposit, self)._publish_new(id_=id_)
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)

        # Load the versions once and reuse the list, instead of issuing a
        # COUNT query followed by two identical ``.all()`` queries.
        children = pv.children.all()
        if len(children) > 1:
            files_set = {f.get_version().file.checksum for f in self.files}
            # Every version except the last one (the record just published).
            for prev_recid in children[:-1]:
                rec = ZenodoRecord.get_record(prev_recid.object_uuid)
                prev_files_set = {
                    f.get_version().file.checksum for f in rec.files
                }
                if files_set == prev_files_set:
                    raise VersioningFilesError()

            # Communities are inherited from the directly preceding version.
            prev_recid = children[-2]
            rec_comms = set(ZenodoRecord.get_record(
                prev_recid.get_assigned_object()).get('communities', []))
        else:
            rec_comms = set()

        record = self._sync_communities(dep_comms, rec_comms, record)
        record.commit()

        # Update the concept recid redirection
        pv.update_redirect()
        RecordDraft.unlink(record.pid, self.pid)
        index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

        return record
Beispiel #2
0
    def _publish_new(self, id_=None):
        """Publish the deposit for the first time and sync its communities.

        :param id_: Passed through to the parent ``_publish_new``.
        :returns: The published record once communities have been synced.
        :raises VersioningFilesError: If some earlier version has the same
            set of file checksums as this deposit.
        """
        deposit_communities = set(self.pop('communities', []))
        record = super(ZenodoDeposit, self)._publish_new(id_=id_)
        concept_pid = PersistentIdentifier.get(
            'recid', record['conceptrecid'])
        versioning = PIDVersioning(parent=concept_pid)

        # Stays empty unless there is a previous version to inherit from.
        record_communities = set()
        if versioning.children.count() > 1:
            own_checksums = set(
                fobj.get_version().file.checksum for fobj in self.files)
            # Compare against all versions but the newest one.
            for older_pid in versioning.children.all()[:-1]:
                older_record = ZenodoRecord.get_record(older_pid.object_uuid)
                older_checksums = set(
                    fobj.get_version().file.checksum
                    for fobj in older_record.files)
                if own_checksums == older_checksums:
                    raise VersioningFilesError()

            previous_pid = versioning.children.all()[-2]
            previous_record = ZenodoRecord.get_record(
                previous_pid.get_assigned_object())
            record_communities = set(previous_record.get('communities', []))

        record = self._sync_communities(
            deposit_communities, record_communities, record)
        record.commit()

        # Point the concept recid at the latest version, detach the draft
        # link and reindex the neighbouring versions.
        versioning.update_redirect()
        RecordDraft.unlink(record.pid, self.pid)
        index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

        return record
Beispiel #3
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Remove this deposit together with its PIDs and its file bucket.

        :param delete_published: When true, the deposit is removed even if it
            belongs to a published record (usually an admin operation).
        :type delete_published: bool
        :raises PIDInvalidAction: If the deposit is published and
            ``delete_published`` is false.
        """
        if self['_deposit'].get('pid') and not delete_published:
            raise PIDInvalidAction()

        # Record identifier attached to this deposit
        recid_pid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        pv = PIDVersioning(child=recid_pid)
        if pv.exists:
            is_own_draft = (pv.draft_child and
                            self.pid == pv.draft_child_deposit)
            if is_own_draft:
                pv.remove_draft_child()
            if pv.last_child:
                index_siblings(
                    pv.last_child,
                    children=pv.children.all(),
                    include_pid=True,
                    neighbors_eager=True,
                    with_deposits=True,
                )

        # Only identifiers that are still merely reserved are dropped.
        if recid_pid.status == PIDStatus.RESERVED:
            db.session.delete(recid_pid)

        if 'conceptrecid' in self:
            concept_pid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_pid.status == PIDStatus.RESERVED:
                db.session.delete(concept_pid)

        # Tear down the bucket: record link, multipart parts, multipart
        # objects, then the bucket itself.
        files_bucket = self.files.bucket
        with db.session.begin_nested():
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            multiparts = MultipartObject.query_by_bucket(files_bucket)
            upload_ids = multiparts.with_entities(
                MultipartObject.upload_id).subquery()
            Part.query.filter(Part.upload_id.in_(upload_ids)).delete(
                synchronize_session='fetch')
            multiparts.delete(synchronize_session='fetch')
        files_bucket.locked = False
        files_bucket.remove()

        deposit_pid = kwargs.get('pid', self.pid)
        if deposit_pid:
            deposit_pid.delete()

        # NOTE: ``super(Deposit, self)`` deliberately skips ``Deposit.delete``
        # and goes straight to its parent (invenio_records.api.Record),
        # because everything ``Deposit.delete`` does is overridden here.
        return super(Deposit, self).delete(*args, **kwargs)
Beispiel #4
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit, its reserved identifiers and its bucket.

        :param delete_published: If True, a deposit of a published record is
            deleted as well (usually used by admin operations).
        :type delete_published: bool
        :raises PIDInvalidAction: If the deposit is published and
            ``delete_published`` is not set.
        """
        published_pid = self['_deposit'].get('pid')
        if published_pid and not delete_published:
            raise PIDInvalidAction()

        record_pid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        relations = PIDVersioning(child=record_pid)
        if relations.exists:
            # Drop the draft relation only when the draft is this deposit.
            if relations.draft_child:
                if self.pid == relations.draft_child_deposit:
                    relations.remove_draft_child()
            if relations.last_child:
                index_siblings(relations.last_child,
                               children=relations.children.all(),
                               include_pid=True, neighbors_eager=True,
                               with_deposits=True)

        # A recid that was only reserved can be removed outright.
        if record_pid.status == PIDStatus.RESERVED:
            db.session.delete(record_pid)

        # The same applies to the concept recid, when the deposit has one.
        if 'conceptrecid' in self:
            parent_pid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if parent_pid.status == PIDStatus.RESERVED:
                db.session.delete(parent_pid)

        # Remove the bucket and everything hanging off it.
        deposit_bucket = self.files.bucket
        with db.session.begin_nested():
            # Record <-> bucket link first
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            # Then the parts of the bucket's multipart uploads, followed by
            # the multipart objects themselves
            uploads_query = MultipartObject.query_by_bucket(deposit_bucket)
            parts_query = Part.query.filter(
                Part.upload_id.in_(
                    uploads_query.with_entities(
                        MultipartObject.upload_id
                    ).subquery()
                )
            )
            parts_query.delete(synchronize_session='fetch')
            uploads_query.delete(synchronize_session='fetch')
        deposit_bucket.locked = False
        deposit_bucket.remove()

        depid_pid = kwargs.get('pid', self.pid)
        if depid_pid:
            depid_pid.delete()

        # NOTE: The call targets the parent of ``Deposit``
        # (invenio_records.api.Record) on purpose: this method fully replaces
        # what ``Deposit.delete`` would do.
        return super(Deposit, self).delete(*args, **kwargs)
Beispiel #5
0
def index_versioned_record_siblings(sender, action=None, pid=None,
                                    deposit=None):
    """Reindex the version siblings of a record on its first publication.

    Nothing happens unless ``action`` is ``"publish"`` and the deposit's
    ``_deposit.pid.revision_id`` equals ``0``.

    :param sender: Not used.
    :param action: Name of the performed action; only ``"publish"`` is
        handled.
    :param pid: Not used.
    :param deposit: Deposit that triggered the action. It is only inspected
        for ``"publish"`` actions.
    """
    # Check the action first so unrelated actions never touch the deposit.
    if action != "publish":
        return

    revision_id = (deposit.get('_deposit', {}).get('pid', {})
                   .get('revision_id'))
    if revision_id != 0:
        return

    recid_pid, _ = deposit.fetch_published()
    # ``logging`` interpolates with the % operator, so the placeholder must
    # be ``%s``; a ``{}`` placeholder makes the message fail to format.
    current_app.logger.info(u'indexing siblings of %s', recid_pid)
    index_siblings(recid_pid, neighbors_eager=True)
Beispiel #6
0
    def newversion(self, pid=None):
        """Create a new version deposit.

        A new draft deposit is created only when the versioning scheme has no
        draft child yet and the DOI of the published record is locally
        managed; otherwise nothing is changed.

        :param pid: Not used as an input; the name is rebound to the PID
            returned by ``fetch_published()``.
        :returns: This deposit (``self``), not the newly created deposit.
        :raises PIDInvalidAction: If this deposit is not published.
        """
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                # Remember the owners before '_deposit' is stripped below.
                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required in case of creating new
                # version this outside of request context
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                # Register the new recid as draft child of the concept recid
                # and link it to the new deposit PID.
                # NOTE(review): '_deposit' was popped from `data` above, so
                # the lookups below presumably rely on `create()` having
                # written the new 'recid' and '_deposit' back into `data` --
                # confirm against the minter used by `create()`.
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                # Re-create the versioning helper after the draft child was
                # inserted, then reindex around that draft.
                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    # Snapshot the extra-formats bucket too, when the latest
                    # record has one.
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                # Attach the snapshot bucket(s) to the new deposit.
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                if 'extra_formats' in latest_record['_buckets']:
                    deposit['_buckets']['extra_formats'] = \
                        str(extra_formats_snapshot.id)
                    RecordsBuckets.create(
                        record=deposit.model, bucket=extra_formats_snapshot)
                deposit.commit()
        return self
Beispiel #7
0
    def newversion(self, pid=None):
        """Create a new version deposit.

        A new draft deposit is created only when the versioning scheme has no
        draft child yet and the DOI of the published record is locally
        managed; otherwise nothing is changed.

        :param pid: Not used as an input; the name is rebound to the PID
            returned by ``fetch_published()``.
        :returns: This deposit (``self``), not the newly created deposit.
        :raises PIDInvalidAction: If this deposit is not published.
        """
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                # Remember the owners before '_deposit' is stripped below.
                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required in case of creating new
                # version this outside of request context
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                # Register the new recid as draft child of the concept recid
                # and link it to the new deposit PID.
                # NOTE(review): '_deposit' was popped from `data` above, so
                # the lookups below presumably rely on `create()` having
                # written the new 'recid' and '_deposit' back into `data` --
                # confirm against the minter used by `create()`.
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                # Re-create the versioning helper after the draft child was
                # inserted, then reindex around that draft.
                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                # FIXME: `snapshot.id` might not be present because we need to
                # commit first to the DB.
                # db.session.commit()
                # Attach the snapshot bucket to the new deposit.
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                deposit.commit()
        return self
Beispiel #8
0
def versioning_link_records(recids):
    """Link several non-versioned records into one versioning scheme.

    The records are linked in the order in which they appear in the list,
    with the first record being the base for minting of the conceptdoi.
    If one of the records is already upgraded (has a ``conceptdoi``), it is
    taken as the base instead, while the requested order is preserved.

    Every linked record receives the union of the communities of all the
    records, and every linked deposit the union of the communities of all the
    deposits.

    :param recids: list of recid values (strings) to link,
                   e.g.: ['1234','55125','51269']
    :type recids: list of str
    :raises Exception: If several of the records are already upgraded but do
        not form exactly one existing versioning scheme, or if the base
        record already has children that are not listed in ``recids``.
    """
    recids_records = [
        record_resolver.resolve(recid_val) for recid_val in recids
    ]
    depids_deposits = [
        deposit_resolver.resolve(record['_deposit']['id'])
        for _, record in recids_records
    ]

    # A set union flattens the per-record lists in linear time; summing
    # lists onto ``[]`` copies the accumulator on every step.
    rec_comms = sorted(set().union(
        *(rec.get('communities', []) for _, rec in recids_records)))
    dep_comms = sorted(set().union(
        *(dep.get('communities', []) for _, dep in depids_deposits)))

    upgraded = [(recid, rec) for recid, rec in recids_records
                if 'conceptdoi' in rec]

    # Determine the base record for versioning
    if not upgraded:
        recid_v, record_v = recids_records[0]
    else:
        recid_v, record_v = upgraded[0]
        if len(upgraded) > 1:
            # All upgraded records must be exactly the children of the
            # versioning scheme of the first upgraded one.
            child_recids = {
                int(recid.pid_value)
                for recid in PIDVersioning(child=recid_v).children.all()
            }
            i_upgraded = {int(recid.pid_value) for recid, _ in upgraded}
            if child_recids != i_upgraded:
                raise Exception('Multiple upgraded records, which belong '
                                'to different versioning schemes.')

    # Mint the concept DOI for the base record
    conceptdoi = zenodo_concept_doi_minter(record_v.id, record_v)

    conceptrecid_v = PersistentIdentifier.get('recid',
                                              record_v['conceptrecid'])
    # Keep the plain value: the PID is fetched again after the commit below.
    conceptrecid_v_val = conceptrecid_v.pid_value

    pv_r1 = PIDVersioning(parent=conceptrecid_v)
    children_recids = [c.pid_value for c in pv_r1.children.all()]
    if not all(cr in recids for cr in children_recids):
        raise Exception('Children of the already upgraded record: {0} are '
                        'not specified in the ordering: {1}'
                        ''.format(children_recids, recids))

    for (recid, record), (depid, deposit) in \
            zip(recids_records, depids_deposits):

        # Remove old versioning schemes for non-base recids
        # Note: This will remove the child of the base-conceptrecid as well
        # but that's OK, since it will be added again afterwards in the
        # correct order.
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        pv.remove_child(recid)
        if conceptrecid.pid_value != conceptrecid_v_val:
            conceptrecid.delete()

        # Update the 'conceptrecid' and 'conceptdoi' in records and deposits
        record['conceptdoi'] = conceptdoi.pid_value
        record['conceptrecid'] = conceptrecid_v.pid_value
        record['communities'] = rec_comms
        record.commit()
        deposit['conceptdoi'] = conceptdoi.pid_value
        deposit['conceptrecid'] = conceptrecid_v.pid_value
        deposit['communities'] = dep_comms
        deposit.commit()

        # Add the child to the new versioning scheme
        pv_r1.insert_child(recid)

    pv_r1.update_redirect()
    db.session.commit()

    # Fetch the concept PID again after the commit, then register the DOI of
    # the latest version (if enabled) and reindex all versions.
    conceptrecid_v = PersistentIdentifier.get('recid', conceptrecid_v_val)
    pv = PIDVersioning(parent=conceptrecid_v)
    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(pv.last_child.pid_value,
                                str(pv.last_child.object_uuid))

    index_siblings(pv.last_child, with_deposits=True, eager=True)
Beispiel #9
0
def versioning_github_repository(uuid):
    """Migrate the published releases of a GitHub repository to versioning.

    All published releases of the repository that still have a deposit and a
    record are linked, ordered by recid, under one newly minted concept
    recid. A concept DOI is minted only when more than one record is linked.
    Every record receives the union of the records' communities and every
    deposit the union of the deposits' communities.

    The function returns without changes when the repository has no
    published release, or when none of the releases has a remaining record.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    :raises AssertionError: If any of the records or deposits already has a
        ``conceptrecid``, i.e. has been migrated before.
    """
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    repository = Repository.query.get(uuid)
    published_releases = repository.releases.filter_by(
        status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not published_releases:
        return

    deposits = [
        ZenodoDeposit.get_record(r.record_id) for r in published_releases
        if r.recordmetadata.json is not None
    ]
    deposits = [dep for dep in deposits if 'removed_by' not in dep]
    deposits = sorted(deposits, key=lambda dep: int(dep['recid']))

    recids = [
        PersistentIdentifier.get('recid', dep['recid']) for dep in deposits
    ]
    records = [ZenodoRecord.get_record(p.object_uuid) for p in recids]

    # There were successful releases, but deposits/records were removed since
    if not records:
        return

    # Raise explicitly instead of using ``assert``: these guards protect the
    # data from a double migration and must not vanish under ``python -O``.
    if any('conceptrecid' in rec for rec in records):
        raise AssertionError(
            "One or more of the release records have been already migrated")
    if any('conceptrecid' in dep for dep in deposits):
        raise AssertionError(
            "One or more of the release deposits have been already migrated")

    conceptrecid = zenodo_concept_recid_minter(record_uuid=records[0].id,
                                               data=records[0])
    conceptrecid.register()

    # Mint the Concept DOI if we are migrating (linking) more than one record
    if len(records) > 1:
        conceptdoi = zenodo_concept_doi_minter(records[0].id, records[0])
    else:
        conceptdoi = None

    # A set union flattens the per-record lists in linear time; summing
    # lists onto ``[]`` copies the accumulator on every step.
    rec_comms = sorted(set().union(
        *(rec.get('communities', []) for rec in records)))
    dep_comms = sorted(set().union(
        *(dep.get('communities', []) for dep in deposits)))

    for rec in records:
        rec['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            rec['conceptdoi'] = conceptdoi.pid_value
        if rec_comms:
            rec['communities'] = rec_comms
        rec.commit()

    for dep in deposits:
        dep['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            dep['conceptdoi'] = conceptdoi.pid_value
        if dep_comms:
            dep['communities'] = dep_comms
        dep.commit()

    pv = PIDVersioning(parent=conceptrecid)
    for recid in recids:
        pv.insert_child(recid)
    pv.update_redirect()

    # Read everything the task needs before committing, then commit BEFORE
    # enqueueing it: a task sent earlier could run against the database
    # state from before this migration.
    minting_enabled = current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']
    last_pid_value = recids[-1].pid_value
    last_record_uuid = str(records[-1].id)
    db.session.commit()

    if minting_enabled:
        datacite_register.delay(last_pid_value, last_record_uuid)

    # Reindex all siblings
    index_siblings(pv.last_child, with_deposits=True)