Exemple #1
0
    def delete(self):
        """Delete this deposit together with its PIDs, index entry and buckets.

        Record and parent PIDs that are still ``RESERVED`` are hard-deleted
        from the database session; the deposit PID is deleted through its own
        ``delete()`` method.
        """
        dep_pid = self.pid
        rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
        versioning = PIDVersioning(child=rec_pid)
        # Every deposit has a parent version after the 2.1.0 upgrade, except
        # deleted ones. The parent version is checked in case of a delete
        # revert.
        # NOTE(review): this compares a freshly constructed object with None,
        # so it presumably can never fail -- confirm what was meant here.
        assert versioning is not None, 'Unexpected deposit without versioning.'

        # An unpublished record only holds a reserved PID: hard delete it.
        if rec_pid.status == PIDStatus.RESERVED:
            versioning.remove_draft_child()
            db.session.delete(rec_pid)

        # A reserved parent has no published record: hard delete it as well.
        parent_pid = versioning.parent
        if parent_pid.status == PIDStatus.RESERVED:
            db.session.delete(parent_pid)
        dep_pid.delete()

        # Fetch every bucket linked to the deposit now; they are removed once
        # the record itself has been deleted.
        linked_buckets = (
            Bucket.query.join(RecordsBuckets)
            .filter(RecordsBuckets.bucket_id == Bucket.id,
                    RecordsBuckets.record_id == self.id)
            .all()
        )

        # Remove the deposit from ES.
        self.indexer.delete(self)

        # Call the super of Invenio deposit instead of B2Share deposit, as
        # Invenio deposit doesn't support the deletion of published deposits.
        super(InvenioDeposit, self).delete(force=True)

        for linked in linked_buckets:
            linked.locked = False
            linked.remove()
Exemple #2
0
def doi_ish_view_method(parent_pid_value=0, version=0):
    """DOI-like item version endpoint view.

    :param parent_pid_value: Value of the parent PID, without the ``parent:``
        prefix (the prefix is added before the lookup).
    :param version: 1-based version number. ``0`` (the default) or the total
        number of versions selects the latest version.
    :returns: Redirect to the record page of the requested version. Aborts
        with 404 when the parent PID does not exist, the version is out of
        range, or the selected version PID is not registered.
    """
    try:
        p_pid = PersistentIdentifier.get('parent',
                                         'parent:' + str(parent_pid_value))
    except PIDDoesNotExistError:
        return abort(404)

    if p_pid:
        pid_ver = PIDVersioning(parent=p_pid)
        all_versions = list(pid_ver.get_children(ordered=True,
                                                 pid_status=None))
        if version == 0 or version == len(all_versions):
            # ``all_versions`` is fetched with ``pid_status=None`` while
            # ``last_child`` is a separate lookup, so it may be missing even
            # though this branch is taken; fall through to the 404 then.
            last_child = pid_ver.last_child
            if last_child is not None:
                return redirect(
                    url_for('invenio_records_ui.recid',
                            pid_value=last_child.pid_value))
        elif 1 <= version < len(all_versions):
            # The lower bound matters: a negative version would otherwise
            # index from the end of the list and redirect to a wrong record.
            version_pid = all_versions[version - 1]
            current_app.logger.info(version_pid.__dict__)
            if version_pid.status == PIDStatus.REGISTERED:
                return redirect(
                    url_for('invenio_records_ui.recid',
                            pid_value=version_pid.pid_value))

    return abort(404)
Exemple #3
0
def versioning_published_record(uuid):
    """Migrate a published record to the versioning scheme.

    Records that already carry a ``conceptrecid`` are left untouched.
    Otherwise a concept recid is minted and registered, the record's recid is
    inserted as its child, and the linked deposit (if any) receives the same
    ``conceptrecid``.

    :param uuid: UUID of the record to migrate.
    """
    record = ZenodoRecord.get_record(uuid)
    if 'conceptrecid' in record:
        return
    # ASSERT ZENODO DOI ONLY!
    # (The check that the DOI is controlled by Zenodo is currently disabled.)
    assert 'conceptrecid' not in record, "Record already migrated"

    concept_pid = zenodo_concept_recid_minter(uuid, record)
    concept_pid.register()
    record_pid = PersistentIdentifier.get('recid', str(record['recid']))
    PIDVersioning(parent=concept_pid).insert_child(record_pid)
    record.commit()

    # Some old records have no deposit ID, some don't have '_deposit' at all.
    has_deposit_id = ('_deposit' in record and 'id' in record['_deposit']
                      and record['_deposit']['id'])
    if has_deposit_id:
        try:
            deposit_pid = PersistentIdentifier.get(
                'depid', str(record['_deposit']['id']))
            deposit = ZenodoDeposit.get_record(deposit_pid.object_uuid)
            deposit['conceptrecid'] = concept_pid.pid_value
            if deposit['_deposit']['status'] == 'draft':
                # Bump the revision the draft points at, since the record was
                # committed once more above.
                deposit['_deposit']['pid']['revision_id'] += 1
            deposit.commit()
        except PIDDoesNotExistError:
            # No deposit PID for this record: nothing to update.
            pass
    db.session.commit()
Exemple #4
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        Besides the deposit itself this detaches it from the versioning
        relation, drops PIDs that are still reserved and removes its bucket.

        :param delete_published: If True, even deposit of a published record
            will be deleted (usually used by admin operations).
        :type delete_published: bool
        :raises PIDInvalidAction: If ``_deposit.pid`` is set and
            ``delete_published`` is false.
        """
        if self['_deposit'].get('pid') and not delete_published:
            raise PIDInvalidAction()

        record_pid = PersistentIdentifier.get(pid_type='recid',
                                              pid_value=self['recid'])

        pv = PIDVersioning(child=record_pid)
        if pv.exists:
            # Only unlink the draft child when it is this very deposit.
            if pv.draft_child and self.pid == pv.draft_child_deposit:
                pv.remove_draft_child()
            latest = pv.last_child
            if latest:
                index_siblings(
                    latest,
                    children=pv.children.all(),
                    include_pid=True,
                    neighbors_eager=True,
                    with_deposits=True,
                )

        # A reserved recid was never registered: delete the row itself.
        if record_pid.status == PIDStatus.RESERVED:
            db.session.delete(record_pid)

        # Same treatment for a concept recid that is still reserved.
        if 'conceptrecid' in self:
            concept_pid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_pid.status == PIDStatus.RESERVED:
                db.session.delete(concept_pid)

        # Completely remove the bucket.
        files_bucket = self.files.bucket
        with db.session.begin_nested():
            # Drop the record-bucket link first.
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            multipart_query = MultipartObject.query_by_bucket(files_bucket)
            upload_ids = multipart_query.with_entities(
                MultipartObject.upload_id).subquery()
            # Parts go before the multipart objects they belong to.
            Part.query.filter(Part.upload_id.in_(upload_ids)).delete(
                synchronize_session='fetch')
            multipart_query.delete(synchronize_session='fetch')
        files_bucket.locked = False
        files_bucket.remove()

        deposit_pid = kwargs.get('pid', self.pid)
        if deposit_pid:
            deposit_pid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record,
        # since we need to completely override everything that the
        # Deposit.delete method does.
        return super(Deposit, self).delete(*args, **kwargs)
Exemple #5
0
    def _publish_new(self, id_=None):
        """Publish new deposit with communities handling.

        :param id_: Passed through unchanged to the parent ``_publish_new``.
        :returns: The published record, after community synchronisation.
        :raises VersioningFilesError: If the set of file checksums of this
            deposit equals that of any previous version.
        """
        dep_comms = set(self.pop('communities', []))
        record = super(ZenodoDeposit, self)._publish_new(id_=id_)
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        # Fetch the children once instead of issuing a count plus two
        # identical ``all()`` queries; nothing below changes the relation.
        children = pv.children.all()
        if len(children) > 1:
            files_set = {f.get_version().file.checksum for f in self.files}
            # Every child but the last one is a previous version.
            for prev_recid in children[:-1]:
                rec = ZenodoRecord.get_record(prev_recid.object_uuid)
                prev_files_set = {f.get_version().file.checksum
                                  for f in rec.files}
                if files_set == prev_files_set:
                    raise VersioningFilesError()

            # Communities are taken from the immediately preceding version.
            prev_recid = children[-2]
            rec_comms = set(ZenodoRecord.get_record(
                prev_recid.get_assigned_object()).get('communities', []))
        else:
            rec_comms = set()

        record = self._sync_communities(dep_comms, rec_comms, record)
        record.commit()

        # Update the concept recid redirection
        pv.update_redirect()
        RecordDraft.unlink(record.pid, self.pid)
        index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

        return record
Exemple #6
0
    def delete(self):
        """Delete this deposit together with its PIDs, index entry and buckets.

        Record and parent PIDs that are still ``RESERVED`` are hard-deleted
        from the database session; the deposit PID is deleted through its own
        ``delete()`` method.
        """
        dep_pid = self.pid
        rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
        versioning = PIDVersioning(child=rec_pid)
        # Every deposit has a parent version after the 2.1.0 upgrade, except
        # deleted ones. The parent version is checked in case of a delete
        # revert.
        # NOTE(review): this compares a freshly constructed object with None,
        # so it presumably can never fail -- confirm what was meant here.
        assert versioning is not None, 'Unexpected deposit without versioning.'

        # An unpublished record only holds a reserved PID: hard delete it.
        if rec_pid.status == PIDStatus.RESERVED:
            versioning.remove_draft_child()
            db.session.delete(rec_pid)

        # A reserved parent has no published record: hard delete it as well.
        parent_pid = versioning.parent
        if parent_pid.status == PIDStatus.RESERVED:
            db.session.delete(parent_pid)
        dep_pid.delete()

        # Fetch every bucket linked to the deposit now; they are removed once
        # the record itself has been deleted.
        linked_buckets = (
            Bucket.query.join(RecordsBuckets)
            .filter(RecordsBuckets.bucket_id == Bucket.id,
                    RecordsBuckets.record_id == self.id)
            .all()
        )

        # Remove the deposit from ES.
        self.indexer.delete(self)

        # Call the super of Invenio deposit instead of B2Share deposit, as
        # Invenio deposit doesn't support the deletion of published deposits.
        super(InvenioDeposit, self).delete(force=True)

        for linked in linked_buckets:
            linked.locked = False
            linked.remove()
Exemple #7
0
    def _publish_new(self, id_=None):
        """Publish new deposit with communities handling.

        :param id_: Passed through unchanged to the parent ``_publish_new``.
        :returns: The published record, after community synchronisation.
        :raises VersioningFilesError: If the set of file checksums of this
            deposit equals that of any previous version.
        """
        dep_comms = set(self.pop('communities', []))
        record = super(ZenodoDeposit, self)._publish_new(id_=id_)
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        # Fetch the children once instead of issuing a count plus two
        # identical ``all()`` queries; nothing below changes the relation.
        children = pv.children.all()
        if len(children) > 1:
            files_set = {f.get_version().file.checksum for f in self.files}
            # Every child but the last one is a previous version.
            for prev_recid in children[:-1]:
                rec = ZenodoRecord.get_record(prev_recid.object_uuid)
                prev_files_set = {f.get_version().file.checksum
                                  for f in rec.files}
                if files_set == prev_files_set:
                    raise VersioningFilesError()

            # Communities are taken from the immediately preceding version.
            prev_recid = children[-2]
            rec_comms = set(ZenodoRecord.get_record(
                prev_recid.get_assigned_object()).get('communities', []))
        else:
            rec_comms = set()

        record = self._sync_communities(dep_comms, rec_comms, record)
        record.commit()

        # Update the concept recid redirection
        pv.update_redirect()
        RecordDraft.unlink(record.pid, self.pid)
        index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

        return record
Exemple #8
0
    def create_deposit_and_record(pid_value, owner):
        """Create a deposit/record pair sharing ``pid_value``.

        The recid is created as ``RESERVED`` and inserted as draft child of
        the enclosing scope's ``conceptrecid``; the depid is created as
        ``REGISTERED`` and assigned to the deposit.

        :param pid_value: Value used for both the recid and the depid.
        :param owner: Object whose ``id`` becomes the record's only owner.
        :returns: Tuple ``(depid, deposit, recid, record)``.
        """
        recid = PersistentIdentifier.create(
            'recid', pid_value, status=PIDStatus.RESERVED)
        PIDVersioning(parent=conceptrecid).insert_draft_child(recid)

        depid = PersistentIdentifier.create(
            'depid', pid_value, status=PIDStatus.REGISTERED)
        deposit_metadata = {
            '_deposit': {'id': depid.pid_value},
            'conceptrecid': conceptrecid.pid_value,
            'recid': recid.pid_value,
        }
        deposit = ZenodoRecord.create(deposit_metadata)
        deposit.commit()
        depid.assign('rec', deposit.id)

        # The record starts from a private copy of the minimal metadata.
        record_metadata = deepcopy(minimal_record)
        record_metadata.update({
            '_deposit': {'id': depid.pid_value},
            'conceptrecid': conceptrecid.pid_value,
            'recid': int(recid.pid_value),
            'owners': [owner.id],
        })
        record = ZenodoRecord.create(record_metadata)
        zenodo_record_minter(record.id, record)
        record.commit()

        return depid, deposit, recid, record
Exemple #9
0
def preprocess_related_identifiers(pid, record, result):
    """Add related identifiers from PID relations to a serialized record.

    The passed ``pid`` is first resolved to the matching ``recid``. When
    ``pid`` is the record's concept DOI, the concept recid is used and
    ``result['metadata']['doi']`` is set to the concept DOI.

    :param pid: PID the record is being serialized for.
    :param record: Record metadata (read only).
    :param result: Serialization result, updated in place.
    :returns: The same ``result`` object.
    """
    concept_doi = record.get('conceptdoi')
    concept_recid_value = record.get('conceptrecid')

    if pid.pid_type == 'doi' and pid.pid_value == concept_doi:
        target_value = concept_recid_value
        result['metadata']['doi'] = concept_doi
    else:
        target_value = record.get('recid')

    # Reuse the passed PID when it already carries the wanted value.
    if pid.pid_value == target_value:
        recid = pid
    else:
        recid = PersistentIdentifier.get(pid_type='recid',
                                         pid_value=target_value)

    # A concept recid is the parent of the relation, anything else a child.
    if recid.pid_value == concept_recid_value:
        pv = PIDVersioning(parent=recid)
    else:
        pv = PIDVersioning(child=recid)

    if not pv.exists:
        return result

    # Serialize PID versioning as related identifiers.
    rels = serialize_related_identifiers(recid)
    if rels:
        result['metadata'].setdefault('related_identifiers', []).extend(rels)
    return result
Exemple #10
0
def test_version_pids_create(app, db):
    """Creating a parent for a lone child sets up one version relation."""
    assert PersistentIdentifier.query.count() == 0

    # Start from a single registered child PID.
    child_pid = PersistentIdentifier.create(
        'recid', '12345', object_type='rec', status=PIDStatus.REGISTERED)
    assert PersistentIdentifier.query.count() == 1

    # Initialize the Versioning API from the child and create its parent.
    versioning = PIDVersioning(child=child_pid)
    versioning.create_parent('12345.parent')
    assert PersistentIdentifier.query.count() == 2

    # The parent redirects to the child...
    assert versioning.parent.get_redirect() == child_pid
    assert versioning.parent.status == PIDStatus.REDIRECTED
    # ...and takes 'pid_type' and 'object_type' over from it.
    assert versioning.parent.pid_type == versioning.child.pid_type
    assert versioning.parent.object_type == versioning.child.object_type

    # Exactly one relation row exists, linking the two PIDs.
    relation = PIDRelation.query.one()
    assert relation.child == child_pid
    assert relation.parent == versioning.parent

    version_type_id = resolve_relation_type_config('version').id
    assert relation.relation_type == version_type_id
    assert relation.index == 0
Exemple #11
0
def test_deposit_versioning_draft_child_unlinking_bug(app, db, communities,
                                                      deposit, deposit_file):
    """
    Regression test for a bug with draft_child_deposit unlinking.

    The bug: a draft_child_deposit was unlinked from a new version draft
    when another version of the record was edited and published.
    """
    # Publish the first version and remember its recid value, so the PID can
    # be resolved again after later publish calls.
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Initiate a new version draft
    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    pv = PIDVersioning(child=recid_v1)
    # The new version draft must be linked right after creation.
    assert pv.draft_child_deposit
    assert pv.draft_child

    # Edit and re-publish the first version while the new version draft is
    # still open.
    # NOTE(review): edit() is called twice in a row and only the second
    # result is kept -- confirm whether the first call is intentional.
    deposit_v1.edit()
    deposit_v1 = deposit_v1.edit()
    deposit_v1 = publish_and_expunge(db, deposit_v1)

    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    pv = PIDVersioning(child=recid_v1)
    # Make sure the draft child deposit was not unlinked due to publishing of
    # the edited draft
    assert pv.draft_child_deposit
    assert pv.draft_child
Exemple #12
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        Besides the deposit itself this detaches it from the versioning
        relation, drops PIDs that are still reserved and removes its bucket.

        :param delete_published: If True, even deposit of a published record
            will be deleted (usually used by admin operations).
        :type delete_published: bool
        :raises PIDInvalidAction: If ``_deposit.pid`` is set and
            ``delete_published`` is false.
        """
        if self['_deposit'].get('pid') and not delete_published:
            raise PIDInvalidAction()

        record_pid = PersistentIdentifier.get(pid_type='recid',
                                              pid_value=self['recid'])

        pv = PIDVersioning(child=record_pid)
        if pv.exists:
            # Only unlink the draft child when it is this very deposit.
            if pv.draft_child and self.pid == pv.draft_child_deposit:
                pv.remove_draft_child()
            latest = pv.last_child
            if latest:
                index_siblings(
                    latest,
                    children=pv.children.all(),
                    include_pid=True,
                    neighbors_eager=True,
                    with_deposits=True,
                )

        # A reserved recid was never registered: delete the row itself.
        if record_pid.status == PIDStatus.RESERVED:
            db.session.delete(record_pid)

        # Same treatment for a concept recid that is still reserved.
        if 'conceptrecid' in self:
            concept_pid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_pid.status == PIDStatus.RESERVED:
                db.session.delete(concept_pid)

        # Completely remove the bucket.
        files_bucket = self.files.bucket
        with db.session.begin_nested():
            # Drop the record-bucket link first.
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            multipart_query = MultipartObject.query_by_bucket(files_bucket)
            upload_ids = multipart_query.with_entities(
                MultipartObject.upload_id).subquery()
            # Parts go before the multipart objects they belong to.
            Part.query.filter(Part.upload_id.in_(upload_ids)).delete(
                synchronize_session='fetch')
            multipart_query.delete(synchronize_session='fetch')
        files_bucket.locked = False
        files_bucket.remove()

        deposit_pid = kwargs.get('pid', self.pid)
        if deposit_pid:
            deposit_pid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record,
        # since we need to completely override everything that the
        # Deposit.delete method does.
        return super(Deposit, self).delete(*args, **kwargs)
Exemple #13
0
def cancel_action(activity_id='0', action_id=0):
    """Cancel an action of a workflow activity.

    Clears the deposit attached to the activity (or, failing that, the one
    identified by ``pid_value`` in the request body), removes it from its
    version relation, marks the action as canceled and quits the activity.

    :param activity_id: Identifier of the activity being canceled.
    :param action_id: Identifier of the action being canceled.
    :returns: JSON response with ``code=0`` and a redirect URL on success, or
        ``code=-1`` when quitting the activity failed (the action status is
        then set back to "doing").
    """
    # NOTE(review): depending on the Flask version, get_json() can return
    # None when the request carries no JSON body; fall back to an empty
    # payload so the optional fields below simply read as None.
    post_json = request.get_json() or {}
    work_activity = WorkActivity()

    activity = dict(activity_id=activity_id,
                    action_id=action_id,
                    action_version=post_json.get('action_version'),
                    action_status=ActionStatusPolicy.ACTION_CANCELED,
                    commond=post_json.get('commond'))

    # clear deposit
    activity_detail = work_activity.get_activity_detail(activity_id)
    if activity_detail is not None:
        cancel_item_id = activity_detail.item_id
        if cancel_item_id is None:
            # No item on the activity yet: resolve it from the posted recid.
            pid_value = post_json.get('pid_value')
            if pid_value is not None:
                pid = PersistentIdentifier.get('recid', pid_value)
                cancel_item_id = pid.object_uuid
        if cancel_item_id is not None:
            cancel_record = WekoDeposit.get_record(cancel_item_id)
            if cancel_record is not None:
                cancel_deposit = WekoDeposit(cancel_record,
                                             cancel_record.model)
                cancel_deposit.clear()
                # Remove draft child
                cancel_pid = PersistentIdentifier.get_by_object(
                    pid_type='recid',
                    object_type='rec',
                    object_uuid=cancel_item_id)
                cancel_pv = PIDVersioning(child=cancel_pid)
                if cancel_pv.exists:
                    # Point the activity at the previous version, if any,
                    # before this one is removed from the relation.
                    previous_pid = cancel_pv.previous
                    if previous_pid is not None:
                        activity.update(dict(item_id=previous_pid.object_uuid))
                    cancel_pv.remove_child(cancel_pid)

    work_activity.upt_activity_action_status(
        activity_id=activity_id,
        action_id=action_id,
        action_status=ActionStatusPolicy.ACTION_CANCELED)

    rtn = work_activity.quit_activity(activity)

    if rtn is None:
        # Quitting failed: put the action back into the "doing" state.
        work_activity.upt_activity_action_status(
            activity_id=activity_id,
            action_id=action_id,
            action_status=ActionStatusPolicy.ACTION_DOING)
        return jsonify(code=-1, msg=_('Error! Cannot process quit activity!'))

    return jsonify(code=0,
                   msg=_('success'),
                   data={
                       'redirect':
                       url_for('weko_workflow.display_activity',
                               activity_id=activity_id)
                   })
def retrieve_version_master(child_pid):
    """Return the ``PIDVersioning`` built on the parent of ``child_pid``.

    :param child_pid: A child PID, or a ``FetchedPID``-like object (as
        obtained from elasticsearch) which is first resolved through its
        provider.
    :returns: ``PIDVersioning(parent=...)``, or ``None`` when the child has
        no parent PID.
    """
    is_fetched_pid = type(child_pid).__name__ == "FetchedPID"
    if is_fetched_pid:
        provider = child_pid.provider.get(child_pid.pid_value)
        child_pid = provider.pid

    parent_pid = PIDVersioning(child=child_pid).parent
    return PIDVersioning(parent=parent_pid) if parent_pid else None
Exemple #15
0
def serialize_related_identifiers(pid):
    """Serialize PID Versioning relations as related_identifiers metadata.

    :param pid: PID to inspect, both as a child and as a parent of a
        versioning relation.
    :returns: List of ``{'scheme', 'relation', 'identifier'}`` dicts: an
        ``isVersionOf`` entry pointing at the concept DOI when ``pid`` is a
        child, and one ``hasVersion`` entry per child when it is a parent.
    """
    child_pv = PIDVersioning(child=pid)
    related_identifiers = []

    if child_pv.exists:
        rec = ZenodoRecord.get_record(pid.get_assigned_object())
        # External DOI records don't have a Concept DOI.
        if 'conceptdoi' in rec:
            related_identifiers.append({
                'scheme': 'doi',
                'relation': 'isVersionOf',
                'identifier': rec['conceptdoi'],
            })

        # TODO: Previous/next versions ('isNewVersionOf' /
        # 'isPreviousVersionOf' on the sibling DOIs) are deliberately not
        # serialized because of the semantic-versioning cases
        # (e.g. GitHub releases of minor versions).

    parent_pv = PIDVersioning(parent=pid)
    if parent_pv.exists:
        related_identifiers.extend(
            {
                'scheme': 'doi',
                'relation': 'hasVersion',
                'identifier': ZenodoRecord.get_record(
                    child.get_assigned_object())['doi'],
            }
            for child in parent_pv.children
        )
    return related_identifiers
Exemple #16
0
    def create_versioned_record(recid_value, conceptrecid):
        """Create a record attached as draft child to ``conceptrecid``.

        :param recid_value: Value for the new ``RESERVED`` recid.
        :param conceptrecid: Parent (concept) PID of the versioning relation.
        :returns: Tuple ``(recid, record)``.
        """
        recid = PersistentIdentifier.create(
            'recid', recid_value, status=PIDStatus.RESERVED)
        PIDVersioning(parent=conceptrecid).insert_draft_child(recid)

        # Build the metadata on a private copy of the minimal record.
        metadata = deepcopy(minimal_record)
        metadata.update({
            'conceptrecid': conceptrecid.pid_value,
            'recid': int(recid.pid_value),
        })
        record = ZenodoRecord.create(metadata)
        zenodo_record_minter(record.id, record)
        record.commit()

        return recid, record
Exemple #17
0
def versioning_new_deposit(uuid):
    """Migrate a yet-unpublished deposit to a versioning scheme.

    Deposits that already carry a ``conceptrecid`` are left untouched.
    Otherwise a concept recid is minted, the deposit's recid becomes its
    draft child and the recid is linked to the depid.

    :param uuid: UUID of the deposit to migrate.
    """
    deposit = ZenodoDeposit.get_record(uuid)
    if 'conceptrecid' in deposit:
        return
    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in deposit, 'Concept RECID already in record.'

    concept_pid = zenodo_concept_recid_minter(uuid, deposit)
    record_pid = PersistentIdentifier.get('recid', str(deposit['recid']))
    deposit_pid = PersistentIdentifier.get(
        'depid', str(deposit['_deposit']['id']))

    PIDVersioning(parent=concept_pid).insert_draft_child(record_pid)
    RecordDraft.link(record_pid, deposit_pid)

    deposit.commit()
    db.session.commit()
Exemple #18
0
def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Adding old versions to a community should propagate to all drafts."""
    # Publish version 1 and immediately reopen it for editing.
    deposit_v1 = publish_and_expunge(db, deposit).edit()

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Open a new version draft; it is reachable through the versioning
    # relation of the first record.
    deposit_v1 = deposit_v1.newversion()
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    # New version in 'deposit_v2' has not been published yet
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # Request two communities on version 1 and publish it.
    deposit_v1['communities'] = ['c1', 'c2', ]
    deposit_v1 = publish_and_expunge(db, deposit_v1)

    # Accept the published record into 'c1' only.
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    ZenodoCommunity('c1').accept_record(record_v1, pid=recid_v1)

    # The still-open draft of version 2 carries both requested communities.
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    assert deposit_v2['communities'] == ['c1', 'c2']

    # Publish version 2 with a new file.
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    # Only the accepted community ends up on the published record.
    assert record_v2['communities'] == ['c1', ]
Exemple #19
0
def check_records_migration(app):
    """Check that a set of records have been migrated.

    Each entry of ``expected_records.json`` is compared with the record
    stored in the database, after undoing the changes the migration itself is
    expected to have made (parent PID, OAI-PMH identifier).

    :param app: Application whose config provides ``OAISERVER_ID_PREFIX``.
    """
    for expected in _load_json('expected_records.json'):
        record_uuid = expected['id']
        db_record = Record.get_record(record_uuid, with_deleted=True)
        assert str(db_record.created) == expected['created']

        # If the record is deleted there is no metadata to check.
        if db_record.model.json is None:
            continue

        # Check that the parent pid is minted properly.
        parent_pid = b2share_parent_pid_fetcher(record_uuid, db_record)
        fetched_pid = b2share_record_uuid_fetcher(record_uuid, db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        versioning = PIDVersioning(record_pid)
        assert versioning.parent.pid_value == parent_pid.pid_value

        # Remove the parent pid as it has been added by the migration.
        db_record['_pid'].remove(dict(
            type=RecordUUIDProvider.parent_pid_type,
            value=parent_pid.pid_value,
        ))

        # The OAI-PMH identifier has been modified by the migration.
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            deposit_id = expected['metadata']['_deposit']['id']
            migrated_oai_id = db_record['_oai']['id']
            assert migrated_oai_id == str(oai_prefix) + deposit_id
            expected['metadata']['_oai']['id'] = migrated_oai_id

        assert db_record == expected['metadata']
Exemple #20
0
def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly request the auto-added communities in a new version."""
    requested_comms = ['ecfunded', 'grants_comm', 'zenodo']
    accepted_comms = ['grants_comm', ]

    # Publish version 1 and keep the values needed to resolve it later.
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    # Open version 2 and locate its draft deposit.
    deposit_v1 = deposit_v1.newversion()
    depid_v2 = PIDVersioning(child=recid_v1).draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # Request the communities explicitly, add a grant and a file, publish.
    deposit_v2['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    # Reload both deposits and the first record.
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)

    # Records only carry 'grants_comm'; deposits keep the full request.
    assert record_v1.get('communities', []) == accepted_comms
    assert deposit_v1.get('communities', []) == requested_comms
    assert record_v2.get('communities', []) == accepted_comms
    assert deposit_v2.get('communities', []) == requested_comms
Exemple #21
0
def update_record_statistics(start_date=None, end_date=None):
    """Update "_stats" field of affected records.

    Collects the concept recids that appear in the statistics aggregations
    between the two dates and passes the UUIDs of all their versions to
    ``RecordIndexer.bulk_index``.

    :param start_date: Start of the period (anything ``dateutil_parse``
        accepts), or ``None``.
    :param end_date: End of the period (anything ``dateutil_parse``
        accepts), or ``None``.

    When both dates are omitted the period is derived from the last two
    bookmarks of every aggregation. When only one of them is given nothing
    is done.
    """
    start_date = dateutil_parse(start_date) if start_date else None
    # Each bound is parsed on its own. Testing ``start_date`` here used to
    # discard a lone ``end_date`` and to parse ``None`` for a lone
    # ``start_date``.
    end_date = dateutil_parse(end_date) if end_date else None
    aggr_configs = {}

    if not start_date and not end_date:
        start_date = datetime.utcnow()
        end_date = datetime.utcnow()

        for _name, aggr_cfg in current_stats.aggregations.items():
            aggr = aggr_cfg.cls(name=aggr_cfg.name, **aggr_cfg.params)
            if not Index(aggr.index, using=aggr.client).exists():
                # No aggregation index yet: go back to the oldest event, or
                # to "now" when there is no event index either.
                if not Index(aggr.event_index, using=aggr.client).exists():
                    start_date = min(start_date, datetime.utcnow())
                else:
                    start_date = min(start_date,
                                     aggr._get_oldest_event_timestamp())

            # Retrieve the last two bookmarks
            bookmarks = aggr.list_bookmarks(limit=2)
            if len(bookmarks) >= 1:
                end_date = max(
                    end_date,
                    datetime.strptime(bookmarks[0].date, aggr.doc_id_suffix))
            if len(bookmarks) == 2:
                start_date = min(
                    start_date,
                    datetime.strptime(bookmarks[1].date, aggr.doc_id_suffix))

            aggr_configs[aggr.index] = aggr
    elif start_date and end_date:
        for _name, aggr_cfg in current_stats.aggregations.items():
            aggr = aggr_cfg.cls(name=aggr_cfg.name, **aggr_cfg.params)
            aggr_configs[aggr.index] = aggr
    else:
        # Only one bound was given: there is no period to work on.
        return

    # Get conceptrecids for all the affected records between the two dates
    conceptrecids = set()
    for aggr in aggr_configs.values():
        query = Search(
            using=aggr.client,
            index=aggr.index,
            doc_type=aggr.doc_type,
        ).filter('range',
                 timestamp={
                     'gte':
                     start_date.replace(microsecond=0).isoformat() + '||/d',
                     'lte':
                     end_date.replace(microsecond=0).isoformat() + '||/d'
                 }).source(include='conceptrecid')
        conceptrecids |= {b.conceptrecid for b in query.scan()}

    indexer = RecordIndexer()
    for conceptrecid_val in conceptrecids:
        conceptrecid = PersistentIdentifier.get('recid', conceptrecid_val)
        pv = PIDVersioning(parent=conceptrecid)
        children_recids = pv.children.all()
        indexer.bulk_index([str(p.object_uuid) for p in children_recids])
Exemple #22
0
    def get_data(self, record_id, query_date=None, get_period=False):
        """Public interface of _get_data.

        :param record_id: UUID of the record to report on.
        :param query_date: Forwarded to ``_get_data`` for unversioned records.
        :param get_period: Forwarded to ``_get_data`` for unversioned records.
        :returns: For an unversioned record, whatever ``_get_data`` returns.
            For a versioned record, a dict with ``total`` and per-``country``
            counts summed over all versions and the ``period`` of the last
            version processed. An empty result when no recid points at
            ``record_id``.
        """
        result = dict(total=0, country=dict(), period=list())

        recid = PersistentIdentifier.query.filter_by(
            pid_type='recid', object_uuid=record_id).first()
        if not recid:
            return result

        versioning = PIDVersioning(child=recid)
        if not versioning.exists:
            return self._get_data(record_id, query_date, get_period)

        # One statistics dict per version of the record.
        per_version = [
            self._get_data(record_id=child.object_uuid, get_period=True)
            for child in versioning.children.all()
        ]

        countries = result['country']
        for stats in per_version:
            for country, count in stats['country'].items():
                countries[country] = countries.get(country, 0) + count
            result['total'] += stats['total']
            # Not merged: each version overwrites the previous period.
            result['period'] = stats.get('period', [])

        return result
Exemple #23
0
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Transform a record document right before it is indexed in ES.

    Meant to be connected to the ``before_record_index`` signal. Documents
    going to an index whose name does not start with ``records-``, or whose
    record has no ``$schema``, are left untouched.

    :param sender: Sender of the signal (unused).
    :param json: Document about to be indexed; modified in place.
    :param record: Record being indexed.
    :param index: Name of the target index.
    """
    targets_records = index.startswith('records-')
    if not targets_records or record.get('$schema') is None:
        return

    if json['access_right'] != 'open' and '_files' in json:
        # File metadata of non-open records is kept out of the index.
        del json['_files']
    else:
        # Compute file count and total size.
        file_entries = json.get('_files', [])
        json['filecount'] = len(file_entries)
        json['size'] = sum(entry.get('size', 0) for entry in file_entries)

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.object_uuid == record.id,
        PersistentIdentifier.pid_type == current_pidrelations.primary_pid_type,
    ).one_or_none()
    if pid:
        if PIDVersioning(child=pid).exists:
            relations = serialize_relations(pid)
        else:
            # Unversioned record: index it as the single, last version.
            relations = {'version': [{'is_last': True, 'index': 0}]}
        if relations:
            json['relations'] = relations

        related = serialize_related_identifiers(pid)
        if related:
            json.setdefault('related_identifiers', []).extend(related)

    # Internal data must never reach the index.
    if '_internal' in json:
        del json['_internal']
Exemple #24
0
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Transform a deposit document right before it is indexed in ES.

    Meant to be connected to the ``before_record_index`` signal. Only
    documents going to an index whose name starts with
    ``deposits-records-`` are processed.

    To keep a published deposit from drifting away from its record (e.g.
    when an embargo task updates the record), the document of a published
    deposit is overwritten with the content of the published record.

    :param sender: Sender of the signal (unused).
    :param json: Document about to be indexed; modified in place.
    :param record: Deposit being indexed.
    :param index: Elasticsearch index name.
    :type index: str
    """
    if not index.startswith('deposits-records-'):
        return

    if not isinstance(record, ZenodoDeposit):
        record = ZenodoDeposit(record, model=record.model)

    if record['_deposit']['status'] != 'published':
        json['_updated'] = record.updated
    else:
        original_schema = json['$schema']
        published = record.fetch_published()[1]

        # Temporarily set to draft mode to ensure that `clear` can be called
        json['_deposit']['status'] = 'draft'
        json.clear()
        json.update(copy.deepcopy(published.replace_refs()))

        # Back to published mode, keeping the deposit's own schema.
        json['_deposit']['status'] = 'published'
        json['$schema'] = original_schema
        json['_updated'] = published.updated
    json['_created'] = record.created

    # File count and cumulated file size.
    file_entries = json.get('_files', [])
    json['filecount'] = len(file_entries)
    json['size'] = sum(entry.get('size', 0) for entry in file_entries)

    recid = record.get('recid')
    if not recid:
        return

    pid = PersistentIdentifier.get('recid', recid)
    versioning = PIDVersioning(child=pid)
    relations = serialize_relations(pid)
    if not versioning.exists:
        relations = {'version': [{'is_last': True, 'index': 0}]}
    elif versioning.draft_child_deposit:
        # A draft new version exists: it counts as one more version, and
        # only the draft's own deposit is flagged as the last one.
        version_info = relations['version'][0]
        version_info['is_last'] = (
            versioning.draft_child_deposit.pid_value
            == record['_deposit']['id'])
        version_info['count'] += 1
    if relations:
        json['relations'] = relations
Exemple #25
0
def record_minter(record_uuid, data):
    """Mint a record PID and register it as a version of its concept PID.

    When ``data`` carries no ``conceptrecid`` a new registered parent PID
    is created and its value stored under ``data['conceptrecid']``;
    otherwise the existing parent PID is looked up. The new record PID
    value is stored under ``data['recid']``.

    :param record_uuid: UUID of the record the new PID is assigned to.
    :param data: Record metadata; modified in place.
    :returns: The newly created record PID.
    """
    concept_value = data.get('conceptrecid')
    if concept_value:
        parent_pid = PersistentIdentifier.get(
            pid_type=RecordIdProvider.pid_type, pid_value=concept_value)
    else:
        parent_pid = RecordIdProvider.create(
            object_type='rec',
            object_uuid=None,
            status=PIDStatus.REGISTERED,
        ).pid
        data['conceptrecid'] = parent_pid.pid_value

    record_pid = RecordIdProvider.create('rec', record_uuid).pid
    data['recid'] = record_pid.pid_value

    PIDVersioning(parent=parent_pid).insert_child(child=record_pid)
    return record_pid
Exemple #26
0
    def get_irs(record, community_id=None, pid=None):
        """Build the query for a record's inclusion requests.

        For a versioned record the query covers the inclusion requests of
        all of its versions. For an unversioned record only the record's own
        requests are returned, ordered by community; ``community_id`` is not
        applied in that case.

        :param record: record for which the inclusion requests are fetched.
        :param community_id: Narrow down the query to the given community
            (versioned records only). Query for all communities if 'None'.
        :param pid: ``recid`` PID of the record; resolved from
            ``record['recid']`` when not given.
        :returns: Query over ``InclusionRequest``.
        """
        if not pid:
            pid = PersistentIdentifier.get('recid', record['recid'])

        versioning = PIDVersioning(child=pid)
        if not versioning.exists:
            return InclusionRequest.query.filter_by(
                id_record=record.id
            ).order_by(InclusionRequest.id_community)

        version_uuids = versioning.children.with_entities(
            PersistentIdentifier.object_uuid).subquery()
        conditions = [InclusionRequest.id_record.in_(version_uuids)]
        if community_id:
            conditions.append(InclusionRequest.id_community == community_id)
        return db.session.query(InclusionRequest).filter(*conditions)
Exemple #27
0
    def create(cls, data, id_=None):
        """Create a deposit together with its file bucket.

        The bucket is created before the deposit so that its id can be
        stored under ``data['_buckets']``. After creation the reserved
        record PID is inserted as the draft child of the concept PID and
        linked to the deposit PID.

        :param data: Deposit metadata; ``_buckets`` is set in place.
        :param id_: Optional UUID for the new deposit.
        :returns: The created deposit.
        """
        config = current_app.config
        bucket = Bucket.create(
            quota_size=config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}

        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        get_pid = PersistentIdentifier.get
        recid = get_pid('recid', str(data['recid']))
        conceptrecid = get_pid('recid', str(data['conceptrecid']))
        depid = get_pid('depid', str(data['_deposit']['id']))

        versioning = PIDVersioning(parent=conceptrecid)
        versioning.insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Exemple #28
0
def migrate_concept_recid_sips(recid, overwrite=False):
    """Create BagIt metadata for the SIPs of every version of a concept.

    The SIPs are processed version by version, each version's SIPs in
    creation order. Every SIP is archived as a patch of the previously
    processed one, and the session is committed after each SIP.

    :param recid: Value of the concept ``recid`` PID.
    :param overwrite: Regenerate the metadata even if a SIP already has it.
    """
    concept_pid = PersistentIdentifier.get('recid', recid)
    versioning = PIDVersioning(parent=concept_pid)

    # Collect the SIP ids first, one list per version.
    sip_ids_per_version = []
    for child in versioning.children:
        version_pid, _record = record_resolver.resolve(child.pid_value)
        record_sips = RecordSIP.query.filter_by(
            pid_id=version_pid.id).order_by(RecordSIP.created)
        sip_ids_per_version.append(
            [record_sip.sip.id for record_sip in record_sips])

    previous_sip_id = None
    for version_sip_ids in sip_ids_per_version:
        for position, sip_id in enumerate(version_sip_ids):
            sip = SIP.query.get(sip_id)
            if previous_sip_id:
                base_sip = SIP.query.get(previous_sip_id)
            else:
                base_sip = None
            archiver = BagItArchiver(
                SIPApi(sip),
                patch_of=base_sip,
                include_all_previous=(position > 0),
            )

            existing_meta = BagItArchiver.get_bagit_metadata(sip)
            if not existing_meta or overwrite:
                archiver.save_bagit_metadata(overwrite=True)

            previous_sip_id = sip_id
            db.session.commit()
Exemple #29
0
def get_all_deposit_siblings(deposit):
    """Return the assigned objects of all versions related to a deposit.

    The deposit's ``recid`` PID is used to find the versioning relation;
    one entry is returned per child PID of that relation.

    :param deposit: Deposit (or any mapping) providing a ``recid`` key.
    :returns: List with ``get_assigned_object()`` of every version PID.
    """
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_pidrelations.contrib.versioning import PIDVersioning
    record_pid = PersistentIdentifier.get(
        pid_type='recid', pid_value=str(deposit['recid']))
    versioning = PIDVersioning(child=record_pid)
    siblings = []
    for version_pid in versioning.children:
        siblings.append(version_pid.get_assigned_object())
    return siblings
Exemple #30
0
def test_record_delete_v2(mocker, app, db, users, deposit, deposit_file):
    """Delete a record (only last version) with multiple versions.

    Publishes v1, creates and publishes v2, deletes v2 with
    ``delete_record`` and then checks the PID statuses of both versions,
    the concept PID redirect and the calls made to the mocked DataCite
    client.
    """
    # Replace the DataCite client so no real DOI requests are sent.
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value
    deposit_v1.newversion()
    # Re-resolve v1 after the new version draft was created.
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)

    # Stash a copy of v1 for later
    rec1 = deepcopy(record_v1)

    # Fetch the v2 draft deposit through the versioning relation, give it a
    # file and publish it.
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    # Stash a copy of v2 for later
    rec2 = deepcopy(record_v2)
    rec2_id = str(record_v2.id)

    assert dc_mock().metadata_delete.call_count == 0

    # Remove the last version (v2)
    delete_record(rec2_id, 'spam', users[0]['id'])

    # Make sure all PIDs of v2 are deleted
    assert PID.get('doi', rec2['doi']).status == PIDStatus.DELETED
    assert PID.get('recid', rec2['recid']).status == PIDStatus.DELETED
    assert PID.get('depid', rec2['_deposit']['id']).status == PIDStatus.DELETED

    # Concept DOI should be left registered
    assert PID.get('doi', rec2['conceptdoi']).status == PIDStatus.REGISTERED

    # Make sure conceptrecid is redirecting to v1
    crecid = PID.get('recid', rec2['conceptrecid'])
    assert crecid.status == PIDStatus.REDIRECTED
    assert crecid.get_redirect() == PID.get('recid', rec1['recid'])

    # Make sure the v1 PIDs are kept intact
    assert PID.get('oai', rec1['_oai']['id']).status == PIDStatus.REGISTERED
    assert PID.get('doi', rec1['doi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec1['recid']).status == PIDStatus.REGISTERED
    assert PID.get('depid', rec1['_deposit']['id']).status == \
        PIDStatus.REGISTERED

    # Exactly one DOI metadata deletion is expected, for 10.5072/zenodo.3
    # (presumably the DOI of the deleted v2 -- the previous comment said
    # "v1"; TODO confirm).
    assert dc_mock().doi_post.call_count == 2
    # NOTE(review): `has_any_call` is not a mock assertion method. Accessing
    # it on a mock auto-creates a truthy child mock, so the two asserts
    # below can never fail. `assert_any_call` (or an inspection of
    # `call_args_list`) was probably intended; check the real `doi_post`
    # call signature before switching.
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.2')
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.1')
    assert dc_mock().metadata_delete.call_count == 1
    dc_mock().metadata_delete.assert_any_call('10.5072/zenodo.3')
    # The removed record keeps a trace of who removed it and why.
    record = Record.get_record(rec2_id)
    assert record['removed_by'] == users[0]['id']
    assert record['removal_reason'] == 'Spam record, removed by Zenodo staff.'
Exemple #31
0
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Test basic workflow using Deposit and Communities API.

    Publishes two versions of a record, requests inclusion in communities
    'c1' and 'c2' through the second version, accepts the requests through
    different versions and finally removes the communities through the
    first version, checking after each step that both versions agree.
    """
    # Publish v1 and remember its identifiers for later re-resolution.
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    # Create the v2 draft, give it a file and publish it.
    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2['communities'] = [
        'c1',
        'c2',
    ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    # Requests are pending, so neither record lists a community yet.
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting individual record to a community should propagate the changes
    # to all versions
    assert record_v1['communities'] == record_v2['communities'] == \
        ['c1', 'c2', ]

    # Clearing the communities of deposit_v1 should remove them from both
    # published records
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []
Exemple #32
0
def indexer_receiver(sender,
                     json=None,
                     record=None,
                     index=None,
                     **dummy_kwargs):
    """Transform a record document right before it is indexed in ES.

    Meant to be connected to the ``before_record_index`` signal. Nothing is
    done unless the index name starts with ``records-`` and the record has
    a ``$schema``. Otherwise the document gets file statistics (or loses
    its files when the record is not open access), versioning relations,
    related identifiers, geo points for its locations, usage statistics and
    the custom fields.

    :param sender: Sender of the signal (unused).
    :param json: Document about to be indexed; modified in place.
    :param record: Record being indexed.
    :param index: Name of the target index.
    """
    if not index.startswith('records-') or record.get('$schema') is None:
        return

    files_hidden = json['access_right'] != 'open' and '_files' in json
    if files_hidden:
        # Remove files from index if record is not open access.
        del json['_files']
    else:
        indexed_files = json.get('_files', [])
        json['filecount'] = len(indexed_files)
        json['size'] = sum([item.get('size', 0) for item in indexed_files])

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_value == str(record['recid']),
        PersistentIdentifier.pid_type == 'recid',
        PersistentIdentifier.object_uuid == record.id,
    ).one_or_none()
    if pid:
        versioning = PIDVersioning(child=pid)
        if versioning.exists:
            relations = serialize_relations(pid)
        else:
            # Not versioned: describe the record as a single last version.
            single_version = {'is_last': True, 'index': 0}
            relations = {'version': [single_version]}
        if relations:
            json['relations'] = relations

        related = serialize_related_identifiers(pid)
        if related:
            json.setdefault('related_identifiers', []).extend(related)

    # Add a geo point to every location that has both coordinates.
    for place in json.get('locations', []):
        if place.get('lat') and place.get('lon'):
            place['point'] = {'lat': place['lat'], 'lon': place['lon']}

    # Remove internal data.
    if '_internal' in json:
        del json['_internal']

    json['_stats'] = build_record_stats(record['recid'],
                                        record.get('conceptrecid'))

    for field_name, field_value in build_record_custom_fields(json).items():
        json[field_name] = field_value
Exemple #33
0
def versioning_link(recids):
    """Link several records into a versioning scheme.

    Support cases with some records being already versioned, as long
    as they are all within a single versioning scheme.

    For example, given the following records:
    - 123, 234, 345 (record with 3 versions)
    - 543, 432 (record with 2 versions)
    - 111 (single non-versioned record)
    - 222 (single, non-versioned record)

    The following cases are supported (Good) or not supported (Error):
    versioning_link 111 123 234 345 (Good - will add 111 as first version)
    versioning_link 111 222 (Good, will create new versioning scheme)
    versioning_link 345 123 234 (Good - no new records linked, but will
                                 reorder the records in the versioning list)
    versioning_link 123 234 543 (Error - trying to link two versioned records)
    versioning_link 123 234 (Error - must specify all children)

    :param recids: Record identifier values, in the desired version order.
    """
    int_recids = [int(recid) for recid in recids]
    if sorted(int_recids) != int_recids and not click.confirm(
            u'Requested RECIDS are not in the order of creation. Continue?'):
        click.echo(click.style(u'Record linking aborted.', fg='green'))
        return

    recids_records = [
        record_resolver.resolve(recid_val) for recid_val in recids
    ]

    # Records that already carry a concept DOI are already versioned.
    upgraded = [(recid, rec) for recid, rec in recids_records
                if 'conceptdoi' in rec]

    if len(upgraded) == 1:
        upgraded_recid, upgraded_rec = upgraded[0]
        # The prompt used to be shown with raw '{0}'/'{1}' placeholders;
        # fill them in so the operator sees which record is meant.
        if not click.confirm(
                u'Recid {0} already migrated. Its Concept recid: {1} will be '
                u'used as the base for the Concept DOI in the versioning '
                u'linking. Continue?'.format(
                    upgraded_recid.pid_value,
                    upgraded_rec.get('conceptrecid'))):
            return
    elif len(upgraded) > 1:
        child_recids = [
            int(recid.pid_value)
            for recid in PIDVersioning(child=upgraded[0][0]).children.all()
        ]
        # Every version of the already-versioned record must be listed.
        requested = set(int_recids)
        if not all(cr in requested for cr in child_recids):
            click.echo(u'All children recids ({0}) of the upgraded record need'
                       u' to be specified. Aborting.'.format(child_recids))
            return
        # All upgraded records must belong to that same versioning scheme.
        i_upgraded = [int(recid.pid_value) for recid, rec in upgraded]
        if set(child_recids) != set(i_upgraded):
            click.echo(u'Found multiple upgraded records {0}, which do not '
                       u'belong to a single versioning scheme. Aborting.'
                       u''.format(i_upgraded))
            return
    versioning_link_records(recids)
Exemple #34
0
def _create_records(base_metadata, total, versions, files):
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create('recid',
                                                   str(conceptrecid_val),
                                                   status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(pid_type='recid',
                                                pid_value=record['recid'],
                                                object_type='rec',
                                                object_uuid=record.id,
                                                status='R')
            versioning.insert_child(recid)

            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()

            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
Exemple #35
0
def _create_records(base_metadata, total, versions, files):
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'], object_type='rec',
                object_uuid=record.id, status='R')
            versioning.insert_child(recid)

            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()

            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0.

    Adds the versioning PIDs: every registered record PID gets a new parent
    version PID, and every registered deposit PID is either given a reserved
    record PID plus parent (unpublished deposits) or attached to the parent
    created for its record (published deposits). PIDs whose record or
    deposit no longer exists are marked as deleted.

    :param alembic: Not used by this function.
    :param verbose: When true, progress is printed with ``click.secho``.
    """
    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    # The original index method is restored even if the migration fails, so
    # that indexing is not left disabled for the rest of the process.
    try:
        if verbose:
            click.secho('migrating deposits and records...')
        with db.session.begin_nested():
            # Migrate published records
            records_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for rec_pid in records_pids:
                if verbose:
                    click.secho('    record {}'.format(rec_pid.pid_value))
                try:
                    record = Record.get_record(rec_pid.object_uuid)
                except NoResultFound:
                    # The record is deleted but not the PID. Fix it.
                    rec_pid.status = PIDStatus.DELETED
                    continue
                # Create parent version PID
                parent_pid = RecordUUIDProvider.create().pid
                version_master = PIDVersioning(parent=parent_pid)
                version_master.insert_draft_child(child=rec_pid)
                version_master.update_redirect()
                migrate_record_metadata(record, parent_pid)

            # Migrate deposits
            deposit_pids = PersistentIdentifier.query.filter(
                PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
                PersistentIdentifier.status == PIDStatus.REGISTERED,
            ).all()
            for dep_pid in deposit_pids:
                if verbose:
                    click.secho('    deposit {}'.format(dep_pid.pid_value))
                try:
                    deposit = Deposit.get_record(dep_pid.object_uuid)

                    if deposit['publication_state'] != \
                            PublicationStates.published.name:
                        # The record is not published yet. Reserve the PID.
                        rec_pid = RecordUUIDProvider.create(
                            object_type='rec',
                            pid_value=dep_pid.pid_value,
                        ).pid
                        # Create parent version PID
                        parent_pid = RecordUUIDProvider.create().pid
                        assert parent_pid
                        version_master = PIDVersioning(parent=parent_pid)
                        version_master.insert_draft_child(child=rec_pid)
                    else:
                        # Retrieve previously created version PID
                        rec_pid = RecordUUIDProvider.get(
                            dep_pid.pid_value).pid
                        version_master = PIDVersioning(child=rec_pid)
                        parent_pid = version_master.parent
                        if not parent_pid:
                            click.secho(
                                '    record {} was deleted, but the deposit '
                                'has not been removed'.format(
                                    rec_pid.pid_value),
                                fg='red')

                    if parent_pid:
                        migrate_record_metadata(deposit, parent_pid)
                except NoResultFound:
                    # The deposit is deleted but not the PID. Fix it.
                    dep_pid.status = PIDStatus.DELETED

        if verbose:
            click.secho('done migrating deposits.')
    finally:
        RecordIndexer.index = old_index_fn
Exemple #37
0
    def publish(self):
        """Publish GitHub release as record.

        Creates a deposit from the release metadata, downloads the release
        files into the deposit bucket, publishes the deposit and then
        triggers DOI registration and indexing. When the repository already
        has published releases, the new record is created as a further
        version of the last one. On any failure the session is rolled back,
        the deposit is removed from the index (best effort) and the
        exception is re-raised.
        """
        id_ = uuid.uuid4()
        deposit_metadata = dict(self.metadata)
        deposit = None
        try:
            db.session.begin_nested()
            # TODO: Add filter on Published releases
            previous_releases = self.model.repository.releases.filter_by(
                status=ReleaseStatus.PUBLISHED)
            versioning = None
            stashed_draft_child = None
            if previous_releases.count():
                # Attach the new release to the version chain of the most
                # recently created published release.
                last_release = previous_releases.order_by(
                        Release.created.desc()).first()
                last_recid = PersistentIdentifier.get(
                    'recid', last_release.record['recid'])
                versioning = PIDVersioning(child=last_recid)
                last_record = ZenodoRecord.get_record(
                    versioning.last_child.object_uuid)
                deposit_metadata['conceptrecid'] = last_record['conceptrecid']
                if 'conceptdoi' not in last_record:
                    # The last version has no concept DOI yet: register one
                    # through its deposit and reload the published record.
                    last_depid = PersistentIdentifier.get(
                        'depid', last_record['_deposit']['id'])
                    last_deposit = ZenodoDeposit.get_record(
                        last_depid.object_uuid)
                    last_deposit = last_deposit.registerconceptdoi()
                    last_recid, last_record = last_deposit.fetch_published()
                deposit_metadata['conceptdoi'] = last_record['conceptdoi']
                if versioning.draft_child:
                    # Set an existing draft aside while this release is
                    # published; it is re-inserted further below.
                    stashed_draft_child = versioning.draft_child
                    versioning.remove_draft_child()

            deposit = self.deposit_class.create(deposit_metadata, id_=id_)

            # The user who triggered the event owns the deposit.
            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                # (The HEAD response itself is not used; `res` is
                # overwritten by the GET below.)
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True,
                                              allow_redirects=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}"
                        .format(url=url)
                    )

                # A missing or zero Content-Length is passed on as size=None.
                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            recid_pid, record = deposit.fetch_published()
            self.model.recordmetadata = record.model
            # Put back the draft that was set aside before publishing.
            if versioning and stashed_draft_child:
                versioning.insert_draft_child(stashed_draft_child)
            record_id = str(record.id)
            db.session.commit()

            # Send Datacite DOI registration task
            datacite_register.delay(recid_pid.pid_value, record_id)

            # Index the record
            RecordIndexer().index_by_id(record_id)
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    # Best effort only: log and let the original error
                    # propagate through the `raise` below.
                    current_app.logger.exception(
                        "Failed to remove uncommited deposit from index.")
            raise
Exemple #38
0
    def create(cls, data, id_=None, version_of=None):
        """Create a deposit with the optional id.

        The reserved record PID is inserted as the draft child of a version
        chain (an existing one when ``version_of`` is given, a new one
        otherwise), the JSON schema is set from the community or from the
        previous version, and a file bucket is created or cloned from the
        previous version.

        :param data: Deposit metadata; modified in place. Must not contain
            ``publication_state`` or ``$schema`` and must provide
            ``_deposit.id`` and ``community``.
        :param id_: Optional UUID for the new deposit.
        :param version_of: PID of an existing record. If set, the new record
            will be marked as a new version of this referenced record. Note:
            this PID must reference the current last version of a record.
        :returns: The created deposit.
        :raises InvalidDepositError: If ``publication_state`` or ``$schema``
            is set, the community is not a valid UUID, or the community has
            no schema.
        :raises ValidationError: If the community is missing or differs from
            the one of the previous version.
        :raises DraftExistsVersioningError: If the version chain already has
            a draft.
        """

        # check that the status field is not set
        if 'publication_state' in data:
            raise InvalidDepositError(
                'Field "publication_state" cannot be set.')
        data['publication_state'] = PublicationStates.draft.name
        # Set record's schema
        if '$schema' in data:
            raise InvalidDepositError('"$schema" field should not be set.')

        # Retrieve reserved record PID which should have already been created
        # by the deposit minter (The record PID value is the same
        # as the one of the deposit)
        rec_pid = RecordUUIDProvider.get(data['_deposit']['id']).pid
        version_master, prev_version = None, None
        # if this is a new version of an existing record, add the future
        # record pid in the chain of versions.
        if version_of:
            version_master, prev_version = \
                find_version_master_and_previous_record(version_of)
            # The new version must be in the same community
            if data['community'] != prev_version['community']:
                raise ValidationError(
                    'The community field cannot change between versions.')
            try:
                version_master.insert_draft_child(rec_pid)
            except Exception as exc:
                # Only one draft is allowed per version chain. The message is
                # matched on str(exc) so that an exception without string
                # arguments cannot fail inside this handler.
                if 'Draft child already exists for this relation' in \
                        str(exc):
                    raise DraftExistsVersioningError(
                        version_master.draft_child
                    )
                raise
        else:
            # create parent PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)

        # Mint the deposit with the parent PID
        data['_pid'] = [{
            'value': version_master.parent.pid_value,
            'type': RecordUUIDProvider.parent_pid_type,
        }]
        if 'community' not in data or not data['community']:
            # The message used to contain the invalid escape sequence '\s',
            # which produced a literal backslash in the error text.
            raise ValidationError(
                'Record\'s metadata has no community field.')
        try:
            community_id = uuid.UUID(data['community'])
        except ValueError as e:
            raise InvalidDepositError(
                'Community ID is not a valid UUID.') from e
        try:
            schema = CommunitySchema.get_community_schema(community_id)
        except CommunitySchemaDoesNotExistError as e:
            raise InvalidDepositError(
                'No schema for community {}.'.format(community_id)) from e

        if version_of:
            data['$schema'] = Deposit._build_deposit_schema(prev_version)
        else:
            from b2share.modules.schemas.serializers import \
                community_schema_draft_json_schema_link
            data['$schema'] = community_schema_draft_json_schema_link(
                schema,
                _external=True
            )

        # create file bucket
        if prev_version and prev_version.files:
            # Clone the bucket from the previous version. This doesn't
            # duplicate files.
            bucket = prev_version.files.bucket.snapshot(lock=False)
            bucket.locked = False
        else:
            bucket = Bucket.create(storage_class=current_app.config[
                'DEPOSIT_DEFAULT_STORAGE_CLASS'
            ])

        if 'external_pids' in data:
            # External PIDs are turned into bucket entries and are not kept
            # in the deposit metadata.
            create_b2safe_file(data['external_pids'], bucket)
            del data['external_pids']

        deposit = super(Deposit, cls).create(data, id_=id_)
        db.session.add(bucket)
        db.session.add(RecordsBuckets(
            record_id=deposit.id, bucket_id=bucket.id
        ))

        return deposit
Exemple #39
0
def delete_record(record_uuid, reason, user):
    """Delete the record and its PIDs.

    The record is detached from its versioning chain, removed from the
    search index and stripped of its file bucket. Its deposit is deleted and
    the record metadata is replaced by the deletion information (reason and
    responsible user). Once the database transaction is committed, the DOI
    state is synchronised with DataCite and, when enabled in the
    configuration, the record is removed from the OpenAIRE index.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    :raises TypeError: If ``user`` is neither a text string nor an integer.
    """
    from invenio_github.models import ReleaseStatus
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}".format(
            user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index; a record that was never indexed is fine
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets (the bucket has to be unlocked before it can be removed)
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    # Translate a known reason key into its configured description, otherwise
    # keep the given string verbatim
    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    # Everything below that reads from ``record`` must happen before
    # ``record.clear()`` wipes the metadata.
    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        # No version left: the concept recid goes away as well
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    conceptdoi_value = record.get('conceptdoi')

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    db.session.commit()

    # After successful DB commit, sync the DOIs with DataCite.
    # ``doi`` is None when the DOI PID does not exist (see lookup above); in
    # that case there is nothing to inactivate, and dereferencing it would
    # abort the remaining post-commit synchronisation.
    if doi is not None:
        datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if (current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and
            original_id and datasource_id):
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)