def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes
    everything.
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    old_record_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    old_record["authors"][0]["signature_block"] = "HAGp"
    old_record["authors"][0]["recid"] = "2"
    es.index(index="records-hep", doc_type="hep", id=old_record_id, body=old_record)
    es.indices.refresh("records-hep")

    record_id = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    record["authors"][0]["signature_block"] = "HAGp"
    record["authors"][0]["recid"] = "314159265"
    record["authors"][0]["curated_relation"] = True
    es.index(index="records-hep", doc_type="hep", id=record_id, body=record)
    es.indices.refresh("records-hep")

    with patch("celery.current_app.send_task", return_value=_BeardObject(({"2": [old_author_uuid, author_uuid]}, {}))):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid
        ):
            disambiguation_clustering("HAGp")

    assert Record.get_record(old_record_id)["authors"][0]["recid"] == "314159265"
    assert Record.get_record(record_id)["authors"][0]["recid"] == "314159265"
Example #2
def test_published_external_doi(db, deposit, deposit_file):
    """Test published external DOI."""
    ext_doi1 = '10.1234/foo'
    ext_doi2 = '10.1234/bar'
    deposit['doi'] = ext_doi1
    deposit.publish()
    db.session.commit()

    # Published record with external DOI must have:
    # 1) a registered recid with object
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    assert recid and recid.status == PIDStatus.REGISTERED \
        and recid.has_object()
    # 2) a reserved external doi with object
    doi = PersistentIdentifier.get('doi', ext_doi1)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()

    # Now change external DOI.
    deposit = deposit.edit()
    deposit['doi'] = ext_doi2
    deposit.publish()
    db.session.commit()

    # Ensure DOI 1 has been removed.
    pytest.raises(
        PIDDoesNotExistError, PersistentIdentifier.get, 'doi', ext_doi1)

    # Ensure DOI 2 has been reserved.
    doi = PersistentIdentifier.get('doi', ext_doi2)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()
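
The fetch-then-assert pattern above recurs throughout these tests. A minimal helper sketch (the name assert_pid_state is hypothetical, not part of the example above) captures it:

from invenio_pidstore.models import PersistentIdentifier


def assert_pid_state(pid_type, pid_value, expected_status):
    """Hypothetical test helper: fetch a PID and check both its status
    and that it is assigned to an object."""
    pid = PersistentIdentifier.get(pid_type, pid_value)
    assert pid.status == expected_status
    assert pid.has_object()
    return pid

With this helper, the two assertion pairs above collapse to assert_pid_state('recid', str(deposit['recid']), PIDStatus.REGISTERED) and assert_pid_state('doi', ext_doi1, PIDStatus.RESERVED).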
def add_drafts_from_file(file_path, schema,
                         egroup=None, user=None, limit=None):
    """Adds drafts from a specified file.

    Drafts with specified pid will be registered under those.
    For drafts without pid, new pids will be minted.
    """
    with open(file_path, 'r') as fp:
        entries = json.load(fp)

        for entry in entries[0:limit]:
            data = construct_draft_obj(schema, entry)
            pid = cap_deposit_fetcher(None, data)
            pid_value = pid.pid_value if pid else None

            try:
                PersistentIdentifier.get('depid', pid_value)

                print('Draft with id {} already exists!'.format(pid_value))

            except PIDDoesNotExistError:
                record_uuid = uuid.uuid4()
                pid = cap_deposit_minter(record_uuid, data)
                if user:
                    user = User.query.filter_by(email=user).one()
                if egroup:
                    # Validate that the e-group role exists.
                    Role.query.filter_by(name=egroup).one()
                deposit = CAPDeposit.create(data, record_uuid, user)
                deposit.commit()

                if egroup:
                    add_read_permission_for_egroup(deposit, egroup)

                print('Draft {} added.'.format(pid.pid_value))
Example #4
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Example #5
    def validate_doi(self, value):
        """Validate if doi exists."""
        if value and has_request_context():
            required_doi = self.context.get('required_doi')
            if value == required_doi:
                return

            err = ValidationError(_('DOI already exists in Zenodo.'),
                                  field_names=['doi'])

            try:
                doi_pid = PersistentIdentifier.get('doi', value)
            except PIDDoesNotExistError:
                return

            # If the DOI exists, check if it's been assigned to this record
            # by fetching the recid and comparing both PIDs record UUID
            try:
                recid_pid = PersistentIdentifier.get(
                    'recid', self.context['recid'])
            except PIDDoesNotExistError:
                # There's no way to verify if this DOI belongs to this record
                raise err

            doi_uuid = doi_pid.get_assigned_object()
            recid_uuid = recid_pid.get_assigned_object()

            if doi_uuid and doi_uuid == recid_uuid:
                return
            else:  # DOI exists and belongs to a different record
                raise err
def test_app_fixture_lacks_db_isolation_step2(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count + 1
    # Force the cleanup.
    PersistentIdentifier.get(
        pid_type='type1',
        pid_value='value1',
    ).delete()
Example #7
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        :param delete_published: If True, the deposit of a published record
            will also be deleted (usually used by admin operations).
        :type delete_published: bool
        """
        is_published = self['_deposit'].get('pid')
        if is_published and not delete_published:
            raise PIDInvalidAction()

        # Delete the recid
        recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        versioning = PIDVersioning(child=recid)
        if versioning.exists:
            if versioning.draft_child and \
                    self.pid == versioning.draft_child_deposit:
                versioning.remove_draft_child()
            if versioning.last_child:
                index_siblings(versioning.last_child,
                               children=versioning.children.all(),
                               include_pid=True,
                               neighbors_eager=True,
                               with_deposits=True)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        if 'conceptrecid' in self:
            concept_recid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_recid.status == PIDStatus.RESERVED:
                db.session.delete(concept_recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
            ).delete(synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        depid = kwargs.get('pid', self.pid)
        if depid:
            depid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record,
        # since we need to completely override everything that the
        # Deposit.delete method does.
        return super(Deposit, self).delete(*args, **kwargs)
def generate_doi(prefix, experiment=None):
    """Generate random DOI, unique within PIDStore."""
    while True:
        doi = random_doi(prefix, experiment)
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            return doi
def generate_recid(experiment):
    """CAP Pid generator."""
    while True:
        pid_value = random_pid(experiment)
        try:
            PersistentIdentifier.get('recid', pid_value)
        except PIDDoesNotExistError:
            return pid_value
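
Both generators above share the same probe-until-missing pattern: propose a candidate value, treat PIDDoesNotExistError as "the value is free", and loop otherwise. A minimal generic sketch of the pattern (generate_unique_pid_value and its candidate_factory argument are hypothetical stand-ins for random_doi/random_pid):

from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier


def generate_unique_pid_value(pid_type, candidate_factory):
    """Return a candidate value not yet present in the PID store.

    candidate_factory is a zero-argument callable producing random
    candidates (e.g. a functools.partial over random_doi above).
    """
    while True:
        candidate = candidate_factory()
        try:
            PersistentIdentifier.get(pid_type, candidate)
        except PIDDoesNotExistError:
            # No PID with this value exists yet, so it is free to mint.
            return candidate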
def test_delete_with_sqldatabase_error(app):
    """Test VALID record delete request (GET .../records/<record_id>)."""
    with app.app_context():
        # create the record using the internal API
        pid, record = create_record(test_data)
        db.session.expire(record.model)
        pid_value = pid.pid_value
        pid_type = pid.pid_type
        record_id = record.id

        db.session.commit()
        Record.get_record(record_id)

        def raise_exception():
            raise SQLAlchemyError()

        with app.test_client() as client:
            # start a new SQLAlchemy session so that it will rollback
            # everything
            nested_transaction = db.session().transaction
            orig_rollback = nested_transaction.rollback
            flags = {'rollbacked': False}

            def custom_rollback(*args, **kwargs):
                flags['rollbacked'] = True
                orig_rollback(*args, **kwargs)
            nested_transaction.rollback = custom_rollback

            with patch.object(PersistentIdentifier, 'delete',
                              side_effect=raise_exception):
                headers = [('Accept', 'application/json')]
                res = client.delete(url_for('invenio_records_rest.recid_item',
                                            pid_value=pid_value),
                                    headers=headers)
                assert res.status_code == 500
            # check that the transaction is finished
            assert db.session().transaction is not nested_transaction
            # check that the session has been rolled back
            assert flags['rollbacked']

    with app.app_context():
        with app.test_client() as client:
            # check that the record and PID have not been deleted
            Record.get_record(record_id)
            assert not PersistentIdentifier.get(pid_type,
                                                pid_value).is_deleted()
            # try to delete without exception; the transaction should have
            # been rolled back
            headers = [('Accept', 'application/json')]
            res = client.delete(url_for('invenio_records_rest.recid_item',
                                        pid_value=pid_value),
                                headers=headers)
            assert res.status_code == 204
            # check database state
            with pytest.raises(NoResultFound):
                Record.get_record(record_id)
            assert PersistentIdentifier.get(pid_type,
                                            pid_value).is_deleted()
    def missing_pids(self):
        """Return the persistent identifiers missing from the PID store."""
        missing = []
        for p in self.pids:
            try:
                PersistentIdentifier.get(p.pid_type, p.pid_value)
            except PIDDoesNotExistError:
                missing.append(p)
        return missing
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed
    signatures are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    # Claimed signature #1.
    glashow_record_id_claimed = str(PersistentIdentifier.get("literature", 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    glashow_record_claimed["authors"][0]["signature_block"] = "HAGp"
    glashow_record_claimed["authors"][0]["curated_relation"] = True
    glashow_record_claimed["authors"][0]["recid"] = "3"
    es.index(index="records-hep", doc_type="hep", id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh("records-hep")

    # Claimed signature #2.
    higgs_record_id_claimed = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_claimed["authors"][0]["signature_block"] = "HAGp"
    higgs_record_claimed["authors"][0]["curated_relation"] = True
    higgs_record_claimed["authors"][0]["recid"] = "4"
    es.index(index="records-hep", doc_type="hep", id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh("records-hep")

    # Not claimed signature.
    higgs_record_id_not_claimed = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_not_claimed["authors"][0]["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep", id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh("records-hep")

    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(
            ({"3": [glashow_record_uuid_claimed, higgs_record_uuid_claimed, higgs_record_uuid_not_claimed]}, {})
        ),
    ):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject({higgs_record_uuid_claimed: [higgs_record_uuid_not_claimed]}),
        ):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid
            ):
                disambiguation_clustering("HAGp")

    assert Record.get_record(higgs_record_id_not_claimed)["authors"][0]["recid"] == "4"
Example #13
    def _sync_communities(self, dep_comms, rec_comms, record):
        new_dep_comms, new_rec_comms, new_ir_comms = \
            self._get_new_communities(dep_comms, rec_comms, record)

        # Update Communities and OAISet information for all record versions
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        for pid in pv.children:
            rec = ZenodoRecord.get_record(pid.get_assigned_object())
            if rec.id != record.id:
                rec['communities'] = sorted(new_rec_comms)
                if current_app.config['COMMUNITIES_OAI_ENABLED']:
                    rec = self._sync_oaisets_with_communities(rec)
                if not rec['communities']:
                    del rec['communities']
                rec.commit()
                depid = PersistentIdentifier.get(
                    'depid', rec['_deposit']['id'])
                deposit = ZenodoDeposit.get_record(depid.get_assigned_object())
                deposit['communities'] = sorted(new_dep_comms)
                if not deposit['communities']:
                    del deposit['communities']
                deposit.commit()

        # Update new version deposit
        if pv.draft_child_deposit:
            draft_dep = ZenodoDeposit.get_record(
                pv.draft_child_deposit.get_assigned_object())
            if draft_dep.id != self.id:
                draft_dep['communities'] = sorted(new_dep_comms)
                if not draft_dep['communities']:
                    del draft_dep['communities']
                draft_dep.commit()

        record['communities'] = sorted(new_rec_comms)
        if current_app.config['COMMUNITIES_OAI_ENABLED']:
            record = self._sync_oaisets_with_communities(record)
        if not record['communities']:
            del record['communities']

        self['communities'] = sorted(new_dep_comms)
        if not self['communities']:
            del self['communities']

        # Create Inclusion requests against this record
        self._create_inclusion_requests(new_ir_comms, record)

        # Remove obsolete InclusionRequests against the record and its versions
        self._remove_obsolete_irs(new_ir_comms, record)

        return record
Example #14
    def validate_doi(self, value):
        """Validate if doi exists."""
        if value and has_request_context():
            required_doi = self.context.get('required_doi')
            if value == required_doi:
                return
            try:
                PersistentIdentifier.get('doi', value)
                raise ValidationError(
                    _('DOI already exists in Zenodo.'),
                    field_names=['doi'])
            except PIDDoesNotExistError:
                pass
Example #15
def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value, merged_uuid, deleted_uuid):
    InspireRecord.get_record(merged_uuid)._delete(force=True)
    InspireRecord.get_record(deleted_uuid)._delete(force=True)

    merged_pid = PersistentIdentifier.get(pid_type, merged_pid_value)
    deleted_pid = PersistentIdentifier.get(pid_type, deleted_pid_value)

    Redirect.query.filter(Redirect.id == deleted_pid.object_uuid).delete()

    db.session.delete(merged_pid)
    db.session.delete(deleted_pid)

    db.session.commit()
Example #16
    def proc(ai):
        try:
            PersistentIdentifier.get('recid', ai.control_number)
        except PIDDoesNotExistError:
            api_response = requests_retry_session().get(crossref_url % ai.doi)
            if api_response.status_code != 200:
                error('Failed to query crossref for doi: %s. Error code: %s' % (ai.doi, api_response.status_code))
                result['not200'].append(ai.control_number)
                return None

            title = api_response.json()['message']['title'][0].lower()

            if 'addendum' in title or 'corrigendum' in title or 'erratum' in title:
                result['hit'].append((ai.control_number, title))
Example #17
def test_pid_unassign(app):
    """Test pid object unassignment."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        rec_uuid = uuid.uuid4()
        # Assigned object
        result = runner.invoke(cmd, [
            'create', 'recid', '101',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code

        result = runner.invoke(cmd, [
            'get', 'recid', '101',
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'rec {0} N\n'.format(str(rec_uuid)) == result.output

        result = runner.invoke(cmd, [
            'dereference', 'rec', str(rec_uuid),
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'recid 101 None\n' == result.output

        result = runner.invoke(cmd, [
            'dereference', 'rec', str(rec_uuid), '-s', 'NEW',
        ], obj=script_info)
        assert 0 == result.exit_code
        assert 'recid 101 None\n' == result.output

        with app.app_context():
            pid = PersistentIdentifier.get('recid', '101')
            assert pid.has_object()
            assert pid.get_assigned_object() == rec_uuid
            assert pid.get_assigned_object('rec') == rec_uuid

        # Unassign the object
        result = runner.invoke(cmd, [
            'unassign', 'recid', '101',
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('recid', '101')
            assert not pid.has_object()
            assert pid.get_assigned_object() is None
            assert pid.get_assigned_object('rec') is None
Example #18
def sync_pidstore():
    """Populate PID store with all DOIs registered in DataCite."""
    cli = DataCiteMDSClientWrapper()
    dois = cli.doi_get_all().split('\n')

    for doi in dois:
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            DataCiteProviderWrapper.create(pid_value=doi)
            click.echo('Record with doi {} added to PID store'.
                       format(doi))
    db.session.commit()

    click.echo('PID Store updated')
Example #19
def check_pids_migration():
    """Check that the persistent identifiers have been migrated."""
    expected_pids = _load_json('expected_pids.json')
    # Check unchanging properties
    for exp_pid in expected_pids:
        db_pid = PersistentIdentifier.get(exp_pid['pid_type'],
                                          exp_pid['pid_value'])
        for key, value in exp_pid.items():
            if key != 'updated':
                assert str(getattr(db_pid, key)) == str(value)

        # check that deleted PID's records are (soft or hard) deleted
        if exp_pid['status'] == PIDStatus.DELETED.value:
            metadata = None
            try:
                record = Record.get_record(exp_pid['pid_value'],
                                           with_deleted=True)
                # Soft deleted record
                metadata = record.model.json
            except NoResultFound:
                # Hard deleted record
                pass
            assert metadata is None

        # Check versioning relations and PIDs
        if exp_pid['pid_type'] == 'b2dep':
            try:
                rec_pid = PersistentIdentifier.get('b2rec',
                                                   exp_pid['pid_value'])
                # if the deposit is deleted, either the record PID was reserved
                # and has been deleted, or it still exists.
                if db_pid.status == PIDStatus.DELETED:
                    assert rec_pid.status != PIDStatus.RESERVED
            except PIDDoesNotExistError:
                # The record PID was only reserved and has been deleted
                # with the deposit PID.
                assert db_pid.status == PIDStatus.DELETED
                continue

            # Check that a parent pid has been created
            versioning = PIDVersioning(child=rec_pid)
            parent = versioning.parent
            assert rec_pid.status in [PIDStatus.RESERVED, PIDStatus.REGISTERED]
            if rec_pid.status == PIDStatus.RESERVED:
                assert parent.status == PIDStatus.RESERVED
            else:
                assert parent.status == PIDStatus.REDIRECTED
                assert parent.get_redirect() == rec_pid
def ill_register(record_id=None):
    """Interface to register an inter library loan for the administrator.

    Without a record_id, an empty form will be presented.
    """
    if record_id:
        _uuid = PersistentIdentifier.get("recid", record_id).object_uuid
        rec = Record.get_record(_uuid)
    else:
        rec = {}

    _prepare_record(rec, rec_fields)
    _prepare_record_authors(rec)

    start_date = datetime.date.today().isoformat()
    end_date = datetime.date.today() + datetime.timedelta(weeks=4)

    return render_template(
        "circulation_ill_register.html",
        action="register",
        record_id=record_id,
        start_date=start_date,
        end_date=end_date,
        **rec
    )
Example #21
def datacite_delete(recid):
    """Delete DOI in DataCite.

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    record = get_record(recid)

    if record is None:
        logger.debug("Record %s not found" % recid)
        return

    doi_val = record.get(cfg['PIDSTORE_DATACITE_RECORD_DOI_FIELD'], None)
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception(
            "DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))

        if not pid.delete():
            m = "Failed to inactive DOI %s" % doi_val
            logger.error(m)
            if not datacite_delete.request.is_eager:
                raise datacite_delete.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
Example #22
    def delete(self, *args, **kwargs):
        """Delete the deposit."""
        recid = PersistentIdentifier.get(pid_type='recid',
                                         pid_value=self['recid'])
        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)
        return super(ZenodoDeposit, self).delete(*args, **kwargs)
def ill_request(record_id=None):
    """Interface to request an inter library loan for the user.

    Without a record_id, an empty form will be presented.
    """
    try:
        get_user(current_user)
    except AttributeError:
        # Anonymous User
        return render_template("invenio_theme/401.html")

    if record_id:
        _uuid = PersistentIdentifier.get("recid", record_id).object_uuid
        rec = Record.get_record(_uuid)
    else:
        rec = {}

    _prepare_record(rec, rec_fields)
    _prepare_record_authors(rec)

    start_date = datetime.date.today().isoformat()
    end_date = datetime.date.today() + datetime.timedelta(weeks=4)

    return render_template(
        "circulation_ill_request.html",
        action="request",
        record_id=record_id,
        start_date=start_date,
        end_date=end_date,
        **rec
    )
Example #24
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to before_record_index signal to transform record for ES.

    To avoid a record and its published deposit diverging (e.g. if an
    embargo task updates the record), every time we index a record we also
    index the deposit and overwrite its content with that of the record.

    :param sender: Sender of the signal.
    :param json: JSON to be passed to Elasticsearch.
    :type json: dict
    :param record: Indexed deposit record.
    :type record: `invenio_records.api.Deposit`
    :param index: Elasticsearch index name.
    :type index: str
    """
    if not index.startswith('deposits-records-'):
        return

    if not isinstance(record, ZenodoDeposit):
        record = ZenodoDeposit(record, model=record.model)

    if record['_deposit']['status'] == 'published':
        schema = json['$schema']

        pub_record = record.fetch_published()[1]

        # Temporarily set to draft mode to ensure that `clear` can be called
        json['_deposit']['status'] = 'draft'
        json.clear()
        json.update(copy.deepcopy(pub_record.replace_refs()))

        # Set back to published mode and restore schema.
        json['_deposit']['status'] = 'published'
        json['$schema'] = schema
        json['_updated'] = pub_record.updated
    else:
        json['_updated'] = record.updated
    json['_created'] = record.created

    # Compute filecount and total file size
    files = json.get('_files', [])
    json['filecount'] = len(files)
    json['size'] = sum([f.get('size', 0) for f in files])

    recid = record.get('recid')
    if recid:
        pid = PersistentIdentifier.get('recid', recid)
        pv = PIDVersioning(child=pid)
        relations = serialize_relations(pid)
        if pv.exists:
            if pv.draft_child_deposit:
                is_last = (pv.draft_child_deposit.pid_value
                           == record['_deposit']['id'])
                relations['version'][0]['is_last'] = is_last
                relations['version'][0]['count'] += 1
        else:
            relations = {'version': [{'is_last': True, 'index': 0}, ]}
        if relations:
            json['relations'] = relations
Example #25
    def _publish_new(self, id_=None):
        """Publish new deposit with communities handling."""
        dep_comms = set(self.pop('communities', []))
        record = super(ZenodoDeposit, self)._publish_new(id_=id_)
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        pv = PIDVersioning(parent=conceptrecid)
        if pv.children.count() > 1:
            files_set = set(f.get_version().file.checksum for f in self.files)
            for prev_recid in pv.children.all()[:-1]:
                rec = ZenodoRecord.get_record(prev_recid.object_uuid)
                prev_files_set = set(f.get_version().file.checksum for f in
                                     rec.files)
                if files_set == prev_files_set:
                    raise VersioningFilesError()

            prev_recid = pv.children.all()[-2]
            rec_comms = set(ZenodoRecord.get_record(
                prev_recid.get_assigned_object()).get('communities', []))
        else:
            rec_comms = set()

        record = self._sync_communities(dep_comms, rec_comms, record)
        record.commit()

        # Update the concept recid redirection
        pv.update_redirect()
        RecordDraft.unlink(record.pid, self.pid)
        index_siblings(record.pid, neighbors_eager=True, with_deposits=True)

        return record
Example #26
def test_double_minting_depid_recid(db):
    """Test using same integer for dep/rec ids."""
    dep_uuid = uuid4()
    data = dict()
    pid = zenodo_deposit_minter(dep_uuid, data)
    # Assert values added to data. Depid and recid have IDs starting from
    # '2' since the conceptrecid is minted first
    assert data['_deposit']['id'] == '2'
    assert data['conceptrecid'] == '1'
    assert data['recid'] == 2
    assert 'doi' not in data
    # Assert pid values
    assert pid.pid_type == 'depid'
    assert pid.pid_value == '2'
    assert pid.status == PIDStatus.REGISTERED
    assert pid.object_uuid == dep_uuid
    # Assert reservation of recid.
    assert PersistentIdentifier.get('recid', pid.pid_value).status \
        == PIDStatus.RESERVED
    db.session.commit()

    # Assert registration of recid.
    rec_uuid = uuid4()
    pid = zenodo_record_minter(rec_uuid, data)
    assert pid.pid_type == 'recid'
    assert pid.pid_value == '2'
    assert pid.status == PIDStatus.REGISTERED
    assert pid.object_uuid == rec_uuid
    assert data['doi'] == '10.5072/zenodo.2'
    assert data['_oai']['id'] == 'oai:zenodo.org:2'
Example #27
def add_oai_information(obj, eng):
    """Adds OAI information like identifier"""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
def test_create_distance_signature_method(small_app):
    """Test the method responsible for creating data in Beard format."""
    pid = PersistentIdentifier.get("literature", 4328)
    publication_id = str(pid.object_uuid)

    signatures_map = {
        'aabe5373-39bf-4d27-bb91-2aec593940a9': {
            'author_affiliation': 'Copenhagen U.',
            'author_claimed': False,
            'author_name': 'Glashow, S.L.',
            'author_recid': False,
            'publication_id': publication_id,
            'signature_id': 'aabe5373-39bf-4d27-bb91-2aec593940a9'
        }
    }

    uuid = 'aabe5373-39bf-4d27-bb91-2aec593940a9'

    distance_signature = {
        'author_affiliation': 'Copenhagen U.',
        'author_claimed': False,
        'author_name': 'Glashow, S.L.',
        'author_recid': False,
        'publication_id': publication_id,
        'signature_id': 'aabe5373-39bf-4d27-bb91-2aec593940a9',
        'publication': {
            'publication_id': publication_id,
            'year': '1961',
            'authors': ['Glashow, S.L.']
        }
    }

    assert _create_distance_signature(signatures_map, uuid) == \
        distance_signature
Example #29
def test_delete_draft(api, api_client, db, es, location, json_auth_headers,
                      auth_headers, deposit_url, get_json, license_record):
    """Test deleting of Deposit draft using REST API."""
    # Setting var this way doesn't work
    headers = json_auth_headers
    client = api_client
    links, data = create_deposit(
        client, headers, auth_headers, deposit_url, get_json, {})

    # Two 'recid' PIDs - Concept PID and Version PID
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 2
    recid = PersistentIdentifier.get('recid', str(data['record_id']))
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert recid.status == PIDStatus.RESERVED
    assert depid.status == PIDStatus.REGISTERED

    # Get deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.get(links['self'], headers=auth_headers)
    assert response.status_code == 200

    # Delete deposition
    current_search.flush_and_refresh(index='deposits')
    response = client.delete(links['self'], headers=auth_headers)
    assert response.status_code == 204
    # 'recid' PID should be removed, while 'depid' should have status deleted.
    # No 'doi' PIDs should be created without publishing
    assert PersistentIdentifier.query.filter_by(pid_type='recid').count() == 0
    depid = PersistentIdentifier.query.filter_by(pid_type='depid').one()
    assert PersistentIdentifier.query.filter_by(pid_type='doi').count() == 0
    assert depid.status == PIDStatus.DELETED
Example #30
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Example #31
def delete_record(record_uuid, reason, user):
    """Delete the record and it's PIDs.

    :param record_uuid: UUID of the record to be removed.
    :param reason: Reason for removal. Either one of: 'spam', 'uploader',
        'takedown' (see 'ZENODO_REMOVAL_REASONS' variable in config),
        otherwise using it as a verbatim "Reason" string.
    :param user: ID or email of the Zenodo user (admin)
        responsible for the removal.
    """
    from invenio_github.models import ReleaseStatus
    if isinstance(user, text_type):
        user_id = User.query.filter_by(email=user).one().id
    elif isinstance(user, int):
        user_id = User.query.get(user).id
    else:
        raise TypeError("User cannot be determined from argument: {0}".format(
            user))

    record = ZenodoRecord.get_record(record_uuid)

    # Remove the record from versioning and delete the recid
    recid = PersistentIdentifier.get('recid', record['recid'])
    pv = PIDVersioning(child=recid)
    pv.remove_child(recid)
    pv.update_redirect()
    recid.delete()

    # Remove the record from index
    try:
        RecordIndexer().delete(record)
    except NotFoundError:
        pass

    # Remove buckets
    record_bucket = record.files.bucket
    RecordsBuckets.query.filter_by(record_id=record.id).delete()
    record_bucket.locked = False
    record_bucket.remove()

    removal_reasons = dict(current_app.config['ZENODO_REMOVAL_REASONS'])
    if reason in removal_reasons:
        reason = removal_reasons[reason]

    depid, deposit = deposit_resolver.resolve(record['_deposit']['id'])

    try:
        doi = PersistentIdentifier.get('doi', record['doi'])
    except PIDDoesNotExistError:
        doi = None

    # Record OpenAIRE info
    try:
        original_id = openaire_original_id(record, openaire_type(record))[1]
        datasource_id = openaire_datasource_id(record)
    except PIDDoesNotExistError:
        original_id = None
        datasource_id = None

    if pv.children.count() == 0:
        conceptrecid = PersistentIdentifier.get('recid',
                                                record['conceptrecid'])
        conceptrecid.delete()
        new_last_child = None
    else:
        new_last_child = (pv.last_child.pid_value,
                          str(pv.last_child.object_uuid))

    if 'conceptdoi' in record:
        conceptdoi_value = record['conceptdoi']
    else:
        conceptdoi_value = None

    # Completely delete the deposit
    # Deposit will be removed from index
    deposit.delete(delete_published=True)

    # Clear the record and put the deletion information
    record.clear()
    record.update({
        'removal_reason': reason,
        'removed_by': user_id,
    })
    record.commit()

    # Mark the relevant GitHub Release as deleted
    for ghr in record.model.github_releases:
        ghr.status = ReleaseStatus.DELETED

    if doi and not is_local_doi(doi.pid_value):
        db.session.delete(doi)
    db.session.commit()

    # After a successful DB commit, sync the DOIs with DataCite
    if doi and is_local_doi(doi.pid_value):
        datacite_inactivate.delay(doi.pid_value)
    if conceptdoi_value:
        if new_last_child:
            # Update last child (updates also conceptdoi)
            pid_value, rec_uuid = new_last_child
            datacite_register.delay(pid_value, rec_uuid)
        else:
            datacite_inactivate.delay(conceptdoi_value)

    # Also delete from OpenAIRE index
    if current_app.config['OPENAIRE_DIRECT_INDEXING_ENABLED'] and original_id \
            and datasource_id:
        openaire_delete.delay(original_id=original_id,
                              datasource_id=datasource_id)
Example #32
    def pid(self):
        """Return an instance of record PID."""
        pid = b2share_record_uuid_fetcher(self.id, self)
        return PersistentIdentifier.get(pid.pid_type, pid.pid_value)
Example #33
def make_combined_pdf(pid, obj_file_uri, fileobj, obj, lang_user):
    """Make the cover-page-combined PDF file.

    :param pid: PID object
    :param obj_file_uri: URI of the file object
    :param fileobj: file metadata (provides the expected checksum)
    :param obj: file object to combine with the cover page
    :param lang_user: language of the accessing user
    :return: cover-page-combined PDF file object
    """
    lang_filepath = current_app.config['PDF_COVERPAGE_LANG_FILEPATH']\
        + lang_user + current_app.config['PDF_COVERPAGE_LANG_FILENAME']

    pidObject = PersistentIdentifier.get('recid', pid.pid_value)
    item_metadata_json = ItemsMetadata.get_record(pidObject.object_uuid)
    item_type = ItemsMetadata.get_by_object_id(pidObject.object_uuid)
    item_type_id = item_type.item_type_id
    type_mapping = Mapping.get_record(item_type_id)
    item_map = get_mapping(type_mapping, "jpcoar_mapping")

    with open(lang_filepath) as json_datafile:
        lang_data = json.loads(json_datafile.read())

    # Initialize Instance
    pdf = FPDF('P', 'mm', 'A4')
    pdf.add_page()
    pdf.set_margins(20.0, 20.0)
    pdf.set_fill_color(100, 149, 237)

    pdf.add_font(
        'IPAexg',
        '',
        current_app.config["JPAEXG_TTF_FILEPATH"],
        uni=True)
    pdf.add_font(
        'IPAexm',
        '',
        current_app.config["JPAEXM_TTF_FILEPATH"],
        uni=True)

    # Parameters such as width and height of rows/columns
    w1 = 40  # width of the left column
    w2 = 130  # width of the right column
    footer_w = 90  # width of the footer cell
    # url_oapolicy_h = 7  # height of the URL & OA-policy
    # height of the URL & OA-policy
    url_oapolicy_h = current_app.config['URL_OA_POLICY_HEIGHT']
    # title_h = 8  # height of the title
    title_h = current_app.config['TITLE_HEIGHT']  # height of the title
    # header_h = 20  # height of the header cell
    header_h = current_app.config['HEADER_HEIGHT']  # height of the header cell
    # footer_h = 4  # height of the footer cell
    footer_h = current_app.config['FOOTER_HEIGHT']  # height of the footer cell
    # meta_h = 9  # height of the metadata cell
    # height of the metadata cell
    meta_h = current_app.config['METADATA_HEIGHT']
    # maximum number of letters that fit in the right column
    max_letters_num = 51
    cc_logo_xposition = 160  # x-position of Creative Commons logos

    # Get the header settings
    record = PDFCoverPageSettings.find(1)
    header_display_type = record.header_display_type
    header_output_string = record.header_output_string
    header_output_image = record.header_output_image
    header_display_position = record.header_display_position

    # Set the header position
    positions = {}
    if header_display_position == 'left':
        positions['str_position'] = 'L'
        positions['img_position'] = 20
    elif header_display_position == 'center' or header_display_position is None:
        positions['str_position'] = 'C'
        positions['img_position'] = 85
    elif header_display_position == 'right':
        positions['str_position'] = 'R'
        positions['img_position'] = 150

    # Show header(string or image)
    if header_display_type == 'string':
        pdf.set_font('IPAexm', '', 22)
        pdf.multi_cell(
            w1 + w2,
            header_h,
            header_output_string,
            0,
            positions['str_position'],
            False)
    else:
        pdf.image(
            header_output_image,
            x=positions['img_position'],
            y=None,
            w=0,
            h=30,
            type='')
        pdf.set_y(55)

    # Title settings
    title = item_metadata_json['title']
    pdf.set_font('IPAexm', '', 20)
    pdf.multi_cell(w1 + w2, title_h, title, 0, 'L', False)
    pdf.ln(h='15')

    # Metadata
    fg = WekoFeedGenerator()
    fe = fg.add_entry()

    _file = 'file.URI.@value'
    _file_item_id = None
    if _file in item_map:
        _file_item_id = item_map[_file].split('.')[0]
        _file_item_id = _file_item_id.replace('fileinfo', 'files')

    _creator = 'creator.creatorName.@value'
    _creator_item_id = None
    if _creator in item_map:
        _creator_item_id = item_map[_creator].split('.')[0]

    publisher_attr_lang = 'publisher.@attributes.xml:lang'
    publisher_value = 'publisher.@value'
    publisher_item_id = None
    publisher_lang_id = None
    publisher_text_id = None

    keyword_attr_lang = 'subject.@attributes.xml:lang'
    keyword_attr_value = 'subject.@value'
    keyword_base = None
    keyword_lang = None

    pdf.set_font('Arial', '', 14)
    pdf.set_font('IPAexg', '', 14)

    if item_metadata_json['lang'] == 'en':
        item_metadata_json['lang'] = 'English'
    elif item_metadata_json['lang'] == 'ja':
        item_metadata_json['lang'] = 'Japanese'
    try:
        lang_field = item_map['language.@value'].split('.')
        if item_metadata_json[lang_field[0]][lang_field[1]] == 'eng':
            item_metadata_json['lang'] = 'English'
        elif item_metadata_json[lang_field[0]][lang_field[1]] == 'jpn':
            item_metadata_json['lang'] = 'Japanese'
    except BaseException:
        pass
    try:
        lang = item_metadata_json.get('lang')
    except (KeyError, IndexError):
        lang = None
    try:
        publisher_item_id = item_map[publisher_attr_lang].split('.')[0]
        publisher_lang_ids = item_map[publisher_attr_lang].split('.')[1:]
        publisher_text_ids = item_map[publisher_value].split('.')[1:]
        publisher = None
        default_publisher = None
        publishers = item_metadata_json[publisher_item_id]
        pair_name_language_publisher = get_pair_value(publisher_text_ids,
                                                      publisher_lang_ids,
                                                      publishers)
        for publisher_name, publisher_lang in pair_name_language_publisher:
            if publisher_lang == lang_user:
                publisher = publisher_name
            if publisher_lang == 'en':
                default_publisher = publisher_name

        if publisher is None:
            publisher = default_publisher
    except (KeyError, IndexError):
        publisher = None
    try:
        pubdate = item_metadata_json.get('pubdate')
    except (KeyError, IndexError):
        pubdate = None
    # Initialize before the lookup so a missing mapping leaves them defined.
    keywords_ja = None
    keywords_en = None
    try:
        keyword_item_id = item_map[keyword_attr_lang].split('.')[0]
        keyword_item_langs = item_map[keyword_attr_lang].split('.')[1:]
        keyword_item_values = item_map[keyword_attr_value].split('.')[1:]
        keyword_base = item_metadata_json.get(keyword_item_id)

        pair_name_language_keyword = get_pair_value(keyword_item_values,
                                                    keyword_item_langs,
                                                    keyword_base)

        for name, lang in pair_name_language_keyword:
            keyword_lang = lang
            if keyword_lang == 'ja':
                keywords_ja = name
            elif keyword_lang == 'en':
                keywords_en = name

    except (KeyError, IndexError):
        pass
    creator_items = item_metadata_json.get(_creator_item_id) or []
    if type(creator_items) is dict:
        creator_items = [creator_items]
    creator_mail_list = []
    creator_name_list = []
    creator_affiliation_list = []
    for creator_item in creator_items:
        # Get creator mail
        if creator_item.get('creatorMails'):
            for creator_mail in creator_item.get('creatorMails'):
                if creator_mail.get('creatorMail'):
                    creator_mail_list.append(creator_mail.get('creatorMail'))
        # Get creator name
        default_creator_name_list = []
        if creator_item.get('creatorNames'):
            for creator_name in creator_item.get('creatorNames'):
                if creator_name.get('creatorNameLang') == lang_user:
                    creator_name_list.append(creator_name.get('creatorName'))
                if creator_name.get('creatorNameLang') == 'en':
                    default_creator_name_list.append(creator_name.get(
                        'creatorName'))
        if not creator_name_list and default_creator_name_list:
            creator_name_list = default_creator_name_list
        # Get creator affiliation
        default_creator_affiliation_list = []
        if creator_item.get('affiliation'):
            for creator_affiliation in creator_item.get('affiliation'):
                if creator_affiliation.get('affiliationNameLang') == lang_user:
                    creator_affiliation_list.append(creator_affiliation.get(
                        'affiliationName'))
                if creator_affiliation.get('affiliationNameLang') == 'en':
                    default_creator_affiliation_list.\
                        append(creator_affiliation.get('affiliationName'))
        if not creator_affiliation_list and default_creator_affiliation_list:
            creator_affiliation_list = default_creator_affiliation_list

    separator = ', '
    metadata_dict = {
        "lang": lang,
        "publisher": publisher,
        "pubdate": pubdate,
        "keywords_ja": keywords_ja,
        "keywords_en": keywords_en,
        "creator_mail": separator.join(creator_mail_list),
        "creator_name": separator.join(creator_name_list),
        "affiliation": separator.join(creator_affiliation_list)
    }

    # Change the values from None to '' for printing
    for key in metadata_dict:
        if metadata_dict[key] is None:
            metadata_dict[key] = ''
    metadata_list = [
        "{}: {}".format(lang_data["Metadata"]["LANG"], metadata_dict["lang"]),
        "{}: {}".format(
            lang_data["Metadata"]["PUBLISHER"],
            metadata_dict["publisher"]),
        "{}: {}".format(
            lang_data["Metadata"]["PUBLICDATE"],
            metadata_dict["pubdate"]),
        "{} (Ja): {}".format(
            lang_data["Metadata"]["KEY"],
            metadata_dict["keywords_ja"]),
        "{} (En): {}".format(
            lang_data["Metadata"]["KEY"],
            metadata_dict["keywords_en"]),
        "{}: {}".format(
            lang_data["Metadata"]["AUTHOR"],
            metadata_dict["creator_name"]),
        "{}: {}".format(
            lang_data["Metadata"]["EMAIL"],
            metadata_dict["creator_mail"]),
        "{}: {}".format(
            lang_data["Metadata"]["AFFILIATED"],
            metadata_dict["affiliation"])
    ]

    metadata = '\n'.join(metadata_list)
    metadata_lfnum = int(metadata.count('\n'))
    for item in metadata_list:
        metadata_lfnum += int(get_east_asian_width_count(item)
                              ) // max_letters_num

    url = ''  # will be modified later
    url_lfnum = int(get_east_asian_width_count(url)) // max_letters_num

    oa_policy = ''  # will be modified later
    oa_policy_lfnum = int(
        get_east_asian_width_count(oa_policy)) // max_letters_num

    # Save top coordinate
    top = pdf.y
    # Calculate x position of next cell
    offset = pdf.x + w1
    pdf.multi_cell(w1,
                   meta_h,
                   lang_data["Title"]["METADATA"]
                   + '\n' * (metadata_lfnum + 1),
                   1,
                   'C',
                   True)
    # Reset y coordinate
    pdf.y = top
    # Move to computed offset
    pdf.x = offset
    pdf.multi_cell(w2, meta_h, metadata, 1, 'L', False)
    top = pdf.y
    pdf.multi_cell(w1,
                   url_oapolicy_h,
                   lang_data["Title"]["URL"] + '\n' * (url_lfnum + 1),
                   1,
                   'C',
                   True)
    pdf.y = top
    pdf.x = offset
    pdf.multi_cell(w2, url_oapolicy_h, url, 1, 'L', False)
    top = pdf.y
    pdf.multi_cell(w1,
                   url_oapolicy_h,
                   lang_data["Title"]["OAPOLICY"]
                   + '\n' * (oa_policy_lfnum + 1),
                   1,
                   'C',
                   True)
    pdf.y = top
    pdf.x = offset
    pdf.multi_cell(w2, url_oapolicy_h, oa_policy, 1, 'L', False)
    pdf.ln(h=1)

    # Footer
    pdf.set_font('Courier', '', 10)
    pdf.set_x(108)

    try:
        license = item_metadata_json[_file_item_id][0].get('licensetype')
    except (KeyError, IndexError, TypeError):
        license = None

    list_license_dict = current_app.config['WEKO_RECORDS_UI_LICENSE_DICT']
    for item in list_license_dict:
        if item['value'] == license:
            get_license_pdf(license, item_metadata_json, pdf, _file_item_id,
                            footer_w, footer_h, cc_logo_xposition, item)
            break
    else:
        pdf.multi_cell(footer_w, footer_h, '', 0, 'L', False)

    """ Convert PDF cover page data as bytecode """
    output = pdf.output(dest='S').encode('latin-1')
    b_output = io.BytesIO(output)

    # Combine cover page and existing pages
    cover_page = PdfFileReader(b_output)
    f = open(obj_file_uri, "rb")
    existing_pages = PdfFileReader(f)

    # If the PDF file is encrypted with the empty password '' (i.e. not
    # intentionally encrypted), try to decrypt it.
    if existing_pages.isEncrypted:
        try:
            existing_pages.decrypt('')
        except BaseException:  # Errors such as NotImplementedError
            return ObjectResource.send_object(
                obj.bucket, obj,
                expected_chksum=fileobj.get('checksum'),
                logger_data={
                    'bucket_id': obj.bucket_id,
                    'pid_type': pid.pid_type,
                    'pid_value': pid.pid_value,
                },
                as_attachment=False
            )

    # If the PDF file is encrypted with a non-empty password, serve the
    # original object as-is.
    if existing_pages.isEncrypted:
        return ObjectResource.send_object(
            obj.bucket, obj,
            expected_chksum=fileobj.get('checksum'),
            logger_data={
                'bucket_id': obj.bucket_id,
                'pid_type': pid.pid_type,
                'pid_value': pid.pid_value,
            },
            as_attachment=False
        )

    combined_pages = PdfFileWriter()
    combined_pages.addPage(cover_page.getPage(0))
    for page_num in range(existing_pages.numPages):
        existing_page = existing_pages.getPage(page_num)
        combined_pages.addPage(existing_page)

    # Download the newly generated combined PDF file
    try:
        combined_filename = 'CV_' + datetime.now().strftime('%Y%m%d') + '_' + \
                            item_metadata_json[_file_item_id][0].get("filename")

    except (KeyError, IndexError):
        combined_filename = 'CV_' + title + '.pdf'
    combined_filepath = "/code/invenio/{}.pdf".format(combined_filename)
    combined_file = open(combined_filepath, "wb")
    combined_pages.write(combined_file)
    combined_file.close()

    return send_file(
        combined_filepath,
        as_attachment=True,
        attachment_filename=combined_filename,
        mimetype='application/pdf',
        cache_timeout=-1)
Example #34
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        :param delete_published: If True, the deposit of a published record
            will also be deleted (usually used by admin operations).
        :type delete_published: bool
        """
        is_published = self['_deposit'].get('pid')
        if is_published and not delete_published:
            raise PIDInvalidAction()

        # Delete the recid
        recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        versioning = PIDVersioning(child=recid)
        if versioning.exists:
            if versioning.draft_child and \
                    self.pid == versioning.draft_child_deposit:
                versioning.remove_draft_child()
            if versioning.last_child:
                index_siblings(versioning.last_child,
                               children=versioning.children.all(),
                               include_pid=True,
                               neighbors_eager=True,
                               with_deposits=True)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        if 'conceptrecid' in self:
            concept_recid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_recid.status == PIDStatus.RESERVED:
                db.session.delete(concept_recid)
        # Completely remove bucket
        bucket = self.files.bucket
        extra_formats_bucket = None
        if 'extra_formats' in self['_buckets']:
            extra_formats_bucket = self.extra_formats.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
            ).delete(synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        if extra_formats_bucket:
            extra_formats_bucket.remove()
        bucket.locked = False
        bucket.remove()

        depid = kwargs.get('pid', self.pid)
        if depid:
            depid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record since
        # we need to completely override everything that the Deposit.delete
        # method does.
        return super(Deposit, self).delete(*args, **kwargs)
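
# An illustrative sketch (toy classes, not the real Invenio ones) of why the
# super(Deposit, self) call above bypasses Deposit.delete: super() starts the
# method lookup *after* the named class in the MRO, so Record.delete runs.
class Record(object):
    def delete(self):
        return 'record-delete'


class Deposit(Record):
    def delete(self):
        return 'deposit-delete'


class ZenodoDeposit(Deposit):
    def delete(self):
        # Lookup starts after Deposit in the MRO: Record.delete is called.
        return super(Deposit, self).delete()


assert ZenodoDeposit().delete() == 'record-delete'
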
Exemple #35
def pid_from_value(pid_value, pid_type='recid'):
    """Fetch a persistent identifier by value, or None if unresolvable."""
    try:
        return PersistentIdentifier.get(pid_type=pid_type, pid_value=pid_value)
    except Exception:
        return None
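
# A narrower variant of the helper above (a sketch, not original source):
# catching only PIDDoesNotExistError makes the "None when missing" intent
# explicit instead of silently swallowing unrelated errors.
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier


def pid_or_none(pid_value, pid_type='recid'):
    """Return the persistent identifier if it exists, otherwise None."""
    try:
        return PersistentIdentifier.get(pid_type=pid_type, pid_value=pid_value)
    except PIDDoesNotExistError:
        return None
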
def test_avc_workflow_receiver_local_file_pass(api_app, db, api_project,
                                               access_token, json_headers,
                                               mock_sorenson, online_video,
                                               webhooks, local_file):
    """Test AVCWorkflow receiver."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = ObjectVersion.query.filter_by(
        version_id=local_file).one().bucket_id
    video_size = 5510872
    master_key = 'test.mp4'
    slave_keys = [
        '{0}.mp4'.format(quality) for quality in get_presets_applied().keys()
        if quality != '1024p'
    ]
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='avc',
                      access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
            version_id=str(local_file),
        )
        # [[ RUN WORKFLOW ]]
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_all_distinct_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags
        metadata_keys = [
            'duration', 'bit_rate', 'size', 'avg_frame_rate', 'codec_name',
            'codec_long_name', 'width', 'height', 'nb_frames',
            'display_aspect_ratio', 'color_range'
        ]
        assert all([key in tags for key in metadata_keys])
        assert ObjectVersion.query.count() == get_object_count()
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)

        # Check metadata patch
        recid = PersistentIdentifier.get('depid', video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([
            key in str(record['_cds']['extracted_metadata'])
            for key in metadata_keys
        ])

        # Check slaves
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 3
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        assert len(info) == 11
        assert info[0][0] == 'file_video_metadata_extraction'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_extract_frames'
        assert info[1][1].status == states.SUCCESS
        transcode_tasks = info[2:]
        statuses = [task[1].status for task in info[2:]]
        assert len(transcode_tasks) == len(statuses)
        assert [
            states.SUCCESS, states.REVOKED, states.REVOKED, states.REVOKED,
            states.SUCCESS, states.REVOKED, states.REVOKED, states.REVOKED,
            states.REVOKED
        ] == statuses

        # check tags (exclude 'uri-origin')
        assert ObjectVersionTag.query.count() == (get_tag_count() - 1)

        # check sse is called
        assert mock_sse.called

        messages = [
            (sse_channel, states.SUCCESS, 'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED, 'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 10
        _, kwargs = list_kwargs[8]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'], access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        # [[ DELETE WORKFLOW ]]
        resp = client.delete(url, headers=json_headers)

        assert resp.status_code == 201

        # check that object versions and tags are deleted
        # (Create + Delete) * Num Objs - 1 (because the file is local and
        # will not be touched)
        assert ObjectVersion.query.count() == 2 * get_object_count() - 1
        # Tags associated with the old version
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)
        bucket = Bucket.query.first()
        # and bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check metadata patch are deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(
            get_deposit_events(record.json['_deposit']['id'],
                               _deleted=True)) == 1

        # check that no SSE message is sent and no reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
Exemple #37
def docs(files, mode):
    """Load demo article records."""
    from slugify import slugify

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')

    if files:
        articles_json = files
    else:
        articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue

        click.echo('Loading docs from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):

                # Replace body with the corresponding file content
                assert data["body"]["content"]
                content_filename = os.path.join(
                    *(
                        ["/", ] +
                        filename.split('/')[:-1] +
                        [data["body"]["content"], ]
                    )
                )

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), str):
                    data["collections"] = []
                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get(
                            'docid', str(slugify(
                                data.get('slug', data['title']))))
                        if pid:
                            record = update_doc(pid, data)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get(
                            'docid', str(slugify(
                                data.get('slug', data['title']))))
                        if pid:
                            click.echo(
                                'Record docid {} exists already;'
                                ' cannot insert it.  '.format(
                                    str(slugify(
                                        data.get('slug', data['title'])))),
                                err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_doc(data, schema)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get(
                            'docid', str(slugify(
                                data.get('slug', data['title']))))
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record docid {} does not exist; '
                            'cannot replace it.'.format(
                                str(slugify(
                                    data.get('slug', data['title'])))),
                            err=True)
                        return
                    record = update_doc(pid, data)
                    action = 'updated'
                record.commit()
                db.session.commit()
                click.echo(
                    ' Record docid {0} {1}.'.format(
                        str(slugify(data.get(
                            'slug', data['title']))), action))
                indexer.index(record)
                db.session.expunge_all()
Exemple #38
0
def records(skip_files, files, profile, mode):
    """Load all records."""
    if profile:
        import cProfile
        import pstats
        import StringIO
        pr = cProfile.Profile()
        pr.enable()

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/record-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/records')
    action = None

    if files:
        record_json = files
    else:
        record_json = glob.glob(os.path.join(data, '*.json'))

    for filename in record_json:
        # name = filename.split('/')[-1]
        # if name.startswith('opera'):
        #     click.echo('Skipping opera records ...')
        #     continue
        click.echo('Loading records from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):

                if not data:
                    click.echo('IGNORING a possibly broken or corrupted '
                               'record entry in file {0} ...'.format(filename))
                    continue

                files = data.get('files', [])

                if mode == 'insert-or-replace':
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                        if pid:
                            record = update_record(
                                pid, schema, data, files, skip_files)
                            action = 'updated'
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files, skip_files)
                        action = 'inserted'
                elif mode == 'insert':
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                        if pid:
                            click.echo(
                                'Record recid {} exists already;'
                                ' cannot insert it.  '.format(
                                    data.get('recid')), err=True)
                            return
                    except PIDDoesNotExistError:
                        record = create_record(schema, data, files, skip_files)
                        action = 'inserted'
                else:
                    try:
                        pid = PersistentIdentifier.get('recid', data['recid'])
                    except PIDDoesNotExistError:
                        click.echo(
                            'Record recid {} does not exist; '
                            'cannot replace it.'.format(
                                data.get('recid')), err=True)
                        return
                    record = update_record(
                        pid, schema, data, files, skip_files)
                    action = 'updated'

                if not skip_files:
                    record.files.flush()
                record.commit()
                db.session.commit()
                click.echo(
                    'Record recid {0} {1}.'.format(
                        data.get('recid'), action))
                indexer.index(record)
                db.session.expunge_all()

    if profile:
        pr.disable()
        s = StringIO.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
Exemple #39
    @classmethod
    def get_record_by_pid(cls, pid):
        """Get record by pid."""
        pid = PersistentIdentifier.get('depid', pid)
        return cls.get_record(id_=pid.object_uuid)
def test_avc_workflow_receiver_pass(api_app, db, api_project, access_token,
                                    json_headers, mock_sorenson, online_video,
                                    webhooks, users):
    """Test AVCWorkflow receiver."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = video_1['_buckets']['deposit']
    video_size = 5510872
    master_key = 'test.mp4'
    slave_keys = [
        '{0}.mp4'.format(quality) for quality in get_presets_applied()
        if quality != '1024p'
    ]
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='avc',
                      access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
        )
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['tags']['uri_origin'] == online_video
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_all_distinct_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags
        metadata_keys = [
            'duration', 'bit_rate', 'size', 'avg_frame_rate', 'codec_name',
            'codec_long_name', 'width', 'height', 'nb_frames',
            'display_aspect_ratio', 'color_range'
        ]
        assert all([key in tags for key in metadata_keys])

        # Check metadata patch
        recid = PersistentIdentifier.get('depid', video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([
            key in str(record['_cds']['extracted_metadata'])
            for key in metadata_keys
        ])

        # Check slaves
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 4
        assert 'file_download' in tasks_status
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        presets = get_presets_applied().keys()
        assert info[0][0] == 'file_download'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_metadata_extraction'
        assert info[1][1].status == states.SUCCESS
        assert info[2][0] == 'file_video_extract_frames'
        assert info[2][1].status == states.SUCCESS
        for i in info[3:]:
            assert i[0] == 'file_transcode'
            if i[1].status == states.SUCCESS:
                assert i[1].result['payload']['preset_quality'] in presets

        # check tags
        assert ObjectVersionTag.query.count() == get_tag_count()

        # check sse is called
        assert mock_sse.called

        messages = [
            (sse_channel, states.STARTED, 'file_download'),
            (sse_channel, states.SUCCESS, 'file_download'),
            (sse_channel, states.SUCCESS, 'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED, 'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 12
        _, kwargs = list_kwargs[10]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {
            'file_download': states.SUCCESS,
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # check feedback from anonymous user
    event_id = data['tags']['_event_id']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 401
    # check feedback from owner
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client, email=User.query.get(users[0]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200
    # check feedback from another user without access
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client, email=User.query.get(users[1]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 403
    # check feedback from another user with access
    user_2 = User.query.get(users[1])
    user_2_id = str(user_2.id)
    user_2_email = user_2.email
    project = deposit_project_resolver(project['_deposit']['id'])
    project['_access'] = {'update': [user_2_email]}
    project = project.commit()
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:

        @identity_loaded.connect
        def load_email(sender, identity):
            if current_user.get_id() == user_2_id:
                identity.provides.update([UserNeed(user_2_email)])

        login_user_via_session(client, email=user_2_email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'], access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        resp = client.delete(url, headers=json_headers)

        assert resp.status_code == 201

        # check that object versions and tags are deleted
        # (Create + Delete) * Num Objs
        assert ObjectVersion.query.count() == 2 * get_object_count()
        # Tags connected with the old version
        assert ObjectVersionTag.query.count() == get_tag_count()
        bucket = Bucket.query.first()
        # and bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check metadata patch are deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(
            get_deposit_events(record.json['_deposit']['id'],
                               _deleted=True)) == 1

        # check that no SSE message is sent and no reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
Exemple #41
    def newversion(self, pid=None):
        """Create a new version deposit."""
        deposit = None
        try:
            if not self.is_published():
                raise PIDInvalidAction()

            # Check that there is not a newer draft version for this record
            # and this is the latest version
            pv = PIDVersioning(child=pid)
            if pv.exists and not pv.draft_child and pid == pv.last_child:
                last_pid = pv.last_child
                # Get copy of the latest record
                latest_record = WekoDeposit.get_record(last_pid.object_uuid)
                if latest_record is not None:
                    data = latest_record.dumps()

                    owners = data['_deposit']['owners']
                    keys_to_remove = ('_deposit', 'doi', '_oai', '_files',
                                      '_buckets', '$schema')
                    for k in keys_to_remove:
                        data.pop(k, None)

                    # NOTE: We call the superclass `create()` method, because we
                    # don't want a new empty bucket, but an unlocked snapshot of
                    # the old record's bucket.
                    deposit = super(WekoDeposit, self).create(data)
                    # Injecting owners is required when creating the new
                    # version outside of a request context
                    deposit['_deposit']['owners'] = owners

                    recid = PersistentIdentifier.get(
                        'recid', str(data['_deposit']['id']))
                    depid = PersistentIdentifier.get(
                        'depid', str(data['_deposit']['id']))
                    PIDVersioning(parent=pv.parent).insert_draft_child(
                        child=recid)
                    RecordDraft.link(recid, depid)

                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    deposit['_buckets'] = {'deposit': str(snapshot.id)}
                    RecordsBuckets.create(record=deposit.model,
                                          bucket=snapshot)
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                        deposit['_buckets']['extra_formats'] = \
                            str(extra_formats_snapshot.id)
                        RecordsBuckets.create(record=deposit.model,
                                              bucket=extra_formats_snapshot)
                    index = {
                        'index': self.get('path', []),
                        'actions': 'private'
                        if self.get('publish_status', '1') == '1'
                        else 'publish'
                    }
                    if 'activity_info' in session:
                        del session['activity_info']
                    item_metadata = ItemsMetadata.get_record(
                        last_pid.object_uuid).dumps()
                    args = [index, item_metadata]
                    deposit.update(*args)
                    deposit.commit()
            return deposit
        except SQLAlchemyError as ex:
            current_app.logger.debug(ex)
            db.session.rollback()
            return None
Exemple #42
    @property
    def pid(self):
        """Return an instance of deposit PID."""
        pid = self.deposit_fetcher(self.id, self)
        return PersistentIdentifier.get(pid.pid_type, pid.pid_value)
Exemple #43

"""{{ cookiecutter.class_name }} resolver."""

import jsonresolver
from flask import current_app
from invenio_pidstore.models import PersistentIdentifier, PIDStatus


@jsonresolver.route('/api/{{ cookiecutter.resource_name }}/<pid>', host='ils.rero.ch')
def {{ cookiecutter.name }}_resolver(pid):
    """Resolver for {{ cookiecutter.name }} record."""
    persistent_id = PersistentIdentifier.get('{{ cookiecutter.pid_type }}', pid)
    if persistent_id.status == PIDStatus.REGISTERED:
        return dict(pid=persistent_id.pid_value)
    current_app.logger.error(
        'Doc resolver error: /api/{{ cookiecutter.resource_name }}/{pid} {persistent_id}'.format(
            pid=pid,
            persistent_id=persistent_id
        )
    )
    raise Exception('unable to resolve')
Exemple #44
    @classmethod
    def get_uuid_from_pid_value(cls, pid_value, pid_type=None):
        """Return the object UUID behind a PID value."""
        if not pid_type:
            pid_type = cls.pid_type
        pid = PersistentIdentifier.get(pid_type, pid_value)
        return pid.object_uuid
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed
    signatures are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid
    )

    # Claimed signature #1.
    glashow_record_id_claimed = str(
        PersistentIdentifier.get('lit', 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(
        glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    glashow_record_claimed['authors'][0]['signature_block'] = "HAGp"
    glashow_record_claimed['authors'][0]['curated_relation'] = True
    glashow_record_claimed['authors'][0]['recid'] = "3"
    es.index(index='records-hep', doc_type='hep',
             id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh('records-hep')

    # Claimed signature #2.
    higgs_record_id_claimed = str(
        PersistentIdentifier.get('lit', 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(
        higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_claimed['authors'][0]['signature_block'] = "HAGp"
    higgs_record_claimed['authors'][0]['curated_relation'] = True
    higgs_record_claimed['authors'][0]['recid'] = "4"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh('records-hep')

    # Not claimed signature.
    higgs_record_id_not_claimed = str(
        PersistentIdentifier.get('lit', 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed[
        'authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_not_claimed['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep',
             id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(
                   ({"3": [glashow_record_uuid_claimed,
                           higgs_record_uuid_claimed,
                           higgs_record_uuid_not_claimed]}, {}))):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject(
                {higgs_record_uuid_claimed: [
                    higgs_record_uuid_not_claimed]})):
            with patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid):
                disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(
        higgs_record_id_not_claimed)['authors'][0]['recid'] == "4"
def get_db_record(pid_type, recid):
    from inspirehep.modules.records.api import InspireRecord
    pid = PersistentIdentifier.get(pid_type, recid)
    return InspireRecord.get_record(pid.object_uuid)
Exemple #47
def get_db_record(record_type, recid):
    pid = PersistentIdentifier.get(record_type, recid)
    return Record.get_record(pid.object_uuid)
Exemple #48
    def newversion(self, pid=None):
        """Create a new version deposit."""
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required when creating the new
                # version outside of a request context
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                ###
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                if 'extra_formats' in latest_record['_buckets']:
                    deposit['_buckets']['extra_formats'] = \
                        str(extra_formats_snapshot.id)
                    RecordsBuckets.create(
                        record=deposit.model, bucket=extra_formats_snapshot)
                deposit.commit()
        return self
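
# The bucket-snapshot step above in isolation (a sketch; assumes an
# invenio-files-rest Bucket). snapshot(lock=False) copies the bucket's
# object versions into a new bucket that the new draft can keep writing to.
def writable_snapshot(bucket):
    """Return an unlocked copy of ``bucket`` for a new record version."""
    snapshot = bucket.snapshot(lock=False)
    snapshot.locked = False  # make sure the copy stays editable
    return snapshot
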
Exemple #49
    def get_change_list_content_xml(self,
                                    from_date,
                                    from_date_args=None,
                                    to_date_args=None):
        """
        Get change list xml.

        :return: Updated Change List info
        """
        if not self._validation():
            return None

        from .utils import parse_date
        if from_date_args:
            from_date_args = parse_date(from_date_args)
        if to_date_args:
            to_date_args = parse_date(to_date_args)

        change_list = ChangeList()
        change_list.up = INVENIO_CAPABILITY_URL.format(request.url_root)
        change_list.index = '{}resync/{}/changelist.xml'.format(
            request.url_root,
            self.repository_id,
        )

        record_changes = self._get_record_changes_with_interval(from_date)

        for data in record_changes:
            try:
                if from_date_args and from_date_args > parse_date(
                        data.get("updated")):
                    continue
                if to_date_args and to_date_args < parse_date(
                        data.get("updated")):
                    continue
                pid_object = PersistentIdentifier.get('recid',
                                                      data.get('record_id'))
                latest_pid = PIDVersioning(child=pid_object).last_child
                is_latest = str(latest_pid.pid_value) == "{}.{}".format(
                    data.get('record_id'), data.get('record_version'))
                if not is_latest and data.get('status') != 'deleted':
                    loc = '{}resync/{}/records/{}'.format(
                        request.url_root, self.repository_id,
                        '{}.{}'.format(data.get('record_id'),
                                       data.get('record_version')))
                else:
                    loc = '{}resync/{}/records/{}'.format(
                        request.url_root, self.repository_id,
                        data.get('record_id'))
                rc = Resource(
                    loc,
                    lastmod=data.get("updated"),
                    change=data.get('status'),
                    md_at=data.get("updated"),
                )
                change_list.add(rc)
            except Exception:
                current_app.logger.error('-' * 60)
                traceback.print_exc(file=sys.stdout)
                current_app.logger.error('-' * 60)
                continue

        return change_list.as_xml()
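
# The latest-version test above, reduced to a sketch. It assumes the same
# convention as the code: child PID values of a versioned record look like
# "<recid>.<version>", and PIDVersioning exposes the newest one as last_child.
from invenio_pidrelations.contrib.versioning import PIDVersioning
from invenio_pidstore.models import PersistentIdentifier


def is_latest_version(record_id, record_version):
    """Check whether record_id.record_version is the newest child PID."""
    pid = PersistentIdentifier.get('recid', record_id)
    latest = PIDVersioning(child=pid).last_child
    return str(latest.pid_value) == '{}.{}'.format(record_id, record_version)
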
Exemple #50
def test_create_acl_after_record(app, db, es, es_acl_prepare, test_users):
    with app.test_client() as client:
        login(client, test_users.u1)
        response = client.post(records_url(),
                               data=json.dumps({
                                   'title': 'blah',
                                   'contributors': []
                               }),
                               content_type='application/json')
        assert response.status_code == 201
        rest_metadata = get_json(response)['metadata']
        assert 'control_number' in rest_metadata

        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        with db.session.begin_nested():
            acl1 = DefaultACL(name='default',
                              schemas=[RECORD_SCHEMA],
                              priority=0,
                              originator=test_users.u1,
                              operation='get')
            actor1 = SystemRoleActor(name='auth',
                                     system_role='any_user',
                                     acl=acl1,
                                     originator=test_users.u1)
            db.session.add(acl1)
            db.session.add(actor1)

        # reindex all resources that might be affected by the ACL change
        current_explicit_acls.reindex_acl(acl1, delayed=False)

        index, doctype = schema_to_index(RECORD_SCHEMA)

        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(
                PersistentIdentifier.get(
                    'recid', rest_metadata['control_number']).object_uuid))

        clear_timestamp(rec_md)

        assert rec_md['_source']['_invenio_explicit_acls'] == [{
            'operation': 'get',
            'id': acl1.id,
            'timestamp': 'cleared',
            'system_role': ['any_user']
        }]

        # remove the ACL from the database
        with db.session.begin_nested():
            db.session.delete(acl1)

        # reindex records affected by the removal of ACL
        current_explicit_acls.reindex_acl_removed(acl1, delayed=False)

        # make sure all changes had time to propagate and test
        current_search_client.indices.refresh()
        current_search_client.indices.flush()

        rec_md = current_search_client.get(
            index=index,
            doc_type=doctype,
            id=str(
                PersistentIdentifier.get(
                    'recid', rest_metadata['control_number']).object_uuid))

        # there is no ACL in the database => no acls are defined nor enforced on the record
        print(json.dumps(rec_md, indent=4))
        assert '_invenio_explicit_acls' not in rec_md['_source']
Exemple #51
    @property
    def record_pid(self):
        """Return the published/reserved record PID."""
        return PersistentIdentifier.get('b2rec', self.id.hex)
Exemple #52
def get_es_record(record_type, recid, **kwargs):
    pid = PersistentIdentifier.get(record_type, recid)
    search_conf = current_app.config['RECORDS_REST_ENDPOINTS'][record_type]
    search_class = import_string(search_conf['search_class'])()
    return search_class.get_source(pid.object_uuid, **kwargs)
Exemple #53
    def recid(self):
        """Get RECID object for the Release record."""
        if self.record:
            return PersistentIdentifier.get('recid', str(self.record['recid']))
Exemple #54
def create_files_and_sip(deposit, dep_pid):
    """Create deposit Bucket, Files and SIPs."""
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db
    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])
    recid = None

    if sips:
        recids = [int(sip['metadata']['recid']) for sip in sips]
        if len(set(recids)) > 1:
            logger.error('Multiple recids ({recids}) found in deposit '
                         '{depid}.'.format(recids=recids,
                                           depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value, list(set(recids)))
        elif recids:  # If only one recid
            recid = recids[0]

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = list()

    for file_ in files:
        fi = FileInstance.create()
        fi.set_uri(file_['path'], file_['size'], file_['checksum'])
        ov = ObjectVersion.create(buc, file_['name'], _file_id=fi.id)
        file_meta = dict(
            bucket=str(buc.id),
            key=file_['name'],
            checksum=file_['checksum'],
            size=file_['size'],
            version_id=str(ov.version_id),
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    for idx, sip in enumerate(sips):
        agent = None
        user_id = None
        if sip['agents']:
            agent = dict(
                ip_address=sip['agents'][0].get('ip_address', ""),
                email=sip['agents'][0].get('email_address', ""),
            )
            user_id = sip['agents'][0]['user_id']
        content = sip['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format,
                             content,
                             user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exist.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            raise DepositSIPUserDoesNotExist(dep_pid.pid_value, user_id)

        # If recid was found, attach it to SIP
        # TODO: This always uses the first recid, as we quit if multiple
        # recids are found in the sips information
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=recid)
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exist.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                raise DepositRecidDoesNotExist(dep_pid.pid_value, recid)
        if idx == 0:
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)
    deposit.commit()
    db.session.commit()
    return deposit
Exemple #55
    def publish(self):
        """Publish GitHub release as record."""
        id_ = uuid.uuid4()
        deposit_metadata = dict(self.metadata)
        deposit = None
        try:
            db.session.begin_nested()
            # TODO: Add filter on Published releases
            previous_releases = self.model.repository.releases.filter_by(
                status=ReleaseStatus.PUBLISHED)
            versioning = None
            stashed_draft_child = None
            if previous_releases.count():
                last_release = previous_releases.order_by(
                        Release.created.desc()).first()
                last_recid = PersistentIdentifier.get(
                    'recid', last_release.record['recid'])
                versioning = PIDVersioning(child=last_recid)
                last_record = ZenodoRecord.get_record(
                    versioning.last_child.object_uuid)
                deposit_metadata['conceptrecid'] = last_record['conceptrecid']
                if 'conceptdoi' not in last_record:
                    last_depid = PersistentIdentifier.get(
                        'depid', last_record['_deposit']['id'])
                    last_deposit = ZenodoDeposit.get_record(
                        last_depid.object_uuid)
                    last_deposit = last_deposit.registerconceptdoi()
                    last_recid, last_record = last_deposit.fetch_published()
                deposit_metadata['conceptdoi'] = last_record['conceptdoi']
                if last_record.get('communities'):
                    deposit_metadata.setdefault('communities',
                                                last_record['communities'])
                if versioning.draft_child:
                    stashed_draft_child = versioning.draft_child
                    versioning.remove_draft_child()

            deposit = self.deposit_class.create(deposit_metadata, id_=id_)

            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True,
                                              allow_redirects=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}"
                        .format(url=url)
                    )

                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(
                user_id=self.event.user_id, sip_agent=sip_agent,
                spam_check=False)
            recid_pid, record = deposit.fetch_published()
            self.model.recordmetadata = record.model
            if versioning and stashed_draft_child:
                versioning.insert_draft_child(stashed_draft_child)
            record_id = str(record.id)
            db.session.commit()

            # Send Datacite DOI registration task
            if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
                datacite_register.delay(recid_pid.pid_value, record_id)

            # Index the record
            RecordIndexer().index_by_id(record_id)
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    current_app.logger.exception(
                        "Failed to remove uncommited deposit from index.")
            raise
def test_deposit_create_versions(app, test_records_data, test_users,
                                 login_user):
    """Test the creation of new record version draft."""
    # Use admin user in order to publish easily the records.
    login = lambda c: login_user(test_users['admin'], c)

    data = test_records_data

    # create and publish first record in a chain
    v1_draft = create_ok(app, login, data[0])
    assert 'versions' in v1_draft['links']
    check_links(app, v1_draft, [])

    v1_rec = publish(app, login, v1_draft)
    assert 'versions' in v1_rec['links']
    check_links(app, v1_rec, [v1_rec])

    # try to create a new version from an unknown pid
    res, json_data = create(app, login, data[1], version_of=uuid.uuid4().hex)
    assert res.status_code == 400

    # try to create a new version from a parent pid
    with app.app_context():
        v1_pid = PersistentIdentifier.get(pid_value=v1_rec['id'],
                                          pid_type='b2rec')
        parent_pid = PIDVersioning(child=v1_pid).parent
    res, json_data = create(app,
                            login,
                            data[1],
                            version_of=parent_pid.pid_value)
    assert res.status_code == 400

    # create and publish second record in a chain
    v2_draft = create_ok(app, login, data[1], version_of=v1_rec['id'])
    check_links(app, v2_draft, [v1_rec])
    v2_rec = publish(app, login, v2_draft)
    check_links(app, v2_rec, [v1_rec, v2_rec])

    # test error if trying to create a non-linear version chain
    res, json_data = create(app, login, data[1], version_of=v1_rec['id'])
    assert res.status_code == 400
    assert json_data['use_record'] == v2_rec['id']

    # create third record draft in a chain
    v3_draft = create_ok(app, login, data[2], version_of=v2_rec['id'])
    check_links(app, v3_draft, [v1_rec, v2_rec])

    # test error when a draft already exists in a version chain
    res, json_data = create(app, login, data[1], version_of=v2_rec['id'])
    assert res.status_code == 400
    assert json_data['goto_draft'] == v3_draft['id']

    # publish third record in a chain
    v3_rec = publish(app, login, v3_draft)
    check_links(app, v3_rec, [v1_rec, v2_rec, v3_rec])

    # create a new version without data
    # assert that data is copied from the previous version
    v4_draft = create_ok(app, login, None, v3_rec['id'])
    with app.app_context():
        record_resolver = Resolver(
            pid_type='b2rec',
            object_type='rec',
            getter=B2ShareRecord.get_record,
        )
        deposit_resolver = Resolver(
            pid_type='b2dep',
            object_type='rec',
            getter=Deposit.get_record,
        )
        v4_metadata = deposit_resolver.resolve(v4_draft['id'])[1].model.json
        v3_metadata = record_resolver.resolve(v3_rec['id'])[1].model.json

        assert copy_data_from_previous(v4_metadata) == \
            copy_data_from_previous(v3_metadata)
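
# The version chains exercised above are managed by invenio-pidrelations'
# PIDVersioning API. A hedged sketch of how a chain can be inspected once a
# record PID is at hand (assumes an application context and an existing
# 'b2rec' PID; the pid value is illustrative):
from invenio_pidrelations.contrib.versioning import PIDVersioning
from invenio_pidstore.models import PersistentIdentifier

child_pid = PersistentIdentifier.get('b2rec', 'some-record-id')
versioning = PIDVersioning(child=child_pid)
parent_pid = versioning.parent            # head PID shared by all versions
all_versions = versioning.children.all()  # ordered published versions
latest = versioning.last_child            # most recent published version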
Exemple #57
def loan_resolver(pid):
    """Loan resolver."""
    persistent_id = PersistentIdentifier.get('loanid', pid)
    if persistent_id.status == PIDStatus.REGISTERED:
        return dict(pid=persistent_id.pid_value)
    raise Exception('unable to resolve')
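
# A hedged usage sketch for loan_resolver(): assuming an application/database
# context and a registered 'loanid' PID, the resolver returns its value;
# unregistered PIDs make it raise. The pid value below is illustrative.
from invenio_pidstore.models import PIDStatus, PersistentIdentifier

PersistentIdentifier.create('loanid', 'loan-1', status=PIDStatus.REGISTERED)
assert loan_resolver('loan-1') == {'pid': 'loan-1'}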
def pids():
    """Fetch and register PIDs."""
    import click
    from flask import current_app
    from invenio_db import db
    from invenio_oaiserver.fetchers import oaiid_fetcher
    from invenio_oaiserver.minters import oaiid_minter
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.fetchers import recid_fetcher
    from invenio_pidstore.models import PIDStatus, PersistentIdentifier
    from invenio_records.models import RecordMetadata
    from sqlalchemy.orm.attributes import flag_modified

    recids = [r.id for r in RecordMetadata.query.all()]
    db.session.expunge_all()

    with click.progressbar(recids) as bar:
        for record_id in bar:
            record = RecordMetadata.query.get(record_id)
            try:
                pid = recid_fetcher(record.id, record.json)
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                db.session.add(
                    PersistentIdentifier.create(
                        pid.pid_type, pid.pid_value,
                        object_type='rec', object_uuid=record.id,
                        status=PIDStatus.REGISTERED
                    )
                )
            except KeyError:
                click.echo('Skipped: {0}'.format(record.id))
                continue

            pid_value = record.json.get('_oai', {}).get('id')
            if pid_value is None:
                assert 'control_number' in record.json
                pid_value = current_app.config.get(
                    'OAISERVER_ID_PREFIX'
                ) + str(record.json['control_number'])

                record.json.setdefault('_oai', {})
                # Use the freshly computed OAI identifier, not the recid.
                record.json['_oai']['id'] = pid_value

            pid = oaiid_fetcher(record.id, record.json)
            try:
                found = PersistentIdentifier.get(
                    pid_type=pid.pid_type,
                    pid_value=pid.pid_value,
                    pid_provider=pid.provider.pid_provider
                )
                click.echo('Found {0}.'.format(found))
            except PIDDoesNotExistError:
                pid = oaiid_minter(record.id, record.json)
                db.session.add(pid)

            flag_modified(record, 'json')
            assert record.json['_oai']['id']
            db.session.add(record)
            db.session.commit()
            db.session.expunge_all()
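
# pids() above reads like a Flask CLI helper. A hedged sketch of how it could
# be wired up with click -- the `fixtures` group name is an assumption, not
# taken from the source:
import click
from flask.cli import with_appcontext


@click.group()
def fixtures():
    """Fixture commands (illustrative group)."""


@fixtures.command('pids')
@with_appcontext
def pids_command():
    """Fetch and register PIDs for all records."""
    pids()  # the helper defined above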
Exemple #59
def pid(self):
    """Return an instance of record PID."""
    pid = self.record_fetcher(self.id, self)
    obj = PersistentIdentifier.get(pid.pid_type, pid.pid_value)
    return obj
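
# A hedged sketch of the fetcher-backed `pid` property in context. The
# enclosing class is not shown above, so `PidRecord` and its use of
# invenio_pidstore's recid_fetcher are assumptions; the fetcher derives the
# pid value from the record data.
from invenio_pidstore.fetchers import recid_fetcher
from invenio_pidstore.models import PersistentIdentifier
from invenio_records.api import Record


class PidRecord(Record):
    """Record exposing its PID via a class-level fetcher (sketch)."""

    record_fetcher = staticmethod(recid_fetcher)

    @property
    def pid(self):
        """Return an instance of record PID."""
        fetched = self.record_fetcher(self.id, self)
        return PersistentIdentifier.get(fetched.pid_type, fetched.pid_value)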
Exemple #60
def test_resolver(app):
    """Test the class methods of PersistentIdentifier class."""
    status = [
        PIDStatus.NEW,
        PIDStatus.RESERVED,
        PIDStatus.REGISTERED,
        PIDStatus.DELETED,
    ]

    with app.app_context():
        i = 1
        rec_a = uuid.uuid4()

        # Create pids for each status with and without object
        for s in status:
            PersistentIdentifier.create('recid', i, status=s)
            i += 1
            if s != PIDStatus.DELETED:
                PersistentIdentifier.create('recid',
                                            i,
                                            status=s,
                                            object_type='rec',
                                            object_uuid=rec_a)
                i += 1

        # Create a DOI
        pid_doi = PersistentIdentifier.create('doi',
                                              '10.1234/foo',
                                              status=PIDStatus.REGISTERED,
                                              object_type='rec',
                                              object_uuid=rec_a)

        # Create redirects
        pid = PersistentIdentifier.create('recid',
                                          i,
                                          status=PIDStatus.REGISTERED)
        i += 1
        pid.redirect(PersistentIdentifier.get('recid', '2'))
        pid = PersistentIdentifier.create('recid',
                                          i,
                                          status=PIDStatus.REGISTERED)
        pid.redirect(pid_doi)
        db.session.commit()

        # Start tests
        resolver = Resolver(pid_type='recid',
                            object_type='rec',
                            getter=lambda x: x)

        # Resolve non-existing pid
        pytest.raises(PIDDoesNotExistError, resolver.resolve, '100')
        pytest.raises(PIDDoesNotExistError, resolver.resolve, '10.1234/foo')

        # Resolve status new
        pytest.raises(PIDUnregistered, resolver.resolve, '1')
        pytest.raises(PIDUnregistered, resolver.resolve, '2')

        # Resolve status reserved
        pytest.raises(PIDUnregistered, resolver.resolve, '3')
        pytest.raises(PIDUnregistered, resolver.resolve, '4')

        # Resolve status registered
        pytest.raises(PIDMissingObjectError, resolver.resolve, '5')
        pid, obj = resolver.resolve('6')
        assert pid and obj == rec_a

        # Resolve status deleted
        pytest.raises(PIDDeletedError, resolver.resolve, '7')

        # Resolve status redirected
        try:
            resolver.resolve('8')
            assert False
        except PIDRedirectedError as e:
            assert e.destination_pid.pid_type == 'recid'
            assert e.destination_pid.pid_value == '2'

        try:
            resolver.resolve('9')
            assert False
        except PIDRedirectedError as e:
            assert e.destination_pid.pid_type == 'doi'
            assert e.destination_pid.pid_value == '10.1234/foo'

        doiresolver = Resolver(pid_type='doi',
                               object_type='rec',
                               getter=lambda x: x)
        pytest.raises(PIDDoesNotExistError, doiresolver.resolve, '1')
        pid, obj = doiresolver.resolve('10.1234/foo')
        assert pid and obj == rec_a
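
# Outside of tests, redirects are normally followed rather than treated as
# failures. A hedged sketch of the usual pattern, reusing the Resolver and
# PIDRedirectedError names from the test above:
def resolve_following_redirect(resolver, pid_value):
    """Resolve a PID, following at most one redirect (illustrative)."""
    try:
        return resolver.resolve(pid_value)
    except PIDRedirectedError as e:
        return resolver.resolve(e.destination_pid.pid_value)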