Example #1
    def validate_doi(self, value):
        """Validate if doi exists."""
        if value and has_request_context():
            required_doi = self.context.get('required_doi')
            if value == required_doi:
                return

            err = ValidationError(_('DOI already exists in Zenodo.'),
                                  field_names=['doi'])

            try:
                doi_pid = PersistentIdentifier.get('doi', value)
            except PIDDoesNotExistError:
                return

            # If the DOI exists, check whether it has been assigned to this
            # record by fetching the recid and comparing both PIDs' record
            # UUIDs.
            try:
                recid_pid = PersistentIdentifier.get(
                    'recid', self.context['recid'])
            except PIDDoesNotExistError:
                # There's no way to verify if this DOI belongs to this record
                raise err

            doi_uuid = doi_pid.get_assigned_object()
            recid_uuid = recid_pid.get_assigned_object()

            if doi_uuid and doi_uuid == recid_uuid:
                return
            else:  # DOI exists and belongs to a different record
                raise err
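The validator above is written as a Marshmallow schema hook. Below is a minimal sketch of how such a hook is typically attached in Marshmallow 2.x; the schema class and field set are illustrative assumptions, since the surrounding Zenodo schema is not shown in this excerpt.
from marshmallow import Schema, fields, validates


class IllustrativeRecordSchema(Schema):
    """Illustrative schema; only the DOI field is shown."""

    doi = fields.Str()

    @validates('doi')
    def validate_doi(self, value):
        # Body as in Example #1: compare against the expected DOI from the
        # context, then look the value up in PIDStore and reject it if it
        # already belongs to a different record.
        pass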
Example #2
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Example #3
def zenodo_doi_updater(record_uuid, data):
    """Update the DOI (only external DOIs)."""
    assert 'recid' in data
    doi = data.get('doi')
    assert doi
    assert idutils.is_doi(doi)

    # If the DOI is the same as an already generated one, do nothing
    if doi == doi_generator(data['recid']):
        return
    if is_local_doi(doi):  # Zenodo DOI, but different than recid
        # ERROR, user provided a custom ZENODO DOI!
        raise PIDValueError('doi', doi)

    doi_pid = PersistentIdentifier.get_by_object(
        pid_type='doi', object_type='rec', object_uuid=record_uuid)

    if doi_pid.pid_value != doi:
        with db.session.begin_nested():
            db.session.delete(doi_pid)
            return PersistentIdentifier.create(
                'doi',
                doi,
                object_type='rec',
                object_uuid=record_uuid,
                status=PIDStatus.RESERVED,
            )
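A minimal usage sketch for the updater above, assuming the record has already been minted so that a DOI PID is attached to it; the identifier values are hypothetical.
from invenio_pidstore.models import PersistentIdentifier

recid_pid = PersistentIdentifier.get('recid', '12345')
record_uuid = recid_pid.get_assigned_object()
# An external (non-Zenodo) DOI supplied by the user.
data = {'recid': 12345, 'doi': '10.1234/external-doi'}
new_doi_pid = zenodo_doi_updater(record_uuid, data)
# `new_doi_pid` is the newly reserved PID, or None if nothing changed.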
Example #4
def test_published_external_doi(db, deposit, deposit_file):
    """Test published external DOI."""
    ext_doi1 = '10.1234/foo'
    ext_doi2 = '10.1234/bar'
    deposit['doi'] = ext_doi1
    deposit.publish()
    db.session.commit()

    # Published record with external DOI must have:
    # 1) a registered recid with object
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    assert recid and recid.status == PIDStatus.REGISTERED \
        and recid.has_object()
    # 2) a reserved external doi with object
    doi = PersistentIdentifier.get('doi', ext_doi1)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()

    # Now change external DOI.
    deposit = deposit.edit()
    deposit['doi'] = ext_doi2
    deposit.publish()
    db.session.commit()

    # Ensure DOI 1 has been removed.
    pytest.raises(
        PIDDoesNotExistError, PersistentIdentifier.get, 'doi', ext_doi1)

    # Ensure DOI 2 has been reserved.
    doi = PersistentIdentifier.get('doi', ext_doi2)
    assert doi and doi.status == PIDStatus.RESERVED \
        and doi.has_object()
def test_deposit_index(db, es):
    """Test update embargoed records."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    rec1 = Record.create({
        'title': 'One',
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    PersistentIdentifier.create(pid_type='recid', pid_value='1',
                                status=PIDStatus.REGISTERED,
                                object_uuid=rec1.id, object_type='rec')
    Deposit.create({
        '_deposit': {
            'status': 'published',
            'pid': {
                'type': 'recid',
                'value': '1'
            }
        }
    })
    db.session.commit()
    current_search.flush_and_refresh(deposit_index_name)
    res = current_search.client.search(index=deposit_index_name)
    # Make sure the 'title' was indexed from record
    assert res['hits']['hits'][0]['_source']['title'] == 'One'
Example #6
    def _mint_pid(obj, dummy_eng):
        d = Deposition(obj)
        recjson = d.get_latest_sip(sealed=False).metadata

        if 'recid' not in recjson:
            raise Exception("'recid' not found in sip metadata.")

        pid_text = None
        pid = recjson.get(pid_field, None)
        if not pid:
            # No pid found in recjson, so create a new pid with the
            # user-supplied function.
            pid_text = recjson[pid_field] = pid_creator(recjson)
        else:
            # Pid found - check if it should be minted
            if existing_pid_checker and existing_pid_checker(pid, recjson):
                pid_text = pid

        # Create and assign the PID internally - the actual registration will
        # happen asynchronously later.
        if pid_text:
            current_app.logger.info("Registering pid %s" % pid_text)
            pid_obj = PersistentIdentifier.create(pid_store_type, pid_text)
            if pid_obj is None:
                pid_obj = PersistentIdentifier.get(pid_store_type, pid_text)

            try:
                pid_obj.assign("rec", recjson['recid'])
            except Exception:
                register_exception(alert_admin=True)

        d.update()
Example #7
    def create_deposit_and_record(pid_value, owner):
        """Utility function for creating records and deposits."""
        recid = PersistentIdentifier.create(
            'recid', pid_value, status=PIDStatus.RESERVED)
        pv = PIDVersioning(parent=conceptrecid)
        pv.insert_draft_child(recid)

        depid = PersistentIdentifier.create(
            'depid', pid_value, status=PIDStatus.REGISTERED)
        deposit = ZenodoRecord.create({'_deposit': {'id': depid.pid_value},
                                       'conceptrecid': conceptrecid.pid_value,
                                       'recid': recid.pid_value})
        deposit.commit()
        depid.assign('rec', deposit.id)

        record_metadata = deepcopy(minimal_record)
        record_metadata['_deposit'] = {'id': depid.pid_value}
        record_metadata['conceptrecid'] = conceptrecid.pid_value
        record_metadata['recid'] = int(recid.pid_value)
        record_metadata['owners'] = [owner.id]
        record = ZenodoRecord.create(record_metadata)
        zenodo_record_minter(record.id, record)
        record.commit()

        return (depid, deposit, recid, record)
def setup_record_fixture(app):
    """Setup a record fixture."""
    records = []

    def _create_pid(record):
        pid = PersistentIdentifier.create(
            'recid', record['recid'], pid_provider='recid')
        pid.assign('rec', record['recid'])
        pid.register()

    with before_record_insert.connected_to(_create_pid):
        with app.app_context():
            records.append(Record.create(
                {'title': 'Test record 1', 'recid': 1},
                identifier_key='recid'
            ))
            records.append(Record.create(
                {'title': 'Test record 2', 'recid': 2},
                identifier_key='recid'
            ))
            pid = PersistentIdentifier.create('recid', 3, pid_provider='recid')
            db.session.add(pid)
            db.session.commit()

            pid = PersistentIdentifier.get('recid', 2, pid_provider='recid')
            pid.delete()
            db.session.commit()

    return records
def add_drafts_from_file(file_path, schema,
                         egroup=None, user=None, limit=None):
    """Adds drafts from a specified file.

    Drafts with specified pid will be registered under those.
    For drafts without pid, new pids will be minted.
    """
    with open(file_path, 'r') as fp:
        entries = json.load(fp)

        for entry in entries[0:limit]:
            data = construct_draft_obj(schema, entry)
            pid = cap_deposit_fetcher(None, data)
            pid_value = pid.pid_value if pid else None

            try:
                PersistentIdentifier.get('depid', pid_value)

                print('Draft with id {} already exists!'.format(pid_value))

            except PIDDoesNotExistError:
                record_uuid = uuid.uuid4()
                pid = cap_deposit_minter(record_uuid, data)
                if user:
                    user = User.query.filter_by(email=user).one()
                if egroup:
                    role = Role.query.filter_by(name=egroup).one()
                deposit = CAPDeposit.create(data, record_uuid, user)
                deposit.commit()

                if egroup:
                    add_read_permission_for_egroup(deposit, egroup)

                print('Draft {} added.'.format(pid.pid_value))
def test_appoint_profile_from_claimed_signature(small_app):
    """Check the module for the case where claimed signature takes
    everything.
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    old_record_id = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    old_record["authors"][0]["signature_block"] = "HAGp"
    old_record["authors"][0]["recid"] = "2"
    es.index(index="records-hep", doc_type="hep", id=old_record_id, body=old_record)
    es.indices.refresh("records-hep")

    record_id = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record["authors"][0]["uuid"]

    # Add phonetic block to the record.
    record["authors"][0]["signature_block"] = "HAGp"
    record["authors"][0]["recid"] = "314159265"
    record["authors"][0]["curated_relation"] = True
    es.index(index="records-hep", doc_type="hep", id=record_id, body=record)
    es.indices.refresh("records-hep")

    with patch("celery.current_app.send_task", return_value=_BeardObject(({"2": [old_author_uuid, author_uuid]}, {}))):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid
        ):
            disambiguation_clustering("HAGp")

    assert Record.get_record(old_record_id)["authors"][0]["recid"] == "314159265"
    assert Record.get_record(record_id)["authors"][0]["recid"] == "314159265"
Example #11
def test_record_page(app, db, es, event_queues, full_record):
    """Test record page views."""
    full_record['conceptdoi'] = '10.1234/foo.concept'
    full_record['conceptrecid'] = 'foo.concept'
    r = Record.create(full_record)
    PersistentIdentifier.create(
        'recid', '12345', object_type='rec', object_uuid=r.id,
        status=PIDStatus.REGISTERED)
    db.session.commit()

    with app.test_client() as client:
        record_url = url_for('invenio_records_ui.recid', pid_value='12345')
        assert client.get(record_url).status_code == 200

    process_events(['record-view'])
    current_search.flush_and_refresh(index='events-stats-record-view')

    search = Search(using=es, index='events-stats-record-view')
    assert search.count() == 1
    doc = search.execute()[0]
    assert doc['doi'] == '10.1234/foo.bar'
    assert doc['conceptdoi'] == '10.1234/foo.concept'
    assert doc['recid'] == '12345'
    assert doc['conceptrecid'] == 'foo.concept'
    assert doc['resource_type'] == {'type': 'publication', 'subtype': 'book'}
    assert doc['access_right'] == 'open'
    assert doc['communities'] == ['zenodo']
    assert doc['owners'] == [1]
def test_register(logger, app):
    """Test pid register."""
    with app.app_context():
        i = 1
        for s in [PIDStatus.NEW, PIDStatus.RESERVED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            assert pid.register()
            assert logger.info.call_args[0][0].startswith(
                "Registered PID")
        for s in [PIDStatus.REGISTERED, PIDStatus.DELETED,
                  PIDStatus.REDIRECTED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            pytest.raises(PIDInvalidAction, pid.register)

        # Test logging of bad errors.
        pid = PersistentIdentifier.create('rec', str(i),
                                          status=PIDStatus.RESERVED)
        with patch('invenio_pidstore.models.db.session.begin_nested') as mock:
            mock.side_effect = SQLAlchemyError()
            pytest.raises(SQLAlchemyError, pid.register)
            assert logger.exception.call_args[0][0].startswith(
                "Failed to register")
            assert 'pid' in logger.exception.call_args[1]['extra']
def test_app_fixture_lacks_db_isolation_step2(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count + 1
    # Force the cleanup.
    PersistentIdentifier.get(
        pid_type='type1',
        pid_value='value1',
    ).delete()
Example #14
    def create_pids(cls, dump, deposit):
        """Create a persistent identifiers."""
        # Mark deposit deleted if recid is deleted.
        recid = dump.recid_pid
        # Create depid
        depid = PersistentIdentifier.create(
            pid_type='depid',
            pid_value=str(dump.depid),
            object_type='rec',
            object_uuid=deposit.id,
            status=PIDStatus.REGISTERED
        )
        if recid and recid.status == PIDStatus.DELETED:
            depid.delete()
        if RecordIdentifier.query.get(dump.depid) is None:
            RecordIdentifier.insert(dump.depid)

        # Pre-reserved recid.
        if not recid and dump.recid:
            if dump.has_pid:
                # Published deposit without a recid (this is an upload which
                # never got ingested, so we set it back to draft status and
                # reserve the recid).
                pass
            recid = PersistentIdentifier.create(
                pid_type='recid',
                pid_value=str(dump.recid),
                status=PIDStatus.RESERVED
            )
            if RecordIdentifier.query.get(dump.recid) is None:
                RecordIdentifier.insert(dump.recid)

        return depid, recid
def test_delete(logger, app):
    """Test pid delete."""
    with app.app_context():
        i = 1
        for s in [PIDStatus.RESERVED, PIDStatus.RESERVED,
                  PIDStatus.REDIRECTED, PIDStatus.DELETED]:
            pid = PersistentIdentifier.create('rec', str(i), status=s)
            i += 1
            assert pid.delete()
            assert logger.info.call_args[0][0] == "Deleted PID."

        # New persistent identifiers are removed completely
        count = PersistentIdentifier.query.count()
        pid = PersistentIdentifier.create('rec', str(i), status=PIDStatus.NEW)
        db.session.commit()
        assert PersistentIdentifier.query.count() == count + 1
        pid.delete()
        assert PersistentIdentifier.query.count() == count
        assert logger.info.call_args[0][0] == "Deleted PID (removed)."

        pid = PersistentIdentifier.create('rec', str(i+1))
        with patch('invenio_pidstore.models.db.session.begin_nested') as mock:
            mock.side_effect = SQLAlchemyError()
            pytest.raises(SQLAlchemyError, pid.delete)
            assert logger.exception.call_args[0][0].startswith(
                "Failed to delete")
            assert 'pid' in logger.exception.call_args[1]['extra']
Example #16
def grant_records(db, funder_record):
    """Create grant records."""
    grants = [
        Record.create(dict(
            internal_id='10.13039/501100000780::282896',
            funder={'$ref': 'https://dx.doi.org/10.13039/501100000780'},
            identifiers=dict(
                eurepo='info:eu-repo/grantAgreement/EC/FP7/282896',
            ),
            code='282896',
            title='Open Access Research Infrastructure in Europe',
            acronym='OpenAIREplus',
            program='FP7',
        )),
        Record.create(dict(
            internal_id='10.13039/501100000780::027819',
            funder={'$ref': 'https://dx.doi.org/10.13039/501100000780'},
            identifiers=dict(
                eurepo='info:eu-repo/grantAgreement/EC/FP6/027819',
            ),
            code='027819',
            title='Integrating cognition, emotion and autonomy',
            acronym='ICEA',
            program='FP6',
        )),
    ]
    for g in grants:
        PersistentIdentifier.create(
            pid_type='grant', pid_value=g['internal_id'], object_type='rec',
            object_uuid=g.id, status='R')
    db.session.commit()
    return grants
Example #17
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        :param delete_published: If True, the deposit of a published record
            will also be deleted (usually used by admin operations).
        :type delete_published: bool
        """
        is_published = self['_deposit'].get('pid')
        if is_published and not delete_published:
            raise PIDInvalidAction()

        # Delete the recid
        recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        versioning = PIDVersioning(child=recid)
        if versioning.exists:
            if versioning.draft_child and \
                    self.pid == versioning.draft_child_deposit:
                versioning.remove_draft_child()
            if versioning.last_child:
                index_siblings(versioning.last_child,
                               children=versioning.children.all(),
                               include_pid=True,
                               neighbors_eager=True,
                               with_deposits=True)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        if 'conceptrecid' in self:
            concept_recid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_recid.status == PIDStatus.RESERVED:
                db.session.delete(concept_recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
            ).delete(synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        depid = kwargs.get('pid', self.pid)
        if depid:
            depid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record,
        # since we need to completely override everything that the
        # Deposit.delete method does.
        return super(Deposit, self).delete(*args, **kwargs)
def generate_recid(experiment):
    """CAP Pid generator."""
    while True:
        pid_value = random_pid(experiment)
        try:
            PersistentIdentifier.get('recid', pid_value)
        except PIDDoesNotExistError:
            return pid_value
def generate_doi(prefix, experiment=None):
    """Generate random DOI, unique within PIDStore."""
    while True:
        doi = random_doi(prefix, experiment)
        try:
            PersistentIdentifier.get('doi', doi)
        except PIDDoesNotExistError:
            return doi
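Both helpers above rely on the same idiom: keep probing PIDStore until the lookup raises PIDDoesNotExistError. A hedged, generic sketch of that idiom follows, where candidate_values is a hypothetical iterable of identifier candidates.
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier


def first_unused_pid_value(pid_type, candidate_values):
    """Return the first candidate value not yet present in PIDStore."""
    for value in candidate_values:
        try:
            PersistentIdentifier.get(pid_type, value)
        except PIDDoesNotExistError:
            return value
    return None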
def test_delete_with_sqldatabase_error(app):
    """Test VALID record delete request (GET .../records/<record_id>)."""
    with app.app_context():
        # create the record using the internal API
        pid, record = create_record(test_data)
        db.session.expire(record.model)
        pid_value = pid.pid_value
        pid_type = pid.pid_type
        record_id = record.id

        db.session.commit()
        Record.get_record(record_id)

        def raise_exception():
            raise SQLAlchemyError()

        with app.test_client() as client:
            # start a new SQLAlchemy session so that it will rollback
            # everything
            nested_transaction = db.session().transaction
            orig_rollback = nested_transaction.rollback
            flags = {'rollbacked': False}

            def custom_rollback(*args, **kwargs):
                flags['rollbacked'] = True
                orig_rollback(*args, **kwargs)
            nested_transaction.rollback = custom_rollback

            with patch.object(PersistentIdentifier, 'delete',
                              side_effect=raise_exception):
                headers = [('Accept', 'application/json')]
                res = client.delete(url_for('invenio_records_rest.recid_item',
                                            pid_value=pid_value),
                                    headers=headers)
                assert res.status_code == 500
            # check that the transaction is finished
            assert db.session().transaction is not nested_transaction
            # check that the session has rolled back
            assert flags['rollbacked']

    with app.app_context():
        with app.test_client() as client:
            # check that the record and PID have not been deleted
            Record.get_record(record_id)
            assert not PersistentIdentifier.get(pid_type,
                                                pid_value).is_deleted()
            # try to delete without exception; the transaction should have
            # been rolled back
            headers = [('Accept', 'application/json')]
            res = client.delete(url_for('invenio_records_rest.recid_item',
                                        pid_value=pid_value),
                                headers=headers)
            assert res.status_code == 204
            # check database state
            with pytest.raises(NoResultFound):
                Record.get_record(record_id)
            assert PersistentIdentifier.get(pid_type,
                                            pid_value).is_deleted()
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
def test_isolated_app_fixture_rollback(isolated_app):
    pids_count = PersistentIdentifier.query.count()

    PersistentIdentifier.create(
        pid_type='type1',
        pid_value='value1',
    )
    db.session.rollback()
    assert PersistentIdentifier.query.count() == pids_count
def test_funder_ep_resolving(app, db):
    """Test funder resolving through entry point-registered JSON resolver."""
    json1 = {"internal_id": "10.13039/001", "parent": "", "name": "Foo"}
    json2 = {"internal_id": "10.13039/002", "parent": {"$ref": "http://dx.doi.org/10.13039/001"}, "name": "Bar"}
    r1 = R.create(json1)
    PID.create("frdoi", json1["internal_id"], object_type="rec", object_uuid=r1.id, status=PIDStatus.REGISTERED)
    r2 = R.create(json2)
    PID.create("frdoi", json2["internal_id"], object_type="rec", object_uuid=r2.id, status=PIDStatus.REGISTERED)
    assert r2.replace_refs()["parent"] == json1
def test_app_fixture_lacks_db_isolation_step1(pids_count, app):
    assert PersistentIdentifier.query.count() == pids_count

    PersistentIdentifier.create(
        pid_type='type1',
        pid_value='value1',
    )
    # The number of PIDs must have incremented.
    assert PersistentIdentifier.query.count() == pids_count + 1
    def missing_pids(self):
        """Return the persistent identifiers that are missing from PIDStore."""
        missing = []
        for p in self.pids:
            try:
                PersistentIdentifier.get(p.pid_type, p.pid_value)
            except PIDDoesNotExistError:
                missing.append(p)
        return missing
def test_tombstone(app):
    """Test tomstones."""
    with app.app_context():
        # OK PID
        pid_ok, record = create_record({'title': 'test'})

        # Deleted PID
        pid_del, record = create_record({'title': 'deleted'})
        pid_del.delete()

        # Missing object PID
        pid_noobj = PersistentIdentifier.create(
            'recid', '100', status=PIDStatus.REGISTERED)
        db.session.commit()

        # Redirected PID
        pid_red = PersistentIdentifier.create(
            'recid', '101', status=PIDStatus.REGISTERED)
        pid_red.redirect(pid_ok)

        # Redirect PID - different endpoint
        pid_doi = PersistentIdentifier.create(
            'doi', '10.1234/foo', status=PIDStatus.REGISTERED)
        pid_red_doi = PersistentIdentifier.create(
            'recid', '102', status=PIDStatus.REGISTERED)
        pid_red_doi.redirect(pid_doi)
        db.session.commit()

        with app.test_client() as client:
            # PID deleted
            headers = [('Accept', 'application/json')]
            res = client.get(
                url_for('invenio_records_rest.recid_item',
                        pid_value=pid_del.pid_value),
                headers=headers)
            assert res.status_code == 410

            # PID missing object
            res = client.get(
                url_for('invenio_records_rest.recid_item',
                        pid_value=pid_noobj.pid_value),
                headers=headers)
            assert res.status_code == 500

            # Redirected invalid endpoint
            res = client.get(
                url_for('invenio_records_rest.recid_item',
                        pid_value=pid_red_doi.pid_value),
                headers=headers)
            assert res.status_code == 500

            # Redirected
            res = client.get(
                url_for('invenio_records_rest.recid_item',
                        pid_value=pid_red.pid_value),
                headers=headers)
            assert res.status_code == 301
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed
    signatures are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import disambiguation_clustering, update_authors_recid

    # Claimed signature #1.
    glashow_record_id_claimed = str(PersistentIdentifier.get("literature", 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    glashow_record_claimed["authors"][0]["signature_block"] = "HAGp"
    glashow_record_claimed["authors"][0]["curated_relation"] = True
    glashow_record_claimed["authors"][0]["recid"] = "3"
    es.index(index="records-hep", doc_type="hep", id=glashow_record_id_claimed, body=glashow_record_claimed)
    es.indices.refresh("records-hep")

    # Claimed signature #2.
    higgs_record_id_claimed = str(PersistentIdentifier.get("literature", 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_claimed["authors"][0]["signature_block"] = "HAGp"
    higgs_record_claimed["authors"][0]["curated_relation"] = True
    higgs_record_claimed["authors"][0]["recid"] = "4"
    es.index(index="records-hep", doc_type="hep", id=higgs_record_id_claimed, body=higgs_record_claimed)
    es.indices.refresh("records-hep")

    # Not claimed signature.
    higgs_record_id_not_claimed = str(PersistentIdentifier.get("literature", 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed["authors"][0]["uuid"]

    # Add phonetic block to the record.
    higgs_record_not_claimed["authors"][0]["signature_block"] = "HAGp"
    es.index(index="records-hep", doc_type="hep", id=higgs_record_id_not_claimed, body=higgs_record_not_claimed)
    es.indices.refresh("records-hep")

    with patch(
        "celery.current_app.send_task",
        return_value=_BeardObject(
            ({"3": [glashow_record_uuid_claimed, higgs_record_uuid_claimed, higgs_record_uuid_not_claimed]}, {})
        ),
    ):
        with patch(
            "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
            return_value=_ConflictObject({higgs_record_uuid_claimed: [higgs_record_uuid_not_claimed]}),
        ):
            with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay", side_effect=update_authors_recid
            ):
                disambiguation_clustering("HAGp")

    assert Record.get_record(higgs_record_id_not_claimed)["authors"][0]["recid"] == "4"
def test_file_download_ui(base_app, objects, db):
    """Test get buckets."""
    app = base_app
    app.config.update(dict(
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_files_rest.views.file_download_ui',
            ),
        )
    ))
    InvenioRecords(app)
    InvenioPIDStore(app)
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.app_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        Record.create({
            'title': 'Registered',
            'recid': 1,
            'files': [
                {'filename': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
def test_filter_uuid(app):
    """Test FilterUUID."""
    with app.app_context():
        myuuid = uuid.uuid4()
        PersistentIdentifier.create(
            'doi', '10.1234/a', object_type='tst', object_uuid=myuuid)

        query = FilterUUID(PersistentIdentifier.object_uuid, 'Test').apply(
            PersistentIdentifier.query, str(myuuid), None)
        assert query.count() == 1
Example #30
def test_file_permissions(app, db, test_object,  # fixtures
                          user, access_right, expected):
    """Test file permissions."""
    # Create test users
    admin = User(email='*****@*****.**', password='******')
    owner = User(email='*****@*****.**', password='******')
    auth = User(email='*****@*****.**', password='******')
    db.session.add_all([admin, owner, auth])
    db.session.add(
        ActionUsers.allow(ActionNeed('admin-access'), user=admin)
    )

    # Create test record
    rec_uuid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1',
        object_type='rec',
        object_uuid=rec_uuid,
        status=PIDStatus.REGISTERED
    )
    Record.create({
        'recid': 1,
        'owners': [2],
        'access_right': access_right,
        '_files': [
            {
                'key': test_object.key,
                'bucket': str(test_object.bucket_id),
                'checksum': 'invalid'
            },
        ]
    }, id_=rec_uuid)
    db.session.add(
        RecordsBuckets(record_id=rec_uuid, bucket_id=test_object.bucket_id)
    )

    file_url = url_for(
        'invenio_records_ui.recid_files',
        pid_value='1',
        filename=test_object.key
    )

    db.session.commit()

    with app.test_client() as client:
        if user:
            # Login as user
            with client.session_transaction() as sess:
                sess['user_id'] = User.query.filter_by(
                    email='{}@zenodo.org'.format(user)).one().id
                sess['_fresh'] = True

        res = client.get(file_url)
        assert res.status_code == expected
Example #31
    def has_record(self, record, pid=None, scope='any'):
        """Check if record is in a community.

        :type scope: str
        :param scope: Can take values 'any', 'all' or 'this'.
            * 'all': returns True if all record versions are in the community.
            * 'any': returns True if any of the record versions are in the
                community.
            * 'this': returns True if the specified 'record' is in the
                community.
        """
        if not pid:
            pid = PersistentIdentifier.get('recid', record['recid'])

        pv = PIDVersioning(child=pid)
        if scope == 'this':
            return self.community.has_record(record)
        q = (self.community.has_record(
            ZenodoRecord.get_record(p.get_assigned_object()))
             for p in pv.children)
        if scope == 'all':
            return all(q)
        if scope == 'any':
            return any(q)
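A short usage sketch for the scope parameter documented above; community_wrapper stands in for whichever API class defines has_record(), which is not shown in this excerpt.
pid = PersistentIdentifier.get('recid', record['recid'])
# True only if every version of the record is in the community.
all_versions_in = community_wrapper.has_record(record, pid=pid, scope='all')
# True if at least one version is in the community.
any_version_in = community_wrapper.has_record(record, pid=pid, scope='any')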
Example #32
def test_delete_member(reindex, db, minimal_organisation_record,
                       minimal_member_record):
    """Test OrganisationsMembers delete."""
    org = OrganisationWithMembers.create(minimal_organisation_record,
                                         dbcommit=True)
    member = MemberWithLocations.create(minimal_member_record, dbcommit=True)
    org.add_member(member, dbcommit=True)
    pid = PersistentIdentifier.get_by_object('memb', 'rec', member.id)
    assert pid.is_registered()
    org.remove_member(member)
    assert pid.is_deleted()
    assert org.members == []

    member1 = MemberWithLocations.create(minimal_member_record, dbcommit=True)
    org.add_member(member1, dbcommit=True)
    member2 = MemberWithLocations.create(minimal_member_record, dbcommit=True)
    org.add_member(member2, dbcommit=True)
    member3 = MemberWithLocations.create(minimal_member_record, dbcommit=True)
    org.add_member(member3, dbcommit=True)
    org.remove_member(member2)
    assert len(org.members) == 2
    assert org.members[0]['pid'] == '2'
    assert org.members[1]['pid'] == '4'
    def test_apa_citation(self, config, create_record):
        """Integration test with the citation serializer.

        This validates we are passing the right input and getting a citation
        from the underlying library. Formatting of the citation is left to the
        3rd-party citeproc-py library.
        """
        record = create_record({
            'authors': [{
                'first_name': 'Jane',
                'middle_name': 'Rachel',
                'last_name': 'Doe',
                'full_name': 'Doe, Jane Rachel'
            }, {
                'first_name': 'John',
                'last_name': 'Smith',
                'full_name': 'Smith, John'
            }],
            'resource_type': {
                'general': 'dataset',
                'specific': 'dataset',
                'full_hierarchy': ['dataset']
            }
        })
        record['doi'] = '10.5072/qwer-tyui'
        pid = PersistentIdentifier.get(
            record['_deposit']['pid']['type'],
            record['_deposit']['pid']['value'],
        )

        citation_str = citeproc_v1.serialize(pid, record, style='apa')

        assert citation_str == ("Doe, J., & Smith, J. ({year}). "
                                "A title [Data set]. {publisher}. "
                                "http://doi.org/10.5072/qwer-tyui".format(
                                    year=from_isodate(record.created).year,
                                    publisher=config['DOI_PUBLISHER']))
def merge_articles(obj, eng):
    """Merge two articles.

    The workflow payload is overwritten by the merged record, and the conflicts
    are stored in ``extra_data.conflicts``. A ``callback_url`` pointing to the
    endpoint that resolves the merge conflicts is also added.

    Note:
        For the time being the ``root`` will be ignored, and we'll rely only
        on the ``head``, hence it is a rootless implementation. Also when
        the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is ``False`` it
        will skip the merge.

    """
    if not current_app.config.get('FEATURE_FLAG_ENABLE_MERGER'):
        return None

    matched_control_number = obj.extra_data['matches']['approved']

    head_uuid = PersistentIdentifier.get('lit',
                                         matched_control_number).object_uuid

    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    root = {}
    update = obj.data

    merged, conflicts = merge(head=head.dumps(), root=root, update=update)

    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = \
            get_resolve_merge_conflicts_callback_url()
    obj.save()
Example #35
def load_github_releases(releases_file):
    """Load GitHub releases information.

    Updates the missing releases and the submission dates.
    """
    from invenio_github.models import Release, Repository, ReleaseStatus
    from sqlalchemy.orm.exc import NoResultFound
    import arrow
    import json
    releases_db = json.load(releases_file)
    with click.progressbar(releases_db) as releases:
        for release in releases:
            repo_name, new_repo_name, gh_repo_id, ra_id, user_id, dep = release
            try:
                repo = Repository.query.filter_by(github_id=gh_repo_id).one()
            except NoResultFound:
                repo = Repository.create(user_id=user_id,
                                         github_id=gh_repo_id,
                                         name=new_repo_name)
            pid = PersistentIdentifier.get(pid_type='recid',
                                           pid_value=str(dep['record_id']))
            rel = Release.query.filter_by(
                repository_id=repo.id,
                record_id=pid.get_assigned_object()).first()
            created = arrow.get(dep['submitted']).datetime.replace(tzinfo=None)
            if rel:
                rel.created = created
            else:
                rel = Release(tag=dep['github_ref'],
                              errors=dep['errors'],
                              record_id=pid.get_assigned_object(),
                              repository_id=repo.id,
                              status=ReleaseStatus.PUBLISHED,
                              created=created)
                db.session.add(rel)
        db.session.commit()
Example #36
    def accept_record(self, record, pid=None):
        """Accept the record and all of its versions into the community.

        :type record: zenodo.modules.records.api.ZenodoRecord
        :param pid: PID of type 'recid'
        :type pid: invenio_pidstore.models.PersistentIdentifier
        """
        if not pid:
            pid = PersistentIdentifier.get('recid', record['recid'])
        with db.session.begin_nested():
            pending_q = self.get_comm_irs(record, pid=pid)
            if not pending_q.count():
                raise InclusionRequestMissingError(community=self,
                                                   record=record)

            pv = PIDVersioning(child=pid)
            for child in pv.children.all():
                rec = ZenodoRecord.get_record(child.get_assigned_object())
                self.community.add_record(rec)
                rec.commit()

                if request:

                    @after_this_request
                    def send_signals(response):
                        try:
                            record_accepted.send(
                                current_app._get_current_object(),
                                record_id=rec.id,
                                community_id=self.community.id,
                            )
                        except Exception:
                            pass
                        return response

            pending_q.delete(synchronize_session=False)
Example #37
def rerodoc_redirection(pid, filename=None):
    """Redirection to document with identifier from RERODOC.

    :param pid: PID from RERODOC.
    :returns: A redirection to record's detail page or 404 if not found.
    """
    try:
        pid = PersistentIdentifier.get('rerod', pid)
    except Exception:
        abort(404)

    # File URLs do not contain the view
    if filename:
        return redirect(
            url_for('invenio_records_ui.doc_files',
                    pid_value=pid.get_redirect().pid_value,
                    filename=filename))
    doc_pid = pid.get_redirect().pid_value
    doc = DocumentRecord.get_record_by_pid(doc_pid)
    if doc:
        doc = doc.replace_refs()
        orgs = doc.get('organisation', [])
        # In case of multiple organisations we redirect to the global view
        if len(orgs) == 1:
            org = orgs.pop()
            # Only for dedicated or shared
            if org.get('isDedicated') or org.get('isShared'):
                return redirect(
                    url_for('invenio_records_ui.doc',
                            view=org.get('code'),
                            pid_value=pid.get_redirect().pid_value))
    global_view = current_app.config.get('SONAR_APP_DEFAULT_ORGANISATION')
    return redirect(
        url_for('invenio_records_ui.doc',
                view=global_view,
                pid_value=pid.get_redirect().pid_value))
Example #38
def test_datacite_register_fail(mocker, app, db, es, minimal_record):
    # Make the datacite API unavailable
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient')
    dc_mock().metadata_post.side_effect = datacite.errors.HttpError()

    # Create a reserved recid
    record = Record.create(minimal_record)
    record_uuid = record.id
    recid = record['recid']
    recid_pid = PersistentIdentifier.create(
        'recid', recid, status=PIDStatus.RESERVED)

    # Mint the record
    zenodo_record_minter(record_uuid, record)
    record.commit()
    db.session.commit()

    with pytest.raises(datacite.errors.HttpError):
        datacite_register.apply((recid_pid.pid_value, str(record_uuid)))

    # Check that the task was retried ("max_retries" + 1) times
    dc_calls = len(dc_mock().metadata_post.mock_calls)
    assert dc_calls == datacite_register.max_retries + 1
Example #39
    def clean(self, deposit_id, version_id, *args, **kwargs):
        """Undo metadata extraction."""
        # 1. Revert patch on record
        recid = str(PersistentIdentifier.get(
            'depid', deposit_id).object_uuid)
        patch = [{
            'op': 'remove',
            'path': '/_cds/extracted_metadata',
        }]
        validator = 'cds.modules.records.validators.PartialDraft4Validator'
        try:
            patch_record(recid=recid, patch=patch, validator=validator)
        except jsonpatch.JsonPatchConflict as c:
            logger.warning(
                'Failed to apply JSON Patch to deposit {0}: {1}'.format(
                    recid, c))

        # Delete tmp file if any
        obj = as_object_version(version_id)
        temp_location = obj.get_tags().get('temp_location', None)
        if temp_location:
            shutil.rmtree(temp_location)
            ObjectVersionTag.delete(obj, 'temp_location')
            db.session.commit()
Example #40
def zenodo_record_minter(record_uuid, data):
    """Zenodo record minter.

    Mint, or register if previously minted, the Concept RECID and RECID.
    Mint the Concept DOI and DOI.
    """
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(record_uuid, data)

    if 'recid' in data:
        recid = PersistentIdentifier.get('recid', data['recid'])
        recid.assign('rec', record_uuid)
        recid.register()
    else:
        recid = RecordIdProvider.create(object_type='rec',
                                        object_uuid=record_uuid).pid
        data['recid'] = int(recid.pid_value)

    zenodo_doi_minter(record_uuid, data)
    oaiid_minter(record_uuid, data)

    if 'conceptdoi' not in data:
        zenodo_concept_doi_minter(record_uuid, data)
    return recid
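A minimal sketch of invoking the minter, mirroring what Example #7 does for its test records; data here is a hypothetical metadata dict, and the DOI/OAI minters the function delegates to still need the usual Zenodo configuration and an application context.
from invenio_db import db
from invenio_records.api import Record

data = {'title': 'My upload'}
record = Record.create(data)
recid_pid = zenodo_record_minter(record.id, record)  # also mints DOI and OAI PIDs
record.commit()
db.session.commit()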
Example #41
    def delete(self, force=True, pid=None):
        """Delete deposit.

        Status required: ``'draft'``.

        :param force: Force deposit delete.  (Default: ``True``)
        :param pid: Force pid object.  (Default: ``None``)
        :returns: A new Deposit object.
        """
        # Delete the recid
        recid = PersistentIdentifier.get(pid_type='recid',
                                         pid_value=self.pid.pid_value)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        # if this item has been deleted
        self.delete_es_index_attempt(recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(
                    mp_q.with_entities(
                        MultipartObject.upload_id).subquery())).delete(
                            synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        return super(Deposit, self).delete()
Example #42
def test_minimal_record(app, minimal_record):
    """Test minimal record."""
    expected = {
        u'publication_distribution_imprint': [{
            'date_of_publication_distribution': (
                datetime.utcnow().date().isoformat())
        }],
        u'control_number': '123',
        u'information_relating_to_copyright_status': {
            'copyright_status': 'open'
        },
        u'summary': {
            'summary': 'My description'
        },
        u'main_entry_personal_name': {
            'personal_name': 'Test'
        },
        u'resource_type': {
            'type': 'software'
        },
        u'title_statement': {
            'title': 'Test'
        }
    }

    # Create record and pid.
    record = Record(minimal_record)
    pid = PersistentIdentifier(pid_type='recid', pid_value='2')
    assert record.validate() is None

    data = marcxml_v1.schema_class().dump(marcxml_v1.preprocess_record(
        pid=pid,
        record=record)).data
    assert_dict(expected, data)

    marcxml_v1.serialize(pid=pid, record=record)
Example #43
    def accept_record(self, record, pid=None):
        """Accept the record and all of its versions into the community.

        :type record: zenodo.modules.records.api.ZenodoRecord
        :param pid: PID of type 'recid'
        :type pid: invenio_pidstore.models.PersistentIdentifier
        """
        if not pid:
            pid = PersistentIdentifier.get('recid', record['recid'])
        with db.session.begin_nested():
            pending_q = self.get_comm_irs(record, pid=pid)
            if not pending_q.count():
                raise InclusionRequestMissingError(community=self,
                                                   record=record)

            pv = PIDVersioning(child=pid)
            for child in pv.children.all():
                rec = ZenodoRecord.get_record(child.get_assigned_object())
                # req = InclusionRequest.get(self.community.id, rec.id)
                # if req:
                #     req.delete()
                self.community.add_record(rec)
                rec.commit()
            pending_q.delete(synchronize_session=False)
Example #44
    def create(self,
               pid_value=None,
               pid_type=None,
               status=None,
               object_type=None,
               object_uuid=None,
               **kwargs):
        """Create a new instance for the given type and pid.

        :param pid_value: Persistent identifier value. (Default: None).
        :param pid_type: Persistent identifier type. (Default: None).
        :param status: Status for the created PID (Default:
            :attr:`invenio_pidstore.models.PIDStatus.NEW`).
        :param object_type: The object type is a string that identifies its
            type. (Default: None).
        :param object_uuid: The object UUID. (Default: None).
        :returns: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        """
        pid_type = pid_type or self.pid_type
        assert pid_type

        pid_value = pid_value or self._generate_id(**kwargs)
        assert pid_value

        status = status or self.default_status
        assert status

        return PersistentIdentifier.create(
            pid_type,
            pid_value,
            pid_provider=self.name,
            object_type=object_type,
            object_uuid=object_uuid,
            status=status,
        )
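For comparison, a heavily hedged sketch of the same pattern built on invenio_pidstore's BaseProvider, whose classmethod create() accepts a very similar signature; the pid_type and default status below are assumptions chosen for illustration, and the code must run inside an application context with a database session.
import uuid

from invenio_pidstore.models import PIDStatus
from invenio_pidstore.providers.base import BaseProvider


class SketchProvider(BaseProvider):
    """Illustrative provider reserving 'recid'-style identifiers."""

    pid_type = 'recid'
    default_status = PIDStatus.RESERVED


provider = SketchProvider.create(pid_value='4242', object_type='rec',
                                 object_uuid=uuid.uuid4())
assert provider.pid.status == PIDStatus.RESERVED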
Example #45
def test_resolver_deleted_object(app, db):
    """Test the class methods of PersistentIdentifier class."""
    with app.app_context():
        rec_uuid = uuid.uuid4()
        records = {
            rec_uuid: {
                'title': 'test'
            },
        }
        with db.session.begin_nested():
            pid = PersistentIdentifier.create('recid',
                                              '1',
                                              status=PIDStatus.REGISTERED,
                                              object_type='rec',
                                              object_uuid=rec_uuid)

        with db.session.begin_nested():
            pid.delete()

        resolver = Resolver(pid_type='recid',
                            object_type='rec',
                            getter=records.get)

        assert pytest.raises(PIDDeletedError, resolver.resolve, '1')
def test_single_signature_with_no_profile(small_app):
    """Check the module for the case with a single, new signature."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record['authors'][0]['uuid']

    # Add phonetic block to the record.
    record['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({}, {
                   "0": [author_uuid]
               }))):
        with patch(
                "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                side_effect=update_authors_recid):
            disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(record_id)['authors'][0]['recid'] == "1"
Example #47
def zenodo_deposit_minter(record_uuid, data):
    """Mint the DEPID, and reserve the Concept RECID and RECID PIDs."""
    if 'conceptrecid' not in data:
        zenodo_concept_recid_minter(data=data)

    recid = zenodo_reserved_record_minter(data=data)

    # Create depid with same pid_value of the recid
    depid = PersistentIdentifier.create(
        'depid',
        str(recid.pid_value),
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.REGISTERED,
    )

    data.update({
        '_deposit': {
            'id': depid.pid_value,
            'status': 'draft',
        },
    })

    return depid
Example #48
    def pid(self):
        """Get the PersistentIdentifier for this record."""
        return PersistentIdentifier.get(pid_type=self._pid_type,
                                        pid_value=self["pid"])
Example #49
def pid_from_value(pid_value, pid_type='recid'):
    """Determine if DOI is managed locally."""
    try:
        return PersistentIdentifier.get(pid_type=pid_type, pid_value=pid_value)
    except Exception:
        pass
Example #50
def test_pid_creation(app):
    """Test pid creation."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        with app.app_context():
            assert PersistentIdentifier.query.count() == 0

        result = runner.invoke(cmd, [
            'create', 'doi', '10.1234/foo'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            assert PersistentIdentifier.query.count() == 1
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.pid_type == 'doi'
            assert pid.pid_value == '10.1234/foo'
            assert pid.pid_provider is None
            assert pid.status == PIDStatus.NEW
            assert pid.object_type is None
            assert pid.object_uuid is None

        rec_uuid = uuid.uuid4()

        # Bad parameter status:
        result = runner.invoke(cmd, [
            'create', 'recid', '2', '--status', 'BADPARAMETER',
            '--type', 'rec', '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Any or both type and uuid must be defined:
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--type', 'rec',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'create', 'recid', '2',
            '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Everything should be fine now:
        result = runner.invoke(cmd, [
            'create', 'recid', '2', '--status', 'REGISTERED',
            '--type', 'rec', '--uuid', str(rec_uuid),
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            assert PersistentIdentifier.query.count() == 2
            pid = PersistentIdentifier.get('recid', '2')
            assert pid.pid_type == 'recid'
            assert pid.pid_value == '2'
            assert pid.pid_provider is None
            assert pid.status == PIDStatus.REGISTERED
            assert pid.object_type == 'rec'
            assert pid.object_uuid == rec_uuid

        # Can't duplicate existing persistent identifier
        result = runner.invoke(cmd, [
            'create', 'recid', '2',
        ], obj=script_info)
        assert -1 == result.exit_code
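The same identifiers can also be created programmatically, without the CLI; a minimal sketch assuming an application context, mirroring the two successful invocations above:

import uuid

from invenio_db import db
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

# Unattached DOI; status defaults to PIDStatus.NEW.
PersistentIdentifier.create('doi', '10.1234/foo')

# Registered recid bound to a record object.
rec_uuid = uuid.uuid4()
PersistentIdentifier.create('recid', '2',
                            object_type='rec', object_uuid=rec_uuid,
                            status=PIDStatus.REGISTERED)
db.session.commit()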
Exemple #51
0
def test_pid_assign(app):
    """Test pid object assignment."""
    runner = CliRunner()
    script_info = ScriptInfo(create_app=lambda info: app)

    with runner.isolated_filesystem():
        # No assigned object
        result = runner.invoke(cmd, [
            'create', 'doi', '10.1234/foo'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert not pid.has_object()
            assert pid.get_assigned_object() is None
            assert pid.get_assigned_object('rec') is None

        # Assign object
        rec_uuid = uuid.uuid4()
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code
        pid_status = result.output
        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.has_object()
            assert pid.get_assigned_object() == rec_uuid
            assert pid.get_assigned_object('rec') == rec_uuid
            assert pid.get_assigned_object('oth') is None

        # Doesn't raise (assigning the same object again is allowed)
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(rec_uuid)
        ], obj=script_info)
        assert 0 == result.exit_code

        # Missing type or uuid:
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec',
        ], obj=script_info)
        assert 2 == result.exit_code

        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-i', str(rec_uuid),
        ], obj=script_info)
        assert 2 == result.exit_code

        # Assign without overwrite (uuid as str and uuid)
        new_uuid = uuid.uuid4()
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-t', 'rec', '-i', str(new_uuid)
        ], obj=script_info)
        assert -1 == result.exit_code

        # Assign with overwrite
        result = runner.invoke(cmd, [
            'assign', 'doi', '10.1234/foo',
            '-s', 'REGISTERED',
            '-t', 'rec', '-i', str(new_uuid),
            '--overwrite'
        ], obj=script_info)
        assert 0 == result.exit_code

        with app.app_context():
            pid = PersistentIdentifier.get('doi', '10.1234/foo')
            assert pid.has_object()
            assert pid.status == PIDStatus.REGISTERED
            assert pid.get_assigned_object() == new_uuid
            assert pid.get_assigned_object('rec') == new_uuid
            assert pid.get_assigned_object('oth') is None
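The assign/overwrite behaviour exercised through the CLI maps directly onto the model API; a minimal sketch assuming an application context and the DOI created above:

import uuid

from invenio_db import db
from invenio_pidstore.models import PersistentIdentifier

pid = PersistentIdentifier.get('doi', '10.1234/foo')
if not pid.has_object():
    pid.assign('rec', uuid.uuid4())
# Re-pointing an already assigned PID requires overwrite=True.
pid.assign('rec', uuid.uuid4(), overwrite=True)
db.session.commit()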
Exemple #52
0
    def get_persistent_identifier(cls, id):
        """Get Persistent Identifier."""
        return PersistentIdentifier.get_by_object(cls.provider.pid_type,
                                                  cls.object_type, id)
Exemple #53
0
def fetcher(obj_uuid, data):
    assert obj_uuid in ['a', 'b']
    return PersistentIdentifier(pid_type='doi', pid_value='a')
Exemple #54
0
def get_db_record(pid_type, recid):
    from inspirehep.modules.records.api import InspireRecord
    pid = PersistentIdentifier.get(pid_type, recid)
    return InspireRecord.get_record(pid.object_uuid)
Exemple #55
0
    def publish(self):
        """Publish GitHub release as record."""
        id_ = uuid.uuid4()
        deposit_metadata = dict(self.metadata)
        deposit = None
        try:
            db.session.begin_nested()
            # TODO: Add filter on Published releases
            previous_releases = self.model.repository.releases.filter_by(
                status=ReleaseStatus.PUBLISHED)
            versioning = None
            stashed_draft_child = None
            if previous_releases.count():
                last_release = previous_releases.order_by(
                        Release.created.desc()).first()
                last_recid = PersistentIdentifier.get(
                    'recid', last_release.record['recid'])
                versioning = PIDVersioning(child=last_recid)
                last_record = ZenodoRecord.get_record(
                    versioning.last_child.object_uuid)
                deposit_metadata['conceptrecid'] = last_record['conceptrecid']
                if 'conceptdoi' not in last_record:
                    last_depid = PersistentIdentifier.get(
                        'depid', last_record['_deposit']['id'])
                    last_deposit = ZenodoDeposit.get_record(
                        last_depid.object_uuid)
                    last_deposit = last_deposit.registerconceptdoi()
                    last_recid, last_record = last_deposit.fetch_published()
                deposit_metadata['conceptdoi'] = last_record['conceptdoi']
                if versioning.draft_child:
                    stashed_draft_child = versioning.draft_child
                    versioning.remove_draft_child()

            deposit = self.deposit_class.create(deposit_metadata, id_=id_)

            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True,
                                              allow_redirects=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}"
                        .format(url=url)
                    )

                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            recid_pid, record = deposit.fetch_published()
            self.model.recordmetadata = record.model
            if versioning and stashed_draft_child:
                versioning.insert_draft_child(stashed_draft_child)
            record_id = str(record.id)
            db.session.commit()

            # Send Datacite DOI registration task
            datacite_register.delay(recid_pid.pid_value, record_id)

            # Index the record
            RecordIndexer().index_by_id(record_id)
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    current_app.logger.exception(
                        "Failed to remove uncommitted deposit from index.")
            raise
Exemple #56
0
    def recid(self):
        """Get RECID object for the Release record."""
        if self.record:
            return PersistentIdentifier.get('recid', str(self.record['recid']))
Exemple #57
0
def update_record_statistics(start_date=None, end_date=None):
    """Update "_stats" field of affected records."""
    start_date = dateutil_parse(start_date) if start_date else None
    end_date = dateutil_parse(end_date) if end_date else None
    aggr_configs = {}

    if not start_date and not end_date:
        start_date = datetime.utcnow()
        end_date = datetime.utcnow()

        for aggr_name in current_stats.enabled_aggregations:
            aggr_cfg = current_stats.aggregations[aggr_name]
            aggr = aggr_cfg.aggregator_class(name=aggr_cfg.name,
                                             **aggr_cfg.aggregator_config)

            if not Index(aggr.aggregation_alias, using=aggr.client).exists():
                if not Index(aggr.event_index, using=aggr.client).exists():
                    start_date = min(start_date, datetime.utcnow())
                else:
                    start_date = min(start_date,
                                     aggr._get_oldest_event_timestamp())

            # Retrieve the last two bookmarks
            bookmarks = Search(using=aggr.client,
                               index=aggr.aggregation_alias,
                               doc_type=aggr.bookmark_doc_type)[0:2].sort({
                                   'date': {
                                       'order': 'desc'
                                   }
                               }).execute()

            if len(bookmarks) >= 1:
                end_date = max(
                    end_date,
                    datetime.strptime(bookmarks[0].date, aggr.doc_id_suffix))
            if len(bookmarks) == 2:
                start_date = min(
                    start_date,
                    datetime.strptime(bookmarks[1].date, aggr.doc_id_suffix))

            aggr_configs[aggr.aggregation_alias] = aggr
    elif start_date and end_date:
        for aggr_name in current_stats.enabled_aggregations:
            aggr_cfg = current_stats.aggregations[aggr_name]
            aggr = aggr_cfg.aggregator_class(name=aggr_cfg.name,
                                             **aggr_cfg.aggregator_config)
            aggr_configs[aggr.aggregation_alias] = aggr
    else:
        return

    # Get conceptrecids for all the affected records between the two dates
    conceptrecids = set()
    for aggr_alias, aggr in aggr_configs.items():
        query = Search(
            using=aggr.client,
            index=aggr.aggregation_alias,
            doc_type=aggr.aggregation_doc_type,
        ).filter('range',
                 timestamp={
                     'gte':
                     start_date.replace(microsecond=0).isoformat() + '||/d',
                     'lte':
                     end_date.replace(microsecond=0).isoformat() + '||/d'
                 }).extra(_source=False)
        query.aggs.bucket('ids', 'terms', field='conceptrecid', size=0)
        conceptrecids |= {
            b.key
            for b in query.execute().aggregations.ids.buckets
        }

    indexer = RecordIndexer()
    for conceptrecid_val in conceptrecids:
        conceptrecid = PersistentIdentifier.get('recid', conceptrecid_val)
        pv = PIDVersioning(parent=conceptrecid)
        children_recids = pv.children.all()
        indexer.bulk_index([str(p.object_uuid) for p in children_recids])
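A usage sketch; the dates are passed as strings and parsed with dateutil (the values are illustrative):

# Re-aggregate the statistics of records affected in a fixed window.
update_record_statistics(start_date='2019-01-01', end_date='2019-01-31')

# Or let the task derive the window from the stored aggregation bookmarks.
update_record_statistics()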
Exemple #58
0
def license_record(db, sip_metadata_types):
    """Create a license record."""
    license = Record.create({
        "$schema": "https://zenodo.org/schemas/licenses/license-v1.0.0.json",
        "domain_content": True,
        "domain_data": True,
        "domain_software": True,
        "family": "",
        "id": "CC-BY-4.0",
        "maintainer": "Creative Commons",
        "od_conformance": "approved",
        "osd_conformance": "not reviewed",
        "status": "active",
        "title": "Creative Commons Attribution International 4.0",
        "url": "https://creativecommons.org/licenses/by/4.0/"
    })
    PersistentIdentifier.create(pid_type='od_lic',
                                pid_value=license['id'],
                                object_type='rec',
                                object_uuid=license.id,
                                status='R')
    license = Record.create({
        "$schema": "https://zenodo.org/schemas/licenses/license-v1.0.0.json",
        "domain_content": True,
        "domain_data": True,
        "domain_software": True,
        "family": "",
        "id": "CC0-1.0",
        "maintainer": "Creative Commons",
        "od_conformance": "approved",
        "osd_conformance": "not reviewed",
        "status": "active",
        "title": "CC0 1.0",
        "url": "https://creativecommons.org/publicdomain/zero/1.0/"
    })
    PersistentIdentifier.create(pid_type='od_lic',
                                pid_value=license['id'],
                                object_type='rec',
                                object_uuid=license.id,
                                status='R')
    db.session.commit()
    return license
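Records minted this way can later be looked up through their license PID; a minimal sketch assuming an application context:

from invenio_pidstore.models import PersistentIdentifier
from invenio_records.api import Record

pid = PersistentIdentifier.get('od_lic', 'CC0-1.0')
cc0 = Record.get_record(pid.object_uuid)
assert cc0['id'] == 'CC0-1.0'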
Exemple #59
0
def test_solve_claim_conflicts(small_app):
    """Check the module for the case where at least two claimed
    signatures are assigned to the same cluster.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    # Claimed signature #1.
    glashow_record_id_claimed = str(
        PersistentIdentifier.get('lit', 4328).object_uuid)
    glashow_record_claimed = get_es_record_by_uuid(glashow_record_id_claimed)
    glashow_record_uuid_claimed = glashow_record_claimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    glashow_record_claimed['authors'][0]['signature_block'] = "HAGp"
    glashow_record_claimed['authors'][0]['curated_relation'] = True
    glashow_record_claimed['authors'][0]['recid'] = "3"
    es.index(index='records-hep',
             doc_type='hep',
             id=glashow_record_id_claimed,
             body=glashow_record_claimed)
    es.indices.refresh('records-hep')

    # Claimed signature #2.
    higgs_record_id_claimed = str(
        PersistentIdentifier.get('lit', 1358492).object_uuid)
    higgs_record_claimed = get_es_record_by_uuid(higgs_record_id_claimed)
    higgs_record_uuid_claimed = higgs_record_claimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_record_claimed['authors'][0]['signature_block'] = "HAGp"
    higgs_record_claimed['authors'][0]['curated_relation'] = True
    higgs_record_claimed['authors'][0]['recid'] = "4"
    es.index(index='records-hep',
             doc_type='hep',
             id=higgs_record_id_claimed,
             body=higgs_record_claimed)
    es.indices.refresh('records-hep')

    # Not claimed signature.
    higgs_record_id_not_claimed = str(
        PersistentIdentifier.get('lit', 11883).object_uuid)
    higgs_record_not_claimed = get_es_record_by_uuid(
        higgs_record_id_not_claimed)
    higgs_record_uuid_not_claimed = higgs_record_not_claimed['authors'][0][
        'uuid']

    # Add phonetic block to the record.
    higgs_record_not_claimed['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep',
             doc_type='hep',
             id=higgs_record_id_not_claimed,
             body=higgs_record_not_claimed)
    es.indices.refresh('records-hep')

    with patch("celery.current_app.send_task",
               return_value=_BeardObject(({
                   "3": [
                       glashow_record_uuid_claimed, higgs_record_uuid_claimed,
                       higgs_record_uuid_not_claimed
                   ]
               }, {}))):
        with patch(
                "inspirehep.modules.disambiguation.logic._solve_claims_conflict",
                return_value=_ConflictObject({
                    higgs_record_uuid_claimed: [higgs_record_uuid_not_claimed]
                })):
            with patch(
                    "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                    side_effect=update_authors_recid):
                disambiguation_clustering("HAGp")

    assert InspireRecord.get_record(
        higgs_record_id_not_claimed)['authors'][0]['recid'] == "4"
Exemple #60
0
    def publish_record_internal(self, record_context, published_record_class,
                                published_pid_type, collected_records):
        draft_record = record_context.record
        draft_pid = record_context.record_pid

        # clone metadata
        metadata = copy.deepcopy(dict(draft_record))
        if 'oarepo:validity' in metadata:
            del metadata['oarepo:validity']
        metadata.pop('oarepo:draft', True)

        try:
            published_pid = PersistentIdentifier.get(published_pid_type,
                                                     draft_pid.pid_value)
        except PIDDoesNotExistError:
            published_pid = None

        before_publish_record.send(
            draft_record,
            metadata=metadata,
            record_context=record_context,
            record=record_context,  # back compatibility, deprecated
            collected_records=collected_records)

        if published_pid:
            if published_pid.status == PIDStatus.DELETED:
                # The published record was deleted: resurrect it by
                # switching its PID back to REGISTERED ...
                published_pid.status = PIDStatus.REGISTERED
                db.session.add(published_pid)

                # ... and update the published record with the draft metadata
                return self._update_published_record(published_pid, metadata,
                                                     None,
                                                     published_record_class,
                                                     record_context)

            elif published_pid.status == PIDStatus.REGISTERED:
                # fetch the published record and update its metadata
                # if it is older than the draft
                return self._update_published_record(published_pid, metadata,
                                                     draft_record.updated,
                                                     published_record_class,
                                                     record_context)

            raise NotImplementedError(
                'Can not publish draft record to a published record '
                'with pid status %s. Only REGISTERED and DELETED '
                'statuses are implemented.' % published_pid.status)

        # Create a new published record. Do not call the minter, as the pid
        # value will be the same as the pid value of the draft record.
        id = uuid.uuid4()
        published_record = published_record_class.create(metadata, id_=id)
        published_pid = PersistentIdentifier.create(
            pid_type=published_pid_type,
            pid_value=draft_pid.pid_value,
            status=PIDStatus.REGISTERED,
            object_type='rec',
            object_uuid=id)

        self._copy_files_between_records(
            draft_record, published_record, record_context,
            RecordContext(record_pid=published_pid, record=published_record))

        after_publish_record.send(draft_record,
                                  published_record=published_record,
                                  published_pid=published_pid,
                                  collected_records=collected_records)
        return published_record, published_pid
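For reference, a minimal sketch of the lookup-and-resurrect pattern used above, assuming an application context (the pid type and value are illustrative):

from invenio_db import db
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_pidstore.models import PersistentIdentifier, PIDStatus

try:
    pid = PersistentIdentifier.get('recid', '2')
except PIDDoesNotExistError:
    pid = None

if pid is not None and pid.status == PIDStatus.DELETED:
    # Resurrect a soft-deleted identifier, as the publish path above does.
    pid.status = PIDStatus.REGISTERED
    db.session.add(pid)
    db.session.commit()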