Example #1
def test_record_get_bucket_with_no_bucket(app, db, location):
    """Test retrival of the bucket when no bucket is associated."""
    record = Record.create({'title': 'test'}, with_bucket=False)
    db.session.commit()
    record = Record.get_record(record.id)
    assert record.bucket is None
    assert record.files is None
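For contrast, a minimal sketch of the default path, where with_bucket=True creates a bucket automatically (fixtures assumed to match the test above; cf. Example 18):

from io import BytesIO
from invenio_records_files.api import Record

def test_record_get_bucket_with_bucket(app, db, location):
    record = Record.create({'title': 'test'})  # with_bucket defaults to True
    record.files['data.txt'] = BytesIO(b'example content')
    db.session.commit()
    assert record.bucket is not None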
Example #2
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()
    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{
                "name": fake.name()
            }],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Flush to index and database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")
    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
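Note that bulk_index() only queues record UUIDs; nothing reaches the search engine until the queue is drained, normally by a Celery worker. A minimal synchronous sketch (the same pattern Example 14 uses):

from invenio_indexer.api import RecordIndexer

indexer = RecordIndexer()
indexer.bulk_index(records_bulk)   # enqueue the record UUIDs
indexer.process_bulk_queue()       # consume the queue and issue bulk requests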
Example #3
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)

    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])

    # If there are any IIIF-valid image files, the IIIF manifest URL is
    # added to the record metadata.
    iiif_manifest_url = ''
    if num_of_iiif_valid_files > 0:
        iiif_manifest_url = '/record/{0}/iiif/manifest.json'.format(
            provider.pid.pid_value)
    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url

    data = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }

    # from invenio_records_files.api import Record as RecordFile
    record = RecordFile.create(data, id_=rec_uuid)

    # connect to record and bucket
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
Example #4
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #5
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])

        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
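For datacite_register.retry() to exist, the function must be registered as a Celery task. A sketch of the decorator such a task typically carries (the exact retry policy here is an assumption):

from celery import shared_task

@shared_task(ignore_result=True, max_retries=6,
             default_retry_delay=10 * 60, rate_limit='100/m')
def datacite_register(pid_value, record_uuid):
    ...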
Example #6
def add_oai_information(obj, eng):
    """Adds OAI information like identifier"""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            oai_prefix = current_app.config.get('OAISERVER_ID_PREFIX')
            existing_record['_oai'] = {
                'id': '%s:%s' % (oai_prefix, recid),
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or \
            not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
Example #7
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)

                if data.status_code != 200:
                    __halt_and_notify(
                        "Error while acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders: %s"
                        % (data.status_code, file_['url'], headers), eng)

                f = BytesIO(data.content)  # response content is bytes
            else:
                f = open(file_['url'], 'rb')

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Example #8
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'],
                                                db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDVersioning(record_pid).parent.pid_value == parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
Example #9
def add_oai_information(obj, eng):
    """Adds OAI information like identifier"""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
Example #10
def list_db_published_records():
    """A generator for all the published records"""
    query = RecordMetadata.query.filter(RecordMetadata.json is not None)
    for obj in query.all():
        record = Record(obj.json, model=obj)
        if is_publication(record.model):
            yield record
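A usage sketch, pairing the generator with the check_record_doi helper that Examples 41-42 call (the second argument mirrors their update flag):

for record in list_db_published_records():
    check_record_doi(record, False)  # update=False: report only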
Example #11
def store_record(obj, eng):
    """Stores record in database"""
    if 'italiana di fisica' in obj.data['abstracts'][0]['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'
    if 'italiana di fisica' in obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)

    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ), obj,
                          eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
Example #12
def transfer_cp(uuid, destination):
    """Transfer the files contained in the record to the destination.

    This method is automatically called by the module to transfer the files.
    Depending on your installation, you may want to have a different behavior
    (copy among servers...). Then, you can create your own factory and link it
    into the config variable
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FACTORY`.

    :param uuid: the id of the record containing files to transfer
    :param destination: the destination folder - this will be what is
        inside the config variable
        :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FOLDER`.
        It needs to be an absolute path to a folder
    """
    record = Record.get_record(uuid)
    pid = PersistentIdentifier.get_by_object("recid", "rec", uuid)
    dir_name = join(destination,
                    create_accessioned_id(pid.pid_value, 'recid'))
    try:
        mkdir(dir_name)
    except OSError:
        pass  # the directory already exists
    for fileobj in record.files:
        copyfile(fileobj.file.storage().fileurl,
                 join(dir_name, fileobj.key))
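A sketch of wiring this factory in through the config variables named in the docstring (the module path and folder are assumptions for illustration):

ARCHIVEMATICA_TRANSFER_FACTORY = 'mysite.archive.factories.transfer_cp'
ARCHIVEMATICA_TRANSFER_FOLDER = '/var/archivematica/transfers'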
Example #13
def update_record(pid, schema, data):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    record['$schema'] = schema
    record.update(data)
    record.commit()
    return record
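record.commit() only stages the new revision on the database session, so a caller still has to persist it and, typically, re-index. A usage sketch:

record = update_record(pid, schema, {'title': 'updated title'})
db.session.commit()
RecordIndexer().index(record)  # re-index, as the other fixtures do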
Example #14
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '', '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()
            ),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
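The docstring says this runs every midnight; a sketch of the Celery beat entry that would schedule it (the task module path is an assumption):

from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    'embargo-updater': {
        'task': 'b2share.modules.records.tasks.update_expired_embargoes',
        'schedule': crontab(minute=0, hour=0),  # midnight
    },
}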
Example #15
def datacite_register(self,
                      pid_value,
                      record_uuid,
                      max_retries=5,
                      countdown=5):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a CDS DOI.
        if not is_local_doi(record['doi']) or \
                not current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
            return

        dcp = DataCiteProvider.get(record['doi'])

        url = current_app.config['CDS_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        db.session.rollback()
        raise self.retry(max_retries=max_retries, countdown=countdown, exc=exc)
Example #16
def test_record_create_no_bucket(app, db, location):
    """Test record creation without bucket creation."""
    record = Record.create({}, with_bucket=False)
    db.session.commit()
    assert record.files is None
    assert '_bucket' not in record
    assert '_files' not in record
Example #17
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #18
def test_record_create_files(app, db, location):
    """Test record creation with bucket and files."""
    record = Record.create({'title': 'test'})
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    db.session.commit()
    assert record['_bucket'] == record.bucket_id
    assert record['_files']
Example #19
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
Example #20
def update_expired_embargos():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit(
        (current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
         current_app.config['JSONSCHEMAS_HOST'],
         current_app.config.get('APPLICATION_ROOT') or '', '', ''))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client, index='records').query(
                'query_string',
                query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                    datetime.now(timezone.utc).isoformat()),
                allow_leading_wildcard=False).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
Example #21
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
Example #22
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'], db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDNodeVersioning(
            record_pid).parent.pid_value == parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
Example #23
def record(app, db):
    """Create a record."""
    record = {'title': 'fuu'}
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example #24
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                f = BytesIO(data.content)  # response content is bytes
            else:
                f = open(file_['url'], 'rb')

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Example #25
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDNodeVersioning and previous record of a record PID.

    :param version_of: record PID.
    """

    from b2share.modules.records.providers import RecordUUIDProvider
    from b2share.modules.records.utils import is_publication

    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    parent_pid = PIDNodeVersioning(pid=child_pid).parents.first()
    version_master = PIDNodeVersioning(pid=parent_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
Example #26
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))

                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #27
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
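A usage sketch following the pattern of the other loaders (the schema path is an assumption):

schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
    'records/docs-v1.0.0.json')
record = create_doc({'title': 'About'}, schema)
db.session.commit()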
Example #28
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
Example #29
def indexer_receiver(sender, json, doc_type, index, record):
    """To do."""
    from invenio_records_files.api import Record as FileRecord
    record = FileRecord.get_record(record.id)
    for file in record.files:
        if file.get('filetype') == 'raw_text':
            with file.file.storage().open() as f:
                json['fulltext'] = f.read().decode('utf-8').replace('\n', ' ')
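This receiver matches the signature of invenio-indexer's before_record_index signal; a minimal wiring sketch:

from invenio_indexer.signals import before_record_index

before_record_index.connect(indexer_receiver)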
Example #30
def minted_record(app, db):
    """Create a test record."""
    data = {'title': 'fuu'}
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid = current_pidstore.minters['recid'](rec_uuid, data)
        record = Record.create(data, id_=rec_uuid)
    return pid, record
Example #31
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #32
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
Example #33
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #34
def record(app, db):
    """Create a record."""
    record = {
        'title': 'fuu'
    }
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example #35
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(
        pid_type='recid', pid_value=12345, object_type='rec',
        object_uuid=record.id, status='R')
    db.session.commit()
    return pid, record
Example #36
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id, with_bucket=not skip_files)
    if not skip_files:
        handle_record_files(data, record.bucket, files, skip_files)

    return record
Example #37
    def publish(self, pid=None, id_=None):
        """Publish a deposit."""
        pid = pid or self.pid

        if not pid.is_registered():
            raise PIDInvalidAction()

        self['_deposit']['status'] = 'published'

        if self['_deposit'].get('pid') is None:  # First publishing
            minter = current_pidstore.minters[
                current_app.config['DEPOSIT_PID_MINTER']
            ]
            id_ = id_ or uuid.uuid4()
            record_pid = minter(id_, self)

            self['_deposit']['pid'] = {
                'type': record_pid.pid_type, 'value': record_pid.pid_value,
                'revision_id': 0,
            }

            data = dict(self.dumps())
            data['$schema'] = self.record_schema

            # On first publishing, create a snapshot of the bucket.
            @contextmanager
            def process_files(data):
                """Process deposit files."""
                if self.files and self.files.bucket:
                    assert not self.files.bucket.locked
                    self.files.bucket.locked = True
                    snapshot = self.files.bucket.snapshot(lock=True)
                    data['_files'] = self.files.dumps(bucket=snapshot.id)
                    yield data
                    db.session.add(RecordsBuckets(
                        record_id=id_, bucket_id=snapshot.id
                    ))
                else:
                    yield data

            with process_files(data) as data:
                record = Record.create(data, id_=id_)
        else:  # Update after edit
            record_pid, record = self.fetch_published()
            # TODO add support for patching
            assert record.revision_id == self['_deposit']['pid']['revision_id']

            data = dict(self.dumps())
            data['$schema'] = self.record_schema
            record = record.__class__(data, model=record.model)
            record.commit()

        self.commit()
        return self
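A usage sketch of the publish flow this method implements, assuming a Deposit class that exposes it and the usual fixtures:

from io import BytesIO

deposit = Deposit.create({'title': 'My dataset'})
deposit.files['data.csv'] = BytesIO(b'a,b\n1,2\n')
deposit = deposit.publish()  # snapshots the bucket, mints the record PID
_, record = deposit.fetch_published()
db.session.commit()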
Example #38
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(pid_type='recid',
                                      pid_value=1,
                                      object_type='rec',
                                      object_uuid=record.id,
                                      status='R')
    db.session.commit()
    return pid, record
Example #39
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization
    rmtree(tmppath)
Example #40
def record(db):
    """Record fixture."""
    rec_uuid = uuid.uuid4()
    provider = RecordIdProvider.create(
        object_type='rec', object_uuid=rec_uuid)
    record = Record.create({
        'control_number': provider.pid.pid_value,
        'title': 'TestDefault',
    }, id_=rec_uuid)
    db.session.commit()
    return record
Example #41
def check_dois(record, allrecords, update):
    """ Checks that DOIs of records in the current instance are registered.
    """
    if record:
        record = Record.get_record(record)
        check_record_doi(record, update)
    elif allrecords:
        click.secho('checking DOI for all records')
        for record in list_db_published_records():
            check_record_doi(record, update)
    else:
        raise click.ClickException('Either -r or -a option must be selected')
Example #42
def check_dois(record, allrecords, update):
    """ Checks that DOIs of records in the current instance are registered.
    """
    if record:
        record = Record.get_record(record)
        check_record_doi(record, update)
    elif allrecords:
        click.secho('checking DOI for all records')
        for record in list_db_published_records():
            check_record_doi(record, update)
    else:
        raise click.ClickException('Either -r or -a option must be selected')
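A sketch of the click options this command implies (names and flags inferred from the error message; treat them as assumptions):

@click.command()
@click.option('-r', '--record', default=None, help='PID of one record to check')
@click.option('-a', '--allrecords', is_flag=True, help='check every published record')
@click.option('-u', '--update', is_flag=True, help='re-register out-of-sync DOIs')
def check_dois(record, allrecords, update):
    ...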
Example #43
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
Example #44
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return record
Example #45
def check_embargo(record_id, is_embargoed):
    with app.app_context():
        with app.test_client() as client:
            login_user(non_creator, client)
            # test open_access field in record's metadata
            record = Record.get_record(record_id)
            assert record['open_access'] != is_embargoed
            # test record's file access
            subtest_file_bucket_permissions(
                client, record.files.bucket,
                access_level=None if is_embargoed else 'read',
                is_authenticated=True
            )
Example #46
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    record = CustomRecord.create({})
    assert record.files is not None

    record = Record.create({})
    assert record.files is None
Example #47
def check_pids_migration():
    """Check that the persistent identifiers have been migrated."""
    expected_pids = _load_json('expected_pids.json')
    # Check unchanging properties
    for exp_pid in expected_pids:
        db_pid = PersistentIdentifier.get(exp_pid['pid_type'],
                                          exp_pid['pid_value'])
        for key, value in exp_pid.items():
            if key != 'updated':
                assert str(getattr(db_pid, key)) == str(value)

        # check that deleted PID's records are (soft or hard) deleted
        if exp_pid['status'] == PIDStatus.DELETED.value:
            metadata = None
            try:
                record = Record.get_record(exp_pid['pid_value'],
                                           with_deleted=True)
                # Soft deleted record
                metadata = record.model.json
            except NoResultFound:
                # Hard deleted record
                pass
            assert metadata is None

        # Check versioning relations and PIDs
        if exp_pid['pid_type'] == 'b2dep':
            try:
                rec_pid = PersistentIdentifier.get('b2rec',
                                                   exp_pid['pid_value'])
                # if the deposit is deleted, either the record PID was reserved
                # and has been deleted, or it still exists.
                if db_pid.status == PIDStatus.DELETED:
                    assert rec_pid.status != PIDStatus.RESERVED
            except PIDDoesNotExistError:
                # The record PID was only reserved and has been deleted
                # with the deposit PID.
                assert db_pid.status == PIDStatus.DELETED
                continue

            # Check that a parent pid has been created
            versioning = PIDVersioning(child=rec_pid)
            parent = versioning.parent
            assert rec_pid.status in [PIDStatus.RESERVED, PIDStatus.REGISTERED]
            if rec_pid.status == PIDStatus.RESERVED:
                assert parent.status == PIDStatus.RESERVED
            else:
                assert parent.status == PIDStatus.REDIRECTED
                assert parent.get_redirect() == rec_pid
Example #48
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    """
    record = Record.get_record(record_uuid)
    dcp = DataCiteProvider.get(record['doi'])

    url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
        recid=pid_value)
    doc = datacite_v31.serialize(dcp.pid, record)

    if dcp.pid.status == PIDStatus.REGISTERED:
        dcp.update(url, doc)
    else:
        dcp.register(url, doc)
    db.session.commit()
Example #49
def datacite_register(pid_value, record_uuid):
    """Mint DOI and Concept DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])
        doc = datacite_v41.serialize(dcp.pid, record)

        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)

        # If this is the latest record version, update/register the Concept DOI
        # using the metadata of the record.
        recid = PersistentIdentifier.get('recid', str(record['recid']))
        pv = PIDVersioning(child=recid)
        conceptdoi = record.get('conceptdoi')
        if conceptdoi and pv.exists and pv.is_last_child:
            conceptrecid = record.get('conceptrecid')
            concept_dcp = DataCiteProvider.get(conceptdoi)
            url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
                recid=conceptrecid)

            doc = datacite_v41.serialize(concept_dcp.pid, record)
            if concept_dcp.pid.status == PIDStatus.REGISTERED:
                concept_dcp.update(url, doc)
            else:
                concept_dcp.register(url, doc)

        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
Example #50
def test_files_protection(app, db, location):
    """Test record files property protection."""
    record = Record.create({})

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)

    # Lock bucket.
    bucket.locked = True

    assert record.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del record.files['hello.txt']
Example #51
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDVersioning and previous record of a record PID.

    :param version_of: record PID.
    """
    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    version_master = PIDVersioning(child=child_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
Example #52
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    title = {'title': 'record test'}
    record = Record.create(title, recid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # We create an API SIP on top of it
    api_recordsip = RecordSIP(recordsip, sip)
    assert api_recordsip.model is recordsip
    assert api_recordsip.sip.id == sip.id
Example #53
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            # ``record.files.bucket`` is a Bucket instance, not an id
            old_bucket = record.files.bucket
            bucket = Bucket.get(old_bucket.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket
            ).delete()
            old_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)
    return record
Example #54
def check_handles(update, record_pid):
    """Allocate handles for a record and its files, if necessary."""
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    record_updated = False

    pid_list = [p.get('value') for p in record['_pid']
                if p.get('type') == 'ePIC_PID']
    if pid_list:
        click.secho('record {} already has a handle'.format(record_pid), fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if update:
            b2share_pid_minter(rec_pid, record)
            record_updated = True
            click.secho('    handle added to record', fg='green')
        else:
            click.secho('use -u argument to add a handle to the record')

    files_ok = True
    for f in record.get('_files', []):
        if f.get('ePIC_PID'):
            click.secho('file {} already has a handle'.format(f.get('key')), fg='green')
        else:
            click.secho('file {} has no handle'.format(f.get('key')), fg='red')
            files_ok = False

    if update and not files_ok:
        create_file_pids(record)
        record_updated = True
        click.secho('    files updated with handles', fg='green')
    elif not update and not files_ok:
        click.secho('use -u argument to add handles to the files')

    if record_updated:
        record.commit()
        db.session.commit()
Example #55
def test_cascade_action_record_delete(app, db, location, record_with_bucket,
                                      generic_file, force,
                                      num_of_recordbuckets):
    """Test cascade action on record delete, with force false."""
    record = record_with_bucket
    record_id = record.id
    bucket_id = record.files.bucket.id

    # check before
    assert len(RecordsBuckets.query.all()) == 1
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)

    record.delete(force=force)

    # check after
    db.session.expunge(record.model)
    with pytest.raises(NoResultFound):
        record = Record.get_record(record_id)
    assert len(RecordsBuckets.query.all()) == num_of_recordbuckets
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)
Example #56
def store_record(obj, eng):
    """Stores record in database"""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
Example #57
def update_record(obj, eng):
    """Updates existing record"""

    doi = get_first_doi(obj)

    query = {'query': {'bool': {'must': [{'match': {'dois.value': doi}}], }}}
    search_result = es.search(index='records-record', doc_type='record-v1.0.0', body=query)

    recid = search_result['hits']['hits'][0]['_source']['control_number']

    obj.extra_data['recid'] = recid
    obj.data['control_number'] = recid

    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_files' in existing_record:
        obj.data['_files'] = existing_record['_files']
    if '_oai' in existing_record:
        obj.data['_oai'] = existing_record['_oai']

    # preserving original creation date
    creation_date = existing_record['record_creation_date']
    obj.data['record_creation_date'] = creation_date
    obj.data['record_creation_year'] = parse_date(creation_date).year
    existing_record.clear()
    existing_record.update(obj.data)

    try:
        existing_record.commit()
        obj.save()
        db.session.commit()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except SchemaError as err:
        __halt_and_notify('SchemaError during record validation! %s' % err, eng)
Example #58
def test_deposit_versions_create(app, test_records, test_users):
    """Creating new versions of existing records."""
    with app.app_context():
        # Retrieve a record which will be the first version
        v1 = test_records[0].data
        v1_rec = B2ShareRecord.get_record(test_records[0].record_id)
        v1_pid, v1_id = pid_of(v1)
        assert list_published_pids(v1_pid) == [v1_pid]

        # create draft in version chain:
        # version chain becomes: [v1] -- [v2 draft]
        # v2 = create_deposit({}, version_of=v1_id)
        data = copy_data_from_previous(v1_rec.model.json)
        v2 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v1_id)
        assert filenames(v2) == []
        ObjectVersion.create(v2.files.bucket, 'myfile1',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v2) == ['myfile1']

        assert list_published_pids(v1_pid) == [v1_pid]

        # cannot create another draft if one exists
        # not possible: [v1] -- [v2 draft]
        #                    `- [new draft]
        with pytest.raises(DraftExistsVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # cannot create a version from a draft pid
        # not possible: [v1] -- [v2 draft] -- [new draft]
        with pytest.raises(IncorrectRecordVersioningError): # record pid not created yet
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v2['_deposit']['id'])

        # publish previous draft
        # version chain becomes: [v1] -- [v2]
        v2.submit()
        v2.publish()
        v2_pid, v2_id = pid_of(v2)
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]

        # cannot create draft based on the first version in a chain
        # not possible: [v1] -- [v2]
        #                    `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # create and publish other versions:
        # version chain becomes: [v1] -- [v2] -- [v3]
        data = copy_data_from_previous(v1_rec.model.json)
        v3 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v2_id)
        # assert files are imported from v2
        assert filenames(v3) == ['myfile1']
        ObjectVersion.create(v3.files.bucket, 'myfile2',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v3) == ['myfile1', 'myfile2']

        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]

        v3.submit()
        v3.publish()
        v3_pid, v3_id = pid_of(v3)
        v3_rec = Record.get_record(v3_id)
        assert filenames(v3_rec) == ['myfile1', 'myfile2']
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid, v3_pid]

        # cannot create draft based on an intermediate version in a chain
        # not possible: [v1] -- [v2] -- [v3]
        #                            `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            create_deposit({}, test_users['deposits_creator'],
                           version_of=v2_id)

        # Create yet another version
        # Version chain becomes: [v1] -- [v2] -- [v3] -- [v4]
        data = copy_data_from_previous(v1_rec.model.json)
        v4 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v3_id)
        v4.submit()
        v4.publish()
        assert filenames(v4) == ['myfile1', 'myfile2']
        v4_pid, v4_id = pid_of(v4)
        assert list_published_pids(v1_pid) == [
            v1_pid, v2_pid, v3_pid, v4_pid]

        # assert that creating a new version from a deleted pid is not allowed
        resolver = Resolver(pid_type=v4_pid.pid_type, object_type='rec',
                            getter=partial(B2ShareRecord.get_record,
                                           with_deleted=True))
        v4_pid, v4_rec = LazyPIDValue(resolver, v4_pid.pid_value).data
        # delete [v4]
        v4_rec.delete()
        with pytest.raises(RecordNotFoundVersioningError):
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v4_id)
Ejemplo n.º 59
0
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0."""
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    if verbose:
        click.secho('migrating deposits and records...')
    with db.session.begin_nested():
        # Migrate published records
        records_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for rec_pid in records_pids:
            if verbose:
                click.secho('    record {}'.format(rec_pid.pid_value))
            try:
                record = Record.get_record(rec_pid.object_uuid)
            except NoResultFound:
                # The record is deleted but not the PID. Fix it.
                rec_pid.status = PIDStatus.DELETED
                continue
            # Create parent version PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)
            version_master.update_redirect()
            migrate_record_metadata(
                Record.get_record(rec_pid.object_uuid),
                parent_pid
            )

        # Migrate deposits
        deposit_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for dep_pid in deposit_pids:
            if verbose:
                click.secho('    deposit {}'.format(dep_pid.pid_value))
            try:
                deposit = Deposit.get_record(dep_pid.object_uuid)

                if deposit['publication_state'] != \
                        PublicationStates.published.name:
                    # The record is not published yet. Reserve the PID.
                    rec_pid = RecordUUIDProvider.create(
                        object_type='rec',
                        pid_value=dep_pid.pid_value,
                    ).pid
                    # Create parent version PID
                    parent_pid = RecordUUIDProvider.create().pid
                    assert parent_pid
                    version_master = PIDVersioning(parent=parent_pid)
                    version_master.insert_draft_child(child=rec_pid)
                else:
                    # Retrieve previously created version PID
                    rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
                    version_master = PIDVersioning(child=rec_pid)
                    parent_pid = version_master.parent
                    if not parent_pid:
                        click.secho(
                            '    record {} was deleted, but the deposit '
                            'has not been removed'.format(rec_pid.pid_value),
                            fg='red')

                if parent_pid:
                    migrate_record_metadata(
                        Deposit.get_record(dep_pid.object_uuid),
                        parent_pid
                    )
            except NoResultFound:
                # The deposit is deleted but not the PID. Fix it.
                dep_pid.status = PIDStatus.DELETED


    if verbose:
        click.secho('done migrating records and deposits.')
    RecordIndexer.index = old_index_fn
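Note that the migration above restores RecordIndexer.index only if it completes without raising. A try/finally context manager (a sketch of an alternative, not part of the original migration) would restore the method unconditionally:

from contextlib import contextmanager

from invenio_indexer.api import RecordIndexer


@contextmanager
def indexing_disabled():
    """Sketch: temporarily replace RecordIndexer.index with a no-op."""
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda self, record: None
    try:
        yield
    finally:
        # Restore the original method even if the wrapped block raises.
        RecordIndexer.index = old_index_fn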