Example no. 1
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()
    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{
                "name": fake.name()
            }],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Flush to index and database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")
    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
Example no. 2
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 3
def store_record(obj, eng):
    """Store the record in the database."""
    if 'Italiana di Fisica'.lower() in \
            obj.data['abstracts'][0]['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'
    if 'Italiana di Fisica'.lower() in \
            obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)

    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ), obj,
                          eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
Example no. 4
def test_record_get_bucket_with_no_bucket(app, db, location):
    """Test retrieval of the bucket when no bucket is associated."""
    record = Record.create({'title': 'test'}, with_bucket=False)
    db.session.commit()
    record = Record.get_record(record.id)
    assert record.bucket is None
    assert record.files is None
Example no. 5
def test_record_create_no_bucket(app, db, location):
    """Test record creation without bucket creation."""
    record = Record.create({}, with_bucket=False)
    db.session.commit()
    assert record.files is None
    assert '_bucket' not in record
    assert '_files' not in record
Example no. 6
def test_record_create_files(app, db, location):
    """Test record creation with bucket and files."""
    record = Record.create({'title': 'test'})
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    db.session.commit()
    assert record['_bucket'] == record.bucket_id
    assert record['_files']
Example no. 7
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    with open(join(rec_dir, 'crab.txt'), "r") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
Example no. 8
def record(app, db):
    """Create a record."""
    record = {'title': 'fuu'}
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example no. 9
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 10
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""

    rec_uuid = uuid4()
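    # RecordIdProvider mints a 'recid' persistent identifier for this UUID;
    # its value is exposed as provider.pid.pid_value below.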
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)

    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])

    # If there are any iiif valid image files, iiif manifest api is added on
    # record metadata.
    iiif_manifest_url = ''
    if num_of_iiif_valid_files > 0:
        iiif_manifest_url = '/record/{0}/iiif/manifest.json'.format(
            provider.pid.pid_value)
    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url

    data = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }

    # from invenio_records_files.api import Record as RecordFile
    record = RecordFile.create(data, id_=rec_uuid)

    # connect to record and bucket
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
Example no. 11
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))

                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 12
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
Example no. 13
def minted_record(app, db):
    """Create a test record."""
    data = {'title': 'fuu'}
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid = current_pidstore.minters['recid'](rec_uuid, data)
        record = Record.create(data, id_=rec_uuid)
    return pid, record
Example no. 14
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
Example no. 15
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 16
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
Example no. 17
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 18
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(
        pid_type='recid', pid_value=12345, object_type='rec',
        object_uuid=record.id, status='R')
    db.session.commit()
    return pid, record
Example no. 19
def record(app, db):
    """Create a record."""
    record = {
        'title': 'fuu'
    }
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example no. 20
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id, with_bucket=not skip_files)
    if not skip_files:
        handle_record_files(data, record.bucket, files, skip_files)

    return record
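When with_bucket=True is passed (and it is the default in the invenio-records-files API exercised by the tests above), Record.create() provisions the bucket itself, so files can be attached straight through record.files without creating a Bucket or a RecordsBuckets row by hand. A small illustrative sketch of that shortcut, assuming a default files Location is already configured as in the fixtures on this page:

from io import BytesIO

from invenio_db import db
from invenio_records_files.api import Record

record = Record.create({'title': 'with files'}, with_bucket=True)
record.files['data.txt'] = BytesIO(b'some payload')  # stored in record.bucket
db.session.commit()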
Example no. 21
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization
    rmtree(tmppath)
Example no. 22
def record(db):
    """Record fixture."""
    rec_uuid = uuid.uuid4()
    provider = RecordIdProvider.create(
        object_type='rec', object_uuid=rec_uuid)
    record = Record.create({
        'control_number': provider.pid.pid_value,
        'title': 'TestDefault',
    }, id_=rec_uuid)
    db.session.commit()
    return record
Example no. 23
    def publish(self, pid=None, id_=None):
        """Publish a deposit."""
        pid = pid or self.pid

        if not pid.is_registered():
            raise PIDInvalidAction()

        self['_deposit']['status'] = 'published'

        if self['_deposit'].get('pid') is None:  # First publishing
            minter = current_pidstore.minters[
                current_app.config['DEPOSIT_PID_MINTER']
            ]
            id_ = id_ or uuid.uuid4()
            record_pid = minter(id_, self)

            self['_deposit']['pid'] = {
                'type': record_pid.pid_type, 'value': record_pid.pid_value,
                'revision_id': 0,
            }

            data = dict(self.dumps())
            data['$schema'] = self.record_schema

            # During first publishing create snapshot the bucket.
            @contextmanager
            def process_files(data):
                """Process deposit files."""
                if self.files and self.files.bucket:
                    assert not self.files.bucket.locked
                    self.files.bucket.locked = True
                    snapshot = self.files.bucket.snapshot(lock=True)
                    data['_files'] = self.files.dumps(bucket=snapshot.id)
                    yield data
                    db.session.add(RecordsBuckets(
                        record_id=id_, bucket_id=snapshot.id
                    ))
                else:
                    yield data

            with process_files(data) as data:
                record = Record.create(data, id_=id_)
        else:  # Update after edit
            record_pid, record = self.fetch_published()
            # TODO add support for patching
            assert record.revision_id == self['_deposit']['pid']['revision_id']

            data = dict(self.dumps())
            data['$schema'] = self.record_schema
            record = record.__class__(data, model=record.model)
            record.commit()

        self.commit()
        return self
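The publish() method above drives the deposit-to-record transition: on first publish it mints a record PID, snapshots the deposit's bucket (if any), and calls Record.create() with the snapshot's file listing; on later publishes it copies the deposit's current data onto the already-published record and commits it. A hedged usage sketch follows; Deposit stands for whichever class carries this method (an assumption here, not shown in the snippet), and fetch_published() is the same helper the method itself relies on.

from io import BytesIO

from invenio_db import db

# Deposit is hypothetical: any invenio-deposit-style class mixing in publish().
deposit = Deposit.create({'title': 'my dataset'})
deposit.files['data.csv'] = BytesIO(b'a,b\n1,2\n')
deposit = deposit.publish()  # snapshots the bucket and creates the Record
db.session.commit()

record_pid, record = deposit.fetch_published()
assert record['_files']  # the record's files point at the locked snapshot bucket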
Example no. 24
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(pid_type='recid',
                                      pid_value=1,
                                      object_type='rec',
                                      object_uuid=record.id,
                                      status='R')
    db.session.commit()
    return pid, record
Example no. 25
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
Example no. 26
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return record
Example no. 27
    def _create_deposit(cls, record):
        """Create a deposit from the record."""
        logging.debug('Create deposit')
        data = deepcopy(record)
        cls._resolve_schema(deposit=data, record=record)
        deposit = Record.create(data, validator=PartialDraft4Validator)
        cls._resolve_deposit(deposit=deposit, record=record)
        cls._resolve_bucket(deposit=deposit, record=record)
        cls._resolve_files(deposit=deposit, record=record)
        # generate files list
        cls._resolve_dumps(record=record)
        #  db.session.commit()
        return record, deposit
Example no. 28
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    record = CustomRecord.create({})
    assert record.files is not None

    record = Record.create({})
    assert record.files is None
Example no. 29
def draft_record(app, db, prepare_es, s3_location):
    """Testing draft-enabled record."""
    draft_uuid = uuid.uuid4()
    data = {
        'title': 'blah',
        # '$schema': TestRecord.PREFERRED_SCHEMA,
        'id': '1'
    }
    PersistentIdentifier.create(
        pid_type='drecid', pid_value='1', status=PIDStatus.REGISTERED,
        object_type='rec', object_uuid=draft_uuid
    )
    rec = Record.create(data, id_=draft_uuid)
    return rec
Example no. 30
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    record = CustomRecord.create({})
    assert record.files is not None

    record = Record.create({})
    assert record.files is None
Example no. 31
def software(skip_files):
    """Load demo software records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_softid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 32
def docs():
    """Load demo article records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.docid import \
        cernopendata_docid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/docs-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/docs')

    articles_json = get_jsons_from_dir(data)

    for filename in articles_json:
        name = filename.split('/')[-1]
        if name.startswith('opera'):
            click.echo('Skipping opera records ...')
            continue
        with open(filename, 'rb') as source:
            for data in json.load(source):

                # Replace body with responding content
                assert data["body"]["content"]
                content_filename = os.path.join(*([
                    "/",
                ] + filename.split('/')[:-1] + [
                    data["body"]["content"],
                ]))

                with open(content_filename) as body_field:
                    data["body"]["content"] = body_field.read()
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                id = uuid.uuid4()
                cernopendata_docid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 33
def test_files_protection(app, db, location):
    """Test record files property protection."""
    record = Record.create({})

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)

    # Lock bucket.
    bucket.locked = True

    assert record.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del record.files['hello.txt']
Example no. 34
def test_files_protection(app, db, location):
    """Test record files property protection."""
    record = Record.create({})

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)

    # Lock bucket.
    bucket.locked = True

    assert record.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del record.files['hello.txt']
Example no. 35
def fixtures():
    """Command for working with test data."""
    temp_path = os.path.join(os.path.dirname(__file__), 'instance/temp')
    demo_files_path = os.path.join(os.path.dirname(__file__), 'demo_files')

    # Create location
    loc = Location(name='local', uri=temp_path, default=True)
    db.session.add(loc)
    db.session.commit()

    # Example files from the data folder
    demo_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'pdffile.pdf',
        'jpgfile.jpg',
        'pngfile.png',
        'pdffile.pdf',
    )

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
    }

    record = Record.create(data, id_=rec_uuid)
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)

    # Add files to the record
    for f in demo_files:
        with open(os.path.join(demo_files_path, f), 'rb') as fp:
            record.files[f] = fp
    record.files.flush()
    record.commit()
    db.session.commit()
Example no. 36
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create('recid',
                                      '1337',
                                      object_type='rec',
                                      object_uuid=recid,
                                      status=PIDStatus.REGISTERED)
    title = {'title': 'record test'}
    record = Record.create(title, recid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # We create an API SIP on top of it
    api_recordsip = RecordSIP(recordsip, sip)
    assert api_recordsip.model is recordsip
    assert api_recordsip.sip.id == sip.id
Example no. 37
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    title = {'title': 'record test'}
    record = Record.create(title, recid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # We create an API SIP on top of it
    api_recordsip = RecordSIP(recordsip, sip)
    assert api_recordsip.model is recordsip
    assert api_recordsip.sip.id == sip.id
Example no. 38
def store_record(obj, eng):
    """Stores record in database"""
    set_springer_source_if_needed(obj)

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
Example no. 39
    def create_record(cls, data, file_content):
        """Create a record.

        :param dict data: The record data.
        :param file_content: The file to store.
        """
        with db.session.begin_nested():

            # create uuid
            rec_uuid = uuid.uuid4()
            # create PID
            current_pidstore.minters['recid'](rec_uuid, data)
            # create record and the associated bucket
            created_record = Record.create(data, id_=rec_uuid)
            # index the record
            RecordIndexer().index(created_record)
            # store the file and link it to the metadata
            created_record.files[file_content.filename] = file_content

        db.session.commit()

        current_app.logger.info("Created file= " + created_record['title'] +
                                ", by user= " + current_user.email)
Example no. 40
def record(database, location, image_path):
    """Test record."""
    rec_uuid = uuid.uuid4()
    pid1 = PersistentIdentifier.create(
        'recid',
        '1',
        object_type='rec',
        object_uuid=rec_uuid,
        status=PIDStatus.REGISTERED,
    )

    rec = Record.create(
        {
            'id': 1,
            'title': 'Lorem ipsum',
            'description': 'Lorem ipsum dolor sit amet',
        },
        id_=rec_uuid,
    )
    with open(image_path, 'rb') as fp:
        rec.files['image-public-domain.jpg'] = fp
    database.session.commit()
    return rec
Example no. 41
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    bucket = Bucket.create()

    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        try:
            f = FileInstance.create()
            filename = file.get("uri").split('/')[-1:][0]
            f.set_uri(file.get("uri"), file.get("size"), file.get("checksum"))
            obj = ObjectVersion.create(bucket, filename, _file_id=f.id)

            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })

        except Exception as e:
            click.echo('Recid {0} file {1} could not be loaded due '
                       'to {2}.'.format(data.get('recid'), filename, str(e)))
            continue

    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    record = Record.create(data, id_=id)
    record['$schema'] = schema
    RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
Example no. 42
def store_record(obj, eng):
    """Stores record in database"""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
Example no. 43
def test_files_property(app, db, location, bucket):
    """Test record files property."""
    with pytest.raises(MissingModelError):
        Record({}).files

    record = Record.create({})
    RecordsBuckets.create(bucket=bucket, record=record.model)

    assert 0 == len(record.files)
    assert 'invalid' not in record.files
    # make sure that _files key is not added after accessing record.files
    assert '_files' not in record

    with pytest.raises(KeyError):
        record.files['invalid']

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)
    assert 1 == len(record['_files'])

    # Update first file with new content:
    record.files['hello.txt'] = BytesIO(b'Hola mundo!')
    file_1 = record.files['hello.txt']
    assert 'hello.txt' == file_1['key']
    assert 1 == len(record.files)
    assert 1 == len(record['_files'])

    assert file_0['version_id'] != file_1['version_id']

    # Create second file and check number of items in files.
    record.files['second.txt'] = BytesIO(b'Second file.')
    record.files['second.txt']
    assert 2 == len(record.files)
    assert 'hello.txt' in record.files
    assert 'second.txt' in record.files

    # Check order of files.
    order_0 = [f['key'] for f in record.files]
    assert ['hello.txt', 'second.txt'] == order_0

    record.files.sort_by(*reversed(order_0))
    order_1 = [f['key'] for f in record.files]
    assert ['second.txt', 'hello.txt'] == order_1

    # Try to rename second file to 'hello.txt'.
    with pytest.raises(Exception):
        record.files.rename('second.txt', 'hello.txt')

    # Remove the 'hello.txt' file.
    del record.files['hello.txt']
    assert 'hello.txt' not in record.files
    # Make sure that 'second.txt' is still there.
    assert 'second.txt' in record.files

    with pytest.raises(KeyError):
        del record.files['hello.txt']

    # Now you can rename 'second.txt' to 'hello.txt'.
    record.files.rename('second.txt', 'hello.txt')
    assert 'second.txt' not in record.files
    assert 'hello.txt' in record.files
Example no. 44
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))

                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example no. 45
def create_fake_record():
    """Create records for demo purposes."""
    fake = Faker()
    data_to_use = {
        "_access": {
            "metadata_restricted": False,
            "files_restricted": False
        },
        "_created_by": 2,
        "_default_preview": "previewer one",
        "_internal_notes": [{
            "user": "******",
            "note": "RDM record",
            "timestamp": fake.iso8601(tzinfo=None, end_datetime=None),
        }],
        "_owners": [1],
        "access_right": "open",
        "embargo_date": fake.iso8601(tzinfo=None, end_datetime=None),
        "contact": "*****@*****.**",
        "resource_type": fake_resource_type(),
        "identifiers": {
            "DOI": "10.9999/rdm.9999999",
            "arXiv": "9999.99999",
        },
        "creators": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "0000-0002-1825-0097",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }]
        }],
        "titles": [{
            "title": fake.company() + "'s gallery",
            "type": "Other",
            "lang": "eng"
        }],
        "publication_date": fake_edtf_level_0(),
        "subjects": [{
            "subject": "Romans",
            "identifier": "subj-1",
            "scheme": "no-scheme"
        }],
        "contributors": [{
            "name": fake.name(),
            "type": "Personal",
            "identifiers": {
                "Orcid": "9999-9999-9999-9998",
            },
            "affiliations": [{
                "name": fake.company(),
                "identifiers": {
                    "ror": "03yrm5c26"
                }
            }],
            "role": "RightsHolder"
        }],
        "dates": [{
            # No end date to avoid computations based on start
            "start": fake.iso8601(tzinfo=None, end_datetime=None),
            "description": "Random test date",
            "type": "Other"
        }],
        "language": "eng",
        "related_identifiers": [{
            "identifier": "10.9999/rdm.9999988",
            "scheme": "DOI",
            "relation_type": "Requires",
            "resource_type": fake_resource_type()
        }],
        "version": "v0.0.1",
        "licenses": [{
            "license": "Berkeley Software Distribution 3",
            "uri": "https://opensource.org/licenses/BSD-3-Clause",
            "identifier": "BSD-3",
            "scheme": "BSD-3",
        }],
        "descriptions": [{
            "description": fake.text(max_nb_chars=3000),
            "type": "Abstract",
            "lang": "eng"
        }],
        "locations": [{
            "point": {
                "lat": str(fake.latitude()),
                "lon": str(fake.longitude())
            },
            "place": fake.location_on_land()[2],
            "description": "Random place on land for random coordinates..."
        }],
        "references": [{
            "reference_string": "Reference to something et al.",
            "identifier": "9999.99988",
            "scheme": "GRID"
        }]
    }

    # Create and index record
    rec_uuid = uuid.uuid4()
    current_pidstore.minters['recid_v2'](rec_uuid, data_to_use)
    record = Record.create(data_to_use, id_=rec_uuid)
    RecordIndexer().index(record)

    # Flush to index and database
    current_search.flush_and_refresh(index='records')
    db.session.commit()

    return record
Example no. 46
def test_missing_location(app, db):
    """Test missing location."""
    assert Record.create({}).files is None
Example no. 47
def add_record(metadata, collection, schema, force, files=[]):
    """Add record."""

    collection = Collection.query.filter(
        Collection.name == collection).first()

    if collection is None:
        return

    data, pid, recid = construct_record(
        collection, metadata, 1, {} if force else schema)
    d = current_app.config['DATADIR']

    buckets = []
    data['_files'] = []

    for file in files:
        bucket = Bucket.create(default_location=Location.get_default())
        buckets.append(bucket)

        with open(pkg_resources.resource_filename(
                'cap.modules.fixtures', os.path.join('data', 'files', file)
        ), 'rb') as fp:
            obj = ObjectVersion.create(bucket, file, stream=fp)

            data['_files'].append({
                'bucket': str(obj.bucket_id),
                'key': obj.key,
                'size': obj.file.size,
                'checksum': str(obj.file.checksum),
                'version_id': str(obj.version_id),
            })
    try:
        record = Record.create(data, id_=recid)

        for bucket in buckets:
            rb = RecordsBuckets(record_id=record.id, bucket_id=bucket.id)
            db.session.add(rb)

        # Invenio-Indexer is delegating the document inferring to
        # Invenio-Search which is analysing the string splitting by `/` and
        # using `.json` to be sure that it can understand the mapping.
        record['$schema'] = 'mappings/{0}.json'.format(collection.name.lower())

        indexer = RecordIndexer()
        indexer.index(record)

        # Creating permission needs for the record
        action_edit_record = RecordUpdateActionNeed(str(recid))
        action_read_record = RecordReadActionNeed(str(recid))
        action_index_record = RecordIndexActionNeed(str(recid))

        # Giving index, read, write permissions to user/creator
        db.session.add(ActionUsers.allow(action_edit_record))
        db.session.add(ActionUsers.allow(action_read_record))
        db.session.add(ActionUsers.allow(action_index_record))

        db.session.commit()

        print("DONE!!!")

    except ValidationError as error:
        print("============================")
        pprint(error.message)
        pprint(error.path)
        print("============================")

        db.session.rollback()