Exemple #1
0
    def create(cls, data, id_=None, **kwargs):
        """Create a CDS deposit.

        Mints a deposit PID when needed, creates the deposit's file
        bucket immediately, and fills in CDS-specific defaults.

        :param data: Deposit metadata dictionary (mutated in place).
        :param id_: Optional record UUID; a random one is minted when
            the data has no ``_deposit`` entry yet.
        :param kwargs: ``bucket_location`` selects the files location
            name (defaults to ``'default'``).
        :returns: The created deposit.
        """
        if '_deposit' not in data:
            id_ = id_ or uuid.uuid4()
            cls.deposit_minter(id_, data)
        bucket = Bucket.create(location=Location.get_by_name(
            kwargs.get('bucket_location', 'default')))
        data['_buckets'] = {'deposit': str(bucket.id)}
        data.setdefault('_cds', {})
        data['_cds'].setdefault('state', {})
        data.setdefault('keywords', [])
        data.setdefault('license', [{
            'license': 'CERN',
            'material': '',
            'url': 'http://copyright.web.cern.ch',
        }])
        # ``setdefault`` already handles the missing-key case, so the
        # previous ``if '_access' not in data`` guard was redundant.
        data.setdefault('_access', {})
        deposit = super(CDSDeposit,
                        cls).create(data,
                                    id_=id_,
                                    validator=PartialDraft4Validator)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Exemple #2
0
 def link_to_record(cls, record, bucket):
     """Link a record to its extra-formats bucket.

     No-op when the record already references an extra-formats bucket.

     :param record: Record to link (mutated and committed).
     :param bucket: Bucket to register under ``_buckets.extra_formats``.
     """
     # Only link once: an existing 'extra_formats' entry wins.
     if not record.get('_buckets', {}).get('extra_formats'):
         record.setdefault('_buckets', {})
         record['_buckets']['extra_formats'] = str(bucket.id)
         record.commit()
         RecordsBuckets.create(record=record.model, bucket=bucket)
Exemple #3
0
    def create(cls, data, id_=None):
        """Create a deposit.

        A dedicated files bucket is created together with the deposit
        and linked to it; the new draft is then registered in the PID
        versioning tree of its concept record.
        """
        conf = current_app.config
        bucket = Bucket.create(
            quota_size=conf['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=conf['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}

        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Resolve the persistent identifiers involved in versioning.
        recid = PersistentIdentifier.get('recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        versioning = PIDVersioning(parent=conceptrecid)
        versioning.insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
def attach_files(obj, eng):
    """Download the files listed in the workflow payload and attach
    them to the already-minted record.

    :param obj: Workflow object; ``obj.extra_data['files']`` holds the
        file descriptors and ``obj.data['control_number']`` the recid.
    :param eng: Workflow engine, used when halting on missing files.
    """
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Ensure the record has a files bucket before attaching.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                # NOTE(review): the local file handle is never closed
                # here -- presumably the files API consumes the stream
                # eagerly on assignment; confirm, else this leaks
                # descriptors.
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
    def create(cls, data, id_=None):
        """Create a deposit.

        Creates the deposit's files bucket, resolves the deposit group
        from the ``$schema`` URL to record the owning experiment, and
        links the bucket to the new deposit.

        :param data: Deposit metadata (mutated: ``_experiment`` set).
        :param id_: Optional record UUID.
        :returns: The created deposit, or ``None`` when ``$schema`` is
            missing or malformed (pre-existing contract, kept as-is).
        """
        bucket = Bucket.create(
            default_location=Location.get_default()
        )

        try:
            schema = data.get("$schema", None) \
                .split('/schemas/', 1)[1]
        except (IndexError, AttributeError):
            # No or malformed $schema: keep the historical behavior of
            # returning None instead of raising.
            return None

        if schema:
            # `.items()` works on both Python 2 and 3; `iteritems()`
            # broke under Python 3.
            _deposit_group = \
                next(
                    (depgroup
                     for dg, depgroup
                     in current_app.config.get('DEPOSIT_GROUPS').items()
                     if schema in depgroup['schema']
                     ),
                    None
                )

            # Guard: when no deposit group matches, `next` returns None
            # and the previous code crashed with AttributeError.
            if _deposit_group is not None:
                data["_experiment"] = _deposit_group.get(
                    "experiment", "Unknown")

        deposit = super(CAPDeposit, cls).create(data, id_=id_)

        add_owner_permissions(deposit.id)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Exemple #6
0
 def link_to_record(cls, record, bucket):
     """Link a record to its extra-formats bucket.

     Does nothing when an extra-formats bucket is already referenced.

     :param record: Record to link (mutated and committed).
     :param bucket: Bucket registered under ``_buckets.extra_formats``.
     """
     # Idempotence guard: never overwrite an existing link.
     if not record.get('_buckets', {}).get('extra_formats'):
         record.setdefault('_buckets', {})
         record['_buckets']['extra_formats'] = str(bucket.id)
         record.commit()
         RecordsBuckets.create(record=record.model, bucket=bucket)
def attach_files(obj, eng):
    """Fetch the files listed in the workflow payload over HTTP (with
    retries) and attach them to the already-minted record.

    Halts the workflow engine when a download fails or when the
    payload lists no files.
    """
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Create and link a bucket only when the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)

                # Anything but 200 halts the workflow with diagnostics.
                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s"
                        % (data.status_code, file_['url'], headers), eng)

                f = StringIO(data.content)
            else:
                # NOTE(review): local file handle is never closed here;
                # presumably the files API reads it eagerly -- confirm.
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Exemple #8
0
    def create(cls, data, id_=None):
        """Create a deposit.

        The deposit gets its own files bucket straight away; the new
        draft is then attached to its concept record's version tree.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}

        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Fetch the three PIDs needed to register this draft version.
        recid = PersistentIdentifier.get('recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        versioning = PIDVersioning(parent=conceptrecid)
        versioning.insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Exemple #9
0
    def create(cls, data, id_=None, with_bucket=False, **kwargs):
        """Create a record and the associated buckets.

        Creates buckets:
          - ``bucket`` for files (handled by the parent class)
          - ``bucket_content`` for files' extracted content.

        :param data: Record metadata (mutated when content is dumped).
        :param id_: Optional record UUID.
        :param with_bucket: Create both buckets automatically on record
            creation if mapping allows.
        :returns: The created record; ``_bucket_content`` is set on it
            when a content bucket was created and linked.
        """
        bucket_content = None

        # Buckets may also be enabled implicitly by the record mapping.
        bucket_allowed = with_bucket or cls.__buckets_allowed(data)
        if bucket_allowed:
            bucket_content = cls.create_bucket(data)
            if bucket_content:
                cls.dump_bucket_content(data, bucket_content)

        record = super(CernSearchRecord,
                       cls).create(data,
                                   id_=id_,
                                   with_bucket=bucket_allowed,
                                   **kwargs)

        # Create link between record and file content bucket
        if bucket_allowed and bucket_content:
            RecordsBuckets.create(record=record.model, bucket=bucket_content)
            record._bucket_content = bucket_content

        return record
Exemple #10
0
def attach_files(obj, eng):
    """Fetch the files listed in the workflow payload and attach them
    to the already-minted record.

    :param obj: Workflow object carrying ``extra_data['files']`` and
        ``data['control_number']``.
    :param eng: Workflow engine, used when halting on missing files.
    """
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Ensure the record has a files bucket before attaching.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # NOTE(review): unlike the sibling variant, the HTTP
                # status code is not checked here -- a failed download
                # would be stored as the error body; confirm intended.
                data = requests_retry_session().get(file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                # NOTE(review): local file handle is never closed here;
                # presumably the files API reads it eagerly -- confirm.
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Exemple #11
0
 def _process_files(self, record_id, data):
     """Snapshot bucket and add files in record during first publishing.

     Generator used context-manager-style: yields ``data`` after
     injecting the file dumps, then links the snapshot bucket(s) to
     the record once the caller resumes it.

     :param record_id: UUID of the published record.
     :param data: Record metadata dictionary (mutated in place).
     :raises Exception: if any file was not fully written to storage.
     """
     if self.files:
         # Verify every file really reached the storage backend before
         # locking anything.
         file_uuids = set()
         for f in self.files:
             fs, path = f.file.storage()._get_fs()
             if not (fs.exists(path) and
                     f.file.verify_checksum(throws=False)):
                 file_uuids.add(str(f.file.id))
         if file_uuids:
             # Typo fix: "One of more" -> "One or more".
             raise Exception('One or more files were not written to'
                             ' the storage: {}.'.format(file_uuids))
         assert not self.files.bucket.locked
         # Lock the deposit bucket and snapshot it for the record.
         self.files.bucket.locked = True
         snapshot = self.files.bucket.snapshot(lock=True)
         data['_files'] = self.files.dumps(bucket=snapshot.id)
         data['_buckets']['record'] = str(snapshot.id)
         yield data
         db.session.add(RecordsBuckets(
             record_id=record_id, bucket_id=snapshot.id
         ))
         # Add extra_formats bucket
         if 'extra_formats' in self['_buckets']:
             db.session.add(RecordsBuckets(
                 record_id=record_id, bucket_id=self.extra_formats.bucket.id
             ))
     else:
         yield data
Exemple #12
0
def datasets(skip_files):
    """Load demo datasets records.

    :param skip_files: When True, records are created without files.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Renamed from `id` to avoid shadowing the builtin.
                record_uuid = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(record_uuid, data)
                else:
                    cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # Distinct name: this previously clobbered the
                    # outer loop variable `filename`.
                    object_name = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))

                    ObjectVersion.create(bucket, object_name, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #13
0
 def _resolve_bucket(cls, deposit, record):
     """Build bucket.

     Creates a fresh bucket in the 'videos' location for the deposit
     and mirrors the bucket reference onto the record.
     """
     logging.debug('Creating new buckets, record and deposit.')
     new_bucket = Bucket.create(location=Location.get_by_name('videos'))
     deposit['_buckets'] = {'deposit': str(new_bucket.id)}
     RecordsBuckets.create(record=deposit.model, bucket=new_bucket)
     record['_buckets'] = deepcopy(deposit['_buckets'])
     db.session.commit()
Exemple #14
0
def data_policies(skip_files):
    """Load demo Data Policy records.

    :param skip_files: When True, records are created without files.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Renamed from `id` to avoid shadowing the builtin.
                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # Distinct name: this previously clobbered the
                    # outer loop variable `filename`.
                    object_name = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        object_name,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #15
0
def data_policies(skip_files):
    """Load demo Data Policy records.

    :param skip_files: When True, records are created without files.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Renamed from `id` to avoid shadowing the builtin.
                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # Distinct name: this previously clobbered the
                    # outer loop variable `filename`.
                    object_name = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        object_name,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #16
0
def clone_record_files(src_record, dst_record):
    """Copy one record's files to another via a bucket snapshot."""
    # NOTE `Bucket.snapshot` doesn't set `locked`
    bucket_copy = src_record.files.bucket.snapshot(lock=False)
    bucket_copy.locked = False

    RecordsBuckets.create(record=dst_record.model, bucket=bucket_copy)

    dst_record['_files'] = dst_record.files.dumps()
    dst_record['_buckets'] = {'deposit': str(bucket_copy.id)}
Exemple #17
0
    def create(cls, data, id_=None):
        """Create a deposit.

        A files bucket is created up front and linked to the new
        deposit before it is returned.
        """
        location = Location.get_default()
        bucket = Bucket.create(default_location=location)
        data['_buckets'] = {'deposit': str(bucket.id)}

        deposit = super(CAPDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP.

    Covers both the with-files and the no-files (``create_sip_files=
    False``) paths.
    """
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    # skip schema validation -- the schema URL above is not resolvable
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    # SIPFile count unchanged: no new file entries were created
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization
    rmtree(tmppath)
Exemple #19
0
def create_record(schema, data, files, skip_files):
    """Creates a new record.

    :param schema: JSON schema URL stored in ``$schema``.
    :param data: Record metadata (mutated: recid minted, schema set).
    :param files: Iterable of file descriptors to attach.
    :param skip_files: When True, no bucket or files are created.
    :returns: The created record.
    """
    # `record_uuid` instead of `id` to avoid shadowing the builtin.
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
Exemple #20
0
    def create(cls, data, id_=None):
        """Create a deposit.

        The deposit's files bucket is created and linked immediately.
        """
        cfg = current_app.config
        bucket = Bucket.create(
            quota_size=cfg['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=cfg['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}

        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Exemple #21
0
def create_record(schema, data, files, skip_files):
    """Creates a new record.

    :param schema: JSON schema URL stored in ``$schema``.
    :param data: Record metadata (mutated: recid minted, schema set).
    :param files: Iterable of file descriptors to attach.
    :param skip_files: When True, no bucket or files are created.
    :returns: The created record.
    """
    # `record_uuid` instead of `id` to avoid shadowing the builtin.
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return record
Exemple #22
0
    def create(cls, data, id_=None):
        """Create a deposit.

        Bucket creation happens up front so the deposit can hold files
        from the moment it exists.
        """
        quota = current_app.config['ZENODO_BUCKET_QUOTA_SIZE']
        max_size = current_app.config['ZENODO_MAX_FILE_SIZE']
        bucket = Bucket.create(quota_size=quota, max_file_size=max_size)

        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Exemple #23
0
 def _resolve_files(cls, deposit, record):
     """Create files.

     Rebuilds the deposit's files/tags from the record's ``_files``
     dump, then snapshots the deposit bucket and links the snapshot
     to the record. Order of the steps below matters.
     """
     # build deposit files
     bucket = as_bucket(deposit['_buckets']['deposit'])
     # build objects/tags from marc21 metadata
     for file_ in record.get('_files', []):
         cls._resolve_file(deposit=deposit, bucket=bucket, file_=file_)
     # attach the master tag to the proper dependent files
     cls._resolve_master_tag(deposit=deposit)
     if Video.get_record_schema() == record['$schema']:
         # probe metadata from video
         cls._resolve_extracted_metadata(deposit=deposit, record=record)
         # update tag 'timestamp'
         cls._update_timestamp(deposit=deposit)
     # build a partial files dump
     cls._resolve_dumps(record=deposit)
     # snapshot them to record bucket
     snapshot = bucket.snapshot(lock=True)
     db.session.add(RecordsBuckets(
         record_id=record.id, bucket_id=snapshot.id
     ))
     if Video.get_record_schema() == record['$schema']:
         # create smil file
         cls._resolve_dumps(record=record)
         cls._resolve_smil(record=record)
         # update tag 'master'
         cls._update_tag_master(record=record)
Exemple #24
0
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content.

    Mints a new recid, builds the record metadata (adding an IIIF
    manifest URL when IIIF-valid image files are present) and links
    the created record to *bucket*.

    :param bucket: Bucket holding the files to describe.
    :param record_dict: Source dict with ``_files`` and ``_deposit``.
    """

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)

    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])

    # If there are any iiif valid image files, iiif manifest api is added on
    # record metadata.
    iiif_manifest_url = ''
    if num_of_iiif_valid_files > 0:
        iiif_manifest_url = '/record/{0}/iiif/manifest.json'.format(
            provider.pid.pid_value)
    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url

    data = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }

    # from invenio_records_files.api import Record as RecordFile
    record = RecordFile.create(data, id_=rec_uuid)

    # connect to record and bucket
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    # (removed the unused `record_buckets` binding)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # Binary mode: `content` is bytes; text mode would make the
    # comparison fail on Python 3.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
Exemple #26
0
def software(skip_files):
    """Load demo software records.

    :param skip_files: When True, records are created without files.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Renamed from `id` to avoid shadowing the builtin.
                record_uuid = uuid.uuid4()
                cernopendata_softid_minter(record_uuid, data)
                # Set $schema on the input *before* creating the record
                # so it is persisted: the previous code mutated the
                # created record afterwards without committing it,
                # unlike the sibling `datasets`/`data_policies` loaders.
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # Distinct name: this previously clobbered the
                    # outer loop variable `filename`.
                    object_name = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, object_name, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #27
0
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation, stores
        the owner's profile names in ``_deposit.owners_ext``, and
        registers the new record under a freshly minted 'parent' PID.

        :param data: Deposit metadata dictionary (mutated in place).
        :param id_: Optional record UUID.
        :returns: The created deposit.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['WEKO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['WEKO_MAX_FILE_SIZE'],
        )
        # NOTE(review): any incoming $schema is discarded -- presumably
        # it is (re)set elsewhere; confirm.
        if '$schema' in data:
            data.pop('$schema')

        data['_buckets'] = {'deposit': str(bucket.id)}

        # save user_name & display name.
        if current_user and current_user.is_authenticated:
            user = UserProfile.get_by_userid(current_user.get_id())

            # Fall back to empty strings when no profile exists.
            username = ''
            displayname = ''
            if user is not None:
                username = user._username
                displayname = user._displayname
            if '_deposit' in data:
                data['_deposit']['owners_ext'] = {
                    'username': username,
                    'displayname': displayname,
                    'email': current_user.email
                }
        deposit = super(WekoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Group this record and future versions under a 'parent' PID.
        recid = PersistentIdentifier.get('recid', str(data['_deposit']['id']))
        depid = PersistentIdentifier.get('depid', str(data['_deposit']['id']))
        p_depid = PersistentIdentifier.create('parent',
                                              'parent:recid/{0}'.format(
                                                  str(data['_deposit']['id'])),
                                              object_type='rec',
                                              object_uuid=uuid.uuid4(),
                                              status=PIDStatus.REGISTERED)

        PIDVersioning(parent=p_depid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Exemple #28
0
    def create(cls, data, id_=None):
        """Generate a Deposit object.

        Overrides parent's `create`.

        The configured quota_size and max_file_size here are for
        public API constraints. There are no constraints
        (apart from physical volume we have available) when creating
        records differently.
        """
        limit = 50 * 1024 * 1024 * 1024  # 50 GB
        bucket = Bucket.create(quota_size=limit, max_file_size=limit)

        data['_buckets'] = {'deposit': str(bucket.id)}
        # any newly created Deposit is a draft
        data['type'] = RecordType.draft.value

        deposit = super(Deposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
    def clone(self, pid=None, id_=None):
        """Clone a deposit.

        Adds snapshot of the files when deposit is cloned.

        :param pid: PID of the deposit being cloned; recorded in the
            clone's ``_deposit.cloned_from``.
        :param id_: Optional UUID for the clone.
        :returns: The new deposit.
        """
        # Deep-copy the dump so mutations never touch this deposit.
        data = copy.deepcopy(self.dumps())
        del data['_deposit']
        deposit = super(CAPDeposit, self).create(data, id_=id_)
        # Record the provenance of the clone.
        deposit['_deposit']['cloned_from'] = {
            'type': pid.pid_type,
            'value': pid.pid_value,
            'revision_id': self.revision_id,
        }
        # Snapshot the source bucket so the clone owns its own files.
        bucket = self.files.bucket.snapshot()
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        # optionally we might need to do: deposit.files.flush()
        deposit.commit()
        return deposit
Exemple #30
0
def _create_records(base_metadata, total, versions, files):
    """Create ``total`` concept records, each with ``versions`` versions.

    Every version carries ``files`` ten-byte dummy PDF attachments.

    :returns: List of ``(recid, record, file_objects)`` tuples.
    """
    created = []
    concept_val = 1
    for _ in range(total):
        concept_pid = PersistentIdentifier.create(
            'recid', str(concept_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=concept_pid)
        for version_no in range(versions):
            recid_val = concept_val + version_no + 1
            metadata = deepcopy(base_metadata)
            metadata.update({
                'conceptrecid': str(concept_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(metadata)
            record_bucket = Bucket.create()
            record['_buckets'] = {'record': str(record_bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=record_bucket, record=record.model)
            version_pid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'],
                object_type='rec', object_uuid=record.id, status='R')
            versioning.insert_child(version_pid)

            attached_objects = []
            for file_no in range(files):
                key = 'Test{0}_v{1}.pdf'.format(file_no, version_no)
                record.files[key] = BytesIO(b'1234567890')  # 10 bytes
                record.files[key]['type'] = 'pdf'
                attached_objects.append(record.files[key].obj)
            record.commit()

            db.session.commit()
            created.append((version_pid, record, attached_objects))
        concept_val += versions + 1
    return created
Exemple #31
0
    def create(cls, data, id_=None):
        """Create a deposit.

        Validates that ``data`` is a non-empty dict carrying a ``$schema``
        belonging to one of the configured ``DEPOSIT_GROUPS``, tags the
        deposit with that group's experiment, creates a bucket and links
        it to the new deposit, and applies owner permissions.

        :param data: Deposit metadata; must contain a valid ``$schema`` URL.
        :param id_: Optional UUID for the new deposit record.
        :raises EmptyDepositError: If ``data`` is not a non-empty dict.
        :raises WrongJSONSchemaError: If ``$schema`` is missing or unknown.
        :returns: The newly created deposit.
        """
        if not isinstance(data, dict) or data == {}:
            raise EmptyDepositError()

        bucket = Bucket.create()

        # Relative schema paths of every configured deposit group.
        available_schemas = [x.get('schema').split('schemas/')[-1] for x in
                             current_app.config.get('DEPOSIT_GROUPS', {}).values()]

        try:
            schema = data.get("$schema", None) \
                .split('/schemas/', 1)[1]
        except (IndexError, AttributeError):
            raise WrongJSONSchemaError()

        if schema not in available_schemas:
            raise WrongJSONSchemaError()

        if schema:
            # BUG FIX: dict.iteritems() is Python 2 only and raises
            # AttributeError on Python 3; use items() (this function
            # already relies on Py3-compatible .values() above).
            _deposit_group = \
                next(
                    (depgroup
                     for dg, depgroup
                     in current_app.config.get('DEPOSIT_GROUPS').items()
                     if schema in depgroup['schema']
                     ),
                    None
                )
            # Guard against a missing group match (previously crashed with
            # AttributeError on None).
            data["_experiment"] = (_deposit_group or {}).get(
                "experiment", "Unknown")

        deposit = super(CAPDeposit, cls).create(data, id_=id_)

        _access = add_owner_permissions(deposit)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        if _access:
            deposit["_access"] = _access
            deposit.commit()
        return deposit
Exemple #32
0
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record.

    Unless ``skip_files`` is set, the record's existing bucket and all of
    its object versions are removed and replaced by a freshly created one.
    """
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            old_bucket = record.files.bucket
            stored_bucket = Bucket.get(old_bucket.id)
            for version in ObjectVersion.get_by_bucket(stored_bucket).all():
                version.remove()
                version.file.delete()
            RecordsBuckets.query.filter_by(record=record.model,
                                           bucket=stored_bucket).delete()
            old_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        new_bucket = Bucket.create()
        handle_record_files(data, new_bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=new_bucket)
    return record
Exemple #33
0
    def create(cls, data, id_=None):
        """Create a deposit with an attached bucket.

        The bucket is created immediately on deposit creation; the new
        draft is then registered under its concept recid and linked to
        its deposit PID.
        """
        deposit_bucket = Bucket.create(**cls._get_bucket_settings())
        data['_buckets'] = {'deposit': str(deposit_bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=deposit_bucket)

        record_pid = PersistentIdentifier.get('recid', str(data['recid']))
        concept_pid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        deposit_pid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        # Register the draft under its concept and link record <-> deposit.
        PIDVersioning(parent=concept_pid).insert_draft_child(child=record_pid)
        RecordDraft.link(record_pid, deposit_pid)

        return deposit
Exemple #34
0
def fixtures():
    """Command for working with test data.

    Creates the default file storage location, mints an identifier for a
    demo record, creates the record with a linked bucket and attaches one
    copy of each demo file to it.
    """
    temp_path = os.path.join(os.path.dirname(__file__), 'instance/temp')
    demo_files_path = os.path.join(os.path.dirname(__file__), 'demo_files')

    # Create location
    loc = Location(name='local', uri=temp_path, default=True)
    db.session.add(loc)
    db.session.commit()

    # Example files from the data folder.
    # BUG FIX: 'pdffile.pdf' was listed twice, which re-uploaded the same
    # file under the same key (a redundant second object version).
    demo_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'pdffile.pdf',
        'jpgfile.jpg',
        'pngfile.png',
    )

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
    }

    record = Record.create(data, id_=rec_uuid)
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)

    # Add files to the record
    for f in demo_files:
        with open(os.path.join(demo_files_path, f), 'rb') as fp:
            record.files[f] = fp
    record.files.flush()
    record.commit()
    db.session.commit()
Exemple #35
0
    def create_files(cls, dump, deposit):
        """Create the deposit's bucket and populate it from the dump.

        :param dump: Source dump whose ``files`` are imported.
        :param deposit: Deposit to link the new bucket to.
        :returns: Tuple of ``(bucket, created_files)``.
        """
        # Create bucket and link to deposit.
        bucket = Bucket.create()
        db.session.add(
            RecordsBuckets(record_id=deposit.id, bucket_id=bucket.id))

        created = [cls.create_file(bucket, dump_file)
                   for dump_file in dump.files]

        return bucket, created
Exemple #36
0
 def _process_files(self, record_id, data):
     """Snapshot bucket and add files in record during first publishing.

     Generator used context-manager-style: yields ``data`` exactly once.
     When files exist, the deposit bucket is locked and a locked snapshot
     is taken before yielding; the snapshot is linked to the published
     record only after the caller's block has run.
     """
     if self.files:
         # The deposit bucket must not already be locked on first publish.
         assert not self.files.bucket.locked
         # Freeze the deposit bucket, then snapshot it (locked) so the
         # published record gets an immutable copy of the files.
         self.files.bucket.locked = True
         snapshot = self.files.bucket.snapshot(lock=True)
         data['_files'] = self.files.dumps(bucket=snapshot.id)
         yield data
         # Deliberately after the yield: the record/bucket link is created
         # only once the caller has finished processing ``data``.
         db.session.add(
             RecordsBuckets(record_id=record_id, bucket_id=snapshot.id))
     else:
         yield data
Exemple #37
0
def _create_records(base_metadata, total, versions, files):
    """Generate ``total`` versioned demo records with dummy PDF files.

    Each concept record gets ``versions`` child versions; every version
    carries ``files`` ten-byte PDF attachments.

    :returns: List of ``(recid, record, file_objects)`` tuples.
    """
    out = []
    next_concept = 1
    for _ in range(total):
        parent_pid = PersistentIdentifier.create(
            'recid', str(next_concept), status='R')
        db.session.commit()
        pid_versioning = PIDVersioning(parent=parent_pid)
        for idx in range(versions):
            child_recid_val = next_concept + idx + 1
            meta = deepcopy(base_metadata)
            meta['conceptrecid'] = str(next_concept)
            meta['conceptdoi'] = '10.1234/{}'.format(child_recid_val)
            meta['recid'] = child_recid_val
            meta['doi'] = '10.1234/{}'.format(child_recid_val)
            rec = ZenodoRecord.create(meta)
            rec_bucket = Bucket.create()
            rec['_buckets'] = {'record': str(rec_bucket.id)}
            rec.commit()
            RecordsBuckets.create(bucket=rec_bucket, record=rec.model)
            child_pid = PersistentIdentifier.create(
                pid_type='recid', pid_value=rec['recid'], object_type='rec',
                object_uuid=rec.id, status='R')
            pid_versioning.insert_child(child_pid)

            attached = []
            for file_idx in range(files):
                key = 'Test{0}_v{1}.pdf'.format(file_idx, idx)
                rec.files[key] = BytesIO(b'1234567890')  # 10 bytes
                rec.files[key]['type'] = 'pdf'
                attached.append(rec.files[key].obj)
            rec.commit()

            db.session.commit()
            out.append((child_pid, rec, attached))
        next_concept += versions + 1
    return out
Exemple #38
0
    def _create_bucket(self, location=None, storage_class=None):
        """Create a bucket, link it to this record, and return it.

        Note:
            Overwrites base_class._create_bucket method as it is not
            implemented there. Each call creates a new bucket even for
            identical parameters. It's a private method, do not use it
            directly; use `get_bucket()` instead.

        Args:
            location (Location): Bucket location object
                (default: 'RECORDS_DEFAULT_FILE_LOCATION_NAME') from config
            storage_class (str): Bucket storage class
                (default: 'RECORDS_DEFAULT_STORAGE_CLASS') from config

        Returns: Bucket for current record, selected location and storage_class
        """
        linked_bucket = Bucket.create(
            location=location, storage_class=storage_class)
        RecordsBuckets.create(record=self.model, bucket=linked_bucket)
        LOGGER.info(
            "Created bucket",
            uuid=self.id,
            class_name=self.__class__.__name__,
        )
        return linked_bucket
Exemple #39
0
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record.

    Unless ``skip_files`` is set, the record's current bucket and every
    object version in it are deleted, metadata is updated, and a brand
    new bucket with the given files is attached.
    """
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            files_bucket = record.files.bucket
            stored_bucket = Bucket.get(files_bucket.id)
            for obj in ObjectVersion.get_by_bucket(stored_bucket).all():
                obj.remove()
                obj.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=stored_bucket,
            ).delete()
            files_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        fresh_bucket = Bucket.create()
        handle_record_files(data, fresh_bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=fresh_bucket)
    return record
Exemple #40
0
    def newversion(self, pid=None):
        """Create a new version deposit.

        Only possible on a published deposit with no pending draft
        version. Copies the latest published record's metadata (minus
        identifiers and file pointers), registers the new draft in the
        PID versioning tree, pre-fills its DOI, and attaches unlocked
        snapshots of the previous version's bucket(s).

        :param pid: Unused; the published PID is re-fetched internally.
        :returns: ``self`` (the new draft is created as a side effect).
        :raises PIDInvalidAction: If this deposit is not published.
        """
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required when creating a new version
                # outside of a request context.
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                # Register the new draft in the versioning tree and link
                # its recid to the freshly created depid.
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                # Re-read versioning state and re-index siblings so search
                # reflects the new draft child.
                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                if 'extra_formats' in latest_record['_buckets']:
                    deposit['_buckets']['extra_formats'] = \
                        str(extra_formats_snapshot.id)
                    RecordsBuckets.create(
                        record=deposit.model, bucket=extra_formats_snapshot)
                deposit.commit()
        return self
 def _create_buckets(self):
     """Create a fresh bucket and link it to this record."""
     RecordsBuckets.create(record=self.model, bucket=Bucket.create())
Exemple #42
0
def datasets(skip_files):
    """Load demo datasets records.

    Reads every ``*.json`` fixture under the datasets data directory,
    mints an identifier for each entry (dataset-id based when a ``doi``
    is present, recid otherwise), creates the record with a linked
    bucket, attaches the listed files (unless ``skip_files``) and
    indexes the record.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # BUG FIX: local was named ``id``, shadowing the builtin.
                record_uuid = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(record_uuid, data)
                else:
                    cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # BUG FIX: inner loop previously reassigned the outer
                    # ``filename`` loop variable; also ``[-1:][0]``
                    # simplified to ``[-1]``.
                    object_name = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))

                    ObjectVersion.create(
                        bucket,
                        object_name,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()