def create(cls, data, id_=None, **kwargs):
    """Create a CDS deposit.

    Adds bucket creation immediately on deposit creation.

    :param data: deposit metadata dict; mutated in place with defaults.
    :param id_: optional record UUID; minted when ``_deposit`` is missing.
    :param kwargs: may contain ``bucket_location`` (defaults to 'default').
    :returns: the created deposit, with its bucket linked.
    """
    if '_deposit' not in data:
        # Brand-new deposit: mint the deposit PID.
        id_ = id_ or uuid.uuid4()
        cls.deposit_minter(id_, data)
    bucket = Bucket.create(location=Location.get_by_name(
        kwargs.get('bucket_location', 'default')))
    data['_buckets'] = {'deposit': str(bucket.id)}
    data.setdefault('_cds', {})
    data['_cds'].setdefault('state', {})
    data.setdefault('keywords', [])
    data.setdefault('license', [{
        'license': 'CERN',
        'material': '',
        'url': 'http://copyright.web.cern.ch',
    }])
    # FIX: the previous `if '_access' not in data:` guard was redundant —
    # setdefault already no-ops when the key exists.
    data.setdefault('_access', {})
    deposit = super(CDSDeposit, cls).create(
        data, id_=id_, validator=PartialDraft4Validator)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def link_to_record(cls, record, bucket):
    """Link a record its extra formats bucket."""
    existing = record.get('_buckets', {}).get('extra_formats')
    if not existing:
        # First extra-formats bucket for this record: store its id and
        # persist the link.
        record.setdefault('_buckets', {})
        record['_buckets']['extra_formats'] = str(bucket.id)
        record.commit()
        RecordsBuckets.create(record=record.model, bucket=bucket)
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    # Bucket quota and per-file size limits come from application config.
    bucket = Bucket.create(
        quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    # Resolve the PIDs already minted for this deposit; `data` is expected
    # to contain 'recid', 'conceptrecid' and '_deposit.id' at this point.
    recid = PersistentIdentifier.get(
        'recid', str(data['recid']))
    conceptrecid = PersistentIdentifier.get(
        'recid', str(data['conceptrecid']))
    depid = PersistentIdentifier.get(
        'depid', str(data['_deposit']['id']))
    # Register the new recid as a draft child of the concept recid and
    # link the draft record to its deposit PID.
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)
    return deposit
def attach_files(obj, eng):
    """Attach the files listed in the workflow object's extra data to the record."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)
        if '_files' not in existing_record or not existing_record['_files']:
            # Record has no bucket yet: create and link one.
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)
        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file streamed over HTTP (urllib2 => Python 2 code).
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                # Local file path.
                # NOTE(review): handle is never closed explicitly — relies
                # on GC after the files API consumes the stream.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']
        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        default_location=Location.get_default()
    )
    try:
        # Keep only the part of '$schema' after '/schemas/'.
        schema = data.get("$schema", None) \
            .split('/schemas/', 1)[1]
    except (IndexError, AttributeError):
        # '$schema' missing or malformed.
        # NOTE(review): returns None instead of raising — callers must
        # check the result; the bucket created above is left orphaned.
        return None
    if schema:
        # Find the deposit group whose schema matches
        # (.iteritems() => Python 2 code).
        _deposit_group = \
            next(
                (depgroup for dg, depgroup
                 in current_app.config.get('DEPOSIT_GROUPS').iteritems()
                 if schema in depgroup['schema']
                 ),
                None
            )
        # NOTE(review): _deposit_group may be None here, which would raise
        # AttributeError on .get — confirm against config guarantees.
        data["_experiment"] = _deposit_group.get("experiment", "Unknown")
    deposit = super(CAPDeposit, cls).create(data, id_=id_)
    add_owner_permissions(deposit.id)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def link_to_record(cls, record, bucket):
    """Link a record its extra formats bucket."""
    if record.get('_buckets', {}).get('extra_formats'):
        # Already linked — nothing to do.
        return
    record.setdefault('_buckets', {})
    record['_buckets']['extra_formats'] = str(bucket.id)
    record.commit()
    RecordsBuckets.create(record=record.model, bucket=bucket)
def attach_files(obj, eng):
    """Download files from the workflow object's extra data and attach them."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)
        if '_files' not in existing_record or not existing_record['_files']:
            # Record has no bucket yet: create and link one.
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)
        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file, fetched with a retrying HTTP session.
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)
                if data.status_code != 200:
                    # Halt the workflow on any non-OK response.
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s" % (
                            data.status_code, file_['url'], headers), eng)
                f = StringIO(data.content)
            else:
                # Local path; NOTE(review): handle never closed explicitly.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']
        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    config = current_app.config
    deposit_bucket = Bucket.create(
        quota_size=config['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=config['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(deposit_bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=deposit_bucket)
    # Wire up versioning: the new recid becomes a draft child of the
    # concept recid, and the draft record is linked to its deposit PID.
    recid_pid = PersistentIdentifier.get('recid', str(data['recid']))
    concept_pid = PersistentIdentifier.get('recid',
                                           str(data['conceptrecid']))
    deposit_pid = PersistentIdentifier.get('depid',
                                           str(data['_deposit']['id']))
    PIDVersioning(parent=concept_pid).insert_draft_child(child=recid_pid)
    RecordDraft.link(recid_pid, deposit_pid)
    return deposit
def create(cls, data, id_=None, with_bucket=False, **kwargs):
    """Create a record and the associated buckets.

    Creates buckets:
        - ``bucket`` for files
        - ``bucket_content`` for files' extracted content.

    :param with_bucket: Create both buckets automatically on record
        creation if mapping allows.
    """
    bucket_content = None
    # NOTE: `cls.__buckets_allowed` is a name-mangled private classmethod;
    # buckets are created either on explicit request or when the mapping
    # allows it.
    bucket_allowed = with_bucket or cls.__buckets_allowed(data)
    if bucket_allowed:
        bucket_content = cls.create_bucket(data)
        if bucket_content:
            # Record the content bucket in the metadata dump.
            cls.dump_bucket_content(data, bucket_content)
    # The base class creates the main files bucket when asked to.
    record = super(CernSearchRecord, cls).create(
        data, id_=id_, with_bucket=bucket_allowed, **kwargs)
    # Create link between record and file content bucket
    if bucket_allowed and bucket_content:
        RecordsBuckets.create(record=record.model, bucket=bucket_content)
        record._bucket_content = bucket_content
    return record
def attach_files(obj, eng):
    """Download files from extra data and attach them to the existing record."""
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)
        if '_files' not in existing_record or not existing_record['_files']:
            # Record has no bucket yet: create and link one.
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)
        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # Remote file, fetched with a retrying HTTP session.
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                # Local path; NOTE(review): handle never closed explicitly.
                f = open(file_['url'])
            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']
        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def _process_files(self, record_id, data):
    """Snapshot bucket and add files in record during first publishing."""
    if self.files:
        # Verify every file was fully written to storage (exists and the
        # checksum matches) before locking the bucket.
        file_uuids = set()
        for f in self.files:
            fs, path = f.file.storage()._get_fs()
            if not (fs.exists(path) and
                    f.file.verify_checksum(throws=False)):
                file_uuids.add(str(f.file.id))
        if file_uuids:
            raise Exception('One of more files were not written to'
                            ' the storage: {}.'.format(file_uuids))
        assert not self.files.bucket.locked
        self.files.bucket.locked = True
        # The locked snapshot becomes the published record's bucket.
        snapshot = self.files.bucket.snapshot(lock=True)
        data['_files'] = self.files.dumps(bucket=snapshot.id)
        data['_buckets']['record'] = str(snapshot.id)
        # Yield first so the caller can persist `data`; link buckets after.
        yield data
        db.session.add(RecordsBuckets(
            record_id=record_id, bucket_id=snapshot.id
        ))
        # Add extra_formats bucket
        if 'extra_formats' in self['_buckets']:
            db.session.add(RecordsBuckets(
                record_id=record_id,
                bucket_id=self.extra_formats.bucket.id
            ))
    else:
        yield data
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))
    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE(review): the loop variables below shadow the outer
            # `data` (fixtures dir) and, in the inner loop, `filename`
            # (the JSON path) — harmless here but fragile.
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def _resolve_bucket(cls, deposit, record):
    """Build bucket."""
    logging.debug('Creating new buckets, record and deposit.')
    # New bucket in the 'videos' location, shared by deposit and record.
    new_bucket = Bucket.create(location=Location.get_by_name('videos'))
    deposit['_buckets'] = {'deposit': str(new_bucket.id)}
    RecordsBuckets.create(record=deposit.model, bucket=new_bucket)
    record['_buckets'] = deepcopy(deposit['_buckets'])
    db.session.commit()
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata
    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))
    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE(review): the loop variables shadow the outer `data`
            # (fixtures dir) and `filename` (the JSON path).
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket, filename, _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata
    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))
    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            # NOTE(review): identical to the other `data_policies` fixture
            # loader in this file; the loop variables shadow the outer
            # `data` (fixtures dir) and `filename` (the JSON path).
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket, filename, _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def clone_record_files(src_record, dst_record):
    """Create copy a record's files."""
    # NOTE `Bucket.snapshot` doesn't set `locked`
    bucket_copy = src_record.files.bucket.snapshot(lock=False)
    bucket_copy.locked = False
    RecordsBuckets.create(record=dst_record.model, bucket=bucket_copy)
    # Refresh the files dump and bucket reference on the destination.
    dst_record['_files'] = dst_record.files.dumps()
    dst_record['_buckets'] = {'deposit': str(bucket_copy.id)}
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    default_location = Location.get_default()
    new_bucket = Bucket.create(default_location=default_location)
    data['_buckets'] = {'deposit': str(new_bucket.id)}
    deposit = super(CAPDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=new_bucket)
    return deposit
def test_RecordSIP_create(db, mocker): """Test create method from the API class RecordSIP.""" # we setup a file storage tmppath = tempfile.mkdtemp() db.session.add(Location(name='default', uri=tmppath, default=True)) # setup metadata mtype = SIPMetadataType(title='JSON Test', name='json-test', format='json', schema='url://to/schema') db.session.add(mtype) db.session.commit() # first we create a record recid = uuid.uuid4() pid = PersistentIdentifier.create( 'recid', '1337', object_type='rec', object_uuid=recid, status=PIDStatus.REGISTERED) mocker.patch('invenio_records.api.RecordBase.validate', return_value=True, autospec=True) record = Record.create( {'title': 'record test', '$schema': 'url://to/schema'}, recid) # we add a file to the record bucket = Bucket.create() content = b'Test file\n' RecordsBuckets.create(record=record.model, bucket=bucket) record.files['test.txt'] = BytesIO(content) db.session.commit() # Let's create a SIP user = create_test_user('*****@*****.**') agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'} rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent) db.session.commit() # test! assert RecordSIP_.query.count() == 1 assert SIP_.query.count() == 1 assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 1 assert len(rsip.sip.files) == 1 assert len(rsip.sip.metadata) == 1 metadata = rsip.sip.metadata[0] assert metadata.type.format == 'json' assert '"title": "record test"' in metadata.content assert rsip.sip.archivable is True # we try with no files rsip = RecordSIP.create(pid, record, True, create_sip_files=False, user_id=user.id, agent=agent) assert SIPFile.query.count() == 1 assert SIPMetadata.query.count() == 2 assert len(rsip.sip.files) == 0 assert len(rsip.sip.metadata) == 1 # finalization rmtree(tmppath)
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)
    if not skip_files:
        # Attach a fresh bucket with the provided files.
        files_bucket = Bucket.create()
        handle_record_files(data, files_bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=files_bucket)
    return record
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    cfg = current_app.config
    new_bucket = Bucket.create(
        quota_size=cfg['ZENODO_BUCKET_QUOTA_SIZE'],
        max_file_size=cfg['ZENODO_MAX_FILE_SIZE'],
    )
    data['_buckets'] = {'deposit': str(new_bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=new_bucket)
    return deposit
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    rec_id = uuid.uuid4()
    cernopendata_recid_minter(rec_id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=rec_id)
    if skip_files:
        # No files requested: the record is done.
        return record
    bucket = Bucket.create()
    handle_record_files(data, bucket, files, skip_files)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    quota = current_app.config['ZENODO_BUCKET_QUOTA_SIZE']
    max_size = current_app.config['ZENODO_MAX_FILE_SIZE']
    bucket = Bucket.create(quota_size=quota, max_file_size=max_size)
    data['_buckets'] = {'deposit': str(bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def _resolve_files(cls, deposit, record): """Create files.""" # build deposit files bucket = as_bucket(deposit['_buckets']['deposit']) # build objects/tags from marc21 metadata for file_ in record.get('_files', []): cls._resolve_file(deposit=deposit, bucket=bucket, file_=file_) # attach the master tag to the proper dependent files cls._resolve_master_tag(deposit=deposit) if Video.get_record_schema() == record['$schema']: # probe metadata from video cls._resolve_extracted_metadata(deposit=deposit, record=record) # update tag 'timestamp' cls._update_timestamp(deposit=deposit) # build a partial files dump cls._resolve_dumps(record=deposit) # snapshot them to record bucket snapshot = bucket.snapshot(lock=True) db.session.add(RecordsBuckets( record_id=record.id, bucket_id=snapshot.id )) if Video.get_record_schema() == record['$schema']: # create smil file cls._resolve_dumps(record=record) cls._resolve_smil(record=record) # update tag 'master' cls._update_tag_master(record=record)
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""
    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec',
                                       object_uuid=rec_uuid)
    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])
    # If there are any iiif valid image files, iiif manifest api is added
    # on record metadata.
    iiif_manifest_url = (
        '/record/{0}/iiif/manifest.json'.format(provider.pid.pid_value)
        if num_of_iiif_valid_files > 0 else ''
    )
    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url
    payload = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }
    # from invenio_records_files.api import Record as RecordFile
    record = RecordFile.create(payload, id_=rec_uuid)
    # connect to record and bucket
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid', '1337', object_type='rec', object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # FIX: `content` is bytes, so read the copied file in binary mode;
    # text mode yields `str` on Python 3 and the comparison would always
    # fail.  A context manager also guarantees the handle is closed.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
def software(skip_files):
    """Load demo software records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))
    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])
                id = uuid.uuid4()
                cernopendata_softid_minter(id, data)
                record = Record.create(data, id_=id)
                # NOTE(review): '$schema' is set AFTER Record.create here,
                # unlike the datasets/data-policies loaders which set it
                # before — confirm whether create-time validation matters.
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)
                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file
                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    bucket = Bucket.create(
        quota_size=current_app.config['WEKO_BUCKET_QUOTA_SIZE'],
        max_file_size=current_app.config['WEKO_MAX_FILE_SIZE'],
    )
    # '$schema' is stripped before creation.
    if '$schema' in data:
        data.pop('$schema')
    data['_buckets'] = {'deposit': str(bucket.id)}
    # save user_name & display name.
    if current_user and current_user.is_authenticated:
        user = UserProfile.get_by_userid(current_user.get_id())
        username = ''
        displayname = ''
        if user is not None:
            username = user._username
            displayname = user._displayname
        if '_deposit' in data:
            data['_deposit']['owners_ext'] = {
                'username': username,
                'displayname': displayname,
                'email': current_user.email
            }
    deposit = super(WekoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    # Create a 'parent' PID and register the new recid as a draft child
    # so the item participates in PID versioning.
    recid = PersistentIdentifier.get('recid', str(data['_deposit']['id']))
    depid = PersistentIdentifier.get('depid', str(data['_deposit']['id']))
    p_depid = PersistentIdentifier.create('parent',
                                          'parent:recid/{0}'.format(
                                              str(data['_deposit']['id'])),
                                          object_type='rec',
                                          object_uuid=uuid.uuid4(),
                                          status=PIDStatus.REGISTERED)
    PIDVersioning(parent=p_depid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)
    return deposit
def create(cls, data, id_=None):
    """Generate a Deposit object.  Overrides parent's `create`.

    The configured quota_size and max_file_size here are for public API
    constraints.  There are no constraints (apart from physical volume we
    have available) when creating records differently.
    """
    fifty_gb = 50 * 1024 * 1024 * 1024  # 50 GB
    bucket = Bucket.create(quota_size=fifty_gb, max_file_size=fifty_gb)
    data['_buckets'] = {'deposit': str(bucket.id)}
    # any newly created Deposit is a draft
    data['type'] = RecordType.draft.value
    deposit = super(Deposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    return deposit
def clone(self, pid=None, id_=None):
    """Clone a deposit.

    Adds snapshot of the files when deposit is cloned.
    """
    cloned_data = copy.deepcopy(self.dumps())
    del cloned_data['_deposit']
    new_deposit = super(CAPDeposit, self).create(cloned_data, id_=id_)
    # Remember where this deposit was cloned from.
    new_deposit['_deposit']['cloned_from'] = {
        'type': pid.pid_type,
        'value': pid.pid_value,
        'revision_id': self.revision_id,
    }
    snapshot_bucket = self.files.bucket.snapshot()
    RecordsBuckets.create(record=new_deposit.model, bucket=snapshot_bucket)
    # optionally we might need to do: deposit.files.flush()
    new_deposit.commit()
    return new_deposit
def _create_records(base_metadata, total, versions, files):
    """Create test records with versioning and files.

    Makes `total` concept recids, each with `versions` record versions
    carrying `files` PDF files; returns (recid, record, file_objects)
    tuples.
    """
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create('recid',
                                                   str(conceptrecid_val),
                                                   status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            # Version recids are numbered right after the concept recid.
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(pid_type='recid',
                                                pid_value=record['recid'],
                                                object_type='rec',
                                                object_uuid=record.id,
                                                status='R')
            versioning.insert_child(recid)
            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()
            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    if not isinstance(data, dict) or data == {}:
        raise EmptyDepositError()
    bucket = Bucket.create()
    # Relative schema paths of every configured deposit group.
    available_schemas = [
        group.get('schema').split('schemas/')[-1]
        for group in current_app.config.get('DEPOSIT_GROUPS', {}).values()
    ]
    try:
        schema = data.get("$schema", None) \
            .split('/schemas/', 1)[1]
    except (IndexError, AttributeError):
        raise WrongJSONSchemaError()
    if schema not in available_schemas:
        raise WrongJSONSchemaError()
    if schema:
        # Pick the deposit group matching this schema.
        matching_group = next(
            (depgroup for dg, depgroup
             in current_app.config.get('DEPOSIT_GROUPS').iteritems()
             if schema in depgroup['schema']),
            None
        )
        data["_experiment"] = matching_group.get(
            "experiment", "Unknown")
    deposit = super(CAPDeposit, cls).create(data, id_=id_)
    _access = add_owner_permissions(deposit)
    RecordsBuckets.create(record=deposit.model, bucket=bucket)
    if _access:
        deposit["_access"] = _access
        deposit.commit()
    return deposit
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record.

    NOTE(review): the `schema` parameter is unused in this body.
    """
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            # Remove every object version, its file instance, the
            # record-bucket link and finally the old bucket itself.
            bucket_id = record.files.bucket
            bucket = Bucket.get(bucket_id.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(record=record.model,
                                           bucket=bucket).delete()
            bucket_id.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        # Re-attach a fresh bucket with the new files.
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def create(cls, data, id_=None):
    """Create a deposit.

    Adds bucket creation immediately on deposit creation.
    """
    deposit_bucket = Bucket.create(**cls._get_bucket_settings())
    data['_buckets'] = {'deposit': str(deposit_bucket.id)}
    deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
    RecordsBuckets.create(record=deposit.model, bucket=deposit_bucket)
    # Wire up versioning: new recid becomes a draft child of the concept
    # recid, and the draft record is linked to its deposit PID.
    pid_of = PersistentIdentifier.get
    recid = pid_of('recid', str(data['recid']))
    conceptrecid = pid_of('recid', str(data['conceptrecid']))
    depid = pid_of('depid', str(data['_deposit']['id']))
    PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
    RecordDraft.link(recid, depid)
    return deposit
def fixtures():
    """Command for working with test data."""
    temp_path = os.path.join(os.path.dirname(__file__), 'instance/temp')
    demo_files_path = os.path.join(os.path.dirname(__file__), 'demo_files')
    # Create location
    loc = Location(name='local', uri=temp_path, default=True)
    db.session.add(loc)
    db.session.commit()
    # Example files from the data folder.
    # FIX: 'pdffile.pdf' was listed twice; the second upload merely
    # re-uploaded the same file, so the duplicate entry is dropped.
    demo_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'pdffile.pdf',
        'jpgfile.jpg',
        'pngfile.png',
    )
    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec',
                                       object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
    }
    record = Record.create(data, id_=rec_uuid)
    bucket = Bucket.create()
    RecordsBuckets.create(record=record.model, bucket=bucket)
    # Add files to the record
    for f in demo_files:
        with open(os.path.join(demo_files_path, f), 'rb') as fp:
            record.files[f] = fp
    record.files.flush()
    record.commit()
    db.session.commit()
def create_files(cls, dump, deposit):
    """Create files."""
    # Create bucket and link to deposit.
    bucket = Bucket.create()
    db.session.add(
        RecordsBuckets(record_id=deposit.id, bucket_id=bucket.id))
    created_files = [cls.create_file(bucket, dump_file)
                     for dump_file in dump.files]
    return bucket, created_files
def _process_files(self, record_id, data):
    """Snapshot bucket and add files in record during first publishing."""
    if not self.files:
        # Nothing to snapshot — hand back the data unchanged.
        yield data
        return
    assert not self.files.bucket.locked
    self.files.bucket.locked = True
    # The locked snapshot becomes the published record's bucket.
    snapshot = self.files.bucket.snapshot(lock=True)
    data['_files'] = self.files.dumps(bucket=snapshot.id)
    # Yield first so the caller can persist `data`, then link the bucket.
    yield data
    db.session.add(
        RecordsBuckets(record_id=record_id, bucket_id=snapshot.id))
def _create_records(base_metadata, total, versions, files):
    """Create test records with versioning and files.

    NOTE(review): duplicate of the other `_create_records` helper in this
    file.  Makes `total` concept recids, each with `versions` versions
    carrying `files` PDF files; returns (recid, record, file_objects)
    tuples.
    """
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            # Version recids are numbered right after the concept recid.
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'],
                object_type='rec', object_uuid=record.id, status='R')
            versioning.insert_child(recid)
            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()
            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
def _create_bucket(self, location=None, storage_class=None):
    """Create bucket and return it.

    Note: Overwrites base_class._create_bucket method as it is not
    implemented.  It can create more than one bucket for the same
    parameters.  It's a private method — do not use it directly; use
    `get_bucket()` instead.

    Args:
        location (Location): Bucket location object
            (default: 'RECORDS_DEFAULT_FILE_LOCATION_NAME' from config).
        storage_class (str): Bucket storage class
            (default: 'RECORDS_DEFAULT_STORAGE_CLASS' from config).

    Returns:
        Bucket for current record, selected location and storage_class.
    """
    new_bucket = Bucket.create(location=location,
                               storage_class=storage_class)
    RecordsBuckets.create(record=self.model, bucket=new_bucket)
    LOGGER.info("Created bucket", uuid=self.id,
                class_name=self.__class__.__name__)
    return new_bucket
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record.

    NOTE(review): duplicate of the other `update_record` helper in this
    file; the `schema` parameter is unused in this body.
    """
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            # Remove every object version, its file instance, the
            # record-bucket link and finally the old bucket itself.
            bucket_id = record.files.bucket
            bucket = Bucket.get(bucket_id.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket
            ).delete()
            bucket_id.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        # Re-attach a fresh bucket with the new files.
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)
    return record
def newversion(self, pid=None):
    """Create a new version deposit.

    Only allowed on a published deposit without an existing draft
    child, and only for locally managed DOIs.  Returns ``self``
    unconditionally; the new version draft is registered through the
    PID versioning machinery as a side effect.
    """
    if not self.is_published():
        raise PIDInvalidAction()

    # Check that there is not a newer draft version for this record
    pid, record = self.fetch_published()
    pv = PIDVersioning(child=pid)
    if (not pv.draft_child and
            is_doi_locally_managed(record['doi'])):
        with db.session.begin_nested():
            # Get copy of the latest record
            latest_record = ZenodoRecord.get_record(
                pv.last_child.object_uuid)
            data = latest_record.dumps()

            # Get the communities from the last deposit
            # and push those to the new version
            latest_depid = PersistentIdentifier.get(
                'depid', data['_deposit']['id'])
            latest_deposit = ZenodoDeposit.get_record(
                latest_depid.object_uuid)
            last_communities = latest_deposit.get('communities', [])

            owners = data['_deposit']['owners']

            # TODO: Check other data that may need to be removed
            keys_to_remove = (
                '_deposit', 'doi', '_oai', '_files', '_buckets',
                '$schema')
            for k in keys_to_remove:
                data.pop(k, None)

            # NOTE: We call the superclass `create()` method, because
            # we don't want a new empty bucket, but an unlocked
            # snapshot of the old record's bucket.
            deposit = (super(ZenodoDeposit, self).create(data))
            # Injecting owners is required in case of creating new
            # version this outside of request context
            deposit['_deposit']['owners'] = owners
            if last_communities:
                deposit['communities'] = last_communities

            # Wire the new draft into the PID versioning graph.
            conceptrecid = PersistentIdentifier.get(
                'recid', data['conceptrecid'])
            recid = PersistentIdentifier.get(
                'recid', str(data['recid']))
            depid = PersistentIdentifier.get(
                'depid', str(data['_deposit']['id']))
            PIDVersioning(parent=conceptrecid).insert_draft_child(
                child=recid)
            RecordDraft.link(recid, depid)

            # Pre-fill the Zenodo DOI to prevent the user from changing
            # it to a custom DOI.
            deposit['doi'] = doi_generator(recid.pid_value)

            pv = PIDVersioning(child=pid)
            # Re-index siblings so version listings stay consistent.
            index_siblings(pv.draft_child,
                           neighbors_eager=True,
                           with_deposits=True)

        with db.session.begin_nested():
            # Create snapshot from the record's bucket and update data
            snapshot = latest_record.files.bucket.snapshot(lock=False)
            snapshot.locked = False
            if 'extra_formats' in latest_record['_buckets']:
                extra_formats_snapshot = \
                    latest_record.extra_formats.bucket.snapshot(
                        lock=False)
            deposit['_buckets'] = {'deposit': str(snapshot.id)}
            RecordsBuckets.create(record=deposit.model, bucket=snapshot)
            if 'extra_formats' in latest_record['_buckets']:
                deposit['_buckets']['extra_formats'] = \
                    str(extra_formats_snapshot.id)
                RecordsBuckets.create(
                    record=deposit.model,
                    bucket=extra_formats_snapshot)
            deposit.commit()
    return self
def _create_buckets(self):
    """Create a new bucket and attach it to this record."""
    new_bucket = Bucket.create()
    RecordsBuckets.create(record=self.model, bucket=new_bucket)
def datasets(skip_files):
    """Load demo datasets records.

    Reads every ``*.json`` fixture under ``modules/fixtures/data/datasets``,
    mints a PID for each record, creates the record with an attached
    bucket and (unless ``skip_files``) registers its files, committing
    and indexing record by record.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            # `record_data` instead of reusing `data` (the fixtures path)
            # as the loop variable; avoids shadowing it.
            for record_data in json.load(source):
                files = record_data.pop('files', [])

                # `record_id` instead of `id` -- do not shadow the builtin.
                record_id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if record_data.get('doi', None):
                    cernopendata_datasetid_minter(record_id, record_data)
                else:
                    cernopendata_recid_minter(record_id, record_data)
                record_data['$schema'] = schema
                record = Record.create(record_data, id_=record_id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                # Guard hoisted out of the loop (was `break` per record).
                if not skip_files:
                    # `file_info` instead of `file` -- builtin shadowing.
                    for file_info in files:
                        # NOTE: asserts are stripped under `python -O`;
                        # acceptable here as fixture sanity checks only.
                        assert 'uri' in file_info
                        assert 'size' in file_info
                        assert 'checksum' in file_info

                        f = FileInstance.create()
                        # Object key is the last URI path segment; kept in
                        # its own name so it no longer clobbers `filename`.
                        object_key = file_info.get("uri").split('/')[-1]
                        f.set_uri(file_info.get("uri"),
                                  file_info.get("size"),
                                  file_info.get("checksum"))
                        ObjectVersion.create(
                            bucket,
                            object_key,
                            _file_id=f.id
                        )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()