def test_delete(client, db, bucket, objects, permissions, user, expected):
    """Test deleting an object."""
    login_user(client, permissions[user])

    def object_url(key):
        # Build the files-REST object URL for a key in the test bucket.
        return url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=key,
        )

    for obj in objects:
        # Valid object
        resp = client.delete(object_url(obj.key))
        assert resp.status_code == expected
        if resp.status_code == 204:
            # A successful delete removes the head version...
            assert not ObjectVersion.get(bucket.id, obj.key)
            # ...and the object is no longer retrievable over HTTP.
            resp = client.get(object_url(obj.key))
            assert resp.status_code == 404
        else:
            # Unauthorized deletes leave the object in place.
            assert ObjectVersion.get(bucket.id, obj.key)

    # Invalid object
    assert client.delete(object_url('invalid')).status_code == 404
def test_object_delete(app, db, dummy_location): """Test object creation.""" # Create three versions, with latest being a delete marker. with db.session.begin_nested(): b1 = Bucket.create() ObjectVersion.create(b1, "test").set_location( "b1test1", 1, "achecksum") ObjectVersion.create(b1, "test").set_location( "b1test2", 1, "achecksum") obj_deleted = ObjectVersion.delete(b1, "test") assert ObjectVersion.query.count() == 3 assert ObjectVersion.get(b1, "test") is None assert ObjectVersion.get_by_bucket(b1).count() == 0 obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id) assert obj.is_deleted assert obj.file_id is None ObjectVersion.create(b1, "test").set_location( "b1test4", 1, "achecksum") assert ObjectVersion.query.count() == 4 assert ObjectVersion.get(b1.id, "test") is not None assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def rename_file(recid, key, new_key):
    """Rename a file on a published record.

    (The previous docstring, "Remove a file from a publishd record", was a
    misspelled copy-paste from a removal command and did not describe this
    function.)

    :param recid: record id to resolve.
    :param key: current file key on the record's bucket.
    :param new_key: new file key to rename to.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return

    if click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                     u' Continue?'.format(
                         key=obj.key, new_key=new_key, bucket=bucket.id)):
        # Unlock the published record's bucket so versions can be modified.
        record.files.bucket.locked = False

        # Rename = delete old key + new version pointing at the same file.
        file_id = obj.file.id
        ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, new_key, _file_id=file_id)
        record.files.bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File renamed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file rename.', fg='green'))
def test_object_delete(app, db, dummy_location):
    """Deleting an object hides it behind a delete marker."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        ObjectVersion.create(bucket, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(bucket, "test").set_location(
            "b1test2", 1, "achecksum")
        marker = ObjectVersion.delete(bucket, "test")

    # Three versions exist, but the key resolves to nothing.
    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    # The marker itself is reachable by version id and carries no file.
    marker_version = ObjectVersion.get(bucket, "test",
                                       version_id=marker.version_id)
    assert marker_version.deleted
    assert marker_version.file_id is None

    # A fresh version on the same key makes it visible again.
    ObjectVersion.create(bucket, "test").set_location(
        "b1test4", 1, "achecksum")
    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
def test_object_create(app, db, dummy_location):
    """Creating versions updates head tracking and hides delete markers."""
    with db.session.begin_nested():
        b = Bucket.create()

        # First version for key "test".
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Second version for the same key supersedes the first as head.
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # A version with no location acts as a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # obj2 is now the head version; obj1 is not.
    fetched = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert fetched.version_id == obj1.version_id
    assert fetched.is_head is False

    fetched = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert fetched.version_id == obj2.version_id
    assert fetched.is_head is True

    # Getting the latest version returns obj2.
    fetched = ObjectVersion.get(b.id, "test")
    assert fetched.version_id == obj2.version_id
    assert fetched.is_head is True

    # The delete marker is hidden from plain lookups...
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # ...but still retrievable by explicit version id.
    assert ObjectVersion.get(
        b.id, "deleted_obj", version_id=obj3.version_id) == obj3
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b
        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key; it becomes the new
        # head for "test".
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b
        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no
        # location. I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version (obj1 was demoted).
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file,
    i.e.: either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    # Either a direct file id, or a fully-specified (pid, key) triple for
    # the source; the target triple is always required.
    assert ((file_id or (pid_type1 and pid_value1 and key1)) and
            (pid_type2 and pid_value2 and key2))
    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    # Resolve the source file id from (pid_type1, pid_value1, key1) when it
    # was not given directly.
    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket
        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    # Resolve the target record/deposit and ensure the key is free.
    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket
    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2, bucket2=bucket2.id)):
        record2.files.bucket.locked = False
        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        # Published records stay locked; deposits remain unlocked.
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file,
    i.e.: either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    # Validate input combination: a direct file id OR a full source triple,
    # plus always a full target triple.
    assert ((file_id or (pid_type1 and pid_value1 and key1)) and
            (pid_type2 and pid_value2 and key2))
    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    # Look up the source file id when only (pid, key) was supplied.
    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket
        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    # Resolve the target and refuse to overwrite an existing key.
    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket
    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2, bucket2=bucket2.id)):
        record2.files.bucket.locked = False
        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        # Only published records get re-locked after the change.
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
def test_download_repo_when_failed_creates_empty_file_object_with_failed_tag(
        deposit, git_repo_tar):
    """On download failure, an empty file object is created, tagged failed."""
    responses.add(
        responses.GET,
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        body=git_repo_tar,
        content_type='application/x-gzip',
        headers={
            'Transfer-Encoding': 'chunked',
            'Content-Length': '287'
        },
        stream=True,
        status=400)

    download_repo(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz',
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        {'Authorization': 'token mysecretsecret'},
    )

    # file object was created
    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz')

    # but tagged as failed
    # BUG FIX: the original `assert tag.key, tag.value == (...)` only
    # asserted that tag.key was truthy -- the comma made the comparison the
    # assert *message*, so the tag values were never checked.
    tag = obj.tags[0]
    assert (tag.key, tag.value) == ('status', 'failed')
def test_download_repo(deposit, git_repo_tar):
    """A repo tarball is downloaded, stored, and readable from the bucket."""
    responses.add(
        responses.GET,
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        body=git_repo_tar,
        content_type='application/x-gzip',
        headers={
            'Transfer-Encoding': 'chunked',
            'Content-Length': '287'
        },
        stream=True,
        status=200)

    download_repo(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz',
        'https://codeload.github.com/owner/repository/legacy.tar.gz/mybranchsha',  # noqa
        {'Authorization': 'token mysecretsecret'},
    )

    # The mocked request carried the auth header through.
    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    saved = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch.tar.gz')
    archive = tarfile.open(saved.file.uri)
    first_member = archive.getmembers()[1]
    content = archive.extractfile(first_member).read()

    assert content == b'test repo for cap\n'
def test_download_repo_file_when_failed_creates_empty_file_object_with_failed_tag(
        deposit, file_tar):
    """On file download failure, an empty object is created, tagged failed."""
    responses.add(
        responses.GET,
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        body=file_tar,
        content_type='text/plain',
        headers={
            'Content-Length': '18',
            'Content-Encoding': 'gzip',
            'Content-Type': 'text/plain; charset=utf-8'
        },
        stream=True,
        status=400)

    download_repo_file(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch/README.md',
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        18,
        {'Authorization': 'token mysecretsecret'},
    )

    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    # file object was created
    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch/README.md')

    # but tagged as failed
    # BUG FIX: `assert tag.key, tag.value == (...)` only asserted tag.key
    # truthiness -- the comma made the comparison the assert message.
    tag = obj.tags[0]
    assert (tag.key, tag.value) == ('status', 'failed')
def test_update_record(app, db, dummy_location, record_dump, record_db, resolver, record_file): """Test update of a record.""" # Smoke test record_db['files'] = [record_file] record_db.commit() db.session.commit() pytest.raises(IntegrityError, RecordIdentifier.insert, 11782) # Update record instead of create a new RecordDumpLoader.create(record_dump) pid, record = resolver.resolve('11782') # Basic some test that record exists assert record['title'] assert record.created == datetime(2014, 10, 13, 8, 27, 47) # Test that old revisions are kept assert len(record.revisions) == 4 # Test the PIDs are extracted and created assert PersistentIdentifier.get('doi', '10.5281/zenodo.11782') assert Bucket.query.count() == 1 assert ObjectVersion.query.filter_by(is_head=True).count() == 1 assert FileInstance.query.count() == 2 assert len(record['files']) == 1 f = record['files'][0] obj = ObjectVersion.get(f['bucket'], f['filename']) assert obj.file.checksum != record_file['checksum'] assert obj.file.size != record_file['size']
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    # Import from the JSON source first.
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    # The referenced file object exists in the file store.
    assert ObjectVersion.get(
        record['_files'][0]['bucket'], record['_files'][0]['key'])
    # Then import from the MARCXML source.
    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    # JSON source import.
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    # The file referenced by the record exists in the file store.
    assert ObjectVersion.get(record['_files'][0]['bucket'],
                             record['_files'][0]['key'])
    # MARCXML source import.
    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2
def file_version_update():
    """Update the show/hide flag of a single object version.

    (The previous docstring, "Bulk delete items and index trees", did not
    describe this view's behaviour.)

    Reads ``bucket_id``, ``key``, ``version_id`` and ``is_show`` from the
    request and sets ``is_show`` on the matching object version.
    Responses are JSON: ``{'status': 1}`` on success, ``{'status': 0}``
    with a message otherwise.
    """
    # Only allow authorised users to update object version
    from invenio_files_rest.permissions import has_update_version_role
    if not has_update_version_role(current_user):
        return jsonify({'status': 0, 'msg': 'Insufficient permission'})

    bucket_id = request.values.get('bucket_id')
    key = request.values.get('key')
    version_id = request.values.get('version_id')
    is_show = request.values.get('is_show')
    if bucket_id is None or key is None or version_id is None:
        return jsonify({'status': 0, 'msg': 'Invalid data'})

    from invenio_files_rest.models import ObjectVersion
    object_version = ObjectVersion.get(bucket=bucket_id, key=key,
                                       version_id=version_id)
    if object_version is None:
        return jsonify({'status': 0, 'msg': 'Version not found'})

    # '1' means visible; any other value hides the version.
    # (Replaces the redundant `True if is_show == '1' else False`.)
    object_version.is_show = is_show == '1'
    db.session.commit()
    return jsonify({'status': 1})
def commit_file(self, id, file_key, identity, record):
    """Commit file handler."""
    # TODO: Add other checks here (e.g. verify checksum, S3 upload)
    uploaded_obj = ObjectVersion.get(record.bucket.id, file_key)
    if not uploaded_obj:
        # The upload step must have created an object version first.
        raise Exception(f'File with key {file_key} not uploaded yet.')
    record.files[file_key] = uploaded_obj
def test_download_file_branch(client, db, get_git_attributes, json_headers,
                              git_url, git, git_record):
    """Uploading a git URL for download stores the file in the bucket."""
    owner, deposit, pid, bucket, headers = get_git_attributes
    payload = {
        'url': git_url,
        'type': 'url',
        'for_download': True,
        'for_connection': False
    }

    if get_access_token(git) is None:
        pytest.skip("No access token found for Git integration. Skipping.")

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(payload))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    stored = ObjectVersion.get(bucket.id, git_record)
    with open(stored.file.uri) as stored_file:
        content = stored_file.read()
    assert content == 'test repo for cap - branch\n'
def test_download_filename_should_not_be_renamed(location):
    """Test files not renamed when the file to download is not a slave."""
    bucket = Bucket.create(location)
    _fill_bucket_with_files(bucket)

    # Same check for each non-slave file: the rename hook must be a no-op.
    # (Replaces three copy-pasted blocks with one loop.)
    for filename in (_MASTER_FILENAME, _SUBTITLE_FILENAME, _EXTRA_FILENAME):
        obj = ObjectVersion.get(bucket, filename)
        on_download_rename_file(None, obj)
        assert obj.key == filename
def download_record(cls, record, bucket, key, version_id, usr='******'):
    """Download a record.

    :param record: the record object from invenio_records_files
    :param bucket: the record's bucket.
    :param key: the record's key.
    :param version_id: the record's version id.
    :param usr: a string that identifies the current user
    """
    obj_version = ObjectVersion.get(bucket, key, version_id)
    pid = PersistentIdentifier.get('recid', record['id'])
    # Lazy %-args logging: the rendered message is identical to the old
    # string concatenation, but formatting is deferred.
    current_app.logger.info(
        "Download file= %s, requested by user= %s", record['title'], usr)
    # Send file
    return ObjectResource.send_object(
        bucket, obj_version,
        expected_chksum=obj_version.file.checksum,
        logger_data={
            'bucket_id': bucket,
            'pid_type': pid.pid_type,
            'pid_value': pid.pid_value,
        },
        as_attachment=True)
def test_download_gitlab_archive_private(client, db, get_git_attributes,
                                         json_headers):
    """A private GitLab repo archive is downloaded and stored correctly."""
    owner, deposit, pid, bucket, headers = get_git_attributes
    payload = {
        'url': 'https://gitlab.cern.ch/analysispreservation/test-private-repo',
        'type': 'repo',
        'for_download': True,
        'for_connection': False
    }

    if get_access_token('GITLAB') is None:
        pytest.skip("No access token found for Git integration. Skipping.")

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(payload))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    stored = ObjectVersion.get(
        bucket.id, 'analysispreservation_test-private-repo_master.tar.gz')
    archive = tarfile.open(stored.file.uri)
    first_member = archive.getmembers()[1]
    content = archive.extractfile(first_member).read()
    # NOTE(review): extractfile().read() returns bytes on Python 3, yet the
    # original compares against a str -- presumably a Python 2 test; verify.
    assert content == 'test repo for cap'
def test_download_repo_file(deposit, file_tar):
    """A single repo file is downloaded, stored, and readable."""
    responses.add(
        responses.GET,
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        body=file_tar,
        content_type='text/plain',
        headers={
            'Content-Length': '18',
            'Content-Encoding': 'gzip',
            'Content-Type': 'text/plain; charset=utf-8'
        },
        stream=True,
        status=200)

    download_repo_file(
        deposit.id,
        'repositories/github.com/owner/repository/mybranch/README.md',
        'https://raw.githubusercontent.com/owner/repository/mybranchsha/README.md',  # noqa
        18,
        {'Authorization': 'token mysecretsecret'},
    )

    # The auth header was forwarded to the mocked endpoint.
    assert responses.calls[0].request.headers[
        'Authorization'] == 'token mysecretsecret'

    stored = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/github.com/owner/repository/mybranch/README.md')
    with open(stored.file.uri) as stored_file:
        content = stored_file.read()
    assert content == 'test repo for cap\n'
def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Test file migration."""
    obj = objects[0]

    # Test pre-condition
    old_uri = obj.file.uri
    assert exists(old_uri)
    # Files are sharded under <location>/<id[0:2]>/<id[2:4]>/<id[4:]>/data.
    assert old_uri == join(dummy_location.uri, str(obj.file.id)[0:2],
                           str(obj.file.id)[2:4], str(obj.file.id)[4:],
                           'data')
    assert FileInstance.query.count() == 4

    # Migrate file, requesting a post-migration fixity check.
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(
            obj.file_id, location_name=extra_location.name,
            post_fixity_check=True)
        assert verify_checksum.delay.called

    # Get object again
    obj = ObjectVersion.get(bucket, obj.key)
    new_uri = obj.file.uri
    # Migration copies rather than moves: the old URI still exists, a new
    # file instance was added (4 -> 5).
    assert exists(old_uri)
    assert exists(new_uri)
    assert new_uri != old_uri
    assert FileInstance.query.count() == 5
def index_attachments(sender, json=None, record=None, index=None,
                      doc_type=None):
    """Load and index attached files for given record.

    It iterates over ``_files`` field in ``record`` and checks if
    ``_attachment`` subfield has been configured with following values:

    * ``True``/``False`` simply enables/disables automatic fulltext
      indexing for given file instance;
    * Alternatively, one can provide a ``dict`` instance with all
      configuration options as defined in Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ search for
      mapper-attachment.

    .. note::

       Make sure that ``mapper-attachment`` plugin is installed and running
       in Elasticsearch when using this signal handler.
    """
    # Loop variable renamed to ``idx``: the original used ``index``, which
    # shadowed the ``index`` parameter of this signal handler.
    for idx, data in enumerate(record['_files']):
        attachment = json['_files'][idx].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'], data['key'],
                                    version_id=data.get('version_id'))
            # ``True`` means "index with defaults"; a dict carries explicit
            # mapper-attachment options.
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault('_content', base64.b64encode(
                obj.file.storage().open().read()
            ).decode('utf-8'))
            json['_files'][idx]['_attachment'] = attachment
def test_download_archive_branch(client, db, get_git_attributes, json_headers,
                                 git_url, git, git_record):
    """Given a git url, check if the link correctly identifies the repo,
    downloads its data, and then CAP is able to retrieve them from a bucket.
    """
    owner, deposit, pid, bucket, headers = get_git_attributes
    data = {
        'url': git_url,
        'type': 'repo',
        'for_download': True,
        'for_connection': False
    }
    if get_access_token(git) is None:
        pytest.skip("No access token found for Git integration. Skipping.")
    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(data))
    assert resp.status_code == 201
    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200
    # The downloaded archive is stored under the fixture-provided key.
    obj = ObjectVersion.get(bucket.id, git_record)
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()
    # NOTE(review): ``read()`` yields bytes on Python 3 but is compared to a
    # str here -- presumably this test targets Python 2; confirm.
    assert repo_content == 'test repo for cap - branch\n'
def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    created = datetime(2011, 10, 13, 8, 27, 47)
    # Basic some test that record exists
    assert record['title']
    assert record.created == created
    # Test that this is a completely new record
    assert len(record.revisions) == 3
    # check revisions: all share the dump's creation date; the two newest
    # carry the later update timestamp.
    assert record.revisions[2].created == created
    assert record.revisions[2].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[1].created == created
    assert record.revisions[1].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[0].created == created
    assert record.revisions[0].updated == datetime(2011, 10, 13, 8, 27, 47)
    # The identifier is now taken, so re-inserting it must fail.
    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')
    assert len(record['_files']) == 1
    f = record['_files'][0]
    obj = ObjectVersion.get(f['bucket'], f['key'])
    # File metadata stored in the record matches the file instance.
    assert obj.file.checksum == f['checksum']
    assert obj.file.size == f['size']
    # The bucket is tagged with the owning record's id.
    assert BucketTag.get_value(f['bucket'], 'record') == str(record.id)
def test_delete_versions(client, db, bucket, versions, permissions, user,
                         expected):
    """Test deleting individual object versions."""
    login_user(client, permissions[user])

    for version in versions:
        # Valid delete
        resp = client.delete(
            url_for(
                'invenio_files_rest.object_api',
                bucket_id=bucket.id,
                key=version.key,
                versionId=version.version_id,
            ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            # The specific version is gone once deleted.
            assert not ObjectVersion.get(
                bucket.id, version.key, version_id=version.version_id)

    # Invalid object: a made-up version id yields 404.
    assert client.delete(
        url_for('invenio_files_rest.object_api',
                bucket_id=bucket.id,
                key=version.key,
                versionId='deadbeef-65bd-4d9b-93e2-ec88cc59aec5')
    ).status_code == 404
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    :param recid: record id to resolve.
    :param fp: open file object to upload.
    :param replace_existing: overwrite an existing file with the same key.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(
            click.style(
                u'File with key "{key}" already exists.'
                u' Use `--replace-existing/-f` to overwrite it.'.format(
                    key=key, recid=recid), fg='red'))
        return

    # Determine the upload size by seeking to the end of the stream.
    # BUG FIX: the original called ``fp.seek(SEEK_SET, SEEK_END)``, passing
    # SEEK_SET (== 0) as the *offset*; it only worked by accident.
    fp.seek(0, SEEK_END)
    size = fp.tell()
    fp.seek(0)

    click.echo(u'Will add the following file:\n')
    click.echo(
        click.style(u' key: "{key}"\n'
                    u' bucket: {bucket}\n'
                    u' size: {size}\n'
                    u''.format(key=key, bucket=bucket.id, size=size),
                    fg='green'))
    click.echo(u'to record:\n')
    click.echo(
        click.style(u' Title: "{title}"\n'
                    u' RECID: {recid}\n'
                    u' UUID: {uuid}\n'
                    u''.format(recid=record['recid'],
                               title=record['title'],
                               uuid=record.id), fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(
            click.style(u' key: "{key}"\n'
                        u' bucket: {bucket}\n'
                        u' size: {size}\n'
                        u''.format(key=obj.key,
                                   bucket=obj.bucket,
                                   size=obj.file.size), fg='green'))

    if click.confirm(u'Continue?'):
        # Unlock the published record's bucket for the modification.
        bucket.locked = False
        if obj is not None and replace_existing:
            # Remove the old head version before adding the replacement.
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))
def post(self, pid, record, **kwargs):
    """Send a signal to count record view for the record stats."""
    # Access control: non-public records require backoffice permission;
    # anonymous users get 401, authenticated-but-unauthorized get 403.
    factory = RecordPermission(record, "read")
    if not factory.is_public() and not backoffice_permission().can():
        if not current_user.is_authenticated:
            abort(401)
        abort(403)
    data = request.get_json()
    event_name = data.get("event")
    if event_name == "record-view":
        record_viewed.send(
            current_app._get_current_object(),
            pid=pid,
            record=record,
        )
        return self.make_response(pid, record, 202)
    elif event_name == "file-download":
        if "key" not in data:
            abort(406, "File key is required")
        if "bucket_id" not in record:
            abort(406, "Record has no bucket")
        obj = ObjectVersion.get(record["bucket_id"], data["key"])
        file_downloaded.send(current_app._get_current_object(),
                             obj=obj, record=record)
        return self.make_response(pid, record, 202)
    # NOTE(review): this *returns* a StatsError instance rather than raising
    # it -- confirm callers/framework expect an error object as a response.
    return StatsError(
        description="Invalid stats event request: {}".format(event_name))
def test_bucket_writer(writer):
    """Test bucket writer."""
    writer.open()
    # No file instance is attached until content is written and closed.
    assert writer.obj.file_id is None
    writer.write(BytesIO(b'this is a test'))
    writer.close()
    stored = ObjectVersion.get(writer.bucket_id, writer.key)
    assert stored.file_id is not None
def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """A dump load creates a brand-new record with files and PIDs."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    first_created = datetime(2011, 10, 13, 8, 27, 47)
    later_updated = datetime(2012, 10, 13, 8, 27, 47)

    # The record exists and keeps the dump's creation date.
    assert record['title']
    assert record.created == first_created

    # A completely new record: all three revisions are present.
    assert len(record.revisions) == 3
    for rev_index, updated in ((2, later_updated), (1, later_updated),
                               (0, first_created)):
        revision = record.revisions[rev_index]
        assert revision.created == first_created
        assert revision.updated == updated

    # The identifier is now taken.
    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)

    # PIDs are extracted and created.
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')

    assert len(record['_files']) == 1
    file_meta = record['_files'][0]
    stored = ObjectVersion.get(file_meta['bucket'], file_meta['key'])
    assert stored.file.checksum == file_meta['checksum']
    assert stored.file.size == file_meta['size']
    assert BucketTag.get_value(file_meta['bucket'],
                               'record') == str(record.id)
def index_attachments(sender, json=None, record=None, index=None,
                      doc_type=None):
    """Load and index attached files for given record.

    It iterates over ``_files`` field in ``record`` and checks if
    ``_attachment`` subfield has been configured with following values:

    * ``True``/``False`` simply enables/disables automatic fulltext
      indexing for given file instance;
    * Alternatively, one can provide a ``dict`` instance with all
      configuration options as defined in Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ search for
      mapper-attachment.

    .. note::

       Make sure that ``mapper-attachment`` plugin is installed and running
       in Elasticsearch when using this signal handler.
    """
    # Loop variable renamed to ``idx``: the original used ``index``, which
    # shadowed the ``index`` parameter of this signal handler.
    for idx, data in enumerate(record['_files']):
        attachment = json['_files'][idx].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'], data['key'],
                                    version_id=data.get('version_id'))
            # ``True`` means "index with defaults"; a dict carries explicit
            # mapper-attachment options.
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault(
                '_content',
                base64.b64encode(
                    obj.file.storage().open().read()).decode('utf-8'))
            json['_files'][idx]['_attachment'] = attachment
def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Migrating a file copies it to a new location and keeps the old one."""
    src_obj = objects[0]
    src_uri = src_obj.file.uri
    file_id = str(src_obj.file.id)

    # Pre-conditions: the file exists at its sharded path.
    assert exists(src_uri)
    assert src_uri == join(dummy_location.uri, file_id[0:2], file_id[2:4],
                           file_id[4:], 'data')
    assert FileInstance.query.count() == 4

    # Migrate, with a post-migration fixity check scheduled.
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(src_obj.file_id, location_name=extra_location.name,
                     post_fixity_check=True)
        assert verify_checksum.delay.called

    # Re-fetch: the object now points at a new file instance; the old file
    # is left in place (copy, not move).
    migrated = ObjectVersion.get(bucket, src_obj.key)
    assert exists(src_uri)
    assert exists(migrated.file.uri)
    assert migrated.file.uri != src_uri
    assert FileInstance.query.count() == 5
def delete(self, pid, record, files, file_rec, multipart_config, key,
           upload_id):
    # Abort an in-progress multipart upload and remove the partial file
    # record from the deposit.
    if multipart_config['upload_id'] != upload_id:
        # The id in the URL must match the stored upload.
        abort(404)
    before_upload_abort.send(file_rec, record=record, file=file_rec,
                             multipart_config=multipart_config)
    # NOTE(review): kwargs are lowercase here; boto3's native S3 client
    # expects Bucket/Key/UploadId -- presumably ``current_s3.client`` is a
    # wrapper that accepts these names. Confirm against its implementation.
    res = current_s3.client.abort_multipart_upload(
        bucket=multipart_config['bucket'],
        key=multipart_config['key'],
        upload_id=upload_id)
    with db.session.begin_nested():
        delete_file_object_version(file_rec.bucket, file_rec.obj)
        head = ObjectVersion.get(file_rec.bucket, key)
        if not head:
            # No remaining version for this key: drop it from the files map.
            del files.filesmap[key]
        files.flush()
    record.commit()
    db.session.commit()
    after_upload_abort.send(file_rec, record=record, file=file_rec)
    return jsonify({})
def test_download_file(mock_git_api, client, db, get_git_attributes,
                       json_headers):
    """Upload a single GitLab file and verify its stored content."""
    owner, deposit, pid, bucket, headers = get_git_attributes
    payload = {'url': FILE, 'type': 'file', 'download': True,
               'webhook': False}

    # Serve the remote file content for the download request.
    responses.add(responses.GET,
                  'https://gitlab.cern.ch/file',
                  body=FILE_BODY,
                  content_type='text/plain',
                  headers={
                      'Content-Length': '12',
                      'Content-Type': 'text/plain; charset=utf-8',
                  },
                  stream=True,
                  status=200)

    resp = client.post('/deposits/{}/actions/upload'.format(pid),
                       headers=headers + json_headers,
                       data=json.dumps(payload))
    assert resp.status_code == 201

    resp = client.get('/deposits/{}/files'.format(pid), headers=headers)
    assert resp.status_code == 200

    obj = ObjectVersion.get(
        bucket.id,
        'repositories/gitlab.cern.ch/pfokiano/test-repo/master/README.md')
    with open(obj.file.uri) as stored:
        assert stored.read() == FILE_BODY
def test_bucket_sync_same_object(app, db, dummy_location):
    """Test that an existing file in src and dest is not changed."""
    src = Bucket.create()
    dst = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    src.sync(dst)
    db.session.commit()

    # Remember version ids before syncing a second time.
    src_version = ObjectVersion.get(src, "filename").version_id
    dst_version = ObjectVersion.get(dst, "filename").version_id

    # A second sync must be a no-op: same counts, same version ids.
    src.sync(dst)
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dst).count() == 1
    assert ObjectVersion.get(src, "filename").version_id == src_version
    assert ObjectVersion.get(dst, "filename").version_id == dst_version
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing.

    Builds two buckets — ``b1`` with three versions of "test" plus one
    "another" object, ``b2`` with a single "test" — and checks the counts
    and ordering returned by ``ObjectVersion.get_by_bucket``.
    """
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version.
    # NOTE(review): the original comment called this a delete marker, but a
    # location is set on it, so it is an ordinary version — confirm intent.
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical by key)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned object versions (creation date descending
    # within a key: latest first)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing.

    NOTE(review): this function is an exact duplicate of the previous
    ``test_object_get_by_bucket`` definition; in the same module the second
    definition shadows the first, so only one actually runs — confirm and
    remove one copy.
    """
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version (a location is set, so not a real delete marker).
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical by key)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned object versions (latest first within a key)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
def test_get_webhook_event_view_when_release_event(m_gitlab, deposit, client,
                                                   gitlab_release_webhook_sub,
                                                   git_repo_tar):
    """Check that a GitLab release webhook stores a repository snapshot."""

    class MockBranchManager:
        # Stand-in for python-gitlab's branch manager: returns a branch
        # stub with a fixed commit sha.
        def get(self, name):
            m = Mock(commit=dict(id='mybranchsha'))
            m.name = 'mybranch'
            return m

    class MockProjectManager:
        # Stand-in for python-gitlab's project manager.
        def get(self, name, lazy):
            return Mock(branches=MockBranchManager(), id='12345')

    m_gitlab.return_value = Mock(projects=MockProjectManager())

    # Serve the repository archive that the webhook handler downloads.
    responses.add(responses.GET, (
        'https://gitlab.cern.ch/api/v4/projects/12345/repository/archive?sha=mybranchsha'
    ),
        body=git_repo_tar,
        content_type='application/octet_stream',
        headers={
            'Transfer-Encoding': 'binary',
            'Content-Length': '287'
        },
        stream=True,
        status=200)

    resp = client.post('/repos/event',
                       headers=tag_push_headers,
                       data=json.dumps(tag_push_payload_shortened))

    assert resp.status_code == 200
    assert resp.json == {'message': 'Snapshot of repository was saved.'}
    assert responses.calls[0].request.headers['Private-Token'] == 'some-token'

    # The tarball ends up as an ObjectVersion under the deposit's bucket.
    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/gitlab.cern.ch/owner_name/myrepository/v3.0.0.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()
    assert repo_content == b'test repo for cap\n'

    # The snapshot is linked to the webhook subscription with the release
    # payload recorded verbatim.
    snapshot = gitlab_release_webhook_sub.snapshots[0]
    assert obj.snapshot_id == snapshot.id
    assert GitSnapshot.query.count() == 1
    assert snapshot.payload == {
        'event_type': 'release',
        'author': {
            'name': 'owner_name',
            'id': 1
        },
        'link': 'https://gitlab.com/owner_name/myrepository/tags/v3.0.0',
        'release': {
            'tag': 'v3.0.0',
            'name': 'My release'
        }
    }
def get_version(self, version_id=None):
    """Return specific version ``ObjectVersion`` instance or HEAD.

    :param version_id: Version ID of the object.
    :returns: :class:`~invenio_files_rest.models.ObjectVersion` instance or
        HEAD of the stored object.
    """
    target_bucket = self.obj.bucket
    target_key = self.obj.key
    return ObjectVersion.get(bucket=target_bucket, key=target_key,
                             version_id=version_id)
def test_object_remove_marker(app, db, bucket, objects):
    """Test object remove."""
    first = objects[0]
    assert ObjectVersion.query.count() == 4

    # Deleting the object adds a delete marker as a fifth version.
    marker = ObjectVersion.delete(first.bucket, first.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5

    # Removing the marker itself restores the original version count.
    marker = ObjectVersion.get(marker.bucket, marker.key,
                               version_id=marker.version_id)
    marker.remove()
    assert ObjectVersion.query.count() == 4
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1

    # The imported record resolves and carries exactly one stored file.
    pid, record = resolver.resolve('11782')
    files = record['files']
    assert len(files) == 1
    assert ObjectVersion.get(files[0]['bucket'], files[0]['filename'])
def test_object_multibucket(app, db, dummy_location):
    """Test object creation in multiple buckets."""
    with db.session.begin_nested():
        # Two buckets, each holding an object under the same key.
        bucket_a = Bucket.create()
        bucket_b = Bucket.create()
        version_a = ObjectVersion.create(bucket_a, "test")
        version_a.set_location("file:///tmp/obj1", 1, "checksum")
        version_b = ObjectVersion.create(bucket_b, "test")
        version_b.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Each bucket resolves the shared key to its own head version.
    for bkt, expected in ((bucket_a, version_a), (bucket_b, version_b)):
        head = ObjectVersion.get(bkt.id, "test")
        assert head.is_head is True
        assert head.version_id == expected.version_id
def test_delete(client, db, bucket, objects, permissions, user, expected):
    """Test deleting an object."""
    login_user(client, permissions[user])

    for obj in objects:
        # Deleting an existing object must honour the user's permissions.
        resp = client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=obj.key,
        ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            # Successful delete removes the HEAD version.
            assert not ObjectVersion.get(bucket.id, obj.key)
        else:
            # Denied delete leaves the object untouched.
            assert ObjectVersion.get(bucket.id, obj.key)

    # Deleting a non-existing key is always a 404.
    resp = client.delete(url_for(
        'invenio_files_rest.object_api',
        bucket_id=bucket.id,
        key='invalid',
    ))
    assert resp.status_code == 404
def test_object_mimetype(app, db, dummy_location):
    """Test object set file."""
    container = Bucket.create()
    db.session.commit()

    pdf_obj = ObjectVersion.create(container, "test.pdf",
                                   stream=BytesIO(b'pdfdata'))
    readme_obj = ObjectVersion.create(container, "README",
                                      stream=BytesIO(b'pdfdata'))

    # MIME type is guessed from the object key.
    assert pdf_obj.mimetype == "application/pdf"
    assert readme_obj.mimetype == "application/octet-stream"

    # Override computed MIME type.
    readme_obj.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(container, "README").mimetype == "text/plain"
def __extract_article_text(record):
    """Extract fulltext from the record's pdf/pdf-a files, keyed by filetype.

    FIXME extraction shouldn't happen in article_upload?
    """
    extracted_text = {}
    for attached in record.get('_files', ()):
        filetype = attached['filetype']
        if filetype not in ('pdf', 'pdf/a'):
            continue
        path = ObjectVersion.get(attached['bucket'], attached['key']).file.uri
        try:
            extracted_text[filetype] = \
                extract_text_from_pdf(path).decode('utf-8')
        except PDFSyntaxError as e:
            current_app.logger.error(
                'Error while extracting text from pdf with uri %s: %s'
                % (path, e))
    return extracted_text
def test_cascade_action_record_delete(app, db, location, record_with_bucket,
                                      generic_file, force,
                                      num_of_recordbuckets):
    """Test cascade action on record delete, with force false."""
    record = record_with_bucket
    record_id = record.id
    bucket_id = record.files.bucket.id

    # Pre-conditions: one record/bucket link, one bucket, file present.
    assert len(RecordsBuckets.query.all()) == 1
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)

    record.delete(force=force)

    # The record is gone, but the bucket and its file survive.
    db.session.expunge(record.model)
    with pytest.raises(NoResultFound):
        record = Record.get_record(record_id)
    assert len(RecordsBuckets.query.all()) == num_of_recordbuckets
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri.

    :param external_pids: list of ``{'ePIC_PID': ..., 'key': ...}`` dicts.
    :param bucket: bucket in which the corresponding objects are created.
    :raises InvalidDepositError: on duplicate keys, keys starting with
        ``/``, or already-existing file URIs.
    """
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    keys_list = [e['key'] for e in external_pids]
    if len(keys_list) != len(set(keys_list)):
        raise InvalidDepositError(
            [FieldError('external_pids',
                        'Field external_pids contains duplicate keys.')])

    for external_pid in external_pids:
        # Normalize bare PIDs into full handle URLs.
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            # HACK: ``assert`` is stripped under ``python -O``; consider
            # raising explicitly if this invariant must always hold.
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError:
            # A concurrent insert created the same URI first.
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])
def test_new_record(app, db, dummy_location, record_dump, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dump)
    pid, record = resolver.resolve("11782")

    # The record exists with the expected creation date.
    assert record["title"]
    assert record.created == datetime(2014, 10, 13, 8, 27, 47)

    # A brand-new record starts with two revisions and a fresh identifier.
    assert len(record.revisions) == 2
    pytest.raises(IntegrityError, RecordIdentifier.insert, 11782)

    # The PIDs are extracted and created.
    assert PersistentIdentifier.get("doi", "10.5281/zenodo.11782")

    # The single attached file matches its stored checksum/size, and the
    # bucket is tagged with the record id.
    assert len(record["_files"]) == 1
    file_meta = record["_files"][0]
    obj = ObjectVersion.get(file_meta["bucket"], file_meta["key"])
    assert obj.file.checksum == file_meta["checksum"]
    assert obj.file.size == file_meta["size"]
    assert BucketTag.get_value(file_meta["bucket"], "record") == str(record.id)
def remove_file(recid, key=None, index=None):
    """Remove a file from a published record.

    :param recid: record id to resolve.
    :param key: key of the file to remove.
    :param index: unused; kept for interface compatibility.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        # NOTE: the format string only uses ``key``; the unused
        # ``recid`` kwarg previously passed here has been dropped.
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    click.echo(u'Will remove the following file:\n')
    click.echo(click.style(
        u'  key: "{key}"\n'
        u'  {checksum}\n'
        u'  bucket: {bucket}\n'
        u''.format(
            key=key,
            checksum=obj.file.checksum,
            bucket=bucket.id),
        fg='green'))
    click.echo('from record:\n')
    click.echo(click.style(
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id),
        fg='green'))

    if click.confirm(u'Continue?'):
        # Temporarily unlock the bucket to allow the deletion.
        bucket.locked = False
        ObjectVersion.delete(bucket, obj.key)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File removed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file removal.', fg='green'))
def upload_to_zenodo(bucket_id, filename):
    """Upload code to zenodo."""
    zenodo_server_url = current_app.config.get('ZENODO_SERVER_URL')
    params = {"access_token": current_app.config.get('ZENODO_ACCESS_TOKEN')}
    filename = filename + '.tar.gz'

    # Create a new (empty) deposition to obtain the target bucket URL.
    r = requests.post(zenodo_server_url, params=params, json={})
    bucket_url = r.json()['links']['bucket']

    # Stream the stored file into the Zenodo bucket.
    file_obj = ObjectVersion.get(bucket_id, filename)
    instance = FileInstance.get(file_obj.file_id)
    with open(instance.uri, 'rb') as fp:
        response = requests.put(
            bucket_url + '/{}'.format(filename),
            data=fp,
            params=params,
        )
    return jsonify({"status": response.status_code})
def test_delete_versions(client, db, bucket, versions, permissions, user,
                         expected):
    """Test deleting an object."""
    login_user(client, permissions[user])

    for version in versions:
        # Deleting an existing version must honour the user's permissions.
        resp = client.delete(url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key=version.key,
            versionId=version.version_id,
        ))
        assert resp.status_code == expected
        if resp.status_code == 204:
            assert not ObjectVersion.get(
                bucket.id, version.key, version_id=version.version_id)

    # An unknown version id always yields 404 (uses the last key iterated).
    assert client.delete(url_for(
        'invenio_files_rest.object_api',
        bucket_id=bucket.id,
        key=version.key,
        versionId='deadbeef-65bd-4d9b-93e2-ec88cc59aec5'
    )).status_code == 404
def test_part_creation(app, db, bucket, get_md5):
    """Test part creation."""
    assert bucket.size == 0

    multipart = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    # Parts may arrive out of order; total size stays fixed.
    for part_no, chunk in ((2, b'p'), (0, b'p1'), (1, b'p2')):
        Part.create(multipart, part_no, stream=BytesIO(chunk))
    db.session.commit()
    assert bucket.size == 5

    multipart.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    digest = hashlib.md5(b'p2').hexdigest()
    assert "md5:{0}".format(digest) == Part.get_or_none(multipart, 1).checksum

    # Merging removes the multipart bookkeeping and yields one object with
    # the concatenated content.
    obj = multipart.merge_parts()
    db.session.commit()
    assert bucket.size == 5
    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0
    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True
    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
def __getitem__(self, key):
    """Get a specific file.

    :raises KeyError: if no object exists under ``key``.
    """
    head = ObjectVersion.get(self.bucket, key)
    if not head:
        raise KeyError(key)
    return self.file_cls(head, self.filesmap.get(head.key, {}))
def __getitem__(self, key):
    """Get a specific file.

    :raises KeyError: if no object exists under ``key``.
    """
    head = ObjectVersion.get(self.bucket, key)
    if not head:
        raise KeyError(key)
    return FileObject(self.bucket, head)
def get_version(self, version_id=None):
    """Return specific version ``ObjectVersion`` instance or HEAD."""
    target_bucket = self.bucket
    target_key = self.obj.key
    return ObjectVersion.get(bucket=target_bucket, key=target_key,
                             version_id=version_id)
def proc(record):
    """Re-extract authors/affiliations for a record from its Elsevier XML.

    NOTE(review): ``filter(...)`` is indexed with ``[0]`` below, which only
    works on Python 2 (where ``filter`` returns a list) — confirm the
    target interpreter before reuse.
    """
    rinfo('start...', record)
    if '_files' not in record.json:
        rerror('Skipping. No _files', record)
        return

    xml = filter(lambda x: x['filetype'] == 'xml', record.json['_files'])
    if not xml:
        rerror('Skipping. No xml in _files', record)
        return

    # Load and parse the stored XML file for this record.
    object = ObjectVersion.get(xml[0]['bucket'], xml[0]['key'])
    uri = object.file.uri
    xml = parse(open(uri, 'rt'))

    x_author_groups = xml.getElementsByTagName('ce:author-group')
    if not x_author_groups:
        rerror('Skipping. No author groups.', record)
        return

    if len(x_author_groups) > 1:
        # Multiple author groups: re-extract the full author list.
        rinfo('Reparse all authors.', record)
        authors = []
        for x_author_group in x_author_groups:
            # skip if not deepest author-group
            if x_author_group.getElementsByTagName('ce:author-group'):
                continue
            # extract affiliations shared by the whole group
            x_affiliations = x_author_group.getElementsByTagName(
                'ce:affiliation')
            affs = []
            for a in x_affiliations:
                value = a.getElementsByTagName(
                    'ce:textfn')[0].childNodes[0].nodeValue
                affs.append({
                    u'country': find_country(value),
                    u'value': value
                })
            # extract authors, add affiliations
            x_authors = x_author_group.getElementsByTagName('ce:author')
            for x_author in x_authors:
                given_name = x_author.getElementsByTagName(
                    'ce:given-name')[0].childNodes[0].nodeValue
                surname = x_author.getElementsByTagName(
                    'ce:surname')[0].childNodes[0].nodeValue
                full_name = '%s, %s' % (surname, given_name)
                # per-author affiliations resolved via cross-references
                author_affs = []
                for ref in x_author.getElementsByTagName('ce:cross-ref'):
                    affid = ref.attributes.get('refid').value
                    if 'aff' in affid:
                        aff_value = get_aff_by_id(x_author_group, affid)
                        aff_country = find_country(aff_value)
                        author_affs.append({
                            u'country': aff_country,
                            u'value': aff_value
                        })
                # An author with no affiliation anywhere aborts the record.
                if not (author_affs or affs):
                    rerror('no affs for author: %s. Skip this record.'
                           % surname, record)
                    return
                authors.append({
                    'full_name': full_name,
                    'given_name': given_name,
                    'surname': surname,
                    'affiliations': author_affs or affs
                })
        if authors:
            record.json['authors'] = authors
            flag_modified(record, 'json')
            rinfo('updated', record)
        else:
            rerror('No authors found', record)
    else:
        # Single author group: only try to patch the existing author list.
        for x_author_group in x_author_groups:
            x_collaborations = x_author_group.getElementsByTagName(
                'ce:collaboration')
            x_affiliations = x_author_group.getElementsByTagName(
                'ce:affiliation')
            # needed for supporting multiple author groups with author
            # matching, but author matching is not rly possible.
            # authors_in_group = [
            #     (c.getElementsByTagName('ce:given-name')[0].childNodes[0].nodeValue.replace('-', '').title(),
            #      c.getElementsByTagName('ce:surname')[0].childNodes[0].nodeValue.replace('-', '').title())
            #     for c in x_author_group.getElementsByTagName('ce:author')
            # ]
            if 'authors' not in record.json:
                # Type 1 and 3: has no authors at all. Fix: add
                # collaborations if there are affiliations in xml.
                rerror('No authors... SKIPPING', record)
                return
                # NOTE(review): everything below this ``return`` is
                # unreachable — the collaboration fix appears deliberately
                # disabled; confirm before re-enabling.
                # extract collaborations, find countries later
                # FIXME we should always extract collaborations, but that
                # would cause a lot more problems now.
                authors = [{'full_name': c.getElementsByTagName(
                    'ce:text')[0].childNodes[0].nodeValue}
                    for c in x_collaborations]
                if authors:
                    rinfo('Collaborations found: %s' % authors, record)
                    record.json['authors'] = authors
                else:
                    rerror('No collaborations. Not fixable.', record)

            # possibly we added authors in the previous step.
            if 'authors' in record.json:
                # Type 2 and 4: has authors, but no affiliations.
                authors = record.json['authors']
                aff_count = sum(map(lambda x: 'affiliations' in x, authors))
                if aff_count == 0:
                    # Type 4: No affiliations in data.
                    new_affs = [
                        {u'country': find_country(a.getElementsByTagName(
                            'ce:textfn')[0].childNodes[0].nodeValue),
                         u'value': a.getElementsByTagName(
                             'ce:textfn')[0].childNodes[0].nodeValue
                         } for a in x_affiliations]
                    if new_affs:
                        rinfo('New affiliations: %s' % new_affs, record)
                        # FIXME modify this, if multiple author groups
                        # should be supported (not all authors should be
                        # updated)!!!
                        # update_authors(record, authors_in_group, new_affs)
                        for i, a in enumerate(record.json.get('authors')):
                            record.json['authors'][i]['affiliations'] = \
                                new_affs
                        flag_modified(record, 'json')
                    else:
                        rerror('No affiliations at all. Not fixable.',
                               record)
                elif aff_count == len(authors):
                    # All authors carry an 'affiliations' key — check if
                    # any of them are actually populated.
                    empty_aff_count = sum(map(
                        lambda x: len(x['affiliations']) == 0, authors))
                    if empty_aff_count == len(authors):
                        # Type 2: Only empty affiliations.
                        rinfo('Type 2. Not fixable.', record)
                    else:
                        rerror('Only SOME authors have EMPTY affiliations. What now?', record)
                else:
                    rerror('Only SOME authors have affiliations. What now?', record)
    rinfo('OK', record)
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    :param recid: record id to resolve.
    :param fp: open file object to add (read from its current name).
    :param replace_existing: overwrite an existing object under the same key.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(click.style(u'File with key "{key}" already exists.'
                   u' Use `--replace-existing/-f` to overwrite it.'.format(
                       key=key, recid=recid), fg='red'))
        return

    # Determine the file size by seeking to the end.
    # (The original called ``fp.seek(SEEK_SET, SEEK_END)``, which only
    # worked because SEEK_SET == 0; use an explicit 0 offset.)
    fp.seek(0, SEEK_END)
    size = fp.tell()
    fp.seek(0)

    click.echo(u'Will add the following file:\n')
    click.echo(click.style(
        u'  key: "{key}"\n'
        u'  bucket: {bucket}\n'
        u'  size: {size}\n'
        u''.format(
            key=key,
            bucket=bucket.id,
            size=size),
        fg='green'))
    click.echo(u'to record:\n')
    click.echo(click.style(
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id),
        fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(click.style(
            u'  key: "{key}"\n'
            u'  bucket: {bucket}\n'
            u'  size: {size}\n'
            u''.format(
                key=obj.key,
                bucket=obj.bucket,
                size=obj.file.size),
            fg='green'))

    if click.confirm(u'Continue?'):
        # Temporarily unlock the bucket to allow the modification.
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))