def test_get_empty_bucket(db, client, headers, bucket, objects, permissions,
                          get_json):
    """Test getting objects from an empty bucket."""
    # Delete the objects created in the fixtures to have an empty bucket with
    # permissions set up.
    for obj in objects:
        ObjectVersion.delete(obj.bucket_id, obj.key)
    db.session.commit()

    cases = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - return 403 instead
        ('bucket', 200),
        ('location', 200),
    ]

    for user, expected in cases:
        login_user(client, permissions[user])

        resp = client.get(
            url_for('invenio_files_rest.bucket_api', bucket_id=bucket.id),
            headers=headers
        )
        assert resp.status_code == expected
        if resp.status_code == 200:
            assert get_json(resp)['contents'] == []
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user, test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)

        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
    try:
        with app.app_context():
            # Disable access control for this test.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(f1)
    db.session.add(f2)
    b1 = Bucket.create()

    obj1 = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Cannot restore a deleted version.
    pytest.raises(InvalidOperationError, obj_deleted.restore)

    # Restore first version
    obj_new = obj1.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != obj1.version_id
    assert obj_new.key == obj1.key
    assert obj_new.file_id == obj1.file_id
    assert obj_new.bucket == obj1.bucket
def test_record_publish_adds_no_handles_for_external_files(
        app, records_data_with_external_pids, test_records_data):
    """Test that no handle PIDs are created for external files."""
    for metadata in test_records_data:
        with app.app_context():
            app.config.update({'FAKE_EPIC_PID': True})

            external_pids = records_data_with_external_pids['external_pids']
            external_dict = {x['key']: x['ePIC_PID'] for x in external_pids}
            data = deepcopy(metadata)
            data['external_pids'] = deepcopy(external_pids)

            record_uuid = uuid.uuid4()
            b2share_deposit_uuid_minter(record_uuid, data=data)

            deposit = Deposit.create(data, id_=record_uuid)
            ObjectVersion.create(deposit.files.bucket, 'real_file_1.txt',
                                 stream=BytesIO(b'mycontent'))
            ObjectVersion.create(deposit.files.bucket, 'real_file_2.txt',
                                 stream=BytesIO(b'mycontent'))
            deposit.submit()
            deposit.publish()
            deposit.commit()

            _, record = deposit.fetch_published()

            # external files don't get a handle PID, they already have one
            # which is stored in record['_deposit']['external_pids']
            for f in record.files:
                if f['key'] in external_dict:
                    assert f.get('ePIC_PID') is None
                else:
                    assert '0000' in f['ePIC_PID']  # is a new fake PID
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    b1 = Bucket.create()
    obj1 = ObjectVersion.create(
        b1, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(b1, "do-not-touch", stream=BytesIO(b('na')))
    b1.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    fnew = FileInstance.create()
    fnew.copy_contents(obj1.file, location=b1.location)
    db.session.commit()

    fold = obj1.file

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    ObjectVersion.relink_all(obj1.file, fnew)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    with open(path) as record_file:
        record_str = record_file.read()
    record_str = resolve_community_id(record_str)
    record_str = resolve_block_schema_id(record_str)
    json_data = json.loads(record_str)
    b2share_deposit_uuid_minter(rec_uuid, data=json_data)
    deposit = Deposit.create(json_data, id_=rec_uuid)
    ObjectVersion.create(deposit.files.bucket, 'myfile',
                         stream=BytesIO(b'mycontent'))
    deposit.publish()
    pid, record = deposit.fetch_published()
    indexer.index(record)
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))

    last_id = pid.pid_value
    for i in range(2 * versions):
        rec_uuid = uuid4()
        json_data = json.loads(record_str)
        b2share_deposit_uuid_minter(rec_uuid, data=json_data)
        deposit2 = Deposit.create(json_data, id_=rec_uuid,
                                  version_of=last_id)

        ObjectVersion.create(deposit2.files.bucket, 'myfile-ver{}'.format(i),
                             stream=BytesIO(b'mycontent'))
        deposit2.publish()
        pid, record2 = deposit2.fetch_published()
        indexer.index(record2)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(rec_uuid)))

    return record, deposit
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def objects(db, bucket):
    """File system location."""
    obj1 = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(b('license file')))
    obj2 = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(b('readme file')))
    db.session.commit()

    yield [obj1, obj2]
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file, i.e.:
    either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    assert ((file_id or (pid_type1 and pid_value1 and key1))
            and (pid_type2 and pid_value2 and key2))

    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid'), msg
    assert pid_type2 in ('recid', 'depid'), msg

    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket

        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket

    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2,
                         bucket2=bucket2.id)):
        record2.files.bucket.locked = False

        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'File attaching aborted.', fg='green'))
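# A hypothetical invocation of the command above (the CLI group name and
# option spellings are assumptions; adapt them to however `attach_file` is
# actually registered):
#
#   <cli> files attach-file --pid-type1 recid --pid-value1 1234 --key1 data.csv \
#       --pid-type2 depid --pid-value2 5678 --key2 data.csv
#
# Passing --file-id directly skips the source record/bucket lookup.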
def test_exporter(app, db, es, exporter_bucket, record_with_files_creation):
    """Test record exporter."""
    pid, record, record_url = record_with_files_creation
    RecordIndexer().index_by_id(record.id)
    current_search.flush_and_refresh('records')

    with app.app_context():
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 0
        export_job(job_id='records')
        assert ObjectVersion.get_by_bucket(exporter_bucket).count() == 1
def test_object_remove_marker(app, db, bucket, objects):
    """Test object remove."""
    obj = objects[0]
    assert ObjectVersion.query.count() == 4
    obj = ObjectVersion.delete(obj.bucket, obj.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5
    obj = ObjectVersion.get(obj.bucket, obj.key, version_id=obj.version_id)
    obj.remove()
    assert ObjectVersion.query.count() == 4
def test_object_create_with_fileid(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, 'test', stream=BytesIO(b'test'))

    assert b.size == 4

    ObjectVersion.create(b, 'test', _file_id=obj.file)
    assert b.size == 8
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def _create_bucket(deposit, record_json, directory, logfile):
    for index, file_dict in enumerate(record_json.get('files', [])):
        click.secho('    Load file "{}"'.format(file_dict.get('name')))
        filepath = os.path.join(directory, 'file_{}'.format(index))
        if int(os.path.getsize(filepath)) != int(file_dict.get('size')):
            logfile.write("\n********************")
            logfile.write("\nERROR: downloaded file size differs for file {}: {} instead of {}"
                          .format(filepath, os.path.getsize(filepath), file_dict.get('size')))
            logfile.write("\n********************")
        else:
            with open(filepath, 'r+b') as f:
                ObjectVersion.create(deposit.files.bucket, file_dict['name'],
                                     stream=BytesIO(f.read()))
def test_object_mimetype(app, db, dummy_location):
    """Test object set file."""
    b = Bucket.create()
    db.session.commit()
    obj1 = ObjectVersion.create(b, "test.pdf", stream=BytesIO(b'pdfdata'))
    obj2 = ObjectVersion.create(b, "README", stream=BytesIO(b'pdfdata'))

    assert obj1.mimetype == "application/pdf"
    assert obj2.mimetype == "application/octet-stream"

    # Override computed MIME type.
    obj2.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(b, "README").mimetype == "text/plain"
    def rename(self, old_key, new_key):
        """Rename a file."""
        assert new_key not in self

        file_ = self[old_key]
        # create a new version with the new name
        obj = ObjectVersion.create(
            bucket=self.bucket, key=new_key,
            _file_id=file_.obj.file_id
        )
        self.record['_files'][self.keys.index(old_key)]['key'] = new_key
        # delete the old version
        ObjectVersion.delete(bucket=self.bucket, key=old_key)
        return obj
def objects(db, bucket):
    """File system location."""
    data_bytes = b('license file')
    obj1 = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(data_bytes),
        size=len(data_bytes)
    )
    data_bytes2 = b('readme file')
    obj2 = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(data_bytes2),
        size=len(data_bytes2)
    )
    db.session.commit()

    yield [obj1, obj2]
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it."""
    cfg = current_app.config

    logos_bucket_id = cfg['COMMUNITIES_BUCKET_UUID']
    logos_bucket = Bucket.query.get(logos_bucket_id)
    ext = os.path.splitext(logo_filename)[1]
    ext = ext[1:] if ext.startswith('.') else ext

    if ext in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        key = "{0}/logo.{1}".format(community_id, ext)
        ObjectVersion.create(logos_bucket, key, stream=logo_stream)
        return ext
    else:
        return None
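# A minimal usage sketch for the helper above, assuming an application
# context, a bucket registered under COMMUNITIES_BUCKET_UUID, and 'png'
# listed in COMMUNITIES_LOGO_EXTENSIONS:
#
#   with open('logo.png', 'rb') as logo_stream:
#       ext = save_and_validate_logo(logo_stream, 'logo.png', 'my-community')
#   if ext is None:
#       ...  # extension not allowed, report a validation error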
def test_new_record(app, db, dummy_location, record_dumps, resolver):
    """Test creation of new record."""
    RecordDumpLoader.create(record_dumps)
    pid, record = resolver.resolve('11783')
    created = datetime(2011, 10, 13, 8, 27, 47)
    # Basic test that the record exists
    assert record['title']
    assert record.created == created
    # Test that this is a completely new record
    assert len(record.revisions) == 3

    # check revisions
    assert record.revisions[2].created == created
    assert record.revisions[2].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[1].created == created
    assert record.revisions[1].updated == datetime(2012, 10, 13, 8, 27, 47)
    assert record.revisions[0].created == created
    assert record.revisions[0].updated == datetime(2011, 10, 13, 8, 27, 47)

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11783)
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11783')

    assert len(record['_files']) == 1
    f = record['_files'][0]
    obj = ObjectVersion.get(f['bucket'], f['key'])
    assert obj.file.checksum == f['checksum']
    assert obj.file.size == f['size']

    assert BucketTag.get_value(f['bucket'], 'record') == str(record.id)
def versions(objects):
    """Get objects with all their versions."""
    versions = []
    for obj in objects:
        versions.extend(ObjectVersion.get_versions(obj.bucket, obj.key))

    yield versions
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage."""
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)

    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    f.last_check = True
    db.session.add(f)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
def handle_record_files(data, bucket, files, skip_files):
    """Handles record files."""
    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        filename = file.get("uri").split('/')[-1]
        try:
            f = FileInstance.create()
            f.set_uri(file.get("uri"), file.get("size"),
                      file.get("checksum"))
            obj = ObjectVersion.create(
                bucket,
                filename,
                _file_id=f.id
            )

            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })

        except Exception as e:
            click.echo(
                'Recid {0} file {1} could not be loaded due '
                'to {2}.'.format(data.get('recid'), filename,
                                 str(e)))
            continue
def index_attachments(sender, json=None, record=None,
                      index=None, doc_type=None):
    """Load and index attached files for given record.

    It iterates over ``_files`` field in ``record`` and checks if
    ``_attachment`` subfiled has been configured with following values:

    * ``True``/``False`` simply enables/disables automatic fulltext indexing
      for given file instance;
    * Alternativelly, one can provide a ``dict`` instance with all
      configuration options as defined in Elasticsearch guide on
      https://www.elastic.co/guide/en/elasticsearch/ search for
      mapper-attachment.

    .. note::
       Make sure that ``mapper-attachment`` plugin is installed and running
       in Elasticsearch when using this signal handler.
    """
    for i, data in enumerate(record['_files']):
        attachment = json['_files'][i].pop('_attachment', None)
        if attachment:
            obj = ObjectVersion.get(data['bucket'], data['key'],
                                    version_id=data.get('version_id'))
            attachment = attachment if isinstance(attachment, dict) else {}
            attachment.setdefault('_content', base64.b64encode(
                obj.file.storage().open().read()
            ).decode('utf-8'))
            json['_files'][i]['_attachment'] = attachment
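# For reference, a sketch of the ``_files`` entries the handler above expects;
# the identifiers are placeholders and ``_attachment`` shows both supported
# forms (boolean toggle, or a dict of mapper-attachment options):
example_files = [
    {
        'bucket': '<bucket-uuid>',
        'key': 'article.pdf',
        'version_id': '<version-uuid>',
        '_attachment': True,  # index fulltext with default options
    },
    {
        'bucket': '<bucket-uuid>',
        'key': 'notes.txt',
        '_attachment': {'indexed_chars': -1},  # explicit plugin options
    },
]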
def test_SIP_files(db):
    """Test the files methods of API SIP."""
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    # we attach it to the SIP
    sf = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip.sip_files[0].filepath == 'test.txt'
    # finalization
    rmtree(tmppath)
def test_update_record(app, db, dummy_location, record_dump, record_db,
                       resolver, record_file):
    """Test update of a record."""
    # Smoke test
    record_db['files'] = [record_file]
    record_db.commit()
    db.session.commit()

    pytest.raises(IntegrityError, RecordIdentifier.insert, 11782)
    # Update the record instead of creating a new one
    RecordDumpLoader.create(record_dump)
    pid, record = resolver.resolve('11782')
    # Basic test that the record exists
    assert record['title']
    assert record.created == datetime(2014, 10, 13, 8, 27, 47)
    # Test that old revisions are kept
    assert len(record.revisions) == 4
    # Test the PIDs are extracted and created
    assert PersistentIdentifier.get('doi', '10.5281/zenodo.11782')

    assert Bucket.query.count() == 1
    assert ObjectVersion.query.filter_by(is_head=True).count() == 1
    assert FileInstance.query.count() == 2

    assert len(record['files']) == 1
    f = record['files'][0]
    obj = ObjectVersion.get(f['bucket'], f['filename'])
    assert obj.file.checksum != record_file['checksum']
    assert obj.file.size != record_file['size']
def test_migrate_file(app, db, dummy_location, extra_location, bucket,
                      objects):
    """Test file migration."""
    obj = objects[0]

    # Test pre-condition
    old_uri = obj.file.uri
    assert exists(old_uri)
    assert old_uri == join(dummy_location.uri, str(obj.file.id)[0:2],
                           str(obj.file.id)[2:4], str(obj.file.id)[4:], 'data')
    assert FileInstance.query.count() == 4

    # Migrate file
    with patch('invenio_files_rest.tasks.verify_checksum') as verify_checksum:
        migrate_file(
            obj.file_id, location_name=extra_location.name,
            post_fixity_check=True)
        assert verify_checksum.delay.called

    # Get object again
    obj = ObjectVersion.get(bucket, obj.key)
    new_uri = obj.file.uri
    assert exists(old_uri)
    assert exists(new_uri)
    assert new_uri != old_uri
    assert FileInstance.query.count() == 5
def sorted_files_from_bucket(bucket, keys=None):
    """Return files from bucket sorted by given keys."""
    keys = keys or []
    total = len(keys)
    sortby = dict(zip(keys, range(total)))
    values = ObjectVersion.get_by_bucket(bucket).all()
    return sorted(values, key=lambda x: sortby.get(x.key, total))
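# Usage sketch for the sorter above: objects whose keys appear in ``keys``
# come first, in that order; anything else sorts after them (the bucket
# contents are assumptions here):
#
#   files = sorted_files_from_bucket(bucket, keys=['README.rst', 'LICENSE'])
#   # -> ObjectVersions ordered README.rst, LICENSE, then the rest.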
    def __delitem__(self, key):
        """Delete a file from the deposit."""
        obj = ObjectVersion.delete(bucket=self.bucket, key=key)
        self.record['_files'] = [file_ for file_ in self.record['_files']
                                 if file_['key'] != key]
        if obj is None:
            raise KeyError(key)
def test_import_record(app, db, dummy_location, record_dump, records_json,
                       resolver):
    """Test import record celery task."""
    assert RecordMetadata.query.count() == 0
    import_record(records_json[0], source_type='json')
    assert RecordMetadata.query.count() == 1
    pid, record = resolver.resolve('11782')
    assert record['_collections'] == []
    assert len(record['_files']) == 1
    assert ObjectVersion.get(
        record['_files'][0]['bucket'], record['_files'][0]['key'])

    import_record(records_json[1], source_type='marcxml')
    assert RecordMetadata.query.count() == 2
    pid, record = resolver.resolve('10')
    assert record['_collections'] == [
        "ALEPH Papers",
        "Articles & Preprints",
        "Experimental Physics (EP)",
        "CERN Divisions",
        "Atlantis Institute of Fictive Science",
        "CERN Experiments",
        "Preprints",
        "ALEPH",
    ]
    assert len(record['_files']) == 2
    def _update_tag_master(cls, record):
        """Update the 'master' tag on files that depend on the master video."""
        bucket = cls._get_bucket(record=record)
        master_video = CDSVideosFilesIterator.get_master_video_file(record)
        for obj in ObjectVersion.get_by_bucket(bucket=bucket):
            if obj.get_tags()['context_type'] in cls.dependent_objs:
                ObjectVersionTag.create_or_update(obj, 'master',
                                                  master_video['version_id'])
def local_file(db, bucket, location, online_video):
    """A local file."""
    response = requests.get(online_video, stream=True)
    object_version = ObjectVersion.create(
        bucket, "test.mp4", stream=response.raw)
    version_id = object_version.version_id
    db.session.commit()
    return version_id
    def __delitem__(self, key):
        """Delete a file from the deposit."""
        obj = ObjectVersion.delete(bucket=self.bucket, key=key)
        self.record['_files'] = [
            file_ for file_ in self.record['_files'] if file_['key'] != key
        ]
        if obj is None:
            raise KeyError(key)
    def _update_timestamp(cls, deposit):
        """Update timestamp from percentage to seconds."""
        duration = float(deposit['_cds']['extracted_metadata']['duration'])
        bucket = CDSRecordDumpLoader._get_bucket(record=deposit)
        for obj in ObjectVersion.get_by_bucket(bucket=bucket):
            if 'timestamp' in obj.get_tags().keys():
                timestamp = duration * float(obj.get_tags()['timestamp']) / 100
                ObjectVersionTag.create_or_update(obj, 'timestamp', timestamp)
def check_deposit_record_files(deposit, deposit_expected, record,
                               record_expected):
    """Check deposit and record files expected."""
    # check deposit
    deposit_objs = [
        obj.key for obj in ObjectVersion.query_heads_by_bucket(
            deposit.files.bucket).all()
    ]
    assert sorted(deposit_expected) == sorted(deposit_objs)
    assert deposit.files.bucket.locked is False
    # check record
    record_objs = [
        obj.key for obj in ObjectVersion.query_heads_by_bucket(
            record.files.bucket).all()
    ]
    assert sorted(record_expected) == sorted(record_objs)
    assert record.files.bucket.locked is True
def delete_file(bucket_id):
    key = ''
    deleted_file = ObjectVersion.delete(bucket_id, key)

    if deleted_file:
        return jsonify({"status": "ok"})

    return jsonify({"error": "not found"}), 404
    def delete(self, key):
        """Delete a file."""
        rf = self[key]
        ov = rf.object_version
        # Delete the entire row
        rf.delete(force=True)
        if ov:
            # TODO: Should we also remove the FileInstance? Configurable?
            ObjectVersion.delete(ov.bucket, key)
        del self._entries[key]

        # Unset the default preview if the file is removed
        if self.default_preview == key:
            self.default_preview = None
        if key in self._order:
            self._order.remove(key)
        return rf
    def open(self):
        """Open the bucket for writing."""
        self.obj = ObjectVersion.create(
            self.bucket_id,
            self.key() if callable(self.key) else self.key
        )
        db.session.commit()
        return self
def software(skip_files):
    """Load demo software records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.softid import \
        cernopendata_softid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/software')
    software_json = glob.glob(os.path.join(data, '*.json'))

    for filename in software_json:
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_softid_minter(id, data)
                record = Record.create(data, id_=id)
                record['$schema'] = schema
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
    def __setitem__(self, key, stream):
        """Add file inside a deposit."""
        with db.session.begin_nested():
            # save the file
            obj = ObjectVersion.create(
                bucket=self.bucket, key=key, stream=stream)
            self.filesmap[key] = self.file_cls(obj, {}).dumps()
            self.flush()
def test_get_webhook_event_view_when_release_event(m_gitlab, deposit, client,
                                                   gitlab_release_webhook_sub,
                                                   git_repo_tar):
    class MockBranchManager:
        def get(self, name):
            m = Mock(commit=dict(id='mybranchsha'))
            m.name = 'mybranch'
            return m

    class MockProjectManager:
        def get(self, name, lazy):
            return Mock(branches=MockBranchManager(), id='12345')

    m_gitlab.return_value = Mock(projects=MockProjectManager())
    responses.add(
        responses.GET,
        ('https://gitlab.cern.ch/api/v4/projects/12345/repository/'
         'archive?sha=mybranchsha'),
        body=git_repo_tar,
        content_type='application/octet-stream',
        headers={
            'Transfer-Encoding': 'binary',
            'Content-Length': '287'
        },
        stream=True,
        status=200)

    resp = client.post('/repos/event',
                       headers=tag_push_headers,
                       data=json.dumps(tag_push_payload_shortened))

    assert resp.status_code == 200
    assert resp.json == {'message': 'Snapshot of repository was saved.'}
    assert responses.calls[0].request.headers['Private-Token'] == 'some-token'

    obj = ObjectVersion.get(
        deposit.files.bucket.id,
        'repositories/gitlab.cern.ch/owner_name/myrepository/v3.0.0.tar.gz')
    tar_obj = tarfile.open(obj.file.uri)
    repo_file_name = tar_obj.getmembers()[1]
    repo_content = tar_obj.extractfile(repo_file_name).read()

    assert repo_content == b'test repo for cap\n'

    snapshot = gitlab_release_webhook_sub.snapshots[0]
    assert obj.snapshot_id == snapshot.id
    assert GitSnapshot.query.count() == 1
    assert snapshot.payload == {
        'event_type': 'release',
        'author': {
            'name': 'owner_name',
            'id': 1
        },
        'link': 'https://gitlab.com/owner_name/myrepository/tags/v3.0.0',
        'release': {
            'tag': 'v3.0.0',
            'name': 'My release'
        }
    }
def sync_buckets(src_bucket, dest_bucket, delete_extras=False):
    """Sync source bucket ObjectVersions to the destination bucket.

    The bucket is fully mirrored with the destination bucket following the
    logic:

        * same ObjectVersions are not touched
        * new ObjectVersions are added to destination
        * deleted ObjectVersions are deleted in destination
        * extra ObjectVersions in dest are deleted if the ``delete_extras``
          param is True

    :param src_bucket: Source bucket.
    :param dest_bucket: Destination bucket.
    :param delete_extras: Delete extra ObjectVersions in destination if True.
    :returns: The bucket with an exact copy of ObjectVersions in
        ``src_bucket``.
    """
    assert not dest_bucket.locked

    src_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == src_bucket.id,
        ObjectVersion.is_head.is_(True)).all()
    dest_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == dest_bucket.id,
        ObjectVersion.is_head.is_(True)).all()

    # transform into a dict { key: object version }
    src_keys = {ov.key: ov for ov in src_ovs}
    dest_keys = {ov.key: ov for ov in dest_ovs}

    for key, ov in src_keys.items():
        if not ov.deleted:
            if key not in dest_keys or \
                    ov.file_id != dest_keys[key].file_id:
                ov.copy(bucket=dest_bucket)
        elif key in dest_keys and not dest_keys[key].deleted:
            ObjectVersion.delete(dest_bucket, key)

    if delete_extras:
        for key, ov in dest_keys.items():
            if key not in src_keys:
                ObjectVersion.delete(dest_bucket, key)

    return dest_bucket
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Test that an extra object in dest is deleted when syncing."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b2, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(b2, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    b1.sync(b2, delete_extras=True)

    assert ObjectVersion.get_by_bucket(b1).count() == 1
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert not ObjectVersion.get(b2, "extra-deleted")
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri."""
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError([FieldError(
            'external_pids',
            'Field external_pids contains duplicate keys.')])
    for external_pid in external_pids:
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError:
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])
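# A sketch of the ``external_pids`` payload validated above; the handle values
# are illustrative only. Bare handles get the 'http://hdl.handle.net/' prefix
# prepended automatically:
#
#   external_pids = [
#       {'ePIC_PID': 'http://hdl.handle.net/11304/0000-0000', 'key': 'data.csv'},
#       {'ePIC_PID': '11304/0000-0001', 'key': 'docs/readme.txt'},
#   ]
#   create_b2safe_file(external_pids, bucket)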
def test_object_set_file(app, db, dummy_location):
    """Test object set file."""
    b = Bucket.create()
    f = FileInstance(uri="f1", size=1, checksum="mychecksum")
    obj = ObjectVersion.create(b, "test").set_file(f)
    db.session.commit()
    assert obj.file == f

    pytest.raises(FileInstanceAlreadySetError, obj.set_file, f)
def get_master_object(bucket):
    """Get master ObjectVersion from a bucket."""
    # TODO do as we do in `get_master_video_file()`?
    return ObjectVersion.get_by_bucket(bucket).join(
        ObjectVersionTag
    ).filter(
        ObjectVersionTag.key == 'context_type',
        ObjectVersionTag.value == 'master'
    ).one_or_none()
def test_SIP_create(app, db, mocker):
    """Test the create method from SIP API."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    files = [obj]
    # setup metadata
    mjson = SIPMetadataType(title='JSON Test',
                            name='json-test',
                            format='json',
                            schema='url')
    marcxml = SIPMetadataType(title='MARC XML Test',
                              name='marcxml-test',
                              format='xml',
                              schema='uri')
    db.session.add(mjson)
    db.session.add(marcxml)
    metadata = {
        'json-test': json.dumps({
            'this': 'is',
            'not': 'sparta'
        }),
        'marcxml-test': '<record></record>'
    }
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    sip = SIP.create(True,
                     files=files,
                     metadata=metadata,
                     user_id=user.id,
                     agent=agent)
    db.session.commit()
    assert SIP_.query.count() == 1
    assert len(sip.files) == 1
    assert len(sip.metadata) == 2
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert sip.user.id == user.id
    assert sip.agent == agent
    # we mock the user and the agent to test if the creation works
    app.config['SIPSTORE_AGENT_JSONSCHEMA_ENABLED'] = False
    mock_current_user = mocker.patch('invenio_sipstore.api.current_user')
    type(mock_current_user).is_anonymous = mocker.PropertyMock(
        return_value=True)
    sip = SIP.create(True, files=files, metadata=metadata)
    assert sip.model.user_id is None
    assert sip.user is None
    assert sip.agent == {}
    # finalization
    rmtree(tmppath)
def test_file_listener(db, document_with_file):
    """Test file listener when file is modified."""
    # Remove files
    document_with_file['_files'] = []
    document_with_file.commit()
    db.session.commit()

    # Reload record
    record = DocumentRecord.get_record_by_pid(document_with_file['pid'])
    assert not record['_files']

    object_version = ObjectVersion.get_by_bucket(document_with_file['_bucket'])
    file_uploaded_listener(object_version)

    assert len(document_with_file.files) == 3

    object_version = ObjectVersion.get_by_bucket(document_with_file['_bucket'])
    file_deleted_listener(object_version)
    def get_version(self, version_id=None):
        """Return specific version ``ObjectVersion`` instance or HEAD.

        :param version_id: Version ID of the object.
        :returns: :class:`~invenio_files_rest.models.ObjectVersion` instance or
            HEAD of the stored object.
        """
        return ObjectVersion.get(bucket=self.obj.bucket, key=self.obj.key,
                                 version_id=version_id)
    def _create_object(cls, bucket, key, stream, size, media_type,
                       context_type, master_id, **tags):
        """Create object versions with given type and tags."""
        obj = ObjectVersion.create(
            bucket=bucket, key=key, stream=stream, size=size)
        ObjectVersionTag.create(obj, 'master', str(master_id))
        ObjectVersionTag.create(obj, 'media_type', media_type)
        ObjectVersionTag.create(obj, 'context_type', context_type)
        for k in tags:
            ObjectVersionTag.create(obj, k, tags[k])
    def create_object(key, media_type, context_type, **tags):
        """Create object versions with given type and tags."""
        obj = ObjectVersion.create(bucket=self.object.bucket,
                                   key=key,
                                   stream=open(in_output(key), 'rb'))
        ObjectVersionTag.create(obj, 'master', self.obj_id)
        ObjectVersionTag.create(obj, 'media_type', media_type)
        ObjectVersionTag.create(obj, 'context_type', context_type)
        for k in tags:
            ObjectVersionTag.create(obj, k, tags[k])
    def _force_sync_deposit_bucket(record):
        """Replace deposit bucket with a copy of the record bucket."""
        deposit = Video.get_record(record.depid.object_uuid)
        # if deposit['_deposit']['status'] == 'draft':
        #     raise RuntimeError('Deposit in edit mode: {0}'.format(deposit.id))
        deposit_old_bucket = deposit.files.bucket
        # create a copy of record bucket
        new_bucket = record.files.bucket.snapshot()
        new_bucket.locked = False
        db.session.commit()
        rb = RecordsBuckets.query.filter(
            RecordsBuckets.bucket_id == deposit_old_bucket.id).one()
        rb.bucket = new_bucket
        db.session.add(rb)
        db.session.commit()

        # Put tags correctly pointing to the right object
        master_file = CDSVideosFilesIterator.get_master_video_file(record)
        if master_file:
            master_deposit_obj = ObjectVersion.get(new_bucket,
                                                   master_file['key'])

            for slave in ObjectVersion.query_heads_by_bucket(
                    bucket=new_bucket).join(ObjectVersion.tags).filter(
                        ObjectVersion.file_id.isnot(None),
                        ObjectVersionTag.key == 'master'):
                ObjectVersionTag.create_or_update(
                    slave, 'master', str(master_deposit_obj.version_id))
                db.session.add(slave)
                db.session.commit()

        # Delete the old bucket
        deposit_old_bucket.locked = False
        deposit_old_bucket.remove()

        deposit['_buckets']['deposit'] = str(new_bucket.id)
        record['_buckets']['deposit'] = str(new_bucket.id)
        record['_deposit'] = deposit['_deposit']
        deposit['_files'] = deposit.files.dumps()
        deposit.commit()
        record.commit()
        db.session.commit()

        return deposit_old_bucket.id, new_bucket.id
def test_tag_manager_update(api, users, location, es, update_style):
    """Test updating tags via TagManager, both dict- and kwargs-style."""
    with api.test_request_context():
        bucket = Bucket.create()
        object_version = ObjectVersion.create(bucket=bucket, key="hello")
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.Packaging.value,
            value="old-packaging",
        )
        ObjectVersionTag.create(
            object_version=object_version,
            key=ObjectTagKey.MetadataFormat.value,
            value="old-metadata",
        )
        tags = TagManager(object_version)

        assert (
            ObjectVersionTag.query.filter_by(object_version=object_version).count() == 2
        )

        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "old-metadata",
        }

        if update_style == "dict":
            tags.update(
                {
                    ObjectTagKey.MetadataFormat: "new-metadata",
                    ObjectTagKey.DerivedFrom: "new-derived-from",
                }
            )
        elif update_style == "kwargs":
            tags.update(
                **{
                    ObjectTagKey.MetadataFormat.value: "new-metadata",
                    ObjectTagKey.DerivedFrom.value: "new-derived-from",
                }
            )

        assert tags == {
            ObjectTagKey.Packaging: "old-packaging",
            ObjectTagKey.MetadataFormat: "new-metadata",
            ObjectTagKey.DerivedFrom: "new-derived-from",
        }

        assert (
            ObjectVersionTag.query.filter_by(object_version=object_version).count() == 3
        )

        db.session.refresh(object_version)
        assert object_version.get_tags() == {
            ObjectTagKey.Packaging.value: "old-packaging",
            ObjectTagKey.MetadataFormat.value: "new-metadata",
            ObjectTagKey.DerivedFrom.value: "new-derived-from",
        }
def test_object(db, bucket):
    """File system location."""
    data_bytes = b('test object')
    obj = ObjectVersion.create(bucket,
                               'test.txt',
                               stream=BytesIO(data_bytes),
                               size=len(data_bytes))
    db.session.commit()

    return obj
    def _copy_files(self, source_record, target_record, source_record_context,
                    target_record_context):
        draft_by_key = {x['key']: x for x in source_record.get('_files', [])}
        published_files = []
        for ov in ObjectVersion.get_by_bucket(bucket=source_record.bucket):
            file_md = copy.copy(draft_by_key.get(ov.key, {}))
            if self._copy_file(source_record, ov, target_record, file_md,
                               source_record_context, target_record_context):
                published_files.append(file_md)
        target_record['_files'] = published_files
def _create_bucket(deposit, record_json, directory, logfile, verbose):
    for index, file_dict in enumerate(record_json.get('files', [])):
        if not file_dict.get('name'):
            click.secho('    Ignore file with no name "{}"'.format(
                file_dict.get('url')), fg='red')
        else:
            if verbose:
                click.secho('    Load file "{}"'.format(
                    file_dict.get('name')))
            filepath = os.path.join(directory, 'file_{}'.format(index))
            if int(os.path.getsize(filepath)) != int(file_dict.get('size')):
                logfile.write("***** downloaded file size differs, {} ******"
                              .format(filepath))
            else:
                with open(filepath, 'r+b') as f:
                    ObjectVersion.create(deposit.files.bucket,
                                         file_dict['name'],
                                         stream=BytesIO(f.read()))
def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test poster tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'

    # insert a master file inside the video
    add_master_to_video(
        video_deposit=video_1,
        filename=master_video_filename,
        stream=BytesIO(b'1234'), video_duration='15'
    )
    # insert a new poster object
    obj = ObjectVersion.create(
        video_1._bucket, key=poster_filename,
        stream=BytesIO(b'hello'))
    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj, video_1, content_type='jpg',
                      context_type='poster', media_type='image')

    # edit the video
    video_1 = video_1.edit()

    # try to delete the old poster frame and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(
        video_1._bucket, key=poster_filename2, stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj2, video_1, content_type='png',
                      context_type='poster', media_type='image')
def test_bucket_modification(app, db, location, record):
    """Test direct modification of bucket."""
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    record.files['hello.txt']['type'] = 'txt'

    # Modify bucket outside of record.files property
    ObjectVersion.create(
        record.bucket, 'second.txt', stream=BytesIO(b'Second'))

    # Bucket and record are out of sync:
    assert len(record.files) == 2
    assert len(record['_files']) == 1

    # Flush changes to ensure they are in sync.
    record.files.flush()
    assert len(record['_files']) == 2

    # Check that extra metadata is not overwritten.
    assert [f.get('type') for f in record.files] == ['txt', None]
def test_non_binary_doesnt_shortcut_unpack(
    api, location, es, packaging_cls: Type[Packaging]
):
    """Test that a non-binary packaging does not shortcut unpacking."""
    with api.test_request_context():
        record = SWORDDeposit.create({})
        object_version = ObjectVersion.create(
            bucket=record.bucket, key="some-file.txt", stream=io.BytesIO(b"data")
        )
        packaging = packaging_cls(record)
        assert packaging.shortcut_unpack(object_version) == NotImplemented
    def __delitem__(self, key):
        """Delete a file from the deposit."""
        obj = ObjectVersion.delete(bucket=self.bucket, key=key)

        if obj is None:
            raise KeyError(key)

        if key in self.filesmap:
            del self.filesmap[key]
            self.flush()