def test_bucket_sync(app, db, dummy_location):
    """Sync one bucket into another, without and with ``delete_extras``."""
    src = Bucket.create()
    dest = Bucket.create()
    # Three objects in the source and one unrelated object in the destination.
    source_objects = (
        ("filename1", "b1v11"),
        ("filename2", "b1v12"),
        ("filename3", "b1v13"),
    )
    for key, uri in source_objects:
        ObjectVersion.create(src, key).set_location(uri, 1, "achecksum")
    ObjectVersion.create(dest, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    # Default sync: the destination keeps its extra object (3 + 1 = 4).
    src.sync(dest)

    assert ObjectVersion.get_by_bucket(src).count() == 3
    assert ObjectVersion.get_by_bucket(dest).count() == 4

    # Delete one source object; add two destination extras and delete one.
    ObjectVersion.delete(src, "filename1")
    ObjectVersion.create(dest, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(dest, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(dest, "extra3")
    db.session.commit()

    # With delete_extras both buckets end up with the same object count.
    src.sync(dest, delete_extras=True)

    assert ObjectVersion.get_by_bucket(src).count() == 2
    assert ObjectVersion.get_by_bucket(dest).count() == 2
def test_bucket_sync_deleted(app, db, dummy_location):
    """Syncing from a bucket flagged as deleted must be rejected."""
    deleted_bucket = Bucket.create()
    deleted_bucket.deleted = True
    db.session.commit()

    with pytest.raises(InvalidOperationError) as excinfo:
        target = Bucket.create()
        deleted_bucket.sync(target)

    # The raised error is expected to carry a non-empty body.
    error_body = excinfo.value.get_body()
    assert error_body != {}
Example #3
0
def test_object_snapshot_deleted(app, db, dummy_location):
    """Snapshot a live bucket; a deleted bucket must refuse to snapshot."""
    live = Bucket.create()
    removed = Bucket.create()
    removed.deleted = True
    db.session.commit()

    # The snapshot is a distinct, unlocked bucket.
    snapshot = live.snapshot()
    assert snapshot.id != live.id
    assert snapshot.locked is False

    # Snapshotting the bucket flagged as deleted raises.
    with pytest.raises(InvalidOperationError):
        removed.snapshot()
Example #4
0
def test_object_snapshot_deleted(app, db, dummy_location):
    """Check snapshot() on a regular bucket and on a deleted bucket."""
    regular_bucket = Bucket.create()
    deleted_bucket = Bucket.create()
    deleted_bucket.deleted = True
    db.session.commit()

    copy = regular_bucket.snapshot()
    # A snapshot gets its own id and is not locked.
    assert copy.id != regular_bucket.id
    assert copy.locked is False

    # A bucket flagged as deleted cannot be snapshotted.
    with pytest.raises(InvalidOperationError):
        deleted_bucket.snapshot()
def test_bucket_sync_new_object(app, db, dummy_location):
    """Test that a new file in src is synced to dest."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    # Before the sync only the source holds the object.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 0

    src.sync(dest)

    # Afterwards the object is reachable by key in the destination as well.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(dest, "filename")
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Sync with ``delete_extras=True`` removes dest-only objects."""
    src = Bucket.create()
    dest = Bucket.create()
    # "filename" exists on both sides; "extra-deleted" only in the destination.
    fixtures = (
        (src, "filename", "b1v1", "achecksum"),
        (dest, "filename", "b2v1", "achecksum"),
        (dest, "extra-deleted", "b3v1", "asum"),
    )
    for bucket, key, uri, checksum in fixtures:
        ObjectVersion.create(bucket, key).set_location(uri, 1, checksum)
    db.session.commit()

    src.sync(dest, delete_extras=True)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert not ObjectVersion.get(dest, "extra-deleted")
Example #7
0
def test_bucket_retrieval(app, db, dummy_location):
    """Bucket.all() stops counting a bucket once it is flagged as deleted."""
    # Two buckets created inside one nested transaction.
    with db.session.begin_nested():
        doomed = Bucket.create()
        Bucket.create()

    total_before = Bucket.all().count()
    assert total_before == 2

    # Flag the first bucket as deleted.
    with db.session.begin_nested():
        doomed.deleted = True

    total_after = Bucket.all().count()
    assert total_after == 1
Example #8
0
def test_bucket_retrieval(app, db, dummy_location):
    """Check that Bucket.all() leaves out buckets flagged as deleted."""
    with db.session.begin_nested():
        # Only the first bucket is needed later, so only it is kept.
        first = Bucket.create()
        Bucket.create()

    # Both buckets are listed initially.
    assert Bucket.all().count() == 2

    with db.session.begin_nested():
        first.deleted = True

    # Only the untouched bucket remains listed.
    assert Bucket.all().count() == 1
Example #9
0
def files():
    """Reset the data directory and tables, then load demo buckets/files.

    Recreates the directory configured as ``DATADIR``, empties the file
    related tables, registers a default ``local`` location and creates three
    buckets with fixed ids, filled from files found in the source root.
    """
    srcroot = dirname(dirname(__file__))
    datadir = current_app.config['DATADIR']
    if exists(datadir):
        shutil.rmtree(datadir)
    makedirs(datadir)

    # Empty the tables in the same order as the statements they replace.
    models = (Part, MultipartObject, ObjectVersion, Bucket, FileInstance,
              Location)
    for model in models:
        model.query.delete()
    db.session.commit()

    # Default location pointing at the fresh data directory.
    location = Location(name='local', uri=datadir, default=True)
    db.session.add(location)
    db.session.commit()

    # Bucket 0: one object per source file, keyed by file name.
    bucket0 = Bucket.create(location)
    bucket0.id = '00000000-0000-0000-0000-000000000000'
    for filename in ['README.rst', 'LICENSE']:
        with open(join(srcroot, filename), 'rb') as fp:
            ObjectVersion.create(bucket0, filename, stream=fp)

    # Bucket 1: two versions for each of two keys; the second key is then
    # deleted.
    bucket1 = Bucket.create(location)
    bucket1.id = '11111111-1111-1111-1111-111111111111'
    versions = (
        ('AUTHORS.rst', 'CHANGES.rst'),
        ('AUTHORS.rst', 'AUTHORS.rst'),
        ('RELEASE-NOTES.rst', 'RELEASE-NOTES.rst'),
        ('RELEASE-NOTES.rst', 'CHANGES.rst'),
    )
    for key, source in versions:
        with open(join(srcroot, source), 'rb') as fp:
            ObjectVersion.create(bucket1, key, stream=fp)
    ObjectVersion.delete(bucket1.id, 'RELEASE-NOTES.rst')

    # Bucket 2: left empty.
    bucket2 = Bucket.create(location)
    bucket2.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
Example #10
0
def files():
    """Load demo files into freshly reset storage.

    Steps, in order:

    1. Recreate the directory configured as ``DATADIR``.
    2. Delete all rows of the file related models.
    3. Register a default location named ``local`` rooted at that directory.
    4. Create three buckets with fixed ids and fill the first two from files
       located in the source root (two directories above this module).
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location. It has to be added to the session explicitly,
    # otherwise the commit below does not persist it before the buckets
    # that reference it are created.
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0: one object per file, keyed by the file name.
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: two versions stored under the same key.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    # A second key with two versions, deleted afterwards.
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: created empty.
    b3 = Bucket.create(loc)
    b3.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
Example #11
0
def test_object_snapshot_deleted(app, db, dummy_location):
    """Snapshotting works for a live bucket and fails for a deleted one."""
    live_bucket = Bucket.create()
    deleted_bucket = Bucket.create()
    deleted_bucket.deleted = True
    db.session.commit()

    snapshot = live_bucket.snapshot()
    assert snapshot.id != live_bucket.id
    assert snapshot.locked is False

    # The bucket flagged as deleted raises, and the error has a body.
    with pytest.raises(InvalidOperationError) as excinfo:
        deleted_bucket.snapshot()
    error_body = excinfo.value.get_body()
    assert error_body != {}
Example #12
0
def attach_files(obj, eng):
    """Attach the files listed in ``obj.extra_data['files']`` to a record.

    The record is looked up through the ``recid`` persistent identifier
    matching ``obj.data['control_number']``. Each entry is read either over
    HTTP (when its url starts with ``http``) or from the local path given as
    url, then stored on the record under the entry's name together with its
    ``filetype``. Without a ``files`` entry, ``__halt_and_notify`` is called.
    """
    if 'files' not in obj.extra_data:
        __halt_and_notify('No files found.', obj, eng)
        return

    control_number = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', control_number)
    record = Record.get_record(pid.object_uuid)

    # A record whose '_files' is missing or empty gets a new bucket.
    if '_files' not in record or not record['_files']:
        new_bucket = Bucket.create()
        RecordsBuckets.create(record=record.model, bucket=new_bucket)

    for entry in obj.extra_data['files']:
        url = entry['url']
        if url.startswith('http'):
            request = urllib2.Request(url, headers=entry.get('headers', {}))
            stream = urllib2.urlopen(request)
        else:
            stream = open(url)

        name = entry['name']
        record.files[name] = stream
        record.files[name]['filetype'] = entry['filetype']

    obj.save()
    record.commit()
    db.session.commit()
def test_pyfilesystemstorage(app, db, dummy_location):
    """Save a byte stream through PyFilesystemStorage and verify the result."""
    # Bucket, object and backing file instance.
    with db.session.begin_nested():
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, "LICENSE")
        obj.file = FileInstance.create()

    storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri)
    progress = {'size': 0}

    def on_progress(total, size):
        # Keep the size reported by the most recent callback invocation.
        progress['size'] = size

    payload = b("this is some content")
    loc, size, checksum = storage.save(
        BytesIO(payload), progress_callback=on_progress)

    # Checksum is the md5 digest of the payload, prefixed with "md5:".
    expected_checksum = "md5:{0}".format(hashlib.md5(payload).hexdigest())
    assert expected_checksum == checksum

    assert size == len(payload)

    # Location is <location uri>/<file id>/data.
    expected_loc = join(dummy_location.uri, str(obj.file.id), "data")
    assert loc == expected_loc
def test_SIP_files(db):
    """Attach a stored file to an API SIP and read it back."""
    # SIP model wrapped by the API object; it starts without files.
    sip_model = SIP_.create()
    db.session.commit()
    api_sip = SIP(sip_model)
    assert len(api_sip.files) == 0

    # Temporary directory registered as the default storage location.
    storage_dir = tempfile.mkdtemp()
    default_location = Location(name='default', uri=storage_dir, default=True)
    db.session.add(default_location)
    db.session.commit()

    # One small file stored in a new bucket.
    payload = b'test lol\n'
    bucket = Bucket.create()
    stored = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(payload))
    db.session.commit()

    # Attaching makes the file visible through both the API and the model.
    api_sip.attach_file(stored)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip_model.sip_files[0].filepath == 'test.txt'

    # Remove the temporary storage.
    rmtree(storage_dir)
Example #15
0
def test_object_delete(app, db, dummy_location):
    """Test object deletion through a delete marker."""
    # Two regular versions followed by a delete marker as the latest one.
    with db.session.begin_nested():
        bucket = Bucket.create()
        for uri in ("b1test1", "b1test2"):
            version = ObjectVersion.create(bucket, "test")
            version.set_location(uri, 1, "achecksum")
        marker = ObjectVersion.delete(bucket, "test")

    # Three rows exist, but the key is no longer visible.
    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    # The marker is still retrievable by version id and has no file.
    fetched = ObjectVersion.get(bucket, "test", version_id=marker.version_id)
    assert fetched.is_deleted
    assert fetched.file_id is None

    # A new version makes the key visible again.
    revived = ObjectVersion.create(bucket, "test")
    revived.set_location("b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
Example #16
0
def test_object_restore(app, db, dummy_location):
    """Restore an old object version as the new head."""
    first_file = FileInstance(uri="f1", size=1, checksum="mychecksum")
    second_file = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    for instance in (first_file, second_file):
        db.session.add(instance)
    bucket = Bucket.create()

    # Two versions of the same key, then a delete marker.
    original = ObjectVersion.create(bucket, "test").set_file(first_file)
    ObjectVersion.create(bucket, "test").set_file(second_file)
    marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # The delete marker itself cannot be restored.
    with pytest.raises(InvalidOperationError):
        marker.restore()

    # Restoring the first version creates a fourth row.
    restored = original.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert restored.is_head is True
    # New version id, same key, file and bucket as the restored version.
    assert restored.version_id != original.version_id
    assert restored.key == original.key
    assert restored.file_id == original.file_id
    assert restored.bucket == original.bucket
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.

        The schema name is taken from the part of ``data['$schema']`` that
        follows ``/schemas/``. When it is non-empty, ``data['_experiment']``
        is set from the first configured deposit group whose ``schema``
        contains it; ``"Unknown"`` is used when that group has no
        ``experiment`` entry or when no group matches.

        :param data: deposit metadata; updated in place.
        :param id_: optional identifier forwarded to the parent ``create``.
        :returns: the created deposit, or ``None`` when ``$schema`` is missing
            or does not contain ``/schemas/``.
        """
        bucket = Bucket.create(
            default_location=Location.get_default()
        )

        try:
            schema = data.get("$schema", None) \
                .split('/schemas/', 1)[1]
        except (IndexError, AttributeError):
            # No "$schema" (None has no split) or no "/schemas/" separator.
            return None

        if schema:
            # Only the group definitions are needed, so iterate the values;
            # unlike iteritems() this works on both Python 2 and 3.
            deposit_groups = current_app.config.get('DEPOSIT_GROUPS')
            _deposit_group = next(
                (depgroup
                 for depgroup in deposit_groups.values()
                 if schema in depgroup['schema']),
                None
            )

            # No matching group previously crashed with AttributeError on
            # None; treat it like a group without an experiment instead.
            data["_experiment"] = (_deposit_group or {}).get(
                "experiment", "Unknown")

        deposit = super(CAPDeposit, cls).create(data, id_=id_)

        add_owner_permissions(deposit.id)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Example #18
0
    def create(cls, data, id_=None):
        """Create a deposit together with its bucket.

        A bucket limited by the ``ZENODO_BUCKET_QUOTA_SIZE`` and
        ``ZENODO_MAX_FILE_SIZE`` settings is created first and its id is
        written to ``data['_buckets']``. Once the parent class has created
        the deposit, the bucket is linked to it, the record PID is inserted
        as a draft child of the concept PID and linked to the deposit PID.

        :param data: deposit metadata; updated in place.
        :param id_: optional identifier forwarded to the parent ``create``.
        :returns: the created deposit.
        """
        config = current_app.config
        bucket = Bucket.create(
            quota_size=config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        def fetch_pid(pid_type, value):
            # Identifier values are always looked up as strings.
            return PersistentIdentifier.get(pid_type, str(value))

        record_pid = fetch_pid('recid', data['recid'])
        concept_pid = fetch_pid('recid', data['conceptrecid'])
        deposit_pid = fetch_pid('depid', data['_deposit']['id'])

        versioning = PIDVersioning(parent=concept_pid)
        versioning.insert_draft_child(child=record_pid)
        RecordDraft.link(record_pid, deposit_pid)

        return deposit
Example #19
0
def attach_files(obj, eng):
    """Attach the files listed in ``obj.extra_data['files']`` to a record.

    The record is resolved from the ``recid`` persistent identifier matching
    ``obj.data['control_number']``. Remote entries (url starting with
    ``http``) are downloaded through ``requests_retry_session``; a status
    other than 200 is reported through ``__halt_and_notify``. Other entries
    are opened as local paths. Without a ``files`` entry,
    ``__halt_and_notify`` is called.
    """
    if 'files' not in obj.extra_data:
        __halt_and_notify('No files found.', eng)
        return

    control_number = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', control_number)
    record = Record.get_record(pid.object_uuid)

    # A record whose '_files' is missing or empty gets a new bucket.
    if '_files' not in record or not record['_files']:
        new_bucket = Bucket.create()
        RecordsBuckets.create(record=record.model, bucket=new_bucket)

    for entry in obj.extra_data['files']:
        url = entry['url']
        if url.startswith('http'):
            headers = entry.get('headers', {})
            response = requests_retry_session().get(url, headers=headers)

            if response.status_code != 200:
                details = (response.status_code, url, headers)
                __halt_and_notify(
                    "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s"
                    % details, eng)

            stream = StringIO(response.content)
        else:
            stream = open(url)

        name = entry['name']
        record.files[name] = stream
        record.files[name]['filetype'] = entry['filetype']

    obj.save()
    record.commit()
    db.session.commit()
Example #20
0
File: api.py Project: zenodo/zenodo
    def create(cls, data, id_=None):
        """Create a deposit and, right away, the bucket that backs it.

        The bucket uses the quota and maximum file size configured in
        ``ZENODO_BUCKET_QUOTA_SIZE`` and ``ZENODO_MAX_FILE_SIZE``; its id is
        recorded in ``data['_buckets']`` before the parent class creates the
        deposit. Afterwards the bucket is linked to the deposit record and
        the record, concept and deposit identifiers are tied together.

        :param data: deposit metadata; updated in place.
        :param id_: optional identifier forwarded to the parent ``create``.
        :returns: the created deposit.
        """
        quota = current_app.config['ZENODO_BUCKET_QUOTA_SIZE']
        max_size = current_app.config['ZENODO_MAX_FILE_SIZE']
        bucket = Bucket.create(quota_size=quota, max_file_size=max_size)

        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Resolve the three identifiers from the values now held in data.
        record_value = str(data['recid'])
        recid = PersistentIdentifier.get('recid', record_value)
        concept_value = str(data['conceptrecid'])
        conceptrecid = PersistentIdentifier.get('recid', concept_value)
        deposit_value = str(data['_deposit']['id'])
        depid = PersistentIdentifier.get('depid', deposit_value)

        # The record is a draft child of the concept, linked to the deposit.
        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Example #21
0
def test_admin_views(app, db, dummy_location):
    """Each admin list view responds with 200 and shows the created data."""
    app.config['SECRET_KEY'] = 'CHANGEME'
    InvenioAdmin(app, permission_factory=None, view_class_factory=lambda x: x)

    bucket = Bucket.create(location=dummy_location)
    version = ObjectVersion.create(bucket, 'test')
    obj = version.set_location('placeuri', 1, 'chk')
    db.session.commit()

    def fetch(client, url):
        # Request an admin page, require HTTP 200, return the body as text.
        res = client.get(url)
        assert res.status_code == 200
        return res.get_data(as_text=True)

    with app.test_client() as client:
        page = fetch(client, '/admin/bucket/')
        assert str(bucket.id) in page

        page = fetch(client, '/admin/fileinstance/')
        assert str(obj.file_id) in page

        page = fetch(client, '/admin/location/')
        assert str(bucket.location.name) in page

        page = fetch(client, '/admin/objectversion/')
        assert str(obj.version_id) in page
def test_transfer_cp(db):
    """Test factories.transfer_cp function.

    Creates a record with one file, calls ``transfer_cp`` with a temporary
    directory and checks that the file shows up, with identical content, in
    the record's accessioned sub-directory.
    """
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()
        # we add a file to the record
        bucket = Bucket.create()
        content = b'Aaah! A headcrab!!!\n'
        RecordsBuckets.create(record=record.model, bucket=bucket)
        record.files['crab.txt'] = BytesIO(content)
        # test!
        rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
        factories.transfer_cp(record.id, tmppath)
        assert isdir(rec_dir)
        assert isfile(join(rec_dir, 'crab.txt'))
        # Read back as bytes: ``content`` is a bytes literal, so a text-mode
        # read could never compare equal to it on Python 3.
        with open(join(rec_dir, 'crab.txt'), "rb") as f:
            assert f.read() == content
    finally:
        # finalization, also when an assertion above fails
        rmtree(tmppath)
Example #23
0
def test_object_restore(app, db, dummy_location):
    """Check ObjectVersion.restore() for a regular and a deleted version."""
    file_a = FileInstance(uri="f1", size=1, checksum="mychecksum")
    file_b = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(file_a)
    db.session.add(file_b)
    bucket = Bucket.create()

    # History of the key: version on file_a, version on file_b, delete marker.
    oldest = ObjectVersion.create(bucket, "test").set_file(file_a)
    ObjectVersion.create(bucket, "test").set_file(file_b)
    delete_marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    version_count = ObjectVersion.query.count()
    assert version_count == 3
    # Restoring the delete marker is an invalid operation.
    with pytest.raises(InvalidOperationError):
        delete_marker.restore()

    # Restoring the oldest version adds one more row, which becomes head.
    newest = oldest.restore()
    db.session.commit()

    version_count = ObjectVersion.query.count()
    assert version_count == 4
    assert newest.is_head is True
    assert newest.version_id != oldest.version_id
    assert newest.key == oldest.key
    assert newest.file_id == oldest.file_id
    assert newest.bucket == oldest.bucket
Example #24
0
def test_object_relink_all(app, db, dummy_location):
    """Relink every object version from one file instance to another."""
    bucket = Bucket.create()
    relinked = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    # The snapshot doubles the object versions but not the file instances.
    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    replacement = FileInstance.create()
    replacement.copy_contents(
        relinked.file, default_location=bucket.location.uri)
    db.session.commit()

    previous = relinked.file

    def linked_versions(file_instance):
        # Number of object versions pointing at the given file instance.
        query = ObjectVersion.query.filter_by(file_id=file_instance.id)
        return query.count()

    assert linked_versions(previous) == 2
    assert linked_versions(replacement) == 0

    ObjectVersion.relink_all(relinked.file, replacement)
    db.session.commit()

    assert linked_versions(previous) == 0
    assert linked_versions(replacement) == 2
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files."""
    handle = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # The fixture value is replaced by the first stored location.
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            handle_file = FileInstance.create()
            handle_file.set_uri(handle, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', handle_file.id)

        db.session.commit()
        object_url = url_for(
            'invenio_files_rest.object_api',
            bucket_id=bucket.id,
            key='test.txt',
        )
    try:
        # Disable access control for this test; restored in ``finally``.
        with app.app_context():
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            response = client.get(object_url)
            assert response.headers['Location'] == handle
            assert response.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    Saves the ``LICENSE`` file through ``PyFilesystemStorage`` and checks the
    returned checksum, size and location.
    """
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    # The size is asserted once; the original repeated this exact line.
    assert size == getsize('LICENSE')
    # Location is <location uri>/<bucket id>/<version id>/data.
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
Example #27
0
def attach_files(obj, eng):
    """Store the files described in ``obj.extra_data['files']`` on a record.

    The target record is the one registered under the ``recid`` persistent
    identifier equal to ``obj.data['control_number']``. Entries whose url
    starts with ``http`` are downloaded with ``requests_retry_session``; all
    others are opened as local paths. When ``extra_data`` has no ``files``
    entry, ``__halt_and_notify`` is called instead.
    """
    extra = obj.extra_data
    if 'files' in extra:
        pid = PersistentIdentifier.get('recid', obj.data['control_number'])
        record = Record.get_record(pid.object_uuid)

        # Records without any file yet need a bucket first.
        needs_bucket = '_files' not in record or not record['_files']
        if needs_bucket:
            RecordsBuckets.create(record=record.model, bucket=Bucket.create())

        for spec in obj.extra_data['files']:
            source = spec['url']
            if source.startswith('http'):
                session = requests_retry_session()
                reply = session.get(source, headers=spec.get('headers', {}))
                handle = StringIO(reply.content)
            else:
                handle = open(source)

            key = spec['name']
            record.files[key] = handle
            record.files[key]['filetype'] = spec['filetype']

        obj.save()
        record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
def test_object_relink_all(app, db, dummy_location):
    """Move all object versions of a file instance onto a copy of it."""
    bucket = Bucket.create()
    target = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    # Two keys, each present in the bucket and in its snapshot.
    total_versions = ObjectVersion.query.count()
    total_files = FileInstance.query.count()
    assert total_versions == 4
    assert total_files == 2

    new_file = FileInstance.create()
    new_file.copy_contents(target.file, location=bucket.location)
    db.session.commit()

    old_file = target.file

    # Before relinking, both versions still point at the old file.
    by_old = ObjectVersion.query.filter_by(file_id=old_file.id)
    assert by_old.count() == 2
    by_new = ObjectVersion.query.filter_by(file_id=new_file.id)
    assert by_new.count() == 0

    ObjectVersion.relink_all(target.file, new_file)
    db.session.commit()

    # After relinking, they all point at the new file.
    by_old = ObjectVersion.query.filter_by(file_id=old_file.id)
    assert by_old.count() == 0
    by_new = ObjectVersion.query.filter_by(file_id=new_file.id)
    assert by_new.count() == 2
Example #29
0
def test_verify_checksum(app, tmp_location):
    """Check the ``last_check`` values written by ``verify_checksum``.

    ``last_check`` is expected to be truthy for an intact file, ``False``
    when the checksum differs, and ``None`` when computing the checksum
    failed, e.g. because an exception was raised.
    """
    with app.app_context():
        bucket = Bucket.create(tmp_location)
        objects = [
            ObjectVersion.create(bucket, str(i), stream=BytesIO(b'test'))
            for i in range(10)
        ]
        db.session.commit()

        # Untouched files pass verification.
        for obj in objects:
            verify_checksum.apply([str(obj.file_id)])
            assert obj.file.last_check

        # assert that mismatches in md5 checksums are caught
        corrupted = objects[0].file
        with open(corrupted.uri, 'w') as writer:
            writer.write('modified content')

        verify_checksum.apply([str(corrupted.id)])
        assert corrupted.last_check is False

        # assert that when exceptions occur last_check=None
        failing = objects[1].file
        with patch.object(FileStorage, 'checksum') as checksum_mock:
            checksum_mock.side_effect = KeyError()
            verify_checksum.apply_async(
                args=[str(failing.id)], kwargs={'throws': False})
            assert failing.last_check is None
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """A file whose URI is a PID is served as a redirect to that PID."""
    pid_url = (
        'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047')
    with app.app_context():
        # Use the first stored location instead of the fixture value.
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            pid_bucket = Bucket.create(tmp_location, storage_class='B')
            file_instance = FileInstance.create()
            file_instance.set_uri(pid_url, 1, 0, storage_class='B')
            ObjectVersion.create(pid_bucket, 'test.txt', file_instance.id)

        db.session.commit()
        endpoint = 'invenio_files_rest.object_api'
        url = url_for(endpoint, bucket_id=pid_bucket.id, key='test.txt')
    try:
        # Access control is switched off here and put back in ``finally``.
        with app.app_context():
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
        with app.test_client() as client:
            resp = client.get(url)
            # The redirect target is the PID itself.
            assert resp.headers['Location'] == pid_url
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
Example #31
0
def test_SIP_files(db):
    """Check ``files`` and ``attach_file`` of the API SIP."""
    # Model first, then the API wrapper on top of it.
    model = SIP_.create()
    db.session.commit()
    wrapper = SIP(model)
    file_count = len(wrapper.files)
    assert file_count == 0

    # Default location backed by a temporary directory.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()

    # A single object version holding a few bytes.
    data = BytesIO(b'test lol\n')
    obj = ObjectVersion.create(Bucket.create(), 'test.txt', stream=data)
    db.session.commit()

    # Attach it and check both the API view and the model relationship.
    wrapper.attach_file(obj)
    db.session.commit()
    file_count = len(wrapper.files)
    assert file_count == 1
    assert wrapper.files[0].filepath == 'test.txt'
    assert model.sip_files[0].filepath == 'test.txt'

    # Clean up the temporary directory.
    rmtree(tmppath)
Example #32
0
def test_scheduling(app, test_communities, login_user):
    """Scheduled checksum runs cover all files, then only the failed ones."""
    with app.app_context():
        bucket = Bucket.create()
        objects = [
            ObjectVersion.create(bucket, str(i), stream=BytesIO(b'test'))
            for i in range(10)
        ]
        db.session.commit()

        # Corrupt the first file by overwriting its content.
        corrupted = objects[0].file
        with open(corrupted.uri, 'w') as writer:
            writer.write('modified content')

        # Schedule a check of every file.
        schedule_all_files_for_checksum()

        # The corrupted file fails its check; all the others pass.
        assert not corrupted.last_check
        for intact in objects[1:]:
            assert intact.file.last_check

        # Mark the second file as failed.
        failed = objects[1].file
        failed.last_check = None

        # Schedule only the failed files.
        schedule_failed_checksum_files()
        # The failed file was run again; the corrupted one still fails.
        assert failed.last_check
        assert not corrupted.last_check
Example #33
0
def test_verify_checksum(app, db, dummy_location):
    """Exercise ``verify_checksum`` with valid and invalid file URIs."""
    bucket = Bucket.create()
    with open('README.rst', 'rb') as stream:
        obj = ObjectVersion.create(bucket, 'README.rst', stream=stream)
    db.session.commit()
    file_id = obj.file_id

    # An intact file is marked as successfully checked.
    verify_checksum(str(file_id))

    instance = FileInstance.query.get(file_id)
    assert instance.last_check_at
    assert instance.last_check is True

    # Point the file at an invalid URI.
    instance.uri = 'invalid'
    db.session.add(instance)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), throws=True)

    # With throws=True the previous result is left in place.
    instance = FileInstance.query.get(file_id)
    assert instance.last_check is True

    # With throws=False the error is recorded as last_check=None.
    verify_checksum(str(file_id), throws=False)
    instance = FileInstance.query.get(file_id)
    assert instance.last_check is None

    # With pessimistic=True the error is raised and last_check is None.
    instance.last_check = True
    db.session.add(instance)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    instance = FileInstance.query.get(file_id)
    assert instance.last_check is None
Example #34
0
    def create(cls, data, id_=None, **kwargs):
        """Create a CDS deposit and its bucket in one go.

        When ``data`` has no ``_deposit`` entry, the deposit minter is called
        with ``id_`` (a new UUID when none was given). A bucket is created in
        the location named by the ``bucket_location`` keyword (``'default'``
        when absent) and its id is written to ``data['_buckets']``. Missing
        ``_cds``/``state``, ``keywords``, ``license`` and ``_access`` entries
        are filled with defaults before the parent class creates the deposit
        with ``PartialDraft4Validator``.

        :param data: deposit metadata; updated in place.
        :param id_: optional deposit identifier.
        :returns: the created deposit.
        """
        if '_deposit' not in data:
            id_ = id_ or uuid.uuid4()
            cls.deposit_minter(id_, data)

        location_name = kwargs.get('bucket_location', 'default')
        location = Location.get_by_name(location_name)
        bucket = Bucket.create(location=location)
        data['_buckets'] = {'deposit': str(bucket.id)}

        # Fill in defaults only for the keys that are missing.
        data.setdefault('_cds', {}).setdefault('state', {})
        data.setdefault('keywords', [])
        default_license = {
            'license': 'CERN',
            'material': '',
            'url': 'http://copyright.web.cern.ch',
        }
        data.setdefault('license', [default_license])
        data.setdefault('_access', {})

        parent = super(CDSDeposit, cls)
        deposit = parent.create(
            data, id_=id_, validator=PartialDraft4Validator)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Example #35
0
def test_object_delete(app, db, dummy_location):
    """Test object deletion and re-creation after a delete marker."""
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        first = ObjectVersion.create(b1, "test")
        first.set_location("b1test1", 1, "achecksum")
        second = ObjectVersion.create(b1, "test")
        second.set_location("b1test2", 1, "achecksum")
        delete_marker = ObjectVersion.delete(b1, "test")

    # All three rows are stored, yet lookups by key find nothing.
    stored = ObjectVersion.query.count()
    assert stored == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    # Asking for the marker's version id returns the marker, without a file.
    marker_id = delete_marker.version_id
    obj = ObjectVersion.get(b1, "test", version_id=marker_id)
    assert obj.deleted
    assert obj.file_id is None

    # Creating the key again brings it back.
    latest = ObjectVersion.create(b1, "test")
    latest.set_location("b1test4", 1, "achecksum")

    stored = ObjectVersion.query.count()
    assert stored == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
Example #36
0
def datasets(skip_files):
    """Load demo dataset records from the packaged JSON fixtures.

    For every entry of every ``*.json`` fixture file a record is created with
    a fresh UUID and a minted identifier, and a new bucket is linked to it.
    Unless ``skip_files`` is truthy, one object version is created per entry
    of the record's ``files`` list. Each record is committed and indexed
    before the next one is handled.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    record_indexer = RecordIndexer()
    schema_url = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    fixtures_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/datasets')

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for json_path in glob.glob(os.path.join(fixtures_dir, '*.json')):

        click.echo('Loading datasets from {0} ...'.format(json_path))

        with open(json_path, 'rb') as source:
            for metadata in json.load(source):
                file_entries = metadata.pop('files', [])

                record_uuid = uuid.uuid4()
                # (TOFIX) Remove the branching in production,
                # as every dataset record should have a doi
                if metadata.get('doi', None):
                    mint = cernopendata_datasetid_minter
                else:
                    mint = cernopendata_recid_minter
                mint(record_uuid, metadata)
                metadata['$schema'] = schema_url
                record = Record.create(metadata, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                if not skip_files:
                    for entry in file_entries:
                        assert 'uri' in entry
                        assert 'size' in entry
                        assert 'checksum' in entry

                        file_instance = FileInstance.create()
                        uri = entry.get("uri")
                        # The object key is the last path segment of the uri.
                        key = uri.split('/')[-1]
                        file_instance.set_uri(uri, entry.get("size"),
                                              entry.get("checksum"))

                        ObjectVersion.create(bucket, key,
                                             _file_id=file_instance.id)
                db.session.commit()
                record_indexer.index(record)
                db.session.expunge_all()
    def create_bucket(cls, data):
        """Return the bucket to use for this record.

        This default implementation ignores ``data`` and always creates a
        new bucket. It is meant to be overridden when more advanced bucket
        creation is needed: an override may return a new or an existing
        bucket, or None in case no bucket should be created.
        """
        new_bucket = Bucket.create()
        return new_bucket
Example #38
0
def test_bucket_sync_same_object(app, db, dummy_location):
    """Test that an existing file in src and dest is not changed."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location(
        "b1v1", 1, "achecksum")
    src.sync(dest)
    db.session.commit()

    # Version ids held by each bucket after the first sync.
    src_version = ObjectVersion.get(src, "filename").version_id
    dest_version = ObjectVersion.get(dest, "filename").version_id

    # Syncing again must neither add objects nor replace versions.
    src.sync(dest)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    for bucket, expected in ((src, src_version), (dest, dest_version)):
        assert ObjectVersion.get(bucket, "filename").version_id == expected
Example #39
0
 def _resolve_bucket(cls, deposit, record):
     """Create a bucket in the 'videos' location and attach it.

     The bucket id is stored under ``_buckets`` on the deposit, the bucket
     is linked to the deposit model, a deep copy of ``_buckets`` is placed
     on the record, and the session is committed.
     """
     logging.debug('Creating new buckets, record and deposit.')
     videos_location = Location.get_by_name('videos')
     new_bucket = Bucket.create(location=videos_location)
     deposit['_buckets'] = {'deposit': str(new_bucket.id)}
     RecordsBuckets.create(record=deposit.model, bucket=new_bucket)
     record['_buckets'] = deepcopy(deposit['_buckets'])
     db.session.commit()
Example #40
0
def test_object_get_by_bucket(app, db, dummy_location):
    """Test listing the objects of a bucket, with and without versions."""
    bucket_a = Bucket.create()
    bucket_b = Bucket.create()

    # Three successive versions of the same key, created oldest first; each
    # one is given a location.
    created = []
    for uri in ("b1test1", "b1test2", "b1test3"):
        version = ObjectVersion.create(bucket_a, "test")
        version.set_location(uri, 1, "achecksum")
        created.append(version)
    first, intermediate, latest = created

    # One more key in the same bucket, and the same key in another bucket.
    ObjectVersion.create(bucket_a, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(bucket_b, "test").set_location(
        "b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    for bucket, key in ((bucket_a, "test"),
                        (bucket_a, "another"),
                        (bucket_b, "test")):
        assert ObjectVersion.get(bucket, key)

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(bucket_a).count() == 2
    assert ObjectVersion.get_by_bucket(bucket_a, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(bucket_b).count() == 1
    assert ObjectVersion.get_by_bucket(bucket_b, versions=True).count() == 1

    # Without versions, objects come back ordered by key.
    listing = ObjectVersion.get_by_bucket(bucket_a.id).all()
    assert listing[0].key == "another"
    assert listing[1].key == "test"

    # With versions, the versions of one key are expected newest first.
    listing = ObjectVersion.get_by_bucket(bucket_a.id, versions=True).all()
    assert listing[0].key == "another"
    for position, expected in enumerate((latest, intermediate, first), 1):
        assert listing[position].key == "test"
        assert listing[position].version_id == expected.version_id
Example #41
0
def test_object_get_by_bucket(app, db, dummy_location):
    """Test listing the objects of a bucket, with and without versions."""
    bucket_a = Bucket.create()
    bucket_b = Bucket.create()

    # Three successive versions of the same key, created oldest first; each
    # one is given a location.
    created = []
    for uri in ("b1test1", "b1test2", "b1test3"):
        version = ObjectVersion.create(bucket_a, "test")
        version.set_location(uri, 1, "achecksum")
        created.append(version)
    first, intermediate, latest = created

    # One more key in the same bucket, and the same key in another bucket.
    ObjectVersion.create(bucket_a, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(bucket_b, "test").set_location(
        "b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    for bucket, key in ((bucket_a, "test"),
                        (bucket_a, "another"),
                        (bucket_b, "test")):
        assert ObjectVersion.get(bucket, key)

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(bucket_a).count() == 2
    assert ObjectVersion.get_by_bucket(bucket_a, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(bucket_b).count() == 1
    assert ObjectVersion.get_by_bucket(bucket_b, versions=True).count() == 1

    # Without versions, objects come back ordered by key.
    listing = ObjectVersion.get_by_bucket(bucket_a.id).all()
    assert listing[0].key == "another"
    assert listing[1].key == "test"

    # With versions, the versions of one key are expected newest first.
    listing = ObjectVersion.get_by_bucket(bucket_a.id, versions=True).all()
    assert listing[0].key == "another"
    for position, expected in enumerate((latest, intermediate, first), 1):
        assert listing[position].key == "test"
        assert listing[position].version_id == expected.version_id
Example #42
0
def data_policies(skip_files):
    """Load demo Data Policy records from the packaged JSON fixtures.

    For every entry of every ``*.json`` fixture file a record is created with
    a fresh UUID and a minted recid, and a new bucket is linked to it. Unless
    ``skip_files`` is truthy, one object version is created per entry of the
    record's ``files`` list. Each record is committed and indexed before the
    next one is handled.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    record_indexer = RecordIndexer()
    schema_url = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    fixtures_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data')

    for json_path in glob.glob(os.path.join(fixtures_dir, '*.json')):

        click.echo('Loading data-policies from {0} ...'.format(json_path))

        with open(json_path, 'rb') as source:
            for metadata in json.load(source):
                file_entries = metadata.pop('files', [])

                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, metadata)
                metadata['$schema'] = schema_url
                record = Record.create(metadata, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                if not skip_files:
                    for entry in file_entries:
                        assert 'uri' in entry
                        assert 'size' in entry
                        assert 'checksum' in entry

                        file_instance = FileInstance.create()
                        uri = entry.get("uri")
                        # The object key is the last path segment of the uri.
                        key = uri.split('/')[-1]
                        file_instance.set_uri(uri, entry.get("size"),
                                              entry.get("checksum"))
                        ObjectVersion.create(bucket, key,
                                             _file_id=file_instance.id)
                db.session.commit()
                record_indexer.index(record)
                db.session.expunge_all()
Example #43
0
def data_policies(skip_files):
    """Load demo Data Policy records from the packaged JSON fixtures.

    For every entry of every ``*.json`` fixture file a record is created with
    a fresh UUID and a minted recid, and a new bucket is linked to it. Unless
    ``skip_files`` is truthy, one object version is created per entry of the
    record's ``files`` list. Each record is committed and indexed before the
    next one is handled.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    record_indexer = RecordIndexer()
    schema_url = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    fixtures_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data')

    for json_path in glob.glob(os.path.join(fixtures_dir, '*.json')):

        click.echo('Loading data-policies from {0} ...'.format(json_path))

        with open(json_path, 'rb') as source:
            for metadata in json.load(source):
                file_entries = metadata.pop('files', [])

                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, metadata)
                metadata['$schema'] = schema_url
                record = Record.create(metadata, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                if not skip_files:
                    for entry in file_entries:
                        assert 'uri' in entry
                        assert 'size' in entry
                        assert 'checksum' in entry

                        file_instance = FileInstance.create()
                        uri = entry.get("uri")
                        # The object key is the last path segment of the uri.
                        key = uri.split('/')[-1]
                        file_instance.set_uri(uri, entry.get("size"),
                                              entry.get("checksum"))
                        ObjectVersion.create(bucket, key,
                                             _file_id=file_instance.id)
                db.session.commit()
                record_indexer.index(record)
                db.session.expunge_all()
Example #44
0
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    def put(bucket, key, uri):
        # Create a new version of ``key`` and point it at ``uri``.
        ObjectVersion.create(bucket, key).set_location(uri, 1, "achecksum")

    source = Bucket.create()
    other = Bucket.create()

    # A key with two versions, a deleted key, a key that was deleted and
    # then written again, and a key with a single version.
    put(source, "versioned", "b1v1")
    put(source, "versioned", "b1v2")
    put(source, "deleted", "b1d1")
    ObjectVersion.delete(source, "deleted")
    put(source, "undeleted", "b1u1")
    ObjectVersion.delete(source, "undeleted")
    put(source, "undeleted", "b1u2")
    put(source, "simple", "b1s1")
    put(other, "another", "b2a1")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(source).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    undeleted_versions = ObjectVersion.query.filter_by(
        bucket_id=source.id, key='undeleted').all()
    heads = [version for version in undeleted_versions if version.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    snapshot = source.snapshot(lock=True)
    db.session.commit()

    # Only the snapshot is locked, as requested; the source stays unlocked.
    assert source.locked is False
    assert snapshot.locked is True

    # Three object versions were added for the snapshot while the number of
    # file instances did not change.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(source).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1
    assert ObjectVersion.get_by_bucket(snapshot).count() == 3
    assert ObjectVersion.get_by_bucket(source, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(snapshot, versions=True).count() == 3
def test_object_set_file(app, db, dummy_location):
    """Test that a file can be set on an object version exactly once."""
    bucket = Bucket.create()
    file_instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    created = ObjectVersion.create(bucket, "test")
    version = created.set_file(file_instance)
    db.session.commit()
    assert version.file == file_instance

    # Setting a file a second time is rejected.
    with pytest.raises(FileInstanceAlreadySetError):
        version.set_file(file_instance)
Example #46
0
def test_object_create(app, db, dummy_location):
    """Test creating object versions and resolving the head version."""
    with db.session.begin_nested():
        bucket = Bucket.create()

        # First version of the key; it has no file yet.
        first = ObjectVersion.create(bucket, "test")
        assert first.bucket_id == bucket.id
        assert first.key == 'test'
        assert first.version_id
        assert first.file_id is None
        assert first.is_head is True
        assert first.bucket == bucket

        # Set fake location.
        first.set_location("file:///tmp/obj1", 1, "checksum")

        # Second version under the same object key.
        second = ObjectVersion.create(bucket, "test")
        assert second.bucket_id == bucket.id
        assert second.key == 'test'
        assert second.version_id != first.version_id
        assert second.file_id is None
        assert second.is_head is True
        assert second.bucket == bucket

        # Set fake location
        second.set_location("file:///tmp/obj2", 2, "checksum")

        # A version of a different key that never gets a location,
        # i.e. it is considered a delete marker.
        marker = ObjectVersion.create(bucket, "deleted_obj")

    # Object __repr__
    expected_repr = "{0}:{1}:{2}".format(
        first.bucket_id, first.version_id, first.key)
    assert str(first) == expected_repr

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Fetched by version id, only the second version is the head.
    for version, expect_head in ((first, False), (second, True)):
        fetched = ObjectVersion.get(
            bucket.id, "test", version_id=version.version_id)
        assert fetched.version_id == version.version_id
        assert fetched.is_head is expect_head

    # Without a version id the second version is returned.
    current = ObjectVersion.get(bucket.id, "test")
    assert current.version_id == second.version_id
    assert current.is_head is True

    # The marker is not retrievable without specifying its version id ...
    assert ObjectVersion.get(bucket.id, "deleted_obj") is None
    # ... but it *is* retrievable when the version id is given.
    by_version = ObjectVersion.get(
        bucket.id, "deleted_obj", version_id=marker.version_id)
    assert by_version == marker
Example #47
0
def create_eitem_with_bucket_for_document(document_pid):
    """Create an open-access EItem for the document plus its file bucket.

    Returns the ``(eitem, bucket)`` pair; the bucket id is stored on the
    EItem under ``bucket_id``.
    """
    new_eitem = create_eitem(document_pid, open_access=True)
    with db.session.begin_nested():
        file_bucket = Bucket.create()
        new_eitem["bucket_id"] = str(file_bucket.id)
        new_eitem.commit()
    db.session.commit()
    created = (new_eitem, file_bucket)
    return created
Example #48
0
def test_bucket_create_object(app, db):
    """Test bucket creation with default and explicit settings."""
    with db.session.begin_nested():
        plain_location = Location(
            name='test1', uri='file:///tmp/1', default=False)
        default_location = Location(
            name='test2', uri='file:///tmp/2', default=True)
        for location in (plain_location, default_location):
            db.session.add(location)

    assert Location.query.count() == 2

    # Simple create
    with db.session.begin_nested():
        bucket = Bucket.create()
        assert bucket.id
        assert bucket.default_location == Location.get_default().id
        assert bucket.location == Location.get_default()
        configured_class = app.config['FILES_REST_DEFAULT_STORAGE_CLASS']
        assert bucket.default_storage_class == configured_class
        assert bucket.size == 0
        assert bucket.quota_size is None
        assert bucket.max_file_size is None
        assert bucket.deleted is False

    # __repr__ test
    assert str(bucket) == str(bucket.id)

    # Retrieve one
    assert Bucket.get(bucket.id).id == bucket.id

    with db.session.begin_nested():
        # Create with a Location object and a storage class
        bucket = Bucket.create(location=plain_location, storage_class='A')
        assert bucket.default_location == Location.get_by_name('test1').id
        assert bucket.default_storage_class == 'A'

        # Create using location name instead
        bucket = Bucket.create(
            location=default_location.name, storage_class='A')
        assert bucket.default_location == Location.get_by_name('test2').id

    # All three buckets created above are listed.
    assert Bucket.all().count() == 3

    # Invalid storage class.
    with pytest.raises(ValueError):
        Bucket.create(storage_class='X')
Example #49
0
def test_object_create_with_fileid(app, db, dummy_location):
    """Test creating an object version from an already existing file."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        content = BytesIO(b'test')
        original = ObjectVersion.create(bucket, 'test', stream=content)

    # The four bytes of the stream are counted in the bucket size.
    assert bucket.size == 4

    # A second version reusing the same file is counted again.
    ObjectVersion.create(bucket, 'test', _file_id=original.file)
    assert bucket.size == 8
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    def put(bucket, key, uri):
        # Create a new version of ``key`` and point it at ``uri``.
        ObjectVersion.create(bucket, key).set_location(uri, 1, "achecksum")

    source = Bucket.create()
    other = Bucket.create()

    # A key with two versions, a deleted key, a key that was deleted and
    # then written again, and a key with a single version.
    put(source, "versioned", "b1v1")
    put(source, "versioned", "b1v2")
    put(source, "deleted", "b1d1")
    ObjectVersion.delete(source, "deleted")
    put(source, "undeleted", "b1u1")
    ObjectVersion.delete(source, "undeleted")
    put(source, "undeleted", "b1u2")
    put(source, "simple", "b1s1")
    put(other, "another", "b2a1")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(source).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    undeleted_versions = ObjectVersion.query.filter_by(
        bucket_id=source.id, key='undeleted').all()
    heads = [version for version in undeleted_versions if version.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    snapshot = source.snapshot(lock=True)
    db.session.commit()

    # Only the snapshot is locked, as requested; the source stays unlocked.
    assert source.locked is False
    assert snapshot.locked is True

    # Three object versions were added for the snapshot while the number of
    # file instances did not change.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(source).count() == 3
    assert ObjectVersion.get_by_bucket(other).count() == 1
    assert ObjectVersion.get_by_bucket(snapshot).count() == 3
    assert ObjectVersion.get_by_bucket(source, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(snapshot, versions=True).count() == 3
def test_object_set_contents(app, db, dummy_location):
    """Test storing file contents on an object and verifying checksums."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        first = ObjectVersion.create(bucket, "LICENSE")
        assert first.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as stream:
            first.set_contents(stream)

    # Assert size, location and checksum
    assert first.file_id is not None
    assert first.file.uri is not None
    assert first.file.size == getsize('LICENSE')
    assert first.file.checksum is not None
    assert bucket.size == first.file.size

    # Contents cannot be written a second time on the same version.
    with db.session.begin_nested(), open('LICENSE', 'rb') as stream:
        with pytest.raises(FileInstanceAlreadySetError):
            first.set_contents(stream)

    # Save a new version with different content
    with db.session.begin_nested():
        second = ObjectVersion.create(bucket, "LICENSE")
        with open('README.rst', 'rb') as stream:
            second.set_contents(stream)

    assert second.file_id is not None and second.file_id != first.file_id
    assert second.file.size == getsize('README.rst')
    assert second.file.uri != first.file.uri
    assert Bucket.get(bucket.id).size == first.file.size + second.file.size

    # A successful verification records the time and a positive result.
    second.file.verify_checksum()
    assert second.file.last_check_at
    assert second.file.last_check is True

    # A checksum that does not match makes the verification return False.
    good_checksum = second.file.checksum
    second.file.checksum = "md5:invalid"
    assert second.file.verify_checksum() is False

    check_before = second.file.last_check
    check_date_before = second.file.last_check_at
    with db.session.begin_nested():
        second.file.checksum = good_checksum
        second.file.uri = 'invalid'

    # With an invalid uri the verification raises and leaves the stored
    # result of the previous check untouched.
    with pytest.raises(ResourceNotFoundError):
        second.file.verify_checksum()
    assert second.file.last_check == check_before
    assert second.file.last_check_at == check_date_before

    # With throws=False nothing is raised; the result is stored as None and
    # the check date changes.
    second.file.verify_checksum(throws=False)
    assert second.file.last_check is None
    assert second.file.last_check_at != check_date_before
Example #52
0
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP.

    A record with one attached file is turned into a SIP twice: once with
    its files and once with ``create_sip_files=False``. The temporary
    storage directory is removed in a ``finally`` clause so that a failing
    assertion does not leave it behind on disk.
    """
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        # setup metadata
        mtype = SIPMetadataType(title='JSON Test', name='json-test',
                                format='json', schema='url://to/schema')
        db.session.add(mtype)
        db.session.commit()
        # first we create a record
        recid = uuid.uuid4()
        pid = PersistentIdentifier.create(
            'recid',
            '1337',
            object_type='rec',
            object_uuid=recid,
            status=PIDStatus.REGISTERED)
        # Record validation is patched to always succeed.
        mocker.patch('invenio_records.api.RecordBase.validate',
                     return_value=True, autospec=True)
        record = Record.create(
            {'title': 'record test', '$schema': 'url://to/schema'},
            recid)
        # we add a file to the record
        bucket = Bucket.create()
        content = b'Test file\n'
        RecordsBuckets.create(record=record.model, bucket=bucket)
        record.files['test.txt'] = BytesIO(content)
        db.session.commit()
        # Let's create a SIP
        user = create_test_user('*****@*****.**')
        agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
        rsip = RecordSIP.create(pid, record, True, user_id=user.id,
                                agent=agent)
        db.session.commit()
        # test!
        assert RecordSIP_.query.count() == 1
        assert SIP_.query.count() == 1
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 1
        assert len(rsip.sip.files) == 1
        assert len(rsip.sip.metadata) == 1
        metadata = rsip.sip.metadata[0]
        assert metadata.type.format == 'json'
        assert '"title": "record test"' in metadata.content
        assert rsip.sip.archivable is True
        # we try with no files: one more metadata row, no additional SIP file
        rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                                user_id=user.id, agent=agent)
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 2
        assert len(rsip.sip.files) == 0
        assert len(rsip.sip.metadata) == 1
    finally:
        # finalization: always remove the temporary storage directory
        rmtree(tmppath)
Example #53
0
def image_object(database, location, image_path):
    """Return an ObjectVersion holding the test image under 'test.jpg'."""
    image_bucket = Bucket.create()
    database.session.commit()

    with open(image_path, 'rb') as stream:
        image_size = getsize(image_path)
        version = ObjectVersion.create(
            image_bucket, 'test.jpg', stream=stream, size=image_size)
    database.session.commit()
    return version
Example #54
0
def test_verify_checksum(app, db, dummy_location):
    """Test the checksum verification task on a freshly stored file."""
    bucket = Bucket.create()
    with open('README.rst', 'rb') as stream:
        version = ObjectVersion.create(bucket, 'README.rst', stream=stream)
    db.session.commit()

    # The task is given the file id as a string.
    verify_checksum(str(version.file_id))

    # The check time and a positive result are stored on the file instance.
    checked_file = FileInstance.query.get(version.file_id)
    assert checked_file.last_check_at
    assert checked_file.last_check is True
Example #55
0
def test_object_set_location(app, db, dummy_location):
    """Test that a location can be set on an object version exactly once."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        version = ObjectVersion.create(bucket, "LICENSE")
        assert version.file_id is None
        assert FileInstance.query.count() == 0

        # Setting the location creates one file instance.
        version.set_location("b1test1", 1, "achecksum")
        assert FileInstance.query.count() == 1

        # A second attempt on the same version is rejected.
        with pytest.raises(FileInstanceAlreadySetError):
            version.set_location("b1test1", 1, "achecksum")
Example #56
0
def files():
    """Load the example files and a multi-file record.

    Creates a default ``local`` location pointing at the ``data`` directory
    next to this module and a bucket for that location, passes every example
    file to ``create_object``, and finally creates one record whose ``files``
    list references all of the example files in that bucket.
    """
    data_path = os.path.join(os.path.dirname(__file__), 'data')

    # Create location. The new object must be added to the session
    # explicitly; otherwise the commit below has nothing to persist and the
    # bucket would be created against a location that was never stored.
    loc = Location(name='local', uri=data_path, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket
    bucket = Bucket.create(loc)
    bucket_id = str(bucket.id)

    # Example files from the data folder
    example_files = (
        'markdown.md',
        'csvfile.csv',
        'zipfile.zip',
        'jsonfile.json',
        'xmlfile.xml',
        'notebook.ipynb',
        'jpgfile.jpg',
        'pngfile.png',
    )

    # Create single file records
    for filename in example_files:
        with open(os.path.join(data_path, filename), 'rb') as fp:
            create_object(bucket, filename, fp)

    # Create a multi-file record
    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)
    data = {
        'pid_value': provider.pid.pid_value,
        # One entry per example file, all pointing into the same bucket.
        'files': [
            {
                'uri': '/files/{0}/{1}'.format(bucket_id, filename),
                'key': filename,
                'bucket': bucket_id,
                'local': True,
            }
            for filename in example_files
        ],
    }

    Record.create(data, id_=rec_uuid)

    db.session.commit()
Example #57
0
def test_object_multibucket(app, db, dummy_location):
    """Test that the same key can exist independently in two buckets."""
    key = "test"

    with db.session.begin_nested():
        # Two buckets, each holding one object under the same key.
        bucket_one = Bucket.create()
        bucket_two = Bucket.create()
        in_one = ObjectVersion.create(bucket_one, key)
        in_one.set_location("file:///tmp/obj1", 1, "checksum")
        in_two = ObjectVersion.create(bucket_two, key)
        in_two.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Each bucket resolves the key to its own head version.
    for bucket, created in ((bucket_one, in_one), (bucket_two, in_two)):
        head = ObjectVersion.get(bucket.id, key)
        assert head.is_head is True
        assert head.version_id == created.version_id
Example #58
0
def test_bucket_tags(app, db, dummy_location):
    """Test creating, reading, updating and deleting bucket tags."""
    bucket = Bucket.create()
    BucketTag.create(bucket, "mykey", "testvalue")
    BucketTag.create(bucket, "another_key", "another value")
    db.session.commit()

    # Duplicate key
    with pytest.raises(Exception):
        BucketTag.create(bucket, "mykey", "newvalue")

    # Lookups accept a bucket or a bucket id; an unknown key yields None.
    assert BucketTag.query.count() == 2
    assert BucketTag.get(bucket.id, "mykey").value == "testvalue"
    assert BucketTag.get_value(bucket, "another_key") == "another value"
    assert BucketTag.get_value(bucket.id, "invalid") is None

    # Deleting an existing key removes one tag; an unknown key changes
    # nothing.
    BucketTag.delete(bucket, "mykey")
    assert BucketTag.query.count() == 1
    BucketTag.delete(bucket, "invalid")
    assert BucketTag.query.count() == 1

    # Create or update
    BucketTag.create_or_update(bucket, "another_key", "newval")
    BucketTag.create_or_update(bucket, "newkey", "testval")
    db.session.commit()
    assert BucketTag.get_value(bucket, "another_key") == "newval"
    assert BucketTag.get_value(bucket, "newkey") == "testval"

    # Get tags as dictionary
    expected_tags = {"another_key": "newval", "newkey": "testval"}
    assert bucket.get_tags() == expected_tags

    untagged = Bucket.create()
    assert untagged.get_tags() == {}

    # Test cascading delete.
    Bucket.query.delete()
    db.session.commit()
    assert BucketTag.query.count() == 0
Example #59
0
def test_object_copy(app, db, dummy_location):
    """Test copying an object version onto itself, a new key and a bucket."""
    # One persisted file instance that every copy below is expected to share.
    f = FileInstance(uri="f1", size=1, checksum="mychecksum")
    db.session.add(f)
    db.session.commit()
    b1 = Bucket.create()
    b2 = Bucket.create()

    # Delete markers cannot be copied (this version never gets a file).
    obj_deleted = ObjectVersion.create(b1, "deleted")
    assert pytest.raises(InvalidOperationError, obj_deleted.copy, b2)

    # Copy onto self: without arguments the copy keeps key, bucket and file
    # but gets a version id of its own.
    obj = ObjectVersion.create(b1, "selftest").set_file(f)
    db.session.commit()
    obj_copy = obj.copy()
    db.session.commit()
    assert obj_copy.version_id != obj.version_id
    assert obj_copy.key == obj.key
    assert obj_copy.bucket == obj.bucket
    assert obj_copy.file_id == obj.file_id
    # The copy is listed before the version it was copied from.
    versions = ObjectVersion.get_versions(b1, "selftest").all()
    assert versions[0] == obj_copy
    assert versions[1] == obj

    # Copy new key: same bucket and file, different key.
    obj_copy2 = obj_copy.copy(key='newkeytest')
    db.session.commit()
    assert obj_copy2.version_id != obj_copy.version_id
    assert obj_copy2.key == "newkeytest"
    assert obj_copy2.bucket == obj_copy.bucket
    assert obj_copy2.file_id == obj_copy.file_id

    # Copy to bucket: same key and file, different bucket.
    obj_copy3 = obj_copy2.copy(bucket=b2)
    assert obj_copy3.version_id != obj_copy2.version_id
    assert obj_copy3.key == obj_copy2.key
    assert obj_copy3.bucket == b2
    assert obj_copy3.file_id == obj_copy2.file_id